Add is_translation to mod with backfill

Now, the update command scrapes non-translation mods and translation mods in two separate passes, so the is_translation value is set correctly for newly scraped mods going forward.
This commit is contained in:
Tyler Hallada 2022-03-15 23:59:56 -04:00
parent 5d55e78283
commit a42c22cf4b
8 changed files with 339 additions and 262 deletions

View File

@ -0,0 +1 @@
-- Flag marking a mod as a translation of another mod; defaults to FALSE so
-- all existing rows are treated as non-translation mods until backfilled.
ALTER TABLE "mods" ADD COLUMN "is_translation" BOOL NOT NULL DEFAULT FALSE;

View File

@ -0,0 +1,54 @@
use anyhow::{Context, Result};
use std::time::Duration;
use tokio::time::sleep;
use tracing::{debug, info, info_span};
use crate::nexus_scraper;
const REQUEST_TIMEOUT: Duration = Duration::from_secs(7200); // 2 hours
const CONNECT_TIMEOUT: Duration = Duration::from_secs(30);
/// Row shape for the `UPDATE ... RETURNING id` query below; only the
/// database `id` of each updated mod is needed for logging.
struct UpdatedMods {
id: i32,
}
/// Backfills the `is_translation` column for mods already in the database.
///
/// Pages through the translation-filtered Nexus Mods listing (the `true`
/// argument to `get_mod_list_page`) and, for each page, bulk-updates every
/// matching row in `mods` to `is_translation = true`. Stops when the scraper
/// reports no next page, sleeping one second between pages to throttle
/// requests. Returns an error if any HTTP request, scrape, or query fails.
pub async fn backfill_is_translation(pool: &sqlx::Pool<sqlx::Postgres>) -> Result<()> {
let mut page = 0;
let mut has_next_page = true;
let client = reqwest::Client::builder()
.timeout(REQUEST_TIMEOUT)
.connect_timeout(CONNECT_TIMEOUT)
.build()?;
while has_next_page {
// NOTE(review): this span guard is held across the `.await`s below; the
// tracing docs recommend `Instrument::instrument` for async code — the
// same pattern appears elsewhere in this codebase, so confirm before
// changing it in only one place.
let page_span = info_span!("page", page);
let _page_span = page_span.enter();
// `true` => request the translation-tagged mod list.
let mod_list_resp = nexus_scraper::get_mod_list_page(&client, page, true).await?;
let scraped = mod_list_resp.scrape_mods()?;
let scraped_ids: Vec<i32> = scraped.mods.iter().map(|m| m.nexus_mod_id).collect();
has_next_page = scraped.has_next_page;
// Flag all mods on this page as translations in a single bulk UPDATE;
// mods not yet in the database are simply skipped by the WHERE clause.
let updated_ids: Vec<i32> = sqlx::query_as!(
UpdatedMods,
"UPDATE mods
SET is_translation = true
WHERE nexus_mod_id = ANY($1::int[])
RETURNING id",
&scraped_ids,
)
.fetch_all(pool)
.await
.context("Failed to update mod is_translation values")?
.iter()
.map(|u| u.id)
.collect();
info!(?updated_ids, "updated mods is_translation values");
page += 1;
// Throttle between pages to avoid hammering the Nexus Mods site.
debug!(?page, ?has_next_page, "sleeping 1 second");
sleep(Duration::from_secs(1)).await;
}
Ok(())
}

View File

@ -0,0 +1,3 @@
//! One-off backfill commands for retroactively populating columns on
//! existing database rows.
pub mod is_translation;
pub use is_translation::backfill_is_translation;

View File

@ -1,3 +1,4 @@
pub mod backfills;
pub mod download_tiles; pub mod download_tiles;
pub mod dump_cell_data; pub mod dump_cell_data;
pub mod dump_cell_edit_counts; pub mod dump_cell_edit_counts;

View File

@ -24,276 +24,286 @@ pub async fn update(
start_page: usize, start_page: usize,
full: bool, full: bool,
) -> Result<()> { ) -> Result<()> {
let mut page = start_page; for include_translations in [false, true] {
let mut has_next_page = true; let mut page = start_page;
let mut pages_with_no_updates = 0; let mut has_next_page = true;
let mut pages_with_no_updates = 0;
let game = game::insert(&pool, GAME_NAME, GAME_ID as i32).await?; let game = game::insert(&pool, GAME_NAME, GAME_ID as i32).await?;
let client = reqwest::Client::builder() let client = reqwest::Client::builder()
.timeout(REQUEST_TIMEOUT) .timeout(REQUEST_TIMEOUT)
.connect_timeout(CONNECT_TIMEOUT) .connect_timeout(CONNECT_TIMEOUT)
.build()?; .build()?;
while has_next_page { while has_next_page {
if !full && pages_with_no_updates >= 50 { if !full && pages_with_no_updates >= 50 {
warn!("No updates found for 50 pages in a row, aborting"); warn!("No updates found for 50 pages in a row, aborting");
break; break;
} }
let page_span = info_span!("page", page); let page_span = info_span!("page", page, include_translations);
let _page_span = page_span.enter(); let _page_span = page_span.enter();
let mod_list_resp = nexus_scraper::get_mod_list_page(&client, page).await?; let mod_list_resp =
let scraped = mod_list_resp.scrape_mods()?; nexus_scraper::get_mod_list_page(&client, page, include_translations).await?;
let scraped = mod_list_resp.scrape_mods()?;
has_next_page = scraped.has_next_page; has_next_page = scraped.has_next_page;
let processed_mods = game_mod::bulk_get_last_updated_by_nexus_mod_ids( let processed_mods = game_mod::bulk_get_last_updated_by_nexus_mod_ids(
&pool, &pool,
&scraped &scraped
.mods
.iter()
.map(|scraped_mod| scraped_mod.nexus_mod_id)
.collect::<Vec<i32>>(),
)
.await?;
let mods_to_create_or_update: Vec<UnsavedMod> = scraped
.mods .mods
.iter() .iter()
.map(|scraped_mod| scraped_mod.nexus_mod_id) .filter(|scraped_mod| {
.collect::<Vec<i32>>(), if let Some(processed_mod) = processed_mods.iter().find(|processed_mod| {
) processed_mod.nexus_mod_id == scraped_mod.nexus_mod_id
.await?; }) {
let mods_to_create_or_update: Vec<UnsavedMod> = scraped if processed_mod.last_updated_files_at
.mods > NaiveDateTime::new(
.iter() scraped_mod.last_update_at,
.filter(|scraped_mod| { NaiveTime::from_hms(0, 0, 0),
if let Some(processed_mod) = processed_mods )
.iter() {
.find(|processed_mod| processed_mod.nexus_mod_id == scraped_mod.nexus_mod_id) return false;
{ }
if processed_mod.last_updated_files_at
> NaiveDateTime::new(
scraped_mod.last_update_at,
NaiveTime::from_hms(0, 0, 0),
)
{
return false;
} }
} true
true })
}) .map(|scraped_mod| UnsavedMod {
.map(|scraped_mod| UnsavedMod { name: scraped_mod.name,
name: scraped_mod.name, nexus_mod_id: scraped_mod.nexus_mod_id,
nexus_mod_id: scraped_mod.nexus_mod_id, author_name: scraped_mod.author_name,
author_name: scraped_mod.author_name, author_id: scraped_mod.author_id,
author_id: scraped_mod.author_id, category_name: scraped_mod.category_name,
category_name: scraped_mod.category_name, category_id: scraped_mod.category_id,
category_id: scraped_mod.category_id, description: scraped_mod.desc,
description: scraped_mod.desc, thumbnail_link: scraped_mod.thumbnail_link,
thumbnail_link: scraped_mod.thumbnail_link, game_id: game.id,
game_id: game.id, is_translation: include_translations,
last_update_at: NaiveDateTime::new( last_update_at: NaiveDateTime::new(
scraped_mod.last_update_at, scraped_mod.last_update_at,
NaiveTime::from_hms(0, 0, 0), NaiveTime::from_hms(0, 0, 0),
), ),
first_upload_at: NaiveDateTime::new( first_upload_at: NaiveDateTime::new(
scraped_mod.first_upload_at, scraped_mod.first_upload_at,
NaiveTime::from_hms(0, 0, 0), NaiveTime::from_hms(0, 0, 0),
), ),
}) })
.collect(); .collect();
let mods = game_mod::batched_insert(&pool, &mods_to_create_or_update).await?; let mods = game_mod::batched_insert(&pool, &mods_to_create_or_update).await?;
if mods.is_empty() { if mods.is_empty() {
pages_with_no_updates += 1; pages_with_no_updates += 1;
} else { } else {
pages_with_no_updates = 0; pages_with_no_updates = 0;
} }
for db_mod in mods { for db_mod in mods {
let mod_span = info_span!("mod", name = ?&db_mod.name, id = &db_mod.nexus_mod_id); let mod_span = info_span!("mod", name = ?&db_mod.name, id = &db_mod.nexus_mod_id);
let _mod_span = mod_span.enter(); let _mod_span = mod_span.enter();
let files_resp = nexus_api::files::get(&client, db_mod.nexus_mod_id).await?; let files_resp = nexus_api::files::get(&client, db_mod.nexus_mod_id).await?;
debug!(duration = ?files_resp.wait, "sleeping"); debug!(duration = ?files_resp.wait, "sleeping");
sleep(files_resp.wait).await; sleep(files_resp.wait).await;
// Filter out replaced/deleted files (indicated by null category) and archived files // Filter out replaced/deleted files (indicated by null category) and archived files
let files = files_resp let files = files_resp
.files()? .files()?
.into_iter()
.filter(|file| match file.category {
None => {
info!(
name = file.file_name,
id = file.file_id,
"skipping file with no category"
);
false
}
Some(category) if category == "ARCHIVED" => false,
Some(_) => true,
});
let processed_file_ids: HashSet<i32> =
file::get_processed_nexus_file_ids_by_mod_id(&pool, db_mod.id)
.await?
.into_iter() .into_iter()
.collect(); .filter(|file| match file.category {
None => {
for api_file in files { info!(
let file_span = name = file.file_name,
info_span!("file", name = &api_file.file_name, id = &api_file.file_id,); id = file.file_id,
let _file_span = file_span.enter(); "skipping file with no category"
if processed_file_ids.contains(&(api_file.file_id as i32)) {
info!("skipping file already present and processed in database");
continue;
}
let db_file = file::insert(
&pool,
&file::UnsavedFile {
name: api_file.name,
file_name: api_file.file_name,
nexus_file_id: api_file.file_id as i32,
mod_id: db_mod.id,
category: api_file.category,
version: api_file.version,
mod_version: api_file.mod_version,
size: api_file.size,
uploaded_at: api_file.uploaded_at,
},
)
.await?;
let mut checked_metadata = false;
match nexus_api::metadata::contains_plugin(&client, &api_file).await {
Ok(contains_plugin) => {
if let Some(contains_plugin) = contains_plugin {
checked_metadata = true;
if !contains_plugin {
info!("file metadata does not contain a plugin, skip downloading");
file::update_has_plugin(&pool, db_file.id, false).await?;
continue;
}
} else {
warn!("file has no metadata link, continuing with download");
}
}
Err(err) => {
warn!(error = %err, "error retreiving metadata for file, continuing with download");
}
};
let humanized_size = api_file
.size
.file_size(file_size_opts::CONVENTIONAL)
.expect("unable to create human-readable file size");
info!(size = %humanized_size, "decided to download file");
let download_link_resp =
nexus_api::download_link::get(&client, db_mod.nexus_mod_id, api_file.file_id)
.await;
if let Err(err) = &download_link_resp {
if let Some(reqwest_err) = err.downcast_ref::<reqwest::Error>() {
if reqwest_err.status() == Some(StatusCode::NOT_FOUND) {
warn!(
status = ?reqwest_err.status(),
"failed to get download link for file, skipping file"
); );
file::update_has_download_link(&pool, db_file.id, false).await?; false
continue;
} }
} Some(category) if category == "ARCHIVED" => false,
} Some(_) => true,
let download_link_resp = download_link_resp?; });
let mut tokio_file = match download_link_resp.download_file(&client).await { let processed_file_ids: HashSet<i32> =
Ok(file) => { file::get_processed_nexus_file_ids_by_mod_id(&pool, db_mod.id)
info!(bytes = api_file.size, "download finished"); .await?
file::update_downloaded_at(&pool, db_file.id).await?; .into_iter()
file .collect();
}
Err(err) => { for api_file in files {
warn!(error = %err, "failed all attempts at downloading file, skipping file"); let file_span =
info_span!("file", name = &api_file.file_name, id = &api_file.file_id,);
let _file_span = file_span.enter();
if processed_file_ids.contains(&(api_file.file_id as i32)) {
info!("skipping file already present and processed in database");
continue; continue;
} }
}; let db_file = file::insert(
&pool,
&file::UnsavedFile {
name: api_file.name,
file_name: api_file.file_name,
nexus_file_id: api_file.file_id as i32,
mod_id: db_mod.id,
category: api_file.category,
version: api_file.version,
mod_version: api_file.mod_version,
size: api_file.size,
uploaded_at: api_file.uploaded_at,
},
)
.await?;
let mut initial_bytes = [0; 8]; let mut checked_metadata = false;
tokio_file.seek(SeekFrom::Start(0)).await?; match nexus_api::metadata::contains_plugin(&client, &api_file).await {
if let Err(err) = tokio_file.read_exact(&mut initial_bytes).await { Ok(contains_plugin) => {
warn!(error = %err, "failed to read initial bytes, skipping file"); if let Some(contains_plugin) = contains_plugin {
file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?; checked_metadata = true;
continue; if !contains_plugin {
} info!(
let kind = match infer::get(&initial_bytes) { "file metadata does not contain a plugin, skip downloading"
Some(kind) => kind, );
None => { file::update_has_plugin(&pool, db_file.id, false).await?;
warn!(initial_bytes = ?initial_bytes, "unable to determine file type of archive, skipping file"); continue;
}
} else {
warn!("file has no metadata link, continuing with download");
}
}
Err(err) => {
warn!(error = %err, "error retreiving metadata for file, continuing with download");
}
};
let humanized_size = api_file
.size
.file_size(file_size_opts::CONVENTIONAL)
.expect("unable to create human-readable file size");
info!(size = %humanized_size, "decided to download file");
let download_link_resp = nexus_api::download_link::get(
&client,
db_mod.nexus_mod_id,
api_file.file_id,
)
.await;
if let Err(err) = &download_link_resp {
if let Some(reqwest_err) = err.downcast_ref::<reqwest::Error>() {
if reqwest_err.status() == Some(StatusCode::NOT_FOUND) {
warn!(
status = ?reqwest_err.status(),
"failed to get download link for file, skipping file"
);
file::update_has_download_link(&pool, db_file.id, false).await?;
continue;
}
}
}
let download_link_resp = download_link_resp?;
let mut tokio_file = match download_link_resp.download_file(&client).await {
Ok(file) => {
info!(bytes = api_file.size, "download finished");
file::update_downloaded_at(&pool, db_file.id).await?;
file
}
Err(err) => {
warn!(error = %err, "failed all attempts at downloading file, skipping file");
continue;
}
};
let mut initial_bytes = [0; 8];
tokio_file.seek(SeekFrom::Start(0)).await?;
if let Err(err) = tokio_file.read_exact(&mut initial_bytes).await {
warn!(error = %err, "failed to read initial bytes, skipping file");
file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?; file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?;
continue; continue;
} }
}; let kind = match infer::get(&initial_bytes) {
info!( Some(kind) => kind,
mime_type = kind.mime_type(), None => {
"inferred mime_type of downloaded archive" warn!(initial_bytes = ?initial_bytes, "unable to determine file type of archive, skipping file");
); file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?;
continue;
}
};
info!(
mime_type = kind.mime_type(),
"inferred mime_type of downloaded archive"
);
match kind.mime_type() { match kind.mime_type() {
"application/vnd.rar" => { "application/vnd.rar" => {
info!("downloaded archive is RAR archive, attempt to uncompress entire archive"); info!("downloaded archive is RAR archive, attempt to uncompress entire archive");
// Use unrar to uncompress the entire .rar file to avoid bugs with compress_tools uncompressing certain .rar files: // Use unrar to uncompress the entire .rar file to avoid bugs with compress_tools uncompressing certain .rar files:
// https://github.com/libarchive/libarchive/issues/373, https://github.com/libarchive/libarchive/issues/1426 // https://github.com/libarchive/libarchive/issues/373, https://github.com/libarchive/libarchive/issues/1426
tokio_file.seek(SeekFrom::Start(0)).await?; tokio_file.seek(SeekFrom::Start(0)).await?;
let mut file = tokio_file.try_clone().await?.into_std().await; let mut file = tokio_file.try_clone().await?.into_std().await;
match extract_with_unrar( match extract_with_unrar(
&mut file, &mut file,
&pool, &pool,
&db_file, &db_file,
&db_mod, &db_mod,
checked_metadata, checked_metadata,
) )
.await .await
{ {
Ok(_) => Ok(()), Ok(_) => Ok(()),
Err(err) => { Err(err) => {
// unrar failed to extract rar file (e.g. archive has unicode filenames) // unrar failed to extract rar file (e.g. archive has unicode filenames)
// Attempt to uncompress the archive using `7z` unix command instead
warn!(error = %err, "failed to extract file with unrar, extracting whole archive with 7z instead");
extract_with_7zip(&mut file, &pool, &db_file, &db_mod).await
}
}?;
}
_ => {
tokio_file.seek(SeekFrom::Start(0)).await?;
let mut file = tokio_file.try_clone().await?.into_std().await;
match extract_with_compress_tools(&mut file, &pool, &db_file, &db_mod).await
{
Ok(_) => Ok(()),
Err(err) => {
if err
.downcast_ref::<extractors::compress_tools::ExtractorError>()
.is_some()
&& (kind.mime_type() == "application/zip"
|| kind.mime_type() == "application/x-7z-compressed")
{
// compress_tools or libarchive failed to extract zip/7z file (e.g. archive is deflate64 compressed)
// Attempt to uncompress the archive using `7z` unix command instead // Attempt to uncompress the archive using `7z` unix command instead
warn!(error = %err, "failed to extract file with compress_tools, extracting whole archive with 7z instead"); warn!(error = %err, "failed to extract file with unrar, extracting whole archive with 7z instead");
extract_with_7zip(&mut file, &pool, &db_file, &db_mod).await extract_with_7zip(&mut file, &pool, &db_file, &db_mod).await
} else {
Err(err)
} }
} }?;
}?; }
_ => {
tokio_file.seek(SeekFrom::Start(0)).await?;
let mut file = tokio_file.try_clone().await?.into_std().await;
match extract_with_compress_tools(&mut file, &pool, &db_file, &db_mod)
.await
{
Ok(_) => Ok(()),
Err(err) => {
if err
.downcast_ref::<extractors::compress_tools::ExtractorError>(
)
.is_some()
&& (kind.mime_type() == "application/zip"
|| kind.mime_type() == "application/x-7z-compressed")
{
// compress_tools or libarchive failed to extract zip/7z file (e.g. archive is deflate64 compressed)
// Attempt to uncompress the archive using `7z` unix command instead
warn!(error = %err, "failed to extract file with compress_tools, extracting whole archive with 7z instead");
extract_with_7zip(&mut file, &pool, &db_file, &db_mod).await
} else {
Err(err)
}
}
}?;
}
} }
debug!(duration = ?download_link_resp.wait, "sleeping");
sleep(download_link_resp.wait).await;
} }
debug!(duration = ?download_link_resp.wait, "sleeping"); game_mod::update_last_updated_files_at(&pool, db_mod.id).await?;
sleep(download_link_resp.wait).await;
} }
game_mod::update_last_updated_files_at(&pool, db_mod.id).await?; page += 1;
debug!(?page, ?has_next_page, "sleeping 1 second");
sleep(Duration::from_secs(1)).await;
} }
page += 1;
debug!(?page, ?has_next_page, "sleeping 1 second");
sleep(Duration::from_secs(1)).await;
} }
Ok(()) Ok(())

View File

@ -12,8 +12,8 @@ mod nexus_scraper;
mod plugin_processor; mod plugin_processor;
use commands::{ use commands::{
download_tiles, dump_cell_data, dump_cell_edit_counts, dump_mod_data, dump_mod_search_index, backfills::backfill_is_translation, download_tiles, dump_cell_data, dump_cell_edit_counts,
dump_plugin_data, update, dump_mod_data, dump_mod_search_index, dump_plugin_data, update,
}; };
#[derive(FromArgs)] #[derive(FromArgs)]
@ -23,7 +23,7 @@ struct Args {
/// the page number to start scraping for mods on nexus mods /// the page number to start scraping for mods on nexus mods
page: usize, page: usize,
#[argh(option, short = 'f', default = "false")] #[argh(switch, short = 'f')]
/// enable full scrape of all pages, rather than stopping after 50 pages of no updates /// enable full scrape of all pages, rather than stopping after 50 pages of no updates
full: bool, full: bool,
@ -50,6 +50,10 @@ struct Args {
/// folder to output all map tile images downloaded from the UESP wiki /// folder to output all map tile images downloaded from the UESP wiki
#[argh(option, short = 't')] #[argh(option, short = 't')]
download_tiles: Option<String>, download_tiles: Option<String>,
/// backfill the is_translation column in the mods table
#[argh(switch)]
backfill_is_translation: bool,
} }
#[tokio::main] #[tokio::main]
@ -83,6 +87,9 @@ pub async fn main() -> Result<()> {
if let Some(dir) = args.download_tiles { if let Some(dir) = args.download_tiles {
return download_tiles(&dir).await; return download_tiles(&dir).await;
} }
if args.backfill_is_translation {
return backfill_is_translation(&pool).await;
}
return update(&pool, args.page, args.full).await; return update(&pool, args.page, args.full).await;
} }

View File

@ -20,6 +20,7 @@ pub struct Mod {
pub description: Option<String>, pub description: Option<String>,
pub thumbnail_link: Option<String>, pub thumbnail_link: Option<String>,
pub game_id: i32, pub game_id: i32,
pub is_translation: bool,
pub updated_at: NaiveDateTime, pub updated_at: NaiveDateTime,
pub created_at: NaiveDateTime, pub created_at: NaiveDateTime,
pub last_update_at: NaiveDateTime, pub last_update_at: NaiveDateTime,
@ -38,6 +39,7 @@ pub struct UnsavedMod<'a> {
pub description: Option<&'a str>, pub description: Option<&'a str>,
pub thumbnail_link: Option<&'a str>, pub thumbnail_link: Option<&'a str>,
pub game_id: i32, pub game_id: i32,
pub is_translation: bool,
pub last_update_at: NaiveDateTime, pub last_update_at: NaiveDateTime,
pub first_upload_at: NaiveDateTime, pub first_upload_at: NaiveDateTime,
} }
@ -61,6 +63,7 @@ pub struct ModWithCells {
pub description: Option<String>, pub description: Option<String>,
pub thumbnail_link: Option<String>, pub thumbnail_link: Option<String>,
pub game_id: i32, pub game_id: i32,
pub is_translation: bool,
pub updated_at: NaiveDateTime, pub updated_at: NaiveDateTime,
pub created_at: NaiveDateTime, pub created_at: NaiveDateTime,
pub last_update_at: NaiveDateTime, pub last_update_at: NaiveDateTime,
@ -123,17 +126,18 @@ pub async fn insert(
description: Option<&str>, description: Option<&str>,
thumbnail_link: Option<&str>, thumbnail_link: Option<&str>,
game_id: i32, game_id: i32,
is_translation: bool,
last_update_at: NaiveDateTime, last_update_at: NaiveDateTime,
first_upload_at: NaiveDateTime, first_upload_at: NaiveDateTime,
) -> Result<Mod> { ) -> Result<Mod> {
sqlx::query_as!( sqlx::query_as!(
Mod, Mod,
"INSERT INTO mods "INSERT INTO mods
(name, nexus_mod_id, author_name, author_id, category_name, category_id, description, thumbnail_link, game_id, last_update_at, first_upload_at, created_at, updated_at) (name, nexus_mod_id, author_name, author_id, category_name, category_id, description, thumbnail_link, game_id, is_translation, last_update_at, first_upload_at, created_at, updated_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, now(), now()) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, now(), now())
ON CONFLICT (game_id, nexus_mod_id) DO UPDATE ON CONFLICT (game_id, nexus_mod_id) DO UPDATE
SET (name, author_name, author_id, category_name, category_id, description, thumbnail_link, last_update_at, first_upload_at, updated_at) = SET (name, author_name, author_id, category_name, category_id, description, thumbnail_link, is_translation, last_update_at, first_upload_at, updated_at) =
(EXCLUDED.name, EXCLUDED.author_name, EXCLUDED.author_id, EXCLUDED.category_name, EXCLUDED.category_id, EXCLUDED.description, EXCLUDED.thumbnail_link, EXCLUDED.last_update_at, EXCLUDED.first_upload_at, now()) (EXCLUDED.name, EXCLUDED.author_name, EXCLUDED.author_id, EXCLUDED.category_name, EXCLUDED.category_id, EXCLUDED.description, EXCLUDED.thumbnail_link, EXCLUDED.is_translation, EXCLUDED.last_update_at, EXCLUDED.first_upload_at, now())
RETURNING *", RETURNING *",
name, name,
nexus_mod_id, nexus_mod_id,
@ -144,6 +148,7 @@ pub async fn insert(
description, description,
thumbnail_link, thumbnail_link,
game_id, game_id,
is_translation,
last_update_at, last_update_at,
first_upload_at first_upload_at
) )
@ -168,6 +173,7 @@ pub async fn batched_insert<'a>(
let mut descriptions: Vec<Option<&str>> = vec![]; let mut descriptions: Vec<Option<&str>> = vec![];
let mut thumbnail_links: Vec<Option<&str>> = vec![]; let mut thumbnail_links: Vec<Option<&str>> = vec![];
let mut game_ids: Vec<i32> = vec![]; let mut game_ids: Vec<i32> = vec![];
let mut is_translations: Vec<bool> = vec![];
let mut last_update_ats: Vec<NaiveDateTime> = vec![]; let mut last_update_ats: Vec<NaiveDateTime> = vec![];
let mut first_upload_ats: Vec<NaiveDateTime> = vec![]; let mut first_upload_ats: Vec<NaiveDateTime> = vec![];
batch.iter().for_each(|unsaved_mod| { batch.iter().for_each(|unsaved_mod| {
@ -180,6 +186,7 @@ pub async fn batched_insert<'a>(
descriptions.push(unsaved_mod.description); descriptions.push(unsaved_mod.description);
thumbnail_links.push(unsaved_mod.thumbnail_link); thumbnail_links.push(unsaved_mod.thumbnail_link);
game_ids.push(unsaved_mod.game_id); game_ids.push(unsaved_mod.game_id);
is_translations.push(unsaved_mod.is_translation);
last_update_ats.push(unsaved_mod.last_update_at); last_update_ats.push(unsaved_mod.last_update_at);
first_upload_ats.push(unsaved_mod.first_upload_at); first_upload_ats.push(unsaved_mod.first_upload_at);
}); });
@ -187,12 +194,12 @@ pub async fn batched_insert<'a>(
// sqlx doesn't understand arrays of Options with the query_as! macro // sqlx doesn't understand arrays of Options with the query_as! macro
&mut sqlx::query_as( &mut sqlx::query_as(
r#"INSERT INTO mods r#"INSERT INTO mods
(name, nexus_mod_id, author_name, author_id, category_name, category_id, description, thumbnail_link, game_id, last_update_at, first_upload_at, created_at, updated_at) (name, nexus_mod_id, author_name, author_id, category_name, category_id, description, thumbnail_link, game_id, is_translation, last_update_at, first_upload_at, created_at, updated_at)
SELECT *, now(), now() SELECT *, now(), now()
FROM UNNEST($1::text[], $2::int[], $3::text[], $4::int[], $5::text[], $6::int[], $7::text[], $8::text[], $9::int[], $10::timestamp(3)[], $11::timestamp(3)[]) FROM UNNEST($1::text[], $2::int[], $3::text[], $4::int[], $5::text[], $6::int[], $7::text[], $8::text[], $9::int[], $10::bool[], $11::timestamp(3)[], $12::timestamp(3)[])
ON CONFLICT (game_id, nexus_mod_id) DO UPDATE ON CONFLICT (game_id, nexus_mod_id) DO UPDATE
SET (name, author_name, author_id, category_name, category_id, description, thumbnail_link, last_update_at, first_upload_at, updated_at) = SET (name, author_name, author_id, category_name, category_id, description, thumbnail_link, is_translation, last_update_at, first_upload_at, updated_at) =
(EXCLUDED.name, EXCLUDED.author_name, EXCLUDED.author_id, EXCLUDED.category_name, EXCLUDED.category_id, EXCLUDED.description, EXCLUDED.thumbnail_link, EXCLUDED.last_update_at, EXCLUDED.first_upload_at, now()) (EXCLUDED.name, EXCLUDED.author_name, EXCLUDED.author_id, EXCLUDED.category_name, EXCLUDED.category_id, EXCLUDED.description, EXCLUDED.thumbnail_link, EXCLUDED.is_translation, EXCLUDED.last_update_at, EXCLUDED.first_upload_at, now())
RETURNING *"#, RETURNING *"#,
) )
.bind(&names) .bind(&names)
@ -204,6 +211,7 @@ pub async fn batched_insert<'a>(
.bind(&descriptions) .bind(&descriptions)
.bind(&thumbnail_links) .bind(&thumbnail_links)
.bind(&game_ids) .bind(&game_ids)
.bind(&is_translations)
.bind(&last_update_ats) .bind(&last_update_ats)
.bind(&first_upload_ats) .bind(&first_upload_ats)
.fetch_all(pool) .fetch_all(pool)
@ -240,18 +248,6 @@ pub async fn update_last_updated_files_at(
.context("Failed to update mod") .context("Failed to update mod")
} }
#[instrument(level = "debug", skip(pool))]
pub async fn bulk_get_need_backfill(pool: &sqlx::Pool<sqlx::Postgres>) -> Result<Vec<Mod>> {
sqlx::query_as!(
Mod,
"SELECT * FROM mods
WHERE author_id IS NULL"
)
.fetch_all(pool)
.await
.context("Failed to bulk get need backfill")
}
#[instrument(level = "debug", skip(pool, game_mod, mod_data))] #[instrument(level = "debug", skip(pool, game_mod, mod_data))]
pub async fn update_from_api_response<'a>( pub async fn update_from_api_response<'a>(
pool: &sqlx::Pool<sqlx::Postgres>, pool: &sqlx::Pool<sqlx::Postgres>,

View File

@ -29,11 +29,16 @@ pub struct ModListScrape<'a> {
} }
#[instrument(skip(client))] #[instrument(skip(client))]
pub async fn get_mod_list_page(client: &Client, page: usize) -> Result<ModListResponse> { pub async fn get_mod_list_page(
client: &Client,
page: usize,
include_translations: bool,
) -> Result<ModListResponse> {
let res = client let res = client
.get(format!( .get(format!(
"https://www.nexusmods.com/Core/Libs/Common/Widgets/ModList?RH_ModList=nav:true,home:false,type:0,user_id:0,game_id:{},advfilt:true,include_adult:true,page_size:20,show_game_filter:false,open:false,page:{},sort_by:lastupdate", "https://www.nexusmods.com/Core/Libs/Common/Widgets/ModList?RH_ModList=nav:true,home:false,type:0,user_id:0,game_id:{},advfilt:true,tags_{}%5B%5D:1428,include_adult:true,page_size:20,show_game_filter:false,open:false,page:{},sort_by:lastupdate",
GAME_ID, GAME_ID,
match include_translations { true => "yes", false => "no" },
page page
)) ))
.send() .send()