Start processing mod updates

This commit is contained in:
Tyler Hallada 2021-09-05 16:02:16 -04:00
parent 0989477b40
commit 804fac4eea
3 changed files with 70 additions and 21 deletions

View File

@ -1,7 +1,8 @@
use anyhow::Result; use anyhow::Result;
use argh::FromArgs; use argh::FromArgs;
use chrono::{NaiveDateTime, NaiveTime};
use dotenv::dotenv; use dotenv::dotenv;
use humansize::{FileSize, file_size_opts}; use humansize::{file_size_opts, FileSize};
use models::file::File; use models::file::File;
use models::game_mod::Mod; use models::game_mod::Mod;
use reqwest::StatusCode; use reqwest::StatusCode;
@ -160,14 +161,14 @@ async fn extract_with_unrar(
Err(err) => { Err(err) => {
warn!(error = %err, "failed to extract with unrar"); warn!(error = %err, "failed to extract with unrar");
file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?; file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?;
return Ok(()) return Ok(());
} }
Ok(extract) => extract Ok(extract) => extract,
}; };
if let Err(err) = extract.process() { if let Err(err) = extract.process() {
warn!(error = %err, "failed to extract with unrar"); warn!(error = %err, "failed to extract with unrar");
file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?; file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?;
return Ok(()) return Ok(());
} }
for file_path in plugin_file_paths.iter() { for file_path in plugin_file_paths.iter() {
@ -216,7 +217,7 @@ pub async fn main() -> Result<()> {
let scraped = mod_list_resp.scrape_mods()?; let scraped = mod_list_resp.scrape_mods()?;
has_next_page = scraped.has_next_page; has_next_page = scraped.has_next_page;
let processed_mods = game_mod::bulk_get_fully_processed_nexus_mod_ids( let processed_mods = game_mod::bulk_get_last_updated_by_nexus_mod_ids(
&pool, &pool,
&scraped &scraped
.mods .mods
@ -225,10 +226,22 @@ pub async fn main() -> Result<()> {
.collect::<Vec<i32>>(), .collect::<Vec<i32>>(),
) )
.await?; .await?;
let mods_to_create: Vec<UnsavedMod> = scraped let mods_to_create_or_update: Vec<UnsavedMod> = scraped
.mods .mods
.iter() .iter()
.filter(|scraped_mod| !processed_mods.contains(&scraped_mod.nexus_mod_id)) .filter(|scraped_mod| {
if let Some(processed_mod) = processed_mods
.iter()
.find(|processed_mod| processed_mod.nexus_mod_id == scraped_mod.nexus_mod_id)
{
if processed_mod.last_updated_files_at
> NaiveDateTime::new(scraped_mod.last_update, NaiveTime::from_hms(0, 0, 0))
{
return false;
}
}
true
})
.map(|scraped_mod| UnsavedMod { .map(|scraped_mod| UnsavedMod {
name: scraped_mod.name, name: scraped_mod.name,
nexus_mod_id: scraped_mod.nexus_mod_id, nexus_mod_id: scraped_mod.nexus_mod_id,
@ -239,7 +252,7 @@ pub async fn main() -> Result<()> {
}) })
.collect(); .collect();
let mods = game_mod::batched_insert(&pool, &mods_to_create).await?; let mods = game_mod::batched_insert(&pool, &mods_to_create_or_update).await?;
for db_mod in mods { for db_mod in mods {
let mod_span = info_span!("mod", name = ?&db_mod.name, id = &db_mod.nexus_mod_id); let mod_span = info_span!("mod", name = ?&db_mod.name, id = &db_mod.nexus_mod_id);
@ -316,7 +329,9 @@ pub async fn main() -> Result<()> {
} }
}; };
let humanized_size = api_file.size.file_size(file_size_opts::CONVENTIONAL) let humanized_size = api_file
.size
.file_size(file_size_opts::CONVENTIONAL)
.expect("unable to create human-readable file size"); .expect("unable to create human-readable file size");
info!(size = %humanized_size, "decided to download file"); info!(size = %humanized_size, "decided to download file");
let download_link_resp = let download_link_resp =
@ -361,7 +376,7 @@ pub async fn main() -> Result<()> {
warn!(initial_bytes = ?initial_bytes, "unable to determine file type of archive, skipping file"); warn!(initial_bytes = ?initial_bytes, "unable to determine file type of archive, skipping file");
file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?; file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?;
continue; continue;
}, }
}; };
info!( info!(
mime_type = kind.mime_type(), mime_type = kind.mime_type(),
@ -375,7 +390,15 @@ pub async fn main() -> Result<()> {
// https://github.com/libarchive/libarchive/issues/373, https://github.com/libarchive/libarchive/issues/1426 // https://github.com/libarchive/libarchive/issues/373, https://github.com/libarchive/libarchive/issues/1426
tokio_file.seek(SeekFrom::Start(0)).await?; tokio_file.seek(SeekFrom::Start(0)).await?;
let mut file = tokio_file.try_clone().await?.into_std().await; let mut file = tokio_file.try_clone().await?.into_std().await;
match extract_with_unrar(&mut file, &pool, &db_file, &db_mod, checked_metadata).await { match extract_with_unrar(
&mut file,
&pool,
&db_file,
&db_mod,
checked_metadata,
)
.await
{
Ok(_) => Ok(()), Ok(_) => Ok(()),
Err(err) => { Err(err) => {
// unrar failed to extract rar file (e.g. archive has unicode filenames) // unrar failed to extract rar file (e.g. archive has unicode filenames)
@ -396,7 +419,8 @@ pub async fn main() -> Result<()> {
if err if err
.downcast_ref::<extractors::compress_tools::ExtractorError>() .downcast_ref::<extractors::compress_tools::ExtractorError>()
.is_some() .is_some()
&& (kind.mime_type() == "application/zip" || kind.mime_type() == "application/x-7z-compressed") && (kind.mime_type() == "application/zip"
|| kind.mime_type() == "application/x-7z-compressed")
{ {
// compress_tools or libarchive failed to extract zip/7z file (e.g. archive is deflate64 compressed) // compress_tools or libarchive failed to extract zip/7z file (e.g. archive is deflate64 compressed)
// Attempt to uncompress the archive using `7z` unix command instead // Attempt to uncompress the archive using `7z` unix command instead

View File

@ -45,21 +45,31 @@ pub async fn get_by_nexus_mod_id(
.context("Failed to get mod") .context("Failed to get mod")
} }
/// One row of the bulk lookup done by `bulk_get_last_updated_by_nexus_mod_ids`:
/// a Nexus mod id together with the `last_updated_files_at` value stored for it.
pub struct ModLastUpdatedFilesAt {
/// Value of the `nexus_mod_id` column of the `mods` table.
pub nexus_mod_id: i32,
/// Value of the `last_updated_files_at` column; never NULL in rows produced by the lookup.
/// NOTE(review): `NaiveDateTime` carries no time zone - confirm which zone the column uses.
pub last_updated_files_at: NaiveDateTime,
}
/// Looks up `last_updated_files_at` for a batch of Nexus mod ids.
///
/// NOTE(review): this span is a side-by-side diff rendering; the description below
/// applies to the right-hand (post-commit) version of each line.
///
/// Selects `nexus_mod_id` and `last_updated_files_at` from `mods` for every row whose
/// `nexus_mod_id` is contained in `nexus_mod_ids` and whose `last_updated_files_at`
/// is not NULL. Ids without such a row are simply absent from the returned vector.
///
/// # Errors
/// Returns the query error with added context if the query fails.
///
/// # Panics
/// Only if a returned row has a NULL `last_updated_files_at`, which the
/// IS NOT NULL filter in the query rules out.
#[instrument(level = "debug", skip(pool))] #[instrument(level = "debug", skip(pool))]
pub async fn bulk_get_fully_processed_nexus_mod_ids( pub async fn bulk_get_last_updated_by_nexus_mod_ids(
pool: &sqlx::Pool<sqlx::Postgres>, pool: &sqlx::Pool<sqlx::Postgres>,
nexus_mod_ids: &[i32], nexus_mod_ids: &[i32],
) -> Result<Vec<i32>> { ) -> Result<Vec<ModLastUpdatedFilesAt>> {
sqlx::query!( sqlx::query!(
"SELECT nexus_mod_id FROM mods "SELECT nexus_mod_id, last_updated_files_at FROM mods
WHERE nexus_mod_id = ANY($1::int[]) WHERE nexus_mod_id = ANY($1::int[])
AND last_updated_files_at IS NOT NULL", AND last_updated_files_at IS NOT NULL",
nexus_mod_ids, nexus_mod_ids,
)
.map(|row| row.nexus_mod_id) .map(|row| ModLastUpdatedFilesAt {
nexus_mod_id: row.nexus_mod_id,
last_updated_files_at: row
.last_updated_files_at
.expect("last_updated_files_at is null"),
})
.fetch_all(pool) .fetch_all(pool)
.await .await
.context("Failed to get fully processed , last_updated_files_at: () mods") .context("Failed to bulk get last_updated_files_at by nexus_mod_ids")
}
#[instrument(level = "debug", skip(pool))] #[instrument(level = "debug", skip(pool))]

View File

@ -1,4 +1,5 @@
use anyhow::Result; use anyhow::Result;
use chrono::NaiveDate;
use reqwest::Client; use reqwest::Client;
use scraper::{Html, Selector}; use scraper::{Html, Selector};
use tracing::{info, instrument}; use tracing::{info, instrument};
@ -8,12 +9,14 @@ use crate::nexus_api::GAME_ID;
/// Wrapper around the parsed HTML document of one mod list page.
/// NOTE(review): presumably built by `get_mod_list_page` from the response body -
/// the construction is outside this view, confirm there.
pub struct ModListResponse { pub struct ModListResponse {
html: Html, html: Html,
} }
/// One mod entry scraped from a mod list page.
///
/// The string fields are borrowed text slices tied to the struct lifetime parameter.
/// NOTE(review): presumably they borrow from the parsed HTML held by `ModListResponse` - confirm.
/// `category` and `desc` hold the first text node of their element and are `None`
/// when that element has no text.
/// `last_update` is the date parsed from the `div.date` element with the
/// format `%d %b %Y`; it has day granularity only (no time of day).
pub struct ScrapedMod<'a> { pub struct ScrapedMod<'a> {
pub nexus_mod_id: i32, pub nexus_mod_id: i32,
pub name: &'a str, pub name: &'a str,
pub category: Option<&'a str>, pub category: Option<&'a str>,
pub author: &'a str, pub author: &'a str,
pub desc: Option<&'a str>, pub desc: Option<&'a str>,
pub last_update: NaiveDate,
} }
pub struct ModListScrape<'a> { pub struct ModListScrape<'a> {
@ -25,7 +28,7 @@ pub struct ModListScrape<'a> {
pub async fn get_mod_list_page(client: &Client, page: usize) -> Result<ModListResponse> { pub async fn get_mod_list_page(client: &Client, page: usize) -> Result<ModListResponse> {
let res = client let res = client
.get(format!( .get(format!(
"https://www.nexusmods.com/Core/Libs/Common/Widgets/ModList?RH_ModList=nav:true,home:false,type:0,user_id:0,game_id:{},advfilt:true,include_adult:true,page_size:20,show_game_filter:false,open:false,page:{},sort_by:OLD_u_downloads", "https://www.nexusmods.com/Core/Libs/Common/Widgets/ModList?RH_ModList=nav:true,home:false,type:0,user_id:0,game_id:{},advfilt:true,include_adult:true,page_size:20,show_game_filter:false,open:false,page:{},sort_by:lastupdate",
GAME_ID, GAME_ID,
page page
)) ))
@ -52,6 +55,7 @@ impl ModListResponse {
Selector::parse("div.category a").expect("failed to parse CSS selector"); Selector::parse("div.category a").expect("failed to parse CSS selector");
let author_select = Selector::parse("div.author a").expect("failed to parse CSS selector"); let author_select = Selector::parse("div.author a").expect("failed to parse CSS selector");
let desc_select = Selector::parse("p.desc").expect("failed to parse CSS selector"); let desc_select = Selector::parse("p.desc").expect("failed to parse CSS selector");
let last_update_select = Selector::parse("div.date").expect("failed to parse CSS selector");
let next_page_select = let next_page_select =
Selector::parse("div.pagination li.next").expect("failed to parse CSS selector"); Selector::parse("div.pagination li.next").expect("failed to parse CSS selector");
@ -86,9 +90,7 @@ impl ModListResponse {
.select(&category_select) .select(&category_select)
.next() .next()
.expect("Missing category link for mod"); .expect("Missing category link for mod");
let category = category_elem let category = category_elem.text().next();
.text()
.next();
let author_elem = right let author_elem = right
.select(&author_select) .select(&author_select)
.next() .next()
@ -102,6 +104,18 @@ impl ModListResponse {
.next() .next()
.expect("Missing desc elem for mod"); .expect("Missing desc elem for mod");
let desc = desc_elem.text().next(); let desc = desc_elem.text().next();
let last_update_elem = right
.select(&last_update_select)
.next()
.expect("Missing last update elem for mod");
let last_update = last_update_elem
.text()
.skip(1)
.next()
.expect("Missing last update text for mod")
.trim();
let last_update = NaiveDate::parse_from_str(last_update, "%d %b %Y")
.expect("Cannot parse last update date");
ScrapedMod { ScrapedMod {
nexus_mod_id, nexus_mod_id,
@ -109,6 +123,7 @@ impl ModListResponse {
category, category,
author, author,
desc, desc,
last_update,
} }
}) })
.collect(); .collect();