Start processing mod updates
parent 0989477b40
commit 804fac4eea

 src/main.rs | 48
@@ -1,7 +1,8 @@
 use anyhow::Result;
 use argh::FromArgs;
+use chrono::{NaiveDateTime, NaiveTime};
 use dotenv::dotenv;
-use humansize::{FileSize, file_size_opts};
+use humansize::{file_size_opts, FileSize};
 use models::file::File;
 use models::game_mod::Mod;
 use reqwest::StatusCode;
@@ -160,14 +161,14 @@ async fn extract_with_unrar(
         Err(err) => {
             warn!(error = %err, "failed to extract with unrar");
             file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?;
-            return Ok(())
+            return Ok(());
         }
-        Ok(extract) => extract
+        Ok(extract) => extract,
     };
     if let Err(err) = extract.process() {
         warn!(error = %err, "failed to extract with unrar");
         file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?;
-        return Ok(())
+        return Ok(());
     }

     for file_path in plugin_file_paths.iter() {
@@ -216,7 +217,7 @@ pub async fn main() -> Result<()> {
         let scraped = mod_list_resp.scrape_mods()?;

         has_next_page = scraped.has_next_page;
-        let processed_mods = game_mod::bulk_get_fully_processed_nexus_mod_ids(
+        let processed_mods = game_mod::bulk_get_last_updated_by_nexus_mod_ids(
             &pool,
             &scraped
                 .mods
@@ -225,10 +226,22 @@ pub async fn main() -> Result<()> {
                 .collect::<Vec<i32>>(),
         )
         .await?;
-        let mods_to_create: Vec<UnsavedMod> = scraped
+        let mods_to_create_or_update: Vec<UnsavedMod> = scraped
             .mods
             .iter()
-            .filter(|scraped_mod| !processed_mods.contains(&scraped_mod.nexus_mod_id))
+            .filter(|scraped_mod| {
+                if let Some(processed_mod) = processed_mods
+                    .iter()
+                    .find(|processed_mod| processed_mod.nexus_mod_id == scraped_mod.nexus_mod_id)
+                {
+                    if processed_mod.last_updated_files_at
+                        > NaiveDateTime::new(scraped_mod.last_update, NaiveTime::from_hms(0, 0, 0))
+                    {
+                        return false;
+                    }
+                }
+                true
+            })
             .map(|scraped_mod| UnsavedMod {
                 name: scraped_mod.name,
                 nexus_mod_id: scraped_mod.nexus_mod_id,
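The filter above decides whether a scraped mod still needs processing by comparing the stored database timestamp against the scraped last-update date taken at midnight. Below is a minimal, self-contained sketch of that decision, not the project's code; the helper name `needs_update` and the dates are invented for illustration.

    use chrono::{NaiveDate, NaiveDateTime, NaiveTime};

    // Returns true when a scraped mod should be (re)processed, false when the
    // database already has data newer than the scraped "last update" date.
    fn needs_update(stored: Option<NaiveDateTime>, scraped_last_update: NaiveDate) -> bool {
        // Scraped pages only give a date, so treat it as midnight of that day.
        let scraped_at_midnight =
            NaiveDateTime::new(scraped_last_update, NaiveTime::from_hms(0, 0, 0));
        match stored {
            // Already updated after the scraped date: skip (the `return false` branch above).
            Some(last_updated_files_at) if last_updated_files_at > scraped_at_midnight => false,
            // Unknown mod, or the mod page reports a newer update: process it.
            _ => true,
        }
    }

    fn main() {
        // Hypothetical dates: the stored timestamp is newer than the scraped date, so skip.
        let stored = Some(NaiveDateTime::new(
            NaiveDate::from_ymd(2021, 11, 14),
            NaiveTime::from_hms(12, 0, 0),
        ));
        assert!(!needs_update(stored, NaiveDate::from_ymd(2021, 11, 10)));
        // A mod that was never processed is always picked up.
        assert!(needs_update(None, NaiveDate::from_ymd(2021, 11, 10)));
    }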
@@ -239,7 +252,7 @@ pub async fn main() -> Result<()> {
             })
             .collect();

-        let mods = game_mod::batched_insert(&pool, &mods_to_create).await?;
+        let mods = game_mod::batched_insert(&pool, &mods_to_create_or_update).await?;

         for db_mod in mods {
             let mod_span = info_span!("mod", name = ?&db_mod.name, id = &db_mod.nexus_mod_id);
@@ -316,7 +329,9 @@ pub async fn main() -> Result<()> {
                 }
             };

-            let humanized_size = api_file.size.file_size(file_size_opts::CONVENTIONAL)
+            let humanized_size = api_file
+                .size
+                .file_size(file_size_opts::CONVENTIONAL)
                 .expect("unable to create human-readable file size");
             info!(size = %humanized_size, "decided to download file");
             let download_link_resp =
@@ -361,7 +376,7 @@ pub async fn main() -> Result<()> {
                     warn!(initial_bytes = ?initial_bytes, "unable to determine file type of archive, skipping file");
                     file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?;
                     continue;
-                },
+                }
             };
             info!(
                 mime_type = kind.mime_type(),
@@ -375,7 +390,15 @@ pub async fn main() -> Result<()> {
             // https://github.com/libarchive/libarchive/issues/373, https://github.com/libarchive/libarchive/issues/1426
             tokio_file.seek(SeekFrom::Start(0)).await?;
             let mut file = tokio_file.try_clone().await?.into_std().await;
-            match extract_with_unrar(&mut file, &pool, &db_file, &db_mod, checked_metadata).await {
+            match extract_with_unrar(
+                &mut file,
+                &pool,
+                &db_file,
+                &db_mod,
+                checked_metadata,
+            )
+            .await
+            {
                 Ok(_) => Ok(()),
                 Err(err) => {
                     // unrar failed to extract rar file (e.g. archive has unicode filenames)
@@ -396,7 +419,8 @@ pub async fn main() -> Result<()> {
                     if err
                         .downcast_ref::<extractors::compress_tools::ExtractorError>()
                         .is_some()
-                        && (kind.mime_type() == "application/zip" || kind.mime_type() == "application/x-7z-compressed")
+                        && (kind.mime_type() == "application/zip"
+                            || kind.mime_type() == "application/x-7z-compressed")
                     {
                         // compress_tools or libarchive failed to extract zip/7z file (e.g. archive is deflate64 compressed)
                         // Attempt to uncompress the archive using `7z` unix command instead
@@ -45,21 +45,31 @@ pub async fn get_by_nexus_mod_id(
     .context("Failed to get mod")
 }

+pub struct ModLastUpdatedFilesAt {
+    pub nexus_mod_id: i32,
+    pub last_updated_files_at: NaiveDateTime,
+}
+
 #[instrument(level = "debug", skip(pool))]
-pub async fn bulk_get_fully_processed_nexus_mod_ids(
+pub async fn bulk_get_last_updated_by_nexus_mod_ids(
     pool: &sqlx::Pool<sqlx::Postgres>,
     nexus_mod_ids: &[i32],
-) -> Result<Vec<i32>> {
+) -> Result<Vec<ModLastUpdatedFilesAt>> {
     sqlx::query!(
-        "SELECT nexus_mod_id FROM mods
+        "SELECT nexus_mod_id, last_updated_files_at FROM mods
             WHERE nexus_mod_id = ANY($1::int[])
             AND last_updated_files_at IS NOT NULL",
         nexus_mod_ids,
     )
-    .map(|row| row.nexus_mod_id)
+    .map(|row| ModLastUpdatedFilesAt {
+        nexus_mod_id: row.nexus_mod_id,
+        last_updated_files_at: row
+            .last_updated_files_at
+            .expect("last_updated_files_at is null"),
+    })
     .fetch_all(pool)
     .await
-    .context("Failed to get fully processed mods")
+    .context("Failed to bulk get last_updated_files_at by nexus_mod_ids")
 }

 #[instrument(level = "debug", skip(pool))]
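For readers without a database available for sqlx's compile-time-checked `query!` macro, here is a hedged sketch of the same lookup using the runtime-checked `sqlx::query` API. The struct and SQL mirror the diff; everything else is illustrative and assumes sqlx's `postgres` and `chrono` features are enabled and that the columns are an integer and a timestamp.

    use anyhow::{Context, Result};
    use chrono::NaiveDateTime;
    use sqlx::{postgres::PgRow, Row};

    pub struct ModLastUpdatedFilesAt {
        pub nexus_mod_id: i32,
        pub last_updated_files_at: NaiveDateTime,
    }

    // Same query and row mapping as the diff, but via the runtime-checked API so it
    // compiles without a DATABASE_URL being set at build time.
    pub async fn bulk_get_last_updated_by_nexus_mod_ids(
        pool: &sqlx::Pool<sqlx::Postgres>,
        nexus_mod_ids: &[i32],
    ) -> Result<Vec<ModLastUpdatedFilesAt>> {
        sqlx::query(
            "SELECT nexus_mod_id, last_updated_files_at FROM mods
                WHERE nexus_mod_id = ANY($1::int[])
                AND last_updated_files_at IS NOT NULL",
        )
        .bind(nexus_mod_ids)
        .map(|row: PgRow| ModLastUpdatedFilesAt {
            nexus_mod_id: row.get("nexus_mod_id"),
            last_updated_files_at: row.get("last_updated_files_at"),
        })
        .fetch_all(pool)
        .await
        .context("Failed to bulk get last_updated_files_at by nexus_mod_ids")
    }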
@@ -1,4 +1,5 @@
 use anyhow::Result;
+use chrono::NaiveDate;
 use reqwest::Client;
 use scraper::{Html, Selector};
 use tracing::{info, instrument};
@@ -8,12 +9,14 @@ use crate::nexus_api::GAME_ID;
 pub struct ModListResponse {
     html: Html,
 }

 pub struct ScrapedMod<'a> {
     pub nexus_mod_id: i32,
     pub name: &'a str,
     pub category: Option<&'a str>,
     pub author: &'a str,
     pub desc: Option<&'a str>,
+    pub last_update: NaiveDate,
 }

 pub struct ModListScrape<'a> {
@@ -25,7 +28,7 @@ pub struct ModListScrape<'a> {
 pub async fn get_mod_list_page(client: &Client, page: usize) -> Result<ModListResponse> {
     let res = client
         .get(format!(
-            "https://www.nexusmods.com/Core/Libs/Common/Widgets/ModList?RH_ModList=nav:true,home:false,type:0,user_id:0,game_id:{},advfilt:true,include_adult:true,page_size:20,show_game_filter:false,open:false,page:{},sort_by:OLD_u_downloads",
+            "https://www.nexusmods.com/Core/Libs/Common/Widgets/ModList?RH_ModList=nav:true,home:false,type:0,user_id:0,game_id:{},advfilt:true,include_adult:true,page_size:20,show_game_filter:false,open:false,page:{},sort_by:lastupdate",
             GAME_ID,
             page
         ))
@@ -52,6 +55,7 @@ impl ModListResponse {
             Selector::parse("div.category a").expect("failed to parse CSS selector");
         let author_select = Selector::parse("div.author a").expect("failed to parse CSS selector");
         let desc_select = Selector::parse("p.desc").expect("failed to parse CSS selector");
+        let last_update_select = Selector::parse("div.date").expect("failed to parse CSS selector");
         let next_page_select =
             Selector::parse("div.pagination li.next").expect("failed to parse CSS selector");

@@ -86,9 +90,7 @@ impl ModListResponse {
                     .select(&category_select)
                     .next()
                     .expect("Missing category link for mod");
-                let category = category_elem
-                    .text()
-                    .next();
+                let category = category_elem.text().next();
                 let author_elem = right
                     .select(&author_select)
                     .next()
@@ -102,6 +104,18 @@ impl ModListResponse {
                     .next()
                     .expect("Missing desc elem for mod");
                 let desc = desc_elem.text().next();
+                let last_update_elem = right
+                    .select(&last_update_select)
+                    .next()
+                    .expect("Missing last update elem for mod");
+                let last_update = last_update_elem
+                    .text()
+                    .skip(1)
+                    .next()
+                    .expect("Missing last update text for mod")
+                    .trim();
+                let last_update = NaiveDate::parse_from_str(last_update, "%d %b %Y")
+                    .expect("Cannot parse last update date");

                 ScrapedMod {
                     nexus_mod_id,
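A hedged sketch of the new date scraping above, runnable on its own with the `scraper` and `chrono` crates. The HTML fragment is invented purely to show why `.text().skip(1)` is used: the assumption is that the first text node under the real `div.date` element is a label or whitespace and the second holds the date itself.

    use chrono::NaiveDate;
    use scraper::{Html, Selector};

    fn main() {
        // Invented markup standing in for one mod row; the real page structure may differ.
        let fragment =
            Html::parse_fragment(r#"<div class="date"><span>Updated</span>28 Nov 2021</div>"#);
        let last_update_select = Selector::parse("div.date").expect("failed to parse CSS selector");

        let last_update_elem = fragment
            .select(&last_update_select)
            .next()
            .expect("Missing last update elem");
        let last_update = last_update_elem
            .text()
            .skip(1) // skip the first text node ("Updated" in this made-up markup)
            .next()
            .expect("Missing last update text")
            .trim();

        // The diff parses dates with "%d %b %Y", i.e. strings shaped like "28 Nov 2021".
        let last_update = NaiveDate::parse_from_str(last_update, "%d %b %Y")
            .expect("Cannot parse last update date");
        assert_eq!(last_update, NaiveDate::from_ymd(2021, 11, 28));
    }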
@@ -109,6 +123,7 @@ impl ModListResponse {
                     category,
                     author,
                     desc,
+                    last_update,
                 }
             })
             .collect();