From 4d9855552bfe06d793c3d0bcb183f4008014e82e Mon Sep 17 00:00:00 2001 From: Tyler Hallada Date: Fri, 11 Feb 2022 22:38:44 -0500 Subject: [PATCH] Add option for full update and default to false When false, stop scraping pages after 50 pages of no new updates --- src/commands/update.rs | 20 +++++++++++++++++++- src/main.rs | 8 ++++++-- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/commands/update.rs b/src/commands/update.rs index 165d356..d8454e9 100644 --- a/src/commands/update.rs +++ b/src/commands/update.rs @@ -19,9 +19,14 @@ use crate::nexus_scraper; const REQUEST_TIMEOUT: Duration = Duration::from_secs(7200); // 2 hours const CONNECT_TIMEOUT: Duration = Duration::from_secs(30); -pub async fn update(pool: &sqlx::Pool, start_page: usize) -> Result<()> { +pub async fn update( + pool: &sqlx::Pool, + start_page: usize, + full: bool, +) -> Result<()> { let mut page = start_page; let mut has_next_page = true; + let mut pages_with_no_updates = 0; let game = game::insert(&pool, GAME_NAME, GAME_ID as i32).await?; @@ -30,7 +35,14 @@ pub async fn update(pool: &sqlx::Pool, start_page: usize) -> Res .connect_timeout(CONNECT_TIMEOUT) .build()?; + dbg!(full); while has_next_page { + dbg!(pages_with_no_updates); + if !full && pages_with_no_updates >= 50 { + warn!("No updates found for 50 pages in a row, aborting"); + break; + } + let page_span = info_span!("page", page); let _page_span = page_span.enter(); let mod_list_resp = nexus_scraper::get_mod_list_page(&client, page).await?; @@ -88,6 +100,12 @@ pub async fn update(pool: &sqlx::Pool, start_page: usize) -> Res let mods = game_mod::batched_insert(&pool, &mods_to_create_or_update).await?; + if mods.is_empty() { + pages_with_no_updates += 1; + } else { + pages_with_no_updates = 0; + } + for db_mod in mods { let mod_span = info_span!("mod", name = ?&db_mod.name, id = &db_mod.nexus_mod_id); let _mod_span = mod_span.enter(); diff --git a/src/main.rs b/src/main.rs index 4855c8c..9930118 100644 --- a/src/main.rs +++ b/src/main.rs @@ -20,9 +20,13 @@ use commands::{ /// Downloads every mod off nexus mods, parses CELL and WRLD data from plugins in each, and saves the data to the database. struct Args { #[argh(option, short = 'p', default = "1")] - /// the page number to start scraping for mods on nexus mods. + /// the page number to start scraping for mods on nexus mods page: usize, + #[argh(option, short = 'f', default = "false")] + /// enable full scrape of all pages, rather than stopping after 50 pages of no updates + full: bool, + /// file to output the cell mod edit counts as json #[argh(option, short = 'e')] dump_edits: Option, @@ -73,5 +77,5 @@ pub async fn main() -> Result<()> { return download_tiles(&dir).await; } - return update(&pool, args.page).await; + return update(&pool, args.page, args.full).await; }