Add option for full update and default to false
When false, stop scraping after 50 consecutive pages with no new updates
This commit is contained in:
parent
283cad220b
commit
4d9855552b
@ -19,9 +19,14 @@ use crate::nexus_scraper;
|
||||
const REQUEST_TIMEOUT: Duration = Duration::from_secs(7200); // 2 hours
|
||||
const CONNECT_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
|
||||
pub async fn update(pool: &sqlx::Pool<sqlx::Postgres>, start_page: usize) -> Result<()> {
|
||||
pub async fn update(
|
||||
pool: &sqlx::Pool<sqlx::Postgres>,
|
||||
start_page: usize,
|
||||
full: bool,
|
||||
) -> Result<()> {
|
||||
let mut page = start_page;
|
||||
let mut has_next_page = true;
|
||||
let mut pages_with_no_updates = 0;
|
||||
|
||||
let game = game::insert(&pool, GAME_NAME, GAME_ID as i32).await?;
|
||||
|
||||
@ -30,7 +35,14 @@ pub async fn update(pool: &sqlx::Pool<sqlx::Postgres>, start_page: usize) -> Res
|
||||
.connect_timeout(CONNECT_TIMEOUT)
|
||||
.build()?;
|
||||
|
||||
dbg!(full);
|
||||
while has_next_page {
|
||||
dbg!(pages_with_no_updates);
|
||||
if !full && pages_with_no_updates >= 50 {
|
||||
warn!("No updates found for 50 pages in a row, aborting");
|
||||
break;
|
||||
}
|
||||
|
||||
let page_span = info_span!("page", page);
|
||||
let _page_span = page_span.enter();
|
||||
let mod_list_resp = nexus_scraper::get_mod_list_page(&client, page).await?;
|
||||
@ -88,6 +100,12 @@ pub async fn update(pool: &sqlx::Pool<sqlx::Postgres>, start_page: usize) -> Res
|
||||
|
||||
let mods = game_mod::batched_insert(&pool, &mods_to_create_or_update).await?;
|
||||
|
||||
if mods.is_empty() {
|
||||
pages_with_no_updates += 1;
|
||||
} else {
|
||||
pages_with_no_updates = 0;
|
||||
}
|
||||
|
||||
for db_mod in mods {
|
||||
let mod_span = info_span!("mod", name = ?&db_mod.name, id = &db_mod.nexus_mod_id);
|
||||
let _mod_span = mod_span.enter();
|
||||
|
@ -20,9 +20,13 @@ use commands::{
|
||||
/// Downloads every mod off nexus mods, parses CELL and WRLD data from plugins in each, and saves the data to the database.
|
||||
struct Args {
|
||||
#[argh(option, short = 'p', default = "1")]
|
||||
/// the page number to start scraping for mods on nexus mods.
|
||||
/// the page number to start scraping for mods on nexus mods
|
||||
page: usize,
|
||||
|
||||
#[argh(option, short = 'f', default = "false")]
|
||||
/// enable full scrape of all pages, rather than stopping after 50 pages of no updates
|
||||
full: bool,
|
||||
|
||||
/// file to output the cell mod edit counts as json
|
||||
#[argh(option, short = 'e')]
|
||||
dump_edits: Option<String>,
|
||||
@ -73,5 +77,5 @@ pub async fn main() -> Result<()> {
|
||||
return download_tiles(&dir).await;
|
||||
}
|
||||
|
||||
return update(&pool, args.page).await;
|
||||
return update(&pool, args.page, args.full).await;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user