Add an option for a full update, defaulting to false
When false, stop scraping pages after 50 consecutive pages with no new updates
This commit is contained in:
parent
283cad220b
commit
4d9855552b
@@ -19,9 +19,14 @@ use crate::nexus_scraper;
 const REQUEST_TIMEOUT: Duration = Duration::from_secs(7200); // 2 hours
 const CONNECT_TIMEOUT: Duration = Duration::from_secs(30);
 
-pub async fn update(pool: &sqlx::Pool<sqlx::Postgres>, start_page: usize) -> Result<()> {
+pub async fn update(
+    pool: &sqlx::Pool<sqlx::Postgres>,
+    start_page: usize,
+    full: bool,
+) -> Result<()> {
     let mut page = start_page;
     let mut has_next_page = true;
+    let mut pages_with_no_updates = 0;
 
     let game = game::insert(&pool, GAME_NAME, GAME_ID as i32).await?;
 
@@ -30,7 +35,14 @@ pub async fn update(pool: &sqlx::Pool<sqlx::Postgres>, start_page: usize) -> Res
         .connect_timeout(CONNECT_TIMEOUT)
         .build()?;
 
+    dbg!(full);
     while has_next_page {
+        dbg!(pages_with_no_updates);
+        if !full && pages_with_no_updates >= 50 {
+            warn!("No updates found for 50 pages in a row, aborting");
+            break;
+        }
+
         let page_span = info_span!("page", page);
         let _page_span = page_span.enter();
         let mod_list_resp = nexus_scraper::get_mod_list_page(&client, page).await?;
@@ -88,6 +100,12 @@ pub async fn update(pool: &sqlx::Pool<sqlx::Postgres>, start_page: usize) -> Res
 
         let mods = game_mod::batched_insert(&pool, &mods_to_create_or_update).await?;
 
+        if mods.is_empty() {
+            pages_with_no_updates += 1;
+        } else {
+            pages_with_no_updates = 0;
+        }
+
         for db_mod in mods {
             let mod_span = info_span!("mod", name = ?&db_mod.name, id = &db_mod.nexus_mod_id);
             let _mod_span = mod_span.enter();
@@ -20,9 +20,13 @@ use commands::{
 /// Downloads every mod off nexus mods, parses CELL and WRLD data from plugins in each, and saves the data to the database.
 struct Args {
     #[argh(option, short = 'p', default = "1")]
-    /// the page number to start scraping for mods on nexus mods.
+    /// the page number to start scraping for mods on nexus mods
     page: usize,
 
+    #[argh(option, short = 'f', default = "false")]
+    /// enable full scrape of all pages, rather than stopping after 50 pages of no updates
+    full: bool,
+
     /// file to output the cell mod edit counts as json
     #[argh(option, short = 'e')]
     dump_edits: Option<String>,
@@ -73,5 +77,5 @@ pub async fn main() -> Result<()> {
         return download_tiles(&dir).await;
     }
 
-    return update(&pool, args.page).await;
+    return update(&pool, args.page, args.full).await;
 }
|
Loading…
Reference in New Issue
Block a user