Fix mod page scraping

Requests for the mod list page had been returning 403s for the past month.
This commit is contained in:
2023-06-30 12:30:12 -04:00
parent 694ef6e89b
commit 56864c7f29
7 changed files with 19 additions and 12 deletions

View File

@@ -3,7 +3,7 @@ use std::time::Duration;
use tokio::time::sleep;
use tracing::{debug, info, info_span};
use crate::nexus_api::SSE_GAME_ID;
use crate::nexus_api::{SSE_GAME_ID, SSE_GAME_NAME};
use crate::nexus_scraper;
const REQUEST_TIMEOUT: Duration = Duration::from_secs(7200); // 2 hours
@@ -26,7 +26,7 @@ pub async fn backfill_is_translation(pool: &sqlx::Pool<sqlx::Postgres>) -> Resul
let page_span = info_span!("page", page);
let _page_span = page_span.enter();
let mod_list_resp =
nexus_scraper::get_mod_list_page(&client, page, SSE_GAME_ID, true).await?;
nexus_scraper::get_mod_list_page(&client, page, SSE_GAME_NAME, SSE_GAME_ID, true).await?;
let scraped = mod_list_resp.scrape_mods()?;
let scraped_ids: Vec<i32> = scraped.mods.iter().map(|m| m.nexus_mod_id).collect();

View File

@@ -2,6 +2,7 @@ use anyhow::Result;
use chrono::{NaiveDateTime, NaiveTime};
use humansize::{format_size_i, DECIMAL};
use reqwest::StatusCode;
use reqwest::header::{HeaderMap, HeaderValue};
use std::collections::HashSet;
use std::io::SeekFrom;
use std::time::Duration;
@@ -13,7 +14,7 @@ use crate::extractors::{self, extract_with_7zip, extract_with_compress_tools, ex
use crate::models::file;
use crate::models::game;
use crate::models::{game_mod, game_mod::UnsavedMod};
use crate::nexus_api::{self, get_game_id};
use crate::nexus_api::{self, get_game_id, USER_AGENT};
use crate::nexus_scraper;
const REQUEST_TIMEOUT: Duration = Duration::from_secs(7200); // 2 hours
@@ -30,9 +31,12 @@ pub async fn update(
let mut has_next_page = true;
let mut pages_with_no_updates = 0;
let mut headers = HeaderMap::new();
headers.insert("user-agent", HeaderValue::from_static(USER_AGENT));
let client = reqwest::Client::builder()
.timeout(REQUEST_TIMEOUT)
.connect_timeout(CONNECT_TIMEOUT)
.default_headers(headers)
.build()?;
let game_id = get_game_id(game_name).expect("valid game name");
@@ -49,6 +53,7 @@ pub async fn update(
let mod_list_resp = nexus_scraper::get_mod_list_page(
&client,
page,
game_name,
game.nexus_game_id,
include_translations,
)