Fix mod page scraping
Mod list page requests had been returning 403s for the past month.
This commit is contained in:
parent
694ef6e89b
commit
56864c7f29
@ -3,7 +3,7 @@ use std::time::Duration;
|
||||
use tokio::time::sleep;
|
||||
use tracing::{debug, info, info_span};
|
||||
|
||||
use crate::nexus_api::SSE_GAME_ID;
|
||||
use crate::nexus_api::{SSE_GAME_ID, SSE_GAME_NAME};
|
||||
use crate::nexus_scraper;
|
||||
|
||||
const REQUEST_TIMEOUT: Duration = Duration::from_secs(7200); // 2 hours
|
||||
@ -26,7 +26,7 @@ pub async fn backfill_is_translation(pool: &sqlx::Pool<sqlx::Postgres>) -> Resul
|
||||
let page_span = info_span!("page", page);
|
||||
let _page_span = page_span.enter();
|
||||
let mod_list_resp =
|
||||
nexus_scraper::get_mod_list_page(&client, page, SSE_GAME_ID, true).await?;
|
||||
nexus_scraper::get_mod_list_page(&client, page, SSE_GAME_NAME, SSE_GAME_ID, true).await?;
|
||||
let scraped = mod_list_resp.scrape_mods()?;
|
||||
let scraped_ids: Vec<i32> = scraped.mods.iter().map(|m| m.nexus_mod_id).collect();
|
||||
|
||||
|
@ -2,6 +2,7 @@ use anyhow::Result;
|
||||
use chrono::{NaiveDateTime, NaiveTime};
|
||||
use humansize::{format_size_i, DECIMAL};
|
||||
use reqwest::StatusCode;
|
||||
use reqwest::header::{HeaderMap, HeaderValue};
|
||||
use std::collections::HashSet;
|
||||
use std::io::SeekFrom;
|
||||
use std::time::Duration;
|
||||
@ -13,7 +14,7 @@ use crate::extractors::{self, extract_with_7zip, extract_with_compress_tools, ex
|
||||
use crate::models::file;
|
||||
use crate::models::game;
|
||||
use crate::models::{game_mod, game_mod::UnsavedMod};
|
||||
use crate::nexus_api::{self, get_game_id};
|
||||
use crate::nexus_api::{self, get_game_id, USER_AGENT};
|
||||
use crate::nexus_scraper;
|
||||
|
||||
const REQUEST_TIMEOUT: Duration = Duration::from_secs(7200); // 2 hours
|
||||
@ -30,9 +31,12 @@ pub async fn update(
|
||||
let mut has_next_page = true;
|
||||
let mut pages_with_no_updates = 0;
|
||||
|
||||
let mut headers = HeaderMap::new();
|
||||
headers.insert("user-agent", HeaderValue::from_static(USER_AGENT));
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(REQUEST_TIMEOUT)
|
||||
.connect_timeout(CONNECT_TIMEOUT)
|
||||
.default_headers(headers)
|
||||
.build()?;
|
||||
|
||||
let game_id = get_game_id(game_name).expect("valid game name");
|
||||
@ -49,6 +53,7 @@ pub async fn update(
|
||||
let mod_list_resp = nexus_scraper::get_mod_list_page(
|
||||
&client,
|
||||
page,
|
||||
game_name,
|
||||
game.nexus_game_id,
|
||||
include_translations,
|
||||
)
|
||||
|
@ -8,7 +8,7 @@ use tokio::fs::File;
|
||||
use tokio_util::compat::FuturesAsyncReadCompatExt;
|
||||
use tracing::{info, instrument};
|
||||
|
||||
use super::{rate_limit_wait_duration, warn_and_sleep, USER_AGENT};
|
||||
use super::{rate_limit_wait_duration, warn_and_sleep};
|
||||
|
||||
pub struct DownloadLinkResponse {
|
||||
pub wait: Duration,
|
||||
@ -30,7 +30,6 @@ pub async fn get(
|
||||
))
|
||||
.header("accept", "application/json")
|
||||
.header("apikey", env::var("NEXUS_API_KEY")?)
|
||||
.header("user-agent", USER_AGENT)
|
||||
.send()
|
||||
.await
|
||||
{
|
||||
@ -80,7 +79,6 @@ impl DownloadLinkResponse {
|
||||
let res = match client
|
||||
.get(self.link()?)
|
||||
.header("apikey", env::var("NEXUS_API_KEY")?)
|
||||
.header("user-agent", USER_AGENT)
|
||||
.send()
|
||||
.await
|
||||
{
|
||||
|
@ -5,7 +5,7 @@ use serde_json::Value;
|
||||
use std::{env, time::Duration};
|
||||
use tracing::{info, instrument};
|
||||
|
||||
use super::{rate_limit_wait_duration, warn_and_sleep, USER_AGENT};
|
||||
use super::{rate_limit_wait_duration, warn_and_sleep};
|
||||
|
||||
pub struct FilesResponse {
|
||||
pub wait: Duration,
|
||||
@ -34,7 +34,6 @@ pub async fn get(client: &Client, game_name: &str, nexus_mod_id: i32) -> Result<
|
||||
))
|
||||
.header("accept", "application/json")
|
||||
.header("apikey", env::var("NEXUS_API_KEY")?)
|
||||
.header("user-agent", USER_AGENT)
|
||||
.send()
|
||||
.await
|
||||
{
|
||||
|
@ -5,7 +5,7 @@ use serde_json::Value;
|
||||
use std::{env, time::Duration};
|
||||
use tracing::{info, instrument};
|
||||
|
||||
use super::{rate_limit_wait_duration, warn_and_sleep, USER_AGENT};
|
||||
use super::{rate_limit_wait_duration, warn_and_sleep};
|
||||
|
||||
pub struct ModResponse {
|
||||
pub wait: Duration,
|
||||
@ -22,7 +22,6 @@ pub async fn get(client: &Client, game_name: &str, mod_id: i32) -> Result<ModRes
|
||||
))
|
||||
.header("accept", "application/json")
|
||||
.header("apikey", env::var("NEXUS_API_KEY")?)
|
||||
.header("user-agent", USER_AGENT)
|
||||
.send()
|
||||
.await
|
||||
{
|
||||
|
@ -5,7 +5,7 @@ use std::env;
|
||||
use tracing::{info, instrument};
|
||||
|
||||
use super::files::ApiFile;
|
||||
use super::{warn_and_sleep, USER_AGENT};
|
||||
use super::warn_and_sleep;
|
||||
|
||||
fn has_plugin(json: &Value) -> Result<bool> {
|
||||
let node_type = json
|
||||
@ -50,7 +50,6 @@ pub async fn contains_plugin(client: &Client, api_file: &ApiFile<'_>) -> Result<
|
||||
.get(metadata_link)
|
||||
.header("accept", "application/json")
|
||||
.header("apikey", env::var("NEXUS_API_KEY")?)
|
||||
.header("user-agent", USER_AGENT)
|
||||
.send()
|
||||
.await
|
||||
{
|
||||
|
@ -31,6 +31,7 @@ pub struct ModListScrape<'a> {
|
||||
pub async fn get_mod_list_page(
|
||||
client: &Client,
|
||||
page: usize,
|
||||
game_name: &str,
|
||||
game_id: i32,
|
||||
include_translations: bool,
|
||||
) -> Result<ModListResponse> {
|
||||
@ -41,6 +42,12 @@ pub async fn get_mod_list_page(
|
||||
match include_translations { true => "yes", false => "no" },
|
||||
page
|
||||
))
|
||||
.header("host", "www.nexusmods.com")
|
||||
.header("referrer", format!("https://www.nexusmods.com/{}/mods/", game_name))
|
||||
.header("sec-fetch-dest", "empty")
|
||||
.header("sec-fetch-mode", "cors")
|
||||
.header("sec-fetch-site", "same-origin")
|
||||
.header("x-requested-with", "XMLHttpRequest")
|
||||
.send()
|
||||
.await?
|
||||
.error_for_status()?;
|
||||
|
Loading…
Reference in New Issue
Block a user