Add support for scraping classic Skyrim mods
Not adding it to the `update.sh` script quite yet, since I first need to run the very long process of downloading every classic Skyrim mod that currently exists on the Nexus.
This commit is contained in:
parent
89428da6e0
commit
7d229ccd1a
@ -42,7 +42,8 @@ RUST_LOG=mod_mapper=debug
|
||||
NEXUS_API_KEY=...
|
||||
```
|
||||
|
||||
7. Either run `cargo run` for development mode, or build the release binary with `cargo build --release`, which will get saved to `target/release/`.
|
||||
7. Build the release binary by running `cargo build --release`.
|
||||
8. See `./target/release/mod-mapper -h` for further commands or run `./scripts/update.sh` to start populating the database with scraped mods and dumping the data to JSON files.
|
||||
|
||||
## Sync and Backup Setup
|
||||
|
||||
|
@ -3,7 +3,7 @@ if [ -f cells/edits.json ]; then
|
||||
last_update_time=$(date -r cells/edits.json +'%Y-%m-%dT%H:%M:%S')
|
||||
fi
|
||||
mkdir -p logs
|
||||
./target/release/mod-mapper &>> logs/modmapper.log
|
||||
./target/release/mod-mapper -g skyrimspecialedition &>> logs/modmapper.log
|
||||
mkdir -p cells
|
||||
mkdir -p mods
|
||||
mkdir -p files
|
||||
|
@ -4,6 +4,7 @@ use tokio::time::sleep;
|
||||
use tracing::{debug, info, info_span};
|
||||
|
||||
use crate::nexus_scraper;
|
||||
use crate::nexus_api::SSE_GAME_ID;
|
||||
|
||||
const REQUEST_TIMEOUT: Duration = Duration::from_secs(7200); // 2 hours
|
||||
const CONNECT_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
@ -24,7 +25,7 @@ pub async fn backfill_is_translation(pool: &sqlx::Pool<sqlx::Postgres>) -> Resul
|
||||
while has_next_page {
|
||||
let page_span = info_span!("page", page);
|
||||
let _page_span = page_span.enter();
|
||||
let mod_list_resp = nexus_scraper::get_mod_list_page(&client, page, true).await?;
|
||||
let mod_list_resp = nexus_scraper::get_mod_list_page(&client, page, SSE_GAME_ID, true).await?;
|
||||
let scraped = mod_list_resp.scrape_mods()?;
|
||||
let scraped_ids: Vec<i32> = scraped.mods.iter().map(|m| m.nexus_mod_id).collect();
|
||||
|
||||
|
@ -13,7 +13,7 @@ use crate::extractors::{self, extract_with_7zip, extract_with_compress_tools, ex
|
||||
use crate::models::file;
|
||||
use crate::models::game;
|
||||
use crate::models::{game_mod, game_mod::UnsavedMod};
|
||||
use crate::nexus_api::{self, GAME_ID, GAME_NAME};
|
||||
use crate::nexus_api::{self, get_game_id};
|
||||
use crate::nexus_scraper;
|
||||
|
||||
const REQUEST_TIMEOUT: Duration = Duration::from_secs(7200); // 2 hours
|
||||
@ -22,6 +22,7 @@ const CONNECT_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
pub async fn update(
|
||||
pool: &sqlx::Pool<sqlx::Postgres>,
|
||||
start_page: usize,
|
||||
game_name: &str,
|
||||
full: bool,
|
||||
) -> Result<()> {
|
||||
for include_translations in [false, true] {
|
||||
@ -29,23 +30,24 @@ pub async fn update(
|
||||
let mut has_next_page = true;
|
||||
let mut pages_with_no_updates = 0;
|
||||
|
||||
let game = game::insert(&pool, GAME_NAME, GAME_ID as i32).await?;
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(REQUEST_TIMEOUT)
|
||||
.connect_timeout(CONNECT_TIMEOUT)
|
||||
.build()?;
|
||||
|
||||
let game_id = get_game_id(game_name).expect("valid game name");
|
||||
let game = game::insert(&pool, game_name, game_id).await?;
|
||||
|
||||
while has_next_page {
|
||||
if !full && pages_with_no_updates >= 50 {
|
||||
warn!("No updates found for 50 pages in a row, aborting");
|
||||
break;
|
||||
}
|
||||
|
||||
let page_span = info_span!("page", page, include_translations);
|
||||
let page_span = info_span!("page", page, game_name, include_translations);
|
||||
let _page_span = page_span.enter();
|
||||
let mod_list_resp =
|
||||
nexus_scraper::get_mod_list_page(&client, page, include_translations).await?;
|
||||
nexus_scraper::get_mod_list_page(&client, page, game.nexus_game_id, include_translations).await?;
|
||||
let scraped = mod_list_resp.scrape_mods()?;
|
||||
|
||||
has_next_page = scraped.has_next_page;
|
||||
@ -109,7 +111,7 @@ pub async fn update(
|
||||
for db_mod in mods {
|
||||
let mod_span = info_span!("mod", name = ?&db_mod.name, id = &db_mod.nexus_mod_id);
|
||||
let _mod_span = mod_span.enter();
|
||||
let files_resp = nexus_api::files::get(&client, db_mod.nexus_mod_id).await?;
|
||||
let files_resp = nexus_api::files::get(&client, game_name, db_mod.nexus_mod_id).await?;
|
||||
|
||||
debug!(duration = ?files_resp.wait, "sleeping");
|
||||
sleep(files_resp.wait).await;
|
||||
@ -190,6 +192,7 @@ pub async fn update(
|
||||
info!(size = %humanized_size, "decided to download file");
|
||||
let download_link_resp = nexus_api::download_link::get(
|
||||
&client,
|
||||
game_name,
|
||||
db_mod.nexus_mod_id,
|
||||
api_file.file_id,
|
||||
)
|
||||
@ -252,6 +255,7 @@ pub async fn update(
|
||||
&pool,
|
||||
&db_file,
|
||||
&db_mod,
|
||||
game_name,
|
||||
checked_metadata,
|
||||
)
|
||||
.await
|
||||
@ -261,7 +265,7 @@ pub async fn update(
|
||||
// unrar failed to extract rar file (e.g. archive has unicode filenames)
|
||||
// Attempt to uncompress the archive using `7z` unix command instead
|
||||
warn!(error = %err, "failed to extract file with unrar, extracting whole archive with 7z instead");
|
||||
extract_with_7zip(&mut file, &pool, &db_file, &db_mod).await
|
||||
extract_with_7zip(&mut file, &pool, &db_file, &db_mod, game_name).await
|
||||
}
|
||||
}?;
|
||||
}
|
||||
@ -269,7 +273,7 @@ pub async fn update(
|
||||
tokio_file.seek(SeekFrom::Start(0)).await?;
|
||||
let mut file = tokio_file.try_clone().await?.into_std().await;
|
||||
|
||||
match extract_with_compress_tools(&mut file, &pool, &db_file, &db_mod)
|
||||
match extract_with_compress_tools(&mut file, &pool, &db_file, &db_mod, game_name)
|
||||
.await
|
||||
{
|
||||
Ok(_) => Ok(()),
|
||||
@ -284,7 +288,7 @@ pub async fn update(
|
||||
// compress_tools or libarchive failed to extract zip/7z file (e.g. archive is deflate64 compressed)
|
||||
// Attempt to uncompress the archive using `7z` unix command instead
|
||||
warn!(error = %err, "failed to extract file with compress_tools, extracting whole archive with 7z instead");
|
||||
extract_with_7zip(&mut file, &pool, &db_file, &db_mod).await
|
||||
extract_with_7zip(&mut file, &pool, &db_file, &db_mod, game_name).await
|
||||
} else {
|
||||
Err(err)
|
||||
}
|
||||
|
@ -88,13 +88,14 @@ pub async fn extract_with_compress_tools(
|
||||
pool: &sqlx::Pool<sqlx::Postgres>,
|
||||
db_file: &File,
|
||||
db_mod: &Mod,
|
||||
game_name: &str,
|
||||
) -> Result<()> {
|
||||
let extractor = Extractor::new(file);
|
||||
for plugin in extractor.into_iter() {
|
||||
let (file_path, mut plugin_buf) = plugin?;
|
||||
let plugin_span = info_span!("plugin", name = ?file_path);
|
||||
let _plugin_span = plugin_span.enter();
|
||||
process_plugin(&mut plugin_buf, &pool, &db_file, &db_mod, &file_path).await?;
|
||||
process_plugin(&mut plugin_buf, &pool, &db_file, &db_mod, &file_path, game_name).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
@ -14,6 +14,7 @@ pub async fn extract_with_7zip(
|
||||
pool: &sqlx::Pool<sqlx::Postgres>,
|
||||
db_file: &File,
|
||||
db_mod: &Mod,
|
||||
game_name: &str,
|
||||
) -> Result<()> {
|
||||
file.seek(SeekFrom::Start(0))?;
|
||||
let temp_dir = tempdir()?;
|
||||
@ -54,6 +55,7 @@ pub async fn extract_with_7zip(
|
||||
&db_file,
|
||||
&db_mod,
|
||||
&file_path.to_string_lossy(),
|
||||
game_name,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
@ -12,6 +12,7 @@ pub async fn extract_with_unrar(
|
||||
pool: &sqlx::Pool<sqlx::Postgres>,
|
||||
db_file: &File,
|
||||
db_mod: &Mod,
|
||||
game_name: &str,
|
||||
checked_metadata: bool,
|
||||
) -> Result<()> {
|
||||
let temp_dir = tempdir()?;
|
||||
@ -80,6 +81,7 @@ pub async fn extract_with_unrar(
|
||||
&db_file,
|
||||
&db_mod,
|
||||
&file_path.to_string_lossy(),
|
||||
game_name,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
@ -24,6 +24,10 @@ struct Args {
|
||||
/// the page number to start scraping for mods on nexus mods
|
||||
page: usize,
|
||||
|
||||
#[argh(option, short = 'g', default = "String::from(\"skyrimspecialedition\")")]
|
||||
/// name of nexus game to scrape (e.g. "skyrim" or "skyrimspecialedition")
|
||||
game: String,
|
||||
|
||||
#[argh(switch, short = 'f')]
|
||||
/// enable full scrape of all pages, rather than stopping after 50 pages of no updates
|
||||
full: bool,
|
||||
@ -110,5 +114,5 @@ pub async fn main() -> Result<()> {
|
||||
return backfill_is_translation(&pool).await;
|
||||
}
|
||||
|
||||
return update(&pool, args.page, args.full).await;
|
||||
return update(&pool, args.page, &args.game, args.full).await;
|
||||
}
|
||||
|
@ -8,7 +8,7 @@ use tokio::fs::File;
|
||||
use tokio_util::compat::FuturesAsyncReadCompatExt;
|
||||
use tracing::{info, instrument};
|
||||
|
||||
use super::{rate_limit_wait_duration, warn_and_sleep, GAME_NAME, USER_AGENT};
|
||||
use super::{rate_limit_wait_duration, warn_and_sleep, USER_AGENT};
|
||||
|
||||
pub struct DownloadLinkResponse {
|
||||
pub wait: Duration,
|
||||
@ -16,12 +16,12 @@ pub struct DownloadLinkResponse {
|
||||
}
|
||||
|
||||
#[instrument(skip(client))]
|
||||
pub async fn get(client: &Client, mod_id: i32, file_id: i64) -> Result<DownloadLinkResponse> {
|
||||
pub async fn get(client: &Client, game_name: &str, mod_id: i32, file_id: i64) -> Result<DownloadLinkResponse> {
|
||||
for attempt in 1..=3 {
|
||||
let res = match client
|
||||
.get(format!(
|
||||
"https://api.nexusmods.com/v1/games/{}/mods/{}/files/{}/download_link.json",
|
||||
GAME_NAME, mod_id, file_id
|
||||
game_name, mod_id, file_id
|
||||
))
|
||||
.header("accept", "application/json")
|
||||
.header("apikey", env::var("NEXUS_API_KEY")?)
|
||||
|
@ -5,7 +5,7 @@ use serde_json::Value;
|
||||
use std::{env, time::Duration};
|
||||
use tracing::{info, instrument};
|
||||
|
||||
use super::{rate_limit_wait_duration, warn_and_sleep, GAME_NAME, USER_AGENT};
|
||||
use super::{rate_limit_wait_duration, warn_and_sleep, USER_AGENT};
|
||||
|
||||
pub struct FilesResponse {
|
||||
pub wait: Duration,
|
||||
@ -25,12 +25,12 @@ pub struct ApiFile<'a> {
|
||||
}
|
||||
|
||||
#[instrument(skip(client))]
|
||||
pub async fn get(client: &Client, nexus_mod_id: i32) -> Result<FilesResponse> {
|
||||
pub async fn get(client: &Client, game_name: &str, nexus_mod_id: i32) -> Result<FilesResponse> {
|
||||
for attempt in 1..=3 {
|
||||
let res = match client
|
||||
.get(format!(
|
||||
"https://api.nexusmods.com/v1/games/{}/mods/{}/files.json",
|
||||
GAME_NAME, nexus_mod_id
|
||||
game_name, nexus_mod_id
|
||||
))
|
||||
.header("accept", "application/json")
|
||||
.header("apikey", env::var("NEXUS_API_KEY")?)
|
||||
|
@ -5,7 +5,7 @@ use serde_json::Value;
|
||||
use std::{env, time::Duration};
|
||||
use tracing::{info, instrument};
|
||||
|
||||
use super::{rate_limit_wait_duration, warn_and_sleep, GAME_NAME, USER_AGENT};
|
||||
use super::{rate_limit_wait_duration, warn_and_sleep, USER_AGENT};
|
||||
|
||||
pub struct ModResponse {
|
||||
pub wait: Duration,
|
||||
@ -13,12 +13,12 @@ pub struct ModResponse {
|
||||
}
|
||||
|
||||
#[instrument(skip(client))]
|
||||
pub async fn get(client: &Client, mod_id: i32) -> Result<ModResponse> {
|
||||
pub async fn get(client: &Client, game_name: &str, mod_id: i32) -> Result<ModResponse> {
|
||||
for attempt in 1..=3 {
|
||||
let res = match client
|
||||
.get(format!(
|
||||
"https://api.nexusmods.com/v1/games/{}/mods/{}.json",
|
||||
GAME_NAME, mod_id
|
||||
game_name, mod_id
|
||||
))
|
||||
.header("accept", "application/json")
|
||||
.header("apikey", env::var("NEXUS_API_KEY")?)
|
||||
|
@ -11,10 +11,20 @@ pub mod files;
|
||||
pub mod game_mod;
|
||||
pub mod metadata;
|
||||
|
||||
pub static GAME_NAME: &str = "skyrimspecialedition";
|
||||
pub const GAME_ID: u32 = 1704;
|
||||
pub const SKYRIM_GAME_NAME: &str = "skyrim";
|
||||
pub const SKYRIM_GAME_ID: i32 = 110;
|
||||
pub const SSE_GAME_NAME: &str = "skyrimspecialedition";
|
||||
pub const SSE_GAME_ID: i32 = 1704;
|
||||
pub static USER_AGENT: &str = "mod-mapper/0.1";
|
||||
|
||||
/// Looks up the numeric Nexus game id that corresponds to a game name.
///
/// Returns `Some(SKYRIM_GAME_ID)` when `name` equals `SKYRIM_GAME_NAME`,
/// `Some(SSE_GAME_ID)` when it equals `SSE_GAME_NAME`, and `None` for any
/// other string. The comparison is an exact string match.
pub fn get_game_id(name: &str) -> Option<i32> {
|
||||
match name {
|
||||
SKYRIM_GAME_NAME => Some(SKYRIM_GAME_ID),
|
||||
SSE_GAME_NAME => Some(SSE_GAME_ID),
|
||||
// Any name without a known id constant is reported as unsupported.
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn rate_limit_wait_duration(res: &Response) -> Result<std::time::Duration> {
|
||||
let daily_remaining: i32 = res
|
||||
.headers()
|
||||
|
@ -4,8 +4,6 @@ use reqwest::Client;
|
||||
use scraper::{Html, Selector};
|
||||
use tracing::{info, instrument};
|
||||
|
||||
use crate::nexus_api::GAME_ID;
|
||||
|
||||
pub struct ModListResponse {
|
||||
html: Html,
|
||||
}
|
||||
@ -32,12 +30,13 @@ pub struct ModListScrape<'a> {
|
||||
pub async fn get_mod_list_page(
|
||||
client: &Client,
|
||||
page: usize,
|
||||
game_id: i32,
|
||||
include_translations: bool,
|
||||
) -> Result<ModListResponse> {
|
||||
let res = client
|
||||
.get(format!(
|
||||
"https://www.nexusmods.com/Core/Libs/Common/Widgets/ModList?RH_ModList=nav:true,home:false,type:0,user_id:0,game_id:{},advfilt:true,tags_{}%5B%5D:1428,include_adult:true,page_size:20,show_game_filter:false,open:false,page:{},sort_by:lastupdate",
|
||||
GAME_ID,
|
||||
game_id,
|
||||
match include_translations { true => "yes", false => "no" },
|
||||
page
|
||||
))
|
||||
|
@ -14,7 +14,6 @@ use crate::models::{plugin, plugin::UnsavedPlugin};
|
||||
use crate::models::{plugin_cell, plugin_cell::UnsavedPluginCell};
|
||||
use crate::models::{plugin_world, plugin_world::UnsavedPluginWorld};
|
||||
use crate::models::{world, world::UnsavedWorld};
|
||||
use crate::nexus_api::GAME_NAME;
|
||||
|
||||
fn get_local_form_id_and_master<'a>(
|
||||
form_id: u32,
|
||||
@ -35,6 +34,7 @@ pub async fn process_plugin(
|
||||
db_file: &File,
|
||||
db_mod: &Mod,
|
||||
file_path: &str,
|
||||
game_name: &str,
|
||||
) -> Result<()> {
|
||||
if plugin_buf.is_empty() {
|
||||
warn!("skipping processing of invalid empty plugin");
|
||||
@ -148,7 +148,7 @@ pub async fn process_plugin(
|
||||
|
||||
let plugin_path = [
|
||||
"plugins",
|
||||
GAME_NAME,
|
||||
game_name,
|
||||
&format!("{}", db_mod.nexus_mod_id),
|
||||
&format!("{}", db_file.nexus_file_id),
|
||||
file_path,
|
||||
|
Loading…
Reference in New Issue
Block a user