From b8c5f63aebced8f55a62073011ff50bd8dcf529c Mon Sep 17 00:00:00 2001
From: Tyler Hallada
Date: Tue, 8 Feb 2022 00:02:26 -0500
Subject: [PATCH] Refactor main.rs into separate files

Create a new `commands` mod with a file for each command-line option.
Create two more extractor files to hold the 7zip and unrar methods moved
out of main.rs.
---
 src/commands/dump_cell_data.rs        |  22 ++
 src/commands/dump_cell_edit_counts.rs |  20 +
 src/commands/dump_mod_data.rs         |  27 ++
 src/commands/dump_mod_search_index.rs |  34 ++
 src/commands/mod.rs                   |  11 +
 src/commands/update.rs                | 284 ++++++++++++++
 src/extractors/compress_tools.rs      |  20 +
 src/extractors/mod.rs                 |   6 +
 src/extractors/seven_zip.rs           |  61 +++
 src/extractors/unrar.rs               |  88 +++++
 src/main.rs                           | 527 +-------------------------
 11 files changed, 589 insertions(+), 511 deletions(-)
 create mode 100644 src/commands/dump_cell_data.rs
 create mode 100644 src/commands/dump_cell_edit_counts.rs
 create mode 100644 src/commands/dump_mod_data.rs
 create mode 100644 src/commands/dump_mod_search_index.rs
 create mode 100644 src/commands/mod.rs
 create mode 100644 src/commands/update.rs
 create mode 100644 src/extractors/seven_zip.rs
 create mode 100644 src/extractors/unrar.rs
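Reviewer note (kept above the first `diff --git` marker, where `git am` ignores it): the new `commands` mod is a small facade, one file per subcommand with its entry point re-exported from `mod.rs` so call sites stay flat. A minimal, self-contained sketch of that pattern follows; the stub body and the synchronous `Result<(), String>` signature are illustrative only, since the real entry points are async and take a `&sqlx::Pool<sqlx::Postgres>`:

// Facade pattern used by src/commands: each subcommand lives in its own
// module, and the parent re-exports the entry point so callers write
// `commands::update(...)` instead of `commands::update::update(...)`.
mod commands {
    pub mod update {
        // hypothetical stand-in for the real `pub async fn update(pool, start_page)`
        pub fn update(start_page: usize) -> Result<(), String> {
            println!("would scrape the mod list starting at page {}", start_page);
            Ok(())
        }
    }
    pub use update::update; // flatten the path for callers
}

fn main() -> Result<(), String> {
    commands::update(0)
}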
diff --git a/src/commands/dump_cell_data.rs b/src/commands/dump_cell_data.rs
new file mode 100644
index 0000000..1de5dfe
--- /dev/null
+++ b/src/commands/dump_cell_data.rs
@@ -0,0 +1,22 @@
+use anyhow::Result;
+use std::fs::{create_dir_all, File};
+use std::io::Write;
+use std::path::Path;
+
+use crate::models::cell;
+
+pub async fn dump_cell_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> Result<()> {
+    for x in -77..75 {
+        for y in -50..44 {
+            if let Ok(data) = cell::get_cell_data(&pool, "Skyrim.esm", 1, x, y).await {
+                let path = format!("{}/{}", &dir, x);
+                let path = Path::new(&path);
+                create_dir_all(&path)?;
+                let path = path.join(format!("{}.json", y));
+                let mut file = File::create(path)?;
+                write!(file, "{}", serde_json::to_string(&data)?)?;
+            }
+        }
+    }
+    return Ok(());
+}
diff --git a/src/commands/dump_cell_edit_counts.rs b/src/commands/dump_cell_edit_counts.rs
new file mode 100644
index 0000000..f7b3b94
--- /dev/null
+++ b/src/commands/dump_cell_edit_counts.rs
@@ -0,0 +1,20 @@
+use anyhow::Result;
+use std::collections::HashMap;
+use std::fs::File;
+use std::io::Write;
+
+use crate::models::cell;
+
+pub async fn dump_cell_edit_counts(pool: &sqlx::Pool<sqlx::Postgres>, path: &str) -> Result<()> {
+    let mut cell_mod_edit_counts = HashMap::new();
+    for x in -77..75 {
+        for y in -50..44 {
+            if let Some(count) = cell::count_mod_edits(&pool, "Skyrim.esm", 1, x, y).await? {
+                cell_mod_edit_counts.insert(format!("{},{}", x, y), count);
+            }
+        }
+    }
+    let mut file = File::create(path)?;
+    write!(file, "{}", serde_json::to_string(&cell_mod_edit_counts)?)?;
+    return Ok(());
+}
diff --git a/src/commands/dump_mod_data.rs b/src/commands/dump_mod_data.rs
new file mode 100644
index 0000000..598b8a6
--- /dev/null
+++ b/src/commands/dump_mod_data.rs
@@ -0,0 +1,27 @@
+use anyhow::Result;
+use std::fs::File;
+use std::io::Write;
+use std::path::Path;
+
+use crate::models::game_mod;
+
+pub async fn dump_mod_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> Result<()> {
+    let page_size = 20;
+    let mut last_id = None;
+    loop {
+        let mods =
+            game_mod::batched_get_with_cells(&pool, page_size, last_id, "Skyrim.esm", 1).await?;
+        if mods.is_empty() {
+            break;
+        }
+        for mod_with_cells in mods {
+            let path = Path::new(&dir);
+            std::fs::create_dir_all(&path)?;
+            let path = path.join(format!("{}.json", mod_with_cells.nexus_mod_id));
+            let mut file = File::create(path)?;
+            write!(file, "{}", serde_json::to_string(&mod_with_cells)?)?;
+            last_id = Some(mod_with_cells.id);
+        }
+    }
+    return Ok(());
+}
diff --git a/src/commands/dump_mod_search_index.rs b/src/commands/dump_mod_search_index.rs
new file mode 100644
index 0000000..98d2c7c
--- /dev/null
+++ b/src/commands/dump_mod_search_index.rs
@@ -0,0 +1,34 @@
+use anyhow::Result;
+use serde::Serialize;
+use std::fs::File;
+use std::io::Write;
+
+use crate::models::game_mod;
+
+#[derive(Serialize)]
+struct ModForSearchIdTranslated {
+    name: String,
+    id: i32,
+}
+
+pub async fn dump_mod_search_index(pool: &sqlx::Pool<sqlx::Postgres>, path: &str) -> Result<()> {
+    let mut search_index = vec![];
+    let page_size = 20;
+    let mut last_id = None;
+    loop {
+        let mods = game_mod::batched_get_for_search(&pool, page_size, last_id).await?;
+        if mods.is_empty() {
+            break;
+        }
+        for mod_for_search in mods {
+            search_index.push(ModForSearchIdTranslated {
+                name: mod_for_search.name,
+                id: mod_for_search.nexus_mod_id,
+            });
+            last_id = Some(mod_for_search.id);
+        }
+    }
+    let mut file = File::create(path)?;
+    write!(file, "{}", serde_json::to_string(&search_index)?)?;
+    return Ok(());
+}
diff --git a/src/commands/mod.rs b/src/commands/mod.rs
new file mode 100644
index 0000000..79059c6
--- /dev/null
+++ b/src/commands/mod.rs
@@ -0,0 +1,11 @@
+pub mod dump_cell_data;
+pub mod dump_cell_edit_counts;
+pub mod dump_mod_data;
+pub mod dump_mod_search_index;
+pub mod update;
+
+pub use dump_cell_data::dump_cell_data;
+pub use dump_cell_edit_counts::dump_cell_edit_counts;
+pub use dump_mod_data::dump_mod_data;
+pub use dump_mod_search_index::dump_mod_search_index;
+pub use update::update;
diff --git a/src/commands/update.rs b/src/commands/update.rs
new file mode 100644
index 0000000..165d356
--- /dev/null
+++ b/src/commands/update.rs
@@ -0,0 +1,284 @@
+use anyhow::Result;
+use chrono::{NaiveDateTime, NaiveTime};
+use humansize::{file_size_opts, FileSize};
+use reqwest::StatusCode;
+use std::collections::HashSet;
+use std::io::SeekFrom;
+use std::time::Duration;
+use tokio::io::{AsyncReadExt, AsyncSeekExt};
+use tokio::time::sleep;
+use tracing::{debug, info, info_span, warn};
+
+use crate::extractors::{self, extract_with_7zip, extract_with_compress_tools, extract_with_unrar};
+use crate::models::file;
+use crate::models::game;
+use crate::models::{game_mod, game_mod::UnsavedMod};
+use crate::nexus_api::{self, GAME_ID, GAME_NAME};
+use crate::nexus_scraper;
+
+const REQUEST_TIMEOUT: Duration = Duration::from_secs(7200); // 2 hours
+const CONNECT_TIMEOUT: Duration = Duration::from_secs(30);
+
+pub async fn update(pool: &sqlx::Pool<sqlx::Postgres>, start_page: usize) -> Result<()> {
+    let mut page = start_page;
+    let mut has_next_page = true;
+
+    let game = game::insert(&pool, GAME_NAME, GAME_ID as i32).await?;
+
+    let client = reqwest::Client::builder()
+        .timeout(REQUEST_TIMEOUT)
+        .connect_timeout(CONNECT_TIMEOUT)
+        .build()?;
+
+    while has_next_page {
+        let page_span = info_span!("page", page);
+        let _page_span = page_span.enter();
+        let mod_list_resp = nexus_scraper::get_mod_list_page(&client, page).await?;
+        let scraped = mod_list_resp.scrape_mods()?;
+
+        has_next_page = scraped.has_next_page;
+        let processed_mods = game_mod::bulk_get_last_updated_by_nexus_mod_ids(
+            &pool,
+            &scraped
+                .mods
+                .iter()
+                .map(|scraped_mod| scraped_mod.nexus_mod_id)
+                .collect::<Vec<i32>>(),
+        )
+        .await?;
+        let mods_to_create_or_update: Vec<UnsavedMod> = scraped
+            .mods
+            .iter()
+            .filter(|scraped_mod| {
+                if let Some(processed_mod) = processed_mods
+                    .iter()
+                    .find(|processed_mod| processed_mod.nexus_mod_id == scraped_mod.nexus_mod_id)
+                {
+                    if processed_mod.last_updated_files_at
+                        > NaiveDateTime::new(
+                            scraped_mod.last_update_at,
+                            NaiveTime::from_hms(0, 0, 0),
+                        )
+                    {
+                        return false;
+                    }
+                }
+                true
+            })
+            .map(|scraped_mod| UnsavedMod {
+                name: scraped_mod.name,
+                nexus_mod_id: scraped_mod.nexus_mod_id,
+                author_name: scraped_mod.author_name,
+                author_id: scraped_mod.author_id,
+                category_name: scraped_mod.category_name,
+                category_id: scraped_mod.category_id,
+                description: scraped_mod.desc,
+                thumbnail_link: scraped_mod.thumbnail_link,
+                game_id: game.id,
+                last_update_at: NaiveDateTime::new(
+                    scraped_mod.last_update_at,
+                    NaiveTime::from_hms(0, 0, 0),
+                ),
+                first_upload_at: NaiveDateTime::new(
+                    scraped_mod.first_upload_at,
+                    NaiveTime::from_hms(0, 0, 0),
+                ),
+            })
+            .collect();
+
+        let mods = game_mod::batched_insert(&pool, &mods_to_create_or_update).await?;
+
+        for db_mod in mods {
+            let mod_span = info_span!("mod", name = ?&db_mod.name, id = &db_mod.nexus_mod_id);
+            let _mod_span = mod_span.enter();
+            let files_resp = nexus_api::files::get(&client, db_mod.nexus_mod_id).await?;
+
+            debug!(duration = ?files_resp.wait, "sleeping");
+            sleep(files_resp.wait).await;
+
+            // Filter out replaced/deleted files (indicated by null category) and archived files
+            let files = files_resp
+                .files()?
+                .into_iter()
+                .filter(|file| match file.category {
+                    None => {
+                        info!(
+                            name = file.file_name,
+                            id = file.file_id,
+                            "skipping file with no category"
+                        );
+                        false
+                    }
+                    Some(category) if category == "ARCHIVED" => false,
+                    Some(_) => true,
+                });
+
+            let processed_file_ids: HashSet<i32> =
+                file::get_processed_nexus_file_ids_by_mod_id(&pool, db_mod.id)
+                    .await?
+                    .into_iter()
+                    .collect();
+
+            for api_file in files {
+                let file_span =
+                    info_span!("file", name = &api_file.file_name, id = &api_file.file_id,);
+                let _file_span = file_span.enter();
+
+                if processed_file_ids.contains(&(api_file.file_id as i32)) {
+                    info!("skipping file already present and processed in database");
+                    continue;
+                }
+                let db_file = file::insert(
+                    &pool,
+                    &file::UnsavedFile {
+                        name: api_file.name,
+                        file_name: api_file.file_name,
+                        nexus_file_id: api_file.file_id as i32,
+                        mod_id: db_mod.id,
+                        category: api_file.category,
+                        version: api_file.version,
+                        mod_version: api_file.mod_version,
+                        size: api_file.size,
+                        uploaded_at: api_file.uploaded_at,
+                    },
+                )
+                .await?;
+
+                let mut checked_metadata = false;
+                match nexus_api::metadata::contains_plugin(&client, &api_file).await {
+                    Ok(contains_plugin) => {
+                        if let Some(contains_plugin) = contains_plugin {
+                            checked_metadata = true;
+                            if !contains_plugin {
+                                info!("file metadata does not contain a plugin, skip downloading");
+                                file::update_has_plugin(&pool, db_file.id, false).await?;
+                                continue;
+                            }
+                        } else {
+                            warn!("file has no metadata link, continuing with download");
+                        }
+                    }
+                    Err(err) => {
+                        warn!(error = %err, "error retrieving metadata for file, continuing with download");
+                    }
+                };
+
+                let humanized_size = api_file
+                    .size
+                    .file_size(file_size_opts::CONVENTIONAL)
+                    .expect("unable to create human-readable file size");
+                info!(size = %humanized_size, "decided to download file");
+                let download_link_resp =
+                    nexus_api::download_link::get(&client, db_mod.nexus_mod_id, api_file.file_id)
+                        .await;
+                if let Err(err) = &download_link_resp {
+                    if let Some(reqwest_err) = err.downcast_ref::<reqwest::Error>() {
+                        if reqwest_err.status() == Some(StatusCode::NOT_FOUND) {
+                            warn!(
+                                status = ?reqwest_err.status(),
+                                "failed to get download link for file, skipping file"
+                            );
+                            file::update_has_download_link(&pool, db_file.id, false).await?;
+                            continue;
+                        }
+                    }
+                }
+                let download_link_resp = download_link_resp?;
+
+                let mut tokio_file = match download_link_resp.download_file(&client).await {
+                    Ok(file) => {
+                        info!(bytes = api_file.size, "download finished");
+                        file::update_downloaded_at(&pool, db_file.id).await?;
+                        file
+                    }
+                    Err(err) => {
+                        warn!(error = %err, "failed all attempts at downloading file, skipping file");
+                        continue;
+                    }
+                };
+
+                let mut initial_bytes = [0; 8];
+                tokio_file.seek(SeekFrom::Start(0)).await?;
+                if let Err(err) = tokio_file.read_exact(&mut initial_bytes).await {
+                    warn!(error = %err, "failed to read initial bytes, skipping file");
+                    file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?;
+                    continue;
+                }
+                let kind = match infer::get(&initial_bytes) {
+                    Some(kind) => kind,
+                    None => {
+                        warn!(initial_bytes = ?initial_bytes, "unable to determine file type of archive, skipping file");
+                        file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?;
+                        continue;
+                    }
+                };
+                info!(
+                    mime_type = kind.mime_type(),
+                    "inferred mime_type of downloaded archive"
+                );
+
+                match kind.mime_type() {
+                    "application/vnd.rar" => {
+                        info!("downloaded archive is RAR archive, attempt to uncompress entire archive");
+                        // Use unrar to uncompress the entire .rar file to avoid bugs with compress_tools uncompressing certain .rar files:
+                        // https://github.com/libarchive/libarchive/issues/373, https://github.com/libarchive/libarchive/issues/1426
+                        tokio_file.seek(SeekFrom::Start(0)).await?;
+                        let mut file = tokio_file.try_clone().await?.into_std().await;
+                        match extract_with_unrar(
+                            &mut file,
+                            &pool,
+                            &db_file,
+                            &db_mod,
+                            checked_metadata,
+                        )
+                        .await
+                        {
+                            Ok(_) => Ok(()),
+                            Err(err) => {
+                                // unrar failed to extract rar file (e.g. archive has unicode filenames)
+                                // Attempt to uncompress the archive using `7z` unix command instead
+                                warn!(error = %err, "failed to extract file with unrar, extracting whole archive with 7z instead");
+                                extract_with_7zip(&mut file, &pool, &db_file, &db_mod).await
+                            }
+                        }?;
+                    }
+                    _ => {
+                        tokio_file.seek(SeekFrom::Start(0)).await?;
+                        let mut file = tokio_file.try_clone().await?.into_std().await;
+
+                        match extract_with_compress_tools(&mut file, &pool, &db_file, &db_mod).await
+                        {
+                            Ok(_) => Ok(()),
+                            Err(err) => {
+                                if err
+                                    .downcast_ref::<extractors::compress_tools::ExtractorError>()
+                                    .is_some()
+                                    && (kind.mime_type() == "application/zip"
+                                        || kind.mime_type() == "application/x-7z-compressed")
+                                {
+                                    // compress_tools or libarchive failed to extract zip/7z file (e.g. archive is deflate64 compressed)
+                                    // Attempt to uncompress the archive using `7z` unix command instead
+                                    warn!(error = %err, "failed to extract file with compress_tools, extracting whole archive with 7z instead");
+                                    extract_with_7zip(&mut file, &pool, &db_file, &db_mod).await
+                                } else {
+                                    Err(err)
+                                }
+                            }
+                        }?;
+                    }
+                }
+
+                debug!(duration = ?download_link_resp.wait, "sleeping");
+                sleep(download_link_resp.wait).await;
+            }
+
+            game_mod::update_last_updated_files_at(&pool, db_mod.id).await?;
+        }
+
+        page += 1;
+        debug!(?page, ?has_next_page, "sleeping 1 second");
+        sleep(Duration::from_secs(1)).await;
+    }
+
+    Ok(())
+}
diff --git a/src/extractors/compress_tools.rs b/src/extractors/compress_tools.rs
index 7e9a883..127337c 100644
--- a/src/extractors/compress_tools.rs
+++ b/src/extractors/compress_tools.rs
@@ -7,6 +7,10 @@ use std::io::Seek;
 use std::io::SeekFrom;
 use tracing::{info, info_span};
 
+use crate::models::file::File;
+use crate::models::game_mod::Mod;
+use crate::plugin_processor::process_plugin;
+
 #[derive(Debug)]
 pub struct ExtractorError;
 
@@ -78,3 +82,19 @@ impl<'a> Iterator for Extractor<'a> {
         None
     }
 }
+
+pub async fn extract_with_compress_tools(
+    file: &mut std::fs::File,
+    pool: &sqlx::Pool<sqlx::Postgres>,
+    db_file: &File,
+    db_mod: &Mod,
+) -> Result<()> {
+    let extractor = Extractor::new(file);
+    for plugin in extractor.into_iter() {
+        let (file_path, mut plugin_buf) = plugin?;
+        let plugin_span = info_span!("plugin", name = ?file_path);
+        let _plugin_span = plugin_span.enter();
+        process_plugin(&mut plugin_buf, &pool, &db_file, &db_mod, &file_path).await?;
+    }
+    Ok(())
+}
diff --git a/src/extractors/mod.rs b/src/extractors/mod.rs
index 440fb79..3f83232 100644
--- a/src/extractors/mod.rs
+++ b/src/extractors/mod.rs
@@ -1 +1,7 @@
 pub mod compress_tools;
+pub mod seven_zip;
+pub mod unrar;
+
+pub use self::compress_tools::extract_with_compress_tools;
+pub use self::unrar::extract_with_unrar;
+pub use seven_zip::extract_with_7zip;
diff --git a/src/extractors/seven_zip.rs b/src/extractors/seven_zip.rs
new file mode 100644
index 0000000..13f2cb8
--- /dev/null
+++ b/src/extractors/seven_zip.rs
@@ -0,0 +1,61 @@
+use anyhow::Result;
+use std::io::{Seek, SeekFrom};
+use std::process::Command;
+use tempfile::tempdir;
+use tracing::{info, info_span};
+use walkdir::WalkDir;
+
+use crate::models::file::File;
+use crate::models::game_mod::Mod;
+use crate::plugin_processor::process_plugin;
+
+pub async fn extract_with_7zip(
+    file: &mut std::fs::File,
+    pool: &sqlx::Pool<sqlx::Postgres>,
+    db_file: &File,
+    db_mod: &Mod,
+) -> Result<()> {
+    file.seek(SeekFrom::Start(0))?;
+    let temp_dir = tempdir()?;
+    let temp_file_path = temp_dir.path().join("download.zip");
+    let mut temp_file = std::fs::File::create(&temp_file_path)?;
+    std::io::copy(file, &mut temp_file)?;
+    drop(temp_file); // close handle to temp file so 7zip process can open it
+    let extracted_path = temp_dir.path().join("extracted");
+
+    Command::new("7z")
+        .args(&[
+            "x",
+            &format!("-o{}", &extracted_path.to_string_lossy()),
+            &temp_file_path.to_string_lossy().to_string(),
+        ])
+        .status()?;
+
+    for entry in WalkDir::new(&extracted_path)
+        .contents_first(true)
+        .into_iter()
+        .filter_entry(|e| {
+            if let Some(extension) = e.path().extension() {
+                extension == "esp" || extension == "esm" || extension == "esl"
+            } else {
+                false
+            }
+        })
+    {
+        let entry = entry?;
+        let file_path = entry.path();
+        let plugin_span = info_span!("plugin", name = ?file_path);
+        let _plugin_span = plugin_span.enter();
+        info!("processing uncompressed file from downloaded archive");
+        let mut plugin_buf = std::fs::read(extracted_path.join(file_path))?;
+        process_plugin(
+            &mut plugin_buf,
+            &pool,
+            &db_file,
+            &db_mod,
+            &file_path.to_string_lossy(),
+        )
+        .await?;
+    }
+    Ok(())
+}
diff --git a/src/extractors/unrar.rs b/src/extractors/unrar.rs
new file mode 100644
index 0000000..d53ee18
--- /dev/null
+++ b/src/extractors/unrar.rs
@@ -0,0 +1,88 @@
+use anyhow::Result;
+use tempfile::tempdir;
+use tracing::{error, info, warn};
+use unrar::Archive;
+
+use crate::models::file::{self, File};
+use crate::models::game_mod::Mod;
+use crate::plugin_processor::process_plugin;
+
+pub async fn extract_with_unrar(
+    file: &mut std::fs::File,
+    pool: &sqlx::Pool<sqlx::Postgres>,
+    db_file: &File,
+    db_mod: &Mod,
+    checked_metadata: bool,
+) -> Result<()> {
+    let temp_dir = tempdir()?;
+    let temp_file_path = temp_dir.path().join("download.rar");
+    let mut temp_file = std::fs::File::create(&temp_file_path)?;
+    std::io::copy(file, &mut temp_file)?;
+
+    let mut plugin_file_paths = Vec::new();
+    let list = Archive::new(&temp_file_path.to_string_lossy().to_string())?.list();
+    match list {
+        Ok(list) => {
+            for entry in list.flatten() {
+                if let Some(extension) = entry.filename.extension() {
+                    if entry.is_file()
+                        && (extension == "esp" || extension == "esm" || extension == "esl")
+                    {
+                        plugin_file_paths.push(entry.filename);
+                    }
+                }
+            }
+        }
+        Err(_) => {
+            if !checked_metadata {
+                warn!("failed to read archive and server has no metadata, skipping file");
+                file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?;
+                return Ok(());
+            } else {
+                error!("failed to read archive, but server had metadata");
+                panic!("failed to read archive, but server had metadata");
+            }
+        }
+    }
+    info!(
+        num_plugin_files = plugin_file_paths.len(),
+        "listed plugins in downloaded archive"
+    );
+
+    if !plugin_file_paths.is_empty() {
+        info!("uncompressing downloaded archive");
+        let extract = Archive::new(&temp_file_path.to_string_lossy().to_string())?
+            .extract_to(temp_dir.path().to_string_lossy().to_string());
+
+        let mut extract = match extract {
+            Err(err) => {
+                warn!(error = %err, "failed to extract with unrar");
+                file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?;
+                return Ok(());
+            }
+            Ok(extract) => extract,
+        };
+        if let Err(err) = extract.process() {
+            warn!(error = %err, "failed to extract with unrar");
+            file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?;
+            return Ok(());
+        }
+
+        for file_path in plugin_file_paths.iter() {
+            info!(
+                ?file_path,
+                "processing uncompressed file from downloaded archive"
+            );
+            let mut plugin_buf = std::fs::read(temp_dir.path().join(file_path))?;
+            process_plugin(
+                &mut plugin_buf,
+                &pool,
+                &db_file,
+                &db_mod,
+                &file_path.to_string_lossy(),
+            )
+            .await?;
+        }
+    }
+    Ok(())
+}
diff --git a/src/main.rs b/src/main.rs
index 0c61dc5..84ab0f5 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,42 +1,19 @@
 use anyhow::Result;
 use argh::FromArgs;
-use chrono::{NaiveDateTime, NaiveTime};
 use dotenv::dotenv;
-use humansize::{file_size_opts, FileSize};
-use models::file::File;
-use models::game_mod::Mod;
-use reqwest::StatusCode;
-use serde::Serialize;
 use sqlx::postgres::PgPoolOptions;
-use std::collections::{HashMap, HashSet};
 use std::env;
-use std::io::Seek;
-use std::io::SeekFrom;
-use std::io::Write;
-use std::process::Command;
-use std::time::Duration;
-use tempfile::tempdir;
-use tokio::io::{AsyncReadExt, AsyncSeekExt};
-use tokio::time::sleep;
-use tracing::{debug, error, info, info_span, warn};
-use unrar::Archive;
-use walkdir::WalkDir;
 
+mod commands;
 mod extractors;
 mod models;
 mod nexus_api;
 mod nexus_scraper;
 mod plugin_processor;
 
-use models::cell;
-use models::file;
-use models::game;
-use models::{game_mod, game_mod::UnsavedMod};
-use nexus_api::{GAME_ID, GAME_NAME};
-use plugin_processor::process_plugin;
-
-const REQUEST_TIMEOUT: Duration = Duration::from_secs(7200); // 2 hours
-const CONNECT_TIMEOUT: Duration = Duration::from_secs(30);
+use commands::{
+    dump_cell_data, dump_cell_edit_counts, dump_mod_data, dump_mod_search_index, update,
+};
 
 #[derive(FromArgs)]
 /// Downloads every mod off nexus mods, parses CELL and WRLD data from plugins in each, and saves the data to the database.
@@ -62,153 +39,6 @@ struct Args {
     mod_search_index: Option<String>,
 }
 
-async fn extract_with_compress_tools(
-    file: &mut std::fs::File,
-    pool: &sqlx::Pool<sqlx::Postgres>,
-    db_file: &File,
-    db_mod: &Mod,
-) -> Result<()> {
-    let extractor = extractors::compress_tools::Extractor::new(file);
-    for plugin in extractor.into_iter() {
-        let (file_path, mut plugin_buf) = plugin?;
-        let plugin_span = info_span!("plugin", name = ?file_path);
-        let _plugin_span = plugin_span.enter();
-        process_plugin(&mut plugin_buf, &pool, &db_file, &db_mod, &file_path).await?;
-    }
-    Ok(())
-}
-
-async fn extract_with_7zip(
-    file: &mut std::fs::File,
-    pool: &sqlx::Pool<sqlx::Postgres>,
-    db_file: &File,
-    db_mod: &Mod,
-) -> Result<()> {
-    file.seek(SeekFrom::Start(0))?;
-    let temp_dir = tempdir()?;
-    let temp_file_path = temp_dir.path().join("download.zip");
-    let mut temp_file = std::fs::File::create(&temp_file_path)?;
-    std::io::copy(file, &mut temp_file)?;
-    drop(temp_file); // close handle to temp file so 7zip process can open it
-    let extracted_path = temp_dir.path().join("extracted");
-
-    Command::new("7z")
-        .args(&[
-            "x",
-            &format!("-o{}", &extracted_path.to_string_lossy()),
-            &temp_file_path.to_string_lossy().to_string(),
-        ])
-        .status()?;
-
-    for entry in WalkDir::new(&extracted_path)
-        .contents_first(true)
-        .into_iter()
-        .filter_entry(|e| {
-            if let Some(extension) = e.path().extension() {
-                extension == "esp" || extension == "esm" || extension == "esl"
-            } else {
-                false
-            }
-        })
-    {
-        let entry = entry?;
-        let file_path = entry.path();
-        let plugin_span = info_span!("plugin", name = ?file_path);
-        let _plugin_span = plugin_span.enter();
-        info!("processing uncompressed file from downloaded archive");
-        let mut plugin_buf = std::fs::read(extracted_path.join(file_path))?;
-        process_plugin(
-            &mut plugin_buf,
-            &pool,
-            &db_file,
-            &db_mod,
-            &file_path.to_string_lossy(),
-        )
-        .await?;
-    }
-    Ok(())
-}
-
-async fn extract_with_unrar(
-    file: &mut std::fs::File,
-    pool: &sqlx::Pool<sqlx::Postgres>,
-    db_file: &File,
-    db_mod: &Mod,
-    checked_metadata: bool,
-) -> Result<()> {
-    let temp_dir = tempdir()?;
-    let temp_file_path = temp_dir.path().join("download.rar");
-    let mut temp_file = std::fs::File::create(&temp_file_path)?;
-    std::io::copy(file, &mut temp_file)?;
-
-    let mut plugin_file_paths = Vec::new();
-    let list = Archive::new(&temp_file_path.to_string_lossy().to_string())?.list();
-    match list {
-        Ok(list) => {
-            for entry in list.flatten() {
-                if let Some(extension) = entry.filename.extension() {
-                    if entry.is_file()
-                        && (extension == "esp" || extension == "esm" || extension == "esl")
-                    {
-                        plugin_file_paths.push(entry.filename);
-                    }
-                }
-            }
-        }
-        Err(_) => {
-            if !checked_metadata {
-                warn!("failed to read archive and server has no metadata, skipping file");
-                file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?;
-                return Ok(());
-            } else {
-                error!("failed to read archive, but server had metadata");
-                panic!("failed to read archive, but server had metadata");
-            }
-        }
-    }
-    info!(
-        num_plugin_files = plugin_file_paths.len(),
-        "listed plugins in downloaded archive"
-    );
-
-    if !plugin_file_paths.is_empty() {
-        info!("uncompressing downloaded archive");
-        let extract = Archive::new(&temp_file_path.to_string_lossy().to_string())?
-            .extract_to(temp_dir.path().to_string_lossy().to_string());
-
-        let mut extract = match extract {
-            Err(err) => {
-                warn!(error = %err, "failed to extract with unrar");
-                file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?;
-                return Ok(());
-            }
-            Ok(extract) => extract,
-        };
-        if let Err(err) = extract.process() {
-            warn!(error = %err, "failed to extract with unrar");
-            file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?;
-            return Ok(());
-        }
-
-        for file_path in plugin_file_paths.iter() {
-            info!(
-                ?file_path,
-                "processing uncompressed file from downloaded archive"
-            );
-            let mut plugin_buf = std::fs::read(temp_dir.path().join(file_path))?;
-            process_plugin(
-                &mut plugin_buf,
-                &pool,
-                &db_file,
-                &db_mod,
-                &file_path.to_string_lossy(),
-            )
-            .await?;
-        }
-    }
-    Ok(())
-}
-
 #[tokio::main]
 pub async fn main() -> Result<()> {
     dotenv().ok();
@@ -222,343 +52,18 @@ pub async fn main() -> Result<()> {
 
     let args: Args = argh::from_env();
 
-    if let Some(dump_edits) = args.dump_edits {
-        let mut cell_mod_edit_counts = HashMap::new();
-        for x in -77..75 {
-            for y in -50..44 {
-                if let Some(count) = cell::count_mod_edits(&pool, "Skyrim.esm", 1, x, y).await? {
-                    cell_mod_edit_counts.insert(format!("{},{}", x, y), count);
-                }
-            }
-        }
-        let mut file = std::fs::File::create(dump_edits)?;
-        write!(file, "{}", serde_json::to_string(&cell_mod_edit_counts)?)?;
-        return Ok(());
+    if let Some(path) = args.dump_edits {
+        return dump_cell_edit_counts(&pool, &path).await;
+    }
+    if let Some(dir) = args.cell_data {
+        return dump_cell_data(&pool, &dir).await;
+    }
+    if let Some(dir) = args.mod_data {
+        return dump_mod_data(&pool, &dir).await;
+    }
+    if let Some(path) = args.mod_search_index {
+        return dump_mod_search_index(&pool, &path).await;
     }
 
-    if let Some(cell_data_dir) = args.cell_data {
-        for x in -77..75 {
-            for y in -50..44 {
-                if let Ok(data) = cell::get_cell_data(&pool, "Skyrim.esm", 1, x, y).await {
-                    let path = format!("{}/{}", &cell_data_dir, x);
-                    let path = std::path::Path::new(&path);
-                    std::fs::create_dir_all(&path)?;
-                    let path = path.join(format!("{}.json", y));
-                    let mut file = std::fs::File::create(path)?;
-                    write!(file, "{}", serde_json::to_string(&data)?)?;
-                }
-            }
-        }
-        return Ok(());
-    }
-
-    if let Some(mod_data_dir) = args.mod_data {
-        let page_size = 20;
-        let mut last_id = None;
-        loop {
-            let mods = game_mod::batched_get_with_cells(&pool, page_size, last_id, "Skyrim.esm", 1)
-                .await?;
-            if mods.is_empty() {
-                break;
-            }
-            for mod_with_cells in mods {
-                let path = std::path::Path::new(&mod_data_dir);
-                std::fs::create_dir_all(&path)?;
-                let path = path.join(format!("{}.json", mod_with_cells.nexus_mod_id));
-                let mut file = std::fs::File::create(path)?;
-                write!(file, "{}", serde_json::to_string(&mod_with_cells)?)?;
-                last_id = Some(mod_with_cells.id);
-            }
-        }
-        return Ok(());
-    }
-
-    if let Some(mod_search_index) = args.mod_search_index {
-        #[derive(Serialize)]
-        struct ModForSearchIdTranslated {
-            name: String,
-            id: i32,
-        }
-        let mut search_index = vec![];
-        let page_size = 20;
-        let mut last_id = None;
-        loop {
-            let mods = game_mod::batched_get_for_search(&pool, page_size, last_id).await?;
-            if mods.is_empty() {
-                break;
-            }
-            for mod_for_search in mods {
-                search_index.push(ModForSearchIdTranslated {
-                    name: mod_for_search.name,
-                    id: mod_for_search.nexus_mod_id,
-                });
-                last_id = Some(mod_for_search.id);
-            }
-        }
-        let mut file = std::fs::File::create(mod_search_index)?;
-        write!(file, "{}", serde_json::to_string(&search_index)?)?;
-        return Ok(());
-    }
-
-    let mut page = args.page;
-    let mut has_next_page = true;
-
-    let game = game::insert(&pool, GAME_NAME, GAME_ID as i32).await?;
-
-    let client = reqwest::Client::builder()
-        .timeout(REQUEST_TIMEOUT)
-        .connect_timeout(CONNECT_TIMEOUT)
-        .build()?;
-
-    while has_next_page {
-        let page_span = info_span!("page", page);
-        let _page_span = page_span.enter();
-        let mod_list_resp = nexus_scraper::get_mod_list_page(&client, page).await?;
-        let scraped = mod_list_resp.scrape_mods()?;
-
-        has_next_page = scraped.has_next_page;
-        let processed_mods = game_mod::bulk_get_last_updated_by_nexus_mod_ids(
-            &pool,
-            &scraped
-                .mods
-                .iter()
-                .map(|scraped_mod| scraped_mod.nexus_mod_id)
-                .collect::<Vec<i32>>(),
-        )
-        .await?;
-        let mods_to_create_or_update: Vec<UnsavedMod> = scraped
-            .mods
-            .iter()
-            .filter(|scraped_mod| {
-                if let Some(processed_mod) = processed_mods
-                    .iter()
-                    .find(|processed_mod| processed_mod.nexus_mod_id == scraped_mod.nexus_mod_id)
-                {
-                    if processed_mod.last_updated_files_at
-                        > NaiveDateTime::new(
-                            scraped_mod.last_update_at,
-                            NaiveTime::from_hms(0, 0, 0),
-                        )
-                    {
-                        return false;
-                    }
-                }
-                true
-            })
-            .map(|scraped_mod| UnsavedMod {
-                name: scraped_mod.name,
-                nexus_mod_id: scraped_mod.nexus_mod_id,
-                author_name: scraped_mod.author_name,
-                author_id: scraped_mod.author_id,
-                category_name: scraped_mod.category_name,
-                category_id: scraped_mod.category_id,
-                description: scraped_mod.desc,
-                thumbnail_link: scraped_mod.thumbnail_link,
-                game_id: game.id,
-                last_update_at: NaiveDateTime::new(
-                    scraped_mod.last_update_at,
-                    NaiveTime::from_hms(0, 0, 0),
-                ),
-                first_upload_at: NaiveDateTime::new(
-                    scraped_mod.first_upload_at,
-                    NaiveTime::from_hms(0, 0, 0),
-                ),
-            })
-            .collect();
-
-        let mods = game_mod::batched_insert(&pool, &mods_to_create_or_update).await?;
-
-        for db_mod in mods {
-            let mod_span = info_span!("mod", name = ?&db_mod.name, id = &db_mod.nexus_mod_id);
-            let _mod_span = mod_span.enter();
-            let files_resp = nexus_api::files::get(&client, db_mod.nexus_mod_id).await?;
-
-            debug!(duration = ?files_resp.wait, "sleeping");
-            sleep(files_resp.wait).await;
-
-            // Filter out replaced/deleted files (indicated by null category) and archived files
-            let files = files_resp
-                .files()?
-                .into_iter()
-                .filter(|file| match file.category {
-                    None => {
-                        info!(
-                            name = file.file_name,
-                            id = file.file_id,
-                            "skipping file with no category"
-                        );
-                        false
-                    }
-                    Some(category) if category == "ARCHIVED" => false,
-                    Some(_) => true,
-                });
-
-            let processed_file_ids: HashSet<i32> =
-                file::get_processed_nexus_file_ids_by_mod_id(&pool, db_mod.id)
-                    .await?
-                    .into_iter()
-                    .collect();
-
-            for api_file in files {
-                let file_span =
-                    info_span!("file", name = &api_file.file_name, id = &api_file.file_id,);
-                let _file_span = file_span.enter();
-
-                if processed_file_ids.contains(&(api_file.file_id as i32)) {
-                    info!("skipping file already present and processed in database");
-                    continue;
-                }
-                let db_file = file::insert(
-                    &pool,
-                    &file::UnsavedFile {
-                        name: api_file.name,
-                        file_name: api_file.file_name,
-                        nexus_file_id: api_file.file_id as i32,
-                        mod_id: db_mod.id,
-                        category: api_file.category,
-                        version: api_file.version,
-                        mod_version: api_file.mod_version,
-                        size: api_file.size,
-                        uploaded_at: api_file.uploaded_at,
-                    },
-                )
-                .await?;
-
-                let mut checked_metadata = false;
-                match nexus_api::metadata::contains_plugin(&client, &api_file).await {
-                    Ok(contains_plugin) => {
-                        if let Some(contains_plugin) = contains_plugin {
-                            checked_metadata = true;
-                            if !contains_plugin {
-                                info!("file metadata does not contain a plugin, skip downloading");
-                                file::update_has_plugin(&pool, db_file.id, false).await?;
-                                continue;
-                            }
-                        } else {
-                            warn!("file has no metadata link, continuing with download");
-                        }
-                    }
-                    Err(err) => {
-                        warn!(error = %err, "error retrieving metadata for file, continuing with download");
-                    }
-                };
-
-                let humanized_size = api_file
-                    .size
-                    .file_size(file_size_opts::CONVENTIONAL)
-                    .expect("unable to create human-readable file size");
-                info!(size = %humanized_size, "decided to download file");
-                let download_link_resp =
-                    nexus_api::download_link::get(&client, db_mod.nexus_mod_id, api_file.file_id)
-                        .await;
-                if let Err(err) = &download_link_resp {
-                    if let Some(reqwest_err) = err.downcast_ref::<reqwest::Error>() {
-                        if reqwest_err.status() == Some(StatusCode::NOT_FOUND) {
-                            warn!(
-                                status = ?reqwest_err.status(),
-                                "failed to get download link for file, skipping file"
-                            );
-                            file::update_has_download_link(&pool, db_file.id, false).await?;
-                            continue;
-                        }
-                    }
-                }
-                let download_link_resp = download_link_resp?;
-
-                let mut tokio_file = match download_link_resp.download_file(&client).await {
-                    Ok(file) => {
-                        info!(bytes = api_file.size, "download finished");
-                        file::update_downloaded_at(&pool, db_file.id).await?;
-                        file
-                    }
-                    Err(err) => {
-                        warn!(error = %err, "failed all attempts at downloading file, skipping file");
-                        continue;
-                    }
-                };
-
-                let mut initial_bytes = [0; 8];
-                tokio_file.seek(SeekFrom::Start(0)).await?;
-                if let Err(err) = tokio_file.read_exact(&mut initial_bytes).await {
-                    warn!(error = %err, "failed to read initial bytes, skipping file");
-                    file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?;
-                    continue;
-                }
-                let kind = match infer::get(&initial_bytes) {
-                    Some(kind) => kind,
-                    None => {
-                        warn!(initial_bytes = ?initial_bytes, "unable to determine file type of archive, skipping file");
-                        file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?;
-                        continue;
-                    }
-                };
-                info!(
-                    mime_type = kind.mime_type(),
-                    "inferred mime_type of downloaded archive"
-                );
-
-                match kind.mime_type() {
-                    "application/vnd.rar" => {
-                        info!("downloaded archive is RAR archive, attempt to uncompress entire archive");
-                        // Use unrar to uncompress the entire .rar file to avoid bugs with compress_tools uncompressing certain .rar files:
-                        // https://github.com/libarchive/libarchive/issues/373, https://github.com/libarchive/libarchive/issues/1426
-                        tokio_file.seek(SeekFrom::Start(0)).await?;
-                        let mut file = tokio_file.try_clone().await?.into_std().await;
-                        match extract_with_unrar(
-                            &mut file,
-                            &pool,
-                            &db_file,
-                            &db_mod,
-                            checked_metadata,
-                        )
-                        .await
-                        {
-                            Ok(_) => Ok(()),
-                            Err(err) => {
-                                // unrar failed to extract rar file (e.g. archive has unicode filenames)
-                                // Attempt to uncompress the archive using `7z` unix command instead
-                                warn!(error = %err, "failed to extract file with unrar, extracting whole archive with 7z instead");
-                                extract_with_7zip(&mut file, &pool, &db_file, &db_mod).await
-                            }
-                        }?;
-                    }
-                    _ => {
-                        tokio_file.seek(SeekFrom::Start(0)).await?;
-                        let mut file = tokio_file.try_clone().await?.into_std().await;
-
-                        match extract_with_compress_tools(&mut file, &pool, &db_file, &db_mod).await
-                        {
-                            Ok(_) => Ok(()),
-                            Err(err) => {
-                                if err
-                                    .downcast_ref::<extractors::compress_tools::ExtractorError>()
-                                    .is_some()
-                                    && (kind.mime_type() == "application/zip"
-                                        || kind.mime_type() == "application/x-7z-compressed")
-                                {
-                                    // compress_tools or libarchive failed to extract zip/7z file (e.g. archive is deflate64 compressed)
-                                    // Attempt to uncompress the archive using `7z` unix command instead
-                                    warn!(error = %err, "failed to extract file with compress_tools, extracting whole archive with 7z instead");
-                                    extract_with_7zip(&mut file, &pool, &db_file, &db_mod).await
-                                } else {
-                                    Err(err)
-                                }
-                            }
-                        }?;
-                    }
-                }
-
-                debug!(duration = ?download_link_resp.wait, "sleeping");
-                sleep(download_link_resp.wait).await;
-            }
-
-            game_mod::update_last_updated_files_at(&pool, db_mod.id).await?;
-        }
-
-        page += 1;
-        debug!(?page, ?has_next_page, "sleeping 1 second");
-        sleep(Duration::from_secs(1)).await;
-    }
-
-    Ok(())
+    return update(&pool, args.page).await;
 }
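For reference, after this patch src/main.rs reduces to setup plus an early-return dispatch into `commands`. The sketch below restates that dispatch in a self-contained form; the stub command bodies and the abbreviated `Args` are assumptions for illustration (the real functions are the ones added above, are re-exported from src/commands/mod.rs, and take a `&sqlx::Pool<sqlx::Postgres>` as their first argument):

use anyhow::Result;

// Stubs standing in for src/commands/*.rs (hypothetical bodies).
mod commands {
    use anyhow::Result;

    pub async fn dump_cell_edit_counts(path: &str) -> Result<()> {
        println!("would write cell edit counts to {}", path);
        Ok(())
    }

    pub async fn update(start_page: usize) -> Result<()> {
        println!("would scrape the mod list starting at page {}", start_page);
        Ok(())
    }
}

use commands::{dump_cell_edit_counts, update};

// Abbreviated stand-in for the argh-derived Args struct in src/main.rs;
// only `mod_search_index: Option<String>` is visible in the hunk context
// above, so the other field types here are assumptions.
struct Args {
    page: usize,
    dump_edits: Option<String>,
}

#[tokio::main]
async fn main() -> Result<()> {
    let args = Args {
        page: 0,
        dump_edits: None,
    };

    // Same shape as the + lines in the src/main.rs hunk: each dump flag
    // short-circuits main; otherwise the long-running update job runs.
    if let Some(path) = args.dump_edits {
        return dump_cell_edit_counts(&path).await;
    }
    update(args.page).await
}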