// modmapper/src/main.rs
use anyhow::Result;
use compress_tools::{list_archive_files, uncompress_archive_file};
use dotenv::dotenv;
use reqwest::StatusCode;
use skyrim_cell_dump::parse_plugin;
use sqlx::postgres::PgPoolOptions;
use std::convert::TryInto;
use std::env;
use std::fs::OpenOptions;
use std::io::Read;
use std::io::Seek;
use std::io::SeekFrom;
use std::ops::Index;
use std::path::Path;
use std::process::Command;
2021-07-09 04:37:08 +00:00
use std::time::Duration;
use tempfile::tempdir;
use tokio::io::{AsyncReadExt, AsyncSeekExt};
use tokio::time::sleep;
use tracing::{debug, info, info_span, warn};
use unrar::Archive;
use zip::write::{FileOptions, ZipWriter};
use zip::ZipArchive;
mod models;
2021-07-09 04:37:08 +00:00
mod nexus_api;
mod nexus_scraper;
use models::game;
use models::plugin;
use models::{cell, cell::UnsavedCell};
use models::{file, file::File};
use models::{game_mod, game_mod::Mod};
use models::{plugin_cell, plugin_cell::UnsavedPluginCell};
use models::{plugin_world, plugin_world::UnsavedPluginWorld};
use models::{world, world::UnsavedWorld};
2021-07-09 04:37:08 +00:00
use nexus_api::{GAME_ID, GAME_NAME};
/// Splits a raw plugin form ID into its local ID and the name of the file that owns it.
///
/// The top 8 bits of `form_id` are used as an index into `masters`; the low
/// 24 bits are the local form ID. When the index does not refer to an entry
/// in `masters`, `file_name` is returned as the owner instead.
///
/// # Errors
///
/// Propagates the integer conversion error from narrowing the masked value
/// to `i32` (the mask keeps the value within 24 bits).
fn get_local_form_id_and_master<'a>(
    form_id: u32,
    masters: &'a [&str],
    file_name: &'a str,
) -> Result<(i32, &'a str)> {
    let local_form_id: i32 = (form_id & 0x00FF_FFFF).try_into()?;
    let owner = match masters.get((form_id >> 24) as usize) {
        Some(&master) => master,
        // Index past the end of the master list: attribute to `file_name`.
        None => file_name,
    };
    Ok((local_form_id, owner))
}
/// Parses a single plugin file and stores its worlds and cells in the database.
///
/// * `plugin_buf` - raw bytes of the plugin; an empty buffer is skipped with a warning.
/// * `pool` - database connection pool handed to every `insert` / `batched_insert` call.
/// * `db_file` - file row the plugin belongs to (its `name` and `id` go on the plugin row).
/// * `mod_obj` - owning mod; only referenced by the disabled archive-writing code below.
/// * `file_path` - path of the plugin inside the downloaded archive; its final
///   component is used as the plugin's file name.
///
/// A plugin that fails to parse is logged and skipped; the function still
/// returns `Ok(())` in that case. Database errors are propagated.
///
/// # Panics
///
/// Panics if `file_path` has no final file-name component, or if a cell
/// refers to a world that is not among the rows returned for this plugin's
/// worlds.
async fn process_plugin(
    plugin_buf: &mut [u8],
    pool: &sqlx::Pool<sqlx::Postgres>,
    // plugin_archive: &mut ZipWriter<W>,
    db_file: &File,
    mod_obj: &Mod,
    file_path: &str,
) -> Result<()>
// where
//     W: std::io::Write + std::io::Seek,
{
    if plugin_buf.is_empty() {
        warn!("skipping processing of invalid empty plugin");
        return Ok(());
    }

    info!(bytes = plugin_buf.len(), "parsing plugin");
    let plugin = match parse_plugin(&plugin_buf) {
        Ok(parsed) => parsed,
        Err(err) => {
            warn!(error = %err, "Failed to parse plugin, skipping plugin");
            return Ok(());
        }
    };
    info!(
        num_worlds = plugin.worlds.len(),
        num_cells = plugin.cells.len(),
        "parse finished"
    );

    let hash = seahash::hash(&plugin_buf);
    let file_name = Path::new(file_path)
        .file_name()
        .expect("plugin path ends in a valid file_name")
        .to_string_lossy();
    let master_names: Vec<String> = plugin
        .header
        .masters
        .iter()
        .map(|s| s.to_string())
        .collect();
    let plugin_row = plugin::insert(
        &pool,
        &db_file.name,
        hash as i64,
        db_file.id,
        plugin.header.version as f64,
        plugin_buf.len() as i64,
        plugin.header.author,
        plugin.header.description,
        &master_names,
        &file_name,
        file_path,
    )
    .await?;

    // Worlds: resolve each form ID to (local id, owning file) and insert them.
    let mut worlds: Vec<UnsavedWorld> = Vec::with_capacity(plugin.worlds.len());
    for parsed_world in &plugin.worlds {
        let (form_id, master) = get_local_form_id_and_master(
            parsed_world.form_id,
            &plugin.header.masters,
            &file_name,
        )
        .expect("form_id to be a valid i32");
        worlds.push(UnsavedWorld {
            form_id,
            master: master.to_string(),
        });
    }
    let db_worlds = world::batched_insert(&pool, &worlds).await?;

    // Pairs returned rows with parsed worlds by position.
    // NOTE(review): assumes `batched_insert` returns rows in input order - confirm.
    let mut plugin_worlds: Vec<UnsavedPluginWorld> = Vec::with_capacity(plugin.worlds.len());
    for (db_world, parsed_world) in db_worlds.iter().zip(&plugin.worlds) {
        plugin_worlds.push(UnsavedPluginWorld {
            plugin_id: plugin_row.id,
            world_id: db_world.id,
            editor_id: parsed_world.editor_id.clone(),
        });
    }
    plugin_world::batched_insert(&pool, &plugin_worlds).await?;

    // Cells: a cell with a world form ID is linked to the matching inserted world row.
    let mut cells: Vec<UnsavedCell> = Vec::with_capacity(plugin.cells.len());
    for parsed_cell in &plugin.cells {
        let world_id = match parsed_cell.world_form_id {
            Some(world_form_id) => {
                let (form_id, master) = get_local_form_id_and_master(
                    world_form_id,
                    &plugin.header.masters,
                    &file_name,
                )
                .expect("form_id to be valid i32");
                let db_world = db_worlds
                    .iter()
                    .find(|candidate| candidate.form_id == form_id && candidate.master == master)
                    .expect("cell references world in the plugin worlds");
                Some(db_world.id)
            }
            None => None,
        };
        let (form_id, master) = get_local_form_id_and_master(
            parsed_cell.form_id,
            &plugin.header.masters,
            &file_name,
        )
        .expect("form_id is a valid i32");
        cells.push(UnsavedCell {
            form_id,
            master: master.to_string(),
            x: parsed_cell.x,
            y: parsed_cell.y,
            world_id,
            is_persistent: parsed_cell.is_persistent,
        });
    }
    let db_cells = cell::batched_insert(&pool, &cells).await?;

    // Same positional pairing as for worlds above.
    let mut plugin_cells: Vec<UnsavedPluginCell> = Vec::with_capacity(plugin.cells.len());
    for (db_cell, parsed_cell) in db_cells.iter().zip(&plugin.cells) {
        plugin_cells.push(UnsavedPluginCell {
            plugin_id: plugin_row.id,
            cell_id: db_cell.id,
            editor_id: parsed_cell.editor_id.clone(),
        });
    }
    plugin_cell::batched_insert(&pool, &plugin_cells).await?;

    // TODO: re-enable after db fix
    // plugin_archive.start_file(
    //     format!(
    //         "{}/{}/{}/{}",
    //         GAME_NAME, mod_obj.nexus_mod_id, db_file.nexus_file_id, file_path
    //     ),
    //     FileOptions::default(),
    // )?;
    // let mut reader = std::io::Cursor::new(&plugin_buf);
    // std::io::copy(&mut reader, plugin_archive)?;
    Ok(())
}
2021-07-09 04:37:08 +00:00
/// Writes a zip archive to `plugins.zip` containing a single directory entry
/// named `{GAME_NAME}/{mod_id}/{file_id}`, creating the file if it is missing.
///
/// NOTE(review): the file is opened without truncation and the writer starts
/// at the beginning of the file, so an existing `plugins.zip` appears to be
/// overwritten from offset 0 rather than extended - confirm this is intended.
///
/// # Errors
///
/// Returns any error from opening the file or from the zip writer.
fn initialize_plugins_archive(mod_id: i32, file_id: i32) -> Result<()> {
    let archive_file = OpenOptions::new()
        .write(true)
        .create(true)
        .open("plugins.zip")?;
    let directory = format!("{}/{}/{}", GAME_NAME, mod_id, file_id);

    let mut writer = ZipWriter::new(archive_file);
    writer.add_directory(directory, FileOptions::default())?;
    writer.finish()?;
    Ok(())
}
#[tokio::main]
pub async fn main() -> Result<()> {
dotenv().ok();
2021-07-11 23:45:26 +00:00
tracing_subscriber::fmt::init();
let pool = PgPoolOptions::new()
.max_connections(5)
.connect(&env::var("DATABASE_URL")?)
.await?;
let game = game::insert(&pool, GAME_NAME, GAME_ID as i32).await?;
let client = reqwest::Client::new();
// DELETEME: just running this to clean up the existing database rows
let plugins_archive = std::fs::File::open("plugins.zip")?;
let mut plugins_archive = ZipArchive::new(plugins_archive)?;
let file_paths: Vec<String> = plugins_archive
.file_names()
.map(|s| s.to_string())
.collect();
for (i, file_name) in file_paths.iter().enumerate() {
info!("plugin: {:?} / {:?}. {}", i, file_paths.len(), file_name);
let file_path = Path::new(file_name);
let mut components = file_path.components();
let _game_name = components.next().expect("game directory");
let nexus_mod_id: i32 = components
.next()
.expect("mod_id directory")
.as_os_str()
.to_string_lossy()
.parse()?;
let nexus_file_id: i32 = components
.next()
.expect("file_id directory")
.as_os_str()
.to_string_lossy()
.parse()?;
let original_file_path: &Path = components.as_ref();
let original_file_path = original_file_path.to_string_lossy();
if let Some(db_mod) = game_mod::get_by_nexus_mod_id(&pool, nexus_mod_id).await? {
if let Some(db_file) = file::get_by_nexus_file_id(&pool, nexus_file_id).await? {
let mut plugin_file = plugins_archive.by_name(file_name)?;
let mut plugin_buf = Vec::new();
plugin_file.read_to_end(&mut plugin_buf)?;
info!(
nexus_mod_id,
nexus_file_id, %original_file_path, "processing plugin"
);
process_plugin(
&mut plugin_buf,
&pool,
&db_file,
&db_mod,
&original_file_path,
)
.await?;
} else {
warn!(nexus_file_id, "missing db file!");
}
} else {
warn!(nexus_mod_id, "missing db mod!");
}
}
return Ok(());
let mut page: i32 = 1;
let mut has_next_page = true;
while has_next_page {
let page_span = info_span!("page", page);
let _page_span = page_span.enter();
2021-07-09 04:37:08 +00:00
let mod_list_resp = nexus_scraper::get_mod_list_page(&client, page).await?;
let scraped = mod_list_resp.scrape_mods()?;
2021-07-09 04:37:08 +00:00
has_next_page = scraped.has_next_page;
let mut mods = Vec::new();
for scraped_mod in scraped.mods {
// TODO: this logic needs to change once I clean up the existing database rows
if let Some(game_mod) =
game_mod::get_by_nexus_mod_id(&pool, scraped_mod.nexus_mod_id).await?
{
mods.push(
game_mod::insert(
2021-07-09 04:37:08 +00:00
&pool,
scraped_mod.name,
scraped_mod.nexus_mod_id,
scraped_mod.author,
scraped_mod.category,
scraped_mod.desc,
game.id,
)
.await?,
);
}
}
2021-07-09 04:37:08 +00:00
for db_mod in mods {
let mod_span = info_span!("mod", name = ?&db_mod.name, id = &db_mod.nexus_mod_id);
let _mod_span = mod_span.enter();
2021-07-09 04:37:08 +00:00
let files_resp = nexus_api::files::get(&client, db_mod.nexus_mod_id).await?;
debug!(duration = ?files_resp.wait, "sleeping");
sleep(files_resp.wait).await;
// Filter out replaced/deleted files (indicated by null category) and archived files
let files = files_resp
.files()?
.into_iter()
.filter(|file| match file.category {
None => {
info!(
name = file.file_name,
id = file.file_id,
"skipping file with no category"
);
false
}
Some(category) if category == "ARCHIVED" => false,
Some(_) => true,
});
for api_file in files {
let file_span =
info_span!("file", name = &api_file.file_name, id = &api_file.file_id);
let _file_span = file_span.enter();
let db_file = file::insert(
&pool,
2021-07-09 04:37:08 +00:00
api_file.name,
api_file.file_name,
api_file.file_id as i32,
db_mod.id,
api_file.category,
api_file.version,
api_file.mod_version,
api_file.size,
2021-07-09 04:37:08 +00:00
api_file.uploaded_at,
)
.await?;
match nexus_api::metadata::contains_plugin(&client, &api_file).await {
Ok(contains_plugin) => {
if let Some(contains_plugin) = contains_plugin {
if !contains_plugin {
info!("file metadata does not contain a plugin, skip downloading");
continue;
}
} else {
warn!("file has no metadata link");
}
Ok(())
}
Err(err) => {
if let Some(reqwest_err) = err.downcast_ref::<reqwest::Error>() {
if reqwest_err.status() == Some(StatusCode::NOT_FOUND) {
warn!(
status = ?reqwest_err.status(),
"metadata for file not found on server"
);
Ok(())
} else {
Err(err)
}
} else {
Err(err)
}
}
}?;
2021-07-11 23:45:26 +00:00
2021-07-09 04:37:08 +00:00
let download_link_resp =
nexus_api::download_link::get(&client, db_mod.nexus_mod_id, api_file.file_id)
.await;
if let Err(err) = &download_link_resp {
if let Some(reqwest_err) = err.downcast_ref::<reqwest::Error>() {
if reqwest_err.status() == Some(StatusCode::NOT_FOUND) {
warn!(
status = ?reqwest_err.status(),
"failed to get download link for file"
);
file::update_has_download_link(&pool, db_file.id, false).await?;
continue;
}
}
}
let download_link_resp = download_link_resp?;
2021-07-09 04:37:08 +00:00
let mut tokio_file = download_link_resp.download_file(&client).await?;
info!(bytes = api_file.size, "download finished");
2021-07-09 04:37:08 +00:00
initialize_plugins_archive(db_mod.nexus_mod_id, db_file.nexus_file_id)?;
let mut plugins_archive = ZipWriter::new_append(
OpenOptions::new()
.read(true)
.write(true)
.open("plugins.zip")?,
)?;
2021-07-09 04:37:08 +00:00
let mut initial_bytes = [0; 8];
tokio_file.seek(SeekFrom::Start(0)).await?;
2021-07-18 20:06:49 +00:00
match tokio_file.read_exact(&mut initial_bytes).await {
Err(err) => {
warn!(error = %err, "failed to read initial bytes, skipping file");
continue;
}
_ => {}
}
let kind = infer::get(&initial_bytes).expect("unknown file type of file download");
2021-07-11 23:45:26 +00:00
info!(
mime_type = kind.mime_type(),
"inferred mime_type of downloaded archive"
);
match kind.mime_type() {
"application/vnd.rar" => {
info!("downloaded archive is RAR archive, attempt to uncompress entire archive");
// Use unrar to uncompress the entire .rar file to avoid bugs with compress_tools uncompressing certain .rar files:
// https://github.com/libarchive/libarchive/issues/373, https://github.com/libarchive/libarchive/issues/1426
tokio_file.seek(SeekFrom::Start(0)).await?;
let mut file = tokio_file.try_clone().await?.into_std().await;
let temp_dir = tempdir()?;
let temp_file_path = temp_dir.path().join("download.rar");
let mut temp_file = std::fs::File::create(&temp_file_path)?;
std::io::copy(&mut file, &mut temp_file)?;
let mut plugin_file_paths = Vec::new();
let list =
Archive::new(temp_file_path.to_string_lossy().to_string()).list();
if let Ok(list) = list {
for entry in list {
if let Ok(entry) = entry {
if entry.is_file()
&& (entry.filename.ends_with(".esp")
|| entry.filename.ends_with(".esm")
|| entry.filename.ends_with(".esl"))
{
plugin_file_paths.push(entry.filename);
}
}
}
}
info!(
num_plugin_files = plugin_file_paths.len(),
"listed plugins in downloaded archive"
);
if plugin_file_paths.len() > 0 {
info!("uncompressing downloaded archive");
let extract =
Archive::new(temp_file_path.to_string_lossy().to_string())
.extract_to(temp_dir.path().to_string_lossy().to_string());
extract
.expect("failed to extract")
.process()
.expect("failed to extract");
for file_path in plugin_file_paths.iter() {
info!(
?file_path,
"processing uncompressed file from downloaded archive"
);
let mut plugin_buf =
std::fs::read(temp_dir.path().join(file_path))?;
process_plugin(
&mut plugin_buf,
&pool,
// &mut plugins_archive,
&db_file,
&db_mod,
file_path,
)
.await?;
}
}
temp_dir.close()?;
}
_ => {
tokio_file.seek(SeekFrom::Start(0)).await?;
let mut file = tokio_file.try_clone().await?.into_std().await;
let mut plugin_file_paths = Vec::new();
for file_path in list_archive_files(&file)? {
if file_path.ends_with(".esp")
|| file_path.ends_with(".esm")
|| file_path.ends_with(".esl")
{
plugin_file_paths.push(file_path);
}
}
info!(
num_plugin_files = plugin_file_paths.len(),
"listed plugins in downloaded archive"
);
for file_path in plugin_file_paths.iter() {
let plugin_span = info_span!("plugin", name = ?file_path);
let plugin_span = plugin_span.enter();
file.seek(SeekFrom::Start(0))?;
let mut buf = Vec::default();
info!("uncompressing plugin file from downloaded archive");
match uncompress_archive_file(&mut file, &mut buf, file_path) {
Ok(_) => Ok(()),
Err(err) => {
if kind.mime_type() == "application/zip" {
// compress_tools or libarchive failed to extract zip file (e.g. archive is deflate64 compressed)
// Attempt to uncompress the archive using `unzip` unix command instead
warn!(error = %err, "failed to extract file with compress_tools, extracting whole archive with unzip instead");
drop(plugin_span);
file.seek(SeekFrom::Start(0))?;
let temp_dir = tempdir()?;
let temp_file_path = temp_dir
.path()
.join(format!("download.{}", kind.extension()));
let mut temp_file = std::fs::File::create(&temp_file_path)?;
std::io::copy(&mut file, &mut temp_file)?;
let extracted_path = temp_dir.path().join("extracted");
Command::new("unzip")
.args(&[
&temp_file_path.to_string_lossy(),
"-d",
&extracted_path.to_string_lossy(),
])
.status()?;
for file_path in plugin_file_paths.iter() {
let plugin_span =
info_span!("plugin", name = ?file_path);
let _plugin_span = plugin_span.enter();
info!("processing uncompressed file from downloaded archive");
let mut plugin_buf =
std::fs::read(extracted_path.join(file_path))?;
process_plugin(
&mut plugin_buf,
&pool,
// &mut plugins_archive,
&db_file,
&db_mod,
file_path,
)
.await?;
}
break;
}
Err(err)
}
}?;
process_plugin(
&mut buf, &pool, // &mut plugins_archive,
&db_file, &db_mod, file_path,
)
.await?;
}
}
}
2021-07-09 04:37:08 +00:00
plugins_archive.finish()?;
debug!(duration = ?download_link_resp.wait, "sleeping");
sleep(download_link_resp.wait).await;
}
}
page += 1;
2021-07-11 23:45:26 +00:00
debug!(?page, ?has_next_page, "sleeping 1 second");
sleep(Duration::from_secs(1)).await;
}
Ok(())
}