Fix plugin listing for rar files, temporary backfill shim

`main` is just backfilling the existing bad rar files in the db. I will return the script to normal once it's run.
This commit is contained in:
Tyler Hallada 2021-07-26 01:16:43 -04:00
parent 87ca90be06
commit 5d62fb7d61
3 changed files with 285 additions and 267 deletions

View File

@ -222,311 +222,310 @@ pub async fn main() -> Result<()> {
while has_next_page { while has_next_page {
let page_span = info_span!("page", page); let page_span = info_span!("page", page);
let _page_span = page_span.enter(); let _page_span = page_span.enter();
let mod_list_resp = nexus_scraper::get_mod_list_page(&client, page).await?; // let mod_list_resp = nexus_scraper::get_mod_list_page(&client, page).await?;
let scraped = mod_list_resp.scrape_mods()?; // let scraped = mod_list_resp.scrape_mods()?;
has_next_page = scraped.has_next_page; // TODO: delete
let present_mods = game_mod::bulk_get_present_nexus_mod_ids( has_next_page = false;
&pool, // has_next_page = scraped.has_next_page;
&scraped // let present_mods = game_mod::bulk_get_present_nexus_mod_ids(
.mods // &pool,
.iter() // &scraped
.map(|scraped_mod| scraped_mod.nexus_mod_id) // .mods
.collect::<Vec<i32>>(), // .iter()
) // .map(|scraped_mod| scraped_mod.nexus_mod_id)
.await?; // .collect::<Vec<i32>>(),
let mods_to_create: Vec<UnsavedMod> = scraped // )
.mods // .await?;
.iter() // let mods_to_create: Vec<UnsavedMod> = scraped
.filter(|scraped_mod| !present_mods.contains(&scraped_mod.nexus_mod_id)) // .mods
.map(|scraped_mod| UnsavedMod { // .iter()
name: scraped_mod.name, // .filter(|scraped_mod| !present_mods.contains(&scraped_mod.nexus_mod_id))
nexus_mod_id: scraped_mod.nexus_mod_id, // .map(|scraped_mod| UnsavedMod {
author: scraped_mod.author, // name: scraped_mod.name,
category: scraped_mod.category, // nexus_mod_id: scraped_mod.nexus_mod_id,
description: scraped_mod.desc, // author: scraped_mod.author,
game_id: game.id, // category: scraped_mod.category,
}) // description: scraped_mod.desc,
.collect(); // game_id: game.id,
// })
// .collect();
let mods = game_mod::batched_insert(&pool, &mods_to_create).await?; // let mods = game_mod::batched_insert(&pool, &mods_to_create).await?;
for db_mod in mods { for db_file in file::get_files_that_need_backfill(&pool).await? {
let db_mod = game_mod::get(&pool, db_file.mod_id)
.await?
.expect("a mod to exist for file");
let mod_span = info_span!("mod", name = ?&db_mod.name, id = &db_mod.nexus_mod_id); let mod_span = info_span!("mod", name = ?&db_mod.name, id = &db_mod.nexus_mod_id);
let _mod_span = mod_span.enter(); let _mod_span = mod_span.enter();
let files_resp = nexus_api::files::get(&client, db_mod.nexus_mod_id).await?; // let files_resp = nexus_api::files::get(&client, db_mod.nexus_mod_id).await?;
debug!(duration = ?files_resp.wait, "sleeping"); // debug!(duration = ?files_resp.wait, "sleeping");
sleep(files_resp.wait).await; // sleep(files_resp.wait).await;
// Filter out replaced/deleted files (indicated by null category) and archived files // Filter out replaced/deleted files (indicated by null category) and archived files
let files = files_resp // let files = files_resp
.files()? // .files()?
.into_iter() // .into_iter()
.filter(|file| match file.category { // .filter(|file| match file.category {
None => { // None => {
info!( // info!(
name = file.file_name, // name = file.file_name,
id = file.file_id, // id = file.file_id,
"skipping file with no category" // "skipping file with no category"
// );
// false
// }
// Some(category) if category == "ARCHIVED" => false,
// Some(_) => true,
// });
// for api_file in files {
let file_span = info_span!("file", id = &db_file.nexus_file_id);
let _file_span = file_span.enter();
// let db_file = file::insert(
// &pool,
// api_file.name,
// api_file.file_name,
// api_file.file_id as i32,
// db_mod.id,
// api_file.category,
// api_file.version,
// api_file.mod_version,
// api_file.size,
// api_file.uploaded_at,
// )
// .await?;
// let mut checked_metadata = false;
// match nexus_api::metadata::contains_plugin(&client, &api_file).await {
// Ok(contains_plugin) => {
// if let Some(contains_plugin) = contains_plugin {
// checked_metadata = true;
// if !contains_plugin {
// info!("file metadata does not contain a plugin, skip downloading");
// continue;
// }
// } else {
// warn!("file has no metadata link, continuing with download");
// }
// }
// Err(err) => {
// warn!(error = %err, "error retreiving metadata for file, continuing with download");
// }
// };
let download_link_resp = nexus_api::download_link::get(
&client,
db_mod.nexus_mod_id,
db_file.nexus_file_id as i64,
)
.await;
if let Err(err) = &download_link_resp {
if let Some(reqwest_err) = err.downcast_ref::<reqwest::Error>() {
if reqwest_err.status() == Some(StatusCode::NOT_FOUND) {
warn!(
status = ?reqwest_err.status(),
"failed to get download link for file"
); );
false file::update_has_download_link(&pool, db_file.id, false).await?;
}
Some(category) if category == "ARCHIVED" => false,
Some(_) => true,
});
for api_file in files {
let file_span =
info_span!("file", name = &api_file.file_name, id = &api_file.file_id);
let _file_span = file_span.enter();
let db_file = file::insert(
&pool,
api_file.name,
api_file.file_name,
api_file.file_id as i32,
db_mod.id,
api_file.category,
api_file.version,
api_file.mod_version,
api_file.size,
api_file.uploaded_at,
)
.await?;
let mut checked_metadata = false;
match nexus_api::metadata::contains_plugin(&client, &api_file).await {
Ok(contains_plugin) => {
if let Some(contains_plugin) = contains_plugin {
checked_metadata = true;
if !contains_plugin {
info!("file metadata does not contain a plugin, skip downloading");
continue;
}
} else {
warn!("file has no metadata link, continuing with download");
}
}
Err(err) => {
warn!(error = %err, "error retreiving metadata for file, continuing with download");
}
};
let download_link_resp =
nexus_api::download_link::get(&client, db_mod.nexus_mod_id, api_file.file_id)
.await;
if let Err(err) = &download_link_resp {
if let Some(reqwest_err) = err.downcast_ref::<reqwest::Error>() {
if reqwest_err.status() == Some(StatusCode::NOT_FOUND) {
warn!(
status = ?reqwest_err.status(),
"failed to get download link for file"
);
file::update_has_download_link(&pool, db_file.id, false).await?;
continue;
}
}
}
let download_link_resp = download_link_resp?;
let mut tokio_file = download_link_resp.download_file(&client).await?;
info!(bytes = api_file.size, "download finished");
initialize_plugins_archive(db_mod.nexus_mod_id, db_file.nexus_file_id)?;
let mut plugins_archive = ZipWriter::new_append(
OpenOptions::new()
.read(true)
.write(true)
.open("plugins.zip")?,
)?;
let mut initial_bytes = [0; 8];
tokio_file.seek(SeekFrom::Start(0)).await?;
match tokio_file.read_exact(&mut initial_bytes).await {
Err(err) => {
warn!(error = %err, "failed to read initial bytes, skipping file");
continue; continue;
} }
_ => {}
} }
let kind = infer::get(&initial_bytes).expect("unknown file type of file download"); }
info!( let download_link_resp = download_link_resp?;
mime_type = kind.mime_type(), let mut tokio_file = download_link_resp.download_file(&client).await?;
"inferred mime_type of downloaded archive" info!("download finished");
);
match kind.mime_type() { initialize_plugins_archive(db_mod.nexus_mod_id, db_file.nexus_file_id)?;
"application/vnd.rar" => { let mut plugins_archive = ZipWriter::new_append(
info!("downloaded archive is RAR archive, attempt to uncompress entire archive"); OpenOptions::new()
// Use unrar to uncompress the entire .rar file to avoid bugs with compress_tools uncompressing certain .rar files: .read(true)
// https://github.com/libarchive/libarchive/issues/373, https://github.com/libarchive/libarchive/issues/1426 .write(true)
tokio_file.seek(SeekFrom::Start(0)).await?; .open("plugins.zip")?,
let mut file = tokio_file.try_clone().await?.into_std().await; )?;
let temp_dir = tempdir()?;
let temp_file_path = temp_dir.path().join("download.rar");
let mut temp_file = std::fs::File::create(&temp_file_path)?;
std::io::copy(&mut file, &mut temp_file)?;
let mut plugin_file_paths = Vec::new(); let mut initial_bytes = [0; 8];
let list = tokio_file.seek(SeekFrom::Start(0)).await?;
Archive::new(&temp_file_path.to_string_lossy().to_string())?.list(); match tokio_file.read_exact(&mut initial_bytes).await {
if let Ok(list) = list { Err(err) => {
for entry in list { warn!(error = %err, "failed to read initial bytes, skipping file");
if let Ok(entry) = entry { continue;
}
_ => {}
}
let kind = infer::get(&initial_bytes).expect("unknown file type of file download");
info!(
mime_type = kind.mime_type(),
"inferred mime_type of downloaded archive"
);
match kind.mime_type() {
"application/vnd.rar" => {
info!(
"downloaded archive is RAR archive, attempt to uncompress entire archive"
);
// Use unrar to uncompress the entire .rar file to avoid bugs with compress_tools uncompressing certain .rar files:
// https://github.com/libarchive/libarchive/issues/373, https://github.com/libarchive/libarchive/issues/1426
tokio_file.seek(SeekFrom::Start(0)).await?;
let mut file = tokio_file.try_clone().await?.into_std().await;
let temp_dir = tempdir()?;
let temp_file_path = temp_dir.path().join("download.rar");
let mut temp_file = std::fs::File::create(&temp_file_path)?;
std::io::copy(&mut file, &mut temp_file)?;
let mut plugin_file_paths = Vec::new();
let list = Archive::new(&temp_file_path.to_string_lossy().to_string())?.list();
if let Ok(list) = list {
for entry in list {
if let Ok(entry) = entry {
if let Some(extension) = entry.filename.extension() {
if entry.is_file() if entry.is_file()
&& (entry.filename.ends_with(".esp") && (extension == "esp"
|| entry.filename.ends_with(".esm") || extension == "esm"
|| entry.filename.ends_with(".esl")) || extension == "esl")
{ {
plugin_file_paths.push(entry.filename); plugin_file_paths.push(entry.filename);
} }
} }
} }
} else {
if !checked_metadata {
warn!("failed to read archive and server has no metadata, skipping file");
continue;
} else {
error!("failed to read archive, but server had metadata");
panic!("failed to read archive, but server had metadata");
}
} }
info!( } else {
num_plugin_files = plugin_file_paths.len(), warn!("failed to read archive, skipping file");
"listed plugins in downloaded archive" continue;
);
if plugin_file_paths.len() > 0 {
info!("uncompressing downloaded archive");
let extract =
Archive::new(&temp_file_path.to_string_lossy().to_string())?
.extract_to(temp_dir.path().to_string_lossy().to_string());
extract
.expect("failed to extract")
.process()
.expect("failed to extract");
for file_path in plugin_file_paths.iter() {
info!(
?file_path,
"processing uncompressed file from downloaded archive"
);
let mut plugin_buf =
std::fs::read(temp_dir.path().join(file_path))?;
process_plugin(
&mut plugin_buf,
&pool,
&mut plugins_archive,
&db_file,
&db_mod,
&file_path.to_string_lossy(),
)
.await?;
}
}
temp_dir.close()?;
} }
_ => { info!(
tokio_file.seek(SeekFrom::Start(0)).await?; num_plugin_files = plugin_file_paths.len(),
let mut file = tokio_file.try_clone().await?.into_std().await; "listed plugins in downloaded archive"
let mut plugin_file_paths = Vec::new(); );
let archive_files = match list_archive_files(&file) { if plugin_file_paths.len() > 0 {
Ok(files) => Ok(files), info!("uncompressing downloaded archive");
Err(err) => { let extract = Archive::new(&temp_file_path.to_string_lossy().to_string())?
if !checked_metadata { .extract_to(temp_dir.path().to_string_lossy().to_string());
warn!(error = %err, "failed to read archive and server has no metadata, skipping file"); extract
continue; .expect("failed to extract")
} else { .process()
error!(error = %err, "failed to read archive, but server had metadata"); .expect("failed to extract");
Err(err)
}
}
}?;
for file_path in archive_files {
if file_path.ends_with(".esp")
|| file_path.ends_with(".esm")
|| file_path.ends_with(".esl")
{
plugin_file_paths.push(file_path);
}
}
info!(
num_plugin_files = plugin_file_paths.len(),
"listed plugins in downloaded archive"
);
for file_path in plugin_file_paths.iter() { for file_path in plugin_file_paths.iter() {
let plugin_span = info_span!("plugin", name = ?file_path); info!(
let plugin_span = plugin_span.enter(); ?file_path,
file.seek(SeekFrom::Start(0))?; "processing uncompressed file from downloaded archive"
let mut buf = Vec::default(); );
info!("uncompressing plugin file from downloaded archive"); let mut plugin_buf = std::fs::read(temp_dir.path().join(file_path))?;
match uncompress_archive_file(&mut file, &mut buf, file_path) {
Ok(_) => Ok(()),
Err(err) => {
if kind.mime_type() == "application/zip" {
// compress_tools or libarchive failed to extract zip file (e.g. archive is deflate64 compressed)
// Attempt to uncompress the archive using `unzip` unix command instead
warn!(error = %err, "failed to extract file with compress_tools, extracting whole archive with unzip instead");
drop(plugin_span);
file.seek(SeekFrom::Start(0))?;
let temp_dir = tempdir()?;
let temp_file_path = temp_dir
.path()
.join(format!("download.{}", kind.extension()));
let mut temp_file = std::fs::File::create(&temp_file_path)?;
std::io::copy(&mut file, &mut temp_file)?;
let extracted_path = temp_dir.path().join("extracted");
Command::new("unzip")
.args(&[
&temp_file_path.to_string_lossy(),
"-d",
&extracted_path.to_string_lossy(),
])
.status()?;
for file_path in plugin_file_paths.iter() {
let plugin_span =
info_span!("plugin", name = ?file_path);
let _plugin_span = plugin_span.enter();
info!("processing uncompressed file from downloaded archive");
let mut plugin_buf =
std::fs::read(extracted_path.join(file_path))?;
process_plugin(
&mut plugin_buf,
&pool,
&mut plugins_archive,
&db_file,
&db_mod,
file_path,
)
.await?;
}
break;
}
Err(err)
}
}?;
process_plugin( process_plugin(
&mut buf, &mut plugin_buf,
&pool, &pool,
&mut plugins_archive, &mut plugins_archive,
&db_file, &db_file,
&db_mod, &db_mod,
file_path, &file_path.to_string_lossy(),
) )
.await?; .await?;
} }
} }
temp_dir.close()?;
} }
_ => {
tokio_file.seek(SeekFrom::Start(0)).await?;
let mut file = tokio_file.try_clone().await?.into_std().await;
let mut plugin_file_paths = Vec::new();
plugins_archive.finish()?; let archive_files = match list_archive_files(&file) {
debug!(duration = ?download_link_resp.wait, "sleeping"); Ok(files) => files,
sleep(download_link_resp.wait).await; Err(err) => {
warn!(error = %err, "failed to read archive, skipping file");
continue;
}
};
for file_path in archive_files {
if file_path.ends_with(".esp")
|| file_path.ends_with(".esm")
|| file_path.ends_with(".esl")
{
plugin_file_paths.push(file_path);
}
}
info!(
num_plugin_files = plugin_file_paths.len(),
"listed plugins in downloaded archive"
);
for file_path in plugin_file_paths.iter() {
let plugin_span = info_span!("plugin", name = ?file_path);
let plugin_span = plugin_span.enter();
file.seek(SeekFrom::Start(0))?;
let mut buf = Vec::default();
info!("uncompressing plugin file from downloaded archive");
match uncompress_archive_file(&mut file, &mut buf, file_path) {
Ok(_) => Ok(()),
Err(err) => {
if kind.mime_type() == "application/zip" {
// compress_tools or libarchive failed to extract zip file (e.g. archive is deflate64 compressed)
// Attempt to uncompress the archive using `unzip` unix command instead
warn!(error = %err, "failed to extract file with compress_tools, extracting whole archive with unzip instead");
drop(plugin_span);
file.seek(SeekFrom::Start(0))?;
let temp_dir = tempdir()?;
let temp_file_path = temp_dir
.path()
.join(format!("download.{}", kind.extension()));
let mut temp_file = std::fs::File::create(&temp_file_path)?;
std::io::copy(&mut file, &mut temp_file)?;
let extracted_path = temp_dir.path().join("extracted");
Command::new("unzip")
.args(&[
&temp_file_path.to_string_lossy(),
"-d",
&extracted_path.to_string_lossy(),
])
.status()?;
for file_path in plugin_file_paths.iter() {
let plugin_span = info_span!("plugin", name = ?file_path);
let _plugin_span = plugin_span.enter();
info!(
"processing uncompressed file from downloaded archive"
);
let mut plugin_buf =
std::fs::read(extracted_path.join(file_path))?;
process_plugin(
&mut plugin_buf,
&pool,
&mut plugins_archive,
&db_file,
&db_mod,
file_path,
)
.await?;
}
break;
}
Err(err)
}
}?;
process_plugin(
&mut buf,
&pool,
&mut plugins_archive,
&db_file,
&db_mod,
file_path,
)
.await?;
}
}
} }
plugins_archive.finish()?;
debug!(duration = ?download_link_resp.wait, "sleeping");
sleep(download_link_resp.wait).await;
} }
// }
page += 1; page += 1;
debug!(?page, ?has_next_page, "sleeping 1 second"); debug!(?page, ?has_next_page, "sleeping 1 second");

View File

@ -91,3 +91,14 @@ pub async fn update_has_download_link(
.await .await
.context("Failed to update file") .context("Failed to update file")
} }
/// Fetches every `.rar` file row created at or after the bad-data cutoff
/// (2021-07-24 02:07:25) so the temporary backfill loop in `main` can
/// re-scan their plugin lists.
///
/// NOTE(review): the cutoff timestamp is intentionally hard-coded — this is
/// a one-off shim per the commit message; remove once the backfill has run.
///
/// # Errors
///
/// Returns an error (with context "Failed to get files") if the query fails.
#[instrument(level = "debug", skip(pool))]
pub async fn get_files_that_need_backfill(pool: &sqlx::Pool<sqlx::Postgres>) -> Result<Vec<File>> {
    // Compile-time-checked query; rows map directly into `File`.
    let files = sqlx::query_as!(
        File,
        "SELECT * FROM files WHERE created_at >= '2021-07-24 02:07:25' and file_name like '%.rar'",
    )
    .fetch_all(pool)
    .await;
    files.context("Failed to get files")
}

View File

@ -136,3 +136,11 @@ pub async fn batched_insert<'a>(
} }
Ok(saved_mods) Ok(saved_mods)
} }
/// Looks up a single mod by its database primary key.
///
/// Returns `Ok(None)` when no row with the given `id` exists.
///
/// # Errors
///
/// Returns an error (with context "Failed to get mod") if the query fails.
#[instrument(level = "debug", skip(pool))]
pub async fn get(pool: &sqlx::Pool<sqlx::Postgres>, id: i32) -> Result<Option<Mod>> {
    // `fetch_optional` yields at most one row as `Option<Mod>`.
    let result = sqlx::query_as!(Mod, "SELECT * FROM mods WHERE id = $1", id)
        .fetch_optional(pool)
        .await;
    result.context("Failed to get mod")
}