Fix plugin listing for rar files, temporary backfill shim
`main` is just backfilling the existing bad rar files in the db. I will return the script to normal once it has run.
This commit is contained in:
parent
87ca90be06
commit
5d62fb7d61
221
src/main.rs
221
src/main.rs
@ -222,98 +222,105 @@ pub async fn main() -> Result<()> {
|
|||||||
while has_next_page {
|
while has_next_page {
|
||||||
let page_span = info_span!("page", page);
|
let page_span = info_span!("page", page);
|
||||||
let _page_span = page_span.enter();
|
let _page_span = page_span.enter();
|
||||||
let mod_list_resp = nexus_scraper::get_mod_list_page(&client, page).await?;
|
// let mod_list_resp = nexus_scraper::get_mod_list_page(&client, page).await?;
|
||||||
let scraped = mod_list_resp.scrape_mods()?;
|
// let scraped = mod_list_resp.scrape_mods()?;
|
||||||
|
|
||||||
has_next_page = scraped.has_next_page;
|
// TODO: delete
|
||||||
let present_mods = game_mod::bulk_get_present_nexus_mod_ids(
|
has_next_page = false;
|
||||||
&pool,
|
// has_next_page = scraped.has_next_page;
|
||||||
&scraped
|
// let present_mods = game_mod::bulk_get_present_nexus_mod_ids(
|
||||||
.mods
|
// &pool,
|
||||||
.iter()
|
// &scraped
|
||||||
.map(|scraped_mod| scraped_mod.nexus_mod_id)
|
// .mods
|
||||||
.collect::<Vec<i32>>(),
|
// .iter()
|
||||||
)
|
// .map(|scraped_mod| scraped_mod.nexus_mod_id)
|
||||||
.await?;
|
// .collect::<Vec<i32>>(),
|
||||||
let mods_to_create: Vec<UnsavedMod> = scraped
|
// )
|
||||||
.mods
|
// .await?;
|
||||||
.iter()
|
// let mods_to_create: Vec<UnsavedMod> = scraped
|
||||||
.filter(|scraped_mod| !present_mods.contains(&scraped_mod.nexus_mod_id))
|
// .mods
|
||||||
.map(|scraped_mod| UnsavedMod {
|
// .iter()
|
||||||
name: scraped_mod.name,
|
// .filter(|scraped_mod| !present_mods.contains(&scraped_mod.nexus_mod_id))
|
||||||
nexus_mod_id: scraped_mod.nexus_mod_id,
|
// .map(|scraped_mod| UnsavedMod {
|
||||||
author: scraped_mod.author,
|
// name: scraped_mod.name,
|
||||||
category: scraped_mod.category,
|
// nexus_mod_id: scraped_mod.nexus_mod_id,
|
||||||
description: scraped_mod.desc,
|
// author: scraped_mod.author,
|
||||||
game_id: game.id,
|
// category: scraped_mod.category,
|
||||||
})
|
// description: scraped_mod.desc,
|
||||||
.collect();
|
// game_id: game.id,
|
||||||
|
// })
|
||||||
|
// .collect();
|
||||||
|
|
||||||
let mods = game_mod::batched_insert(&pool, &mods_to_create).await?;
|
// let mods = game_mod::batched_insert(&pool, &mods_to_create).await?;
|
||||||
|
|
||||||
for db_mod in mods {
|
for db_file in file::get_files_that_need_backfill(&pool).await? {
|
||||||
|
let db_mod = game_mod::get(&pool, db_file.mod_id)
|
||||||
|
.await?
|
||||||
|
.expect("a mod to exist for file");
|
||||||
let mod_span = info_span!("mod", name = ?&db_mod.name, id = &db_mod.nexus_mod_id);
|
let mod_span = info_span!("mod", name = ?&db_mod.name, id = &db_mod.nexus_mod_id);
|
||||||
let _mod_span = mod_span.enter();
|
let _mod_span = mod_span.enter();
|
||||||
let files_resp = nexus_api::files::get(&client, db_mod.nexus_mod_id).await?;
|
// let files_resp = nexus_api::files::get(&client, db_mod.nexus_mod_id).await?;
|
||||||
|
|
||||||
debug!(duration = ?files_resp.wait, "sleeping");
|
// debug!(duration = ?files_resp.wait, "sleeping");
|
||||||
sleep(files_resp.wait).await;
|
// sleep(files_resp.wait).await;
|
||||||
|
|
||||||
// Filter out replaced/deleted files (indicated by null category) and archived files
|
// Filter out replaced/deleted files (indicated by null category) and archived files
|
||||||
let files = files_resp
|
// let files = files_resp
|
||||||
.files()?
|
// .files()?
|
||||||
.into_iter()
|
// .into_iter()
|
||||||
.filter(|file| match file.category {
|
// .filter(|file| match file.category {
|
||||||
None => {
|
// None => {
|
||||||
info!(
|
// info!(
|
||||||
name = file.file_name,
|
// name = file.file_name,
|
||||||
id = file.file_id,
|
// id = file.file_id,
|
||||||
"skipping file with no category"
|
// "skipping file with no category"
|
||||||
);
|
// );
|
||||||
false
|
// false
|
||||||
}
|
// }
|
||||||
Some(category) if category == "ARCHIVED" => false,
|
// Some(category) if category == "ARCHIVED" => false,
|
||||||
Some(_) => true,
|
// Some(_) => true,
|
||||||
});
|
// });
|
||||||
|
|
||||||
for api_file in files {
|
// for api_file in files {
|
||||||
let file_span =
|
let file_span = info_span!("file", id = &db_file.nexus_file_id);
|
||||||
info_span!("file", name = &api_file.file_name, id = &api_file.file_id);
|
|
||||||
let _file_span = file_span.enter();
|
let _file_span = file_span.enter();
|
||||||
let db_file = file::insert(
|
// let db_file = file::insert(
|
||||||
&pool,
|
// &pool,
|
||||||
api_file.name,
|
// api_file.name,
|
||||||
api_file.file_name,
|
// api_file.file_name,
|
||||||
api_file.file_id as i32,
|
// api_file.file_id as i32,
|
||||||
db_mod.id,
|
// db_mod.id,
|
||||||
api_file.category,
|
// api_file.category,
|
||||||
api_file.version,
|
// api_file.version,
|
||||||
api_file.mod_version,
|
// api_file.mod_version,
|
||||||
api_file.size,
|
// api_file.size,
|
||||||
api_file.uploaded_at,
|
// api_file.uploaded_at,
|
||||||
|
// )
|
||||||
|
// .await?;
|
||||||
|
|
||||||
|
// let mut checked_metadata = false;
|
||||||
|
// match nexus_api::metadata::contains_plugin(&client, &api_file).await {
|
||||||
|
// Ok(contains_plugin) => {
|
||||||
|
// if let Some(contains_plugin) = contains_plugin {
|
||||||
|
// checked_metadata = true;
|
||||||
|
// if !contains_plugin {
|
||||||
|
// info!("file metadata does not contain a plugin, skip downloading");
|
||||||
|
// continue;
|
||||||
|
// }
|
||||||
|
// } else {
|
||||||
|
// warn!("file has no metadata link, continuing with download");
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// Err(err) => {
|
||||||
|
// warn!(error = %err, "error retreiving metadata for file, continuing with download");
|
||||||
|
// }
|
||||||
|
// };
|
||||||
|
|
||||||
|
let download_link_resp = nexus_api::download_link::get(
|
||||||
|
&client,
|
||||||
|
db_mod.nexus_mod_id,
|
||||||
|
db_file.nexus_file_id as i64,
|
||||||
)
|
)
|
||||||
.await?;
|
|
||||||
|
|
||||||
let mut checked_metadata = false;
|
|
||||||
match nexus_api::metadata::contains_plugin(&client, &api_file).await {
|
|
||||||
Ok(contains_plugin) => {
|
|
||||||
if let Some(contains_plugin) = contains_plugin {
|
|
||||||
checked_metadata = true;
|
|
||||||
if !contains_plugin {
|
|
||||||
info!("file metadata does not contain a plugin, skip downloading");
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
warn!("file has no metadata link, continuing with download");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(err) => {
|
|
||||||
warn!(error = %err, "error retreiving metadata for file, continuing with download");
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let download_link_resp =
|
|
||||||
nexus_api::download_link::get(&client, db_mod.nexus_mod_id, api_file.file_id)
|
|
||||||
.await;
|
.await;
|
||||||
if let Err(err) = &download_link_resp {
|
if let Err(err) = &download_link_resp {
|
||||||
if let Some(reqwest_err) = err.downcast_ref::<reqwest::Error>() {
|
if let Some(reqwest_err) = err.downcast_ref::<reqwest::Error>() {
|
||||||
@ -329,7 +336,7 @@ pub async fn main() -> Result<()> {
|
|||||||
}
|
}
|
||||||
let download_link_resp = download_link_resp?;
|
let download_link_resp = download_link_resp?;
|
||||||
let mut tokio_file = download_link_resp.download_file(&client).await?;
|
let mut tokio_file = download_link_resp.download_file(&client).await?;
|
||||||
info!(bytes = api_file.size, "download finished");
|
info!("download finished");
|
||||||
|
|
||||||
initialize_plugins_archive(db_mod.nexus_mod_id, db_file.nexus_file_id)?;
|
initialize_plugins_archive(db_mod.nexus_mod_id, db_file.nexus_file_id)?;
|
||||||
let mut plugins_archive = ZipWriter::new_append(
|
let mut plugins_archive = ZipWriter::new_append(
|
||||||
@ -356,7 +363,9 @@ pub async fn main() -> Result<()> {
|
|||||||
|
|
||||||
match kind.mime_type() {
|
match kind.mime_type() {
|
||||||
"application/vnd.rar" => {
|
"application/vnd.rar" => {
|
||||||
info!("downloaded archive is RAR archive, attempt to uncompress entire archive");
|
info!(
|
||||||
|
"downloaded archive is RAR archive, attempt to uncompress entire archive"
|
||||||
|
);
|
||||||
// Use unrar to uncompress the entire .rar file to avoid bugs with compress_tools uncompressing certain .rar files:
|
// Use unrar to uncompress the entire .rar file to avoid bugs with compress_tools uncompressing certain .rar files:
|
||||||
// https://github.com/libarchive/libarchive/issues/373, https://github.com/libarchive/libarchive/issues/1426
|
// https://github.com/libarchive/libarchive/issues/373, https://github.com/libarchive/libarchive/issues/1426
|
||||||
tokio_file.seek(SeekFrom::Start(0)).await?;
|
tokio_file.seek(SeekFrom::Start(0)).await?;
|
||||||
@ -367,28 +376,24 @@ pub async fn main() -> Result<()> {
|
|||||||
std::io::copy(&mut file, &mut temp_file)?;
|
std::io::copy(&mut file, &mut temp_file)?;
|
||||||
|
|
||||||
let mut plugin_file_paths = Vec::new();
|
let mut plugin_file_paths = Vec::new();
|
||||||
let list =
|
let list = Archive::new(&temp_file_path.to_string_lossy().to_string())?.list();
|
||||||
Archive::new(&temp_file_path.to_string_lossy().to_string())?.list();
|
|
||||||
if let Ok(list) = list {
|
if let Ok(list) = list {
|
||||||
for entry in list {
|
for entry in list {
|
||||||
if let Ok(entry) = entry {
|
if let Ok(entry) = entry {
|
||||||
|
if let Some(extension) = entry.filename.extension() {
|
||||||
if entry.is_file()
|
if entry.is_file()
|
||||||
&& (entry.filename.ends_with(".esp")
|
&& (extension == "esp"
|
||||||
|| entry.filename.ends_with(".esm")
|
|| extension == "esm"
|
||||||
|| entry.filename.ends_with(".esl"))
|
|| extension == "esl")
|
||||||
{
|
{
|
||||||
plugin_file_paths.push(entry.filename);
|
plugin_file_paths.push(entry.filename);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
if !checked_metadata {
|
|
||||||
warn!("failed to read archive and server has no metadata, skipping file");
|
|
||||||
continue;
|
|
||||||
} else {
|
|
||||||
error!("failed to read archive, but server had metadata");
|
|
||||||
panic!("failed to read archive, but server had metadata");
|
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
warn!("failed to read archive, skipping file");
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
info!(
|
info!(
|
||||||
num_plugin_files = plugin_file_paths.len(),
|
num_plugin_files = plugin_file_paths.len(),
|
||||||
@ -397,8 +402,7 @@ pub async fn main() -> Result<()> {
|
|||||||
|
|
||||||
if plugin_file_paths.len() > 0 {
|
if plugin_file_paths.len() > 0 {
|
||||||
info!("uncompressing downloaded archive");
|
info!("uncompressing downloaded archive");
|
||||||
let extract =
|
let extract = Archive::new(&temp_file_path.to_string_lossy().to_string())?
|
||||||
Archive::new(&temp_file_path.to_string_lossy().to_string())?
|
|
||||||
.extract_to(temp_dir.path().to_string_lossy().to_string());
|
.extract_to(temp_dir.path().to_string_lossy().to_string());
|
||||||
extract
|
extract
|
||||||
.expect("failed to extract")
|
.expect("failed to extract")
|
||||||
@ -410,8 +414,7 @@ pub async fn main() -> Result<()> {
|
|||||||
?file_path,
|
?file_path,
|
||||||
"processing uncompressed file from downloaded archive"
|
"processing uncompressed file from downloaded archive"
|
||||||
);
|
);
|
||||||
let mut plugin_buf =
|
let mut plugin_buf = std::fs::read(temp_dir.path().join(file_path))?;
|
||||||
std::fs::read(temp_dir.path().join(file_path))?;
|
|
||||||
process_plugin(
|
process_plugin(
|
||||||
&mut plugin_buf,
|
&mut plugin_buf,
|
||||||
&pool,
|
&pool,
|
||||||
@ -431,17 +434,12 @@ pub async fn main() -> Result<()> {
|
|||||||
let mut plugin_file_paths = Vec::new();
|
let mut plugin_file_paths = Vec::new();
|
||||||
|
|
||||||
let archive_files = match list_archive_files(&file) {
|
let archive_files = match list_archive_files(&file) {
|
||||||
Ok(files) => Ok(files),
|
Ok(files) => files,
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
if !checked_metadata {
|
warn!(error = %err, "failed to read archive, skipping file");
|
||||||
warn!(error = %err, "failed to read archive and server has no metadata, skipping file");
|
|
||||||
continue;
|
continue;
|
||||||
} else {
|
|
||||||
error!(error = %err, "failed to read archive, but server had metadata");
|
|
||||||
Err(err)
|
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
}?;
|
|
||||||
for file_path in archive_files {
|
for file_path in archive_files {
|
||||||
if file_path.ends_with(".esp")
|
if file_path.ends_with(".esp")
|
||||||
|| file_path.ends_with(".esm")
|
|| file_path.ends_with(".esm")
|
||||||
@ -487,10 +485,11 @@ pub async fn main() -> Result<()> {
|
|||||||
.status()?;
|
.status()?;
|
||||||
|
|
||||||
for file_path in plugin_file_paths.iter() {
|
for file_path in plugin_file_paths.iter() {
|
||||||
let plugin_span =
|
let plugin_span = info_span!("plugin", name = ?file_path);
|
||||||
info_span!("plugin", name = ?file_path);
|
|
||||||
let _plugin_span = plugin_span.enter();
|
let _plugin_span = plugin_span.enter();
|
||||||
info!("processing uncompressed file from downloaded archive");
|
info!(
|
||||||
|
"processing uncompressed file from downloaded archive"
|
||||||
|
);
|
||||||
let mut plugin_buf =
|
let mut plugin_buf =
|
||||||
std::fs::read(extracted_path.join(file_path))?;
|
std::fs::read(extracted_path.join(file_path))?;
|
||||||
process_plugin(
|
process_plugin(
|
||||||
@ -526,7 +525,7 @@ pub async fn main() -> Result<()> {
|
|||||||
debug!(duration = ?download_link_resp.wait, "sleeping");
|
debug!(duration = ?download_link_resp.wait, "sleeping");
|
||||||
sleep(download_link_resp.wait).await;
|
sleep(download_link_resp.wait).await;
|
||||||
}
|
}
|
||||||
}
|
// }
|
||||||
|
|
||||||
page += 1;
|
page += 1;
|
||||||
debug!(?page, ?has_next_page, "sleeping 1 second");
|
debug!(?page, ?has_next_page, "sleeping 1 second");
|
||||||
|
@ -91,3 +91,14 @@ pub async fn update_has_download_link(
|
|||||||
.await
|
.await
|
||||||
.context("Failed to update file")
|
.context("Failed to update file")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Fetches every row from the `files` table that is selected for the RAR backfill.
///
/// A row is selected when its `created_at` is on or after the hard-coded cutoff
/// timestamp in the query and its `file_name` ends in `.rar`.
///
/// Returns the matching rows as `File` values; a query failure is returned as an
/// error with the context "Failed to get files".
///
/// NOTE(review): the commit message describes this as part of a temporary backfill
/// shim, so this function is presumably meant to be removed once the backfill has
/// run — confirm before relying on it elsewhere.
#[instrument(level = "debug", skip(pool))]
|
||||||
|
pub async fn get_files_that_need_backfill(pool: &sqlx::Pool<sqlx::Postgres>) -> Result<Vec<File>> {
|
||||||
|
sqlx::query_as!(
|
||||||
|
File,
|
||||||
|
// The cutoff timestamp and the `.rar` suffix are fixed in the SQL text; there are no bind parameters.
"SELECT * FROM files WHERE created_at >= '2021-07-24 02:07:25' and file_name like '%.rar'",
|
||||||
|
)
|
||||||
|
.fetch_all(pool)
|
||||||
|
.await
|
||||||
|
.context("Failed to get files")
|
||||||
|
}
|
||||||
|
@ -136,3 +136,11 @@ pub async fn batched_insert<'a>(
|
|||||||
}
|
}
|
||||||
Ok(saved_mods)
|
Ok(saved_mods)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Looks up a single row in the `mods` table by its `id` column.
///
/// Returns `Ok(Some(Mod))` when a row matches and `Ok(None)` when none does
/// (the query uses `fetch_optional`). A query failure is returned as an error
/// with the context "Failed to get mod".
#[instrument(level = "debug", skip(pool))]
|
||||||
|
pub async fn get(pool: &sqlx::Pool<sqlx::Postgres>, id: i32) -> Result<Option<Mod>> {
|
||||||
|
// `id` is passed as the `$1` bind parameter rather than being formatted into the SQL text.
sqlx::query_as!(Mod, "SELECT * FROM mods WHERE id = $1", id)
|
||||||
|
.fetch_optional(pool)
|
||||||
|
.await
|
||||||
|
.context("Failed to get mod")
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user