diff --git a/src/main.rs b/src/main.rs index 17b733f..c2f7cdf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -222,311 +222,310 @@ pub async fn main() -> Result<()> { while has_next_page { let page_span = info_span!("page", page); let _page_span = page_span.enter(); - let mod_list_resp = nexus_scraper::get_mod_list_page(&client, page).await?; - let scraped = mod_list_resp.scrape_mods()?; + // let mod_list_resp = nexus_scraper::get_mod_list_page(&client, page).await?; + // let scraped = mod_list_resp.scrape_mods()?; - has_next_page = scraped.has_next_page; - let present_mods = game_mod::bulk_get_present_nexus_mod_ids( - &pool, - &scraped - .mods - .iter() - .map(|scraped_mod| scraped_mod.nexus_mod_id) - .collect::>(), - ) - .await?; - let mods_to_create: Vec = scraped - .mods - .iter() - .filter(|scraped_mod| !present_mods.contains(&scraped_mod.nexus_mod_id)) - .map(|scraped_mod| UnsavedMod { - name: scraped_mod.name, - nexus_mod_id: scraped_mod.nexus_mod_id, - author: scraped_mod.author, - category: scraped_mod.category, - description: scraped_mod.desc, - game_id: game.id, - }) - .collect(); + // TODO: delete + has_next_page = false; + // has_next_page = scraped.has_next_page; + // let present_mods = game_mod::bulk_get_present_nexus_mod_ids( + // &pool, + // &scraped + // .mods + // .iter() + // .map(|scraped_mod| scraped_mod.nexus_mod_id) + // .collect::>(), + // ) + // .await?; + // let mods_to_create: Vec = scraped + // .mods + // .iter() + // .filter(|scraped_mod| !present_mods.contains(&scraped_mod.nexus_mod_id)) + // .map(|scraped_mod| UnsavedMod { + // name: scraped_mod.name, + // nexus_mod_id: scraped_mod.nexus_mod_id, + // author: scraped_mod.author, + // category: scraped_mod.category, + // description: scraped_mod.desc, + // game_id: game.id, + // }) + // .collect(); - let mods = game_mod::batched_insert(&pool, &mods_to_create).await?; + // let mods = game_mod::batched_insert(&pool, &mods_to_create).await?; - for db_mod in mods { + for db_file in 
file::get_files_that_need_backfill(&pool).await? { + let db_mod = game_mod::get(&pool, db_file.mod_id) + .await? + .expect("a mod to exist for file"); let mod_span = info_span!("mod", name = ?&db_mod.name, id = &db_mod.nexus_mod_id); let _mod_span = mod_span.enter(); - let files_resp = nexus_api::files::get(&client, db_mod.nexus_mod_id).await?; + // let files_resp = nexus_api::files::get(&client, db_mod.nexus_mod_id).await?; - debug!(duration = ?files_resp.wait, "sleeping"); - sleep(files_resp.wait).await; + // debug!(duration = ?files_resp.wait, "sleeping"); + // sleep(files_resp.wait).await; // Filter out replaced/deleted files (indicated by null category) and archived files - let files = files_resp - .files()? - .into_iter() - .filter(|file| match file.category { - None => { - info!( - name = file.file_name, - id = file.file_id, - "skipping file with no category" + // let files = files_resp + // .files()? + // .into_iter() + // .filter(|file| match file.category { + // None => { + // info!( + // name = file.file_name, + // id = file.file_id, + // "skipping file with no category" + // ); + // false + // } + // Some(category) if category == "ARCHIVED" => false, + // Some(_) => true, + // }); + + // for api_file in files { + let file_span = info_span!("file", id = &db_file.nexus_file_id); + let _file_span = file_span.enter(); + // let db_file = file::insert( + // &pool, + // api_file.name, + // api_file.file_name, + // api_file.file_id as i32, + // db_mod.id, + // api_file.category, + // api_file.version, + // api_file.mod_version, + // api_file.size, + // api_file.uploaded_at, + // ) + // .await?; + + // let mut checked_metadata = false; + // match nexus_api::metadata::contains_plugin(&client, &api_file).await { + // Ok(contains_plugin) => { + // if let Some(contains_plugin) = contains_plugin { + // checked_metadata = true; + // if !contains_plugin { + // info!("file metadata does not contain a plugin, skip downloading"); + // continue; + // } + // } else { + // 
warn!("file has no metadata link, continuing with download"); + // } + // } + // Err(err) => { + // warn!(error = %err, "error retreiving metadata for file, continuing with download"); + // } + // }; + + let download_link_resp = nexus_api::download_link::get( + &client, + db_mod.nexus_mod_id, + db_file.nexus_file_id as i64, + ) + .await; + if let Err(err) = &download_link_resp { + if let Some(reqwest_err) = err.downcast_ref::() { + if reqwest_err.status() == Some(StatusCode::NOT_FOUND) { + warn!( + status = ?reqwest_err.status(), + "failed to get download link for file" ); - false - } - Some(category) if category == "ARCHIVED" => false, - Some(_) => true, - }); - - for api_file in files { - let file_span = - info_span!("file", name = &api_file.file_name, id = &api_file.file_id); - let _file_span = file_span.enter(); - let db_file = file::insert( - &pool, - api_file.name, - api_file.file_name, - api_file.file_id as i32, - db_mod.id, - api_file.category, - api_file.version, - api_file.mod_version, - api_file.size, - api_file.uploaded_at, - ) - .await?; - - let mut checked_metadata = false; - match nexus_api::metadata::contains_plugin(&client, &api_file).await { - Ok(contains_plugin) => { - if let Some(contains_plugin) = contains_plugin { - checked_metadata = true; - if !contains_plugin { - info!("file metadata does not contain a plugin, skip downloading"); - continue; - } - } else { - warn!("file has no metadata link, continuing with download"); - } - } - Err(err) => { - warn!(error = %err, "error retreiving metadata for file, continuing with download"); - } - }; - - let download_link_resp = - nexus_api::download_link::get(&client, db_mod.nexus_mod_id, api_file.file_id) - .await; - if let Err(err) = &download_link_resp { - if let Some(reqwest_err) = err.downcast_ref::() { - if reqwest_err.status() == Some(StatusCode::NOT_FOUND) { - warn!( - status = ?reqwest_err.status(), - "failed to get download link for file" - ); - file::update_has_download_link(&pool, 
db_file.id, false).await?; - continue; - } - } - } - let download_link_resp = download_link_resp?; - let mut tokio_file = download_link_resp.download_file(&client).await?; - info!(bytes = api_file.size, "download finished"); - - initialize_plugins_archive(db_mod.nexus_mod_id, db_file.nexus_file_id)?; - let mut plugins_archive = ZipWriter::new_append( - OpenOptions::new() - .read(true) - .write(true) - .open("plugins.zip")?, - )?; - - let mut initial_bytes = [0; 8]; - tokio_file.seek(SeekFrom::Start(0)).await?; - match tokio_file.read_exact(&mut initial_bytes).await { - Err(err) => { - warn!(error = %err, "failed to read initial bytes, skipping file"); + file::update_has_download_link(&pool, db_file.id, false).await?; continue; } - _ => {} } - let kind = infer::get(&initial_bytes).expect("unknown file type of file download"); - info!( - mime_type = kind.mime_type(), - "inferred mime_type of downloaded archive" - ); + } + let download_link_resp = download_link_resp?; + let mut tokio_file = download_link_resp.download_file(&client).await?; + info!("download finished"); - match kind.mime_type() { - "application/vnd.rar" => { - info!("downloaded archive is RAR archive, attempt to uncompress entire archive"); - // Use unrar to uncompress the entire .rar file to avoid bugs with compress_tools uncompressing certain .rar files: - // https://github.com/libarchive/libarchive/issues/373, https://github.com/libarchive/libarchive/issues/1426 - tokio_file.seek(SeekFrom::Start(0)).await?; - let mut file = tokio_file.try_clone().await?.into_std().await; - let temp_dir = tempdir()?; - let temp_file_path = temp_dir.path().join("download.rar"); - let mut temp_file = std::fs::File::create(&temp_file_path)?; - std::io::copy(&mut file, &mut temp_file)?; + initialize_plugins_archive(db_mod.nexus_mod_id, db_file.nexus_file_id)?; + let mut plugins_archive = ZipWriter::new_append( + OpenOptions::new() + .read(true) + .write(true) + .open("plugins.zip")?, + )?; - let mut plugin_file_paths = 
Vec::new(); - let list = - Archive::new(&temp_file_path.to_string_lossy().to_string())?.list(); - if let Ok(list) = list { - for entry in list { - if let Ok(entry) = entry { + let mut initial_bytes = [0; 8]; + tokio_file.seek(SeekFrom::Start(0)).await?; + match tokio_file.read_exact(&mut initial_bytes).await { + Err(err) => { + warn!(error = %err, "failed to read initial bytes, skipping file"); + continue; + } + _ => {} + } + let kind = infer::get(&initial_bytes).expect("unknown file type of file download"); + info!( + mime_type = kind.mime_type(), + "inferred mime_type of downloaded archive" + ); + + match kind.mime_type() { + "application/vnd.rar" => { + info!( + "downloaded archive is RAR archive, attempt to uncompress entire archive" + ); + // Use unrar to uncompress the entire .rar file to avoid bugs with compress_tools uncompressing certain .rar files: + // https://github.com/libarchive/libarchive/issues/373, https://github.com/libarchive/libarchive/issues/1426 + tokio_file.seek(SeekFrom::Start(0)).await?; + let mut file = tokio_file.try_clone().await?.into_std().await; + let temp_dir = tempdir()?; + let temp_file_path = temp_dir.path().join("download.rar"); + let mut temp_file = std::fs::File::create(&temp_file_path)?; + std::io::copy(&mut file, &mut temp_file)?; + + let mut plugin_file_paths = Vec::new(); + let list = Archive::new(&temp_file_path.to_string_lossy().to_string())?.list(); + if let Ok(list) = list { + for entry in list { + if let Ok(entry) = entry { + if let Some(extension) = entry.filename.extension() { if entry.is_file() - && (entry.filename.ends_with(".esp") - || entry.filename.ends_with(".esm") - || entry.filename.ends_with(".esl")) + && (extension == "esp" + || extension == "esm" + || extension == "esl") { plugin_file_paths.push(entry.filename); } } } - } else { - if !checked_metadata { - warn!("failed to read archive and server has no metadata, skipping file"); - continue; - } else { - error!("failed to read archive, but server had 
metadata"); - panic!("failed to read archive, but server had metadata"); - } } - info!( - num_plugin_files = plugin_file_paths.len(), - "listed plugins in downloaded archive" - ); - - if plugin_file_paths.len() > 0 { - info!("uncompressing downloaded archive"); - let extract = - Archive::new(&temp_file_path.to_string_lossy().to_string())? - .extract_to(temp_dir.path().to_string_lossy().to_string()); - extract - .expect("failed to extract") - .process() - .expect("failed to extract"); - - for file_path in plugin_file_paths.iter() { - info!( - ?file_path, - "processing uncompressed file from downloaded archive" - ); - let mut plugin_buf = - std::fs::read(temp_dir.path().join(file_path))?; - process_plugin( - &mut plugin_buf, - &pool, - &mut plugins_archive, - &db_file, - &db_mod, - &file_path.to_string_lossy(), - ) - .await?; - } - } - temp_dir.close()?; + } else { + warn!("failed to read archive, skipping file"); + continue; } - _ => { - tokio_file.seek(SeekFrom::Start(0)).await?; - let mut file = tokio_file.try_clone().await?.into_std().await; - let mut plugin_file_paths = Vec::new(); + info!( + num_plugin_files = plugin_file_paths.len(), + "listed plugins in downloaded archive" + ); - let archive_files = match list_archive_files(&file) { - Ok(files) => Ok(files), - Err(err) => { - if !checked_metadata { - warn!(error = %err, "failed to read archive and server has no metadata, skipping file"); - continue; - } else { - error!(error = %err, "failed to read archive, but server had metadata"); - Err(err) - } - } - }?; - for file_path in archive_files { - if file_path.ends_with(".esp") - || file_path.ends_with(".esm") - || file_path.ends_with(".esl") - { - plugin_file_paths.push(file_path); - } - } - info!( - num_plugin_files = plugin_file_paths.len(), - "listed plugins in downloaded archive" - ); + if plugin_file_paths.len() > 0 { + info!("uncompressing downloaded archive"); + let extract = Archive::new(&temp_file_path.to_string_lossy().to_string())? 
+ .extract_to(temp_dir.path().to_string_lossy().to_string()); + extract + .expect("failed to extract") + .process() + .expect("failed to extract"); for file_path in plugin_file_paths.iter() { - let plugin_span = info_span!("plugin", name = ?file_path); - let plugin_span = plugin_span.enter(); - file.seek(SeekFrom::Start(0))?; - let mut buf = Vec::default(); - info!("uncompressing plugin file from downloaded archive"); - match uncompress_archive_file(&mut file, &mut buf, file_path) { - Ok(_) => Ok(()), - Err(err) => { - if kind.mime_type() == "application/zip" { - // compress_tools or libarchive failed to extract zip file (e.g. archive is deflate64 compressed) - // Attempt to uncompress the archive using `unzip` unix command instead - warn!(error = %err, "failed to extract file with compress_tools, extracting whole archive with unzip instead"); - drop(plugin_span); - file.seek(SeekFrom::Start(0))?; - let temp_dir = tempdir()?; - let temp_file_path = temp_dir - .path() - .join(format!("download.{}", kind.extension())); - let mut temp_file = std::fs::File::create(&temp_file_path)?; - std::io::copy(&mut file, &mut temp_file)?; - let extracted_path = temp_dir.path().join("extracted"); - - Command::new("unzip") - .args(&[ - &temp_file_path.to_string_lossy(), - "-d", - &extracted_path.to_string_lossy(), - ]) - .status()?; - - for file_path in plugin_file_paths.iter() { - let plugin_span = - info_span!("plugin", name = ?file_path); - let _plugin_span = plugin_span.enter(); - info!("processing uncompressed file from downloaded archive"); - let mut plugin_buf = - std::fs::read(extracted_path.join(file_path))?; - process_plugin( - &mut plugin_buf, - &pool, - &mut plugins_archive, - &db_file, - &db_mod, - file_path, - ) - .await?; - } - - break; - } - Err(err) - } - }?; + info!( + ?file_path, + "processing uncompressed file from downloaded archive" + ); + let mut plugin_buf = std::fs::read(temp_dir.path().join(file_path))?; process_plugin( - &mut buf, + &mut plugin_buf, &pool, 
&mut plugins_archive, &db_file, &db_mod, - file_path, + &file_path.to_string_lossy(), ) .await?; } } + temp_dir.close()?; } + _ => { + tokio_file.seek(SeekFrom::Start(0)).await?; + let mut file = tokio_file.try_clone().await?.into_std().await; + let mut plugin_file_paths = Vec::new(); - plugins_archive.finish()?; - debug!(duration = ?download_link_resp.wait, "sleeping"); - sleep(download_link_resp.wait).await; + let archive_files = match list_archive_files(&file) { + Ok(files) => files, + Err(err) => { + warn!(error = %err, "failed to read archive, skipping file"); + continue; + } + }; + for file_path in archive_files { + if file_path.ends_with(".esp") + || file_path.ends_with(".esm") + || file_path.ends_with(".esl") + { + plugin_file_paths.push(file_path); + } + } + info!( + num_plugin_files = plugin_file_paths.len(), + "listed plugins in downloaded archive" + ); + + for file_path in plugin_file_paths.iter() { + let plugin_span = info_span!("plugin", name = ?file_path); + let plugin_span = plugin_span.enter(); + file.seek(SeekFrom::Start(0))?; + let mut buf = Vec::default(); + info!("uncompressing plugin file from downloaded archive"); + match uncompress_archive_file(&mut file, &mut buf, file_path) { + Ok(_) => Ok(()), + Err(err) => { + if kind.mime_type() == "application/zip" { + // compress_tools or libarchive failed to extract zip file (e.g. 
archive is deflate64 compressed) + // Attempt to uncompress the archive using `unzip` unix command instead + warn!(error = %err, "failed to extract file with compress_tools, extracting whole archive with unzip instead"); + drop(plugin_span); + file.seek(SeekFrom::Start(0))?; + let temp_dir = tempdir()?; + let temp_file_path = temp_dir + .path() + .join(format!("download.{}", kind.extension())); + let mut temp_file = std::fs::File::create(&temp_file_path)?; + std::io::copy(&mut file, &mut temp_file)?; + let extracted_path = temp_dir.path().join("extracted"); + + Command::new("unzip") + .args(&[ + &temp_file_path.to_string_lossy(), + "-d", + &extracted_path.to_string_lossy(), + ]) + .status()?; + + for file_path in plugin_file_paths.iter() { + let plugin_span = info_span!("plugin", name = ?file_path); + let _plugin_span = plugin_span.enter(); + info!( + "processing uncompressed file from downloaded archive" + ); + let mut plugin_buf = + std::fs::read(extracted_path.join(file_path))?; + process_plugin( + &mut plugin_buf, + &pool, + &mut plugins_archive, + &db_file, + &db_mod, + file_path, + ) + .await?; + } + + break; + } + Err(err) + } + }?; + process_plugin( + &mut buf, + &pool, + &mut plugins_archive, + &db_file, + &db_mod, + file_path, + ) + .await?; + } + } } + + plugins_archive.finish()?; + debug!(duration = ?download_link_resp.wait, "sleeping"); + sleep(download_link_resp.wait).await; } + // } page += 1; debug!(?page, ?has_next_page, "sleeping 1 second"); diff --git a/src/models/file.rs b/src/models/file.rs index 494f5d9..34a239e 100644 --- a/src/models/file.rs +++ b/src/models/file.rs @@ -91,3 +91,14 @@ pub async fn update_has_download_link( .await .context("Failed to update file") } + +#[instrument(level = "debug", skip(pool))] +pub async fn get_files_that_need_backfill(pool: &sqlx::Pool) -> Result> { + sqlx::query_as!( + File, + "SELECT * FROM files WHERE created_at >= '2021-07-24 02:07:25' and file_name like '%.rar'", + ) + .fetch_all(pool) + .await + 
.context("Failed to get files") +} diff --git a/src/models/game_mod.rs b/src/models/game_mod.rs index 2cd3a8e..da0c419 100644 --- a/src/models/game_mod.rs +++ b/src/models/game_mod.rs @@ -136,3 +136,11 @@ pub async fn batched_insert<'a>( } Ok(saved_mods) } + +#[instrument(level = "debug", skip(pool))] +pub async fn get(pool: &sqlx::Pool<sqlx::Postgres>, id: i32) -> Result<Option<Mod>> { + sqlx::query_as!(Mod, "SELECT * FROM mods WHERE id = $1", id) + .fetch_optional(pool) + .await + .context("Failed to get mod") +}