From d6b8f4e74a578ec7dfb624d353f30f2829527e1e Mon Sep 17 00:00:00 2001 From: Tyler Hallada Date: Sun, 13 Jun 2021 22:30:40 -0400 Subject: [PATCH] Mostly working download loop done Still need to fix a panic that happens on some .rar archive files. --- Cargo.lock | 4 +- Cargo.toml | 2 +- src/main.rs | 626 +++++++++++++++++++++++++++++----------------------- 3 files changed, 350 insertions(+), 282 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8cc8ebe..e37ffe1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1626,9 +1626,9 @@ checksum = "cbce6d4507c7e4a3962091436e56e95290cb71fa302d0d270e32130b75fbff27" [[package]] name = "skyrim-cell-dump" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "372b96816596c25ba82afdc4819aae92e3750c9f4d965aa99d46f25fe53bbc3f" +checksum = "b8ff27163eeca52326be9a89a4adc15dd7ed3d7c0c44dd981aa2bbacff10aede" dependencies = [ "anyhow", "bitflags", diff --git a/Cargo.toml b/Cargo.toml index ecfae74..be91e00 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ seahash = "4.1" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" sqlx = { version = "0.5", features = ["runtime-tokio-native-tls", "postgres", "migrate", "chrono"] } -skyrim-cell-dump = "0.1.2" +skyrim-cell-dump = "0.1.3" tempfile = "3.2" tokio = { version = "1.5.0", features = ["full"] } tokio-util = { version = "0.6", features = ["compat"] } diff --git a/src/main.rs b/src/main.rs index 90331de..ede33e4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,9 +1,13 @@ use anyhow::{anyhow, Context, Result}; +use chrono::DateTime; +use chrono::Duration; use chrono::NaiveDateTime; +use chrono::Utc; use compress_tools::{list_archive_files, uncompress_archive_file}; use dotenv::dotenv; use futures::future::try_join_all; use futures::stream::TryStreamExt; +use reqwest::Response; use scraper::{Html, Selector}; use serde::{Deserialize, Serialize}; use serde_json::Value; @@ -16,6 +20,7 @@ use std::io::Seek; use std::io::SeekFrom; use tempfile::tempfile; use tokio::io::{AsyncReadExt, AsyncSeekExt}; +use tokio::time::sleep; use tokio_util::compat::FuturesAsyncReadCompatExt; use zip::write::{FileOptions, ZipWriter}; @@ -223,7 +228,9 @@ async fn insert_cell( "INSERT INTO cells (form_id, x, y, is_persistent, created_at, updated_at) VALUES ($1, $2, $3, $4, now(), now()) - ON CONFLICT DO NOTHING + ON CONFLICT (form_id) DO UPDATE + SET (x, y, is_persistent, updated_at) = + (EXCLUDED.x, EXCLUDED.y, EXCLUDED.is_persistent, now()) RETURNING *", form_id, x, @@ -258,6 +265,38 @@ async fn insert_plugin_cell( .context("Failed to insert cell") } +fn rate_limit_wait_duration(res: &Response) -> Result> { + let daily_remaining = res + .headers() + .get("X-RL-Daily-Remaining") + .expect("No daily limit in response headers"); + let hourly_remaining = res + .headers() + .get("X-RL-Hourly-Remaining") + .expect("No hourly limit in response headers"); + let hourly_reset = res + .headers() + .get("X-RL-Hourly-Reset") + .expect("No hourly reset in response headers"); + dbg!(daily_remaining); + dbg!(hourly_remaining); + + if hourly_remaining == "0" { + let hourly_reset = hourly_reset.to_str()?.trim(); + let hourly_reset: DateTime = + (DateTime::parse_from_str(hourly_reset, "%Y-%m-%d %H:%M:%S %z")? + + Duration::seconds(5)) + .into(); + dbg!(hourly_reset); + let duration = (hourly_reset - Utc::now()).to_std()?; + dbg!(duration); + + return Ok(Some(duration)); + } + + Ok(None) +} + #[tokio::main] pub async fn main() -> Result<()> { dotenv().ok(); @@ -268,153 +307,99 @@ pub async fn main() -> Result<()> { let game = insert_game(&pool, GAME_NAME, GAME_ID as i32).await?; let client = reqwest::Client::new(); - let res = client - .get(format!( - "https://www.nexusmods.com/Core/Libs/Common/Widgets/ModList?RH_ModList=nav:true,home:false,type:0,user_id:0,game_id:{},advfilt:true,include_adult:true,page_size:80,show_game_filter:false,open:false,page:1,sort_by:OLD_u_downloads", - GAME_ID - )) - .send() - .await? - .error_for_status()?; - let html = res.text().await?; - let document = Html::parse_document(&html); - let mod_select = Selector::parse("div.mod-tile").expect("failed to parse CSS selector"); - let left_select = Selector::parse("div.mod-tile-left").expect("failed to parse CSS selector"); - let right_select = Selector::parse("div.mod-tile-right").expect("failed to parse CSS selector"); - let name_select = Selector::parse("p.tile-name a").expect("failed to parse CSS selector"); - let category_select = Selector::parse("div.category a").expect("failed to parse CSS selector"); - let author_select = Selector::parse("div.author a").expect("failed to parse CSS selector"); - let desc_select = Selector::parse("p.desc").expect("failed to parse CSS selector"); + let mut page: i32 = 1; + let mut last_page: i32 = 1; - let mods = try_join_all(document.select(&mod_select).map(|element| { - let left = element - .select(&left_select) - .next() - .expect("Missing left div for mod"); - let right = element - .select(&right_select) - .next() - .expect("Missing right div for mod"); - let nexus_mod_id = left - .value() - .attr("data-mod-id") - .expect("Missing mod id attribute") - .parse::() - .ok() - .expect("Failed to parse mod id"); - let name_elem = right - .select(&name_select) - .next() - .expect("Missing name link for mod"); - let name = name_elem.text().next().expect("Missing name text for mod"); - let category_elem = right - .select(&category_select) - .next() - .expect("Missing category link for mod"); - let category = category_elem - .text() - .next() - .expect("Missing category text for mod"); - let author_elem = right - .select(&author_select) - .next() - .expect("Missing author link for mod"); - let author = author_elem - .text() - .next() - .expect("Missing author text for mod"); - let desc_elem = right - .select(&desc_select) - .next() - .expect("Missing desc elem for mod"); - let desc = desc_elem.text().next(); - dbg!(name, nexus_mod_id, author, category, desc, game.id); - insert_mod(&pool, name, nexus_mod_id, author, category, desc, game.id) - })) - .await?; - dbg!(&mods); - - for mod_obj in mods { - dbg!(mod_obj.id); + while page <= last_page { let res = client .get(format!( - "https://api.nexusmods.com/v1/games/{}/mods/{}/files.json", - GAME_NAME, mod_obj.id + "https://www.nexusmods.com/Core/Libs/Common/Widgets/ModList?RH_ModList=nav:true,home:false,type:0,user_id:0,game_id:{},advfilt:true,include_adult:true,page_size:80,show_game_filter:false,open:false,page:{},sort_by:OLD_u_downloads", + GAME_ID, + page )) - .header("accept", "application/json") - .header("apikey", env::var("NEXUS_API_KEY")?) - .header("user-agent", USER_AGENT) .send() .await? .error_for_status()?; - let files = res.json::().await?; - let files = files - .get("files") - .ok_or_else(|| anyhow!("Missing files key in API response"))? - .as_array() - .ok_or_else(|| anyhow!("files value in API response is not an array"))?; - // TODO: download other files than just MAIN files - let files = files.into_iter().filter(|file| { - if let Some(category_name) = file.get("category_name") { - category_name.as_str() == Some("MAIN") - } else { - false - } - }); + let html = res.text().await?; + let document = Html::parse_document(&html); + let mod_select = Selector::parse("li.mod-tile").expect("failed to parse CSS selector"); + let left_select = + Selector::parse("div.mod-tile-left").expect("failed to parse CSS selector"); + let right_select = + Selector::parse("div.mod-tile-right").expect("failed to parse CSS selector"); + let name_select = Selector::parse("p.tile-name a").expect("failed to parse CSS selector"); + let category_select = + Selector::parse("div.category a").expect("failed to parse CSS selector"); + let author_select = Selector::parse("div.author a").expect("failed to parse CSS selector"); + let desc_select = Selector::parse("p.desc").expect("failed to parse CSS selector"); + let last_page_select = + Selector::parse("div.pagination li.extra a").expect("failed to parse CSS selector"); - for file in files { - let file_id = file - .get("file_id") - .ok_or_else(|| anyhow!("Missing file_id key in file in API response"))? - .as_i64() - .ok_or_else(|| anyhow!("file_id value in API response file is not a number"))?; - dbg!(file_id); - let name = file - .get("name") - .ok_or_else(|| anyhow!("Missing name key in file in API response"))? - .as_str() - .ok_or_else(|| anyhow!("name value in API response file is not a string"))?; - let file_name = file - .get("file_name") - .ok_or_else(|| anyhow!("Missing file_name key in file in API response"))? - .as_str() - .ok_or_else(|| anyhow!("file_name value in API response file is not a string"))?; - let category = file - .get("category") - .ok_or_else(|| anyhow!("Missing category key in file in API response"))? - .as_str(); - let version = file - .get("version") - .ok_or_else(|| anyhow!("Missing version key in file in API response"))? - .as_str(); - let mod_version = file - .get("mod_version") - .ok_or_else(|| anyhow!("Missing mod_version key in file in API response"))? - .as_str(); - let uploaded_timestamp = file - .get("uploaded_timestamp") - .ok_or_else(|| anyhow!("Missing uploaded_timestamp key in file in API response"))? - .as_i64() - .ok_or_else(|| { - anyhow!("uploaded_timestamp value in API response file is not a number") - })?; - let uploaded_at = NaiveDateTime::from_timestamp(uploaded_timestamp, 0); - insert_file( - &pool, - name, - file_name, - file_id as i32, - mod_obj.id, - category, - version, - mod_version, - uploaded_at, - ) - .await?; + let last_page_elem = document + .select(&last_page_select) + .next() + .expect("Missing last page link"); + last_page = last_page_elem + .text() + .next() + .expect("Missing last page text") + .trim() + .parse::() + .ok() + .expect("Failed to parse last page"); + + let mods = try_join_all(document.select(&mod_select).map(|element| { + let left = element + .select(&left_select) + .next() + .expect("Missing left div for mod"); + let right = element + .select(&right_select) + .next() + .expect("Missing right div for mod"); + let nexus_mod_id = left + .value() + .attr("data-mod-id") + .expect("Missing mod id attribute") + .parse::() + .ok() + .expect("Failed to parse mod id"); + let name_elem = right + .select(&name_select) + .next() + .expect("Missing name link for mod"); + let name = name_elem.text().next().expect("Missing name text for mod"); + let category_elem = right + .select(&category_select) + .next() + .expect("Missing category link for mod"); + let category = category_elem + .text() + .next() + .expect("Missing category text for mod"); + let author_elem = right + .select(&author_select) + .next() + .expect("Missing author link for mod"); + let author = author_elem + .text() + .next() + .expect("Missing author text for mod"); + let desc_elem = right + .select(&desc_select) + .next() + .expect("Missing desc elem for mod"); + let desc = desc_elem.text().next(); + insert_mod(&pool, name, nexus_mod_id, author, category, desc, game.id) + })) + .await?; + + for mod_obj in mods { + dbg!(&mod_obj); let res = client .get(format!( - "https://api.nexusmods.com/v1/games/{}/mods/{}/files/{}/download_link.json", - GAME_NAME, mod_obj.id, file_id + "https://api.nexusmods.com/v1/games/{}/mods/{}/files.json", + GAME_NAME, mod_obj.nexus_mod_id )) .header("accept", "application/json") .header("apikey", env::var("NEXUS_API_KEY")?) @@ -422,162 +407,245 @@ pub async fn main() -> Result<()> { .send() .await? .error_for_status()?; - let links = res.json::().await?; - let link = links - .get(0) - .ok_or_else(|| anyhow!("Links array in API response is missing first element"))? - .get("URI") - .ok_or_else(|| anyhow!("Missing URI key in link in API response"))? - .as_str() - .ok_or_else(|| anyhow!("URI value in API response link is not a string"))?; - let mut tokio_file = tokio::fs::File::from_std(tempfile()?); - let res = client - .get(link) - .header("apikey", env::var("NEXUS_API_KEY")?) - .header("user-agent", USER_AGENT) - .send() - .await? - .error_for_status()?; + if let Some(duration) = rate_limit_wait_duration(&res)? { + sleep(duration).await; + } - // See: https://github.com/benkay86/async-applied/blob/master/reqwest-tokio-compat/src/main.rs - let mut byte_stream = res - .bytes_stream() - .map_err(|e| futures::io::Error::new(futures::io::ErrorKind::Other, e)) - .into_async_read() - .compat(); + let files = res.json::().await?; + let files = files + .get("files") + .ok_or_else(|| anyhow!("Missing files key in API response"))? + .as_array() + .ok_or_else(|| anyhow!("files value in API response is not an array"))?; + // TODO: download other files than just MAIN files + let files = files.into_iter().filter(|file| { + if let Some(category_name) = file.get("category_name") { + category_name.as_str() == Some("MAIN") + } else { + false + } + }); - tokio::io::copy(&mut byte_stream, &mut tokio_file).await?; + for file in files { + let file_id = file + .get("file_id") + .ok_or_else(|| anyhow!("Missing file_id key in file in API response"))? + .as_i64() + .ok_or_else(|| anyhow!("file_id value in API response file is not a number"))?; + dbg!(file_id); + let name = file + .get("name") + .ok_or_else(|| anyhow!("Missing name key in file in API response"))? + .as_str() + .ok_or_else(|| anyhow!("name value in API response file is not a string"))?; + let file_name = file + .get("file_name") + .ok_or_else(|| anyhow!("Missing file_name key in file in API response"))? + .as_str() + .ok_or_else(|| { + anyhow!("file_name value in API response file is not a string") + })?; + let category = file + .get("category_name") + .ok_or_else(|| anyhow!("Missing category key in file in API response"))? + .as_str(); + let version = file + .get("version") + .ok_or_else(|| anyhow!("Missing version key in file in API response"))? + .as_str(); + let mod_version = file + .get("mod_version") + .ok_or_else(|| anyhow!("Missing mod_version key in file in API response"))? + .as_str(); + let uploaded_timestamp = file + .get("uploaded_timestamp") + .ok_or_else(|| { + anyhow!("Missing uploaded_timestamp key in file in API response") + })? + .as_i64() + .ok_or_else(|| { + anyhow!("uploaded_timestamp value in API response file is not a number") + })?; + let uploaded_at = NaiveDateTime::from_timestamp(uploaded_timestamp, 0); + let db_file = insert_file( + &pool, + name, + file_name, + file_id as i32, + mod_obj.id, + category, + version, + mod_version, + uploaded_at, + ) + .await?; + let res = client + .get(format!( + "https://api.nexusmods.com/v1/games/{}/mods/{}/files/{}/download_link.json", + GAME_NAME, mod_obj.nexus_mod_id, file_id + )) + .header("accept", "application/json") + .header("apikey", env::var("NEXUS_API_KEY")?) + .header("user-agent", USER_AGENT) + .send() + .await? + .error_for_status()?; - // let bytes = res.bytes().await?; - // let reader = std::io::Cursor::new(&bytes); + let links = res.json::().await?; + let link = links + .get(0) + .ok_or_else(|| anyhow!("Links array in API response is missing first element"))? + .get("URI") + .ok_or_else(|| anyhow!("Missing URI key in link in API response"))? + .as_str() + .ok_or_else(|| anyhow!("URI value in API response link is not a string"))?; - let mut plugin_archive = ZipWriter::new( - OpenOptions::new() - .write(true) - .create(true) - .open("plugins.zip")?, - ); - plugin_archive.add_directory( - format!("{}/{}/{}", GAME_NAME, mod_obj.id, file_id), - FileOptions::default(), - )?; - plugin_archive.finish()?; + let mut tokio_file = tokio::fs::File::from_std(tempfile()?); + let res = client + .get(link) + .header("apikey", env::var("NEXUS_API_KEY")?) + .header("user-agent", USER_AGENT) + .send() + .await? + .error_for_status()?; - let mut plugin_archive = ZipWriter::new_append( - OpenOptions::new() - .read(true) - .write(true) - .open("plugins.zip")?, - )?; - let mut initial_bytes = [0; 8]; - tokio_file.seek(SeekFrom::Start(0)).await?; - tokio_file.read_exact(&mut initial_bytes).await?; - dbg!(&initial_bytes); - let kind = infer::get(&initial_bytes).expect("unknown file type of file download"); - match kind.mime_type() { - // "application/zip" => { - // let mut archive = ZipArchive::new(reader)?; - // let mut plugin_file_paths = Vec::new(); - // for file_name in archive.file_names() { - // dbg!(file_name); - // if file_name.ends_with(".esp") - // || file_name.ends_with(".esm") - // || file_name.ends_with(".esl") - // { - // plugin_file_paths.push(file_name.to_string()); - // } - // } - // dbg!(&plugin_file_paths); - // for file_name in plugin_file_paths.iter() { - // let mut file = archive.by_name(file_name)?; - // let plugin = parse_plugin(file)?; - // dbg!(plugin); - // plugin_archive.start_file( - // format!("{}/{}/{}/{}", GAME_NAME, mod_id, file_id, file_name), - // FileOptions::default(), - // )?; - // std::io::copy(&mut file, &mut plugin_archive)?; - // } - // } - _ => { - tokio_file.seek(SeekFrom::Start(0)).await?; - let mut file = tokio_file.into_std().await; - let mut plugin_file_paths = Vec::new(); - for file_name in list_archive_files(&file)? { - dbg!(&file_name); - if file_name.ends_with(".esp") - || file_name.ends_with(".esm") - || file_name.ends_with(".esl") - { - plugin_file_paths.push(file_name); + let duration = rate_limit_wait_duration(&res)?; + + // See: https://github.com/benkay86/async-applied/blob/master/reqwest-tokio-compat/src/main.rs + let mut byte_stream = res + .bytes_stream() + .map_err(|e| futures::io::Error::new(futures::io::ErrorKind::Other, e)) + .into_async_read() + .compat(); + + tokio::io::copy(&mut byte_stream, &mut tokio_file).await?; + + // let bytes = res.bytes().await?; + // let reader = std::io::Cursor::new(&bytes); + + let mut plugin_archive = ZipWriter::new( + OpenOptions::new() + .write(true) + .create(true) + .open("plugins.zip")?, + ); + plugin_archive.add_directory( + format!("{}/{}/{}", GAME_NAME, mod_obj.nexus_mod_id, file_id), + FileOptions::default(), + )?; + plugin_archive.finish()?; + + let mut plugin_archive = ZipWriter::new_append( + OpenOptions::new() + .read(true) + .write(true) + .open("plugins.zip")?, + )?; + let mut initial_bytes = [0; 8]; + tokio_file.seek(SeekFrom::Start(0)).await?; + tokio_file.read_exact(&mut initial_bytes).await?; + let kind = infer::get(&initial_bytes).expect("unknown file type of file download"); + match kind.mime_type() { + // "application/zip" => { + // let mut archive = ZipArchive::new(reader)?; + // let mut plugin_file_paths = Vec::new(); + // for file_name in archive.file_names() { + // dbg!(file_name); + // if file_name.ends_with(".esp") + // || file_name.ends_with(".esm") + // || file_name.ends_with(".esl") + // { + // plugin_file_paths.push(file_name.to_string()); + // } + // } + // dbg!(&plugin_file_paths); + // for file_name in plugin_file_paths.iter() { + // let mut file = archive.by_name(file_name)?; + // let plugin = parse_plugin(file)?; + // dbg!(plugin); + // plugin_archive.start_file( + // format!("{}/{}/{}/{}", GAME_NAME, mod_id, file_id, file_name), + // FileOptions::default(), + // )?; + // std::io::copy(&mut file, &mut plugin_archive)?; + // } + // } + _ => { + tokio_file.seek(SeekFrom::Start(0)).await?; + let mut file = tokio_file.into_std().await; + let mut plugin_file_paths = Vec::new(); + for file_name in list_archive_files(&file)? { + if file_name.ends_with(".esp") + || file_name.ends_with(".esm") + || file_name.ends_with(".esl") + { + plugin_file_paths.push(file_name); + } } - } - file.seek(SeekFrom::Start(0))?; - for file_name in plugin_file_paths.iter() { - dbg!(file_name); - let mut buf = Vec::default(); - uncompress_archive_file(&mut file, &mut buf, file_name)?; - let plugin = parse_plugin(&buf)?; - dbg!(&plugin); - let hash = seahash::hash(&buf); - dbg!(&hash); - let plugin_row = insert_plugin( - &pool, - name, - // TODO: how to make i64 hash? - hash.try_into()?, - file_id as i32, - Some(plugin.header.version as f64), - plugin.header.author, - plugin.header.description, - Some( - &plugin - .header - .masters - .iter() - .map(|s| s.to_string()) - .collect::>(), - ), - ) - .await?; - for cell in plugin.cells { - let cell_row = insert_cell( + for file_name in plugin_file_paths.iter() { + file.seek(SeekFrom::Start(0))?; + dbg!(file_name); + let mut buf = Vec::default(); + uncompress_archive_file(&mut file, &mut buf, file_name)?; + let plugin = parse_plugin(&buf)?; + let hash = seahash::hash(&buf); + let plugin_row = insert_plugin( &pool, - cell.form_id.try_into().unwrap(), - cell.x, - cell.y, - cell.is_persistent, + name, + hash as i64, + db_file.id, + Some(plugin.header.version as f64), + plugin.header.author, + plugin.header.description, + Some( + &plugin + .header + .masters + .iter() + .map(|s| s.to_string()) + .collect::>(), + ), ) .await?; - insert_plugin_cell(&pool, plugin_row.id, cell_row.id, cell.editor_id) + for cell in plugin.cells { + let cell_row = insert_cell( + &pool, + cell.form_id.try_into().unwrap(), + cell.x, + cell.y, + cell.is_persistent, + ) .await?; + insert_plugin_cell( + &pool, + plugin_row.id, + cell_row.id, + cell.editor_id, + ) + .await?; + } + plugin_archive.start_file( + format!( + "{}/{}/{}/{}", + GAME_NAME, mod_obj.nexus_mod_id, file_id, file_name + ), + FileOptions::default(), + )?; + std::io::copy(&mut buf.as_slice(), &mut plugin_archive)?; } - plugin_archive.start_file( - format!("{}/{}/{}/{}", GAME_NAME, mod_obj.id, file_id, file_name), - FileOptions::default(), - )?; - std::io::copy(&mut buf.as_slice(), &mut plugin_archive)?; } - } - }; + }; - plugin_archive.finish()?; - break; // temporarily just grabbing first file + plugin_archive.finish()?; + if let Some(duration) = duration { + sleep(duration).await; + } + } } - break; // temporarily just grabbing first mod + + page += 1; } - // let mod_id = 4119; // hardcoded temporarily - // let res = client - // .get("https://cf-files.nexusmods.com/cdn/1704/351/Kynesgrove-351-2-0-8-1602105523.7z?md5=hUgu4epNAuzlp8yTUMNPgQ&expires=1621585205&user_id=512579&rip=24.218.205.137") - // .header("apikey", env::var("NEXUS_API_KEY")?) - // .header("user-agent", USER_AGENT) - // .send() - // .await?; - // dbg!(&res); - // let bytes = res.bytes().await?; - // let mut bytes = read("C:\\Users\\tyler\\Downloads\\Crime Overhaul Expanded 1.1-19188-1-1.rar")?; - // let mut bytes = read("C:\\Users\\tyler\\Downloads\\YourMarketStall-15814-1-4-2.zip")?; - // let mut reader = std::io::Cursor::new(&bytes); + Ok(()) }