Mostly working download loop done

Still need to fix a panic that happens on some .rar archive files.
This commit is contained in:
Tyler Hallada 2021-06-13 22:30:40 -04:00
parent b132a94c64
commit d6b8f4e74a
3 changed files with 350 additions and 282 deletions

4
Cargo.lock generated
View File

@ -1626,9 +1626,9 @@ checksum = "cbce6d4507c7e4a3962091436e56e95290cb71fa302d0d270e32130b75fbff27"
[[package]]
name = "skyrim-cell-dump"
version = "0.1.2"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "372b96816596c25ba82afdc4819aae92e3750c9f4d965aa99d46f25fe53bbc3f"
checksum = "b8ff27163eeca52326be9a89a4adc15dd7ed3d7c0c44dd981aa2bbacff10aede"
dependencies = [
"anyhow",
"bitflags",

View File

@ -22,7 +22,7 @@ seahash = "4.1"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
sqlx = { version = "0.5", features = ["runtime-tokio-native-tls", "postgres", "migrate", "chrono"] }
skyrim-cell-dump = "0.1.2"
skyrim-cell-dump = "0.1.3"
tempfile = "3.2"
tokio = { version = "1.5.0", features = ["full"] }
tokio-util = { version = "0.6", features = ["compat"] }

View File

@ -1,9 +1,13 @@
use anyhow::{anyhow, Context, Result};
use chrono::DateTime;
use chrono::Duration;
use chrono::NaiveDateTime;
use chrono::Utc;
use compress_tools::{list_archive_files, uncompress_archive_file};
use dotenv::dotenv;
use futures::future::try_join_all;
use futures::stream::TryStreamExt;
use reqwest::Response;
use scraper::{Html, Selector};
use serde::{Deserialize, Serialize};
use serde_json::Value;
@ -16,6 +20,7 @@ use std::io::Seek;
use std::io::SeekFrom;
use tempfile::tempfile;
use tokio::io::{AsyncReadExt, AsyncSeekExt};
use tokio::time::sleep;
use tokio_util::compat::FuturesAsyncReadCompatExt;
use zip::write::{FileOptions, ZipWriter};
@ -223,7 +228,9 @@ async fn insert_cell(
"INSERT INTO cells
(form_id, x, y, is_persistent, created_at, updated_at)
VALUES ($1, $2, $3, $4, now(), now())
ON CONFLICT DO NOTHING
ON CONFLICT (form_id) DO UPDATE
SET (x, y, is_persistent, updated_at) =
(EXCLUDED.x, EXCLUDED.y, EXCLUDED.is_persistent, now())
RETURNING *",
form_id,
x,
@ -258,6 +265,38 @@ async fn insert_plugin_cell(
.context("Failed to insert cell")
}
fn rate_limit_wait_duration(res: &Response) -> Result<Option<std::time::Duration>> {
let daily_remaining = res
.headers()
.get("X-RL-Daily-Remaining")
.expect("No daily limit in response headers");
let hourly_remaining = res
.headers()
.get("X-RL-Hourly-Remaining")
.expect("No hourly limit in response headers");
let hourly_reset = res
.headers()
.get("X-RL-Hourly-Reset")
.expect("No hourly reset in response headers");
dbg!(daily_remaining);
dbg!(hourly_remaining);
if hourly_remaining == "0" {
let hourly_reset = hourly_reset.to_str()?.trim();
let hourly_reset: DateTime<Utc> =
(DateTime::parse_from_str(hourly_reset, "%Y-%m-%d %H:%M:%S %z")?
+ Duration::seconds(5))
.into();
dbg!(hourly_reset);
let duration = (hourly_reset - Utc::now()).to_std()?;
dbg!(duration);
return Ok(Some(duration));
}
Ok(None)
}
#[tokio::main]
pub async fn main() -> Result<()> {
dotenv().ok();
@ -268,153 +307,99 @@ pub async fn main() -> Result<()> {
let game = insert_game(&pool, GAME_NAME, GAME_ID as i32).await?;
let client = reqwest::Client::new();
let res = client
.get(format!(
"https://www.nexusmods.com/Core/Libs/Common/Widgets/ModList?RH_ModList=nav:true,home:false,type:0,user_id:0,game_id:{},advfilt:true,include_adult:true,page_size:80,show_game_filter:false,open:false,page:1,sort_by:OLD_u_downloads",
GAME_ID
))
.send()
.await?
.error_for_status()?;
let html = res.text().await?;
let document = Html::parse_document(&html);
let mod_select = Selector::parse("div.mod-tile").expect("failed to parse CSS selector");
let left_select = Selector::parse("div.mod-tile-left").expect("failed to parse CSS selector");
let right_select = Selector::parse("div.mod-tile-right").expect("failed to parse CSS selector");
let name_select = Selector::parse("p.tile-name a").expect("failed to parse CSS selector");
let category_select = Selector::parse("div.category a").expect("failed to parse CSS selector");
let author_select = Selector::parse("div.author a").expect("failed to parse CSS selector");
let desc_select = Selector::parse("p.desc").expect("failed to parse CSS selector");
let mut page: i32 = 1;
let mut last_page: i32 = 1;
let mods = try_join_all(document.select(&mod_select).map(|element| {
let left = element
.select(&left_select)
.next()
.expect("Missing left div for mod");
let right = element
.select(&right_select)
.next()
.expect("Missing right div for mod");
let nexus_mod_id = left
.value()
.attr("data-mod-id")
.expect("Missing mod id attribute")
.parse::<i32>()
.ok()
.expect("Failed to parse mod id");
let name_elem = right
.select(&name_select)
.next()
.expect("Missing name link for mod");
let name = name_elem.text().next().expect("Missing name text for mod");
let category_elem = right
.select(&category_select)
.next()
.expect("Missing category link for mod");
let category = category_elem
.text()
.next()
.expect("Missing category text for mod");
let author_elem = right
.select(&author_select)
.next()
.expect("Missing author link for mod");
let author = author_elem
.text()
.next()
.expect("Missing author text for mod");
let desc_elem = right
.select(&desc_select)
.next()
.expect("Missing desc elem for mod");
let desc = desc_elem.text().next();
dbg!(name, nexus_mod_id, author, category, desc, game.id);
insert_mod(&pool, name, nexus_mod_id, author, category, desc, game.id)
}))
.await?;
dbg!(&mods);
for mod_obj in mods {
dbg!(mod_obj.id);
while page <= last_page {
let res = client
.get(format!(
"https://api.nexusmods.com/v1/games/{}/mods/{}/files.json",
GAME_NAME, mod_obj.id
"https://www.nexusmods.com/Core/Libs/Common/Widgets/ModList?RH_ModList=nav:true,home:false,type:0,user_id:0,game_id:{},advfilt:true,include_adult:true,page_size:80,show_game_filter:false,open:false,page:{},sort_by:OLD_u_downloads",
GAME_ID,
page
))
.header("accept", "application/json")
.header("apikey", env::var("NEXUS_API_KEY")?)
.header("user-agent", USER_AGENT)
.send()
.await?
.error_for_status()?;
let files = res.json::<Value>().await?;
let files = files
.get("files")
.ok_or_else(|| anyhow!("Missing files key in API response"))?
.as_array()
.ok_or_else(|| anyhow!("files value in API response is not an array"))?;
// TODO: also download files from categories other than MAIN
let files = files.into_iter().filter(|file| {
if let Some(category_name) = file.get("category_name") {
category_name.as_str() == Some("MAIN")
} else {
false
}
});
let html = res.text().await?;
let document = Html::parse_document(&html);
let mod_select = Selector::parse("li.mod-tile").expect("failed to parse CSS selector");
let left_select =
Selector::parse("div.mod-tile-left").expect("failed to parse CSS selector");
let right_select =
Selector::parse("div.mod-tile-right").expect("failed to parse CSS selector");
let name_select = Selector::parse("p.tile-name a").expect("failed to parse CSS selector");
let category_select =
Selector::parse("div.category a").expect("failed to parse CSS selector");
let author_select = Selector::parse("div.author a").expect("failed to parse CSS selector");
let desc_select = Selector::parse("p.desc").expect("failed to parse CSS selector");
let last_page_select =
Selector::parse("div.pagination li.extra a").expect("failed to parse CSS selector");
for file in files {
let file_id = file
.get("file_id")
.ok_or_else(|| anyhow!("Missing file_id key in file in API response"))?
.as_i64()
.ok_or_else(|| anyhow!("file_id value in API response file is not a number"))?;
dbg!(file_id);
let name = file
.get("name")
.ok_or_else(|| anyhow!("Missing name key in file in API response"))?
.as_str()
.ok_or_else(|| anyhow!("name value in API response file is not a string"))?;
let file_name = file
.get("file_name")
.ok_or_else(|| anyhow!("Missing file_name key in file in API response"))?
.as_str()
.ok_or_else(|| anyhow!("file_name value in API response file is not a string"))?;
let category = file
.get("category")
.ok_or_else(|| anyhow!("Missing category key in file in API response"))?
.as_str();
let version = file
.get("version")
.ok_or_else(|| anyhow!("Missing version key in file in API response"))?
.as_str();
let mod_version = file
.get("mod_version")
.ok_or_else(|| anyhow!("Missing mod_version key in file in API response"))?
.as_str();
let uploaded_timestamp = file
.get("uploaded_timestamp")
.ok_or_else(|| anyhow!("Missing uploaded_timestamp key in file in API response"))?
.as_i64()
.ok_or_else(|| {
anyhow!("uploaded_timestamp value in API response file is not a number")
})?;
let uploaded_at = NaiveDateTime::from_timestamp(uploaded_timestamp, 0);
insert_file(
&pool,
name,
file_name,
file_id as i32,
mod_obj.id,
category,
version,
mod_version,
uploaded_at,
)
.await?;
let last_page_elem = document
.select(&last_page_select)
.next()
.expect("Missing last page link");
last_page = last_page_elem
.text()
.next()
.expect("Missing last page text")
.trim()
.parse::<i32>()
.ok()
.expect("Failed to parse last page");
let mods = try_join_all(document.select(&mod_select).map(|element| {
let left = element
.select(&left_select)
.next()
.expect("Missing left div for mod");
let right = element
.select(&right_select)
.next()
.expect("Missing right div for mod");
let nexus_mod_id = left
.value()
.attr("data-mod-id")
.expect("Missing mod id attribute")
.parse::<i32>()
.ok()
.expect("Failed to parse mod id");
let name_elem = right
.select(&name_select)
.next()
.expect("Missing name link for mod");
let name = name_elem.text().next().expect("Missing name text for mod");
let category_elem = right
.select(&category_select)
.next()
.expect("Missing category link for mod");
let category = category_elem
.text()
.next()
.expect("Missing category text for mod");
let author_elem = right
.select(&author_select)
.next()
.expect("Missing author link for mod");
let author = author_elem
.text()
.next()
.expect("Missing author text for mod");
let desc_elem = right
.select(&desc_select)
.next()
.expect("Missing desc elem for mod");
let desc = desc_elem.text().next();
insert_mod(&pool, name, nexus_mod_id, author, category, desc, game.id)
}))
.await?;
for mod_obj in mods {
dbg!(&mod_obj);
let res = client
.get(format!(
"https://api.nexusmods.com/v1/games/{}/mods/{}/files/{}/download_link.json",
GAME_NAME, mod_obj.id, file_id
"https://api.nexusmods.com/v1/games/{}/mods/{}/files.json",
GAME_NAME, mod_obj.nexus_mod_id
))
.header("accept", "application/json")
.header("apikey", env::var("NEXUS_API_KEY")?)
@ -422,162 +407,245 @@ pub async fn main() -> Result<()> {
.send()
.await?
.error_for_status()?;
let links = res.json::<Value>().await?;
let link = links
.get(0)
.ok_or_else(|| anyhow!("Links array in API response is missing first element"))?
.get("URI")
.ok_or_else(|| anyhow!("Missing URI key in link in API response"))?
.as_str()
.ok_or_else(|| anyhow!("URI value in API response link is not a string"))?;
let mut tokio_file = tokio::fs::File::from_std(tempfile()?);
let res = client
.get(link)
.header("apikey", env::var("NEXUS_API_KEY")?)
.header("user-agent", USER_AGENT)
.send()
.await?
.error_for_status()?;
if let Some(duration) = rate_limit_wait_duration(&res)? {
sleep(duration).await;
}
// See: https://github.com/benkay86/async-applied/blob/master/reqwest-tokio-compat/src/main.rs
let mut byte_stream = res
.bytes_stream()
.map_err(|e| futures::io::Error::new(futures::io::ErrorKind::Other, e))
.into_async_read()
.compat();
let files = res.json::<Value>().await?;
let files = files
.get("files")
.ok_or_else(|| anyhow!("Missing files key in API response"))?
.as_array()
.ok_or_else(|| anyhow!("files value in API response is not an array"))?;
// TODO: also download files from categories other than MAIN
let files = files.into_iter().filter(|file| {
if let Some(category_name) = file.get("category_name") {
category_name.as_str() == Some("MAIN")
} else {
false
}
});
tokio::io::copy(&mut byte_stream, &mut tokio_file).await?;
for file in files {
let file_id = file
.get("file_id")
.ok_or_else(|| anyhow!("Missing file_id key in file in API response"))?
.as_i64()
.ok_or_else(|| anyhow!("file_id value in API response file is not a number"))?;
dbg!(file_id);
let name = file
.get("name")
.ok_or_else(|| anyhow!("Missing name key in file in API response"))?
.as_str()
.ok_or_else(|| anyhow!("name value in API response file is not a string"))?;
let file_name = file
.get("file_name")
.ok_or_else(|| anyhow!("Missing file_name key in file in API response"))?
.as_str()
.ok_or_else(|| {
anyhow!("file_name value in API response file is not a string")
})?;
let category = file
.get("category_name")
.ok_or_else(|| anyhow!("Missing category key in file in API response"))?
.as_str();
let version = file
.get("version")
.ok_or_else(|| anyhow!("Missing version key in file in API response"))?
.as_str();
let mod_version = file
.get("mod_version")
.ok_or_else(|| anyhow!("Missing mod_version key in file in API response"))?
.as_str();
let uploaded_timestamp = file
.get("uploaded_timestamp")
.ok_or_else(|| {
anyhow!("Missing uploaded_timestamp key in file in API response")
})?
.as_i64()
.ok_or_else(|| {
anyhow!("uploaded_timestamp value in API response file is not a number")
})?;
let uploaded_at = NaiveDateTime::from_timestamp(uploaded_timestamp, 0);
let db_file = insert_file(
&pool,
name,
file_name,
file_id as i32,
mod_obj.id,
category,
version,
mod_version,
uploaded_at,
)
.await?;
let res = client
.get(format!(
"https://api.nexusmods.com/v1/games/{}/mods/{}/files/{}/download_link.json",
GAME_NAME, mod_obj.nexus_mod_id, file_id
))
.header("accept", "application/json")
.header("apikey", env::var("NEXUS_API_KEY")?)
.header("user-agent", USER_AGENT)
.send()
.await?
.error_for_status()?;
// let bytes = res.bytes().await?;
// let reader = std::io::Cursor::new(&bytes);
let links = res.json::<Value>().await?;
let link = links
.get(0)
.ok_or_else(|| anyhow!("Links array in API response is missing first element"))?
.get("URI")
.ok_or_else(|| anyhow!("Missing URI key in link in API response"))?
.as_str()
.ok_or_else(|| anyhow!("URI value in API response link is not a string"))?;
let mut plugin_archive = ZipWriter::new(
OpenOptions::new()
.write(true)
.create(true)
.open("plugins.zip")?,
);
plugin_archive.add_directory(
format!("{}/{}/{}", GAME_NAME, mod_obj.id, file_id),
FileOptions::default(),
)?;
plugin_archive.finish()?;
let mut tokio_file = tokio::fs::File::from_std(tempfile()?);
let res = client
.get(link)
.header("apikey", env::var("NEXUS_API_KEY")?)
.header("user-agent", USER_AGENT)
.send()
.await?
.error_for_status()?;
let mut plugin_archive = ZipWriter::new_append(
OpenOptions::new()
.read(true)
.write(true)
.open("plugins.zip")?,
)?;
let mut initial_bytes = [0; 8];
tokio_file.seek(SeekFrom::Start(0)).await?;
tokio_file.read_exact(&mut initial_bytes).await?;
dbg!(&initial_bytes);
let kind = infer::get(&initial_bytes).expect("unknown file type of file download");
match kind.mime_type() {
// "application/zip" => {
// let mut archive = ZipArchive::new(reader)?;
// let mut plugin_file_paths = Vec::new();
// for file_name in archive.file_names() {
// dbg!(file_name);
// if file_name.ends_with(".esp")
// || file_name.ends_with(".esm")
// || file_name.ends_with(".esl")
// {
// plugin_file_paths.push(file_name.to_string());
// }
// }
// dbg!(&plugin_file_paths);
// for file_name in plugin_file_paths.iter() {
// let mut file = archive.by_name(file_name)?;
// let plugin = parse_plugin(file)?;
// dbg!(plugin);
// plugin_archive.start_file(
// format!("{}/{}/{}/{}", GAME_NAME, mod_id, file_id, file_name),
// FileOptions::default(),
// )?;
// std::io::copy(&mut file, &mut plugin_archive)?;
// }
// }
_ => {
tokio_file.seek(SeekFrom::Start(0)).await?;
let mut file = tokio_file.into_std().await;
let mut plugin_file_paths = Vec::new();
for file_name in list_archive_files(&file)? {
dbg!(&file_name);
if file_name.ends_with(".esp")
|| file_name.ends_with(".esm")
|| file_name.ends_with(".esl")
{
plugin_file_paths.push(file_name);
let duration = rate_limit_wait_duration(&res)?;
// See: https://github.com/benkay86/async-applied/blob/master/reqwest-tokio-compat/src/main.rs
let mut byte_stream = res
.bytes_stream()
.map_err(|e| futures::io::Error::new(futures::io::ErrorKind::Other, e))
.into_async_read()
.compat();
tokio::io::copy(&mut byte_stream, &mut tokio_file).await?;
// let bytes = res.bytes().await?;
// let reader = std::io::Cursor::new(&bytes);
let mut plugin_archive = ZipWriter::new(
OpenOptions::new()
.write(true)
.create(true)
.open("plugins.zip")?,
);
plugin_archive.add_directory(
format!("{}/{}/{}", GAME_NAME, mod_obj.nexus_mod_id, file_id),
FileOptions::default(),
)?;
plugin_archive.finish()?;
let mut plugin_archive = ZipWriter::new_append(
OpenOptions::new()
.read(true)
.write(true)
.open("plugins.zip")?,
)?;
let mut initial_bytes = [0; 8];
tokio_file.seek(SeekFrom::Start(0)).await?;
tokio_file.read_exact(&mut initial_bytes).await?;
let kind = infer::get(&initial_bytes).expect("unknown file type of file download");
match kind.mime_type() {
// "application/zip" => {
// let mut archive = ZipArchive::new(reader)?;
// let mut plugin_file_paths = Vec::new();
// for file_name in archive.file_names() {
// dbg!(file_name);
// if file_name.ends_with(".esp")
// || file_name.ends_with(".esm")
// || file_name.ends_with(".esl")
// {
// plugin_file_paths.push(file_name.to_string());
// }
// }
// dbg!(&plugin_file_paths);
// for file_name in plugin_file_paths.iter() {
// let mut file = archive.by_name(file_name)?;
// let plugin = parse_plugin(file)?;
// dbg!(plugin);
// plugin_archive.start_file(
// format!("{}/{}/{}/{}", GAME_NAME, mod_id, file_id, file_name),
// FileOptions::default(),
// )?;
// std::io::copy(&mut file, &mut plugin_archive)?;
// }
// }
_ => {
tokio_file.seek(SeekFrom::Start(0)).await?;
let mut file = tokio_file.into_std().await;
let mut plugin_file_paths = Vec::new();
for file_name in list_archive_files(&file)? {
if file_name.ends_with(".esp")
|| file_name.ends_with(".esm")
|| file_name.ends_with(".esl")
{
plugin_file_paths.push(file_name);
}
}
}
file.seek(SeekFrom::Start(0))?;
for file_name in plugin_file_paths.iter() {
dbg!(file_name);
let mut buf = Vec::default();
uncompress_archive_file(&mut file, &mut buf, file_name)?;
let plugin = parse_plugin(&buf)?;
dbg!(&plugin);
let hash = seahash::hash(&buf);
dbg!(&hash);
let plugin_row = insert_plugin(
&pool,
name,
// TODO: how to make i64 hash?
hash.try_into()?,
file_id as i32,
Some(plugin.header.version as f64),
plugin.header.author,
plugin.header.description,
Some(
&plugin
.header
.masters
.iter()
.map(|s| s.to_string())
.collect::<Vec<String>>(),
),
)
.await?;
for cell in plugin.cells {
let cell_row = insert_cell(
for file_name in plugin_file_paths.iter() {
file.seek(SeekFrom::Start(0))?;
dbg!(file_name);
let mut buf = Vec::default();
uncompress_archive_file(&mut file, &mut buf, file_name)?;
let plugin = parse_plugin(&buf)?;
let hash = seahash::hash(&buf);
let plugin_row = insert_plugin(
&pool,
cell.form_id.try_into().unwrap(),
cell.x,
cell.y,
cell.is_persistent,
name,
hash as i64,
db_file.id,
Some(plugin.header.version as f64),
plugin.header.author,
plugin.header.description,
Some(
&plugin
.header
.masters
.iter()
.map(|s| s.to_string())
.collect::<Vec<String>>(),
),
)
.await?;
insert_plugin_cell(&pool, plugin_row.id, cell_row.id, cell.editor_id)
for cell in plugin.cells {
let cell_row = insert_cell(
&pool,
cell.form_id.try_into().unwrap(),
cell.x,
cell.y,
cell.is_persistent,
)
.await?;
insert_plugin_cell(
&pool,
plugin_row.id,
cell_row.id,
cell.editor_id,
)
.await?;
}
plugin_archive.start_file(
format!(
"{}/{}/{}/{}",
GAME_NAME, mod_obj.nexus_mod_id, file_id, file_name
),
FileOptions::default(),
)?;
std::io::copy(&mut buf.as_slice(), &mut plugin_archive)?;
}
plugin_archive.start_file(
format!("{}/{}/{}/{}", GAME_NAME, mod_obj.id, file_id, file_name),
FileOptions::default(),
)?;
std::io::copy(&mut buf.as_slice(), &mut plugin_archive)?;
}
}
};
};
plugin_archive.finish()?;
break; // temporarily just grabbing first file
plugin_archive.finish()?;
if let Some(duration) = duration {
sleep(duration).await;
}
}
}
break; // temporarily just grabbing first mod
page += 1;
}
// let mod_id = 4119; // hardcoded temporarily
// let res = client
// .get("https://cf-files.nexusmods.com/cdn/1704/351/Kynesgrove-351-2-0-8-1602105523.7z?md5=hUgu4epNAuzlp8yTUMNPgQ&expires=1621585205&user_id=512579&rip=24.218.205.137")
// .header("apikey", env::var("NEXUS_API_KEY")?)
// .header("user-agent", USER_AGENT)
// .send()
// .await?;
// dbg!(&res);
// let bytes = res.bytes().await?;
// let mut bytes = read("C:\\Users\\tyler\\Downloads\\Crime Overhaul Expanded 1.1-19188-1-1.rar")?;
// let mut bytes = read("C:\\Users\\tyler\\Downloads\\YourMarketStall-15814-1-4-2.zip")?;
// let mut reader = std::io::Cursor::new(&bytes);
Ok(())
}