Modularize scraping and api requests
This commit is contained in:
parent
19350081c3
commit
22757bc475
368
src/main.rs
368
src/main.rs
@ -1,15 +1,6 @@
|
|||||||
use anyhow::{anyhow, Context, Result};
|
use anyhow::Result;
|
||||||
use chrono::DateTime;
|
|
||||||
use chrono::Duration;
|
|
||||||
use chrono::NaiveDateTime;
|
|
||||||
use chrono::Utc;
|
|
||||||
use compress_tools::{list_archive_files, uncompress_archive_file};
|
use compress_tools::{list_archive_files, uncompress_archive_file};
|
||||||
use dotenv::dotenv;
|
use dotenv::dotenv;
|
||||||
use futures::stream::TryStreamExt;
|
|
||||||
use reqwest::Response;
|
|
||||||
use scraper::{Html, Selector};
|
|
||||||
use serde::{Deserialize, Serialize};
|
|
||||||
use serde_json::Value;
|
|
||||||
use skyrim_cell_dump::parse_plugin;
|
use skyrim_cell_dump::parse_plugin;
|
||||||
use sqlx::postgres::PgPoolOptions;
|
use sqlx::postgres::PgPoolOptions;
|
||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
@ -17,14 +8,16 @@ use std::env;
|
|||||||
use std::fs::OpenOptions;
|
use std::fs::OpenOptions;
|
||||||
use std::io::Seek;
|
use std::io::Seek;
|
||||||
use std::io::SeekFrom;
|
use std::io::SeekFrom;
|
||||||
use tempfile::{tempdir, tempfile};
|
use std::time::Duration;
|
||||||
|
use tempfile::tempdir;
|
||||||
use tokio::io::{AsyncReadExt, AsyncSeekExt};
|
use tokio::io::{AsyncReadExt, AsyncSeekExt};
|
||||||
use tokio::time::sleep;
|
use tokio::time::sleep;
|
||||||
use tokio_util::compat::FuturesAsyncReadCompatExt;
|
|
||||||
use unrar::Archive;
|
use unrar::Archive;
|
||||||
use zip::write::{FileOptions, ZipWriter};
|
use zip::write::{FileOptions, ZipWriter};
|
||||||
|
|
||||||
mod models;
|
mod models;
|
||||||
|
mod nexus_api;
|
||||||
|
mod nexus_scraper;
|
||||||
|
|
||||||
use models::cell::insert_cell;
|
use models::cell::insert_cell;
|
||||||
use models::file::{insert_file, File};
|
use models::file::{insert_file, File};
|
||||||
@ -32,51 +25,14 @@ use models::game::insert_game;
|
|||||||
use models::game_mod::{get_mod_by_nexus_mod_id, insert_mod, Mod};
|
use models::game_mod::{get_mod_by_nexus_mod_id, insert_mod, Mod};
|
||||||
use models::plugin::insert_plugin;
|
use models::plugin::insert_plugin;
|
||||||
use models::plugin_cell::insert_plugin_cell;
|
use models::plugin_cell::insert_plugin_cell;
|
||||||
|
use nexus_api::{GAME_ID, GAME_NAME};
|
||||||
static USER_AGENT: &str = "mod-mapper/0.1";
|
|
||||||
static GAME_NAME: &str = "skyrimspecialedition";
|
|
||||||
const GAME_ID: u32 = 1704;
|
|
||||||
|
|
||||||
fn rate_limit_wait_duration(res: &Response) -> Result<Option<std::time::Duration>> {
|
|
||||||
let daily_remaining = res
|
|
||||||
.headers()
|
|
||||||
.get("x-rl-daily-remaining")
|
|
||||||
.expect("No daily remaining in response headers");
|
|
||||||
let hourly_remaining = res
|
|
||||||
.headers()
|
|
||||||
.get("x-rl-hourly-remaining")
|
|
||||||
.expect("No hourly limit in response headers");
|
|
||||||
let hourly_reset = res
|
|
||||||
.headers()
|
|
||||||
.get("x-rl-hourly-reset")
|
|
||||||
.expect("No hourly reset in response headers");
|
|
||||||
dbg!(daily_remaining);
|
|
||||||
dbg!(hourly_remaining);
|
|
||||||
|
|
||||||
if hourly_remaining == "0" {
|
|
||||||
let hourly_reset = hourly_reset.to_str()?.trim();
|
|
||||||
let hourly_reset: DateTime<Utc> =
|
|
||||||
(DateTime::parse_from_str(hourly_reset, "%Y-%m-%d %H:%M:%S %z")?
|
|
||||||
+ Duration::seconds(5))
|
|
||||||
.into();
|
|
||||||
dbg!(hourly_reset);
|
|
||||||
let duration = (hourly_reset - Utc::now()).to_std()?;
|
|
||||||
dbg!(duration);
|
|
||||||
|
|
||||||
return Ok(Some(duration));
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(None)
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn process_plugin<W>(
|
async fn process_plugin<W>(
|
||||||
plugin_buf: &mut [u8],
|
plugin_buf: &mut [u8],
|
||||||
pool: &sqlx::Pool<sqlx::Postgres>,
|
pool: &sqlx::Pool<sqlx::Postgres>,
|
||||||
plugin_archive: &mut ZipWriter<W>,
|
plugin_archive: &mut ZipWriter<W>,
|
||||||
name: &str,
|
|
||||||
db_file: &File,
|
db_file: &File,
|
||||||
mod_obj: &Mod,
|
mod_obj: &Mod,
|
||||||
file_id: i64,
|
|
||||||
file_name: &str,
|
file_name: &str,
|
||||||
) -> Result<()>
|
) -> Result<()>
|
||||||
where
|
where
|
||||||
@ -86,7 +42,7 @@ where
|
|||||||
let hash = seahash::hash(&plugin_buf);
|
let hash = seahash::hash(&plugin_buf);
|
||||||
let plugin_row = insert_plugin(
|
let plugin_row = insert_plugin(
|
||||||
&pool,
|
&pool,
|
||||||
name,
|
&db_file.name,
|
||||||
hash as i64,
|
hash as i64,
|
||||||
db_file.id,
|
db_file.id,
|
||||||
Some(plugin.header.version as f64),
|
Some(plugin.header.version as f64),
|
||||||
@ -116,7 +72,7 @@ where
|
|||||||
plugin_archive.start_file(
|
plugin_archive.start_file(
|
||||||
format!(
|
format!(
|
||||||
"{}/{}/{}/{}",
|
"{}/{}/{}/{}",
|
||||||
GAME_NAME, mod_obj.nexus_mod_id, file_id, file_name
|
GAME_NAME, mod_obj.nexus_mod_id, db_file.nexus_file_id, file_name
|
||||||
),
|
),
|
||||||
FileOptions::default(),
|
FileOptions::default(),
|
||||||
)?;
|
)?;
|
||||||
@ -126,6 +82,21 @@ where
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn initialize_plugins_archive(mod_id: i32, file_id: i32) -> Result<()> {
|
||||||
|
let mut plugins_archive = ZipWriter::new(
|
||||||
|
OpenOptions::new()
|
||||||
|
.write(true)
|
||||||
|
.create(true)
|
||||||
|
.open("plugins.zip")?,
|
||||||
|
);
|
||||||
|
plugins_archive.add_directory(
|
||||||
|
format!("{}/{}/{}", GAME_NAME, mod_id, file_id),
|
||||||
|
FileOptions::default(),
|
||||||
|
)?;
|
||||||
|
plugins_archive.finish()?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
pub async fn main() -> Result<()> {
|
pub async fn main() -> Result<()> {
|
||||||
dotenv().ok();
|
dotenv().ok();
|
||||||
@ -140,263 +111,77 @@ pub async fn main() -> Result<()> {
|
|||||||
let mut has_next_page = true;
|
let mut has_next_page = true;
|
||||||
|
|
||||||
while has_next_page {
|
while has_next_page {
|
||||||
let res = client
|
let mod_list_resp = nexus_scraper::get_mod_list_page(&client, page).await?;
|
||||||
.get(format!(
|
let scraped = mod_list_resp.scrape_mods()?;
|
||||||
"https://www.nexusmods.com/Core/Libs/Common/Widgets/ModList?RH_ModList=nav:true,home:false,type:0,user_id:0,game_id:{},advfilt:true,include_adult:true,page_size:80,show_game_filter:false,open:false,page:{},sort_by:OLD_u_downloads",
|
|
||||||
GAME_ID,
|
|
||||||
page
|
|
||||||
))
|
|
||||||
.send()
|
|
||||||
.await?
|
|
||||||
.error_for_status()?;
|
|
||||||
let html = res.text().await?;
|
|
||||||
let document = Html::parse_document(&html);
|
|
||||||
let mod_select = Selector::parse("li.mod-tile").expect("failed to parse CSS selector");
|
|
||||||
let left_select =
|
|
||||||
Selector::parse("div.mod-tile-left").expect("failed to parse CSS selector");
|
|
||||||
let right_select =
|
|
||||||
Selector::parse("div.mod-tile-right").expect("failed to parse CSS selector");
|
|
||||||
let name_select = Selector::parse("p.tile-name a").expect("failed to parse CSS selector");
|
|
||||||
let category_select =
|
|
||||||
Selector::parse("div.category a").expect("failed to parse CSS selector");
|
|
||||||
let author_select = Selector::parse("div.author a").expect("failed to parse CSS selector");
|
|
||||||
let desc_select = Selector::parse("p.desc").expect("failed to parse CSS selector");
|
|
||||||
let next_page_select =
|
|
||||||
Selector::parse("div.pagination li.next").expect("failed to parse CSS selector");
|
|
||||||
|
|
||||||
let next_page_elem = document.select(&next_page_select).next();
|
has_next_page = scraped.has_next_page;
|
||||||
|
let mut mods = Vec::new();
|
||||||
has_next_page = next_page_elem.is_some();
|
for scraped_mod in scraped.mods {
|
||||||
|
if let None = get_mod_by_nexus_mod_id(&pool, scraped_mod.nexus_mod_id).await? {
|
||||||
let mut mods = vec![];
|
|
||||||
for element in document.select(&mod_select) {
|
|
||||||
let left = element
|
|
||||||
.select(&left_select)
|
|
||||||
.next()
|
|
||||||
.expect("Missing left div for mod");
|
|
||||||
let right = element
|
|
||||||
.select(&right_select)
|
|
||||||
.next()
|
|
||||||
.expect("Missing right div for mod");
|
|
||||||
let nexus_mod_id = left
|
|
||||||
.value()
|
|
||||||
.attr("data-mod-id")
|
|
||||||
.expect("Missing mod id attribute")
|
|
||||||
.parse::<i32>()
|
|
||||||
.ok()
|
|
||||||
.expect("Failed to parse mod id");
|
|
||||||
let name_elem = right
|
|
||||||
.select(&name_select)
|
|
||||||
.next()
|
|
||||||
.expect("Missing name link for mod");
|
|
||||||
let name = name_elem.text().next().expect("Missing name text for mod");
|
|
||||||
let category_elem = right
|
|
||||||
.select(&category_select)
|
|
||||||
.next()
|
|
||||||
.expect("Missing category link for mod");
|
|
||||||
let category = category_elem
|
|
||||||
.text()
|
|
||||||
.next()
|
|
||||||
.expect("Missing category text for mod");
|
|
||||||
let author_elem = right
|
|
||||||
.select(&author_select)
|
|
||||||
.next()
|
|
||||||
.expect("Missing author link for mod");
|
|
||||||
let author = author_elem
|
|
||||||
.text()
|
|
||||||
.next()
|
|
||||||
.expect("Missing author text for mod");
|
|
||||||
let desc_elem = right
|
|
||||||
.select(&desc_select)
|
|
||||||
.next()
|
|
||||||
.expect("Missing desc elem for mod");
|
|
||||||
let desc = desc_elem.text().next();
|
|
||||||
|
|
||||||
if let None = get_mod_by_nexus_mod_id(&pool, nexus_mod_id).await? {
|
|
||||||
mods.push(
|
mods.push(
|
||||||
insert_mod(&pool, name, nexus_mod_id, author, category, desc, game.id).await?,
|
insert_mod(
|
||||||
|
&pool,
|
||||||
|
scraped_mod.name,
|
||||||
|
scraped_mod.nexus_mod_id,
|
||||||
|
scraped_mod.author,
|
||||||
|
scraped_mod.category,
|
||||||
|
scraped_mod.desc,
|
||||||
|
game.id,
|
||||||
|
)
|
||||||
|
.await?,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
dbg!(mods.len());
|
dbg!(mods.len());
|
||||||
|
|
||||||
for mod_obj in mods {
|
for db_mod in mods {
|
||||||
dbg!(&mod_obj.name);
|
dbg!(&db_mod.name);
|
||||||
let res = client
|
let files_resp = nexus_api::files::get(&client, db_mod.nexus_mod_id).await?;
|
||||||
.get(format!(
|
// TODO: download other files than just MAIN files
|
||||||
"https://api.nexusmods.com/v1/games/{}/mods/{}/files.json",
|
// let files = files.into_iter().filter(|file| {
|
||||||
GAME_NAME, mod_obj.nexus_mod_id
|
// if let Some(category_name) = file.get("category_name") {
|
||||||
))
|
// category_name.as_str() == Some("MAIN")
|
||||||
.header("accept", "application/json")
|
// } else {
|
||||||
.header("apikey", env::var("NEXUS_API_KEY")?)
|
// false
|
||||||
.header("user-agent", USER_AGENT)
|
// }
|
||||||
.send()
|
// });
|
||||||
.await?
|
if let Some(duration) = files_resp.wait {
|
||||||
.error_for_status()?;
|
|
||||||
|
|
||||||
if let Some(duration) = rate_limit_wait_duration(&res)? {
|
|
||||||
sleep(duration).await;
|
sleep(duration).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
let files = res.json::<Value>().await?;
|
for api_file in files_resp.files()? {
|
||||||
let files = files
|
|
||||||
.get("files")
|
|
||||||
.ok_or_else(|| anyhow!("Missing files key in API response"))?
|
|
||||||
.as_array()
|
|
||||||
.ok_or_else(|| anyhow!("files value in API response is not an array"))?;
|
|
||||||
// TODO: download other files than just MAIN files
|
|
||||||
let files = files.into_iter().filter(|file| {
|
|
||||||
if let Some(category_name) = file.get("category_name") {
|
|
||||||
category_name.as_str() == Some("MAIN")
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
for file in files {
|
|
||||||
let file_id = file
|
|
||||||
.get("file_id")
|
|
||||||
.ok_or_else(|| anyhow!("Missing file_id key in file in API response"))?
|
|
||||||
.as_i64()
|
|
||||||
.ok_or_else(|| anyhow!("file_id value in API response file is not a number"))?;
|
|
||||||
dbg!(file_id);
|
|
||||||
let name = file
|
|
||||||
.get("name")
|
|
||||||
.ok_or_else(|| anyhow!("Missing name key in file in API response"))?
|
|
||||||
.as_str()
|
|
||||||
.ok_or_else(|| anyhow!("name value in API response file is not a string"))?;
|
|
||||||
let file_name = file
|
|
||||||
.get("file_name")
|
|
||||||
.ok_or_else(|| anyhow!("Missing file_name key in file in API response"))?
|
|
||||||
.as_str()
|
|
||||||
.ok_or_else(|| {
|
|
||||||
anyhow!("file_name value in API response file is not a string")
|
|
||||||
})?;
|
|
||||||
let category = file
|
|
||||||
.get("category_name")
|
|
||||||
.ok_or_else(|| anyhow!("Missing category key in file in API response"))?
|
|
||||||
.as_str();
|
|
||||||
let version = file
|
|
||||||
.get("version")
|
|
||||||
.ok_or_else(|| anyhow!("Missing version key in file in API response"))?
|
|
||||||
.as_str();
|
|
||||||
let mod_version = file
|
|
||||||
.get("mod_version")
|
|
||||||
.ok_or_else(|| anyhow!("Missing mod_version key in file in API response"))?
|
|
||||||
.as_str();
|
|
||||||
let uploaded_timestamp = file
|
|
||||||
.get("uploaded_timestamp")
|
|
||||||
.ok_or_else(|| {
|
|
||||||
anyhow!("Missing uploaded_timestamp key in file in API response")
|
|
||||||
})?
|
|
||||||
.as_i64()
|
|
||||||
.ok_or_else(|| {
|
|
||||||
anyhow!("uploaded_timestamp value in API response file is not a number")
|
|
||||||
})?;
|
|
||||||
let uploaded_at = NaiveDateTime::from_timestamp(uploaded_timestamp, 0);
|
|
||||||
let db_file = insert_file(
|
let db_file = insert_file(
|
||||||
&pool,
|
&pool,
|
||||||
name,
|
api_file.name,
|
||||||
file_name,
|
api_file.file_name,
|
||||||
file_id as i32,
|
api_file.file_id as i32,
|
||||||
mod_obj.id,
|
db_mod.id,
|
||||||
category,
|
api_file.category,
|
||||||
version,
|
api_file.version,
|
||||||
mod_version,
|
api_file.mod_version,
|
||||||
uploaded_at,
|
api_file.uploaded_at,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
let res = client
|
|
||||||
.get(format!(
|
|
||||||
"https://api.nexusmods.com/v1/games/{}/mods/{}/files/{}/download_link.json",
|
|
||||||
GAME_NAME, mod_obj.nexus_mod_id, file_id
|
|
||||||
))
|
|
||||||
.header("accept", "application/json")
|
|
||||||
.header("apikey", env::var("NEXUS_API_KEY")?)
|
|
||||||
.header("user-agent", USER_AGENT)
|
|
||||||
.send()
|
|
||||||
.await?
|
|
||||||
.error_for_status()?;
|
|
||||||
|
|
||||||
let duration = rate_limit_wait_duration(&res)?;
|
let download_link_resp =
|
||||||
|
nexus_api::download_link::get(&client, db_mod.nexus_mod_id, api_file.file_id)
|
||||||
|
.await?;
|
||||||
|
let mut tokio_file = download_link_resp.download_file(&client).await?;
|
||||||
|
|
||||||
let links = res.json::<Value>().await?;
|
initialize_plugins_archive(db_mod.nexus_mod_id, db_file.nexus_file_id)?;
|
||||||
let link = links
|
let mut plugins_archive = ZipWriter::new_append(
|
||||||
.get(0)
|
|
||||||
.ok_or_else(|| anyhow!("Links array in API response is missing first element"))?
|
|
||||||
.get("URI")
|
|
||||||
.ok_or_else(|| anyhow!("Missing URI key in link in API response"))?
|
|
||||||
.as_str()
|
|
||||||
.ok_or_else(|| anyhow!("URI value in API response link is not a string"))?;
|
|
||||||
|
|
||||||
let mut tokio_file = tokio::fs::File::from_std(tempfile()?);
|
|
||||||
let res = client
|
|
||||||
.get(link)
|
|
||||||
.header("apikey", env::var("NEXUS_API_KEY")?)
|
|
||||||
.header("user-agent", USER_AGENT)
|
|
||||||
.send()
|
|
||||||
.await?
|
|
||||||
.error_for_status()?;
|
|
||||||
|
|
||||||
// See: https://github.com/benkay86/async-applied/blob/master/reqwest-tokio-compat/src/main.rs
|
|
||||||
let mut byte_stream = res
|
|
||||||
.bytes_stream()
|
|
||||||
.map_err(|e| futures::io::Error::new(futures::io::ErrorKind::Other, e))
|
|
||||||
.into_async_read()
|
|
||||||
.compat();
|
|
||||||
|
|
||||||
tokio::io::copy(&mut byte_stream, &mut tokio_file).await?;
|
|
||||||
|
|
||||||
let mut plugin_archive = ZipWriter::new(
|
|
||||||
OpenOptions::new()
|
|
||||||
.write(true)
|
|
||||||
.create(true)
|
|
||||||
.open("plugins.zip")?,
|
|
||||||
);
|
|
||||||
plugin_archive.add_directory(
|
|
||||||
format!("{}/{}/{}", GAME_NAME, mod_obj.nexus_mod_id, file_id),
|
|
||||||
FileOptions::default(),
|
|
||||||
)?;
|
|
||||||
plugin_archive.finish()?;
|
|
||||||
|
|
||||||
let mut plugin_archive = ZipWriter::new_append(
|
|
||||||
OpenOptions::new()
|
OpenOptions::new()
|
||||||
.read(true)
|
.read(true)
|
||||||
.write(true)
|
.write(true)
|
||||||
.open("plugins.zip")?,
|
.open("plugins.zip")?,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let mut initial_bytes = [0; 8];
|
let mut initial_bytes = [0; 8];
|
||||||
tokio_file.seek(SeekFrom::Start(0)).await?;
|
tokio_file.seek(SeekFrom::Start(0)).await?;
|
||||||
tokio_file.read_exact(&mut initial_bytes).await?;
|
tokio_file.read_exact(&mut initial_bytes).await?;
|
||||||
let kind = infer::get(&initial_bytes).expect("unknown file type of file download");
|
let kind = infer::get(&initial_bytes).expect("unknown file type of file download");
|
||||||
dbg!(kind.mime_type());
|
dbg!(kind.mime_type());
|
||||||
// "application/zip" => {
|
|
||||||
// let mut archive = ZipArchive::new(reader)?;
|
|
||||||
// let mut plugin_file_paths = Vec::new();
|
|
||||||
// for file_name in archive.file_names() {
|
|
||||||
// dbg!(file_name);
|
|
||||||
// if file_name.ends_with(".esp")
|
|
||||||
// || file_name.ends_with(".esm")
|
|
||||||
// || file_name.ends_with(".esl")
|
|
||||||
// {
|
|
||||||
// plugin_file_paths.push(file_name.to_string());
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// dbg!(&plugin_file_paths);
|
|
||||||
// for file_name in plugin_file_paths.iter() {
|
|
||||||
// let mut file = archive.by_name(file_name)?;
|
|
||||||
// let plugin = parse_plugin(file)?;
|
|
||||||
// dbg!(plugin);
|
|
||||||
// plugin_archive.start_file(
|
|
||||||
// format!("{}/{}/{}/{}", GAME_NAME, mod_id, file_id, file_name),
|
|
||||||
// FileOptions::default(),
|
|
||||||
// )?;
|
|
||||||
// std::io::copy(&mut file, &mut plugin_archive)?;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// Use unrar to uncompress the entire .rar file to avoid a bug with compress_tools panicking when uncompressing
|
|
||||||
// certain .rar files: https://github.com/libarchive/libarchive/issues/373
|
|
||||||
tokio_file.seek(SeekFrom::Start(0)).await?;
|
tokio_file.seek(SeekFrom::Start(0)).await?;
|
||||||
let mut file = tokio_file.try_clone().await?.into_std().await;
|
let mut file = tokio_file.try_clone().await?.into_std().await;
|
||||||
let mut plugin_file_paths = Vec::new();
|
let mut plugin_file_paths = Vec::new();
|
||||||
@ -419,11 +204,9 @@ pub async fn main() -> Result<()> {
|
|||||||
process_plugin(
|
process_plugin(
|
||||||
&mut buf,
|
&mut buf,
|
||||||
&pool,
|
&pool,
|
||||||
&mut plugin_archive,
|
&mut plugins_archive,
|
||||||
name,
|
|
||||||
&db_file,
|
&db_file,
|
||||||
&mod_obj,
|
&db_mod,
|
||||||
file_id,
|
|
||||||
file_name,
|
file_name,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
@ -433,6 +216,8 @@ pub async fn main() -> Result<()> {
|
|||||||
if kind.mime_type() == "application/x-rar-compressed"
|
if kind.mime_type() == "application/x-rar-compressed"
|
||||||
|| kind.mime_type() == "application/vnd.rar"
|
|| kind.mime_type() == "application/vnd.rar"
|
||||||
{
|
{
|
||||||
|
// Use unrar to uncompress the entire .rar file to avoid a bug with compress_tools panicking when uncompressing
|
||||||
|
// certain .rar files: https://github.com/libarchive/libarchive/issues/373
|
||||||
tokio_file.seek(SeekFrom::Start(0)).await?;
|
tokio_file.seek(SeekFrom::Start(0)).await?;
|
||||||
let mut file = tokio_file.try_clone().await?.into_std().await;
|
let mut file = tokio_file.try_clone().await?.into_std().await;
|
||||||
let temp_dir = tempdir()?;
|
let temp_dir = tempdir()?;
|
||||||
@ -474,11 +259,9 @@ pub async fn main() -> Result<()> {
|
|||||||
process_plugin(
|
process_plugin(
|
||||||
&mut plugin_buf,
|
&mut plugin_buf,
|
||||||
&pool,
|
&pool,
|
||||||
&mut plugin_archive,
|
&mut plugins_archive,
|
||||||
name,
|
|
||||||
&db_file,
|
&db_file,
|
||||||
&mod_obj,
|
&db_mod,
|
||||||
file_id,
|
|
||||||
file_name,
|
file_name,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
@ -490,8 +273,8 @@ pub async fn main() -> Result<()> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
plugin_archive.finish()?;
|
plugins_archive.finish()?;
|
||||||
if let Some(duration) = duration {
|
if let Some(duration) = download_link_resp.wait {
|
||||||
sleep(duration).await;
|
sleep(duration).await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -500,6 +283,7 @@ pub async fn main() -> Result<()> {
|
|||||||
page += 1;
|
page += 1;
|
||||||
dbg!(page);
|
dbg!(page);
|
||||||
dbg!(has_next_page);
|
dbg!(has_next_page);
|
||||||
|
sleep(Duration::new(1, 0)).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
70
src/nexus_api/download_link.rs
Normal file
70
src/nexus_api/download_link.rs
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
use anyhow::{anyhow, Result};
|
||||||
|
use futures::TryStreamExt;
|
||||||
|
use reqwest::Client;
|
||||||
|
use serde_json::Value;
|
||||||
|
use std::{env, time::Duration};
|
||||||
|
use tempfile::tempfile;
|
||||||
|
use tokio::fs::File;
|
||||||
|
use tokio_util::compat::FuturesAsyncReadCompatExt;
|
||||||
|
|
||||||
|
use super::{rate_limit_wait_duration, GAME_NAME, USER_AGENT};
|
||||||
|
|
||||||
|
pub struct DownloadLinkResponse {
|
||||||
|
pub wait: Option<Duration>,
|
||||||
|
json: Value,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get(client: &Client, mod_id: i32, file_id: i64) -> Result<DownloadLinkResponse> {
|
||||||
|
let res = client
|
||||||
|
.get(format!(
|
||||||
|
"https://api.nexusmods.com/v1/games/{}/mods/{}/files/{}/download_link.json",
|
||||||
|
GAME_NAME, mod_id, file_id
|
||||||
|
))
|
||||||
|
.header("accept", "application/json")
|
||||||
|
.header("apikey", env::var("NEXUS_API_KEY")?)
|
||||||
|
.header("user-agent", USER_AGENT)
|
||||||
|
.send()
|
||||||
|
.await?
|
||||||
|
.error_for_status()?;
|
||||||
|
|
||||||
|
let wait = rate_limit_wait_duration(&res)?;
|
||||||
|
let json = res.json::<Value>().await?;
|
||||||
|
|
||||||
|
Ok(DownloadLinkResponse { wait, json })
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DownloadLinkResponse {
|
||||||
|
pub fn link<'a>(&'a self) -> Result<&'a str> {
|
||||||
|
let link = self
|
||||||
|
.json
|
||||||
|
.get(0)
|
||||||
|
.ok_or_else(|| anyhow!("Links array in API response is missing first element"))?
|
||||||
|
.get("URI")
|
||||||
|
.ok_or_else(|| anyhow!("Missing URI key in link in API response"))?
|
||||||
|
.as_str()
|
||||||
|
.ok_or_else(|| anyhow!("URI value in API response link is not a string"))?;
|
||||||
|
Ok(link)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn download_file(&self, client: &Client) -> Result<File> {
|
||||||
|
let mut tokio_file = File::from_std(tempfile()?);
|
||||||
|
let res = client
|
||||||
|
.get(self.link()?)
|
||||||
|
.header("apikey", env::var("NEXUS_API_KEY")?)
|
||||||
|
.header("user-agent", USER_AGENT)
|
||||||
|
.send()
|
||||||
|
.await?
|
||||||
|
.error_for_status()?;
|
||||||
|
|
||||||
|
// See: https://github.com/benkay86/async-applied/blob/master/reqwest-tokio-compat/src/main.rs
|
||||||
|
let mut byte_stream = res
|
||||||
|
.bytes_stream()
|
||||||
|
.map_err(|e| futures::io::Error::new(futures::io::ErrorKind::Other, e))
|
||||||
|
.into_async_read()
|
||||||
|
.compat();
|
||||||
|
|
||||||
|
tokio::io::copy(&mut byte_stream, &mut tokio_file).await?;
|
||||||
|
|
||||||
|
return Ok(tokio_file);
|
||||||
|
}
|
||||||
|
}
|
107
src/nexus_api/files.rs
Normal file
107
src/nexus_api/files.rs
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
use anyhow::{anyhow, Result};
|
||||||
|
use chrono::NaiveDateTime;
|
||||||
|
use reqwest::Client;
|
||||||
|
use serde_json::Value;
|
||||||
|
use std::{env, time::Duration};
|
||||||
|
|
||||||
|
use super::{rate_limit_wait_duration, GAME_NAME, USER_AGENT};
|
||||||
|
|
||||||
|
pub struct FilesResponse {
|
||||||
|
pub wait: Option<Duration>,
|
||||||
|
json: Value,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct ApiFile<'a> {
|
||||||
|
pub file_id: i64,
|
||||||
|
pub name: &'a str,
|
||||||
|
pub file_name: &'a str,
|
||||||
|
pub category: Option<&'a str>,
|
||||||
|
pub version: Option<&'a str>,
|
||||||
|
pub mod_version: Option<&'a str>,
|
||||||
|
pub uploaded_at: NaiveDateTime,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get(client: &Client, nexus_mod_id: i32) -> Result<FilesResponse> {
|
||||||
|
let res = client
|
||||||
|
.get(format!(
|
||||||
|
"https://api.nexusmods.com/v1/games/{}/mods/{}/files.json",
|
||||||
|
GAME_NAME, nexus_mod_id
|
||||||
|
))
|
||||||
|
.header("accept", "application/json")
|
||||||
|
.header("apikey", env::var("NEXUS_API_KEY")?)
|
||||||
|
.header("user-agent", USER_AGENT)
|
||||||
|
.send()
|
||||||
|
.await?
|
||||||
|
.error_for_status()?;
|
||||||
|
|
||||||
|
let wait = rate_limit_wait_duration(&res)?;
|
||||||
|
let json = res.json::<Value>().await?;
|
||||||
|
|
||||||
|
Ok(FilesResponse { wait, json })
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FilesResponse {
|
||||||
|
pub fn files<'a>(&'a self) -> Result<Vec<ApiFile<'a>>> {
|
||||||
|
let files = self
|
||||||
|
.json
|
||||||
|
.get("files")
|
||||||
|
.ok_or_else(|| anyhow!("Missing files key in API response"))?
|
||||||
|
.as_array()
|
||||||
|
.ok_or_else(|| anyhow!("files value in API response is not an array"))?;
|
||||||
|
files
|
||||||
|
.into_iter()
|
||||||
|
.map(|file| {
|
||||||
|
let file_id = file
|
||||||
|
.get("file_id")
|
||||||
|
.ok_or_else(|| anyhow!("Missing file_id key in file in API response"))?
|
||||||
|
.as_i64()
|
||||||
|
.ok_or_else(|| anyhow!("file_id value in API response file is not a number"))?;
|
||||||
|
dbg!(file_id);
|
||||||
|
let name = file
|
||||||
|
.get("name")
|
||||||
|
.ok_or_else(|| anyhow!("Missing name key in file in API response"))?
|
||||||
|
.as_str()
|
||||||
|
.ok_or_else(|| anyhow!("name value in API response file is not a string"))?;
|
||||||
|
let file_name = file
|
||||||
|
.get("file_name")
|
||||||
|
.ok_or_else(|| anyhow!("Missing file_name key in file in API response"))?
|
||||||
|
.as_str()
|
||||||
|
.ok_or_else(|| {
|
||||||
|
anyhow!("file_name value in API response file is not a string")
|
||||||
|
})?;
|
||||||
|
let category = file
|
||||||
|
.get("category_name")
|
||||||
|
.ok_or_else(|| anyhow!("Missing category key in file in API response"))?
|
||||||
|
.as_str();
|
||||||
|
let version = file
|
||||||
|
.get("version")
|
||||||
|
.ok_or_else(|| anyhow!("Missing version key in file in API response"))?
|
||||||
|
.as_str();
|
||||||
|
let mod_version = file
|
||||||
|
.get("mod_version")
|
||||||
|
.ok_or_else(|| anyhow!("Missing mod_version key in file in API response"))?
|
||||||
|
.as_str();
|
||||||
|
let uploaded_timestamp = file
|
||||||
|
.get("uploaded_timestamp")
|
||||||
|
.ok_or_else(|| {
|
||||||
|
anyhow!("Missing uploaded_timestamp key in file in API response")
|
||||||
|
})?
|
||||||
|
.as_i64()
|
||||||
|
.ok_or_else(|| {
|
||||||
|
anyhow!("uploaded_timestamp value in API response file is not a number")
|
||||||
|
})?;
|
||||||
|
let uploaded_at = NaiveDateTime::from_timestamp(uploaded_timestamp, 0);
|
||||||
|
|
||||||
|
Ok(ApiFile {
|
||||||
|
file_id,
|
||||||
|
name,
|
||||||
|
file_name,
|
||||||
|
category,
|
||||||
|
version,
|
||||||
|
mod_version,
|
||||||
|
uploaded_at,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
}
|
44
src/nexus_api/mod.rs
Normal file
44
src/nexus_api/mod.rs
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
use anyhow::Result;
|
||||||
|
use chrono::DateTime;
|
||||||
|
use chrono::Duration;
|
||||||
|
use chrono::Utc;
|
||||||
|
use reqwest::Response;
|
||||||
|
|
||||||
|
pub mod download_link;
|
||||||
|
pub mod files;
|
||||||
|
|
||||||
|
/// Game name segment used when building Nexus API URLs and archive paths.
pub static GAME_NAME: &str = "skyrimspecialedition";
|
||||||
|
/// Numeric game id substituted into the `game_id` field of the mod-list URL.
pub const GAME_ID: u32 = 1704;
|
||||||
|
/// Value sent in the `user-agent` header of Nexus API requests.
pub static USER_AGENT: &str = "mod-mapper/0.1";
|
||||||
|
|
||||||
|
pub fn rate_limit_wait_duration(res: &Response) -> Result<Option<std::time::Duration>> {
|
||||||
|
let daily_remaining = res
|
||||||
|
.headers()
|
||||||
|
.get("x-rl-daily-remaining")
|
||||||
|
.expect("No daily remaining in response headers");
|
||||||
|
let hourly_remaining = res
|
||||||
|
.headers()
|
||||||
|
.get("x-rl-hourly-remaining")
|
||||||
|
.expect("No hourly limit in response headers");
|
||||||
|
let hourly_reset = res
|
||||||
|
.headers()
|
||||||
|
.get("x-rl-hourly-reset")
|
||||||
|
.expect("No hourly reset in response headers");
|
||||||
|
dbg!(daily_remaining);
|
||||||
|
dbg!(hourly_remaining);
|
||||||
|
|
||||||
|
if hourly_remaining == "0" {
|
||||||
|
let hourly_reset = hourly_reset.to_str()?.trim();
|
||||||
|
let hourly_reset: DateTime<Utc> =
|
||||||
|
(DateTime::parse_from_str(hourly_reset, "%Y-%m-%d %H:%M:%S %z")?
|
||||||
|
+ Duration::seconds(5))
|
||||||
|
.into();
|
||||||
|
dbg!(hourly_reset);
|
||||||
|
let duration = (hourly_reset - Utc::now()).to_std()?;
|
||||||
|
dbg!(duration);
|
||||||
|
|
||||||
|
return Ok(Some(duration));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(None)
|
||||||
|
}
|
119
src/nexus_scraper.rs
Normal file
119
src/nexus_scraper.rs
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
use anyhow::Result;
|
||||||
|
use reqwest::Client;
|
||||||
|
use scraper::{Html, Selector};
|
||||||
|
|
||||||
|
use crate::nexus_api::GAME_ID;
|
||||||
|
|
||||||
|
/// Parsed HTML of one page of the Nexus Mods mod-list widget, as produced by
/// `get_mod_list_page`.
pub struct ModListResponse {
|
||||||
|
/// Document parsed from the response body; queried by `scrape_mods`.
html: Html,
|
||||||
|
}
|
||||||
|
/// One mod tile scraped from a mod-list page, borrowing its text from the
/// parsed HTML document.
#[derive(Debug)]
pub struct ScrapedMod<'a> {
    /// Parsed from the `data-mod-id` attribute of the tile's
    /// `div.mod-tile-left` element.
    pub nexus_mod_id: i32,
    /// First text node of the `p.tile-name a` link.
    pub name: &'a str,
    /// First text node of the `div.category a` link.
    pub category: &'a str,
    /// First text node of the `div.author a` link.
    pub author: &'a str,
    /// First text node of the `p.desc` element, if it has any text.
    pub desc: Option<&'a str>,
}
|
||||||
|
|
||||||
|
pub struct ModListScrape<'a> {
|
||||||
|
pub mods: Vec<ScrapedMod<'a>>,
|
||||||
|
pub has_next_page: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get_mod_list_page(client: &Client, page: i32) -> Result<ModListResponse> {
|
||||||
|
let res = client
|
||||||
|
.get(format!(
|
||||||
|
"https://www.nexusmods.com/Core/Libs/Common/Widgets/ModList?RH_ModList=nav:true,home:false,type:0,user_id:0,game_id:{},advfilt:true,include_adult:true,page_size:80,show_game_filter:false,open:false,page:{},sort_by:OLD_u_downloads",
|
||||||
|
GAME_ID,
|
||||||
|
page
|
||||||
|
))
|
||||||
|
.send()
|
||||||
|
.await?
|
||||||
|
.error_for_status()?;
|
||||||
|
let text = res.text().await?;
|
||||||
|
let html = Html::parse_document(&text);
|
||||||
|
|
||||||
|
Ok(ModListResponse { html })
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ModListResponse {
|
||||||
|
pub fn scrape_mods<'a>(&'a self) -> Result<ModListScrape> {
|
||||||
|
let mod_select = Selector::parse("li.mod-tile").expect("failed to parse CSS selector");
|
||||||
|
let left_select =
|
||||||
|
Selector::parse("div.mod-tile-left").expect("failed to parse CSS selector");
|
||||||
|
let right_select =
|
||||||
|
Selector::parse("div.mod-tile-right").expect("failed to parse CSS selector");
|
||||||
|
let name_select = Selector::parse("p.tile-name a").expect("failed to parse CSS selector");
|
||||||
|
let category_select =
|
||||||
|
Selector::parse("div.category a").expect("failed to parse CSS selector");
|
||||||
|
let author_select = Selector::parse("div.author a").expect("failed to parse CSS selector");
|
||||||
|
let desc_select = Selector::parse("p.desc").expect("failed to parse CSS selector");
|
||||||
|
let next_page_select =
|
||||||
|
Selector::parse("div.pagination li.next").expect("failed to parse CSS selector");
|
||||||
|
|
||||||
|
let next_page_elem = self.html.select(&next_page_select).next();
|
||||||
|
|
||||||
|
let has_next_page = next_page_elem.is_some();
|
||||||
|
|
||||||
|
let mods: Vec<ScrapedMod> = self
|
||||||
|
.html
|
||||||
|
.select(&mod_select)
|
||||||
|
.map(|element| {
|
||||||
|
let left = element
|
||||||
|
.select(&left_select)
|
||||||
|
.next()
|
||||||
|
.expect("Missing left div for mod");
|
||||||
|
let right = element
|
||||||
|
.select(&right_select)
|
||||||
|
.next()
|
||||||
|
.expect("Missing right div for mod");
|
||||||
|
let nexus_mod_id = left
|
||||||
|
.value()
|
||||||
|
.attr("data-mod-id")
|
||||||
|
.expect("Missing mod id attribute")
|
||||||
|
.parse::<i32>()
|
||||||
|
.ok()
|
||||||
|
.expect("Failed to parse mod id");
|
||||||
|
let name_elem = right
|
||||||
|
.select(&name_select)
|
||||||
|
.next()
|
||||||
|
.expect("Missing name link for mod");
|
||||||
|
let name = name_elem.text().next().expect("Missing name text for mod");
|
||||||
|
let category_elem = right
|
||||||
|
.select(&category_select)
|
||||||
|
.next()
|
||||||
|
.expect("Missing category link for mod");
|
||||||
|
let category = category_elem
|
||||||
|
.text()
|
||||||
|
.next()
|
||||||
|
.expect("Missing category text for mod");
|
||||||
|
let author_elem = right
|
||||||
|
.select(&author_select)
|
||||||
|
.next()
|
||||||
|
.expect("Missing author link for mod");
|
||||||
|
let author = author_elem
|
||||||
|
.text()
|
||||||
|
.next()
|
||||||
|
.expect("Missing author text for mod");
|
||||||
|
let desc_elem = right
|
||||||
|
.select(&desc_select)
|
||||||
|
.next()
|
||||||
|
.expect("Missing desc elem for mod");
|
||||||
|
let desc = desc_elem.text().next();
|
||||||
|
|
||||||
|
ScrapedMod {
|
||||||
|
nexus_mod_id,
|
||||||
|
name,
|
||||||
|
category,
|
||||||
|
author,
|
||||||
|
desc,
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
dbg!(mods.len());
|
||||||
|
Ok(ModListScrape {
|
||||||
|
mods,
|
||||||
|
has_next_page,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user