From 22757bc47503210136f4bc794a76bda1c78fcea4 Mon Sep 17 00:00:00 2001 From: Tyler Hallada Date: Fri, 9 Jul 2021 00:37:08 -0400 Subject: [PATCH] Modularize scraping and api requests --- src/main.rs | 368 +++++++-------------------------- src/nexus_api/download_link.rs | 70 +++++++ src/nexus_api/files.rs | 107 ++++++++++ src/nexus_api/mod.rs | 44 ++++ src/nexus_scraper.rs | 119 +++++++++++ 5 files changed, 416 insertions(+), 292 deletions(-) create mode 100644 src/nexus_api/download_link.rs create mode 100644 src/nexus_api/files.rs create mode 100644 src/nexus_api/mod.rs create mode 100644 src/nexus_scraper.rs diff --git a/src/main.rs b/src/main.rs index eaa31d4..b60ef09 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,15 +1,6 @@ -use anyhow::{anyhow, Context, Result}; -use chrono::DateTime; -use chrono::Duration; -use chrono::NaiveDateTime; -use chrono::Utc; +use anyhow::Result; use compress_tools::{list_archive_files, uncompress_archive_file}; use dotenv::dotenv; -use futures::stream::TryStreamExt; -use reqwest::Response; -use scraper::{Html, Selector}; -use serde::{Deserialize, Serialize}; -use serde_json::Value; use skyrim_cell_dump::parse_plugin; use sqlx::postgres::PgPoolOptions; use std::convert::TryInto; @@ -17,14 +8,16 @@ use std::env; use std::fs::OpenOptions; use std::io::Seek; use std::io::SeekFrom; -use tempfile::{tempdir, tempfile}; +use std::time::Duration; +use tempfile::tempdir; use tokio::io::{AsyncReadExt, AsyncSeekExt}; use tokio::time::sleep; -use tokio_util::compat::FuturesAsyncReadCompatExt; use unrar::Archive; use zip::write::{FileOptions, ZipWriter}; mod models; +mod nexus_api; +mod nexus_scraper; use models::cell::insert_cell; use models::file::{insert_file, File}; @@ -32,51 +25,14 @@ use models::game::insert_game; use models::game_mod::{get_mod_by_nexus_mod_id, insert_mod, Mod}; use models::plugin::insert_plugin; use models::plugin_cell::insert_plugin_cell; - -static USER_AGENT: &str = "mod-mapper/0.1"; -static GAME_NAME: &str = "skyrimspecialedition"; -const GAME_ID: u32 = 1704; - -fn rate_limit_wait_duration(res: &Response) -> Result> { - let daily_remaining = res - .headers() - .get("x-rl-daily-remaining") - .expect("No daily remaining in response headers"); - let hourly_remaining = res - .headers() - .get("x-rl-hourly-remaining") - .expect("No hourly limit in response headers"); - let hourly_reset = res - .headers() - .get("x-rl-hourly-reset") - .expect("No hourly reset in response headers"); - dbg!(daily_remaining); - dbg!(hourly_remaining); - - if hourly_remaining == "0" { - let hourly_reset = hourly_reset.to_str()?.trim(); - let hourly_reset: DateTime = - (DateTime::parse_from_str(hourly_reset, "%Y-%m-%d %H:%M:%S %z")? - + Duration::seconds(5)) - .into(); - dbg!(hourly_reset); - let duration = (hourly_reset - Utc::now()).to_std()?; - dbg!(duration); - - return Ok(Some(duration)); - } - - Ok(None) -} +use nexus_api::{GAME_ID, GAME_NAME}; async fn process_plugin( plugin_buf: &mut [u8], pool: &sqlx::Pool, plugin_archive: &mut ZipWriter, - name: &str, db_file: &File, mod_obj: &Mod, - file_id: i64, file_name: &str, ) -> Result<()> where @@ -86,7 +42,7 @@ where let hash = seahash::hash(&plugin_buf); let plugin_row = insert_plugin( &pool, - name, + &db_file.name, hash as i64, db_file.id, Some(plugin.header.version as f64), @@ -116,7 +72,7 @@ where plugin_archive.start_file( format!( "{}/{}/{}/{}", - GAME_NAME, mod_obj.nexus_mod_id, file_id, file_name + GAME_NAME, mod_obj.nexus_mod_id, db_file.nexus_file_id, file_name ), FileOptions::default(), )?; @@ -126,6 +82,21 @@ where Ok(()) } +fn initialize_plugins_archive(mod_id: i32, file_id: i32) -> Result<()> { + let mut plugins_archive = ZipWriter::new( + OpenOptions::new() + .write(true) + .create(true) + .open("plugins.zip")?, + ); + plugins_archive.add_directory( + format!("{}/{}/{}", GAME_NAME, mod_id, file_id), + FileOptions::default(), + )?; + plugins_archive.finish()?; + Ok(()) +} + #[tokio::main] pub async fn main() -> Result<()> { dotenv().ok(); @@ -140,263 +111,77 @@ pub async fn main() -> Result<()> { let mut has_next_page = true; while has_next_page { - let res = client - .get(format!( - "https://www.nexusmods.com/Core/Libs/Common/Widgets/ModList?RH_ModList=nav:true,home:false,type:0,user_id:0,game_id:{},advfilt:true,include_adult:true,page_size:80,show_game_filter:false,open:false,page:{},sort_by:OLD_u_downloads", - GAME_ID, - page - )) - .send() - .await? - .error_for_status()?; - let html = res.text().await?; - let document = Html::parse_document(&html); - let mod_select = Selector::parse("li.mod-tile").expect("failed to parse CSS selector"); - let left_select = - Selector::parse("div.mod-tile-left").expect("failed to parse CSS selector"); - let right_select = - Selector::parse("div.mod-tile-right").expect("failed to parse CSS selector"); - let name_select = Selector::parse("p.tile-name a").expect("failed to parse CSS selector"); - let category_select = - Selector::parse("div.category a").expect("failed to parse CSS selector"); - let author_select = Selector::parse("div.author a").expect("failed to parse CSS selector"); - let desc_select = Selector::parse("p.desc").expect("failed to parse CSS selector"); - let next_page_select = - Selector::parse("div.pagination li.next").expect("failed to parse CSS selector"); + let mod_list_resp = nexus_scraper::get_mod_list_page(&client, page).await?; + let scraped = mod_list_resp.scrape_mods()?; - let next_page_elem = document.select(&next_page_select).next(); - - has_next_page = next_page_elem.is_some(); - - let mut mods = vec![]; - for element in document.select(&mod_select) { - let left = element - .select(&left_select) - .next() - .expect("Missing left div for mod"); - let right = element - .select(&right_select) - .next() - .expect("Missing right div for mod"); - let nexus_mod_id = left - .value() - .attr("data-mod-id") - .expect("Missing mod id attribute") - .parse::() - .ok() - .expect("Failed to parse mod id"); - let name_elem = right - .select(&name_select) - .next() - .expect("Missing name link for mod"); - let name = name_elem.text().next().expect("Missing name text for mod"); - let category_elem = right - .select(&category_select) - .next() - .expect("Missing category link for mod"); - let category = category_elem - .text() - .next() - .expect("Missing category text for mod"); - let author_elem = right - .select(&author_select) - .next() - .expect("Missing author link for mod"); - let author = author_elem - .text() - .next() - .expect("Missing author text for mod"); - let desc_elem = right - .select(&desc_select) - .next() - .expect("Missing desc elem for mod"); - let desc = desc_elem.text().next(); - - if let None = get_mod_by_nexus_mod_id(&pool, nexus_mod_id).await? { + has_next_page = scraped.has_next_page; + let mut mods = Vec::new(); + for scraped_mod in scraped.mods { + if let None = get_mod_by_nexus_mod_id(&pool, scraped_mod.nexus_mod_id).await? { mods.push( - insert_mod(&pool, name, nexus_mod_id, author, category, desc, game.id).await?, + insert_mod( + &pool, + scraped_mod.name, + scraped_mod.nexus_mod_id, + scraped_mod.author, + scraped_mod.category, + scraped_mod.desc, + game.id, + ) + .await?, ); } } dbg!(mods.len()); - for mod_obj in mods { - dbg!(&mod_obj.name); - let res = client - .get(format!( - "https://api.nexusmods.com/v1/games/{}/mods/{}/files.json", - GAME_NAME, mod_obj.nexus_mod_id - )) - .header("accept", "application/json") - .header("apikey", env::var("NEXUS_API_KEY")?) - .header("user-agent", USER_AGENT) - .send() - .await? - .error_for_status()?; - - if let Some(duration) = rate_limit_wait_duration(&res)? { + for db_mod in mods { + dbg!(&db_mod.name); + let files_resp = nexus_api::files::get(&client, db_mod.nexus_mod_id).await?; + // TODO: download other files than just MAIN files + // let files = files.into_iter().filter(|file| { + // if let Some(category_name) = file.get("category_name") { + // category_name.as_str() == Some("MAIN") + // } else { + // false + // } + // }); + if let Some(duration) = files_resp.wait { sleep(duration).await; } - let files = res.json::().await?; - let files = files - .get("files") - .ok_or_else(|| anyhow!("Missing files key in API response"))? - .as_array() - .ok_or_else(|| anyhow!("files value in API response is not an array"))?; - // TODO: download other files than just MAIN files - let files = files.into_iter().filter(|file| { - if let Some(category_name) = file.get("category_name") { - category_name.as_str() == Some("MAIN") - } else { - false - } - }); - - for file in files { - let file_id = file - .get("file_id") - .ok_or_else(|| anyhow!("Missing file_id key in file in API response"))? - .as_i64() - .ok_or_else(|| anyhow!("file_id value in API response file is not a number"))?; - dbg!(file_id); - let name = file - .get("name") - .ok_or_else(|| anyhow!("Missing name key in file in API response"))? - .as_str() - .ok_or_else(|| anyhow!("name value in API response file is not a string"))?; - let file_name = file - .get("file_name") - .ok_or_else(|| anyhow!("Missing file_name key in file in API response"))? - .as_str() - .ok_or_else(|| { - anyhow!("file_name value in API response file is not a string") - })?; - let category = file - .get("category_name") - .ok_or_else(|| anyhow!("Missing category key in file in API response"))? - .as_str(); - let version = file - .get("version") - .ok_or_else(|| anyhow!("Missing version key in file in API response"))? - .as_str(); - let mod_version = file - .get("mod_version") - .ok_or_else(|| anyhow!("Missing mod_version key in file in API response"))? - .as_str(); - let uploaded_timestamp = file - .get("uploaded_timestamp") - .ok_or_else(|| { - anyhow!("Missing uploaded_timestamp key in file in API response") - })? - .as_i64() - .ok_or_else(|| { - anyhow!("uploaded_timestamp value in API response file is not a number") - })?; - let uploaded_at = NaiveDateTime::from_timestamp(uploaded_timestamp, 0); + for api_file in files_resp.files()? { let db_file = insert_file( &pool, - name, - file_name, - file_id as i32, - mod_obj.id, - category, - version, - mod_version, - uploaded_at, + api_file.name, + api_file.file_name, + api_file.file_id as i32, + db_mod.id, + api_file.category, + api_file.version, + api_file.mod_version, + api_file.uploaded_at, ) .await?; - let res = client - .get(format!( - "https://api.nexusmods.com/v1/games/{}/mods/{}/files/{}/download_link.json", - GAME_NAME, mod_obj.nexus_mod_id, file_id - )) - .header("accept", "application/json") - .header("apikey", env::var("NEXUS_API_KEY")?) - .header("user-agent", USER_AGENT) - .send() - .await? - .error_for_status()?; - let duration = rate_limit_wait_duration(&res)?; + let download_link_resp = + nexus_api::download_link::get(&client, db_mod.nexus_mod_id, api_file.file_id) + .await?; + let mut tokio_file = download_link_resp.download_file(&client).await?; - let links = res.json::().await?; - let link = links - .get(0) - .ok_or_else(|| anyhow!("Links array in API response is missing first element"))? - .get("URI") - .ok_or_else(|| anyhow!("Missing URI key in link in API response"))? - .as_str() - .ok_or_else(|| anyhow!("URI value in API response link is not a string"))?; - - let mut tokio_file = tokio::fs::File::from_std(tempfile()?); - let res = client - .get(link) - .header("apikey", env::var("NEXUS_API_KEY")?) - .header("user-agent", USER_AGENT) - .send() - .await? - .error_for_status()?; - - // See: https://github.com/benkay86/async-applied/blob/master/reqwest-tokio-compat/src/main.rs - let mut byte_stream = res - .bytes_stream() - .map_err(|e| futures::io::Error::new(futures::io::ErrorKind::Other, e)) - .into_async_read() - .compat(); - - tokio::io::copy(&mut byte_stream, &mut tokio_file).await?; - - let mut plugin_archive = ZipWriter::new( - OpenOptions::new() - .write(true) - .create(true) - .open("plugins.zip")?, - ); - plugin_archive.add_directory( - format!("{}/{}/{}", GAME_NAME, mod_obj.nexus_mod_id, file_id), - FileOptions::default(), - )?; - plugin_archive.finish()?; - - let mut plugin_archive = ZipWriter::new_append( + initialize_plugins_archive(db_mod.nexus_mod_id, db_file.nexus_file_id)?; + let mut plugins_archive = ZipWriter::new_append( OpenOptions::new() .read(true) .write(true) .open("plugins.zip")?, )?; + let mut initial_bytes = [0; 8]; tokio_file.seek(SeekFrom::Start(0)).await?; tokio_file.read_exact(&mut initial_bytes).await?; let kind = infer::get(&initial_bytes).expect("unknown file type of file download"); dbg!(kind.mime_type()); - // "application/zip" => { - // let mut archive = ZipArchive::new(reader)?; - // let mut plugin_file_paths = Vec::new(); - // for file_name in archive.file_names() { - // dbg!(file_name); - // if file_name.ends_with(".esp") - // || file_name.ends_with(".esm") - // || file_name.ends_with(".esl") - // { - // plugin_file_paths.push(file_name.to_string()); - // } - // } - // dbg!(&plugin_file_paths); - // for file_name in plugin_file_paths.iter() { - // let mut file = archive.by_name(file_name)?; - // let plugin = parse_plugin(file)?; - // dbg!(plugin); - // plugin_archive.start_file( - // format!("{}/{}/{}/{}", GAME_NAME, mod_id, file_id, file_name), - // FileOptions::default(), - // )?; - // std::io::copy(&mut file, &mut plugin_archive)?; - // } - // } - // Use unrar to uncompress the entire .rar file to avoid a bug with compress_tools panicking when uncompressing - // certain .rar files: https://github.com/libarchive/libarchive/issues/373 tokio_file.seek(SeekFrom::Start(0)).await?; let mut file = tokio_file.try_clone().await?.into_std().await; let mut plugin_file_paths = Vec::new(); @@ -419,11 +204,9 @@ pub async fn main() -> Result<()> { process_plugin( &mut buf, &pool, - &mut plugin_archive, - name, + &mut plugins_archive, &db_file, - &mod_obj, - file_id, + &db_mod, file_name, ) .await?; @@ -433,6 +216,8 @@ pub async fn main() -> Result<()> { if kind.mime_type() == "application/x-rar-compressed" || kind.mime_type() == "application/vnd.rar" { + // Use unrar to uncompress the entire .rar file to avoid a bug with compress_tools panicking when uncompressing + // certain .rar files: https://github.com/libarchive/libarchive/issues/373 tokio_file.seek(SeekFrom::Start(0)).await?; let mut file = tokio_file.try_clone().await?.into_std().await; let temp_dir = tempdir()?; @@ -474,11 +259,9 @@ pub async fn main() -> Result<()> { process_plugin( &mut plugin_buf, &pool, - &mut plugin_archive, - name, + &mut plugins_archive, &db_file, - &mod_obj, - file_id, + &db_mod, file_name, ) .await?; @@ -490,8 +273,8 @@ pub async fn main() -> Result<()> { } } - plugin_archive.finish()?; - if let Some(duration) = duration { + plugins_archive.finish()?; + if let Some(duration) = download_link_resp.wait { sleep(duration).await; } } @@ -500,6 +283,7 @@ pub async fn main() -> Result<()> { page += 1; dbg!(page); dbg!(has_next_page); + sleep(Duration::new(1, 0)).await; } Ok(()) diff --git a/src/nexus_api/download_link.rs b/src/nexus_api/download_link.rs new file mode 100644 index 0000000..65c5547 --- /dev/null +++ b/src/nexus_api/download_link.rs @@ -0,0 +1,70 @@ +use anyhow::{anyhow, Result}; +use futures::TryStreamExt; +use reqwest::Client; +use serde_json::Value; +use std::{env, time::Duration}; +use tempfile::tempfile; +use tokio::fs::File; +use tokio_util::compat::FuturesAsyncReadCompatExt; + +use super::{rate_limit_wait_duration, GAME_NAME, USER_AGENT}; + +pub struct DownloadLinkResponse { + pub wait: Option, + json: Value, +} + +pub async fn get(client: &Client, mod_id: i32, file_id: i64) -> Result { + let res = client + .get(format!( + "https://api.nexusmods.com/v1/games/{}/mods/{}/files/{}/download_link.json", + GAME_NAME, mod_id, file_id + )) + .header("accept", "application/json") + .header("apikey", env::var("NEXUS_API_KEY")?) + .header("user-agent", USER_AGENT) + .send() + .await? + .error_for_status()?; + + let wait = rate_limit_wait_duration(&res)?; + let json = res.json::().await?; + + Ok(DownloadLinkResponse { wait, json }) +} + +impl DownloadLinkResponse { + pub fn link<'a>(&'a self) -> Result<&'a str> { + let link = self + .json + .get(0) + .ok_or_else(|| anyhow!("Links array in API response is missing first element"))? + .get("URI") + .ok_or_else(|| anyhow!("Missing URI key in link in API response"))? + .as_str() + .ok_or_else(|| anyhow!("URI value in API response link is not a string"))?; + Ok(link) + } + + pub async fn download_file(&self, client: &Client) -> Result { + let mut tokio_file = File::from_std(tempfile()?); + let res = client + .get(self.link()?) + .header("apikey", env::var("NEXUS_API_KEY")?) + .header("user-agent", USER_AGENT) + .send() + .await? + .error_for_status()?; + + // See: https://github.com/benkay86/async-applied/blob/master/reqwest-tokio-compat/src/main.rs + let mut byte_stream = res + .bytes_stream() + .map_err(|e| futures::io::Error::new(futures::io::ErrorKind::Other, e)) + .into_async_read() + .compat(); + + tokio::io::copy(&mut byte_stream, &mut tokio_file).await?; + + return Ok(tokio_file); + } +} diff --git a/src/nexus_api/files.rs b/src/nexus_api/files.rs new file mode 100644 index 0000000..3e4d734 --- /dev/null +++ b/src/nexus_api/files.rs @@ -0,0 +1,107 @@ +use anyhow::{anyhow, Result}; +use chrono::NaiveDateTime; +use reqwest::Client; +use serde_json::Value; +use std::{env, time::Duration}; + +use super::{rate_limit_wait_duration, GAME_NAME, USER_AGENT}; + +pub struct FilesResponse { + pub wait: Option, + json: Value, +} + +pub struct ApiFile<'a> { + pub file_id: i64, + pub name: &'a str, + pub file_name: &'a str, + pub category: Option<&'a str>, + pub version: Option<&'a str>, + pub mod_version: Option<&'a str>, + pub uploaded_at: NaiveDateTime, +} + +pub async fn get(client: &Client, nexus_mod_id: i32) -> Result { + let res = client + .get(format!( + "https://api.nexusmods.com/v1/games/{}/mods/{}/files.json", + GAME_NAME, nexus_mod_id + )) + .header("accept", "application/json") + .header("apikey", env::var("NEXUS_API_KEY")?) + .header("user-agent", USER_AGENT) + .send() + .await? + .error_for_status()?; + + let wait = rate_limit_wait_duration(&res)?; + let json = res.json::().await?; + + Ok(FilesResponse { wait, json }) +} + +impl FilesResponse { + pub fn files<'a>(&'a self) -> Result>> { + let files = self + .json + .get("files") + .ok_or_else(|| anyhow!("Missing files key in API response"))? + .as_array() + .ok_or_else(|| anyhow!("files value in API response is not an array"))?; + files + .into_iter() + .map(|file| { + let file_id = file + .get("file_id") + .ok_or_else(|| anyhow!("Missing file_id key in file in API response"))? + .as_i64() + .ok_or_else(|| anyhow!("file_id value in API response file is not a number"))?; + dbg!(file_id); + let name = file + .get("name") + .ok_or_else(|| anyhow!("Missing name key in file in API response"))? + .as_str() + .ok_or_else(|| anyhow!("name value in API response file is not a string"))?; + let file_name = file + .get("file_name") + .ok_or_else(|| anyhow!("Missing file_name key in file in API response"))? + .as_str() + .ok_or_else(|| { + anyhow!("file_name value in API response file is not a string") + })?; + let category = file + .get("category_name") + .ok_or_else(|| anyhow!("Missing category key in file in API response"))? + .as_str(); + let version = file + .get("version") + .ok_or_else(|| anyhow!("Missing version key in file in API response"))? + .as_str(); + let mod_version = file + .get("mod_version") + .ok_or_else(|| anyhow!("Missing mod_version key in file in API response"))? + .as_str(); + let uploaded_timestamp = file + .get("uploaded_timestamp") + .ok_or_else(|| { + anyhow!("Missing uploaded_timestamp key in file in API response") + })? + .as_i64() + .ok_or_else(|| { + anyhow!("uploaded_timestamp value in API response file is not a number") + })?; + let uploaded_at = NaiveDateTime::from_timestamp(uploaded_timestamp, 0); + + Ok(ApiFile { + file_id, + name, + file_name, + category, + version, + mod_version, + uploaded_at, + }) + }) + .collect() + } +} diff --git a/src/nexus_api/mod.rs b/src/nexus_api/mod.rs new file mode 100644 index 0000000..fc21dca --- /dev/null +++ b/src/nexus_api/mod.rs @@ -0,0 +1,44 @@ +use anyhow::Result; +use chrono::DateTime; +use chrono::Duration; +use chrono::Utc; +use reqwest::Response; + +pub mod download_link; +pub mod files; + +pub static GAME_NAME: &str = "skyrimspecialedition"; +pub const GAME_ID: u32 = 1704; +pub static USER_AGENT: &str = "mod-mapper/0.1"; + +pub fn rate_limit_wait_duration(res: &Response) -> Result> { + let daily_remaining = res + .headers() + .get("x-rl-daily-remaining") + .expect("No daily remaining in response headers"); + let hourly_remaining = res + .headers() + .get("x-rl-hourly-remaining") + .expect("No hourly limit in response headers"); + let hourly_reset = res + .headers() + .get("x-rl-hourly-reset") + .expect("No hourly reset in response headers"); + dbg!(daily_remaining); + dbg!(hourly_remaining); + + if hourly_remaining == "0" { + let hourly_reset = hourly_reset.to_str()?.trim(); + let hourly_reset: DateTime = + (DateTime::parse_from_str(hourly_reset, "%Y-%m-%d %H:%M:%S %z")? + + Duration::seconds(5)) + .into(); + dbg!(hourly_reset); + let duration = (hourly_reset - Utc::now()).to_std()?; + dbg!(duration); + + return Ok(Some(duration)); + } + + Ok(None) +} diff --git a/src/nexus_scraper.rs b/src/nexus_scraper.rs new file mode 100644 index 0000000..c16dd60 --- /dev/null +++ b/src/nexus_scraper.rs @@ -0,0 +1,119 @@ +use anyhow::Result; +use reqwest::Client; +use scraper::{Html, Selector}; + +use crate::nexus_api::GAME_ID; + +pub struct ModListResponse { + html: Html, +} +pub struct ScrapedMod<'a> { + pub nexus_mod_id: i32, + pub name: &'a str, + pub category: &'a str, + pub author: &'a str, + pub desc: Option<&'a str>, +} + +pub struct ModListScrape<'a> { + pub mods: Vec>, + pub has_next_page: bool, +} + +pub async fn get_mod_list_page(client: &Client, page: i32) -> Result { + let res = client + .get(format!( + "https://www.nexusmods.com/Core/Libs/Common/Widgets/ModList?RH_ModList=nav:true,home:false,type:0,user_id:0,game_id:{},advfilt:true,include_adult:true,page_size:80,show_game_filter:false,open:false,page:{},sort_by:OLD_u_downloads", + GAME_ID, + page + )) + .send() + .await? + .error_for_status()?; + let text = res.text().await?; + let html = Html::parse_document(&text); + + Ok(ModListResponse { html }) +} + +impl ModListResponse { + pub fn scrape_mods<'a>(&'a self) -> Result { + let mod_select = Selector::parse("li.mod-tile").expect("failed to parse CSS selector"); + let left_select = + Selector::parse("div.mod-tile-left").expect("failed to parse CSS selector"); + let right_select = + Selector::parse("div.mod-tile-right").expect("failed to parse CSS selector"); + let name_select = Selector::parse("p.tile-name a").expect("failed to parse CSS selector"); + let category_select = + Selector::parse("div.category a").expect("failed to parse CSS selector"); + let author_select = Selector::parse("div.author a").expect("failed to parse CSS selector"); + let desc_select = Selector::parse("p.desc").expect("failed to parse CSS selector"); + let next_page_select = + Selector::parse("div.pagination li.next").expect("failed to parse CSS selector"); + + let next_page_elem = self.html.select(&next_page_select).next(); + + let has_next_page = next_page_elem.is_some(); + + let mods: Vec = self + .html + .select(&mod_select) + .map(|element| { + let left = element + .select(&left_select) + .next() + .expect("Missing left div for mod"); + let right = element + .select(&right_select) + .next() + .expect("Missing right div for mod"); + let nexus_mod_id = left + .value() + .attr("data-mod-id") + .expect("Missing mod id attribute") + .parse::() + .ok() + .expect("Failed to parse mod id"); + let name_elem = right + .select(&name_select) + .next() + .expect("Missing name link for mod"); + let name = name_elem.text().next().expect("Missing name text for mod"); + let category_elem = right + .select(&category_select) + .next() + .expect("Missing category link for mod"); + let category = category_elem + .text() + .next() + .expect("Missing category text for mod"); + let author_elem = right + .select(&author_select) + .next() + .expect("Missing author link for mod"); + let author = author_elem + .text() + .next() + .expect("Missing author text for mod"); + let desc_elem = right + .select(&desc_select) + .next() + .expect("Missing desc elem for mod"); + let desc = desc_elem.text().next(); + + ScrapedMod { + nexus_mod_id, + name, + category, + author, + desc, + } + }) + .collect(); + dbg!(mods.len()); + Ok(ModListScrape { + mods, + has_next_page, + }) + } +}