Revert "Login before scraping mods to include adult mods"

This reverts commit bb02771b87.
This commit is contained in:
Tyler Hallada 2022-10-02 02:37:15 -04:00
parent bb02771b87
commit 03cf20029e
3 changed files with 21 additions and 160 deletions

View File

@ -45,10 +45,6 @@ NEXUS_API_KEY=...
7. Build the release binary by running `cargo build --release`. 7. Build the release binary by running `cargo build --release`.
8. See `./target/release/modmapper -h` for further commands or run `./scripts/update.sh` to start populating the database with scraped mods and dumping the data to JSON files. 8. See `./target/release/modmapper -h` for further commands or run `./scripts/update.sh` to start populating the database with scraped mods and dumping the data to JSON files.
## Nexus Mods user credentials
Nexus Mods filters out adult-only mods unless you are logged in and have set your content filters to allow adult mods. Modmapper works without Nexus Mods user credentials, but if you would like to add your user credentials so that adult mods are included then edit the `.env` file and add values for `NEXUS_MODS_USERNAME` and `NEXUS_MODS_PASSWORD`.
## Sync and Backup Setup ## Sync and Backup Setup
`scripts/sync.sh` and `scripts/backup.sh` both utilize [`rclone`](https://rclone.org) to transfer files that are generated on the machine running modmapper to separate servers for file storage. `scripts/sync.sh` and `scripts/backup.sh` both utilize [`rclone`](https://rclone.org) to transfer files that are generated on the machine running modmapper to separate servers for file storage.

View File

@ -1,9 +1,8 @@
use anyhow::Result; use anyhow::Result;
use chrono::{NaiveDateTime, NaiveTime}; use chrono::{NaiveDateTime, NaiveTime};
use humansize::{file_size_opts, FileSize}; use humansize::{file_size_opts, FileSize};
use reqwest::{header, StatusCode}; use reqwest::StatusCode;
use std::collections::HashSet; use std::collections::HashSet;
use std::env;
use std::io::SeekFrom; use std::io::SeekFrom;
use std::time::Duration; use std::time::Duration;
use tokio::io::{AsyncReadExt, AsyncSeekExt}; use tokio::io::{AsyncReadExt, AsyncSeekExt};
@ -14,7 +13,7 @@ use crate::extractors::{self, extract_with_7zip, extract_with_compress_tools, ex
use crate::models::file; use crate::models::file;
use crate::models::game; use crate::models::game;
use crate::models::{game_mod, game_mod::UnsavedMod}; use crate::models::{game_mod, game_mod::UnsavedMod};
use crate::nexus_api::{self, get_game_id, USER_AGENT}; use crate::nexus_api::{self, get_game_id};
use crate::nexus_scraper; use crate::nexus_scraper;
const REQUEST_TIMEOUT: Duration = Duration::from_secs(7200); // 2 hours const REQUEST_TIMEOUT: Duration = Duration::from_secs(7200); // 2 hours
@ -31,41 +30,10 @@ pub async fn update(
let mut has_next_page = true; let mut has_next_page = true;
let mut pages_with_no_updates = 0; let mut pages_with_no_updates = 0;
let unauthed_client = reqwest::Client::builder() let client = reqwest::Client::builder()
.user_agent(USER_AGENT)
.timeout(REQUEST_TIMEOUT) .timeout(REQUEST_TIMEOUT)
.connect_timeout(CONNECT_TIMEOUT) .connect_timeout(CONNECT_TIMEOUT)
.build()?; .build()?;
// An authed client is only needed to fetch adult mods, so default to an unauthed client
let mut scraping_client = unauthed_client.clone();
let username = env::var("NEXUS_MODS_USERNAME");
let password = env::var("NEXUS_MODS_PASSWORD");
if username.is_ok() && password.is_ok() {
let login_form = nexus_scraper::get_login_form(&scraping_client).await?;
let authenticity_token = login_form.scrape_authenticity_token()?;
let cookie = login_form.cookie;
let cookie = nexus_scraper::login(
&scraping_client,
&authenticity_token,
&cookie,
&username.unwrap(),
&password.unwrap(),
)
.await?;
let mut cookie = header::HeaderValue::from_str(&cookie)?;
cookie.set_sensitive(true);
let mut headers = header::HeaderMap::new();
headers.insert("Cookie", cookie);
let authed_client = reqwest::Client::builder()
.user_agent(USER_AGENT)
.timeout(REQUEST_TIMEOUT)
.connect_timeout(CONNECT_TIMEOUT)
.default_headers(headers)
.build()?;
scraping_client = authed_client;
}
let game_id = get_game_id(game_name).expect("valid game name"); let game_id = get_game_id(game_name).expect("valid game name");
let game = game::insert(&pool, game_name, game_id).await?; let game = game::insert(&pool, game_name, game_id).await?;
@ -78,13 +46,8 @@ pub async fn update(
let page_span = info_span!("page", page, game_name, include_translations); let page_span = info_span!("page", page, game_name, include_translations);
let _page_span = page_span.enter(); let _page_span = page_span.enter();
let mod_list_resp = nexus_scraper::get_mod_list_page( let mod_list_resp =
&scraping_client, nexus_scraper::get_mod_list_page(&client, page, game.nexus_game_id, include_translations).await?;
page,
game.nexus_game_id,
include_translations,
)
.await?;
let scraped = mod_list_resp.scrape_mods()?; let scraped = mod_list_resp.scrape_mods()?;
has_next_page = scraped.has_next_page; has_next_page = scraped.has_next_page;
@ -149,8 +112,7 @@ pub async fn update(
for db_mod in mods { for db_mod in mods {
let mod_span = info_span!("mod", name = ?&db_mod.name, id = &db_mod.nexus_mod_id); let mod_span = info_span!("mod", name = ?&db_mod.name, id = &db_mod.nexus_mod_id);
let _mod_span = mod_span.enter(); let _mod_span = mod_span.enter();
let files_resp = let files_resp = nexus_api::files::get(&client, game_name, db_mod.nexus_mod_id).await?;
nexus_api::files::get(&unauthed_client, game_name, db_mod.nexus_mod_id).await?;
debug!(duration = ?files_resp.wait, "sleeping"); debug!(duration = ?files_resp.wait, "sleeping");
sleep(files_resp.wait).await; sleep(files_resp.wait).await;
@ -204,7 +166,7 @@ pub async fn update(
.await?; .await?;
let mut checked_metadata = false; let mut checked_metadata = false;
match nexus_api::metadata::contains_plugin(&unauthed_client, &api_file).await { match nexus_api::metadata::contains_plugin(&client, &api_file).await {
Ok(contains_plugin) => { Ok(contains_plugin) => {
if let Some(contains_plugin) = contains_plugin { if let Some(contains_plugin) = contains_plugin {
checked_metadata = true; checked_metadata = true;
@ -230,7 +192,7 @@ pub async fn update(
.expect("unable to create human-readable file size"); .expect("unable to create human-readable file size");
info!(size = %humanized_size, "decided to download file"); info!(size = %humanized_size, "decided to download file");
let download_link_resp = nexus_api::download_link::get( let download_link_resp = nexus_api::download_link::get(
&unauthed_client, &client,
game_name, game_name,
db_mod.nexus_mod_id, db_mod.nexus_mod_id,
api_file.file_id, api_file.file_id,
@ -250,10 +212,7 @@ pub async fn update(
} }
let download_link_resp = download_link_resp?; let download_link_resp = download_link_resp?;
let mut tokio_file = match download_link_resp let mut tokio_file = match download_link_resp.download_file(&client).await {
.download_file(&unauthed_client)
.await
{
Ok(file) => { Ok(file) => {
info!(bytes = api_file.size, "download finished"); info!(bytes = api_file.size, "download finished");
file::update_downloaded_at(&pool, db_file.id).await?; file::update_downloaded_at(&pool, db_file.id).await?;
@ -307,15 +266,7 @@ pub async fn update(
// unrar failed to extract rar file (e.g. archive has unicode filenames) // unrar failed to extract rar file (e.g. archive has unicode filenames)
// Attempt to uncompress the archive using `7z` unix command instead // Attempt to uncompress the archive using `7z` unix command instead
warn!(error = %err, "failed to extract file with unrar, extracting whole archive with 7z instead"); warn!(error = %err, "failed to extract file with unrar, extracting whole archive with 7z instead");
extract_with_7zip( extract_with_7zip(&mut file, &pool, &db_file, &db_mod, game_name, checked_metadata).await
&mut file,
&pool,
&db_file,
&db_mod,
game_name,
checked_metadata,
)
.await
} }
}?; }?;
} }
@ -323,9 +274,7 @@ pub async fn update(
tokio_file.seek(SeekFrom::Start(0)).await?; tokio_file.seek(SeekFrom::Start(0)).await?;
let mut file = tokio_file.try_clone().await?.into_std().await; let mut file = tokio_file.try_clone().await?.into_std().await;
match extract_with_compress_tools( match extract_with_compress_tools(&mut file, &pool, &db_file, &db_mod, game_name)
&mut file, &pool, &db_file, &db_mod, game_name,
)
.await .await
{ {
Ok(_) => Ok(()), Ok(_) => Ok(()),
@ -340,24 +289,11 @@ pub async fn update(
// compress_tools or libarchive failed to extract zip/7z file (e.g. archive is deflate64 compressed) // compress_tools or libarchive failed to extract zip/7z file (e.g. archive is deflate64 compressed)
// Attempt to uncompress the archive using `7z` unix command instead // Attempt to uncompress the archive using `7z` unix command instead
warn!(error = %err, "failed to extract file with compress_tools, extracting whole archive with 7z instead"); warn!(error = %err, "failed to extract file with compress_tools, extracting whole archive with 7z instead");
extract_with_7zip( extract_with_7zip(&mut file, &pool, &db_file, &db_mod, game_name, checked_metadata).await
&mut file, } else if kind.mime_type() == "application/vnd.microsoft.portable-executable" {
&pool,
&db_file,
&db_mod,
game_name,
checked_metadata,
)
.await
} else if kind.mime_type()
== "application/vnd.microsoft.portable-executable"
{
// we tried to extract this .exe file, but it's not an archive so there's nothing we can do // we tried to extract this .exe file, but it's not an archive so there's nothing we can do
warn!("archive is an .exe file that cannot be extracted, skipping file"); warn!("archive is an .exe file that cannot be extracted, skipping file");
file::update_unable_to_extract_plugins( file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?;
&pool, db_file.id, true,
)
.await?;
continue; continue;
} else { } else {
Err(err) Err(err)

View File

@ -4,11 +4,6 @@ use reqwest::Client;
use scraper::{Html, Selector}; use scraper::{Html, Selector};
use tracing::{info, instrument}; use tracing::{info, instrument};
/// Response from fetching the Nexus Mods sign-in page: the parsed page HTML
/// plus the session cookie the server set, both of which are needed to
/// submit the login form afterwards.
pub struct LoginResponse {
// Parsed sign-in page; scraped for the CSRF `authenticity_token` input.
pub html: Html,
// Raw `set-cookie` header value from the login GET response, echoed back
// as the `Cookie` header when POSTing the login form.
pub cookie: String,
}
pub struct ModListResponse { pub struct ModListResponse {
html: Html, html: Html,
} }
@ -32,57 +27,6 @@ pub struct ModListScrape<'a> {
pub has_next_page: bool, pub has_next_page: bool,
} }
#[instrument(skip(client))]
pub async fn get_login_form(client: &Client) -> Result<LoginResponse> {
let res = client
.get("https://users.nexusmods.com/auth/sign_in")
.send()
.await?
.error_for_status()?;
info!(status = %res.status(), "fetched login form");
let cookie = res
.headers()
.get("set-cookie")
.expect("Missing set-cookie header on login GET response")
.to_str()
.expect("set-cookie header on login GET response contained invalid ASCII characters")
.to_string();
let text = res.text().await?;
let html = Html::parse_document(&text);
Ok(LoginResponse { html, cookie })
}
#[instrument(skip(client, cookie, password))]
pub async fn login(
client: &Client,
authenticity_token: &str,
cookie: &str,
username: &str,
password: &str,
) -> Result<String> {
let res = client
.post("https://users.nexusmods.com/auth/sign_in")
.header("Cookie", cookie)
.form(&[
("authenticity_token", authenticity_token),
("user[login]", username),
("user[password]", password),
("commit", "Log+in"),
])
.send()
.await?
.error_for_status()?;
info!(status = %res.status(), "submitted login form");
Ok(res
.headers()
.get("set-cookie")
.expect("Missing set-cookie header on login POST response")
.to_str()
.expect("set-cookie header on login POST response contained invalid ASCII characters")
.to_string())
}
#[instrument(skip(client))] #[instrument(skip(client))]
pub async fn get_mod_list_page( pub async fn get_mod_list_page(
client: &Client, client: &Client,
@ -107,24 +51,6 @@ pub async fn get_mod_list_page(
Ok(ModListResponse { html }) Ok(ModListResponse { html })
} }
impl LoginResponse {
#[instrument(skip(self))]
pub fn scrape_authenticity_token(&self) -> Result<String> {
let token_select = Selector::parse(r#"input[name="authenticity_token"]"#)
.expect("failed to parse CSS selector");
let token_elem = self
.html
.select(&token_select)
.next()
.expect("Missing authenticity_token input");
let token = token_elem
.value()
.attr("value")
.expect("Missing value attribute on authenticity_token input");
Ok(token.to_string())
}
}
impl ModListResponse { impl ModListResponse {
#[instrument(skip(self))] #[instrument(skip(self))]
pub fn scrape_mods<'a>(&'a self) -> Result<ModListScrape> { pub fn scrape_mods<'a>(&'a self) -> Result<ModListScrape> {
@ -144,8 +70,8 @@ impl ModListResponse {
Selector::parse("time.date").expect("failed to parse CSS selector"); Selector::parse("time.date").expect("failed to parse CSS selector");
let last_update_date_select = let last_update_date_select =
Selector::parse("div.date").expect("failed to parse CSS selector"); Selector::parse("div.date").expect("failed to parse CSS selector");
let next_page_select = Selector::parse("div.pagination li:last-child a.page-selected") let next_page_select =
.expect("failed to parse CSS selector"); Selector::parse("div.pagination li:last-child a.page-selected").expect("failed to parse CSS selector");
let next_page_elem = self.html.select(&next_page_select).next(); let next_page_elem = self.html.select(&next_page_select).next();
@ -201,7 +127,10 @@ impl ModListResponse {
.expect("Missing author id for mod") .expect("Missing author id for mod")
.parse::<i32>() .parse::<i32>()
.expect("Failed to parse author id"); .expect("Failed to parse author id");
let author_name = author_elem.text().next().unwrap_or("Unknown"); let author_name = author_elem
.text()
.next()
.unwrap_or("Unknown");
let desc_elem = right let desc_elem = right
.select(&desc_select) .select(&desc_select)
.next() .next()