Revert "Login before scraping mods to include adult mods"

This reverts commit bb02771b87.
This commit is contained in:
Tyler Hallada 2022-10-02 02:37:15 -04:00
parent bb02771b87
commit 03cf20029e
3 changed files with 21 additions and 160 deletions

View File

@ -45,10 +45,6 @@ NEXUS_API_KEY=...
7. Build the release binary by running `cargo build --release`.
8. See `./target/release/modmapper -h` for further commands or run `./scripts/update.sh` to start populating the database with scraped mods and dumping the data to JSON files.
## Nexus Mods user credentials
Nexus Mods filters out adult-only mods unless you are logged in and have set your content filters to allow adult mods. Modmapper works without Nexus Mods user credentials, but if you would like adult mods to be included, edit the `.env` file and add values for `NEXUS_MODS_USERNAME` and `NEXUS_MODS_PASSWORD`.
## Sync and Backup Setup
`scripts/sync.sh` and `scripts/backup.sh` both utilize [`rclone`](https://rclone.org) to transfer files that are generated on the machine running modmapper to separate servers for file storage.

View File

@ -1,9 +1,8 @@
use anyhow::Result;
use chrono::{NaiveDateTime, NaiveTime};
use humansize::{file_size_opts, FileSize};
use reqwest::{header, StatusCode};
use reqwest::StatusCode;
use std::collections::HashSet;
use std::env;
use std::io::SeekFrom;
use std::time::Duration;
use tokio::io::{AsyncReadExt, AsyncSeekExt};
@ -14,7 +13,7 @@ use crate::extractors::{self, extract_with_7zip, extract_with_compress_tools, ex
use crate::models::file;
use crate::models::game;
use crate::models::{game_mod, game_mod::UnsavedMod};
use crate::nexus_api::{self, get_game_id, USER_AGENT};
use crate::nexus_api::{self, get_game_id};
use crate::nexus_scraper;
const REQUEST_TIMEOUT: Duration = Duration::from_secs(7200); // 2 hours
@ -31,41 +30,10 @@ pub async fn update(
let mut has_next_page = true;
let mut pages_with_no_updates = 0;
let unauthed_client = reqwest::Client::builder()
.user_agent(USER_AGENT)
let client = reqwest::Client::builder()
.timeout(REQUEST_TIMEOUT)
.connect_timeout(CONNECT_TIMEOUT)
.build()?;
// An authed client is only needed to fetch adult mods, so default to an unauthed client
let mut scraping_client = unauthed_client.clone();
let username = env::var("NEXUS_MODS_USERNAME");
let password = env::var("NEXUS_MODS_PASSWORD");
if username.is_ok() && password.is_ok() {
let login_form = nexus_scraper::get_login_form(&scraping_client).await?;
let authenticity_token = login_form.scrape_authenticity_token()?;
let cookie = login_form.cookie;
let cookie = nexus_scraper::login(
&scraping_client,
&authenticity_token,
&cookie,
&username.unwrap(),
&password.unwrap(),
)
.await?;
let mut cookie = header::HeaderValue::from_str(&cookie)?;
cookie.set_sensitive(true);
let mut headers = header::HeaderMap::new();
headers.insert("Cookie", cookie);
let authed_client = reqwest::Client::builder()
.user_agent(USER_AGENT)
.timeout(REQUEST_TIMEOUT)
.connect_timeout(CONNECT_TIMEOUT)
.default_headers(headers)
.build()?;
scraping_client = authed_client;
}
let game_id = get_game_id(game_name).expect("valid game name");
let game = game::insert(&pool, game_name, game_id).await?;
@ -78,13 +46,8 @@ pub async fn update(
let page_span = info_span!("page", page, game_name, include_translations);
let _page_span = page_span.enter();
let mod_list_resp = nexus_scraper::get_mod_list_page(
&scraping_client,
page,
game.nexus_game_id,
include_translations,
)
.await?;
let mod_list_resp =
nexus_scraper::get_mod_list_page(&client, page, game.nexus_game_id, include_translations).await?;
let scraped = mod_list_resp.scrape_mods()?;
has_next_page = scraped.has_next_page;
@ -149,8 +112,7 @@ pub async fn update(
for db_mod in mods {
let mod_span = info_span!("mod", name = ?&db_mod.name, id = &db_mod.nexus_mod_id);
let _mod_span = mod_span.enter();
let files_resp =
nexus_api::files::get(&unauthed_client, game_name, db_mod.nexus_mod_id).await?;
let files_resp = nexus_api::files::get(&client, game_name, db_mod.nexus_mod_id).await?;
debug!(duration = ?files_resp.wait, "sleeping");
sleep(files_resp.wait).await;
@ -204,7 +166,7 @@ pub async fn update(
.await?;
let mut checked_metadata = false;
match nexus_api::metadata::contains_plugin(&unauthed_client, &api_file).await {
match nexus_api::metadata::contains_plugin(&client, &api_file).await {
Ok(contains_plugin) => {
if let Some(contains_plugin) = contains_plugin {
checked_metadata = true;
@ -230,7 +192,7 @@ pub async fn update(
.expect("unable to create human-readable file size");
info!(size = %humanized_size, "decided to download file");
let download_link_resp = nexus_api::download_link::get(
&unauthed_client,
&client,
game_name,
db_mod.nexus_mod_id,
api_file.file_id,
@ -250,10 +212,7 @@ pub async fn update(
}
let download_link_resp = download_link_resp?;
let mut tokio_file = match download_link_resp
.download_file(&unauthed_client)
.await
{
let mut tokio_file = match download_link_resp.download_file(&client).await {
Ok(file) => {
info!(bytes = api_file.size, "download finished");
file::update_downloaded_at(&pool, db_file.id).await?;
@ -307,15 +266,7 @@ pub async fn update(
// unrar failed to extract rar file (e.g. archive has unicode filenames)
// Attempt to uncompress the archive using `7z` unix command instead
warn!(error = %err, "failed to extract file with unrar, extracting whole archive with 7z instead");
extract_with_7zip(
&mut file,
&pool,
&db_file,
&db_mod,
game_name,
checked_metadata,
)
.await
extract_with_7zip(&mut file, &pool, &db_file, &db_mod, game_name, checked_metadata).await
}
}?;
}
@ -323,9 +274,7 @@ pub async fn update(
tokio_file.seek(SeekFrom::Start(0)).await?;
let mut file = tokio_file.try_clone().await?.into_std().await;
match extract_with_compress_tools(
&mut file, &pool, &db_file, &db_mod, game_name,
)
match extract_with_compress_tools(&mut file, &pool, &db_file, &db_mod, game_name)
.await
{
Ok(_) => Ok(()),
@ -340,24 +289,11 @@ pub async fn update(
// compress_tools or libarchive failed to extract zip/7z file (e.g. archive is deflate64 compressed)
// Attempt to uncompress the archive using `7z` unix command instead
warn!(error = %err, "failed to extract file with compress_tools, extracting whole archive with 7z instead");
extract_with_7zip(
&mut file,
&pool,
&db_file,
&db_mod,
game_name,
checked_metadata,
)
.await
} else if kind.mime_type()
== "application/vnd.microsoft.portable-executable"
{
extract_with_7zip(&mut file, &pool, &db_file, &db_mod, game_name, checked_metadata).await
} else if kind.mime_type() == "application/vnd.microsoft.portable-executable" {
// we tried to extract this .exe file, but it's not an archive so there's nothing we can do
warn!("archive is an .exe file that cannot be extracted, skipping file");
file::update_unable_to_extract_plugins(
&pool, db_file.id, true,
)
.await?;
file::update_unable_to_extract_plugins(&pool, db_file.id, true).await?;
continue;
} else {
Err(err)

View File

@ -4,11 +4,6 @@ use reqwest::Client;
use scraper::{Html, Selector};
use tracing::{info, instrument};
/// Result of fetching the Nexus Mods sign-in page (built by `get_login_form`).
pub struct LoginResponse {
/// Parsed HTML document of the sign-in page body.
pub html: Html,
/// Value of the `set-cookie` header from the sign-in GET response.
pub cookie: String,
}
/// Wraps the HTML document returned by `get_mod_list_page`; `scrape_mods`
/// runs its CSS selectors against it.
pub struct ModListResponse {
// Private: callers reach the document only through the methods on this type.
html: Html,
}
@ -32,57 +27,6 @@ pub struct ModListScrape<'a> {
pub has_next_page: bool,
}
/// Fetches the Nexus Mods sign-in page and returns its parsed HTML together
/// with the value of the response's `set-cookie` header.
///
/// # Errors
/// Returns an error if the request fails, the server answers with an error
/// status, or the response body cannot be read.
///
/// # Panics
/// Panics if the response has no `set-cookie` header or if that header's
/// value cannot be converted to a string with `to_str`.
#[instrument(skip(client))]
pub async fn get_login_form(client: &Client) -> Result<LoginResponse> {
    let request = client.get("https://users.nexusmods.com/auth/sign_in");
    let response = request.send().await?.error_for_status()?;
    info!(status = %response.status(), "fetched login form");

    // Copy the header out before the response is handed over to `text()`.
    let cookie = {
        let header_value = response
            .headers()
            .get("set-cookie")
            .expect("Missing set-cookie header on login GET response");
        header_value
            .to_str()
            .expect("set-cookie header on login GET response contained invalid ASCII characters")
            .to_owned()
    };

    let body = response.text().await?;
    Ok(LoginResponse {
        html: Html::parse_document(&body),
        cookie,
    })
}
/// Submits the Nexus Mods sign-in form and returns the value of the
/// `set-cookie` header from the server's response.
///
/// `authenticity_token` and `cookie` are sent back to the server alongside
/// the `username` and `password`; the cookie is attached as a `Cookie`
/// request header.
///
/// # Errors
/// Returns an error if the request fails or the server answers with an error
/// status.
///
/// # Panics
/// Panics if the response has no `set-cookie` header or if that header's
/// value cannot be converted to a string with `to_str`.
#[instrument(skip(client, cookie, password))]
pub async fn login(
    client: &Client,
    authenticity_token: &str,
    cookie: &str,
    username: &str,
    password: &str,
) -> Result<String> {
    // NOTE(review): `form` URL-encodes values, so the literal `+` in "Log+in"
    // is presumably not sent as a space — confirm whether the server cares.
    let form_fields = [
        ("authenticity_token", authenticity_token),
        ("user[login]", username),
        ("user[password]", password),
        ("commit", "Log+in"),
    ];

    let response = client
        .post("https://users.nexusmods.com/auth/sign_in")
        .header("Cookie", cookie)
        .form(&form_fields)
        .send()
        .await?
        .error_for_status()?;
    info!(status = %response.status(), "submitted login form");

    let session_cookie = response
        .headers()
        .get("set-cookie")
        .expect("Missing set-cookie header on login POST response")
        .to_str()
        .expect("set-cookie header on login POST response contained invalid ASCII characters");
    Ok(session_cookie.to_owned())
}
#[instrument(skip(client))]
pub async fn get_mod_list_page(
client: &Client,
@ -107,24 +51,6 @@ pub async fn get_mod_list_page(
Ok(ModListResponse { html })
}
impl LoginResponse {
    /// Extracts the `value` attribute of the first
    /// `input[name="authenticity_token"]` element in the sign-in page.
    ///
    /// # Panics
    /// Panics if no such input exists in the document or if it has no
    /// `value` attribute.
    #[instrument(skip(self))]
    pub fn scrape_authenticity_token(&self) -> Result<String> {
        let selector = Selector::parse(r#"input[name="authenticity_token"]"#)
            .expect("failed to parse CSS selector");

        // Only the first matching input is considered.
        let mut matches = self.html.select(&selector);
        let input = matches.next().expect("Missing authenticity_token input");

        let value = input
            .value()
            .attr("value")
            .expect("Missing value attribute on authenticity_token input");
        Ok(String::from(value))
    }
}
impl ModListResponse {
#[instrument(skip(self))]
pub fn scrape_mods<'a>(&'a self) -> Result<ModListScrape> {
@ -144,8 +70,8 @@ impl ModListResponse {
Selector::parse("time.date").expect("failed to parse CSS selector");
let last_update_date_select =
Selector::parse("div.date").expect("failed to parse CSS selector");
let next_page_select = Selector::parse("div.pagination li:last-child a.page-selected")
.expect("failed to parse CSS selector");
let next_page_select =
Selector::parse("div.pagination li:last-child a.page-selected").expect("failed to parse CSS selector");
let next_page_elem = self.html.select(&next_page_select).next();
@ -201,7 +127,10 @@ impl ModListResponse {
.expect("Missing author id for mod")
.parse::<i32>()
.expect("Failed to parse author id");
let author_name = author_elem.text().next().unwrap_or("Unknown");
let author_name = author_elem
.text()
.next()
.unwrap_or("Unknown");
let desc_elem = right
.select(&desc_select)
.next()