Add option to temporarily backfill mod data

This commit is contained in:
Tyler Hallada 2022-01-18 20:54:50 -05:00
parent f22587d4a0
commit d48e2fdb5f
4 changed files with 238 additions and 4 deletions

View File

@ -51,6 +51,10 @@ struct Args {
/// folder to output all cell data as json files
#[argh(option, short = 'c')]
cell_data: Option<String>,
/// backfill mods data from the Nexus API (temporary)
#[argh(switch, short = 'b')]
backfill_mods: bool,
}
async fn extract_with_compress_tools(
@ -213,6 +217,24 @@ pub async fn main() -> Result<()> {
let args: Args = argh::from_env();
let client = reqwest::Client::builder()
.timeout(REQUEST_TIMEOUT)
.connect_timeout(CONNECT_TIMEOUT)
.build()?;
if args.backfill_mods {
for game_mod in game_mod::bulk_get_need_backfill(&pool).await? {
let response = nexus_api::game_mod::get(&client, game_mod.nexus_mod_id).await?;
let mod_data = response.extract_data()?;
game_mod::update_from_api_response(&pool, &game_mod, &mod_data).await?;
info!(
id = game_mod.id,
nexus_mod_id = game_mod.nexus_mod_id,
"backfilled mod data from api"
);
}
}
if let Some(dump_edits) = args.dump_edits {
let mut cell_mod_edit_counts = HashMap::new();
for x in -77..75 {
@ -247,10 +269,6 @@ pub async fn main() -> Result<()> {
let mut has_next_page = true;
let game = game::insert(&pool, GAME_NAME, GAME_ID as i32).await?;
let client = reqwest::Client::builder()
.timeout(REQUEST_TIMEOUT)
.connect_timeout(CONNECT_TIMEOUT)
.build()?;
while has_next_page {
let page_span = info_span!("page", page);

View File

@ -4,6 +4,8 @@ use serde::{Deserialize, Serialize};
use sqlx::FromRow;
use tracing::instrument;
use crate::nexus_api::game_mod::ExtractedModData;
use super::BATCH_SIZE;
#[derive(Debug, Serialize, Deserialize, FromRow)]
@ -210,3 +212,88 @@ pub async fn update_last_updated_files_at(
.await
.context("Failed to update mod")
}
/// Loads every row of the `mods` table whose `author_id` column is still NULL.
///
/// # Errors
/// Returns an error (with the context "Failed to bulk get need backfill") when the
/// query cannot be executed against `pool`.
#[instrument(level = "debug", skip(pool))]
pub async fn bulk_get_need_backfill(pool: &sqlx::Pool<sqlx::Postgres>) -> Result<Vec<Mod>> {
    let unfilled_mods = sqlx::query_as!(
        Mod,
        "SELECT * FROM mods
WHERE author_id IS NULL"
    )
    .fetch_all(pool)
    .await;

    unfilled_mods.context("Failed to bulk get need backfill")
}
/// Overwrites the row identified by `game_mod.id` with the values in `mod_data` and
/// returns the row as stored after the update.
///
/// Fields that are optional in `mod_data` never erase existing data:
/// * `name` and `category_id` fall back to the values currently held by `game_mod`;
/// * `description` and `thumbnail_link` keep whatever is already stored in the row
///   (via `COALESCE`) when the API response did not provide them.
///
/// Everything is written in a single `UPDATE` statement so the row is never left
/// partially updated and only one round trip to the database is needed.
///
/// # Errors
/// Returns an error (with the context "Failed to update mod from api response") when
/// the statement fails or no row matches `game_mod.id`.
#[instrument(level = "debug", skip(pool, game_mod, mod_data))]
pub async fn update_from_api_response<'a>(
    pool: &sqlx::Pool<sqlx::Postgres>,
    game_mod: &Mod,
    mod_data: &ExtractedModData<'a>,
) -> Result<Mod> {
    let name = mod_data.name.unwrap_or(&game_mod.name);
    let category_id = mod_data.category_id.or(game_mod.category_id);

    sqlx::query_as!(
        Mod,
        "UPDATE mods
        SET
            nexus_mod_id = $2,
            name = $3,
            category_id = $4,
            author_name = $5,
            author_id = $6,
            last_update_at = $7,
            first_upload_at = $8,
            description = COALESCE($9, description),
            thumbnail_link = COALESCE($10, thumbnail_link)
        WHERE id = $1
        RETURNING *",
        game_mod.id,
        mod_data.nexus_mod_id,
        name,
        category_id,
        mod_data.author_name,
        mod_data.author_id,
        mod_data.last_update_at,
        mod_data.first_upload_at,
        // A NULL bind makes COALESCE fall through to the column's current value.
        mod_data.description,
        mod_data.thumbnail_link,
    )
    .fetch_one(pool)
    .await
    .context("Failed to update mod from api response")
}

128
src/nexus_api/game_mod.rs Normal file
View File

@ -0,0 +1,128 @@
use anyhow::{anyhow, Result};
use chrono::NaiveDateTime;
use reqwest::Client;
use serde_json::Value;
use std::{env, time::Duration};
use tracing::{info, instrument};
use super::{rate_limit_wait_duration, warn_and_sleep, GAME_NAME, USER_AGENT};
/// Result of a successful mod lookup performed by [`get`].
pub struct ModResponse {
    /// Duration that `get` obtained from `rate_limit_wait_duration` for the HTTP response.
    pub wait: Duration,
    /// Response body parsed as JSON; private, read through `extract_data`.
    json: Value,
}
/// Requests the metadata of mod `mod_id` for `GAME_NAME` from the Nexus Mods v1 API.
///
/// The request is tried up to three times. A transport failure or a non-success HTTP
/// status is handed to `warn_and_sleep` before the next attempt is made.
///
/// # Errors
/// Returns an error when:
/// * the `NEXUS_API_KEY` environment variable cannot be read;
/// * `rate_limit_wait_duration` fails for the response;
/// * the response body is not valid JSON;
/// * all three attempts failed.
#[instrument(skip(client))]
pub async fn get(client: &Client, mod_id: i32) -> Result<ModResponse> {
    // Neither the key nor the URL changes between retries, so resolve them once.
    let api_key = env::var("NEXUS_API_KEY")
        .map_err(|err| anyhow!("Failed to read NEXUS_API_KEY: {}", err))?;
    let url = format!(
        "https://api.nexusmods.com/v1/games/{}/mods/{}.json",
        GAME_NAME, mod_id
    );

    for attempt in 1..=3 {
        // Fold "could not send" and "server answered with an error status" into one
        // failure path; both are retried the same way.
        let outcome = client
            .get(url.as_str())
            .header("accept", "application/json")
            .header("apikey", api_key.as_str())
            .header("user-agent", USER_AGENT)
            .send()
            .await
            .and_then(|res| res.error_for_status());

        let res = match outcome {
            Ok(res) => res,
            Err(err) => {
                warn_and_sleep("game_mod::get", anyhow!(err), attempt).await;
                continue;
            }
        };

        info!(status = %res.status(), "fetched mod data from API");
        // Must be computed before `json()` consumes the response.
        let wait = rate_limit_wait_duration(&res)?;
        let json = res.json::<Value>().await?;
        return Ok(ModResponse { wait, json });
    }

    Err(anyhow!("Failed to get mod data in three attempts"))
}
/// Typed view of the fields that `ModResponse::extract_data` reads out of a mod
/// response. String fields borrow from the response's JSON body.
pub struct ExtractedModData<'a> {
    /// Value of the `mod_id` key.
    pub nexus_mod_id: i32,
    /// Value of the `name` key, when present.
    pub name: Option<&'a str>,
    /// Value of the `category_id` key, when present.
    pub category_id: Option<i32>,
    /// Value of `user.name`.
    pub author_name: &'a str,
    /// Value of `user.member_id`.
    pub author_id: i32,
    /// Value of the `description` key, when present.
    pub description: Option<&'a str>,
    /// Value of the `picture_url` key, when present.
    pub thumbnail_link: Option<&'a str>,
    /// `updated_timestamp` interpreted as whole seconds since the Unix epoch.
    pub last_update_at: NaiveDateTime,
    /// `created_timestamp` interpreted as whole seconds since the Unix epoch.
    pub first_upload_at: NaiveDateTime,
}
impl ModResponse {
#[instrument(skip(self))]
pub fn extract_data<'a>(&'a self) -> Result<ExtractedModData<'a>> {
let nexus_mod_id = self
.json
.get("mod_id")
.expect("Missing mod_id in mod response")
.as_i64()
.expect("Failed to parse mod_id in mod response") as i32;
let category_id = self.json.get("category_id").map(|id| {
id.as_i64()
.expect("Failed to parse category_id in mod response") as i32
});
let name = self
.json
.get("name")
.map(|name| name.as_str().expect("Failed to parse name in mod response"));
let description = self.json.get("description").map(|description| {
description
.as_str()
.expect("Failed to parse description in mod response")
});
let thumbnail_link = self.json.get("picture_url").map(|thumbnail_link| {
thumbnail_link
.as_str()
.expect("Failed to parse picture_url in mod response")
});
let user = self.json.get("user").expect("Missing user in mod response");
let author_name = user
.get("name")
.expect("Missing user name in mod response")
.as_str()
.expect("Failed to parse user name in mod response");
let author_id = user
.get("member_id")
.expect("Missing member_id in mod response")
.as_i64()
.expect("Failed to parse member_id in mod response") as i32;
let updated_timestamp = self
.json
.get("updated_timestamp")
.expect("Missing updated_timestamp in mod response")
.as_i64()
.expect("Failed to parse updated_timestamp in mod response");
let last_update_at = NaiveDateTime::from_timestamp(updated_timestamp, 0);
let created_timestamp = self
.json
.get("created_timestamp")
.expect("Missing created_timestamp in mod response")
.as_i64()
.expect("Failed to parse created_timestamp in mod response");
let first_upload_at = NaiveDateTime::from_timestamp(created_timestamp, 0);
info!("parsed mod data from API response");
Ok(ExtractedModData {
nexus_mod_id,
name,
category_id,
author_name,
author_id,
description,
thumbnail_link,
last_update_at,
first_upload_at,
})
}
}

View File

@ -8,6 +8,7 @@ use tracing::{info, warn};
pub mod download_link;
pub mod files;
pub mod game_mod;
pub mod metadata;
pub static GAME_NAME: &str = "skyrimspecialedition";