Make separate mod search indices per game instead

Saves a significant amount of space in the JSON, since I no longer need to list a game name for every mod.
This commit is contained in:
Tyler Hallada 2022-09-03 00:49:57 -04:00
parent 84a02971a0
commit 14be03cd3d
5 changed files with 27 additions and 11 deletions

View File

@ -11,7 +11,8 @@ mkdir -p plugins_data
if [ -n "$last_update_time" ]; then if [ -n "$last_update_time" ]; then
./target/release/mod-mapper -e cells/edits.json &>> logs/modmapper.log ./target/release/mod-mapper -e cells/edits.json &>> logs/modmapper.log
./target/release/mod-mapper -c cells &>> logs/modmapper.log ./target/release/mod-mapper -c cells &>> logs/modmapper.log
./target/release/mod-mapper -s mods/mod_search_index.json &>> logs/modmapper.log ./target/release/mod-mapper -s mods/skyrimspecialedition_mod_search_index.json -g skyrimspecialedition &>> logs/modmapper.log
./target/release/mod-mapper -s mods/skyrim_mod_search_index.json -g skyrim &>> logs/modmapper.log
./target/release/mod-mapper -M mods/mod_cell_counts.json &>> logs/modmapper.log ./target/release/mod-mapper -M mods/mod_cell_counts.json &>> logs/modmapper.log
./target/release/mod-mapper -G mods/games.json &>> logs/modmapper.log ./target/release/mod-mapper -G mods/games.json &>> logs/modmapper.log
./target/release/mod-mapper -m mods -u "$last_update_time" &>> logs/modmapper.log ./target/release/mod-mapper -m mods -u "$last_update_time" &>> logs/modmapper.log
@ -20,7 +21,8 @@ if [ -n "$last_update_time" ]; then
else else
./target/release/mod-mapper -e cells/edits.json &>> logs/modmapper.log ./target/release/mod-mapper -e cells/edits.json &>> logs/modmapper.log
./target/release/mod-mapper -c cells &>> logs/modmapper.log ./target/release/mod-mapper -c cells &>> logs/modmapper.log
./target/release/mod-mapper -s mods/mod_search_index.json &>> logs/modmapper.log ./target/release/mod-mapper -s mods/skyrimspecialedition_mod_search_index.json -g skyrimspecialedition &>> logs/modmapper.log
./target/release/mod-mapper -s mods/skyrim_mod_search_index.json -g skyrim &>> logs/modmapper.log
./target/release/mod-mapper -M mods/mod_cell_counts.json &>> logs/modmapper.log ./target/release/mod-mapper -M mods/mod_cell_counts.json &>> logs/modmapper.log
./target/release/mod-mapper -G mods/games.json &>> logs/modmapper.log ./target/release/mod-mapper -G mods/games.json &>> logs/modmapper.log
./target/release/mod-mapper -m mods &>> logs/modmapper.log ./target/release/mod-mapper -m mods &>> logs/modmapper.log

View File

@ -11,18 +11,17 @@ use crate::models::game_mod;
#[derive(Serialize)] #[derive(Serialize)]
struct ModForSearchIdTranslated { struct ModForSearchIdTranslated {
name: String, name: String,
game: String,
id: i32, id: i32,
} }
pub async fn dump_mod_search_index(pool: &sqlx::Pool<sqlx::Postgres>, path: &str) -> Result<()> { pub async fn dump_mod_search_index(pool: &sqlx::Pool<sqlx::Postgres>, game: &str, path: &str) -> Result<()> {
let mut page = 1; let mut page = 1;
let mut search_index = vec![]; let mut search_index = vec![];
let page_size = 20; let page_size = 20;
let mut last_id = None; let mut last_id = None;
let game_id_to_names: HashMap<_, _> = game::get_all(&pool).await?.into_iter().map(|game| (game.id, game.name)).collect(); let game_id = game::get_id_by_name(&pool, game).await?;
loop { loop {
let mods = game_mod::batched_get_for_search(&pool, page_size, last_id).await?; let mods = game_mod::batched_get_for_search(&pool, game_id, page_size, last_id).await?;
if mods.is_empty() { if mods.is_empty() {
break; break;
} }
@ -30,7 +29,6 @@ pub async fn dump_mod_search_index(pool: &sqlx::Pool<sqlx::Postgres>, path: &str
info!(page = page, nexus_mod_id = mod_for_search.nexus_mod_id, "read mod name for search index"); info!(page = page, nexus_mod_id = mod_for_search.nexus_mod_id, "read mod name for search index");
search_index.push(ModForSearchIdTranslated { search_index.push(ModForSearchIdTranslated {
name: mod_for_search.name, name: mod_for_search.name,
game: game_id_to_names.get(&mod_for_search.game_id).expect("known game id").to_string(),
id: mod_for_search.nexus_mod_id, id: mod_for_search.nexus_mod_id,
}); });
last_id = Some(mod_for_search.id); last_id = Some(mod_for_search.id);

View File

@ -18,7 +18,7 @@ use commands::{
}; };
#[derive(FromArgs)] #[derive(FromArgs)]
/// Downloads every mod off nexus mods, parses CELL and WRLD data from plugins in each, and saves the data to the database. /// Downloads every mod off nexus mods, parses CELL and WRLD data from plugins in each, and saves the data to the database.
struct Args { struct Args {
#[argh(option, short = 'p', default = "1")] #[argh(option, short = 'p', default = "1")]
/// the page number to start scraping for mods on nexus mods /// the page number to start scraping for mods on nexus mods
@ -100,7 +100,7 @@ pub async fn main() -> Result<()> {
return dump_mod_data(&pool, &dir, args.updated_after).await; return dump_mod_data(&pool, &dir, args.updated_after).await;
} }
if let Some(path) = args.mod_search_index { if let Some(path) = args.mod_search_index {
return dump_mod_search_index(&pool, &path).await; return dump_mod_search_index(&pool, &args.game, &path).await;
} }
if let Some(path) = args.mod_cell_counts { if let Some(path) = args.mod_cell_counts {
return dump_mod_cell_counts(&pool, &path).await; return dump_mod_cell_counts(&pool, &path).await;

View File

@ -44,4 +44,18 @@ pub async fn get_all(
.fetch_all(pool) .fetch_all(pool)
.await .await
.context("Failed to fetch games") .context("Failed to fetch games")
}
/// Looks up the id of the row in `games` whose `name` column equals `name`.
///
/// Runs `SELECT id FROM games WHERE name = $1` against `pool` and returns the
/// selected `id` value.
///
/// # Errors
///
/// Any failure from the query is returned with the context
/// "Failed to fetch game id by name". Because `fetch_one` is used, this
/// presumably includes the case where no game has the given name
/// (NOTE(review): confirm against the sqlx `fetch_one` documentation).
#[instrument(level = "debug", skip(pool))]
pub async fn get_id_by_name(
pool: &sqlx::Pool<sqlx::Postgres>,
name: &str,
) -> Result<i32> {
sqlx::query_scalar!(
"SELECT id FROM games WHERE name = $1",
name
)
.fetch_one(pool)
.await
.context("Failed to fetch game id by name")
} }

View File

@ -351,6 +351,7 @@ pub async fn update_from_api_response<'a>(
#[instrument(level = "debug", skip(pool))] #[instrument(level = "debug", skip(pool))]
pub async fn batched_get_for_search( pub async fn batched_get_for_search(
pool: &sqlx::Pool<sqlx::Postgres>, pool: &sqlx::Pool<sqlx::Postgres>,
game: i32,
page_size: i64, page_size: i64,
last_id: Option<i32>, last_id: Option<i32>,
) -> Result<Vec<ModForSearch>> { ) -> Result<Vec<ModForSearch>> {
@ -363,9 +364,10 @@ pub async fn batched_get_for_search(
game_id, game_id,
nexus_mod_id nexus_mod_id
FROM mods FROM mods
WHERE id > $2 WHERE id > $3 AND game_id = $1
ORDER BY mods.id ASC ORDER BY mods.id ASC
LIMIT $1", LIMIT $2",
game,
page_size, page_size,
last_id, last_id,
) )