Add updated_after option to limit files generated on every run

Instead of regenerating files for all mods on every update, only regenerate files for mods that have been updated since the last run.
Tyler Hallada 2022-06-04 00:50:11 -04:00
parent bed49d0953
commit 026d175da4
9 changed files with 200 additions and 90 deletions

.gitignore

@@ -4,3 +4,4 @@ plugins.zip
 plugins
 cells
 mods
+files


@@ -1,10 +1,22 @@
 #!/bin/bash
+if [ -f cells/edits.json ]; then
+last_update_time=$(date -r cells/edits.json +'%Y-%m-%dT%H:%M:%S')
+fi
 mkdir -p logs
 ./target/release/mod-mapper &>> logs/modmapper.log
 mkdir -p cells
 mkdir -p mods
 mkdir -p files
 mkdir -p plugins_data
+if [ -n "$last_update_time" ]; then
+./target/release/mod-mapper -e cells/edits.json
+./target/release/mod-mapper -c cells -u "$last_update_time"
+./target/release/mod-mapper -s mods/mod_search_index.json
+./target/release/mod-mapper -M mods/mod_cell_counts.json
+./target/release/mod-mapper -m mods -u "$last_update_time"
+./target/release/mod-mapper -P plugins_data
+./target/release/mod-mapper -F files -u "$last_update_time"
+else
 ./target/release/mod-mapper -e cells/edits.json
 ./target/release/mod-mapper -c cells
 ./target/release/mod-mapper -s mods/mod_search_index.json
@@ -12,3 +24,4 @@ mkdir -p plugins_data
 ./target/release/mod-mapper -m mods
 ./target/release/mod-mapper -P plugins_data
 ./target/release/mod-mapper -F files
+fi
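The cutoff here is simply the mtime of the previous run's cells/edits.json, captured before the new scrape and dumps overwrite it; on a first run, with no edits.json yet, the script falls back to full dumps. The `date` format string matters: `%Y-%m-%dT%H:%M:%S` produces exactly the shape that chrono's `NaiveDateTime` parses by default, which is what lets the `-u` value round-trip into Rust with no custom parsing. A minimal sketch of that handshake (illustration only, not code from this commit):

```rust
use chrono::NaiveDateTime;

fn main() {
    // What `date -r cells/edits.json +'%Y-%m-%dT%H:%M:%S'` emits, e.g.:
    let from_shell = "2022-06-04T00:50:11";
    // NaiveDateTime's FromStr accepts this ISO 8601 shape directly,
    // so `-u "$last_update_time"` parses without a custom format string.
    let cutoff: NaiveDateTime = from_shell.parse().expect("valid timestamp");
    println!("dumping records updated after {cutoff}");
}
```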


@@ -1,16 +1,17 @@
 use anyhow::Result;
+use chrono::NaiveDateTime;
 use std::fs::File;
 use std::io::Write;
 use std::path::Path;

 use crate::models::file;

-pub async fn dump_file_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> Result<()> {
+pub async fn dump_file_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str, updated_after: Option<NaiveDateTime>) -> Result<()> {
     let page_size = 20;
     let mut last_id = None;
     loop {
         let files =
-            file::batched_get_with_cells(&pool, page_size, last_id, "Skyrim.esm", 1).await?;
+            file::batched_get_with_cells(&pool, page_size, last_id, "Skyrim.esm", 1, updated_after).await?;
         if files.is_empty() {
             break;
         }


@@ -1,16 +1,17 @@
 use anyhow::Result;
+use chrono::NaiveDateTime;
 use std::fs::File;
 use std::io::Write;
 use std::path::Path;

 use crate::models::game_mod;

-pub async fn dump_mod_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> Result<()> {
+pub async fn dump_mod_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str, updated_after: Option<NaiveDateTime>) -> Result<()> {
     let page_size = 20;
     let mut last_id = None;
     loop {
         let mods =
-            game_mod::batched_get_with_cells(&pool, page_size, last_id, "Skyrim.esm", 1).await?;
+            game_mod::batched_get_with_cells(&pool, page_size, last_id, "Skyrim.esm", 1, updated_after).await?;
         if mods.is_empty() {
             break;
         }


@@ -1,4 +1,5 @@
 use anyhow::Result;
+use chrono::NaiveDateTime;
 use std::fs::{create_dir_all, File};
 use std::io::Write;
 use std::path::Path;
@@ -21,12 +22,12 @@ fn format_radix(mut x: u64, radix: u32) -> String {
     result.into_iter().rev().collect()
 }

-pub async fn dump_plugin_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> Result<()> {
+pub async fn dump_plugin_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str, updated_after: Option<NaiveDateTime>) -> Result<()> {
     let page_size = 20;
     let mut last_hash = None;
     loop {
         let plugins =
-            plugin::batched_get_by_hash_with_mods(pool, page_size, last_hash, "Skyrim.esm", 1).await?;
+            plugin::batched_get_by_hash_with_mods(pool, page_size, last_hash, "Skyrim.esm", 1, updated_after).await?;
         if plugins.is_empty() {
             break;
         }


@@ -1,5 +1,6 @@
 use anyhow::Result;
 use argh::FromArgs;
+use chrono::NaiveDateTime;
 use dotenv::dotenv;
 use sqlx::postgres::PgPoolOptions;
 use std::env;
@@ -62,6 +63,10 @@ struct Args {
     /// backfill the is_translation column in the mods table
     #[argh(switch)]
     backfill_is_translation: bool,
+    /// when dumping data, only dump data for mods or files that have been
+    /// updated since this date
+    #[argh(option, short = 'u')]
+    updated_after: Option<NaiveDateTime>,
 }

 #[tokio::main]
@@ -84,7 +89,7 @@ pub async fn main() -> Result<()> {
         return dump_cell_data(&pool, &dir).await;
     }
     if let Some(dir) = args.mod_data {
-        return dump_mod_data(&pool, &dir).await;
+        return dump_mod_data(&pool, &dir, args.updated_after).await;
     }
     if let Some(path) = args.mod_search_index {
         return dump_mod_search_index(&pool, &path).await;
@@ -93,10 +98,10 @@ pub async fn main() -> Result<()> {
         return dump_mod_cell_counts(&pool, &path).await;
     }
     if let Some(path) = args.plugin_data {
-        return dump_plugin_data(&pool, &path).await;
+        return dump_plugin_data(&pool, &path, args.updated_after).await;
     }
     if let Some(path) = args.file_data {
-        return dump_file_data(&pool, &path).await;
+        return dump_file_data(&pool, &path, args.updated_after).await;
     }
     if let Some(dir) = args.download_tiles {
         return download_tiles(&dir).await;

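argh can take `Option<NaiveDateTime>` directly here because, as far as I can tell, argh provides a `FromArgValue` impl for any type implementing `FromStr` with a `Display`-able error, and chrono's `NaiveDateTime` qualifies. A minimal sketch of the new flag in isolation (not the full modmapper `Args` struct):

```rust
use argh::FromArgs;
use chrono::NaiveDateTime;

#[derive(FromArgs)]
/// Sketch: just the new option, standing in for the real CLI.
struct Args {
    /// only dump data updated since this date
    #[argh(option, short = 'u')]
    updated_after: Option<NaiveDateTime>,
}

fn main() {
    // e.g. `mod-mapper -u 2022-06-04T00:50:11`
    let args: Args = argh::from_env();
    println!("{:?}", args.updated_after);
}
```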

@@ -205,8 +205,35 @@ pub async fn batched_get_with_cells(
     last_id: Option<i32>,
     master: &str,
     world_id: i32,
+    updated_after: Option<NaiveDateTime>,
 ) -> Result<Vec<FileWithCells>> {
     let last_id = last_id.unwrap_or(0);
+    if let Some(updated_after) = updated_after {
+        sqlx::query_as!(
+            FileWithCells,
+            "SELECT
+                files.*,
+                COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells,
+                COALESCE(json_agg(DISTINCT jsonb_build_object('hash', plugins.hash, 'file_name', plugins.file_name)) FILTER (WHERE plugins.hash IS NOT NULL), '[]') AS plugins,
+                COUNT(plugins.*) AS plugin_count
+            FROM files
+            LEFT OUTER JOIN plugin_cells ON plugin_cells.file_id = files.id
+            LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id
+            LEFT OUTER JOIN plugins ON plugins.file_id = files.id
+            WHERE files.id > $2 AND files.updated_at > $5
+            GROUP BY files.id
+            ORDER BY files.id ASC
+            LIMIT $1",
+            page_size,
+            last_id,
+            master,
+            world_id,
+            updated_after
+        )
+        .fetch_all(pool)
+        .await
+        .context("Failed to batch get with cells")
+    } else {
     sqlx::query_as!(
         FileWithCells,
         "SELECT
@@ -231,3 +258,4 @@ pub async fn batched_get_with_cells(
     .await
     .context("Failed to batch get with cells")
 }
+}

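The near-duplicate query in each branch is the price of sqlx's compile-time checking: `query_as!` needs a complete string literal it can verify against the database, so an optional `updated_at` filter can't be spliced in at runtime. A miniature of the same pattern, using a hypothetical `items` table rather than anything from this repo:

```rust
use anyhow::Result;
use chrono::NaiveDateTime;
use sqlx::PgPool;

// Hypothetical table: items(id int4 not null, updated_at timestamp not null).
async fn item_ids(pool: &PgPool, updated_after: Option<NaiveDateTime>) -> Result<Vec<i32>> {
    let ids = if let Some(updated_after) = updated_after {
        // Each variant is a full literal so sqlx can type-check it at build time.
        sqlx::query_scalar!("SELECT id FROM items WHERE updated_at > $1", updated_after)
            .fetch_all(pool)
            .await?
    } else {
        sqlx::query_scalar!("SELECT id FROM items")
            .fetch_all(pool)
            .await?
    };
    Ok(ids)
}
```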

@@ -362,8 +362,37 @@ pub async fn batched_get_with_cells(
     last_id: Option<i32>,
     master: &str,
     world_id: i32,
+    updated_after: Option<NaiveDateTime>,
 ) -> Result<Vec<ModWithCells>> {
     let last_id = last_id.unwrap_or(0);
+    if let Some(updated_after) = updated_after {
+        sqlx::query_as!(
+            ModWithCells,
+            "SELECT
+                mods.*,
+                COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells,
+                COALESCE(json_agg(DISTINCT jsonb_build_object('id', files.nexus_file_id, 'name', files.name, 'version', files.version, 'category', files.category)) FILTER (WHERE files.nexus_file_id IS NOT NULL), '[]') AS files,
+                COUNT(files.*) AS file_count,
+                COUNT(plugins.*) AS plugin_count
+            FROM mods
+            LEFT OUTER JOIN plugin_cells ON plugin_cells.mod_id = mods.id
+            LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id
+            LEFT OUTER JOIN files ON files.mod_id = mods.id
+            LEFT OUTER JOIN plugins ON plugins.mod_id = mods.id
+            WHERE mods.id > $2 AND mods.updated_at > $5
+            GROUP BY mods.id
+            ORDER BY mods.id ASC
+            LIMIT $1",
+            page_size,
+            last_id,
+            master,
+            world_id,
+            updated_after
+        )
+        .fetch_all(pool)
+        .await
+        .context("Failed to batch get with cells")
+    } else {
     sqlx::query_as!(
         ModWithCells,
         "SELECT
@@ -390,6 +419,7 @@ pub async fn batched_get_with_cells(
     .await
     .context("Failed to batch get with cells")
 }
+}

 #[instrument(level = "debug", skip(pool))]
 pub async fn batched_get_cell_counts(


@@ -84,8 +84,37 @@ pub async fn batched_get_by_hash_with_mods(
     last_hash: Option<i64>,
     master: &str,
     world_id: i32,
+    updated_after: Option<NaiveDateTime>,
 ) -> Result<Vec<PluginsByHashWithMods>> {
     let last_hash = last_hash.unwrap_or(-9223372036854775808); // psql bigint min
+    if let Some(updated_after) = updated_after {
+        sqlx::query_as!(
+            PluginsByHashWithMods,
+            "SELECT
+                plugins.hash,
+                json_agg(DISTINCT plugins.*) as plugins,
+                json_agg(DISTINCT files.*) as files,
+                json_agg(DISTINCT mods.*) as mods,
+                COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells
+            FROM plugins
+            LEFT OUTER JOIN files ON files.id = plugins.file_id
+            LEFT OUTER JOIN mods ON mods.id = files.mod_id
+            LEFT OUTER JOIN plugin_cells ON plugin_cells.plugin_id = plugins.id
+            LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id
+            WHERE plugins.hash > $2 AND plugins.updated_at > $5
+            GROUP BY plugins.hash
+            ORDER BY plugins.hash ASC
+            LIMIT $1",
+            page_size,
+            last_hash,
+            master,
+            world_id,
+            updated_after
+        )
+        .fetch_all(pool)
+        .await
+        .context("Failed to batch get by hash with mods")
+    } else {
     sqlx::query_as!(
         PluginsByHashWithMods,
         "SELECT
@@ -112,3 +141,4 @@ pub async fn batched_get_by_hash_with_mods(
     .await
     .context("Failed to batch get by hash with mods")
 }
+}
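All three `batched_get_*` functions leave the keyset-pagination contract intact: the new `updated_at > $5` predicate only shrinks each page, while the caller still advances its cursor from the last row returned. Roughly how the dump loops above drive it; the visible diff cuts off before the cursor update, so this is a sketch, with the `hash` field name assumed from the query:

```rust
use anyhow::Result;
use chrono::NaiveDateTime;
use sqlx::PgPool;

use crate::models::plugin;

async fn walk_plugins(pool: &PgPool, updated_after: Option<NaiveDateTime>) -> Result<()> {
    let page_size = 20;
    let mut last_hash = None;
    loop {
        let plugins = plugin::batched_get_by_hash_with_mods(
            pool, page_size, last_hash, "Skyrim.esm", 1, updated_after,
        )
        .await?;
        if plugins.is_empty() {
            break;
        }
        // Resume after the last hash seen; filtering by updated_after
        // never moves the cursor backwards.
        last_hash = plugins.last().map(|p| p.hash);
        // ... serialize each entry in `plugins` here ...
    }
    Ok(())
}
```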