Add updated_after option to limit files generated on every run

Instead of generating files for all mods every update, only update files for mods that have been updated since the last run.
This commit is contained in:
Tyler Hallada 2022-06-04 00:50:11 -04:00
parent bed49d0953
commit 026d175da4
9 changed files with 200 additions and 90 deletions

1
.gitignore vendored
View File

@ -4,3 +4,4 @@ plugins.zip
plugins plugins
cells cells
mods mods
files

View File

@ -1,14 +1,27 @@
#!/bin/bash
# Regenerate the map data files. To avoid re-dumping everything on every run,
# capture the mtime of the previous run's cells/edits.json and pass it as
# -u/--updated-after to the dump commands that support incremental output.
if [ -f cells/edits.json ]; then
    last_update_time=$(date -r cells/edits.json +'%Y-%m-%dT%H:%M:%S')
fi
mkdir -p logs
# Run the updater first; it refreshes the database the dump commands read from.
./target/release/mod-mapper &>> logs/modmapper.log
mkdir -p cells
mkdir -p mods
mkdir -p files
mkdir -p plugins_data
# Build the incremental-dump flag once instead of duplicating every command in
# two branches of an if/else. When no previous edits.json existed,
# update_args stays empty and the commands run as full dumps, exactly as the
# else-branch of the original did.
update_args=()
if [ -n "$last_update_time" ]; then
    update_args=(-u "$last_update_time")
fi
./target/release/mod-mapper -e cells/edits.json
./target/release/mod-mapper -c cells "${update_args[@]}"
./target/release/mod-mapper -s mods/mod_search_index.json
./target/release/mod-mapper -M mods/mod_cell_counts.json
./target/release/mod-mapper -m mods "${update_args[@]}"
./target/release/mod-mapper -P plugins_data
./target/release/mod-mapper -F files "${update_args[@]}"

View File

@ -1,16 +1,17 @@
use anyhow::Result; use anyhow::Result;
use chrono::NaiveDateTime;
use std::fs::File; use std::fs::File;
use std::io::Write; use std::io::Write;
use std::path::Path; use std::path::Path;
use crate::models::file; use crate::models::file;
pub async fn dump_file_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> Result<()> { pub async fn dump_file_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str, updated_after: Option<NaiveDateTime>) -> Result<()> {
let page_size = 20; let page_size = 20;
let mut last_id = None; let mut last_id = None;
loop { loop {
let files = let files =
file::batched_get_with_cells(&pool, page_size, last_id, "Skyrim.esm", 1).await?; file::batched_get_with_cells(&pool, page_size, last_id, "Skyrim.esm", 1, updated_after).await?;
if files.is_empty() { if files.is_empty() {
break; break;
} }

View File

@ -1,16 +1,17 @@
use anyhow::Result; use anyhow::Result;
use chrono::NaiveDateTime;
use std::fs::File; use std::fs::File;
use std::io::Write; use std::io::Write;
use std::path::Path; use std::path::Path;
use crate::models::game_mod; use crate::models::game_mod;
pub async fn dump_mod_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> Result<()> { pub async fn dump_mod_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str, updated_after: Option<NaiveDateTime>) -> Result<()> {
let page_size = 20; let page_size = 20;
let mut last_id = None; let mut last_id = None;
loop { loop {
let mods = let mods =
game_mod::batched_get_with_cells(&pool, page_size, last_id, "Skyrim.esm", 1).await?; game_mod::batched_get_with_cells(&pool, page_size, last_id, "Skyrim.esm", 1, updated_after).await?;
if mods.is_empty() { if mods.is_empty() {
break; break;
} }

View File

@ -1,4 +1,5 @@
use anyhow::Result; use anyhow::Result;
use chrono::NaiveDateTime;
use std::fs::{create_dir_all, File}; use std::fs::{create_dir_all, File};
use std::io::Write; use std::io::Write;
use std::path::Path; use std::path::Path;
@ -21,12 +22,12 @@ fn format_radix(mut x: u64, radix: u32) -> String {
result.into_iter().rev().collect() result.into_iter().rev().collect()
} }
pub async fn dump_plugin_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> Result<()> { pub async fn dump_plugin_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str, updated_after: Option<NaiveDateTime>) -> Result<()> {
let page_size = 20; let page_size = 20;
let mut last_hash = None; let mut last_hash = None;
loop { loop {
let plugins = let plugins =
plugin::batched_get_by_hash_with_mods(pool, page_size, last_hash, "Skyrim.esm", 1).await?; plugin::batched_get_by_hash_with_mods(pool, page_size, last_hash, "Skyrim.esm", 1, updated_after).await?;
if plugins.is_empty() { if plugins.is_empty() {
break; break;
} }

View File

@ -1,5 +1,6 @@
use anyhow::Result; use anyhow::Result;
use argh::FromArgs; use argh::FromArgs;
use chrono::NaiveDateTime;
use dotenv::dotenv; use dotenv::dotenv;
use sqlx::postgres::PgPoolOptions; use sqlx::postgres::PgPoolOptions;
use std::env; use std::env;
@ -62,6 +63,10 @@ struct Args {
/// backfill the is_translation column in the mods table /// backfill the is_translation column in the mods table
#[argh(switch)] #[argh(switch)]
backfill_is_translation: bool, backfill_is_translation: bool,
/// when dumping data, only dump data for mods or files that have been updated since this date
#[argh(option, short = 'u')]
updated_after: Option<NaiveDateTime>,
} }
#[tokio::main] #[tokio::main]
@ -84,7 +89,7 @@ pub async fn main() -> Result<()> {
return dump_cell_data(&pool, &dir).await; return dump_cell_data(&pool, &dir).await;
} }
if let Some(dir) = args.mod_data { if let Some(dir) = args.mod_data {
return dump_mod_data(&pool, &dir).await; return dump_mod_data(&pool, &dir, args.updated_after).await;
} }
if let Some(path) = args.mod_search_index { if let Some(path) = args.mod_search_index {
return dump_mod_search_index(&pool, &path).await; return dump_mod_search_index(&pool, &path).await;
@ -93,10 +98,10 @@ pub async fn main() -> Result<()> {
return dump_mod_cell_counts(&pool, &path).await; return dump_mod_cell_counts(&pool, &path).await;
} }
if let Some(path) = args.plugin_data { if let Some(path) = args.plugin_data {
return dump_plugin_data(&pool, &path).await; return dump_plugin_data(&pool, &path, args.updated_after).await;
} }
if let Some(path) = args.file_data { if let Some(path) = args.file_data {
return dump_file_data(&pool, &path).await; return dump_file_data(&pool, &path, args.updated_after).await;
} }
if let Some(dir) = args.download_tiles { if let Some(dir) = args.download_tiles {
return download_tiles(&dir).await; return download_tiles(&dir).await;

View File

@ -205,29 +205,57 @@ pub async fn batched_get_with_cells(
last_id: Option<i32>, last_id: Option<i32>,
master: &str, master: &str,
world_id: i32, world_id: i32,
updated_after: Option<NaiveDateTime>,
) -> Result<Vec<FileWithCells>> { ) -> Result<Vec<FileWithCells>> {
let last_id = last_id.unwrap_or(0); let last_id = last_id.unwrap_or(0);
sqlx::query_as!( if let Some(updated_after) = updated_after {
FileWithCells, sqlx::query_as!(
"SELECT FileWithCells,
files.*, "SELECT
COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells, files.*,
COALESCE(json_agg(DISTINCT jsonb_build_object('hash', plugins.hash, 'file_name', plugins.file_name)) FILTER (WHERE plugins.hash IS NOT NULL), '[]') AS plugins, COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells,
COUNT(plugins.*) AS plugin_count COALESCE(json_agg(DISTINCT jsonb_build_object('hash', plugins.hash, 'file_name', plugins.file_name)) FILTER (WHERE plugins.hash IS NOT NULL), '[]') AS plugins,
FROM files COUNT(plugins.*) AS plugin_count
LEFT OUTER JOIN plugin_cells ON plugin_cells.file_id = files.id FROM files
LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id LEFT OUTER JOIN plugin_cells ON plugin_cells.file_id = files.id
LEFT OUTER JOIN plugins ON plugins.file_id = files.id LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id
WHERE files.id > $2 LEFT OUTER JOIN plugins ON plugins.file_id = files.id
GROUP BY files.id WHERE files.id > $2 AND files.updated_at > $5
ORDER BY files.id ASC GROUP BY files.id
LIMIT $1", ORDER BY files.id ASC
page_size, LIMIT $1",
last_id, page_size,
master, last_id,
world_id master,
) world_id,
.fetch_all(pool) updated_after
.await )
.context("Failed to batch get with cells") .fetch_all(pool)
.await
.context("Failed to batch get with cells")
} else {
sqlx::query_as!(
FileWithCells,
"SELECT
files.*,
COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells,
COALESCE(json_agg(DISTINCT jsonb_build_object('hash', plugins.hash, 'file_name', plugins.file_name)) FILTER (WHERE plugins.hash IS NOT NULL), '[]') AS plugins,
COUNT(plugins.*) AS plugin_count
FROM files
LEFT OUTER JOIN plugin_cells ON plugin_cells.file_id = files.id
LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id
LEFT OUTER JOIN plugins ON plugins.file_id = files.id
WHERE files.id > $2
GROUP BY files.id
ORDER BY files.id ASC
LIMIT $1",
page_size,
last_id,
master,
world_id
)
.fetch_all(pool)
.await
.context("Failed to batch get with cells")
}
} }

View File

@ -362,33 +362,63 @@ pub async fn batched_get_with_cells(
last_id: Option<i32>, last_id: Option<i32>,
master: &str, master: &str,
world_id: i32, world_id: i32,
updated_after: Option<NaiveDateTime>,
) -> Result<Vec<ModWithCells>> { ) -> Result<Vec<ModWithCells>> {
let last_id = last_id.unwrap_or(0); let last_id = last_id.unwrap_or(0);
sqlx::query_as!( if let Some(updated_after) = updated_after {
ModWithCells, sqlx::query_as!(
"SELECT ModWithCells,
mods.*, "SELECT
COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells, mods.*,
COALESCE(json_agg(DISTINCT jsonb_build_object('id', files.nexus_file_id, 'name', files.name, 'version', files.version, 'category', files.category)) FILTER (WHERE files.nexus_file_id IS NOT NULL), '[]') AS files, COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells,
COUNT(files.*) AS file_count, COALESCE(json_agg(DISTINCT jsonb_build_object('id', files.nexus_file_id, 'name', files.name, 'version', files.version, 'category', files.category)) FILTER (WHERE files.nexus_file_id IS NOT NULL), '[]') AS files,
COUNT(plugins.*) AS plugin_count COUNT(files.*) AS file_count,
FROM mods COUNT(plugins.*) AS plugin_count
LEFT OUTER JOIN plugin_cells ON plugin_cells.mod_id = mods.id FROM mods
LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id LEFT OUTER JOIN plugin_cells ON plugin_cells.mod_id = mods.id
LEFT OUTER JOIN files ON files.mod_id = mods.id LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id
LEFT OUTER JOIN plugins ON plugins.mod_id = mods.id LEFT OUTER JOIN files ON files.mod_id = mods.id
WHERE mods.id > $2 LEFT OUTER JOIN plugins ON plugins.mod_id = mods.id
GROUP BY mods.id WHERE mods.id > $2 AND mods.updated_at > $5
ORDER BY mods.id ASC GROUP BY mods.id
LIMIT $1", ORDER BY mods.id ASC
page_size, LIMIT $1",
last_id, page_size,
master, last_id,
world_id master,
) world_id,
.fetch_all(pool) updated_after
.await )
.context("Failed to batch get with cells") .fetch_all(pool)
.await
.context("Failed to batch get with cells")
} else {
sqlx::query_as!(
ModWithCells,
"SELECT
mods.*,
COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells,
COALESCE(json_agg(DISTINCT jsonb_build_object('id', files.nexus_file_id, 'name', files.name, 'version', files.version, 'category', files.category)) FILTER (WHERE files.nexus_file_id IS NOT NULL), '[]') AS files,
COUNT(files.*) AS file_count,
COUNT(plugins.*) AS plugin_count
FROM mods
LEFT OUTER JOIN plugin_cells ON plugin_cells.mod_id = mods.id
LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id
LEFT OUTER JOIN files ON files.mod_id = mods.id
LEFT OUTER JOIN plugins ON plugins.mod_id = mods.id
WHERE mods.id > $2
GROUP BY mods.id
ORDER BY mods.id ASC
LIMIT $1",
page_size,
last_id,
master,
world_id
)
.fetch_all(pool)
.await
.context("Failed to batch get with cells")
}
} }
#[instrument(level = "debug", skip(pool))] #[instrument(level = "debug", skip(pool))]

View File

@ -84,31 +84,61 @@ pub async fn batched_get_by_hash_with_mods(
last_hash: Option<i64>, last_hash: Option<i64>,
master: &str, master: &str,
world_id: i32, world_id: i32,
updated_after: Option<NaiveDateTime>,
) -> Result<Vec<PluginsByHashWithMods>> { ) -> Result<Vec<PluginsByHashWithMods>> {
let last_hash = last_hash.unwrap_or(-9223372036854775808); // psql bigint min let last_hash = last_hash.unwrap_or(-9223372036854775808); // psql bigint min
sqlx::query_as!( if let Some(updated_after) = updated_after {
PluginsByHashWithMods, sqlx::query_as!(
"SELECT PluginsByHashWithMods,
plugins.hash, "SELECT
json_agg(DISTINCT plugins.*) as plugins, plugins.hash,
json_agg(DISTINCT files.*) as files, json_agg(DISTINCT plugins.*) as plugins,
json_agg(DISTINCT mods.*) as mods, json_agg(DISTINCT files.*) as files,
COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells json_agg(DISTINCT mods.*) as mods,
FROM plugins COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells
LEFT OUTER JOIN files ON files.id = plugins.file_id FROM plugins
LEFT OUTER JOIN mods ON mods.id = files.mod_id LEFT OUTER JOIN files ON files.id = plugins.file_id
LEFT OUTER JOIN plugin_cells ON plugin_cells.plugin_id = plugins.id LEFT OUTER JOIN mods ON mods.id = files.mod_id
LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id LEFT OUTER JOIN plugin_cells ON plugin_cells.plugin_id = plugins.id
WHERE plugins.hash > $2 LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id
GROUP BY plugins.hash WHERE plugins.hash > $2 AND plugins.updated_at > $5
ORDER BY plugins.hash ASC GROUP BY plugins.hash
LIMIT $1", ORDER BY plugins.hash ASC
page_size, LIMIT $1",
last_hash, page_size,
master, last_hash,
world_id master,
) world_id,
.fetch_all(pool) updated_after
.await )
.context("Failed to batch get by hash with mods") .fetch_all(pool)
.await
.context("Failed to batch get by hash with mods")
} else {
sqlx::query_as!(
PluginsByHashWithMods,
"SELECT
plugins.hash,
json_agg(DISTINCT plugins.*) as plugins,
json_agg(DISTINCT files.*) as files,
json_agg(DISTINCT mods.*) as mods,
COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells
FROM plugins
LEFT OUTER JOIN files ON files.id = plugins.file_id
LEFT OUTER JOIN mods ON mods.id = files.mod_id
LEFT OUTER JOIN plugin_cells ON plugin_cells.plugin_id = plugins.id
LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id
WHERE plugins.hash > $2
GROUP BY plugins.hash
ORDER BY plugins.hash ASC
LIMIT $1",
page_size,
last_hash,
master,
world_id
)
.fetch_all(pool)
.await
.context("Failed to batch get by hash with mods")
}
} }