Add updated_after option to limit files generated on every run

Instead of generating files for all mods every update, only update files for mods that have been updated since the last run.
This commit is contained in:
Tyler Hallada 2022-06-04 00:50:11 -04:00
parent bed49d0953
commit 026d175da4
9 changed files with 200 additions and 90 deletions

1
.gitignore vendored
View File

@@ -4,3 +4,4 @@ plugins.zip
plugins
cells
mods
files

View File

@@ -1,14 +1,27 @@
#!/bin/bash
if [ -f cells/edits.json ]; then
last_update_time=$(date -r cells/edits.json +'%Y-%m-%dT%H:%M:%S')
fi
mkdir -p logs
./target/release/mod-mapper &>> logs/modmapper.log
mkdir -p cells
mkdir -p mods
mkdir -p files
mkdir -p plugins_data
./target/release/mod-mapper -e cells/edits.json
./target/release/mod-mapper -c cells
./target/release/mod-mapper -s mods/mod_search_index.json
./target/release/mod-mapper -M mods/mod_cell_counts.json
./target/release/mod-mapper -m mods
./target/release/mod-mapper -P plugins_data
./target/release/mod-mapper -F files
if [ -n "$last_update_time" ]; then
./target/release/mod-mapper -e cells/edits.json
./target/release/mod-mapper -c cells -u "$last_update_time"
./target/release/mod-mapper -s mods/mod_search_index.json
./target/release/mod-mapper -M mods/mod_cell_counts.json
./target/release/mod-mapper -m mods -u "$last_update_time"
./target/release/mod-mapper -P plugins_data
./target/release/mod-mapper -F files -u "$last_update_time"
else
./target/release/mod-mapper -e cells/edits.json
./target/release/mod-mapper -c cells
./target/release/mod-mapper -s mods/mod_search_index.json
./target/release/mod-mapper -M mods/mod_cell_counts.json
./target/release/mod-mapper -m mods
./target/release/mod-mapper -P plugins_data
./target/release/mod-mapper -F files
fi

View File

@@ -1,16 +1,17 @@
use anyhow::Result;
use chrono::NaiveDateTime;
use std::fs::File;
use std::io::Write;
use std::path::Path;
use crate::models::file;
pub async fn dump_file_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> Result<()> {
pub async fn dump_file_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str, updated_after: Option<NaiveDateTime>) -> Result<()> {
let page_size = 20;
let mut last_id = None;
loop {
let files =
file::batched_get_with_cells(&pool, page_size, last_id, "Skyrim.esm", 1).await?;
file::batched_get_with_cells(&pool, page_size, last_id, "Skyrim.esm", 1, updated_after).await?;
if files.is_empty() {
break;
}

View File

@@ -1,16 +1,17 @@
use anyhow::Result;
use chrono::NaiveDateTime;
use std::fs::File;
use std::io::Write;
use std::path::Path;
use crate::models::game_mod;
pub async fn dump_mod_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> Result<()> {
pub async fn dump_mod_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str, updated_after: Option<NaiveDateTime>) -> Result<()> {
let page_size = 20;
let mut last_id = None;
loop {
let mods =
game_mod::batched_get_with_cells(&pool, page_size, last_id, "Skyrim.esm", 1).await?;
game_mod::batched_get_with_cells(&pool, page_size, last_id, "Skyrim.esm", 1, updated_after).await?;
if mods.is_empty() {
break;
}

View File

@@ -1,4 +1,5 @@
use anyhow::Result;
use chrono::NaiveDateTime;
use std::fs::{create_dir_all, File};
use std::io::Write;
use std::path::Path;
@@ -21,12 +22,12 @@ fn format_radix(mut x: u64, radix: u32) -> String {
result.into_iter().rev().collect()
}
pub async fn dump_plugin_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> Result<()> {
pub async fn dump_plugin_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str, updated_after: Option<NaiveDateTime>) -> Result<()> {
let page_size = 20;
let mut last_hash = None;
loop {
let plugins =
plugin::batched_get_by_hash_with_mods(pool, page_size, last_hash, "Skyrim.esm", 1).await?;
plugin::batched_get_by_hash_with_mods(pool, page_size, last_hash, "Skyrim.esm", 1, updated_after).await?;
if plugins.is_empty() {
break;
}

View File

@@ -1,5 +1,6 @@
use anyhow::Result;
use argh::FromArgs;
use chrono::NaiveDateTime;
use dotenv::dotenv;
use sqlx::postgres::PgPoolOptions;
use std::env;
@@ -62,6 +63,10 @@ struct Args {
/// backfill the is_translation column in the mods table
#[argh(switch)]
backfill_is_translation: bool,
/// when dumping data, only dump data for mods or files that have been updated since this date
#[argh(option, short = 'u')]
updated_after: Option<NaiveDateTime>,
}
#[tokio::main]
@@ -84,7 +89,7 @@ pub async fn main() -> Result<()> {
return dump_cell_data(&pool, &dir).await;
}
if let Some(dir) = args.mod_data {
return dump_mod_data(&pool, &dir).await;
return dump_mod_data(&pool, &dir, args.updated_after).await;
}
if let Some(path) = args.mod_search_index {
return dump_mod_search_index(&pool, &path).await;
@@ -93,10 +98,10 @@ pub async fn main() -> Result<()> {
return dump_mod_cell_counts(&pool, &path).await;
}
if let Some(path) = args.plugin_data {
return dump_plugin_data(&pool, &path).await;
return dump_plugin_data(&pool, &path, args.updated_after).await;
}
if let Some(path) = args.file_data {
return dump_file_data(&pool, &path).await;
return dump_file_data(&pool, &path, args.updated_after).await;
}
if let Some(dir) = args.download_tiles {
return download_tiles(&dir).await;

View File

@@ -205,29 +205,57 @@ pub async fn batched_get_with_cells(
last_id: Option<i32>,
master: &str,
world_id: i32,
updated_after: Option<NaiveDateTime>,
) -> Result<Vec<FileWithCells>> {
let last_id = last_id.unwrap_or(0);
sqlx::query_as!(
FileWithCells,
"SELECT
files.*,
COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells,
COALESCE(json_agg(DISTINCT jsonb_build_object('hash', plugins.hash, 'file_name', plugins.file_name)) FILTER (WHERE plugins.hash IS NOT NULL), '[]') AS plugins,
COUNT(plugins.*) AS plugin_count
FROM files
LEFT OUTER JOIN plugin_cells ON plugin_cells.file_id = files.id
LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id
LEFT OUTER JOIN plugins ON plugins.file_id = files.id
WHERE files.id > $2
GROUP BY files.id
ORDER BY files.id ASC
LIMIT $1",
page_size,
last_id,
master,
world_id
)
.fetch_all(pool)
.await
.context("Failed to batch get with cells")
if let Some(updated_after) = updated_after {
sqlx::query_as!(
FileWithCells,
"SELECT
files.*,
COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells,
COALESCE(json_agg(DISTINCT jsonb_build_object('hash', plugins.hash, 'file_name', plugins.file_name)) FILTER (WHERE plugins.hash IS NOT NULL), '[]') AS plugins,
COUNT(plugins.*) AS plugin_count
FROM files
LEFT OUTER JOIN plugin_cells ON plugin_cells.file_id = files.id
LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id
LEFT OUTER JOIN plugins ON plugins.file_id = files.id
WHERE files.id > $2 AND files.updated_at > $5
GROUP BY files.id
ORDER BY files.id ASC
LIMIT $1",
page_size,
last_id,
master,
world_id,
updated_after
)
.fetch_all(pool)
.await
.context("Failed to batch get with cells")
} else {
sqlx::query_as!(
FileWithCells,
"SELECT
files.*,
COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells,
COALESCE(json_agg(DISTINCT jsonb_build_object('hash', plugins.hash, 'file_name', plugins.file_name)) FILTER (WHERE plugins.hash IS NOT NULL), '[]') AS plugins,
COUNT(plugins.*) AS plugin_count
FROM files
LEFT OUTER JOIN plugin_cells ON plugin_cells.file_id = files.id
LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id
LEFT OUTER JOIN plugins ON plugins.file_id = files.id
WHERE files.id > $2
GROUP BY files.id
ORDER BY files.id ASC
LIMIT $1",
page_size,
last_id,
master,
world_id
)
.fetch_all(pool)
.await
.context("Failed to batch get with cells")
}
}

View File

@@ -362,33 +362,63 @@ pub async fn batched_get_with_cells(
last_id: Option<i32>,
master: &str,
world_id: i32,
updated_after: Option<NaiveDateTime>,
) -> Result<Vec<ModWithCells>> {
let last_id = last_id.unwrap_or(0);
sqlx::query_as!(
ModWithCells,
"SELECT
mods.*,
COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells,
COALESCE(json_agg(DISTINCT jsonb_build_object('id', files.nexus_file_id, 'name', files.name, 'version', files.version, 'category', files.category)) FILTER (WHERE files.nexus_file_id IS NOT NULL), '[]') AS files,
COUNT(files.*) AS file_count,
COUNT(plugins.*) AS plugin_count
FROM mods
LEFT OUTER JOIN plugin_cells ON plugin_cells.mod_id = mods.id
LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id
LEFT OUTER JOIN files ON files.mod_id = mods.id
LEFT OUTER JOIN plugins ON plugins.mod_id = mods.id
WHERE mods.id > $2
GROUP BY mods.id
ORDER BY mods.id ASC
LIMIT $1",
page_size,
last_id,
master,
world_id
)
.fetch_all(pool)
.await
.context("Failed to batch get with cells")
if let Some(updated_after) = updated_after {
sqlx::query_as!(
ModWithCells,
"SELECT
mods.*,
COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells,
COALESCE(json_agg(DISTINCT jsonb_build_object('id', files.nexus_file_id, 'name', files.name, 'version', files.version, 'category', files.category)) FILTER (WHERE files.nexus_file_id IS NOT NULL), '[]') AS files,
COUNT(files.*) AS file_count,
COUNT(plugins.*) AS plugin_count
FROM mods
LEFT OUTER JOIN plugin_cells ON plugin_cells.mod_id = mods.id
LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id
LEFT OUTER JOIN files ON files.mod_id = mods.id
LEFT OUTER JOIN plugins ON plugins.mod_id = mods.id
WHERE mods.id > $2 AND mods.updated_at > $5
GROUP BY mods.id
ORDER BY mods.id ASC
LIMIT $1",
page_size,
last_id,
master,
world_id,
updated_after
)
.fetch_all(pool)
.await
.context("Failed to batch get with cells")
} else {
sqlx::query_as!(
ModWithCells,
"SELECT
mods.*,
COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells,
COALESCE(json_agg(DISTINCT jsonb_build_object('id', files.nexus_file_id, 'name', files.name, 'version', files.version, 'category', files.category)) FILTER (WHERE files.nexus_file_id IS NOT NULL), '[]') AS files,
COUNT(files.*) AS file_count,
COUNT(plugins.*) AS plugin_count
FROM mods
LEFT OUTER JOIN plugin_cells ON plugin_cells.mod_id = mods.id
LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id
LEFT OUTER JOIN files ON files.mod_id = mods.id
LEFT OUTER JOIN plugins ON plugins.mod_id = mods.id
WHERE mods.id > $2
GROUP BY mods.id
ORDER BY mods.id ASC
LIMIT $1",
page_size,
last_id,
master,
world_id
)
.fetch_all(pool)
.await
.context("Failed to batch get with cells")
}
}
#[instrument(level = "debug", skip(pool))]

View File

@@ -84,31 +84,61 @@ pub async fn batched_get_by_hash_with_mods(
last_hash: Option<i64>,
master: &str,
world_id: i32,
updated_after: Option<NaiveDateTime>,
) -> Result<Vec<PluginsByHashWithMods>> {
let last_hash = last_hash.unwrap_or(-9223372036854775808); // psql bigint min
sqlx::query_as!(
PluginsByHashWithMods,
"SELECT
plugins.hash,
json_agg(DISTINCT plugins.*) as plugins,
json_agg(DISTINCT files.*) as files,
json_agg(DISTINCT mods.*) as mods,
COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells
FROM plugins
LEFT OUTER JOIN files ON files.id = plugins.file_id
LEFT OUTER JOIN mods ON mods.id = files.mod_id
LEFT OUTER JOIN plugin_cells ON plugin_cells.plugin_id = plugins.id
LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id
WHERE plugins.hash > $2
GROUP BY plugins.hash
ORDER BY plugins.hash ASC
LIMIT $1",
page_size,
last_hash,
master,
world_id
)
.fetch_all(pool)
.await
.context("Failed to batch get by hash with mods")
if let Some(updated_after) = updated_after {
sqlx::query_as!(
PluginsByHashWithMods,
"SELECT
plugins.hash,
json_agg(DISTINCT plugins.*) as plugins,
json_agg(DISTINCT files.*) as files,
json_agg(DISTINCT mods.*) as mods,
COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells
FROM plugins
LEFT OUTER JOIN files ON files.id = plugins.file_id
LEFT OUTER JOIN mods ON mods.id = files.mod_id
LEFT OUTER JOIN plugin_cells ON plugin_cells.plugin_id = plugins.id
LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id
WHERE plugins.hash > $2 AND plugins.updated_at > $5
GROUP BY plugins.hash
ORDER BY plugins.hash ASC
LIMIT $1",
page_size,
last_hash,
master,
world_id,
updated_after
)
.fetch_all(pool)
.await
.context("Failed to batch get by hash with mods")
} else {
sqlx::query_as!(
PluginsByHashWithMods,
"SELECT
plugins.hash,
json_agg(DISTINCT plugins.*) as plugins,
json_agg(DISTINCT files.*) as files,
json_agg(DISTINCT mods.*) as mods,
COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells
FROM plugins
LEFT OUTER JOIN files ON files.id = plugins.file_id
LEFT OUTER JOIN mods ON mods.id = files.mod_id
LEFT OUTER JOIN plugin_cells ON plugin_cells.plugin_id = plugins.id
LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id
WHERE plugins.hash > $2
GROUP BY plugins.hash
ORDER BY plugins.hash ASC
LIMIT $1",
page_size,
last_hash,
master,
world_id
)
.fetch_all(pool)
.await
.context("Failed to batch get by hash with mods")
}
}