Dump plugins by hash instead of id

Exactly the same plugin is often included in many different files/mods. Grouping by hash in the SQL query produces one row per unique plugin hash, listing every file and mod that plugin appears in. Previously, the query returned one row per plugin id, but the dump file is named after the plugin's hash, so each duplicate plugin overwrote the same dump file and only the file/mod of the last one processed appeared in it.

Also adds an index on plugins.hash, since the dump query now filters, groups, and orders by that column.
This commit is contained in:
Tyler Hallada 2022-03-12 13:05:21 -05:00
parent c9dcd3b7c5
commit 5d55e78283
3 changed files with 24 additions and 34 deletions

View File

@ -0,0 +1 @@
CREATE INDEX ON plugins (hash);

View File

@ -10,7 +10,7 @@ fn format_radix(mut x: u64, radix: u32) -> String {
let mut result = vec![]; let mut result = vec![];
loop { loop {
let m = x % radix as u64; let m = x % radix as u64;
x = x / radix as u64; x /= radix as u64;
// will panic if you use a bad radix (< 2 or > 36). // will panic if you use a bad radix (< 2 or > 36).
result.push(std::char::from_digit(m as u32, radix).unwrap()); result.push(std::char::from_digit(m as u32, radix).unwrap());
@ -23,10 +23,10 @@ fn format_radix(mut x: u64, radix: u32) -> String {
pub async fn dump_plugin_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> Result<()> { pub async fn dump_plugin_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> Result<()> {
let page_size = 20; let page_size = 20;
let mut last_id = None; let mut last_hash = None;
loop { loop {
let plugins = let plugins =
plugin::batched_get_with_data(&pool, page_size, last_id, "Skyrim.esm", 1).await?; plugin::batched_get_by_hash_with_mods(pool, page_size, last_hash, "Skyrim.esm", 1).await?;
if plugins.is_empty() { if plugins.is_empty() {
break; break;
} }
@ -36,8 +36,8 @@ pub async fn dump_plugin_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> R
let path = path.join(format!("{}.json", format_radix(plugin.hash as u64, 36))); let path = path.join(format!("{}.json", format_radix(plugin.hash as u64, 36)));
let mut file = File::create(path)?; let mut file = File::create(path)?;
write!(file, "{}", serde_json::to_string(&plugin)?)?; write!(file, "{}", serde_json::to_string(&plugin)?)?;
last_id = Some(plugin.id); last_hash = Some(plugin.hash);
} }
} }
return Ok(()); Ok(())
} }

View File

@ -38,23 +38,11 @@ pub struct UnsavedPlugin<'a> {
} }
#[derive(Debug, Serialize, Deserialize, FromRow)] #[derive(Debug, Serialize, Deserialize, FromRow)]
pub struct PluginWithData { pub struct PluginsByHashWithMods {
pub id: i32,
pub name: String,
pub hash: i64, pub hash: i64,
pub file_id: i32, pub plugins: Option<serde_json::Value>,
pub mod_id: i32, pub files: Option<serde_json::Value>,
pub version: f64, pub mods: Option<serde_json::Value>,
pub size: i64,
pub author: Option<String>,
pub description: Option<String>,
pub masters: Vec<String>,
pub file_name: String,
pub file_path: String,
pub updated_at: NaiveDateTime,
pub created_at: NaiveDateTime,
pub file: Option<serde_json::Value>,
pub r#mod: Option<serde_json::Value>,
pub cells: Option<serde_json::Value>, pub cells: Option<serde_json::Value>,
} }
@ -90,36 +78,37 @@ pub async fn insert<'a>(
} }
#[instrument(level = "debug", skip(pool))] #[instrument(level = "debug", skip(pool))]
pub async fn batched_get_with_data( pub async fn batched_get_by_hash_with_mods(
pool: &sqlx::Pool<sqlx::Postgres>, pool: &sqlx::Pool<sqlx::Postgres>,
page_size: i64, page_size: i64,
last_id: Option<i32>, last_hash: Option<i64>,
master: &str, master: &str,
world_id: i32, world_id: i32,
) -> Result<Vec<PluginWithData>> { ) -> Result<Vec<PluginsByHashWithMods>> {
let last_id = last_id.unwrap_or(0); let last_hash = last_hash.unwrap_or(-9223372036854775808); // psql bigint min
sqlx::query_as!( sqlx::query_as!(
PluginWithData, PluginsByHashWithMods,
"SELECT "SELECT
plugins.*, plugins.hash,
json_agg(DISTINCT files.*) as file, json_agg(DISTINCT plugins.*) as plugins,
json_agg(DISTINCT mods.*) as mod, json_agg(DISTINCT files.*) as files,
json_agg(DISTINCT mods.*) as mods,
COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells
FROM plugins FROM plugins
LEFT OUTER JOIN files ON files.id = plugins.file_id LEFT OUTER JOIN files ON files.id = plugins.file_id
LEFT OUTER JOIN mods ON mods.id = files.mod_id LEFT OUTER JOIN mods ON mods.id = files.mod_id
LEFT OUTER JOIN plugin_cells ON plugin_cells.plugin_id = plugins.id LEFT OUTER JOIN plugin_cells ON plugin_cells.plugin_id = plugins.id
LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id
WHERE plugins.id > $2 WHERE plugins.hash > $2
GROUP BY plugins.id GROUP BY plugins.hash
ORDER BY plugins.id ASC ORDER BY plugins.hash ASC
LIMIT $1", LIMIT $1",
page_size, page_size,
last_id, last_hash,
master, master,
world_id world_id
) )
.fetch_all(pool) .fetch_all(pool)
.await .await
.context("Failed to batch get with data") .context("Failed to batch get by hash with mods")
} }