Dump plugins by hash instead of id
Identical plugins are often included in many different files/mods. Grouping by hash in the SQL query shows every file and mod a plugin appears in. Previously, the last file that was processed would overwrite the plugin's dump file, so only that single file/mod would appear in the dump. Also adds an index on plugins.hash, since the query now filters and orders directly on it.
This commit is contained in:
parent
c9dcd3b7c5
commit
5d55e78283
1
migrations/20220312173403_add_plugin_hash_index.sql
Normal file
1
migrations/20220312173403_add_plugin_hash_index.sql
Normal file
@ -0,0 +1 @@
|
|||||||
|
CREATE INDEX ON plugins (hash);
|
@ -10,7 +10,7 @@ fn format_radix(mut x: u64, radix: u32) -> String {
|
|||||||
let mut result = vec![];
|
let mut result = vec![];
|
||||||
loop {
|
loop {
|
||||||
let m = x % radix as u64;
|
let m = x % radix as u64;
|
||||||
x = x / radix as u64;
|
x /= radix as u64;
|
||||||
|
|
||||||
// will panic if you use a bad radix (< 2 or > 36).
|
// will panic if you use a bad radix (< 2 or > 36).
|
||||||
result.push(std::char::from_digit(m as u32, radix).unwrap());
|
result.push(std::char::from_digit(m as u32, radix).unwrap());
|
||||||
@ -23,10 +23,10 @@ fn format_radix(mut x: u64, radix: u32) -> String {
|
|||||||
|
|
||||||
pub async fn dump_plugin_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> Result<()> {
|
pub async fn dump_plugin_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> Result<()> {
|
||||||
let page_size = 20;
|
let page_size = 20;
|
||||||
let mut last_id = None;
|
let mut last_hash = None;
|
||||||
loop {
|
loop {
|
||||||
let plugins =
|
let plugins =
|
||||||
plugin::batched_get_with_data(&pool, page_size, last_id, "Skyrim.esm", 1).await?;
|
plugin::batched_get_by_hash_with_mods(pool, page_size, last_hash, "Skyrim.esm", 1).await?;
|
||||||
if plugins.is_empty() {
|
if plugins.is_empty() {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -36,8 +36,8 @@ pub async fn dump_plugin_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> R
|
|||||||
let path = path.join(format!("{}.json", format_radix(plugin.hash as u64, 36)));
|
let path = path.join(format!("{}.json", format_radix(plugin.hash as u64, 36)));
|
||||||
let mut file = File::create(path)?;
|
let mut file = File::create(path)?;
|
||||||
write!(file, "{}", serde_json::to_string(&plugin)?)?;
|
write!(file, "{}", serde_json::to_string(&plugin)?)?;
|
||||||
last_id = Some(plugin.id);
|
last_hash = Some(plugin.hash);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return Ok(());
|
Ok(())
|
||||||
}
|
}
|
||||||
|
@ -38,23 +38,11 @@ pub struct UnsavedPlugin<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize, FromRow)]
|
#[derive(Debug, Serialize, Deserialize, FromRow)]
|
||||||
pub struct PluginWithData {
|
pub struct PluginsByHashWithMods {
|
||||||
pub id: i32,
|
|
||||||
pub name: String,
|
|
||||||
pub hash: i64,
|
pub hash: i64,
|
||||||
pub file_id: i32,
|
pub plugins: Option<serde_json::Value>,
|
||||||
pub mod_id: i32,
|
pub files: Option<serde_json::Value>,
|
||||||
pub version: f64,
|
pub mods: Option<serde_json::Value>,
|
||||||
pub size: i64,
|
|
||||||
pub author: Option<String>,
|
|
||||||
pub description: Option<String>,
|
|
||||||
pub masters: Vec<String>,
|
|
||||||
pub file_name: String,
|
|
||||||
pub file_path: String,
|
|
||||||
pub updated_at: NaiveDateTime,
|
|
||||||
pub created_at: NaiveDateTime,
|
|
||||||
pub file: Option<serde_json::Value>,
|
|
||||||
pub r#mod: Option<serde_json::Value>,
|
|
||||||
pub cells: Option<serde_json::Value>,
|
pub cells: Option<serde_json::Value>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -90,36 +78,37 @@ pub async fn insert<'a>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[instrument(level = "debug", skip(pool))]
|
#[instrument(level = "debug", skip(pool))]
|
||||||
pub async fn batched_get_with_data(
|
pub async fn batched_get_by_hash_with_mods(
|
||||||
pool: &sqlx::Pool<sqlx::Postgres>,
|
pool: &sqlx::Pool<sqlx::Postgres>,
|
||||||
page_size: i64,
|
page_size: i64,
|
||||||
last_id: Option<i32>,
|
last_hash: Option<i64>,
|
||||||
master: &str,
|
master: &str,
|
||||||
world_id: i32,
|
world_id: i32,
|
||||||
) -> Result<Vec<PluginWithData>> {
|
) -> Result<Vec<PluginsByHashWithMods>> {
|
||||||
let last_id = last_id.unwrap_or(0);
|
let last_hash = last_hash.unwrap_or(-9223372036854775808); // psql bigint min
|
||||||
sqlx::query_as!(
|
sqlx::query_as!(
|
||||||
PluginWithData,
|
PluginsByHashWithMods,
|
||||||
"SELECT
|
"SELECT
|
||||||
plugins.*,
|
plugins.hash,
|
||||||
json_agg(DISTINCT files.*) as file,
|
json_agg(DISTINCT plugins.*) as plugins,
|
||||||
json_agg(DISTINCT mods.*) as mod,
|
json_agg(DISTINCT files.*) as files,
|
||||||
|
json_agg(DISTINCT mods.*) as mods,
|
||||||
COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells
|
COALESCE(json_agg(DISTINCT jsonb_build_object('x', cells.x, 'y', cells.y)) FILTER (WHERE cells.x IS NOT NULL AND cells.y IS NOT NULL AND cells.master = $3 AND cells.world_id = $4), '[]') AS cells
|
||||||
FROM plugins
|
FROM plugins
|
||||||
LEFT OUTER JOIN files ON files.id = plugins.file_id
|
LEFT OUTER JOIN files ON files.id = plugins.file_id
|
||||||
LEFT OUTER JOIN mods ON mods.id = files.mod_id
|
LEFT OUTER JOIN mods ON mods.id = files.mod_id
|
||||||
LEFT OUTER JOIN plugin_cells ON plugin_cells.plugin_id = plugins.id
|
LEFT OUTER JOIN plugin_cells ON plugin_cells.plugin_id = plugins.id
|
||||||
LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id
|
LEFT OUTER JOIN cells ON cells.id = plugin_cells.cell_id
|
||||||
WHERE plugins.id > $2
|
WHERE plugins.hash > $2
|
||||||
GROUP BY plugins.id
|
GROUP BY plugins.hash
|
||||||
ORDER BY plugins.id ASC
|
ORDER BY plugins.hash ASC
|
||||||
LIMIT $1",
|
LIMIT $1",
|
||||||
page_size,
|
page_size,
|
||||||
last_id,
|
last_hash,
|
||||||
master,
|
master,
|
||||||
world_id
|
world_id
|
||||||
)
|
)
|
||||||
.fetch_all(pool)
|
.fetch_all(pool)
|
||||||
.await
|
.await
|
||||||
.context("Failed to batch get with data")
|
.context("Failed to batch get by hash with mods")
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user