Fix bad interior cell data in db
I had a bad unique index on the `cells` table. This commit does not fix it but has two backfill scripts that will allow me to fixup the data before creating the new unique index. `is_base_game` identifies cells that are from the `Skyrim.esm` shipped with the game (and not from some mod that happens to also have a `Skyrim.esm` with a 0,0 cell with a different form id, see: https://github.com/thallada/modmapper-web/issues/8). `dump_cell_data` now only dumps cells from `is_base_game` cells. `deduplicate_interior_cells` removes duplicate cell and plugin_cell rows caused by the buggy index. I will need to upgrade the database and create a new index in a later commit.
This commit is contained in:
138
src/commands/backfills/deduplicate_interior_cells.rs
Normal file
138
src/commands/backfills/deduplicate_interior_cells.rs
Normal file
@@ -0,0 +1,138 @@
|
||||
/// There was a bug with the unique index on the `cells` table that was causing the same form_id
|
||||
/// and master to be inserted multiple times. This function deduplicates those, choosing the cell
|
||||
/// with `is_base_game = true` if there is one, otherwise randomly chooses one.
|
||||
/// rows referencing the duplicate cells are updated to reference the chosen cell.
|
||||
use anyhow::Result;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sqlx::postgres::PgDatabaseError;
|
||||
use sqlx::types::Json;
|
||||
use sqlx::FromRow;
|
||||
use tracing::{info, warn};
|
||||
|
||||
const PAGE_SIZE: i64 = 100;
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, FromRow)]
|
||||
pub struct CellId {
|
||||
pub id: i32,
|
||||
pub is_base_game: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, FromRow)]
|
||||
pub struct CellDuplicates {
|
||||
pub ids: Option<Json<Vec<CellId>>>,
|
||||
pub form_id: i32,
|
||||
pub master: String,
|
||||
}
|
||||
|
||||
pub async fn deduplicate_interior_cells(pool: &sqlx::Pool<sqlx::Postgres>) -> Result<()> {
|
||||
let mut page = 0;
|
||||
loop {
|
||||
info!("deduplicating interior cells page {}", page);
|
||||
let duplicates = sqlx::query_as!(
|
||||
CellDuplicates,
|
||||
r#"SELECT
|
||||
json_agg(
|
||||
json_build_object(
|
||||
'id', id,
|
||||
'is_base_game', is_base_game
|
||||
)) as "ids: Json<Vec<CellId>>",
|
||||
form_id,
|
||||
master
|
||||
FROM cells
|
||||
WHERE world_id IS NULL
|
||||
GROUP BY (form_id, master)
|
||||
HAVING COUNT(*) > 1
|
||||
LIMIT $1
|
||||
"#,
|
||||
PAGE_SIZE,
|
||||
)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
if duplicates.is_empty() {
|
||||
break;
|
||||
}
|
||||
|
||||
for duplicate_cell in duplicates.into_iter() {
|
||||
info!(
|
||||
"deduplicating cells form_id={} master={}",
|
||||
duplicate_cell.form_id, duplicate_cell.master
|
||||
);
|
||||
let duplicate_ids = duplicate_cell.ids.clone().unwrap();
|
||||
let chosen_cell = duplicate_ids
|
||||
.iter()
|
||||
.find(|cell| cell.is_base_game)
|
||||
.unwrap_or_else(|| {
|
||||
duplicate_ids
|
||||
.iter()
|
||||
.next()
|
||||
.expect("duplicate cell has no ids")
|
||||
});
|
||||
info!(
|
||||
"choosing cell_id={} is_base_game={}",
|
||||
chosen_cell.id, chosen_cell.is_base_game
|
||||
);
|
||||
// Update all plugin_cells cell_id references to point to the chosen cell
|
||||
let duplicate_ids = duplicate_cell
|
||||
.ids
|
||||
.unwrap()
|
||||
.iter()
|
||||
.map(|cell| cell.id)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// First, I need to fix-up any duplicated plugin_cells rows caused by broken
|
||||
// plugins that have multiple cells with the same form_id. For these duplicate
|
||||
// plugin_cells with the same plugin_id, I just arbitrarily choose one and delete
|
||||
// the others (since it's undefined behavior of which duplicate record should "win"
|
||||
// out in this case anyways). In the case of exterior cells, where the duplicate
|
||||
// interior cell bug is not a problem, the last processed cell record in the plugin
|
||||
// wins since `process_plugin` uses an upsert method which updates existing
|
||||
// `plugin_cells` if it tries to insert a new one that conflicts with an existing one.
|
||||
// So I am effectively retroactively doing the same here for interior cells.
|
||||
let plugin_cells_delete = sqlx::query!(
|
||||
r#"DELETE FROM plugin_cells
|
||||
WHERE id NOT IN (
|
||||
SELECT MIN(id)
|
||||
FROM plugin_cells
|
||||
WHERE cell_id = ANY($1)
|
||||
GROUP BY plugin_id
|
||||
)
|
||||
AND cell_id = ANY($1)
|
||||
"#,
|
||||
&duplicate_ids
|
||||
)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
info!(
|
||||
"deleted {} duplicate plugin_cells from broken plugins",
|
||||
plugin_cells_delete.rows_affected()
|
||||
);
|
||||
|
||||
let update = sqlx::query!(
|
||||
r#"UPDATE plugin_cells
|
||||
SET
|
||||
cell_id = $1,
|
||||
updated_at = now()
|
||||
WHERE cell_id = ANY($2)"#,
|
||||
chosen_cell.id,
|
||||
&duplicate_ids
|
||||
)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
info!("updated {} plugin_cells", update.rows_affected());
|
||||
|
||||
// Delete all cells that are not the chosen cell
|
||||
let delete = sqlx::query!(
|
||||
r#"DELETE FROM cells
|
||||
WHERE id != $1 AND id = ANY($2)"#,
|
||||
chosen_cell.id,
|
||||
&duplicate_ids
|
||||
)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
info!("deleted {} cells", delete.rows_affected());
|
||||
}
|
||||
page += 1;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
71
src/commands/backfills/is_base_game.rs
Normal file
71
src/commands/backfills/is_base_game.rs
Normal file
@@ -0,0 +1,71 @@
|
||||
use std::borrow::Borrow;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use skyrim_cell_dump::Plugin;
|
||||
use tracing::info;
|
||||
|
||||
use crate::models::cell::{self, UnsavedCell};
|
||||
use crate::models::world::{self, UnsavedWorld};
|
||||
use crate::plugin_processor::get_local_form_id_and_master;
|
||||
|
||||
pub async fn backfill_is_base_game(pool: &sqlx::Pool<sqlx::Postgres>) -> Result<()> {
|
||||
let file = File::open("./data/skyrim.json")?;
|
||||
let reader = BufReader::new(file);
|
||||
let plugin: Plugin =
|
||||
serde_json::from_reader(reader).context("failed to deserialize data/skyrim.json")?;
|
||||
let file_name = "Skyrim.esm";
|
||||
let masters: Vec<&str> = plugin.header.masters.iter().map(|s| s.borrow()).collect();
|
||||
let base_worlds: Vec<UnsavedWorld> = plugin
|
||||
.worlds
|
||||
.iter()
|
||||
.map(|world| {
|
||||
let (form_id, master) =
|
||||
get_local_form_id_and_master(world.form_id, &masters, file_name)
|
||||
.expect("form_id to be a valid i32");
|
||||
UnsavedWorld { form_id, master }
|
||||
})
|
||||
.collect();
|
||||
let db_worlds = world::batched_insert(pool, &base_worlds).await?;
|
||||
info!("Upserted {} Skyrim.esm base worlds", db_worlds.len());
|
||||
let base_cells: Vec<UnsavedCell> = plugin
|
||||
.cells
|
||||
.iter()
|
||||
.map(|cell| {
|
||||
let world_id = if let Some(world_form_id) = cell.world_form_id {
|
||||
let (form_id, master) =
|
||||
get_local_form_id_and_master(world_form_id, &masters, file_name)
|
||||
.expect("form_id to be valid i32");
|
||||
Some(
|
||||
db_worlds
|
||||
.iter()
|
||||
.find(|&world| world.form_id == form_id && world.master == master)
|
||||
.expect("cell references world in the plugin worlds")
|
||||
.id,
|
||||
)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let (form_id, master) =
|
||||
get_local_form_id_and_master(cell.form_id, &masters, file_name)
|
||||
.expect("form_id is a valid i32");
|
||||
UnsavedCell {
|
||||
form_id,
|
||||
master,
|
||||
x: cell.x,
|
||||
y: cell.y,
|
||||
world_id,
|
||||
is_persistent: cell.is_persistent,
|
||||
is_base_game: true, // the whole point of this function
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
let db_cells = cell::batched_insert(pool, &base_cells).await?;
|
||||
info!("Upserted {} Skyrim.esm base cells", db_cells.len());
|
||||
// This works for exterior cells, but there's a bug with the unique index on cells that
|
||||
// creates duplicate interior cells. To fix that, I need to upgrade postgres to
|
||||
// 15 or later, migate the data to the new db cluster, consolidate all of the duplicate cells
|
||||
// into one cell in a separate backfill command, then fix the unique index.
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,3 +1,7 @@
|
||||
pub mod deduplicate_interior_cells;
|
||||
pub mod is_translation;
|
||||
pub mod is_base_game;
|
||||
|
||||
pub use deduplicate_interior_cells::deduplicate_interior_cells;
|
||||
pub use is_translation::backfill_is_translation;
|
||||
pub use is_base_game::backfill_is_base_game;
|
||||
|
||||
@@ -10,7 +10,7 @@ pub async fn dump_cell_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> Res
|
||||
let mut cell_count = 0;
|
||||
for x in -77..75 {
|
||||
for y in -50..44 {
|
||||
if let Ok(data) = cell::get_cell_data(pool, "Skyrim.esm", 1, x, y).await {
|
||||
if let Ok(data) = cell::get_cell_data(pool, "Skyrim.esm", 1, x, y, true).await {
|
||||
let path = format!("{}/{}", &dir, x);
|
||||
let path = Path::new(&path);
|
||||
create_dir_all(path)?;
|
||||
|
||||
Reference in New Issue
Block a user