Recreate pg pool connection in all dump commands

Since the slow query issue seems to be affecting all dump commands that
run queries in loops, recreate the connection pool in each of them.
Tyler Hallada 2023-11-15 18:35:48 -05:00
parent 8cb9bfa4ca
commit 7579db364b
9 changed files with 155 additions and 71 deletions
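
For context, here is a minimal, self-contained sketch of the workaround these diffs apply in each dump command: rebuild the sqlx pool from DATABASE_URL every few loop iterations instead of reusing one pool for the whole run. The `fresh_pool` helper and the stand-in query are hypothetical; the real commands inline the `PgPoolOptions` call and run their own batched queries (assumes tokio and sqlx with the postgres feature, as in this repo).

use anyhow::Result;
use sqlx::postgres::PgPoolOptions;
use sqlx::{Pool, Postgres};
use std::env;

// Hypothetical helper: build a small pool from DATABASE_URL, the same way
// each dump command below does inline.
async fn fresh_pool() -> Result<Pool<Postgres>> {
    Ok(PgPoolOptions::new()
        .max_connections(5)
        .connect(&env::var("DATABASE_URL")?)
        .await?)
}

#[tokio::main]
async fn main() -> Result<()> {
    let mut pool = fresh_pool().await?;
    for page in 1..=20_i64 {
        if page % 5 == 0 {
            // Workaround: batched queries slow down after ~5 iterations on the
            // same pool; dropping it and reconnecting restores performance.
            // Root cause unknown.
            pool = fresh_pool().await?;
        }
        // Stand-in for the real batched dump query run on each page.
        let _row: (i64,) = sqlx::query_as("SELECT $1::int8")
            .bind(page)
            .fetch_one(&pool)
            .await?;
    }
    Ok(())
}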


@@ -4,10 +4,9 @@
 /// rows referencing the duplicate cells are updated to reference the chosen cell.
 use anyhow::Result;
 use serde::{Deserialize, Serialize};
-use sqlx::postgres::PgDatabaseError;
 use sqlx::types::Json;
 use sqlx::FromRow;
-use tracing::{info, warn};
+use tracing::info;

 const PAGE_SIZE: i64 = 100;

@@ -84,10 +83,10 @@ pub async fn deduplicate_interior_cells(pool: &sqlx::Pool<sqlx::Postgres>) -> Re
     // plugins that have multiple cells with the same form_id. For these duplicate
     // plugin_cells with the same plugin_id, I just arbitrarily choose one and delete
     // the others (since it's undefined behavior of which duplicate record should "win"
     // out in this case anyways). In the case of exterior cells, where the duplicate
     // interior cell bug is not a problem, the last processed cell record in the plugin
     // wins since `process_plugin` uses an upsert method which updates existing
     // `plugin_cells` if it tries to insert a new one that conflicts with an existing one.
     // So I am effectively retroactively doing the same here for interior cells.
     let plugin_cells_delete = sqlx::query!(
         r#"DELETE FROM plugin_cells


@@ -47,9 +47,8 @@ pub async fn backfill_is_base_game(pool: &sqlx::Pool<sqlx::Postgres>) -> Result<
             } else {
                 None
             };
-            let (form_id, master) =
-                get_local_form_id_and_master(cell.form_id, &masters, file_name)
-                    .expect("form_id is a valid i32");
+            let (form_id, master) = get_local_form_id_and_master(cell.form_id, &masters, file_name)
+                .expect("form_id is a valid i32");
             UnsavedCell {
                 form_id,
                 master,
@@ -63,8 +62,8 @@ pub async fn backfill_is_base_game(pool: &sqlx::Pool<sqlx::Postgres>) -> Result<
         .collect();
     let db_cells = cell::batched_insert(pool, &base_cells).await?;
     info!("Upserted {} Skyrim.esm base cells", db_cells.len());
     // This works for exterior cells, but there's a bug with the unique index on cells that
     // creates duplicate interior cells. To fix that, I need to upgrade postgres to
     // 15 or later, migate the data to the new db cluster, consolidate all of the duplicate cells
     // into one cell in a separate backfill command, then fix the unique index.
     Ok(())


@@ -1,16 +1,32 @@
 use anyhow::Result;
-use std::fs::{create_dir_all, File};
-use std::io::Write;
+use sqlx::postgres::PgPoolOptions;
+use std::env;
+use std::fs::create_dir_all;
 use std::path::Path;
+use tokio::fs::File;
+use tokio::io::AsyncWriteExt;
 use tracing::{debug, info};

 use crate::models::cell;

-pub async fn dump_cell_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> Result<()> {
+pub async fn dump_cell_data(dir: &str) -> Result<()> {
+    let mut pool = PgPoolOptions::new()
+        .max_connections(5)
+        .connect(&env::var("DATABASE_URL")?)
+        .await?;
     let mut cell_count = 0;
     for x in -77..75 {
         for y in -50..44 {
-            if let Ok(data) = cell::get_cell_data(pool, "Skyrim.esm", 1, x, y, true).await {
+            if cell_count % 5 == 0 {
+                // There's a weird issue that slows down this query after 5 iterations. Recreating the
+                // connection pool seems to fix it. I don't know why.
+                info!("reconnecting to database");
+                pool = PgPoolOptions::new()
+                    .max_connections(5)
+                    .connect(&env::var("DATABASE_URL")?)
+                    .await?;
+            }
+            if let Ok(data) = cell::get_cell_data(&pool, "Skyrim.esm", 1, x, y, true).await {
                 let path = format!("{}/{}", &dir, x);
                 let path = Path::new(&path);
                 create_dir_all(path)?;
@@ -22,11 +38,13 @@ pub async fn dump_cell_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> Res
                     "dumping cell data to {}",
                     path.display()
                 );
-                let mut file = File::create(path)?;
-                write!(file, "{}", serde_json::to_string(&data)?)?;
+                let mut file = File::create(path).await?;
+                file.write_all(serde_json::to_string(&data)?.as_bytes())
+                    .await?;
                 cell_count += 1;
             }
         }
+        info!("dumped all rows in x: {}", x);
     }
     info!("dumped {} cell data files", cell_count);
     Ok(())


@@ -1,24 +1,35 @@
 use anyhow::Result;
 use chrono::NaiveDateTime;
-use std::fs::File;
-use std::io::Write;
+use sqlx::postgres::PgPoolOptions;
+use std::env;
 use std::path::Path;
+use tokio::fs::File;
+use tokio::io::AsyncWriteExt;
 use tracing::{debug, info};

 use crate::models::file;

-pub async fn dump_file_data(
-    pool: &sqlx::Pool<sqlx::Postgres>,
-    dir: &str,
-    updated_after: Option<NaiveDateTime>,
-) -> Result<()> {
+pub async fn dump_file_data(dir: &str, updated_after: Option<NaiveDateTime>) -> Result<()> {
+    let mut pool = PgPoolOptions::new()
+        .max_connections(5)
+        .connect(&env::var("DATABASE_URL")?)
+        .await?;
     let mut file_count = 0;
     let mut page = 1;
     let page_size = 20;
     let mut last_id = None;
     loop {
+        if page % 5 == 0 {
+            // There's a weird issue that slows down this query after 5 iterations. Recreating the
+            // connection pool seems to fix it. I don't know why.
+            info!("reconnecting to database");
+            pool = PgPoolOptions::new()
+                .max_connections(5)
+                .connect(&env::var("DATABASE_URL")?)
+                .await?;
+        }
         let files =
-            file::batched_get_with_cells(pool, page_size, last_id, "Skyrim.esm", 1, updated_after)
+            file::batched_get_with_cells(&pool, page_size, last_id, "Skyrim.esm", 1, updated_after)
                 .await?;
         if files.is_empty() {
             break;
@@ -33,11 +44,13 @@ pub async fn dump_file_data(
                 "dumping file data to {}",
                 path.display()
             );
-            let mut file = File::create(path)?;
-            write!(file, "{}", serde_json::to_string(&file_with_cells)?)?;
+            let mut file = File::create(path).await?;
+            file.write_all(serde_json::to_string(&file_with_cells)?.as_bytes())
+                .await?;
             last_id = Some(file_with_cells.id);
             file_count += 1;
         }
+        info!("dumped page {}", page);
         page += 1;
     }
     info!("dumped {} file data files", file_count);


@@ -1,19 +1,34 @@
 use anyhow::Result;
+use sqlx::postgres::PgPoolOptions;
 use std::collections::HashMap;
-use std::fs::File;
-use std::io::Write;
+use std::env;
+use tokio::fs::File;
+use tokio::io::AsyncWriteExt;
 use tracing::{debug, info};

 use crate::models::game_mod;

-pub async fn dump_mod_cell_counts(pool: &sqlx::Pool<sqlx::Postgres>, path: &str) -> Result<()> {
+pub async fn dump_mod_cell_counts(path: &str) -> Result<()> {
+    let mut pool = PgPoolOptions::new()
+        .max_connections(5)
+        .connect(&env::var("DATABASE_URL")?)
+        .await?;
     let mut page = 1;
     let page_size = 100;
     let mut last_id = None;
     let mut counts = HashMap::new();
     loop {
+        if page % 5 == 0 {
+            // There's a weird issue that slows down this query after 5 iterations. Recreating the
+            // connection pool seems to fix it. I don't know why.
+            info!("reconnecting to database");
+            pool = PgPoolOptions::new()
+                .max_connections(5)
+                .connect(&env::var("DATABASE_URL")?)
+                .await?;
+        }
         let mod_cell_counts =
-            game_mod::batched_get_cell_counts(pool, page_size, last_id, "Skyrim.esm", 1).await?;
+            game_mod::batched_get_cell_counts(&pool, page_size, last_id, "Skyrim.esm", 1).await?;
         if mod_cell_counts.is_empty() {
             break;
         }
@@ -27,10 +42,12 @@ pub async fn dump_mod_cell_counts(pool: &sqlx::Pool<sqlx::Postgres>, path: &str)
             counts.insert(mod_cell_count.nexus_mod_id, mod_cell_count.cells);
             last_id = Some(mod_cell_count.nexus_mod_id);
         }
+        info!("dumped page {}", page);
         page += 1;
     }
     info!("writing {} mod cell counts to {}", counts.len(), path);
-    let mut file = File::create(path)?;
-    write!(file, "{}", serde_json::to_string(&counts)?)?;
+    let mut file = File::create(path).await?;
+    file.write_all(serde_json::to_string(&counts)?.as_bytes())
+        .await?;
     Ok(())
 }


@@ -1,31 +1,42 @@
 use anyhow::Result;
 use chrono::NaiveDateTime;
+use sqlx::postgres::PgPoolOptions;
 use std::collections::HashMap;
-use std::fs::File;
-use std::io::Write;
+use std::env;
 use std::path::Path;
+use tokio::fs::File;
+use tokio::io::AsyncWriteExt;
 use tracing::{debug, info};

 use crate::models::game;
 use crate::models::game_mod;

-pub async fn dump_mod_data(
-    pool: &sqlx::Pool<sqlx::Postgres>,
-    dir: &str,
-    updated_after: Option<NaiveDateTime>,
-) -> Result<()> {
+pub async fn dump_mod_data(dir: &str, updated_after: Option<NaiveDateTime>) -> Result<()> {
+    let mut pool = PgPoolOptions::new()
+        .max_connections(5)
+        .connect(&env::var("DATABASE_URL")?)
+        .await?;
     let mut mod_count = 0;
     let mut page = 1;
     let page_size = 20;
     let mut last_id = None;
-    let game_id_to_name: HashMap<_, _> = game::get_all(pool)
+    let game_id_to_name: HashMap<_, _> = game::get_all(&pool)
         .await?
         .into_iter()
         .map(|game| (game.id, game.name))
         .collect();
     loop {
+        if page % 5 == 0 {
+            // There's a weird issue that slows down this query after 5 iterations. Recreating the
+            // connection pool seems to fix it. I don't know why.
+            info!("reconnecting to database");
+            pool = PgPoolOptions::new()
+                .max_connections(5)
+                .connect(&env::var("DATABASE_URL")?)
+                .await?;
+        }
         let mods = game_mod::batched_get_with_cells_and_files(
-            pool,
+            &pool,
             page_size,
             last_id,
             "Skyrim.esm",
@@ -50,11 +61,13 @@ pub async fn dump_mod_data(
                 "dumping mod data to {}",
                 path.display()
             );
-            let mut file = File::create(path)?;
-            write!(file, "{}", serde_json::to_string(&mod_with_cells)?)?;
+            let mut file = File::create(path).await?;
+            file.write_all(serde_json::to_string(&mod_with_cells)?.as_bytes())
+                .await?;
             last_id = Some(mod_with_cells.id);
             mod_count += 1;
         }
+        info!("dumped page {}", page);
         page += 1;
     }
     info!("dumped {} mod data files", mod_count);


@@ -1,7 +1,9 @@
 use anyhow::Result;
 use serde::Serialize;
-use std::fs::File;
-use std::io::Write;
+use sqlx::postgres::PgPoolOptions;
+use std::env;
+use tokio::fs::File;
+use tokio::io::AsyncWriteExt;
 use tracing::{debug, info};

 use crate::models::game;
@@ -13,18 +15,27 @@ struct ModForSearchIdTranslated {
     id: i32,
 }

-pub async fn dump_mod_search_index(
-    pool: &sqlx::Pool<sqlx::Postgres>,
-    game: &str,
-    path: &str,
-) -> Result<()> {
+pub async fn dump_mod_search_index(game: &str, path: &str) -> Result<()> {
+    let mut pool = PgPoolOptions::new()
+        .max_connections(5)
+        .connect(&env::var("DATABASE_URL")?)
+        .await?;
     let mut page = 1;
     let mut search_index = vec![];
     let page_size = 20;
     let mut last_id = None;
-    let game_id = game::get_id_by_name(pool, game).await?;
+    let game_id = game::get_id_by_name(&pool, game).await?;
     loop {
-        let mods = game_mod::batched_get_for_search(pool, game_id, page_size, last_id).await?;
+        if page % 5 == 0 {
+            // There's a weird issue that slows down this query after 5 iterations. Recreating the
+            // connection pool seems to fix it. I don't know why.
+            info!("reconnecting to database");
+            pool = PgPoolOptions::new()
+                .max_connections(5)
+                .connect(&env::var("DATABASE_URL")?)
+                .await?;
+        }
+        let mods = game_mod::batched_get_for_search(&pool, game_id, page_size, last_id).await?;
         if mods.is_empty() {
             break;
         }
@@ -47,7 +58,8 @@ pub async fn dump_mod_search_index(
         search_index.len(),
         path
     );
-    let mut file = File::create(path)?;
-    write!(file, "{}", serde_json::to_string(&search_index)?)?;
+    let mut file = File::create(path).await?;
+    file.write_all(serde_json::to_string(&search_index)?.as_bytes())
+        .await?;
     Ok(())
 }


@@ -1,24 +1,36 @@
 use anyhow::Result;
 use chrono::NaiveDateTime;
-use std::fs::{create_dir_all, File};
-use std::io::Write;
+use sqlx::postgres::PgPoolOptions;
+use std::env;
+use std::fs::create_dir_all;
 use std::path::Path;
+use tokio::fs::File;
+use tokio::io::AsyncWriteExt;
 use tracing::{debug, info};

 use crate::models::{format_radix, plugin};

-pub async fn dump_plugin_data(
-    pool: &sqlx::Pool<sqlx::Postgres>,
-    dir: &str,
-    updated_after: Option<NaiveDateTime>,
-) -> Result<()> {
+pub async fn dump_plugin_data(dir: &str, updated_after: Option<NaiveDateTime>) -> Result<()> {
+    let mut pool = PgPoolOptions::new()
+        .max_connections(5)
+        .connect(&env::var("DATABASE_URL")?)
+        .await?;
     let mut plugin_count = 0;
     let mut page: u32 = 1;
     let page_size = 20;
     let mut last_hash = None;
     loop {
+        if page % 5 == 0 {
+            // There's a weird issue that slows down this query after 5 iterations. Recreating the
+            // connection pool seems to fix it. I don't know why.
+            info!("reconnecting to database");
+            pool = PgPoolOptions::new()
+                .max_connections(5)
+                .connect(&env::var("DATABASE_URL")?)
+                .await?;
+        }
         let plugins = plugin::batched_get_by_hash_with_mods(
-            pool,
+            &pool,
             page_size,
             last_hash,
             "Skyrim.esm",
@@ -39,12 +51,13 @@ pub async fn dump_plugin_data(
                 "dumping plugin data to {}",
                 path.display()
             );
-            let mut file = File::create(path)?;
+            let mut file = File::create(path).await?;
             let json_val = serde_json::to_string(&plugin)?;
-            write!(file, "{}", json_val)?;
+            file.write_all(json_val.as_bytes()).await?;
             last_hash = Some(plugin.hash);
             plugin_count += 1;
         }
+        info!("dumped page {}", page);
         page += 1;
     }
     info!("dumped {} plugin data files", plugin_count);


@@ -120,22 +120,22 @@ pub async fn main() -> Result<()> {
         .await;
     }
     if let Some(dir) = args.cell_data {
-        return dump_cell_data(&pool, &dir).await;
+        return dump_cell_data(&dir).await;
     }
     if let Some(dir) = args.mod_data {
-        return dump_mod_data(&pool, &dir, args.updated_after).await;
+        return dump_mod_data(&dir, args.updated_after).await;
     }
     if let Some(path) = args.mod_search_index {
-        return dump_mod_search_index(&pool, &args.game, &path).await;
+        return dump_mod_search_index(&args.game, &path).await;
     }
     if let Some(path) = args.mod_cell_counts {
-        return dump_mod_cell_counts(&pool, &path).await;
+        return dump_mod_cell_counts(&path).await;
     }
     if let Some(path) = args.plugin_data {
-        return dump_plugin_data(&pool, &path, args.updated_after).await;
+        return dump_plugin_data(&path, args.updated_after).await;
     }
     if let Some(path) = args.file_data {
-        return dump_file_data(&pool, &path, args.updated_after).await;
+        return dump_file_data(&path, args.updated_after).await;
     }
     if let Some(path) = args.game_data {
         return dump_games(&pool, &path).await;