Recreate pg pool connection in all dump commands
Since the slow query issue seems to be affecting all dump commands that run queries in loops.
This commit is contained in:
parent
8cb9bfa4ca
commit
7579db364b
@ -4,10 +4,9 @@
|
||||
/// rows referencing the duplicate cells are updated to reference the chosen cell.
|
||||
use anyhow::Result;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sqlx::postgres::PgDatabaseError;
|
||||
use sqlx::types::Json;
|
||||
use sqlx::FromRow;
|
||||
use tracing::{info, warn};
|
||||
use tracing::info;
|
||||
|
||||
const PAGE_SIZE: i64 = 100;
|
||||
|
||||
@ -84,10 +83,10 @@ pub async fn deduplicate_interior_cells(pool: &sqlx::Pool<sqlx::Postgres>) -> Re
|
||||
// plugins that have multiple cells with the same form_id. For these duplicate
|
||||
// plugin_cells with the same plugin_id, I just arbitrarily choose one and delete
|
||||
// the others (since it's undefined behavior of which duplicate record should "win"
|
||||
// out in this case anyways). In the case of exterior cells, where the duplicate
|
||||
// interior cell bug is not a problem, the last processed cell record in the plugin
|
||||
// wins since `process_plugin` uses an upsert method which updates existing
|
||||
// `plugin_cells` if it tries to insert a new one that conflicts with an existing one.
|
||||
// out in this case anyways). In the case of exterior cells, where the duplicate
|
||||
// interior cell bug is not a problem, the last processed cell record in the plugin
|
||||
// wins since `process_plugin` uses an upsert method which updates existing
|
||||
// `plugin_cells` if it tries to insert a new one that conflicts with an existing one.
|
||||
// So I am effectively retroactively doing the same here for interior cells.
|
||||
let plugin_cells_delete = sqlx::query!(
|
||||
r#"DELETE FROM plugin_cells
|
||||
|
@ -47,9 +47,8 @@ pub async fn backfill_is_base_game(pool: &sqlx::Pool<sqlx::Postgres>) -> Result<
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let (form_id, master) =
|
||||
get_local_form_id_and_master(cell.form_id, &masters, file_name)
|
||||
.expect("form_id is a valid i32");
|
||||
let (form_id, master) = get_local_form_id_and_master(cell.form_id, &masters, file_name)
|
||||
.expect("form_id is a valid i32");
|
||||
UnsavedCell {
|
||||
form_id,
|
||||
master,
|
||||
@ -63,8 +62,8 @@ pub async fn backfill_is_base_game(pool: &sqlx::Pool<sqlx::Postgres>) -> Result<
|
||||
.collect();
|
||||
let db_cells = cell::batched_insert(pool, &base_cells).await?;
|
||||
info!("Upserted {} Skyrim.esm base cells", db_cells.len());
|
||||
// This works for exterior cells, but there's a bug with the unique index on cells that
|
||||
// creates duplicate interior cells. To fix that, I need to upgrade postgres to
|
||||
// This works for exterior cells, but there's a bug with the unique index on cells that
|
||||
// creates duplicate interior cells. To fix that, I need to upgrade postgres to
|
||||
// 15 or later, migate the data to the new db cluster, consolidate all of the duplicate cells
|
||||
// into one cell in a separate backfill command, then fix the unique index.
|
||||
Ok(())
|
||||
|
@ -1,16 +1,32 @@
|
||||
use anyhow::Result;
|
||||
use std::fs::{create_dir_all, File};
|
||||
use std::io::Write;
|
||||
use sqlx::postgres::PgPoolOptions;
|
||||
use std::env;
|
||||
use std::fs::create_dir_all;
|
||||
use std::path::Path;
|
||||
use tokio::fs::File;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tracing::{debug, info};
|
||||
|
||||
use crate::models::cell;
|
||||
|
||||
pub async fn dump_cell_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> Result<()> {
|
||||
pub async fn dump_cell_data(dir: &str) -> Result<()> {
|
||||
let mut pool = PgPoolOptions::new()
|
||||
.max_connections(5)
|
||||
.connect(&env::var("DATABASE_URL")?)
|
||||
.await?;
|
||||
let mut cell_count = 0;
|
||||
for x in -77..75 {
|
||||
for y in -50..44 {
|
||||
if let Ok(data) = cell::get_cell_data(pool, "Skyrim.esm", 1, x, y, true).await {
|
||||
if cell_count % 5 == 0 {
|
||||
// There's a weird issue that slows down this query after 5 iterations. Recreating the
|
||||
// connection pool seems to fix it. I don't know why.
|
||||
info!("reconnecting to database");
|
||||
pool = PgPoolOptions::new()
|
||||
.max_connections(5)
|
||||
.connect(&env::var("DATABASE_URL")?)
|
||||
.await?;
|
||||
}
|
||||
if let Ok(data) = cell::get_cell_data(&pool, "Skyrim.esm", 1, x, y, true).await {
|
||||
let path = format!("{}/{}", &dir, x);
|
||||
let path = Path::new(&path);
|
||||
create_dir_all(path)?;
|
||||
@ -22,11 +38,13 @@ pub async fn dump_cell_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> Res
|
||||
"dumping cell data to {}",
|
||||
path.display()
|
||||
);
|
||||
let mut file = File::create(path)?;
|
||||
write!(file, "{}", serde_json::to_string(&data)?)?;
|
||||
let mut file = File::create(path).await?;
|
||||
file.write_all(serde_json::to_string(&data)?.as_bytes())
|
||||
.await?;
|
||||
cell_count += 1;
|
||||
}
|
||||
}
|
||||
info!("dumped all rows in x: {}", x);
|
||||
}
|
||||
info!("dumped {} cell data files", cell_count);
|
||||
Ok(())
|
||||
|
@ -1,24 +1,35 @@
|
||||
use anyhow::Result;
|
||||
use chrono::NaiveDateTime;
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use sqlx::postgres::PgPoolOptions;
|
||||
use std::env;
|
||||
use std::path::Path;
|
||||
use tokio::fs::File;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tracing::{debug, info};
|
||||
|
||||
use crate::models::file;
|
||||
|
||||
pub async fn dump_file_data(
|
||||
pool: &sqlx::Pool<sqlx::Postgres>,
|
||||
dir: &str,
|
||||
updated_after: Option<NaiveDateTime>,
|
||||
) -> Result<()> {
|
||||
pub async fn dump_file_data(dir: &str, updated_after: Option<NaiveDateTime>) -> Result<()> {
|
||||
let mut pool = PgPoolOptions::new()
|
||||
.max_connections(5)
|
||||
.connect(&env::var("DATABASE_URL")?)
|
||||
.await?;
|
||||
let mut file_count = 0;
|
||||
let mut page = 1;
|
||||
let page_size = 20;
|
||||
let mut last_id = None;
|
||||
loop {
|
||||
if page % 5 == 0 {
|
||||
// There's a weird issue that slows down this query after 5 iterations. Recreating the
|
||||
// connection pool seems to fix it. I don't know why.
|
||||
info!("reconnecting to database");
|
||||
pool = PgPoolOptions::new()
|
||||
.max_connections(5)
|
||||
.connect(&env::var("DATABASE_URL")?)
|
||||
.await?;
|
||||
}
|
||||
let files =
|
||||
file::batched_get_with_cells(pool, page_size, last_id, "Skyrim.esm", 1, updated_after)
|
||||
file::batched_get_with_cells(&pool, page_size, last_id, "Skyrim.esm", 1, updated_after)
|
||||
.await?;
|
||||
if files.is_empty() {
|
||||
break;
|
||||
@ -33,11 +44,13 @@ pub async fn dump_file_data(
|
||||
"dumping file data to {}",
|
||||
path.display()
|
||||
);
|
||||
let mut file = File::create(path)?;
|
||||
write!(file, "{}", serde_json::to_string(&file_with_cells)?)?;
|
||||
let mut file = File::create(path).await?;
|
||||
file.write_all(serde_json::to_string(&file_with_cells)?.as_bytes())
|
||||
.await?;
|
||||
last_id = Some(file_with_cells.id);
|
||||
file_count += 1;
|
||||
}
|
||||
info!("dumped page {}", page);
|
||||
page += 1;
|
||||
}
|
||||
info!("dumped {} file data files", file_count);
|
||||
|
@ -1,19 +1,34 @@
|
||||
use anyhow::Result;
|
||||
use sqlx::postgres::PgPoolOptions;
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use std::env;
|
||||
use tokio::fs::File;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tracing::{debug, info};
|
||||
|
||||
use crate::models::game_mod;
|
||||
|
||||
pub async fn dump_mod_cell_counts(pool: &sqlx::Pool<sqlx::Postgres>, path: &str) -> Result<()> {
|
||||
pub async fn dump_mod_cell_counts(path: &str) -> Result<()> {
|
||||
let mut pool = PgPoolOptions::new()
|
||||
.max_connections(5)
|
||||
.connect(&env::var("DATABASE_URL")?)
|
||||
.await?;
|
||||
let mut page = 1;
|
||||
let page_size = 100;
|
||||
let mut last_id = None;
|
||||
let mut counts = HashMap::new();
|
||||
loop {
|
||||
if page % 5 == 0 {
|
||||
// There's a weird issue that slows down this query after 5 iterations. Recreating the
|
||||
// connection pool seems to fix it. I don't know why.
|
||||
info!("reconnecting to database");
|
||||
pool = PgPoolOptions::new()
|
||||
.max_connections(5)
|
||||
.connect(&env::var("DATABASE_URL")?)
|
||||
.await?;
|
||||
}
|
||||
let mod_cell_counts =
|
||||
game_mod::batched_get_cell_counts(pool, page_size, last_id, "Skyrim.esm", 1).await?;
|
||||
game_mod::batched_get_cell_counts(&pool, page_size, last_id, "Skyrim.esm", 1).await?;
|
||||
if mod_cell_counts.is_empty() {
|
||||
break;
|
||||
}
|
||||
@ -27,10 +42,12 @@ pub async fn dump_mod_cell_counts(pool: &sqlx::Pool<sqlx::Postgres>, path: &str)
|
||||
counts.insert(mod_cell_count.nexus_mod_id, mod_cell_count.cells);
|
||||
last_id = Some(mod_cell_count.nexus_mod_id);
|
||||
}
|
||||
info!("dumped page {}", page);
|
||||
page += 1;
|
||||
}
|
||||
info!("writing {} mod cell counts to {}", counts.len(), path);
|
||||
let mut file = File::create(path)?;
|
||||
write!(file, "{}", serde_json::to_string(&counts)?)?;
|
||||
let mut file = File::create(path).await?;
|
||||
file.write_all(serde_json::to_string(&counts)?.as_bytes())
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
@ -1,31 +1,42 @@
|
||||
use anyhow::Result;
|
||||
use chrono::NaiveDateTime;
|
||||
use sqlx::postgres::PgPoolOptions;
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use std::env;
|
||||
use std::path::Path;
|
||||
use tokio::fs::File;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tracing::{debug, info};
|
||||
|
||||
use crate::models::game;
|
||||
use crate::models::game_mod;
|
||||
|
||||
pub async fn dump_mod_data(
|
||||
pool: &sqlx::Pool<sqlx::Postgres>,
|
||||
dir: &str,
|
||||
updated_after: Option<NaiveDateTime>,
|
||||
) -> Result<()> {
|
||||
pub async fn dump_mod_data(dir: &str, updated_after: Option<NaiveDateTime>) -> Result<()> {
|
||||
let mut pool = PgPoolOptions::new()
|
||||
.max_connections(5)
|
||||
.connect(&env::var("DATABASE_URL")?)
|
||||
.await?;
|
||||
let mut mod_count = 0;
|
||||
let mut page = 1;
|
||||
let page_size = 20;
|
||||
let mut last_id = None;
|
||||
let game_id_to_name: HashMap<_, _> = game::get_all(pool)
|
||||
let game_id_to_name: HashMap<_, _> = game::get_all(&pool)
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|game| (game.id, game.name))
|
||||
.collect();
|
||||
loop {
|
||||
if page % 5 == 0 {
|
||||
// There's a weird issue that slows down this query after 5 iterations. Recreating the
|
||||
// connection pool seems to fix it. I don't know why.
|
||||
info!("reconnecting to database");
|
||||
pool = PgPoolOptions::new()
|
||||
.max_connections(5)
|
||||
.connect(&env::var("DATABASE_URL")?)
|
||||
.await?;
|
||||
}
|
||||
let mods = game_mod::batched_get_with_cells_and_files(
|
||||
pool,
|
||||
&pool,
|
||||
page_size,
|
||||
last_id,
|
||||
"Skyrim.esm",
|
||||
@ -50,11 +61,13 @@ pub async fn dump_mod_data(
|
||||
"dumping mod data to {}",
|
||||
path.display()
|
||||
);
|
||||
let mut file = File::create(path)?;
|
||||
write!(file, "{}", serde_json::to_string(&mod_with_cells)?)?;
|
||||
let mut file = File::create(path).await?;
|
||||
file.write_all(serde_json::to_string(&mod_with_cells)?.as_bytes())
|
||||
.await?;
|
||||
last_id = Some(mod_with_cells.id);
|
||||
mod_count += 1;
|
||||
}
|
||||
info!("dumped page {}", page);
|
||||
page += 1;
|
||||
}
|
||||
info!("dumped {} mod data files", mod_count);
|
||||
|
@ -1,7 +1,9 @@
|
||||
use anyhow::Result;
|
||||
use serde::Serialize;
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use sqlx::postgres::PgPoolOptions;
|
||||
use std::env;
|
||||
use tokio::fs::File;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tracing::{debug, info};
|
||||
|
||||
use crate::models::game;
|
||||
@ -13,18 +15,27 @@ struct ModForSearchIdTranslated {
|
||||
id: i32,
|
||||
}
|
||||
|
||||
pub async fn dump_mod_search_index(
|
||||
pool: &sqlx::Pool<sqlx::Postgres>,
|
||||
game: &str,
|
||||
path: &str,
|
||||
) -> Result<()> {
|
||||
pub async fn dump_mod_search_index(game: &str, path: &str) -> Result<()> {
|
||||
let mut pool = PgPoolOptions::new()
|
||||
.max_connections(5)
|
||||
.connect(&env::var("DATABASE_URL")?)
|
||||
.await?;
|
||||
let mut page = 1;
|
||||
let mut search_index = vec![];
|
||||
let page_size = 20;
|
||||
let mut last_id = None;
|
||||
let game_id = game::get_id_by_name(pool, game).await?;
|
||||
let game_id = game::get_id_by_name(&pool, game).await?;
|
||||
loop {
|
||||
let mods = game_mod::batched_get_for_search(pool, game_id, page_size, last_id).await?;
|
||||
if page % 5 == 0 {
|
||||
// There's a weird issue that slows down this query after 5 iterations. Recreating the
|
||||
// connection pool seems to fix it. I don't know why.
|
||||
info!("reconnecting to database");
|
||||
pool = PgPoolOptions::new()
|
||||
.max_connections(5)
|
||||
.connect(&env::var("DATABASE_URL")?)
|
||||
.await?;
|
||||
}
|
||||
let mods = game_mod::batched_get_for_search(&pool, game_id, page_size, last_id).await?;
|
||||
if mods.is_empty() {
|
||||
break;
|
||||
}
|
||||
@ -47,7 +58,8 @@ pub async fn dump_mod_search_index(
|
||||
search_index.len(),
|
||||
path
|
||||
);
|
||||
let mut file = File::create(path)?;
|
||||
write!(file, "{}", serde_json::to_string(&search_index)?)?;
|
||||
let mut file = File::create(path).await?;
|
||||
file.write_all(serde_json::to_string(&search_index)?.as_bytes())
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
@ -1,24 +1,36 @@
|
||||
use anyhow::Result;
|
||||
use chrono::NaiveDateTime;
|
||||
use std::fs::{create_dir_all, File};
|
||||
use std::io::Write;
|
||||
use sqlx::postgres::PgPoolOptions;
|
||||
use std::env;
|
||||
use std::fs::create_dir_all;
|
||||
use std::path::Path;
|
||||
use tokio::fs::File;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tracing::{debug, info};
|
||||
|
||||
use crate::models::{format_radix, plugin};
|
||||
|
||||
pub async fn dump_plugin_data(
|
||||
pool: &sqlx::Pool<sqlx::Postgres>,
|
||||
dir: &str,
|
||||
updated_after: Option<NaiveDateTime>,
|
||||
) -> Result<()> {
|
||||
pub async fn dump_plugin_data(dir: &str, updated_after: Option<NaiveDateTime>) -> Result<()> {
|
||||
let mut pool = PgPoolOptions::new()
|
||||
.max_connections(5)
|
||||
.connect(&env::var("DATABASE_URL")?)
|
||||
.await?;
|
||||
let mut plugin_count = 0;
|
||||
let mut page: u32 = 1;
|
||||
let page_size = 20;
|
||||
let mut last_hash = None;
|
||||
loop {
|
||||
if page % 5 == 0 {
|
||||
// There's a weird issue that slows down this query after 5 iterations. Recreating the
|
||||
// connection pool seems to fix it. I don't know why.
|
||||
info!("reconnecting to database");
|
||||
pool = PgPoolOptions::new()
|
||||
.max_connections(5)
|
||||
.connect(&env::var("DATABASE_URL")?)
|
||||
.await?;
|
||||
}
|
||||
let plugins = plugin::batched_get_by_hash_with_mods(
|
||||
pool,
|
||||
&pool,
|
||||
page_size,
|
||||
last_hash,
|
||||
"Skyrim.esm",
|
||||
@ -39,12 +51,13 @@ pub async fn dump_plugin_data(
|
||||
"dumping plugin data to {}",
|
||||
path.display()
|
||||
);
|
||||
let mut file = File::create(path)?;
|
||||
let mut file = File::create(path).await?;
|
||||
let json_val = serde_json::to_string(&plugin)?;
|
||||
write!(file, "{}", json_val)?;
|
||||
file.write_all(json_val.as_bytes()).await?;
|
||||
last_hash = Some(plugin.hash);
|
||||
plugin_count += 1;
|
||||
}
|
||||
info!("dumped page {}", page);
|
||||
page += 1;
|
||||
}
|
||||
info!("dumped {} plugin data files", plugin_count);
|
||||
|
12
src/main.rs
12
src/main.rs
@ -120,22 +120,22 @@ pub async fn main() -> Result<()> {
|
||||
.await;
|
||||
}
|
||||
if let Some(dir) = args.cell_data {
|
||||
return dump_cell_data(&pool, &dir).await;
|
||||
return dump_cell_data(&dir).await;
|
||||
}
|
||||
if let Some(dir) = args.mod_data {
|
||||
return dump_mod_data(&pool, &dir, args.updated_after).await;
|
||||
return dump_mod_data(&dir, args.updated_after).await;
|
||||
}
|
||||
if let Some(path) = args.mod_search_index {
|
||||
return dump_mod_search_index(&pool, &args.game, &path).await;
|
||||
return dump_mod_search_index(&args.game, &path).await;
|
||||
}
|
||||
if let Some(path) = args.mod_cell_counts {
|
||||
return dump_mod_cell_counts(&pool, &path).await;
|
||||
return dump_mod_cell_counts(&path).await;
|
||||
}
|
||||
if let Some(path) = args.plugin_data {
|
||||
return dump_plugin_data(&pool, &path, args.updated_after).await;
|
||||
return dump_plugin_data(&path, args.updated_after).await;
|
||||
}
|
||||
if let Some(path) = args.file_data {
|
||||
return dump_file_data(&pool, &path, args.updated_after).await;
|
||||
return dump_file_data(&path, args.updated_after).await;
|
||||
}
|
||||
if let Some(path) = args.game_data {
|
||||
return dump_games(&pool, &path).await;
|
||||
|
Loading…
Reference in New Issue
Block a user