Recreate pg pool connection in all dump commands

The slow query issue seems to affect every dump command that runs queries in a loop,
so apply the pool-recreation workaround to each of them.
Tyler Hallada 2023-11-15 18:35:48 -05:00
parent 8cb9bfa4ca
commit 7579db364b
9 changed files with 155 additions and 71 deletions
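
Each dump command now builds its own pool from DATABASE_URL instead of borrowing a shared one, and rebuilds that pool every five pages/iterations as a workaround for the slowdown. A minimal sketch of the shared pattern, assuming sqlx's PgPoolOptions API; the helper names (fresh_pool, dump_in_pages) and the loop body are illustrative placeholders, not code from the repo:

use anyhow::Result;
use sqlx::postgres::PgPoolOptions;
use sqlx::{Pool, Postgres};
use std::env;
use tracing::info;

// Illustrative helper: build a fresh pool from DATABASE_URL.
async fn fresh_pool() -> Result<Pool<Postgres>> {
    Ok(PgPoolOptions::new()
        .max_connections(5)
        .connect(&env::var("DATABASE_URL")?)
        .await?)
}

// Illustrative loop shape shared by the dump commands: rebuild the pool
// every 5 pages to work around the query slowdown.
async fn dump_in_pages() -> Result<()> {
    let mut pool = fresh_pool().await?;
    let mut page = 1;
    loop {
        if page % 5 == 0 {
            info!("reconnecting to database");
            pool = fresh_pool().await?;
        }
        // Run the batched query against &pool and write the results to disk;
        // stop once a batch comes back empty.
        let batch: Vec<()> = Vec::new(); // placeholder for the real query
        if batch.is_empty() {
            break;
        }
        page += 1;
    }
    Ok(())
}

The diffs below apply this same shape to the cell, file, mod, mod search index, mod cell count, and plugin dump commands, and drop the pool argument from their signatures accordingly.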

View File

@ -4,10 +4,9 @@
/// rows referencing the duplicate cells are updated to reference the chosen cell.
use anyhow::Result;
use serde::{Deserialize, Serialize};
use sqlx::postgres::PgDatabaseError;
use sqlx::types::Json;
use sqlx::FromRow;
use tracing::{info, warn};
use tracing::info;
const PAGE_SIZE: i64 = 100;

View File

@ -47,8 +47,7 @@ pub async fn backfill_is_base_game(pool: &sqlx::Pool<sqlx::Postgres>) -> Result<
} else {
None
};
let (form_id, master) =
get_local_form_id_and_master(cell.form_id, &masters, file_name)
let (form_id, master) = get_local_form_id_and_master(cell.form_id, &masters, file_name)
.expect("form_id is a valid i32");
UnsavedCell {
form_id,

View File

@ -1,16 +1,32 @@
use anyhow::Result;
use std::fs::{create_dir_all, File};
use std::io::Write;
use sqlx::postgres::PgPoolOptions;
use std::env;
use std::fs::create_dir_all;
use std::path::Path;
use tokio::fs::File;
use tokio::io::AsyncWriteExt;
use tracing::{debug, info};
use crate::models::cell;
pub async fn dump_cell_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> Result<()> {
pub async fn dump_cell_data(dir: &str) -> Result<()> {
let mut pool = PgPoolOptions::new()
.max_connections(5)
.connect(&env::var("DATABASE_URL")?)
.await?;
let mut cell_count = 0;
for x in -77..75 {
for y in -50..44 {
if let Ok(data) = cell::get_cell_data(pool, "Skyrim.esm", 1, x, y, true).await {
if cell_count % 5 == 0 {
// There's a weird issue that slows down this query after 5 iterations. Recreating the
// connection pool seems to fix it. I don't know why.
info!("reconnecting to database");
pool = PgPoolOptions::new()
.max_connections(5)
.connect(&env::var("DATABASE_URL")?)
.await?;
}
if let Ok(data) = cell::get_cell_data(&pool, "Skyrim.esm", 1, x, y, true).await {
let path = format!("{}/{}", &dir, x);
let path = Path::new(&path);
create_dir_all(path)?;
@ -22,11 +38,13 @@ pub async fn dump_cell_data(pool: &sqlx::Pool<sqlx::Postgres>, dir: &str) -> Res
"dumping cell data to {}",
path.display()
);
let mut file = File::create(path)?;
write!(file, "{}", serde_json::to_string(&data)?)?;
let mut file = File::create(path).await?;
file.write_all(serde_json::to_string(&data)?.as_bytes())
.await?;
cell_count += 1;
}
}
info!("dumped all rows in x: {}", x);
}
info!("dumped {} cell data files", cell_count);
Ok(())

View File

@ -1,24 +1,35 @@
use anyhow::Result;
use chrono::NaiveDateTime;
use std::fs::File;
use std::io::Write;
use sqlx::postgres::PgPoolOptions;
use std::env;
use std::path::Path;
use tokio::fs::File;
use tokio::io::AsyncWriteExt;
use tracing::{debug, info};
use crate::models::file;
pub async fn dump_file_data(
pool: &sqlx::Pool<sqlx::Postgres>,
dir: &str,
updated_after: Option<NaiveDateTime>,
) -> Result<()> {
pub async fn dump_file_data(dir: &str, updated_after: Option<NaiveDateTime>) -> Result<()> {
let mut pool = PgPoolOptions::new()
.max_connections(5)
.connect(&env::var("DATABASE_URL")?)
.await?;
let mut file_count = 0;
let mut page = 1;
let page_size = 20;
let mut last_id = None;
loop {
if page % 5 == 0 {
// There's a weird issue that slows down this query after 5 iterations. Recreating the
// connection pool seems to fix it. I don't know why.
info!("reconnecting to database");
pool = PgPoolOptions::new()
.max_connections(5)
.connect(&env::var("DATABASE_URL")?)
.await?;
}
let files =
file::batched_get_with_cells(pool, page_size, last_id, "Skyrim.esm", 1, updated_after)
file::batched_get_with_cells(&pool, page_size, last_id, "Skyrim.esm", 1, updated_after)
.await?;
if files.is_empty() {
break;
@ -33,11 +44,13 @@ pub async fn dump_file_data(
"dumping file data to {}",
path.display()
);
let mut file = File::create(path)?;
write!(file, "{}", serde_json::to_string(&file_with_cells)?)?;
let mut file = File::create(path).await?;
file.write_all(serde_json::to_string(&file_with_cells)?.as_bytes())
.await?;
last_id = Some(file_with_cells.id);
file_count += 1;
}
info!("dumped page {}", page);
page += 1;
}
info!("dumped {} file data files", file_count);

View File

@ -1,19 +1,34 @@
use anyhow::Result;
use sqlx::postgres::PgPoolOptions;
use std::collections::HashMap;
use std::fs::File;
use std::io::Write;
use std::env;
use tokio::fs::File;
use tokio::io::AsyncWriteExt;
use tracing::{debug, info};
use crate::models::game_mod;
pub async fn dump_mod_cell_counts(pool: &sqlx::Pool<sqlx::Postgres>, path: &str) -> Result<()> {
pub async fn dump_mod_cell_counts(path: &str) -> Result<()> {
let mut pool = PgPoolOptions::new()
.max_connections(5)
.connect(&env::var("DATABASE_URL")?)
.await?;
let mut page = 1;
let page_size = 100;
let mut last_id = None;
let mut counts = HashMap::new();
loop {
if page % 5 == 0 {
// There's a weird issue that slows down this query after 5 iterations. Recreating the
// connection pool seems to fix it. I don't know why.
info!("reconnecting to database");
pool = PgPoolOptions::new()
.max_connections(5)
.connect(&env::var("DATABASE_URL")?)
.await?;
}
let mod_cell_counts =
game_mod::batched_get_cell_counts(pool, page_size, last_id, "Skyrim.esm", 1).await?;
game_mod::batched_get_cell_counts(&pool, page_size, last_id, "Skyrim.esm", 1).await?;
if mod_cell_counts.is_empty() {
break;
}
@ -27,10 +42,12 @@ pub async fn dump_mod_cell_counts(pool: &sqlx::Pool<sqlx::Postgres>, path: &str)
counts.insert(mod_cell_count.nexus_mod_id, mod_cell_count.cells);
last_id = Some(mod_cell_count.nexus_mod_id);
}
info!("dumped page {}", page);
page += 1;
}
info!("writing {} mod cell counts to {}", counts.len(), path);
let mut file = File::create(path)?;
write!(file, "{}", serde_json::to_string(&counts)?)?;
let mut file = File::create(path).await?;
file.write_all(serde_json::to_string(&counts)?.as_bytes())
.await?;
Ok(())
}

View File

@ -1,31 +1,42 @@
use anyhow::Result;
use chrono::NaiveDateTime;
use sqlx::postgres::PgPoolOptions;
use std::collections::HashMap;
use std::fs::File;
use std::io::Write;
use std::env;
use std::path::Path;
use tokio::fs::File;
use tokio::io::AsyncWriteExt;
use tracing::{debug, info};
use crate::models::game;
use crate::models::game_mod;
pub async fn dump_mod_data(
pool: &sqlx::Pool<sqlx::Postgres>,
dir: &str,
updated_after: Option<NaiveDateTime>,
) -> Result<()> {
pub async fn dump_mod_data(dir: &str, updated_after: Option<NaiveDateTime>) -> Result<()> {
let mut pool = PgPoolOptions::new()
.max_connections(5)
.connect(&env::var("DATABASE_URL")?)
.await?;
let mut mod_count = 0;
let mut page = 1;
let page_size = 20;
let mut last_id = None;
let game_id_to_name: HashMap<_, _> = game::get_all(pool)
let game_id_to_name: HashMap<_, _> = game::get_all(&pool)
.await?
.into_iter()
.map(|game| (game.id, game.name))
.collect();
loop {
if page % 5 == 0 {
// There's a weird issue that slows down this query after 5 iterations. Recreating the
// connection pool seems to fix it. I don't know why.
info!("reconnecting to database");
pool = PgPoolOptions::new()
.max_connections(5)
.connect(&env::var("DATABASE_URL")?)
.await?;
}
let mods = game_mod::batched_get_with_cells_and_files(
pool,
&pool,
page_size,
last_id,
"Skyrim.esm",
@ -50,11 +61,13 @@ pub async fn dump_mod_data(
"dumping mod data to {}",
path.display()
);
let mut file = File::create(path)?;
write!(file, "{}", serde_json::to_string(&mod_with_cells)?)?;
let mut file = File::create(path).await?;
file.write_all(serde_json::to_string(&mod_with_cells)?.as_bytes())
.await?;
last_id = Some(mod_with_cells.id);
mod_count += 1;
}
info!("dumped page {}", page);
page += 1;
}
info!("dumped {} mod data files", mod_count);

View File

@ -1,7 +1,9 @@
use anyhow::Result;
use serde::Serialize;
use std::fs::File;
use std::io::Write;
use sqlx::postgres::PgPoolOptions;
use std::env;
use tokio::fs::File;
use tokio::io::AsyncWriteExt;
use tracing::{debug, info};
use crate::models::game;
@ -13,18 +15,27 @@ struct ModForSearchIdTranslated {
id: i32,
}
pub async fn dump_mod_search_index(
pool: &sqlx::Pool<sqlx::Postgres>,
game: &str,
path: &str,
) -> Result<()> {
pub async fn dump_mod_search_index(game: &str, path: &str) -> Result<()> {
let mut pool = PgPoolOptions::new()
.max_connections(5)
.connect(&env::var("DATABASE_URL")?)
.await?;
let mut page = 1;
let mut search_index = vec![];
let page_size = 20;
let mut last_id = None;
let game_id = game::get_id_by_name(pool, game).await?;
let game_id = game::get_id_by_name(&pool, game).await?;
loop {
let mods = game_mod::batched_get_for_search(pool, game_id, page_size, last_id).await?;
if page % 5 == 0 {
// There's a weird issue that slows down this query after 5 iterations. Recreating the
// connection pool seems to fix it. I don't know why.
info!("reconnecting to database");
pool = PgPoolOptions::new()
.max_connections(5)
.connect(&env::var("DATABASE_URL")?)
.await?;
}
let mods = game_mod::batched_get_for_search(&pool, game_id, page_size, last_id).await?;
if mods.is_empty() {
break;
}
@ -47,7 +58,8 @@ pub async fn dump_mod_search_index(
search_index.len(),
path
);
let mut file = File::create(path)?;
write!(file, "{}", serde_json::to_string(&search_index)?)?;
let mut file = File::create(path).await?;
file.write_all(serde_json::to_string(&search_index)?.as_bytes())
.await?;
Ok(())
}

View File

@ -1,24 +1,36 @@
use anyhow::Result;
use chrono::NaiveDateTime;
use std::fs::{create_dir_all, File};
use std::io::Write;
use sqlx::postgres::PgPoolOptions;
use std::env;
use std::fs::create_dir_all;
use std::path::Path;
use tokio::fs::File;
use tokio::io::AsyncWriteExt;
use tracing::{debug, info};
use crate::models::{format_radix, plugin};
pub async fn dump_plugin_data(
pool: &sqlx::Pool<sqlx::Postgres>,
dir: &str,
updated_after: Option<NaiveDateTime>,
) -> Result<()> {
pub async fn dump_plugin_data(dir: &str, updated_after: Option<NaiveDateTime>) -> Result<()> {
let mut pool = PgPoolOptions::new()
.max_connections(5)
.connect(&env::var("DATABASE_URL")?)
.await?;
let mut plugin_count = 0;
let mut page: u32 = 1;
let page_size = 20;
let mut last_hash = None;
loop {
if page % 5 == 0 {
// There's a weird issue that slows down this query after 5 iterations. Recreating the
// connection pool seems to fix it. I don't know why.
info!("reconnecting to database");
pool = PgPoolOptions::new()
.max_connections(5)
.connect(&env::var("DATABASE_URL")?)
.await?;
}
let plugins = plugin::batched_get_by_hash_with_mods(
pool,
&pool,
page_size,
last_hash,
"Skyrim.esm",
@ -39,12 +51,13 @@ pub async fn dump_plugin_data(
"dumping plugin data to {}",
path.display()
);
let mut file = File::create(path)?;
let mut file = File::create(path).await?;
let json_val = serde_json::to_string(&plugin)?;
write!(file, "{}", json_val)?;
file.write_all(json_val.as_bytes()).await?;
last_hash = Some(plugin.hash);
plugin_count += 1;
}
info!("dumped page {}", page);
page += 1;
}
info!("dumped {} plugin data files", plugin_count);

View File

@ -120,22 +120,22 @@ pub async fn main() -> Result<()> {
.await;
}
if let Some(dir) = args.cell_data {
return dump_cell_data(&pool, &dir).await;
return dump_cell_data(&dir).await;
}
if let Some(dir) = args.mod_data {
return dump_mod_data(&pool, &dir, args.updated_after).await;
return dump_mod_data(&dir, args.updated_after).await;
}
if let Some(path) = args.mod_search_index {
return dump_mod_search_index(&pool, &args.game, &path).await;
return dump_mod_search_index(&args.game, &path).await;
}
if let Some(path) = args.mod_cell_counts {
return dump_mod_cell_counts(&pool, &path).await;
return dump_mod_cell_counts(&path).await;
}
if let Some(path) = args.plugin_data {
return dump_plugin_data(&pool, &path, args.updated_after).await;
return dump_plugin_data(&path, args.updated_after).await;
}
if let Some(path) = args.file_data {
return dump_file_data(&pool, &path, args.updated_after).await;
return dump_file_data(&path, args.updated_after).await;
}
if let Some(path) = args.game_data {
return dump_games(&pool, &path).await;