Add timestamp to mods for some resumability

This commit is contained in:
Tyler Hallada 2021-07-29 00:50:42 -04:00
parent a8424e830e
commit fc5a9d39c2
5 changed files with 61 additions and 6 deletions

View File

@ -1,6 +1,6 @@
ALTER TABLE "files" ADD COLUMN "downloaded_at" TIMESTAMP(3); ALTER TABLE "files" ADD COLUMN "downloaded_at" TIMESTAMP(3);
/* Backfill existing columns the created_at timestamps. /* Backfill existing columns using the created_at timestamps.
* *
* This is approximate since usually the file was downloaded shortly after the record was created. * This is approximate since usually the file was downloaded shortly after the record was created.
* I mostly only care whether it is null or not null. All existing files need non-null values. * I mostly only care whether it is null or not null. All existing files need non-null values.

View File

@ -0,0 +1,8 @@
ALTER TABLE "mods" ADD COLUMN "last_updated_files_at" TIMESTAMP(3);
/* Backfill existing rows using their updated_at timestamps.
 *
 * This is approximate since usually the files are downloaded shortly after the mod record was created.
 * I mostly only care whether it is null or not null. Most existing mods need non-null values.
 *
 * Note: the UPDATE below has no WHERE clause, so every row present when this migration runs
 * receives a non-null value; only rows inserted afterwards start out with NULL.
 */
UPDATE "mods" SET "last_updated_files_at" = "updated_at";

View File

@ -6,6 +6,7 @@ use reqwest::StatusCode;
use skyrim_cell_dump::parse_plugin; use skyrim_cell_dump::parse_plugin;
use sqlx::postgres::PgPoolOptions; use sqlx::postgres::PgPoolOptions;
use std::borrow::Borrow; use std::borrow::Borrow;
use std::collections::HashSet;
use std::convert::TryInto; use std::convert::TryInto;
use std::env; use std::env;
use std::io::Seek; use std::io::Seek;
@ -210,7 +211,7 @@ pub async fn main() -> Result<()> {
let scraped = mod_list_resp.scrape_mods()?; let scraped = mod_list_resp.scrape_mods()?;
has_next_page = scraped.has_next_page; has_next_page = scraped.has_next_page;
let present_mods = game_mod::bulk_get_present_nexus_mod_ids( let processed_mods = game_mod::bulk_get_fully_processed_nexus_mod_ids(
&pool, &pool,
&scraped &scraped
.mods .mods
@ -222,7 +223,7 @@ pub async fn main() -> Result<()> {
let mods_to_create: Vec<UnsavedMod> = scraped let mods_to_create: Vec<UnsavedMod> = scraped
.mods .mods
.iter() .iter()
.filter(|scraped_mod| !present_mods.contains(&scraped_mod.nexus_mod_id)) .filter(|scraped_mod| !processed_mods.contains(&scraped_mod.nexus_mod_id))
.map(|scraped_mod| UnsavedMod { .map(|scraped_mod| UnsavedMod {
name: scraped_mod.name, name: scraped_mod.name,
nexus_mod_id: scraped_mod.nexus_mod_id, nexus_mod_id: scraped_mod.nexus_mod_id,
@ -260,10 +261,21 @@ pub async fn main() -> Result<()> {
Some(_) => true, Some(_) => true,
}); });
let present_file_ids: HashSet<i32> =
file::get_nexus_file_ids_by_mod_id(&pool, db_mod.id)
.await?
.into_iter()
.collect();
for api_file in files { for api_file in files {
let file_span = let file_span =
info_span!("file", name = &api_file.file_name, id = &api_file.file_id); info_span!("file", name = &api_file.file_name, id = &api_file.file_id);
let _file_span = file_span.enter(); let _file_span = file_span.enter();
if present_file_ids.contains(&(api_file.file_id as i32)) {
info!("skipping file already present in database");
continue;
}
let db_file = file::insert( let db_file = file::insert(
&pool, &pool,
api_file.name, api_file.name,
@ -503,6 +515,8 @@ pub async fn main() -> Result<()> {
debug!(duration = ?download_link_resp.wait, "sleeping"); debug!(duration = ?download_link_resp.wait, "sleeping");
sleep(download_link_resp.wait).await; sleep(download_link_resp.wait).await;
} }
game_mod::update_last_updated_files_at(&pool, db_mod.id).await?;
} }
page += 1; page += 1;

View File

@ -36,6 +36,18 @@ pub async fn get_by_nexus_file_id(
.context("Failed to get file") .context("Failed to get file")
} }
/// Collects the `nexus_file_id` of every row in `files` whose `mod_id` equals the given id.
///
/// The ids are returned in whatever order the database yields them; the query has no
/// `ORDER BY`.
///
/// # Errors
///
/// Returns an error with the context "Failed to get files" if the query fails.
#[instrument(level = "debug", skip(pool))]
pub async fn get_nexus_file_ids_by_mod_id(
    pool: &sqlx::Pool<sqlx::Postgres>,
    mod_id: i32,
) -> Result<Vec<i32>> {
    let ids = sqlx::query!("SELECT nexus_file_id FROM files WHERE mod_id = $1", mod_id)
        .map(|record| record.nexus_file_id)
        .fetch_all(pool)
        .await
        .context("Failed to get files")?;

    Ok(ids)
}
#[instrument(level = "debug", skip(pool))] #[instrument(level = "debug", skip(pool))]
pub async fn insert( pub async fn insert(
pool: &sqlx::Pool<sqlx::Postgres>, pool: &sqlx::Pool<sqlx::Postgres>,

View File

@ -17,6 +17,7 @@ pub struct Mod {
pub game_id: i32, pub game_id: i32,
pub updated_at: NaiveDateTime, pub updated_at: NaiveDateTime,
pub created_at: NaiveDateTime, pub created_at: NaiveDateTime,
pub last_updated_files_at: Option<NaiveDateTime>,
} }
#[derive(Debug)] #[derive(Debug)]
@ -45,18 +46,20 @@ pub async fn get_by_nexus_mod_id(
} }
#[instrument(level = "debug", skip(pool))] #[instrument(level = "debug", skip(pool))]
pub async fn bulk_get_present_nexus_mod_ids( pub async fn bulk_get_fully_processed_nexus_mod_ids(
pool: &sqlx::Pool<sqlx::Postgres>, pool: &sqlx::Pool<sqlx::Postgres>,
nexus_mod_ids: &[i32], nexus_mod_ids: &[i32],
) -> Result<Vec<i32>> { ) -> Result<Vec<i32>> {
sqlx::query!( sqlx::query!(
"SELECT nexus_mod_id FROM mods WHERE nexus_mod_id = ANY($1::int[])", "SELECT nexus_mod_id FROM mods
WHERE nexus_mod_id = ANY($1::int[])
AND last_updated_files_at IS NOT NULL",
nexus_mod_ids, nexus_mod_ids,
) )
.map(|row| row.nexus_mod_id) .map(|row| row.nexus_mod_id)
.fetch_all(pool) .fetch_all(pool)
.await .await
.context("Failed to get mods") .context("Failed to get fully processed , last_updated_files_at: () mods")
} }
#[instrument(level = "debug", skip(pool))] #[instrument(level = "debug", skip(pool))]
@ -144,3 +147,21 @@ pub async fn get(pool: &sqlx::Pool<sqlx::Postgres>, id: i32) -> Result<Option<Mo
.await .await
.context("Failed to get mod") .context("Failed to get mod")
} }
/// Sets `last_updated_files_at` to the database's `now()` for the mod whose primary key is `id`
/// and returns the updated row.
///
/// # Errors
///
/// Returns an error with the context "Failed to update mod" if the statement fails. Because
/// the row is read back with `fetch_one`, an `id` matching no row is presumably reported as an
/// error as well (confirm against the sqlx version in use).
#[instrument(level = "debug", skip(pool))]
pub async fn update_last_updated_files_at(
    pool: &sqlx::Pool<sqlx::Postgres>,
    id: i32,
) -> Result<Mod> {
    let updated_mod = sqlx::query_as!(
        Mod,
        "UPDATE mods\nSET last_updated_files_at = now()\nWHERE id = $1\nRETURNING *",
        id,
    )
    .fetch_one(pool)
    .await
    .context("Failed to update mod")?;

    Ok(updated_mod)
}