Start of a crawl_feed job

Tyler Hallada 2024-08-25 22:24:02 -04:00
parent a3450e202a
commit 9c75a88c69
4 changed files with 59 additions and 12 deletions

View File

@@ -5,8 +5,7 @@ use apalis_cron::{CronStream, Schedule};
 use apalis_redis::RedisStorage;
 use chrono::{DateTime, Utc};
 use clap::Parser;
-use lib::jobs::AsyncJob;
-use lib::models::feed::{Feed, GetFeedsOptions};
+use dotenvy::dotenv;
 use sqlx::postgres::PgPoolOptions;
 use sqlx::PgPool;
 use std::str::FromStr;
@@ -14,9 +13,10 @@ use std::sync::Arc;
 use thiserror::Error;
 use tracing::{info, instrument};
 
-use dotenvy::dotenv;
 use lib::config::Config;
+use lib::jobs::{AsyncJob, CrawlFeedJob};
 use lib::log::init_worker_tracing;
+use lib::models::feed::{Feed, GetFeedsOptions};
 
 #[derive(Default, Debug, Clone)]
 struct Crawl(DateTime<Utc>);
@@ -64,8 +64,9 @@ pub async fn crawl_fn(job: Crawl, state: Data<Arc<State>>) -> Result<(), CrawlError> {
 
     for feed in feeds.into_iter() {
         // self.spawn_crawler_loop(feed, respond_to.clone());
+        // TODO: implement uniqueness on jobs per feed for ~1 minute
         apalis
-            .push(AsyncJob::HelloWorld(feed.feed_id.to_string()))
+            .push(AsyncJob::CrawlFeed(CrawlFeedJob { feed }))
            .await
            .map_err(|err| CrawlError::QueueJobError(err.to_string()))?;
     }
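
The TODO above ("implement uniqueness on jobs per feed for ~1 minute") is left unimplemented in this commit. A minimal sketch of one approach, assuming a plain Redis connection is available next to the apalis storage; the redis_conn handle, the key format, and the 60-second TTL here are assumptions for illustration, not code from this repository:

use redis::aio::MultiplexedConnection;

// Hypothetical helper: returns true only if no crawl job for this feed has been
// claimed in the last 60 seconds. `SET key value NX EX 60` sets the key only
// when it does not already exist, so concurrent pushes race safely.
async fn try_claim_feed(
    redis_conn: &mut MultiplexedConnection, // assumed to exist alongside the apalis storage
    feed_id: &str,
) -> redis::RedisResult<bool> {
    let reply: Option<String> = redis::cmd("SET")
        .arg(format!("crawl_feed:dedupe:{feed_id}")) // key name invented for this sketch
        .arg("1")
        .arg("NX")
        .arg("EX")
        .arg(60)
        .query_async(redis_conn)
        .await?;
    // Redis replies "OK" when the key was set and nil when it already existed.
    Ok(reply.is_some())
}

The crawl loop would then push AsyncJob::CrawlFeed only when try_claim_feed returns true; the TODO suggests the queue itself does not enforce per-feed uniqueness, so the dedupe key would live outside apalis.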

View File

@@ -1,17 +1,15 @@
 use anyhow::Result;
+use apalis::layers::retry::RetryPolicy;
 use apalis::layers::tracing::TraceLayer;
 use apalis::prelude::*;
 use apalis_redis::RedisStorage;
 use clap::Parser;
 use dotenvy::dotenv;
-use lib::config::Config;
-use lib::jobs::AsyncJob;
-use lib::log::init_worker_tracing;
+use tower::retry::RetryLayer;
 
-pub async fn worker_fn(job: AsyncJob) {
-    tracing::info!(job = ?job, "Hello, world!");
-}
-
+use lib::config::Config;
+use lib::jobs::{handle_async_job, AsyncJob};
+use lib::log::init_worker_tracing;
 
 #[tokio::main]
 async fn main() -> Result<()> {
@@ -28,9 +26,10 @@ async fn main() -> Result<()> {
     Monitor::<TokioExecutor>::new()
         .register_with_count(2, {
             WorkerBuilder::new("worker")
+                .layer(RetryLayer::new(RetryPolicy::default()))
                 .layer(TraceLayer::new())
                 .backend(apalis_storage)
-                .build_fn(worker_fn)
+                .build_fn(handle_async_job)
         })
         .run()
         .await

View File

@@ -1,4 +1,6 @@
+use anyhow::Result;
 use serde::{Deserialize, Serialize};
+use tracing::{info, instrument};
 
 use crate::models::feed::Feed;
 
@@ -6,3 +8,9 @@ use crate::models::feed::Feed;
 pub struct CrawlFeedJob {
     pub feed: Feed,
 }
+
+#[instrument(skip_all, fields(feed_id = %feed.feed_id))]
+pub async fn crawl_feed(CrawlFeedJob { feed }: CrawlFeedJob) -> Result<()> {
+    info!("Crawling feed: {:?}", feed.feed_id);
+    Err(anyhow::anyhow!("Not implemented"))
+}
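
crawl_feed is only a stub at this point and always returns an error. Purely as an illustration of where it is headed (not part of this commit), a fetch-and-parse pass might look like the sketch below; the feed.url field and the reqwest and feed-rs crates are assumptions made for this sketch:

use anyhow::Result;
use tracing::info;

// Hypothetical follow-up that would replace the stub in the same module:
// download the feed over HTTP and parse it. `feed.url` is assumed to be a
// string URL on the Feed model.
pub async fn crawl_feed(CrawlFeedJob { feed }: CrawlFeedJob) -> Result<()> {
    let body = reqwest::get(feed.url.as_str()).await?.bytes().await?;
    let parsed = feed_rs::parser::parse(&body[..])?;
    info!(
        feed_id = %feed.feed_id,
        entries = parsed.entries.len(),
        "fetched and parsed feed"
    );
    Ok(())
}

Any error would bubble up through handle_async_job and, with the RetryLayer added in the worker, should be retried according to RetryPolicy::default().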

View File

@@ -1,8 +1,47 @@
+use apalis::prelude::*;
+use apalis_redis::RedisStorage;
 use serde::{Deserialize, Serialize};
+use thiserror::Error;
+use tracing::{error, info, instrument};
 
 mod crawl_feed;
 
+pub use crawl_feed::CrawlFeedJob;
+
 #[derive(Debug, Deserialize, Serialize, Clone)]
 pub enum AsyncJob {
     HelloWorld(String),
+    CrawlFeed(CrawlFeedJob),
+}
+
+#[derive(Debug, Error)]
+pub enum AsyncJobError {
+    #[error("error executing job")]
+    JobError(#[from] anyhow::Error),
+}
+
+#[instrument(skip(job, worker_id), fields(worker_id = %worker_id))]
+pub async fn handle_async_job(
+    job: AsyncJob,
+    worker_id: WorkerId,
+    // TODO: add task_id to tracing instrumentation context
+    // it causes a panic in 0.6.0 currently, see: https://github.com/geofmureithi/apalis/issues/398
+    // task_id: Data<TaskId>,
+) -> Result<(), AsyncJobError> {
+    let result = match job {
+        AsyncJob::HelloWorld(name) => {
+            info!("Hello, {}!", name);
+            Ok(())
+        }
+        AsyncJob::CrawlFeed(job) => crawl_feed::crawl_feed(job).await,
+    };
+    match result {
+        Ok(_) => info!("Job completed successfully"),
+        Err(err) => {
+            error!("Job failed: {err:?}");
+            return Err(AsyncJobError::JobError(err));
+        }
+    };
+
+    Ok(())
 }
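
The commented-out task_id parameter points at apalis issue #398, which currently panics in 0.6.0 when the task id is extracted this way. A hypothetical sketch of what that instrumentation could look like once the upstream issue is resolved; the Data<TaskId> extractor is taken from the commented-out line and the empty-field pattern is standard tracing, but none of this is verified against a fixed apalis release:

// Same module as above; AsyncJob, AsyncJobError, WorkerId, TaskId, and Data are
// assumed to be in scope via apalis::prelude::* and the imports in this file.
#[instrument(
    skip(job, worker_id, task_id),
    fields(worker_id = %worker_id, task_id = tracing::field::Empty)
)]
pub async fn handle_async_job(
    job: AsyncJob,
    worker_id: WorkerId,
    task_id: Data<TaskId>,
) -> Result<(), AsyncJobError> {
    // Fill in the declared-but-empty span field once the extractor no longer panics.
    tracing::Span::current().record("task_id", tracing::field::display(&*task_id));
    // ...dispatch on `job` exactly as in the version above...
    let _ = job;
    Ok(())
}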