Start of a crawl_feed job
This commit is contained in:
parent a3450e202a
commit 9c75a88c69
@@ -5,8 +5,7 @@ use apalis_cron::{CronStream, Schedule};
 use apalis_redis::RedisStorage;
 use chrono::{DateTime, Utc};
 use clap::Parser;
-use lib::jobs::AsyncJob;
-use lib::models::feed::{Feed, GetFeedsOptions};
+use dotenvy::dotenv;
 use sqlx::postgres::PgPoolOptions;
 use sqlx::PgPool;
 use std::str::FromStr;
@@ -14,9 +13,10 @@ use std::sync::Arc;
 use thiserror::Error;
 use tracing::{info, instrument};
 
-use dotenvy::dotenv;
 use lib::config::Config;
+use lib::jobs::{AsyncJob, CrawlFeedJob};
 use lib::log::init_worker_tracing;
+use lib::models::feed::{Feed, GetFeedsOptions};
 
 #[derive(Default, Debug, Clone)]
 struct Crawl(DateTime<Utc>);
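For orientation: `Crawl(DateTime<Utc>)` (unchanged above) is the payload type produced by the cron schedule, so each tick of the `CronStream` hands `crawl_fn` the tick timestamp. A minimal sketch of how such a newtype is typically wired up for apalis-cron; the `From` impl shown here is an assumption about code outside this diff, not part of the commit:

use chrono::{DateTime, Utc};

// Newtype carrying the cron tick time, mirroring the struct in the hunk above.
#[derive(Default, Debug, Clone)]
struct Crawl(DateTime<Utc>);

// apalis-cron builds the job value from the tick timestamp; an impl along
// these lines is assumed to live elsewhere in the file.
impl From<DateTime<Utc>> for Crawl {
    fn from(tick: DateTime<Utc>) -> Self {
        Crawl(tick)
    }
}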
@@ -64,8 +64,9 @@ pub async fn crawl_fn(job: Crawl, state: Data<Arc<State>>) -> Result<(), CrawlError> {
 
     for feed in feeds.into_iter() {
         // self.spawn_crawler_loop(feed, respond_to.clone());
+        // TODO: implement uniqueness on jobs per feed for ~1 minute
         apalis
-            .push(AsyncJob::HelloWorld(feed.feed_id.to_string()))
+            .push(AsyncJob::CrawlFeed(CrawlFeedJob { feed }))
            .await
            .map_err(|err| CrawlError::QueueJobError(err.to_string()))?;
     }
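The loop above is the producer side of the commit: one `AsyncJob::CrawlFeed` is pushed per feed, with a TODO about de-duplicating jobs per feed still open. A rough, self-contained sketch of that call path, assuming `apalis` in the shared `State` is a `RedisStorage<AsyncJob>`; it pushes the `HelloWorld` variant because constructing a `Feed` requires models not shown here, and `redis_url` is a placeholder:

use apalis::prelude::*;
use apalis_redis::RedisStorage;
use lib::jobs::AsyncJob;

// Connect to Redis and enqueue a single job; CrawlFeed jobs take the same path.
async fn push_one(redis_url: &str) -> anyhow::Result<()> {
    let conn = apalis_redis::connect(redis_url).await?;
    let mut storage: RedisStorage<AsyncJob> = RedisStorage::new(conn);
    storage
        .push(AsyncJob::HelloWorld("feed-123".to_string()))
        .await?;
    Ok(())
}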
@@ -1,17 +1,15 @@
 use anyhow::Result;
+use apalis::layers::retry::RetryPolicy;
 use apalis::layers::tracing::TraceLayer;
 use apalis::prelude::*;
 use apalis_redis::RedisStorage;
 use clap::Parser;
-
 use dotenvy::dotenv;
-use lib::config::Config;
-use lib::jobs::AsyncJob;
-use lib::log::init_worker_tracing;
-
-pub async fn worker_fn(job: AsyncJob) {
-    tracing::info!(job = ?job, "Hello, world!");
-}
+use tower::retry::RetryLayer;
+
+use lib::config::Config;
+use lib::jobs::{handle_async_job, AsyncJob};
+use lib::log::init_worker_tracing;
 
 #[tokio::main]
 async fn main() -> Result<()> {
@@ -28,9 +26,10 @@ async fn main() -> Result<()> {
     Monitor::<TokioExecutor>::new()
         .register_with_count(2, {
             WorkerBuilder::new("worker")
+                .layer(RetryLayer::new(RetryPolicy::default()))
                 .layer(TraceLayer::new())
                 .backend(apalis_storage)
-                .build_fn(worker_fn)
+                .build_fn(handle_async_job)
         })
         .run()
         .await
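With this hunk the worker's handler is `handle_async_job`, and it now sits behind a retry layer, so a handler returning `Err` is retried according to `RetryPolicy::default()` before the job is finally marked failed (relevant for `CrawlFeed`, whose handler is still a stub that always errors). A condensed sketch of the full wiring, assuming `apalis_storage` is the Redis-backed job storage; config/env handling is elided and `redis_url` is illustrative only:

use anyhow::Result;
use apalis::layers::retry::RetryPolicy;
use apalis::layers::tracing::TraceLayer;
use apalis::prelude::*;
use apalis_redis::RedisStorage;
use tower::retry::RetryLayer;

use lib::jobs::{handle_async_job, AsyncJob};

// Build the Redis-backed storage, then register one worker pool (2 workers)
// whose service is handle_async_job wrapped in retry + tracing layers.
async fn run_worker(redis_url: &str) -> Result<()> {
    let conn = apalis_redis::connect(redis_url).await?;
    let apalis_storage: RedisStorage<AsyncJob> = RedisStorage::new(conn);

    Monitor::<TokioExecutor>::new()
        .register_with_count(2, {
            WorkerBuilder::new("worker")
                .layer(RetryLayer::new(RetryPolicy::default()))
                .layer(TraceLayer::new())
                .backend(apalis_storage)
                .build_fn(handle_async_job)
        })
        .run()
        .await?;
    Ok(())
}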
@@ -1,4 +1,6 @@
+use anyhow::Result;
 use serde::{Deserialize, Serialize};
+use tracing::{info, instrument};
 
 use crate::models::feed::Feed;
 
@@ -6,3 +8,9 @@ use crate::models::feed::Feed;
 pub struct CrawlFeedJob {
     pub feed: Feed,
 }
+
+#[instrument(skip_all, fields(feed_id = %feed.feed_id))]
+pub async fn crawl_feed(CrawlFeedJob { feed }: CrawlFeedJob) -> Result<()> {
+    info!("Crawling feed: {:?}", feed.feed_id);
+    Err(anyhow::anyhow!("Not implemented"))
+}
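A note on the new `#[instrument]` attribute: `skip_all` keeps the destructured `feed` argument (potentially large) out of the span, while `fields(feed_id = %feed.feed_id)` records just the id via its `Display` impl. A rough hand-written equivalent using tracing's span API directly; the concrete type of `feed_id` is not visible in this diff, so the sketch accepts anything `Display`-able:

use tracing::{info, info_span};

// Open a span named like the function, carrying only feed_id, and enter it
// for the duration of the work, which is roughly what #[instrument] generates.
fn with_crawl_span(feed_id: impl std::fmt::Display) {
    let span = info_span!("crawl_feed", feed_id = %feed_id);
    let _guard = span.enter();
    info!("crawling feed");
}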
@@ -1,8 +1,47 @@
+use apalis::prelude::*;
+use apalis_redis::RedisStorage;
 use serde::{Deserialize, Serialize};
+use thiserror::Error;
+use tracing::{error, info, instrument};
 
 mod crawl_feed;
 
+pub use crawl_feed::CrawlFeedJob;
+
 #[derive(Debug, Deserialize, Serialize, Clone)]
 pub enum AsyncJob {
     HelloWorld(String),
+    CrawlFeed(CrawlFeedJob),
+}
+
+#[derive(Debug, Error)]
+pub enum AsyncJobError {
+    #[error("error executing job")]
+    JobError(#[from] anyhow::Error),
+}
+
+#[instrument(skip(job, worker_id), fields(worker_id = %worker_id))]
+pub async fn handle_async_job(
+    job: AsyncJob,
+    worker_id: WorkerId,
+    // TODO: add task_id to tracing instrumentation context
+    // it causes a panic in 0.6.0 currently, see: https://github.com/geofmureithi/apalis/issues/398
+    // task_id: Data<TaskId>,
+) -> Result<(), AsyncJobError> {
+    let result = match job {
+        AsyncJob::HelloWorld(name) => {
+            info!("Hello, {}!", name);
+            Ok(())
+        }
+        AsyncJob::CrawlFeed(job) => crawl_feed::crawl_feed(job).await,
+    };
+
+    match result {
+        Ok(_) => info!("Job completed successfully"),
+        Err(err) => {
+            error!("Job failed: {err:?}");
+            return Err(AsyncJobError::JobError(err));
+        }
+    };
+    Ok(())
+}
 }
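`handle_async_job` is now the single dispatch point: each queued `AsyncJob` variant gets a match arm, the outcome is logged, and failures are wrapped in `AsyncJobError` so the worker's retry layer can act on them. Adding a future job type therefore means a new enum variant plus a new arm and handler. A standalone illustration of that pattern with hypothetical names (`DemoJob` and `PruneOldEntries` are not part of this commit):

use serde::{Deserialize, Serialize};

// Hypothetical job enum following the same shape as AsyncJob above.
#[derive(Debug, Deserialize, Serialize, Clone)]
enum DemoJob {
    HelloWorld(String),
    PruneOldEntries { older_than_days: u32 },
}

// Single handler that dispatches on the variant, mirroring handle_async_job.
async fn handle_demo_job(job: DemoJob) -> anyhow::Result<()> {
    match job {
        DemoJob::HelloWorld(name) => {
            println!("Hello, {name}!");
            Ok(())
        }
        DemoJob::PruneOldEntries { older_than_days } => {
            // Each new variant gets its own arm, just as AsyncJob::CrawlFeed
            // dispatches to crawl_feed::crawl_feed(job).await in the hunk above.
            println!("would prune entries older than {older_than_days} days");
            Ok(())
        }
    }
}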