Add crawl_entry job

2024-08-27 21:54:14 -04:00
parent 65eac1975c
commit 6912ef9017
5 changed files with 87 additions and 17 deletions


@@ -1,20 +1,24 @@
 use apalis::prelude::*;
+use apalis_redis::RedisStorage;
 use reqwest::Client;
 use serde::{Deserialize, Serialize};
 use sqlx::PgPool;
 use thiserror::Error;
 use tracing::{error, info, instrument};
 
+mod crawl_entry;
 mod crawl_feed;
 
+pub use crawl_entry::CrawlEntryJob;
 pub use crawl_feed::CrawlFeedJob;
 
-use crate::domain_request_limiter::DomainRequestLimiter;
+use crate::{config::Config, domain_request_limiter::DomainRequestLimiter};
 
 #[derive(Debug, Deserialize, Serialize, Clone)]
 pub enum AsyncJob {
     HelloWorld(String),
     CrawlFeed(CrawlFeedJob),
+    CrawlEntry(CrawlEntryJob),
 }
 
 #[derive(Debug, Error)]
@@ -23,7 +27,7 @@ pub enum AsyncJobError {
     JobError(#[from] anyhow::Error),
 }
 
-#[instrument(skip(job, worker_id), fields(worker_id = %worker_id))]
+#[instrument(skip_all, fields(worker_id = %worker_id))]
 pub async fn handle_async_job(
     job: AsyncJob,
     worker_id: WorkerId,
@@ -33,6 +37,8 @@ pub async fn handle_async_job(
     http_client: Data<Client>,
     db: Data<PgPool>,
     domain_request_limiter: Data<DomainRequestLimiter>,
+    config: Data<Config>,
+    apalis: Data<RedisStorage<AsyncJob>>,
 ) -> Result<(), AsyncJobError> {
     let result = match job {
         AsyncJob::HelloWorld(name) => {
@@ -40,7 +46,10 @@ pub async fn handle_async_job(
             Ok(())
         }
         AsyncJob::CrawlFeed(job) => {
-            crawl_feed::crawl_feed(job, http_client, db, domain_request_limiter).await
+            crawl_feed::crawl_feed(job, http_client, db, domain_request_limiter, apalis).await
         }
+        AsyncJob::CrawlEntry(job) => {
+            crawl_entry::crawl_entry(job, http_client, db, domain_request_limiter, config).await
+        }
     };
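
Only the dispatcher is shown above; the new crawl_entry module itself is one of the other changed files in this commit. A minimal sketch of what that handler could look like, inferred from the call site: the entry_id field, the entries table, and the limiter's acquire method are assumptions, not visible in this diff.

// Hypothetical sketch of the new crawl_entry module. Only the function
// signature is implied by the dispatcher above; the job fields, the
// entries table, and DomainRequestLimiter::acquire are assumptions.
use anyhow::Result;
use apalis::prelude::Data;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use sqlx::PgPool;
use tracing::{info, instrument};
use uuid::Uuid;

use crate::config::Config;
use crate::domain_request_limiter::DomainRequestLimiter;

#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct CrawlEntryJob {
    pub entry_id: Uuid, // assumed: enough to look the entry up in Postgres
}

#[instrument(skip_all, fields(entry_id = %job.entry_id))]
pub async fn crawl_entry(
    job: CrawlEntryJob,
    http_client: Data<Client>,
    db: Data<PgPool>,
    domain_request_limiter: Data<DomainRequestLimiter>,
    config: Data<Config>,
) -> Result<()> {
    // Assumed schema: look up the entry's URL before fetching it.
    let url: String = sqlx::query_scalar("SELECT url FROM entries WHERE entry_id = $1")
        .bind(job.entry_id)
        .fetch_one(&*db)
        .await?;

    // Assumed API: wait for a per-domain slot so one host isn't hammered.
    domain_request_limiter.acquire(&url).await?;

    let body = http_client.get(&url).send().await?.text().await?;
    info!(bytes = body.len(), "fetched entry HTML");

    // Content extraction/persistence elided; config presumably carries
    // crawl settings or content paths used here.
    let _ = config;
    Ok(())
}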
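
crawl_feed now also receives the Redis storage handle, presumably so that each entry discovered in a feed can be queued as its own CrawlEntry job. A rough sketch of that fan-out, assuming it lives in the crawl_feed module next to the dispatcher; enqueue_entries and entry_ids are placeholders for whatever the real code extracts from the parsed feed.

// Sketch: fan out one CrawlEntry job per discovered entry.
use apalis::prelude::*;
use apalis_redis::RedisStorage;

use super::{AsyncJob, CrawlEntryJob};

async fn enqueue_entries(
    apalis: Data<RedisStorage<AsyncJob>>,
    entry_ids: Vec<uuid::Uuid>,
) -> anyhow::Result<()> {
    // Storage::push takes &mut self, so clone a handle out of Data first;
    // RedisStorage is cheaply cloneable.
    let mut storage = (*apalis).clone();
    for entry_id in entry_ids {
        storage
            .push(AsyncJob::CrawlEntry(CrawlEntryJob { entry_id }))
            .await?;
    }
    Ok(())
}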