WIP add apalis & split up main process
src/bin/crawler.rs (new file, 110 lines)
@@ -0,0 +1,110 @@
use anyhow::{anyhow, Result};
use apalis::cron::{CronStream, Schedule};
use apalis::layers::retry::{RetryLayer, RetryPolicy};
use apalis::layers::tracing::TraceLayer;
use apalis::prelude::*;
use apalis::redis::RedisStorage;
use chrono::{DateTime, Utc};
use clap::Parser;
use lib::actors::crawl_scheduler::CrawlSchedulerError;
use lib::jobs::AsyncJob;
use lib::models::feed::{Feed, GetFeedsOptions};
use sqlx::postgres::PgPoolOptions;
use sqlx::PgPool;
use std::str::FromStr;
use std::sync::Arc;
use tower::ServiceBuilder;
use tracing::{info, instrument};

use dotenvy::dotenv;
use lib::config::Config;
use lib::log::init_worker_tracing;

/// Payload for the cron-driven job: apalis turns each cron tick's timestamp
/// into a `Crawl` via the `From<DateTime<Utc>>` impl below.
#[derive(Default, Debug, Clone)]
struct Crawl(DateTime<Utc>);

impl From<DateTime<Utc>> for Crawl {
    fn from(t: DateTime<Utc>) -> Self {
        Crawl(t)
    }
}

impl Job for Crawl {
    const NAME: &'static str = "apalis::Crawl";
}

struct State {
    pool: PgPool,
    apalis: RedisStorage<AsyncJob>,
}

/// Runs on every cron tick: pages through all feeds using the `before` cursor
/// on `created_at` and enqueues one `AsyncJob` per feed on the Redis-backed queue.
#[instrument(skip_all)]
pub async fn crawl_fn(job: Crawl, state: Data<Arc<State>>) -> Result<()> {
    tracing::info!(job = ?job, "crawl");
    let mut apalis = state.apalis.clone();
    let mut options = GetFeedsOptions::default();
    loop {
        info!("fetching feeds before: {:?}", options.before);
        let feeds = match Feed::get_all(&state.pool, &options).await {
            Err(err) => {
                return Err(anyhow!(err));
            }
            Ok(feeds) if feeds.is_empty() => {
                info!("no more feeds found");
                break;
            }
            Ok(feeds) => feeds,
        };
        info!("found {} feeds", feeds.len());
        options.before = feeds.last().map(|f| f.created_at);

        for feed in feeds.into_iter() {
            // self.spawn_crawler_loop(feed, respond_to.clone());
            apalis
                .push(AsyncJob::HelloWorld(feed.feed_id.to_string()))
                .await?;
        }
    }
    Ok(())
}

#[tokio::main]
async fn main() -> Result<()> {
    dotenv().ok();
    let config = Config::parse();
    let _guard = init_worker_tracing()?;

    let pool = PgPoolOptions::new()
        .max_connections(config.database_max_connections)
        .acquire_timeout(std::time::Duration::from_secs(3))
        .connect(&config.database_url)
        .await?;

    // TODO: use redis_pool from above instead of making a new connection
    // See: https://github.com/geofmureithi/apalis/issues/290
    let redis_conn = apalis::redis::connect(config.redis_url.clone()).await?;
    let apalis_config = apalis::redis::Config::default();
    let apalis: RedisStorage<AsyncJob> =
        RedisStorage::new_with_config(redis_conn, apalis_config);

    // Six-field cron expression (with seconds): fires at second 0 of every minute.
    let schedule = Schedule::from_str("0 * * * * *").unwrap();
    // let service = ServiceBuilder::new()
    //     .layer(RetryLayer::new(RetryPolicy::default()))
    //     .layer(TraceLayer::new())
    //     .service(service_fn(crawl_fn));

    let worker = WorkerBuilder::new("crawler")
        .stream(CronStream::new(schedule).into_stream())
        .layer(RetryLayer::new(RetryPolicy::default()))
        .layer(TraceLayer::new())
        .data(Arc::new(State { pool, apalis }))
        .build_fn(crawl_fn);

    Monitor::<TokioExecutor>::new()
        .register(worker)
        .run()
        .await
        .unwrap();

    Ok(())
}
@@ -1,6 +1,8 @@
use std::{collections::HashMap, net::SocketAddr, path::Path, sync::Arc};

use anyhow::Result;
use apalis::prelude::*;
use apalis::redis::RedisStorage;
use axum::{
    routing::{get, post},
    Router,
@@ -32,6 +34,7 @@ use tracing::debug;
use lib::config::Config;
use lib::domain_locks::DomainLocks;
use lib::handlers;
use lib::jobs::AsyncJob;
use lib::log::init_tracing;
use lib::state::AppState;
use lib::USER_AGENT;
@@ -93,6 +96,17 @@ async fn main() -> Result<()> {

    sqlx::migrate!().run(&pool).await?;

    // TODO: use redis_pool from above instead of making a new connection
    // See: https://github.com/geofmureithi/apalis/issues/290
    let redis_conn = apalis::redis::connect(config.redis_url.clone()).await?;
    let apalis_config = apalis::redis::Config::default();
    let mut apalis: RedisStorage<AsyncJob> =
        RedisStorage::new_with_config(redis_conn, apalis_config);

    apalis
        .push(AsyncJob::HelloWorld("hello".to_string()))
        .await?;

    let crawl_scheduler = CrawlSchedulerHandle::new(
        pool.clone(),
        client.clone(),
@@ -150,6 +164,7 @@ async fn main() -> Result<()> {
            importer,
            imports,
            mailer,
            apalis,
        })
        .layer(ServiceBuilder::new().layer(TraceLayer::new_for_http()))
        .layer(auth_layer)
src/bin/worker.rs (new file, 36 lines)
@@ -0,0 +1,36 @@
use anyhow::Result;
use apalis::layers::tracing::TraceLayer;
use apalis::prelude::*;
use apalis::redis::RedisStorage;
use clap::Parser;

use dotenvy::dotenv;
use lib::config::Config;
use lib::jobs::AsyncJob;
use lib::log::init_worker_tracing;

pub async fn worker_fn(job: AsyncJob) {
    tracing::info!(job = ?job, "Hello, world!");
}

#[tokio::main]
async fn main() -> Result<()> {
    dotenv().ok();
    let config = Config::parse();
    let _guard = init_worker_tracing()?;
    let redis_conn = apalis::redis::connect(config.redis_url.clone()).await?;
    let apalis_config = apalis::redis::Config::default();
    let apalis: RedisStorage<AsyncJob> = RedisStorage::new_with_config(redis_conn, apalis_config);

    Monitor::<TokioExecutor>::new()
        .register_with_count(2, {
            WorkerBuilder::new("worker")
                .layer(TraceLayer::new())
                .with_storage(apalis.clone())
                .build_fn(worker_fn)
        })
        .run()
        .await
        .unwrap();
    Ok(())
}
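
For now worker_fn only logs whatever job it pulls off the queue. A minimal sketch of where real handling would go, assuming dispatch stays a plain match on the AsyncJob variants (the log message and any actual crawling logic are hypothetical, not part of this commit):

use lib::jobs::AsyncJob;

// Sketch only: would replace the body of worker_fn above once real handlers exist.
pub async fn worker_fn(job: AsyncJob) {
    match job {
        // HelloWorld currently carries the feed_id string pushed by the crawler's cron job.
        AsyncJob::HelloWorld(feed_id) => {
            tracing::info!(%feed_id, "would fetch and parse this feed here");
        }
    }
}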
src/jobs/mod.rs (new file, 11 lines)
@@ -0,0 +1,11 @@
use apalis::prelude::*;
use serde::{Deserialize, Serialize};

#[derive(Debug, Deserialize, Serialize, Clone)]
pub enum AsyncJob {
    HelloWorld(String),
}

impl Job for AsyncJob {
    const NAME: &'static str = "apalis::AsyncJob";
}
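
With the web server, crawler, and worker now running as separate processes, this module is the shared contract between them: one binary serializes an AsyncJob into the Redis storage with serde and another deserializes it, and apalis uses Job::NAME to identify the job type, so every binary must be built against the same definition. A sketch of how a more specific job kind might be added later (the CrawlFeed variant and its Uuid payload are hypothetical, not part of this commit):

use apalis::prelude::*;
use serde::{Deserialize, Serialize};
use uuid::Uuid;

#[derive(Debug, Deserialize, Serialize, Clone)]
pub enum AsyncJob {
    HelloWorld(String),
    // Hypothetical follow-up: carry the feed id as a typed value instead of a string.
    CrawlFeed(Uuid),
}

impl Job for AsyncJob {
    const NAME: &'static str = "apalis::AsyncJob";
}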
@@ -7,6 +7,7 @@ pub mod error;
pub mod handlers;
pub mod headers;
pub mod htmx;
pub mod jobs;
pub mod log;
pub mod mailers;
pub mod models;
src/log.rs (14 lines changed)
@@ -91,3 +91,17 @@ pub fn init_tracing(
        .init();
    Ok((file_writer_guard, mem_writer_guard))
}

/// Tracing setup for the non-web binaries (crawler and worker): pretty stdout
/// output plus an hourly-rolling file in ./logs. The returned guard must stay
/// alive for the life of the process so the non-blocking file writer keeps
/// flushing (hence the `let _guard = ...` bindings in the new binaries).
pub fn init_worker_tracing() -> Result<WorkerGuard> {
    let stdout_layer = tracing_subscriber::fmt::layer().pretty();
    let filter_layer = EnvFilter::from_default_env();
    let file_appender = tracing_appender::rolling::hourly("./logs", "log");
    let (file_writer, file_writer_guard) = tracing_appender::non_blocking(file_appender);
    let file_writer_layer = tracing_subscriber::fmt::layer().with_writer(file_writer);
    tracing_subscriber::registry()
        .with(filter_layer)
        .with(stdout_layer)
        .with(file_writer_layer)
        .init();
    Ok(file_writer_guard)
}
src/state.rs (17 lines changed)
@@ -1,6 +1,7 @@
use std::collections::HashMap;
use std::sync::Arc;

use apalis::redis::RedisStorage;
use axum::extract::FromRef;
use bytes::Bytes;
use lettre::SmtpTransport;
@@ -9,10 +10,11 @@ use sqlx::PgPool;
use tokio::sync::{broadcast, watch, Mutex};
use uuid::Uuid;

use crate::actors::crawl_scheduler::{CrawlSchedulerHandle, CrawlSchedulerHandleMessage};
use crate::actors::importer::{ImporterHandle, ImporterHandleMessage};
use crate::config::Config;
use crate::domain_locks::DomainLocks;
use crate::jobs::AsyncJob;

/// A map of feed IDs to a channel receiver for the active `CrawlScheduler` running a feed crawl
/// for that feed.
@@ -28,12 +30,12 @@ pub type Crawls = Arc<Mutex<HashMap<Uuid, broadcast::Receiver<CrawlSchedulerHand

/// A map of unique import IDs to a channel receiver for the active `Importer` running that import.
///
/// Same as the `Crawls` map, the only purpose of this is to keep track of active imports so that
/// axum handlers can subscribe to the result of the import via the receiver channel which are then
/// sent to end-users as a stream of server-sent events.
///
/// This map should only contain imports that have just been created but not yet subscribed to.
/// Entries are only added when a user uploads an OPML to import and entries are removed by
/// the same user once a server-sent event connection is established.
pub type Imports = Arc<Mutex<HashMap<Uuid, broadcast::Receiver<ImporterHandleMessage>>>>;
@@ -49,6 +51,7 @@ pub struct AppState {
    pub importer: ImporterHandle,
    pub imports: Imports,
    pub mailer: SmtpTransport,
    pub apalis: RedisStorage<AsyncJob>,
}

impl FromRef<AppState> for PgPool {
@@ -110,3 +113,9 @@ impl FromRef<AppState> for SmtpTransport {
        state.mailer.clone()
    }
}

impl FromRef<AppState> for RedisStorage<AsyncJob> {
    fn from_ref(state: &AppState) -> Self {
        state.apalis.clone()
    }
}
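
The new FromRef impl is what lets axum handlers pull the job storage straight out of AppState with the State extractor and enqueue background work for the worker binary. A rough sketch of such a handler, assuming the usual .with_state(app_state) setup; the handler name, hard-coded payload, and status codes are illustrative only and not part of this commit:

use apalis::prelude::*;
use apalis::redis::RedisStorage;
use axum::{extract::State, http::StatusCode};

use crate::jobs::AsyncJob;

// Hypothetical handler: pushes a job onto the Redis queue consumed by src/bin/worker.rs.
async fn enqueue_hello(State(mut apalis): State<RedisStorage<AsyncJob>>) -> StatusCode {
    match apalis.push(AsyncJob::HelloWorld("example".to_string())).await {
        Ok(_) => StatusCode::ACCEPTED,
        Err(_) => StatusCode::INTERNAL_SERVER_ERROR,
    }
}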