Move feed fetching to crawl_feed job, add DomainRequestLimiter

`DomainRequestLimiter` is a distributed version of `DomainLocks`,
backed by Redis.
This commit is contained in:
2024-08-26 01:12:18 -04:00
parent 9c75a88c69
commit 65eac1975c
9 changed files with 390 additions and 22 deletions

View File

@@ -5,11 +5,16 @@ use apalis::prelude::*;
use apalis_redis::RedisStorage;
use clap::Parser;
use dotenvy::dotenv;
use fred::prelude::*;
use reqwest::Client;
use sqlx::postgres::PgPoolOptions;
use tower::retry::RetryLayer;
use lib::config::Config;
use lib::domain_request_limiter::DomainRequestLimiter;
use lib::jobs::{handle_async_job, AsyncJob};
use lib::log::init_worker_tracing;
use lib::USER_AGENT;
#[tokio::main]
async fn main() -> Result<()> {
@@ -23,11 +28,27 @@ async fn main() -> Result<()> {
let apalis_storage: RedisStorage<AsyncJob> =
RedisStorage::new_with_config(redis_conn, apalis_config);
let redis_config = RedisConfig::from_url(&config.redis_url)?;
let redis_pool = RedisPool::new(redis_config, None, None, None, 5)?;
redis_pool.connect();
redis_pool.wait_for_connect().await?;
let domain_request_limiter = DomainRequestLimiter::new(redis_pool, 10, 5, 100, 0.5);
let http_client = Client::builder().user_agent(USER_AGENT).build()?;
let db = PgPoolOptions::new()
.max_connections(config.database_max_connections)
.acquire_timeout(std::time::Duration::from_secs(3))
.connect(&config.database_url)
.await?;
Monitor::<TokioExecutor>::new()
.register_with_count(2, {
WorkerBuilder::new("worker")
.layer(RetryLayer::new(RetryPolicy::default()))
.layer(TraceLayer::new())
.data(http_client)
.data(db)
.data(domain_request_limiter)
.backend(apalis_storage)
.build_fn(handle_async_job)
})