Add CrawlScheduler actor, shared client w/ last modified headers
src/main.rs (54 lines changed)
@@ -11,7 +11,6 @@ use axum::{
     Router,
 };
 use bytes::Bytes;
-use chrono::{Duration, Utc};
 use clap::Parser;
 use dotenvy::dotenv;
 use notify::Watcher;
@@ -21,13 +20,15 @@ use tokio::sync::watch::channel;
 use tower::ServiceBuilder;
 use tower_http::{services::ServeDir, trace::TraceLayer};
 use tower_livereload::LiveReloadLayer;
-use tracing::{debug, info};
+use tracing::debug;
 
+use lib::actors::crawl_scheduler::CrawlSchedulerHandle;
+use lib::config::Config;
+use lib::domain_locks::DomainLocks;
 use lib::handlers;
 use lib::log::init_tracing;
 use lib::state::AppState;
-use lib::{actors::feed_crawler::FeedCrawlerHandle, config::Config, models::feed::Feed};
-use lib::{domain_locks::DomainLocks, models::feed::GetFeedsOptions};
+use lib::USER_AGENT;
 
 async fn serve(app: Router, addr: SocketAddr) -> Result<()> {
     debug!("listening on {}", addr);
@@ -48,6 +49,7 @@ async fn main() -> Result<()> {
 
     let crawls = Arc::new(Mutex::new(HashMap::new()));
     let domain_locks = DomainLocks::new();
+    let client = Client::builder().user_agent(USER_AGENT).build()?;
 
     let pool = PgPoolOptions::new()
         .max_connections(config.database_max_connections)
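
The hunk above builds one shared reqwest Client tagged with the crate's USER_AGENT, replacing the throwaway Client::new() that the old startup loop created per feed (removed below). The "last modified headers" half of the commit title is not visible in this file; the conditional-request logic presumably lives in the crawler actors. As a rough sketch of that technique only, with a hypothetical fetch_if_modified helper rather than code from this repository:

use bytes::Bytes;
use reqwest::{header, Client, StatusCode};

// Hypothetical helper: issue a conditional GET with a shared client, returning
// Ok(None) when the server answers 304 Not Modified.
async fn fetch_if_modified(
    client: &Client,
    url: &str,
    last_modified: Option<&str>,
) -> reqwest::Result<Option<(Bytes, Option<String>)>> {
    let mut request = client.get(url);
    if let Some(value) = last_modified {
        // Echo the stored Last-Modified value back as If-Modified-Since.
        request = request.header(header::IF_MODIFIED_SINCE, value);
    }
    let response = request.send().await?;
    if response.status() == StatusCode::NOT_MODIFIED {
        return Ok(None);
    }
    // Remember the new Last-Modified value (if any) for the next crawl.
    let new_last_modified = response
        .headers()
        .get(header::LAST_MODIFIED)
        .and_then(|v| v.to_str().ok())
        .map(str::to_owned);
    let body = response.error_for_status()?.bytes().await?;
    Ok(Some((body, new_last_modified)))
}

Persisting the returned Last-Modified value alongside the feed lets the next crawl skip unchanged responses entirely.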
@@ -56,6 +58,14 @@ async fn main() -> Result<()> {
 
     sqlx::migrate!().run(&pool).await?;
 
+    let crawl_scheduler = CrawlSchedulerHandle::new(
+        pool.clone(),
+        client.clone(),
+        domain_locks.clone(),
+        config.content_dir.clone(),
+    );
+    let _ = crawl_scheduler.bootstrap().await;
+
     let addr = format!("{}:{}", &config.host, &config.port).parse()?;
     let mut app = Router::new()
         .route("/api/v1/feeds", get(handlers::api::feeds::get))
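
CrawlSchedulerHandle is defined in lib::actors::crawl_scheduler, which this diff does not include. Assuming it follows the same handle-plus-spawned-task shape as the existing FeedCrawlerHandle, a minimal sketch of that Tokio actor pattern looks like the following; SchedulerMessage, SchedulerHandle, and the channel size are illustrative names and choices, not the crate's actual API:

use tokio::sync::mpsc;

// Messages the scheduler actor accepts (illustrative only).
enum SchedulerMessage {
    Bootstrap,
}

// A cloneable handle owning the sending half of the actor's channel.
#[derive(Clone)]
struct SchedulerHandle {
    sender: mpsc::Sender<SchedulerMessage>,
}

impl SchedulerHandle {
    // Spawn the actor task and hand back a handle to it.
    fn new() -> Self {
        let (sender, mut receiver) = mpsc::channel(8);
        tokio::spawn(async move {
            // The actor owns its state and processes one message at a time.
            while let Some(msg) = receiver.recv().await {
                match msg {
                    SchedulerMessage::Bootstrap => {
                        // The real scheduler would page through feeds here and
                        // schedule a recurring crawl for each one.
                    }
                }
            }
        });
        Self { sender }
    }

    // Ask the actor to start scheduling; ignore the error if it already shut down.
    async fn bootstrap(&self) {
        let _ = self.sender.send(SchedulerMessage::Bootstrap).await;
    }
}

A handle like this is cheap to clone and send, which is why main can call bootstrap() and then move the handle into AppState for the HTTP handlers to use.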
@@ -75,42 +85,16 @@ async fn main() -> Result<()> {
         .route("/log/stream", get(handlers::log::stream))
         .nest_service("/static", ServeDir::new("static"))
         .with_state(AppState {
-            pool: pool.clone(),
-            config: config.clone(),
+            pool,
+            config,
             log_receiver,
             crawls,
-            domain_locks: domain_locks.clone(),
+            domain_locks,
+            client,
+            crawl_scheduler,
         })
         .layer(ServiceBuilder::new().layer(TraceLayer::new_for_http()));
 
-    info!("starting crawlers");
-    let mut options = GetFeedsOptions::default();
-    loop {
-        let feeds = Feed::get_all(&pool, options.clone()).await?;
-        if feeds.is_empty() {
-            break;
-        }
-        for feed in feeds.iter() {
-            let client = Client::new(); // TODO: store in state and reuse
-            if let Some(last_crawled_at) = feed.last_crawled_at {
-                if last_crawled_at
-                    >= Utc::now() - Duration::minutes(feed.crawl_interval_minutes.into())
-                {
-                    continue;
-                }
-            }
-            let feed_crawler = FeedCrawlerHandle::new(
-                pool.clone(),
-                client.clone(),
-                domain_locks.clone(),
-                config.content_dir.clone(),
-            );
-            let _ = feed_crawler.crawl(feed.feed_id).await;
-        }
-        options.before = feeds.last().map(|f| f.created_at);
-    }
-    info!("done starting crawlers");
-
     if cfg!(debug_assertions) {
         debug!("starting livereload");
         let livereload = LiveReloadLayer::new();
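
The final hunk deletes the startup loop that paged through feeds with GetFeedsOptions and skipped any feed crawled within its interval; that responsibility presumably moves into the scheduler's bootstrap. The due-check the loop performed inline, factored into a standalone function for clarity (a sketch using the same chrono types, not code from this commit):

use chrono::{DateTime, Duration, Utc};

// Mirror of the interval check from the removed startup loop: a feed is due
// once its crawl interval has fully elapsed since the last crawl.
fn is_due(
    last_crawled_at: Option<DateTime<Utc>>,
    crawl_interval_minutes: i64,
    now: DateTime<Utc>,
) -> bool {
    match last_crawled_at {
        // Never crawled before: always due.
        None => true,
        Some(last) => last < now - Duration::minutes(crawl_interval_minutes),
    }
}

With a 60-minute interval, a feed last crawled 90 minutes ago is due and one crawled 30 minutes ago is skipped, which matches the continue branch removed above.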