Add import_opml job

parent e41085425a
commit 7a8f7dc415
src/jobs/import_opml.rs (new file, 105 lines)

@@ -0,0 +1,105 @@
use std::io::Cursor;

use anyhow::{anyhow, Context, Result};
use apalis::prelude::*;
use apalis_redis::RedisStorage;
use fred::prelude::*;
use opml::OPML;
use serde::{Deserialize, Serialize};
use sqlx::PgPool;
use tracing::{error, instrument, warn};
use uuid::Uuid;

use crate::error::Error;
use crate::jobs::crawl_feed::CrawlFeedJob;
use crate::jobs::AsyncJob;
use crate::models::feed::{CreateFeed, Feed};
use crate::uuid::Base62Uuid;

#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct ImportOpmlJob {
    pub import_id: Uuid,
    pub file_name: Option<String>,
    pub bytes: Vec<u8>,
}

// TODO: send messages over redis channel
/// `ImporterOpmlMessage::Import` contains the result of importing the OPML file.
// #[allow(clippy::large_enum_variant)]
// #[derive(Debug, Clone)]
// pub enum ImporterOpmlMessage {
//     Import(ImporterResult<()>),
//     CreateFeedError(String),
//     AlreadyImported(String),
//     CrawlScheduler(CrawlSchedulerHandleMessage),
// }

#[instrument(skip_all, fields(import_id = %import_id))]
pub async fn import_opml(
    ImportOpmlJob {
        import_id,
        file_name,
        bytes,
    }: ImportOpmlJob,
    db: Data<PgPool>,
    apalis: Data<RedisStorage<AsyncJob>>,
    redis: Data<RedisPool>,
) -> Result<()> {
    let document = OPML::from_reader(&mut Cursor::new(bytes)).with_context(|| {
        format!(
            "Failed to read OPML file for import {} from file {}",
            Base62Uuid::from(import_id),
            file_name
                .map(|n| n.to_string())
                .unwrap_or_else(|| "unknown".to_string())
        )
    })?;
    for url in gather_feed_urls(document.body.outlines) {
        let feed = Feed::create(
            &*db,
            CreateFeed {
                url: url.clone(),
                ..Default::default()
            },
        )
        .await;
        match feed {
            Ok(feed) => {
                (*apalis)
                    .clone()
                    .push(AsyncJob::CrawlFeed(CrawlFeedJob {
                        feed_id: feed.feed_id,
                    }))
                    .await?;
            }
            Err(Error::Sqlx(sqlx::error::Error::Database(err))) => {
                if err.is_unique_violation() {
                    // let _ = respond_to.send(ImporterHandleMessage::AlreadyImported(url));
                    warn!("Feed {} already imported", url);
                }
            }
            Err(err) => {
                // let _ = respond_to.send(ImporterHandleMessage::CreateFeedError(url));
                error!("Failed to create feed for {}", url);
                return Err(anyhow!(err));
            }
        }
    }

    redis
        .next()
        .publish("imports", import_id.to_string())
        .await?;
    Ok(())
}

fn gather_feed_urls(outlines: Vec<opml::Outline>) -> Vec<String> {
    let mut urls = Vec::new();
    for outline in outlines.into_iter() {
        if let Some(url) = outline.xml_url {
            urls.push(url);
        }
        urls.append(&mut gather_feed_urls(outline.outlines));
    }
    urls
}
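Not part of the commit, but a minimal test sketch of how the recursive `gather_feed_urls` flattens a nested OPML document. It assumes the same `opml` crate used above; the folder structure and URLs are made up for illustration. Outlines act as folders, only entries carrying an `xmlUrl` attribute yield feed URLs, and a parent's URL (if any) is collected before its children's.

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn gathers_nested_feed_urls() {
        // A typical export: a folder outline wrapping two feeds, plus one
        // top-level feed. Only outlines with an xmlUrl become feeds.
        let xml = r#"<opml version="2.0">
          <head><title>Subscriptions</title></head>
          <body>
            <outline text="Blogs">
              <outline text="A" type="rss" xmlUrl="https://example.com/a.xml"/>
              <outline text="B" type="rss" xmlUrl="https://example.com/b.xml"/>
            </outline>
            <outline text="C" type="rss" xmlUrl="https://example.com/c.xml"/>
          </body>
        </opml>"#;
        let document = OPML::from_str(xml).unwrap();
        // Depth-first order: the folder's children first, then the sibling.
        assert_eq!(
            gather_feed_urls(document.body.outlines),
            vec![
                "https://example.com/a.xml".to_string(),
                "https://example.com/b.xml".to_string(),
                "https://example.com/c.xml".to_string(),
            ]
        );
    }
}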
@@ -9,9 +9,11 @@ use tracing::{error, info, instrument};
 mod crawl_entry;
 mod crawl_feed;
+mod import_opml;

 pub use crawl_entry::CrawlEntryJob;
 pub use crawl_feed::CrawlFeedJob;
+pub use import_opml::ImportOpmlJob;

 use crate::{config::Config, domain_request_limiter::DomainRequestLimiter};
@@ -20,6 +22,7 @@ pub enum AsyncJob {
     HelloWorld(String),
     CrawlFeed(CrawlFeedJob),
     CrawlEntry(CrawlEntryJob),
+    ImportOpml(ImportOpmlJob),
 }

 #[derive(Debug, Error)]
@@ -53,6 +56,7 @@ pub async fn handle_async_job(
             crawl_entry::crawl_entry(job, http_client, db, domain_request_limiter, config, redis)
                 .await
         }
+        AsyncJob::ImportOpml(job) => import_opml::import_opml(job, db, apalis, redis).await,
     };

     match result {
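Finally, `import_opml` publishes the import id on the `imports` Redis channel once every feed has been created. A consumer on the web side might listen roughly like this; a sketch only, assuming fred 8.x where clients expose `message_rx()` via `EventInterface`, with the function name and logging purely illustrative:

use fred::prelude::*;

// Hypothetical listener for completed imports (assumes fred 8.x:
// message_rx() yields a broadcast receiver of pubsub messages).
async fn watch_imports(redis: &RedisPool) -> Result<(), RedisError> {
    let client = redis.next();
    let mut messages = client.message_rx();
    client.subscribe("imports").await?;
    while let Ok(message) = messages.recv().await {
        if let Some(import_id) = message.value.as_string() {
            // e.g. wake any session polling for this import's completion
            tracing::info!("import {} finished", import_id);
        }
    }
    Ok(())
}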