Add an async actor FeedCrawler for fetching feed details
For now, this lets the browser subscribe to the result of the asynchronous crawl after adding a new feed. Eventually I will also use this actor in the main scheduled crawls. Right now it only upserts feed metadata based on the parsed feed.
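The handle/actor split referenced above (FeedCrawlerHandle in crate::actors::feed_crawler) isn't shown in this diff, so here is a minimal sketch of the pattern it is assumed to follow: the handle owns an mpsc sender into a spawned actor task plus a broadcast channel that fans crawl results out to subscribers. The CrawlerHandle and CrawlResult names below are illustrative stand-ins, not the real types from this repository.

// Sketch only: assumed shape of the actor handle, not the code in this commit.
use tokio::sync::{broadcast, mpsc};

#[derive(Clone, Debug)]
pub enum CrawlResult {
    Feed(String),  // stand-in for the crawled feed metadata
    Error(String), // stand-in for a crawl failure
}

#[derive(Clone)]
pub struct CrawlerHandle {
    sender: mpsc::Sender<String>,
    results: broadcast::Sender<CrawlResult>,
}

impl CrawlerHandle {
    pub fn new() -> Self {
        let (sender, mut receiver) = mpsc::channel::<String>(8);
        let (results, _) = broadcast::channel(8);
        let results_tx = results.clone();
        // The actor task owns the mpsc receiver; each queued URL is crawled and
        // the outcome is broadcast so subscribers (e.g. an SSE stream) can pick it up.
        tokio::spawn(async move {
            while let Some(url) = receiver.recv().await {
                // fetch + parse + upsert would happen here
                let _ = results_tx.send(CrawlResult::Feed(url));
            }
        });
        Self { sender, results }
    }

    // Queue a crawl and return a receiver the caller can stash in shared state
    // for the browser to stream from later.
    pub async fn crawl(&self, url: String) -> broadcast::Receiver<CrawlResult> {
        let receiver = self.results.subscribe();
        let _ = self.sender.send(url).await;
        receiver
    }
}

Subscribing before queueing the URL means a fast crawl can't finish before anyone is listening. In this commit the returned receiver is stored in the shared Crawls map keyed by feed_id, and the /feed/{id}/stream SSE handler later removes it and streams the crawl messages to the browser.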
@@ -1,6 +1,9 @@
+use std::time::Duration;
+
 use axum::extract::{Path, State};
 use axum::http::StatusCode;
-use axum::response::{IntoResponse, Response, Redirect};
+use axum::response::sse::{Event, KeepAlive};
+use axum::response::{IntoResponse, Redirect, Response, Sse};
 use axum::Form;
 use feed_rs::parser;
 use maud::html;
@@ -8,13 +11,19 @@ use reqwest::Client;
 use serde::Deserialize;
 use serde_with::{serde_as, NoneAsEmptyString};
 use sqlx::PgPool;
+use tokio_stream::wrappers::errors::BroadcastStreamRecvError;
+use tokio_stream::wrappers::BroadcastStream;
+use tokio_stream::StreamExt;
+use url::Url;
 
+use crate::actors::feed_crawler::{FeedCrawlerHandle, FeedCrawlerHandleMessage};
 use crate::error::{Error, Result};
 use crate::models::entry::get_entries_for_feed;
-use crate::models::feed::{create_feed, get_feed, CreateFeed, delete_feed};
+use crate::models::feed::{create_feed, delete_feed, get_feed, CreateFeed, FeedType};
 use crate::partials::{entry_list::entry_list, feed_link::feed_link, layout::Layout};
-use crate::uuid::Base62Uuid;
+use crate::state::Crawls;
 use crate::turbo_stream::TurboStream;
+use crate::uuid::Base62Uuid;
 
 pub async fn get(
     Path(id): Path<Base62Uuid>,
@@ -51,12 +60,16 @@ pub struct AddFeed {
 
 #[derive(thiserror::Error, Debug)]
 pub enum AddFeedError {
+    #[error("invalid feed url: {0}")]
+    InvalidUrl(String, #[source] url::ParseError),
     #[error("failed to fetch feed: {0}")]
     FetchError(String, #[source] reqwest::Error),
     #[error("failed to parse feed: {0}")]
     ParseError(String, #[source] parser::ParseFeedError),
     #[error("failed to create feed: {0}")]
     CreateFeedError(String, #[source] Error),
+    #[error("feed already exists: {0}")]
+    FeedAlreadyExists(String, #[source] Error),
 }
 pub type AddFeedResult<T, E = AddFeedError> = ::std::result::Result<T, E>;
 
@@ -65,7 +78,9 @@ impl AddFeedError {
         use AddFeedError::*;
 
         match self {
-            FetchError(..) | ParseError(..) => StatusCode::UNPROCESSABLE_ENTITY,
+            InvalidUrl(..) | FetchError(..) | ParseError(..) | FeedAlreadyExists(..) => {
+                StatusCode::UNPROCESSABLE_ENTITY
+            }
             CreateFeedError(..) => StatusCode::INTERNAL_SERVER_ERROR,
         }
     }
@@ -92,41 +107,56 @@ impl IntoResponse for AddFeedError {
 
 pub async fn post(
     State(pool): State<PgPool>,
+    State(crawls): State<Crawls>,
     Form(add_feed): Form<AddFeed>,
 ) -> AddFeedResult<Response> {
+    // TODO: store the client in axum state (as long as it can be used concurrently?)
     let client = Client::new();
-    let bytes = client
-        .get(&add_feed.url)
-        .send()
-        .await
-        .map_err(|err| AddFeedError::FetchError(add_feed.url.clone(), err))?
-        .bytes()
-        .await
-        .map_err(|err| AddFeedError::FetchError(add_feed.url.clone(), err))?;
-    let parsed_feed = parser::parse(&bytes[..])
-        .map_err(|err| AddFeedError::ParseError(add_feed.url.clone(), err))?;
+    let feed_crawler = FeedCrawlerHandle::new(pool.clone(), client.clone());
+
     let feed = create_feed(
         &pool,
         CreateFeed {
-            title: add_feed
-                .title
-                .map_or_else(|| parsed_feed.title.map(|text| text.content), Some),
+            title: add_feed.title,
             url: add_feed.url.clone(),
-            feed_type: parsed_feed.feed_type.into(),
-            description: add_feed
-                .description
-                .map_or_else(|| parsed_feed.description.map(|text| text.content), Some),
+            feed_type: FeedType::Rss, // eh, get rid of this
+            description: add_feed.description,
         },
     )
     .await
-    .map_err(|err| AddFeedError::CreateFeedError(add_feed.url.clone(), err))?;
+    .map_err(|err| {
+        if let Error::Sqlx(sqlx::error::Error::Database(db_error)) = &err {
+            if let Some(code) = db_error.code() {
+                if let Some(constraint) = db_error.constraint() {
+                    if code == "23505" && constraint == "feed_url_idx" {
+                        return AddFeedError::FeedAlreadyExists(add_feed.url.clone(), err);
+                    }
+                }
+            }
+        }
+        AddFeedError::CreateFeedError(add_feed.url.clone(), err)
+    })?;
+
+    let url: Url = Url::parse(&add_feed.url)
+        .map_err(|err| AddFeedError::InvalidUrl(add_feed.url.clone(), err))?;
+    let receiver = feed_crawler.crawl(url).await;
+    {
+        let mut crawls = crawls.lock().map_err(|_| {
+            AddFeedError::CreateFeedError(add_feed.url.clone(), Error::InternalServerError)
+        })?;
+        crawls.insert(feed.feed_id, receiver);
+    }
+
+    let feed_id = format!("feed-{}", Base62Uuid::from(feed.feed_id));
+    let feed_stream = format!("/feed/{}/stream", Base62Uuid::from(feed.feed_id));
     Ok((
         StatusCode::CREATED,
         TurboStream(
             html! {
+                turbo-stream-source src=(feed_stream) id="feed-stream" {}
                 turbo-stream action="append" target="feeds" {
                     template {
-                        li { (feed_link(&feed)) }
+                        li id=(feed_id) { (feed_link(&feed, true)) }
                     }
                 }
             }
@@ -136,10 +166,76 @@ pub async fn post(
     .into_response())
 }
 
-pub async fn delete(
-    State(pool): State<PgPool>,
+pub async fn stream(
     Path(id): Path<Base62Uuid>,
-) -> Result<Redirect> {
+    State(crawls): State<Crawls>,
+) -> Result<impl IntoResponse> {
+    let receiver = {
+        let mut crawls = crawls.lock().expect("crawls lock poisoned");
+        crawls.remove(&id.as_uuid())
+    }
+    .ok_or_else(|| Error::NotFound("feed stream", id.as_uuid()))?;
+
+    let stream = BroadcastStream::new(receiver);
+    let feed_id = format!("feed-{}", id);
+    let stream = stream.map(move |msg| match msg {
+        Ok(FeedCrawlerHandleMessage::Feed(Ok(feed))) => Ok::<Event, String>(
+            Event::default().data(
+                html! {
+                    turbo-stream action="remove" target="feed-stream" {}
+                    turbo-stream action="replace" target=(feed_id) {
+                        template {
+                            li id=(feed_id) { (feed_link(&feed, false)) }
+                        }
+                    }
+                }
+                .into_string(),
+            ),
+        ),
+        Ok(FeedCrawlerHandleMessage::Feed(Err(error))) => Ok(Event::default().data(
+            html! {
+                turbo-stream action="remove" target="feed-stream" {}
+                turbo-stream action="replace" target=(feed_id) {
+                    template {
+                        li id=(feed_id) { span class="error" { (error) } }
+                    }
+                }
+            }
+            .into_string(),
+        )),
+        // TODO: these Entry messages are not yet sent, need to handle them better
+        Ok(FeedCrawlerHandleMessage::Entry(Ok(_))) => Ok(Event::default().data(
+            html! {
+                turbo-stream action="remove" target="feed-stream" {}
+                turbo-stream action="replace" target=(feed_id) {
+                    template {
+                        li id=(feed_id) { "fetched entry" }
+                    }
+                }
+            }
+            .into_string(),
+        )),
+        Ok(FeedCrawlerHandleMessage::Entry(Err(error))) => Ok(Event::default().data(
+            html! {
+                turbo-stream action="remove" target="feed-stream" {}
+                turbo-stream action="replace" target=(feed_id) {
+                    template {
+                        li id=(feed_id) { span class="error" { (error) } }
+                    }
+                }
+            }
+            .into_string(),
+        )),
+        Err(BroadcastStreamRecvError::Lagged(_)) => Ok(Event::default()),
+    });
+    Ok(Sse::new(stream).keep_alive(
+        KeepAlive::new()
+            .interval(Duration::from_secs(15))
+            .text("keep-alive-text"),
+    ))
+}
+
+pub async fn delete(State(pool): State<PgPool>, Path(id): Path<Base62Uuid>) -> Result<Redirect> {
     delete_feed(&pool, id.as_uuid()).await?;
     Ok(Redirect::to("/feeds"))
 }
@@ -14,7 +14,7 @@ pub async fn get(State(pool): State<PgPool>, layout: Layout) -> Result<Response>
         div class="feeds" {
             ul id="feeds" {
                 @for feed in feeds {
-                    li { (feed_link(&feed)) }
+                    li { (feed_link(&feed, false)) }
                 }
             }
             div class="add-feed" {
@@ -4,6 +4,7 @@ use std::time::Duration;
 
 use ansi_to_html::convert_escaped;
 use axum::extract::State;
+use axum::response::sse::KeepAlive;
 use axum::response::{
     sse::{Event, Sse},
     Response,
@@ -44,7 +45,7 @@ pub async fn stream(
         ))
     });
     Sse::new(log_stream).keep_alive(
-        axum::response::sse::KeepAlive::new()
+        KeepAlive::new()
             .interval(Duration::from_secs(15))
             .text("keep-alive-text"),
     )