Add an async FeedCrawler actor for fetching feed details

Currently, this allows the browser to subscribe to the result of the
asynchronous crawl after the user adds a new feed.

Eventually I will also use this in the main scheduled crawls. Right now,
it only upserts feed metadata based on the parsed feed.
2023-07-09 21:18:19 -04:00
parent 8f4db1d8d9
commit f13c7e5e70
14 changed files with 405 additions and 58 deletions
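
The new actors/feed_crawler module is among the changed files but is not shown
in this excerpt. From its call sites in the handler below
(FeedCrawlerHandle::new(pool, client), and feed_crawler.crawl(url).await
returning a broadcast receiver), it presumably follows the usual Tokio
actor-handle pattern. A minimal sketch, where everything other than the two
public names and the shape of crawl() is an assumption:

use reqwest::Client;
use sqlx::PgPool;
use tokio::sync::{broadcast, mpsc};
use url::Url;

// Stand-ins for the crate's models; the real types live in crate::models.
#[derive(Clone)]
pub struct Feed;
#[derive(Clone)]
pub struct Entry;

/// Results broadcast back to subscribers. The variant names come from the
/// handler code below; the error type is simplified to String here.
#[derive(Clone)]
pub enum FeedCrawlerHandleMessage {
    Feed(Result<Feed, String>),
    Entry(Result<Entry, String>),
}

/// Commands sent from the handle to the actor task (assumed shape).
enum FeedCrawlerMessage {
    Crawl {
        url: Url,
        respond_to: broadcast::Sender<FeedCrawlerHandleMessage>,
    },
}

/// Cheaply cloneable handle; the spawned task owns the pool and client.
#[derive(Clone)]
pub struct FeedCrawlerHandle {
    sender: mpsc::Sender<FeedCrawlerMessage>,
}

impl FeedCrawlerHandle {
    pub fn new(pool: PgPool, client: Client) -> Self {
        let (sender, receiver) = mpsc::channel(8);
        tokio::spawn(run_feed_crawler(receiver, pool, client));
        Self { sender }
    }

    /// Start a crawl and return a receiver the caller can subscribe to;
    /// the `post` handler stashes this receiver in the `Crawls` map so the
    /// `stream` handler can later drain it over SSE.
    pub async fn crawl(&self, url: Url) -> broadcast::Receiver<FeedCrawlerHandleMessage> {
        let (tx, rx) = broadcast::channel(8);
        let msg = FeedCrawlerMessage::Crawl { url, respond_to: tx };
        let _ = self.sender.send(msg).await;
        rx
    }
}

async fn run_feed_crawler(
    mut receiver: mpsc::Receiver<FeedCrawlerMessage>,
    _pool: PgPool, // the real actor upserts feed metadata through this pool
    client: Client,
) {
    while let Some(FeedCrawlerMessage::Crawl { url, respond_to }) = receiver.recv().await {
        // Fetch and parse the feed, then broadcast the outcome. The real
        // actor maps the parsed feed into the crate's Feed model.
        let result = async {
            let bytes = client
                .get(url.clone())
                .send()
                .await
                .map_err(|err| err.to_string())?
                .bytes()
                .await
                .map_err(|err| err.to_string())?;
            feed_rs::parser::parse(&bytes[..]).map_err(|err| err.to_string())?;
            Ok(Feed)
        }
        .await;
        let _ = respond_to.send(FeedCrawlerHandleMessage::Feed(result));
    }
}

A broadcast channel (rather than a oneshot) lets more than one subscriber
observe the same in-flight crawl, and it is what BroadcastStream in the
`stream` handler below expects.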

View File

@@ -1,6 +1,9 @@
+use std::time::Duration;
+
 use axum::extract::{Path, State};
 use axum::http::StatusCode;
-use axum::response::{IntoResponse, Response, Redirect};
+use axum::response::sse::{Event, KeepAlive};
+use axum::response::{IntoResponse, Redirect, Response, Sse};
 use axum::Form;
 use feed_rs::parser;
 use maud::html;
@@ -8,13 +11,19 @@ use reqwest::Client;
 use serde::Deserialize;
 use serde_with::{serde_as, NoneAsEmptyString};
 use sqlx::PgPool;
+use tokio_stream::wrappers::errors::BroadcastStreamRecvError;
+use tokio_stream::wrappers::BroadcastStream;
+use tokio_stream::StreamExt;
+use url::Url;
 
+use crate::actors::feed_crawler::{FeedCrawlerHandle, FeedCrawlerHandleMessage};
 use crate::error::{Error, Result};
 use crate::models::entry::get_entries_for_feed;
-use crate::models::feed::{create_feed, get_feed, CreateFeed, delete_feed};
+use crate::models::feed::{create_feed, delete_feed, get_feed, CreateFeed, FeedType};
 use crate::partials::{entry_list::entry_list, feed_link::feed_link, layout::Layout};
-use crate::uuid::Base62Uuid;
+use crate::state::Crawls;
 use crate::turbo_stream::TurboStream;
+use crate::uuid::Base62Uuid;
 
 pub async fn get(
     Path(id): Path<Base62Uuid>,
@@ -51,12 +60,16 @@ pub struct AddFeed {
 #[derive(thiserror::Error, Debug)]
 pub enum AddFeedError {
+    #[error("invalid feed url: {0}")]
+    InvalidUrl(String, #[source] url::ParseError),
     #[error("failed to fetch feed: {0}")]
     FetchError(String, #[source] reqwest::Error),
     #[error("failed to parse feed: {0}")]
     ParseError(String, #[source] parser::ParseFeedError),
     #[error("failed to create feed: {0}")]
     CreateFeedError(String, #[source] Error),
+    #[error("feed already exists: {0}")]
+    FeedAlreadyExists(String, #[source] Error),
 }
 
 pub type AddFeedResult<T, E = AddFeedError> = ::std::result::Result<T, E>;
@@ -65,7 +78,9 @@ impl AddFeedError {
         use AddFeedError::*;
         match self {
-            FetchError(..) | ParseError(..) => StatusCode::UNPROCESSABLE_ENTITY,
+            InvalidUrl(..) | FetchError(..) | ParseError(..) | FeedAlreadyExists(..) => {
+                StatusCode::UNPROCESSABLE_ENTITY
+            }
             CreateFeedError(..) => StatusCode::INTERNAL_SERVER_ERROR,
         }
     }
@@ -92,41 +107,56 @@ impl IntoResponse for AddFeedError {
 pub async fn post(
     State(pool): State<PgPool>,
+    State(crawls): State<Crawls>,
     Form(add_feed): Form<AddFeed>,
 ) -> AddFeedResult<Response> {
     // TODO: store the client in axum state (as long as it can be used concurrently?)
     let client = Client::new();
-    let bytes = client
-        .get(&add_feed.url)
-        .send()
-        .await
-        .map_err(|err| AddFeedError::FetchError(add_feed.url.clone(), err))?
-        .bytes()
-        .await
-        .map_err(|err| AddFeedError::FetchError(add_feed.url.clone(), err))?;
-    let parsed_feed = parser::parse(&bytes[..])
-        .map_err(|err| AddFeedError::ParseError(add_feed.url.clone(), err))?;
+    let feed_crawler = FeedCrawlerHandle::new(pool.clone(), client.clone());
+
     let feed = create_feed(
         &pool,
         CreateFeed {
-            title: add_feed
-                .title
-                .map_or_else(|| parsed_feed.title.map(|text| text.content), Some),
+            title: add_feed.title,
             url: add_feed.url.clone(),
-            feed_type: parsed_feed.feed_type.into(),
-            description: add_feed
-                .description
-                .map_or_else(|| parsed_feed.description.map(|text| text.content), Some),
+            feed_type: FeedType::Rss, // eh, get rid of this
+            description: add_feed.description,
         },
     )
     .await
-    .map_err(|err| AddFeedError::CreateFeedError(add_feed.url.clone(), err))?;
+    .map_err(|err| {
+        if let Error::Sqlx(sqlx::error::Error::Database(db_error)) = &err {
+            if let Some(code) = db_error.code() {
+                if let Some(constraint) = db_error.constraint() {
+                    if code == "23505" && constraint == "feed_url_idx" {
+                        return AddFeedError::FeedAlreadyExists(add_feed.url.clone(), err);
+                    }
+                }
+            }
+        }
+        AddFeedError::CreateFeedError(add_feed.url.clone(), err)
+    })?;
+
+    let url: Url = Url::parse(&add_feed.url)
+        .map_err(|err| AddFeedError::InvalidUrl(add_feed.url.clone(), err))?;
+    let receiver = feed_crawler.crawl(url).await;
+    {
+        let mut crawls = crawls.lock().map_err(|_| {
+            AddFeedError::CreateFeedError(add_feed.url.clone(), Error::InternalServerError)
+        })?;
+        crawls.insert(feed.feed_id, receiver);
+    }
+
+    let feed_id = format!("feed-{}", Base62Uuid::from(feed.feed_id));
+    let feed_stream = format!("/feed/{}/stream", Base62Uuid::from(feed.feed_id));
+
     Ok((
         StatusCode::CREATED,
         TurboStream(
             html! {
+                turbo-stream-source src=(feed_stream) id="feed-stream" {}
                 turbo-stream action="append" target="feeds" {
                     template {
-                        li { (feed_link(&feed)) }
+                        li id=(feed_id) { (feed_link(&feed, true)) }
                     }
                 }
             }
@@ -136,10 +166,76 @@ pub async fn post(
     .into_response())
 }
 
-pub async fn delete(
-    State(pool): State<PgPool>,
+pub async fn stream(
     Path(id): Path<Base62Uuid>,
-) -> Result<Redirect> {
+    State(crawls): State<Crawls>,
+) -> Result<impl IntoResponse> {
+    let receiver = {
+        let mut crawls = crawls.lock().expect("crawls lock poisoned");
+        crawls.remove(&id.as_uuid())
+    }
+    .ok_or_else(|| Error::NotFound("feed stream", id.as_uuid()))?;
+
+    let stream = BroadcastStream::new(receiver);
+    let feed_id = format!("feed-{}", id);
+    let stream = stream.map(move |msg| match msg {
+        Ok(FeedCrawlerHandleMessage::Feed(Ok(feed))) => Ok::<Event, String>(
+            Event::default().data(
+                html! {
+                    turbo-stream action="remove" target="feed-stream" {}
+                    turbo-stream action="replace" target=(feed_id) {
+                        template {
+                            li id=(feed_id) { (feed_link(&feed, false)) }
+                        }
+                    }
+                }
+                .into_string(),
+            ),
+        ),
+        Ok(FeedCrawlerHandleMessage::Feed(Err(error))) => Ok(Event::default().data(
+            html! {
+                turbo-stream action="remove" target="feed-stream" {}
+                turbo-stream action="replace" target=(feed_id) {
+                    template {
+                        li id=(feed_id) { span class="error" { (error) } }
+                    }
+                }
+            }
+            .into_string(),
+        )),
+        // TODO: these Entry messages are not yet sent, need to handle them better
+        Ok(FeedCrawlerHandleMessage::Entry(Ok(_))) => Ok(Event::default().data(
+            html! {
+                turbo-stream action="remove" target="feed-stream" {}
+                turbo-stream action="replace" target=(feed_id) {
+                    template {
+                        li id=(feed_id) { "fetched entry" }
+                    }
+                }
+            }
+            .into_string(),
+        )),
+        Ok(FeedCrawlerHandleMessage::Entry(Err(error))) => Ok(Event::default().data(
+            html! {
+                turbo-stream action="remove" target="feed-stream" {}
+                turbo-stream action="replace" target=(feed_id) {
+                    template {
+                        li id=(feed_id) { span class="error" { (error) } }
+                    }
+                }
+            }
+            .into_string(),
+        )),
+        Err(BroadcastStreamRecvError::Lagged(_)) => Ok(Event::default()),
+    });
+    Ok(Sse::new(stream).keep_alive(
+        KeepAlive::new()
+            .interval(Duration::from_secs(15))
+            .text("keep-alive-text"),
+    ))
+}
+
+pub async fn delete(State(pool): State<PgPool>, Path(id): Path<Base62Uuid>) -> Result<Redirect> {
     delete_feed(&pool, id.as_uuid()).await?;
     Ok(Redirect::to("/feeds"))
 }
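
The `Crawls` type from crate::state is not shown in this diff. Given the
lock/insert/remove calls above, it is presumably a shared map along these
lines (a sketch; the exact alias is an assumption):

use std::collections::HashMap;
use std::sync::{Arc, Mutex};

use tokio::sync::broadcast;
use uuid::Uuid;

use crate::actors::feed_crawler::FeedCrawlerHandleMessage;

// Map from feed ID to the broadcast receiver for its in-flight crawl:
// `post` inserts a receiver, `stream` removes it and drains it over SSE.
// A std Mutex works here because the lock is never held across an await.
pub type Crawls = Arc<Mutex<HashMap<Uuid, broadcast::Receiver<FeedCrawlerHandleMessage>>>>;

Because `stream` removes the receiver from the map, each crawl can be
streamed to at most one SSE subscriber per inserted receiver.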

View File

@@ -14,7 +14,7 @@ pub async fn get(State(pool): State<PgPool>, layout: Layout) -> Result<Response>
         div class="feeds" {
             ul id="feeds" {
                 @for feed in feeds {
-                    li { (feed_link(&feed)) }
+                    li { (feed_link(&feed, false)) }
                 }
             }
             div class="add-feed" {
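
feed_link now takes a second boolean: true right after submission in `post`,
false in this list and once the crawl finishes in `stream`. That suggests it
marks a crawl that is still in flight. The partial itself is not in this
excerpt; a plausible sketch, with the Feed fields assumed:

use maud::{html, Markup};

use crate::models::feed::Feed;
use crate::uuid::Base62Uuid;

// Assumed fields: feed_id (Uuid), title (Option<String>), url (String).
pub fn feed_link(feed: &Feed, pending: bool) -> Markup {
    // While the crawl is pending the title has not been fetched yet,
    // so fall back to the submitted URL as the link text.
    let text = if pending {
        feed.url.clone()
    } else {
        feed.title.clone().unwrap_or_else(|| "Untitled Feed".to_string())
    };
    html! {
        a href={ "/feed/" (Base62Uuid::from(feed.feed_id)) } { (text) }
    }
}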

View File

@@ -4,6 +4,7 @@ use std::time::Duration;
 use ansi_to_html::convert_escaped;
 use axum::extract::State;
+use axum::response::sse::KeepAlive;
 use axum::response::{
     sse::{Event, Sse},
     Response,
 };
@@ -44,7 +45,7 @@ pub async fn stream(
         ))
     });
     Sse::new(log_stream).keep_alive(
-        axum::response::sse::KeepAlive::new()
+        KeepAlive::new()
            .interval(Duration::from_secs(15))
            .text("keep-alive-text"),
     )
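
For context, these handlers would be wired into the router roughly as
follows. The module paths and the AppState name are assumptions (the router
is not part of this excerpt), but the /feed/{id}/stream path is fixed by the
feed_stream URL that `post` renders:

use axum::routing::{get, post};
use axum::Router;

use crate::handlers::feed; // assumed module layout
use crate::state::AppState; // assumed state type; FromRef would hand out PgPool and Crawls

pub fn router(state: AppState) -> Router {
    Router::new()
        .route("/feed", post(feed::post))
        .route("/feed/:id", get(feed::get).delete(feed::delete))
        .route("/feed/:id/stream", get(feed::stream))
        .with_state(state)
}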