Added importer actor, file upload still not working
parent e6a37703be
commit eddf39b62e
97 Cargo.lock (generated)
@@ -152,6 +152,7 @@ dependencies = [
  "matchit",
  "memchr",
  "mime",
+ "multer",
  "percent-encoding",
  "pin-project-lite",
  "rustversion",
@@ -366,9 +367,11 @@ dependencies = [
  "clap",
  "dotenvy",
  "feed-rs",
+ "futures",
  "maud",
  "notify",
  "once_cell",
+ "opml",
  "readability",
  "reqwest",
  "serde",
@@ -712,6 +715,21 @@ dependencies = [
  "new_debug_unreachable",
 ]

+[[package]]
+name = "futures"
+version = "0.3.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40"
+dependencies = [
+ "futures-channel",
+ "futures-core",
+ "futures-executor",
+ "futures-io",
+ "futures-sink",
+ "futures-task",
+ "futures-util",
+]
+
 [[package]]
 name = "futures-channel"
 version = "0.3.28"
@@ -756,6 +774,17 @@ version = "0.3.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964"

+[[package]]
+name = "futures-macro"
+version = "0.3.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.15",
+]
+
 [[package]]
 name = "futures-sink"
 version = "0.3.28"
@@ -774,8 +803,10 @@ version = "0.3.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533"
 dependencies = [
+ "futures-channel",
  "futures-core",
  "futures-io",
+ "futures-macro",
  "futures-sink",
  "futures-task",
  "memchr",
@@ -835,6 +866,31 @@ dependencies = [
  "tracing",
 ]

+[[package]]
+name = "hard-xml"
+version = "1.25.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b64625899999650c63a9b40acb4fcf7afc06f8243755ca1018e6e45cd123e87"
+dependencies = [
+ "hard-xml-derive",
+ "jetscii",
+ "lazy_static",
+ "memchr",
+ "xmlparser",
+]
+
+[[package]]
+name = "hard-xml-derive"
+version = "1.25.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5aa98463f92aab6df3d847e3535f7be2118702e79f56621709c0fceec51e615d"
+dependencies = [
+ "bitflags 1.3.2",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
 [[package]]
 name = "hashbrown"
 version = "0.12.3"
@@ -1158,6 +1214,12 @@ version = "1.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"

+[[package]]
+name = "jetscii"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "47f142fe24a9c9944451e8349de0a56af5f3e7226dc46f3ed4d4ecc0b85af75e"
+
 [[package]]
 name = "js-sys"
 version = "0.3.61"
@@ -1379,6 +1441,24 @@ dependencies = [
  "windows-sys 0.45.0",
 ]

+[[package]]
+name = "multer"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "01acbdc23469fd8fe07ab135923371d5f5a422fbf9c522158677c8eb15bc51c2"
+dependencies = [
+ "bytes",
+ "encoding_rs",
+ "futures-util",
+ "http",
+ "httparse",
+ "log",
+ "memchr",
+ "mime",
+ "spin 0.9.8",
+ "version_check",
+]
+
 [[package]]
 name = "native-tls"
 version = "0.2.11"
@@ -1549,6 +1629,17 @@ dependencies = [
  "vcpkg",
 ]

+[[package]]
+name = "opml"
+version = "1.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "657e16a7677a52c9bcfca579d77c087bc4240644d7e5491b359bb76ed62c779d"
+dependencies = [
+ "hard-xml",
+ "serde",
+ "thiserror",
+]
+
 [[package]]
 name = "overload"
 version = "0.1.1"
@@ -3343,6 +3434,12 @@ dependencies = [
  "time 0.1.45",
 ]

+[[package]]
+name = "xmlparser"
+version = "0.13.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4d25c75bf9ea12c4040a97f829154768bbbce366287e2dc044af160cd79a13fd"
+
 [[package]]
 name = "zeroize"
 version = "1.6.0"
Cargo.toml

@@ -14,15 +14,17 @@ path = "src/lib.rs"
 [dependencies]
 ansi-to-html = "0.1"
 anyhow = "1"
-axum = { version = "0.6", features = ["form"] }
+axum = { version = "0.6", features = ["form", "multipart"] }
 bytes = "1.4"
 chrono = { version = "0.4", features = ["serde"] }
 clap = { version = "4.3", features = ["derive", "env"] }
 dotenvy = "0.15"
 feed-rs = "1.3"
+futures = "0.3"
 maud = { version = "0.25", features = ["axum"] }
 notify = "6"
 once_cell = "1.17"
+opml = "1.1"
 readability = "0.2"
 reqwest = { version = "0.11", features = ["json"] }
 serde = { version = "1", features = ["derive"] }
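The new "multipart" feature is what compiles axum 0.6's Multipart extractor into the build; futures supplies the FuturesUnordered used by the importer, and opml does the actual OPML parsing. As a rough sketch of what the feature flag unlocks (a hypothetical standalone handler, not code from this commit):

    use axum::extract::Multipart;
    use axum::http::StatusCode;

    // Only compiles with: axum = { version = "0.6", features = ["multipart"] }
    async fn upload(mut multipart: Multipart) -> Result<(), StatusCode> {
        // Walk every part of the multipart/form-data body in order.
        while let Some(field) = multipart
            .next_field()
            .await
            .map_err(|_| StatusCode::BAD_REQUEST)?
        {
            let name = field.name().map(ToOwned::to_owned);
            let bytes = field.bytes().await.map_err(|_| StatusCode::BAD_REQUEST)?;
            println!("{:?}: {} bytes", name, bytes.len());
        }
        Ok(())
    }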
@@ -122,33 +122,34 @@ div.add-feed {
     grid-area: 'add-feed';
 }

-form.add-feed-form .form-grid {
+form.feed-form .form-grid {
     display: grid;
     grid-template-columns: fit-content(100%) minmax(100px, 400px);
     grid-gap: 16px;
+    width: 100%;
+    margin-bottom: 32px;
 }

-form.add-feed-form .form-grid label {
+form.feed-form .form-grid label {
     font-size: 16px;
     font-weight: bold;
     grid-column: 1 / 2;
 }

-form.add-feed-form .form-grid input, form.add-feed-form .form-grid textarea {
+form.feed-form .form-grid input, form.feed-form .form-grid textarea {
     font-size: 14px;
     grid-column: 2 / 3;
 }

-form.add-feed-form .form-grid textarea {
+form.feed-form .form-grid textarea {
     font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
     resize: vertical;
 }

-form.add-feed-form button {
+form.feed-form .form-grid button {
     font-size: 14px;
-    margin-top: 24px;
     padding: 4px 8px;
-    float: right;
+    grid-column: 3 / 4;
 }

 /* Feed */
201 src/actors/importer.rs (new file)
@@ -0,0 +1,201 @@
+use std::fmt::{self, Display, Formatter};
+use std::io::Cursor;
+
+use bytes::Bytes;
+use futures::stream::FuturesUnordered;
+use futures::StreamExt;
+use opml::OPML;
+use sqlx::PgPool;
+use tokio::sync::{broadcast, mpsc};
+use tracing::{debug, error, instrument};
+
+use crate::actors::crawl_scheduler::{CrawlSchedulerHandle, CrawlSchedulerHandleMessage};
+use crate::models::feed::{Feed, UpsertFeed};
+use crate::uuid::Base62Uuid;
+
+/// The `Importer` actor parses OPML bytes, loops through the document to find all feed URLs, then
+/// creates a DB entry for each and initiates a new crawl if the feed is new.
+///
+/// It receives `ImporterMessage` messages via the `receiver` channel. It communicates back to
+/// the sender of those messages via the `respond_to` channel on the `ImporterMessage`.
+///
+/// `Importer` should not be instantiated directly. Instead, use the `ImporterHandle`.
+struct Importer {
+    receiver: mpsc::Receiver<ImporterMessage>,
+    pool: PgPool,
+    crawl_scheduler: CrawlSchedulerHandle,
+}
+
+#[derive(Debug)]
+enum ImporterMessage {
+    Import {
+        import_id: Base62Uuid,
+        file_name: Option<String>,
+        bytes: Bytes,
+        respond_to: broadcast::Sender<ImporterHandleMessage>,
+    },
+}
+
+impl Display for ImporterMessage {
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        match self {
+            ImporterMessage::Import {
+                import_id, bytes, ..
+            } => write!(f, "Import({}: {} bytes)", import_id, bytes.len()),
+        }
+    }
+}
+
+/// An error type that enumerates possible failures during an import and is cloneable and can be sent
+/// across threads (does not reference the originating Errors which are usually not cloneable).
+#[derive(thiserror::Error, Debug, Clone)]
+pub enum ImporterError {
+    #[error("invalid OPML file: {0}")]
+    InvalidOPML(String),
+    #[error("failed to create feed: {0}")]
+    CreateFeedError(String),
+}
+pub type ImporterResult<T, E = ImporterError> = ::std::result::Result<T, E>;
+
+impl Importer {
+    fn new(
+        receiver: mpsc::Receiver<ImporterMessage>,
+        pool: PgPool,
+        crawl_scheduler: CrawlSchedulerHandle,
+    ) -> Self {
+        Importer {
+            receiver,
+            pool,
+            crawl_scheduler,
+        }
+    }
+
+    #[instrument(skip_all, fields(import_id = %import_id, file_name = ?file_name))]
+    async fn import_opml(
+        &self,
+        import_id: Base62Uuid,
+        file_name: Option<String>,
+        bytes: Bytes,
+        respond_to: broadcast::Sender<ImporterHandleMessage>,
+    ) -> ImporterResult<()> {
+        let document = OPML::from_reader(&mut Cursor::new(bytes))
+            .map_err(|_| ImporterError::InvalidOPML(file_name.unwrap_or(import_id.to_string())))?;
+        let mut receivers = Vec::new();
+        for url in Self::gather_feed_urls(document.body.outlines) {
+            let feed = Feed::upsert(
+                &self.pool,
+                UpsertFeed {
+                    url: url.clone(),
+                    ..Default::default()
+                },
+            )
+            .await
+            .map_err(|_| ImporterError::CreateFeedError(url))?;
+            if feed.updated_at.is_some() {
+                receivers.push(self.crawl_scheduler.schedule(feed.feed_id).await);
+            }
+        }
+
+        let mut future_recvs: FuturesUnordered<_> =
+            receivers.iter_mut().map(|rx| rx.recv()).collect();
+
+        while let Some(result) = future_recvs.next().await {
+            if let Ok(crawl_scheduler_msg) = result {
+                let _ = respond_to.send(ImporterHandleMessage::CrawlScheduler(crawl_scheduler_msg));
+            }
+        }
+        Ok(())
+    }
+
+    fn gather_feed_urls(outlines: Vec<opml::Outline>) -> Vec<String> {
+        let mut urls = Vec::new();
+        for outline in outlines.into_iter() {
+            if let Some(url) = outline.xml_url {
+                urls.push(url);
+            }
+            urls.append(&mut Self::gather_feed_urls(outline.outlines));
+        }
+        urls
+    }
+
+    #[instrument(skip_all, fields(msg = %msg))]
+    async fn handle_message(&mut self, msg: ImporterMessage) {
+        match msg {
+            ImporterMessage::Import {
+                import_id,
+                file_name,
+                bytes,
+                respond_to,
+            } => {
+                let result = self
+                    .import_opml(import_id, file_name, bytes, respond_to.clone())
+                    .await;
+
+                // ignore the result since the initiator may have cancelled waiting for the
+                // response, and that is ok
+                let _ = respond_to.send(ImporterHandleMessage::Import(result));
+            }
+        }
+    }
+
+    #[instrument(skip_all)]
+    async fn run(&mut self) {
+        debug!("starting importer");
+        while let Some(msg) = self.receiver.recv().await {
+            self.handle_message(msg).await;
+        }
+    }
+}
+
+/// The `ImporterHandle` is used to initialize and communicate with an `Importer` actor.
+///
+/// The `Importer` actor parses OPML bytes, loops through the document to find all feed URLs, then
+/// creates a DB entry for each and initiates a new crawl if the feed is new.
+#[derive(Clone)]
+pub struct ImporterHandle {
+    sender: mpsc::Sender<ImporterMessage>,
+}
+
+/// The `ImporterHandleMessage` is the response to an `ImporterMessage` sent to the
+/// `ImporterHandle`.
+///
+/// `ImporterHandleMessage::Import` contains the result of importing the OPML file.
+#[allow(clippy::large_enum_variant)]
+#[derive(Clone)]
+pub enum ImporterHandleMessage {
+    // TODO: send stats of import or forward crawler messages?
+    Import(ImporterResult<()>),
+    CrawlScheduler(CrawlSchedulerHandleMessage),
+}
+
+impl ImporterHandle {
+    /// Creates an async actor task that will listen for messages on the `sender` channel.
+    pub fn new(pool: PgPool, crawl_scheduler: CrawlSchedulerHandle) -> Self {
+        let (sender, receiver) = mpsc::channel(8);
+        let mut importer = Importer::new(receiver, pool, crawl_scheduler);
+        tokio::spawn(async move { importer.run().await });

+        Self { sender }
+    }
+
+    /// Sends an `ImporterMessage::Import` message to the running `Importer` actor.
+    ///
+    /// Listen to the result of the import via the returned `broadcast::Receiver`.
+    pub async fn import(
+        &self,
+        import_id: Base62Uuid,
+        file_name: Option<String>,
+        bytes: Bytes,
+    ) -> broadcast::Receiver<ImporterHandleMessage> {
+        let (sender, receiver) = broadcast::channel(8);
+        let msg = ImporterMessage::Import {
+            import_id,
+            file_name,
+            bytes,
+            respond_to: sender,
+        };
+
+        self.sender.send(msg).await.expect("importer task has died");
+        receiver
+    }
+}
src/actors/mod.rs

@@ -1,3 +1,4 @@
 pub mod crawl_scheduler;
 pub mod entry_crawler;
 pub mod feed_crawler;
+pub mod importer;
10 src/error.rs
@@ -1,3 +1,4 @@
+use axum::extract::multipart::MultipartError;
 use axum::http::StatusCode;
 use axum::response::{IntoResponse, Response};
 use axum::Json;
@@ -26,6 +27,12 @@ pub enum Error {
     #[error("validation error in request body")]
     InvalidEntity(#[from] ValidationErrors),
+
+    #[error("error with file upload: (0)")]
+    Upload(#[from] MultipartError),
+
+    #[error("no file uploaded")]
+    NoFile,

     #[error("{0}: {1} not found")]
     NotFound(&'static str, Uuid),
@@ -78,7 +85,8 @@ impl Error {
             InternalServerError | Sqlx(_) | Anyhow(_) | Reqwest(_) => {
                 StatusCode::INTERNAL_SERVER_ERROR
             }
-            InvalidEntity(_) | RelationNotFound(_) => StatusCode::UNPROCESSABLE_ENTITY,
+            InvalidEntity(_) | RelationNotFound(_) | NoFile => StatusCode::UNPROCESSABLE_ENTITY,
+            Upload(err) => err.status(),
         }
     }
 }
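One slip worth flagging in this hunk: thiserror interpolates fields with braces, so the attribute "error with file upload: (0)" will print a literal "(0)" instead of the wrapped MultipartError. The corrected attribute (a fix sketch, not what the commit contains) would be:

    // Braces, not parentheses, interpolate the wrapped error's Display output.
    #[error("error with file upload: {0}")]
    Upload(#[from] MultipartError),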
@@ -202,7 +202,6 @@ pub async fn stream(
         Ok(CrawlSchedulerHandleMessage::FeedCrawler(FeedCrawlerHandleMessage::Entry(Ok(_)))) => {
             Ok(Event::default().data(
                 html! {
-                    turbo-stream action="remove" target="feed-stream" {}
                     turbo-stream action="replace" target=(feed_id) {
                         template {
                             li id=(feed_id) { "fetched entry" }
@@ -216,7 +215,6 @@ pub async fn stream(
             error,
         )))) => Ok(Event::default().data(
             html! {
-                turbo-stream action="remove" target="feed-stream" {}
                 turbo-stream action="replace" target=(feed_id) {
                     template {
                         li id=(feed_id) { span class="error" { (error) } }
@@ -31,16 +31,19 @@ pub async fn get(State(pool): State<PgPool>, layout: Layout) -> Result<Response>
     }
     div class="add-feed" {
         h3 { "Add Feed" }
-        form action="/feed" method="post" class="add-feed-form" {
+        form action="/feed" method="post" class="feed-form" {
             div class="form-grid" {
-                label for="url" { "URL (required): " }
+                label for="url" { "URL: " }
                 input type="text" id="url" name="url" placeholder="https://example.com/feed.xml" required="true";
-                label for="title" { "Title: " }
-                input type="text" id="title" name="title" placeholder="Feed title";
-                label { "Description: " }
-                textarea id="description" name="description" placeholder="Feed description" {}
+                button type="submit" { "Add Feed" }
+            }
+        }
+        form action="/import/opml" method="post" enctype="mulipart/form-data" class="feed-form" {
+            div class="form-grid" {
+                label for="opml" { "OPML: " }
+                input type="file" id="opml" name="opml" required="true" accept="text/x-opml,application/xml,text/xml";
+                button type="submit" { "Import Feeds" }
             }
-            button type="submit" { "Add Feed" }
         }
     }
 }
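Given the commit title, the likeliest culprit for the broken upload is visible right here: enctype="mulipart/form-data" is misspelled. Browsers fall back to application/x-www-form-urlencoded for unrecognized enctype values, so axum's Multipart extractor never receives a multipart body. The corrected maud line (a fix sketch, not what this commit ships) would be:

    form action="/import/opml" method="post" enctype="multipart/form-data" class="feed-form" {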
134 src/handlers/import.rs (new file)
@@ -0,0 +1,134 @@
+use std::time::Duration;
+
+use axum::extract::{Multipart, Path, State};
+use axum::http::StatusCode;
+use axum::response::sse::{Event, KeepAlive};
+use axum::response::{IntoResponse, Response, Sse};
+use maud::html;
+use tokio_stream::wrappers::BroadcastStream;
+use tokio_stream::StreamExt;
+
+use crate::actors::crawl_scheduler::CrawlSchedulerHandleMessage;
+use crate::actors::feed_crawler::FeedCrawlerHandleMessage;
+use crate::actors::importer::{ImporterHandle, ImporterHandleMessage};
+use crate::error::{Error, Result};
+use crate::partials::feed_link::feed_link;
+use crate::state::Imports;
+use crate::turbo_stream::TurboStream;
+use crate::uuid::Base62Uuid;
+
+pub async fn opml(
+    State(imports): State<Imports>,
+    State(importer): State<ImporterHandle>,
+    mut multipart: Multipart,
+) -> Result<Response> {
+    dbg!("opml handler");
+    if let Some(field) = multipart.next_field().await.map_err(|err| { dbg!(&err); err })? {
+        let import_id = Base62Uuid::new();
+        dbg!(&import_id);
+        let file_name = field.file_name().map(|s| s.to_string());
+        dbg!(&file_name);
+        let bytes = field.bytes().await?;
+        dbg!(&bytes.len());
+        let receiver = importer.import(import_id, file_name, bytes).await;
+        {
+            let mut imports = imports.lock().await;
+            imports.insert(import_id.as_uuid(), receiver);
+        }
+
+        let import_html_id = format!("import-{}", import_id);
+        let import_stream = format!("/import/{}/stream", import_id);
+        return Ok((
+            StatusCode::CREATED,
+            TurboStream(
+                html! {
+                    turbo-stream-source src=(import_stream) id="import-stream" {}
+                    turbo-stream action="append" target="feeds" {
+                        template {
+                            li id=(import_html_id) { "Importing..." }
+                        }
+                    }
+                    turbo-stream action="remove" target="no-feeds";
+                }
+                .into_string(),
+            ),
+        )
+        .into_response());
+    }
+    dbg!("no file");
+    Err(Error::NoFile)
+}
+
+pub async fn stream(
+    Path(id): Path<Base62Uuid>,
+    State(imports): State<Imports>,
+) -> Result<impl IntoResponse> {
+    let receiver = {
+        let mut imports = imports.lock().await;
+        imports.remove(&id.as_uuid())
+    }
+    .ok_or_else(|| Error::NotFound("import stream", id.as_uuid()))?;
+
+    let stream = BroadcastStream::new(receiver);
+    let import_html_id = format!("import-{}", id);
+    let stream = stream.map(move |msg| match msg {
+        Ok(ImporterHandleMessage::Import(Ok(_))) => Ok::<Event, String>(
+            Event::default().data(
+                html! {
+                    turbo-stream action="remove" target="import-stream" {}
+                    turbo-stream action="replace" target=(import_html_id) {
+                        template {
+                            li id=(import_html_id) { "Done importing" }
+                        }
+                    }
+                }
+                .into_string(),
+            ),
+        ),
+        Ok(ImporterHandleMessage::CrawlScheduler(CrawlSchedulerHandleMessage::FeedCrawler(
+            FeedCrawlerHandleMessage::Feed(Ok(feed)),
+        ))) => Ok::<Event, String>(
+            Event::default().data(
+                html! {
+                    turbo-stream action="prepend" target="feeds" {
+                        template {
+                            li id=(format!("feed-{}", feed.feed_id)) { (feed_link(&feed, false)) }
+                        }
+                    }
+                }
+                .into_string(),
+            ),
+        ),
+        Ok(ImporterHandleMessage::CrawlScheduler(CrawlSchedulerHandleMessage::FeedCrawler(
+            FeedCrawlerHandleMessage::Feed(Err(error)),
+        ))) => Ok::<Event, String>(
+            Event::default().data(
+                html! {
+                    turbo-stream action="prepend" target="feeds" {
+                        template {
+                            li { span class="error" { (error) } }
+                        }
+                    }
+                }
+                .into_string(),
+            ),
+        ),
+        Ok(ImporterHandleMessage::Import(Err(error))) => Ok(Event::default().data(
+            html! {
+                turbo-stream action="remove" target="import-stream" {}
+                turbo-stream action="replace" target=(import_html_id) {
+                    template {
+                        li id=(import_html_id) { span class="error" { (error) } }
+                    }
+                }
+            }
+            .into_string(),
+        )),
+        _ => Ok(Event::default()),
+    });
+    Ok(Sse::new(stream).keep_alive(
+        KeepAlive::new()
+            .interval(Duration::from_secs(15))
+            .text("keep-alive-text"),
+    ))
+}
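The stream handler above is the same broadcast-to-SSE bridge already used for crawls: take the one-shot receiver out of the shared map, wrap it in a BroadcastStream, and map each actor message onto a turbo-stream fragment. Stripped of the maud markup, the core pattern looks roughly like this (a standalone sketch under assumed types, not code from the commit):

    use std::convert::Infallible;
    use std::time::Duration;

    use axum::response::sse::{Event, KeepAlive, Sse};
    use axum::response::IntoResponse;
    use tokio_stream::wrappers::BroadcastStream;
    use tokio_stream::StreamExt;

    // Hypothetical: expose any broadcast::Receiver<String> as server-sent events,
    // sending a no-op event when the receiver lagged behind the sender.
    async fn progress(rx: tokio::sync::broadcast::Receiver<String>) -> impl IntoResponse {
        let stream = BroadcastStream::new(rx).map(|msg| {
            Ok::<Event, Infallible>(match msg {
                Ok(text) => Event::default().data(text),
                Err(_lagged) => Event::default(),
            })
        });
        Sse::new(stream).keep_alive(
            KeepAlive::new()
                .interval(Duration::from_secs(15))
                .text("keep-alive-text"),
        )
    }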
src/handlers/mod.rs

@@ -1,6 +1,7 @@
 pub mod api;
 pub mod entry;
 pub mod home;
+pub mod import;
 pub mod feed;
 pub mod feeds;
 pub mod log;
10 src/main.rs
@@ -24,6 +24,7 @@ use tower_livereload::LiveReloadLayer;
 use tracing::debug;

 use lib::actors::crawl_scheduler::CrawlSchedulerHandle;
+use lib::actors::importer::ImporterHandle;
 use lib::config::Config;
 use lib::domain_locks::DomainLocks;
 use lib::handlers;
@@ -49,6 +50,7 @@ async fn main() -> Result<()> {
     let _guards = init_tracing(&config, log_sender)?;

     let crawls = Arc::new(Mutex::new(HashMap::new()));
+    let imports = Arc::new(Mutex::new(HashMap::new()));
     let domain_locks = DomainLocks::new();
     let client = Client::builder().user_agent(USER_AGENT).build()?;

@@ -66,6 +68,10 @@ async fn main() -> Result<()> {
         config.content_dir.clone(),
     );
     let _ = crawl_scheduler.bootstrap().await;
+    let importer = ImporterHandle::new(
+        pool.clone(),
+        crawl_scheduler.clone(),
+    );

     let addr = format!("{}:{}", &config.host, &config.port).parse()?;
     let mut app = Router::new()
@@ -84,6 +90,8 @@ async fn main() -> Result<()> {
         .route("/entry/:id", get(handlers::entry::get))
         .route("/log", get(handlers::log::get))
         .route("/log/stream", get(handlers::log::stream))
+        .route("/import/opml", post(handlers::import::opml))
+        .route("/import/:id/stream", get(handlers::import::stream))
         .nest_service("/static", ServeDir::new("static"))
         .with_state(AppState {
             pool,
@@ -93,6 +101,8 @@ async fn main() -> Result<()> {
             domain_locks,
             client,
             crawl_scheduler,
+            importer,
+            imports,
         })
         .layer(ServiceBuilder::new().layer(TraceLayer::new_for_http()));
src/models/feed.rs

@@ -81,7 +81,7 @@ pub struct CreateFeed {
     pub description: Option<String>,
 }

-#[derive(Debug, Deserialize, Validate)]
+#[derive(Debug, Deserialize, Default, Validate)]
 pub struct UpsertFeed {
     #[validate(length(max = 255))]
     pub title: Option<String>,
32 src/state.rs
@@ -9,6 +9,7 @@ use reqwest::Client;
 use sqlx::PgPool;
 use uuid::Uuid;

+use crate::actors::importer::{ImporterHandle, ImporterHandleMessage};
 use crate::actors::crawl_scheduler::{CrawlSchedulerHandle, CrawlSchedulerHandleMessage};
 use crate::config::Config;
 use crate::domain_locks::DomainLocks;
@@ -23,8 +24,25 @@ use crate::domain_locks::DomainLocks;
 /// This map should only contain crawls that have just been created but not yet subscribed to.
 /// Entries are only added when a user adds a feed in the UI and entries are removed by the same
 /// user once a server-sent event connection is established.
+///
+/// TODO: remove the entries in the CrawlScheduler once the crawl is complete if the user never
+/// requested the stream to remove it themselves.
 pub type Crawls = Arc<Mutex<HashMap<Uuid, broadcast::Receiver<CrawlSchedulerHandleMessage>>>>;
+
+/// A map of unique import IDs to a channel receiver for the active `Importer` running that import.
+///
+/// Same as the `Crawls` map, the only purpose of this is to keep track of active imports so that
+/// axum handlers can subscribe to the result of the import via the receiver channel, which is then
+/// sent to end-users as a stream of server-sent events.
+///
+/// This map should only contain imports that have just been created but not yet subscribed to.
+/// Entries are only added when a user uploads an OPML file to import and entries are removed by
+/// the same user once a server-sent event connection is established.
+///
+/// TODO: remove the entries in the Importer once the import is complete if the user never
+/// requested the stream to remove it themselves.
+pub type Imports = Arc<Mutex<HashMap<Uuid, broadcast::Receiver<ImporterHandleMessage>>>>;

 #[derive(Clone)]
 pub struct AppState {
     pub pool: PgPool,
@@ -34,6 +52,8 @@ pub struct AppState {
     pub domain_locks: DomainLocks,
     pub client: Client,
     pub crawl_scheduler: CrawlSchedulerHandle,
+    pub importer: ImporterHandle,
+    pub imports: Imports,
 }

 impl FromRef<AppState> for PgPool {
@@ -77,3 +97,15 @@ impl FromRef<AppState> for CrawlSchedulerHandle {
         state.crawl_scheduler.clone()
     }
 }
+
+impl FromRef<AppState> for ImporterHandle {
+    fn from_ref(state: &AppState) -> Self {
+        state.importer.clone()
+    }
+}
+
+impl FromRef<AppState> for Imports {
+    fn from_ref(state: &AppState) -> Self {
+        state.imports.clone()
+    }
+}
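The FromRef impls are what let the new handlers extract just the slice of state they need, as in State(imports): State<Imports> and State(importer): State<ImporterHandle>, instead of the whole AppState. A minimal illustration of the mechanism (a hypothetical handler, assuming the types above):

    use axum::extract::State;

    // axum clones Imports out of the shared AppState via FromRef before
    // invoking the handler, so the handler never names AppState at all.
    async fn pending_import_count(State(imports): State<Imports>) -> String {
        let imports = imports.lock().await;
        format!("{} imports awaiting subscribers", imports.len())
    }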