Add crawl metadata to feed & improve model interface
This commit is contained in:
parent
0fa0cfc508
commit
4837cbb903
@ -88,8 +88,3 @@ You can also build the binary in release mode for running in production with the
|
|||||||
This project also comes with a CLI binary which allows you to manipulate the
|
This project also comes with a CLI binary which allows you to manipulate the
|
||||||
database directly without needing to go through the REST API server. Run
|
database directly without needing to go through the REST API server. Run
|
||||||
`cli --help` to see all of the available commands.
|
`cli --help` to see all of the available commands.
|
||||||
|
|
||||||
## Running jobs
|
|
||||||
|
|
||||||
To periodically fetch new items from all of the feeds execute the `cli crawl`
|
|
||||||
command in a cronjob.
|
|
||||||
|
@ -31,14 +31,18 @@ $$ language plpgsql;
|
|||||||
-- over things like usernames and emails, without needing to remember to do case-conversion.
|
-- over things like usernames and emails, without needing to remember to do case-conversion.
|
||||||
create collation case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false);
|
create collation case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false);
|
||||||
|
|
||||||
create type feed_type as enum ('atom', 'rss');
|
create type feed_type as enum ('atom', 'json', 'rss0', 'rss1', 'rss2', 'unknown');
|
||||||
|
|
||||||
create table if not exists "feed" (
|
create table if not exists "feed" (
|
||||||
feed_id uuid primary key default uuid_generate_v1mc(),
|
feed_id uuid primary key default uuid_generate_v1mc(),
|
||||||
title text,
|
title text,
|
||||||
url varchar(2048) not null,
|
url varchar(2048) not null,
|
||||||
type feed_type not null,
|
type feed_type not null default 'unknown',
|
||||||
description text,
|
description text default null,
|
||||||
|
crawl_interval_minutes int not null default 180,
|
||||||
|
last_crawl_error text default null,
|
||||||
|
last_crawled_at timestamptz default null,
|
||||||
|
last_entry_published_at timestamptz default null,
|
||||||
created_at timestamptz not null default now(),
|
created_at timestamptz not null default now(),
|
||||||
updated_at timestamptz,
|
updated_at timestamptz,
|
||||||
deleted_at timestamptz
|
deleted_at timestamptz
|
||||||
|
@ -8,11 +8,13 @@ use tokio::sync::{broadcast, mpsc};
|
|||||||
use tracing::log::warn;
|
use tracing::log::warn;
|
||||||
use tracing::{info, info_span, instrument};
|
use tracing::{info, info_span, instrument};
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
use crate::actors::entry_crawler::EntryCrawlerHandle;
|
use crate::actors::entry_crawler::EntryCrawlerHandle;
|
||||||
use crate::domain_locks::DomainLocks;
|
use crate::domain_locks::DomainLocks;
|
||||||
use crate::models::entry::{upsert_entries, CreateEntry, Entry};
|
use crate::models::entry::{CreateEntry, Entry};
|
||||||
use crate::models::feed::{upsert_feed, CreateFeed, Feed};
|
use crate::models::feed::Feed;
|
||||||
|
use crate::uuid::Base62Uuid;
|
||||||
|
|
||||||
/// The `FeedCrawler` actor fetches a feed url, parses it, and saves it to the database.
|
/// The `FeedCrawler` actor fetches a feed url, parses it, and saves it to the database.
|
||||||
///
|
///
|
||||||
@ -31,7 +33,7 @@ struct FeedCrawler {
|
|||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
enum FeedCrawlerMessage {
|
enum FeedCrawlerMessage {
|
||||||
Crawl {
|
Crawl {
|
||||||
url: Url,
|
feed_id: Uuid,
|
||||||
respond_to: broadcast::Sender<FeedCrawlerHandleMessage>,
|
respond_to: broadcast::Sender<FeedCrawlerHandleMessage>,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@ -39,7 +41,7 @@ enum FeedCrawlerMessage {
|
|||||||
impl Display for FeedCrawlerMessage {
|
impl Display for FeedCrawlerMessage {
|
||||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||||
match self {
|
match self {
|
||||||
FeedCrawlerMessage::Crawl { url, .. } => write!(f, "Crawl({})", url),
|
FeedCrawlerMessage::Crawl { feed_id, .. } => write!(f, "Crawl({})", feed_id),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -49,11 +51,13 @@ impl Display for FeedCrawlerMessage {
|
|||||||
#[derive(thiserror::Error, Debug, Clone)]
|
#[derive(thiserror::Error, Debug, Clone)]
|
||||||
pub enum FeedCrawlerError {
|
pub enum FeedCrawlerError {
|
||||||
#[error("invalid feed url: {0}")]
|
#[error("invalid feed url: {0}")]
|
||||||
InvalidUrl(Url),
|
InvalidUrl(String),
|
||||||
#[error("failed to fetch feed: {0}")]
|
#[error("failed to fetch feed: {0}")]
|
||||||
FetchError(Url),
|
FetchError(Url),
|
||||||
#[error("failed to parse feed: {0}")]
|
#[error("failed to parse feed: {0}")]
|
||||||
ParseError(Url),
|
ParseError(Url),
|
||||||
|
#[error("failed to find feed in database: {0}")]
|
||||||
|
GetFeedError(Base62Uuid),
|
||||||
#[error("failed to create feed: {0}")]
|
#[error("failed to create feed: {0}")]
|
||||||
CreateFeedError(Url),
|
CreateFeedError(Url),
|
||||||
#[error("failed to create feed entries: {0}")]
|
#[error("failed to create feed entries: {0}")]
|
||||||
@ -78,11 +82,17 @@ impl FeedCrawler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[instrument(skip_all, fields(url = %url))]
|
#[instrument(skip_all, fields(feed_id = %feed_id))]
|
||||||
async fn crawl_feed(&self, url: Url) -> FeedCrawlerResult<Feed> {
|
async fn crawl_feed(&self, feed_id: Uuid) -> FeedCrawlerResult<Feed> {
|
||||||
|
let mut feed = Feed::get(&self.pool, feed_id)
|
||||||
|
.await
|
||||||
|
.map_err(|_| FeedCrawlerError::GetFeedError(Base62Uuid::from(feed_id)))?;
|
||||||
|
info!("got feed from db");
|
||||||
|
let url = Url::parse(&feed.url)
|
||||||
|
.map_err(|_| FeedCrawlerError::InvalidUrl(feed.url.clone()))?;
|
||||||
let domain = url
|
let domain = url
|
||||||
.domain()
|
.domain()
|
||||||
.ok_or(FeedCrawlerError::InvalidUrl(url.clone()))?;
|
.ok_or(FeedCrawlerError::InvalidUrl(feed.url.clone()))?;
|
||||||
let bytes = self
|
let bytes = self
|
||||||
.domain_locks
|
.domain_locks
|
||||||
.run_request(domain, async {
|
.run_request(domain, async {
|
||||||
@ -96,22 +106,24 @@ impl FeedCrawler {
|
|||||||
.map_err(|_| FeedCrawlerError::FetchError(url.clone()))
|
.map_err(|_| FeedCrawlerError::FetchError(url.clone()))
|
||||||
})
|
})
|
||||||
.await?;
|
.await?;
|
||||||
info!("fetched feed");
|
info!(url=%url, "fetched feed");
|
||||||
let parsed_feed =
|
let parsed_feed =
|
||||||
parser::parse(&bytes[..]).map_err(|_| FeedCrawlerError::ParseError(url.clone()))?;
|
parser::parse(&bytes[..]).map_err(|_| FeedCrawlerError::ParseError(url.clone()))?;
|
||||||
info!("parsed feed");
|
info!("parsed feed");
|
||||||
let feed = upsert_feed(
|
feed.url = url.to_string();
|
||||||
&self.pool,
|
feed.feed_type = parsed_feed.feed_type.into();
|
||||||
CreateFeed {
|
feed.last_crawled_at = Some(Utc::now());
|
||||||
title: parsed_feed.title.map(|text| text.content),
|
if let Some(title) = parsed_feed.title {
|
||||||
url: url.to_string(),
|
feed.title = Some(title.content);
|
||||||
feed_type: parsed_feed.feed_type.into(),
|
}
|
||||||
description: parsed_feed.description.map(|text| text.content),
|
if let Some(description) = parsed_feed.description {
|
||||||
},
|
feed.description = Some(description.content);
|
||||||
)
|
}
|
||||||
|
let feed = feed
|
||||||
|
.save(&self.pool)
|
||||||
.await
|
.await
|
||||||
.map_err(|_| FeedCrawlerError::CreateFeedError(url.clone()))?;
|
.map_err(|_| FeedCrawlerError::CreateFeedError(url.clone()))?;
|
||||||
info!(%feed.feed_id, "upserted feed");
|
info!("updated feed in db");
|
||||||
|
|
||||||
let mut payload = Vec::with_capacity(parsed_feed.entries.len());
|
let mut payload = Vec::with_capacity(parsed_feed.entries.len());
|
||||||
for entry in parsed_feed.entries {
|
for entry in parsed_feed.entries {
|
||||||
@ -132,7 +144,7 @@ impl FeedCrawler {
|
|||||||
warn!("Skipping feed entry with no links");
|
warn!("Skipping feed entry with no links");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let entries = upsert_entries(&self.pool, payload)
|
let entries = Entry::bulk_upsert(&self.pool, payload)
|
||||||
.await
|
.await
|
||||||
.map_err(|_| FeedCrawlerError::CreateFeedEntriesError(url.clone()))?;
|
.map_err(|_| FeedCrawlerError::CreateFeedEntriesError(url.clone()))?;
|
||||||
let (new, updated) = entries
|
let (new, updated) = entries
|
||||||
@ -156,8 +168,11 @@ impl FeedCrawler {
|
|||||||
#[instrument(skip_all, fields(msg = %msg))]
|
#[instrument(skip_all, fields(msg = %msg))]
|
||||||
async fn handle_message(&mut self, msg: FeedCrawlerMessage) {
|
async fn handle_message(&mut self, msg: FeedCrawlerMessage) {
|
||||||
match msg {
|
match msg {
|
||||||
FeedCrawlerMessage::Crawl { url, respond_to } => {
|
FeedCrawlerMessage::Crawl {
|
||||||
let result = self.crawl_feed(url).await;
|
feed_id,
|
||||||
|
respond_to,
|
||||||
|
} => {
|
||||||
|
let result = self.crawl_feed(feed_id).await;
|
||||||
// ignore the result since the initiator may have cancelled waiting for the
|
// ignore the result since the initiator may have cancelled waiting for the
|
||||||
// response, and that is ok
|
// response, and that is ok
|
||||||
let _ = respond_to.send(FeedCrawlerHandleMessage::Feed(result));
|
let _ = respond_to.send(FeedCrawlerHandleMessage::Feed(result));
|
||||||
@ -212,10 +227,13 @@ impl FeedCrawlerHandle {
|
|||||||
/// Sends a `FeedCrawlerMessage::Crawl` message to the running `FeedCrawler` actor.
|
/// Sends a `FeedCrawlerMessage::Crawl` message to the running `FeedCrawler` actor.
|
||||||
///
|
///
|
||||||
/// Listen to the result of the crawl via the returned `broadcast::Receiver`.
|
/// Listen to the result of the crawl via the returned `broadcast::Receiver`.
|
||||||
pub async fn crawl(&self, url: Url) -> broadcast::Receiver<FeedCrawlerHandleMessage> {
|
pub async fn crawl(
|
||||||
|
&self,
|
||||||
|
feed_id: Uuid,
|
||||||
|
) -> broadcast::Receiver<FeedCrawlerHandleMessage> {
|
||||||
let (sender, receiver) = broadcast::channel(8);
|
let (sender, receiver) = broadcast::channel(8);
|
||||||
let msg = FeedCrawlerMessage::Crawl {
|
let msg = FeedCrawlerMessage::Crawl {
|
||||||
url,
|
feed_id,
|
||||||
respond_to: sender,
|
respond_to: sender,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1,15 +1,17 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use clap::{Args, Parser, Subcommand};
|
|
||||||
use chrono::Utc;
|
use chrono::Utc;
|
||||||
|
use clap::{Args, Parser, Subcommand};
|
||||||
use dotenvy::dotenv;
|
use dotenvy::dotenv;
|
||||||
|
use lib::actors::feed_crawler::FeedCrawlerHandle;
|
||||||
|
use lib::domain_locks::DomainLocks;
|
||||||
|
use reqwest::Client;
|
||||||
use sqlx::postgres::PgPoolOptions;
|
use sqlx::postgres::PgPoolOptions;
|
||||||
use std::env;
|
use std::env;
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
use lib::jobs::crawl::crawl;
|
use lib::models::entry::{Entry, CreateEntry};
|
||||||
use lib::models::feed::{create_feed, delete_feed, CreateFeed, FeedType};
|
use lib::models::feed::{CreateFeed, Feed, FeedType};
|
||||||
use lib::models::entry::{create_entry, delete_entry, CreateEntry};
|
|
||||||
use lib::uuid::Base62Uuid;
|
use lib::uuid::Base62Uuid;
|
||||||
|
|
||||||
/// CLI for crawlnicle
|
/// CLI for crawlnicle
|
||||||
@ -23,14 +25,20 @@ struct Cli {
|
|||||||
|
|
||||||
#[derive(Subcommand)]
|
#[derive(Subcommand)]
|
||||||
enum Commands {
|
enum Commands {
|
||||||
/// Fetches new entries from all feeds in the database
|
Crawl(CrawlFeed),
|
||||||
Crawl,
|
|
||||||
AddFeed(AddFeed),
|
AddFeed(AddFeed),
|
||||||
DeleteFeed(DeleteFeed),
|
DeleteFeed(DeleteFeed),
|
||||||
AddEntry(AddEntry),
|
AddEntry(AddEntry),
|
||||||
DeleteEntry(DeleteEntry),
|
DeleteEntry(DeleteEntry),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Args)]
|
||||||
|
/// Crawl a feed (get new entries)
|
||||||
|
struct CrawlFeed {
|
||||||
|
/// id of the feed to crawl
|
||||||
|
id: Uuid,
|
||||||
|
}
|
||||||
|
|
||||||
/// Add a feed to the database
|
/// Add a feed to the database
|
||||||
#[derive(Args)]
|
#[derive(Args)]
|
||||||
struct AddFeed {
|
struct AddFeed {
|
||||||
@ -94,12 +102,11 @@ pub async fn main() -> Result<()> {
|
|||||||
|
|
||||||
match cli.commands {
|
match cli.commands {
|
||||||
Commands::AddFeed(args) => {
|
Commands::AddFeed(args) => {
|
||||||
let feed = create_feed(
|
let feed = Feed::create(
|
||||||
&pool,
|
&pool,
|
||||||
CreateFeed {
|
CreateFeed {
|
||||||
title: args.title,
|
title: args.title,
|
||||||
url: args.url,
|
url: args.url,
|
||||||
feed_type: args.feed_type,
|
|
||||||
description: args.description,
|
description: args.description,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@ -107,11 +114,11 @@ pub async fn main() -> Result<()> {
|
|||||||
info!("Created feed with id {}", Base62Uuid::from(feed.feed_id));
|
info!("Created feed with id {}", Base62Uuid::from(feed.feed_id));
|
||||||
}
|
}
|
||||||
Commands::DeleteFeed(args) => {
|
Commands::DeleteFeed(args) => {
|
||||||
delete_feed(&pool, args.id).await?;
|
Feed::delete(&pool, args.id).await?;
|
||||||
info!("Deleted feed with id {}", Base62Uuid::from(args.id));
|
info!("Deleted feed with id {}", Base62Uuid::from(args.id));
|
||||||
}
|
}
|
||||||
Commands::AddEntry(args) => {
|
Commands::AddEntry(args) => {
|
||||||
let entry = create_entry(
|
let entry = Entry::create(
|
||||||
&pool,
|
&pool,
|
||||||
CreateEntry {
|
CreateEntry {
|
||||||
title: args.title,
|
title: args.title,
|
||||||
@ -125,12 +132,22 @@ pub async fn main() -> Result<()> {
|
|||||||
info!("Created entry with id {}", Base62Uuid::from(entry.entry_id));
|
info!("Created entry with id {}", Base62Uuid::from(entry.entry_id));
|
||||||
}
|
}
|
||||||
Commands::DeleteEntry(args) => {
|
Commands::DeleteEntry(args) => {
|
||||||
delete_entry(&pool, args.id).await?;
|
Entry::delete(&pool, args.id).await?;
|
||||||
info!("Deleted entry with id {}", Base62Uuid::from(args.id));
|
info!("Deleted entry with id {}", Base62Uuid::from(args.id));
|
||||||
}
|
}
|
||||||
Commands::Crawl => {
|
Commands::Crawl(CrawlFeed { id }) => {
|
||||||
info!("Crawling...");
|
info!("Crawling feed {}...", Base62Uuid::from(id));
|
||||||
crawl(&pool).await?;
|
let client = Client::new();
|
||||||
|
// NOTE: this is not the same DomainLocks as the one used in the server so, if the
|
||||||
|
// server is running, it will *not* serialize same-domain requests with it.
|
||||||
|
let domain_locks = DomainLocks::new();
|
||||||
|
let feed_crawler = FeedCrawlerHandle::new(
|
||||||
|
pool.clone(),
|
||||||
|
client.clone(),
|
||||||
|
domain_locks.clone(),
|
||||||
|
env::var("CONTENT_DIR")?,
|
||||||
|
);
|
||||||
|
let _ = feed_crawler.crawl(id).await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2,8 +2,8 @@ use axum::{extract::State, Json};
|
|||||||
use sqlx::PgPool;
|
use sqlx::PgPool;
|
||||||
|
|
||||||
use crate::error::Error;
|
use crate::error::Error;
|
||||||
use crate::models::entry::{get_entries, Entry, GetEntriesOptions};
|
use crate::models::entry::Entry;
|
||||||
|
|
||||||
pub async fn get(State(pool): State<PgPool>) -> Result<Json<Vec<Entry>>, Error> {
|
pub async fn get(State(pool): State<PgPool>) -> Result<Json<Vec<Entry>>, Error> {
|
||||||
Ok(Json(get_entries(&pool, GetEntriesOptions::default()).await?))
|
Ok(Json(Entry::get_all(&pool, Default::default()).await?))
|
||||||
}
|
}
|
||||||
|
@ -5,19 +5,19 @@ use axum::{
|
|||||||
use sqlx::PgPool;
|
use sqlx::PgPool;
|
||||||
|
|
||||||
use crate::error::Error;
|
use crate::error::Error;
|
||||||
use crate::models::entry::{create_entry, get_entry, CreateEntry, Entry};
|
use crate::models::entry::{CreateEntry, Entry};
|
||||||
use crate::uuid::Base62Uuid;
|
use crate::uuid::Base62Uuid;
|
||||||
|
|
||||||
pub async fn get(
|
pub async fn get(
|
||||||
State(pool): State<PgPool>,
|
State(pool): State<PgPool>,
|
||||||
Path(id): Path<Base62Uuid>,
|
Path(id): Path<Base62Uuid>,
|
||||||
) -> Result<Json<Entry>, Error> {
|
) -> Result<Json<Entry>, Error> {
|
||||||
Ok(Json(get_entry(&pool, id.as_uuid()).await?))
|
Ok(Json(Entry::get(&pool, id.as_uuid()).await?))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn post(
|
pub async fn post(
|
||||||
State(pool): State<PgPool>,
|
State(pool): State<PgPool>,
|
||||||
Json(payload): Json<CreateEntry>,
|
Json(payload): Json<CreateEntry>,
|
||||||
) -> Result<Json<Entry>, Error> {
|
) -> Result<Json<Entry>, Error> {
|
||||||
Ok(Json(create_entry(&pool, payload).await?))
|
Ok(Json(Entry::create(&pool, payload).await?))
|
||||||
}
|
}
|
||||||
|
@ -5,20 +5,20 @@ use axum::{
|
|||||||
use sqlx::PgPool;
|
use sqlx::PgPool;
|
||||||
|
|
||||||
use crate::error::{Error, Result};
|
use crate::error::{Error, Result};
|
||||||
use crate::models::feed::{create_feed, delete_feed, get_feed, CreateFeed, Feed};
|
use crate::models::feed::{CreateFeed, Feed};
|
||||||
use crate::uuid::Base62Uuid;
|
use crate::uuid::Base62Uuid;
|
||||||
|
|
||||||
pub async fn get(State(pool): State<PgPool>, Path(id): Path<Base62Uuid>) -> Result<Json<Feed>> {
|
pub async fn get(State(pool): State<PgPool>, Path(id): Path<Base62Uuid>) -> Result<Json<Feed>> {
|
||||||
Ok(Json(get_feed(&pool, id.as_uuid()).await?))
|
Ok(Json(Feed::get(&pool, id.as_uuid()).await?))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn post(
|
pub async fn post(
|
||||||
State(pool): State<PgPool>,
|
State(pool): State<PgPool>,
|
||||||
Json(payload): Json<CreateFeed>,
|
Json(payload): Json<CreateFeed>,
|
||||||
) -> Result<Json<Feed>, Error> {
|
) -> Result<Json<Feed>, Error> {
|
||||||
Ok(Json(create_feed(&pool, payload).await?))
|
Ok(Json(Feed::create(&pool, payload).await?))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn delete(State(pool): State<PgPool>, Path(id): Path<Base62Uuid>) -> Result<()> {
|
pub async fn delete(State(pool): State<PgPool>, Path(id): Path<Base62Uuid>) -> Result<()> {
|
||||||
delete_feed(&pool, id.as_uuid()).await
|
Feed::delete(&pool, id.as_uuid()).await
|
||||||
}
|
}
|
||||||
|
@ -2,8 +2,9 @@ use axum::{extract::State, Json};
|
|||||||
use sqlx::PgPool;
|
use sqlx::PgPool;
|
||||||
|
|
||||||
use crate::error::Error;
|
use crate::error::Error;
|
||||||
use crate::models::feed::{get_feeds, Feed};
|
use crate::models::feed::Feed;
|
||||||
|
|
||||||
pub async fn get(State(pool): State<PgPool>) -> Result<Json<Vec<Feed>>, Error> {
|
pub async fn get(State(pool): State<PgPool>) -> Result<Json<Vec<Feed>>, Error> {
|
||||||
Ok(Json(get_feeds(&pool).await?))
|
// TODO: pagination
|
||||||
|
Ok(Json(Feed::get_all(&pool).await?))
|
||||||
}
|
}
|
||||||
|
@ -7,7 +7,7 @@ use sqlx::PgPool;
|
|||||||
|
|
||||||
use crate::config::Config;
|
use crate::config::Config;
|
||||||
use crate::error::Result;
|
use crate::error::Result;
|
||||||
use crate::models::entry::get_entry;
|
use crate::models::entry::Entry;
|
||||||
use crate::partials::layout::Layout;
|
use crate::partials::layout::Layout;
|
||||||
use crate::uuid::Base62Uuid;
|
use crate::uuid::Base62Uuid;
|
||||||
|
|
||||||
@ -17,7 +17,7 @@ pub async fn get(
|
|||||||
State(config): State<Config>,
|
State(config): State<Config>,
|
||||||
layout: Layout,
|
layout: Layout,
|
||||||
) -> Result<Response> {
|
) -> Result<Response> {
|
||||||
let entry = get_entry(&pool, id.as_uuid()).await?;
|
let entry = Entry::get(&pool, id.as_uuid()).await?;
|
||||||
let content_dir = std::path::Path::new(&config.content_dir);
|
let content_dir = std::path::Path::new(&config.content_dir);
|
||||||
let content_path = content_dir.join(format!("{}.html", entry.entry_id));
|
let content_path = content_dir.join(format!("{}.html", entry.entry_id));
|
||||||
Ok(layout.render(html! {
|
Ok(layout.render(html! {
|
||||||
|
@ -20,8 +20,8 @@ use crate::actors::feed_crawler::{FeedCrawlerHandle, FeedCrawlerHandleMessage};
|
|||||||
use crate::config::Config;
|
use crate::config::Config;
|
||||||
use crate::domain_locks::DomainLocks;
|
use crate::domain_locks::DomainLocks;
|
||||||
use crate::error::{Error, Result};
|
use crate::error::{Error, Result};
|
||||||
use crate::models::entry::get_entries_for_feed;
|
use crate::models::entry::Entry;
|
||||||
use crate::models::feed::{create_feed, delete_feed, get_feed, CreateFeed, FeedType};
|
use crate::models::feed::{CreateFeed, Feed};
|
||||||
use crate::partials::{entry_list::entry_list, feed_link::feed_link, layout::Layout};
|
use crate::partials::{entry_list::entry_list, feed_link::feed_link, layout::Layout};
|
||||||
use crate::state::Crawls;
|
use crate::state::Crawls;
|
||||||
use crate::turbo_stream::TurboStream;
|
use crate::turbo_stream::TurboStream;
|
||||||
@ -32,8 +32,8 @@ pub async fn get(
|
|||||||
State(pool): State<PgPool>,
|
State(pool): State<PgPool>,
|
||||||
layout: Layout,
|
layout: Layout,
|
||||||
) -> Result<Response> {
|
) -> Result<Response> {
|
||||||
let feed = get_feed(&pool, id.as_uuid()).await?;
|
let feed = Feed::get(&pool, id.as_uuid()).await?;
|
||||||
let entries = get_entries_for_feed(&pool, feed.feed_id, Default::default()).await?;
|
let entries = Entry::get_all_for_feed(&pool, feed.feed_id, Default::default()).await?;
|
||||||
let delete_url = format!("/feed/{}/delete", id);
|
let delete_url = format!("/feed/{}/delete", id);
|
||||||
Ok(layout.render(html! {
|
Ok(layout.render(html! {
|
||||||
header class="feed-header" {
|
header class="feed-header" {
|
||||||
@ -123,12 +123,11 @@ pub async fn post(
|
|||||||
config.content_dir.clone(),
|
config.content_dir.clone(),
|
||||||
);
|
);
|
||||||
|
|
||||||
let feed = create_feed(
|
let feed = Feed::create(
|
||||||
&pool,
|
&pool,
|
||||||
CreateFeed {
|
CreateFeed {
|
||||||
title: add_feed.title,
|
title: add_feed.title,
|
||||||
url: add_feed.url.clone(),
|
url: add_feed.url.clone(),
|
||||||
feed_type: FeedType::Rss, // eh, get rid of this
|
|
||||||
description: add_feed.description,
|
description: add_feed.description,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@ -148,7 +147,7 @@ pub async fn post(
|
|||||||
|
|
||||||
let url: Url = Url::parse(&add_feed.url)
|
let url: Url = Url::parse(&add_feed.url)
|
||||||
.map_err(|err| AddFeedError::InvalidUrl(add_feed.url.clone(), err))?;
|
.map_err(|err| AddFeedError::InvalidUrl(add_feed.url.clone(), err))?;
|
||||||
let receiver = feed_crawler.crawl(url).await;
|
let receiver = feed_crawler.crawl(feed.feed_id).await;
|
||||||
{
|
{
|
||||||
let mut crawls = crawls.lock().map_err(|_| {
|
let mut crawls = crawls.lock().map_err(|_| {
|
||||||
AddFeedError::CreateFeedError(add_feed.url.clone(), Error::InternalServerError)
|
AddFeedError::CreateFeedError(add_feed.url.clone(), Error::InternalServerError)
|
||||||
@ -245,6 +244,6 @@ pub async fn stream(
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub async fn delete(State(pool): State<PgPool>, Path(id): Path<Base62Uuid>) -> Result<Redirect> {
|
pub async fn delete(State(pool): State<PgPool>, Path(id): Path<Base62Uuid>) -> Result<Redirect> {
|
||||||
delete_feed(&pool, id.as_uuid()).await?;
|
Feed::delete(&pool, id.as_uuid()).await?;
|
||||||
Ok(Redirect::to("/feeds"))
|
Ok(Redirect::to("/feeds"))
|
||||||
}
|
}
|
||||||
|
@ -4,11 +4,12 @@ use maud::html;
|
|||||||
use sqlx::PgPool;
|
use sqlx::PgPool;
|
||||||
|
|
||||||
use crate::error::Result;
|
use crate::error::Result;
|
||||||
use crate::models::feed::get_feeds;
|
use crate::models::feed::Feed;
|
||||||
use crate::partials::{feed_link::feed_link, layout::Layout};
|
use crate::partials::{feed_link::feed_link, layout::Layout};
|
||||||
|
|
||||||
pub async fn get(State(pool): State<PgPool>, layout: Layout) -> Result<Response> {
|
pub async fn get(State(pool): State<PgPool>, layout: Layout) -> Result<Response> {
|
||||||
let feeds = get_feeds(&pool).await?;
|
// TODO: pagination
|
||||||
|
let feeds = Feed::get_all(&pool).await?;
|
||||||
Ok(layout.render(html! {
|
Ok(layout.render(html! {
|
||||||
h2 { "Feeds" }
|
h2 { "Feeds" }
|
||||||
div class="feeds" {
|
div class="feeds" {
|
||||||
|
@ -3,10 +3,10 @@ use axum::response::Response;
|
|||||||
use sqlx::PgPool;
|
use sqlx::PgPool;
|
||||||
|
|
||||||
use crate::error::Result;
|
use crate::error::Result;
|
||||||
use crate::models::entry::{get_entries, GetEntriesOptions};
|
use crate::models::entry::Entry;
|
||||||
use crate::partials::{layout::Layout, entry_list::entry_list};
|
use crate::partials::{layout::Layout, entry_list::entry_list};
|
||||||
|
|
||||||
pub async fn get(State(pool): State<PgPool>, layout: Layout) -> Result<Response> {
|
pub async fn get(State(pool): State<PgPool>, layout: Layout) -> Result<Response> {
|
||||||
let entries = get_entries(&pool, GetEntriesOptions::default()).await?;
|
let entries = Entry::get_all(&pool, Default::default()).await?;
|
||||||
Ok(layout.render(entry_list(entries)))
|
Ok(layout.render(entry_list(entries)))
|
||||||
}
|
}
|
||||||
|
@ -1,77 +0,0 @@
|
|||||||
use std::fs;
|
|
||||||
use std::env;
|
|
||||||
use std::path::Path;
|
|
||||||
|
|
||||||
use article_scraper::ArticleScraper;
|
|
||||||
use chrono::Utc;
|
|
||||||
use feed_rs::parser;
|
|
||||||
use reqwest::{Client, Url};
|
|
||||||
use sqlx::PgPool;
|
|
||||||
use tracing::{info, info_span, warn};
|
|
||||||
|
|
||||||
use crate::models::feed::get_feeds;
|
|
||||||
use crate::models::entry::{update_entry, upsert_entries, CreateEntry};
|
|
||||||
use crate::uuid::Base62Uuid;
|
|
||||||
|
|
||||||
/// DEPRECATED: Use FeedCrawler instead, keeping this for reference until I set up scheduled jobs.
|
|
||||||
/// For every feed in the database, fetches the feed, parses it, and saves new entries to the
|
|
||||||
/// database.
|
|
||||||
pub async fn crawl(pool: &PgPool) -> anyhow::Result<()> {
|
|
||||||
let scraper = ArticleScraper::new(None).await;
|
|
||||||
let client = Client::new();
|
|
||||||
let content_dir = env::var("CONTENT_DIR")?;
|
|
||||||
let content_dir = Path::new(&content_dir);
|
|
||||||
let feeds = get_feeds(pool).await?;
|
|
||||||
for feed in feeds {
|
|
||||||
let feed_id_str: String = Base62Uuid::from(feed.feed_id).into();
|
|
||||||
let feed_span = info_span!("feed", id = feed_id_str, url = feed.url.as_str());
|
|
||||||
let _feed_span_guard = feed_span.enter();
|
|
||||||
info!("Fetching feed");
|
|
||||||
// TODO: handle these results
|
|
||||||
let bytes = client.get(feed.url).send().await?.bytes().await?;
|
|
||||||
info!("Parsing feed");
|
|
||||||
let parsed_feed = parser::parse(&bytes[..])?;
|
|
||||||
let mut payload = Vec::with_capacity(parsed_feed.entries.len());
|
|
||||||
for entry in parsed_feed.entries {
|
|
||||||
let entry_span = info_span!("entry", id = entry.id, title = entry.title.clone().map(|t| t.content));
|
|
||||||
let _entry_span_guard = entry_span.enter();
|
|
||||||
if let Some(link) = entry.links.get(0) {
|
|
||||||
// if no scraped or feed date is available, fall back to the current time
|
|
||||||
let published_at = entry.published.unwrap_or_else(Utc::now);
|
|
||||||
let entry = CreateEntry {
|
|
||||||
title: entry.title.map(|t| t.content),
|
|
||||||
url: link.href.clone(),
|
|
||||||
description: entry.summary.map(|s| s.content),
|
|
||||||
feed_id: feed.feed_id,
|
|
||||||
published_at,
|
|
||||||
};
|
|
||||||
payload.push(entry);
|
|
||||||
} else {
|
|
||||||
warn!("Skipping feed entry with no links");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let entries = upsert_entries(pool, payload).await?;
|
|
||||||
info!("Created {} entries", entries.len());
|
|
||||||
|
|
||||||
// TODO: figure out how to do this in parallel. ArticleScraper uses some libxml thing that
|
|
||||||
// doesn't implement Send so this isn't trivial.
|
|
||||||
for mut entry in entries {
|
|
||||||
info!("Fetching and parsing entry link: {}", entry.url);
|
|
||||||
if let Ok(article) = scraper.parse(&Url::parse(&entry.url)?, true, &client, None).await {
|
|
||||||
let id = entry.entry_id;
|
|
||||||
if let Some(date) = article.date {
|
|
||||||
// prefer scraped date over rss feed date
|
|
||||||
entry.published_at = date;
|
|
||||||
update_entry(pool, entry).await?;
|
|
||||||
};
|
|
||||||
let html_content = article.get_content();
|
|
||||||
if let Some(content) = html_content {
|
|
||||||
fs::write(content_dir.join(format!("{}.html", id)), content)?;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
warn!("Failed to fetch article for entry: {:?}", &entry.url);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
@ -1 +0,0 @@
|
|||||||
pub mod crawl;
|
|
@ -3,7 +3,6 @@ pub mod config;
|
|||||||
pub mod domain_locks;
|
pub mod domain_locks;
|
||||||
pub mod error;
|
pub mod error;
|
||||||
pub mod handlers;
|
pub mod handlers;
|
||||||
pub mod jobs;
|
|
||||||
pub mod log;
|
pub mod log;
|
||||||
pub mod models;
|
pub mod models;
|
||||||
pub mod partials;
|
pub mod partials;
|
||||||
|
@ -33,7 +33,14 @@ pub struct CreateEntry {
|
|||||||
pub published_at: DateTime<Utc>,
|
pub published_at: DateTime<Utc>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn get_entry(pool: &PgPool, entry_id: Uuid) -> Result<Entry> {
|
#[derive(Default)]
|
||||||
|
pub struct GetEntriesOptions {
|
||||||
|
pub published_before: Option<DateTime<Utc>>,
|
||||||
|
pub limit: Option<i64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Entry {
|
||||||
|
pub async fn get(pool: &PgPool, entry_id: Uuid) -> Result<Entry> {
|
||||||
sqlx::query_as!(Entry, "select * from entry where entry_id = $1", entry_id)
|
sqlx::query_as!(Entry, "select * from entry where entry_id = $1", entry_id)
|
||||||
.fetch_one(pool)
|
.fetch_one(pool)
|
||||||
.await
|
.await
|
||||||
@ -45,13 +52,7 @@ pub async fn get_entry(pool: &PgPool, entry_id: Uuid) -> Result<Entry> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default)]
|
pub async fn get_all(pool: &PgPool, options: GetEntriesOptions) -> sqlx::Result<Vec<Entry>> {
|
||||||
pub struct GetEntriesOptions {
|
|
||||||
pub published_before: Option<DateTime<Utc>>,
|
|
||||||
pub limit: Option<i64>,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub async fn get_entries(pool: &PgPool, options: GetEntriesOptions) -> sqlx::Result<Vec<Entry>> {
|
|
||||||
if let Some(published_before) = options.published_before {
|
if let Some(published_before) = options.published_before {
|
||||||
sqlx::query_as!(
|
sqlx::query_as!(
|
||||||
Entry,
|
Entry,
|
||||||
@ -81,7 +82,7 @@ pub async fn get_entries(pool: &PgPool, options: GetEntriesOptions) -> sqlx::Res
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn get_entries_for_feed(
|
pub async fn get_all_for_feed(
|
||||||
pool: &PgPool,
|
pool: &PgPool,
|
||||||
feed_id: Uuid,
|
feed_id: Uuid,
|
||||||
options: GetEntriesOptions,
|
options: GetEntriesOptions,
|
||||||
@ -119,7 +120,7 @@ pub async fn get_entries_for_feed(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn create_entry(pool: &PgPool, payload: CreateEntry) -> Result<Entry> {
|
pub async fn create(pool: &PgPool, payload: CreateEntry) -> Result<Entry> {
|
||||||
payload.validate()?;
|
payload.validate()?;
|
||||||
sqlx::query_as!(
|
sqlx::query_as!(
|
||||||
Entry,
|
Entry,
|
||||||
@ -146,7 +147,7 @@ pub async fn create_entry(pool: &PgPool, payload: CreateEntry) -> Result<Entry>
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn upsert_entry(pool: &PgPool, payload: CreateEntry) -> Result<Entry> {
|
pub async fn upsert(pool: &PgPool, payload: CreateEntry) -> Result<Entry> {
|
||||||
payload.validate()?;
|
payload.validate()?;
|
||||||
sqlx::query_as!(
|
sqlx::query_as!(
|
||||||
Entry,
|
Entry,
|
||||||
@ -177,7 +178,7 @@ pub async fn upsert_entry(pool: &PgPool, payload: CreateEntry) -> Result<Entry>
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn create_entries(pool: &PgPool, payload: Vec<CreateEntry>) -> Result<Vec<Entry>> {
|
pub async fn bulk_create(pool: &PgPool, payload: Vec<CreateEntry>) -> Result<Vec<Entry>> {
|
||||||
let mut titles = Vec::with_capacity(payload.len());
|
let mut titles = Vec::with_capacity(payload.len());
|
||||||
let mut urls = Vec::with_capacity(payload.len());
|
let mut urls = Vec::with_capacity(payload.len());
|
||||||
let mut descriptions: Vec<Option<String>> = Vec::with_capacity(payload.len());
|
let mut descriptions: Vec<Option<String>> = Vec::with_capacity(payload.len());
|
||||||
@ -218,7 +219,7 @@ pub async fn create_entries(pool: &PgPool, payload: Vec<CreateEntry>) -> Result<
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn upsert_entries(pool: &PgPool, payload: Vec<CreateEntry>) -> Result<Vec<Entry>> {
|
pub async fn bulk_upsert(pool: &PgPool, payload: Vec<CreateEntry>) -> Result<Vec<Entry>> {
|
||||||
let mut titles = Vec::with_capacity(payload.len());
|
let mut titles = Vec::with_capacity(payload.len());
|
||||||
let mut urls = Vec::with_capacity(payload.len());
|
let mut urls = Vec::with_capacity(payload.len());
|
||||||
let mut descriptions: Vec<Option<String>> = Vec::with_capacity(payload.len());
|
let mut descriptions: Vec<Option<String>> = Vec::with_capacity(payload.len());
|
||||||
@ -263,7 +264,7 @@ pub async fn upsert_entries(pool: &PgPool, payload: Vec<CreateEntry>) -> Result<
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn update_entry(pool: &PgPool, payload: Entry) -> Result<Entry> {
|
pub async fn update(pool: &PgPool, payload: Entry) -> Result<Entry> {
|
||||||
sqlx::query_as!(
|
sqlx::query_as!(
|
||||||
Entry,
|
Entry,
|
||||||
"update entry set
|
"update entry set
|
||||||
@ -294,7 +295,7 @@ pub async fn update_entry(pool: &PgPool, payload: Entry) -> Result<Entry> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn delete_entry(pool: &PgPool, entry_id: Uuid) -> Result<()> {
|
pub async fn delete(pool: &PgPool, entry_id: Uuid) -> Result<()> {
|
||||||
sqlx::query!(
|
sqlx::query!(
|
||||||
"update entry set deleted_at = now() where entry_id = $1",
|
"update entry set deleted_at = now() where entry_id = $1",
|
||||||
entry_id
|
entry_id
|
||||||
@ -303,3 +304,4 @@ pub async fn delete_entry(pool: &PgPool, entry_id: Uuid) -> Result<()> {
|
|||||||
.await?;
|
.await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
@ -2,18 +2,22 @@ use std::str::FromStr;
|
|||||||
|
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use sqlx::PgPool;
|
use sqlx::{FromRow, PgPool};
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
use validator::Validate;
|
use validator::Validate;
|
||||||
|
|
||||||
use crate::error::{Error, Result};
|
use crate::error::{Error, Result};
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize, sqlx::Type, Clone)]
|
#[derive(Debug, Serialize, Deserialize, sqlx::Type, Clone, Copy)]
|
||||||
#[sqlx(type_name = "feed_type", rename_all = "lowercase")]
|
#[sqlx(type_name = "feed_type", rename_all = "lowercase")]
|
||||||
#[serde(rename_all = "lowercase")]
|
#[serde(rename_all = "lowercase")]
|
||||||
pub enum FeedType {
|
pub enum FeedType {
|
||||||
Atom,
|
Atom,
|
||||||
Rss,
|
JSON,
|
||||||
|
RSS0,
|
||||||
|
RSS1,
|
||||||
|
RSS2,
|
||||||
|
Unknown,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FromStr for FeedType {
|
impl FromStr for FeedType {
|
||||||
@ -21,7 +25,11 @@ impl FromStr for FeedType {
|
|||||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||||
match s {
|
match s {
|
||||||
"atom" => Ok(FeedType::Atom),
|
"atom" => Ok(FeedType::Atom),
|
||||||
"rss" => Ok(FeedType::Rss),
|
"json" => Ok(FeedType::JSON),
|
||||||
|
"rss0" => Ok(FeedType::RSS0),
|
||||||
|
"rss1" => Ok(FeedType::RSS1),
|
||||||
|
"rss2" => Ok(FeedType::RSS2),
|
||||||
|
"unknown" => Ok(FeedType::Unknown),
|
||||||
_ => Err(format!("invalid feed type: {}", s)),
|
_ => Err(format!("invalid feed type: {}", s)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -31,13 +39,15 @@ impl From<feed_rs::model::FeedType> for FeedType {
|
|||||||
fn from(value: feed_rs::model::FeedType) -> Self {
|
fn from(value: feed_rs::model::FeedType) -> Self {
|
||||||
match value {
|
match value {
|
||||||
feed_rs::model::FeedType::Atom => FeedType::Atom,
|
feed_rs::model::FeedType::Atom => FeedType::Atom,
|
||||||
// TODO: this isn't really accurate
|
feed_rs::model::FeedType::JSON => FeedType::JSON,
|
||||||
_ => FeedType::Rss,
|
feed_rs::model::FeedType::RSS0 => FeedType::RSS0,
|
||||||
|
feed_rs::model::FeedType::RSS1 => FeedType::RSS1,
|
||||||
|
feed_rs::model::FeedType::RSS2 => FeedType::RSS2,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
#[derive(Debug, Serialize, Deserialize, Clone, FromRow)]
|
||||||
pub struct Feed {
|
pub struct Feed {
|
||||||
pub feed_id: Uuid,
|
pub feed_id: Uuid,
|
||||||
pub title: Option<String>,
|
pub title: Option<String>,
|
||||||
@ -45,6 +55,10 @@ pub struct Feed {
|
|||||||
#[serde(rename = "type")]
|
#[serde(rename = "type")]
|
||||||
pub feed_type: FeedType,
|
pub feed_type: FeedType,
|
||||||
pub description: Option<String>,
|
pub description: Option<String>,
|
||||||
|
pub crawl_interval_minutes: i32,
|
||||||
|
pub last_crawl_error: Option<String>,
|
||||||
|
pub last_crawled_at: Option<DateTime<Utc>>,
|
||||||
|
pub last_entry_published_at: Option<DateTime<Utc>>,
|
||||||
pub created_at: DateTime<Utc>,
|
pub created_at: DateTime<Utc>,
|
||||||
pub updated_at: Option<DateTime<Utc>>,
|
pub updated_at: Option<DateTime<Utc>>,
|
||||||
pub deleted_at: Option<DateTime<Utc>>,
|
pub deleted_at: Option<DateTime<Utc>>,
|
||||||
@ -56,13 +70,40 @@ pub struct CreateFeed {
|
|||||||
pub title: Option<String>,
|
pub title: Option<String>,
|
||||||
#[validate(url)]
|
#[validate(url)]
|
||||||
pub url: String,
|
pub url: String,
|
||||||
#[serde(rename = "type")]
|
|
||||||
pub feed_type: FeedType,
|
|
||||||
#[validate(length(max = 524288))]
|
#[validate(length(max = 524288))]
|
||||||
pub description: Option<String>,
|
pub description: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn get_feed(pool: &PgPool, feed_id: Uuid) -> Result<Feed> {
|
#[derive(Debug, Deserialize, Validate)]
|
||||||
|
pub struct UpsertFeed {
|
||||||
|
#[validate(length(max = 255))]
|
||||||
|
pub title: Option<String>,
|
||||||
|
#[validate(url)]
|
||||||
|
pub url: String,
|
||||||
|
pub feed_type: Option<FeedType>,
|
||||||
|
#[validate(length(max = 524288))]
|
||||||
|
pub description: Option<String>,
|
||||||
|
pub last_crawl_error: Option<String>,
|
||||||
|
pub last_crawled_at: Option<DateTime<Utc>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize, Default, Validate)]
|
||||||
|
pub struct UpdateFeed {
|
||||||
|
#[validate(length(max = 255))]
|
||||||
|
pub title: Option<Option<String>>,
|
||||||
|
#[validate(url)]
|
||||||
|
pub url: Option<String>,
|
||||||
|
pub feed_type: Option<FeedType>,
|
||||||
|
#[validate(length(max = 524288))]
|
||||||
|
pub description: Option<Option<String>>,
|
||||||
|
pub crawl_interval_minutes: Option<i32>,
|
||||||
|
pub last_crawl_error: Option<Option<String>>,
|
||||||
|
pub last_crawled_at: Option<Option<DateTime<Utc>>>,
|
||||||
|
pub last_entry_published_at: Option<Option<DateTime<Utc>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Feed {
|
||||||
|
pub async fn get(pool: &PgPool, feed_id: Uuid) -> Result<Feed> {
|
||||||
sqlx::query_as!(
|
sqlx::query_as!(
|
||||||
Feed,
|
Feed,
|
||||||
// Unable to SELECT * here due to https://github.com/launchbadge/sqlx/issues/1004
|
// Unable to SELECT * here due to https://github.com/launchbadge/sqlx/issues/1004
|
||||||
@ -73,6 +114,10 @@ pub async fn get_feed(pool: &PgPool, feed_id: Uuid) -> Result<Feed> {
|
|||||||
url,
|
url,
|
||||||
type as "feed_type: FeedType",
|
type as "feed_type: FeedType",
|
||||||
description,
|
description,
|
||||||
|
crawl_interval_minutes,
|
||||||
|
last_crawl_error,
|
||||||
|
last_crawled_at,
|
||||||
|
last_entry_published_at,
|
||||||
created_at,
|
created_at,
|
||||||
updated_at,
|
updated_at,
|
||||||
deleted_at
|
deleted_at
|
||||||
@ -89,7 +134,7 @@ pub async fn get_feed(pool: &PgPool, feed_id: Uuid) -> Result<Feed> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn get_feeds(pool: &PgPool) -> sqlx::Result<Vec<Feed>> {
|
pub async fn get_all(pool: &PgPool) -> sqlx::Result<Vec<Feed>> {
|
||||||
sqlx::query_as!(
|
sqlx::query_as!(
|
||||||
Feed,
|
Feed,
|
||||||
r#"select
|
r#"select
|
||||||
@ -98,6 +143,10 @@ pub async fn get_feeds(pool: &PgPool) -> sqlx::Result<Vec<Feed>> {
|
|||||||
url,
|
url,
|
||||||
type as "feed_type: FeedType",
|
type as "feed_type: FeedType",
|
||||||
description,
|
description,
|
||||||
|
crawl_interval_minutes,
|
||||||
|
last_crawl_error,
|
||||||
|
last_crawled_at,
|
||||||
|
last_entry_published_at,
|
||||||
created_at,
|
created_at,
|
||||||
updated_at,
|
updated_at,
|
||||||
deleted_at
|
deleted_at
|
||||||
@ -108,34 +157,37 @@ pub async fn get_feeds(pool: &PgPool) -> sqlx::Result<Vec<Feed>> {
|
|||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn create_feed(pool: &PgPool, payload: CreateFeed) -> Result<Feed> {
|
pub async fn create(pool: &PgPool, payload: CreateFeed) -> Result<Feed> {
|
||||||
payload.validate()?;
|
payload.validate()?;
|
||||||
Ok(sqlx::query_as!(
|
Ok(sqlx::query_as!(
|
||||||
Feed,
|
Feed,
|
||||||
r#"insert into feed (
|
r#"insert into feed (
|
||||||
title, url, type, description
|
title, url, description
|
||||||
) values (
|
) values (
|
||||||
$1, $2, $3, $4
|
$1, $2, $3
|
||||||
) returning
|
) returning
|
||||||
feed_id,
|
feed_id,
|
||||||
title,
|
title,
|
||||||
url,
|
url,
|
||||||
type as "feed_type: FeedType",
|
type as "feed_type: FeedType",
|
||||||
description,
|
description,
|
||||||
|
crawl_interval_minutes,
|
||||||
|
last_crawl_error,
|
||||||
|
last_crawled_at,
|
||||||
|
last_entry_published_at,
|
||||||
created_at,
|
created_at,
|
||||||
updated_at,
|
updated_at,
|
||||||
deleted_at
|
deleted_at
|
||||||
"#,
|
"#,
|
||||||
payload.title,
|
payload.title,
|
||||||
payload.url,
|
payload.url,
|
||||||
payload.feed_type as FeedType,
|
|
||||||
payload.description
|
payload.description
|
||||||
)
|
)
|
||||||
.fetch_one(pool)
|
.fetch_one(pool)
|
||||||
.await?)
|
.await?)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn upsert_feed(pool: &PgPool, payload: CreateFeed) -> Result<Feed> {
|
pub async fn upsert(pool: &PgPool, payload: UpsertFeed) -> Result<Feed> {
|
||||||
payload.validate()?;
|
payload.validate()?;
|
||||||
Ok(sqlx::query_as!(
|
Ok(sqlx::query_as!(
|
||||||
Feed,
|
Feed,
|
||||||
@ -146,7 +198,7 @@ pub async fn upsert_feed(pool: &PgPool, payload: CreateFeed) -> Result<Feed> {
|
|||||||
) on conflict (url) do update set
|
) on conflict (url) do update set
|
||||||
title = excluded.title,
|
title = excluded.title,
|
||||||
url = excluded.url,
|
url = excluded.url,
|
||||||
type = excluded.type,
|
type = COALESCE(excluded.type, feed.type),
|
||||||
description = excluded.description
|
description = excluded.description
|
||||||
returning
|
returning
|
||||||
feed_id,
|
feed_id,
|
||||||
@ -154,20 +206,67 @@ pub async fn upsert_feed(pool: &PgPool, payload: CreateFeed) -> Result<Feed> {
|
|||||||
url,
|
url,
|
||||||
type as "feed_type: FeedType",
|
type as "feed_type: FeedType",
|
||||||
description,
|
description,
|
||||||
|
crawl_interval_minutes,
|
||||||
|
last_crawl_error,
|
||||||
|
last_crawled_at,
|
||||||
|
last_entry_published_at,
|
||||||
created_at,
|
created_at,
|
||||||
updated_at,
|
updated_at,
|
||||||
deleted_at
|
deleted_at
|
||||||
"#,
|
"#,
|
||||||
payload.title,
|
payload.title,
|
||||||
payload.url,
|
payload.url,
|
||||||
payload.feed_type as FeedType,
|
payload.feed_type as Option<FeedType>,
|
||||||
payload.description
|
payload.description
|
||||||
)
|
)
|
||||||
.fetch_one(pool)
|
.fetch_one(pool)
|
||||||
.await?)
|
.await?)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn delete_feed(pool: &PgPool, feed_id: Uuid) -> Result<()> {
|
pub async fn update(pool: &PgPool, feed_id: Uuid, payload: UpdateFeed) -> Result<Feed> {
|
||||||
|
payload.validate()?;
|
||||||
|
let mut query = sqlx::QueryBuilder::new("UPDATE feed SET ");
|
||||||
|
|
||||||
|
let mut updates = query.separated(", ");
|
||||||
|
if let Some(title) = payload.title {
|
||||||
|
updates.push_unseparated("title = ");
|
||||||
|
updates.push_bind(title);
|
||||||
|
}
|
||||||
|
if let Some(url) = payload.url {
|
||||||
|
updates.push_unseparated("url = ");
|
||||||
|
updates.push_bind(url);
|
||||||
|
}
|
||||||
|
if let Some(description) = payload.description {
|
||||||
|
updates.push_unseparated("description = ");
|
||||||
|
updates.push_bind(description);
|
||||||
|
}
|
||||||
|
if let Some(crawl_interval_minutes) = payload.crawl_interval_minutes {
|
||||||
|
updates.push("crawl_interval_minutes = ");
|
||||||
|
updates.push_bind(crawl_interval_minutes);
|
||||||
|
}
|
||||||
|
if let Some(last_crawl_error) = payload.last_crawl_error {
|
||||||
|
updates.push_unseparated("last_crawl_error = ");
|
||||||
|
updates.push_bind(last_crawl_error);
|
||||||
|
}
|
||||||
|
if let Some(last_crawled_at) = payload.last_crawled_at {
|
||||||
|
updates.push_unseparated("last_crawled_at = ");
|
||||||
|
updates.push_bind(last_crawled_at);
|
||||||
|
}
|
||||||
|
if let Some(last_entry_published_at) = payload.last_entry_published_at {
|
||||||
|
updates.push_unseparated("last_entry_published_at = ");
|
||||||
|
updates.push_bind(last_entry_published_at);
|
||||||
|
}
|
||||||
|
|
||||||
|
query.push(" WHERE id = ");
|
||||||
|
query.push_bind(feed_id);
|
||||||
|
query.push(" RETURNING *");
|
||||||
|
|
||||||
|
let query = query.build_query_as();
|
||||||
|
|
||||||
|
Ok(query.fetch_one(pool).await?)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn delete(pool: &PgPool, feed_id: Uuid) -> Result<()> {
|
||||||
sqlx::query!(
|
sqlx::query!(
|
||||||
"update feed set deleted_at = now() where feed_id = $1",
|
"update feed set deleted_at = now() where feed_id = $1",
|
||||||
feed_id
|
feed_id
|
||||||
@ -176,3 +275,45 @@ pub async fn delete_feed(pool: &PgPool, feed_id: Uuid) -> Result<()> {
|
|||||||
.await?;
|
.await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub async fn save(&self, pool: &PgPool) -> Result<Feed> {
|
||||||
|
Ok(sqlx::query_as!(
|
||||||
|
Feed,
|
||||||
|
r#"update feed set
|
||||||
|
title = $2,
|
||||||
|
url = $3,
|
||||||
|
type = $4,
|
||||||
|
description = $5,
|
||||||
|
crawl_interval_minutes = $6,
|
||||||
|
last_crawl_error = $7,
|
||||||
|
last_crawled_at = $8,
|
||||||
|
last_entry_published_at = $9
|
||||||
|
where feed_id = $1
|
||||||
|
returning
|
||||||
|
feed_id,
|
||||||
|
title,
|
||||||
|
url,
|
||||||
|
type as "feed_type: FeedType",
|
||||||
|
description,
|
||||||
|
crawl_interval_minutes,
|
||||||
|
last_crawl_error,
|
||||||
|
last_crawled_at,
|
||||||
|
last_entry_published_at,
|
||||||
|
created_at,
|
||||||
|
updated_at,
|
||||||
|
deleted_at
|
||||||
|
"#,
|
||||||
|
self.feed_id,
|
||||||
|
self.title,
|
||||||
|
self.url,
|
||||||
|
self.feed_type as FeedType,
|
||||||
|
self.description,
|
||||||
|
self.crawl_interval_minutes,
|
||||||
|
self.last_crawl_error,
|
||||||
|
self.last_crawled_at,
|
||||||
|
self.last_entry_published_at,
|
||||||
|
)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await?)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -9,7 +9,7 @@ const BASE62_CHARS: &[u8] = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmn
|
|||||||
///
|
///
|
||||||
/// Database rows have a UUID primary key, but they are encoded in Base62 to be shorter and more
|
/// Database rows have a UUID primary key, but they are encoded in Base62 to be shorter and more
|
||||||
/// URL-friendly for the frontend.
|
/// URL-friendly for the frontend.
|
||||||
#[derive(Debug, Serialize, Deserialize)]
|
#[derive(Debug, Serialize, Deserialize, Clone, Copy)]
|
||||||
pub struct Base62Uuid(
|
pub struct Base62Uuid(
|
||||||
#[serde(deserialize_with = "uuid_from_base62_str")]
|
#[serde(deserialize_with = "uuid_from_base62_str")]
|
||||||
#[serde(serialize_with = "uuid_to_base62_str")]
|
#[serde(serialize_with = "uuid_to_base62_str")]
|
||||||
|
Loading…
Reference in New Issue
Block a user