Fetch and save entry HTML content with metadata
Also render the extracted HTML on the entry page in the frontend.
@@ -98,8 +98,6 @@ pub async fn main() -> Result<()> {
 
     let args: Args = argh::from_env();
 
-    info!("hello?");
-
     match args.commands {
         Commands::AddFeed(args) => {
             let feed = create_feed(
@@ -125,6 +123,7 @@ pub async fn main() -> Result<()> {
                     title: args.title,
                     url: args.url,
                     description: args.description,
+                    html_content: None,
                     feed_id: args.feed_id,
                 },
             )
@@ -1,6 +1,6 @@
 use axum::extract::{State, Path};
 use axum::response::Response;
-use maud::html;
+use maud::{html, PreEscaped};
 use sqlx::PgPool;
 
 use crate::error::Result;
@@ -12,7 +12,7 @@ pub async fn get(Path(id): Path<i32>, State(pool): State<PgPool>, layout: Layout
     Ok(layout.render(html! {
         @let title = entry.title.unwrap_or_else(|| "Untitled".to_string());
         h1 { a href=(entry.url) { (title) } }
         @let description = entry.description.unwrap_or_else(|| "No description".to_string());
         p { (description) }
+        @let content = entry.html_content.unwrap_or_else(|| "No content".to_string());
+        (PreEscaped(content))
     }))
 }
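A note on the template above: maud HTML-escapes every interpolated string by default, so the stored article markup has to be spliced through PreEscaped to render as HTML rather than as visible tags. A minimal standalone sketch of the difference (illustrative, not part of this commit):

use maud::{html, PreEscaped};

fn main() {
    let stored = String::from("<p>Hello</p>");
    // Default splice: the string is escaped, so the tags show up as literal text.
    let escaped = html! { (stored.clone()) }.into_string();
    assert_eq!(escaped, "&lt;p&gt;Hello&lt;/p&gt;");
    // PreEscaped splice: the markup is injected verbatim, as the entry page does above.
    let raw = html! { (PreEscaped(stored)) }.into_string();
    assert_eq!(raw, "<p>Hello</p>");
}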
@@ -1,5 +1,6 @@
+use article_scraper::ArticleScraper;
 use feed_rs::parser;
-use reqwest::Client;
+use reqwest::{Client, Url};
 use sqlx::PgPool;
 use tracing::{info, warn};
 
@@ -9,18 +10,23 @@ use crate::models::entry::{upsert_entries, CreateEntry};
 /// For every feed in the database, fetches the feed, parses it, and saves new entries to the
 /// database.
 pub async fn crawl(pool: &PgPool) -> anyhow::Result<()> {
+    let scraper = ArticleScraper::new(None).await;
     let client = Client::new();
     let feeds = get_feeds(pool).await?;
     for feed in feeds {
         info!("Fetching feed {}: {}", feed.id, feed.url);
         let bytes = client.get(feed.url).send().await?.bytes().await?;
         let parsed_feed = parser::parse(&bytes[..])?;
         let mut payload = Vec::with_capacity(parsed_feed.entries.len());
         for entry in parsed_feed.entries {
             if let Some(link) = entry.links.get(0) {
                 info!("Fetching entry article: {}", link.href);
+                let article = scraper.parse(&Url::parse(&link.href)?, true, &client, None).await?;
                 let entry = CreateEntry {
                     title: entry.title.map(|t| t.content),
                     url: link.href.clone(),
                     description: entry.summary.map(|s| s.content),
+                    html_content: article.get_content(),
                     feed_id: feed.id,
                 };
                 payload.push(entry);
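One consequence of the `?` operators inside the loop above is that a single unreachable article aborts the entire crawl. A hedged variant (a sketch, not what the commit does) that reuses only the calls shown in the diff plus the already-imported warn! to log and skip a failing article instead:

// Inside `for entry in parsed_feed.entries { if let Some(link) = entry.links.get(0) { ... } }`:
match scraper.parse(&Url::parse(&link.href)?, true, &client, None).await {
    Ok(article) => payload.push(CreateEntry {
        title: entry.title.map(|t| t.content),
        url: link.href.clone(),
        description: entry.summary.map(|s| s.content),
        html_content: article.get_content(),
        feed_id: feed.id,
    }),
    // Assumes the scraper's error type implements Display.
    Err(err) => warn!("Failed to scrape {}: {}", link.href, err),
}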
@@ -11,6 +11,7 @@ pub struct Entry {
     pub title: Option<String>,
     pub url: String,
     pub description: Option<String>,
+    pub html_content: Option<String>,
     pub feed_id: i32,
     pub created_at: NaiveDateTime,
     pub updated_at: NaiveDateTime,
@@ -25,6 +26,7 @@ pub struct CreateEntry {
     pub url: String,
     #[validate(length(max = 524288))]
     pub description: Option<String>,
+    pub html_content: Option<String>,
     #[validate(range(min = 1))]
     pub feed_id: i32,
 }
@@ -52,13 +54,14 @@ pub async fn create_entry(pool: &PgPool, payload: CreateEntry) -> Result<Entry>
     sqlx::query_as!(
         Entry,
         "INSERT INTO entries (
-            title, url, description, feed_id, created_at, updated_at
+            title, url, description, html_content, feed_id, created_at, updated_at
         ) VALUES (
-            $1, $2, $3, $4, now(), now()
+            $1, $2, $3, $4, $5, now(), now()
         ) RETURNING *",
         payload.title,
         payload.url,
         payload.description,
+        payload.html_content,
         payload.feed_id,
     )
     .fetch_one(pool)
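The single-row insert binds purely by position: the new html_content column, the added $5 placeholder, and the extra payload.html_content argument must stay aligned, and sqlx's query_as! verifies the query against the actual schema at compile time. A hypothetical caller, with illustrative values, matching the CreateEntry fields from this diff:

let entry = create_entry(&pool, CreateEntry {
    title: Some("Example".to_string()),
    url: "https://example.com/post".to_string(),
    description: None,
    html_content: Some("<p>Body</p>".to_string()),
    feed_id: 1,
}).await?;
assert_eq!(entry.html_content.as_deref(), Some("<p>Body</p>"));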
@@ -77,23 +80,26 @@ pub async fn create_entries(pool: &PgPool, payload: Vec<CreateEntry>) -> Result<
     let mut titles = Vec::with_capacity(payload.len());
     let mut urls = Vec::with_capacity(payload.len());
     let mut descriptions: Vec<Option<String>> = Vec::with_capacity(payload.len());
+    let mut html_contents: Vec<Option<String>> = Vec::with_capacity(payload.len());
     let mut feed_ids = Vec::with_capacity(payload.len());
     payload.iter().map(|entry| {
         titles.push(entry.title.clone());
         urls.push(entry.url.clone());
         descriptions.push(entry.description.clone());
+        html_contents.push(entry.html_content.clone());
         feed_ids.push(entry.feed_id);
         entry.validate()
     }).collect::<Result<Vec<()>, ValidationErrors>>()?;
     sqlx::query_as!(
         Entry,
         "INSERT INTO entries (
-            title, url, description, feed_id, created_at, updated_at
-        ) SELECT *, now(), now() FROM UNNEST($1::text[], $2::text[], $3::text[], $4::int[])
+            title, url, description, html_content, feed_id, created_at, updated_at
+        ) SELECT *, now(), now() FROM UNNEST($1::text[], $2::text[], $3::text[], $4::text[], $5::int[])
         RETURNING *",
         titles.as_slice() as &[Option<String>],
         urls.as_slice(),
         descriptions.as_slice() as &[Option<String>],
+        html_contents.as_slice() as &[Option<String>],
         feed_ids.as_slice(),
     )
     .fetch_all(pool)
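The bulk insert works around sqlx's lack of array-of-struct binding: each field is shipped to Postgres as its own array, and the multi-argument UNNEST zips those arrays back into rows, one row per index. In Rust terms it behaves like an iterator zip (the values below are made up):

// Conceptual model of UNNEST($1::text[], $2::text[]) over parallel columns:
let titles: Vec<Option<String>> = vec![Some("A".into()), None];
let urls: Vec<String> = vec!["https://a.example".into(), "https://b.example".into()];
let rows: Vec<(Option<String>, String)> = titles.into_iter().zip(urls).collect();
assert_eq!(rows.len(), 2); // one row per array index, NULLs preserved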
@@ -112,24 +118,27 @@ pub async fn upsert_entries(pool: &PgPool, payload: Vec<CreateEntry>) -> Result<
     let mut titles = Vec::with_capacity(payload.len());
     let mut urls = Vec::with_capacity(payload.len());
     let mut descriptions: Vec<Option<String>> = Vec::with_capacity(payload.len());
+    let mut html_contents: Vec<Option<String>> = Vec::with_capacity(payload.len());
     let mut feed_ids = Vec::with_capacity(payload.len());
     payload.iter().map(|entry| {
         titles.push(entry.title.clone());
         urls.push(entry.url.clone());
         descriptions.push(entry.description.clone());
+        html_contents.push(entry.html_content.clone());
         feed_ids.push(entry.feed_id);
         entry.validate()
     }).collect::<Result<Vec<()>, ValidationErrors>>()?;
     sqlx::query_as!(
         Entry,
         "INSERT INTO entries (
-            title, url, description, feed_id, created_at, updated_at
-        ) SELECT *, now(), now() FROM UNNEST($1::text[], $2::text[], $3::text[], $4::int[])
+            title, url, description, html_content, feed_id, created_at, updated_at
+        ) SELECT *, now(), now() FROM UNNEST($1::text[], $2::text[], $3::text[], $4::text[], $5::int[])
         ON CONFLICT DO NOTHING
         RETURNING *",
         titles.as_slice() as &[Option<String>],
         urls.as_slice(),
         descriptions.as_slice() as &[Option<String>],
+        html_contents.as_slice() as &[Option<String>],
         feed_ids.as_slice(),
     )
     .fetch_all(pool)
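upsert_entries differs from create_entries only by the ON CONFLICT DO NOTHING clause: rows that would violate a unique constraint are skipped rather than failing the statement, and because of RETURNING *, only the rows actually inserted come back, so the result can be shorter than the payload. A caller-side sketch of that behavior (assumes a unique constraint on entries, e.g. on url, which this diff does not show, and that CreateEntry implements Clone):

let inserted = upsert_entries(&pool, payload.clone()).await?;
let second_pass = upsert_entries(&pool, payload).await?;
// Every row now conflicts, so nothing is inserted and nothing is returned.
assert!(second_pass.is_empty());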