diff --git a/drop_all.psql b/drop_all.psql
new file mode 100644
index 0000000..e8a3cc3
--- /dev/null
+++ b/drop_all.psql
@@ -0,0 +1,8 @@
+/* !!! THIS DROPS ALL TABLES IN THE DATABASE WHICH DELETES ALL DATA IN THE DATABASE !!!
+ *
+ * ONLY RUN IN DEVELOPMENT!
+ */
+DROP TABLE _sqlx_migrations CASCADE;
+DROP TABLE entries CASCADE;
+DROP TABLE feeds CASCADE;
+DROP TYPE feed_type;
diff --git a/migrations/20230507201612_initial.sql b/migrations/20230507201612_initial.sql
index 8f6b5cf..5413255 100644
--- a/migrations/20230507201612_initial.sql
+++ b/migrations/20230507201612_initial.sql
@@ -2,7 +2,7 @@ CREATE TYPE feed_type AS ENUM ('atom', 'rss');
 
 CREATE TABLE IF NOT EXISTS "feeds" (
     "id" SERIAL PRIMARY KEY NOT NULL,
-    "title" VARCHAR(255) NOT NULL,
+    "title" VARCHAR(255),
     "url" VARCHAR(2048) NOT NULL,
     "type" feed_type NOT NULL,
     "description" TEXT,
@@ -15,7 +15,7 @@ CREATE UNIQUE INDEX "feeds_url" ON "feeds" ("url");
 
 CREATE TABLE IF NOT EXISTS "entries" (
     "id" SERIAL PRIMARY KEY NOT NULL,
-    "title" VARCHAR(255) NOT NULL,
+    "title" VARCHAR(255),
     "url" VARCHAR(2048) NOT NULL,
     "description" TEXT,
     "feed_id" INTEGER REFERENCES "feeds"(id) NOT NULL,
diff --git a/src/bin/cli.rs b/src/bin/cli.rs
index 54c7a3f..a607ce3 100644
--- a/src/bin/cli.rs
+++ b/src/bin/cli.rs
@@ -32,7 +32,7 @@ enum Commands {
 struct AddFeed {
     #[argh(option)]
     /// title of the feed (max 255 characters)
-    title: String,
+    title: Option<String>,
     #[argh(option)]
     /// URL of the feed (max 2048 characters)
     url: String,
@@ -59,7 +59,7 @@ struct DeleteFeed {
 struct AddEntry {
     #[argh(option)]
     /// title of the entry (max 255 characters)
-    title: String,
+    title: Option<String>,
     #[argh(option)]
     /// URL of the entry (max 2048 characters)
     url: String,
diff --git a/src/jobs/crawl.rs b/src/jobs/crawl.rs
index 13ffb45..33f6f8e 100644
--- a/src/jobs/crawl.rs
+++ b/src/jobs/crawl.rs
@@ -1,7 +1,7 @@
 use feed_rs::parser;
 use reqwest::Client;
 use sqlx::PgPool;
-use tracing::info;
+use tracing::{info, warn};
 
 use crate::models::feed::get_feeds;
 use crate::models::entry::{upsert_entries, CreateEntry};
@@ -16,18 +16,17 @@ pub async fn crawl(pool: &PgPool) -> anyhow::Result<()> {
         let parsed_feed = parser::parse(&bytes[..])?;
         let mut payload = Vec::with_capacity(parsed_feed.entries.len());
         for entry in parsed_feed.entries {
-            let entry = CreateEntry {
-                title: entry
-                    .title
-                    .map_or_else(|| "No title".to_string(), |t| t.content),
-                url: entry
-                    .links
-                    .get(0)
-                    .map_or_else(|| "https://example.com".to_string(), |l| l.href.clone()),
-                description: entry.summary.map(|s| s.content),
-                feed_id: feed.id,
-            };
-            payload.push(entry);
+            if let Some(link) = entry.links.get(0) {
+                let entry = CreateEntry {
+                    title: entry.title.map(|t| t.content),
+                    url: link.href.clone(),
+                    description: entry.summary.map(|s| s.content),
+                    feed_id: feed.id,
+                };
+                payload.push(entry);
+            } else {
+                warn!("Feed entry has no links: {:?}", entry);
+            }
         }
         let entries = upsert_entries(pool, payload).await?;
         info!("Created {} entries for feed {}", entries.len(), feed.id);
diff --git a/src/models/entry.rs b/src/models/entry.rs
index 3813ae7..3ca4134 100644
--- a/src/models/entry.rs
+++ b/src/models/entry.rs
@@ -8,7 +8,7 @@ use crate::error::{Error, Result};
 #[derive(Debug, Serialize, Deserialize)]
 pub struct Entry {
     pub id: i32,
-    pub title: String,
+    pub title: Option<String>,
     pub url: String,
     pub description: Option<String>,
     pub feed_id: i32,
@@ -20,7 +20,7 @@ pub struct Entry {
 #[derive(Debug, Deserialize, Validate)]
 pub struct CreateEntry {
     #[validate(length(max = 255))]
-    pub title: String,
+    pub title: Option<String>,
     #[validate(url)]
     pub url: String,
     #[validate(length(max = 524288))]
@@ -73,7 +73,7 @@ pub async fn create_entry(pool: &PgPool, payload: CreateEntry) -> Result<Entry> {
     })
 }
 
-pub async fn create_entries(pool: &PgPool, payload: Vec<CreateEntry>) -> Result<Vec<Entry>> {
+pub async fn create_entries(pool: &PgPool, payload: Vec<CreateEntry>) -> Result<Vec<Entry>> {
     let mut titles = Vec::with_capacity(payload.len());
     let mut urls = Vec::with_capacity(payload.len());
     let mut descriptions: Vec<Option<String>> = Vec::with_capacity(payload.len());
@@ -91,7 +91,7 @@ pub async fn create_entries(pool: &PgPool, payload: Vec<CreateEntry>) -> Result<Vec<Entry>> {
             title, url, description, feed_id, created_at, updated_at
         ) SELECT *, now(), now() FROM UNNEST($1::text[], $2::text[], $3::text[], $4::int[])
         RETURNING *",
-        titles.as_slice(),
+        titles.as_slice() as &[Option<String>],
         urls.as_slice(),
         descriptions.as_slice() as &[Option<String>],
         feed_ids.as_slice(),
@@ -127,7 +127,7 @@ pub async fn upsert_entries(pool: &PgPool, payload: Vec<CreateEntry>) -> Result<Vec<Entry>> {
         ) SELECT *, now(), now() FROM UNNEST($1::text[], $2::text[], $3::text[], $4::int[])
         ON CONFLICT DO NOTHING
         RETURNING *",
-        titles.as_slice(),
+        titles.as_slice() as &[Option<String>],
         urls.as_slice(),
         descriptions.as_slice() as &[Option<String>],
         feed_ids.as_slice(),
diff --git a/src/models/feed.rs b/src/models/feed.rs
index fd7861c..1b32ee2 100644
--- a/src/models/feed.rs
+++ b/src/models/feed.rs
@@ -29,7 +29,7 @@ impl FromStr for FeedType {
 #[derive(Debug, Serialize, Deserialize)]
 pub struct Feed {
     pub id: i32,
-    pub title: String,
+    pub title: Option<String>,
     pub url: String,
     #[serde(rename = "type")]
     pub feed_type: FeedType,
@@ -42,7 +42,7 @@ pub struct Feed {
 #[derive(Debug, Deserialize, Validate)]
 pub struct CreateFeed {
     #[validate(length(max = 255))]
-    pub title: String,
+    pub title: Option<String>,
     #[validate(url)]
     pub url: String,
     #[serde(rename = "type")]
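
A note on the `titles.as_slice() as &[Option<String>]` casts above: sqlx's compile-time query macros infer an `UNNEST($1::text[])` bind parameter as a slice of non-nullable strings, so once `title` becomes nullable the argument needs an explicit cast to tell the macro its elements may be NULL (the same trick the file already uses for `descriptions`). A minimal sketch of the pattern, assuming a hypothetical `notes` table with a single nullable `title TEXT` column and a `DATABASE_URL` available for sqlx's compile-time checks:

use sqlx::PgPool;

// Bulk-inserts a batch of optional titles in one round trip by passing the
// whole batch as a single text[] parameter and expanding it with UNNEST.
async fn bulk_insert_notes(pool: &PgPool, titles: Vec<Option<String>>) -> sqlx::Result<u64> {
    let result = sqlx::query!(
        "INSERT INTO notes (title) SELECT * FROM UNNEST($1::text[])",
        // Without this cast the macro types the parameter as &[String]
        // and rejects a slice of Option<String>.
        titles.as_slice() as &[Option<String>],
    )
    .execute(pool)
    .await?;
    Ok(result.rows_affected())
}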