Sanitize entry html content with ammonia
This commit is contained in:
@@ -2,6 +2,7 @@ use std::fmt::{self, Display, Formatter};
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
use ammonia::clean;
|
||||
use bytes::Buf;
|
||||
use readability::extractor;
|
||||
use reqwest::Client;
|
||||
@@ -114,7 +115,9 @@ impl EntryCrawler {
|
||||
// .await
|
||||
// .map_err(|_| EntryCrawlerError::CreateEntryError(entry.url.clone()))?;
|
||||
// };
|
||||
fs::write(content_dir.join(format!("{}.html", id)), article.content)
|
||||
let content = clean(&article.content);
|
||||
info!("sanitized content");
|
||||
fs::write(content_dir.join(format!("{}.html", id)), content)
|
||||
.map_err(|_| EntryCrawlerError::SaveContentError(entry.url.clone()))?;
|
||||
fs::write(content_dir.join(format!("{}.txt", id)), article.text)
|
||||
.map_err(|_| EntryCrawlerError::SaveContentError(entry.url.clone()))?;
|
||||
|
||||
@@ -201,12 +201,24 @@ impl FeedCrawler {
|
||||
.cmp(&Duration::minutes(feed.crawl_interval_minutes.into()))
|
||||
{
|
||||
Ordering::Greater => {
|
||||
feed.crawl_interval_minutes =
|
||||
i32::max(feed.crawl_interval_minutes * 2, MAX_CRAWL_INTERVAL_MINUTES);
|
||||
feed.crawl_interval_minutes = i32::max(
|
||||
(feed.crawl_interval_minutes as f32 * 1.2).round() as i32,
|
||||
MAX_CRAWL_INTERVAL_MINUTES,
|
||||
);
|
||||
info!(
|
||||
interval = feed.crawl_interval_minutes,
|
||||
"increased crawl interval"
|
||||
);
|
||||
}
|
||||
Ordering::Less => {
|
||||
feed.crawl_interval_minutes =
|
||||
i32::max(feed.crawl_interval_minutes / 2, MIN_CRAWL_INTERVAL_MINUTES);
|
||||
feed.crawl_interval_minutes = i32::max(
|
||||
(feed.crawl_interval_minutes as f32 / 1.2).round() as i32,
|
||||
MIN_CRAWL_INTERVAL_MINUTES,
|
||||
);
|
||||
info!(
|
||||
interval = feed.crawl_interval_minutes,
|
||||
"decreased crawl interval"
|
||||
);
|
||||
}
|
||||
Ordering::Equal => {}
|
||||
}
|
||||
|
||||
@@ -20,18 +20,18 @@ pub async fn get(
|
||||
let entry = Entry::get(&pool, id.as_uuid()).await?;
|
||||
let content_dir = std::path::Path::new(&config.content_dir);
|
||||
let content_path = content_dir.join(format!("{}.html", entry.entry_id));
|
||||
let title = entry.title.unwrap_or_else(|| "Untitled".to_string());
|
||||
let published_at = entry.published_at.to_rfc3339_opts(chrono::SecondsFormat::Millis, true);
|
||||
let content = fs::read_to_string(content_path).unwrap_or_else(|_| "No content".to_string());
|
||||
Ok(layout.render(html! {
|
||||
article {
|
||||
@let title = entry.title.unwrap_or_else(|| "Untitled".to_string());
|
||||
h2 { a href=(entry.url) { (title) } }
|
||||
@let published_at = entry.published_at.to_rfc3339_opts(chrono::SecondsFormat::Millis, true);
|
||||
span class="published" {
|
||||
strong { "Published: " }
|
||||
time datetime=(published_at) data-controller="local-time" {
|
||||
(published_at)
|
||||
}
|
||||
}
|
||||
@let content = fs::read_to_string(content_path).unwrap_or_else(|_| "No content".to_string());
|
||||
(PreEscaped(content))
|
||||
}
|
||||
}))
|
||||
|
||||
@@ -23,24 +23,16 @@ pub async fn opml(
|
||||
State(importer): State<ImporterHandle>,
|
||||
mut multipart: Multipart,
|
||||
) -> Result<Response> {
|
||||
dbg!("opml handler");
|
||||
if let Some(field) = multipart.next_field().await.map_err(|err| {
|
||||
dbg!(&err);
|
||||
err
|
||||
})? {
|
||||
if let Some(field) = multipart.next_field().await? {
|
||||
let import_id = Base62Uuid::new();
|
||||
dbg!(&import_id);
|
||||
let file_name = field.file_name().map(|s| s.to_string());
|
||||
dbg!(&file_name);
|
||||
let bytes = field.bytes().await?;
|
||||
dbg!(&bytes.len());
|
||||
let receiver = importer.import(import_id, file_name, bytes).await;
|
||||
{
|
||||
let mut imports = imports.lock().await;
|
||||
imports.insert(import_id.as_uuid(), receiver);
|
||||
}
|
||||
|
||||
let import_html_id = format!("import-{}", import_id);
|
||||
let import_stream = format!("/import/{}/stream", import_id);
|
||||
return Ok((
|
||||
StatusCode::CREATED,
|
||||
@@ -59,7 +51,6 @@ pub async fn opml(
|
||||
)
|
||||
.into_response());
|
||||
}
|
||||
dbg!("no file");
|
||||
Err(Error::NoFile)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user