#![warn(clippy::all)] #[macro_use] extern crate lazy_static; #[macro_use] extern crate html5ever; #[macro_use] extern crate maplit; #[macro_use] extern crate cssparser; extern crate string_cache; extern crate typed_arena; use std::collections::HashSet; use std::default::Default; use std::io; use html5ever::tendril::StrTendril; use html5ever::{serialize, Attribute, LocalName, QualName}; use url::{ParseError, Url}; #[macro_use] mod css_property { include!(concat!(env!("OUT_DIR"), "/css_property.rs")); } #[macro_use] mod css_at_rule { include!(concat!(env!("OUT_DIR"), "/css_at_rule.rs")); } mod arena_dom; mod config; mod css_parser; mod transformer; use arena_dom::{create_element, Arena, NodeData, Ref}; use config::permissive::{ADD_ATTRIBUTES, ALL_ATTRIBUTES, ATTRIBUTES, ELEMENTS, PROTOCOLS}; use config::relaxed::{CSS_AT_RULES, CSS_PROPERTIES}; use css_at_rule::CssAtRule; use css_parser::{parse_css_style_attribute, parse_css_stylesheet, CssRule}; use css_property::CssProperty; use transformer::Transformer; fn main() { let transformer = Transformer::new( &should_unwrap_node, vec![ &sanitize_style_tag_css, &sanitize_style_attribute_css, &remove_attributes, &add_attributes, &sanitize_attribute_protocols, &add_single_elements_around_ul, ], ); let root = transformer.parse(&mut io::stdin()).unwrap(); transformer.traverse(root); serialize(&mut io::stdout(), root, Default::default()).expect("serialization failed") } fn css_rules_to_string(rules: Vec) -> String { let mut sanitized_css = String::new(); for rule in rules { match rule { CssRule::StyleRule(style_rule) => { sanitized_css += &style_rule.selectors.trim(); sanitized_css += " {\n"; for declaration in style_rule.declarations.into_iter() { let declaration_string = &declaration.to_string(); if CSS_PROPERTIES.contains(&CssProperty::from(declaration.property)) { sanitized_css += " "; sanitized_css += declaration_string; sanitized_css += " "; } } sanitized_css += "\n}"; } CssRule::AtRule(at_rule) => { dbg!(&at_rule); if CSS_AT_RULES.contains(&CssAtRule::from(at_rule.name.clone())) { sanitized_css += &format!("@{} ", &at_rule.name); sanitized_css += &at_rule.prelude.trim(); if let Some(block) = at_rule.block { sanitized_css += " {\n"; sanitized_css += &css_rules_to_string(block); sanitized_css += "\n}"; } } } } sanitized_css += "\n"; } sanitized_css.trim().to_string() } // TODO: make separate rich and plain transformers // DONE: add whitelist of tags, remove any not in it // DONE: add whitelist of attributes, remove any not in it // DONE: add map of tags to attributes, remove any on tag not in the mapped value // DONE: add whitelist of url schemes, parse urls and remove any not in it // DONE: strip comments // DONE: parse style tags and attributes // DONE: add whitelist of CSS properties, remove any not in it // TODO: scope selectors in rich formatter // TODO: add class attributes to elements in rich formatter // TODO: separate this out into multiple separate transformers // TODO: find a way to avoid passing the arena to transformer functions. It's an implementation // detail that doesn't need to be exposed. Also, it's only needed for creating new elements. fn sanitize_style_tag_css<'arena>(node: Ref<'arena>, _: Arena<'arena>) { if let NodeData::Text { ref contents } = node.data { // TODO: seems rather expensive to lookup the parent on every Text node. Better // solution would be to pass some sort of context from the parent that marks that this // Text node is inside a