Remove debug printing, add allow_doctype config

This commit is contained in:
Tyler Hallada 2020-04-25 19:20:23 -04:00
parent caca121bef
commit 5f57b390e2
3 changed files with 61 additions and 36 deletions

View File

@ -5,6 +5,7 @@ use crate::sanitizer::SanitizerConfig;
lazy_static! { lazy_static! {
pub static ref DEFAULT_CONFIG: SanitizerConfig = SanitizerConfig { pub static ref DEFAULT_CONFIG: SanitizerConfig = SanitizerConfig {
allow_comments: false, allow_comments: false,
allow_doctype: false,
allowed_elements: HashSet::new(), allowed_elements: HashSet::new(),
allowed_attributes: HashSet::new(), allowed_attributes: HashSet::new(),
allowed_attributes_per_element: HashMap::new(), allowed_attributes_per_element: HashMap::new(),

View File

@ -59,8 +59,6 @@ impl<'i> AtRuleParser<'i> for CssParser {
name: CowRcStr<'i>, name: CowRcStr<'i>,
input: &mut Parser<'i, 't>, input: &mut Parser<'i, 't>,
) -> Result<AtRuleType<Self::PreludeNoBlock, Self::PreludeBlock>, CssParseError<'i>> { ) -> Result<AtRuleType<Self::PreludeNoBlock, Self::PreludeBlock>, CssParseError<'i>> {
// let position = input.position();
// while input.next_including_whitespace_and_comments().is_ok() {}
let mut prelude = String::new(); let mut prelude = String::new();
let mut previous_token = TokenSerializationType::nothing(); let mut previous_token = TokenSerializationType::nothing();
while let Ok(token) = input.next_including_whitespace_and_comments() { while let Ok(token) = input.next_including_whitespace_and_comments() {
@ -70,7 +68,6 @@ impl<'i> AtRuleParser<'i> for CssParser {
} }
previous_token = token_type; previous_token = token_type;
token.to_css(&mut prelude).unwrap(); token.to_css(&mut prelude).unwrap();
// TODO: do I need to handle parse_nested_block here?
} }
match_ignore_ascii_case! { &*name, match_ignore_ascii_case! { &*name,
"import" | "namespace" | "charset" => { "import" | "namespace" | "charset" => {
@ -137,7 +134,6 @@ impl<'i> QualifiedRuleParser<'i> for CssParser {
&mut self, &mut self,
input: &mut Parser<'i, 't>, input: &mut Parser<'i, 't>,
) -> Result<Self::Prelude, CssParseError<'i>> { ) -> Result<Self::Prelude, CssParseError<'i>> {
// let position = input.position();
let mut prelude = String::new(); let mut prelude = String::new();
let mut previous_token = TokenSerializationType::nothing(); let mut previous_token = TokenSerializationType::nothing();
while let Ok(token) = input.next_including_whitespace_and_comments() { while let Ok(token) = input.next_including_whitespace_and_comments() {
@ -147,7 +143,6 @@ impl<'i> QualifiedRuleParser<'i> for CssParser {
} }
previous_token = token_type; previous_token = token_type;
token.to_css(&mut prelude).unwrap(); token.to_css(&mut prelude).unwrap();
// TODO: do I need to handle parse_nested_block here?
} }
Ok(prelude) Ok(prelude)
} }
@ -206,7 +201,6 @@ impl<'i> DeclarationParser<'i> for CssDeclarationParser {
name: CowRcStr<'i>, name: CowRcStr<'i>,
input: &mut Parser<'i, 't>, input: &mut Parser<'i, 't>,
) -> Result<Self::Declaration, ParseError<'i, CssError>> { ) -> Result<Self::Declaration, ParseError<'i, CssError>> {
// let start = input.position();
let mut value = String::new(); let mut value = String::new();
let mut previous_token = TokenSerializationType::nothing(); let mut previous_token = TokenSerializationType::nothing();
while let Ok(token) = input.next_including_whitespace_and_comments() { while let Ok(token) = input.next_including_whitespace_and_comments() {
@ -216,10 +210,7 @@ impl<'i> DeclarationParser<'i> for CssDeclarationParser {
} }
previous_token = token_type; previous_token = token_type;
token.to_css(&mut value).unwrap(); token.to_css(&mut value).unwrap();
// TODO: do I need to handle parse_nested_block here?
} }
// input.next_including_whitespace_and_comments()?;
// let value = input.slice_from(start);
Ok(vec![CssDeclaration { Ok(vec![CssDeclaration {
property: name.to_string(), property: name.to_string(),

View File

@ -10,7 +10,6 @@ use crate::arena_dom::{Arena, Node, NodeData, Ref, Sink};
use crate::css_at_rule::CssAtRule; use crate::css_at_rule::CssAtRule;
use crate::css_parser::{parse_css_style_attribute, parse_css_stylesheet, CssRule}; use crate::css_parser::{parse_css_style_attribute, parse_css_stylesheet, CssRule};
use crate::css_property::CssProperty; use crate::css_property::CssProperty;
use crate::css_token_parser::parse_and_serialize;
pub struct Sanitizer<'arena> { pub struct Sanitizer<'arena> {
arena: typed_arena::Arena<Node<'arena>>, arena: typed_arena::Arena<Node<'arena>>,
@ -21,6 +20,7 @@ pub struct Sanitizer<'arena> {
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct SanitizerConfig { pub struct SanitizerConfig {
pub allow_comments: bool, pub allow_comments: bool,
pub allow_doctype: bool,
pub allowed_elements: HashSet<LocalName>, pub allowed_elements: HashSet<LocalName>,
pub allowed_attributes: HashSet<LocalName>, pub allowed_attributes: HashSet<LocalName>,
pub allowed_attributes_per_element: HashMap<LocalName, HashSet<LocalName>>, pub allowed_attributes_per_element: HashMap<LocalName, HashSet<LocalName>>,
@ -103,29 +103,22 @@ impl<'arena> Sanitizer<'arena> {
} }
fn traverse(&'arena self, node: Ref<'arena>) { fn traverse(&'arena self, node: Ref<'arena>) {
println!("{}", &node);
if self.should_unwrap_node(node) { if self.should_unwrap_node(node) {
let sibling = node.next_sibling.get(); let sibling = node.next_sibling.get();
println!("unwrapping node");
if self.should_remove_contents_when_unwrapped(node) { if self.should_remove_contents_when_unwrapped(node) {
println!("detaching node");
node.detach(); node.detach();
println!("post-detach: {}", &node);
} else if let Some(unwrapped_node) = node.unwrap() { } else if let Some(unwrapped_node) = node.unwrap() {
println!("traversing unwrapped node");
self.traverse(unwrapped_node); self.traverse(unwrapped_node);
} }
if let Some(sibling) = sibling { if let Some(sibling) = sibling {
println!("traversing sibling");
self.traverse(sibling); self.traverse(sibling);
} }
return; return;
} }
println!("TRANSFORMING: {}", &node);
self.remove_attributes(node); self.remove_attributes(node);
self.add_attributes(node); self.add_attributes(node);
self.sanitize_attribute_protocols(node); self.sanitize_attribute_protocols(node);
@ -138,23 +131,21 @@ impl<'arena> Sanitizer<'arena> {
} }
if let Some(child) = node.first_child.get() { if let Some(child) = node.first_child.get() {
println!("traversing child");
self.traverse(child); self.traverse(child);
} }
if let Some(sibling) = node.next_sibling.get() { if let Some(sibling) = node.next_sibling.get() {
println!("traversing sibling");
self.traverse(sibling); self.traverse(sibling);
} }
} }
fn should_unwrap_node(&self, node: Ref) -> bool { fn should_unwrap_node(&self, node: Ref) -> bool {
match node.data { match node.data {
NodeData::Document NodeData::Document | NodeData::Text { .. } | NodeData::ProcessingInstruction { .. } => {
| NodeData::Doctype { .. } false
| NodeData::Text { .. } }
| NodeData::ProcessingInstruction { .. } => false,
NodeData::Comment { .. } => !self.config.allow_comments, NodeData::Comment { .. } => !self.config.allow_comments,
NodeData::Doctype { .. } => !self.config.allow_doctype,
NodeData::Element { ref name, .. } => { NodeData::Element { ref name, .. } => {
!self.config.allowed_elements.contains(&name.local) !self.config.allowed_elements.contains(&name.local)
} }
@ -354,19 +345,6 @@ impl<'arena> Sanitizer<'arena> {
} }
} }
} }
/// Debug helper: if `node` is a text node whose parent is a `<style>` element,
/// runs its contents through `parse_and_serialize` into a scratch `String`.
///
/// The scratch string is dropped when the function returns, so nothing is
/// handed back to the caller. Any other kind of node (or a text node without a
/// `<style>` parent) is ignored.
fn serialize_css_test(&self, node: Ref<'arena>) {
    // Only text nodes carry CSS source to serialize.
    let contents = match node.data {
        NodeData::Text { ref contents } => contents,
        _ => return,
    };
    // A detached text node has no enclosing element to inspect.
    let parent = match node.parent.get() {
        Some(parent) => parent,
        None => return,
    };
    match parent.data {
        NodeData::Element { ref name, .. } if name.local == local_name!("style") => {
            let mut serialized_css = String::new();
            parse_and_serialize(contents.borrow(), &mut serialized_css, self.config);
        }
        _ => {}
    }
}
} }
#[cfg(test)] #[cfg(test)]
@ -399,6 +377,7 @@ mod test {
lazy_static! { lazy_static! {
static ref EMPTY_CONFIG: SanitizerConfig = SanitizerConfig { static ref EMPTY_CONFIG: SanitizerConfig = SanitizerConfig {
allow_comments: false, allow_comments: false,
allow_doctype: false,
allowed_elements: HashSet::new(), allowed_elements: HashSet::new(),
allowed_attributes: HashSet::new(), allowed_attributes: HashSet::new(),
allowed_attributes_per_element: HashMap::new(), allowed_attributes_per_element: HashMap::new(),
@ -438,6 +417,25 @@ mod test {
assert_eq!(str::from_utf8(&output).unwrap(), "<html><div></div></html>"); assert_eq!(str::from_utf8(&output).unwrap(), "<html><div></div></html>");
} }
#[test]
fn allow_html_comments() {
    // Start from the empty config, turn comments on, and allow only the two
    // elements that appear in the expected output.
    let mut config = EMPTY_CONFIG.clone();
    config.allow_comments = true;
    for tag in vec![local_name!("html"), local_name!("div")] {
        config.allowed_elements.insert(tag);
    }

    let sanitizer = Sanitizer::new(&config, vec![]);
    let mut input = MockRead::new("<div><!-- keep me --></div>");
    let mut sanitized = vec![];
    sanitizer
        .sanitize_fragment(&mut input, &mut sanitized)
        .unwrap();

    // The comment must come through untouched inside the allowed <div>.
    let rendered = str::from_utf8(&sanitized).unwrap();
    assert_eq!(rendered, "<html><div><!-- keep me --></div></html>");
}
#[test] #[test]
fn remove_script_elements() { fn remove_script_elements() {
let mut disallow_script_config = EMPTY_CONFIG.clone(); let mut disallow_script_config = EMPTY_CONFIG.clone();
@ -744,4 +742,39 @@ mod test {
"<html><div style=\"margin: 10px; color: red;\"></div></html>" "<html><div style=\"margin: 10px; color: red;\"></div></html>"
); );
} }
#[test]
fn remove_doctype() {
    // `allow_doctype` is set to false explicitly so the test states its own
    // precondition instead of relying on the value in EMPTY_CONFIG.
    let mut config = EMPTY_CONFIG.clone();
    config.allow_doctype = false;
    for tag in vec![local_name!("html"), local_name!("div")] {
        config.allowed_elements.insert(tag);
    }

    let sanitizer = Sanitizer::new(&config, vec![]);
    let mut input = MockRead::new("<!DOCTYPE html><div></div>");
    let mut sanitized = vec![];
    sanitizer
        .sanitize_document(&mut input, &mut sanitized)
        .unwrap();

    // The doctype must be gone while the allowed elements remain.
    let rendered = str::from_utf8(&sanitized).unwrap();
    assert_eq!(rendered, "<html><div></div></html>");
}
#[test]
fn allow_doctype() {
    // Same setup as the removal test, but with the doctype switch turned on.
    let mut config = EMPTY_CONFIG.clone();
    config.allow_doctype = true;
    for tag in vec![local_name!("html"), local_name!("div")] {
        config.allowed_elements.insert(tag);
    }

    let sanitizer = Sanitizer::new(&config, vec![]);
    let mut input = MockRead::new("<!DOCTYPE html><div></div>");
    let mut sanitized = vec![];
    sanitizer
        .sanitize_document(&mut input, &mut sanitized)
        .unwrap();

    // The doctype must be serialized ahead of the document element.
    let rendered = str::from_utf8(&sanitized).unwrap();
    assert_eq!(rendered, "<!DOCTYPE html><html><div></div></html>");
}
} }