Remove debug printing, add allow_doctype config
This commit is contained in:
parent
caca121bef
commit
5f57b390e2
@ -5,6 +5,7 @@ use crate::sanitizer::SanitizerConfig;
|
||||
lazy_static! {
|
||||
pub static ref DEFAULT_CONFIG: SanitizerConfig = SanitizerConfig {
|
||||
allow_comments: false,
|
||||
allow_doctype: false,
|
||||
allowed_elements: HashSet::new(),
|
||||
allowed_attributes: HashSet::new(),
|
||||
allowed_attributes_per_element: HashMap::new(),
|
||||
|
@ -59,8 +59,6 @@ impl<'i> AtRuleParser<'i> for CssParser {
|
||||
name: CowRcStr<'i>,
|
||||
input: &mut Parser<'i, 't>,
|
||||
) -> Result<AtRuleType<Self::PreludeNoBlock, Self::PreludeBlock>, CssParseError<'i>> {
|
||||
// let position = input.position();
|
||||
// while input.next_including_whitespace_and_comments().is_ok() {}
|
||||
let mut prelude = String::new();
|
||||
let mut previous_token = TokenSerializationType::nothing();
|
||||
while let Ok(token) = input.next_including_whitespace_and_comments() {
|
||||
@ -70,7 +68,6 @@ impl<'i> AtRuleParser<'i> for CssParser {
|
||||
}
|
||||
previous_token = token_type;
|
||||
token.to_css(&mut prelude).unwrap();
|
||||
// TODO: do I need to handle parse_nested_block here?
|
||||
}
|
||||
match_ignore_ascii_case! { &*name,
|
||||
"import" | "namespace" | "charset" => {
|
||||
@ -137,7 +134,6 @@ impl<'i> QualifiedRuleParser<'i> for CssParser {
|
||||
&mut self,
|
||||
input: &mut Parser<'i, 't>,
|
||||
) -> Result<Self::Prelude, CssParseError<'i>> {
|
||||
// let position = input.position();
|
||||
let mut prelude = String::new();
|
||||
let mut previous_token = TokenSerializationType::nothing();
|
||||
while let Ok(token) = input.next_including_whitespace_and_comments() {
|
||||
@ -147,7 +143,6 @@ impl<'i> QualifiedRuleParser<'i> for CssParser {
|
||||
}
|
||||
previous_token = token_type;
|
||||
token.to_css(&mut prelude).unwrap();
|
||||
// TODO: do I need to handle parse_nested_block here?
|
||||
}
|
||||
Ok(prelude)
|
||||
}
|
||||
@ -206,7 +201,6 @@ impl<'i> DeclarationParser<'i> for CssDeclarationParser {
|
||||
name: CowRcStr<'i>,
|
||||
input: &mut Parser<'i, 't>,
|
||||
) -> Result<Self::Declaration, ParseError<'i, CssError>> {
|
||||
// let start = input.position();
|
||||
let mut value = String::new();
|
||||
let mut previous_token = TokenSerializationType::nothing();
|
||||
while let Ok(token) = input.next_including_whitespace_and_comments() {
|
||||
@ -216,10 +210,7 @@ impl<'i> DeclarationParser<'i> for CssDeclarationParser {
|
||||
}
|
||||
previous_token = token_type;
|
||||
token.to_css(&mut value).unwrap();
|
||||
// TODO: do I need to handle parse_nested_block here?
|
||||
}
|
||||
// input.next_including_whitespace_and_comments()?;
|
||||
// let value = input.slice_from(start);
|
||||
|
||||
Ok(vec![CssDeclaration {
|
||||
property: name.to_string(),
|
||||
|
@ -10,7 +10,6 @@ use crate::arena_dom::{Arena, Node, NodeData, Ref, Sink};
|
||||
use crate::css_at_rule::CssAtRule;
|
||||
use crate::css_parser::{parse_css_style_attribute, parse_css_stylesheet, CssRule};
|
||||
use crate::css_property::CssProperty;
|
||||
use crate::css_token_parser::parse_and_serialize;
|
||||
|
||||
pub struct Sanitizer<'arena> {
|
||||
arena: typed_arena::Arena<Node<'arena>>,
|
||||
@ -21,6 +20,7 @@ pub struct Sanitizer<'arena> {
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SanitizerConfig {
|
||||
pub allow_comments: bool,
|
||||
pub allow_doctype: bool,
|
||||
pub allowed_elements: HashSet<LocalName>,
|
||||
pub allowed_attributes: HashSet<LocalName>,
|
||||
pub allowed_attributes_per_element: HashMap<LocalName, HashSet<LocalName>>,
|
||||
@ -103,29 +103,22 @@ impl<'arena> Sanitizer<'arena> {
|
||||
}
|
||||
|
||||
fn traverse(&'arena self, node: Ref<'arena>) {
|
||||
println!("{}", &node);
|
||||
if self.should_unwrap_node(node) {
|
||||
let sibling = node.next_sibling.get();
|
||||
|
||||
println!("unwrapping node");
|
||||
if self.should_remove_contents_when_unwrapped(node) {
|
||||
println!("detaching node");
|
||||
node.detach();
|
||||
println!("post-detach: {}", &node);
|
||||
} else if let Some(unwrapped_node) = node.unwrap() {
|
||||
println!("traversing unwrapped node");
|
||||
self.traverse(unwrapped_node);
|
||||
}
|
||||
|
||||
if let Some(sibling) = sibling {
|
||||
println!("traversing sibling");
|
||||
self.traverse(sibling);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
println!("TRANSFORMING: {}", &node);
|
||||
self.remove_attributes(node);
|
||||
self.add_attributes(node);
|
||||
self.sanitize_attribute_protocols(node);
|
||||
@ -138,23 +131,21 @@ impl<'arena> Sanitizer<'arena> {
|
||||
}
|
||||
|
||||
if let Some(child) = node.first_child.get() {
|
||||
println!("traversing child");
|
||||
self.traverse(child);
|
||||
}
|
||||
|
||||
if let Some(sibling) = node.next_sibling.get() {
|
||||
println!("traversing sibling");
|
||||
self.traverse(sibling);
|
||||
}
|
||||
}
|
||||
|
||||
fn should_unwrap_node(&self, node: Ref) -> bool {
|
||||
match node.data {
|
||||
NodeData::Document
|
||||
| NodeData::Doctype { .. }
|
||||
| NodeData::Text { .. }
|
||||
| NodeData::ProcessingInstruction { .. } => false,
|
||||
NodeData::Document | NodeData::Text { .. } | NodeData::ProcessingInstruction { .. } => {
|
||||
false
|
||||
}
|
||||
NodeData::Comment { .. } => !self.config.allow_comments,
|
||||
NodeData::Doctype { .. } => !self.config.allow_doctype,
|
||||
NodeData::Element { ref name, .. } => {
|
||||
!self.config.allowed_elements.contains(&name.local)
|
||||
}
|
||||
@ -354,19 +345,6 @@ impl<'arena> Sanitizer<'arena> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn serialize_css_test(&self, node: Ref<'arena>) {
|
||||
if let NodeData::Text { ref contents } = node.data {
|
||||
if let Some(parent) = node.parent.get() {
|
||||
if let NodeData::Element { ref name, .. } = parent.data {
|
||||
if name.local == local_name!("style") {
|
||||
let mut serialized_css = String::new();
|
||||
parse_and_serialize(contents.borrow(), &mut serialized_css, self.config);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@ -399,6 +377,7 @@ mod test {
|
||||
lazy_static! {
|
||||
static ref EMPTY_CONFIG: SanitizerConfig = SanitizerConfig {
|
||||
allow_comments: false,
|
||||
allow_doctype: false,
|
||||
allowed_elements: HashSet::new(),
|
||||
allowed_attributes: HashSet::new(),
|
||||
allowed_attributes_per_element: HashMap::new(),
|
||||
@ -438,6 +417,25 @@ mod test {
|
||||
assert_eq!(str::from_utf8(&output).unwrap(), "<html><div></div></html>");
|
||||
}
|
||||
|
||||
/// With `allow_comments` enabled, a comment inside an allowed element must
/// survive fragment sanitization unchanged.
#[test]
fn allow_html_comments() {
    let mut config = EMPTY_CONFIG.clone();
    config.allow_comments = true;
    for element in vec![local_name!("html"), local_name!("div")] {
        config.allowed_elements.insert(element);
    }

    let sanitizer = Sanitizer::new(&config, vec![]);
    let mut input = MockRead::new("<div><!-- keep me --></div>");
    let mut sanitized = vec![];
    sanitizer
        .sanitize_fragment(&mut input, &mut sanitized)
        .unwrap();

    let rendered = str::from_utf8(&sanitized).unwrap();
    assert_eq!(rendered, "<html><div><!-- keep me --></div></html>");
}
|
||||
|
||||
#[test]
|
||||
fn remove_script_elements() {
|
||||
let mut disallow_script_config = EMPTY_CONFIG.clone();
|
||||
@ -744,4 +742,39 @@ mod test {
|
||||
"<html><div style=\"margin: 10px; color: red;\"></div></html>"
|
||||
);
|
||||
}
|
||||
|
||||
/// With `allow_doctype` disabled, the doctype declaration must be stripped
/// from a sanitized document while allowed elements are kept.
#[test]
fn remove_doctype() {
    let mut config = EMPTY_CONFIG.clone();
    // Explicit even though the base config already disables it, so the test
    // states the setting it exercises.
    config.allow_doctype = false;
    for element in vec![local_name!("html"), local_name!("div")] {
        config.allowed_elements.insert(element);
    }

    let sanitizer = Sanitizer::new(&config, vec![]);
    let mut input = MockRead::new("<!DOCTYPE html><div></div>");
    let mut sanitized = vec![];
    sanitizer
        .sanitize_document(&mut input, &mut sanitized)
        .unwrap();

    let rendered = str::from_utf8(&sanitized).unwrap();
    assert_eq!(rendered, "<html><div></div></html>");
}
|
||||
|
||||
/// With `allow_doctype` enabled, the doctype declaration must be preserved
/// at the front of the sanitized document.
#[test]
fn allow_doctype() {
    let mut config = EMPTY_CONFIG.clone();
    config.allow_doctype = true;
    for element in vec![local_name!("html"), local_name!("div")] {
        config.allowed_elements.insert(element);
    }

    let sanitizer = Sanitizer::new(&config, vec![]);
    let mut input = MockRead::new("<!DOCTYPE html><div></div>");
    let mut sanitized = vec![];
    sanitizer
        .sanitize_document(&mut input, &mut sanitized)
        .unwrap();

    let rendered = str::from_utf8(&sanitized).unwrap();
    assert_eq!(rendered, "<!DOCTYPE html><html><div></div></html>");
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user