Remove debug printing, add allow_doctype config
This commit is contained in:
parent
caca121bef
commit
5f57b390e2
@ -5,6 +5,7 @@ use crate::sanitizer::SanitizerConfig;
|
|||||||
lazy_static! {
|
lazy_static! {
|
||||||
pub static ref DEFAULT_CONFIG: SanitizerConfig = SanitizerConfig {
|
pub static ref DEFAULT_CONFIG: SanitizerConfig = SanitizerConfig {
|
||||||
allow_comments: false,
|
allow_comments: false,
|
||||||
|
allow_doctype: false,
|
||||||
allowed_elements: HashSet::new(),
|
allowed_elements: HashSet::new(),
|
||||||
allowed_attributes: HashSet::new(),
|
allowed_attributes: HashSet::new(),
|
||||||
allowed_attributes_per_element: HashMap::new(),
|
allowed_attributes_per_element: HashMap::new(),
|
||||||
|
@ -59,8 +59,6 @@ impl<'i> AtRuleParser<'i> for CssParser {
|
|||||||
name: CowRcStr<'i>,
|
name: CowRcStr<'i>,
|
||||||
input: &mut Parser<'i, 't>,
|
input: &mut Parser<'i, 't>,
|
||||||
) -> Result<AtRuleType<Self::PreludeNoBlock, Self::PreludeBlock>, CssParseError<'i>> {
|
) -> Result<AtRuleType<Self::PreludeNoBlock, Self::PreludeBlock>, CssParseError<'i>> {
|
||||||
// let position = input.position();
|
|
||||||
// while input.next_including_whitespace_and_comments().is_ok() {}
|
|
||||||
let mut prelude = String::new();
|
let mut prelude = String::new();
|
||||||
let mut previous_token = TokenSerializationType::nothing();
|
let mut previous_token = TokenSerializationType::nothing();
|
||||||
while let Ok(token) = input.next_including_whitespace_and_comments() {
|
while let Ok(token) = input.next_including_whitespace_and_comments() {
|
||||||
@ -70,7 +68,6 @@ impl<'i> AtRuleParser<'i> for CssParser {
|
|||||||
}
|
}
|
||||||
previous_token = token_type;
|
previous_token = token_type;
|
||||||
token.to_css(&mut prelude).unwrap();
|
token.to_css(&mut prelude).unwrap();
|
||||||
// TODO: do I need to handle parse_nested_block here?
|
|
||||||
}
|
}
|
||||||
match_ignore_ascii_case! { &*name,
|
match_ignore_ascii_case! { &*name,
|
||||||
"import" | "namespace" | "charset" => {
|
"import" | "namespace" | "charset" => {
|
||||||
@ -137,7 +134,6 @@ impl<'i> QualifiedRuleParser<'i> for CssParser {
|
|||||||
&mut self,
|
&mut self,
|
||||||
input: &mut Parser<'i, 't>,
|
input: &mut Parser<'i, 't>,
|
||||||
) -> Result<Self::Prelude, CssParseError<'i>> {
|
) -> Result<Self::Prelude, CssParseError<'i>> {
|
||||||
// let position = input.position();
|
|
||||||
let mut prelude = String::new();
|
let mut prelude = String::new();
|
||||||
let mut previous_token = TokenSerializationType::nothing();
|
let mut previous_token = TokenSerializationType::nothing();
|
||||||
while let Ok(token) = input.next_including_whitespace_and_comments() {
|
while let Ok(token) = input.next_including_whitespace_and_comments() {
|
||||||
@ -147,7 +143,6 @@ impl<'i> QualifiedRuleParser<'i> for CssParser {
|
|||||||
}
|
}
|
||||||
previous_token = token_type;
|
previous_token = token_type;
|
||||||
token.to_css(&mut prelude).unwrap();
|
token.to_css(&mut prelude).unwrap();
|
||||||
// TODO: do I need to handle parse_nested_block here?
|
|
||||||
}
|
}
|
||||||
Ok(prelude)
|
Ok(prelude)
|
||||||
}
|
}
|
||||||
@ -206,7 +201,6 @@ impl<'i> DeclarationParser<'i> for CssDeclarationParser {
|
|||||||
name: CowRcStr<'i>,
|
name: CowRcStr<'i>,
|
||||||
input: &mut Parser<'i, 't>,
|
input: &mut Parser<'i, 't>,
|
||||||
) -> Result<Self::Declaration, ParseError<'i, CssError>> {
|
) -> Result<Self::Declaration, ParseError<'i, CssError>> {
|
||||||
// let start = input.position();
|
|
||||||
let mut value = String::new();
|
let mut value = String::new();
|
||||||
let mut previous_token = TokenSerializationType::nothing();
|
let mut previous_token = TokenSerializationType::nothing();
|
||||||
while let Ok(token) = input.next_including_whitespace_and_comments() {
|
while let Ok(token) = input.next_including_whitespace_and_comments() {
|
||||||
@ -216,10 +210,7 @@ impl<'i> DeclarationParser<'i> for CssDeclarationParser {
|
|||||||
}
|
}
|
||||||
previous_token = token_type;
|
previous_token = token_type;
|
||||||
token.to_css(&mut value).unwrap();
|
token.to_css(&mut value).unwrap();
|
||||||
// TODO: do I need to handle parse_nested_block here?
|
|
||||||
}
|
}
|
||||||
// input.next_including_whitespace_and_comments()?;
|
|
||||||
// let value = input.slice_from(start);
|
|
||||||
|
|
||||||
Ok(vec![CssDeclaration {
|
Ok(vec![CssDeclaration {
|
||||||
property: name.to_string(),
|
property: name.to_string(),
|
||||||
|
@ -10,7 +10,6 @@ use crate::arena_dom::{Arena, Node, NodeData, Ref, Sink};
|
|||||||
use crate::css_at_rule::CssAtRule;
|
use crate::css_at_rule::CssAtRule;
|
||||||
use crate::css_parser::{parse_css_style_attribute, parse_css_stylesheet, CssRule};
|
use crate::css_parser::{parse_css_style_attribute, parse_css_stylesheet, CssRule};
|
||||||
use crate::css_property::CssProperty;
|
use crate::css_property::CssProperty;
|
||||||
use crate::css_token_parser::parse_and_serialize;
|
|
||||||
|
|
||||||
pub struct Sanitizer<'arena> {
|
pub struct Sanitizer<'arena> {
|
||||||
arena: typed_arena::Arena<Node<'arena>>,
|
arena: typed_arena::Arena<Node<'arena>>,
|
||||||
@ -21,6 +20,7 @@ pub struct Sanitizer<'arena> {
|
|||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct SanitizerConfig {
|
pub struct SanitizerConfig {
|
||||||
pub allow_comments: bool,
|
pub allow_comments: bool,
|
||||||
|
pub allow_doctype: bool,
|
||||||
pub allowed_elements: HashSet<LocalName>,
|
pub allowed_elements: HashSet<LocalName>,
|
||||||
pub allowed_attributes: HashSet<LocalName>,
|
pub allowed_attributes: HashSet<LocalName>,
|
||||||
pub allowed_attributes_per_element: HashMap<LocalName, HashSet<LocalName>>,
|
pub allowed_attributes_per_element: HashMap<LocalName, HashSet<LocalName>>,
|
||||||
@ -103,29 +103,22 @@ impl<'arena> Sanitizer<'arena> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn traverse(&'arena self, node: Ref<'arena>) {
|
fn traverse(&'arena self, node: Ref<'arena>) {
|
||||||
println!("{}", &node);
|
|
||||||
if self.should_unwrap_node(node) {
|
if self.should_unwrap_node(node) {
|
||||||
let sibling = node.next_sibling.get();
|
let sibling = node.next_sibling.get();
|
||||||
|
|
||||||
println!("unwrapping node");
|
|
||||||
if self.should_remove_contents_when_unwrapped(node) {
|
if self.should_remove_contents_when_unwrapped(node) {
|
||||||
println!("detaching node");
|
|
||||||
node.detach();
|
node.detach();
|
||||||
println!("post-detach: {}", &node);
|
|
||||||
} else if let Some(unwrapped_node) = node.unwrap() {
|
} else if let Some(unwrapped_node) = node.unwrap() {
|
||||||
println!("traversing unwrapped node");
|
|
||||||
self.traverse(unwrapped_node);
|
self.traverse(unwrapped_node);
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(sibling) = sibling {
|
if let Some(sibling) = sibling {
|
||||||
println!("traversing sibling");
|
|
||||||
self.traverse(sibling);
|
self.traverse(sibling);
|
||||||
}
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
println!("TRANSFORMING: {}", &node);
|
|
||||||
self.remove_attributes(node);
|
self.remove_attributes(node);
|
||||||
self.add_attributes(node);
|
self.add_attributes(node);
|
||||||
self.sanitize_attribute_protocols(node);
|
self.sanitize_attribute_protocols(node);
|
||||||
@ -138,23 +131,21 @@ impl<'arena> Sanitizer<'arena> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if let Some(child) = node.first_child.get() {
|
if let Some(child) = node.first_child.get() {
|
||||||
println!("traversing child");
|
|
||||||
self.traverse(child);
|
self.traverse(child);
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(sibling) = node.next_sibling.get() {
|
if let Some(sibling) = node.next_sibling.get() {
|
||||||
println!("traversing sibling");
|
|
||||||
self.traverse(sibling);
|
self.traverse(sibling);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn should_unwrap_node(&self, node: Ref) -> bool {
|
fn should_unwrap_node(&self, node: Ref) -> bool {
|
||||||
match node.data {
|
match node.data {
|
||||||
NodeData::Document
|
NodeData::Document | NodeData::Text { .. } | NodeData::ProcessingInstruction { .. } => {
|
||||||
| NodeData::Doctype { .. }
|
false
|
||||||
| NodeData::Text { .. }
|
}
|
||||||
| NodeData::ProcessingInstruction { .. } => false,
|
|
||||||
NodeData::Comment { .. } => !self.config.allow_comments,
|
NodeData::Comment { .. } => !self.config.allow_comments,
|
||||||
|
NodeData::Doctype { .. } => !self.config.allow_doctype,
|
||||||
NodeData::Element { ref name, .. } => {
|
NodeData::Element { ref name, .. } => {
|
||||||
!self.config.allowed_elements.contains(&name.local)
|
!self.config.allowed_elements.contains(&name.local)
|
||||||
}
|
}
|
||||||
@ -354,19 +345,6 @@ impl<'arena> Sanitizer<'arena> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn serialize_css_test(&self, node: Ref<'arena>) {
|
|
||||||
if let NodeData::Text { ref contents } = node.data {
|
|
||||||
if let Some(parent) = node.parent.get() {
|
|
||||||
if let NodeData::Element { ref name, .. } = parent.data {
|
|
||||||
if name.local == local_name!("style") {
|
|
||||||
let mut serialized_css = String::new();
|
|
||||||
parse_and_serialize(contents.borrow(), &mut serialized_css, self.config);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@ -399,6 +377,7 @@ mod test {
|
|||||||
lazy_static! {
|
lazy_static! {
|
||||||
static ref EMPTY_CONFIG: SanitizerConfig = SanitizerConfig {
|
static ref EMPTY_CONFIG: SanitizerConfig = SanitizerConfig {
|
||||||
allow_comments: false,
|
allow_comments: false,
|
||||||
|
allow_doctype: false,
|
||||||
allowed_elements: HashSet::new(),
|
allowed_elements: HashSet::new(),
|
||||||
allowed_attributes: HashSet::new(),
|
allowed_attributes: HashSet::new(),
|
||||||
allowed_attributes_per_element: HashMap::new(),
|
allowed_attributes_per_element: HashMap::new(),
|
||||||
@ -438,6 +417,25 @@ mod test {
|
|||||||
assert_eq!(str::from_utf8(&output).unwrap(), "<html><div></div></html>");
|
assert_eq!(str::from_utf8(&output).unwrap(), "<html><div></div></html>");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn allow_html_comments() {
|
||||||
|
let mut allow_comments_config = EMPTY_CONFIG.clone();
|
||||||
|
allow_comments_config.allow_comments = true;
|
||||||
|
allow_comments_config
|
||||||
|
.allowed_elements
|
||||||
|
.extend(vec![local_name!("html"), local_name!("div")]);
|
||||||
|
let sanitizer = Sanitizer::new(&allow_comments_config, vec![]);
|
||||||
|
let mut mock_data = MockRead::new("<div><!-- keep me --></div>");
|
||||||
|
let mut output = vec![];
|
||||||
|
sanitizer
|
||||||
|
.sanitize_fragment(&mut mock_data, &mut output)
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
str::from_utf8(&output).unwrap(),
|
||||||
|
"<html><div><!-- keep me --></div></html>"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn remove_script_elements() {
|
fn remove_script_elements() {
|
||||||
let mut disallow_script_config = EMPTY_CONFIG.clone();
|
let mut disallow_script_config = EMPTY_CONFIG.clone();
|
||||||
@ -744,4 +742,39 @@ mod test {
|
|||||||
"<html><div style=\"margin: 10px; color: red;\"></div></html>"
|
"<html><div style=\"margin: 10px; color: red;\"></div></html>"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn remove_doctype() {
|
||||||
|
let mut disallow_doctype_config = EMPTY_CONFIG.clone();
|
||||||
|
disallow_doctype_config.allow_doctype = false;
|
||||||
|
disallow_doctype_config
|
||||||
|
.allowed_elements
|
||||||
|
.extend(vec![local_name!("html"), local_name!("div")]);
|
||||||
|
let sanitizer = Sanitizer::new(&disallow_doctype_config, vec![]);
|
||||||
|
let mut mock_data = MockRead::new("<!DOCTYPE html><div></div>");
|
||||||
|
let mut output = vec![];
|
||||||
|
sanitizer
|
||||||
|
.sanitize_document(&mut mock_data, &mut output)
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(str::from_utf8(&output).unwrap(), "<html><div></div></html>");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn allow_doctype() {
|
||||||
|
let mut allow_doctype_config = EMPTY_CONFIG.clone();
|
||||||
|
allow_doctype_config.allow_doctype = true;
|
||||||
|
allow_doctype_config
|
||||||
|
.allowed_elements
|
||||||
|
.extend(vec![local_name!("html"), local_name!("div")]);
|
||||||
|
let sanitizer = Sanitizer::new(&allow_doctype_config, vec![]);
|
||||||
|
let mut mock_data = MockRead::new("<!DOCTYPE html><div></div>");
|
||||||
|
let mut output = vec![];
|
||||||
|
sanitizer
|
||||||
|
.sanitize_document(&mut mock_data, &mut output)
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
str::from_utf8(&output).unwrap(),
|
||||||
|
"<!DOCTYPE html><html><div></div></html>"
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user