CSS sanitization and serialisation in a good place
This commit is contained in:
parent
e4316f1a6f
commit
caca121bef
@ -13,6 +13,7 @@ lazy_static! {
|
|||||||
allowed_protocols: HashMap::new(),
|
allowed_protocols: HashMap::new(),
|
||||||
allowed_css_at_rules: HashSet::new(),
|
allowed_css_at_rules: HashSet::new(),
|
||||||
allowed_css_properties: HashSet::new(),
|
allowed_css_properties: HashSet::new(),
|
||||||
|
allow_css_comments: false,
|
||||||
remove_contents_when_unwrapped: hashset! {
|
remove_contents_when_unwrapped: hashset! {
|
||||||
local_name!("iframe"),
|
local_name!("iframe"),
|
||||||
local_name!("noembed"),
|
local_name!("noembed"),
|
||||||
|
@ -35,6 +35,7 @@ pub struct CssDeclaration {
|
|||||||
pub value: String,
|
pub value: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
struct CssAtRulePrelude {
|
struct CssAtRulePrelude {
|
||||||
name: String,
|
name: String,
|
||||||
prelude: String,
|
prelude: String,
|
||||||
@ -145,7 +146,6 @@ impl<'i> QualifiedRuleParser<'i> for CssParser {
|
|||||||
prelude.push_str("/**/");
|
prelude.push_str("/**/");
|
||||||
}
|
}
|
||||||
previous_token = token_type;
|
previous_token = token_type;
|
||||||
dbg!(&token);
|
|
||||||
token.to_css(&mut prelude).unwrap();
|
token.to_css(&mut prelude).unwrap();
|
||||||
// TODO: do I need to handle parse_nested_block here?
|
// TODO: do I need to handle parse_nested_block here?
|
||||||
}
|
}
|
||||||
@ -206,7 +206,6 @@ impl<'i> DeclarationParser<'i> for CssDeclarationParser {
|
|||||||
name: CowRcStr<'i>,
|
name: CowRcStr<'i>,
|
||||||
input: &mut Parser<'i, 't>,
|
input: &mut Parser<'i, 't>,
|
||||||
) -> Result<Self::Declaration, ParseError<'i, CssError>> {
|
) -> Result<Self::Declaration, ParseError<'i, CssError>> {
|
||||||
dbg!(&name);
|
|
||||||
// let start = input.position();
|
// let start = input.position();
|
||||||
let mut value = String::new();
|
let mut value = String::new();
|
||||||
let mut previous_token = TokenSerializationType::nothing();
|
let mut previous_token = TokenSerializationType::nothing();
|
||||||
@ -216,26 +215,45 @@ impl<'i> DeclarationParser<'i> for CssDeclarationParser {
|
|||||||
value.push_str("/**/");
|
value.push_str("/**/");
|
||||||
}
|
}
|
||||||
previous_token = token_type;
|
previous_token = token_type;
|
||||||
dbg!(&token);
|
|
||||||
token.to_css(&mut value).unwrap();
|
token.to_css(&mut value).unwrap();
|
||||||
// TODO: do I need to handle parse_nested_block here?
|
// TODO: do I need to handle parse_nested_block here?
|
||||||
}
|
}
|
||||||
// input.next_including_whitespace_and_comments()?;
|
// input.next_including_whitespace_and_comments()?;
|
||||||
// let value = input.slice_from(start);
|
// let value = input.slice_from(start);
|
||||||
dbg!(&value);
|
|
||||||
|
|
||||||
Ok(vec![CssDeclaration {
|
Ok(vec![CssDeclaration {
|
||||||
property: name.to_string(),
|
property: name.to_string(),
|
||||||
value: value.to_string(),
|
value,
|
||||||
}])
|
}])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'i> AtRuleParser<'i> for CssDeclarationParser {
|
impl<'i> AtRuleParser<'i> for CssDeclarationParser {
|
||||||
type PreludeBlock = ();
|
type PreludeBlock = CssAtRulePrelude;
|
||||||
type PreludeNoBlock = ();
|
type PreludeNoBlock = CssAtRulePrelude;
|
||||||
type AtRule = Vec<CssDeclaration>;
|
type AtRule = Vec<CssDeclaration>;
|
||||||
type Error = CssError;
|
type Error = CssError;
|
||||||
|
|
||||||
|
fn parse_prelude<'t>(
|
||||||
|
&mut self,
|
||||||
|
name: CowRcStr<'i>,
|
||||||
|
input: &mut Parser<'i, 't>,
|
||||||
|
) -> Result<AtRuleType<Self::PreludeNoBlock, Self::PreludeBlock>, CssParseError<'i>> {
|
||||||
|
let mut prelude = String::new();
|
||||||
|
Ok(AtRuleType::WithBlock(CssAtRulePrelude {
|
||||||
|
name: name.to_string(),
|
||||||
|
prelude,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_block<'t>(
|
||||||
|
&mut self,
|
||||||
|
prelude: Self::PreludeBlock,
|
||||||
|
_location: SourceLocation,
|
||||||
|
input: &mut Parser<'i, 't>,
|
||||||
|
) -> Result<Self::AtRule, CssParseError<'i>> {
|
||||||
|
Ok(vec![])
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse_declarations<'i>(
|
pub fn parse_declarations<'i>(
|
||||||
|
@ -1,15 +1,24 @@
|
|||||||
|
// Note: not using this parser. It would be easier to preserve whitespace in the css strings using
|
||||||
|
// this parser but it requires me to do too much of what cssparser is already doing for me
|
||||||
|
// (distinguishing between an at-rule `Ident` vs. a style rule `Ident`).
|
||||||
|
use std::borrow::Borrow;
|
||||||
use std::cell::Ref;
|
use std::cell::Ref;
|
||||||
|
|
||||||
use cssparser::{ParseError, Parser, ParserInput, ToCss, Token, TokenSerializationType};
|
use cssparser::{ParseError, Parser, ParserInput, ToCss, Token, TokenSerializationType};
|
||||||
use html5ever::tendril::StrTendril;
|
use html5ever::tendril::StrTendril;
|
||||||
|
|
||||||
|
use crate::css_property::CssProperty;
|
||||||
|
use crate::sanitizer::SanitizerConfig;
|
||||||
|
|
||||||
pub fn write_to(
|
pub fn write_to(
|
||||||
mut previous_token: TokenSerializationType,
|
mut previous_token: TokenSerializationType,
|
||||||
input: &mut Parser,
|
input: &mut Parser,
|
||||||
string: &mut String,
|
string: &mut String,
|
||||||
preserve_comments: bool,
|
config: &SanitizerConfig,
|
||||||
|
skipping_property: bool,
|
||||||
|
skipping_at_rule: bool,
|
||||||
) {
|
) {
|
||||||
while let Ok(token) = if preserve_comments {
|
while let Ok(token) = if config.allow_css_comments {
|
||||||
input
|
input
|
||||||
.next_including_whitespace_and_comments()
|
.next_including_whitespace_and_comments()
|
||||||
.map(|t| t.clone())
|
.map(|t| t.clone())
|
||||||
@ -17,10 +26,24 @@ pub fn write_to(
|
|||||||
input.next_including_whitespace().map(|t| t.clone())
|
input.next_including_whitespace().map(|t| t.clone())
|
||||||
} {
|
} {
|
||||||
let token_type = token.serialization_type();
|
let token_type = token.serialization_type();
|
||||||
if !preserve_comments && previous_token.needs_separator_when_before(token_type) {
|
let mut skipping_property = skipping_property;
|
||||||
|
let mut skipping_at_rule = skipping_at_rule;
|
||||||
|
if !config.allow_css_comments && previous_token.needs_separator_when_before(token_type) {
|
||||||
string.push_str("/**/")
|
string.push_str("/**/")
|
||||||
}
|
}
|
||||||
previous_token = token_type;
|
previous_token = token_type;
|
||||||
|
match &token {
|
||||||
|
Token::Ident(property) => {
|
||||||
|
let property_str: &str = property.borrow();
|
||||||
|
if !config
|
||||||
|
.allowed_css_properties
|
||||||
|
.contains(&CssProperty::from(property_str))
|
||||||
|
{
|
||||||
|
skipping_property = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
dbg!(&token);
|
dbg!(&token);
|
||||||
token.to_css(string).unwrap();
|
token.to_css(string).unwrap();
|
||||||
let closing_token = match token {
|
let closing_token = match token {
|
||||||
@ -31,7 +54,14 @@ pub fn write_to(
|
|||||||
};
|
};
|
||||||
if let Some(closing_token) = closing_token {
|
if let Some(closing_token) = closing_token {
|
||||||
let result: Result<_, ParseError<()>> = input.parse_nested_block(|input| {
|
let result: Result<_, ParseError<()>> = input.parse_nested_block(|input| {
|
||||||
write_to(previous_token, input, string, preserve_comments);
|
write_to(
|
||||||
|
previous_token,
|
||||||
|
input,
|
||||||
|
string,
|
||||||
|
config,
|
||||||
|
skipping_property,
|
||||||
|
skipping_at_rule,
|
||||||
|
);
|
||||||
Ok(())
|
Ok(())
|
||||||
});
|
});
|
||||||
result.unwrap();
|
result.unwrap();
|
||||||
@ -40,13 +70,15 @@ pub fn write_to(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse_and_serialize(input: Ref<StrTendril>, output: &mut String, preserve_comments: bool) {
|
pub fn parse_and_serialize(input: Ref<StrTendril>, output: &mut String, config: &SanitizerConfig) {
|
||||||
let mut parser_input = ParserInput::new(&input);
|
let mut parser_input = ParserInput::new(&input);
|
||||||
let parser = &mut Parser::new(&mut parser_input);
|
let parser = &mut Parser::new(&mut parser_input);
|
||||||
write_to(
|
write_to(
|
||||||
TokenSerializationType::nothing(),
|
TokenSerializationType::nothing(),
|
||||||
parser,
|
parser,
|
||||||
output,
|
output,
|
||||||
preserve_comments,
|
config,
|
||||||
|
false,
|
||||||
|
false,
|
||||||
);
|
);
|
||||||
}
|
}
|
@ -24,7 +24,6 @@ mod css_at_rule {
|
|||||||
mod arena_dom;
|
mod arena_dom;
|
||||||
mod config;
|
mod config;
|
||||||
mod css_parser;
|
mod css_parser;
|
||||||
mod css_parser_2;
|
|
||||||
mod sanitizer;
|
mod sanitizer;
|
||||||
|
|
||||||
use arena_dom::{create_element, Arena, NodeData, Ref};
|
use arena_dom::{create_element, Arena, NodeData, Ref};
|
||||||
|
@ -9,8 +9,8 @@ use html5ever::{parse_document, parse_fragment, serialize, Attribute, LocalName,
|
|||||||
use crate::arena_dom::{Arena, Node, NodeData, Ref, Sink};
|
use crate::arena_dom::{Arena, Node, NodeData, Ref, Sink};
|
||||||
use crate::css_at_rule::CssAtRule;
|
use crate::css_at_rule::CssAtRule;
|
||||||
use crate::css_parser::{parse_css_style_attribute, parse_css_stylesheet, CssRule};
|
use crate::css_parser::{parse_css_style_attribute, parse_css_stylesheet, CssRule};
|
||||||
use crate::css_parser_2::parse_and_serialize;
|
|
||||||
use crate::css_property::CssProperty;
|
use crate::css_property::CssProperty;
|
||||||
|
use crate::css_token_parser::parse_and_serialize;
|
||||||
|
|
||||||
pub struct Sanitizer<'arena> {
|
pub struct Sanitizer<'arena> {
|
||||||
arena: typed_arena::Arena<Node<'arena>>,
|
arena: typed_arena::Arena<Node<'arena>>,
|
||||||
@ -29,6 +29,7 @@ pub struct SanitizerConfig {
|
|||||||
pub allowed_protocols: HashMap<LocalName, HashMap<LocalName, HashSet<Protocol<'static>>>>,
|
pub allowed_protocols: HashMap<LocalName, HashMap<LocalName, HashSet<Protocol<'static>>>>,
|
||||||
pub allowed_css_at_rules: HashSet<CssAtRule>,
|
pub allowed_css_at_rules: HashSet<CssAtRule>,
|
||||||
pub allowed_css_properties: HashSet<CssProperty>,
|
pub allowed_css_properties: HashSet<CssProperty>,
|
||||||
|
pub allow_css_comments: bool,
|
||||||
pub remove_contents_when_unwrapped: HashSet<LocalName>,
|
pub remove_contents_when_unwrapped: HashSet<LocalName>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -129,7 +130,7 @@ impl<'arena> Sanitizer<'arena> {
|
|||||||
self.add_attributes(node);
|
self.add_attributes(node);
|
||||||
self.sanitize_attribute_protocols(node);
|
self.sanitize_attribute_protocols(node);
|
||||||
self.sanitize_style_tag_css(node);
|
self.sanitize_style_tag_css(node);
|
||||||
// self.sanitize_style_attribute_css(node);
|
self.sanitize_style_attribute_css(node);
|
||||||
// self.serialize_css_test(node);
|
// self.serialize_css_test(node);
|
||||||
|
|
||||||
for transformer in self.transformers.iter() {
|
for transformer in self.transformers.iter() {
|
||||||
@ -241,16 +242,11 @@ impl<'arena> Sanitizer<'arena> {
|
|||||||
let attrs = &mut attrs.borrow_mut();
|
let attrs = &mut attrs.borrow_mut();
|
||||||
|
|
||||||
if let Some(protocols) = self.config.allowed_protocols.get(&name.local) {
|
if let Some(protocols) = self.config.allowed_protocols.get(&name.local) {
|
||||||
dbg!(protocols);
|
|
||||||
dbg!(&attrs);
|
|
||||||
let mut i = 0;
|
let mut i = 0;
|
||||||
while i != attrs.len() {
|
while i != attrs.len() {
|
||||||
dbg!(&attrs[i].name.local);
|
|
||||||
if let Some(allowed_protocols) = protocols.get(&attrs[i].name.local) {
|
if let Some(allowed_protocols) = protocols.get(&attrs[i].name.local) {
|
||||||
dbg!(allowed_protocols);
|
|
||||||
match Url::parse(&attrs[i].value) {
|
match Url::parse(&attrs[i].value) {
|
||||||
Ok(url) => {
|
Ok(url) => {
|
||||||
dbg!(Protocol::Scheme(url.scheme()));
|
|
||||||
if !allowed_protocols.contains(&Protocol::Scheme(url.scheme())) {
|
if !allowed_protocols.contains(&Protocol::Scheme(url.scheme())) {
|
||||||
attrs.remove(i);
|
attrs.remove(i);
|
||||||
} else {
|
} else {
|
||||||
@ -258,7 +254,6 @@ impl<'arena> Sanitizer<'arena> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(ParseError::RelativeUrlWithoutBase) => {
|
Err(ParseError::RelativeUrlWithoutBase) => {
|
||||||
dbg!("relative");
|
|
||||||
if !allowed_protocols.contains(&Protocol::Relative) {
|
if !allowed_protocols.contains(&Protocol::Relative) {
|
||||||
attrs.remove(i);
|
attrs.remove(i);
|
||||||
} else {
|
} else {
|
||||||
@ -294,10 +289,9 @@ impl<'arena> Sanitizer<'arena> {
|
|||||||
sanitized_css += declaration_string;
|
sanitized_css += declaration_string;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
sanitized_css += "}";
|
sanitized_css += " }";
|
||||||
}
|
}
|
||||||
CssRule::AtRule(at_rule) => {
|
CssRule::AtRule(at_rule) => {
|
||||||
dbg!(&at_rule);
|
|
||||||
if self
|
if self
|
||||||
.config
|
.config
|
||||||
.allowed_css_at_rules
|
.allowed_css_at_rules
|
||||||
@ -308,7 +302,9 @@ impl<'arena> Sanitizer<'arena> {
|
|||||||
if let Some(block) = at_rule.block {
|
if let Some(block) = at_rule.block {
|
||||||
sanitized_css += "{";
|
sanitized_css += "{";
|
||||||
sanitized_css += &self.serialize_sanitized_css_rules(block);
|
sanitized_css += &self.serialize_sanitized_css_rules(block);
|
||||||
sanitized_css += "}";
|
sanitized_css += " }";
|
||||||
|
} else {
|
||||||
|
sanitized_css += "; ";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -326,9 +322,7 @@ impl<'arena> Sanitizer<'arena> {
|
|||||||
if let NodeData::Element { ref name, .. } = parent.data {
|
if let NodeData::Element { ref name, .. } = parent.data {
|
||||||
if name.local == local_name!("style") {
|
if name.local == local_name!("style") {
|
||||||
let rules = parse_css_stylesheet(&contents.borrow());
|
let rules = parse_css_stylesheet(&contents.borrow());
|
||||||
dbg!(&rules);
|
|
||||||
let sanitized_css = self.serialize_sanitized_css_rules(rules);
|
let sanitized_css = self.serialize_sanitized_css_rules(rules);
|
||||||
dbg!(&sanitized_css);
|
|
||||||
contents.replace(StrTendril::from(sanitized_css));
|
contents.replace(StrTendril::from(sanitized_css));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -342,7 +336,6 @@ impl<'arena> Sanitizer<'arena> {
|
|||||||
if attr.name.local == local_name!("style") {
|
if attr.name.local == local_name!("style") {
|
||||||
let css_str = &attr.value;
|
let css_str = &attr.value;
|
||||||
let declarations = parse_css_style_attribute(css_str);
|
let declarations = parse_css_style_attribute(css_str);
|
||||||
dbg!(&declarations);
|
|
||||||
let mut sanitized_css = String::new();
|
let mut sanitized_css = String::new();
|
||||||
for declaration in declarations.into_iter() {
|
for declaration in declarations.into_iter() {
|
||||||
let declaration_string = &declaration.to_string();
|
let declaration_string = &declaration.to_string();
|
||||||
@ -356,7 +349,6 @@ impl<'arena> Sanitizer<'arena> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
let sanitized_css = sanitized_css.trim();
|
let sanitized_css = sanitized_css.trim();
|
||||||
dbg!(&sanitized_css);
|
|
||||||
attr.value = StrTendril::from(sanitized_css);
|
attr.value = StrTendril::from(sanitized_css);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -369,7 +361,7 @@ impl<'arena> Sanitizer<'arena> {
|
|||||||
if let NodeData::Element { ref name, .. } = parent.data {
|
if let NodeData::Element { ref name, .. } = parent.data {
|
||||||
if name.local == local_name!("style") {
|
if name.local == local_name!("style") {
|
||||||
let mut serialized_css = String::new();
|
let mut serialized_css = String::new();
|
||||||
parse_and_serialize(contents.borrow(), &mut serialized_css, true);
|
parse_and_serialize(contents.borrow(), &mut serialized_css, self.config);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -415,6 +407,7 @@ mod test {
|
|||||||
allowed_protocols: HashMap::new(),
|
allowed_protocols: HashMap::new(),
|
||||||
allowed_css_at_rules: HashSet::new(),
|
allowed_css_at_rules: HashSet::new(),
|
||||||
allowed_css_properties: HashSet::new(),
|
allowed_css_properties: HashSet::new(),
|
||||||
|
allow_css_comments: false,
|
||||||
remove_contents_when_unwrapped: HashSet::new(),
|
remove_contents_when_unwrapped: HashSet::new(),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -728,31 +721,27 @@ mod test {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn sanitize_style_tag_css() {
|
fn sanitize_style_attribute_css() {
|
||||||
let mut sanitize_css_config = EMPTY_CONFIG.clone();
|
let mut sanitize_css_config = EMPTY_CONFIG.clone();
|
||||||
sanitize_css_config
|
sanitize_css_config
|
||||||
.allowed_elements
|
.allowed_elements
|
||||||
.extend(vec![local_name!("html"), local_name!("style")]);
|
.extend(vec![local_name!("html"), local_name!("div")]);
|
||||||
|
sanitize_css_config
|
||||||
|
.allowed_attributes
|
||||||
|
.extend(vec![local_name!("style")]);
|
||||||
sanitize_css_config
|
sanitize_css_config
|
||||||
.allowed_css_properties
|
.allowed_css_properties
|
||||||
.extend(vec![css_property!("margin"), css_property!("color")]);
|
.extend(vec![css_property!("margin"), css_property!("color")]);
|
||||||
sanitize_css_config
|
|
||||||
.allowed_css_at_rules
|
|
||||||
.extend(vec![css_at_rule!("charset")]);
|
|
||||||
let sanitizer = Sanitizer::new(&sanitize_css_config, vec![]);
|
let sanitizer = Sanitizer::new(&sanitize_css_config, vec![]);
|
||||||
let mut mock_data = MockRead::new(
|
let mut mock_data =
|
||||||
"<style>@charset \"UTF-8\";\
|
MockRead::new("<div style=\"margin: 10px; padding: 10px; color: red;\"></div>");
|
||||||
div { margin: 10px; padding: 10px; color: red; }\
|
|
||||||
@media print { div { margin: 50px; } }</style>",
|
|
||||||
);
|
|
||||||
let mut output = vec![];
|
let mut output = vec![];
|
||||||
sanitizer
|
sanitizer
|
||||||
.sanitize_fragment(&mut mock_data, &mut output)
|
.sanitize_fragment(&mut mock_data, &mut output)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
str::from_utf8(&output).unwrap(),
|
str::from_utf8(&output).unwrap(),
|
||||||
"<html><style>@charset \"UTF-8\";\
|
"<html><div style=\"margin: 10px; color: red;\"></div></html>"
|
||||||
div { margin: 10px; color: red; }</style></html>"
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user