Clean up sanitizer configs

This commit is contained in:
Tyler Hallada 2020-04-25 22:11:59 -04:00
parent 00593d3c58
commit 6bdb5f97ea
6 changed files with 875 additions and 1069 deletions

View File

@ -1,9 +1,12 @@
use html5ever::LocalName; use html5ever::LocalName;
use std::collections::{HashMap, HashSet}; use crate::config::restricted::RESTRICTED_CONFIG;
use crate::sanitizer::{Protocol, SanitizerConfig};
lazy_static! { lazy_static! {
pub static ref ELEMENTS: HashSet<LocalName> = hashset! { pub static ref BASIC_CONFIG: SanitizerConfig = {
let mut config = RESTRICTED_CONFIG.clone();
config.allowed_elements.extend(hashset! {
local_name!("a"), local_name!("a"),
local_name!("abbr"), local_name!("abbr"),
local_name!("blockquote"), local_name!("blockquote"),
@ -26,48 +29,40 @@ lazy_static! {
local_name!("small"), local_name!("small"),
local_name!("strike"), local_name!("strike"),
local_name!("sub"), local_name!("sub"),
local_name!("sup"),
local_name!("time"), local_name!("time"),
local_name!("ul"), local_name!("ul"),
local_name!("var"), local_name!("var"),
});
config.allowed_attributes_per_element.extend(hashmap! {
local_name!("a") => hashset! { local_name!("href") },
local_name!("abbr") => hashset! { local_name!("title") },
local_name!("blockquote") => hashset! { local_name!("cite") },
local_name!("dfn") => hashset! { local_name!("title") },
local_name!("q") => hashset! { local_name!("cite") },
local_name!("time") => hashset! { local_name!("datetime"), LocalName::from("pubdate") },
});
// Attribute added to `a` elements. The value is "nofollow", the same value the
// former `ADD_ATTRIBUTES` table used for `rel`; "href" is not a link-relation token.
config.add_attributes_per_element.extend(hashmap! {
    local_name!("a") => hashmap! { local_name!("rel") => "nofollow" },
});
config.allowed_protocols.extend(hashmap! {
local_name!("a") => hashmap! { local_name!("href") => hashset! {
Protocol::Scheme("ftp"),
Protocol::Scheme("http"),
Protocol::Scheme("https"),
Protocol::Scheme("mailto"),
Protocol::Relative,
}},
local_name!("blockquote") => hashmap! { local_name!("cite") => hashset! {
Protocol::Scheme("http"),
Protocol::Scheme("https"),
Protocol::Relative,
}},
local_name!("q") => hashmap! { local_name!("cite") => hashset! {
Protocol::Scheme("http"),
Protocol::Scheme("https"),
Protocol::Relative,
}},
});
config
}; };
pub static ref ALL_ATTRIBUTES: HashSet<LocalName> = hashset! {};
pub static ref ATTRIBUTES: HashMap<LocalName, HashSet<LocalName>> = hashmap! {
local_name!("a") => hashset!{
local_name!("href"),
},
local_name!("abbr") => hashset!{
local_name!("title"),
},
local_name!("blockquote") => hashset!{
local_name!("cite"),
},
local_name!("dfn") => hashset!{
local_name!("title"),
},
local_name!("q") => hashset!{
local_name!("cite"),
},
local_name!("time") => hashset!{
local_name!("datetime"),
LocalName::from("pubdate"),
},
};
pub static ref ADD_ATTRIBUTES: HashMap<LocalName, HashMap<LocalName, &'static str>> = hashmap! {
local_name!("a") => hashmap! {
local_name!("rel") => "nofollow",
},
};
pub static ref PROTOCOLS: HashMap<LocalName, HashMap<LocalName, HashSet<&'static str>>> = hashmap! {
local_name!("a") => hashmap! {
local_name!("href") => hashset!{"ftp", "http", "https", "mailto"},
},
local_name!("blockquote") => hashmap! {
local_name!("cite") => hashset!{"http", "https"},
},
local_name!("q") => hashmap! {
local_name!("cite") => hashset!{"http", "https"},
},
};
pub static ref CSS_PROPERTIES: Vec<String> = vec![];
} }

View File

@ -1,4 +1,4 @@
pub mod default;
pub mod basic; pub mod basic;
pub mod default;
pub mod relaxed; pub mod relaxed;
pub mod permissive; pub mod restricted;

View File

@ -1,177 +0,0 @@
use html5ever::LocalName;
use std::collections::{HashMap, HashSet};
use super::relaxed::{
ADD_ATTRIBUTES as RELAXED_ADD_ATTRIBUTES, ALL_ATTRIBUTES as RELAXED_ALL_ATTRIBUTES,
ATTRIBUTES as RELAXED_ATTRIBUTES, ELEMENTS as RELAXED_ELEMENTS,
};
lazy_static! {
/// Element names allowed by the permissive profile: everything in
/// `RELAXED_ELEMENTS` plus the additional names listed below.
pub static ref ELEMENTS: HashSet<LocalName> = RELAXED_ELEMENTS
    .union(&hashset!(
        local_name!("acronym"),
        local_name!("basefont"),
        local_name!("big"),
        local_name!("blink"),
        local_name!("center"),
        LocalName::from("command"),
        local_name!("dir"),
        local_name!("font"),
        local_name!("marquee"),
        local_name!("strike"),
        local_name!("tt"),
        local_name!("form"),
        local_name!("input"),
        local_name!("button"),
        // NOTE(review): `single` and `double` are not standard HTML element
        // names; they look like test fixtures -- confirm they belong here.
        LocalName::from("single"),
        LocalName::from("double"),
    ))
    // `union` already yields an iterator over `&LocalName`, so it can be
    // cloned and collected directly without an extra `into_iter()`.
    .cloned()
    .collect();
/// Attribute names in the permissive profile's `ALL_ATTRIBUTES` set:
/// everything in `RELAXED_ALL_ATTRIBUTES` plus the names listed below.
pub static ref ALL_ATTRIBUTES: HashSet<LocalName> = RELAXED_ALL_ATTRIBUTES
    .union(&hashset! {
        local_name!("bgcolor"),
        local_name!("width"),
        local_name!("height"),
        local_name!("border"),
        local_name!("color"),
        local_name!("background"),
    })
    // `union` is already an iterator of references; clone each name into
    // the new owned set.
    .cloned()
    .collect();
/// Per-element attribute table for the permissive profile, keyed by element
/// name; each value is the set of attribute names listed for that element.
///
/// The table is written out in full instead of being derived from
/// `RELAXED_ATTRIBUTES` (which is imported above): the original author noted
/// not knowing how to merge the two maps.
// NOTE(review): presumably this is the relaxed table plus the `button` and
// `input` entries -- confirm before replacing it with a clone-and-extend.
pub static ref ATTRIBUTES: HashMap<LocalName, HashSet<LocalName>> = hashmap! {
local_name!("a") => hashset!{
local_name!("href"),
local_name!("hreflang"),
local_name!("name"),
local_name!("rel"),
},
local_name!("abbr") => hashset!{
local_name!("title"),
},
local_name!("blockquote") => hashset!{
local_name!("cite"),
},
local_name!("button") => hashset!{
local_name!("type"),
},
local_name!("col") => hashset!{
local_name!("span"),
local_name!("width"),
},
local_name!("colgroup") => hashset!{
local_name!("span"),
local_name!("width"),
},
local_name!("data") => hashset!{
local_name!("value"),
},
local_name!("del") => hashset!{
local_name!("cite"),
local_name!("datetime"),
},
local_name!("dfn") => hashset!{
local_name!("title"),
},
local_name!("img") => hashset!{
local_name!("align"),
local_name!("alt"),
local_name!("border"),
local_name!("height"),
local_name!("src"),
local_name!("srcset"),
local_name!("width"),
},
local_name!("input") => hashset!{
local_name!("type"),
local_name!("name"),
local_name!("value"),
},
local_name!("ins") => hashset!{
local_name!("cite"),
local_name!("datetime"),
},
local_name!("li") => hashset!{
local_name!("value"),
},
local_name!("ol") => hashset!{
// Built at runtime with `LocalName::from` rather than `local_name!`;
// presumably the name is not in html5ever's static atom set -- TODO confirm.
LocalName::from("reversed"),
local_name!("start"),
local_name!("type"),
},
local_name!("q") => hashset!{
local_name!("cite"),
},
local_name!("style") => hashset!{
local_name!("media"),
local_name!("scoped"),
local_name!("type"),
},
local_name!("table") => hashset!{
local_name!("align"),
local_name!("bgcolor"),
local_name!("border"),
local_name!("cellpadding"),
local_name!("cellspacing"),
local_name!("frame"),
local_name!("rules"),
LocalName::from("sortable"),
local_name!("summary"),
local_name!("width"),
},
local_name!("td") => hashset!{
local_name!("abbr"),
local_name!("align"),
local_name!("axis"),
local_name!("colspan"),
local_name!("headers"),
local_name!("rowspan"),
local_name!("valign"),
local_name!("width"),
},
local_name!("th") => hashset!{
local_name!("abbr"),
local_name!("align"),
local_name!("axis"),
local_name!("colspan"),
local_name!("headers"),
local_name!("rowspan"),
local_name!("scope"),
LocalName::from("sorted"),
local_name!("valign"),
local_name!("width"),
},
local_name!("time") => hashset!{
local_name!("datetime"),
LocalName::from("pubdate"),
},
local_name!("ul") => hashset!{
local_name!("type"),
},
};
/// The permissive profile's `ADD_ATTRIBUTES` table: an unmodified copy of
/// `RELAXED_ADD_ATTRIBUTES`.
pub static ref ADD_ATTRIBUTES: HashMap<LocalName, HashMap<LocalName, &'static str>> = {
    // Borrow the relaxed table through its `Deref` impl, then copy it.
    let inherited: &HashMap<LocalName, HashMap<LocalName, &'static str>> = &RELAXED_ADD_ATTRIBUTES;
    inherited.clone()
};
/// URL scheme table for the permissive profile, keyed first by element name
/// and then by attribute name; each value is the set of scheme strings
/// listed for that element/attribute pair.
pub static ref PROTOCOLS: HashMap<LocalName, HashMap<LocalName, HashSet<&'static str>>> = {
    // Every entry except `a[href]` uses exactly this pair of schemes.
    let web_only = || -> HashSet<&'static str> { hashset!{"http", "https"} };

    hashmap! {
        local_name!("a") => hashmap! {
            local_name!("href") => hashset!{"ftp", "http", "https", "mailto"},
        },
        local_name!("blockquote") => hashmap! { local_name!("cite") => web_only() },
        local_name!("del") => hashmap! { local_name!("cite") => web_only() },
        local_name!("img") => hashmap! { local_name!("src") => web_only() },
        local_name!("ins") => hashmap! { local_name!("cite") => web_only() },
        local_name!("q") => hashmap! { local_name!("cite") => web_only() },
    }
};
}

View File

@ -1,20 +1,12 @@
use html5ever::LocalName; use html5ever::LocalName;
use std::collections::{HashMap, HashSet}; use crate::config::basic::BASIC_CONFIG;
use std::iter::FromIterator; use crate::sanitizer::{Protocol, SanitizerConfig};
use super::basic::{
ADD_ATTRIBUTES as BASIC_ADD_ATTRIBUTES, ALL_ATTRIBUTES as BASIC_ALL_ATTRIBUTES,
ATTRIBUTES as BASIC_ATTRIBUTES, CSS_PROPERTIES as BASIC_CSS_PROPERTIES,
ELEMENTS as BASIC_ELEMENTS,
};
use crate::css_property::CssProperty;
use crate::css_at_rule::CssAtRule;
lazy_static! { lazy_static! {
pub static ref ELEMENTS: HashSet<LocalName> = BASIC_ELEMENTS pub static ref RELAXED_CONFIG: SanitizerConfig = {
.union(&hashset!( let mut config = BASIC_CONFIG.clone();
config.allowed_elements.extend(hashset! {
local_name!("address"), local_name!("address"),
local_name!("article"), local_name!("article"),
local_name!("aside"), local_name!("aside"),
@ -62,11 +54,9 @@ lazy_static! {
local_name!("title"), local_name!("title"),
local_name!("tr"), local_name!("tr"),
local_name!("wbr"), local_name!("wbr"),
)) });
.into_iter() config.allow_doctype = true;
.cloned() config.allowed_attributes.extend(hashset! {
.collect();
pub static ref ALL_ATTRIBUTES: HashSet<LocalName> = BASIC_ALL_ATTRIBUTES.union(&hashset! {
local_name!("class"), local_name!("class"),
local_name!("dir"), local_name!("dir"),
local_name!("hidden"), local_name!("hidden"),
@ -76,21 +66,14 @@ lazy_static! {
local_name!("tabindex"), local_name!("tabindex"),
local_name!("title"), local_name!("title"),
LocalName::from("translate"), LocalName::from("translate"),
}).into_iter().cloned().collect(); });
// Can't figure out how to merge HashMaps :( config.allowed_attributes_per_element.extend(hashmap! {
pub static ref ATTRIBUTES: HashMap<LocalName, HashSet<LocalName>> = hashmap! {
local_name!("a") => hashset!{ local_name!("a") => hashset!{
local_name!("href"), local_name!("href"),
local_name!("hreflang"), local_name!("hreflang"),
local_name!("name"), local_name!("name"),
local_name!("rel"), local_name!("rel"),
}, },
local_name!("abbr") => hashset!{
local_name!("title"),
},
local_name!("blockquote") => hashset!{
local_name!("cite"),
},
local_name!("col") => hashset!{ local_name!("col") => hashset!{
local_name!("span"), local_name!("span"),
local_name!("width"), local_name!("width"),
@ -106,9 +89,6 @@ lazy_static! {
local_name!("cite"), local_name!("cite"),
local_name!("datetime"), local_name!("datetime"),
}, },
local_name!("dfn") => hashset!{
local_name!("title"),
},
local_name!("img") => hashset!{ local_name!("img") => hashset!{
local_name!("align"), local_name!("align"),
local_name!("alt"), local_name!("alt"),
@ -130,9 +110,6 @@ lazy_static! {
local_name!("start"), local_name!("start"),
local_name!("type"), local_name!("type"),
}, },
local_name!("q") => hashset!{
local_name!("cite"),
},
local_name!("style") => hashset!{ local_name!("style") => hashset!{
local_name!("media"), local_name!("media"),
local_name!("scoped"), local_name!("scoped"),
@ -172,36 +149,56 @@ lazy_static! {
local_name!("valign"), local_name!("valign"),
local_name!("width"), local_name!("width"),
}, },
local_name!("time") => hashset!{
local_name!("datetime"),
LocalName::from("pubdate"),
},
local_name!("ul") => hashset!{ local_name!("ul") => hashset!{
local_name!("type"), local_name!("type"),
}, },
}; });
pub static ref ADD_ATTRIBUTES: HashMap<LocalName, HashMap<LocalName, &'static str>> = BASIC_ADD_ATTRIBUTES.clone(); config.allowed_protocols.extend(hashmap! {
pub static ref PROTOCOLS: HashMap<LocalName, HashMap<LocalName, HashSet<&'static str>>> = hashmap! { local_name!("del") => hashmap! { local_name!("cite") => hashset! {
local_name!("a") => hashmap! { Protocol::Scheme("http"),
local_name!("href") => hashset!{"ftp", "http", "https", "mailto"}, Protocol::Scheme("https"),
}, Protocol::Relative,
local_name!("blockquote") => hashmap! { }},
local_name!("cite") => hashset!{"http", "https"}, local_name!("img") => hashmap! { local_name!("src") => hashset! {
}, Protocol::Scheme("http"),
local_name!("del") => hashmap! { Protocol::Scheme("https"),
local_name!("cite") => hashset!{"http", "https"}, Protocol::Relative,
}, }},
local_name!("img") => hashmap! { local_name!("ins") => hashmap! { local_name!("cite") => hashset! {
local_name!("src") => hashset!{"http", "https"}, Protocol::Scheme("http"),
}, Protocol::Scheme("https"),
local_name!("ins") => hashmap! { Protocol::Relative,
local_name!("cite") => hashset!{"http", "https"}, }},
}, });
local_name!("q") => hashmap! { config.allow_css_comments = true;
local_name!("cite") => hashset!{"http", "https"}, config.allowed_css_at_rules.extend(hashset! {
}, css_at_rule!("bottom-center"),
}; css_at_rule!("bottom-left"),
pub static ref CSS_PROPERTIES: HashSet<CssProperty> = vec![ css_at_rule!("bottom-left-corner"),
css_at_rule!("bottom-right"),
css_at_rule!("bottom-right-corner"),
css_at_rule!("font-face"),
css_at_rule!("left-bottom"),
css_at_rule!("left-middle"),
css_at_rule!("left-top"),
css_at_rule!("page"),
css_at_rule!("right-bottom"),
css_at_rule!("right-middle"),
css_at_rule!("right-top"),
css_at_rule!("top-center"),
css_at_rule!("top-left"),
css_at_rule!("top-left-corner"),
css_at_rule!("top-right"),
css_at_rule!("top-right-corner"),
css_at_rule!("-moz-keyframes"),
css_at_rule!("-o-keyframes"),
css_at_rule!("-webkit-keyframes"),
css_at_rule!("document"),
css_at_rule!("keyframes"),
css_at_rule!("media"),
css_at_rule!("supports"),
});
config.allowed_css_properties.extend(hashset! {
css_property!("-moz-appearance"), css_property!("-moz-appearance"),
css_property!("-moz-background-inline-policy"), css_property!("-moz-background-inline-policy"),
css_property!("-moz-box-sizing"), css_property!("-moz-box-sizing"),
@ -845,32 +842,7 @@ lazy_static! {
css_property!("wrap-through"), css_property!("wrap-through"),
css_property!("writing-mode"), css_property!("writing-mode"),
css_property!("z-index"), css_property!("z-index"),
].into_iter().collect(); });
pub static ref CSS_AT_RULES: HashSet<CssAtRule> = vec![ config
css_at_rule!("bottom-center"), };
css_at_rule!("bottom-left"),
css_at_rule!("bottom-left-corner"),
css_at_rule!("bottom-right"),
css_at_rule!("bottom-right-corner"),
css_at_rule!("font-face"),
css_at_rule!("left-bottom"),
css_at_rule!("left-middle"),
css_at_rule!("left-top"),
css_at_rule!("page"),
css_at_rule!("right-bottom"),
css_at_rule!("right-middle"),
css_at_rule!("right-top"),
css_at_rule!("top-center"),
css_at_rule!("top-left"),
css_at_rule!("top-left-corner"),
css_at_rule!("top-right"),
css_at_rule!("top-right-corner"),
css_at_rule!("-moz-keyframes"),
css_at_rule!("-o-keyframes"),
css_at_rule!("-webkit-keyframes"),
css_at_rule!("document"),
css_at_rule!("keyframes"),
css_at_rule!("media"),
css_at_rule!("supports"),
].into_iter().collect();
} }

16
src/config/restricted.rs Normal file
View File

@ -0,0 +1,16 @@
use crate::config::default::DEFAULT_CONFIG;
use crate::sanitizer::SanitizerConfig;
lazy_static! {
    /// Sanitizer configuration built by cloning `DEFAULT_CONFIG` and adding
    /// the `b`, `em`, `i`, `strong` and `u` elements to `allowed_elements`.
    pub static ref RESTRICTED_CONFIG: SanitizerConfig = {
        // Element names this profile adds to whatever the default config allows.
        let inline_formatting = vec![
            local_name!("b"),
            local_name!("em"),
            local_name!("i"),
            local_name!("strong"),
            local_name!("u"),
        ];

        let mut restricted = DEFAULT_CONFIG.clone();
        restricted.allowed_elements.extend(inline_formatting);
        restricted
    };
}

View File

@ -27,11 +27,11 @@ mod css_parser;
mod sanitizer; mod sanitizer;
use arena_dom::{create_element, Arena, NodeData, Ref}; use arena_dom::{create_element, Arena, NodeData, Ref};
use config::default::DEFAULT_CONFIG; use config::basic::BASIC_CONFIG;
use sanitizer::Sanitizer; use sanitizer::Sanitizer;
fn main() { fn main() {
let sanitizer = Sanitizer::new(&DEFAULT_CONFIG, vec![&add_spacer_elements_around_ul]); let sanitizer = Sanitizer::new(&BASIC_CONFIG, vec![&add_spacer_elements_around_ul]);
sanitizer sanitizer
.sanitize_fragment(&mut io::stdin(), &mut io::stdout()) .sanitize_fragment(&mut io::stdin(), &mut io::stdout())
.unwrap(); .unwrap();