Clean up sanitizer configs

This commit is contained in:
Tyler Hallada 2020-04-25 22:11:59 -04:00
parent 00593d3c58
commit 6bdb5f97ea
6 changed files with 875 additions and 1069 deletions

View File

@ -1,9 +1,12 @@
use html5ever::LocalName;
use std::collections::{HashMap, HashSet};
use crate::config::restricted::RESTRICTED_CONFIG;
use crate::sanitizer::{Protocol, SanitizerConfig};
lazy_static! {
pub static ref ELEMENTS: HashSet<LocalName> = hashset! {
pub static ref BASIC_CONFIG: SanitizerConfig = {
let mut config = RESTRICTED_CONFIG.clone();
config.allowed_elements.extend(hashset! {
local_name!("a"),
local_name!("abbr"),
local_name!("blockquote"),
@ -26,48 +29,40 @@ lazy_static! {
local_name!("small"),
local_name!("strike"),
local_name!("sub"),
local_name!("sup"),
local_name!("time"),
local_name!("ul"),
local_name!("var"),
});
// Per-element attribute allowlist layered on top of whatever the cloned
// RESTRICTED_CONFIG already carries: each key is an element name and each
// value is the set of attribute names registered for that element.
// `pubdate` is built with `LocalName::from` instead of `local_name!`
// (presumably because it is absent from html5ever's static atom table —
// TODO confirm).
config.allowed_attributes_per_element.extend(hashmap! {
local_name!("a") => hashset! { local_name!("href") },
local_name!("abbr") => hashset! { local_name!("title") },
local_name!("blockquote") => hashset! { local_name!("cite") },
local_name!("dfn") => hashset! { local_name!("title") },
local_name!("q") => hashset! { local_name!("cite") },
local_name!("time") => hashset! { local_name!("datetime"), LocalName::from("pubdate") },
});
// Attribute/value pairs registered for addition, keyed by element.
// For `a` the pair is `rel` => "nofollow": this is the value the pre-refactor
// `ADD_ATTRIBUTES` table held, and "href" is an attribute name, not a valid
// `rel` link type.
config.add_attributes_per_element.extend(hashmap! {
local_name!("a") => hashmap! { local_name!("rel") => "nofollow" },
});
// URL protocol allowlist, keyed first by element and then by attribute.
// `a[href]` lists ftp, http, https and mailto; the `cite` attribute of
// `blockquote` and `q` lists only http and https.
// Every entry also includes `Protocol::Relative` (presumably relative /
// scheme-less URLs — TODO confirm against the `Protocol` definition in
// crate::sanitizer).
config.allowed_protocols.extend(hashmap! {
local_name!("a") => hashmap! { local_name!("href") => hashset! {
Protocol::Scheme("ftp"),
Protocol::Scheme("http"),
Protocol::Scheme("https"),
Protocol::Scheme("mailto"),
Protocol::Relative,
}},
local_name!("blockquote") => hashmap! { local_name!("cite") => hashset! {
Protocol::Scheme("http"),
Protocol::Scheme("https"),
Protocol::Relative,
}},
local_name!("q") => hashmap! { local_name!("cite") => hashset! {
Protocol::Scheme("http"),
Protocol::Scheme("https"),
Protocol::Relative,
}},
});
config
};
pub static ref ALL_ATTRIBUTES: HashSet<LocalName> = hashset! {};
pub static ref ATTRIBUTES: HashMap<LocalName, HashSet<LocalName>> = hashmap! {
local_name!("a") => hashset!{
local_name!("href"),
},
local_name!("abbr") => hashset!{
local_name!("title"),
},
local_name!("blockquote") => hashset!{
local_name!("cite"),
},
local_name!("dfn") => hashset!{
local_name!("title"),
},
local_name!("q") => hashset!{
local_name!("cite"),
},
local_name!("time") => hashset!{
local_name!("datetime"),
LocalName::from("pubdate"),
},
};
pub static ref ADD_ATTRIBUTES: HashMap<LocalName, HashMap<LocalName, &'static str>> = hashmap! {
local_name!("a") => hashmap! {
local_name!("rel") => "nofollow",
},
};
pub static ref PROTOCOLS: HashMap<LocalName, HashMap<LocalName, HashSet<&'static str>>> = hashmap! {
local_name!("a") => hashmap! {
local_name!("href") => hashset!{"ftp", "http", "https", "mailto"},
},
local_name!("blockquote") => hashmap! {
local_name!("cite") => hashset!{"http", "https"},
},
local_name!("q") => hashmap! {
local_name!("cite") => hashset!{"http", "https"},
},
};
pub static ref CSS_PROPERTIES: Vec<String> = vec![];
}

View File

@ -1,4 +1,4 @@
pub mod default;
pub mod basic;
pub mod default;
pub mod relaxed;
pub mod permissive;
pub mod restricted;

View File

@ -1,177 +0,0 @@
use html5ever::LocalName;
use std::collections::{HashMap, HashSet};
use super::relaxed::{
ADD_ATTRIBUTES as RELAXED_ADD_ATTRIBUTES, ALL_ATTRIBUTES as RELAXED_ALL_ATTRIBUTES,
ATTRIBUTES as RELAXED_ATTRIBUTES, ELEMENTS as RELAXED_ELEMENTS,
};
lazy_static! {
pub static ref ELEMENTS: HashSet<LocalName> = RELAXED_ELEMENTS
.union(&hashset!(
local_name!("acronym"),
local_name!("basefont"),
local_name!("big"),
local_name!("blink"),
local_name!("center"),
LocalName::from("command"),
local_name!("dir"),
local_name!("font"),
local_name!("marquee"),
local_name!("strike"),
local_name!("tt"),
local_name!("form"),
local_name!("input"),
local_name!("button"),
LocalName::from("single"),
LocalName::from("double"),
))
.into_iter()
.cloned()
.collect();
pub static ref ALL_ATTRIBUTES: HashSet<LocalName> = RELAXED_ALL_ATTRIBUTES
.union(&hashset! {
local_name!("bgcolor"),
local_name!("width"),
local_name!("height"),
local_name!("border"),
local_name!("color"),
local_name!("background"),
})
.into_iter()
.cloned()
.collect();
// Can't figure out how to merge HashMaps :(
pub static ref ATTRIBUTES: HashMap<LocalName, HashSet<LocalName>> = hashmap! {
local_name!("a") => hashset!{
local_name!("href"),
local_name!("hreflang"),
local_name!("name"),
local_name!("rel"),
},
local_name!("abbr") => hashset!{
local_name!("title"),
},
local_name!("blockquote") => hashset!{
local_name!("cite"),
},
local_name!("button") => hashset!{
local_name!("type"),
},
local_name!("col") => hashset!{
local_name!("span"),
local_name!("width"),
},
local_name!("colgroup") => hashset!{
local_name!("span"),
local_name!("width"),
},
local_name!("data") => hashset!{
local_name!("value"),
},
local_name!("del") => hashset!{
local_name!("cite"),
local_name!("datetime"),
},
local_name!("dfn") => hashset!{
local_name!("title"),
},
local_name!("img") => hashset!{
local_name!("align"),
local_name!("alt"),
local_name!("border"),
local_name!("height"),
local_name!("src"),
local_name!("srcset"),
local_name!("width"),
},
local_name!("input") => hashset!{
local_name!("type"),
local_name!("name"),
local_name!("value"),
},
local_name!("ins") => hashset!{
local_name!("cite"),
local_name!("datetime"),
},
local_name!("li") => hashset!{
local_name!("value"),
},
local_name!("ol") => hashset!{
LocalName::from("reversed"),
local_name!("start"),
local_name!("type"),
},
local_name!("q") => hashset!{
local_name!("cite"),
},
local_name!("style") => hashset!{
local_name!("media"),
local_name!("scoped"),
local_name!("type"),
},
local_name!("table") => hashset!{
local_name!("align"),
local_name!("bgcolor"),
local_name!("border"),
local_name!("cellpadding"),
local_name!("cellspacing"),
local_name!("frame"),
local_name!("rules"),
LocalName::from("sortable"),
local_name!("summary"),
local_name!("width"),
},
local_name!("td") => hashset!{
local_name!("abbr"),
local_name!("align"),
local_name!("axis"),
local_name!("colspan"),
local_name!("headers"),
local_name!("rowspan"),
local_name!("valign"),
local_name!("width"),
},
local_name!("th") => hashset!{
local_name!("abbr"),
local_name!("align"),
local_name!("axis"),
local_name!("colspan"),
local_name!("headers"),
local_name!("rowspan"),
local_name!("scope"),
LocalName::from("sorted"),
local_name!("valign"),
local_name!("width"),
},
local_name!("time") => hashset!{
local_name!("datetime"),
LocalName::from("pubdate"),
},
local_name!("ul") => hashset!{
local_name!("type"),
},
};
pub static ref ADD_ATTRIBUTES: HashMap<LocalName, HashMap<LocalName, &'static str>> = RELAXED_ADD_ATTRIBUTES.clone();
pub static ref PROTOCOLS: HashMap<LocalName, HashMap<LocalName, HashSet<&'static str>>> = hashmap! {
local_name!("a") => hashmap! {
local_name!("href") => hashset!{"ftp", "http", "https", "mailto"},
},
local_name!("blockquote") => hashmap! {
local_name!("cite") => hashset!{"http", "https"},
},
local_name!("del") => hashmap! {
local_name!("cite") => hashset!{"http", "https"},
},
local_name!("img") => hashmap! {
local_name!("src") => hashset!{"http", "https"},
},
local_name!("ins") => hashmap! {
local_name!("cite") => hashset!{"http", "https"},
},
local_name!("q") => hashmap! {
local_name!("cite") => hashset!{"http", "https"},
},
};
}

View File

@ -1,20 +1,12 @@
use html5ever::LocalName;
use std::collections::{HashMap, HashSet};
use std::iter::FromIterator;
use super::basic::{
ADD_ATTRIBUTES as BASIC_ADD_ATTRIBUTES, ALL_ATTRIBUTES as BASIC_ALL_ATTRIBUTES,
ATTRIBUTES as BASIC_ATTRIBUTES, CSS_PROPERTIES as BASIC_CSS_PROPERTIES,
ELEMENTS as BASIC_ELEMENTS,
};
use crate::css_property::CssProperty;
use crate::css_at_rule::CssAtRule;
use crate::config::basic::BASIC_CONFIG;
use crate::sanitizer::{Protocol, SanitizerConfig};
lazy_static! {
pub static ref ELEMENTS: HashSet<LocalName> = BASIC_ELEMENTS
.union(&hashset!(
pub static ref RELAXED_CONFIG: SanitizerConfig = {
let mut config = BASIC_CONFIG.clone();
config.allowed_elements.extend(hashset! {
local_name!("address"),
local_name!("article"),
local_name!("aside"),
@ -62,11 +54,9 @@ lazy_static! {
local_name!("title"),
local_name!("tr"),
local_name!("wbr"),
))
.into_iter()
.cloned()
.collect();
pub static ref ALL_ATTRIBUTES: HashSet<LocalName> = BASIC_ALL_ATTRIBUTES.union(&hashset! {
});
config.allow_doctype = true;
config.allowed_attributes.extend(hashset! {
local_name!("class"),
local_name!("dir"),
local_name!("hidden"),
@ -76,21 +66,14 @@ lazy_static! {
local_name!("tabindex"),
local_name!("title"),
LocalName::from("translate"),
}).into_iter().cloned().collect();
// Can't figure out how to merge HashMaps :(
pub static ref ATTRIBUTES: HashMap<LocalName, HashSet<LocalName>> = hashmap! {
});
config.allowed_attributes_per_element.extend(hashmap! {
local_name!("a") => hashset!{
local_name!("href"),
local_name!("hreflang"),
local_name!("name"),
local_name!("rel"),
},
local_name!("abbr") => hashset!{
local_name!("title"),
},
local_name!("blockquote") => hashset!{
local_name!("cite"),
},
local_name!("col") => hashset!{
local_name!("span"),
local_name!("width"),
@ -106,9 +89,6 @@ lazy_static! {
local_name!("cite"),
local_name!("datetime"),
},
local_name!("dfn") => hashset!{
local_name!("title"),
},
local_name!("img") => hashset!{
local_name!("align"),
local_name!("alt"),
@ -130,9 +110,6 @@ lazy_static! {
local_name!("start"),
local_name!("type"),
},
local_name!("q") => hashset!{
local_name!("cite"),
},
local_name!("style") => hashset!{
local_name!("media"),
local_name!("scoped"),
@ -172,36 +149,56 @@ lazy_static! {
local_name!("valign"),
local_name!("width"),
},
local_name!("time") => hashset!{
local_name!("datetime"),
LocalName::from("pubdate"),
},
local_name!("ul") => hashset!{
local_name!("type"),
},
};
pub static ref ADD_ATTRIBUTES: HashMap<LocalName, HashMap<LocalName, &'static str>> = BASIC_ADD_ATTRIBUTES.clone();
pub static ref PROTOCOLS: HashMap<LocalName, HashMap<LocalName, HashSet<&'static str>>> = hashmap! {
local_name!("a") => hashmap! {
local_name!("href") => hashset!{"ftp", "http", "https", "mailto"},
},
local_name!("blockquote") => hashmap! {
local_name!("cite") => hashset!{"http", "https"},
},
local_name!("del") => hashmap! {
local_name!("cite") => hashset!{"http", "https"},
},
local_name!("img") => hashmap! {
local_name!("src") => hashset!{"http", "https"},
},
local_name!("ins") => hashmap! {
local_name!("cite") => hashset!{"http", "https"},
},
local_name!("q") => hashmap! {
local_name!("cite") => hashset!{"http", "https"},
},
};
pub static ref CSS_PROPERTIES: HashSet<CssProperty> = vec![
});
// Protocol allowlist entries added for the relaxed configuration, keyed by
// element and then by attribute: `del[cite]`, `img[src]` and `ins[cite]`
// each list http, https and `Protocol::Relative`.
// NOTE(review): `extend` on a map replaces the value of an existing key
// rather than merging nested maps; none of these three keys appear in the
// visible BASIC_CONFIG protocol table, but confirm no base config defines them.
config.allowed_protocols.extend(hashmap! {
local_name!("del") => hashmap! { local_name!("cite") => hashset! {
Protocol::Scheme("http"),
Protocol::Scheme("https"),
Protocol::Relative,
}},
local_name!("img") => hashmap! { local_name!("src") => hashset! {
Protocol::Scheme("http"),
Protocol::Scheme("https"),
Protocol::Relative,
}},
local_name!("ins") => hashmap! { local_name!("cite") => hashset! {
Protocol::Scheme("http"),
Protocol::Scheme("https"),
Protocol::Relative,
}},
});
// Turn on the `allow_css_comments` flag (presumably lets CSS comments survive
// sanitization — TODO confirm against the sanitizer implementation).
config.allow_css_comments = true;
// Register the CSS at-rules for this configuration. The list is the same set
// of names the removed `CSS_AT_RULES` static contained: the page-margin box
// rules, `font-face`, `page`, the prefixed and unprefixed `keyframes` rules,
// `document`, `media` and `supports`.
config.allowed_css_at_rules.extend(hashset! {
css_at_rule!("bottom-center"),
css_at_rule!("bottom-left"),
css_at_rule!("bottom-left-corner"),
css_at_rule!("bottom-right"),
css_at_rule!("bottom-right-corner"),
css_at_rule!("font-face"),
css_at_rule!("left-bottom"),
css_at_rule!("left-middle"),
css_at_rule!("left-top"),
css_at_rule!("page"),
css_at_rule!("right-bottom"),
css_at_rule!("right-middle"),
css_at_rule!("right-top"),
css_at_rule!("top-center"),
css_at_rule!("top-left"),
css_at_rule!("top-left-corner"),
css_at_rule!("top-right"),
css_at_rule!("top-right-corner"),
css_at_rule!("-moz-keyframes"),
css_at_rule!("-o-keyframes"),
css_at_rule!("-webkit-keyframes"),
css_at_rule!("document"),
css_at_rule!("keyframes"),
css_at_rule!("media"),
css_at_rule!("supports"),
});
config.allowed_css_properties.extend(hashset! {
css_property!("-moz-appearance"),
css_property!("-moz-background-inline-policy"),
css_property!("-moz-box-sizing"),
@ -845,32 +842,7 @@ lazy_static! {
css_property!("wrap-through"),
css_property!("writing-mode"),
css_property!("z-index"),
].into_iter().collect();
pub static ref CSS_AT_RULES: HashSet<CssAtRule> = vec![
css_at_rule!("bottom-center"),
css_at_rule!("bottom-left"),
css_at_rule!("bottom-left-corner"),
css_at_rule!("bottom-right"),
css_at_rule!("bottom-right-corner"),
css_at_rule!("font-face"),
css_at_rule!("left-bottom"),
css_at_rule!("left-middle"),
css_at_rule!("left-top"),
css_at_rule!("page"),
css_at_rule!("right-bottom"),
css_at_rule!("right-middle"),
css_at_rule!("right-top"),
css_at_rule!("top-center"),
css_at_rule!("top-left"),
css_at_rule!("top-left-corner"),
css_at_rule!("top-right"),
css_at_rule!("top-right-corner"),
css_at_rule!("-moz-keyframes"),
css_at_rule!("-o-keyframes"),
css_at_rule!("-webkit-keyframes"),
css_at_rule!("document"),
css_at_rule!("keyframes"),
css_at_rule!("media"),
css_at_rule!("supports"),
].into_iter().collect();
});
config
};
}

16
src/config/restricted.rs Normal file
View File

@ -0,0 +1,16 @@
use crate::config::default::DEFAULT_CONFIG;
use crate::sanitizer::SanitizerConfig;
lazy_static! {
    /// Sanitizer configuration built from a clone of [`DEFAULT_CONFIG`] with
    /// the elements `b`, `em`, `i`, `strong` and `u` added to
    /// `allowed_elements`. No other field of the cloned configuration is
    /// modified here.
    pub static ref RESTRICTED_CONFIG: SanitizerConfig = {
        let mut restricted = DEFAULT_CONFIG.clone();
        // The only elements this configuration adds over its base.
        let inline_elements = vec![
            local_name!("b"),
            local_name!("em"),
            local_name!("i"),
            local_name!("strong"),
            local_name!("u"),
        ];
        restricted.allowed_elements.extend(inline_elements);
        restricted
    };
}

View File

@ -27,11 +27,11 @@ mod css_parser;
mod sanitizer;
use arena_dom::{create_element, Arena, NodeData, Ref};
use config::default::DEFAULT_CONFIG;
use config::basic::BASIC_CONFIG;
use sanitizer::Sanitizer;
fn main() {
let sanitizer = Sanitizer::new(&DEFAULT_CONFIG, vec![&add_spacer_elements_around_ul]);
let sanitizer = Sanitizer::new(&BASIC_CONFIG, vec![&add_spacer_elements_around_ul]);
sanitizer
.sanitize_fragment(&mut io::stdin(), &mut io::stdout())
.unwrap();