Split up transform_node into separate functions
This commit is contained in:
parent
47bd10f508
commit
f1671c0758
258
src/main.rs
258
src/main.rs
@ -42,7 +42,17 @@ use css_property::CssProperty;
|
|||||||
use traverser::Traverser;
|
use traverser::Traverser;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let traverser = Traverser::new(&should_unwrap_node, vec![Box::new(&transform_node)]);
|
let traverser = Traverser::new(
|
||||||
|
&should_unwrap_node,
|
||||||
|
vec![
|
||||||
|
Box::new(&sanitize_style_tag_css),
|
||||||
|
Box::new(&sanitize_style_attribute_css),
|
||||||
|
Box::new(&remove_attributes),
|
||||||
|
Box::new(&add_attributes),
|
||||||
|
Box::new(&sanitize_attribute_protocols),
|
||||||
|
Box::new(&add_single_elements_around_ul),
|
||||||
|
],
|
||||||
|
);
|
||||||
let root = traverser.parse(&mut io::stdin()).unwrap();
|
let root = traverser.parse(&mut io::stdin()).unwrap();
|
||||||
traverser.traverse(root);
|
traverser.traverse(root);
|
||||||
serialize(&mut io::stdout(), root, Default::default()).expect("serialization failed")
|
serialize(&mut io::stdout(), root, Default::default()).expect("serialization failed")
|
||||||
@ -94,118 +104,170 @@ fn css_rules_to_string(rules: Vec<CssRule>) -> String {
|
|||||||
// TODO: scope selectors in rich formatter
|
// TODO: scope selectors in rich formatter
|
||||||
// TODO: add class attributes to elements in rich formatter
|
// TODO: add class attributes to elements in rich formatter
|
||||||
// TODO: separate this out into multiple separate transformers
|
// TODO: separate this out into multiple separate transformers
|
||||||
fn transform_node<'arena>(node: Ref<'arena>, arena: Arena<'arena>) -> bool {
|
// TODO: find a way to avoid passing the arena to transformer functions. It's an implementation
|
||||||
match node.data {
|
// detail that doesn't need to be exposed. Also, it's only needed for creating new elements.
|
||||||
NodeData::Document
|
fn sanitize_style_tag_css<'arena>(node: Ref<'arena>, arena: Arena<'arena>) -> bool {
|
||||||
| NodeData::Doctype { .. }
|
if let NodeData::Text { ref contents } = node.data {
|
||||||
| NodeData::Comment { .. }
|
// TODO: seems rather expensive to lookup the parent on every Text node. Better
|
||||||
| NodeData::ProcessingInstruction { .. } => false,
|
// solution would be to pass some sort of context from the parent that marks that this
|
||||||
NodeData::Text { ref contents } => {
|
// Text node is inside a <style>.
|
||||||
// TODO: seems rather expensive to lookup the parent on every Text node. Better
|
if let Some(parent) = node.parent.get() {
|
||||||
// solution would be to pass some sort of context from the parent that marks that this
|
if let NodeData::Element { ref name, .. } = parent.data {
|
||||||
// Text node is inside a <style>.
|
if name.local == local_name!("style") {
|
||||||
if let Some(parent) = node.parent.get() {
|
let rules = parse_css_stylesheet(&contents.borrow());
|
||||||
if let NodeData::Element { ref name, .. } = parent.data {
|
dbg!(&rules);
|
||||||
if name.local == local_name!("style") {
|
let sanitized_css = css_rules_to_string(rules);
|
||||||
let rules = parse_css_stylesheet(&contents.borrow());
|
dbg!(&sanitized_css);
|
||||||
dbg!(&rules);
|
contents.replace(StrTendril::from(sanitized_css));
|
||||||
let sanitized_css = css_rules_to_string(rules);
|
return true;
|
||||||
dbg!(&sanitized_css);
|
|
||||||
contents.replace(StrTendril::from(sanitized_css));
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
false
|
|
||||||
}
|
}
|
||||||
NodeData::Element {
|
}
|
||||||
ref attrs,
|
false
|
||||||
ref name,
|
}
|
||||||
..
|
|
||||||
} => {
|
|
||||||
let ref mut attrs = attrs.borrow_mut();
|
|
||||||
|
|
||||||
let mut allowed_attrs: HashSet<LocalName> = ALL_ATTRIBUTES.clone();
|
fn sanitize_style_attribute_css<'arena>(node: Ref<'arena>, arena: Arena<'arena>) -> bool {
|
||||||
if let Some(element_attrs) = ATTRIBUTES.get(&name.local) {
|
if let NodeData::Element { ref attrs, .. } = node.data {
|
||||||
allowed_attrs = allowed_attrs.union(element_attrs).cloned().collect();
|
let mut has_transformed = false;
|
||||||
|
for attr in attrs.borrow_mut().iter_mut() {
|
||||||
|
if attr.name.local == local_name!("style") {
|
||||||
|
let css_str = &attr.value;
|
||||||
|
let declarations = parse_css_style_attribute(css_str);
|
||||||
|
dbg!(&declarations);
|
||||||
|
let mut sanitized_css = String::new();
|
||||||
|
for declaration in declarations.into_iter() {
|
||||||
|
let declaration_string = &declaration.to_string();
|
||||||
|
if CSS_PROPERTIES.contains(&CssProperty::from(declaration.property)) {
|
||||||
|
sanitized_css += declaration_string;
|
||||||
|
sanitized_css += " ";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let sanitized_css = sanitized_css.trim();
|
||||||
|
dbg!(&sanitized_css);
|
||||||
|
attr.value = StrTendril::from(sanitized_css);
|
||||||
|
has_transformed = true;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
return has_transformed;
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
fn remove_attributes<'arena>(node: Ref<'arena>, arena: Arena<'arena>) -> bool {
|
||||||
|
if let NodeData::Element {
|
||||||
|
ref attrs,
|
||||||
|
ref name,
|
||||||
|
..
|
||||||
|
} = node.data
|
||||||
|
{
|
||||||
|
let mut has_transformed = false;
|
||||||
|
let ref mut attrs = attrs.borrow_mut();
|
||||||
|
let mut allowed_attrs: HashSet<LocalName> = ALL_ATTRIBUTES.clone();
|
||||||
|
if let Some(element_attrs) = ATTRIBUTES.get(&name.local) {
|
||||||
|
allowed_attrs = allowed_attrs.union(element_attrs).cloned().collect();
|
||||||
|
}
|
||||||
|
let mut i = 0;
|
||||||
|
|
||||||
|
while i != attrs.len() {
|
||||||
|
if !allowed_attrs.contains(&attrs[i].name.local) {
|
||||||
|
attrs.remove(i);
|
||||||
|
has_transformed = true;
|
||||||
|
}
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
return has_transformed;
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_attributes<'arena>(node: Ref<'arena>, arena: Arena<'arena>) -> bool {
|
||||||
|
if let NodeData::Element {
|
||||||
|
ref attrs,
|
||||||
|
ref name,
|
||||||
|
..
|
||||||
|
} = node.data
|
||||||
|
{
|
||||||
|
let mut has_transformed = false;
|
||||||
|
let ref mut attrs = attrs.borrow_mut();
|
||||||
|
|
||||||
|
if let Some(add_attributes) = ADD_ATTRIBUTES.get(&name.local) {
|
||||||
|
for (name, &value) in add_attributes.iter() {
|
||||||
|
attrs.push(Attribute {
|
||||||
|
name: QualName::new(None, ns!(), name.clone()),
|
||||||
|
value: StrTendril::from(value),
|
||||||
|
});
|
||||||
|
has_transformed = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return has_transformed;
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
fn sanitize_attribute_protocols<'arena>(node: Ref<'arena>, arena: Arena<'arena>) -> bool {
|
||||||
|
if let NodeData::Element {
|
||||||
|
ref attrs,
|
||||||
|
ref name,
|
||||||
|
..
|
||||||
|
} = node.data
|
||||||
|
{
|
||||||
|
let mut has_transformed = false;
|
||||||
|
let ref mut attrs = attrs.borrow_mut();
|
||||||
|
|
||||||
|
if let Some(protocols) = PROTOCOLS.get(&name.local) {
|
||||||
let mut i = 0;
|
let mut i = 0;
|
||||||
while i != attrs.len() {
|
while i != attrs.len() {
|
||||||
if !allowed_attrs.contains(&attrs[i].name.local) {
|
if let Some(allowed_protocols) = protocols.get(&attrs[i].name.local) {
|
||||||
attrs.remove(i);
|
match Url::parse(&attrs[i].value) {
|
||||||
} else {
|
Ok(url) => {
|
||||||
if attrs[i].name.local == local_name!("style") {
|
if !allowed_protocols.contains(url.scheme()) {
|
||||||
let css_str = &attrs[i].value;
|
attrs.remove(i);
|
||||||
let declarations = parse_css_style_attribute(css_str);
|
has_transformed = true;
|
||||||
dbg!(&declarations);
|
} else {
|
||||||
let mut sanitized_css = String::new();
|
i += 1;
|
||||||
for declaration in declarations.into_iter() {
|
|
||||||
let declaration_string = &declaration.to_string();
|
|
||||||
if CSS_PROPERTIES.contains(&CssProperty::from(declaration.property)) {
|
|
||||||
sanitized_css += declaration_string;
|
|
||||||
sanitized_css += " ";
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let sanitized_css = sanitized_css.trim();
|
Err(ParseError::RelativeUrlWithoutBase) => {
|
||||||
dbg!(&sanitized_css);
|
attrs[i].value = StrTendril::from(format!("http://{}", attrs[i].value));
|
||||||
attrs[i].value = StrTendril::from(sanitized_css);
|
has_transformed = true;
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
attrs.remove(i);
|
||||||
|
has_transformed = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
i += 1;
|
i += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return has_transformed;
|
||||||
if let Some(add_attributes) = ADD_ATTRIBUTES.get(&name.local) {
|
|
||||||
for (name, &value) in add_attributes.iter() {
|
|
||||||
attrs.push(Attribute {
|
|
||||||
name: QualName::new(None, ns!(), name.clone()),
|
|
||||||
value: StrTendril::from(value),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(protocols) = PROTOCOLS.get(&name.local) {
|
|
||||||
let mut i = 0;
|
|
||||||
while i != attrs.len() {
|
|
||||||
if let Some(allowed_protocols) = protocols.get(&attrs[i].name.local) {
|
|
||||||
match Url::parse(&attrs[i].value) {
|
|
||||||
Ok(url) => {
|
|
||||||
if !allowed_protocols.contains(url.scheme()) {
|
|
||||||
attrs.remove(i);
|
|
||||||
} else {
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(ParseError::RelativeUrlWithoutBase) => {
|
|
||||||
attrs[i].value =
|
|
||||||
StrTendril::from(format!("http://{}", attrs[i].value));
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
Err(_) => {
|
|
||||||
attrs.remove(i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
match name.local {
|
|
||||||
local_name!("ul") => {
|
|
||||||
node.insert_before(create_element(
|
|
||||||
arena,
|
|
||||||
QualName::new(None, ns!(), LocalName::from("single")),
|
|
||||||
));
|
|
||||||
node.insert_after(create_element(
|
|
||||||
arena,
|
|
||||||
QualName::new(None, ns!(), LocalName::from("single")),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
_ => {}
|
|
||||||
}
|
|
||||||
false
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_single_elements_around_ul<'arena>(node: Ref<'arena>, arena: Arena<'arena>) -> bool {
|
||||||
|
if let NodeData::Element {
|
||||||
|
ref attrs,
|
||||||
|
ref name,
|
||||||
|
..
|
||||||
|
} = node.data
|
||||||
|
{
|
||||||
|
if let local_name!("ul") = name.local {
|
||||||
|
node.insert_before(create_element(
|
||||||
|
arena,
|
||||||
|
QualName::new(None, ns!(), LocalName::from("single")),
|
||||||
|
));
|
||||||
|
node.insert_after(create_element(
|
||||||
|
arena,
|
||||||
|
QualName::new(None, ns!(), LocalName::from("single")),
|
||||||
|
));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
}
|
}
|
||||||
|
|
||||||
fn should_unwrap_node(node: Ref) -> bool {
|
fn should_unwrap_node(node: Ref) -> bool {
|
||||||
|
@ -4,7 +4,7 @@ use std::io::{self, Error, Read};
|
|||||||
|
|
||||||
use html5ever::{serialize, Attribute, LocalName, QualName};
|
use html5ever::{serialize, Attribute, LocalName, QualName};
|
||||||
|
|
||||||
use crate::arena_dom::{html5ever_parse_slice_into_arena, Arena, Node, Ref};
|
use crate::arena_dom::{create_element, html5ever_parse_slice_into_arena, Arena, Node, Ref};
|
||||||
|
|
||||||
// TODO: I don't love the "Traverser" name. Should maybe come up with something else.
|
// TODO: I don't love the "Traverser" name. Should maybe come up with something else.
|
||||||
// (it also unwraps nodes and calls transformer functions... does a lot more than traverse)
|
// (it also unwraps nodes and calls transformer functions... does a lot more than traverse)
|
||||||
@ -63,6 +63,14 @@ where
|
|||||||
self.traverse(sibling);
|
self.traverse(sibling);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: how to call this from transformer functions?
|
||||||
|
pub fn create_element(&'arena self, name: &str) -> Ref<'arena> {
|
||||||
|
create_element(
|
||||||
|
&self.arena,
|
||||||
|
QualName::new(None, ns!(), LocalName::from(name)),
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
Loading…
Reference in New Issue
Block a user