Experimenting with Transformer as a trait

This commit is contained in:
Tyler Hallada 2020-04-19 16:25:05 -04:00
parent 446aff77af
commit d59c7df55d
2 changed files with 97 additions and 89 deletions

View File

@@ -39,20 +39,24 @@ use config::relaxed::{CSS_AT_RULES, CSS_PROPERTIES};
use css_at_rule::CssAtRule; use css_at_rule::CssAtRule;
use css_parser::{parse_css_style_attribute, parse_css_stylesheet, CssRule}; use css_parser::{parse_css_style_attribute, parse_css_stylesheet, CssRule};
use css_property::CssProperty; use css_property::CssProperty;
use transformer::Transformer; use transformer::{Transformer, TransformerState};
fn main() { fn main() {
let transformer = Transformer::new( // let transformer = Transformer::new(
&should_unwrap_node, // &should_unwrap_node,
vec![ // vec![
&sanitize_style_tag_css, // &sanitize_style_tag_css,
&sanitize_style_attribute_css, // &sanitize_style_attribute_css,
&remove_attributes, // &remove_attributes,
&add_attributes, // &add_attributes,
&sanitize_attribute_protocols, // &sanitize_attribute_protocols,
&add_single_elements_around_ul, // &add_single_elements_around_ul,
], // ],
); // );
// let root = transformer.parse_fragment(&mut io::stdin()).unwrap();
// transformer.traverse(root);
// serialize(&mut io::stdout(), root, Default::default()).expect("serialization failed")
let transformer = TransformerState::new();
let root = transformer.parse_fragment(&mut io::stdin()).unwrap(); let root = transformer.parse_fragment(&mut io::stdin()).unwrap();
transformer.traverse(root); transformer.traverse(root);
serialize(&mut io::stdout(), root, Default::default()).expect("serialization failed") serialize(&mut io::stdout(), root, Default::default()).expect("serialization failed")

View File

@@ -9,36 +9,21 @@ use crate::arena_dom::{Arena, Node, NodeData, Ref, Sink};
// TODO: What are the performance implications of using a vec of closures instead of one // TODO: What are the performance implications of using a vec of closures instead of one
// transformer function whose size is known at compile time (U: Fn(Ref<'arena>) -> bool)? // transformer function whose size is known at compile time (U: Fn(Ref<'arena>) -> bool)?
// TODO: how to integrate CSS parsing and transforming? // TODO: how to integrate CSS parsing and transforming?
pub struct Transformer<'arena, T> pub struct TransformerState<'arena> {
where
T: Fn(Ref) -> bool,
{
arena: typed_arena::Arena<Node<'arena>>, arena: typed_arena::Arena<Node<'arena>>,
should_unwrap: T,
transformer_fns: Vec<&'arena dyn Fn(Ref<'arena>, Arena<'arena>)>,
} }
impl<'arena, T> Transformer<'arena, T> pub trait Transformer<'arena> {
where fn new() -> Self;
T: Fn(Ref) -> bool,
{
pub fn new(
should_unwrap: T,
transformers: Vec<&'arena dyn Fn(Ref<'arena>, Arena<'arena>)>,
) -> Transformer<'arena, T> {
Transformer {
arena: typed_arena::Arena::new(),
should_unwrap,
transformer_fns: transformers,
}
}
pub fn parse_document(&'arena self, data: &mut impl Read) -> Result<Ref<'arena>, Error> { fn arena(&'arena self) -> &'arena typed_arena::Arena<Node<'arena>>;
fn parse_document(&'arena self, data: &mut impl Read) -> Result<Ref<'arena>, Error> {
let mut bytes = Vec::new(); let mut bytes = Vec::new();
data.read_to_end(&mut bytes)?; data.read_to_end(&mut bytes)?;
let sink = Sink { let sink = Sink {
arena: &self.arena, arena: self.arena(),
document: self.arena.alloc(Node::new(NodeData::Document)), document: self.arena().alloc(Node::new(NodeData::Document)),
quirks_mode: QuirksMode::NoQuirks, quirks_mode: QuirksMode::NoQuirks,
}; };
Ok(parse_document(sink, Default::default()) Ok(parse_document(sink, Default::default())
@@ -46,12 +31,12 @@ where
.one(&bytes[..])) .one(&bytes[..]))
} }
pub fn parse_fragment(&'arena self, data: &mut impl Read) -> Result<Ref<'arena>, Error> { fn parse_fragment(&'arena self, data: &mut impl Read) -> Result<Ref<'arena>, Error> {
let mut bytes = Vec::new(); let mut bytes = Vec::new();
data.read_to_end(&mut bytes)?; data.read_to_end(&mut bytes)?;
let sink = Sink { let sink = Sink {
arena: &self.arena, arena: self.arena(),
document: self.arena.alloc(Node::new(NodeData::Document)), document: self.arena().alloc(Node::new(NodeData::Document)),
quirks_mode: QuirksMode::NoQuirks, quirks_mode: QuirksMode::NoQuirks,
}; };
Ok(parse_fragment( Ok(parse_fragment(
@@ -64,8 +49,8 @@ where
.one(&bytes[..])) .one(&bytes[..]))
} }
pub fn traverse(&'arena self, node: Ref<'arena>) { fn traverse(&'arena self, node: Ref<'arena>) {
if (self.should_unwrap)(node) { if self.should_unwrap(node) {
if let Some(unwrapped_node) = node.unwrap() { if let Some(unwrapped_node) = node.unwrap() {
return self.traverse(unwrapped_node); return self.traverse(unwrapped_node);
} else { } else {
@@ -73,9 +58,10 @@ where
} }
} }
for transformer in self.transformer_fns.iter() { // for transformer in self.transformer_fns.iter() {
transformer(node, &self.arena); // transformer(node, &self.arena);
} // }
self.transform(node);
if let Some(child) = node.first_child.get() { if let Some(child) = node.first_child.get() {
self.traverse(child); self.traverse(child);
@@ -85,6 +71,24 @@ where
self.traverse(sibling); self.traverse(sibling);
} }
} }
fn transform(&'arena self, node: Ref<'arena>) {}
fn should_unwrap(&'arena self, node: Ref<'arena>) -> bool {
false
}
}
impl<'arena> Transformer<'arena> for TransformerState<'arena> {
fn new() -> TransformerState<'arena> {
TransformerState {
arena: typed_arena::Arena::new(),
}
}
fn arena(&'arena self) -> &'arena typed_arena::Arena<Node<'arena>> {
&self.arena
}
} }
#[cfg(test)] #[cfg(test)]
@@ -154,53 +158,53 @@ mod test {
// count // count
// } // }
fn assert_serialized_html_eq(node: Ref, expected: &str) { // fn assert_serialized_html_eq(node: Ref, expected: &str) {
let mut output = vec![]; // let mut output = vec![];
serialize(&mut output, node, Default::default()).unwrap(); // serialize(&mut output, node, Default::default()).unwrap();
assert_eq!(str::from_utf8(&output).unwrap(), expected); // assert_eq!(str::from_utf8(&output).unwrap(), expected);
} // }
#[test] // #[test]
fn traversal() { // fn traversal() {
let transformer = Transformer::new(|_| false, vec![&|_, _| {}]); // let transformer = Transformer::new(|_| false, vec![&|_, _| {}]);
let mut mock_data = MockRead::new("<div></div>"); // let mut mock_data = MockRead::new("<div></div>");
let root = transformer.parse_fragment(&mut mock_data).unwrap(); // let root = transformer.parse_fragment(&mut mock_data).unwrap();
transformer.traverse(root); // transformer.traverse(root);
assert_serialized_html_eq(root, "<html><div></div></html>"); // assert_serialized_html_eq(root, "<html><div></div></html>");
} // }
#[test] // #[test]
fn unwraps_element() { // fn unwraps_element() {
let transformer = Transformer::new( // let transformer = Transformer::new(
|node| { // |node| {
if let NodeData::Element { ref name, .. } = node.data { // if let NodeData::Element { ref name, .. } = node.data {
return name.local == local_name!("div"); // return name.local == local_name!("div");
} // }
false // false
}, // },
vec![&|_, _| {}], // vec![&|_, _| {}],
); // );
let mut mock_data = MockRead::new("<div></div>"); // let mut mock_data = MockRead::new("<div></div>");
let root = transformer.parse_fragment(&mut mock_data).unwrap(); // let root = transformer.parse_fragment(&mut mock_data).unwrap();
transformer.traverse(root); // transformer.traverse(root);
assert_serialized_html_eq(root, "<html></html>"); // assert_serialized_html_eq(root, "<html></html>");
} // }
#[test] // #[test]
fn adds_element() { // fn adds_element() {
let transformer = Transformer::new( // let transformer = Transformer::new(
|_| false, // |_| false,
vec![&|node, arena| { // vec![&|node, arena| {
if let NodeData::Element { ref name, .. } = node.data { // if let NodeData::Element { ref name, .. } = node.data {
if let local_name!("div") = name.local { // if let local_name!("div") = name.local {
node.insert_after(create_element(arena, "span")); // node.insert_after(create_element(arena, "span"));
} // }
} // }
}], // }],
); // );
let mut mock_data = MockRead::new("<div></div>"); // let mut mock_data = MockRead::new("<div></div>");
let root = transformer.parse_fragment(&mut mock_data).unwrap(); // let root = transformer.parse_fragment(&mut mock_data).unwrap();
transformer.traverse(root); // transformer.traverse(root);
assert_serialized_html_eq(root, "<html><div></div><span></span></html>"); // assert_serialized_html_eq(root, "<html><div></div><span></span></html>");
} // }
} }