Experimenting with Transformer as a trait

This commit is contained in:
Tyler Hallada 2020-04-19 16:25:05 -04:00
parent 446aff77af
commit d59c7df55d
2 changed files with 97 additions and 89 deletions

View File

@ -39,20 +39,24 @@ use config::relaxed::{CSS_AT_RULES, CSS_PROPERTIES};
use css_at_rule::CssAtRule;
use css_parser::{parse_css_style_attribute, parse_css_stylesheet, CssRule};
use css_property::CssProperty;
use transformer::Transformer;
use transformer::{Transformer, TransformerState};
fn main() {
let transformer = Transformer::new(
&should_unwrap_node,
vec![
&sanitize_style_tag_css,
&sanitize_style_attribute_css,
&remove_attributes,
&add_attributes,
&sanitize_attribute_protocols,
&add_single_elements_around_ul,
],
);
// let transformer = Transformer::new(
// &should_unwrap_node,
// vec![
// &sanitize_style_tag_css,
// &sanitize_style_attribute_css,
// &remove_attributes,
// &add_attributes,
// &sanitize_attribute_protocols,
// &add_single_elements_around_ul,
// ],
// );
// let root = transformer.parse_fragment(&mut io::stdin()).unwrap();
// transformer.traverse(root);
// serialize(&mut io::stdout(), root, Default::default()).expect("serialization failed")
let transformer = TransformerState::new();
let root = transformer.parse_fragment(&mut io::stdin()).unwrap();
transformer.traverse(root);
serialize(&mut io::stdout(), root, Default::default()).expect("serialization failed")

View File

@ -9,36 +9,21 @@ use crate::arena_dom::{Arena, Node, NodeData, Ref, Sink};
// TODO: What are the performance implications of using a vec of closures instead of one
// transformer function whose size is known at compile time (U: Fn(Ref<'arena>) -> bool)?
// TODO: how to integrate CSS parsing and transforming?
pub struct Transformer<'arena, T>
where
T: Fn(Ref) -> bool,
{
pub struct TransformerState<'arena> {
arena: typed_arena::Arena<Node<'arena>>,
should_unwrap: T,
transformer_fns: Vec<&'arena dyn Fn(Ref<'arena>, Arena<'arena>)>,
}
impl<'arena, T> Transformer<'arena, T>
where
T: Fn(Ref) -> bool,
{
pub fn new(
should_unwrap: T,
transformers: Vec<&'arena dyn Fn(Ref<'arena>, Arena<'arena>)>,
) -> Transformer<'arena, T> {
Transformer {
arena: typed_arena::Arena::new(),
should_unwrap,
transformer_fns: transformers,
}
}
pub trait Transformer<'arena> {
fn new() -> Self;
pub fn parse_document(&'arena self, data: &mut impl Read) -> Result<Ref<'arena>, Error> {
fn arena(&'arena self) -> &'arena typed_arena::Arena<Node<'arena>>;
fn parse_document(&'arena self, data: &mut impl Read) -> Result<Ref<'arena>, Error> {
let mut bytes = Vec::new();
data.read_to_end(&mut bytes)?;
let sink = Sink {
arena: &self.arena,
document: self.arena.alloc(Node::new(NodeData::Document)),
arena: self.arena(),
document: self.arena().alloc(Node::new(NodeData::Document)),
quirks_mode: QuirksMode::NoQuirks,
};
Ok(parse_document(sink, Default::default())
@ -46,12 +31,12 @@ where
.one(&bytes[..]))
}
pub fn parse_fragment(&'arena self, data: &mut impl Read) -> Result<Ref<'arena>, Error> {
fn parse_fragment(&'arena self, data: &mut impl Read) -> Result<Ref<'arena>, Error> {
let mut bytes = Vec::new();
data.read_to_end(&mut bytes)?;
let sink = Sink {
arena: &self.arena,
document: self.arena.alloc(Node::new(NodeData::Document)),
arena: self.arena(),
document: self.arena().alloc(Node::new(NodeData::Document)),
quirks_mode: QuirksMode::NoQuirks,
};
Ok(parse_fragment(
@ -64,8 +49,8 @@ where
.one(&bytes[..]))
}
pub fn traverse(&'arena self, node: Ref<'arena>) {
if (self.should_unwrap)(node) {
fn traverse(&'arena self, node: Ref<'arena>) {
if self.should_unwrap(node) {
if let Some(unwrapped_node) = node.unwrap() {
return self.traverse(unwrapped_node);
} else {
@ -73,9 +58,10 @@ where
}
}
for transformer in self.transformer_fns.iter() {
transformer(node, &self.arena);
}
// for transformer in self.transformer_fns.iter() {
// transformer(node, &self.arena);
// }
self.transform(node);
if let Some(child) = node.first_child.get() {
self.traverse(child);
@ -85,6 +71,24 @@ where
self.traverse(sibling);
}
}
/// Per-node hook that `traverse` calls on every node it does not unwrap.
///
/// The provided implementation is a no-op; implementors override it to
/// inspect or rewrite the node. The parameter is underscore-prefixed only
/// because the default body ignores it, which would otherwise raise an
/// `unused_variables` warning.
fn transform(&'arena self, _node: Ref<'arena>) {}
/// Predicate that `traverse` consults before transforming a node.
///
/// When this returns `true`, `traverse` calls `node.unwrap()` on the node
/// instead of passing it to `transform`. The provided implementation
/// always returns `false`, so nothing is unwrapped unless an implementor
/// overrides it. The parameter is underscore-prefixed only because the
/// default body ignores it (avoids an `unused_variables` warning).
fn should_unwrap(&'arena self, _node: Ref<'arena>) -> bool {
    false
}
}
// `TransformerState` supplies only the two required `Transformer` methods
// (`new` and `arena`); every other trait method falls back to its provided
// default body.
impl<'arena> Transformer<'arena> for TransformerState<'arena> {
    /// Builds a state that owns a newly constructed arena.
    fn new() -> Self {
        let arena = typed_arena::Arena::new();
        Self { arena }
    }

    /// Returns a shared borrow of the arena owned by this state.
    fn arena(&'arena self) -> &'arena typed_arena::Arena<Node<'arena>> {
        &self.arena
    }
}
#[cfg(test)]
@ -154,53 +158,53 @@ mod test {
// count
// }
fn assert_serialized_html_eq(node: Ref, expected: &str) {
let mut output = vec![];
serialize(&mut output, node, Default::default()).unwrap();
assert_eq!(str::from_utf8(&output).unwrap(), expected);
}
// fn assert_serialized_html_eq(node: Ref, expected: &str) {
// let mut output = vec![];
// serialize(&mut output, node, Default::default()).unwrap();
// assert_eq!(str::from_utf8(&output).unwrap(), expected);
// }
#[test]
fn traversal() {
let transformer = Transformer::new(|_| false, vec![&|_, _| {}]);
let mut mock_data = MockRead::new("<div></div>");
let root = transformer.parse_fragment(&mut mock_data).unwrap();
transformer.traverse(root);
assert_serialized_html_eq(root, "<html><div></div></html>");
}
// #[test]
// fn traversal() {
// let transformer = Transformer::new(|_| false, vec![&|_, _| {}]);
// let mut mock_data = MockRead::new("<div></div>");
// let root = transformer.parse_fragment(&mut mock_data).unwrap();
// transformer.traverse(root);
// assert_serialized_html_eq(root, "<html><div></div></html>");
// }
#[test]
fn unwraps_element() {
let transformer = Transformer::new(
|node| {
if let NodeData::Element { ref name, .. } = node.data {
return name.local == local_name!("div");
}
false
},
vec![&|_, _| {}],
);
let mut mock_data = MockRead::new("<div></div>");
let root = transformer.parse_fragment(&mut mock_data).unwrap();
transformer.traverse(root);
assert_serialized_html_eq(root, "<html></html>");
}
// #[test]
// fn unwraps_element() {
// let transformer = Transformer::new(
// |node| {
// if let NodeData::Element { ref name, .. } = node.data {
// return name.local == local_name!("div");
// }
// false
// },
// vec![&|_, _| {}],
// );
// let mut mock_data = MockRead::new("<div></div>");
// let root = transformer.parse_fragment(&mut mock_data).unwrap();
// transformer.traverse(root);
// assert_serialized_html_eq(root, "<html></html>");
// }
#[test]
fn adds_element() {
let transformer = Transformer::new(
|_| false,
vec![&|node, arena| {
if let NodeData::Element { ref name, .. } = node.data {
if let local_name!("div") = name.local {
node.insert_after(create_element(arena, "span"));
}
}
}],
);
let mut mock_data = MockRead::new("<div></div>");
let root = transformer.parse_fragment(&mut mock_data).unwrap();
transformer.traverse(root);
assert_serialized_html_eq(root, "<html><div></div><span></span></html>");
}
// #[test]
// fn adds_element() {
// let transformer = Transformer::new(
// |_| false,
// vec![&|node, arena| {
// if let NodeData::Element { ref name, .. } = node.data {
// if let local_name!("div") = name.local {
// node.insert_after(create_element(arena, "span"));
// }
// }
// }],
// );
// let mut mock_data = MockRead::new("<div></div>");
// let root = transformer.parse_fragment(&mut mock_data).unwrap();
// transformer.traverse(root);
// assert_serialized_html_eq(root, "<html><div></div><span></span></html>");
// }
}