Initial commit: arena_dom and basic sanitize
This commit is contained in:
commit
6f682f1e91
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
/target
|
||||
**/*.rs.bk
|
502
Cargo.lock
generated
Normal file
502
Cargo.lock
generated
Normal file
@ -0,0 +1,502 @@
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "cloudabi"
|
||||
version = "0.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fuchsia-cprng"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "futf"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"new_debug_unreachable 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "html5ever"
|
||||
version = "0.22.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"markup5ever 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 0.15.26 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-normalization 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.49"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mac"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "maplit"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "markup5ever"
|
||||
version = "0.7.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"phf 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"phf_codegen 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.88 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_derive 1.0.88 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_json 1.0.38 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"string_cache 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"string_cache_codegen 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tendril 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "matches"
|
||||
version = "0.1.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "new_debug_unreachable"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "percent-encoding"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "phf"
|
||||
version = "0.7.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"phf_shared 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_codegen"
|
||||
version = "0.7.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"phf_generator 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"phf_shared 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_generator"
|
||||
version = "0.7.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"phf_shared 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_shared"
|
||||
version = "0.7.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "precomputed-hash"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "0.4.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "0.6.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.6.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"autocfg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.49 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_jitter 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_os 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_chacha"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"autocfg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "rand_hc"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_isaac"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_jitter"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.49 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_os"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.49 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_pcg"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"autocfg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_xorshift"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rdrand"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "0.2.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "sanitizer"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"html5ever 0.22.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"maplit 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"string_cache 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"typed-arena 1.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.88"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.88"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 0.15.26 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.38"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.88 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "siphasher"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
version = "0.6.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "string_cache"
|
||||
version = "0.7.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"new_debug_unreachable 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"phf_shared 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"precomputed-hash 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.88 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"string_cache_codegen 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"string_cache_shared 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "string_cache_codegen"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"phf_generator 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"phf_shared 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"string_cache_shared 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "string_cache_shared"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "0.15.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tendril"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"futf 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"utf-8 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typed-arena"
|
||||
version = "1.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-bidi"
|
||||
version = "0.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-normalization"
|
||||
version = "0.1.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"smallvec 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-xid"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "url"
|
||||
version = "1.7.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "utf-8"
|
||||
version = "0.7.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[metadata]
|
||||
"checksum autocfg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a6d640bee2da49f60a4068a7fae53acde8982514ab7bae8b8cea9e88cbcfd799"
|
||||
"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12"
|
||||
"checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4"
|
||||
"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
|
||||
"checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
|
||||
"checksum futf 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "7c9c1ce3fa9336301af935ab852c437817d14cd33690446569392e65170aac3b"
|
||||
"checksum html5ever 0.22.5 (registry+https://github.com/rust-lang/crates.io-index)" = "c213fa6a618dc1da552f54f85cba74b05d8e883c92ec4e89067736938084c26e"
|
||||
"checksum idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e"
|
||||
"checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b"
|
||||
"checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14"
|
||||
"checksum libc 0.2.49 (registry+https://github.com/rust-lang/crates.io-index)" = "413f3dfc802c5dc91dc570b05125b6cda9855edfaa9825c9849807876376e70e"
|
||||
"checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6"
|
||||
"checksum mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
|
||||
"checksum maplit 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "08cbb6b4fef96b6d77bfc40ec491b1690c779e77b05cd9f07f787ed376fd4c43"
|
||||
"checksum markup5ever 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)" = "897636f9850c3eef4905a5540683ed53dc9393860f0846cab2c2ddf9939862ff"
|
||||
"checksum matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08"
|
||||
"checksum new_debug_unreachable 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "fe2deb65e9f08f6540e6766481b9dc3a36e73d2fdb96e82bc3cd56353fafe90a"
|
||||
"checksum percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "31010dd2e1ac33d5b46a5b413495239882813e0369f8ed8a5e266f173602f831"
|
||||
"checksum phf 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)" = "b3da44b85f8e8dfaec21adae67f95d93244b2ecf6ad2a692320598dcc8e6dd18"
|
||||
"checksum phf_codegen 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)" = "b03e85129e324ad4166b06b2c7491ae27fe3ec353af72e72cd1654c7225d517e"
|
||||
"checksum phf_generator 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)" = "09364cc93c159b8b06b1f4dd8a4398984503483891b0c26b867cf431fb132662"
|
||||
"checksum phf_shared 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)" = "234f71a15de2288bcb7e3b6515828d22af7ec8598ee6d24c3b526fa0a80b67a0"
|
||||
"checksum precomputed-hash 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
|
||||
"checksum proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)" = "4d317f9caece796be1980837fd5cb3dfec5613ebdb04ad0956deea83ce168915"
|
||||
"checksum quote 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)" = "cdd8e04bd9c52e0342b406469d494fcb033be4bdbe5c606016defbb1681411e1"
|
||||
"checksum rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca"
|
||||
"checksum rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef"
|
||||
"checksum rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b"
|
||||
"checksum rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d0e7a549d590831370895ab7ba4ea0c1b6b011d106b5ff2da6eee112615e6dc0"
|
||||
"checksum rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4"
|
||||
"checksum rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08"
|
||||
"checksum rand_jitter 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "7b9ea758282efe12823e0d952ddb269d2e1897227e464919a554f2a03ef1b832"
|
||||
"checksum rand_os 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "b7c690732391ae0abafced5015ffb53656abfaec61b342290e5eb56b286a679d"
|
||||
"checksum rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44"
|
||||
"checksum rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c"
|
||||
"checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
|
||||
"checksum ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7"
|
||||
"checksum serde 1.0.88 (registry+https://github.com/rust-lang/crates.io-index)" = "9f301d728f2b94c9a7691c90f07b0b4e8a4517181d9461be94c04bddeb4bd850"
|
||||
"checksum serde_derive 1.0.88 (registry+https://github.com/rust-lang/crates.io-index)" = "beed18e6f5175aef3ba670e57c60ef3b1b74d250d962a26604bff4c80e970dd4"
|
||||
"checksum serde_json 1.0.38 (registry+https://github.com/rust-lang/crates.io-index)" = "27dce848e7467aa0e2fcaf0a413641499c0b745452aaca1194d24dedde9e13c9"
|
||||
"checksum siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac"
|
||||
"checksum smallvec 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "c4488ae950c49d403731982257768f48fada354a5203fe81f9bb6f43ca9002be"
|
||||
"checksum string_cache 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "25d70109977172b127fe834e5449e5ab1740b9ba49fa18a2020f509174f25423"
|
||||
"checksum string_cache_codegen 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1eea1eee654ef80933142157fdad9dd8bc43cf7c74e999e369263496f04ff4da"
|
||||
"checksum string_cache_shared 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b1884d1bc09741d466d9b14e6d37ac89d6909cbcac41dd9ae982d4d063bbedfc"
|
||||
"checksum syn 0.15.26 (registry+https://github.com/rust-lang/crates.io-index)" = "f92e629aa1d9c827b2bb8297046c1ccffc57c99b947a680d3ccff1f136a3bee9"
|
||||
"checksum tendril 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "707feda9f2582d5d680d733e38755547a3e8fb471e7ba11452ecfd9ce93a5d3b"
|
||||
"checksum typed-arena 1.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c6c06a92aef38bb4dc5b0df00d68496fc31307c5344c867bb61678c6e1671ec5"
|
||||
"checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5"
|
||||
"checksum unicode-normalization 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "141339a08b982d942be2ca06ff8b076563cbe223d1befd5450716790d44e2426"
|
||||
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
|
||||
"checksum url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dd4e7c0d531266369519a4aa4f399d748bd37043b00bde1e4ff1f60a120b355a"
|
||||
"checksum utf-8 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)" = "05e42f7c18b8f902290b009cde6d651262f956c98bc51bca4cd1d511c9cd85c7"
|
||||
"checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0"
|
||||
"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
13
Cargo.toml
Normal file
13
Cargo.toml
Normal file
@ -0,0 +1,13 @@
|
||||
[package]
|
||||
name = "sanitizer"
|
||||
version = "0.1.0"
|
||||
authors = ["Tyler Hallada <tyler@consider.co>"]
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
# Pinned to the 0.22 series (the lockfile resolves 0.22.5). A bare "*" would accept any future
# release, including ones with breaking API changes, and crates.io rejects wildcard
# requirements when publishing.
html5ever = "0.22"
lazy_static = "1.3.0"
maplit = "1.0.1"
string_cache = "0.7"
typed-arena = "1.4.1"
url = "1.7.2"
|
484
src/arena_dom.rs
Normal file
484
src/arena_dom.rs
Normal file
@ -0,0 +1,484 @@
|
||||
// Majority of this file is from the html5ever project.
|
||||
// https://github.com/servo/html5ever/blob/45b2fca5c6/html5ever/examples/arena.rs
|
||||
//
|
||||
// Copyright 2014-2017 The html5ever Project Developers. See the
|
||||
// COPYRIGHT file at the top-level directory of this distribution.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
extern crate html5ever;
|
||||
extern crate typed_arena;
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::cell::{Cell, RefCell};
|
||||
use std::collections::HashSet;
|
||||
use std::default::Default;
|
||||
use std::io;
|
||||
use std::ptr;
|
||||
|
||||
use html5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
|
||||
use html5ever::serialize::TraversalScope::{ChildrenOnly, IncludeNode};
|
||||
use html5ever::serialize::{Serialize, Serializer, TraversalScope};
|
||||
use html5ever::tendril::{StrTendril, TendrilSink};
|
||||
use html5ever::{parse_document, Attribute, ExpandedName, QualName};
|
||||
|
||||
pub fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<'a> {
|
||||
let sink = Sink {
|
||||
arena: arena,
|
||||
document: arena.alloc(Node::new(NodeData::Document)),
|
||||
quirks_mode: QuirksMode::NoQuirks,
|
||||
};
|
||||
parse_document(sink, Default::default())
|
||||
.from_utf8()
|
||||
.one(bytes)
|
||||
}
|
||||
|
||||
pub fn create_element<'arena>(arena: Arena<'arena>, name: QualName) -> Ref<'arena> {
|
||||
arena.alloc(Node::new(NodeData::Element {
|
||||
name: name,
|
||||
attrs: RefCell::new(vec![]),
|
||||
template_contents: None,
|
||||
mathml_annotation_xml_integration_point: false,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Shared reference to the `typed_arena::Arena` from which `Node`s are allocated; the nodes and
/// the reference share the `'arena` lifetime.
pub type Arena<'arena> = &'arena typed_arena::Arena<Node<'arena>>;
|
||||
|
||||
/// Shared reference to a `Node`, valid for the `'arena` lifetime.
pub type Ref<'arena> = &'arena Node<'arena>;
|
||||
|
||||
/// Optional link to another node, wrapped in a `Cell` so it can be rewritten through a shared
/// reference. `None` means "no such neighbour".
pub type Link<'arena> = Cell<Option<Ref<'arena>>>;
|
||||
|
||||
/// Tree-building state handed to `parse_document` as the sink.
///
/// Holds the arena that new nodes are allocated from (`arena`), the document node created before
/// parsing starts (`document`), and the current quirks mode (`quirks_mode`).
pub struct Sink<'arena> {
|
||||
arena: Arena<'arena>,
|
||||
document: Ref<'arena>,
|
||||
quirks_mode: QuirksMode,
|
||||
}
|
||||
|
||||
/// One node of the arena-allocated tree.
///
/// The five `Link` fields connect the node to its neighbours: `parent` is the containing node,
/// `previous_sibling` / `next_sibling` are the adjacent entries in the parent's child list, and
/// `first_child` / `last_child` are the two ends of this node's own child list. Every link is a
/// `Cell`, so the tree can be rewired through shared references. `data` carries the node's kind
/// and kind-specific payload.
pub struct Node<'arena> {
|
||||
pub parent: Link<'arena>,
|
||||
pub next_sibling: Link<'arena>,
|
||||
pub previous_sibling: Link<'arena>,
|
||||
pub first_child: Link<'arena>,
|
||||
pub last_child: Link<'arena>,
|
||||
pub data: NodeData<'arena>,
|
||||
}
|
||||
|
||||
/// Kind-specific payload stored in a `Node`.
///
/// * `Document` - carries no data.
/// * `Doctype` - `name`, `public_id` and `system_id` strings.
/// * `Text` - text `contents`, held in a `RefCell` so it can be changed through a shared
///   reference.
/// * `Comment` - comment `contents`.
/// * `Element` - qualified `name`, attribute list `attrs` (in a `RefCell`, so it can be changed
///   through a shared reference), optional `template_contents` node, and the
///   `mathml_annotation_xml_integration_point` flag.
/// * `ProcessingInstruction` - `target` and `contents` strings.
pub enum NodeData<'arena> {
|
||||
Document,
|
||||
Doctype {
|
||||
name: StrTendril,
|
||||
public_id: StrTendril,
|
||||
system_id: StrTendril,
|
||||
},
|
||||
Text {
|
||||
contents: RefCell<StrTendril>,
|
||||
},
|
||||
Comment {
|
||||
contents: StrTendril,
|
||||
},
|
||||
Element {
|
||||
name: QualName,
|
||||
attrs: RefCell<Vec<Attribute>>,
|
||||
template_contents: Option<Ref<'arena>>,
|
||||
mathml_annotation_xml_integration_point: bool,
|
||||
},
|
||||
ProcessingInstruction {
|
||||
target: StrTendril,
|
||||
contents: StrTendril,
|
||||
},
|
||||
}
|
||||
|
||||
impl<'arena> Node<'arena> {
|
||||
pub fn new(data: NodeData<'arena>) -> Self {
|
||||
Node {
|
||||
parent: Cell::new(None),
|
||||
previous_sibling: Cell::new(None),
|
||||
next_sibling: Cell::new(None),
|
||||
first_child: Cell::new(None),
|
||||
last_child: Cell::new(None),
|
||||
data: data,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn detach(&self) {
|
||||
let parent = self.parent.take();
|
||||
let previous_sibling = self.previous_sibling.take();
|
||||
let next_sibling = self.next_sibling.take();
|
||||
|
||||
if let Some(next_sibling) = next_sibling {
|
||||
next_sibling.previous_sibling.set(previous_sibling);
|
||||
} else if let Some(parent) = parent {
|
||||
parent.last_child.set(previous_sibling);
|
||||
}
|
||||
|
||||
if let Some(previous_sibling) = previous_sibling {
|
||||
previous_sibling.next_sibling.set(next_sibling);
|
||||
} else if let Some(parent) = parent {
|
||||
parent.first_child.set(next_sibling);
|
||||
}
|
||||
}
|
||||
|
||||
/// Removes this node from the tree and splices its children into the position it occupied.
///
/// After the call this node has no parent, siblings or children. Its former children are
/// re-parented to its former parent and sit, in their original order, between its former
/// previous and next siblings. If the node had no children this behaves like `detach`.
///
/// Returns the node that now occupies this node's former position: its first child if it had
/// any, otherwise its former next sibling, otherwise `None`.
pub fn unwrap(&self) -> Option<&'arena Self> {
    let parent = self.parent.take();
    let previous_sibling = self.previous_sibling.take();
    let next_sibling = self.next_sibling.take();
    let first_child = self.first_child.take();
    let last_child = self.last_child.take();

    // Re-parent the children first, while the last child's `next_sibling` is still `None`, so
    // the walk stops at the end of the child list rather than running on into the siblings
    // that are linked in below.
    let mut cursor = first_child;
    while let Some(child) = cursor {
        child.parent.set(parent);
        cursor = child.next_sibling.get();
    }

    // Ends of the run that takes this node's place as seen from outside. With children the
    // neighbours attach to the first/last child; without children they attach to each other.
    let head = first_child.or(next_sibling);
    let tail = last_child.or(previous_sibling);

    if let Some(next_sibling) = next_sibling {
        next_sibling.previous_sibling.set(tail);
    } else if let Some(parent) = parent {
        parent.last_child.set(tail);
    }

    if let Some(previous_sibling) = previous_sibling {
        previous_sibling.next_sibling.set(head);
    } else if let Some(parent) = parent {
        parent.first_child.set(head);
    }

    // Complete the splice from the children's side. Without these two links the outer siblings
    // point at the children but the children do not point back, so a forward walk would stop
    // at the last spliced child and a backward walk at the first one.
    if let Some(first_child) = first_child {
        first_child.previous_sibling.set(previous_sibling);
    }
    if let Some(last_child) = last_child {
        last_child.next_sibling.set(next_sibling);
    }

    head
}
|
||||
|
||||
pub fn append(&'arena self, new_child: &'arena Self) {
|
||||
new_child.detach();
|
||||
new_child.parent.set(Some(self));
|
||||
if let Some(last_child) = self.last_child.take() {
|
||||
new_child.previous_sibling.set(Some(last_child));
|
||||
debug_assert!(last_child.next_sibling.get().is_none());
|
||||
last_child.next_sibling.set(Some(new_child));
|
||||
} else {
|
||||
debug_assert!(self.first_child.get().is_none());
|
||||
self.first_child.set(Some(new_child));
|
||||
}
|
||||
self.last_child.set(Some(new_child));
|
||||
}
|
||||
|
||||
pub fn insert_before(&'arena self, new_sibling: &'arena Self) {
|
||||
new_sibling.detach();
|
||||
new_sibling.parent.set(self.parent.get());
|
||||
new_sibling.next_sibling.set(Some(self));
|
||||
if let Some(previous_sibling) = self.previous_sibling.take() {
|
||||
new_sibling.previous_sibling.set(Some(previous_sibling));
|
||||
debug_assert!(ptr::eq::<Node>(
|
||||
previous_sibling.next_sibling.get().unwrap(),
|
||||
self
|
||||
));
|
||||
previous_sibling.next_sibling.set(Some(new_sibling));
|
||||
} else if let Some(parent) = self.parent.get() {
|
||||
debug_assert!(ptr::eq::<Node>(parent.first_child.get().unwrap(), self));
|
||||
parent.first_child.set(Some(new_sibling));
|
||||
}
|
||||
self.previous_sibling.set(Some(new_sibling));
|
||||
}
|
||||
|
||||
pub fn insert_after(&'arena self, new_sibling: &'arena Self) {
|
||||
new_sibling.detach();
|
||||
new_sibling.parent.set(self.parent.get());
|
||||
new_sibling.previous_sibling.set(Some(self));
|
||||
if let Some(next_sibling) = self.next_sibling.take() {
|
||||
new_sibling.next_sibling.set(Some(next_sibling));
|
||||
debug_assert!(ptr::eq::<Node>(
|
||||
next_sibling.previous_sibling.get().unwrap(),
|
||||
self
|
||||
));
|
||||
next_sibling.previous_sibling.set(Some(new_sibling));
|
||||
} else if let Some(parent) = self.parent.get() {
|
||||
debug_assert!(ptr::eq::<Node>(parent.last_child.get().unwrap(), self));
|
||||
parent.last_child.set(Some(new_sibling));
|
||||
}
|
||||
self.next_sibling.set(Some(new_sibling));
|
||||
}
|
||||
}
|
||||
|
||||
impl<'arena> Sink<'arena> {
|
||||
fn new_node(&self, data: NodeData<'arena>) -> Ref<'arena> {
|
||||
self.arena.alloc(Node::new(data))
|
||||
}
|
||||
|
||||
fn append_common<P, A>(&self, child: NodeOrText<Ref<'arena>>, previous: P, append: A)
|
||||
where
|
||||
P: FnOnce() -> Option<Ref<'arena>>,
|
||||
A: FnOnce(Ref<'arena>),
|
||||
{
|
||||
let new_node = match child {
|
||||
NodeOrText::AppendText(text) => {
|
||||
// Append to an existing Text node if we have one.
|
||||
if let Some(&Node {
|
||||
data: NodeData::Text { ref contents },
|
||||
..
|
||||
}) = previous()
|
||||
{
|
||||
contents.borrow_mut().push_tendril(&text);
|
||||
return;
|
||||
}
|
||||
self.new_node(NodeData::Text {
|
||||
contents: RefCell::new(text),
|
||||
})
|
||||
}
|
||||
NodeOrText::AppendNode(node) => node,
|
||||
};
|
||||
|
||||
append(new_node)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'arena> TreeSink for Sink<'arena> {
|
||||
type Handle = Ref<'arena>;
|
||||
type Output = Ref<'arena>;
|
||||
|
||||
fn finish(self) -> Ref<'arena> {
|
||||
self.document
|
||||
}
|
||||
|
||||
fn parse_error(&mut self, _: Cow<'static, str>) {}
|
||||
|
||||
fn get_document(&mut self) -> Ref<'arena> {
|
||||
self.document
|
||||
}
|
||||
|
||||
fn set_quirks_mode(&mut self, mode: QuirksMode) {
|
||||
self.quirks_mode = mode;
|
||||
}
|
||||
|
||||
fn same_node(&self, x: &Ref<'arena>, y: &Ref<'arena>) -> bool {
|
||||
ptr::eq::<Node>(*x, *y)
|
||||
}
|
||||
|
||||
fn elem_name<'a>(&self, target: &'a Ref<'arena>) -> ExpandedName<'a> {
|
||||
match target.data {
|
||||
NodeData::Element { ref name, .. } => name.expanded(),
|
||||
_ => panic!("not an element!"),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_template_contents(&mut self, target: &Ref<'arena>) -> Ref<'arena> {
|
||||
if let NodeData::Element {
|
||||
template_contents: Some(ref contents),
|
||||
..
|
||||
} = target.data
|
||||
{
|
||||
contents
|
||||
} else {
|
||||
panic!("not a template element!")
|
||||
}
|
||||
}
|
||||
|
||||
fn is_mathml_annotation_xml_integration_point(&self, target: &Ref<'arena>) -> bool {
|
||||
if let NodeData::Element {
|
||||
mathml_annotation_xml_integration_point,
|
||||
..
|
||||
} = target.data
|
||||
{
|
||||
mathml_annotation_xml_integration_point
|
||||
} else {
|
||||
panic!("not an element!")
|
||||
}
|
||||
}
|
||||
|
||||
fn create_element(
|
||||
&mut self,
|
||||
name: QualName,
|
||||
attrs: Vec<Attribute>,
|
||||
flags: ElementFlags,
|
||||
) -> Ref<'arena> {
|
||||
self.new_node(NodeData::Element {
|
||||
name: name,
|
||||
attrs: RefCell::new(attrs),
|
||||
template_contents: if flags.template {
|
||||
Some(self.new_node(NodeData::Document))
|
||||
} else {
|
||||
None
|
||||
},
|
||||
mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point,
|
||||
})
|
||||
}
|
||||
|
||||
fn create_comment(&mut self, text: StrTendril) -> Ref<'arena> {
|
||||
self.new_node(NodeData::Comment { contents: text })
|
||||
}
|
||||
|
||||
fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Ref<'arena> {
|
||||
self.new_node(NodeData::ProcessingInstruction {
|
||||
target: target,
|
||||
contents: data,
|
||||
})
|
||||
}
|
||||
|
||||
fn append(&mut self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
|
||||
self.append_common(
|
||||
child,
|
||||
|| parent.last_child.get(),
|
||||
|new_node| parent.append(new_node),
|
||||
)
|
||||
}
|
||||
|
||||
fn append_before_sibling(&mut self, sibling: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
|
||||
self.append_common(
|
||||
child,
|
||||
|| sibling.previous_sibling.get(),
|
||||
|new_node| sibling.insert_before(new_node),
|
||||
)
|
||||
}
|
||||
|
||||
fn append_based_on_parent_node(
|
||||
&mut self,
|
||||
element: &Ref<'arena>,
|
||||
prev_element: &Ref<'arena>,
|
||||
child: NodeOrText<Ref<'arena>>,
|
||||
) {
|
||||
if element.parent.get().is_some() {
|
||||
self.append_before_sibling(element, child)
|
||||
} else {
|
||||
self.append(prev_element, child)
|
||||
}
|
||||
}
|
||||
|
||||
fn append_doctype_to_document(
|
||||
&mut self,
|
||||
name: StrTendril,
|
||||
public_id: StrTendril,
|
||||
system_id: StrTendril,
|
||||
) {
|
||||
self.document.append(self.new_node(NodeData::Doctype {
|
||||
name: name,
|
||||
public_id: public_id,
|
||||
system_id: system_id,
|
||||
}))
|
||||
}
|
||||
|
||||
fn add_attrs_if_missing(&mut self, target: &Ref<'arena>, attrs: Vec<Attribute>) {
|
||||
let mut existing = if let NodeData::Element { ref attrs, .. } = target.data {
|
||||
attrs.borrow_mut()
|
||||
} else {
|
||||
panic!("not an element")
|
||||
};
|
||||
|
||||
let existing_names = existing
|
||||
.iter()
|
||||
.map(|e| e.name.clone())
|
||||
.collect::<HashSet<_>>();
|
||||
existing.extend(
|
||||
attrs
|
||||
.into_iter()
|
||||
.filter(|attr| !existing_names.contains(&attr.name)),
|
||||
);
|
||||
}
|
||||
|
||||
fn remove_from_parent(&mut self, target: &Ref<'arena>) {
|
||||
target.detach()
|
||||
}
|
||||
|
||||
fn reparent_children(&mut self, node: &Ref<'arena>, new_parent: &Ref<'arena>) {
|
||||
let mut next_child = node.first_child.get();
|
||||
while let Some(child) = next_child {
|
||||
debug_assert!(ptr::eq::<Node>(child.parent.get().unwrap(), *node));
|
||||
next_child = child.next_sibling.get();
|
||||
new_parent.append(child)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Implementation adapted from implementation for RcDom:
|
||||
// https://github.com/servo/html5ever/blob/45b2fca5c6/markup5ever/rcdom.rs#L410
|
||||
impl<'arena> Serialize for Node<'arena> {
|
||||
fn serialize<S>(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
match (&traversal_scope, &self.data) {
|
||||
(
|
||||
_,
|
||||
&NodeData::Element {
|
||||
ref name,
|
||||
ref attrs,
|
||||
..
|
||||
},
|
||||
) => {
|
||||
if traversal_scope == IncludeNode {
|
||||
serializer.start_elem(
|
||||
name.clone(),
|
||||
attrs.borrow().iter().map(|at| (&at.name, &at.value[..])),
|
||||
)?;
|
||||
}
|
||||
|
||||
if let Some(child) = self.first_child.get() {
|
||||
child.serialize(serializer, IncludeNode)?;
|
||||
}
|
||||
|
||||
if traversal_scope == IncludeNode {
|
||||
serializer.end_elem(name.clone())?;
|
||||
}
|
||||
}
|
||||
|
||||
(&ChildrenOnly(_), &NodeData::Document) => {
|
||||
if let Some(child) = self.first_child.get() {
|
||||
child.serialize(serializer, IncludeNode)?;
|
||||
}
|
||||
}
|
||||
|
||||
(&ChildrenOnly(_), _) => {},
|
||||
|
||||
(&IncludeNode, &NodeData::Doctype { ref name, .. }) => serializer.write_doctype(&name)?,
|
||||
(&IncludeNode, &NodeData::Text { ref contents }) => {
|
||||
serializer.write_text(&contents.borrow())?
|
||||
}
|
||||
(&IncludeNode, &NodeData::Comment { ref contents }) => {
|
||||
serializer.write_comment(&contents)?
|
||||
}
|
||||
(
|
||||
&IncludeNode,
|
||||
&NodeData::ProcessingInstruction {
|
||||
ref target,
|
||||
ref contents,
|
||||
},
|
||||
) => serializer.write_processing_instruction(target, contents)?,
|
||||
(&IncludeNode, &NodeData::Document) => panic!("Can't serialize Document node itself"),
|
||||
}
|
||||
|
||||
if let Some(sibling) = self.next_sibling.get() {
|
||||
sibling.serialize(serializer, IncludeNode)?
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
72
src/config/basic.rs
Normal file
72
src/config/basic.rs
Normal file
@ -0,0 +1,72 @@
|
||||
use html5ever::LocalName;
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
lazy_static! {
    /// Element names permitted by the basic profile.
    pub static ref ELEMENTS: HashSet<LocalName> = hashset! {
        local_name!("a"),
        local_name!("abbr"),
        local_name!("blockquote"),
        local_name!("br"),
        local_name!("cite"),
        local_name!("code"),
        local_name!("dd"),
        local_name!("dfn"),
        local_name!("dl"),
        local_name!("dt"),
        local_name!("kbd"),
        local_name!("li"),
        local_name!("mark"),
        local_name!("ol"),
        local_name!("p"),
        local_name!("pre"),
        local_name!("q"),
        local_name!("s"),
        local_name!("samp"),
        local_name!("small"),
        local_name!("strike"),
        local_name!("sub"),
        local_name!("sup"),
        local_name!("time"),
        local_name!("ul"),
        local_name!("var"),
    };

    /// Attribute names permitted regardless of element; empty for the basic profile.
    pub static ref ALL_ATTRIBUTES: HashSet<LocalName> = HashSet::new();

    /// Attribute names permitted per element name.
    pub static ref ATTRIBUTES: HashMap<LocalName, HashSet<LocalName>> = hashmap! {
        local_name!("a") => hashset! { local_name!("href") },
        local_name!("abbr") => hashset! { local_name!("title") },
        local_name!("blockquote") => hashset! { local_name!("cite") },
        local_name!("dfn") => hashset! { local_name!("title") },
        local_name!("q") => hashset! { local_name!("cite") },
        local_name!("time") => hashset! {
            local_name!("datetime"),
            LocalName::from("pubdate"),
        },
    };

    /// Attribute name/value pairs keyed by element name.
    pub static ref ADD_ATTRIBUTES: HashMap<LocalName, HashMap<LocalName, &'static str>> = hashmap! {
        local_name!("a") => hashmap! { local_name!("rel") => "nofollow" },
    };

    /// URL schemes permitted per element name and attribute name.
    pub static ref PROTOCOLS: HashMap<LocalName, HashMap<LocalName, HashSet<&'static str>>> = hashmap! {
        local_name!("a") => hashmap! {
            local_name!("href") => hashset! { "ftp", "http", "https", "mailto" },
        },
        local_name!("blockquote") => hashmap! {
            local_name!("cite") => hashset! { "http", "https" },
        },
        local_name!("q") => hashmap! {
            local_name!("cite") => hashset! { "http", "https" },
        },
    };
}
|
3
src/config/mod.rs
Normal file
3
src/config/mod.rs
Normal file
@ -0,0 +1,3 @@
|
||||
pub mod basic;
|
||||
pub mod relaxed;
|
||||
pub mod permissive;
|
177
src/config/permissive.rs
Normal file
177
src/config/permissive.rs
Normal file
@ -0,0 +1,177 @@
|
||||
use html5ever::LocalName;
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
use super::relaxed::{
|
||||
ADD_ATTRIBUTES as RELAXED_ADD_ATTRIBUTES, ALL_ATTRIBUTES as RELAXED_ALL_ATTRIBUTES,
|
||||
ATTRIBUTES as RELAXED_ATTRIBUTES, ELEMENTS as RELAXED_ELEMENTS,
|
||||
};
|
||||
|
||||
lazy_static! {
    /// Element names permitted by the permissive profile: everything in the relaxed
    /// profile plus the names listed here.
    pub static ref ELEMENTS: HashSet<LocalName> = RELAXED_ELEMENTS
        .union(&hashset!(
            local_name!("acronym"),
            local_name!("basefont"),
            local_name!("big"),
            local_name!("blink"),
            local_name!("center"),
            LocalName::from("command"),
            local_name!("dir"),
            local_name!("font"),
            local_name!("marquee"),
            local_name!("tt"),
            local_name!("form"),
            local_name!("input"),
            local_name!("button"),
            LocalName::from("single"),
            LocalName::from("double"),
        ))
        .cloned()
        .collect();

    /// Attribute names permitted regardless of element: the relaxed set plus
    /// the names listed here.
    pub static ref ALL_ATTRIBUTES: HashSet<LocalName> = RELAXED_ALL_ATTRIBUTES
        .union(&hashset! {
            local_name!("bgcolor"),
            local_name!("width"),
            local_name!("height"),
            local_name!("border"),
            local_name!("color"),
            local_name!("background"),
        })
        .cloned()
        .collect();

    /// Attribute names permitted per element name: the relaxed map with the form-control
    /// entries below merged in (sets for an element present in both are unioned).
    pub static ref ATTRIBUTES: HashMap<LocalName, HashSet<LocalName>> = {
        let mut merged: HashMap<LocalName, HashSet<LocalName>> = RELAXED_ATTRIBUTES.clone();
        let additions: HashMap<LocalName, HashSet<LocalName>> = hashmap! {
            local_name!("button") => hashset! {
                local_name!("type"),
            },
            local_name!("input") => hashset! {
                local_name!("type"),
                local_name!("name"),
                local_name!("value"),
            },
        };
        for (element, attributes) in additions {
            merged
                .entry(element)
                .or_insert_with(HashSet::new)
                .extend(attributes);
        }
        merged
    };

    /// Attribute name/value pairs keyed by element name; identical to the relaxed profile.
    pub static ref ADD_ATTRIBUTES: HashMap<LocalName, HashMap<LocalName, &'static str>> = RELAXED_ADD_ATTRIBUTES.clone();

    /// URL schemes permitted per element name and attribute name; identical to the
    /// relaxed profile.
    pub static ref PROTOCOLS: HashMap<LocalName, HashMap<LocalName, HashSet<&'static str>>> = super::relaxed::PROTOCOLS.clone();
}
|
199
src/config/relaxed.rs
Normal file
199
src/config/relaxed.rs
Normal file
@ -0,0 +1,199 @@
|
||||
use html5ever::LocalName;
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
use super::basic::{
|
||||
ADD_ATTRIBUTES as BASIC_ADD_ATTRIBUTES, ALL_ATTRIBUTES as BASIC_ALL_ATTRIBUTES,
|
||||
ATTRIBUTES as BASIC_ATTRIBUTES, ELEMENTS as BASIC_ELEMENTS,
|
||||
};
|
||||
|
||||
lazy_static! {
    /// Element names permitted by the relaxed profile: everything in the basic profile
    /// plus the names listed here.
    pub static ref ELEMENTS: HashSet<LocalName> = BASIC_ELEMENTS
        .union(&hashset!(
            local_name!("address"),
            local_name!("article"),
            local_name!("aside"),
            local_name!("bdi"),
            local_name!("bdo"),
            local_name!("body"),
            local_name!("caption"),
            local_name!("col"),
            local_name!("colgroup"),
            local_name!("data"),
            local_name!("del"),
            local_name!("div"),
            local_name!("figcaption"),
            local_name!("figure"),
            local_name!("footer"),
            local_name!("h1"),
            local_name!("h2"),
            local_name!("h3"),
            local_name!("h4"),
            local_name!("h5"),
            local_name!("h6"),
            local_name!("head"),
            local_name!("header"),
            local_name!("hgroup"),
            local_name!("hr"),
            local_name!("html"),
            local_name!("img"),
            local_name!("ins"),
            local_name!("main"),
            local_name!("nav"),
            local_name!("rp"),
            local_name!("rt"),
            local_name!("ruby"),
            local_name!("section"),
            local_name!("span"),
            local_name!("style"),
            local_name!("summary"),
            local_name!("table"),
            local_name!("tbody"),
            local_name!("td"),
            local_name!("tfoot"),
            local_name!("th"),
            local_name!("thead"),
            local_name!("title"),
            local_name!("tr"),
            local_name!("wbr"),
        ))
        .cloned()
        .collect();

    /// Attribute names permitted regardless of element: the basic set plus
    /// the names listed here.
    pub static ref ALL_ATTRIBUTES: HashSet<LocalName> = BASIC_ALL_ATTRIBUTES
        .union(&hashset! {
            local_name!("class"),
            local_name!("dir"),
            local_name!("hidden"),
            local_name!("id"),
            local_name!("lang"),
            local_name!("style"),
            local_name!("tabindex"),
            local_name!("title"),
            LocalName::from("translate"),
        })
        .cloned()
        .collect();

    /// Attribute names permitted per element name: the basic map with the entries below
    /// merged in (sets for an element present in both are unioned).
    pub static ref ATTRIBUTES: HashMap<LocalName, HashSet<LocalName>> = {
        let mut merged: HashMap<LocalName, HashSet<LocalName>> = BASIC_ATTRIBUTES.clone();
        let additions: HashMap<LocalName, HashSet<LocalName>> = hashmap! {
            local_name!("a") => hashset! {
                local_name!("hreflang"),
                local_name!("name"),
                local_name!("rel"),
            },
            local_name!("col") => hashset! {
                local_name!("span"),
                local_name!("width"),
            },
            local_name!("colgroup") => hashset! {
                local_name!("span"),
                local_name!("width"),
            },
            local_name!("data") => hashset! {
                local_name!("value"),
            },
            local_name!("del") => hashset! {
                local_name!("cite"),
                local_name!("datetime"),
            },
            local_name!("img") => hashset! {
                local_name!("align"),
                local_name!("alt"),
                local_name!("border"),
                local_name!("height"),
                local_name!("src"),
                local_name!("srcset"),
                local_name!("width"),
            },
            local_name!("ins") => hashset! {
                local_name!("cite"),
                local_name!("datetime"),
            },
            local_name!("li") => hashset! {
                local_name!("value"),
            },
            local_name!("ol") => hashset! {
                LocalName::from("reversed"),
                local_name!("start"),
                local_name!("type"),
            },
            local_name!("style") => hashset! {
                local_name!("media"),
                local_name!("scoped"),
                local_name!("type"),
            },
            local_name!("table") => hashset! {
                local_name!("align"),
                local_name!("bgcolor"),
                local_name!("border"),
                local_name!("cellpadding"),
                local_name!("cellspacing"),
                local_name!("frame"),
                local_name!("rules"),
                LocalName::from("sortable"),
                local_name!("summary"),
                local_name!("width"),
            },
            local_name!("td") => hashset! {
                local_name!("abbr"),
                local_name!("align"),
                local_name!("axis"),
                local_name!("colspan"),
                local_name!("headers"),
                local_name!("rowspan"),
                local_name!("valign"),
                local_name!("width"),
            },
            local_name!("th") => hashset! {
                local_name!("abbr"),
                local_name!("align"),
                local_name!("axis"),
                local_name!("colspan"),
                local_name!("headers"),
                local_name!("rowspan"),
                local_name!("scope"),
                LocalName::from("sorted"),
                local_name!("valign"),
                local_name!("width"),
            },
            local_name!("ul") => hashset! {
                local_name!("type"),
            },
        };
        for (element, attributes) in additions {
            merged
                .entry(element)
                .or_insert_with(HashSet::new)
                .extend(attributes);
        }
        merged
    };

    /// Attribute name/value pairs keyed by element name; identical to the basic profile.
    pub static ref ADD_ATTRIBUTES: HashMap<LocalName, HashMap<LocalName, &'static str>> = BASIC_ADD_ATTRIBUTES.clone();

    /// URL schemes permitted per element name and attribute name: the basic map plus
    /// entries for `del`, `img` and `ins`.
    pub static ref PROTOCOLS: HashMap<LocalName, HashMap<LocalName, HashSet<&'static str>>> = {
        let mut merged: HashMap<LocalName, HashMap<LocalName, HashSet<&'static str>>> =
            super::basic::PROTOCOLS.clone();
        let additions: HashMap<LocalName, HashMap<LocalName, HashSet<&'static str>>> = hashmap! {
            local_name!("del") => hashmap! {
                local_name!("cite") => hashset! { "http", "https" },
            },
            local_name!("img") => hashmap! {
                local_name!("src") => hashset! { "http", "https" },
            },
            local_name!("ins") => hashmap! {
                local_name!("cite") => hashset! { "http", "https" },
            },
        };
        // None of these elements appear in the basic map, so nothing is overwritten.
        merged.extend(additions);
        merged
    };
}
|
164
src/main.rs
Normal file
164
src/main.rs
Normal file
@ -0,0 +1,164 @@
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
#[macro_use]
|
||||
extern crate html5ever;
|
||||
#[macro_use]
|
||||
extern crate maplit;
|
||||
extern crate typed_arena;
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::default::Default;
|
||||
use std::io::{self, Read};
|
||||
|
||||
use html5ever::tendril::StrTendril;
|
||||
use html5ever::{serialize, Attribute, LocalName, QualName};
|
||||
|
||||
use url::{ParseError, Url};
|
||||
|
||||
mod arena_dom;
|
||||
mod config;
|
||||
|
||||
use arena_dom::{create_element, html5ever_parse_slice_into_arena, Arena, NodeData, Ref};
|
||||
use config::permissive::{ADD_ATTRIBUTES, ALL_ATTRIBUTES, ATTRIBUTES, ELEMENTS, PROTOCOLS};
|
||||
|
||||
fn main() {
|
||||
let mut bytes = Vec::new();
|
||||
io::stdin().read_to_end(&mut bytes).unwrap();
|
||||
let arena = typed_arena::Arena::new();
|
||||
let doc = html5ever_parse_slice_into_arena(&bytes, &arena);
|
||||
sanitize(doc, &arena);
|
||||
serialize(&mut io::stdout(), doc, Default::default())
|
||||
.ok()
|
||||
.expect("serialization failed")
|
||||
}
|
||||
|
||||
fn sanitize<'arena>(node: Ref<'arena>, arena: Arena<'arena>) {
|
||||
if let Some(unwrapped) = maybe_unwrap_node(&node) {
|
||||
if let Some(unwrapped_node) = unwrapped {
|
||||
return sanitize(unwrapped_node, arena);
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
transform_node(&node, arena);
|
||||
|
||||
if let Some(child) = node.first_child.get() {
|
||||
sanitize(child, arena);
|
||||
}
|
||||
|
||||
if let Some(sibling) = node.next_sibling.get() {
|
||||
sanitize(sibling, arena);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: make separate rich and plain transformers
|
||||
// TODO: add whitelist of tags, remove any not in it DONE
|
||||
// TODO: add whitelist of attributes, remove any not in it DONE
|
||||
// TODO: add map of tags to attributes, remove any on tag not in the mapped value DONE
|
||||
// TODO: add whitelist of url schemes, parse urls and remove any not in it DONE
|
||||
// TODO: strip comments DONE
|
||||
// TODO: parse style tags and attributes
|
||||
// TODO: add whitelist of CSS properties, remove any not in it
|
||||
// TODO: scope selectors in rich formatter
|
||||
// TODO: add class attributes to elements in rich formatter
|
||||
fn transform_node<'arena>(node: Ref<'arena>, arena: Arena<'arena>) {
|
||||
match node.data {
|
||||
NodeData::Document
|
||||
| NodeData::Doctype { .. }
|
||||
| NodeData::Text { .. }
|
||||
| NodeData::Comment { .. }
|
||||
| NodeData::ProcessingInstruction { .. } => {}
|
||||
NodeData::Element {
|
||||
ref attrs,
|
||||
ref name,
|
||||
..
|
||||
} => {
|
||||
let ref mut attrs = attrs.borrow_mut();
|
||||
|
||||
let mut allowed_attrs: HashSet<LocalName> = ALL_ATTRIBUTES.clone();
|
||||
if let Some(element_attrs) = ATTRIBUTES.get(&name.local) {
|
||||
allowed_attrs = allowed_attrs
|
||||
.union(element_attrs)
|
||||
.into_iter()
|
||||
.cloned()
|
||||
.collect();
|
||||
}
|
||||
let mut i = 0;
|
||||
while i != attrs.len() {
|
||||
if !allowed_attrs.contains(&attrs[i].name.local) {
|
||||
attrs.remove(i);
|
||||
} else {
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(add_attributes) = ADD_ATTRIBUTES.get(&name.local) {
|
||||
for (name, &value) in add_attributes.iter() {
|
||||
attrs.push(Attribute {
|
||||
name: QualName::new(None, ns!(), name.clone()),
|
||||
value: StrTendril::from(value),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(protocols) = PROTOCOLS.get(&name.local) {
|
||||
let mut i = 0;
|
||||
while i != attrs.len() {
|
||||
if let Some(allowed_protocols) = protocols.get(&attrs[i].name.local) {
|
||||
match Url::parse(&attrs[i].value) {
|
||||
Ok(url) => {
|
||||
if !allowed_protocols.contains(url.scheme()) {
|
||||
attrs.remove(i);
|
||||
} else {
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
Err(ParseError::RelativeUrlWithoutBase) => {
|
||||
attrs[i].value =
|
||||
StrTendril::from(format!("http://{}", attrs[i].value));
|
||||
i += 1;
|
||||
}
|
||||
Err(_) => {
|
||||
attrs.remove(i);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match name.local {
|
||||
local_name!("ul") => {
|
||||
node.insert_before(create_element(
|
||||
arena,
|
||||
QualName::new(None, ns!(), LocalName::from("single")),
|
||||
));
|
||||
node.insert_after(create_element(
|
||||
arena,
|
||||
QualName::new(None, ns!(), LocalName::from("single")),
|
||||
));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn maybe_unwrap_node<'arena>(node: Ref<'arena>) -> Option<Option<Ref<'arena>>> {
|
||||
match node.data {
|
||||
NodeData::Document
|
||||
| NodeData::Doctype { .. }
|
||||
| NodeData::Text { .. }
|
||||
| NodeData::ProcessingInstruction { .. } => None,
|
||||
NodeData::Comment { .. } => Some(node.unwrap()),
|
||||
NodeData::Element { ref name, .. } => {
|
||||
if !ELEMENTS.contains(&name.local) {
|
||||
Some(node.unwrap())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user