Initial commit: arena_dom and basic sanitize

This commit is contained in:
Tyler Hallada 2019-03-09 20:26:43 -05:00
commit 6f682f1e91
9 changed files with 1616 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/target
**/*.rs.bk

502
Cargo.lock generated Normal file
View File

@ -0,0 +1,502 @@
[[package]]
name = "autocfg"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "bitflags"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cfg-if"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cloudabi"
version = "0.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "fuchsia-cprng"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "futf"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"new_debug_unreachable 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "html5ever"
version = "0.22.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"markup5ever 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)",
"proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 0.15.26 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "idna"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-normalization 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "itoa"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "lazy_static"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "libc"
version = "0.2.49"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "log"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "mac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "maplit"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "markup5ever"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"phf 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)",
"phf_codegen 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.88 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.88 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.38 (registry+https://github.com/rust-lang/crates.io-index)",
"string_cache 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
"string_cache_codegen 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
"tendril 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "matches"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "new_debug_unreachable"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "percent-encoding"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "phf"
version = "0.7.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"phf_shared 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "phf_codegen"
version = "0.7.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"phf_generator 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)",
"phf_shared 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "phf_generator"
version = "0.7.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"phf_shared 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "phf_shared"
version = "0.7.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "precomputed-hash"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "proc-macro2"
version = "0.4.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "quote"
version = "0.6.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"autocfg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.49 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_jitter 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_os 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_chacha"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"autocfg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_core"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_core"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "rand_hc"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_isaac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_jitter"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.49 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_os"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.49 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_pcg"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"autocfg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_xorshift"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rdrand"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "ryu"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "sanitizer"
version = "0.1.0"
dependencies = [
"html5ever 0.22.5 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"maplit 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"string_cache 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
"typed-arena 1.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
"url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "serde"
version = "1.0.88"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "serde_derive"
version = "1.0.88"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 0.15.26 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "serde_json"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
"ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.88 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "siphasher"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "smallvec"
version = "0.6.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "string_cache"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"new_debug_unreachable 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"phf_shared 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)",
"precomputed-hash 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.88 (registry+https://github.com/rust-lang/crates.io-index)",
"string_cache_codegen 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
"string_cache_shared 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "string_cache_codegen"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"phf_generator 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)",
"phf_shared 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)",
"proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)",
"string_cache_shared 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "string_cache_shared"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "syn"
version = "0.15.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "tendril"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"futf 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"utf-8 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "typed-arena"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "unicode-bidi"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "unicode-normalization"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"smallvec 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "unicode-xid"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "url"
version = "1.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
"percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "utf-8"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "winapi"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[metadata]
"checksum autocfg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a6d640bee2da49f60a4068a7fae53acde8982514ab7bae8b8cea9e88cbcfd799"
"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12"
"checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4"
"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
"checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
"checksum futf 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "7c9c1ce3fa9336301af935ab852c437817d14cd33690446569392e65170aac3b"
"checksum html5ever 0.22.5 (registry+https://github.com/rust-lang/crates.io-index)" = "c213fa6a618dc1da552f54f85cba74b05d8e883c92ec4e89067736938084c26e"
"checksum idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e"
"checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b"
"checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14"
"checksum libc 0.2.49 (registry+https://github.com/rust-lang/crates.io-index)" = "413f3dfc802c5dc91dc570b05125b6cda9855edfaa9825c9849807876376e70e"
"checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6"
"checksum mac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
"checksum maplit 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "08cbb6b4fef96b6d77bfc40ec491b1690c779e77b05cd9f07f787ed376fd4c43"
"checksum markup5ever 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)" = "897636f9850c3eef4905a5540683ed53dc9393860f0846cab2c2ddf9939862ff"
"checksum matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08"
"checksum new_debug_unreachable 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "fe2deb65e9f08f6540e6766481b9dc3a36e73d2fdb96e82bc3cd56353fafe90a"
"checksum percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "31010dd2e1ac33d5b46a5b413495239882813e0369f8ed8a5e266f173602f831"
"checksum phf 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)" = "b3da44b85f8e8dfaec21adae67f95d93244b2ecf6ad2a692320598dcc8e6dd18"
"checksum phf_codegen 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)" = "b03e85129e324ad4166b06b2c7491ae27fe3ec353af72e72cd1654c7225d517e"
"checksum phf_generator 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)" = "09364cc93c159b8b06b1f4dd8a4398984503483891b0c26b867cf431fb132662"
"checksum phf_shared 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)" = "234f71a15de2288bcb7e3b6515828d22af7ec8598ee6d24c3b526fa0a80b67a0"
"checksum precomputed-hash 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
"checksum proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)" = "4d317f9caece796be1980837fd5cb3dfec5613ebdb04ad0956deea83ce168915"
"checksum quote 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)" = "cdd8e04bd9c52e0342b406469d494fcb033be4bdbe5c606016defbb1681411e1"
"checksum rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca"
"checksum rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef"
"checksum rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b"
"checksum rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d0e7a549d590831370895ab7ba4ea0c1b6b011d106b5ff2da6eee112615e6dc0"
"checksum rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4"
"checksum rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08"
"checksum rand_jitter 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "7b9ea758282efe12823e0d952ddb269d2e1897227e464919a554f2a03ef1b832"
"checksum rand_os 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "b7c690732391ae0abafced5015ffb53656abfaec61b342290e5eb56b286a679d"
"checksum rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44"
"checksum rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c"
"checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
"checksum ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7"
"checksum serde 1.0.88 (registry+https://github.com/rust-lang/crates.io-index)" = "9f301d728f2b94c9a7691c90f07b0b4e8a4517181d9461be94c04bddeb4bd850"
"checksum serde_derive 1.0.88 (registry+https://github.com/rust-lang/crates.io-index)" = "beed18e6f5175aef3ba670e57c60ef3b1b74d250d962a26604bff4c80e970dd4"
"checksum serde_json 1.0.38 (registry+https://github.com/rust-lang/crates.io-index)" = "27dce848e7467aa0e2fcaf0a413641499c0b745452aaca1194d24dedde9e13c9"
"checksum siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac"
"checksum smallvec 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "c4488ae950c49d403731982257768f48fada354a5203fe81f9bb6f43ca9002be"
"checksum string_cache 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "25d70109977172b127fe834e5449e5ab1740b9ba49fa18a2020f509174f25423"
"checksum string_cache_codegen 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1eea1eee654ef80933142157fdad9dd8bc43cf7c74e999e369263496f04ff4da"
"checksum string_cache_shared 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b1884d1bc09741d466d9b14e6d37ac89d6909cbcac41dd9ae982d4d063bbedfc"
"checksum syn 0.15.26 (registry+https://github.com/rust-lang/crates.io-index)" = "f92e629aa1d9c827b2bb8297046c1ccffc57c99b947a680d3ccff1f136a3bee9"
"checksum tendril 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "707feda9f2582d5d680d733e38755547a3e8fb471e7ba11452ecfd9ce93a5d3b"
"checksum typed-arena 1.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c6c06a92aef38bb4dc5b0df00d68496fc31307c5344c867bb61678c6e1671ec5"
"checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5"
"checksum unicode-normalization 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "141339a08b982d942be2ca06ff8b076563cbe223d1befd5450716790d44e2426"
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
"checksum url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dd4e7c0d531266369519a4aa4f399d748bd37043b00bde1e4ff1f60a120b355a"
"checksum utf-8 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)" = "05e42f7c18b8f902290b009cde6d651262f956c98bc51bca4cd1d511c9cd85c7"
"checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0"
"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

13
Cargo.toml Normal file
View File

@ -0,0 +1,13 @@
[package]
name = "sanitizer"
version = "0.1.0"
authors = ["Tyler Hallada <tyler@consider.co>"]
edition = "2018"
[dependencies]
html5ever = "*"
lazy_static = "1.3.0"
maplit = "1.0.1"
string_cache = "0.7"
typed-arena = "1.4.1"
url = "1.7.2"

484
src/arena_dom.rs Normal file
View File

@ -0,0 +1,484 @@
// Majority of this file is from the html5ever project.
// https://github.com/servo/html5ever/blob/45b2fca5c6/html5ever/examples/arena.rs
//
// Copyright 2014-2017 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
extern crate html5ever;
extern crate typed_arena;
use std::borrow::Cow;
use std::cell::{Cell, RefCell};
use std::collections::HashSet;
use std::default::Default;
use std::io;
use std::ptr;
use html5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
use html5ever::serialize::TraversalScope::{ChildrenOnly, IncludeNode};
use html5ever::serialize::{Serialize, Serializer, TraversalScope};
use html5ever::tendril::{StrTendril, TendrilSink};
use html5ever::{parse_document, Attribute, ExpandedName, QualName};
pub fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<'a> {
let sink = Sink {
arena: arena,
document: arena.alloc(Node::new(NodeData::Document)),
quirks_mode: QuirksMode::NoQuirks,
};
parse_document(sink, Default::default())
.from_utf8()
.one(bytes)
}
pub fn create_element<'arena>(arena: Arena<'arena>, name: QualName) -> Ref<'arena> {
arena.alloc(Node::new(NodeData::Element {
name: name,
attrs: RefCell::new(vec![]),
template_contents: None,
mathml_annotation_xml_integration_point: false,
}))
}
pub type Arena<'arena> = &'arena typed_arena::Arena<Node<'arena>>;
pub type Ref<'arena> = &'arena Node<'arena>;
pub type Link<'arena> = Cell<Option<Ref<'arena>>>;
pub struct Sink<'arena> {
arena: Arena<'arena>,
document: Ref<'arena>,
quirks_mode: QuirksMode,
}
pub struct Node<'arena> {
pub parent: Link<'arena>,
pub next_sibling: Link<'arena>,
pub previous_sibling: Link<'arena>,
pub first_child: Link<'arena>,
pub last_child: Link<'arena>,
pub data: NodeData<'arena>,
}
pub enum NodeData<'arena> {
Document,
Doctype {
name: StrTendril,
public_id: StrTendril,
system_id: StrTendril,
},
Text {
contents: RefCell<StrTendril>,
},
Comment {
contents: StrTendril,
},
Element {
name: QualName,
attrs: RefCell<Vec<Attribute>>,
template_contents: Option<Ref<'arena>>,
mathml_annotation_xml_integration_point: bool,
},
ProcessingInstruction {
target: StrTendril,
contents: StrTendril,
},
}
impl<'arena> Node<'arena> {
pub fn new(data: NodeData<'arena>) -> Self {
Node {
parent: Cell::new(None),
previous_sibling: Cell::new(None),
next_sibling: Cell::new(None),
first_child: Cell::new(None),
last_child: Cell::new(None),
data: data,
}
}
pub fn detach(&self) {
let parent = self.parent.take();
let previous_sibling = self.previous_sibling.take();
let next_sibling = self.next_sibling.take();
if let Some(next_sibling) = next_sibling {
next_sibling.previous_sibling.set(previous_sibling);
} else if let Some(parent) = parent {
parent.last_child.set(previous_sibling);
}
if let Some(previous_sibling) = previous_sibling {
previous_sibling.next_sibling.set(next_sibling);
} else if let Some(parent) = parent {
parent.first_child.set(next_sibling);
}
}
pub fn unwrap(&self) -> Option<&'arena Self> {
let parent = self.parent.take();
let previous_sibling = self.previous_sibling.take();
let next_sibling = self.next_sibling.take();
let first_child = self.first_child.take();
let last_child = self.last_child.take();
if let Some(next_sibling) = next_sibling {
if let Some(last_child) = last_child {
next_sibling.previous_sibling.set(Some(last_child));
} else {
next_sibling.previous_sibling.set(previous_sibling);
}
} else if let Some(parent) = parent {
parent.last_child.set(previous_sibling);
if let Some(last_child) = last_child {
parent.last_child.set(Some(last_child));
} else {
parent.last_child.set(previous_sibling);
}
}
if let Some(previous_sibling) = previous_sibling {
if let Some(first_child) = first_child {
previous_sibling.next_sibling.set(Some(first_child));
} else {
previous_sibling.next_sibling.set(next_sibling);
}
} else if let Some(parent) = parent {
parent.first_child.set(next_sibling);
if let Some(first_child) = first_child {
parent.first_child.set(Some(first_child));
} else {
parent.first_child.set(next_sibling);
}
}
let mut child = first_child;
loop {
match child {
Some(next_child) => {
next_child.parent.set(parent);
child = next_child.next_sibling.get();
},
None => break,
}
}
if let Some(first_child) = first_child {
Some(first_child)
} else if let Some(next_sibling) = next_sibling {
Some(next_sibling)
} else {
None
}
}
pub fn append(&'arena self, new_child: &'arena Self) {
new_child.detach();
new_child.parent.set(Some(self));
if let Some(last_child) = self.last_child.take() {
new_child.previous_sibling.set(Some(last_child));
debug_assert!(last_child.next_sibling.get().is_none());
last_child.next_sibling.set(Some(new_child));
} else {
debug_assert!(self.first_child.get().is_none());
self.first_child.set(Some(new_child));
}
self.last_child.set(Some(new_child));
}
pub fn insert_before(&'arena self, new_sibling: &'arena Self) {
new_sibling.detach();
new_sibling.parent.set(self.parent.get());
new_sibling.next_sibling.set(Some(self));
if let Some(previous_sibling) = self.previous_sibling.take() {
new_sibling.previous_sibling.set(Some(previous_sibling));
debug_assert!(ptr::eq::<Node>(
previous_sibling.next_sibling.get().unwrap(),
self
));
previous_sibling.next_sibling.set(Some(new_sibling));
} else if let Some(parent) = self.parent.get() {
debug_assert!(ptr::eq::<Node>(parent.first_child.get().unwrap(), self));
parent.first_child.set(Some(new_sibling));
}
self.previous_sibling.set(Some(new_sibling));
}
pub fn insert_after(&'arena self, new_sibling: &'arena Self) {
new_sibling.detach();
new_sibling.parent.set(self.parent.get());
new_sibling.previous_sibling.set(Some(self));
if let Some(next_sibling) = self.next_sibling.take() {
new_sibling.next_sibling.set(Some(next_sibling));
debug_assert!(ptr::eq::<Node>(
next_sibling.previous_sibling.get().unwrap(),
self
));
next_sibling.previous_sibling.set(Some(new_sibling));
} else if let Some(parent) = self.parent.get() {
debug_assert!(ptr::eq::<Node>(parent.last_child.get().unwrap(), self));
parent.last_child.set(Some(new_sibling));
}
self.next_sibling.set(Some(new_sibling));
}
}
impl<'arena> Sink<'arena> {
fn new_node(&self, data: NodeData<'arena>) -> Ref<'arena> {
self.arena.alloc(Node::new(data))
}
fn append_common<P, A>(&self, child: NodeOrText<Ref<'arena>>, previous: P, append: A)
where
P: FnOnce() -> Option<Ref<'arena>>,
A: FnOnce(Ref<'arena>),
{
let new_node = match child {
NodeOrText::AppendText(text) => {
// Append to an existing Text node if we have one.
if let Some(&Node {
data: NodeData::Text { ref contents },
..
}) = previous()
{
contents.borrow_mut().push_tendril(&text);
return;
}
self.new_node(NodeData::Text {
contents: RefCell::new(text),
})
}
NodeOrText::AppendNode(node) => node,
};
append(new_node)
}
}
impl<'arena> TreeSink for Sink<'arena> {
type Handle = Ref<'arena>;
type Output = Ref<'arena>;
fn finish(self) -> Ref<'arena> {
self.document
}
fn parse_error(&mut self, _: Cow<'static, str>) {}
fn get_document(&mut self) -> Ref<'arena> {
self.document
}
fn set_quirks_mode(&mut self, mode: QuirksMode) {
self.quirks_mode = mode;
}
fn same_node(&self, x: &Ref<'arena>, y: &Ref<'arena>) -> bool {
ptr::eq::<Node>(*x, *y)
}
fn elem_name<'a>(&self, target: &'a Ref<'arena>) -> ExpandedName<'a> {
match target.data {
NodeData::Element { ref name, .. } => name.expanded(),
_ => panic!("not an element!"),
}
}
fn get_template_contents(&mut self, target: &Ref<'arena>) -> Ref<'arena> {
if let NodeData::Element {
template_contents: Some(ref contents),
..
} = target.data
{
contents
} else {
panic!("not a template element!")
}
}
fn is_mathml_annotation_xml_integration_point(&self, target: &Ref<'arena>) -> bool {
if let NodeData::Element {
mathml_annotation_xml_integration_point,
..
} = target.data
{
mathml_annotation_xml_integration_point
} else {
panic!("not an element!")
}
}
fn create_element(
&mut self,
name: QualName,
attrs: Vec<Attribute>,
flags: ElementFlags,
) -> Ref<'arena> {
self.new_node(NodeData::Element {
name: name,
attrs: RefCell::new(attrs),
template_contents: if flags.template {
Some(self.new_node(NodeData::Document))
} else {
None
},
mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point,
})
}
fn create_comment(&mut self, text: StrTendril) -> Ref<'arena> {
self.new_node(NodeData::Comment { contents: text })
}
fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Ref<'arena> {
self.new_node(NodeData::ProcessingInstruction {
target: target,
contents: data,
})
}
fn append(&mut self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
self.append_common(
child,
|| parent.last_child.get(),
|new_node| parent.append(new_node),
)
}
fn append_before_sibling(&mut self, sibling: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
self.append_common(
child,
|| sibling.previous_sibling.get(),
|new_node| sibling.insert_before(new_node),
)
}
fn append_based_on_parent_node(
&mut self,
element: &Ref<'arena>,
prev_element: &Ref<'arena>,
child: NodeOrText<Ref<'arena>>,
) {
if element.parent.get().is_some() {
self.append_before_sibling(element, child)
} else {
self.append(prev_element, child)
}
}
fn append_doctype_to_document(
&mut self,
name: StrTendril,
public_id: StrTendril,
system_id: StrTendril,
) {
self.document.append(self.new_node(NodeData::Doctype {
name: name,
public_id: public_id,
system_id: system_id,
}))
}
fn add_attrs_if_missing(&mut self, target: &Ref<'arena>, attrs: Vec<Attribute>) {
let mut existing = if let NodeData::Element { ref attrs, .. } = target.data {
attrs.borrow_mut()
} else {
panic!("not an element")
};
let existing_names = existing
.iter()
.map(|e| e.name.clone())
.collect::<HashSet<_>>();
existing.extend(
attrs
.into_iter()
.filter(|attr| !existing_names.contains(&attr.name)),
);
}
fn remove_from_parent(&mut self, target: &Ref<'arena>) {
target.detach()
}
fn reparent_children(&mut self, node: &Ref<'arena>, new_parent: &Ref<'arena>) {
let mut next_child = node.first_child.get();
while let Some(child) = next_child {
debug_assert!(ptr::eq::<Node>(child.parent.get().unwrap(), *node));
next_child = child.next_sibling.get();
new_parent.append(child)
}
}
}
// Implementation adapted from implementation for RcDom:
// https://github.com/servo/html5ever/blob/45b2fca5c6/markup5ever/rcdom.rs#L410
impl<'arena> Serialize for Node<'arena> {
fn serialize<S>(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()>
where
S: Serializer,
{
match (&traversal_scope, &self.data) {
(
_,
&NodeData::Element {
ref name,
ref attrs,
..
},
) => {
if traversal_scope == IncludeNode {
serializer.start_elem(
name.clone(),
attrs.borrow().iter().map(|at| (&at.name, &at.value[..])),
)?;
}
if let Some(child) = self.first_child.get() {
child.serialize(serializer, IncludeNode)?;
}
if traversal_scope == IncludeNode {
serializer.end_elem(name.clone())?;
}
}
(&ChildrenOnly(_), &NodeData::Document) => {
if let Some(child) = self.first_child.get() {
child.serialize(serializer, IncludeNode)?;
}
}
(&ChildrenOnly(_), _) => {},
(&IncludeNode, &NodeData::Doctype { ref name, .. }) => serializer.write_doctype(&name)?,
(&IncludeNode, &NodeData::Text { ref contents }) => {
serializer.write_text(&contents.borrow())?
}
(&IncludeNode, &NodeData::Comment { ref contents }) => {
serializer.write_comment(&contents)?
}
(
&IncludeNode,
&NodeData::ProcessingInstruction {
ref target,
ref contents,
},
) => serializer.write_processing_instruction(target, contents)?,
(&IncludeNode, &NodeData::Document) => panic!("Can't serialize Document node itself"),
}
if let Some(sibling) = self.next_sibling.get() {
sibling.serialize(serializer, IncludeNode)?
}
Ok(())
}
}

72
src/config/basic.rs Normal file
View File

@ -0,0 +1,72 @@
use html5ever::LocalName;
use std::collections::{HashMap, HashSet};
lazy_static! {
pub static ref ELEMENTS: HashSet<LocalName> = hashset! {
local_name!("a"),
local_name!("abbr"),
local_name!("blockquote"),
local_name!("br"),
local_name!("cite"),
local_name!("code"),
local_name!("dd"),
local_name!("dfn"),
local_name!("dl"),
local_name!("dt"),
local_name!("kbd"),
local_name!("li"),
local_name!("mark"),
local_name!("ol"),
local_name!("p"),
local_name!("pre"),
local_name!("q"),
local_name!("s"),
local_name!("samp"),
local_name!("small"),
local_name!("strike"),
local_name!("sub"),
local_name!("sup"),
local_name!("time"),
local_name!("ul"),
local_name!("var"),
};
pub static ref ALL_ATTRIBUTES: HashSet<LocalName> = hashset! {};
pub static ref ATTRIBUTES: HashMap<LocalName, HashSet<LocalName>> = hashmap! {
local_name!("a") => hashset!{
local_name!("href"),
},
local_name!("abbr") => hashset!{
local_name!("title"),
},
local_name!("blockquote") => hashset!{
local_name!("cite"),
},
local_name!("dfn") => hashset!{
local_name!("title"),
},
local_name!("q") => hashset!{
local_name!("cite"),
},
local_name!("time") => hashset!{
local_name!("datetime"),
LocalName::from("pubdate"),
},
};
pub static ref ADD_ATTRIBUTES: HashMap<LocalName, HashMap<LocalName, &'static str>> = hashmap! {
local_name!("a") => hashmap! {
local_name!("rel") => "nofollow",
},
};
pub static ref PROTOCOLS: HashMap<LocalName, HashMap<LocalName, HashSet<&'static str>>> = hashmap! {
local_name!("a") => hashmap! {
local_name!("href") => hashset!{"ftp", "http", "https", "mailto"},
},
local_name!("blockquote") => hashmap! {
local_name!("cite") => hashset!{"http", "https"},
},
local_name!("q") => hashmap! {
local_name!("cite") => hashset!{"http", "https"},
},
};
}

3
src/config/mod.rs Normal file
View File

@ -0,0 +1,3 @@
pub mod basic;
pub mod relaxed;
pub mod permissive;

177
src/config/permissive.rs Normal file
View File

@ -0,0 +1,177 @@
use html5ever::LocalName;
use std::collections::{HashMap, HashSet};
use super::relaxed::{
ADD_ATTRIBUTES as RELAXED_ADD_ATTRIBUTES, ALL_ATTRIBUTES as RELAXED_ALL_ATTRIBUTES,
ATTRIBUTES as RELAXED_ATTRIBUTES, ELEMENTS as RELAXED_ELEMENTS,
};
lazy_static! {
pub static ref ELEMENTS: HashSet<LocalName> = RELAXED_ELEMENTS
.union(&hashset!(
local_name!("acronym"),
local_name!("basefont"),
local_name!("big"),
local_name!("blink"),
local_name!("center"),
LocalName::from("command"),
local_name!("dir"),
local_name!("font"),
local_name!("marquee"),
local_name!("strike"),
local_name!("tt"),
local_name!("form"),
local_name!("input"),
local_name!("button"),
LocalName::from("single"),
LocalName::from("double"),
))
.into_iter()
.cloned()
.collect();
pub static ref ALL_ATTRIBUTES: HashSet<LocalName> = RELAXED_ALL_ATTRIBUTES
.union(&hashset! {
local_name!("bgcolor"),
local_name!("width"),
local_name!("height"),
local_name!("border"),
local_name!("color"),
local_name!("background"),
})
.into_iter()
.cloned()
.collect();
// Can't figure out how to merge HashMaps :(
pub static ref ATTRIBUTES: HashMap<LocalName, HashSet<LocalName>> = hashmap! {
local_name!("a") => hashset!{
local_name!("href"),
local_name!("hreflang"),
local_name!("name"),
local_name!("rel"),
},
local_name!("abbr") => hashset!{
local_name!("title"),
},
local_name!("blockquote") => hashset!{
local_name!("cite"),
},
local_name!("button") => hashset!{
local_name!("type"),
},
local_name!("col") => hashset!{
local_name!("span"),
local_name!("width"),
},
local_name!("colgroup") => hashset!{
local_name!("span"),
local_name!("width"),
},
local_name!("data") => hashset!{
local_name!("value"),
},
local_name!("del") => hashset!{
local_name!("cite"),
local_name!("datetime"),
},
local_name!("dfn") => hashset!{
local_name!("title"),
},
local_name!("img") => hashset!{
local_name!("align"),
local_name!("alt"),
local_name!("border"),
local_name!("height"),
local_name!("src"),
local_name!("srcset"),
local_name!("width"),
},
local_name!("input") => hashset!{
local_name!("type"),
local_name!("name"),
local_name!("value"),
},
local_name!("ins") => hashset!{
local_name!("cite"),
local_name!("datetime"),
},
local_name!("li") => hashset!{
local_name!("value"),
},
local_name!("ol") => hashset!{
LocalName::from("reversed"),
local_name!("start"),
local_name!("type"),
},
local_name!("q") => hashset!{
local_name!("cite"),
},
local_name!("style") => hashset!{
local_name!("media"),
local_name!("scoped"),
local_name!("type"),
},
local_name!("table") => hashset!{
local_name!("align"),
local_name!("bgcolor"),
local_name!("border"),
local_name!("cellpadding"),
local_name!("cellspacing"),
local_name!("frame"),
local_name!("rules"),
LocalName::from("sortable"),
local_name!("summary"),
local_name!("width"),
},
local_name!("td") => hashset!{
local_name!("abbr"),
local_name!("align"),
local_name!("axis"),
local_name!("colspan"),
local_name!("headers"),
local_name!("rowspan"),
local_name!("valign"),
local_name!("width"),
},
local_name!("th") => hashset!{
local_name!("abbr"),
local_name!("align"),
local_name!("axis"),
local_name!("colspan"),
local_name!("headers"),
local_name!("rowspan"),
local_name!("scope"),
LocalName::from("sorted"),
local_name!("valign"),
local_name!("width"),
},
local_name!("time") => hashset!{
local_name!("datetime"),
LocalName::from("pubdate"),
},
local_name!("ul") => hashset!{
local_name!("type"),
},
};
pub static ref ADD_ATTRIBUTES: HashMap<LocalName, HashMap<LocalName, &'static str>> = RELAXED_ADD_ATTRIBUTES.clone();
pub static ref PROTOCOLS: HashMap<LocalName, HashMap<LocalName, HashSet<&'static str>>> = hashmap! {
local_name!("a") => hashmap! {
local_name!("href") => hashset!{"ftp", "http", "https", "mailto"},
},
local_name!("blockquote") => hashmap! {
local_name!("cite") => hashset!{"http", "https"},
},
local_name!("del") => hashmap! {
local_name!("cite") => hashset!{"http", "https"},
},
local_name!("img") => hashmap! {
local_name!("src") => hashset!{"http", "https"},
},
local_name!("ins") => hashmap! {
local_name!("cite") => hashset!{"http", "https"},
},
local_name!("q") => hashmap! {
local_name!("cite") => hashset!{"http", "https"},
},
};
}

199
src/config/relaxed.rs Normal file
View File

@ -0,0 +1,199 @@
use html5ever::LocalName;
use std::collections::{HashMap, HashSet};
use super::basic::{
ADD_ATTRIBUTES as BASIC_ADD_ATTRIBUTES, ALL_ATTRIBUTES as BASIC_ALL_ATTRIBUTES,
ATTRIBUTES as BASIC_ATTRIBUTES, ELEMENTS as BASIC_ELEMENTS,
};
lazy_static! {
pub static ref ELEMENTS: HashSet<LocalName> = BASIC_ELEMENTS
.union(&hashset!(
local_name!("address"),
local_name!("article"),
local_name!("aside"),
local_name!("bdi"),
local_name!("bdo"),
local_name!("body"),
local_name!("caption"),
local_name!("col"),
local_name!("colgroup"),
local_name!("data"),
local_name!("del"),
local_name!("div"),
local_name!("figcaption"),
local_name!("figure"),
local_name!("footer"),
local_name!("h1"),
local_name!("h2"),
local_name!("h3"),
local_name!("h4"),
local_name!("h5"),
local_name!("h6"),
local_name!("head"),
local_name!("header"),
local_name!("hgroup"),
local_name!("hr"),
local_name!("html"),
local_name!("img"),
local_name!("ins"),
local_name!("main"),
local_name!("nav"),
local_name!("rp"),
local_name!("rt"),
local_name!("ruby"),
local_name!("section"),
local_name!("span"),
local_name!("style"),
local_name!("summary"),
local_name!("sup"),
local_name!("table"),
local_name!("tbody"),
local_name!("td"),
local_name!("tfoot"),
local_name!("th"),
local_name!("thead"),
local_name!("title"),
local_name!("tr"),
local_name!("wbr"),
))
.into_iter()
.cloned()
.collect();
pub static ref ALL_ATTRIBUTES: HashSet<LocalName> = BASIC_ALL_ATTRIBUTES.union(&hashset! {
local_name!("class"),
local_name!("dir"),
local_name!("hidden"),
local_name!("id"),
local_name!("lang"),
local_name!("style"),
local_name!("tabindex"),
local_name!("title"),
LocalName::from("translate"),
}).into_iter().cloned().collect();
// Can't figure out how to merge HashMaps :(
pub static ref ATTRIBUTES: HashMap<LocalName, HashSet<LocalName>> = hashmap! {
local_name!("a") => hashset!{
local_name!("href"),
local_name!("hreflang"),
local_name!("name"),
local_name!("rel"),
},
local_name!("abbr") => hashset!{
local_name!("title"),
},
local_name!("blockquote") => hashset!{
local_name!("cite"),
},
local_name!("col") => hashset!{
local_name!("span"),
local_name!("width"),
},
local_name!("colgroup") => hashset!{
local_name!("span"),
local_name!("width"),
},
local_name!("data") => hashset!{
local_name!("value"),
},
local_name!("del") => hashset!{
local_name!("cite"),
local_name!("datetime"),
},
local_name!("dfn") => hashset!{
local_name!("title"),
},
local_name!("img") => hashset!{
local_name!("align"),
local_name!("alt"),
local_name!("border"),
local_name!("height"),
local_name!("src"),
local_name!("srcset"),
local_name!("width"),
},
local_name!("ins") => hashset!{
local_name!("cite"),
local_name!("datetime"),
},
local_name!("li") => hashset!{
local_name!("value"),
},
local_name!("ol") => hashset!{
LocalName::from("reversed"),
local_name!("start"),
local_name!("type"),
},
local_name!("q") => hashset!{
local_name!("cite"),
},
local_name!("style") => hashset!{
local_name!("media"),
local_name!("scoped"),
local_name!("type"),
},
local_name!("table") => hashset!{
local_name!("align"),
local_name!("bgcolor"),
local_name!("border"),
local_name!("cellpadding"),
local_name!("cellspacing"),
local_name!("frame"),
local_name!("rules"),
LocalName::from("sortable"),
local_name!("summary"),
local_name!("width"),
},
local_name!("td") => hashset!{
local_name!("abbr"),
local_name!("align"),
local_name!("axis"),
local_name!("colspan"),
local_name!("headers"),
local_name!("rowspan"),
local_name!("valign"),
local_name!("width"),
},
local_name!("th") => hashset!{
local_name!("abbr"),
local_name!("align"),
local_name!("axis"),
local_name!("colspan"),
local_name!("headers"),
local_name!("rowspan"),
local_name!("scope"),
LocalName::from("sorted"),
local_name!("valign"),
local_name!("width"),
},
local_name!("time") => hashset!{
local_name!("datetime"),
LocalName::from("pubdate"),
},
local_name!("ul") => hashset!{
local_name!("type"),
},
};
pub static ref ADD_ATTRIBUTES: HashMap<LocalName, HashMap<LocalName, &'static str>> = BASIC_ADD_ATTRIBUTES.clone();
pub static ref PROTOCOLS: HashMap<LocalName, HashMap<LocalName, HashSet<&'static str>>> = hashmap! {
local_name!("a") => hashmap! {
local_name!("href") => hashset!{"ftp", "http", "https", "mailto"},
},
local_name!("blockquote") => hashmap! {
local_name!("cite") => hashset!{"http", "https"},
},
local_name!("del") => hashmap! {
local_name!("cite") => hashset!{"http", "https"},
},
local_name!("img") => hashmap! {
local_name!("src") => hashset!{"http", "https"},
},
local_name!("ins") => hashmap! {
local_name!("cite") => hashset!{"http", "https"},
},
local_name!("q") => hashmap! {
local_name!("cite") => hashset!{"http", "https"},
},
};
}

164
src/main.rs Normal file
View File

@ -0,0 +1,164 @@
#[macro_use]
extern crate lazy_static;
#[macro_use]
extern crate html5ever;
#[macro_use]
extern crate maplit;
extern crate typed_arena;
use std::collections::HashSet;
use std::default::Default;
use std::io::{self, Read};
use html5ever::tendril::StrTendril;
use html5ever::{serialize, Attribute, LocalName, QualName};
use url::{ParseError, Url};
mod arena_dom;
mod config;
use arena_dom::{create_element, html5ever_parse_slice_into_arena, Arena, NodeData, Ref};
use config::permissive::{ADD_ATTRIBUTES, ALL_ATTRIBUTES, ATTRIBUTES, ELEMENTS, PROTOCOLS};
fn main() {
let mut bytes = Vec::new();
io::stdin().read_to_end(&mut bytes).unwrap();
let arena = typed_arena::Arena::new();
let doc = html5ever_parse_slice_into_arena(&bytes, &arena);
sanitize(doc, &arena);
serialize(&mut io::stdout(), doc, Default::default())
.ok()
.expect("serialization failed")
}
fn sanitize<'arena>(node: Ref<'arena>, arena: Arena<'arena>) {
if let Some(unwrapped) = maybe_unwrap_node(&node) {
if let Some(unwrapped_node) = unwrapped {
return sanitize(unwrapped_node, arena);
} else {
return;
}
}
transform_node(&node, arena);
if let Some(child) = node.first_child.get() {
sanitize(child, arena);
}
if let Some(sibling) = node.next_sibling.get() {
sanitize(sibling, arena);
}
}
// TODO: make separate rich and plain transformers
// TODO: add whitelist of tags, remove any not in it DONE
// TODO: add whitelist of attributes, remove any not in it DONE
// TODO: add map of tags to attributes, remove any on tag not in the mapped value DONE
// TODO: add whitelist of url schemes, parse urls and remove any not in it DONE
// TODO: strip comments DONE
// TODO: parse style tags and attributes
// TODO: add whitelist of CSS properties, remove any not in it
// TODO: scope selectors in rich formatter
// TODO: add class attributes to elements in rich formatter
fn transform_node<'arena>(node: Ref<'arena>, arena: Arena<'arena>) {
match node.data {
NodeData::Document
| NodeData::Doctype { .. }
| NodeData::Text { .. }
| NodeData::Comment { .. }
| NodeData::ProcessingInstruction { .. } => {}
NodeData::Element {
ref attrs,
ref name,
..
} => {
let ref mut attrs = attrs.borrow_mut();
let mut allowed_attrs: HashSet<LocalName> = ALL_ATTRIBUTES.clone();
if let Some(element_attrs) = ATTRIBUTES.get(&name.local) {
allowed_attrs = allowed_attrs
.union(element_attrs)
.into_iter()
.cloned()
.collect();
}
let mut i = 0;
while i != attrs.len() {
if !allowed_attrs.contains(&attrs[i].name.local) {
attrs.remove(i);
} else {
i += 1;
}
}
if let Some(add_attributes) = ADD_ATTRIBUTES.get(&name.local) {
for (name, &value) in add_attributes.iter() {
attrs.push(Attribute {
name: QualName::new(None, ns!(), name.clone()),
value: StrTendril::from(value),
});
}
}
if let Some(protocols) = PROTOCOLS.get(&name.local) {
let mut i = 0;
while i != attrs.len() {
if let Some(allowed_protocols) = protocols.get(&attrs[i].name.local) {
match Url::parse(&attrs[i].value) {
Ok(url) => {
if !allowed_protocols.contains(url.scheme()) {
attrs.remove(i);
} else {
i += 1;
}
}
Err(ParseError::RelativeUrlWithoutBase) => {
attrs[i].value =
StrTendril::from(format!("http://{}", attrs[i].value));
i += 1;
}
Err(_) => {
attrs.remove(i);
}
}
} else {
i += 1;
}
}
}
match name.local {
local_name!("ul") => {
node.insert_before(create_element(
arena,
QualName::new(None, ns!(), LocalName::from("single")),
));
node.insert_after(create_element(
arena,
QualName::new(None, ns!(), LocalName::from("single")),
));
}
_ => {}
}
}
}
}
fn maybe_unwrap_node<'arena>(node: Ref<'arena>) -> Option<Option<Ref<'arena>>> {
match node.data {
NodeData::Document
| NodeData::Doctype { .. }
| NodeData::Text { .. }
| NodeData::ProcessingInstruction { .. } => None,
NodeData::Comment { .. } => Some(node.unwrap()),
NodeData::Element { ref name, .. } => {
if !ELEMENTS.contains(&name.local) {
Some(node.unwrap())
} else {
None
}
}
}
}