Fetch and save entry HTML content with metadata
And render the extracted HTML on the entry page in the frontend.
This commit is contained in:
parent
786f3a194f
commit
3f29138bd1
488
Cargo.lock
generated
488
Cargo.lock
generated
@ -2,6 +2,12 @@
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "adler"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
|
||||
|
||||
[[package]]
|
||||
name = "ahash"
|
||||
version = "0.7.6"
|
||||
@ -22,6 +28,21 @@ dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "alloc-no-stdlib"
|
||||
version = "2.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3"
|
||||
|
||||
[[package]]
|
||||
name = "alloc-stdlib"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece"
|
||||
dependencies = [
|
||||
"alloc-no-stdlib",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "android_system_properties"
|
||||
version = "0.1.5"
|
||||
@ -75,6 +96,43 @@ version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "64cb94155d965e3d37ffbbe7cc5b82c3dd79dd33bd48e536f73d2cfb8d85506f"
|
||||
|
||||
[[package]]
|
||||
name = "article_scraper"
|
||||
version = "2.0.0-alpha.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b0058ad303af21c937736589e6f639eb8b3c64ef3f09e7022e3be8885ae93e8"
|
||||
dependencies = [
|
||||
"base64 0.21.0",
|
||||
"chrono",
|
||||
"encoding_rs",
|
||||
"escaper",
|
||||
"futures",
|
||||
"image",
|
||||
"libxml",
|
||||
"log",
|
||||
"once_cell",
|
||||
"regex",
|
||||
"reqwest",
|
||||
"rust-embed",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-compression"
|
||||
version = "0.3.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "942c7cd7ae39e91bde4820d74132e9862e62c2f386c3aa90ccf55949f5bad63a"
|
||||
dependencies = [
|
||||
"brotli",
|
||||
"flate2",
|
||||
"futures-core",
|
||||
"memchr",
|
||||
"pin-project-lite",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-trait"
|
||||
version = "0.1.68"
|
||||
@ -162,6 +220,12 @@ version = "0.21.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a"
|
||||
|
||||
[[package]]
|
||||
name = "bit_field"
|
||||
version = "0.10.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc827186963e592360843fb5ba4b973e145841266c1357f7180c43526f2e5b61"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.3.2"
|
||||
@ -177,12 +241,39 @@ dependencies = [
|
||||
"generic-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "brotli"
|
||||
version = "3.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68"
|
||||
dependencies = [
|
||||
"alloc-no-stdlib",
|
||||
"alloc-stdlib",
|
||||
"brotli-decompressor",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "brotli-decompressor"
|
||||
version = "2.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4b6561fd3f895a11e8f72af2cb7d22e08366bebc2b6b57f7744c4bda27034744"
|
||||
dependencies = [
|
||||
"alloc-no-stdlib",
|
||||
"alloc-stdlib",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bumpalo"
|
||||
version = "3.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b1ce199063694f33ffb7dd4e0ee620741495c32833cde5aa08f02a0bf96f0c8"
|
||||
|
||||
[[package]]
|
||||
name = "bytemuck"
|
||||
version = "1.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.4.3"
|
||||
@ -233,6 +324,12 @@ dependencies = [
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "color_quant"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b"
|
||||
|
||||
[[package]]
|
||||
name = "core-foundation"
|
||||
version = "0.9.3"
|
||||
@ -265,6 +362,7 @@ dependencies = [
|
||||
"ansi-to-html",
|
||||
"anyhow",
|
||||
"argh",
|
||||
"article_scraper",
|
||||
"axum",
|
||||
"bytes",
|
||||
"chrono",
|
||||
@ -304,6 +402,15 @@ version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9cace84e55f07e7301bae1c519df89cdad8cc3cd868413d3fdbdeca9ff3db484"
|
||||
|
||||
[[package]]
|
||||
name = "crc32fast"
|
||||
version = "1.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-channel"
|
||||
version = "0.5.8"
|
||||
@ -314,6 +421,30 @@ dependencies = [
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-deque"
|
||||
version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"crossbeam-epoch",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-epoch"
|
||||
version = "0.9.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"cfg-if",
|
||||
"crossbeam-utils",
|
||||
"memoffset",
|
||||
"scopeguard",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-queue"
|
||||
version = "0.3.8"
|
||||
@ -333,6 +464,12 @@ dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crunchy"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
|
||||
|
||||
[[package]]
|
||||
name = "crypto-common"
|
||||
version = "0.1.6"
|
||||
@ -474,6 +611,12 @@ dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "entities"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b5320ae4c3782150d900b79807611a59a99fc9a1d61d686faafc24b93fc8d7ca"
|
||||
|
||||
[[package]]
|
||||
name = "errno"
|
||||
version = "0.3.1"
|
||||
@ -495,12 +638,37 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "escaper"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a53eb97b7349ba1bdb31839eceafe9aaae8f1d8d944dc589b67fb0b26e1c1666"
|
||||
dependencies = [
|
||||
"entities",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "event-listener"
|
||||
version = "2.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0"
|
||||
|
||||
[[package]]
|
||||
name = "exr"
|
||||
version = "1.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "279d3efcc55e19917fff7ab3ddd6c14afb6a90881a0078465196fe2f99d08c56"
|
||||
dependencies = [
|
||||
"bit_field",
|
||||
"flume",
|
||||
"half",
|
||||
"lebe",
|
||||
"miniz_oxide",
|
||||
"rayon-core",
|
||||
"smallvec",
|
||||
"zune-inflate",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fastrand"
|
||||
version = "1.9.0"
|
||||
@ -510,6 +678,15 @@ dependencies = [
|
||||
"instant",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fdeflate"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d329bdeac514ee06249dabc27877490f17f5d371ec693360768b838e19f3ae10"
|
||||
dependencies = [
|
||||
"simd-adler32",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "feed-rs"
|
||||
version = "1.3.0"
|
||||
@ -540,6 +717,29 @@ dependencies = [
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.0.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743"
|
||||
dependencies = [
|
||||
"crc32fast",
|
||||
"miniz_oxide",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flume"
|
||||
version = "0.10.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1657b4441c3403d9f7b3409e47575237dac27b1b5726df654a6ecbf92f0f7577"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-sink",
|
||||
"nanorand",
|
||||
"pin-project",
|
||||
"spin",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
version = "1.0.7"
|
||||
@ -579,6 +779,21 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures"
|
||||
version = "0.3.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40"
|
||||
dependencies = [
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
"futures-executor",
|
||||
"futures-io",
|
||||
"futures-sink",
|
||||
"futures-task",
|
||||
"futures-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures-channel"
|
||||
version = "0.3.28"
|
||||
@ -595,6 +810,17 @@ version = "0.3.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c"
|
||||
|
||||
[[package]]
|
||||
name = "futures-executor"
|
||||
version = "0.3.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-task",
|
||||
"futures-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures-intrusive"
|
||||
version = "0.4.2"
|
||||
@ -606,6 +832,23 @@ dependencies = [
|
||||
"parking_lot 0.11.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures-io"
|
||||
version = "0.3.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964"
|
||||
|
||||
[[package]]
|
||||
name = "futures-macro"
|
||||
version = "0.3.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.15",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures-sink"
|
||||
version = "0.3.28"
|
||||
@ -624,11 +867,16 @@ version = "0.3.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533"
|
||||
dependencies = [
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
"futures-io",
|
||||
"futures-macro",
|
||||
"futures-sink",
|
||||
"futures-task",
|
||||
"memchr",
|
||||
"pin-project-lite",
|
||||
"pin-utils",
|
||||
"slab",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -648,8 +896,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"js-sys",
|
||||
"libc",
|
||||
"wasi 0.11.0+wasi-snapshot-preview1",
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gif"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "80792593675e051cf94a4b111980da2ba60d4a83e43e0048c5693baab3977045"
|
||||
dependencies = [
|
||||
"color_quant",
|
||||
"weezl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -671,6 +931,15 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "half"
|
||||
version = "2.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "02b4af3693f1b705df946e9fe5631932443781d0aabb423b62fcd4d73f6d2fd0"
|
||||
dependencies = [
|
||||
"crunchy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.12.3"
|
||||
@ -871,6 +1140,25 @@ version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cb56e1aa765b4b4f3aadfab769793b7087bb03a4ea4920644a6d238e2df5b9ed"
|
||||
|
||||
[[package]]
|
||||
name = "image"
|
||||
version = "0.24.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "527909aa81e20ac3a44803521443a765550f09b5130c2c2fa1ea59c2f8f50a3a"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"byteorder",
|
||||
"color_quant",
|
||||
"exr",
|
||||
"gif",
|
||||
"jpeg-decoder",
|
||||
"num-rational",
|
||||
"num-traits",
|
||||
"png",
|
||||
"qoi",
|
||||
"tiff",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "1.9.3"
|
||||
@ -943,6 +1231,15 @@ version = "1.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"
|
||||
|
||||
[[package]]
|
||||
name = "jpeg-decoder"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bc0000e42512c92e31c2252315bda326620a4e034105e900c98ec492fa077b3e"
|
||||
dependencies = [
|
||||
"rayon",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "js-sys"
|
||||
version = "0.3.61"
|
||||
@ -978,12 +1275,29 @@ version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "lebe"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.143"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "edc207893e85c5d6be840e969b496b53d94cec8be2d501b214f50daa97fa8024"
|
||||
|
||||
[[package]]
|
||||
name = "libxml"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ca310e4db05e9b7386ad0734975fabc912b66f6bc50a98f525e690822da1ee0e"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"pkg-config",
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "link-cplusplus"
|
||||
version = "1.0.8"
|
||||
@ -1078,6 +1392,15 @@ version = "2.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
|
||||
|
||||
[[package]]
|
||||
name = "memoffset"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mime"
|
||||
version = "0.3.17"
|
||||
@ -1090,6 +1413,16 @@ version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
|
||||
dependencies = [
|
||||
"adler",
|
||||
"simd-adler32",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mio"
|
||||
version = "0.8.6"
|
||||
@ -1102,6 +1435,15 @@ dependencies = [
|
||||
"windows-sys 0.45.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nanorand"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "native-tls"
|
||||
version = "0.2.11"
|
||||
@ -1168,6 +1510,17 @@ dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-rational"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"num-integer",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.15"
|
||||
@ -1341,6 +1694,19 @@ version = "0.3.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"
|
||||
|
||||
[[package]]
|
||||
name = "png"
|
||||
version = "0.17.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aaeebc51f9e7d2c150d3f3bfeb667f2aa985db5ef1e3d212847bdedb488beeaa"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"crc32fast",
|
||||
"fdeflate",
|
||||
"flate2",
|
||||
"miniz_oxide",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ppv-lite86"
|
||||
version = "0.2.17"
|
||||
@ -1380,6 +1746,15 @@ dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "qoi"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f6d64c71eb498fe9eae14ce4ec935c555749aef511cca85b5568910d6e48001"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.27.1"
|
||||
@ -1429,6 +1804,28 @@ dependencies = [
|
||||
"getrandom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon"
|
||||
version = "1.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b"
|
||||
dependencies = [
|
||||
"either",
|
||||
"rayon-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon-core"
|
||||
version = "1.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d"
|
||||
dependencies = [
|
||||
"crossbeam-channel",
|
||||
"crossbeam-deque",
|
||||
"crossbeam-utils",
|
||||
"num_cpus",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.2.16"
|
||||
@ -1496,6 +1893,7 @@ version = "0.11.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "13293b639a097af28fc8a90f22add145a9c954e49d77da06263d58cf44d5fb91"
|
||||
dependencies = [
|
||||
"async-compression",
|
||||
"base64 0.21.0",
|
||||
"bytes",
|
||||
"encoding_rs",
|
||||
@ -1519,14 +1917,50 @@ dependencies = [
|
||||
"serde_urlencoded",
|
||||
"tokio",
|
||||
"tokio-native-tls",
|
||||
"tokio-util",
|
||||
"tower-service",
|
||||
"url",
|
||||
"wasm-bindgen",
|
||||
"wasm-bindgen-futures",
|
||||
"wasm-streams",
|
||||
"web-sys",
|
||||
"winreg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rust-embed"
|
||||
version = "6.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1b68543d5527e158213414a92832d2aab11a84d2571a5eb021ebe22c43aab066"
|
||||
dependencies = [
|
||||
"rust-embed-impl",
|
||||
"rust-embed-utils",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rust-embed-impl"
|
||||
version = "6.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4d4e0f0ced47ded9a68374ac145edd65a6c1fa13a96447b873660b2a568a0fd7"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rust-embed-utils",
|
||||
"syn 1.0.109",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rust-embed-utils"
|
||||
version = "7.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "512b0ab6853f7e14e3c8754acb43d6f748bb9ced66aa5915a6553ac8213f7731"
|
||||
dependencies = [
|
||||
"sha2",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.37.19"
|
||||
@ -1726,6 +2160,12 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "simd-adler32"
|
||||
version = "0.3.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "238abfbb77c1915110ad968465608b68e869e0772622c9656714e73e5a1a522f"
|
||||
|
||||
[[package]]
|
||||
name = "siphasher"
|
||||
version = "0.3.10"
|
||||
@ -1757,6 +2197,15 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "spin"
|
||||
version = "0.9.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
|
||||
dependencies = [
|
||||
"lock_api",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sqlformat"
|
||||
version = "0.2.1"
|
||||
@ -1962,6 +2411,17 @@ dependencies = [
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tiff"
|
||||
version = "0.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7449334f9ff2baf290d55d73983a7d6fa15e01198faef72af07e2a8db851e471"
|
||||
dependencies = [
|
||||
"flate2",
|
||||
"jpeg-decoder",
|
||||
"weezl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.1.45"
|
||||
@ -2445,6 +2905,19 @@ version = "0.2.84"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d"
|
||||
|
||||
[[package]]
|
||||
name = "wasm-streams"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6bbae3363c08332cadccd13b67db371814cd214c2524020932f0804b8cf7c078"
|
||||
dependencies = [
|
||||
"futures-util",
|
||||
"js-sys",
|
||||
"wasm-bindgen",
|
||||
"wasm-bindgen-futures",
|
||||
"web-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "web-sys"
|
||||
version = "0.3.61"
|
||||
@ -2455,6 +2928,12 @@ dependencies = [
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "weezl"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9193164d4de03a926d909d3bc7c30543cecb35400c02114792c2cae20d5e2dbb"
|
||||
|
||||
[[package]]
|
||||
name = "whoami"
|
||||
version = "1.4.0"
|
||||
@ -2660,3 +3139,12 @@ checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d"
|
||||
dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zune-inflate"
|
||||
version = "0.2.54"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73ab332fe2f6680068f3582b16a24f90ad7096d5d39b974d1c0aff0125116f02"
|
||||
dependencies = [
|
||||
"simd-adler32",
|
||||
]
|
||||
|
@ -14,6 +14,7 @@ path = "src/lib.rs"
|
||||
ansi-to-html = "0.1"
|
||||
anyhow = "1"
|
||||
argh = "0.1"
|
||||
article_scraper = "2.0.0-alpha.0"
|
||||
axum = "0.6"
|
||||
bytes = "1.4"
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
|
@ -18,6 +18,7 @@ CREATE TABLE IF NOT EXISTS "entries" (
|
||||
"title" VARCHAR(255),
|
||||
"url" VARCHAR(2048) NOT NULL,
|
||||
"description" TEXT,
|
||||
"html_content" TEXT,
|
||||
"feed_id" INTEGER REFERENCES "feeds"(id) NOT NULL,
|
||||
"created_at" timestamp(3) NOT NULL,
|
||||
"updated_at" timestamp(3) NOT NULL,
|
||||
|
@ -98,8 +98,6 @@ pub async fn main() -> Result<()> {
|
||||
|
||||
let args: Args = argh::from_env();
|
||||
|
||||
info!("hello?");
|
||||
|
||||
match args.commands {
|
||||
Commands::AddFeed(args) => {
|
||||
let feed = create_feed(
|
||||
@ -125,6 +123,7 @@ pub async fn main() -> Result<()> {
|
||||
title: args.title,
|
||||
url: args.url,
|
||||
description: args.description,
|
||||
html_content: None,
|
||||
feed_id: args.feed_id,
|
||||
},
|
||||
)
|
||||
|
@ -1,6 +1,6 @@
|
||||
use axum::extract::{State, Path};
|
||||
use axum::response::Response;
|
||||
use maud::html;
|
||||
use maud::{html, PreEscaped};
|
||||
use sqlx::PgPool;
|
||||
|
||||
use crate::error::Result;
|
||||
@ -12,7 +12,7 @@ pub async fn get(Path(id): Path<i32>, State(pool): State<PgPool>, layout: Layout
|
||||
Ok(layout.render(html! {
|
||||
@let title = entry.title.unwrap_or_else(|| "Untitled".to_string());
|
||||
h1 { a href=(entry.url) { (title) } }
|
||||
@let description = entry.description.unwrap_or_else(|| "No description".to_string());
|
||||
p { (description) }
|
||||
@let content = entry.html_content.unwrap_or_else(|| "No content".to_string());
|
||||
(PreEscaped(content))
|
||||
}))
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
use article_scraper::ArticleScraper;
|
||||
use feed_rs::parser;
|
||||
use reqwest::Client;
|
||||
use reqwest::{Client, Url};
|
||||
use sqlx::PgPool;
|
||||
use tracing::{info, warn};
|
||||
|
||||
@ -9,18 +10,23 @@ use crate::models::entry::{upsert_entries, CreateEntry};
|
||||
/// For every feed in the database, fetches the feed, parses it, and saves new entries to the
|
||||
/// database.
|
||||
pub async fn crawl(pool: &PgPool) -> anyhow::Result<()> {
|
||||
let scraper = ArticleScraper::new(None).await;
|
||||
let client = Client::new();
|
||||
let feeds = get_feeds(pool).await?;
|
||||
for feed in feeds {
|
||||
info!("Fetching feed {}: {}", feed.id, feed.url);
|
||||
let bytes = client.get(feed.url).send().await?.bytes().await?;
|
||||
let parsed_feed = parser::parse(&bytes[..])?;
|
||||
let mut payload = Vec::with_capacity(parsed_feed.entries.len());
|
||||
for entry in parsed_feed.entries {
|
||||
if let Some(link) = entry.links.get(0) {
|
||||
info!("Fetching entry article: {}", link.href);
|
||||
let article = scraper.parse(&Url::parse(&link.href)?, true, &client, None).await?;
|
||||
let entry = CreateEntry {
|
||||
title: entry.title.map(|t| t.content),
|
||||
url: link.href.clone(),
|
||||
description: entry.summary.map(|s| s.content),
|
||||
html_content: article.get_content(),
|
||||
feed_id: feed.id,
|
||||
};
|
||||
payload.push(entry);
|
||||
|
@ -11,6 +11,7 @@ pub struct Entry {
|
||||
pub title: Option<String>,
|
||||
pub url: String,
|
||||
pub description: Option<String>,
|
||||
pub html_content: Option<String>,
|
||||
pub feed_id: i32,
|
||||
pub created_at: NaiveDateTime,
|
||||
pub updated_at: NaiveDateTime,
|
||||
@ -25,6 +26,7 @@ pub struct CreateEntry {
|
||||
pub url: String,
|
||||
#[validate(length(max = 524288))]
|
||||
pub description: Option<String>,
|
||||
pub html_content: Option<String>,
|
||||
#[validate(range(min = 1))]
|
||||
pub feed_id: i32,
|
||||
}
|
||||
@ -52,13 +54,14 @@ pub async fn create_entry(pool: &PgPool, payload: CreateEntry) -> Result<Entry>
|
||||
sqlx::query_as!(
|
||||
Entry,
|
||||
"INSERT INTO entries (
|
||||
title, url, description, feed_id, created_at, updated_at
|
||||
title, url, description, html_content, feed_id, created_at, updated_at
|
||||
) VALUES (
|
||||
$1, $2, $3, $4, now(), now()
|
||||
$1, $2, $3, $4, $5, now(), now()
|
||||
) RETURNING *",
|
||||
payload.title,
|
||||
payload.url,
|
||||
payload.description,
|
||||
payload.html_content,
|
||||
payload.feed_id,
|
||||
)
|
||||
.fetch_one(pool)
|
||||
@ -77,23 +80,26 @@ pub async fn create_entries(pool: &PgPool, payload: Vec<CreateEntry>) -> Result<
|
||||
let mut titles = Vec::with_capacity(payload.len());
|
||||
let mut urls = Vec::with_capacity(payload.len());
|
||||
let mut descriptions: Vec<Option<String>> = Vec::with_capacity(payload.len());
|
||||
let mut html_contents: Vec<Option<String>> = Vec::with_capacity(payload.len());
|
||||
let mut feed_ids = Vec::with_capacity(payload.len());
|
||||
payload.iter().map(|entry| {
|
||||
titles.push(entry.title.clone());
|
||||
urls.push(entry.url.clone());
|
||||
descriptions.push(entry.description.clone());
|
||||
html_contents.push(entry.html_content.clone());
|
||||
feed_ids.push(entry.feed_id);
|
||||
entry.validate()
|
||||
}).collect::<Result<Vec<()>, ValidationErrors>>()?;
|
||||
sqlx::query_as!(
|
||||
Entry,
|
||||
"INSERT INTO entries (
|
||||
title, url, description, feed_id, created_at, updated_at
|
||||
) SELECT *, now(), now() FROM UNNEST($1::text[], $2::text[], $3::text[], $4::int[])
|
||||
title, url, description, html_content, feed_id, created_at, updated_at
|
||||
) SELECT *, now(), now() FROM UNNEST($1::text[], $2::text[], $3::text[], $4::text[], $5::int[])
|
||||
RETURNING *",
|
||||
titles.as_slice() as &[Option<String>],
|
||||
urls.as_slice(),
|
||||
descriptions.as_slice() as &[Option<String>],
|
||||
html_contents.as_slice() as &[Option<String>],
|
||||
feed_ids.as_slice(),
|
||||
)
|
||||
.fetch_all(pool)
|
||||
@ -112,24 +118,27 @@ pub async fn upsert_entries(pool: &PgPool, payload: Vec<CreateEntry>) -> Result<
|
||||
let mut titles = Vec::with_capacity(payload.len());
|
||||
let mut urls = Vec::with_capacity(payload.len());
|
||||
let mut descriptions: Vec<Option<String>> = Vec::with_capacity(payload.len());
|
||||
let mut html_contents: Vec<Option<String>> = Vec::with_capacity(payload.len());
|
||||
let mut feed_ids = Vec::with_capacity(payload.len());
|
||||
payload.iter().map(|entry| {
|
||||
titles.push(entry.title.clone());
|
||||
urls.push(entry.url.clone());
|
||||
descriptions.push(entry.description.clone());
|
||||
html_contents.push(entry.html_content.clone());
|
||||
feed_ids.push(entry.feed_id);
|
||||
entry.validate()
|
||||
}).collect::<Result<Vec<()>, ValidationErrors>>()?;
|
||||
sqlx::query_as!(
|
||||
Entry,
|
||||
"INSERT INTO entries (
|
||||
title, url, description, feed_id, created_at, updated_at
|
||||
) SELECT *, now(), now() FROM UNNEST($1::text[], $2::text[], $3::text[], $4::int[])
|
||||
title, url, description, html_content, feed_id, created_at, updated_at
|
||||
) SELECT *, now(), now() FROM UNNEST($1::text[], $2::text[], $3::text[], $4::text[], $5::int[])
|
||||
ON CONFLICT DO NOTHING
|
||||
RETURNING *",
|
||||
titles.as_slice() as &[Option<String>],
|
||||
urls.as_slice(),
|
||||
descriptions.as_slice() as &[Option<String>],
|
||||
html_contents.as_slice() as &[Option<String>],
|
||||
feed_ids.as_slice(),
|
||||
)
|
||||
.fetch_all(pool)
|
||||
|
Loading…
Reference in New Issue
Block a user