Compare commits

7 Commits

| Author | SHA1 | Date |
| --- | --- | --- |
|  | 7a8f7dc415 |  |
|  | e41085425a |  |
|  | 6912ef9017 |  |
|  | 65eac1975c |  |
|  | 9c75a88c69 |  |
|  | a3450e202a |  |
|  | 764d3f23b8 |  |
Cargo.lock · 264 · generated

@@ -138,6 +138,73 @@ version = "1.0.83"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "25bdb32cbbdce2b519a9cd7df3a678443100e265d5e25ca763b7572a5104f5f3"

+[[package]]
+name = "apalis"
+version = "0.6.0-rc.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7fe9f6044555ce7984b4dff510f869c25ee8258d13277eadcfee11efa0a827a0"
+dependencies = [
+ "apalis-core",
+ "futures",
+ "pin-project-lite",
+ "serde",
+ "thiserror",
+ "tokio",
+ "tower",
+ "tracing",
+ "tracing-futures",
+]
+
+[[package]]
+name = "apalis-core"
+version = "0.6.0-rc.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f7959df1edc75df26a1ee13a46d1b59fd63ba6642a6c29039583f5aedfd13aa"
+dependencies = [
+ "async-oneshot",
+ "futures",
+ "futures-timer",
+ "pin-project-lite",
+ "serde",
+ "serde_json",
+ "thiserror",
+ "tower",
+ "ulid",
+]
+
+[[package]]
+name = "apalis-cron"
+version = "0.6.0-rc.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "821d4b219ed2fa48e6ff722f23a97db3a70dcb0beeb96d3cf9391d88d23050c2"
+dependencies = [
+ "apalis-core",
+ "async-stream",
+ "chrono",
+ "cron",
+ "futures",
+ "tower",
+]
+
+[[package]]
+name = "apalis-redis"
+version = "0.6.0-rc.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "405b6457c973eb82ed4a20048131b767ebca684cdc4897d42f622ed115641f21"
+dependencies = [
+ "apalis-core",
+ "async-stream",
+ "async-trait",
+ "chrono",
+ "futures",
+ "log",
+ "redis",
+ "serde",
+ "serde_json",
+ "tokio",
+ "tower",
+]
+
 [[package]]
 name = "arc-swap"
 version = "1.7.1"

@@ -156,6 +223,37 @@ dependencies = [
  "password-hash",
 ]

+[[package]]
+name = "async-oneshot"
+version = "0.5.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae47de2a02d543205f3f5457a90b6ecbc9494db70557bd29590ec8f1ddff5463"
+dependencies = [
+ "futures-micro",
+]
+
+[[package]]
+name = "async-stream"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cd56dd203fef61ac097dd65721a419ddccb106b2d2b70ba60a6b529f03961a51"
+dependencies = [
+ "async-stream-impl",
+ "futures-core",
+ "pin-project-lite",
+]
+
+[[package]]
+name = "async-stream-impl"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.61",
+]
+
 [[package]]
 name = "async-trait"
 version = "0.1.80"

@@ -469,6 +567,20 @@ version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422"

+[[package]]
+name = "combine"
+version = "4.6.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd"
+dependencies = [
+ "bytes",
+ "futures-core",
+ "memchr",
+ "pin-project-lite",
+ "tokio",
+ "tokio-util",
+]
+
 [[package]]
 name = "const-oid"
 version = "0.9.6"

@@ -493,12 +605,9 @@ dependencies = [

 [[package]]
 name = "cookie-factory"
-version = "0.3.3"
+version = "0.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9885fa71e26b8ab7855e2ec7cae6e9b380edff76cd052e07c683a0319d51b3a2"
-dependencies = [
- "futures",
-]
+checksum = "396de984970346b0d9e93d1415082923c679e5ae5c3ee3dcbd104f5610af126b"

 [[package]]
 name = "core-foundation"

@@ -532,6 +641,9 @@ dependencies = [
  "ammonia",
  "ansi-to-html",
  "anyhow",
+ "apalis",
+ "apalis-cron",
+ "apalis-redis",
  "async-trait",
  "axum",
  "axum-client-ip",

@@ -543,6 +655,7 @@ dependencies = [
  "clap",
  "dotenvy",
  "feed-rs",
+ "fred 9.1.2",
  "futures",
  "headers",
  "http 1.1.0",

@@ -553,6 +666,7 @@ dependencies = [
  "once_cell",
  "opml",
  "password-auth",
+ "rand",
  "readability",
  "reqwest 0.12.4",
  "serde",

@@ -596,6 +710,17 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "338089f42c427b86394a5ee60ff321da23a5c89c9d89514c829687b26359fcff"

+[[package]]
+name = "cron"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6f8c3e73077b4b4a6ab1ea5047c37c57aee77657bc8ecd6f29b0af082d0b0c07"
+dependencies = [
+ "chrono",
+ "nom",
+ "once_cell",
+]
+
 [[package]]
 name = "crossbeam-channel"
 version = "0.5.12"

@@ -888,7 +1013,33 @@ dependencies = [
  "log",
  "parking_lot",
  "rand",
- "redis-protocol",
+ "redis-protocol 4.1.0",
+ "semver",
+ "socket2",
+ "tokio",
+ "tokio-stream",
+ "tokio-util",
+ "url",
+ "urlencoding",
+]
+
+[[package]]
+name = "fred"
+version = "9.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "152397076bd317aa06bca9666e954ad15cde1a8f17b6ea4b007cf0bfc074d1d0"
+dependencies = [
+ "arc-swap",
+ "async-trait",
+ "bytes",
+ "bytes-utils",
+ "crossbeam-queue",
+ "float-cmp",
+ "futures",
+ "log",
+ "parking_lot",
+ "rand",
+ "redis-protocol 5.0.1",
  "semver",
  "socket2",
  "tokio",

@@ -987,6 +1138,15 @@ dependencies = [
  "syn 2.0.61",
 ]

+[[package]]
+name = "futures-micro"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b460264b3593d68b16a7bc35f7bc226ddfebdf9a1c8db1ed95d5cc6b7168c826"
+dependencies = [
+ "pin-project-lite",
+]
+
 [[package]]
 name = "futures-sink"
 version = "0.3.30"

@@ -999,6 +1159,12 @@ version = "0.3.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004"

+[[package]]
+name = "futures-timer"
+version = "3.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24"
+
 [[package]]
 name = "futures-util"
 version = "0.3.30"

@@ -2355,6 +2521,29 @@ dependencies = [
  "url",
 ]

+[[package]]
+name = "redis"
+version = "0.25.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e0d7a6955c7511f60f3ba9e86c6d02b3c3f144f8c24b288d1f4e18074ab8bbec"
+dependencies = [
+ "arc-swap",
+ "async-trait",
+ "bytes",
+ "combine",
+ "futures",
+ "futures-util",
+ "itoa",
+ "percent-encoding",
+ "pin-project-lite",
+ "ryu",
+ "sha1_smol",
+ "tokio",
+ "tokio-retry",
+ "tokio-util",
+ "url",
+]
+
 [[package]]
 name = "redis-protocol"
 version = "4.1.0"

@@ -2369,6 +2558,20 @@ dependencies = [
  "nom",
 ]

+[[package]]
+name = "redis-protocol"
+version = "5.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "65deb7c9501fbb2b6f812a30d59c0253779480853545153a51d8e9e444ddc99f"
+dependencies = [
+ "bytes",
+ "bytes-utils",
+ "cookie-factory",
+ "crc16",
+ "log",
+ "nom",
+]
+
 [[package]]
 name = "redox_syscall"
 version = "0.4.1"

@@ -2758,6 +2961,12 @@ dependencies = [
  "digest",
 ]

+[[package]]
+name = "sha1_smol"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae1a47186c03a32177042e55dbc5fd5aee900b8e0069a8d70fba96a9375cd012"
+
 [[package]]
 name = "sha2"
 version = "0.10.8"

@@ -3336,6 +3545,17 @@ dependencies = [
  "tokio",
 ]

+[[package]]
+name = "tokio-retry"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f57eb36ecbe0fc510036adff84824dd3c24bb781e21bfa67b69d556aa85214f"
+dependencies = [
+ "pin-project",
+ "rand",
+ "tokio",
+]
+
 [[package]]
 name = "tokio-stream"
 version = "0.1.15"

@@ -3503,7 +3723,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0460effd120251714fae61067fa3e8b2a3c8202501cdcae375d4bd14194c85cf"
 dependencies = [
  "async-trait",
- "fred",
+ "fred 8.0.6",
  "rmp-serde",
  "thiserror",
  "time",

@@ -3555,6 +3775,15 @@ dependencies = [
  "valuable",
 ]

+[[package]]
+name = "tracing-futures"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2"
+dependencies = [
+ "tracing",
+]
+
 [[package]]
 name = "tracing-log"
 version = "0.2.0"

@@ -3596,6 +3825,17 @@ version = "1.17.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"

+[[package]]
+name = "ulid"
+version = "1.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34778c17965aa2a08913b57e1f34db9b4a63f5de31768b55bf20d2795f921259"
+dependencies = [
+ "getrandom",
+ "rand",
+ "web-time",
+]
+
 [[package]]
 name = "unicase"
 version = "2.7.0"

@@ -3832,6 +4072,16 @@ dependencies = [
  "wasm-bindgen",
 ]

+[[package]]
+name = "web-time"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
 [[package]]
 name = "whoami"
 version = "1.5.1"

Cargo.toml

@@ -2,7 +2,7 @@
 name = "crawlnicle"
 version = "0.1.0"
 edition = "2021"
-default-run = "crawlnicle"
+default-run = "web"
 authors = ["Tyler Hallada <tyler@hallada.net>"]

 [lib]

@@ -15,6 +15,10 @@ path = "src/lib.rs"
 ammonia = "4"
 ansi-to-html = "0.2"
 anyhow = "1"
+# apalis v0.6 fixes this issue: https://github.com/geofmureithi/apalis/issues/351
+apalis = { version = "0.6.0-rc.7", features = ["retry"] }
+apalis-cron = "0.6.0-rc.7"
+apalis-redis = "0.6.0-rc.7"
 async-trait = "0.1"
 axum = { version = "0.7", features = ["form", "multipart", "query"] }
 axum-client-ip = "0.6"

@@ -27,6 +31,7 @@ chrono = { version = "0.4", features = ["serde"] }
 clap = { version = "4.4", features = ["derive", "env"] }
 dotenvy = "0.15"
 feed-rs = "1.3"
+fred = "9"
 futures = "0.3"
 headers = "0.4"
 http = "1.0.0"

@@ -40,6 +45,7 @@ notify = "6"
 once_cell = "1.18"
 opml = "1.1"
 password-auth = "1.0"
+rand = { version = "0.8", features = ["small_rng"] }
 readability = "0.3"
 reqwest = { version = "0.12", features = ["json"] }
 serde = { version = "1", features = ["derive"] }

README.md

@@ -56,7 +56,7 @@ Install these requirements to get started developing crawlnicle.
 directory with the contents:

 ```env
-RUST_LOG=crawlnicle=debug,cli=debug,lib=debug,tower_http=debug,sqlx=debug
+RUST_LOG=crawlnicle=debug,cli=debug,web=debug,worker=debug,crawler=debug,lib=debug,tower_http=debug,sqlx=debug
 HOST=127.0.0.1
 PORT=3000
 PUBLIC_URL=http://localhost:3000
rust-analyzer.json · 5 · new file

@@ -0,0 +1,5 @@
{
  "files": {
    "excludeDirs": ["frontend"]
  }
}
@@ -97,7 +97,7 @@ pub async fn main() -> Result<()> {

     tracing_subscriber::fmt::init();

-    let pool = PgPoolOptions::new()
+    let db = PgPoolOptions::new()
         .max_connections(env::var("DATABASE_MAX_CONNECTIONS")?.parse()?)
         .connect(&env::var("DATABASE_URL")?)
         .await?;

@@ -108,7 +108,7 @@ pub async fn main() -> Result<()> {
     match cli.commands {
         Commands::AddFeed(args) => {
             let feed = Feed::create(
-                &pool,
+                &db,
                 CreateFeed {
                     title: args.title,
                     url: args.url,

@@ -119,12 +119,12 @@ pub async fn main() -> Result<()> {
             info!("Created feed with id {}", Base62Uuid::from(feed.feed_id));
         }
         Commands::DeleteFeed(args) => {
-            Feed::delete(&pool, args.id).await?;
+            Feed::delete(&db, args.id).await?;
             info!("Deleted feed with id {}", Base62Uuid::from(args.id));
         }
         Commands::AddEntry(args) => {
             let entry = Entry::create(
-                &pool,
+                &db,
                 CreateEntry {
                     title: args.title,
                     url: args.url,

@@ -137,7 +137,7 @@ pub async fn main() -> Result<()> {
             info!("Created entry with id {}", Base62Uuid::from(entry.entry_id));
         }
         Commands::DeleteEntry(args) => {
-            Entry::delete(&pool, args.id).await?;
+            Entry::delete(&db, args.id).await?;
             info!("Deleted entry with id {}", Base62Uuid::from(args.id));
         }
         Commands::Crawl(CrawlFeed { id }) => {

@@ -147,7 +147,7 @@ pub async fn main() -> Result<()> {
         // server is running, it will *not* serialize same-domain requests with it.
         let domain_locks = DomainLocks::new();
         let feed_crawler = FeedCrawlerHandle::new(
-            pool.clone(),
+            db.clone(),
             client.clone(),
             domain_locks.clone(),
             env::var("CONTENT_DIR")?,
src/bin/crawler.rs · 118 · new file

@@ -0,0 +1,118 @@
use apalis::layers::retry::{RetryLayer, RetryPolicy};
use apalis::layers::tracing::TraceLayer;
use apalis::prelude::*;
use apalis_cron::{CronStream, Schedule};
use apalis_redis::RedisStorage;
use chrono::{DateTime, Utc};
use clap::Parser;
use dotenvy::dotenv;
use sqlx::postgres::PgPoolOptions;
use sqlx::PgPool;
use std::str::FromStr;
use std::sync::Arc;
use thiserror::Error;
use tracing::{info, instrument};

use lib::config::Config;
use lib::jobs::{AsyncJob, CrawlFeedJob};
use lib::log::init_worker_tracing;
use lib::models::feed::{Feed, GetFeedsOptions};

#[derive(Default, Debug, Clone)]
struct Crawl(DateTime<Utc>);

impl From<DateTime<Utc>> for Crawl {
    fn from(t: DateTime<Utc>) -> Self {
        Crawl(t)
    }
}

#[derive(Debug, Error)]
enum CrawlError {
    #[error("error fetching feeds")]
    FetchFeedsError(#[from] sqlx::Error),
    #[error("error queueing crawl feed job")]
    QueueJobError(String),
}

#[derive(Clone)]
struct State {
    pool: PgPool,
    apalis: RedisStorage<AsyncJob>,
}

#[instrument(skip_all)]
pub async fn crawl_fn(job: Crawl, state: Data<Arc<State>>) -> Result<(), CrawlError> {
    tracing::info!(job = ?job, "crawl");
    let mut apalis = (state.apalis).clone();
    let mut options = GetFeedsOptions::default();
    loop {
        info!("fetching feeds before: {:?}", options.before);
        // TODO: filter to feeds where:
        // now >= feed.last_crawled_at + feed.crawl_interval_minutes
        // may need more indices...
        let feeds = match Feed::get_all(&state.pool, &options).await {
            Err(err) => return Err(CrawlError::FetchFeedsError(err)),
            Ok(feeds) if feeds.is_empty() => {
                info!("no more feeds found");
                break;
            }
            Ok(feeds) => feeds,
        };
        info!("found {} feeds", feeds.len());
        options.before = feeds.last().map(|f| f.created_at);

        for feed in feeds.into_iter() {
            // self.spawn_crawler_loop(feed, respond_to.clone());
            // TODO: implement uniqueness on jobs per feed for ~1 minute
            apalis
                .push(AsyncJob::CrawlFeed(CrawlFeedJob {
                    feed_id: feed.feed_id,
                }))
                .await
                .map_err(|err| CrawlError::QueueJobError(err.to_string()))?;
        }
    }
    Ok(())
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    dotenv().ok();
    let config = Config::parse();
    let _guard = init_worker_tracing()?;

    let pool = PgPoolOptions::new()
        .max_connections(config.database_max_connections)
        .acquire_timeout(std::time::Duration::from_secs(3))
        .connect(&config.database_url)
        .await?;

    // TODO: create connection from redis_pool for each job instead using a single connection
    // See: https://github.com/geofmureithi/apalis/issues/290
    let redis_conn = apalis_redis::connect(config.redis_url.clone()).await?;
    let apalis_config = apalis_redis::Config::default();
    let apalis_storage = RedisStorage::new_with_config(redis_conn, apalis_config);

    let state = Arc::new(State {
        pool,
        apalis: apalis_storage.clone(),
    });

    let schedule = Schedule::from_str("0 * * * * *").unwrap();

    let worker = WorkerBuilder::new("crawler")
        .layer(RetryLayer::new(RetryPolicy::default()))
        .layer(TraceLayer::new())
        .data(state)
        .backend(CronStream::new(schedule))
        .build_fn(crawl_fn);

    Monitor::<TokioExecutor>::new()
        .register(worker)
        .run()
        .await
        .unwrap();

    Ok(())
}
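A note on the schedule used above: apalis-cron parses its expressions with the `cron` crate (added to Cargo.lock in this change), which uses a seconds-first field layout, so `0 * * * * *` fires at second zero of every minute, and each tick is turned into a `Crawl` job through the `From<DateTime<Utc>>` impl. A standalone sketch that only exercises the `cron` crate to confirm that cadence (not part of this change set):

```rust
// Minimal check of the crawler's cron expression; illustrative only.
use std::str::FromStr;

use chrono::Utc;
use cron::Schedule;

fn main() {
    // Seconds-first syntax: sec min hour day-of-month month day-of-week.
    let schedule = Schedule::from_str("0 * * * * *").expect("valid cron expression");
    for tick in schedule.upcoming(Utc).take(3) {
        println!("next crawl tick: {tick}"); // one tick per minute, at second 0
    }
}
```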
@@ -1,6 +1,8 @@
 use std::{collections::HashMap, net::SocketAddr, path::Path, sync::Arc};

 use anyhow::Result;
+use apalis::prelude::*;
+use apalis_redis::RedisStorage;
 use axum::{
     routing::{get, post},
     Router,

@@ -14,6 +16,7 @@ use base64::prelude::*;
 use bytes::Bytes;
 use clap::Parser;
 use dotenvy::dotenv;
+use fred::prelude::*;
 use lettre::transport::smtp::authentication::Credentials;
 use lettre::SmtpTransport;
 use notify::Watcher;

@@ -26,12 +29,20 @@ use tower::ServiceBuilder;
 use tower_http::{services::ServeDir, trace::TraceLayer};
 use tower_livereload::LiveReloadLayer;
 use tower_sessions::cookie::Key;
-use tower_sessions_redis_store::{fred::prelude::*, RedisStore};
+use tower_sessions_redis_store::{
+    fred::{
+        interfaces::ClientLike as TowerSessionsRedisClientLike,
+        prelude::{RedisConfig as TowerSessionsRedisConfig, RedisPool as TowerSessionsRedisPool},
+    },
+    RedisStore,
+};
 use tracing::debug;

 use lib::config::Config;
 use lib::domain_locks::DomainLocks;
+use lib::domain_request_limiter::DomainRequestLimiter;
 use lib::handlers;
+use lib::jobs::AsyncJob;
 use lib::log::init_tracing;
 use lib::state::AppState;
 use lib::USER_AGENT;

@@ -63,7 +74,7 @@ async fn main() -> Result<()> {
     let domain_locks = DomainLocks::new();
     let client = Client::builder().user_agent(USER_AGENT).build()?;

-    let pool = PgPoolOptions::new()
+    let db = PgPoolOptions::new()
         .max_connections(config.database_max_connections)
         .acquire_timeout(std::time::Duration::from_secs(3))
         .connect(&config.database_url)

@@ -72,8 +83,20 @@ async fn main() -> Result<()> {
     let redis_config = RedisConfig::from_url(&config.redis_url)?;
     let redis_pool = RedisPool::new(redis_config, None, None, None, config.redis_pool_size)?;
     redis_pool.init().await?;
+    let domain_request_limiter = DomainRequestLimiter::new(redis_pool.clone(), 10, 5, 100, 0.5);

-    let session_store = RedisStore::new(redis_pool);
+    // TODO: is it possible to use the same fred RedisPool that the web app uses?
+    let sessions_redis_config = TowerSessionsRedisConfig::from_url(&config.redis_url)?;
+    let sessions_redis_pool = TowerSessionsRedisPool::new(
+        sessions_redis_config,
+        None,
+        None,
+        None,
+        config.redis_pool_size,
+    )?;
+    sessions_redis_pool.init().await?;
+
+    let session_store = RedisStore::new(sessions_redis_pool);
     let session_layer = SessionManagerLayer::new(session_store)
         .with_secure(!cfg!(debug_assertions))
         .with_expiry(Expiry::OnInactivity(Duration::days(

@@ -81,7 +104,7 @@ async fn main() -> Result<()> {
         )))
         .with_signed(Key::from(&BASE64_STANDARD.decode(&config.session_secret)?));

-    let backend = Backend::new(pool.clone());
+    let backend = Backend::new(db.clone());
     let auth_layer = AuthManagerLayerBuilder::new(backend, session_layer).build();

     let smtp_creds = Credentials::new(config.smtp_user.clone(), config.smtp_password.clone());

@@ -91,17 +114,28 @@ async fn main() -> Result<()> {
         .credentials(smtp_creds)
         .build();

-    sqlx::migrate!().run(&pool).await?;
+    sqlx::migrate!().run(&db).await?;

+    // TODO: use redis_pool from above instead of making a new connection
+    // See: https://github.com/geofmureithi/apalis/issues/290
+    let redis_conn = apalis_redis::connect(config.redis_url.clone()).await?;
+    let apalis_config = apalis_redis::Config::default();
+    let mut apalis: RedisStorage<AsyncJob> =
+        RedisStorage::new_with_config(redis_conn, apalis_config);
+
+    apalis
+        .push(AsyncJob::HelloWorld("hello".to_string()))
+        .await?;
+
     let crawl_scheduler = CrawlSchedulerHandle::new(
-        pool.clone(),
+        db.clone(),
         client.clone(),
         domain_locks.clone(),
         config.content_dir.clone(),
         crawls.clone(),
     );
-    let _ = crawl_scheduler.bootstrap().await;
-    let importer = ImporterHandle::new(pool.clone(), crawl_scheduler.clone(), imports.clone());
+    // let _ = crawl_scheduler.bootstrap().await;
+    let importer = ImporterHandle::new(db.clone(), crawl_scheduler.clone(), imports.clone());

     let ip_source_extension = config.ip_source.0.clone().into_extension();

@@ -140,16 +174,19 @@ async fn main() -> Result<()> {
         .route("/reset-password", post(handlers::reset_password::post))
         .nest_service("/static", ServeDir::new("static"))
         .with_state(AppState {
-            pool,
+            db,
             config,
             log_receiver,
             crawls,
             domain_locks,
+            domain_request_limiter,
             client,
             crawl_scheduler,
             importer,
             imports,
             mailer,
+            apalis,
+            redis: redis_pool,
         })
         .layer(ServiceBuilder::new().layer(TraceLayer::new_for_http()))
         .layer(auth_layer)
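With this change the web binary only enqueues background work over Redis (the `HelloWorld` push above is effectively a startup smoke test) and carries the apalis storage and fred pool inside `AppState`. Assuming `AppState` exposes the storage as an axum sub-state the same way it already exposes `PgPool` (that wiring is an assumption, not shown in this diff), a handler could queue a crawl roughly like this:

```rust
// Hypothetical handler sketch; the sub-state wiring and error mapping are assumptions.
use apalis::prelude::*;
use apalis_redis::RedisStorage;
use axum::extract::{Path, State};

use crate::error::Error;
use crate::jobs::{AsyncJob, CrawlFeedJob};
use crate::uuid::Base62Uuid;

pub async fn crawl_now(
    State(mut apalis): State<RedisStorage<AsyncJob>>,
    Path(id): Path<Base62Uuid>,
) -> Result<&'static str, Error> {
    // Enqueue a CrawlFeed job for this feed; the worker binary picks it up.
    apalis
        .push(AsyncJob::CrawlFeed(CrawlFeedJob { feed_id: id.as_uuid() }))
        .await
        .map_err(|_| Error::BadRequest("failed to queue crawl job"))?;
    Ok("queued")
}
```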
src/bin/worker.rs · 61 · new file

@@ -0,0 +1,61 @@
use anyhow::Result;
use apalis::layers::retry::RetryPolicy;
use apalis::layers::tracing::TraceLayer;
use apalis::prelude::*;
use apalis_redis::RedisStorage;
use clap::Parser;
use dotenvy::dotenv;
use fred::prelude::*;
use reqwest::Client;
use sqlx::postgres::PgPoolOptions;
use tower::retry::RetryLayer;

use lib::config::Config;
use lib::domain_request_limiter::DomainRequestLimiter;
use lib::jobs::{handle_async_job, AsyncJob};
use lib::log::init_worker_tracing;
use lib::USER_AGENT;

#[tokio::main]
async fn main() -> Result<()> {
    dotenv().ok();
    let config = Config::parse();
    let _guard = init_worker_tracing()?;
    // TODO: create connection from redis_pool for each job instead using a single connection
    // See: https://github.com/geofmureithi/apalis/issues/290
    let redis_conn = apalis_redis::connect(config.redis_url.clone()).await?;
    let apalis_config = apalis_redis::Config::default();
    let apalis_storage: RedisStorage<AsyncJob> =
        RedisStorage::new_with_config(redis_conn, apalis_config);

    let redis_config = RedisConfig::from_url(&config.redis_url)?;
    let redis_pool = RedisPool::new(redis_config, None, None, None, 5)?;
    redis_pool.init().await?;
    let domain_request_limiter = DomainRequestLimiter::new(redis_pool.clone(), 10, 5, 100, 0.5);

    let http_client = Client::builder().user_agent(USER_AGENT).build()?;
    let db = PgPoolOptions::new()
        .max_connections(config.database_max_connections)
        .acquire_timeout(std::time::Duration::from_secs(3))
        .connect(&config.database_url)
        .await?;

    Monitor::<TokioExecutor>::new()
        .register_with_count(2, {
            WorkerBuilder::new("worker")
                .layer(RetryLayer::new(RetryPolicy::default()))
                .layer(TraceLayer::new())
                .data(http_client)
                .data(db)
                .data(domain_request_limiter)
                .data(config)
                .data(apalis_storage.clone())
                .data(redis_pool)
                .backend(apalis_storage)
                .build_fn(handle_async_job)
        })
        .run()
        .await
        .unwrap();
    Ok(())
}
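The function the worker actually runs, `handle_async_job`, lives in `lib::jobs` and is not included in this compare view. A rough sketch of the expected shape, mirroring the `Data<...>` extractor style of `crawl_fn` above; the enum variants come from the jobs pushed elsewhere in this change, everything else is an assumption:

```rust
// Hypothetical sketch of lib::jobs::handle_async_job; the real module is not in this diff.
// `AsyncJob` and `CrawlFeedJob` are assumed to be defined in this same module.
use apalis::prelude::*;
use sqlx::PgPool;
use thiserror::Error;
use tracing::info;

#[derive(Debug, Error)]
pub enum AsyncJobError {
    #[error("error querying feed")]
    Database(#[from] sqlx::Error),
}

pub async fn handle_async_job(
    job: AsyncJob,    // enum pushed by the web and crawler binaries
    db: Data<PgPool>, // provided via WorkerBuilder::data(db) in worker.rs
) -> Result<(), AsyncJobError> {
    match job {
        AsyncJob::HelloWorld(name) => info!("hello {name}"),
        AsyncJob::CrawlFeed(CrawlFeedJob { feed_id }) => {
            info!(%feed_id, "crawling feed");
            // Look up the feed row via &*db, fetch it with the rate-limited HTTP
            // client, and upsert its entries here.
        }
    }
    Ok(())
}
```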
src/domain_request_limiter.rs · 123 · new file

@@ -0,0 +1,123 @@
use anyhow::{anyhow, Result};
use fred::{clients::RedisPool, interfaces::KeysInterface, prelude::*};
use rand::{rngs::SmallRng, Rng, SeedableRng};
use std::{sync::Arc, time::Duration};
use tokio::{sync::Mutex, time::sleep};

/// A Redis-based rate limiter for domain-specific requests with jittered retry delay.
///
/// This limiter uses a fixed window algorithm with a 1-second window and applies
/// jitter to the retry delay to help prevent synchronized retries in distributed systems.
/// It uses fred's RedisPool for efficient connection management.
///
/// Limitations:
/// 1. Fixed window: The limit resets every second, potentially allowing short traffic bursts
///    at window boundaries.
/// 2. No token bucket: Doesn't accumulate unused capacity from quiet periods.
/// 3. Potential overcounting: In distributed systems, there's a small chance of overcounting
///    near window ends due to race conditions.
/// 4. Redis dependency: Rate limiting fails open if Redis is unavailable.
/// 5. Blocking: The acquire method will block until a request is allowed or max_retries is reached.
///
/// Usage example:
/// ```
/// use fred::prelude::*;
///
/// #[tokio::main]
/// async fn main() -> Result<()> {
///     let config = RedisConfig::default();
///     let pool = RedisPool::new(config, None, None, 5)?;
///     pool.connect();
///     pool.wait_for_connect().await?;
///
///     let limiter = DomainRequestLimiter::new(pool, 10, 5, 100, 0.5);
///     let domain = "example.com";
///
///     for _ in 0..15 {
///         match limiter.acquire(domain).await {
///             Ok(()) => println!("Request allowed"),
///             Err(_) => println!("Max retries reached, request denied"),
///         }
///     }
///
///     Ok(())
/// }
/// ```
#[derive(Debug, Clone)]
pub struct DomainRequestLimiter {
    redis_pool: RedisPool,
    requests_per_second: u32,
    max_retries: u32,
    base_retry_delay_ms: u64,
    jitter_factor: f64,
    // TODO: I think I can get rid of this if I instantiate a DomainRequestLimiter per-worker, but
    // I'm not sure how to do that in apalis (then I could just use thread_rng)
    rng: Arc<Mutex<SmallRng>>,
}

impl DomainRequestLimiter {
    /// Create a new DomainRequestLimiter.
    ///
    /// # Arguments
    /// * `redis_pool` - A fred RedisPool.
    /// * `requests_per_second` - Maximum allowed requests per second per domain.
    /// * `max_retries` - Maximum number of retries before giving up.
    /// * `base_retry_delay_ms` - Base delay between retries in milliseconds.
    /// * `jitter_factor` - Factor to determine the maximum jitter (0.0 to 1.0).
    pub fn new(
        redis_pool: RedisPool,
        requests_per_second: u32,
        max_retries: u32,
        base_retry_delay_ms: u64,
        jitter_factor: f64,
    ) -> Self {
        Self {
            redis_pool,
            requests_per_second,
            max_retries,
            base_retry_delay_ms,
            jitter_factor: jitter_factor.clamp(0.0, 1.0),
            rng: Arc::new(Mutex::new(SmallRng::from_entropy())),
        }
    }

    /// Attempt to acquire permission for a request, retrying if necessary.
    ///
    /// This method will attempt to acquire permission up to max_retries times,
    /// sleeping for a jittered delay between each attempt.
    ///
    /// # Arguments
    /// * `domain` - The domain for which to check the rate limit.
    ///
    /// # Returns
    /// Ok(()) if permission is granted, or an error if max retries are exceeded.
    pub async fn acquire(&self, domain: &str) -> Result<()> {
        for attempt in 0..=self.max_retries {
            if self.try_acquire(domain).await? {
                return Ok(());
            }
            if attempt < self.max_retries {
                let mut rng = self.rng.lock().await;
                let jitter =
                    rng.gen::<f64>() * self.jitter_factor * self.base_retry_delay_ms as f64;
                let delay = self.base_retry_delay_ms + jitter as u64;
                sleep(Duration::from_millis(delay)).await;
            }
        }
        Err(anyhow!(
            "Max retries exceeded for domain: {:?}, request denied",
            domain
        ))
    }

    async fn try_acquire(&self, domain: &str) -> Result<bool, RedisError> {
        let key = format!("rate_limit:{}", domain);

        let count: u32 = self.redis_pool.incr(&key).await?;
        if count == 1 {
            self.redis_pool.expire(&key, 1).await?;
        }

        Ok(count <= self.requests_per_second)
    }
}
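In the crawl path, anything about to issue a request to a domain is expected to call `acquire` first; the call blocks through up to `max_retries` jittered delays until the per-domain counter for the current one-second window has room, and returns an error otherwise. A small usage sketch against the API defined above (the URL parsing and reqwest client around it are illustrative assumptions, not taken from the repository):

```rust
// Illustrative call pattern for DomainRequestLimiter; not part of this change.
use anyhow::Result;
use reqwest::Client;
use url::Url;

use lib::domain_request_limiter::DomainRequestLimiter;

async fn fetch_rate_limited(
    limiter: &DomainRequestLimiter,
    client: &Client,
    url: &str,
) -> Result<String> {
    // Key the limit on the host portion of the URL, e.g. "example.com".
    let parsed = Url::parse(url)?;
    let domain = parsed.host_str().unwrap_or_default();

    // Blocks for up to max_retries jittered delays before giving up with an error.
    limiter.acquire(domain).await?;

    Ok(client.get(url).send().await?.text().await?)
}
```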
@ -27,7 +27,7 @@ pub enum Error {
|
|||||||
#[error("validation error in request body")]
|
#[error("validation error in request body")]
|
||||||
InvalidEntity(#[from] ValidationErrors),
|
InvalidEntity(#[from] ValidationErrors),
|
||||||
|
|
||||||
#[error("error with file upload: (0)")]
|
#[error("error with file upload")]
|
||||||
Upload(#[from] MultipartError),
|
Upload(#[from] MultipartError),
|
||||||
|
|
||||||
#[error("no file uploaded")]
|
#[error("no file uploaded")]
|
||||||
@ -49,7 +49,7 @@ pub enum Error {
|
|||||||
Unauthorized,
|
Unauthorized,
|
||||||
|
|
||||||
#[error("bad request: {0}")]
|
#[error("bad request: {0}")]
|
||||||
BadRequest(&'static str)
|
BadRequest(&'static str),
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type Result<T, E = Error> = ::std::result::Result<T, E>;
|
pub type Result<T, E = Error> = ::std::result::Result<T, E>;
|
||||||
|
@ -13,9 +13,9 @@ use crate::partials::entry_list::entry_list;
|
|||||||
pub async fn get(
|
pub async fn get(
|
||||||
Query(options): Query<GetEntriesOptions>,
|
Query(options): Query<GetEntriesOptions>,
|
||||||
accept: Option<TypedHeader<Accept>>,
|
accept: Option<TypedHeader<Accept>>,
|
||||||
State(pool): State<PgPool>,
|
State(db): State<PgPool>,
|
||||||
) -> Result<impl IntoResponse, impl IntoResponse> {
|
) -> Result<impl IntoResponse, impl IntoResponse> {
|
||||||
let entries = Entry::get_all(&pool, &options).await.map_err(Error::from)?;
|
let entries = Entry::get_all(&db, &options).await.map_err(Error::from)?;
|
||||||
if let Some(TypedHeader(accept)) = accept {
|
if let Some(TypedHeader(accept)) = accept {
|
||||||
if accept == Accept::ApplicationJson {
|
if accept == Accept::ApplicationJson {
|
||||||
return Ok::<ApiResponse<Vec<Entry>>, Error>(ApiResponse::Json(entries));
|
return Ok::<ApiResponse<Vec<Entry>>, Error>(ApiResponse::Json(entries));
|
||||||
|
@ -9,15 +9,15 @@ use crate::models::entry::{CreateEntry, Entry};
|
|||||||
use crate::uuid::Base62Uuid;
|
use crate::uuid::Base62Uuid;
|
||||||
|
|
||||||
pub async fn get(
|
pub async fn get(
|
||||||
State(pool): State<PgPool>,
|
State(db): State<PgPool>,
|
||||||
Path(id): Path<Base62Uuid>,
|
Path(id): Path<Base62Uuid>,
|
||||||
) -> Result<Json<Entry>, Error> {
|
) -> Result<Json<Entry>, Error> {
|
||||||
Ok(Json(Entry::get(&pool, id.as_uuid()).await?))
|
Ok(Json(Entry::get(&db, id.as_uuid()).await?))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn post(
|
pub async fn post(
|
||||||
State(pool): State<PgPool>,
|
State(db): State<PgPool>,
|
||||||
Json(payload): Json<CreateEntry>,
|
Json(payload): Json<CreateEntry>,
|
||||||
) -> Result<Json<Entry>, Error> {
|
) -> Result<Json<Entry>, Error> {
|
||||||
Ok(Json(Entry::create(&pool, payload).await?))
|
Ok(Json(Entry::create(&db, payload).await?))
|
||||||
}
|
}
|
||||||
|
@ -8,17 +8,17 @@ use crate::error::{Error, Result};
|
|||||||
use crate::models::feed::{CreateFeed, Feed};
|
use crate::models::feed::{CreateFeed, Feed};
|
||||||
use crate::uuid::Base62Uuid;
|
use crate::uuid::Base62Uuid;
|
||||||
|
|
||||||
pub async fn get(State(pool): State<PgPool>, Path(id): Path<Base62Uuid>) -> Result<Json<Feed>> {
|
pub async fn get(State(db): State<PgPool>, Path(id): Path<Base62Uuid>) -> Result<Json<Feed>> {
|
||||||
Ok(Json(Feed::get(&pool, id.as_uuid()).await?))
|
Ok(Json(Feed::get(&db, id.as_uuid()).await?))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn post(
|
pub async fn post(
|
||||||
State(pool): State<PgPool>,
|
State(db): State<PgPool>,
|
||||||
Json(payload): Json<CreateFeed>,
|
Json(payload): Json<CreateFeed>,
|
||||||
) -> Result<Json<Feed>, Error> {
|
) -> Result<Json<Feed>, Error> {
|
||||||
Ok(Json(Feed::create(&pool, payload).await?))
|
Ok(Json(Feed::create(&db, payload).await?))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn delete(State(pool): State<PgPool>, Path(id): Path<Base62Uuid>) -> Result<()> {
|
pub async fn delete(State(db): State<PgPool>, Path(id): Path<Base62Uuid>) -> Result<()> {
|
||||||
Feed::delete(&pool, id.as_uuid()).await
|
Feed::delete(&db, id.as_uuid()).await
|
||||||
}
|
}
|
||||||
|
@ -13,9 +13,9 @@ use crate::partials::feed_list::feed_list;
|
|||||||
pub async fn get(
|
pub async fn get(
|
||||||
Query(options): Query<GetFeedsOptions>,
|
Query(options): Query<GetFeedsOptions>,
|
||||||
accept: Option<TypedHeader<Accept>>,
|
accept: Option<TypedHeader<Accept>>,
|
||||||
State(pool): State<PgPool>,
|
State(db): State<PgPool>,
|
||||||
) -> Result<impl IntoResponse, impl IntoResponse> {
|
) -> Result<impl IntoResponse, impl IntoResponse> {
|
||||||
let feeds = Feed::get_all(&pool, &options).await.map_err(Error::from)?;
|
let feeds = Feed::get_all(&db, &options).await.map_err(Error::from)?;
|
||||||
if let Some(TypedHeader(accept)) = accept {
|
if let Some(TypedHeader(accept)) = accept {
|
||||||
if accept == Accept::ApplicationJson {
|
if accept == Accept::ApplicationJson {
|
||||||
return Ok::<ApiResponse<Vec<Feed>>, Error>(ApiResponse::Json(feeds));
|
return Ok::<ApiResponse<Vec<Feed>>, Error>(ApiResponse::Json(feeds));
|
||||||
|
@ -70,7 +70,7 @@ pub fn confirm_email_page(
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub async fn get(
|
pub async fn get(
|
||||||
State(pool): State<PgPool>,
|
State(db): State<PgPool>,
|
||||||
auth: AuthSession,
|
auth: AuthSession,
|
||||||
hx_target: Option<TypedHeader<HXTarget>>,
|
hx_target: Option<TypedHeader<HXTarget>>,
|
||||||
layout: Layout,
|
layout: Layout,
|
||||||
@ -78,7 +78,7 @@ pub async fn get(
|
|||||||
) -> Result<Response> {
|
) -> Result<Response> {
|
||||||
if let Some(token_id) = query.token_id {
|
if let Some(token_id) = query.token_id {
|
||||||
info!(token_id = %token_id.as_uuid(), "get with token_id");
|
info!(token_id = %token_id.as_uuid(), "get with token_id");
|
||||||
let token = match UserEmailVerificationToken::get(&pool, token_id.as_uuid()).await {
|
let token = match UserEmailVerificationToken::get(&db, token_id.as_uuid()).await {
|
||||||
Ok(token) => token,
|
Ok(token) => token,
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
if let Error::NotFoundUuid(_, _) = err {
|
if let Error::NotFoundUuid(_, _) = err {
|
||||||
@ -112,8 +112,8 @@ pub async fn get(
|
|||||||
}))
|
}))
|
||||||
} else {
|
} else {
|
||||||
info!(token_id = %token.token_id, "token valid, verifying email");
|
info!(token_id = %token.token_id, "token valid, verifying email");
|
||||||
User::verify_email(&pool, token.user_id).await?;
|
User::verify_email(&db, token.user_id).await?;
|
||||||
UserEmailVerificationToken::delete(&pool, token.token_id).await?;
|
UserEmailVerificationToken::delete(&db, token.token_id).await?;
|
||||||
Ok(layout
|
Ok(layout
|
||||||
.with_subtitle("confirm email")
|
.with_subtitle("confirm email")
|
||||||
.targeted(hx_target)
|
.targeted(hx_target)
|
||||||
@ -152,7 +152,7 @@ pub struct ConfirmEmail {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub async fn post(
|
pub async fn post(
|
||||||
State(pool): State<PgPool>,
|
State(db): State<PgPool>,
|
||||||
State(mailer): State<SmtpTransport>,
|
State(mailer): State<SmtpTransport>,
|
||||||
State(config): State<Config>,
|
State(config): State<Config>,
|
||||||
hx_target: Option<TypedHeader<HXTarget>>,
|
hx_target: Option<TypedHeader<HXTarget>>,
|
||||||
@ -161,11 +161,11 @@ pub async fn post(
|
|||||||
) -> Result<Response> {
|
) -> Result<Response> {
|
||||||
if let Some(token_id) = confirm_email.token {
|
if let Some(token_id) = confirm_email.token {
|
||||||
info!(%token_id, "posted with token_id");
|
info!(%token_id, "posted with token_id");
|
||||||
let token = UserEmailVerificationToken::get(&pool, token_id).await?;
|
let token = UserEmailVerificationToken::get(&db, token_id).await?;
|
||||||
let user = User::get(&pool, token.user_id).await?;
|
let user = User::get(&db, token.user_id).await?;
|
||||||
if !user.email_verified {
|
if !user.email_verified {
|
||||||
info!(user_id = %user.user_id, "user exists, resending confirmation email");
|
info!(user_id = %user.user_id, "user exists, resending confirmation email");
|
||||||
send_confirmation_email(pool, mailer, config, user);
|
send_confirmation_email(db, mailer, config, user);
|
||||||
} else {
|
} else {
|
||||||
warn!(user_id = %user.user_id, "confirm email submitted for already verified user, skip resend");
|
warn!(user_id = %user.user_id, "confirm email submitted for already verified user, skip resend");
|
||||||
}
|
}
|
||||||
@ -184,10 +184,10 @@ pub async fn post(
|
|||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
if let Some(email) = confirm_email.email {
|
if let Some(email) = confirm_email.email {
|
||||||
if let Ok(user) = User::get_by_email(&pool, email).await {
|
if let Ok(user) = User::get_by_email(&db, email).await {
|
||||||
if !user.email_verified {
|
if !user.email_verified {
|
||||||
info!(user_id = %user.user_id, "user exists, resending confirmation email");
|
info!(user_id = %user.user_id, "user exists, resending confirmation email");
|
||||||
send_confirmation_email(pool, mailer, config, user);
|
send_confirmation_email(db, mailer, config, user);
|
||||||
} else {
|
} else {
|
||||||
warn!(user_id = %user.user_id, "confirm email submitted for already verified user, skip resend");
|
warn!(user_id = %user.user_id, "confirm email submitted for already verified user, skip resend");
|
||||||
}
|
}
|
||||||
|
@ -8,8 +8,8 @@ use crate::partials::entry_list::entry_list;
|
|||||||
|
|
||||||
pub async fn get(
|
pub async fn get(
|
||||||
Query(options): Query<GetEntriesOptions>,
|
Query(options): Query<GetEntriesOptions>,
|
||||||
State(pool): State<PgPool>,
|
State(db): State<PgPool>,
|
||||||
) -> Result<Markup> {
|
) -> Result<Markup> {
|
||||||
let entries = Entry::get_all(&pool, &options).await?;
|
let entries = Entry::get_all(&db, &options).await?;
|
||||||
Ok(entry_list(entries, &options, false))
|
Ok(entry_list(entries, &options, false))
|
||||||
}
|
}
|
||||||
|
@ -16,12 +16,12 @@ use crate::uuid::Base62Uuid;
|
|||||||
|
|
||||||
pub async fn get(
|
pub async fn get(
|
||||||
Path(id): Path<Base62Uuid>,
|
Path(id): Path<Base62Uuid>,
|
||||||
State(pool): State<PgPool>,
|
State(db): State<PgPool>,
|
||||||
State(config): State<Config>,
|
State(config): State<Config>,
|
||||||
hx_target: Option<TypedHeader<HXTarget>>,
|
hx_target: Option<TypedHeader<HXTarget>>,
|
||||||
layout: Layout,
|
layout: Layout,
|
||||||
) -> Result<Response> {
|
) -> Result<Response> {
|
||||||
let entry = Entry::get(&pool, id.as_uuid()).await?;
|
let entry = Entry::get(&db, id.as_uuid()).await?;
|
||||||
let content_dir = std::path::Path::new(&config.content_dir);
|
let content_dir = std::path::Path::new(&config.content_dir);
|
||||||
let content_path = content_dir.join(format!("{}.html", entry.entry_id));
|
let content_path = content_dir.join(format!("{}.html", entry.entry_id));
|
||||||
let title = entry.title.unwrap_or_else(|| "Untitled Entry".to_string());
|
let title = entry.title.unwrap_or_else(|| "Untitled Entry".to_string());
|
||||||
|
@ -28,17 +28,17 @@ use crate::uuid::Base62Uuid;
|
|||||||
|
|
||||||
pub async fn get(
|
pub async fn get(
|
||||||
Path(id): Path<Base62Uuid>,
|
Path(id): Path<Base62Uuid>,
|
||||||
State(pool): State<PgPool>,
|
State(db): State<PgPool>,
|
||||||
hx_target: Option<TypedHeader<HXTarget>>,
|
hx_target: Option<TypedHeader<HXTarget>>,
|
||||||
layout: Layout,
|
layout: Layout,
|
||||||
) -> Result<Response> {
|
) -> Result<Response> {
|
||||||
let feed = Feed::get(&pool, id.as_uuid()).await?;
|
let feed = Feed::get(&db, id.as_uuid()).await?;
|
||||||
let options = GetEntriesOptions {
|
let options = GetEntriesOptions {
|
||||||
feed_id: Some(feed.feed_id),
|
feed_id: Some(feed.feed_id),
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
let title = feed.title.unwrap_or_else(|| "Untitled Feed".to_string());
|
let title = feed.title.unwrap_or_else(|| "Untitled Feed".to_string());
|
||||||
let entries = Entry::get_all(&pool, &options).await?;
|
let entries = Entry::get_all(&db, &options).await?;
|
||||||
let delete_url = format!("/feed/{}/delete", id);
|
let delete_url = format!("/feed/{}/delete", id);
|
||||||
Ok(layout.with_subtitle(&title).targeted(hx_target).render(html! {
|
Ok(layout.with_subtitle(&title).targeted(hx_target).render(html! {
|
||||||
header class="mb-4 flex flex-row items-center gap-4" {
|
header class="mb-4 flex flex-row items-center gap-4" {
|
||||||
@ -115,13 +115,13 @@ impl IntoResponse for AddFeedError {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub async fn post(
|
pub async fn post(
|
||||||
State(pool): State<PgPool>,
|
State(db): State<PgPool>,
|
||||||
State(crawls): State<Crawls>,
|
State(crawls): State<Crawls>,
|
||||||
State(crawl_scheduler): State<CrawlSchedulerHandle>,
|
State(crawl_scheduler): State<CrawlSchedulerHandle>,
|
||||||
Form(add_feed): Form<AddFeed>,
|
Form(add_feed): Form<AddFeed>,
|
||||||
) -> AddFeedResult<Response> {
|
) -> AddFeedResult<Response> {
|
||||||
let feed = Feed::create(
|
let feed = Feed::create(
|
||||||
&pool,
|
&db,
|
||||||
CreateFeed {
|
CreateFeed {
|
||||||
title: add_feed.title,
|
title: add_feed.title,
|
||||||
url: add_feed.url.clone(),
|
url: add_feed.url.clone(),
|
||||||
@ -233,7 +233,7 @@ pub async fn stream(
|
|||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn delete(State(pool): State<PgPool>, Path(id): Path<Base62Uuid>) -> Result<Redirect> {
|
pub async fn delete(State(db): State<PgPool>, Path(id): Path<Base62Uuid>) -> Result<Redirect> {
|
||||||
Feed::delete(&pool, id.as_uuid()).await?;
|
Feed::delete(&db, id.as_uuid()).await?;
|
||||||
Ok(Redirect::to("/feeds"))
|
Ok(Redirect::to("/feeds"))
|
||||||
}
|
}
|
||||||
|
@ -13,12 +13,12 @@ use crate::partials::layout::Layout;
|
|||||||
use crate::partials::opml_import_form::opml_import_form;
|
use crate::partials::opml_import_form::opml_import_form;
|
||||||
|
|
||||||
pub async fn get(
|
pub async fn get(
|
||||||
State(pool): State<PgPool>,
|
State(db): State<PgPool>,
|
||||||
hx_target: Option<TypedHeader<HXTarget>>,
|
hx_target: Option<TypedHeader<HXTarget>>,
|
||||||
layout: Layout,
|
layout: Layout,
|
||||||
) -> Result<Response> {
|
) -> Result<Response> {
|
||||||
let options = GetFeedsOptions::default();
|
let options = GetFeedsOptions::default();
|
||||||
let feeds = Feed::get_all(&pool, &options).await?;
|
let feeds = Feed::get_all(&db, &options).await?;
|
||||||
Ok(layout
|
Ok(layout
|
||||||
.with_subtitle("feeds")
|
.with_subtitle("feeds")
|
||||||
.targeted(hx_target)
|
.targeted(hx_target)
|
||||||
|
@ -82,7 +82,7 @@ pub async fn get(
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub async fn post(
|
pub async fn post(
|
||||||
State(pool): State<PgPool>,
|
State(db): State<PgPool>,
|
||||||
State(mailer): State<SmtpTransport>,
|
State(mailer): State<SmtpTransport>,
|
||||||
State(config): State<Config>,
|
State(config): State<Config>,
|
||||||
SecureClientIp(ip): SecureClientIp,
|
SecureClientIp(ip): SecureClientIp,
|
||||||
@ -91,7 +91,7 @@ pub async fn post(
|
|||||||
layout: Layout,
|
layout: Layout,
|
||||||
Form(forgot_password): Form<ForgotPassword>,
|
Form(forgot_password): Form<ForgotPassword>,
|
||||||
) -> Result<Response> {
|
) -> Result<Response> {
|
||||||
let user: User = match User::get_by_email(&pool, forgot_password.email.clone()).await {
|
let user: User = match User::get_by_email(&db, forgot_password.email.clone()).await {
|
||||||
Ok(user) => user,
|
Ok(user) => user,
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
if let Error::NotFoundString(_, _) = err {
|
if let Error::NotFoundString(_, _) = err {
|
||||||
@ -105,7 +105,7 @@ pub async fn post(
|
|||||||
if user.email_verified {
|
if user.email_verified {
|
||||||
info!(user_id = %user.user_id, "user exists with verified email, sending password reset email");
|
info!(user_id = %user.user_id, "user exists with verified email, sending password reset email");
|
||||||
send_forgot_password_email(
|
send_forgot_password_email(
|
||||||
pool,
|
db,
|
||||||
mailer,
|
mailer,
|
||||||
config,
|
config,
|
||||||
user,
|
user,
|
||||||
|
@@ -10,12 +10,12 @@ use crate::models::entry::Entry;
 use crate::partials::{entry_list::entry_list, layout::Layout};
 
 pub async fn get(
-    State(pool): State<PgPool>,
+    State(db): State<PgPool>,
     hx_target: Option<TypedHeader<HXTarget>>,
     layout: Layout,
 ) -> Result<Response> {
     let options = Default::default();
-    let entries = Entry::get_all(&pool, &options).await?;
+    let entries = Entry::get_all(&db, &options).await?;
     Ok(layout.targeted(hx_target).render(html! {
         ul class="list-none flex flex-col gap-4" {
             (entry_list(entries, &options, true))
@@ -59,7 +59,7 @@ pub async fn get(hx_target: Option<TypedHeader<HXTarget>>, layout: Layout) -> Re
 }
 
 pub async fn post(
-    State(pool): State<PgPool>,
+    State(db): State<PgPool>,
     State(mailer): State<SmtpTransport>,
     State(config): State<Config>,
     mut auth: AuthSession,
@@ -80,7 +80,7 @@ pub async fn post(
         ));
     }
     let user = match User::create(
-        &pool,
+        &db,
         CreateUser {
             email: register.email.clone(),
             password: register.password.clone(),
@@ -144,7 +144,7 @@ pub async fn post(
         }
     };
 
-    send_confirmation_email(pool, mailer, config, user.clone());
+    send_confirmation_email(db, mailer, config, user.clone());
 
     auth.login(&user)
         .await
@@ -126,14 +126,14 @@ pub fn reset_password_page(
 }
 
 pub async fn get(
-    State(pool): State<PgPool>,
+    State(db): State<PgPool>,
     hx_target: Option<TypedHeader<HXTarget>>,
     layout: Layout,
     query: Query<ResetPasswordQuery>,
 ) -> Result<Response> {
     if let Some(token_id) = query.token_id {
         info!(token_id = %token_id.as_uuid(), "get with token_id");
-        let token = match UserPasswordResetToken::get(&pool, token_id.as_uuid()).await {
+        let token = match UserPasswordResetToken::get(&db, token_id.as_uuid()).await {
             Ok(token) => token,
             Err(err) => {
                 if let Error::NotFoundUuid(_, _) = err {
@@ -158,7 +158,7 @@ pub async fn get(
             }))
         } else {
             info!(token_id = %token.token_id, "token valid, showing reset password form");
-            let user = User::get(&pool, token.user_id).await?;
+            let user = User::get(&db, token.user_id).await?;
             Ok(reset_password_page(ResetPasswordPageProps {
                 hx_target,
                 layout,
@@ -181,7 +181,7 @@ pub async fn get(
 }
 
 pub async fn post(
-    State(pool): State<PgPool>,
+    State(db): State<PgPool>,
     State(mailer): State<SmtpTransport>,
     State(config): State<Config>,
     SecureClientIp(ip): SecureClientIp,
@@ -203,7 +203,7 @@ pub async fn post(
             ..Default::default()
         }));
     }
-    let token = match UserPasswordResetToken::get(&pool, reset_password.token).await {
+    let token = match UserPasswordResetToken::get(&db, reset_password.token).await {
         Ok(token) => token,
         Err(err) => {
             if let Error::NotFoundUuid(_, _) = err {
@@ -241,7 +241,7 @@ pub async fn post(
             ..Default::default()
         }));
     }
-    let user = match User::get(&pool, token.user_id).await {
+    let user = match User::get(&db, token.user_id).await {
         Ok(user) => user,
         Err(err) => {
             if let Error::NotFoundString(_, _) = err {
@@ -266,7 +266,7 @@ pub async fn post(
         }
     };
     info!(user_id = %user.user_id, "user exists with verified email, resetting password");
-    let mut tx = pool.begin().await?;
+    let mut tx = db.begin().await?;
     UserPasswordResetToken::delete(tx.as_mut(), reset_password.token).await?;
     let user = match user
         .update_password(
src/jobs/crawl_entry.rs (new file, 68 lines)
@@ -0,0 +1,68 @@
+use std::fs;
+use std::path::Path;
+
+use ammonia::clean;
+use anyhow::{anyhow, Result};
+use apalis::prelude::*;
+use bytes::Buf;
+use fred::prelude::*;
+use readability::extractor;
+use reqwest::Client;
+use serde::{Deserialize, Serialize};
+use sqlx::PgPool;
+use tracing::{info, instrument};
+use url::Url;
+use uuid::Uuid;
+
+use crate::config::Config;
+use crate::domain_request_limiter::DomainRequestLimiter;
+use crate::models::entry::Entry;
+
+#[derive(Debug, Deserialize, Serialize, Clone)]
+pub struct CrawlEntryJob {
+    pub entry_id: Uuid,
+}
+
+#[instrument(skip_all, fields(entry_id = %entry_id))]
+pub async fn crawl_entry(
+    CrawlEntryJob { entry_id }: CrawlEntryJob,
+    http_client: Data<Client>,
+    db: Data<PgPool>,
+    domain_request_limiter: Data<DomainRequestLimiter>,
+    config: Data<Config>,
+    redis: Data<RedisPool>,
+) -> Result<()> {
+    let entry = Entry::get(&*db, entry_id).await?;
+    info!("got entry from db");
+    let content_dir = Path::new(&*config.content_dir);
+    let url = Url::parse(&entry.url)?;
+    let domain = url
+        .domain()
+        .ok_or(anyhow!("invalid url: {:?}", entry.url.clone()))?;
+    info!(url=%url, "starting fetch");
+    domain_request_limiter.acquire(domain).await?;
+    let bytes = http_client.get(url.clone()).send().await?.bytes().await?;
+    info!(url=%url, "fetched entry");
+    let article = extractor::extract(&mut bytes.reader(), &url)?;
+    info!("extracted content");
+    let id = entry.entry_id;
+    // TODO: update entry with scraped data
+    // if let Some(date) = article.date {
+    //     // prefer scraped date over rss feed date
+    //     let mut updated_entry = entry.clone();
+    //     updated_entry.published_at = date;
+    //     entry = update_entry(&self.pool, updated_entry)
+    //         .await
+    //         .map_err(|_| EntryCrawlerError::CreateEntryError(entry.url.clone()))?;
+    // };
+    let content = clean(&article.content);
+    info!("sanitized content");
+    fs::write(content_dir.join(format!("{}.html", id)), content)?;
+    fs::write(content_dir.join(format!("{}.txt", id)), article.text)?;
+    info!("saved content to filesystem");
+    redis
+        .next()
+        .publish("entries", entry_id.to_string())
+        .await?;
+    Ok(())
+}
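`crawl_entry` is the per-entry job that `crawl_feed` fans out to below. A minimal enqueue-side sketch, assuming only the shared `RedisStorage<AsyncJob>` that `AppState` carries; the helper name is illustrative and not part of this change, but the push pattern mirrors the one used in crawl_feed.rs and import_opml.rs:

    use apalis::prelude::*;
    use apalis_redis::RedisStorage;
    use uuid::Uuid;

    use crate::jobs::{AsyncJob, CrawlEntryJob};

    // Sketch only: push a single-entry crawl onto the shared Redis-backed queue.
    async fn enqueue_entry_crawl(
        mut apalis: RedisStorage<AsyncJob>,
        entry_id: Uuid,
    ) -> anyhow::Result<()> {
        apalis
            .push(AsyncJob::CrawlEntry(CrawlEntryJob { entry_id }))
            .await?;
        Ok(())
    }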
src/jobs/crawl_feed.rs (new file, 189 lines)
@@ -0,0 +1,189 @@
+use std::cmp::Ordering;
+
+use anyhow::{anyhow, Result};
+use apalis::prelude::*;
+use apalis_redis::RedisStorage;
+use chrono::{Duration, Utc};
+use feed_rs::parser;
+use fred::prelude::*;
+use http::{header, HeaderMap, StatusCode};
+use reqwest::Client;
+use serde::{Deserialize, Serialize};
+use sqlx::PgPool;
+use tracing::{info, info_span, instrument, warn};
+use url::Url;
+use uuid::Uuid;
+
+use crate::domain_request_limiter::DomainRequestLimiter;
+use crate::jobs::{AsyncJob, CrawlEntryJob};
+use crate::models::entry::{CreateEntry, Entry};
+use crate::models::feed::{Feed, MAX_CRAWL_INTERVAL_MINUTES, MIN_CRAWL_INTERVAL_MINUTES};
+
+#[derive(Debug, Deserialize, Serialize, Clone)]
+pub struct CrawlFeedJob {
+    pub feed_id: Uuid,
+}
+
+#[instrument(skip_all, fields(feed_id = %feed_id))]
+pub async fn crawl_feed(
+    CrawlFeedJob { feed_id }: CrawlFeedJob,
+    http_client: Data<Client>,
+    db: Data<PgPool>,
+    domain_request_limiter: Data<DomainRequestLimiter>,
+    apalis: Data<RedisStorage<AsyncJob>>,
+    redis: Data<RedisPool>,
+) -> Result<()> {
+    let mut feed = Feed::get(&*db, feed_id).await?;
+    info!("got feed from db");
+    let url = Url::parse(&feed.url)?;
+    let domain = url
+        .domain()
+        .ok_or(anyhow!("invalid url: {:?}", feed.url.clone()))?;
+    let mut headers = HeaderMap::new();
+    if let Some(etag) = &feed.etag_header {
+        if let Ok(etag) = etag.parse() {
+            headers.insert(header::IF_NONE_MATCH, etag);
+        } else {
+            warn!(%etag, "failed to parse saved etag header");
+        }
+    }
+    if let Some(last_modified) = &feed.last_modified_header {
+        if let Ok(last_modified) = last_modified.parse() {
+            headers.insert(header::IF_MODIFIED_SINCE, last_modified);
+        } else {
+            warn!(
+                %last_modified,
+                "failed to parse saved last_modified header",
+            );
+        }
+    }
+
+    info!(url=%url, "starting fetch");
+    domain_request_limiter.acquire(domain).await?;
+    let resp = http_client.get(url.clone()).headers(headers).send().await?;
+    let headers = resp.headers();
+    if let Some(etag) = headers.get(header::ETAG) {
+        if let Ok(etag) = etag.to_str() {
+            feed.etag_header = Some(etag.to_string());
+        } else {
+            warn!(?etag, "failed to convert response etag header to string");
+        }
+    }
+    if let Some(last_modified) = headers.get(header::LAST_MODIFIED) {
+        if let Ok(last_modified) = last_modified.to_str() {
+            feed.last_modified_header = Some(last_modified.to_string());
+        } else {
+            warn!(
+                ?last_modified,
+                "failed to convert response last_modified header to string",
+            );
+        }
+    }
+    info!(url=%url, "fetched feed");
+    if resp.status() == StatusCode::NOT_MODIFIED {
+        info!("feed returned not modified status");
+        feed.last_crawled_at = Some(Utc::now());
+        feed.last_crawl_error = None;
+        feed.save(&*db).await?;
+        info!("updated feed in db");
+        return Ok(());
+    } else if !resp.status().is_success() {
+        warn!("feed returned non-successful status");
+        feed.last_crawled_at = Some(Utc::now());
+        feed.last_crawl_error = resp.status().canonical_reason().map(|s| s.to_string());
+        feed.save(&*db).await?;
+        info!("updated feed in db");
+        return Ok(());
+    }
+
+    let bytes = resp.bytes().await?;
+
+    let parsed_feed = parser::parse(&bytes[..])?;
+    info!("parsed feed");
+    feed.url = url.to_string();
+    feed.feed_type = parsed_feed.feed_type.into();
+    feed.last_crawled_at = Some(Utc::now());
+    feed.last_crawl_error = None;
+    if let Some(title) = parsed_feed.title {
+        feed.title = Some(title.content);
+    }
+    if let Some(description) = parsed_feed.description {
+        feed.description = Some(description.content);
+    }
+    let last_entry_published_at = parsed_feed.entries.iter().filter_map(|e| e.published).max();
+    if let Some(prev_last_entry_published_at) = feed.last_entry_published_at {
+        if let Some(published_at) = last_entry_published_at {
+            let time_since_last_entry = if published_at == prev_last_entry_published_at {
+                // No new entry since last crawl, compare current time to last publish instead
+                Utc::now() - prev_last_entry_published_at
+            } else {
+                // Compare new entry publish time to previous publish time
+                published_at - prev_last_entry_published_at
+            };
+            match time_since_last_entry.cmp(&Duration::minutes(feed.crawl_interval_minutes.into()))
+            {
+                Ordering::Greater => {
+                    feed.crawl_interval_minutes = i32::max(
+                        (feed.crawl_interval_minutes as f32 * 1.2).ceil() as i32,
+                        MAX_CRAWL_INTERVAL_MINUTES,
+                    );
+                    info!(
+                        interval = feed.crawl_interval_minutes,
+                        "increased crawl interval"
+                    );
+                }
+                Ordering::Less => {
+                    feed.crawl_interval_minutes = i32::max(
+                        (feed.crawl_interval_minutes as f32 / 1.2).ceil() as i32,
+                        MIN_CRAWL_INTERVAL_MINUTES,
+                    );
+                    info!(
+                        interval = feed.crawl_interval_minutes,
+                        "decreased crawl interval"
+                    );
+                }
+                Ordering::Equal => {}
+            }
+        }
+    }
+    feed.last_entry_published_at = last_entry_published_at;
+    let feed = feed.save(&*db).await?;
+    info!("updated feed in db");
+
+    let mut payload = Vec::with_capacity(parsed_feed.entries.len());
+    for entry in parsed_feed.entries {
+        let entry_span = info_span!("entry", id = entry.id);
+        let _entry_span_guard = entry_span.enter();
+        if let Some(link) = entry.links.first() {
+            // if no scraped or feed date is available, fallback to the current time
+            let published_at = entry.published.unwrap_or_else(Utc::now);
+            let entry = CreateEntry {
+                title: entry.title.map(|t| t.content),
+                url: link.href.clone(),
+                description: entry.summary.map(|s| s.content),
+                feed_id: feed.feed_id,
+                published_at,
+            };
+            payload.push(entry);
+        } else {
+            warn!("skipping feed entry with no links");
+        }
+    }
+    let entries = Entry::bulk_upsert(&*db, payload).await?;
+    let (new, updated) = entries
+        .into_iter()
+        .partition::<Vec<_>, _>(|entry| entry.updated_at.is_none());
+    info!(new = new.len(), updated = updated.len(), "saved entries");
+
+    for entry in new {
+        (*apalis)
+            .clone() // TODO: clone bad?
+            .push(AsyncJob::CrawlEntry(CrawlEntryJob {
+                entry_id: entry.entry_id,
+            }))
+            .await?;
+    }
+
+    redis.next().publish("feeds", feed_id.to_string()).await?;
+    Ok(())
+}
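One thing worth flagging in the adaptive-interval logic above: the `Ordering::Greater` arm clamps with `i32::max(..., MAX_CRAWL_INTERVAL_MINUTES)`, which pins the interval straight to the maximum the first time it grows; a ceiling is normally expressed with `i32::min`. A small self-contained sketch of the presumably intended grow/shrink-and-clamp behaviour, with stand-in bounds because the real constants are not shown in this diff:

    // Illustrative only: assumed bounds standing in for MIN/MAX_CRAWL_INTERVAL_MINUTES.
    const MIN_MINUTES: i32 = 15;
    const MAX_MINUTES: i32 = 5040;

    fn next_interval(current: i32, entries_arriving_slower: bool) -> i32 {
        if entries_arriving_slower {
            // e.g. 60 -> 72, capped at the maximum
            i32::min((current as f32 * 1.2).ceil() as i32, MAX_MINUTES)
        } else {
            // e.g. 60 -> 50, floored at the minimum
            i32::max((current as f32 / 1.2).ceil() as i32, MIN_MINUTES)
        }
    }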
src/jobs/import_opml.rs (new file, 105 lines)
@@ -0,0 +1,105 @@
+use std::io::Cursor;
+
+use anyhow::{anyhow, Context, Result};
+use apalis::prelude::*;
+use apalis_redis::RedisStorage;
+use fred::prelude::*;
+use opml::OPML;
+use serde::{Deserialize, Serialize};
+use sqlx::PgPool;
+use tracing::{error, instrument, warn};
+use uuid::Uuid;
+
+use crate::error::Error;
+use crate::jobs::crawl_feed::CrawlFeedJob;
+use crate::jobs::AsyncJob;
+use crate::models::feed::{CreateFeed, Feed};
+use crate::uuid::Base62Uuid;
+
+#[derive(Debug, Deserialize, Serialize, Clone)]
+pub struct ImportOpmlJob {
+    pub import_id: Uuid,
+    pub file_name: Option<String>,
+    pub bytes: Vec<u8>,
+}
+
+// TODO: send messages over redis channel
+/// `ImporterOpmlMessage::Import` contains the result of importing the OPML file.
+// #[allow(clippy::large_enum_variant)]
+// #[derive(Debug, Clone)]
+// pub enum ImporterOpmlMessage {
+//     Import(ImporterResult<()>),
+//     CreateFeedError(String),
+//     AlreadyImported(String),
+//     CrawlScheduler(CrawlSchedulerHandleMessage),
+// }
+
+#[instrument(skip_all, fields(import_id = %import_id))]
+pub async fn import_opml(
+    ImportOpmlJob {
+        import_id,
+        file_name,
+        bytes,
+    }: ImportOpmlJob,
+    db: Data<PgPool>,
+    apalis: Data<RedisStorage<AsyncJob>>,
+    redis: Data<RedisPool>,
+) -> Result<()> {
+    let document = OPML::from_reader(&mut Cursor::new(bytes)).with_context(|| {
+        format!(
+            "Failed to read OPML file for import {} from file {}",
+            Base62Uuid::from(import_id),
+            file_name
+                .map(|n| n.to_string())
+                .unwrap_or_else(|| "unknown".to_string())
+        )
+    })?;
+    for url in gather_feed_urls(document.body.outlines) {
+        let feed = Feed::create(
+            &*db,
+            CreateFeed {
+                url: url.clone(),
+                ..Default::default()
+            },
+        )
+        .await;
+        match feed {
+            Ok(feed) => {
+                (*apalis)
+                    .clone()
+                    .push(AsyncJob::CrawlFeed(CrawlFeedJob {
+                        feed_id: feed.feed_id,
+                    }))
+                    .await?;
+            }
+            Err(Error::Sqlx(sqlx::error::Error::Database(err))) => {
+                if err.is_unique_violation() {
+                    // let _ = respond_to.send(ImporterHandleMessage::AlreadyImported(url));
+                    warn!("Feed {} already imported", url);
+                }
+            }
+            Err(err) => {
+                // let _ = respond_to.send(ImporterHandleMessage::CreateFeedError(url));
+                error!("Failed to create feed for {}", url);
+                return Err(anyhow!(err));
+            }
+        }
+    }
+
+    redis
+        .next()
+        .publish("imports", import_id.to_string())
+        .await?;
+    Ok(())
+}
+
+fn gather_feed_urls(outlines: Vec<opml::Outline>) -> Vec<String> {
+    let mut urls = Vec::new();
+    for outline in outlines.into_iter() {
+        if let Some(url) = outline.xml_url {
+            urls.push(url);
+        }
+        urls.append(&mut gather_feed_urls(outline.outlines));
+    }
+    urls
+}
src/jobs/mod.rs (new file, 70 lines)
@@ -0,0 +1,70 @@
+use apalis::prelude::*;
+use apalis_redis::RedisStorage;
+use fred::prelude::*;
+use reqwest::Client;
+use serde::{Deserialize, Serialize};
+use sqlx::PgPool;
+use thiserror::Error;
+use tracing::{error, info, instrument};
+
+mod crawl_entry;
+mod crawl_feed;
+mod import_opml;
+
+pub use crawl_entry::CrawlEntryJob;
+pub use crawl_feed::CrawlFeedJob;
+pub use import_opml::ImportOpmlJob;
+
+use crate::{config::Config, domain_request_limiter::DomainRequestLimiter};
+
+#[derive(Debug, Deserialize, Serialize, Clone)]
+pub enum AsyncJob {
+    HelloWorld(String),
+    CrawlFeed(CrawlFeedJob),
+    CrawlEntry(CrawlEntryJob),
+    ImportOpml(ImportOpmlJob),
+}
+
+#[derive(Debug, Error)]
+pub enum AsyncJobError {
+    #[error("error executing job")]
+    JobError(#[from] anyhow::Error),
+}
+
+#[instrument(skip_all, fields(worker_id = ?worker_id, task_id = ?task_id))]
+pub async fn handle_async_job(
+    job: AsyncJob,
+    worker_id: Data<WorkerId>,
+    task_id: Data<TaskId>,
+    http_client: Data<Client>,
+    db: Data<PgPool>,
+    domain_request_limiter: Data<DomainRequestLimiter>,
+    config: Data<Config>,
+    apalis: Data<RedisStorage<AsyncJob>>,
+    redis: Data<RedisPool>,
+) -> Result<(), AsyncJobError> {
+    let result = match job {
+        AsyncJob::HelloWorld(name) => {
+            info!("Hello, {}!", name);
+            Ok(())
+        }
+        AsyncJob::CrawlFeed(job) => {
+            crawl_feed::crawl_feed(job, http_client, db, domain_request_limiter, apalis, redis)
+                .await
+        }
+        AsyncJob::CrawlEntry(job) => {
+            crawl_entry::crawl_entry(job, http_client, db, domain_request_limiter, config, redis)
+                .await
+        }
+        AsyncJob::ImportOpml(job) => import_opml::import_opml(job, db, apalis, redis).await,
+    };
+
+    match result {
+        Ok(_) => info!("Job completed successfully"),
+        Err(err) => {
+            error!("Job failed: {err:?}");
+            return Err(AsyncJobError::JobError(err));
+        }
+    };
+    Ok(())
+}
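`handle_async_job` is the single entry point a worker runs for every queued `AsyncJob`, and each `Data<...>` parameter has to be supplied when the worker is built. A rough wiring sketch, assuming the apalis 0.6 `WorkerBuilder`/`Monitor` API; the method names and worker name here are assumptions, not something this diff shows:

    use apalis::prelude::*;
    use apalis_redis::RedisStorage;

    // Assumed wiring, not part of this change: a worker that injects the Data<...>
    // dependencies handle_async_job expects (the remaining ones are elided) and
    // consumes jobs from the Redis-backed storage.
    async fn run_worker(
        storage: RedisStorage<AsyncJob>,
        http_client: reqwest::Client,
        db: sqlx::PgPool,
    ) -> anyhow::Result<()> {
        Monitor::new()
            .register(
                WorkerBuilder::new("async-jobs")
                    .data(http_client)
                    .data(db)
                    .data(storage.clone())
                    .backend(storage)
                    .build_fn(handle_async_job),
            )
            .run()
            .await?;
        Ok(())
    }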
@@ -3,10 +3,12 @@ pub mod api_response;
 pub mod auth;
 pub mod config;
 pub mod domain_locks;
+pub mod domain_request_limiter;
 pub mod error;
 pub mod handlers;
 pub mod headers;
 pub mod htmx;
+pub mod jobs;
 pub mod log;
 pub mod mailers;
 pub mod models;
src/log.rs (14 lines changed)
@@ -91,3 +91,17 @@ pub fn init_tracing(
         .init();
     Ok((file_writer_guard, mem_writer_guard))
 }
+
+pub fn init_worker_tracing() -> Result<WorkerGuard> {
+    let stdout_layer = tracing_subscriber::fmt::layer().pretty();
+    let filter_layer = EnvFilter::from_default_env();
+    let file_appender = tracing_appender::rolling::hourly("./logs", "log");
+    let (file_writer, file_writer_guard) = tracing_appender::non_blocking(file_appender);
+    let file_writer_layer = tracing_subscriber::fmt::layer().with_writer(file_writer);
+    tracing_subscriber::registry()
+        .with(filter_layer)
+        .with(stdout_layer)
+        .with(file_writer_layer)
+        .init();
+    Ok(file_writer_guard)
+}
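Like `init_tracing`, the new `init_worker_tracing` hands back the `WorkerGuard` for the non-blocking file writer, and the caller has to keep that guard alive for the life of the process or buffered log lines can be dropped. A minimal usage sketch; the surrounding worker binary is assumed, not shown here:

    // Sketch only: hold the guard until shutdown in whatever binary runs the job worker.
    fn worker_main() -> anyhow::Result<()> {
        let _guard = init_worker_tracing()?;
        // ... start the async runtime and the apalis worker, block until shutdown ...
        Ok(())
    }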
@@ -17,7 +17,7 @@ use crate::uuid::Base62Uuid;
 // TODO: put in config
 const USER_EMAIL_VERIFICATION_TOKEN_EXPIRATION: Duration = Duration::from_secs(24 * 60 * 60);
 
-pub fn send_confirmation_email(pool: PgPool, mailer: SmtpTransport, config: Config, user: User) {
+pub fn send_confirmation_email(db: PgPool, mailer: SmtpTransport, config: Config, user: User) {
     tokio::spawn(async move {
         let user_email_address = match user.email.parse() {
             Ok(address) => address,
@@ -28,7 +28,7 @@ pub fn send_confirmation_email(pool: PgPool, mailer: SmtpTransport, config: Conf
         };
         let mailbox = Mailbox::new(user.name.clone(), user_email_address);
         let token = match UserEmailVerificationToken::create(
-            &pool,
+            &db,
             CreateUserEmailVerificationToken {
                 user_id: user.user_id,
                 expires_at: Utc::now() + USER_EMAIL_VERIFICATION_TOKEN_EXPIRATION,
@@ -42,11 +42,10 @@ pub fn send_confirmation_email(pool: PgPool, mailer: SmtpTransport, config: Conf
                 return;
             }
         };
-        let mut confirm_link = config
-            .public_url
-            .clone();
+        let mut confirm_link = config.public_url.clone();
         confirm_link.set_path("confirm-email");
-        confirm_link.query_pairs_mut()
+        confirm_link
+            .query_pairs_mut()
             .append_pair("token_id", &Base62Uuid::from(token.token_id).to_string());
         let confirm_link = confirm_link.as_str();
 
@@ -18,7 +18,7 @@ use crate::uuid::Base62Uuid;
 const PASSWORD_RESET_TOKEN_EXPIRATION: Duration = Duration::from_secs(24 * 60 * 60);
 
 pub fn send_forgot_password_email(
-    pool: PgPool,
+    db: PgPool,
     mailer: SmtpTransport,
     config: Config,
     user: User,
@@ -35,7 +35,7 @@ pub fn send_forgot_password_email(
         };
         let mailbox = Mailbox::new(user.name.clone(), user_email_address);
         let token = match UserPasswordResetToken::create(
-            &pool,
+            &db,
             CreatePasswordResetToken {
                 token_id: Uuid::new_v4(), // cyptographically-secure random uuid
                 user_id: user.user_id,
@@ -32,7 +32,7 @@ impl UserPasswordResetToken {
     }
 
     pub async fn get(
-        pool: impl Executor<'_, Database = Postgres>,
+        db: impl Executor<'_, Database = Postgres>,
         token_id: Uuid,
     ) -> Result<UserPasswordResetToken> {
         sqlx::query_as!(
@@ -43,7 +43,7 @@ impl UserPasswordResetToken {
             where token_id = $1"#,
             token_id
         )
-        .fetch_one(pool)
+        .fetch_one(db)
         .await
         .map_err(|error| {
             if let sqlx::error::Error::RowNotFound = error {
@@ -54,7 +54,7 @@ impl UserPasswordResetToken {
     }
 
     pub async fn create(
-        pool: impl Executor<'_, Database = Postgres>,
+        db: impl Executor<'_, Database = Postgres>,
         payload: CreatePasswordResetToken,
     ) -> Result<UserPasswordResetToken> {
         Ok(sqlx::query_as!(
@@ -70,20 +70,17 @@ impl UserPasswordResetToken {
             payload.request_ip,
             payload.expires_at
         )
-        .fetch_one(pool)
+        .fetch_one(db)
         .await?)
     }
 
-    pub async fn delete(
-        pool: impl Executor<'_, Database = Postgres>,
-        token_id: Uuid,
-    ) -> Result<()> {
+    pub async fn delete(db: impl Executor<'_, Database = Postgres>, token_id: Uuid) -> Result<()> {
         sqlx::query!(
             r#"delete from user_password_reset_token
             where token_id = $1"#,
             token_id
         )
-        .execute(pool)
+        .execute(db)
        .await?;
         Ok(())
     }
src/state.rs (31 lines changed)
@@ -1,18 +1,22 @@
 use std::collections::HashMap;
 use std::sync::Arc;
 
+use apalis_redis::RedisStorage;
 use axum::extract::FromRef;
 use bytes::Bytes;
+use fred::clients::RedisPool;
 use lettre::SmtpTransport;
 use reqwest::Client;
 use sqlx::PgPool;
 use tokio::sync::{broadcast, watch, Mutex};
 use uuid::Uuid;
 
-use crate::actors::importer::{ImporterHandle, ImporterHandleMessage};
 use crate::actors::crawl_scheduler::{CrawlSchedulerHandle, CrawlSchedulerHandleMessage};
+use crate::actors::importer::{ImporterHandle, ImporterHandleMessage};
 use crate::config::Config;
 use crate::domain_locks::DomainLocks;
+use crate::domain_request_limiter::DomainRequestLimiter;
+use crate::jobs::AsyncJob;
 
 /// A map of feed IDs to a channel receiver for the active `CrawlScheduler` running a feed crawl
 /// for that feed.
@@ -39,21 +43,24 @@ pub type Imports = Arc<Mutex<HashMap<Uuid, broadcast::Receiver<ImporterHandleMes
 
 #[derive(Clone)]
 pub struct AppState {
-    pub pool: PgPool,
+    pub db: PgPool,
     pub config: Config,
     pub log_receiver: watch::Receiver<Bytes>,
     pub crawls: Crawls,
     pub domain_locks: DomainLocks,
+    pub domain_request_limiter: DomainRequestLimiter,
     pub client: Client,
     pub crawl_scheduler: CrawlSchedulerHandle,
     pub importer: ImporterHandle,
     pub imports: Imports,
     pub mailer: SmtpTransport,
+    pub apalis: RedisStorage<AsyncJob>,
+    pub redis: RedisPool,
 }
 
 impl FromRef<AppState> for PgPool {
     fn from_ref(state: &AppState) -> Self {
-        state.pool.clone()
+        state.db.clone()
     }
 }
 
@@ -81,6 +88,12 @@ impl FromRef<AppState> for DomainLocks {
     }
 }
 
+impl FromRef<AppState> for DomainRequestLimiter {
+    fn from_ref(state: &AppState) -> Self {
+        state.domain_request_limiter.clone()
+    }
+}
+
 impl FromRef<AppState> for Client {
     fn from_ref(state: &AppState) -> Self {
         state.client.clone()
@@ -110,3 +123,15 @@ impl FromRef<AppState> for SmtpTransport {
         state.mailer.clone()
     }
 }
+
+impl FromRef<AppState> for RedisStorage<AsyncJob> {
+    fn from_ref(state: &AppState) -> Self {
+        state.apalis.clone()
+    }
+}
+
+impl FromRef<AppState> for RedisPool {
+    fn from_ref(state: &AppState) -> Self {
+        state.redis.clone()
+    }
+}
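The two new `FromRef` impls are what let axum hand the job queue and the Redis pub/sub pool straight to handlers as sub-state of `AppState`. A handler-shaped sketch, assuming routes registered on a `Router<AppState>` elsewhere; the handler name and the job it pushes are illustrative:

    use apalis::prelude::*;
    use apalis_redis::RedisStorage;
    use axum::extract::State;

    use crate::jobs::AsyncJob;

    // Sketch only: the FromRef impl above is what makes this extractor compile.
    async fn enqueue_hello(State(mut apalis): State<RedisStorage<AsyncJob>>) -> &'static str {
        // Error handling is skipped purely to keep the sketch short.
        let _ = apalis.push(AsyncJob::HelloWorld("hello".to_string())).await;
        "queued"
    }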