WIP fix rar file extraction

Also, starting to break up into smaller functions
This commit is contained in:
Tyler Hallada 2021-07-03 16:00:18 -04:00
parent d6b8f4e74a
commit 421f2b7071
4 changed files with 310 additions and 55 deletions

158
Cargo.lock generated
View File

@ -75,7 +75,7 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "616896e05fc0e2649463a93a15183c6a16bf03413a7af88ef1285ddedfa9cda5"
dependencies = [
"num-traits",
"num-traits 0.2.14",
]
[[package]]
@ -182,7 +182,7 @@ checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
dependencies = [
"libc",
"num-integer",
"num-traits",
"num-traits 0.2.14",
"serde",
"time",
"winapi",
@ -399,6 +399,15 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "enum_primitive"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4551092f4d519593039259a9ed8daedf0da12e5109c5280338073eaeb81180"
dependencies = [
"num-traits 0.1.43",
]
[[package]]
name = "flate2"
version = "1.0.20"
@ -442,6 +451,12 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "fuchsia-cprng"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
[[package]]
name = "funty"
version = "1.1.0"
@ -972,6 +987,7 @@ dependencies = [
"tempfile",
"tokio",
"tokio-util",
"unrar",
"zip",
]
@ -1027,6 +1043,42 @@ dependencies = [
"winapi",
]
[[package]]
name = "num"
version = "0.1.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4703ad64153382334aa8db57c637364c322d3372e097840c72000dabdcf6156e"
dependencies = [
"num-bigint",
"num-complex",
"num-integer",
"num-iter",
"num-rational",
"num-traits 0.2.14",
]
[[package]]
name = "num-bigint"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e63899ad0da84ce718c14936262a41cee2c79c981fc0a0e7c7beb47d5a07e8c1"
dependencies = [
"num-integer",
"num-traits 0.2.14",
"rand 0.4.6",
"rustc-serialize",
]
[[package]]
name = "num-complex"
version = "0.1.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b288631d7878aaf59442cffd36910ea604ecd7745c36054328595114001c9656"
dependencies = [
"num-traits 0.2.14",
"rustc-serialize",
]
[[package]]
name = "num-integer"
version = "0.1.44"
@ -1034,7 +1086,39 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db"
dependencies = [
"autocfg",
"num-traits",
"num-traits 0.2.14",
]
[[package]]
name = "num-iter"
version = "0.1.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2021c8337a54d21aca0d59a92577a029af9431cb59b909b03252b9c164fad59"
dependencies = [
"autocfg",
"num-integer",
"num-traits 0.2.14",
]
[[package]]
name = "num-rational"
version = "0.1.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee314c74bd753fc86b4780aa9475da469155f3848473a261d2d18e35245a784e"
dependencies = [
"num-bigint",
"num-integer",
"num-traits 0.2.14",
"rustc-serialize",
]
[[package]]
name = "num-traits"
version = "0.1.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92e5113e9fd4cc14ded8e499429f396a20f98c772a47cc8622a736e1ec843c31"
dependencies = [
"num-traits 0.2.14",
]
[[package]]
@ -1272,6 +1356,19 @@ version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "941ba9d78d8e2f7ce474c015eea4d9c6d25b6a3327f9832ee29a4de27f91bbb8"
[[package]]
name = "rand"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293"
dependencies = [
"fuchsia-cprng",
"libc",
"rand_core 0.3.1",
"rdrand",
"winapi",
]
[[package]]
name = "rand"
version = "0.7.3"
@ -1318,6 +1415,21 @@ dependencies = [
"rand_core 0.6.2",
]
[[package]]
name = "rand_core"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b"
dependencies = [
"rand_core 0.4.2",
]
[[package]]
name = "rand_core"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc"
[[package]]
name = "rand_core"
version = "0.5.1"
@ -1363,6 +1475,15 @@ dependencies = [
"rand_core 0.5.1",
]
[[package]]
name = "rdrand"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
dependencies = [
"rand_core 0.3.1",
]
[[package]]
name = "redox_syscall"
version = "0.2.8"
@ -1443,6 +1564,12 @@ dependencies = [
"winreg",
]
[[package]]
name = "rustc-serialize"
version = "0.3.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcf128d1287d2ea9d80910b5f1120d0b8eede3fbf1abe91c40d39ea7d51e6fda"
[[package]]
name = "ryu"
version = "1.0.5"
@ -2055,6 +2182,31 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
[[package]]
name = "unrar"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "433cea4f0b7bec88d47becb380887b8786a3cfb1c82e1ef9d32a682ba6801814"
dependencies = [
"bitflags",
"enum_primitive",
"lazy_static",
"num",
"regex",
"unrar_sys",
]
[[package]]
name = "unrar_sys"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0009399408dc0bcc5c8910672544fceceeba18b91f741ff943916e917d982c60"
dependencies = [
"cc",
"libc",
"winapi",
]
[[package]]
name = "url"
version = "2.2.2"

View File

@ -26,5 +26,6 @@ skyrim-cell-dump = "0.1.3"
tempfile = "3.2"
tokio = { version = "1.5.0", features = ["full"] }
tokio-util = { version = "0.6", features = ["compat"] }
unrar = "0.4"
# Need `ZipWriter::append_new` from https://github.com/zip-rs/zip/commit/ce272616ac69b798bb7b0925147a8a710dc2bb65
zip = { git = "https://github.com/zip-rs/zip.git" }

37
README.md Normal file
View File

@ -0,0 +1,37 @@
# modmapper
Downloads mods from nexus, parses the plugins inside, and saves data to a postgres database.
## Development Install
1. Install and run postgres.
2. Create postgres user and database (and add uuid extension while you're
there):
```
createuser modmapper
createdb modmapper
sudo -u postgres -i psql
postgres=# ALTER DATABASE modmapper OWNER TO modmapper;
\password modmapper
# Or, on Windows in PowerShell:
& 'C:\Program Files\PostgreSQL\13\bin\createuser.exe' -U postgres modmapper
& 'C:\Program Files\PostgreSQL\13\bin\createdb.exe' -U postgres modmapper
& 'C:\Program Files\PostgreSQL\13\bin\psql.exe' -U postgres
postgres=# ALTER DATABASE modmapper OWNER TO modmapper;
\password modmapper
```
3. Save the password somewhere safe and then add a `.env` file to the project
directory with the contents:
```
DATABASE_URL=postgresql://modmapper:<password>@localhost/modmapper
```
4. Install
[`sqlx_cli`](https://github.com/launchbadge/sqlx/tree/master/sqlx-cli) with
`cargo install --version=0.1.0-beta.1 sqlx-cli --no-default-features --features postgres`
5. Run `sqlx migrate --source migrations run` which will run all the database migrations.

View File

@ -16,12 +16,14 @@ use sqlx::postgres::PgPoolOptions;
use std::convert::TryInto;
use std::env;
use std::fs::OpenOptions;
use std::io::Read;
use std::io::Seek;
use std::io::SeekFrom;
use tempfile::tempfile;
use tempfile::{tempfile, tempdir};
use tokio::io::{AsyncReadExt, AsyncSeekExt};
use tokio::time::sleep;
use tokio_util::compat::FuturesAsyncReadCompatExt;
use unrar::Archive;
use zip::write::{FileOptions, ZipWriter};
static USER_AGENT: &str = "mod-mapper/0.1";
@ -266,17 +268,18 @@ async fn insert_plugin_cell(
}
fn rate_limit_wait_duration(res: &Response) -> Result<Option<std::time::Duration>> {
dbg!(res.headers().get("x-rl-daily-remaining"));
let daily_remaining = res
.headers()
.get("X-RL-Daily-Remaining")
.expect("No daily limit in response headers");
.get("x-rl-daily-remaining")
.expect("No daily remaining in response headers");
let hourly_remaining = res
.headers()
.get("X-RL-Hourly-Remaining")
.get("x-rl-hourly-remaining")
.expect("No hourly limit in response headers");
let hourly_reset = res
.headers()
.get("X-RL-Hourly-Reset")
.get("x-rl-hourly-reset")
.expect("No hourly reset in response headers");
dbg!(daily_remaining);
dbg!(hourly_remaining);
@ -297,6 +300,66 @@ fn rate_limit_wait_duration(res: &Response) -> Result<Option<std::time::Duration
Ok(None)
}
/// Parses one plugin file, records it in the database, and appends its raw
/// bytes to the plugin zip archive.
///
/// Steps, in order:
/// 1. Parse `plugin_buf` with `parse_plugin` and hash the raw bytes with
///    `seahash`.
/// 2. Insert a plugin row (`insert_plugin`) using `name`, the hash,
///    `db_file.id`, and the parsed header fields (version, author,
///    description, masters).
/// 3. For every parsed cell, insert a cell row (`insert_cell`) and link it to
///    the plugin row (`insert_plugin_cell`).
/// 4. Start a new archive entry at
///    `{GAME_NAME}/{nexus_mod_id}/{file_id}/{file_name}` and write the
///    unmodified plugin bytes into it.
///
/// # Errors
///
/// Returns an error if parsing fails, if any database insert fails, or if
/// writing to `plugin_archive` fails.
///
/// # Panics
///
/// Panics if a cell's `form_id` does not fit the integer type expected by
/// `insert_cell` (the conversion is unwrapped).
async fn process_plugin<W>(
    plugin_buf: &[u8],
    pool: &sqlx::Pool<sqlx::Postgres>,
    plugin_archive: &mut ZipWriter<W>,
    name: &str,
    db_file: &File,
    mod_obj: &Mod,
    file_id: i64,
    file_name: &str,
) -> Result<()>
where
    W: std::io::Write + std::io::Seek,
{
    // `plugin_buf` and `pool` are already references; pass them through
    // directly instead of borrowing them a second time.
    let plugin = parse_plugin(plugin_buf)?;
    let hash = seahash::hash(plugin_buf);
    let plugin_row = insert_plugin(
        pool,
        name,
        // The u64 hash is reinterpreted as i64 for storage.
        hash as i64,
        db_file.id,
        Some(plugin.header.version as f64),
        plugin.header.author,
        plugin.header.description,
        Some(
            &plugin
                .header
                .masters
                .iter()
                .map(|s| s.to_string())
                .collect::<Vec<String>>(),
        ),
    )
    .await?;
    for cell in plugin.cells {
        let cell_row = insert_cell(
            pool,
            cell.form_id.try_into().unwrap(),
            cell.x,
            cell.y,
            cell.is_persistent,
        )
        .await?;
        insert_plugin_cell(pool, plugin_row.id, cell_row.id, cell.editor_id).await?;
    }
    plugin_archive.start_file(
        format!(
            "{}/{}/{}/{}",
            GAME_NAME, mod_obj.nexus_mod_id, file_id, file_name
        ),
        FileOptions::default(),
    )?;
    // `std::io::copy` needs a `&mut impl Read`. A `&[u8]` implements `Read`,
    // so copy through a mutable local slice handle; reading only advances
    // this local handle and leaves the caller's buffer untouched.
    let mut reader = plugin_buf;
    std::io::copy(&mut reader, plugin_archive)?;
    Ok(())
}
#[tokio::main]
pub async fn main() -> Result<()> {
dotenv().ok();
@ -433,6 +496,7 @@ pub async fn main() -> Result<()> {
.ok_or_else(|| anyhow!("Missing file_id key in file in API response"))?
.as_i64()
.ok_or_else(|| anyhow!("file_id value in API response file is not a number"))?;
let file_id = 18422; // DELETEME: temp test bad rar file
dbg!(file_id);
let name = file
.get("name")
@ -492,6 +556,8 @@ pub async fn main() -> Result<()> {
.await?
.error_for_status()?;
let duration = rate_limit_wait_duration(&res)?;
let links = res.json::<Value>().await?;
let link = links
.get(0)
@ -510,8 +576,6 @@ pub async fn main() -> Result<()> {
.await?
.error_for_status()?;
let duration = rate_limit_wait_duration(&res)?;
// See: https://github.com/benkay86/async-applied/blob/master/reqwest-tokio-compat/src/main.rs
let mut byte_stream = res
.bytes_stream()
@ -571,10 +635,51 @@ pub async fn main() -> Result<()> {
// std::io::copy(&mut file, &mut plugin_archive)?;
// }
// }
// Use unrar to uncompress the entire .rar file to avoid a bug with compress_tools panicking when uncompressing
// certain .rar files: https://github.com/libarchive/libarchive/issues/373
"application/x-rar-compressed" => {
tokio_file.seek(SeekFrom::Start(0)).await?;
let mut file = tokio_file.into_std().await;
let temp_dir = tempdir()?;
let temp_file_path = temp_dir.path().join("download.rar");
let mut temp_file = std::fs::File::create(temp_file_path)?;
std::io::copy(&mut file, &mut temp_file)?;
let mut plugin_file_paths = Vec::new();
let list = Archive::new(temp_file_path.to_string_lossy().to_string()).list();
if let Ok(list) = list {
for entry in list {
if let Ok(entry) = entry {
if entry.filename.ends_with(".esp")
|| entry.filename.ends_with(".esm")
|| entry.filename.ends_with(".esl")
{
plugin_file_paths.push(entry.filename);
}
}
}
}
if plugin_file_paths.len() > 0 {
let extract = Archive::new(temp_file_path.to_string_lossy().to_string()).extract_to(temp_dir.path().to_string_lossy().to_string());
extract.expect("failed to extract").process().expect("failed to extract");
for file_name in plugin_file_paths.iter() {
dbg!(file_name);
let plugin_file = std::fs::File::open(temp_dir.path().join(file_name))?;
let mut plugin_buf = Vec::new();
plugin_file.read(&mut plugin_buf)?;
process_plugin(&plugin_buf, &pool, &mut plugin_archive, name, &db_file, &mod_obj, file_id, file_name).await?;
}
dbg!("uncompressed!");
}
temp_dir.close()?;
},
_ => {
tokio_file.seek(SeekFrom::Start(0)).await?;
let mut file = tokio_file.into_std().await;
let mut plugin_file_paths = Vec::new();
for file_name in list_archive_files(&file)? {
if file_name.ends_with(".esp")
|| file_name.ends_with(".esm")
@ -583,56 +688,13 @@ pub async fn main() -> Result<()> {
plugin_file_paths.push(file_name);
}
}
for file_name in plugin_file_paths.iter() {
file.seek(SeekFrom::Start(0))?;
dbg!(file_name);
let mut buf = Vec::default();
uncompress_archive_file(&mut file, &mut buf, file_name)?;
let plugin = parse_plugin(&buf)?;
let hash = seahash::hash(&buf);
let plugin_row = insert_plugin(
&pool,
name,
hash as i64,
db_file.id,
Some(plugin.header.version as f64),
plugin.header.author,
plugin.header.description,
Some(
&plugin
.header
.masters
.iter()
.map(|s| s.to_string())
.collect::<Vec<String>>(),
),
)
.await?;
for cell in plugin.cells {
let cell_row = insert_cell(
&pool,
cell.form_id.try_into().unwrap(),
cell.x,
cell.y,
cell.is_persistent,
)
.await?;
insert_plugin_cell(
&pool,
plugin_row.id,
cell_row.id,
cell.editor_id,
)
.await?;
}
plugin_archive.start_file(
format!(
"{}/{}/{}/{}",
GAME_NAME, mod_obj.nexus_mod_id, file_id, file_name
),
FileOptions::default(),
)?;
std::io::copy(&mut buf.as_slice(), &mut plugin_archive)?;
process_plugin(&buf, &pool, &mut plugin_archive, name, &db_file, &mod_obj, file_id, file_name).await?;
}
}
};
@ -641,10 +703,13 @@ pub async fn main() -> Result<()> {
if let Some(duration) = duration {
sleep(duration).await;
}
break;
}
break;
}
page += 1;
break;
}
Ok(())