Parse strings in Windows-1252 encoding

According to UESP, strings in plugin files are encoded in Windows-1252: https://en.uesp.net/wiki/Skyrim_Mod:File_Format_Conventions

Decoding from Windows-1252, instead of assuming UTF-8, fixes parsing of some files whose zstrings contain special characters such as umlauts.
This commit is contained in:
Tyler Hallada 2021-07-23 23:14:08 -04:00
parent 261c0e21c3
commit 151ff191cc
3 changed files with 19 additions and 7 deletions

10
Cargo.lock generated
View File

@ -88,6 +88,15 @@ dependencies = [
"cfg-if", "cfg-if",
] ]
[[package]]
name = "encoding_rs"
version = "0.8.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80df024fbc5ac80f87dfef0d9f5209a252f2a497f7f42944cff24d8253cac065"
dependencies = [
"cfg-if",
]
[[package]] [[package]]
name = "flate2" name = "flate2"
version = "1.0.20" version = "1.0.20"
@ -237,6 +246,7 @@ dependencies = [
"anyhow", "anyhow",
"argh", "argh",
"bitflags", "bitflags",
"encoding_rs",
"flate2", "flate2",
"nom", "nom",
"serde", "serde",

View File

@ -14,6 +14,7 @@ license = "MIT"
anyhow = "1.0" anyhow = "1.0"
argh = { version = "0.1", optional = true } argh = { version = "0.1", optional = true }
bitflags = "1.2" bitflags = "1.2"
encoding_rs = "0.8"
flate2 = "1.0" flate2 = "1.0"
nom = "6" nom = "6"
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }

View File

@ -1,7 +1,9 @@
use std::borrow::Cow;
use std::io::Read; use std::io::Read;
use std::{convert::TryInto, str}; use std::{convert::TryInto, str};
use anyhow::{anyhow, Result}; use anyhow::{anyhow, Result};
use encoding_rs::WINDOWS_1252;
use flate2::read::ZlibDecoder; use flate2::read::ZlibDecoder;
use nom::{ use nom::{
branch::alt, branch::alt,
@ -32,9 +34,9 @@ pub struct PluginHeader<'a> {
pub version: f32, pub version: f32,
pub num_records_and_groups: i32, pub num_records_and_groups: i32,
pub next_object_id: u32, pub next_object_id: u32,
pub author: Option<&'a str>, pub author: Option<Cow<'a, str>>,
pub description: Option<&'a str>, pub description: Option<Cow<'a, str>>,
pub masters: Vec<&'a str>, pub masters: Vec<Cow<'a, str>>,
} }
/// Parsed [CELL records](https://en.uesp.net/wiki/Skyrim_Mod:Mod_File_Format/CELL) /// Parsed [CELL records](https://en.uesp.net/wiki/Skyrim_Mod:Mod_File_Format/CELL)
@ -526,10 +528,9 @@ fn parse_4char(input: &[u8]) -> IResult<&[u8], &str> {
map_res(take(4usize), |bytes: &[u8]| str::from_utf8(bytes))(input) map_res(take(4usize), |bytes: &[u8]| str::from_utf8(bytes))(input)
} }
fn parse_zstring(input: &[u8]) -> IResult<&[u8], &str> { fn parse_zstring(input: &[u8]) -> IResult<&[u8], Cow<str>> {
let (input, zstring) = map_res(take_while(|byte| byte != 0), |bytes: &[u8]| { let (input, bytes) = take_while(|byte| byte != 0)(input)?;
str::from_utf8(bytes) let (zstring, _, _) = WINDOWS_1252.decode(bytes);
})(input)?;
let (input, _) = take(1usize)(input)?; let (input, _) = take(1usize)(input)?;
Ok((input, zstring)) Ok((input, zstring))
} }