use std::io::Read; use std::{convert::TryInto, str}; use anyhow::{anyhow, Result}; use flate2::read::ZlibDecoder; use nom::{ branch::alt, bytes::complete::{take, take_while}, combinator::{map, map_res, verify}, number::complete::{le_f32, le_i32, le_u16, le_u32}, IResult, }; use serde::Serialize; const HEADER_SIZE: u32 = 24; /// A parsed TES5 Skyrim plugin file #[derive(Debug, PartialEq, Serialize)] pub struct Plugin<'a> { /// Parsed [TES4 header record](https://en.uesp.net/wiki/Skyrim_Mod:Mod_File_Format/TES4) with metadata about the plugin pub header: PluginHeader<'a>, /// Parsed [CELL records](https://en.uesp.net/wiki/Skyrim_Mod:Mod_File_Format/CELL) contained in the plugin pub cells: Vec, } /// Parsed [TES4 header record](https://en.uesp.net/wiki/Skyrim_Mod:Mod_File_Format/TES4) #[derive(Debug, PartialEq, Serialize)] pub struct PluginHeader<'a> { pub version: f32, pub num_records_and_groups: i32, pub next_object_id: u32, pub author: Option<&'a str>, pub description: Option<&'a str>, pub masters: Vec<&'a str>, } /// Parsed [CELL records](https://en.uesp.net/wiki/Skyrim_Mod:Mod_File_Format/CELL) #[derive(Debug, PartialEq, Serialize)] pub struct Cell { pub form_id: u32, pub editor_id: Option, pub x: Option, pub y: Option, /// Indicates that this cell is a special persistent worldspace cell where all persistent references for the worldspace are stored pub is_persistent: bool, } #[derive(Debug)] struct CellData { editor_id: Option, x: Option, y: Option, } #[derive(Debug)] pub struct UnparsedCell<'a> { form_id: u32, is_compressed: bool, is_persistent: bool, data: &'a [u8], } /// A CELL record that has had it's header parsed and data decompressed, but not yet parsed into individual fields #[derive(Debug)] struct DecompressedCell { pub form_id: u32, pub is_persistent: bool, pub data: Vec, } #[derive(Debug)] struct GroupHeader<'a> { size: u32, label: &'a [u8; 4], group_type: i32, timestamp: u16, version_control_info: u16, } #[derive(Debug)] struct RecordHeader<'a> { 
record_type: &'a str, size: u32, flags: RecordFlags, id: u32, timestamp: u16, version_control_info: u16, version: u16, } bitflags! { struct RecordFlags: u32 { const MASTER_FILE = 0x00000001; const DELETED_GROUP = 0x00000010; const DELETED_RECORD = 0x00000020; const CONSTANT = 0x00000040; const LOCALIZED = 0x00000080; const INACCESSIBLE = 0x00000100; const LIGHT_MASTER_FILE = 0x00000200; const PERSISTENT_REFR = 0x00000400; const INITIALLY_DISABLED = 0x00000800; const IGNORED = 0x00001000; const VISIBLE_WHEN_DISTANT = 0x00008000; const RANDOM_ANIM_START = 0x00010000; const OFF_LIMITS = 0x00020000; const COMPRESSED = 0x00040000; const CANT_WAIT = 0x00080000; const IGNORE_OBJECT_INTERACTION = 0x00100000; const IS_MARKER = 0x00800000; const NO_AI_ACQUIRE = 0x02000000; const NAVMESH_FILTER = 0x04000000; const NAVMESH_BOUNDING_BOX = 0x08000000; const REFLECTED_BY_AUTO_WATER = 0x10000000; const DONT_HAVOK_SETTLE = 0x20000000; const NO_RESPAWN = 0x40000000; const MULTI_BOUND = 0x80000000; } } #[derive(Debug)] enum Header<'a> { Group(GroupHeader<'a>), Record(RecordHeader<'a>), } #[derive(Debug)] struct FieldHeader<'a> { field_type: &'a str, size: u16, } /// Parses fields from the decompressed bytes of a CELL record. Returns remaining bytes of the input after parsing and the parsed Cell struct. 
fn parse_cell<'a>(input: &'a [u8], form_id: u32, is_persistent: bool) -> IResult<&'a [u8], Cell> {
    let (input, cell_data) = parse_cell_fields(input)?;
    Ok((
        input,
        Cell {
            form_id,
            editor_id: cell_data.editor_id,
            x: cell_data.x,
            y: cell_data.y,
            is_persistent,
        },
    ))
}

/// Maps the input `UnparsedCell`s to `DecompressedCell`s and decompresses the zlib compressed data sections of the record if necessary
///
/// # Errors
///
/// Returns an error if a compressed record is shorter than the four bytes that precede its zlib
/// stream, or if zlib decompression fails.
fn decompress_cells(unparsed_cells: Vec<UnparsedCell>) -> Result<Vec<DecompressedCell>> {
    let mut decompressed_cells = Vec::with_capacity(unparsed_cells.len());
    for unparsed_cell in unparsed_cells {
        let decompressed_data = if unparsed_cell.is_compressed {
            // The zlib stream starts after a four-byte prefix (presumably the decompressed size
            // per the UESP format docs — it is not used here). Use a checked slice so a truncated
            // record yields an error instead of a panic.
            let compressed = unparsed_cell.data.get(4..).ok_or_else(|| {
                anyhow!(
                    "Compressed CELL record {:#010x} is too short to contain compressed data",
                    unparsed_cell.form_id
                )
            })?;
            let mut buf = Vec::new();
            let mut decoder = ZlibDecoder::new(compressed);
            decoder.read_to_end(&mut buf)?;
            buf
        } else {
            unparsed_cell.data.to_vec()
        };
        decompressed_cells.push(DecompressedCell {
            form_id: unparsed_cell.form_id,
            is_persistent: unparsed_cell.is_persistent,
            data: decompressed_data,
        });
    }
    Ok(decompressed_cells)
}

/// Parses the plugin header and finds and extracts the headers and unparsed (and possibly compressed) data sections of every CELL record in the file.
fn parse_header_and_cell_bytes(input: &[u8]) -> IResult<&[u8], (PluginHeader, Vec<UnparsedCell>)> {
    let (input, header) = parse_plugin_header(input)?;
    // Saturate rather than truncate: the group walker also stops when the input runs out, so an
    // over-large budget is harmless while a wrapped-around one would end the scan early.
    let remaining_bytes = input.len().try_into().unwrap_or(u32::MAX);
    let (input, unparsed_cells) = parse_group_data(input, remaining_bytes, 0)?;
    Ok((input, (header, unparsed_cells)))
}

/// Parses header and cell records from input bytes of a plugin file and outputs `Plugin` struct with extracted fields.
/// /// # Arguments /// /// * `input` - A slice of bytes read from the plugin file /// /// # Examples /// /// ``` /// use skyrim_cell_dump::parse_plugin; /// /// let plugin_contents = std::fs::read("Plugin.esp").unwrap(); /// let plugin = parse_plugin(&plugin_contents).unwrap(); /// ``` pub fn parse_plugin(input: &[u8]) -> Result { let (_, (header, unparsed_cells)) = parse_header_and_cell_bytes(&input) .map_err(|_err| anyhow!("Failed to parse plugin header and find CELL data"))?; let decompressed_cells = decompress_cells(unparsed_cells)?; let mut cells = Vec::new(); for decompressed_cell in decompressed_cells { let (_, cell) = parse_cell( &decompressed_cell.data, decompressed_cell.form_id, decompressed_cell.is_persistent, ) .unwrap(); cells.push(cell); } Ok(Plugin { header, cells }) } fn parse_group_data<'a>( input: &'a [u8], remaining_bytes: u32, depth: usize, ) -> IResult<&'a [u8], Vec> { let mut input = input; let mut cells = vec![]; let mut consumed_bytes = 0; while !input.is_empty() && consumed_bytes < remaining_bytes { let (remaining, record_header) = parse_header(input)?; match record_header { Header::Group(group_header) => { if group_header.group_type == 0 { // TODO: get rid of unwrap let label = str::from_utf8(group_header.label).unwrap(); if label != "WRLD" && label != "CELL" { let (remaining, _) = take(group_header.size - HEADER_SIZE)(remaining)?; input = remaining; consumed_bytes += group_header.size; continue; } } else if group_header.group_type == 7 { // TODO: DRY let (remaining, _) = take(group_header.size - HEADER_SIZE)(remaining)?; input = remaining; consumed_bytes += group_header.size; continue; } let (remaining, mut inner_cells) = parse_group_data(remaining, group_header.size - HEADER_SIZE, depth + 1)?; cells.append(&mut inner_cells); input = remaining; consumed_bytes += group_header.size; } Header::Record(record_header) => match record_header.record_type { "CELL" => { let (remaining, data) = take(record_header.size)(remaining)?; 
cells.push(UnparsedCell { form_id: record_header.id, is_compressed: record_header.flags.contains(RecordFlags::COMPRESSED), is_persistent: record_header.flags.contains(RecordFlags::PERSISTENT_REFR), data, }); input = remaining; consumed_bytes += record_header.size + HEADER_SIZE; } _ => { let (remaining, _) = take(record_header.size)(remaining)?; input = remaining; consumed_bytes += record_header.size + HEADER_SIZE; } }, } } Ok((input, cells)) } fn parse_plugin_header(input: &[u8]) -> IResult<&[u8], PluginHeader> { let (mut input, tes4) = verify(parse_record_header, |record_header| { record_header.record_type == "TES4" })(input)?; let mut consumed_bytes = 0; let (remaining, hedr) = verify(parse_field_header, |field_header| { field_header.field_type == "HEDR" })(input)?; consumed_bytes += hedr.size as u32 + 6; input = remaining; let (remaining, (version, num_records_and_groups, next_object_id)) = parse_hedr_fields(input)?; input = remaining; let mut author = None; let mut description = None; let mut masters = vec![]; let mut large_size = None; while consumed_bytes < tes4.size as u32 { let (remaining, field) = parse_field_header(input)?; consumed_bytes += field.size as u32 + 6; input = remaining; match field.field_type { "CNAM" => { let (remaining, author_str) = parse_zstring(input)?; input = remaining; author = Some(author_str); } "SNAM" => { let (remaining, desc_str) = parse_zstring(input)?; input = remaining; description = Some(desc_str); } "MAST" => { let (remaining, master_str) = parse_zstring(input)?; input = remaining; masters.push(master_str); } "INTV" => { let (remaining, _) = take(field.size)(input)?; input = remaining; break; } "XXXX" => { let (remaining, size) = le_u32(input)?; input = remaining; large_size = Some(size); } _ => { if let Some(size) = large_size { let (remaining, _) = take(size)(input)?; input = remaining; large_size = None; } else { let (remaining, _) = take(field.size)(input)?; input = remaining; } } } } Ok(( input, PluginHeader { version, 
num_records_and_groups, next_object_id, author, description, masters, }, )) } fn parse_group_header(input: &[u8]) -> IResult<&[u8], GroupHeader> { let (input, _record_type) = verify(parse_4char, |record_type: &str| record_type == "GRUP")(input)?; let (input, size) = le_u32(input)?; let (input, label) = map_res(take(4usize), |bytes: &[u8]| bytes.try_into())(input)?; let (input, group_type) = le_i32(input)?; let (input, timestamp) = le_u16(input)?; let (input, version_control_info) = le_u16(input)?; let (input, _) = take(4usize)(input)?; Ok(( input, GroupHeader { size, label, group_type, timestamp, version_control_info, }, )) } fn parse_record_header(input: &[u8]) -> IResult<&[u8], RecordHeader> { let (input, record_type) = verify(parse_4char, |record_type: &str| record_type != "GRUP")(input)?; let (input, size) = le_u32(input)?; let (input, flags) = map_res(le_u32, |bits| { RecordFlags::from_bits(bits).ok_or("bad record flag") })(input)?; let (input, id) = le_u32(input)?; let (input, timestamp) = le_u16(input)?; let (input, version_control_info) = le_u16(input)?; let (input, version) = le_u16(input)?; let (input, _) = take(2usize)(input)?; Ok(( input, RecordHeader { record_type, size, flags, id, timestamp, version_control_info, version, }, )) } fn parse_header(input: &[u8]) -> IResult<&[u8], Header> { alt(( map(parse_group_header, |group_header| { Header::Group(group_header) }), map(parse_record_header, |record_header| { Header::Record(record_header) }), ))(input) } fn parse_field_header(input: &[u8]) -> IResult<&[u8], FieldHeader> { let (input, field_type) = parse_4char(input)?; let (input, size) = le_u16(input)?; Ok((input, FieldHeader { field_type, size })) } fn parse_hedr_fields(input: &[u8]) -> IResult<&[u8], (f32, i32, u32)> { let (input, version) = le_f32(input)?; let (input, num_records_and_groups) = le_i32(input)?; let (input, next_object_id) = le_u32(input)?; Ok((input, (version, num_records_and_groups, next_object_id))) } fn parse_cell_fields<'a>(input: 
&'a [u8]) -> IResult<&'a [u8], CellData> { let mut cell_data = CellData { editor_id: None, x: None, y: None, }; let mut input = input; let mut large_size = None; while !input.is_empty() { let (remaining, field) = parse_field_header(input)?; input = remaining; match field.field_type { "EDID" => { let (remaining, editor_id) = parse_zstring(input)?; cell_data.editor_id = Some(editor_id.to_string()); input = remaining; } "XCLC" => { let (remaining, x) = le_i32(input)?; let (remaining, y) = le_i32(remaining)?; cell_data.x = Some(x); cell_data.y = Some(y); let (remaining, _) = take(4usize)(remaining)?; input = remaining; } "XXXX" => { let (remaining, size) = le_u32(input)?; input = remaining; large_size = Some(size); } _ => { if let Some(size) = large_size { let (remaining, _) = take(size)(input)?; input = remaining; large_size = None; } else { let (remaining, _) = take(field.size)(input)?; input = remaining; } } } } Ok((input, cell_data)) } fn parse_4char(input: &[u8]) -> IResult<&[u8], &str> { map_res(take(4usize), |bytes: &[u8]| str::from_utf8(bytes))(input) } fn parse_zstring(input: &[u8]) -> IResult<&[u8], &str> { let (input, zstring) = map_res(take_while(|byte| byte != 0), |bytes: &[u8]| { str::from_utf8(bytes) })(input)?; let (input, _) = take(1usize)(input)?; Ok((input, zstring)) }