// byml/parse.rs

1use crate::{AnyError, Byml, NodeType, U24};
2use binread::{BinRead, BinReaderExt, Endian, NullString};
3use byteorder::ByteOrder;
4use std::io::{Cursor, Read, Seek, SeekFrom};
5
6type BymlResult = Result<Byml, AnyError>;
7
8impl From<u8> for NodeType {
9    fn from(val: u8) -> NodeType {
10        match val {
11            0xA0 => NodeType::String,
12            0xA1 => NodeType::Binary,
13            0xC0 => NodeType::Array,
14            0xC1 => NodeType::Hash,
15            0xD0 => NodeType::Bool,
16            0xD1 => NodeType::Int,
17            0xD2 => NodeType::Float,
18            0xD3 => NodeType::UInt,
19            0xD4 => NodeType::Int64,
20            0xD5 => NodeType::UInt64,
21            0xD6 => NodeType::Double,
22            0xFF => NodeType::Null,
23            _ => panic!("Invalid node type"),
24        }
25    }
26}
27
28#[derive(Debug, BinRead)]
29#[br(assert([b"BY", b"YB"].contains(&&magic)))]
30struct BymlDoc {
31    magic: [u8; 2],
32    #[br(is_big = (&magic == b"BY"), is_little = (&magic == b"YB"))]
33    header: Header,
34}
35
36#[derive(Debug, BinRead)]
37#[br(assert(version >= 2 && version < 5))]
38struct Header {
39    version: u16,
40    hash_table_offset: u32,
41    string_table_offset: u32,
42    root_node_offset: u32,
43}
44
45#[derive(Debug, BinRead)]
46#[br(magic = 0xC2u8, assert(strings.len() as u64 == entries.0))]
47struct StringTable {
48    entries: U24,
49    #[br(count = entries.0)]
50    offsets: Vec<u32>,
51    #[br(parse_with = parse_string_table, args(offsets.clone()))]
52    strings: Vec<String>,
53}
54
55fn parse_string_table<R: binread::io::Read + binread::io::Seek>(
56    reader: &mut R,
57    _: &binread::ReadOptions,
58    args: (Vec<u32>,),
59) -> binread::BinResult<Vec<String>> {
60    let mut strings: Vec<String> = vec![];
61    let base_offset: u64 = reader.seek(SeekFrom::Current(0))? - 4 - (4 * args.0.len() as u64);
62    for offset in args.0 {
63        let abs: u64 = base_offset + (offset as u64);
64        reader.seek(SeekFrom::Start(abs))?;
65        strings.push(NullString::read(reader)?.to_string());
66    }
67    Ok(strings)
68}
69
70impl Byml {
71    pub fn from_binary<B: AsRef<[u8]>>(data: &B) -> BymlResult {
72        let data = data.as_ref();
73        if &data[0..4] == b"Yaz0" {
74            let mut yaz = yaz0::Yaz0Archive::new(Cursor::new(data))?;
75            Byml::read_binary(&mut Cursor::new(yaz.decompress()?))
76        } else {
77            Byml::read_binary(&mut Cursor::new(data))
78        }
79    }
80
81    pub fn read_binary<R: Read + Seek>(reader: &mut R) -> BymlResult {
82        let mut parser = BymlParser::new(reader)?;
83        parser.parse()
84    }
85}
86
87struct BymlParser<'a, R: Read + Seek> {
88    endian: Endian,
89    hash_strings: Vec<String>,
90    value_strings: Vec<String>,
91    root_node_offset: u32,
92    reader: &'a mut R,
93}
94
95impl<R: Read + Seek> BymlParser<'_, R> {
96    fn new(reader: &mut R) -> Result<BymlParser<R>, AnyError> {
97        let doc: BymlDoc = BymlDoc::read(reader)?;
98        let endian = match &doc.magic {
99            b"BY" => Endian::Big,
100            b"YB" => Endian::Little,
101            _ => unreachable!(),
102        };
103        let mut opts = binread::ReadOptions::default();
104        opts.endian = endian;
105        reader.seek(SeekFrom::Start(doc.header.hash_table_offset.into()))?;
106        let hash_strings: Vec<String> = match StringTable::read_options(reader, &opts, ()) {
107            Ok(s) => s.strings,
108            Err(_) => vec![],
109        };
110        reader.seek(SeekFrom::Start(doc.header.string_table_offset.into()))?;
111        let value_strings: Vec<String> = match StringTable::read_options(reader, &opts, ()) {
112            Ok(s) => s.strings,
113            Err(_) => vec![],
114        };
115        Ok(BymlParser {
116            endian,
117            hash_strings,
118            value_strings,
119            root_node_offset: doc.header.root_node_offset,
120            reader,
121        })
122    }
123
124    fn read<B: BinRead>(&mut self) -> Result<B, binread::Error> {
125        match self.endian {
126            Endian::Big => self.reader.read_be(),
127            Endian::Little => self.reader.read_le(),
128            _ => unreachable!(),
129        }
130    }
131
132    fn align(&mut self) -> Result<(), AnyError> {
133        let pos = self.reader.stream_position()?;
134        self.reader.seek(SeekFrom::Start(((pos + 4 - 1) / 4) * 4))?;
135        Ok(())
136    }
137
138    fn parse(&mut self) -> BymlResult {
139        self.reader
140            .seek(SeekFrom::Start(self.root_node_offset as u64))?;
141        let node_type: NodeType = self.read::<u8>()?.into();
142        self.parse_node_with_type(&node_type, 12)
143    }
144
145    fn parse_node(&mut self, offset: u32) -> BymlResult {
146        self.reader.seek(SeekFrom::Start(offset.into()))?;
147        let node_type: NodeType = self.read::<u8>()?.into();
148        self.parse_node_with_type(&node_type, offset + 1)
149    }
150
151    fn parse_node_with_type(&mut self, node_type: &NodeType, offset: u32) -> BymlResult {
152        self.reader.seek(SeekFrom::Start(offset.into()))?;
153        Ok(match node_type {
154            NodeType::String => Byml::String({
155                let idx = self.read::<u32>()?;
156                self.value_strings[idx as usize].to_owned()
157            }),
158            NodeType::Int => Byml::Int(self.read::<i32>()?),
159            NodeType::UInt => Byml::UInt(self.read::<u32>()?),
160            NodeType::Float => Byml::Float(crate::Float(self.read::<u32>()?, self.endian.into())),
161            NodeType::Bool => Byml::Bool(self.read::<u32>()? != 0),
162            NodeType::Array => {
163                let offset = self.read::<u32>()?;
164                self.parse_array(offset)?
165            }
166            NodeType::Hash => {
167                let offset = self.read::<u32>()?;
168                self.parse_hash(offset)?
169            }
170            NodeType::Int64 => {
171                let offset = self.read::<u32>()?;
172                Byml::Int64(self.read_long(offset)? as i64)
173            }
174            NodeType::UInt64 => {
175                let offset = self.read::<u32>()?;
176                Byml::UInt64(self.read_long(offset)?)
177            }
178            NodeType::Double => {
179                let offset = self.read::<u32>()?;
180                Byml::Double(crate::Double(self.read_long(offset)?, self.endian.into()))
181            }
182            NodeType::Binary => {
183                let offset = self.read::<u32>()?;
184                self.parse_binary(offset)?
185            }
186            NodeType::StringTable => unreachable!(),
187            NodeType::Null => Byml::Null,
188        })
189    }
190
191    fn parse_binary(&mut self, offset: u32) -> BymlResult {
192        self.reader.seek(SeekFrom::Start(offset.into()))?;
193        let size = self.read::<u32>()?;
194        let mut opts = binread::ReadOptions::default();
195        opts.endian = self.endian;
196        opts.count = Some(size as usize);
197        Ok(Byml::Binary(Vec::<u8>::read_options(
198            self.reader,
199            &opts,
200            (),
201        )?))
202    }
203
204    fn read_long(&mut self, offset: u32) -> Result<u64, binread::Error> {
205        self.reader.seek(SeekFrom::Start(offset.into()))?;
206        self.read::<u64>()
207    }
208
209    fn parse_hash(&mut self, offset: u32) -> BymlResult {
210        self.reader.seek(SeekFrom::Start(offset.into()))?;
211        let header: HashHeader = self.read()?;
212        let pos = self.reader.stream_position()?;
213        let hash: std::collections::BTreeMap<String, Byml> = (0..header.entries)
214            .map(|i| {
215                self.reader.seek(SeekFrom::Start(pos + i as u64 * 8))?;
216                let idx: u32 = self.read::<U24>()?.0 as u32;
217                Ok((
218                    self.hash_strings[idx as usize].to_owned(),
219                    self.parse_node(pos as u32 + i * 8 + 3)?,
220                ))
221            })
222            .collect::<Result<std::collections::BTreeMap<String, Byml>, AnyError>>()?;
223        Ok(Byml::Hash(hash))
224    }
225
226    fn parse_array(&mut self, offset: u32) -> BymlResult {
227        self.reader.seek(SeekFrom::Start(offset.into()))?;
228        let header: ArrayHeader = self.read()?;
229        self.align()?;
230        let val_start = self.reader.stream_position()?;
231        let array: Vec<Byml> = header
232            .node_types
233            .iter()
234            .enumerate()
235            .map(|(i, t)| self.parse_node_with_type(t, val_start as u32 + (i as u32 * 4)))
236            .collect::<Result<Vec<Byml>, AnyError>>()?;
237        Ok(Byml::Array(array))
238    }
239}
240
241#[derive(Debug, BinRead)]
242#[br(assert(magic == 0xC0u8))]
243struct ArrayHeader {
244    magic: u8,
245    #[br(map = |x: U24| x.0 as u32)]
246    entries: u32,
247    #[br(
248        count = entries,
249        map = |x: Vec<u8>| x.into_iter().map(|t: u8| NodeType::from(t)).collect()
250    )]
251    node_types: Vec<NodeType>,
252}
253
254#[derive(Debug, BinRead)]
255#[br(magic = 0xC1u8)]
256struct HashHeader {
257    #[br(map = |x: U24| x.0 as u32)]
258    entries: u32,
259}
260
261impl BinRead for U24 {
262    type Args = ();
263    fn read_options<R: binread::io::Seek + binread::io::Read>(
264        reader: &mut R,
265        options: &binread::ReadOptions,
266        _: (),
267    ) -> binread::BinResult<U24> {
268        let buf: [u8; 3] = <[u8; 3]>::read(reader)?;
269        match options.endian {
270            binread::Endian::Big => Ok(U24(byteorder::BigEndian::read_uint(&buf, 3))),
271            binread::Endian::Little => Ok(U24(byteorder::LittleEndian::read_uint(&buf, 3))),
272            _ => unreachable!(),
273        }
274    }
275}