byml/
write.rs

1use crate::{Byml, Endian, NodeType, U24};
2use binwrite::{BinWrite, WriterOption};
3use byteorder::{BigEndian, ByteOrder, LittleEndian};
4use indexmap::{IndexMap, IndexSet};
5use rayon::prelude::*;
6use std::collections::{hash_map::DefaultHasher, BTreeMap};
7use std::error::Error;
8use std::hash::{Hash, Hasher};
9use std::io::{Cursor, Seek, SeekFrom, Write};
10
/// Result alias used by every internal write routine in this module.
type WriteResult = Result<(), WriteError>;

/// Error produced while serializing a BYML document; wraps a plain message.
#[derive(Debug)]
pub struct WriteError(String);

impl Error for WriteError {}

impl std::fmt::Display for WriteError {
    /// Renders the message prefixed with a fixed "Error writing BYML" label.
    fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
        let WriteError(message) = self;
        write!(formatter, "Error writing BYML: {}", message)
    }
}

impl From<std::io::Error> for WriteError {
    /// Converts an I/O failure by capturing its display text.
    fn from(err: std::io::Error) -> WriteError {
        WriteError(err.to_string())
    }
}
29
30impl Byml {
31    /// Serialize the document to binary data with the specified endianness and version. Only hash,
32    /// array, or null nodes can be used.
33    pub fn to_binary(&self, endian: Endian, version: u16) -> Result<Vec<u8>, WriteError> {
34        let mut buf: Vec<u8> = Vec::new();
35        self.write_binary(&mut Cursor::new(&mut buf), endian, version)?;
36        Ok(buf)
37    }
38
39    /// Serialize the document to binary data with the specified endianness and version and yaz0
40    /// compress it. Only hash, array, or null nodes can be used.
41    pub fn to_compressed_binary(
42        &self,
43        endian: Endian,
44        version: u16,
45    ) -> Result<Vec<u8>, WriteError> {
46        let mut buf: Vec<u8> = Vec::new();
47        let mut writer = Cursor::new(&mut buf);
48        let yaz_writer = yaz0::Yaz0Writer::new(&mut writer);
49        match yaz_writer.compress_and_write(
50            &self.to_binary(endian, version)?,
51            yaz0::CompressionLevel::Lookahead { quality: 10 },
52        ) {
53            Ok(()) => Ok(buf),
54            Err(e) => Err(WriteError(format!("{}", e))),
55        }
56    }
57
58    /// Write the binary serialized BYML document to a writer with the specified endianness and
59    /// version. Only hash, array, or null nodes can be used.
60    pub fn write_binary<W: Write + Seek>(
61        &self,
62        writer: &mut W,
63        endian: Endian,
64        version: u16,
65    ) -> WriteResult {
66        if !(2..=4).contains(&version) {
67            return Err(WriteError(format!(
68                "Version {} unsupported, expected 2-4",
69                version
70            )));
71        }
72        match self {
73            Byml::Array(_) | Byml::Hash(_) | Byml::Null => {
74                let mut byml_writer = BymlWriter::new(writer, self, endian.into(), version);
75                byml_writer.write_doc()?;
76                Ok(())
77            }
78            _ => Err(WriteError(format!(
79                "Can only serialize array, hash, or null nodes, found {:?}",
80                self.get_type()
81            ))),
82        }
83    }
84}
85
// File header written at offset 0 by `BymlWriter::write_doc`, which reserves the first
// 0x10 bytes of the stream for it before writing anything else.
#[derive(Debug, BinWrite)]
struct Header {
    // `b"BY"` for big-endian output, `b"YB"` for little-endian output.
    magic: [u8; 2],
    // Format version as passed in by the caller (`write_binary` accepts 2-4).
    version: u16,
    // Stream position of the hash-key string table; left at 0 when there are no keys.
    hash_table_offset: u32,
    // Stream position of the string-value table; left at 0 when there are no strings.
    string_table_offset: u32,
    // Stream position at which the root node is written.
    root_node_offset: u32,
}
94
// A node type tag paired with its inline value.
// NOTE(review): not referenced anywhere else in the visible part of this file.
#[derive(Debug, BinWrite)]
struct Node {
    r#type: NodeType,
    value: NodeValue,
}
100
// Body of a hash node; `write_hash` writes it immediately after the `NodeType::Hash` tag.
#[derive(Debug, BinWrite)]
struct HashNode {
    // Number of entries in the hash.
    count: U24,
    // One entry per key, in the iteration order of the source map.
    entries: Vec<HashEntry>,
}
106
// Leading part of an array node; `write_array` writes it right after the `NodeType::Array`
// tag, then aligns the cursor and writes the element values separately.
#[derive(Debug, BinWrite)]
struct ArrayNode {
    // Number of elements in the array.
    count: U24,
    // Type tag of each element, in element order.
    types: Vec<NodeType>,
}
112
// A single key/value slot inside a `HashNode`. `write_hash` reserves 8 bytes per entry.
#[derive(Debug, BinWrite)]
struct HashEntry {
    // Index of the key within the writer's sorted key set.
    key_idx: U24,
    // Type tag of the value.
    r#type: NodeType,
    // Inline value, string-table index, or offset of the out-of-line node.
    value: NodeValue,
}
119
// The 4-byte value slot stored for a node inside an array or hash. Every variant is
// serialized as a 32-bit quantity by the `BinWrite` impl below.
#[derive(Debug, PartialEq)]
enum NodeValue {
    // Written as the u32 `1` (true) or `0` (false).
    Bool(bool),
    Int(i32),
    UInt(u32),
    Float(f32),
    // Stream offset of a node that is written out of line.
    Offset(u32),
    // Index into the writer's sorted string set.
    String(u32),
}
129
130impl From<&Byml> for NodeValue {
131    fn from(node: &Byml) -> NodeValue {
132        match node {
133            Byml::Int(i) => NodeValue::Int(*i),
134            Byml::UInt(u) => NodeValue::UInt(*u),
135            Byml::Float(f) => NodeValue::Float(f.into()),
136            Byml::Bool(b) => NodeValue::Bool(*b),
137            Byml::String(_) => NodeValue::String(0),
138            _ => NodeValue::Offset(0),
139        }
140    }
141}
142
143impl BinWrite for NodeValue {
144    fn write_options<W: Write>(
145        &self,
146        writer: &mut W,
147        options: &WriterOption,
148    ) -> Result<(), std::io::Error> {
149        match self {
150            NodeValue::Bool(v) => (if *v { 1u32 } else { 0u32 }).write_options(writer, options),
151            NodeValue::Int(v) => v.write_options(writer, options),
152            NodeValue::UInt(v) | NodeValue::Offset(v) => v.write_options(writer, options),
153            NodeValue::Float(v) => v.write_options(writer, options),
154            NodeValue::String(v) => v.write_options(writer, options),
155        }
156    }
157}
158
// Entry count plus offsets of a string table.
// NOTE(review): not referenced in the visible part of this file; `write_string_table`
// emits the count and offsets by hand instead.
#[derive(Debug, BinWrite)]
struct StringTable {
    entries: U24,
    offsets: Vec<u32>,
}
164
// A string written through binwrite's `cstr` and `align(4)` field attributes.
// NOTE(review): not referenced in the visible part of this file.
#[derive(Debug, BinWrite)]
struct AlignedCStr {
    #[binwrite(cstr, align(4))]
    string: String,
}
170
// State carried through the serialization of one BYML document.
struct BymlWriter<'a, W: Write + Seek> {
    // Root node of the document being written.
    data: &'a Byml,
    // Seekable output; nodes are written out of order and patched by seeking back.
    writer: &'a mut W,
    // binwrite options; carries the endianness used for every write.
    opts: WriterOption,
    // Format version stored in the header.
    version: u16,
    // Sorted set of every hash key in the document (built by `collect_keys`).
    keys: IndexSet<String>,
    // Sorted set of every string value in the document (built by `collect_strings`).
    strings: IndexSet<String>,
    // Maps `calculate_hash(node)` to the offset where that node was written, so an
    // identical node encountered later reuses the existing offset.
    // NOTE(review): keyed on a 64-bit hash only; a collision would alias two different nodes.
    written_nodes: IndexMap<u64, u32>,
}
180
181#[inline]
182fn calculate_hash(t: &Byml) -> u64 {
183    let mut s = DefaultHasher::new();
184    t.hash(&mut s);
185    s.finish()
186}
187
188fn collect_strings(data: &Byml) -> IndexSet<String> {
189    let mut strs: IndexSet<String> = IndexSet::new();
190    match data {
191        Byml::String(v) => {
192            strs.insert(v.to_owned());
193        }
194        Byml::Array(v) => strs.par_extend(v.par_iter().flat_map(|x: &Byml| collect_strings(x))),
195        Byml::Hash(v) => strs.par_extend(v.par_iter().flat_map(|(_, v)| collect_strings(v))),
196        _ => (),
197    };
198    strs.par_sort();
199    strs
200}
201
202fn collect_keys(data: &Byml) -> IndexSet<String> {
203    let mut keys: IndexSet<String> = IndexSet::new();
204    match data {
205        Byml::Hash(v) => {
206            keys.par_extend(v.par_iter().map(|(k, _)| k.to_owned()));
207            keys.par_extend(v.par_iter().flat_map(|(_, v)| collect_keys(v)))
208        }
209        Byml::Array(v) => keys.par_extend(v.par_iter().flat_map(|x| collect_keys(x))),
210        _ => (),
211    }
212    keys.par_sort();
213    keys
214}
215
216impl<W: Write + Seek> BymlWriter<'_, W> {
217    fn new<'a>(
218        writer: &'a mut W,
219        data: &'a Byml,
220        endian: binwrite::Endian,
221        version: u16,
222    ) -> BymlWriter<'a, W> {
223        BymlWriter {
224            writer,
225            data,
226            opts: binwrite::writer_option_new!(endian: endian),
227            version,
228            strings: collect_strings(data),
229            keys: collect_keys(data),
230            written_nodes: IndexMap::new(),
231        }
232    }
233
234    #[inline]
235    fn write<B: BinWrite>(&mut self, val: &B) -> WriteResult {
236        val.write_options(self.writer, &self.opts)?;
237        Ok(())
238    }
239
240    fn write_string_table(&mut self, strings: &IndexSet<String>) -> WriteResult {
241        let start_pos = self.writer.stream_position()?;
242        self.write(&NodeType::StringTable)?;
243        self.write(&U24(strings.len() as u64))?;
244        fn gen_str_offsets(x: &IndexSet<String>) -> Vec<u32> {
245            let mut offsets: Vec<u32> = vec![];
246            let mut pos = 4 + ((x.len() + 1) as u32 * 4);
247            for string in x.iter() {
248                offsets.push(pos);
249                pos += string.len() as u32 + 1;
250                pos = ((pos as i32 + 3) & -4) as u32;
251            }
252            offsets.push(pos);
253            offsets
254        }
255        let offsets = gen_str_offsets(strings);
256        self.write(&offsets)?;
257        self.align_cursor()?;
258        for (i, s) in strings.iter().enumerate() {
259            self.writer
260                .seek(SeekFrom::Start(start_pos + offsets[i] as u64))?;
261            self.write(s)?;
262            self.write(&0u8)?;
263        }
264        self.align_cursor()?;
265        Ok(())
266    }
267
268    fn write_doc(&mut self) -> WriteResult {
269        if !self.data.is_container() {
270            return Err(WriteError(format!(
271                "Root node must be a hash or array, not {:?}",
272                self.data.get_type()
273            )));
274        }
275        let mut header = Header {
276            magic: match self.opts.endian {
277                binwrite::Endian::Big => *b"BY",
278                binwrite::Endian::Little => *b"YB",
279                _ => unreachable!(),
280            },
281            version: self.version,
282            hash_table_offset: 0x0,
283            string_table_offset: 0x0,
284            root_node_offset: 0x0,
285        };
286        self.writer.seek(SeekFrom::Start(0x10))?;
287        if !self.keys.is_empty() {
288            header.hash_table_offset = self.writer.stream_position()? as u32;
289            self.write_string_table(&self.keys.clone())?;
290            self.align_cursor()?;
291        }
292        if !self.strings.is_empty() {
293            header.string_table_offset = self.writer.stream_position()? as u32;
294            self.write_string_table(&self.strings.clone())?;
295            self.align_cursor()?;
296        }
297        header.root_node_offset = self.writer.stream_position()? as u32;
298        self.writer.seek(SeekFrom::Start(0))?;
299        self.write(&header)?;
300        self.writer
301            .seek(SeekFrom::Start(header.root_node_offset.into()))?;
302        self.write_offset_node(&self.data)?;
303        Ok(())
304    }
305
306    fn write_offset_node(&mut self, node: &Byml) -> WriteResult {
307        let pos = self.writer.stream_position()?;
308        match node {
309            Byml::Hash(v) => self.write_hash(v),
310            Byml::Array(v) => self.write_array(v),
311            Byml::Double(v) => {
312                let dbl: f64 = v.into();
313                self.write(&(dbl))
314            }
315            Byml::Int64(v) => self.write(v),
316            Byml::UInt64(v) => self.write(v),
317            Byml::Binary(v) => {
318                self.write(&(v.len() as u32))?;
319                self.write(v)
320            }
321            _ => Err(WriteError(format!(
322                "Node {:?} is not a valid offset node",
323                node
324            ))),
325        }?;
326        self.written_nodes.insert(calculate_hash(node), pos as u32);
327        Ok(())
328    }
329
330    fn write_hash(&mut self, hash: &BTreeMap<String, Byml>) -> WriteResult {
331        let start_pos = self.writer.stream_position()?;
332        let mut after_nodes: IndexMap<usize, &Byml> = IndexMap::new();
333        let mut hash_node = HashNode {
334            count: U24(hash.len() as u64),
335            entries: hash
336                .iter()
337                .enumerate()
338                .map(|(i, (k, v))| {
339                    let mut entry = HashEntry {
340                        key_idx: U24(self.keys.get_index_of(k).unwrap() as u64),
341                        r#type: v.get_type(),
342                        value: NodeValue::from(v),
343                    };
344                    if !v.is_value() && !v.is_string() {
345                        after_nodes.insert(i, v);
346                    }
347                    if let Byml::String(s) = v {
348                        entry.value =
349                            NodeValue::String(self.strings.get_index_of(s).unwrap() as u32)
350                    }
351                    entry
352                })
353                .collect::<Vec<HashEntry>>(),
354        };
355        self.writer
356            .seek(SeekFrom::Current((hash.len() as i64 * 8) + 4))?;
357        for (i, b) in after_nodes.into_iter() {
358            match self.written_nodes.get(&calculate_hash(b)) {
359                Some(off) => hash_node.entries[i].value = NodeValue::Offset(*off),
360                None => {
361                    hash_node.entries[i].value =
362                        NodeValue::Offset(self.writer.stream_position()? as u32);
363                    self.write_offset_node(&b)?;
364                    self.align_cursor()?;
365                }
366            }
367        }
368        let end_pos = self.writer.stream_position()?;
369        self.writer.seek(SeekFrom::Start(start_pos))?;
370        self.write(&NodeType::Hash)?;
371        self.write(&hash_node)?;
372        self.writer.seek(SeekFrom::Start(end_pos))?;
373        Ok(())
374    }
375
376    fn write_array(&mut self, array: &[Byml]) -> WriteResult {
377        let start_pos = self.writer.stream_position()?;
378        let mut after_nodes: IndexMap<usize, &Byml> = IndexMap::new();
379        let array_node = ArrayNode {
380            count: U24(array.len() as u64),
381            types: array.par_iter().map(|x| x.get_type()).collect(),
382        };
383        let mut array_values = array
384            .iter()
385            .enumerate()
386            .map(|(i, v)| {
387                let mut val = NodeValue::from(v);
388                if !v.is_value() && !v.is_string() {
389                    after_nodes.insert(i, v);
390                }
391                if let Byml::String(s) = v {
392                    val = NodeValue::String(self.strings.get_index_of(s).unwrap() as u32)
393                }
394                val
395            })
396            .collect::<Vec<NodeValue>>();
397        self.writer.seek(SeekFrom::Current(
398            (array.len() as i64) + (array.len() as i64 * 4) + 4,
399        ))?;
400        self.align_cursor()?;
401        for (i, b) in after_nodes.into_iter() {
402            match self.written_nodes.get(&calculate_hash(b)) {
403                Some(off) => array_values[i] = NodeValue::Offset(*off),
404                None => {
405                    array_values[i] = NodeValue::Offset(self.writer.stream_position()? as u32);
406                    self.write_offset_node(&b)?;
407                    self.align_cursor()?;
408                }
409            }
410        }
411        let end_pos = self.writer.stream_position()?;
412        self.writer.seek(SeekFrom::Start(start_pos))?;
413        self.write(&NodeType::Array)?;
414        self.write(&array_node)?;
415        self.align_cursor()?;
416        self.write(&array_values)?;
417        self.writer.seek(SeekFrom::Start(end_pos))?;
418        Ok(())
419    }
420
421    fn align_cursor(&mut self) -> WriteResult {
422        let aligned_pos = ((self.writer.stream_position()? as i64 + 3) & -4) as u64;
423        self.writer.seek(SeekFrom::Start(aligned_pos))?;
424        Ok(())
425    }
426}
427
428impl Into<u8> for &NodeType {
429    fn into(self) -> u8 {
430        match self {
431            NodeType::String => 0xA0,
432            NodeType::Binary => 0xA1,
433            NodeType::Array => 0xC0,
434            NodeType::Hash => 0xC1,
435            NodeType::Bool => 0xD0,
436            NodeType::Int => 0xD1,
437            NodeType::Float => 0xD2,
438            NodeType::UInt => 0xD3,
439            NodeType::Int64 => 0xD4,
440            NodeType::UInt64 => 0xD5,
441            NodeType::Double => 0xD6,
442            NodeType::Null => 0xFF,
443            NodeType::StringTable => 0xC2,
444        }
445    }
446}
447
448impl BinWrite for NodeType {
449    fn write_options<W: Write>(
450        self: &NodeType,
451        writer: &mut W,
452        _options: &WriterOption,
453    ) -> Result<(), std::io::Error> {
454        let v: u8 = self.into();
455        v.write(writer)
456    }
457}
458
459impl BinWrite for U24 {
460    fn write_options<W: Write>(
461        &self,
462        writer: &mut W,
463        options: &WriterOption,
464    ) -> Result<(), std::io::Error> {
465        let mut buf: [u8; 3] = [0; 3];
466        match options.endian {
467            binwrite::Endian::Big => BigEndian::write_uint(&mut buf, self.0, 3),
468            binwrite::Endian::Little => LittleEndian::write_uint(&mut buf, self.0, 3),
469            _ => unreachable!(),
470        };
471        writer.write_all(&buf)?;
472        Ok(())
473    }
474}