twmap/datafile/
parse.rs

1/*
2 * Documentation used: https://github.com/heinrich5991/libtw2/blob/master/doc/datafile.md
3 * Author of the documentation: heinrich5991
4*/
5
6/*
7 * most functions are given a byte slice and also return one.
8 *  -> the parameter byte slice is the remaining, unread part of the file
9 *  -> the function will then consume a chunk of bytes from the front of the slice
10 *  -> then return the remaining data, alongside the parsed data
11*/
12
13use crate::compression::compress;
14use crate::convert::{To, TryTo};
15use crate::Error;
16
17use log::info;
18use structview::{i32_le, u16_le, View};
19use thiserror::Error;
20
21use std::borrow::Cow;
22use std::cmp::Ordering;
23use std::collections::{HashMap, HashSet};
24use std::convert::TryFrom;
25use std::fmt;
26use std::mem;
27
28#[derive(Error, Debug)]
29#[error(transparent)]
30pub struct DatafileParseError(DatafileParseErr);
31
32#[derive(Error, Debug)]
33#[error(transparent)]
34enum DatafileParseErr {
35    Magic(#[from] MagicError),
36    Header(#[from] HeaderError),
37    ItemTypes(#[from] ItemTypeError),
38    ItemOffsets(OffsetsError),
39    DataOffsets(OffsetsError),
40    DataSizes(#[from] DataSizesError),
41    Items(#[from] ItemError),
42    DataItems(#[from] DataItemsError),
43    #[error("There is unused data at the end of the file")]
44    LeftOverData,
45}
46
47impl From<DatafileParseErr> for Error {
48    fn from(err: DatafileParseErr) -> Self {
49        Error::DatafileParse(DatafileParseError(err))
50    }
51}
52
53impl<T: Into<DatafileParseErr>> From<T> for DatafileParseError {
54    fn from(err: T) -> Self {
55        Self(err.into())
56    }
57}
58
59#[derive(Error, Debug)]
60struct LengthError;
61
62impl fmt::Display for LengthError {
63    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
64        write!(f, "Not enough bytes left")
65    }
66}
67
68fn view<T: View>(data: &[u8]) -> Result<(&T, &[u8]), LengthError> {
69    if data.len() < mem::size_of::<T>() {
70        Err(LengthError)
71    } else {
72        let (struct_data, remaining_data) = data.split_at(mem::size_of::<T>());
73        Ok((T::view(struct_data).unwrap(), remaining_data))
74    }
75}
76
77fn view_multiple<T: View>(data: &[u8], amount: usize) -> Result<(&[T], &[u8]), LengthError> {
78    let size = mem::size_of::<T>() * amount;
79    if data.len() < size {
80        Err(LengthError)
81    } else {
82        let (struct_data, remaining_data) = data.split_at(size);
83        Ok((T::view_slice(struct_data).unwrap(), remaining_data))
84    }
85}
86
87#[derive(Error, Debug)]
88enum MagicError {
89    #[error("{0}")]
90    Length(#[from] LengthError),
91    #[error("This is not a teeworlds map")]
92    Incorrect,
93}
94
95fn parse_magic(data: &[u8]) -> Result<&[u8], MagicError> {
96    let (magic, data) = view::<[u8; 4]>(data)?;
97    if magic != b"DATA" && magic != b"ATAD" {
98        Err(MagicError::Incorrect)
99    } else {
100        Ok(data)
101    }
102}
103
104#[derive(Debug, View, Copy, Clone)]
105#[repr(C)]
106pub struct Header {
107    pub version: i32_le,
108    pub size: i32_le,
109    pub swap_len: i32_le,
110    pub num_item_types: i32_le,
111    pub num_items: i32_le,
112    pub num_data: i32_le,
113    pub item_block_size: i32_le,
114    pub data_block_size: i32_le,
115}
116
117#[derive(Error, Debug)]
118enum HeaderError {
119    #[error("{0}")]
120    Length(#[from] LengthError),
121    #[error("Unsupported version, supported are versions 3 and 4")]
122    Version,
123    #[error("The size is not accurate")]
124    Size,
125    #[error("The swap len is not accurate")]
126    SwapLen,
127    #[error("Amount of item types is negative")]
128    NumItemTypes,
129    #[error("Amount of items is negative")]
130    NumItems,
131    #[error("Amount of data items is negative")]
132    NumData,
133    #[error("Item block size is negative")]
134    ItemBlockSize,
135    #[error("Data block size is negative")]
136    DataBlockSize,
137}
138
139impl Header {
140    fn parse(data: &[u8]) -> Result<(&Header, &[u8]), HeaderError> {
141        use HeaderError::*;
142        let (header, data) = view::<Header>(data)?;
143
144        if ![3, 4].contains(&header.version.to_int()) {
145            return Err(Version);
146        }
147        if header.num_item_types.to_int() < 0 {
148            return Err(NumItemTypes);
149        }
150        if header.num_items.to_int() < 0 {
151            return Err(NumItems);
152        }
153        if header.num_data.to_int() < 0 {
154            return Err(NumData);
155        }
156        if header.item_block_size.to_int() < 0 {
157            return Err(ItemBlockSize);
158        }
159        if header.data_block_size.to_int() < 0 {
160            return Err(DataBlockSize);
161        }
162        let expected_size = data.len().try_to::<i64>() + 20;
163        let offset = header.num_data.to_int().try_to::<i64>() * 4;
164        if header.size.to_int().to::<i64>() != expected_size {
165            if header.size.to_int().to::<i64>() == expected_size - offset {
166                info!("Faulty size calculation in the datafile header");
167            } else {
168                return Err(Size);
169            }
170        }
171        let expected_swap_len = expected_size - header.data_block_size.to_int().to::<i64>();
172        if header.swap_len.to_int().to::<i64>() != expected_swap_len {
173            if header.swap_len.to_int().to::<i64>() == expected_swap_len - offset {
174                info!("Faulty swap_len calculation in the datafile header");
175            } else {
176                return Err(SwapLen);
177            }
178        }
179        Ok((header, data))
180    }
181}
182
183#[derive(Debug, View, Copy, Clone)]
184#[repr(C)]
185pub struct ItemType {
186    pub type_id: i32_le,
187    pub start: i32_le,
188    pub num: i32_le,
189}
190
191#[derive(Error, Debug)]
192enum ItemTypeError {
193    #[error("{0}")]
194    Length(#[from] LengthError),
195    #[error("A type id is too large (must fit into an u16")]
196    InvalidTypeId,
197    #[error("The same type id is used twice")]
198    DuplicateTypeId,
199    #[error("Negative item amount")]
200    NegativeItemAmount,
201    #[error("Item amount is zero")]
202    ZeroItems,
203    #[error("The item ranges overlap")]
204    Overlap,
205    #[error("The item ranges leave a gap")]
206    Gap,
207    #[error("The item ranges together use more items than there are")]
208    TooFewItems,
209    #[error("The item ranges together use less items than there are")]
210    TooManyItems,
211}
212
213impl ItemType {
214    fn parse(
215        data: &[u8],
216        num_item_types: i32_le,
217        num_items: i32_le,
218    ) -> Result<(&[ItemType], &[u8]), ItemTypeError> {
219        let (item_types, data) = view_multiple::<ItemType>(data, num_item_types.to_int().try_to())?;
220
221        let mut used_type_ids = HashSet::new();
222        let mut expected_start = 0_i64;
223        for item_type in item_types {
224            if u16::try_from(item_type.type_id.to_int()).is_err() {
225                return Err(ItemTypeError::InvalidTypeId);
226            }
227            let new = used_type_ids.insert(item_type.type_id.to_int());
228            if !new {
229                return Err(ItemTypeError::DuplicateTypeId);
230            }
231            match item_type.num.to_int() {
232                i32::MIN..=-1 => return Err(ItemTypeError::NegativeItemAmount),
233                0 => return Err(ItemTypeError::ZeroItems),
234                1..=i32::MAX => {}
235            }
236            match item_type.start.to_int().to::<i64>().cmp(&expected_start) {
237                Ordering::Less => return Err(ItemTypeError::Overlap),
238                Ordering::Equal => {}
239                Ordering::Greater => return Err(ItemTypeError::Gap),
240            }
241            expected_start += item_type.num.to_int().to::<i64>();
242        }
243        match expected_start.cmp(&num_items.to_int().to::<i64>()) {
244            Ordering::Less => return Err(ItemTypeError::TooFewItems),
245            Ordering::Equal => {}
246            Ordering::Greater => return Err(ItemTypeError::TooManyItems),
247        }
248        Ok((item_types, data))
249    }
250}
251
252#[derive(Error, Debug)]
253enum OffsetsError {
254    #[error("{0}")]
255    Length(#[from] LengthError),
256    #[error("The first offset value isn't 0")]
257    FirstNonZero,
258    #[error("Negative value")]
259    Negative,
260    #[error("A value is lower than the last one")]
261    TooLow,
262}
263
264fn parse_offsets(data: &[u8], amount: i32_le) -> Result<(&[i32_le], &[u8]), OffsetsError> {
265    let (offsets, data) = view_multiple::<i32_le>(data, amount.to_int().try_to())?;
266
267    if let Some(offset) = offsets.first() {
268        if offset.to_int() != 0 {
269            return Err(OffsetsError::FirstNonZero);
270        }
271    }
272    let mut min_value = 0_i64;
273    for offset in offsets {
274        let offset = offset.to_int().to::<i64>();
275        if offset < 0 {
276            return Err(OffsetsError::Negative);
277        }
278        if offset < min_value {
279            return Err(OffsetsError::TooLow);
280        }
281        min_value = offset;
282    }
283    Ok((offsets, data))
284}
285
286#[derive(Error, Debug)]
287enum DataSizesError {
288    #[error("{0}")]
289    Length(#[from] LengthError),
290    #[error("Negative value")]
291    Negative,
292}
293
294fn parse_data_sizes(data: &[u8], amount: i32_le) -> Result<(&[i32_le], &[u8]), DataSizesError> {
295    let (data_sizes, data) = view_multiple::<i32_le>(data, amount.to_int().try_to())?;
296
297    if data_sizes.iter().any(|size| size.to_int() < 0) {
298        return Err(DataSizesError::Negative);
299    }
300    Ok((data_sizes, data))
301}
302
303#[derive(Debug, View, Copy, Clone)]
304#[repr(C)]
305pub struct ItemHeader {
306    pub id: u16_le,
307    pub type_id: u16_le,
308    pub size: i32_le,
309}
310
311#[derive(Debug)]
312pub struct ViewedItem<'a> {
313    pub item_header: &'a ItemHeader,
314    pub item_data: &'a [i32_le],
315}
316
317#[derive(Error, Debug)]
318enum ItemError {
319    #[error("{0}")]
320    Length(#[from] LengthError),
321    #[error("Negative item data size")]
322    NegativeSize,
323    #[error("Item data size is not divisible by 4")]
324    InvalidSize,
325    #[error("Wrong type id")]
326    WrongTypeId,
327}
328
329impl ViewedItem<'_> {
330    fn parse<'a>(
331        mut data: &'a [u8],
332        item_types: &[ItemType],
333    ) -> Result<(Vec<ViewedItem<'a>>, &'a [u8]), ItemError> {
334        let mut items = Vec::new();
335        for item_type in item_types {
336            for _ in 0..item_type.num.to_int() {
337                let (item_header, tmp_data) = view::<ItemHeader>(data)?;
338                if item_header.size.to_int() < 0 {
339                    return Err(ItemError::NegativeSize);
340                }
341                if item_header.size.to_int() % 4 != 0 {
342                    return Err(ItemError::InvalidSize);
343                }
344                if item_header.type_id.to_int() != item_type.type_id.to_int().try_to::<u16>() {
345                    return Err(ItemError::WrongTypeId);
346                }
347                let (item_data, tmp_data) = view_multiple::<i32_le>(
348                    tmp_data,
349                    item_header.size.to_int().try_to::<usize>() / 4,
350                )?;
351                data = tmp_data;
352                items.push(ViewedItem {
353                    item_header,
354                    item_data,
355                })
356            }
357        }
358        Ok((items, data))
359    }
360}
361
362#[derive(Error, Debug)]
363enum DataItemsError {
364    #[error("{0}")]
365    Length(#[from] LengthError),
366    #[error("The last data item supposedly has a negative size")]
367    LastDataItemNegativeSize,
368}
369
370fn parse_data_items<'a>(
371    mut data: &'a [u8],
372    data_offsets: &[i32_le],
373    total_data_size: i32_le,
374) -> Result<(Vec<&'a [u8]>, &'a [u8]), DataItemsError> {
375    let mut data_items = Vec::new();
376    for size in data_offsets.windows(2).map(|offsets| {
377        offsets[1].to_int().try_to::<usize>() - offsets[0].to_int().try_to::<usize>()
378    }) {
379        let (data_item, new_data) = view_multiple::<u8>(data, size)?;
380        data = new_data;
381        data_items.push(data_item);
382    }
383    if let Some(offset) = data_offsets.last() {
384        if offset.to_int() > total_data_size.to_int() {
385            return Err(DataItemsError::LastDataItemNegativeSize);
386        }
387        let size = total_data_size.to_int().try_to::<usize>() - offset.to_int().try_to::<usize>();
388        let (last_data_item, new_data) = view_multiple::<u8>(data, size)?;
389        data = new_data;
390        data_items.push(last_data_item);
391    }
392    Ok((data_items, data))
393}
394
395pub struct RawDatafile<'a> {
396    pub header: &'a Header,
397    pub item_types: &'a [ItemType],
398    pub item_offsets: &'a [i32_le],
399    pub data_offsets: &'a [i32_le],
400    pub data_sizes: Option<&'a [i32_le]>,
401    pub items: Vec<ViewedItem<'a>>,
402    pub data_items: Vec<&'a [u8]>,
403}
404
405impl RawDatafile<'_> {
406    pub fn parse(data: &[u8]) -> Result<RawDatafile, DatafileParseError> {
407        let data = parse_magic(data)?;
408        let (header, data) = Header::parse(data)?;
409        let (item_types, data) = ItemType::parse(data, header.num_item_types, header.num_items)?;
410        let (item_offsets, data) =
411            parse_offsets(data, header.num_items).map_err(DatafileParseErr::ItemOffsets)?;
412        let (data_offsets, mut data) =
413            parse_offsets(data, header.num_data).map_err(DatafileParseErr::DataOffsets)?;
414        let mut data_sizes = None;
415        if header.version.to_int() >= 4 {
416            let (new_data_sizes, new_data) = parse_data_sizes(data, header.num_data)?;
417            data = new_data;
418            data_sizes = Some(new_data_sizes);
419        }
420        let (items, data) = ViewedItem::parse(data, item_types)?;
421        let (data_items, data) = parse_data_items(data, data_offsets, header.data_block_size)?;
422        if !data.is_empty() {
423            return Err(DatafileParseErr::LeftOverData.into());
424        }
425        Ok(RawDatafile {
426            header,
427            item_types,
428            item_offsets,
429            data_offsets,
430            data_sizes,
431            items,
432            data_items,
433        })
434    }
435}
436
/// Owned counterpart of `ViewedItem`, with the item data converted to native integers.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Item {
    /// Id of the item, taken from its item header.
    pub id: u16,
    /// Data of the item.
    pub item_data: Vec<i32>,
}
442
443impl From<&ViewedItem<'_>> for Item {
444    fn from(viewed_item: &ViewedItem) -> Self {
445        Item {
446            id: viewed_item.item_header.id.to_int(),
447            item_data: viewed_item
448                .item_data
449                .iter()
450                .map(|i32_le| i32_le.to_int())
451                .collect(),
452        }
453    }
454}
455
456pub struct Datafile<'a> {
457    pub items: HashMap<u16, Vec<Item>>,
458    pub data_items: Vec<(Cow<'a, [u8]>, usize)>,
459}
460
461impl<'a> RawDatafile<'a> {
462    pub fn to_datafile(&self) -> Datafile<'a> {
463        let mut items = HashMap::new();
464        for item_type in self.item_types {
465            let start: usize = item_type.start.to_int().try_to();
466            let end = start + item_type.num.to_int().try_to::<usize>();
467            let item_type_items = self.items[start..end].iter().map(Item::from).collect();
468            items.insert(item_type.type_id.to_int().try_to::<u16>(), item_type_items);
469        }
470        let data_items = match self.data_sizes {
471            None => self
472                .data_items
473                .iter()
474                .map(|&data_item| (Cow::from(compress(data_item)), data_item.len()))
475                .collect(),
476            Some(data_sizes) => self
477                .data_items
478                .iter()
479                .zip(data_sizes.iter())
480                .map(|(&data_item, data_size)| {
481                    (Cow::from(data_item), data_size.to_int().try_to::<usize>())
482                })
483                .collect(),
484        };
485        Datafile { items, data_items }
486    }
487}