tpk/
read.rs

1use crate::model::Entry;
2use crate::read::Error::{Syntax, UnknownType};
3use crate::Element;
4use byteorder::{ByteOrder, LE};
5use std::{io, string};
6use thiserror::Error;
7
8/// Representation of a TPK read error.
9#[derive(Error, Debug)]
10pub enum Error {
11    /// An unknown error happened.
12    ///
13    /// This error is "technical unknown", it should only be used in cases where the user is not
14    /// supposed to get an error but gets one anyway. For example, this error should *never* be
15    /// thrown for a problem with a TPK file. More simply put, this error being returned anywhere
16    /// should be considered a bug or a feature that is not yet implemented.
17    #[error("Unknown error")]
18    Unknown,
19
20    /// A I/O error happened.
21    #[error("I/O error while reading TPK data: {source}")]
22    Io {
23        #[from]
24        source: io::Error,
25    },
26
27    /// The end of file has been reached.
28    ///
29    /// Note that this error can be considered normal behavior,
30    #[error("End of file reached")]
31    Eof,
32
33    /// A syntax error happened.
34    ///
35    /// This error happens when the TPK payload that is being read is corrupted or invalid.
36    #[error("Syntax error at byte {0}: {1}")]
37    Syntax(usize, &'static str),
38
39    /// A type is unknown.
40    ///
41    /// This error happens when the TPK payload that is being read is lexically valid, but an
42    /// unknown type byte has been encountered.
43    #[error("Unknown element type at byte {0}: {1:#X}")]
44    UnknownType(usize, u8),
45
46    /// A UTF-8 string is invalid.
47    ///
48    /// This error happens when the TPK payload that is being read contains an invalid UTF-8
49    /// character at a place where it should be expected.
50    #[error("Invalid UTF-8 character at byte {pos}: {source}")]
51    InvalidString {
52        pos: usize,
53
54        #[source]
55        source: string::FromUtf8Error,
56    },
57
58    /// A type is unsupported.
59    ///
60    /// This error happens when the TPK payload that is being read is both lexically and
61    /// semantically valid, but an unsupported type byte has been encountered.
62    ///
63    /// Note that the mere existence of this error makes this crate non-TPK-compliant, and as such
64    /// this error case should be expected to be removed in the near future.
65    #[deprecated]
66    #[error("Unsupported element type at byte {0}: {1}")]
67    UnsupportedType(usize, &'static str),
68}
69
70/// Representation of a TPK read result.
71pub type Result<T> = std::result::Result<T, Error>;
72
73/// A TPK reader structure.
74///
75/// This structure holds the source from which TPK data should be read, as well as internal reader
76/// contextual data.
77pub struct Reader<T> {
78    read: T,
79    previous_bytes_read: usize,
80    bytes_read: usize,
81    current_name: String,
82    retained_element: Option<Element>,
83}
84
/// Message attached to syntax errors raised when the source ends in the middle of an element.
const UNEXPECTED_EOF: &'static str = "expected more, got EOF";
86
87impl<T> Reader<T>
88where
89    T: io::Read,
90{
91    /// Create a new [TPK reader][Reader].
92    pub fn new(read: T) -> Reader<T> {
93        Reader {
94            read,
95            previous_bytes_read: 0,
96            bytes_read: 0,
97            current_name: String::from("/"),
98            retained_element: None,
99        }
100    }
101
102    /// Read an [element][Element] from this reader.
103    ///
104    /// This function will consume bytes from the source reader, and will attempt to parse them
105    /// and construct a new [element][Element].
106    pub fn read_element(&mut self) -> Result<Option<Element>> {
107        if let Some(retained_element) = self.retained_element.take() {
108            return Ok(Some(retained_element));
109        }
110
111        let mut type_byte_buf = [0u8; 1];
112        let bytes_read = self.read.read(&mut type_byte_buf)?;
113        if bytes_read == 0 {
114            return Ok(None);
115        }
116        self.previous_bytes_read = self.bytes_read;
117        self.bytes_read += bytes_read;
118        let type_byte = type_byte_buf[0];
119        if type_byte & 0b10000000 != 0 {
120            let element = self.read_marker(type_byte)?;
121            return Ok(Some(element));
122        }
123
124        #[allow(deprecated)]
125        let element = match (type_byte & 0xF0) >> 4 {
126            0b0000 => self.read_folder(type_byte),
127            0b0010 => self.read_number(type_byte),
128            0b0011 => self.read_boolean(type_byte),
129            0b0001 => self.read_string_or_blob(type_byte),
130            0b0111 => Err(Error::UnsupportedType(
131                self.previous_bytes_read,
132                "extension",
133            )),
134            _ => Err(UnknownType(self.previous_bytes_read, type_byte)),
135        }?;
136        Ok(Some(element))
137    }
138
139    /// Read an [entry][Entry] from this reader.
140    ///
141    /// Reading an entry means reading one marker element, followed by a zero, one or more
142    /// non-marker elements, until another marker or the end of file is reached.
143    ///
144    /// Note that due to the fact that this reader exposes
145    /// [lower level functions][Self::read_element], the marker element corresponding to an entry
146    /// may have already been read. As such, this reader remembers the name of the last marker
147    /// element read, and if the entry does not begin with an marker element, the remembered
148    /// marker will be used instead.
149    pub fn read_entry(&mut self) -> Result<Option<Entry>> {
150        let first_element = self.read_element()?;
151        if first_element.is_none() {
152            return Ok(None);
153        }
154
155        let mut elements = Vec::with_capacity(1); // Entries usually have one element.
156        let name = if let Some(Element::Marker(name)) = first_element {
157            name
158        } else {
159            elements.push(first_element.unwrap());
160            self.current_name.clone()
161        };
162
163        while let Some(element) = self.read_element()? {
164            match element {
165                Element::Marker(name) => {
166                    self.retained_element = Some(Element::Marker(name));
167                    break;
168                }
169                _ => {
170                    elements.push(element);
171                }
172            }
173        }
174
175        Ok(Some(Entry { name, elements }))
176    }
177
178    fn read_marker(&mut self, type_byte: u8) -> Result<Element> {
179        let mut has_more = type_byte & 0b01000000 != 0;
180        let mut size = (type_byte & 0b111111) as usize;
181        let mut shift = 6;
182        while has_more {
183            let byte = self.expect::<1>()?[0];
184            has_more = byte & 0b10000000 != 0;
185            size |= ((byte & 0b01111111) as usize) << shift;
186            shift += 7;
187        }
188
189        let name = self.read_utf8_string(size)?;
190        self.current_name.clear();
191        self.current_name.push_str(name.as_str());
192        Ok(Element::Marker(name))
193    }
194
195    fn read_folder(&mut self, type_byte: u8) -> Result<Element> {
196        match type_byte {
197            0 => Ok(Element::Folder),
198            1 => Ok(Element::Collection),
199            _ => Err(UnknownType(self.previous_bytes_read, type_byte)),
200        }
201    }
202
203    fn read_number(&mut self, type_byte: u8) -> Result<Element> {
204        match type_byte {
205            0b00100000 => Ok(Element::UInteger8(self.expect::<1>()?[0])),
206            0b00100001 => Ok(Element::UInteger16(LE::read_u16(
207                self.expect::<2>()?.as_slice(),
208            ))),
209            0b00100010 => Ok(Element::UInteger32(LE::read_u32(
210                self.expect::<4>()?.as_slice(),
211            ))),
212            0b00100011 => Ok(Element::UInteger64(LE::read_u64(
213                self.expect::<8>()?.as_slice(),
214            ))),
215            0b00100100 => Ok(Element::Integer8(self.expect::<1>()?[0] as i8)),
216            0b00100101 => Ok(Element::Integer16(LE::read_i16(
217                self.expect::<2>()?.as_slice(),
218            ))),
219            0b00100110 => Ok(Element::Integer32(LE::read_i32(
220                self.expect::<4>()?.as_slice(),
221            ))),
222            0b00100111 => Ok(Element::Integer64(LE::read_i64(
223                self.expect::<8>()?.as_slice(),
224            ))),
225            0b00101110 => Ok(Element::Float32(LE::read_f32(
226                self.expect::<4>()?.as_slice(),
227            ))),
228            0b00101111 => Ok(Element::Float64(LE::read_f64(
229                self.expect::<8>()?.as_slice(),
230            ))),
231            _ => Err(UnknownType(self.previous_bytes_read, type_byte)),
232        }
233    }
234
235    fn read_boolean(&mut self, type_byte: u8) -> Result<Element> {
236        match type_byte {
237            0b00110000 => Ok(Element::Boolean(false)),
238            0b00110001 => Ok(Element::Boolean(true)),
239            _ => Err(UnknownType(self.previous_bytes_read, type_byte)),
240        }
241    }
242
243    fn read_string_or_blob(&mut self, type_byte: u8) -> Result<Element> {
244        // We need to store this because "read_bundled_size" ALSO reads the size bytes, so the
245        // position in the error is wrong if the sub type is invalid.
246        let previous_bytes_read = self.previous_bytes_read;
247
248        let sub_type_byte = type_byte & 0b1100;
249        let size = self.read_bundled_size(type_byte)?;
250
251        match sub_type_byte {
252            0b0000 => self.read_utf8_string(size).map(Element::String),
253            0b0100 => self.expect_heap(size).map(Element::Blob),
254            _ => Err(UnknownType(previous_bytes_read, type_byte)),
255        }
256    }
257
258    #[inline]
259    fn read_utf8_string(&mut self, size: usize) -> Result<String> {
260        let string_bytes = self.expect_heap(size)?;
261        String::from_utf8(string_bytes).map_err(|e| Error::InvalidString {
262            pos: self.previous_bytes_read + e.utf8_error().valid_up_to(),
263            source: e,
264        })
265    }
266
267    #[inline]
268    fn read_bundled_size(&mut self, type_byte: u8) -> Result<usize> {
269        match type_byte & 0b11 {
270            0b00 => Ok(self.expect::<1>()?[0] as usize),
271            0b01 => Ok(LE::read_u16(self.expect::<2>()?.as_slice()) as usize),
272            0b10 => Ok(LE::read_u32(self.expect::<4>()?.as_slice()) as usize),
273            0b11 => Ok(LE::read_u64(self.expect::<8>()?.as_slice()) as usize),
274            _ => Err(UnknownType(self.previous_bytes_read, type_byte)),
275        }
276    }
277
278    fn expect<const N: usize>(&mut self) -> Result<[u8; N]> {
279        let mut buf = [0u8; N];
280        let bytes_read = self.read.read(&mut buf)?;
281        self.previous_bytes_read = self.bytes_read;
282        self.bytes_read += bytes_read;
283        if bytes_read != N {
284            return Err(Syntax(self.bytes_read, UNEXPECTED_EOF));
285        }
286        Ok(buf)
287    }
288
289    fn expect_heap(&mut self, count: usize) -> Result<Vec<u8>> {
290        let mut buf = vec![0u8; count];
291        let bytes_read = self.read.read(&mut buf)?;
292        self.previous_bytes_read = self.bytes_read;
293        self.bytes_read += bytes_read;
294        if bytes_read != count {
295            return Err(Syntax(self.bytes_read, UNEXPECTED_EOF));
296        }
297        Ok(buf)
298    }
299}