jbcrs_basic/parser/
decode.rs

//! The `decode` module is used for decoding simple data,
//! like integers, floats and strings.
3
4use result::*;
5use byteorder::{BigEndian, ByteOrder};
6use std::char;
7
/// A bounds-checked reader of big-endian primitives and modified UTF-8 strings
/// over a borrowed byte slice.
///
/// The read position is not owned by the decoder: it lives behind a mutable
/// reference, so a decoder with a tighter limit can advance the same position.
#[derive(Debug)]
pub struct Decoder<'a> {
    /// The complete input; reads hand out sub-slices of it.
    bytes: &'a [u8],
    /// Index into `bytes` of the next byte to be read.
    cursor: &'a mut usize,
    /// Exclusive upper bound the cursor may reach.
    limit: usize,
}
13
14impl<'a> Decoder<'a> {
15    /// Creates a new decoder,
16    /// the cursor has to be a mutable pointer to support limits without copying
17    pub fn new(bytes: &'a [u8], cursor: &'a mut usize) -> Decoder<'a> {
18        Decoder {
19            bytes,
20            cursor,
21            limit: bytes.len(),
22        }
23    }
24
25    /// Limits the decoder to `to` after the cursor
26    pub fn limit(&mut self, to: usize) -> Result<Decoder> {
27        let end = *self.cursor + to;
28        self.check(end)?;
29        Ok(Decoder {
30            bytes: self.bytes,
31            cursor: self.cursor,
32            limit: end,
33        })
34    }
35
36    /// Removes the limit and returns an error if the limit was exceeded, or not reached
37    pub fn remove_limit(self) -> Result<()> {
38        if self.limit == *self.cursor {
39            Ok(())
40        } else {
41            Err(Error::LimitExceeded)
42        }
43    }
44
45    /// Skips a certain amount of bytes and returns an error if it exceeded the limit
46    pub fn skip(&mut self, to: usize) -> Result<()> {
47        let end = *self.cursor + to;
48        self.check(end)?;
49        *self.cursor = end;
50        Ok(())
51    }
52
53    /// Returns the current cursor
54    pub fn cursor(&self) -> usize {
55        *self.cursor
56    }
57
58    /// Reads a specific amount of bytes.
59    /// If not enough bytes are available, an EOF error is returned.
60    pub fn read_bytes(&mut self, count: usize) -> Result<&'a [u8]> {
61        let end = *self.cursor + count;
62        self.check(end)?;
63
64        let bytes = &self.bytes[*self.cursor..end];
65        *self.cursor = end;
66        Ok(bytes)
67    }
68
69    pub fn read_u8(&mut self) -> Result<u8> {
70        Ok(self.read_bytes(1)?[0])
71    }
72
73    pub fn read_u16(&mut self) -> Result<u16> {
74        Ok(BigEndian::read_u16(self.read_bytes(2)?))
75    }
76
77    pub fn read_u32(&mut self) -> Result<u32> {
78        Ok(BigEndian::read_u32(self.read_bytes(4)?))
79    }
80
81    pub fn read_u64(&mut self) -> Result<u64> {
82        Ok(BigEndian::read_u64(self.read_bytes(8)?))
83    }
84
85    pub fn read_i8(&mut self) -> Result<i8> {
86        Ok(self.read_u8()? as i8)
87    }
88
89    pub fn read_i16(&mut self) -> Result<i16> {
90        Ok(self.read_u16()? as i16)
91    }
92
93    pub fn read_i32(&mut self) -> Result<i32> {
94        Ok(self.read_u32()? as i32)
95    }
96
97    pub fn read_i64(&mut self) -> Result<i64> {
98        Ok(self.read_u64()? as i64)
99    }
100
101    pub fn read_f32(&mut self) -> Result<f32> {
102        Ok(BigEndian::read_f32(self.read_bytes(4)?))
103    }
104
105    pub fn read_f64(&mut self) -> Result<f64> {
106        Ok(BigEndian::read_f64(self.read_bytes(8)?))
107    }
108
109    /// Decodes a modified UTF-8 string.
110    /// Length is the amount of bytes the String was encoded in.
111    /// The length used here may differ from the count of all chars.
112    pub fn read_str(&mut self, length: usize) -> Result<String> {
113        let mut out = String::with_capacity(length);
114
115        let mut i = length;
116        while i > 0 {
117            // read first byte
118            let r1 = u32::from(self.read_u8()?);
119            let ch = if r1 != 0 && r1 < 0x80 {
120                // single byte
121                i -= 1;
122                r1
123            } else if r1 >= 0xC0 && r1 < 0xE0 && i >= 2 {
124                // 2 bytes
125                i -= 2;
126                let r2 = u32::from(self.read_u8()?);
127                (r1 & 0x1F) << 6 | (r2 & 0x3F)
128            } else if r1 >= 0xE0 && r1 < 0xF0 && i >= 3 {
129                i -= 3;
130                let r2 = u32::from(self.read_u8()?);
131                let r3 = u32::from(self.read_u8()?);
132                if r1 == 0xED && r2 >= 0xA0 && r2 <= 0xAF {
133                    if i >= 3 {
134                        i -= 3;
135
136                        self.read_u8()?;
137                        let r5 = u32::from(self.read_u8()?);
138                        let r6 = u32::from(self.read_u8()?);
139                        // r1 and r4 can be ignored
140                        0x1_0000 + ((r2 & 0x0F) << 16) + ((r3 & 0x3F) << 10) + ((r5 & 0x0F) << 6)
141                            + (r6 & 0x3F)
142                    } else {
143                        return Err(Error::InvalidUTF8);
144                    }
145                } else {
146                    ((r1 & 0x0F) << 12) + ((r2 & 0x3F) << 6) + (r3 & 0x3F)
147                }
148            } else {
149                // this is not a valid utf8 scalar value
150                return Err(Error::InvalidUTF8);
151            };
152
153            // convert the u32 to a char and push it to the output string
154            let ch = char::from_u32(ch).ok_or(Error::InvalidUTF8)?;
155            out.push(ch);
156        }
157
158        Ok(out)
159    }
160
161    /// Checks for bounds
162    fn check(&self, location: usize) -> Result<()> {
163        if location <= self.limit {
164            Ok(())
165        } else {
166            Err(Error::LimitExceeded)
167        }
168    }
169}
170
/// Unit tests for the `Decoder`: every primitive reader, modified UTF-8 strings
/// and the cursor bookkeeping done by `limit`, `remove_limit` and `skip`.
#[cfg(test)]
mod test {
    /// Generates a `#[test]` function named after a `Decoder` method.
    ///
    /// Every `input => expected` pair creates a fresh decoder over `input` and
    /// calls the method once per element of `expected`, comparing the results in order.
    macro_rules! test_values {
        ( $func:ident { $( $input:expr => $expected:expr ),* } ) => {
            #[test]
            fn $func() {
                $(
                    let mut cursor = 0;
                    let mut decoder = super::Decoder::new(&$input, &mut cursor);
                    for e in &$expected {
                        assert_eq!(decoder.$func().unwrap(), *e);
                    }
                )*
            }
        };
        // Same as above, but accepts a trailing comma after the last pair.
        ( $func:ident { $( $input:expr => $expected:expr,) + } ) => {
            test_values!{$func { $($input => $expected),+ }}
        };
    }

    test_values!{read_u8 {
        [0] => [0],
        [5] => [5],
        [0xFF] => [0xFF],
        [0xAA, 0xBB, 0xCC] => [0xAA, 0xBB, 0xCC],
    }}

    test_values!{read_u16 {
        [0x00, 0xFE] => [0x00FE],
        [0xAA, 0xBB] => [0xAABB],
        [0xFF, 0xFF, 0xCA, 0xFE] => [0xFFFF, 0xCAFE],
    }}

    test_values!{read_u32 {
        [0x00, 0xFE, 0x00, 0xAB] => [0x00FE00AB],
        [0xCA, 0xFE, 0xBA, 0xBE] => [0xCAFEBABE],
        [0x00, 0x00, 0x00, 0x00] => [0x00000000],
        [0xFF, 0xFF, 0xFF, 0xFF] => [0xFFFFFFFF],
    }}

    test_values!{read_u64 {
        [0x00, 0xFE, 0x00, 0xAB, 0xCD, 0x00, 0xEF, 0x00] => [0x00FE00AB_CD00EF00],
        [0xCA, 0xFE, 0xBA, 0xBE, 0xCA, 0xFE, 0xD0, 0x0D] => [0xCAFEBABE_CAFED00D],
        [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00] => [0x00000000_00000000],
        [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF] => [0xFFFFFFFF_FFFFFFFF],
    }}

    test_values!{read_i8 {
        [0] => [0],
        [5] => [5],
        [0xFF] => [-1],
        [0x80] => [-128],
    }}

    test_values!{read_i16 {
        [0x00, 0xFE] => [0x00FE],
        [0xF5, 0x45] => [-0x0ABB],
        [0x80, 0xFF, 0xCA, 0xFE] => [-0x7F01, -0x3502],
    }}

    test_values!{read_i32 {
        [0xFF, 0x01, 0xFF, 0x55] => [-0x00FE00AB],
        [0xCA, 0xFE, 0xBA, 0xBE] => [-0x35014542],
        [0xFF, 0xFF, 0xFF, 0xFF] => [-0x00000001],
        [0x00, 0x00, 0x00, 0x00] => [ 0x00000000],
    }}

    test_values!{read_i64 {
        [0xA0, 0xFE, 0x00, 0xAB, 0xCD, 0x00, 0xEF, 0x00] => [-0x5F01FF54_32FF1100],
        [0xA5, 0x01, 0x45, 0x41, 0x35, 0x01, 0x2F, 0xF3] => [-0x5AFEBABE_CAFED00D],
        [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00] => [0x00000000_00000000],
        [0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF] => [0x7FFFFFFF_FFFFFFFF],
    }}

    // Only values with an exact binary representation, so `assert_eq!` is safe here.
    test_values!{read_f32 {
        [0x00, 0x00, 0x00, 0x00] => [0.0f32],
        [0x3F, 0x80, 0x00, 0x00] => [1.0f32],
        [0xC0, 0x20, 0x00, 0x00, 0x3F, 0x80, 0x00, 0x00] => [-2.5f32, 1.0f32],
    }}

    test_values!{read_f64 {
        [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00] => [0.0f64],
        [0x3F, 0xF0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00] => [1.0f64],
        [0xC0, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00] => [-2.5f64],
    }}

    /// Decodes all of `bytes` as a single modified UTF-8 string, `None` on any error.
    fn decode(bytes: &[u8]) -> Option<String> {
        let mut cursor = 0;
        let mut decoder = super::Decoder::new(bytes, &mut cursor);
        decoder.read_str(bytes.len()).ok()
    }

    #[test]
    fn read_str() {
        let mut cursor = 0;
        let mut decoder = super::Decoder::new("Hello, world!".as_bytes(), &mut cursor);
        assert_eq!(decoder.read_str(13).unwrap(), "Hello, world!".to_owned());
        assert_eq!(decoder.cursor(), 13);
    }

    #[test]
    fn read_str_multi_byte() {
        assert_eq!(decode(&[]), Some(String::new()));
        // 2-byte form
        assert_eq!(decode(&[0xC3, 0xA9]), Some("\u{E9}".to_owned()));
        // NUL is stored as C0 80
        assert_eq!(decode(&[0xC0, 0x80]), Some("\u{0}".to_owned()));
        // 3-byte form
        assert_eq!(decode(&[0xE2, 0x82, 0xAC]), Some("\u{20AC}".to_owned()));
        // supplementary character stored as a surrogate pair
        assert_eq!(
            decode(&[0xED, 0xA0, 0xBD, 0xED, 0xB8, 0x80]),
            Some("\u{1F600}".to_owned())
        );
        // all forms mixed
        assert_eq!(
            decode(&[0x61, 0xE2, 0x82, 0xAC, 0xC3, 0xA9, 0x62]),
            Some("a\u{20AC}\u{E9}b".to_owned())
        );
    }

    #[test]
    fn read_str_invalid() {
        // a raw zero byte is not allowed
        assert_eq!(decode(&[0x00]), None);
        // stray continuation byte
        assert_eq!(decode(&[0x80]), None);
        // truncated sequences
        assert_eq!(decode(&[0xC3]), None);
        assert_eq!(decode(&[0xE2, 0x82]), None);
        // 4-byte sequences of standard UTF-8 do not exist in modified UTF-8
        assert_eq!(decode(&[0xF0, 0x9F, 0x98, 0x80]), None);
        // high surrogate without its low surrogate
        assert_eq!(decode(&[0xED, 0xA0, 0xBD]), None);

        // length larger than the remaining input
        let mut cursor = 0;
        let mut decoder = super::Decoder::new(b"ab", &mut cursor);
        assert!(decoder.read_str(3).is_err());
    }

    #[test]
    fn limit_and_remove_limit() {
        let mut cursor = 0;
        let mut decoder = super::Decoder::new(&[0x01, 0x02, 0x03], &mut cursor);
        {
            let mut limited = decoder.limit(2).unwrap();
            assert_eq!(limited.read_u16().unwrap(), 0x0102);
            // the limit is reached, although the input has one more byte
            assert!(limited.read_u8().is_err());
            assert!(limited.remove_limit().is_ok());
        }
        {
            // a limited region that was not consumed completely is an error
            let limited = decoder.limit(1).unwrap();
            assert!(limited.remove_limit().is_err());
        }
        // only one byte is left
        assert!(decoder.limit(2).is_err());
        assert_eq!(decoder.read_u8().unwrap(), 0x03);
        assert_eq!(decoder.cursor(), 3);
    }

    #[test]
    fn skip() {
        let mut cursor = 0;
        let mut decoder = super::Decoder::new(&[0xAA, 0xBB, 0xCC], &mut cursor);
        decoder.skip(2).unwrap();
        assert_eq!(decoder.cursor(), 2);
        assert_eq!(decoder.read_u8().unwrap(), 0xCC);
        assert!(decoder.skip(1).is_err());
        assert_eq!(decoder.cursor(), 3);
        assert!(decoder.skip(0).is_ok());
    }

    #[test]
    fn read_past_end() {
        let mut cursor = 0;
        let mut decoder = super::Decoder::new(&[0x01], &mut cursor);
        assert!(decoder.read_u16().is_err());
        // a failed read must not move the cursor
        assert_eq!(decoder.cursor(), 0);
        assert_eq!(decoder.read_u8().unwrap(), 0x01);
    }
}