twine_data/
deser.rs

//! Deserialization.
//!
//! Reading data from `twine` is done by offset.
4
5use crate::shallow_value::{ArrayCursor, MapCursor};
6
7pub use super::shallow_value::ShallowValue;
8use super::types::*;
9
/// A decoder for a twine blob.
///
/// The decoder only borrows the encoded bytes; values are read from them
/// on demand, addressed by offset.
#[derive(Clone)]
pub struct Decoder<'a> {
    /// The encoded bytes this decoder reads from.
    bs: &'a [u8],
}
15
16impl<'a> std::fmt::Debug for Decoder<'a> {
17    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
18        write!(f, "Decoder {{bs: {} bytes}}", self.bs.len())
19    }
20}
21
22impl<'a> Decoder<'a> {
23    /// Create a new decoder reading from these bytes.
24    pub fn new(bs: &'a [u8]) -> Result<Self> {
25        if bs.len() > u32::MAX as usize {
26            return Err(Error {
27                msg: "byte buffer is too long",
28                off: 0,
29            });
30        }
31        Ok(Self { bs })
32    }
33
34    /// Read (high, low) nibbles at the given offset.
35    #[inline]
36    pub(crate) fn first_byte(&self, off: Offset) -> (u8, u8) {
37        let c = self.bs[off as usize];
38        let high = c >> 4;
39        let low = c & 0xf;
40        (high, low)
41    }
42
43    /// read an integer in LEB128
44    fn leb128(&self, mut off: Offset) -> Result<(u64, u8)> {
45        let mut res: u64 = 0;
46        let mut shift = 0;
47        let mut n_bytes = 0;
48
49        loop {
50            n_bytes += 1;
51            let c = self.bs[off as usize];
52            off += 1;
53            let cur = c & 0x7f;
54            res = res | ((cur as u64) << shift);
55
56            if cur == c {
57                // last byte
58                return Ok((res, n_bytes));
59            } else {
60                shift += 7;
61                if shift >= 64 {
62                    return Err(Error {
63                        msg: "out of bound for LEB128",
64                        off,
65                    });
66                }
67            }
68        }
69    }
70
71    pub(crate) fn u64_with_low(&self, off: Offset, low: u8) -> Result<(u64, Offset)> {
72        if low < 15 {
73            return Ok((low as u64, 0));
74        }
75        let (rest, consumed) = self.leb128(off + 1)?;
76        Ok((rest + 15, consumed as Offset))
77    }
78
79    /// Dereference the offset.
80    ///
81    /// If the value at this offset is a pointer, follow the pointer;
82    /// repeat until it's not. This is done implicitly by most
83    /// other functions in this module, but it can be useful to do it
84    /// by hand in case there is caching done on decoding (eg. to memoize the
85    /// value decoded at a particular offset, it's better to dereference
86    /// the offset first).
87    pub fn deref(&self, mut off: Offset) -> Result<Offset> {
88        loop {
89            let (high, low) = self.first_byte(off);
90            if high == 15 {
91                let (p, _) = self.u64_with_low(off, low)?;
92                // checked sub
93                off = off.checked_sub(p as Offset + 1).ok_or_else(|| Error {
94                    msg: "pointer underflow",
95                    off,
96                })?;
97            } else {
98                return Ok(off);
99            }
100        }
101    }
102
103    fn i64_pos(&'_ self, off: Offset, low: u8) -> Result<i64> {
104        let (x, _) = self.u64_with_low(off, low)?;
105        if x > i64::MAX as u64 {
106            return Err(Error {
107                msg: "i64 overflow",
108                off,
109            });
110        }
111        Ok(x as i64)
112    }
113
114    fn i64_neg(&'_ self, off: Offset, low: u8) -> Result<i64> {
115        let (x, _) = self.u64_with_low(off, low)?;
116        if x > i64::MAX as u64 {
117            return Err(Error {
118                msg: "i64 overflow",
119                off,
120            });
121        }
122        Ok(-(x as i64) - 1)
123    }
124
125    fn str(&'_ self, mut off: Offset, low: u8) -> Result<&'a str> {
126        let (len, n_bytes) = self.u64_with_low(off, low)?;
127        off = off + 1 + n_bytes;
128        std::str::from_utf8(&self.bs[off as usize..(off as usize + len as usize)]).map_err(|_| {
129            Error {
130                msg: "overflow in string",
131                off,
132            }
133        })
134    }
135
136    fn bytes(&'_ self, mut off: Offset, low: u8) -> Result<&'a [u8]> {
137        let (len, n_bytes) = self.u64_with_low(off, low)?;
138        off = off + 1 + n_bytes;
139        Ok(&self.bs[off as usize..(off as usize + len as usize)])
140    }
141
142    fn float(&'_ self, off: Offset, low: u8) -> Result<f64> {
143        let off1 = (off + 1) as usize;
144        if low == 0 {
145            let arr: [u8; 4] = self.bs[off1..off1 + 4].try_into().unwrap();
146            let u: u32 = u32::from_le_bytes(arr);
147            let f = f32::from_bits(u);
148            Ok(f as f64)
149        } else if low == 1 {
150            let arr: [u8; 8] = self.bs[off1..off1 + 8].try_into().unwrap();
151            let u: u64 = u64::from_le_bytes(arr);
152            let f = f64::from_bits(u);
153            Ok(f)
154        } else {
155            Err(Error {
156                msg: "expected float",
157                off,
158            })
159        }
160    }
161
162    fn tag(&'_ self, mut off: Offset, low: u8) -> Result<(Tag, Offset)> {
163        let (tag, n_bytes) = self.u64_with_low(off, low)?;
164        off = off + 1 + n_bytes;
165        Ok((tag as Tag, off))
166    }
167
168    fn array_cursor(&'_ self, mut off: Offset, low: u8) -> Result<ArrayCursor<'a>> {
169        let (len, n_bytes) = self.u64_with_low(off, low)?;
170        if len > u32::MAX as u64 {
171            return Err(Error {
172                msg: "Size overflow for array",
173                off,
174            });
175        }
176        off = off + 1 + n_bytes;
177        let dec = self.clone();
178        Ok(ArrayCursor {
179            dec,
180            off,
181            n_items: len as u32,
182        })
183    }
184
185    fn map_cursor(&'_ self, mut off: Offset, low: u8) -> Result<MapCursor<'a>> {
186        let (len, n_bytes) = self.u64_with_low(off, low)?;
187        if len > u32::MAX as u64 {
188            return Err(Error {
189                msg: "Size overflow for dict",
190                off,
191            });
192        }
193        off = off + 1 + n_bytes;
194        let dec = self.clone();
195        Ok(MapCursor {
196            dec,
197            off,
198            n_items: len as u32,
199        })
200    }
201
202    fn variant(
203        &'_ self,
204        mut off: Offset,
205        high: u8,
206        low: u8,
207    ) -> Result<(VariantIdx, ArrayCursor<'a>)> {
208        macro_rules! mk_variant {
209            ($idx: expr) => {{
210                if $idx > u32::MAX as u64 {
211                    return Err(Error {
212                        msg: "variant overflow",
213                        off,
214                    });
215                }
216                VariantIdx($idx as u32)
217            }};
218        }
219
220        let dec = self.clone();
221        if high == 10 {
222            let (idx, _) = self.u64_with_low(off, low)?;
223            Ok((
224                mk_variant!(idx),
225                ArrayCursor {
226                    dec,
227                    off,
228                    n_items: 0,
229                },
230            ))
231        } else if high == 11 {
232            let (idx, n_bytes_idx) = self.u64_with_low(off, low)?;
233            let arr = ArrayCursor {
234                dec,
235                off: off + 1 + n_bytes_idx,
236                n_items: 1,
237            };
238            Ok((mk_variant!(idx), arr))
239        } else if high == 12 {
240            let (idx, n_bytes_idx) = self.u64_with_low(off, low)?;
241            off = off + 1 + n_bytes_idx;
242            let (n_items, n_bytes_n_items) = self.leb128(off)?;
243            if n_items > u32::MAX as u64 {
244                return Err(Error {
245                    msg: "overflow in variant arguments",
246                    off,
247                });
248            }
249            let n_items = n_items as u32;
250
251            off = off + n_bytes_n_items as Offset;
252            let arr = ArrayCursor { off, n_items, dec };
253            Ok((mk_variant!(idx), arr))
254        } else {
255            Err(Error {
256                msg: "expected variant",
257                off,
258            })
259        }
260    }
261
262    /// Skip an immediate value, return offset of next value.
263    pub(crate) fn skip(&self, off: Offset) -> Result<Offset> {
264        let (high, low) = self.first_byte(off);
265        let off: Offset = match high {
266            0 => off + 1,
267            1 | 2 => {
268                let (_, n_bytes) = self.u64_with_low(off, low)?;
269                off + 1 + n_bytes
270            }
271            3 => {
272                if low == 0 {
273                    off + 5
274                } else {
275                    off + 9
276                }
277            }
278            4 | 5 => {
279                let (len, n_bytes) = self.u64_with_low(off, low)?;
280                if len > u32::MAX as u64 {
281                    return Err(Error {
282                        msg: "length overflow",
283                        off,
284                    });
285                }
286                off + 1 + n_bytes + len as Offset
287            }
288            6 | 7 | 8 => {
289                return Err(Error {
290                    msg: "cannot skip over array/dict/tag",
291                    off,
292                })
293            }
294
295            9 | 13 => {
296                return Err(Error {
297                    msg: "tag is reserved",
298                    off,
299                })
300            }
301            10 => {
302                let (_, n_bytes) = self.u64_with_low(off, low)?;
303                off + 1 + n_bytes
304            }
305            11 | 12 => {
306                return Err(Error {
307                    msg: "cannot skip over variant",
308                    off,
309                })
310            }
311            14 | 15 => {
312                let (_, n_bytes) = self.u64_with_low(off, low)?;
313                off + 1 + n_bytes
314            }
315            _ => {
316                unreachable!()
317            }
318        };
319        Ok(off)
320    }
321
322    /// Read one value at the given offset.
323    ///
324    /// This does not recurse into subvalues, so it is fairly fast. The main way of
325    /// deserializing from twine is through this function.
326    /// If the value at `off` is a pointer, it is implicitly followed.
327    /// As a consequence, this never returns a `Immediate::Pointer` value.
328    pub fn get_shallow_value(&'_ self, mut off: Offset) -> Result<ShallowValue<'a>> {
329        use ShallowValue::*;
330
331        off = self.deref(off)?;
332        let (high, low) = self.first_byte(off);
333        let v: ShallowValue = match high {
334            0 => {
335                if low == 2 {
336                    Imm(Immediate::Null)
337                } else if low == 0 {
338                    Imm(Immediate::Bool(false))
339                } else if low == 1 {
340                    Imm(Immediate::Bool(true))
341                } else {
342                    return Err(Error {
343                        msg: "invalid value with high=0",
344                        off,
345                    });
346                }
347            }
348            1 => Imm(Immediate::Int64(self.i64_pos(off, low)?)),
349            2 => Imm(Immediate::Int64(self.i64_neg(off, low)?)),
350            3 => Imm(Immediate::Float(self.float(off, low)?)),
351            4 => Imm(Immediate::String(self.str(off, low)?)),
352            5 => Imm(Immediate::Bytes(self.bytes(off, low)?)),
353            6 => {
354                let arr = self.array_cursor(off, low)?;
355                Array(arr)
356            }
357            7 => {
358                let map = self.map_cursor(off, low)?;
359                Map(map)
360            }
361            8 => {
362                let (tag, off) = self.tag(off, low)?;
363                Tag(tag, off)
364            }
365            10 | 11 | 12 => {
366                let (variant_idx, args) = self.variant(off, high, low)?;
367                Variant(variant_idx, args)
368            }
369            14 => {
370                let (p, _) = self.u64_with_low(off, low)?;
371                // checked sub
372                let p = off.checked_sub(p as Offset + 1).ok_or_else(|| Error {
373                    msg: "ref underflow",
374                    off,
375                })?;
376                Imm(Immediate::Ref(p))
377            }
378            15 => unreachable!(), // we did deref!
379            _ => {
380                return Err(Error {
381                    msg: "invalid value",
382                    off,
383                })
384            }
385        };
386        Ok(v)
387    }
388
389    /// Get an integer.
390    pub fn get_i64(&self, off: Offset) -> Result<i64> {
391        match self.get_shallow_value(off)? {
392            ShallowValue::Imm(Immediate::Int64(i)) => Ok(i),
393            _ => Err(Error {
394                msg: "expected integer",
395                off,
396            }),
397        }
398    }
399
400    pub fn get_bool(&self, off: Offset) -> Result<bool> {
401        match self.get_shallow_value(off)? {
402            ShallowValue::Imm(Immediate::Bool(b)) => Ok(b),
403            _ => Err(Error {
404                msg: "expected bool",
405                off,
406            }),
407        }
408    }
409
410    pub fn get_null(&self, off: Offset) -> Result<()> {
411        match self.get_shallow_value(off)? {
412            ShallowValue::Imm(Immediate::Null) => Ok(()),
413            _ => Err(Error {
414                msg: "expected null",
415                off,
416            }),
417        }
418    }
419
420    pub fn get_float(&self, off: Offset) -> Result<f64> {
421        match self.get_shallow_value(off)? {
422            ShallowValue::Imm(Immediate::Float(f)) => Ok(f),
423            _ => Err(Error {
424                msg: "expected float",
425                off,
426            }),
427        }
428    }
429
430    pub fn get_str(&self, off: Offset) -> Result<&'a str> {
431        match self.get_shallow_value(off)? {
432            ShallowValue::Imm(Immediate::String(s)) => Ok(s),
433            _ => Err(Error {
434                msg: "expected string",
435                off,
436            }),
437        }
438    }
439
440    pub fn get_bytes(&self, off: Offset) -> Result<&'a [u8]> {
441        match self.get_shallow_value(off)? {
442            ShallowValue::Imm(Immediate::Bytes(s)) => Ok(s),
443            _ => Err(Error {
444                msg: "expected bytes",
445                off,
446            }),
447        }
448    }
449
450    /// Read an array of offsets into `res`
451    pub fn get_array(&self, off: Offset, res: &mut Vec<Offset>) -> Result<()> {
452        res.clear();
453        match self.get_shallow_value(off)? {
454            ShallowValue::Array(arr) => {
455                for off in arr {
456                    res.push(off?)
457                }
458                Ok(())
459            }
460            _ => Err(Error {
461                msg: "expected array",
462                off,
463            }),
464        }
465    }
466
467    /// Read a dictionary of offsets into `res`.
468    ///
469    /// `res` is cleared before reading.
470    pub fn get_dict(&self, off: Offset, res: &mut Vec<(Offset, Offset)>) -> Result<()> {
471        res.clear();
472        match self.get_shallow_value(off)? {
473            ShallowValue::Map(d) => {
474                for pair in d {
475                    let (k, v) = pair?;
476                    res.push((k, v))
477                }
478                Ok(())
479            }
480            _ => Err(Error {
481                msg: "expected dict",
482                off,
483            }),
484        }
485    }
486
487    /// Read a tagged value.
488    ///
489    /// The value itself is not read, only an offset to it is returned.
490    pub fn get_tag(&self, off: Offset) -> Result<(Tag, Offset)> {
491        match self.get_shallow_value(off)? {
492            ShallowValue::Tag(tag, off) => Ok((tag, off)),
493            _ => Err(Error {
494                msg: "expected tag",
495                off,
496            }),
497        }
498    }
499
500    /// Read a variant value. The variant index is returned,
501    /// and (the offsets of the) arguments are pushed into `args`.
502    ///
503    /// `args` is cleared first.
504    pub fn get_variant(&self, off: Offset, args: &mut Vec<Offset>) -> Result<VariantIdx> {
505        args.clear();
506
507        match self.get_shallow_value(off)? {
508            ShallowValue::Variant(variant_idx, c_args) => {
509                for off in c_args {
510                    args.push(off?)
511                }
512                Ok(variant_idx)
513            }
514            _ => Err(Error {
515                msg: "expected variant",
516                off,
517            }),
518        }
519    }
520
521    /// Find the entrypoint.
522    ///
523    /// A twine blob is terminated with a postfix (in essence, a pointer to the actual
524    /// toplevel value). This reads the postfix and returns the offset of the toplevel value.
525    pub fn entrypoint(&self) -> Result<Offset> {
526        let last = self.bs.len() as Offset - 1;
527        let off = last - self.bs[last as usize] as Offset - 1;
528        self.deref(off)
529    }
530}
531
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    /// Decode a couple of hand-written LEB128 encodings.
    #[test]
    fn test_dec_leb128() {
        // single byte, no continuation bit
        let single = [17u8];
        let dec = Decoder::new(&single).unwrap();
        assert_eq!(Some((17, 1)), dec.leb128(0).ok());

        // three bytes, 7 bits each, least significant group first
        let triple = [0x88u8, 0x85, 0x09];
        let dec = Decoder::new(&triple).unwrap();
        let expected = (9 << (7 + 7)) + (5 << 7) + 8;
        assert_eq!(Some((expected, 3)), dec.leb128(0).ok());
    }

    proptest! {
        /// Whatever the `leb128` crate writes, our decoder reads back,
        /// consuming the same number of bytes.
        #[test]
        fn decode_from_leb128_crate(n: u64){
            let mut encoded = vec![];
            let written = leb128::write::unsigned(&mut encoded, n).unwrap();

            let dec = Decoder::new(&encoded).unwrap();
            let (decoded, consumed) = dec.leb128(0).unwrap();
            assert_eq!(decoded, n);
            assert_eq!(written, consumed as usize);
        }
    }
}