twine_data/
ser.rs

//! Encoder.
//!
//! The encoder is used to write a complex DAG, in the twine format, into
//! a byte stream.
5
6use std::io;
7
8use crate::{
9    types::{Offset, Tag, VariantIdx},
10    Immediate,
11};
12
/// Write the unsigned LEB128 encoding of `n` at the start of `buf` and
/// return the number of bytes that were produced (between 1 and 10).
///
/// # Panics
///
/// Panics if `buf` is shorter than 10 bytes, the longest possible encoding
/// of a `u64`.
#[allow(dead_code)]
pub(crate) fn enc_leb128(mut n: u64, buf: &mut [u8]) -> usize {
    assert!(buf.len() >= 10);

    let mut written = 0;
    // Every group except the last carries the continuation bit.
    while n > 0x7f {
        buf[written] = (n as u8 & 0x7f) | 0x80;
        written += 1;
        n >>= 7;
    }
    // Final group: fits in 7 bits, continuation bit clear.
    buf[written] = n as u8;
    written + 1
}
35
/// Streaming encoder that writes twine values into an underlying writer.
///
/// Each `write_*` method returns the offset at which the value it wrote
/// starts.
pub struct Encoder<W: io::Write> {
    /// Destination of the encoded bytes.
    w: W,
    /// Number of bytes emitted so far, i.e. the offset at which the next
    /// value will start.
    offset: Offset,
}
40
/// Result type of the encoder; failures are always `io::Error`s.
pub type Result<T> = std::result::Result<T, io::Error>;
42
43impl<W: io::Write> Encoder<W> {
44    /// Create an encoder.
45    pub fn new(w: W) -> Self {
46        Encoder { w, offset: 0 }
47    }
48
49    /// Write the tag and small integer.
50    #[inline(always)]
51    fn first_byte(&mut self, high: u8, low: u8) -> Result<Offset> {
52        self.w.write(&[(high << 4) | low])?;
53        let off = self.offset;
54        self.offset += 1;
55        Ok(off)
56    }
57
58    /// Write the tag `high` and an integer `n`.
59    fn first_byte_and_u64(&mut self, high: u8, n: u64) -> Result<Offset> {
60        if n < 15 {
61            return self.first_byte(high, n as u8);
62        }
63
64        let mut buf = [0u8; 11];
65        buf[0] = (high << 4) | 15;
66        let len = enc_leb128(n - 15, &mut buf[1..]);
67        self.w.write(&buf[0..len + 1])?;
68
69        let off = self.offset;
70        self.offset += len as u64 + 1;
71        Ok(off)
72    }
73
74    #[inline(always)]
75    pub fn write_null(&mut self) -> Result<Offset> {
76        return self.first_byte(0, 2);
77    }
78
79    #[inline(always)]
80    pub fn write_bool(&mut self, b: bool) -> Result<Offset> {
81        return self.first_byte(0, b as u8);
82    }
83
84    /// Write an integer.
85    pub fn write_i64(&mut self, n: i64) -> Result<Offset> {
86        if n < 0 {
87            let n = ((-n) - 1) as u64;
88            self.first_byte_and_u64(2, n)
89        } else {
90            self.first_byte_and_u64(1, n as u64)
91        }
92    }
93
94    pub fn write_ref(&mut self, p: Offset) -> Result<Offset> {
95        let off = self.offset;
96        debug_assert!(off > p); // can only point to previous values.
97
98        // compute relative offset to `p`.
99        let n = off - p - 1;
100        self.first_byte_and_u64(14, n)
101    }
102
103    pub fn write_pointer(&mut self, p: Offset) -> Result<Offset> {
104        let off = self.offset;
105        debug_assert!(off > p); // can only point to previous values.
106
107        // compute relative offset to `p`.
108        let n = off - p - 1;
109        self.first_byte_and_u64(15, n)
110    }
111
112    pub fn write_f32(&mut self, f: f32) -> Result<Offset> {
113        let bytes = f32::to_le_bytes(f);
114        let off = self.first_byte(3, 0)?;
115        self.w.write(&bytes)?;
116        self.offset += bytes.len() as u64;
117        Ok(off)
118    }
119
120    pub fn write_f64(&mut self, f: f64) -> Result<Offset> {
121        let bytes = f64::to_le_bytes(f);
122        let off = self.first_byte(3, 1)?;
123        self.w.write(&bytes)?;
124        self.offset += bytes.len() as u64;
125        Ok(off)
126    }
127
128    /// Write a unicode string.
129    pub fn write_string(&mut self, s: &str) -> Result<Offset> {
130        let len = s.len() as u64;
131        let off = self.first_byte_and_u64(4, len)?;
132        self.w.write(s.as_bytes())?;
133        self.offset += len;
134        Ok(off)
135    }
136
137    /// Write a binary blob.
138    pub fn write_bytes(&mut self, b: &[u8]) -> Result<Offset> {
139        let len = b.len() as u64;
140        let off = self.first_byte_and_u64(5, len)?;
141        self.w.write(b)?;
142        self.offset += len;
143        Ok(off)
144    }
145
146    /// Write a nullary variant.
147    #[inline(always)]
148    pub fn write_variant0(&mut self, c: VariantIdx) -> Result<Offset> {
149        self.first_byte_and_u64(10, c.0 as u64)
150    }
151
152    /// Write an immediate value.
153    #[must_use]
154    pub fn write_immediate(&mut self, imm: Immediate) -> Result<Offset> {
155        match imm {
156            Immediate::Null => self.write_null(),
157            Immediate::Bool(b) => self.write_bool(b),
158            Immediate::Int64(i) => self.write_i64(i),
159            Immediate::Float(f) => self.write_f64(f),
160            Immediate::String(s) => self.write_string(s),
161            Immediate::Bytes(b) => self.write_bytes(b),
162            Immediate::Variant0(c) => self.write_variant0(c),
163            Immediate::Ref(p) => self.write_ref(p),
164            Immediate::Pointer(p) => self.write_pointer(p),
165        }
166    }
167
168    /// Write the immediate; but if it's a pointer, return the pointer
169    /// without writing a thing.
170    #[inline(always)]
171    #[must_use]
172    pub fn write_immediate_or_return_pointer(&mut self, imm: Immediate) -> Result<Offset> {
173        match imm {
174            Immediate::Pointer(p) => Ok(p),
175            _ => self.write_immediate(imm),
176        }
177    }
178
179    pub fn write_tag(&mut self, tag: Tag, v: Immediate) -> Result<Offset> {
180        let off = self.first_byte_and_u64(8, tag as u64)?;
181        let _ = self.write_immediate(v)?;
182        Ok(off)
183    }
184
185    /// Write an array.
186    ///
187    /// The values in the array must be converted to immediates already,
188    /// possibly by way of writing them first and making pointers to
189    /// their written representation.
190    pub fn write_array(&mut self, arr: &[Immediate]) -> Result<Offset> {
191        let off = self.first_byte_and_u64(6, arr.len() as u64)?;
192        for v in arr {
193            let _ = self.write_immediate(*v)?;
194        }
195        Ok(off)
196    }
197
198    /// Write a map. Keys and values must already be encoded into immediates.
199    pub fn write_map(&mut self, map: &[(Immediate, Immediate)]) -> Result<Offset> {
200        let off = self.first_byte_and_u64(7, map.len() as u64)?;
201        for (k, v) in map {
202            let _ = self.write_immediate(*k)?;
203            let _ = self.write_immediate(*v)?;
204        }
205        Ok(off)
206    }
207
208    /// Write a variant `c` with arguments `args`.
209    pub fn write_variant(
210        &mut self,
211        c: VariantIdx,
212        args: &[Immediate],
213    ) -> Result<Immediate<'static>> {
214        match args.len() {
215            0 => Ok(Immediate::Variant0(c)),
216            1 => {
217                let off = self.first_byte_and_u64(11, c.0 as u64)?;
218                let _ = self.write_immediate(args[0])?;
219                Ok(Immediate::Pointer(off))
220            }
221            _ => {
222                let off = self.first_byte_and_u64(12, c.0 as u64)?;
223
224                // now write number of arguments as LEB128
225                let mut buf_len = [0u8; 10];
226                let len_of_len = enc_leb128(args.len() as u64, &mut buf_len[..]);
227                self.w.write(&buf_len[0..len_of_len])?;
228                self.offset += len_of_len as u64;
229
230                for a in args {
231                    let _ = self.write_immediate(*a)?;
232                }
233                Ok(Immediate::Pointer(off))
234            }
235        }
236    }
237
238    /// Write the postfix to point to `entrypoint`, and consume the encoder.
239    pub fn finalize(mut self, entrypoint: Immediate) -> Result<()> {
240        // first, write the entrypoint.
241        let entrypoint = self.write_immediate_or_return_pointer(entrypoint)?;
242
243        let mut top = self.offset;
244        debug_assert!(top > entrypoint);
245        let mut delta = top - entrypoint - 1;
246        // if delta is too big (ie if entrypoint is a large value, too large to fit
247        // in a `u8` pointer), we go through an intermediate `Immediate::Pointer` step.
248        if delta > 250 {
249            let ptr_to_entry = self.write_pointer(entrypoint)?;
250
251            // recompute delta. Writing the pointer should take at most 11 bytes,
252            // so this time the delta will fit in a single byte.
253            top = self.offset;
254            delta = top - ptr_to_entry - 1;
255        }
256
257        debug_assert!(delta <= 250);
258        self.w.write(&[delta as u8])?;
259        self.offset += 1;
260
261        Ok(())
262    }
263}
264
#[cfg(test)]
mod tests {

    use super::*;
    use proptest::prelude::*;

    /// Hand-picked values covering a one-byte, a multi-byte and the longest
    /// possible encoding, decoded back with the `leb128` crate.
    #[test]
    fn test_enc_leb128() {
        let mut buf = [0u8; 16];
        let cases: [(u64, usize); 3] = [(42, 1), (329282522, 5), (u64::MAX, 10)];
        for &(value, expected_len) in &cases {
            let n = enc_leb128(value, &mut buf);
            assert_eq!(expected_len, n);
            assert_eq!(Some(value), leb128::read::unsigned(&mut &buf[..]).ok());
        }
    }

    proptest! {
        /// Our encoder must produce exactly the bytes the `leb128` crate does.
        #[test]
        fn same_as_leb128_crate(n: u64) {
            let mut ours = [0u8; 12];
            let ours_len = enc_leb128(n, &mut ours[..]);

            let mut ref_v = vec![];
            let _ref_len = leb128::write::unsigned(&mut ref_v, n).unwrap();
            assert_eq!(ref_v.as_slice(), &ours[..ours_len])
        }
    }

    /// Encode an array of two references, check the exact bytes, then
    /// decode it back and compare with the original value.
    #[test]
    fn test_ref() {
        use crate::value::Value as V;
        use crate::Decoder;

        let mut res: Vec<u8> = vec![];
        let mut enc = Encoder::new(&mut res);

        let off1 = enc.write_string("hello").unwrap();
        let off2 = enc.write_string("world").unwrap();
        let v = V::Array(vec![V::Ref(off1), V::Ref(off2)]);
        let off_v = crate::value::write_value(&mut enc, &v).unwrap();

        let expected: &[u8] = &[
            69, 104, 101, 108, 108, 111, 69, 119, 111, 114, 108, 100, 98, 236, 231,
        ];
        assert_eq!(expected, res.as_slice());

        let dec = Decoder::new(&res).unwrap();
        let v2 = crate::value::read_value(&dec, off_v).unwrap();
        assert_eq!(v, v2);
    }
}
326}