pallas_codec/flat/encode/
encoder.rs

1use super::Encode;
2use super::Error;
3use crate::flat::zigzag::ZigZag;
4
5#[cfg(feature = "num-bigint")]
6use num_bigint::{BigInt, BigUint};
7
/// Bit-level writer that accumulates flat-encoded output.
///
/// Bits are packed into `current_byte` starting from the most significant
/// bit; once all eight bits are occupied the byte is appended to `buffer`
/// and a fresh byte is started.
#[derive(Debug, Clone)]
pub struct Encoder {
    /// Completed output bytes, in the order they were written.
    pub buffer: Vec<u8>,
    /// Number of bits already occupied in `current_byte`.
    used_bits: i64,
    /// Partially filled byte that has not been pushed to `buffer` yet.
    current_byte: u8,
}
15
16impl Default for Encoder {
17    fn default() -> Self {
18        Self::new()
19    }
20}
21
22impl Encoder {
23    pub fn new() -> Encoder {
24        Encoder {
25            buffer: Vec::new(),
26            used_bits: 0,
27            current_byte: 0,
28        }
29    }
30
31    /// Encode any type that implements [`Encode`].
32    pub fn encode<T: Encode>(&mut self, x: T) -> Result<&mut Self, Error> {
33        x.encode(self)?;
34
35        Ok(self)
36    }
37
38    /// Encode 1 unsigned byte.
39    /// Uses the next 8 bits in the buffer, can be byte aligned or byte
40    /// unaligned
41    pub fn u8(&mut self, x: u8) -> Result<&mut Self, Error> {
42        if self.used_bits == 0 {
43            self.current_byte = x;
44            self.next_word();
45        } else {
46            self.byte_unaligned(x);
47        }
48
49        Ok(self)
50    }
51
52    /// Encode a `bool` value. This is byte alignment agnostic.
53    /// Uses the next unused bit in the current byte to encode this information.
54    /// One for true and Zero for false
55    pub fn bool(&mut self, x: bool) -> &mut Self {
56        if x {
57            self.one();
58        } else {
59            self.zero();
60        }
61
62        self
63    }
64
65    /// Encode a byte array.
66    /// Uses filler to byte align the buffer, then writes byte array length up
67    /// to 255. Following that it writes the next 255 bytes from the array.
68    /// We repeat writing length up to 255 and the next 255 bytes until we reach
69    /// the end of the byte array. After reaching the end of the byte array
70    /// we write a 0 byte. Only write 0 byte if the byte array is empty.
71    pub fn bytes(&mut self, x: &[u8]) -> Result<&mut Self, Error> {
72        // use filler to write current buffer so bits used gets reset
73        self.filler();
74
75        self.byte_array(x)
76    }
77
78    /// Encode a byte array in a byte aligned buffer. Throws exception if any
79    /// bits for the current byte were used. Writes byte array length up to
80    /// 255 Following that it writes the next 255 bytes from the array.
81    /// We repeat writing length up to 255 and the next 255 bytes until we reach
82    /// the end of the byte array. After reaching the end of the buffer we
83    /// write a 0 byte. Only write 0 if the byte array is empty.
84    pub fn byte_array(&mut self, arr: &[u8]) -> Result<&mut Self, Error> {
85        if self.used_bits != 0 {
86            return Err(Error::BufferNotByteAligned);
87        }
88
89        self.write_blk(arr);
90
91        Ok(self)
92    }
93
94    /// Encode an isize integer.
95    ///
96    /// This is byte alignment agnostic.
97    /// First we use zigzag once to double the number and encode the negative
98    /// sign as the least significant bit. Next we encode the 7 least
99    /// significant bits of the unsigned integer. If the number is greater than
100    /// 127 we encode a leading 1 followed by repeating the encoding above for
101    /// the next 7 bits and so on.
102    pub fn integer(&mut self, i: isize) -> &mut Self {
103        self.word(i.zigzag());
104        self
105    }
106
107    /// Encode an arbitrarily sized integer.
108    ///
109    /// This is byte alignment agnostic.
110    /// First we use zigzag once to double the number and encode the negative
111    /// sign as the least significant bit. Next we encode the 7 least
112    /// significant bits of the unsigned integer. If the number is greater than
113    /// 127 we encode a leading 1 followed by repeating the encoding above for
114    /// the next 7 bits and so on.
115    #[cfg(feature = "num-bigint")]
116    pub fn big_integer(&mut self, i: BigInt) -> &mut Self {
117        self.big_word(i.zigzag());
118        self
119    }
120
121    /// Encode a char of 32 bits.
122    /// This is byte alignment agnostic.
123    /// We encode the 7 least significant bits of the unsigned byte. If the char
124    /// value is greater than 127 we encode a leading 1 followed by
125    /// repeating the above for the next 7 bits and so on.
126    pub fn char(&mut self, c: char) -> &mut Self {
127        self.word(c as usize);
128
129        self
130    }
131
132    // TODO: Do we need this?
133    pub fn string(&mut self, s: &str) -> &mut Self {
134        for i in s.chars() {
135            self.one();
136            self.char(i);
137        }
138
139        self.zero();
140
141        self
142    }
143
144    /// Encode a string.
145    /// Convert to byte array and then use byte array encoding.
146    /// Uses filler to byte align the buffer, then writes byte array length up
147    /// to 255. Following that it writes the next 255 bytes from the array.
148    /// After reaching the end of the buffer we write a 0 byte. Only write 0
149    /// byte if the byte array is empty.
150    pub fn utf8(&mut self, s: &str) -> Result<&mut Self, Error> {
151        self.bytes(s.as_bytes())
152    }
153
154    /// Encode a unsigned integer of any size.
155    /// This is byte alignment agnostic.
156    /// We encode the 7 least significant bits of the unsigned byte. If the char
157    /// value is greater than 127 we encode a leading 1 followed by
158    /// repeating the above for the next 7 bits and so on.
159    pub fn word(&mut self, c: usize) -> &mut Self {
160        let mut d = c;
161        loop {
162            let mut w = (d & 127) as u8;
163            d >>= 7;
164
165            if d != 0 {
166                w |= 128;
167            }
168            self.bits(8, w);
169
170            if d == 0 {
171                break;
172            }
173        }
174
175        self
176    }
177
178    /// Encode a unsigned integer of 128 bits size.
179    /// This is byte alignment agnostic.
180    /// We encode the 7 least significant bits of the unsigned byte. If the char
181    /// value is greater than 127 we encode a leading 1 followed by
182    /// repeating the above for the next 7 bits and so on.
183    #[cfg(feature = "num-bigint")]
184    pub fn big_word(&mut self, c: BigUint) -> &mut Self {
185        let mut d = c;
186        let zero = (0_u8).into();
187        loop {
188            let m: usize = 127;
189            let mut w = (d.clone() & <usize as Into<BigUint>>::into(m))
190                .to_bytes_be()
191                .pop()
192                .unwrap();
193
194            d >>= 7;
195
196            if d != zero {
197                w |= 128;
198            }
199            self.bits(8, w);
200
201            if d == zero {
202                break;
203            }
204        }
205
206        self
207    }
208
209    /// Encode a list of bytes with a function
210    /// This is byte alignment agnostic.
211    /// If there are bytes in a list then write 1 bit followed by the functions
212    /// encoding. After the last item write a 0 bit. If the list is empty
213    /// only encode a 0 bit.
214    pub fn encode_list_with<T>(
215        &mut self,
216        list: &[T],
217        encoder_func: for<'r> fn(&T, &'r mut Encoder) -> Result<(), Error>,
218    ) -> Result<&mut Self, Error> {
219        for item in list {
220            self.one();
221            encoder_func(item, self)?;
222        }
223
224        self.zero();
225
226        Ok(self)
227    }
228
229    /// Encodes up to 8 bits of information and is byte alignment agnostic.
230    /// Uses unused bits in the current byte to write out the passed in byte
231    /// value. Overflows to the most significant digits of the next byte if
232    /// number of bits to use is greater than unused bits. Expects that
233    /// number of bits to use is greater than or equal to required bits by the
234    /// value. The param num_bits is i64 to match unused_bits type.
235    pub fn bits(&mut self, num_bits: i64, val: u8) -> &mut Self {
236        match (num_bits, val) {
237            (1, 0) => self.zero(),
238            (1, 1) => self.one(),
239            (2, 0) => {
240                self.zero();
241                self.zero();
242            }
243            (2, 1) => {
244                self.zero();
245                self.one();
246            }
247            (2, 2) => {
248                self.one();
249                self.zero();
250            }
251            (2, 3) => {
252                self.one();
253                self.one();
254            }
255            (_, _) => {
256                self.used_bits += num_bits;
257                let unused_bits = 8 - self.used_bits;
258                match unused_bits {
259                    0 => {
260                        self.current_byte |= val;
261                        self.next_word();
262                    }
263                    x if x > 0 => {
264                        self.current_byte |= val << x;
265                    }
266                    x => {
267                        let used = -x;
268                        self.current_byte |= val >> used;
269                        self.next_word();
270                        self.current_byte = val << (8 - used);
271                        self.used_bits = used;
272                    }
273                }
274            }
275        }
276
277        self
278    }
279
280    /// A filler amount of end 0's followed by a 1 at the end of a byte.
281    /// Used to byte align the buffer by padding out the rest of the byte.
282    pub(crate) fn filler(&mut self) -> &mut Self {
283        self.current_byte |= 1;
284        self.next_word();
285
286        self
287    }
288
289    /// Write a 0 bit into the current byte.
290    /// Write out to buffer if last used bit in the current byte.
291    fn zero(&mut self) {
292        if self.used_bits == 7 {
293            self.next_word();
294        } else {
295            self.used_bits += 1;
296        }
297    }
298
299    /// Write a 1 bit into the current byte.
300    /// Write out to buffer if last used bit in the current byte.
301    fn one(&mut self) {
302        if self.used_bits == 7 {
303            self.current_byte |= 1;
304            self.next_word();
305        } else {
306            self.current_byte |= 128 >> self.used_bits;
307            self.used_bits += 1;
308        }
309    }
310    /// Write out byte regardless of current buffer alignment.
311    /// Write most significant bits in remaining unused bits for the current
312    /// byte, then write out the remaining bits at the beginning of the next
313    /// byte.
314    fn byte_unaligned(&mut self, x: u8) {
315        let x_shift = self.current_byte | (x >> self.used_bits);
316        self.buffer.push(x_shift);
317
318        self.current_byte = x << (8 - self.used_bits);
319    }
320
321    /// Write the current byte out to the buffer and begin next byte to write
322    /// out. Add current byte to the buffer and set current byte and used
323    /// bits to 0.
324    fn next_word(&mut self) {
325        self.buffer.push(self.current_byte);
326
327        self.current_byte = 0;
328        self.used_bits = 0;
329    }
330
331    /// Writes byte array length up to 255
332    /// Following that it writes the next 255 bytes from the array.
333    /// After reaching the end of the buffer we write a 0 byte. Only write 0 if
334    /// the byte array is empty. This is byte alignment agnostic.
335    fn write_blk(&mut self, arr: &[u8]) {
336        let chunks = arr.chunks(255);
337
338        for chunk in chunks {
339            self.buffer.push(chunk.len() as u8);
340            self.buffer.extend(chunk);
341        }
342        self.buffer.push(0_u8);
343    }
344}