flat_rs/encode/
encoder.rs

1use crate::{encode::Encode, zigzag};
2
3use super::Error;
4
/// Bit-level writer that accumulates encoded output in `buffer`.
///
/// Bits are packed into `current_byte` starting at the most significant bit;
/// once a byte is complete it is appended to `buffer` and a fresh byte is
/// started.
#[derive(Debug, Clone)]
pub struct Encoder {
    /// Completed bytes, in the order they were written.
    pub buffer: Vec<u8>,
    /// Number of bits of `current_byte` that are already occupied.
    /// A value of 0 means the encoder is byte aligned.
    used_bits: i64,
    /// The byte currently being assembled; it is not part of `buffer` yet.
    current_byte: u8,
}
12
13impl Default for Encoder {
14    fn default() -> Self {
15        Self::new()
16    }
17}
18
19impl Encoder {
20    pub fn new() -> Encoder {
21        Encoder {
22            buffer: Vec::new(),
23            used_bits: 0,
24            current_byte: 0,
25        }
26    }
27
28    /// Encode any type that implements [`Encode`].
29    pub fn encode<T: Encode>(&mut self, x: T) -> Result<&mut Self, Error> {
30        x.encode(self)?;
31
32        Ok(self)
33    }
34
35    /// Encode 1 unsigned byte.
36    /// Uses the next 8 bits in the buffer, can be byte aligned or byte unaligned
37    pub fn u8(&mut self, x: u8) -> Result<&mut Self, Error> {
38        if self.used_bits == 0 {
39            self.current_byte = x;
40            self.next_word();
41        } else {
42            self.byte_unaligned(x);
43        }
44
45        Ok(self)
46    }
47
48    /// Encode a `bool` value. This is byte alignment agnostic.
49    /// Uses the next unused bit in the current byte to encode this information.
50    /// One for true and Zero for false
51    pub fn bool(&mut self, x: bool) -> &mut Self {
52        if x {
53            self.one();
54        } else {
55            self.zero();
56        }
57
58        self
59    }
60
61    /// Encode a byte array.
62    /// Uses filler to byte align the buffer, then writes byte array length up to 255.
63    /// Following that it writes the next 255 bytes from the array.
64    /// We repeat writing length up to 255 and the next 255 bytes until we reach the end of the byte array.
65    /// After reaching the end of the byte array we write a 0 byte. Only write 0 byte if the byte array is empty.
66    pub fn bytes(&mut self, x: &[u8]) -> Result<&mut Self, Error> {
67        // use filler to write current buffer so bits used gets reset
68        self.filler();
69
70        self.byte_array(x)
71    }
72
73    /// Encode a byte array in a byte aligned buffer. Throws exception if any bits for the current byte were used.
74    /// Writes byte array length up to 255
75    /// Following that it writes the next 255 bytes from the array.
76    /// We repeat writing length up to 255 and the next 255 bytes until we reach the end of the byte array.
77    /// After reaching the end of the buffer we write a 0 byte. Only write 0 if the byte array is empty.
78    pub fn byte_array(&mut self, arr: &[u8]) -> Result<&mut Self, Error> {
79        if self.used_bits != 0 {
80            return Err(Error::BufferNotByteAligned);
81        }
82
83        self.write_blk(arr);
84
85        Ok(self)
86    }
87
88    /// Encode an integer of any size.
89    /// This is byte alignment agnostic.
90    /// First we use zigzag once to double the number and encode the negative sign as the least significant bit.
91    /// Next we encode the 7 least significant bits of the unsigned integer. If the number is greater than
92    /// 127 we encode a leading 1 followed by repeating the encoding above for the next 7 bits and so on.
93    pub fn integer(&mut self, i: isize) -> &mut Self {
94        let i = zigzag::to_usize(i);
95
96        self.word(i);
97
98        self
99    }
100
101    /// Encode an integer of 128 bits size.
102    /// This is byte alignment agnostic.
103    /// First we use zigzag once to double the number and encode the negative sign as the least significant bit.
104    /// Next we encode the 7 least significant bits of the unsigned integer. If the number is greater than
105    /// 127 we encode a leading 1 followed by repeating the encoding above for the next 7 bits and so on.
106    pub fn big_integer(&mut self, i: i128) -> &mut Self {
107        let i = zigzag::to_u128(i);
108
109        self.big_word(i);
110
111        self
112    }
113
114    /// Encode a char of 32 bits.
115    /// This is byte alignment agnostic.
116    /// We encode the 7 least significant bits of the unsigned byte. If the char value is greater than
117    /// 127 we encode a leading 1 followed by repeating the above for the next 7 bits and so on.
118    pub fn char(&mut self, c: char) -> &mut Self {
119        self.word(c as usize);
120
121        self
122    }
123
124    // TODO: Do we need this?
125    pub fn string(&mut self, s: &str) -> &mut Self {
126        for i in s.chars() {
127            self.one();
128            self.char(i);
129        }
130
131        self.zero();
132
133        self
134    }
135
136    /// Encode a string.
137    /// Convert to byte array and then use byte array encoding.
138    /// Uses filler to byte align the buffer, then writes byte array length up to 255.
139    /// Following that it writes the next 255 bytes from the array.
140    /// After reaching the end of the buffer we write a 0 byte. Only write 0 byte if the byte array is empty.
141    pub fn utf8(&mut self, s: &str) -> Result<&mut Self, Error> {
142        self.bytes(s.as_bytes())
143    }
144
145    /// Encode a unsigned integer of any size.
146    /// This is byte alignment agnostic.
147    /// We encode the 7 least significant bits of the unsigned byte. If the char value is greater than
148    /// 127 we encode a leading 1 followed by repeating the above for the next 7 bits and so on.
149    pub fn word(&mut self, c: usize) -> &mut Self {
150        let mut d = c;
151        loop {
152            let mut w = (d & 127) as u8;
153            d >>= 7;
154
155            if d != 0 {
156                w |= 128;
157            }
158            self.bits(8, w);
159
160            if d == 0 {
161                break;
162            }
163        }
164
165        self
166    }
167
168    /// Encode a unsigned integer of 128 bits size.
169    /// This is byte alignment agnostic.
170    /// We encode the 7 least significant bits of the unsigned byte. If the char value is greater than
171    /// 127 we encode a leading 1 followed by repeating the above for the next 7 bits and so on.
172    pub fn big_word(&mut self, c: u128) -> &mut Self {
173        let mut d = c;
174        loop {
175            let mut w = (d & 127) as u8;
176            d >>= 7;
177
178            if d != 0 {
179                w |= 128;
180            }
181            self.bits(8, w);
182
183            if d == 0 {
184                break;
185            }
186        }
187
188        self
189    }
190
191    /// Encode a list of bytes with a function
192    /// This is byte alignment agnostic.
193    /// If there are bytes in a list then write 1 bit followed by the functions encoding.
194    /// After the last item write a 0 bit. If the list is empty only encode a 0 bit.
195    pub fn encode_list_with<T>(
196        &mut self,
197        list: &[T],
198        encoder_func: for<'r> fn(&T, &'r mut Encoder) -> Result<(), Error>,
199    ) -> Result<&mut Self, Error>
200    where
201        T: Encode,
202    {
203        for item in list {
204            self.one();
205            encoder_func(item, self)?;
206        }
207
208        self.zero();
209
210        Ok(self)
211    }
212
213    /// Encodes up to 8 bits of information and is byte alignment agnostic.
214    /// Uses unused bits in the current byte to write out the passed in byte value.
215    /// Overflows to the most significant digits of the next byte if number of bits to use is greater than unused bits.
216    /// Expects that number of bits to use is greater than or equal to required bits by the value.
217    /// The param num_bits is i64 to match unused_bits type.
218    pub fn bits(&mut self, num_bits: i64, val: u8) -> &mut Self {
219        match (num_bits, val) {
220            (1, 0) => self.zero(),
221            (1, 1) => self.one(),
222            (2, 0) => {
223                self.zero();
224                self.zero();
225            }
226            (2, 1) => {
227                self.zero();
228                self.one();
229            }
230            (2, 2) => {
231                self.one();
232                self.zero();
233            }
234            (2, 3) => {
235                self.one();
236                self.one();
237            }
238            (_, _) => {
239                self.used_bits += num_bits;
240                let unused_bits = 8 - self.used_bits;
241                match unused_bits {
242                    x if x > 0 => {
243                        self.current_byte |= val << x;
244                    }
245                    x if x == 0 => {
246                        self.current_byte |= val;
247                        self.next_word();
248                    }
249                    x => {
250                        let used = -x;
251                        self.current_byte |= val >> used;
252                        self.next_word();
253                        self.current_byte = val << (8 - used);
254                        self.used_bits = used;
255                    }
256                }
257            }
258        }
259
260        self
261    }
262
263    /// A filler amount of end 0's followed by a 1 at the end of a byte.
264    /// Used to byte align the buffer by padding out the rest of the byte.
265    pub(crate) fn filler(&mut self) -> &mut Self {
266        self.current_byte |= 1;
267        self.next_word();
268
269        self
270    }
271
272    /// Write a 0 bit into the current byte.
273    /// Write out to buffer if last used bit in the current byte.
274    fn zero(&mut self) {
275        if self.used_bits == 7 {
276            self.next_word();
277        } else {
278            self.used_bits += 1;
279        }
280    }
281
282    /// Write a 1 bit into the current byte.
283    /// Write out to buffer if last used bit in the current byte.
284    fn one(&mut self) {
285        if self.used_bits == 7 {
286            self.current_byte |= 1;
287            self.next_word();
288        } else {
289            self.current_byte |= 128 >> self.used_bits;
290            self.used_bits += 1;
291        }
292    }
293    /// Write out byte regardless of current buffer alignment.
294    /// Write most signifcant bits in remaining unused bits for the current byte,
295    /// then write out the remaining bits at the beginning of the next byte.
296    fn byte_unaligned(&mut self, x: u8) {
297        let x_shift = self.current_byte | (x >> self.used_bits);
298        self.buffer.push(x_shift);
299
300        self.current_byte = x << (8 - self.used_bits);
301    }
302
303    /// Write the current byte out to the buffer and begin next byte to write out.
304    /// Add current byte to the buffer and set current byte and used bits to 0.
305    fn next_word(&mut self) {
306        self.buffer.push(self.current_byte);
307
308        self.current_byte = 0;
309        self.used_bits = 0;
310    }
311
312    /// Writes byte array length up to 255
313    /// Following that it writes the next 255 bytes from the array.
314    /// After reaching the end of the buffer we write a 0 byte. Only write 0 if the byte array is empty.
315    /// This is byte alignment agnostic.
316    fn write_blk(&mut self, arr: &[u8]) {
317        let chunks = arr.chunks(255);
318
319        for chunk in chunks {
320            self.buffer.push(chunk.len() as u8);
321            self.buffer.extend(chunk);
322        }
323        self.buffer.push(0);
324    }
325}