flat_rs/encode/encoder.rs
1use crate::{encode::Encode, zigzag};
2
3use super::Error;
4
/// Bit-level writer that accumulates its output in a byte buffer.
///
/// Bits are packed into `current_byte` starting from the most significant
/// bit; every time a byte is completed it is appended to `buffer` and a fresh
/// byte is started.
pub struct Encoder {
    /// Bytes that have been completely written so far.
    pub buffer: Vec<u8>,
    /// Number of bits of `current_byte` that are already occupied.
    /// Reset to 0 whenever the current byte is pushed onto `buffer`.
    used_bits: i64,
    /// The partially filled byte that subsequent bit writes are OR-ed into.
    current_byte: u8,
}
12
13impl Default for Encoder {
14 fn default() -> Self {
15 Self::new()
16 }
17}
18
19impl Encoder {
20 pub fn new() -> Encoder {
21 Encoder {
22 buffer: Vec::new(),
23 used_bits: 0,
24 current_byte: 0,
25 }
26 }
27
28 /// Encode any type that implements [`Encode`].
29 pub fn encode<T: Encode>(&mut self, x: T) -> Result<&mut Self, Error> {
30 x.encode(self)?;
31
32 Ok(self)
33 }
34
35 /// Encode 1 unsigned byte.
36 /// Uses the next 8 bits in the buffer, can be byte aligned or byte unaligned
37 pub fn u8(&mut self, x: u8) -> Result<&mut Self, Error> {
38 if self.used_bits == 0 {
39 self.current_byte = x;
40 self.next_word();
41 } else {
42 self.byte_unaligned(x);
43 }
44
45 Ok(self)
46 }
47
48 /// Encode a `bool` value. This is byte alignment agnostic.
49 /// Uses the next unused bit in the current byte to encode this information.
50 /// One for true and Zero for false
51 pub fn bool(&mut self, x: bool) -> &mut Self {
52 if x {
53 self.one();
54 } else {
55 self.zero();
56 }
57
58 self
59 }
60
61 /// Encode a byte array.
62 /// Uses filler to byte align the buffer, then writes byte array length up to 255.
63 /// Following that it writes the next 255 bytes from the array.
64 /// We repeat writing length up to 255 and the next 255 bytes until we reach the end of the byte array.
65 /// After reaching the end of the byte array we write a 0 byte. Only write 0 byte if the byte array is empty.
66 pub fn bytes(&mut self, x: &[u8]) -> Result<&mut Self, Error> {
67 // use filler to write current buffer so bits used gets reset
68 self.filler();
69
70 self.byte_array(x)
71 }
72
73 /// Encode a byte array in a byte aligned buffer. Throws exception if any bits for the current byte were used.
74 /// Writes byte array length up to 255
75 /// Following that it writes the next 255 bytes from the array.
76 /// We repeat writing length up to 255 and the next 255 bytes until we reach the end of the byte array.
77 /// After reaching the end of the buffer we write a 0 byte. Only write 0 if the byte array is empty.
78 pub fn byte_array(&mut self, arr: &[u8]) -> Result<&mut Self, Error> {
79 if self.used_bits != 0 {
80 return Err(Error::BufferNotByteAligned);
81 }
82
83 self.write_blk(arr);
84
85 Ok(self)
86 }
87
88 /// Encode an integer of any size.
89 /// This is byte alignment agnostic.
90 /// First we use zigzag once to double the number and encode the negative sign as the least significant bit.
91 /// Next we encode the 7 least significant bits of the unsigned integer. If the number is greater than
92 /// 127 we encode a leading 1 followed by repeating the encoding above for the next 7 bits and so on.
93 pub fn integer(&mut self, i: isize) -> &mut Self {
94 let i = zigzag::to_usize(i);
95
96 self.word(i);
97
98 self
99 }
100
101 /// Encode an integer of 128 bits size.
102 /// This is byte alignment agnostic.
103 /// First we use zigzag once to double the number and encode the negative sign as the least significant bit.
104 /// Next we encode the 7 least significant bits of the unsigned integer. If the number is greater than
105 /// 127 we encode a leading 1 followed by repeating the encoding above for the next 7 bits and so on.
106 pub fn big_integer(&mut self, i: i128) -> &mut Self {
107 let i = zigzag::to_u128(i);
108
109 self.big_word(i);
110
111 self
112 }
113
114 /// Encode a char of 32 bits.
115 /// This is byte alignment agnostic.
116 /// We encode the 7 least significant bits of the unsigned byte. If the char value is greater than
117 /// 127 we encode a leading 1 followed by repeating the above for the next 7 bits and so on.
118 pub fn char(&mut self, c: char) -> &mut Self {
119 self.word(c as usize);
120
121 self
122 }
123
124 // TODO: Do we need this?
125 pub fn string(&mut self, s: &str) -> &mut Self {
126 for i in s.chars() {
127 self.one();
128 self.char(i);
129 }
130
131 self.zero();
132
133 self
134 }
135
136 /// Encode a string.
137 /// Convert to byte array and then use byte array encoding.
138 /// Uses filler to byte align the buffer, then writes byte array length up to 255.
139 /// Following that it writes the next 255 bytes from the array.
140 /// After reaching the end of the buffer we write a 0 byte. Only write 0 byte if the byte array is empty.
141 pub fn utf8(&mut self, s: &str) -> Result<&mut Self, Error> {
142 self.bytes(s.as_bytes())
143 }
144
145 /// Encode a unsigned integer of any size.
146 /// This is byte alignment agnostic.
147 /// We encode the 7 least significant bits of the unsigned byte. If the char value is greater than
148 /// 127 we encode a leading 1 followed by repeating the above for the next 7 bits and so on.
149 pub fn word(&mut self, c: usize) -> &mut Self {
150 let mut d = c;
151 loop {
152 let mut w = (d & 127) as u8;
153 d >>= 7;
154
155 if d != 0 {
156 w |= 128;
157 }
158 self.bits(8, w);
159
160 if d == 0 {
161 break;
162 }
163 }
164
165 self
166 }
167
168 /// Encode a unsigned integer of 128 bits size.
169 /// This is byte alignment agnostic.
170 /// We encode the 7 least significant bits of the unsigned byte. If the char value is greater than
171 /// 127 we encode a leading 1 followed by repeating the above for the next 7 bits and so on.
172 pub fn big_word(&mut self, c: u128) -> &mut Self {
173 let mut d = c;
174 loop {
175 let mut w = (d & 127) as u8;
176 d >>= 7;
177
178 if d != 0 {
179 w |= 128;
180 }
181 self.bits(8, w);
182
183 if d == 0 {
184 break;
185 }
186 }
187
188 self
189 }
190
191 /// Encode a list of bytes with a function
192 /// This is byte alignment agnostic.
193 /// If there are bytes in a list then write 1 bit followed by the functions encoding.
194 /// After the last item write a 0 bit. If the list is empty only encode a 0 bit.
195 pub fn encode_list_with<T>(
196 &mut self,
197 list: &[T],
198 encoder_func: for<'r> fn(&T, &'r mut Encoder) -> Result<(), Error>,
199 ) -> Result<&mut Self, Error>
200 where
201 T: Encode,
202 {
203 for item in list {
204 self.one();
205 encoder_func(item, self)?;
206 }
207
208 self.zero();
209
210 Ok(self)
211 }
212
213 /// Encodes up to 8 bits of information and is byte alignment agnostic.
214 /// Uses unused bits in the current byte to write out the passed in byte value.
215 /// Overflows to the most significant digits of the next byte if number of bits to use is greater than unused bits.
216 /// Expects that number of bits to use is greater than or equal to required bits by the value.
217 /// The param num_bits is i64 to match unused_bits type.
218 pub fn bits(&mut self, num_bits: i64, val: u8) -> &mut Self {
219 match (num_bits, val) {
220 (1, 0) => self.zero(),
221 (1, 1) => self.one(),
222 (2, 0) => {
223 self.zero();
224 self.zero();
225 }
226 (2, 1) => {
227 self.zero();
228 self.one();
229 }
230 (2, 2) => {
231 self.one();
232 self.zero();
233 }
234 (2, 3) => {
235 self.one();
236 self.one();
237 }
238 (_, _) => {
239 self.used_bits += num_bits;
240 let unused_bits = 8 - self.used_bits;
241 match unused_bits {
242 x if x > 0 => {
243 self.current_byte |= val << x;
244 }
245 x if x == 0 => {
246 self.current_byte |= val;
247 self.next_word();
248 }
249 x => {
250 let used = -x;
251 self.current_byte |= val >> used;
252 self.next_word();
253 self.current_byte = val << (8 - used);
254 self.used_bits = used;
255 }
256 }
257 }
258 }
259
260 self
261 }
262
263 /// A filler amount of end 0's followed by a 1 at the end of a byte.
264 /// Used to byte align the buffer by padding out the rest of the byte.
265 pub(crate) fn filler(&mut self) -> &mut Self {
266 self.current_byte |= 1;
267 self.next_word();
268
269 self
270 }
271
272 /// Write a 0 bit into the current byte.
273 /// Write out to buffer if last used bit in the current byte.
274 fn zero(&mut self) {
275 if self.used_bits == 7 {
276 self.next_word();
277 } else {
278 self.used_bits += 1;
279 }
280 }
281
282 /// Write a 1 bit into the current byte.
283 /// Write out to buffer if last used bit in the current byte.
284 fn one(&mut self) {
285 if self.used_bits == 7 {
286 self.current_byte |= 1;
287 self.next_word();
288 } else {
289 self.current_byte |= 128 >> self.used_bits;
290 self.used_bits += 1;
291 }
292 }
293 /// Write out byte regardless of current buffer alignment.
294 /// Write most signifcant bits in remaining unused bits for the current byte,
295 /// then write out the remaining bits at the beginning of the next byte.
296 fn byte_unaligned(&mut self, x: u8) {
297 let x_shift = self.current_byte | (x >> self.used_bits);
298 self.buffer.push(x_shift);
299
300 self.current_byte = x << (8 - self.used_bits);
301 }
302
303 /// Write the current byte out to the buffer and begin next byte to write out.
304 /// Add current byte to the buffer and set current byte and used bits to 0.
305 fn next_word(&mut self) {
306 self.buffer.push(self.current_byte);
307
308 self.current_byte = 0;
309 self.used_bits = 0;
310 }
311
312 /// Writes byte array length up to 255
313 /// Following that it writes the next 255 bytes from the array.
314 /// After reaching the end of the buffer we write a 0 byte. Only write 0 if the byte array is empty.
315 /// This is byte alignment agnostic.
316 fn write_blk(&mut self, arr: &[u8]) {
317 let chunks = arr.chunks(255);
318
319 for chunk in chunks {
320 self.buffer.push(chunk.len() as u8);
321 self.buffer.extend(chunk);
322 }
323 self.buffer.push(0);
324 }
325}