uplc_turbo/flat/encode/
encoder.rs

1use crate::{constant::Integer, flat::zigzag::ZigZag};
2
3use super::FlatEncodeError;
4
/// Bit-level writer that accumulates output into a byte buffer.
///
/// Bits are collected in `current_byte`, most significant bit first, and the
/// byte is appended to `buffer` once it is complete (or when it is padded
/// out by a filler).
#[derive(Debug, Default)]
pub struct Encoder {
    /// Bytes that have been completely written so far.
    pub buffer: Vec<u8>,
    /// Number of bits already occupied in `current_byte`. Kept as `i64` so
    /// it can be combined directly with the `i64` bit counts passed to
    /// `bits`.
    used_bits: i64,
    /// The partially filled byte that has not been pushed to `buffer` yet.
    current_byte: u8,
}
13
14impl Encoder {
15    /// Encode a unsigned integer of any size.
16    /// This is byte alignment agnostic.
17    /// We encode the 7 least significant bits of the unsigned byte. If the char
18    /// value is greater than 127 we encode a leading 1 followed by
19    /// repeating the above for the next 7 bits and so on.
20    pub fn word(&mut self, c: usize) -> &mut Self {
21        let mut d = c;
22        loop {
23            let mut w = (d & 127) as u8;
24            d >>= 7;
25
26            if d != 0 {
27                w |= 128;
28            }
29            self.bits(8, w);
30
31            if d == 0 {
32                break;
33            }
34        }
35
36        self
37    }
38
39    /// Encode a `bool` value. This is byte alignment agnostic.
40    /// Uses the next unused bit in the current byte to encode this information.
41    /// One for true and Zero for false
42    pub fn bool(&mut self, x: bool) -> &mut Self {
43        if x {
44            self.one();
45        } else {
46            self.zero();
47        }
48
49        self
50    }
51
52    /// Encode an arbitrarily sized integer.
53    ///
54    /// This is byte alignment agnostic.
55    /// First we use zigzag once to double the number and encode the negative
56    /// sign as the least significant bit. Next we encode the 7 least
57    /// significant bits of the unsigned integer. If the number is greater than
58    /// 127 we encode a leading 1 followed by repeating the encoding above for
59    /// the next 7 bits and so on.
60    pub fn integer(&mut self, i: &Integer) -> &mut Self {
61        self.big_word(i.zigzag());
62
63        self
64    }
65
66    /// Encodes up to 8 bits of information and is byte alignment agnostic.
67    /// Uses unused bits in the current byte to write out the passed in byte
68    /// value. Overflows to the most significant digits of the next byte if
69    /// number of bits to use is greater than unused bits. Expects that
70    /// number of bits to use is greater than or equal to required bits by the
71    /// value. The param num_bits is i64 to match unused_bits type.
72    pub fn bits(&mut self, num_bits: i64, val: u8) -> &mut Self {
73        match (num_bits, val) {
74            (1, 0) => self.zero(),
75            (1, 1) => self.one(),
76            (2, 0) => {
77                self.zero();
78                self.zero();
79            }
80            (2, 1) => {
81                self.zero();
82                self.one();
83            }
84            (2, 2) => {
85                self.one();
86                self.zero();
87            }
88            (2, 3) => {
89                self.one();
90                self.one();
91            }
92            (_, _) => {
93                self.used_bits += num_bits;
94                let unused_bits = 8 - self.used_bits;
95                match unused_bits {
96                    0 => {
97                        self.current_byte |= val;
98                        self.next_word();
99                    }
100                    x if x > 0 => {
101                        self.current_byte |= val << x;
102                    }
103                    x => {
104                        let used = -x;
105                        self.current_byte |= val >> used;
106                        self.next_word();
107                        self.current_byte = val << (8 - used);
108                        self.used_bits = used;
109                    }
110                }
111            }
112        }
113
114        self
115    }
116
117    /// Encode a byte array.
118    /// Uses filler to byte align the buffer, then writes byte array length up
119    /// to 255. Following that it writes the next 255 bytes from the array.
120    /// We repeat writing length up to 255 and the next 255 bytes until we reach
121    /// the end of the byte array. After reaching the end of the byte array
122    /// we write a 0 byte. Only write 0 byte if the byte array is empty.
123    pub fn bytes(&mut self, x: &[u8]) -> Result<&mut Self, FlatEncodeError> {
124        // use filler to write current buffer so bits used gets reset
125        self.filler();
126
127        self.byte_array(x)
128    }
129
130    /// Encode a byte array in a byte aligned buffer. Throws exception if any
131    /// bits for the current byte were used. Writes byte array length up to
132    /// 255 Following that it writes the next 255 bytes from the array.
133    /// We repeat writing length up to 255 and the next 255 bytes until we reach
134    /// the end of the byte array. After reaching the end of the buffer we
135    /// write a 0 byte. Only write 0 if the byte array is empty.
136    pub fn byte_array(&mut self, arr: &[u8]) -> Result<&mut Self, FlatEncodeError> {
137        if self.used_bits != 0 {
138            return Err(FlatEncodeError::BufferNotByteAligned);
139        }
140
141        self.write_blk(arr);
142
143        Ok(self)
144    }
145
146    /// Encode a unsigned integer of 128 bits size.
147    /// This is byte alignment agnostic.
148    /// We encode the 7 least significant bits of the unsigned byte. If the char
149    /// value is greater than 127 we encode a leading 1 followed by
150    /// repeating the above for the next 7 bits and so on.
151    pub fn big_word(&mut self, c: Integer) -> &mut Self {
152        let mut d = c;
153
154        loop {
155            let temp: Integer = d.clone() % 128;
156            let mut w = temp.to_u8().unwrap();
157
158            d >>= 7;
159
160            if d != 0 {
161                w |= 128;
162            }
163            self.bits(8, w);
164
165            if d == 0 {
166                break;
167            }
168        }
169
170        self
171    }
172
173    /// Encode a string.
174    /// Convert to byte array and then use byte array encoding.
175    /// Uses filler to byte align the buffer, then writes byte array length up
176    /// to 255. Following that it writes the next 255 bytes from the array.
177    /// After reaching the end of the buffer we write a 0 byte. Only write 0
178    /// byte if the byte array is empty.
179    pub fn utf8(&mut self, s: &str) -> Result<&mut Self, FlatEncodeError> {
180        self.bytes(s.as_bytes())
181    }
182
183    /// Encode a list of bytes with a function
184    /// This is byte alignment agnostic.
185    /// If there are bytes in a list then write 1 bit followed by the functions
186    /// encoding. After the last item write a 0 bit. If the list is empty
187    /// only encode a 0 bit.
188    pub fn list_with<T>(
189        &mut self,
190        list: &[T],
191        encoder_func: for<'r> fn(&'r mut Encoder, &T) -> Result<(), FlatEncodeError>,
192    ) -> Result<&mut Self, FlatEncodeError> {
193        for item in list {
194            self.one();
195
196            encoder_func(self, item)?;
197        }
198
199        self.zero();
200
201        Ok(self)
202    }
203
204    /// A filler amount of end 0's followed by a 1 at the end of a byte.
205    /// Used to byte align the buffer by padding out the rest of the byte.
206    pub fn filler(&mut self) -> &mut Self {
207        self.current_byte |= 1;
208        self.next_word();
209
210        self
211    }
212
213    /// Write a 0 bit into the current byte.
214    /// Write out to buffer if last used bit in the current byte.
215    fn zero(&mut self) {
216        if self.used_bits == 7 {
217            self.next_word();
218        } else {
219            self.used_bits += 1;
220        }
221    }
222
223    /// Write a 1 bit into the current byte.
224    /// Write out to buffer if last used bit in the current byte.
225    fn one(&mut self) {
226        if self.used_bits == 7 {
227            self.current_byte |= 1;
228            self.next_word();
229        } else {
230            self.current_byte |= 128 >> self.used_bits;
231            self.used_bits += 1;
232        }
233    }
234
235    /// Write the current byte out to the buffer and begin next byte to write
236    /// out. Add current byte to the buffer and set current byte and used
237    /// bits to 0.
238    fn next_word(&mut self) {
239        self.buffer.push(self.current_byte);
240
241        self.current_byte = 0;
242        self.used_bits = 0;
243    }
244
245    /// Writes byte array length up to 255
246    /// Following that it writes the next 255 bytes from the array.
247    /// After reaching the end of the buffer we write a 0 byte. Only write 0 if
248    /// the byte array is empty. This is byte alignment agnostic.
249    fn write_blk(&mut self, arr: &[u8]) {
250        let chunks = arr.chunks(255);
251
252        for chunk in chunks {
253            self.buffer.push(chunk.len() as u8);
254            self.buffer.extend(chunk);
255        }
256        self.buffer.push(0_u8);
257    }
258}