uplc_turbo/flat/decode/
decoder.rs

1use bumpalo::{
2    collections::{String as BumpString, Vec as BumpVec},
3    Bump,
4};
5
6use crate::{constant::Integer, flat::zigzag::ZigZag};
7
8use super::FlatDecodeError;
9
/// Cursor over a byte slice that is consumed bit by bit.
///
/// The read position is tracked as a byte index (`pos`) plus the number of
/// bits already consumed from that byte (`used_bits`). Bits are read from the
/// most significant bit of each byte towards the least significant one.
pub struct Decoder<'b> {
    /// The bytes being decoded.
    pub buffer: &'b [u8],
    /// Number of bits already consumed from `buffer[pos]`. The methods of
    /// this type keep it in the range `0..8`.
    pub used_bits: usize,
    /// Index into `buffer` of the byte currently being read.
    pub pos: usize,
}
15
16pub struct Ctx<'a> {
17    pub arena: &'a Bump,
18}
19
20impl<'b> Decoder<'b> {
21    pub fn new(bytes: &'b [u8]) -> Decoder<'b> {
22        Decoder {
23            buffer: bytes,
24            pos: 0,
25            used_bits: 0,
26        }
27    }
28
29    /// Decode a word of any size.
30    /// This is byte alignment agnostic.
31    /// First we decode the next 8 bits of the buffer.
32    /// We take the 7 least significant bits as the 7 least significant bits of
33    /// the current unsigned integer. If the most significant bit of the 8
34    /// bits is 1 then we take the next 8 and repeat the process above,
35    /// filling in the next 7 least significant bits of the unsigned integer and
36    /// so on. If the most significant bit was instead 0 we stop decoding
37    /// any more bits.
38    pub fn word(&mut self) -> Result<usize, FlatDecodeError> {
39        let mut leading_bit = 1;
40        let mut final_word: usize = 0;
41        let mut shl: usize = 0;
42
43        // continue looping if lead bit is 1 which is 128 as a u8 otherwise exit
44        while leading_bit > 0 {
45            let word8 = self.bits8(8)?;
46
47            let word7 = word8 & 127;
48
49            final_word |= (word7 as usize) << shl;
50
51            shl += 7;
52
53            leading_bit = word8 & 128;
54        }
55
56        Ok(final_word)
57    }
58
59    /// Decode a list of items with a decoder function.
60    /// This is byte alignment agnostic.
61    /// Decode a bit from the buffer.
62    /// If 0 then stop.
63    /// Otherwise we decode an item in the list with the decoder function passed
64    /// in. Then decode the next bit in the buffer and repeat above.
65    /// Returns a list of items decoded with the decoder function.
66    pub fn list_with<'a, T, F>(
67        &mut self,
68        ctx: &mut Ctx<'a>,
69        decoder_func: F,
70    ) -> Result<BumpVec<'a, T>, FlatDecodeError>
71    where
72        F: Copy + FnOnce(&mut Ctx<'a>, &mut Decoder) -> Result<T, FlatDecodeError>,
73    {
74        let mut vec_array = BumpVec::new_in(ctx.arena);
75
76        while self.bit()? {
77            vec_array.push(decoder_func(ctx, self)?)
78        }
79
80        Ok(vec_array)
81    }
82
83    /// Decode up to 8 bits.
84    /// This is byte alignment agnostic.
85    /// If num_bits is greater than the 8 we throw an IncorrectNumBits error.
86    /// First we decode the next num_bits of bits in the buffer.
87    /// If there are less unused bits in the current byte in the buffer than
88    /// num_bits, then we decode the remaining bits from the most
89    /// significant bits in the next byte in the buffer. Otherwise we decode
90    /// the unused bits from the current byte. Returns the decoded value up
91    /// to a byte in size.
92    pub fn bits8(&mut self, num_bits: usize) -> Result<u8, FlatDecodeError> {
93        if num_bits > 8 {
94            return Err(FlatDecodeError::IncorrectNumBits);
95        }
96
97        self.ensure_bits(num_bits)?;
98
99        let unused_bits = 8 - self.used_bits;
100        let leading_zeroes = 8 - num_bits;
101        let r = (self.buffer[self.pos] << self.used_bits) >> leading_zeroes;
102
103        let x = if num_bits > unused_bits {
104            r | (self.buffer[self.pos + 1] >> (unused_bits + leading_zeroes))
105        } else {
106            r
107        };
108
109        self.drop_bits(num_bits);
110
111        Ok(x)
112    }
113
114    /// Ensures the buffer has the required bits passed in by required_bits.
115    /// Throws a NotEnoughBits error if there are less bits remaining in the
116    /// buffer than required_bits.
117    fn ensure_bits(&mut self, required_bits: usize) -> Result<(), FlatDecodeError> {
118        if required_bits > (self.buffer.len() - self.pos) * 8 - self.used_bits {
119            Err(FlatDecodeError::NotEnoughBits(required_bits))
120        } else {
121            Ok(())
122        }
123    }
124
125    /// Increment buffer by num_bits.
126    /// If num_bits + used bits is greater than 8,
127    /// then increment position by (num_bits + used bits) / 8
128    /// Use the left over remainder as the new amount of used bits.
129    fn drop_bits(&mut self, num_bits: usize) {
130        let all_used_bits = num_bits + self.used_bits;
131
132        self.used_bits = all_used_bits % 8;
133
134        self.pos += all_used_bits / 8;
135    }
136
137    /// Decodes a filler of max one byte size.
138    /// Decodes bits until we hit a bit that is 1.
139    /// Expects that the 1 is at the end of the current byte in the buffer.
140    pub fn filler(&mut self) -> Result<(), FlatDecodeError> {
141        while self.zero()? {}
142
143        Ok(())
144    }
145
146    /// Decode the next bit in the buffer.
147    /// If the bit was 0 then return true.
148    /// Otherwise return false.
149    /// Throws EndOfBuffer error if used at the end of the array.
150    fn zero(&mut self) -> Result<bool, FlatDecodeError> {
151        let current_bit = self.bit()?;
152
153        Ok(!current_bit)
154    }
155
156    /// Decode the next bit in the buffer.
157    /// If the bit was 1 then return true.
158    /// Otherwise return false.
159    /// Throws EndOfBuffer error if used at the end of the array.
160    pub fn bit(&mut self) -> Result<bool, FlatDecodeError> {
161        if self.pos >= self.buffer.len() {
162            return Err(FlatDecodeError::EndOfBuffer);
163        }
164
165        let b = self.buffer[self.pos] & (128 >> self.used_bits) > 0;
166
167        self.increment_buffer_by_bit();
168
169        Ok(b)
170    }
171
172    /// Decode an integer of an arbitrary size..
173    ///
174    /// This is byte alignment agnostic.
175    /// First we decode the next 8 bits of the buffer.
176    /// We take the 7 least significant bits as the 7 least significant bits of
177    /// the current unsigned integer. If the most significant bit of the 8
178    /// bits is 1 then we take the next 8 and repeat the process above,
179    /// filling in the next 7 least significant bits of the unsigned integer and
180    /// so on. If the most significant bit was instead 0 we stop decoding
181    /// any more bits. Finally we use zigzag to convert the unsigned integer
182    /// back to a signed integer.
183    pub fn integer(&mut self) -> Result<Integer, FlatDecodeError> {
184        Ok(ZigZag::unzigzag(&self.big_word()?))
185    }
186
187    /// Decode a word of 128 bits size.
188    /// This is byte alignment agnostic.
189    /// First we decode the next 8 bits of the buffer.
190    /// We take the 7 least significant bits as the 7 least significant bits of
191    /// the current unsigned integer. If the most significant bit of the 8
192    /// bits is 1 then we take the next 8 and repeat the process above,
193    /// filling in the next 7 least significant bits of the unsigned integer and
194    /// so on. If the most significant bit was instead 0 we stop decoding
195    /// any more bits.
196    pub fn big_word(&mut self) -> Result<Integer, FlatDecodeError> {
197        let mut leading_bit = 1;
198        let mut final_word = Integer::from(0);
199        let mut shift = 0_u32; // Using u32 for shift as it's more than enough for 128 bits
200
201        // Continue looping if lead bit is 1 (0x80) otherwise exit
202        while leading_bit > 0 {
203            let word8 = self.bits8(8)?;
204            let word7 = word8 & 0x7F; // 127, get 7 least significant bits
205
206            // Create temporary Integer from word7 and shift it
207            let part = Integer::from(word7);
208            let shifted_part = part << shift;
209
210            // OR it with our result
211            final_word |= shifted_part;
212
213            // Increment shift by 7 for next iteration
214            shift += 7;
215
216            // Check if we should continue (MSB set)
217            leading_bit = word8 & 0x80; // 128
218        }
219
220        Ok(final_word)
221    }
222
223    /// Decode a byte array.
224    /// Decodes a filler to byte align the buffer,
225    /// then decodes the next byte to get the array length up to a max of 255.
226    /// We decode bytes equal to the array length to form the byte array.
227    /// If the following byte for array length is not 0 we decode it and repeat
228    /// above to continue decoding the byte array. We stop once we hit a
229    /// byte array length of 0. If array length is 0 for first byte array
230    /// length the we return a empty array.
231    pub fn bytes<'a>(&mut self, arena: &'a Bump) -> Result<BumpVec<'a, u8>, FlatDecodeError> {
232        self.filler()?;
233        self.byte_array(arena)
234    }
235
236    /// Decode a byte array.
237    /// Throws a BufferNotByteAligned error if the buffer is not byte aligned
238    /// Decodes the next byte to get the array length up to a max of 255.
239    /// We decode bytes equal to the array length to form the byte array.
240    /// If the following byte for array length is not 0 we decode it and repeat
241    /// above to continue decoding the byte array. We stop once we hit a
242    /// byte array length of 0. If array length is 0 for first byte array
243    /// length the we return a empty array.
244    fn byte_array<'a>(&mut self, arena: &'a Bump) -> Result<BumpVec<'a, u8>, FlatDecodeError> {
245        if self.used_bits != 0 {
246            return Err(FlatDecodeError::BufferNotByteAligned);
247        }
248
249        self.ensure_bytes(1)?;
250
251        let mut blk_len = self.buffer[self.pos] as usize;
252
253        self.pos += 1;
254
255        let mut blk_array = BumpVec::with_capacity_in(blk_len, arena);
256
257        while blk_len != 0 {
258            self.ensure_bytes(blk_len + 1)?;
259
260            let decoded_array = &self.buffer[self.pos..self.pos + blk_len];
261
262            blk_array.extend(decoded_array);
263
264            self.pos += blk_len;
265
266            blk_len = self.buffer[self.pos] as usize;
267
268            self.pos += 1
269        }
270
271        Ok(blk_array)
272    }
273
274    /// Decode a string.
275    /// Convert to byte array and then use byte array decoding.
276    /// Decodes a filler to byte align the buffer,
277    /// then decodes the next byte to get the array length up to a max of 255.
278    /// We decode bytes equal to the array length to form the byte array.
279    /// If the following byte for array length is not 0 we decode it and repeat
280    /// above to continue decoding the byte array. We stop once we hit a
281    /// byte array length of 0. If array length is 0 for first byte array
282    /// length the we return a empty array.
283    pub fn utf8<'a>(&mut self, arena: &'a Bump) -> Result<&'a str, FlatDecodeError> {
284        let b = self.bytes(arena)?;
285
286        let s =
287            BumpString::from_utf8(b).map_err(|e| FlatDecodeError::DecodeUtf8(e.utf8_error()))?;
288        let s = arena.alloc(s);
289
290        Ok(s)
291    }
292
293    /// Increment used bits by 1.
294    /// If all 8 bits are used then increment buffer position by 1.
295    fn increment_buffer_by_bit(&mut self) {
296        if self.used_bits == 7 {
297            self.pos += 1;
298
299            self.used_bits = 0;
300        } else {
301            self.used_bits += 1;
302        }
303    }
304
305    /// Ensures the buffer has the required bytes passed in by required_bytes.
306    /// Throws a NotEnoughBytes error if there are less bytes remaining in the
307    /// buffer than required_bytes.
308    fn ensure_bytes(&mut self, required_bytes: usize) -> Result<(), FlatDecodeError> {
309        if required_bytes > self.buffer.len() - self.pos {
310            Err(FlatDecodeError::NotEnoughBytes(required_bytes))
311        } else {
312            Ok(())
313        }
314    }
315}