uplc_turbo/flat/decode/decoder.rs
1use bumpalo::{
2 collections::{String as BumpString, Vec as BumpVec},
3 Bump,
4};
5
6use crate::{constant::Integer, flat::zigzag::ZigZag};
7
8use super::FlatDecodeError;
9
/// Bit-level read cursor over a borrowed byte buffer.
///
/// The cursor position is the pair (`pos`, `used_bits`): `pos` selects the
/// current byte and `used_bits` counts how many of its bits, starting from
/// the most significant one, have already been consumed.
pub struct Decoder<'b> {
    /// The bytes being decoded.
    pub buffer: &'b [u8],
    /// Number of bits already consumed from the byte at `pos`.
    pub used_bits: usize,
    /// Index into `buffer` of the byte currently being read.
    pub pos: usize,
}
15
16pub struct Ctx<'a> {
17 pub arena: &'a Bump,
18}
19
20impl<'b> Decoder<'b> {
21 pub fn new(bytes: &'b [u8]) -> Decoder<'b> {
22 Decoder {
23 buffer: bytes,
24 pos: 0,
25 used_bits: 0,
26 }
27 }
28
29 /// Decode a word of any size.
30 /// This is byte alignment agnostic.
31 /// First we decode the next 8 bits of the buffer.
32 /// We take the 7 least significant bits as the 7 least significant bits of
33 /// the current unsigned integer. If the most significant bit of the 8
34 /// bits is 1 then we take the next 8 and repeat the process above,
35 /// filling in the next 7 least significant bits of the unsigned integer and
36 /// so on. If the most significant bit was instead 0 we stop decoding
37 /// any more bits.
38 pub fn word(&mut self) -> Result<usize, FlatDecodeError> {
39 let mut leading_bit = 1;
40 let mut final_word: usize = 0;
41 let mut shl: usize = 0;
42
43 // continue looping if lead bit is 1 which is 128 as a u8 otherwise exit
44 while leading_bit > 0 {
45 let word8 = self.bits8(8)?;
46
47 let word7 = word8 & 127;
48
49 final_word |= (word7 as usize) << shl;
50
51 shl += 7;
52
53 leading_bit = word8 & 128;
54 }
55
56 Ok(final_word)
57 }
58
59 /// Decode a list of items with a decoder function.
60 /// This is byte alignment agnostic.
61 /// Decode a bit from the buffer.
62 /// If 0 then stop.
63 /// Otherwise we decode an item in the list with the decoder function passed
64 /// in. Then decode the next bit in the buffer and repeat above.
65 /// Returns a list of items decoded with the decoder function.
66 pub fn list_with<'a, T, F>(
67 &mut self,
68 ctx: &mut Ctx<'a>,
69 decoder_func: F,
70 ) -> Result<BumpVec<'a, T>, FlatDecodeError>
71 where
72 F: Copy + FnOnce(&mut Ctx<'a>, &mut Decoder) -> Result<T, FlatDecodeError>,
73 {
74 let mut vec_array = BumpVec::new_in(ctx.arena);
75
76 while self.bit()? {
77 vec_array.push(decoder_func(ctx, self)?)
78 }
79
80 Ok(vec_array)
81 }
82
83 /// Decode up to 8 bits.
84 /// This is byte alignment agnostic.
85 /// If num_bits is greater than the 8 we throw an IncorrectNumBits error.
86 /// First we decode the next num_bits of bits in the buffer.
87 /// If there are less unused bits in the current byte in the buffer than
88 /// num_bits, then we decode the remaining bits from the most
89 /// significant bits in the next byte in the buffer. Otherwise we decode
90 /// the unused bits from the current byte. Returns the decoded value up
91 /// to a byte in size.
92 pub fn bits8(&mut self, num_bits: usize) -> Result<u8, FlatDecodeError> {
93 if num_bits > 8 {
94 return Err(FlatDecodeError::IncorrectNumBits);
95 }
96
97 self.ensure_bits(num_bits)?;
98
99 let unused_bits = 8 - self.used_bits;
100 let leading_zeroes = 8 - num_bits;
101 let r = (self.buffer[self.pos] << self.used_bits) >> leading_zeroes;
102
103 let x = if num_bits > unused_bits {
104 r | (self.buffer[self.pos + 1] >> (unused_bits + leading_zeroes))
105 } else {
106 r
107 };
108
109 self.drop_bits(num_bits);
110
111 Ok(x)
112 }
113
114 /// Ensures the buffer has the required bits passed in by required_bits.
115 /// Throws a NotEnoughBits error if there are less bits remaining in the
116 /// buffer than required_bits.
117 fn ensure_bits(&mut self, required_bits: usize) -> Result<(), FlatDecodeError> {
118 if required_bits > (self.buffer.len() - self.pos) * 8 - self.used_bits {
119 Err(FlatDecodeError::NotEnoughBits(required_bits))
120 } else {
121 Ok(())
122 }
123 }
124
125 /// Increment buffer by num_bits.
126 /// If num_bits + used bits is greater than 8,
127 /// then increment position by (num_bits + used bits) / 8
128 /// Use the left over remainder as the new amount of used bits.
129 fn drop_bits(&mut self, num_bits: usize) {
130 let all_used_bits = num_bits + self.used_bits;
131
132 self.used_bits = all_used_bits % 8;
133
134 self.pos += all_used_bits / 8;
135 }
136
137 /// Decodes a filler of max one byte size.
138 /// Decodes bits until we hit a bit that is 1.
139 /// Expects that the 1 is at the end of the current byte in the buffer.
140 pub fn filler(&mut self) -> Result<(), FlatDecodeError> {
141 while self.zero()? {}
142
143 Ok(())
144 }
145
146 /// Decode the next bit in the buffer.
147 /// If the bit was 0 then return true.
148 /// Otherwise return false.
149 /// Throws EndOfBuffer error if used at the end of the array.
150 fn zero(&mut self) -> Result<bool, FlatDecodeError> {
151 let current_bit = self.bit()?;
152
153 Ok(!current_bit)
154 }
155
156 /// Decode the next bit in the buffer.
157 /// If the bit was 1 then return true.
158 /// Otherwise return false.
159 /// Throws EndOfBuffer error if used at the end of the array.
160 pub fn bit(&mut self) -> Result<bool, FlatDecodeError> {
161 if self.pos >= self.buffer.len() {
162 return Err(FlatDecodeError::EndOfBuffer);
163 }
164
165 let b = self.buffer[self.pos] & (128 >> self.used_bits) > 0;
166
167 self.increment_buffer_by_bit();
168
169 Ok(b)
170 }
171
172 /// Decode an integer of an arbitrary size..
173 ///
174 /// This is byte alignment agnostic.
175 /// First we decode the next 8 bits of the buffer.
176 /// We take the 7 least significant bits as the 7 least significant bits of
177 /// the current unsigned integer. If the most significant bit of the 8
178 /// bits is 1 then we take the next 8 and repeat the process above,
179 /// filling in the next 7 least significant bits of the unsigned integer and
180 /// so on. If the most significant bit was instead 0 we stop decoding
181 /// any more bits. Finally we use zigzag to convert the unsigned integer
182 /// back to a signed integer.
183 pub fn integer(&mut self) -> Result<Integer, FlatDecodeError> {
184 Ok(ZigZag::unzigzag(&self.big_word()?))
185 }
186
187 /// Decode a word of 128 bits size.
188 /// This is byte alignment agnostic.
189 /// First we decode the next 8 bits of the buffer.
190 /// We take the 7 least significant bits as the 7 least significant bits of
191 /// the current unsigned integer. If the most significant bit of the 8
192 /// bits is 1 then we take the next 8 and repeat the process above,
193 /// filling in the next 7 least significant bits of the unsigned integer and
194 /// so on. If the most significant bit was instead 0 we stop decoding
195 /// any more bits.
196 pub fn big_word(&mut self) -> Result<Integer, FlatDecodeError> {
197 let mut leading_bit = 1;
198 let mut final_word = Integer::from(0);
199 let mut shift = 0_u32; // Using u32 for shift as it's more than enough for 128 bits
200
201 // Continue looping if lead bit is 1 (0x80) otherwise exit
202 while leading_bit > 0 {
203 let word8 = self.bits8(8)?;
204 let word7 = word8 & 0x7F; // 127, get 7 least significant bits
205
206 // Create temporary Integer from word7 and shift it
207 let part = Integer::from(word7);
208 let shifted_part = part << shift;
209
210 // OR it with our result
211 final_word |= shifted_part;
212
213 // Increment shift by 7 for next iteration
214 shift += 7;
215
216 // Check if we should continue (MSB set)
217 leading_bit = word8 & 0x80; // 128
218 }
219
220 Ok(final_word)
221 }
222
223 /// Decode a byte array.
224 /// Decodes a filler to byte align the buffer,
225 /// then decodes the next byte to get the array length up to a max of 255.
226 /// We decode bytes equal to the array length to form the byte array.
227 /// If the following byte for array length is not 0 we decode it and repeat
228 /// above to continue decoding the byte array. We stop once we hit a
229 /// byte array length of 0. If array length is 0 for first byte array
230 /// length the we return a empty array.
231 pub fn bytes<'a>(&mut self, arena: &'a Bump) -> Result<BumpVec<'a, u8>, FlatDecodeError> {
232 self.filler()?;
233 self.byte_array(arena)
234 }
235
236 /// Decode a byte array.
237 /// Throws a BufferNotByteAligned error if the buffer is not byte aligned
238 /// Decodes the next byte to get the array length up to a max of 255.
239 /// We decode bytes equal to the array length to form the byte array.
240 /// If the following byte for array length is not 0 we decode it and repeat
241 /// above to continue decoding the byte array. We stop once we hit a
242 /// byte array length of 0. If array length is 0 for first byte array
243 /// length the we return a empty array.
244 fn byte_array<'a>(&mut self, arena: &'a Bump) -> Result<BumpVec<'a, u8>, FlatDecodeError> {
245 if self.used_bits != 0 {
246 return Err(FlatDecodeError::BufferNotByteAligned);
247 }
248
249 self.ensure_bytes(1)?;
250
251 let mut blk_len = self.buffer[self.pos] as usize;
252
253 self.pos += 1;
254
255 let mut blk_array = BumpVec::with_capacity_in(blk_len, arena);
256
257 while blk_len != 0 {
258 self.ensure_bytes(blk_len + 1)?;
259
260 let decoded_array = &self.buffer[self.pos..self.pos + blk_len];
261
262 blk_array.extend(decoded_array);
263
264 self.pos += blk_len;
265
266 blk_len = self.buffer[self.pos] as usize;
267
268 self.pos += 1
269 }
270
271 Ok(blk_array)
272 }
273
274 /// Decode a string.
275 /// Convert to byte array and then use byte array decoding.
276 /// Decodes a filler to byte align the buffer,
277 /// then decodes the next byte to get the array length up to a max of 255.
278 /// We decode bytes equal to the array length to form the byte array.
279 /// If the following byte for array length is not 0 we decode it and repeat
280 /// above to continue decoding the byte array. We stop once we hit a
281 /// byte array length of 0. If array length is 0 for first byte array
282 /// length the we return a empty array.
283 pub fn utf8<'a>(&mut self, arena: &'a Bump) -> Result<&'a str, FlatDecodeError> {
284 let b = self.bytes(arena)?;
285
286 let s =
287 BumpString::from_utf8(b).map_err(|e| FlatDecodeError::DecodeUtf8(e.utf8_error()))?;
288 let s = arena.alloc(s);
289
290 Ok(s)
291 }
292
293 /// Increment used bits by 1.
294 /// If all 8 bits are used then increment buffer position by 1.
295 fn increment_buffer_by_bit(&mut self) {
296 if self.used_bits == 7 {
297 self.pos += 1;
298
299 self.used_bits = 0;
300 } else {
301 self.used_bits += 1;
302 }
303 }
304
305 /// Ensures the buffer has the required bytes passed in by required_bytes.
306 /// Throws a NotEnoughBytes error if there are less bytes remaining in the
307 /// buffer than required_bytes.
308 fn ensure_bytes(&mut self, required_bytes: usize) -> Result<(), FlatDecodeError> {
309 if required_bytes > self.buffer.len() - self.pos {
310 Err(FlatDecodeError::NotEnoughBytes(required_bytes))
311 } else {
312 Ok(())
313 }
314 }
315}