amaru_uplc/flat/decode/decoder.rs
1use bumpalo::collections::{String as BumpString, Vec as BumpVec};
2
3use crate::{arena::Arena, constant::Integer, flat::zigzag::ZigZag};
4
5use super::FlatDecodeError;
6
/// Bit-level cursor over a borrowed byte buffer holding flat-encoded data.
pub struct Decoder<'b> {
    /// The bytes being decoded.
    pub buffer: &'b [u8],
    /// Number of bits already consumed from the byte at `pos`
    /// (counted from the most significant bit).
    pub used_bits: usize,
    /// Index into `buffer` of the byte currently being read.
    pub pos: usize,
}
12
13pub struct Ctx<'a> {
14 pub arena: &'a Arena,
15}
16
17impl<'b> Decoder<'b> {
18 pub fn new(bytes: &'b [u8]) -> Decoder<'b> {
19 Decoder {
20 buffer: bytes,
21 pos: 0,
22 used_bits: 0,
23 }
24 }
25
26 /// Decode a word of any size.
27 /// This is byte alignment agnostic.
28 /// First we decode the next 8 bits of the buffer.
29 /// We take the 7 least significant bits as the 7 least significant bits of
30 /// the current unsigned integer. If the most significant bit of the 8
31 /// bits is 1 then we take the next 8 and repeat the process above,
32 /// filling in the next 7 least significant bits of the unsigned integer and
33 /// so on. If the most significant bit was instead 0 we stop decoding
34 /// any more bits.
35 pub fn word(&mut self) -> Result<usize, FlatDecodeError> {
36 let mut leading_bit = 1;
37 let mut final_word: usize = 0;
38 let mut shl: usize = 0;
39
40 // continue looping if lead bit is 1 which is 128 as a u8 otherwise exit
41 while leading_bit > 0 {
42 let word8 = self.bits8(8)?;
43
44 let word7 = word8 & 127;
45
46 final_word |= (word7 as usize) << shl;
47
48 shl += 7;
49
50 leading_bit = word8 & 128;
51 }
52
53 Ok(final_word)
54 }
55
56 /// Decode a list of items with a decoder function.
57 /// This is byte alignment agnostic.
58 /// Decode a bit from the buffer.
59 /// If 0 then stop.
60 /// Otherwise we decode an item in the list with the decoder function passed
61 /// in. Then decode the next bit in the buffer and repeat above.
62 /// Returns a list of items decoded with the decoder function.
63 pub fn list_with<'a, T, F>(
64 &mut self,
65 ctx: &mut Ctx<'a>,
66 decoder_func: F,
67 ) -> Result<BumpVec<'a, T>, FlatDecodeError>
68 where
69 F: Copy + FnOnce(&mut Ctx<'a>, &mut Decoder) -> Result<T, FlatDecodeError>,
70 {
71 let mut vec_array = BumpVec::new_in(ctx.arena.as_bump());
72
73 while self.bit()? {
74 vec_array.push(decoder_func(ctx, self)?)
75 }
76
77 Ok(vec_array)
78 }
79
80 /// Decode up to 8 bits.
81 /// This is byte alignment agnostic.
82 /// If num_bits is greater than the 8 we throw an IncorrectNumBits error.
83 /// First we decode the next num_bits of bits in the buffer.
84 /// If there are less unused bits in the current byte in the buffer than
85 /// num_bits, then we decode the remaining bits from the most
86 /// significant bits in the next byte in the buffer. Otherwise we decode
87 /// the unused bits from the current byte. Returns the decoded value up
88 /// to a byte in size.
89 pub fn bits8(&mut self, num_bits: usize) -> Result<u8, FlatDecodeError> {
90 if num_bits > 8 {
91 return Err(FlatDecodeError::IncorrectNumBits);
92 }
93
94 self.ensure_bits(num_bits)?;
95
96 let unused_bits = 8 - self.used_bits;
97 let leading_zeroes = 8 - num_bits;
98 let r = (self.buffer[self.pos] << self.used_bits) >> leading_zeroes;
99
100 let x = if num_bits > unused_bits {
101 r | (self.buffer[self.pos + 1] >> (unused_bits + leading_zeroes))
102 } else {
103 r
104 };
105
106 self.drop_bits(num_bits);
107
108 Ok(x)
109 }
110
111 /// Ensures the buffer has the required bits passed in by required_bits.
112 /// Throws a NotEnoughBits error if there are less bits remaining in the
113 /// buffer than required_bits.
114 fn ensure_bits(&mut self, required_bits: usize) -> Result<(), FlatDecodeError> {
115 if required_bits > (self.buffer.len() - self.pos) * 8 - self.used_bits {
116 Err(FlatDecodeError::NotEnoughBits(required_bits))
117 } else {
118 Ok(())
119 }
120 }
121
122 /// Increment buffer by num_bits.
123 /// If num_bits + used bits is greater than 8,
124 /// then increment position by (num_bits + used bits) / 8
125 /// Use the left over remainder as the new amount of used bits.
126 fn drop_bits(&mut self, num_bits: usize) {
127 let all_used_bits = num_bits + self.used_bits;
128
129 self.used_bits = all_used_bits % 8;
130
131 self.pos += all_used_bits / 8;
132 }
133
134 /// Decodes a filler of max one byte size.
135 /// Decodes bits until we hit a bit that is 1.
136 /// Expects that the 1 is at the end of the current byte in the buffer.
137 pub fn filler(&mut self) -> Result<(), FlatDecodeError> {
138 while self.zero()? {}
139
140 Ok(())
141 }
142
143 /// Decode the next bit in the buffer.
144 /// If the bit was 0 then return true.
145 /// Otherwise return false.
146 /// Throws EndOfBuffer error if used at the end of the array.
147 fn zero(&mut self) -> Result<bool, FlatDecodeError> {
148 let current_bit = self.bit()?;
149
150 Ok(!current_bit)
151 }
152
153 /// Decode the next bit in the buffer.
154 /// If the bit was 1 then return true.
155 /// Otherwise return false.
156 /// Throws EndOfBuffer error if used at the end of the array.
157 pub fn bit(&mut self) -> Result<bool, FlatDecodeError> {
158 if self.pos >= self.buffer.len() {
159 return Err(FlatDecodeError::EndOfBuffer);
160 }
161
162 let b = self.buffer[self.pos] & (128 >> self.used_bits) > 0;
163
164 self.increment_buffer_by_bit();
165
166 Ok(b)
167 }
168
169 /// Decode an integer of an arbitrary size..
170 ///
171 /// This is byte alignment agnostic.
172 /// First we decode the next 8 bits of the buffer.
173 /// We take the 7 least significant bits as the 7 least significant bits of
174 /// the current unsigned integer. If the most significant bit of the 8
175 /// bits is 1 then we take the next 8 and repeat the process above,
176 /// filling in the next 7 least significant bits of the unsigned integer and
177 /// so on. If the most significant bit was instead 0 we stop decoding
178 /// any more bits. Finally we use zigzag to convert the unsigned integer
179 /// back to a signed integer.
180 pub fn integer(&mut self) -> Result<Integer, FlatDecodeError> {
181 Ok(ZigZag::unzigzag(&self.big_word()?))
182 }
183
184 /// Decode a word of 128 bits size.
185 /// This is byte alignment agnostic.
186 /// First we decode the next 8 bits of the buffer.
187 /// We take the 7 least significant bits as the 7 least significant bits of
188 /// the current unsigned integer. If the most significant bit of the 8
189 /// bits is 1 then we take the next 8 and repeat the process above,
190 /// filling in the next 7 least significant bits of the unsigned integer and
191 /// so on. If the most significant bit was instead 0 we stop decoding
192 /// any more bits.
193 pub fn big_word(&mut self) -> Result<Integer, FlatDecodeError> {
194 let mut leading_bit = 1;
195 let mut final_word = Integer::from(0);
196 let mut shift = 0_u32; // Using u32 for shift as it's more than enough for 128 bits
197
198 // Continue looping if lead bit is 1 (0x80) otherwise exit
199 while leading_bit > 0 {
200 let word8 = self.bits8(8)?;
201 let word7 = word8 & 0x7F; // 127, get 7 least significant bits
202
203 // Create temporary Integer from word7 and shift it
204 let part = Integer::from(word7);
205 let shifted_part = part << shift;
206
207 // OR it with our result
208 final_word |= shifted_part;
209
210 // Increment shift by 7 for next iteration
211 shift += 7;
212
213 // Check if we should continue (MSB set)
214 leading_bit = word8 & 0x80; // 128
215 }
216
217 Ok(final_word)
218 }
219
220 /// Decode a byte array.
221 /// Decodes a filler to byte align the buffer,
222 /// then decodes the next byte to get the array length up to a max of 255.
223 /// We decode bytes equal to the array length to form the byte array.
224 /// If the following byte for array length is not 0 we decode it and repeat
225 /// above to continue decoding the byte array. We stop once we hit a
226 /// byte array length of 0. If array length is 0 for first byte array
227 /// length the we return a empty array.
228 pub fn bytes<'a>(&mut self, arena: &'a Arena) -> Result<BumpVec<'a, u8>, FlatDecodeError> {
229 self.filler()?;
230 self.byte_array(arena)
231 }
232
233 /// Decode a byte array.
234 /// Throws a BufferNotByteAligned error if the buffer is not byte aligned
235 /// Decodes the next byte to get the array length up to a max of 255.
236 /// We decode bytes equal to the array length to form the byte array.
237 /// If the following byte for array length is not 0 we decode it and repeat
238 /// above to continue decoding the byte array. We stop once we hit a
239 /// byte array length of 0. If array length is 0 for first byte array
240 /// length the we return a empty array.
241 fn byte_array<'a>(&mut self, arena: &'a Arena) -> Result<BumpVec<'a, u8>, FlatDecodeError> {
242 if self.used_bits != 0 {
243 return Err(FlatDecodeError::BufferNotByteAligned);
244 }
245
246 self.ensure_bytes(1)?;
247
248 let mut blk_len = self.buffer[self.pos] as usize;
249
250 self.pos += 1;
251
252 let mut blk_array = BumpVec::with_capacity_in(blk_len, arena.as_bump());
253
254 while blk_len != 0 {
255 self.ensure_bytes(blk_len + 1)?;
256
257 let decoded_array = &self.buffer[self.pos..self.pos + blk_len];
258
259 blk_array.extend(decoded_array);
260
261 self.pos += blk_len;
262
263 blk_len = self.buffer[self.pos] as usize;
264
265 self.pos += 1
266 }
267
268 Ok(blk_array)
269 }
270
271 /// Decode a string.
272 /// Convert to byte array and then use byte array decoding.
273 /// Decodes a filler to byte align the buffer,
274 /// then decodes the next byte to get the array length up to a max of 255.
275 /// We decode bytes equal to the array length to form the byte array.
276 /// If the following byte for array length is not 0 we decode it and repeat
277 /// above to continue decoding the byte array. We stop once we hit a
278 /// byte array length of 0. If array length is 0 for first byte array
279 /// length the we return a empty array.
280 pub fn utf8<'a>(&mut self, arena: &'a Arena) -> Result<&'a str, FlatDecodeError> {
281 let b = self.bytes(arena)?;
282
283 let s =
284 BumpString::from_utf8(b).map_err(|e| FlatDecodeError::DecodeUtf8(e.utf8_error()))?;
285 let s = arena.alloc(s);
286
287 Ok(s)
288 }
289
290 /// Increment used bits by 1.
291 /// If all 8 bits are used then increment buffer position by 1.
292 fn increment_buffer_by_bit(&mut self) {
293 if self.used_bits == 7 {
294 self.pos += 1;
295
296 self.used_bits = 0;
297 } else {
298 self.used_bits += 1;
299 }
300 }
301
302 /// Ensures the buffer has the required bytes passed in by required_bytes.
303 /// Throws a NotEnoughBytes error if there are less bytes remaining in the
304 /// buffer than required_bytes.
305 fn ensure_bytes(&mut self, required_bytes: usize) -> Result<(), FlatDecodeError> {
306 if required_bytes > self.buffer.len() - self.pos {
307 Err(FlatDecodeError::NotEnoughBytes(required_bytes))
308 } else {
309 Ok(())
310 }
311 }
312}