uplc_turbo/flat/encode/encoder.rs
1use crate::{constant::Integer, flat::zigzag::ZigZag};
2
3use super::FlatEncodeError;
4
/// Bit-level writer that accumulates flat-encoded output.
///
/// Bits are packed into `current_byte` starting from the most significant
/// bit; once a byte is complete it is appended to `buffer` and a fresh byte
/// is started.
#[derive(Debug, Default)]
pub struct Encoder {
    /// Bytes that have been fully written so far, in output order.
    pub buffer: Vec<u8>,
    /// Number of bits already occupied in `current_byte`. Stored as `i64` so
    /// it can be combined directly with the `num_bits` argument of `bits`.
    used_bits: i64,
    /// The partially filled byte that has not been pushed to `buffer` yet.
    current_byte: u8,
}
13
14impl Encoder {
15 /// Encode a unsigned integer of any size.
16 /// This is byte alignment agnostic.
17 /// We encode the 7 least significant bits of the unsigned byte. If the char
18 /// value is greater than 127 we encode a leading 1 followed by
19 /// repeating the above for the next 7 bits and so on.
20 pub fn word(&mut self, c: usize) -> &mut Self {
21 let mut d = c;
22 loop {
23 let mut w = (d & 127) as u8;
24 d >>= 7;
25
26 if d != 0 {
27 w |= 128;
28 }
29 self.bits(8, w);
30
31 if d == 0 {
32 break;
33 }
34 }
35
36 self
37 }
38
39 /// Encode a `bool` value. This is byte alignment agnostic.
40 /// Uses the next unused bit in the current byte to encode this information.
41 /// One for true and Zero for false
42 pub fn bool(&mut self, x: bool) -> &mut Self {
43 if x {
44 self.one();
45 } else {
46 self.zero();
47 }
48
49 self
50 }
51
52 /// Encode an arbitrarily sized integer.
53 ///
54 /// This is byte alignment agnostic.
55 /// First we use zigzag once to double the number and encode the negative
56 /// sign as the least significant bit. Next we encode the 7 least
57 /// significant bits of the unsigned integer. If the number is greater than
58 /// 127 we encode a leading 1 followed by repeating the encoding above for
59 /// the next 7 bits and so on.
60 pub fn integer(&mut self, i: &Integer) -> &mut Self {
61 self.big_word(i.zigzag());
62
63 self
64 }
65
66 /// Encodes up to 8 bits of information and is byte alignment agnostic.
67 /// Uses unused bits in the current byte to write out the passed in byte
68 /// value. Overflows to the most significant digits of the next byte if
69 /// number of bits to use is greater than unused bits. Expects that
70 /// number of bits to use is greater than or equal to required bits by the
71 /// value. The param num_bits is i64 to match unused_bits type.
72 pub fn bits(&mut self, num_bits: i64, val: u8) -> &mut Self {
73 match (num_bits, val) {
74 (1, 0) => self.zero(),
75 (1, 1) => self.one(),
76 (2, 0) => {
77 self.zero();
78 self.zero();
79 }
80 (2, 1) => {
81 self.zero();
82 self.one();
83 }
84 (2, 2) => {
85 self.one();
86 self.zero();
87 }
88 (2, 3) => {
89 self.one();
90 self.one();
91 }
92 (_, _) => {
93 self.used_bits += num_bits;
94 let unused_bits = 8 - self.used_bits;
95 match unused_bits {
96 0 => {
97 self.current_byte |= val;
98 self.next_word();
99 }
100 x if x > 0 => {
101 self.current_byte |= val << x;
102 }
103 x => {
104 let used = -x;
105 self.current_byte |= val >> used;
106 self.next_word();
107 self.current_byte = val << (8 - used);
108 self.used_bits = used;
109 }
110 }
111 }
112 }
113
114 self
115 }
116
117 /// Encode a byte array.
118 /// Uses filler to byte align the buffer, then writes byte array length up
119 /// to 255. Following that it writes the next 255 bytes from the array.
120 /// We repeat writing length up to 255 and the next 255 bytes until we reach
121 /// the end of the byte array. After reaching the end of the byte array
122 /// we write a 0 byte. Only write 0 byte if the byte array is empty.
123 pub fn bytes(&mut self, x: &[u8]) -> Result<&mut Self, FlatEncodeError> {
124 // use filler to write current buffer so bits used gets reset
125 self.filler();
126
127 self.byte_array(x)
128 }
129
130 /// Encode a byte array in a byte aligned buffer. Throws exception if any
131 /// bits for the current byte were used. Writes byte array length up to
132 /// 255 Following that it writes the next 255 bytes from the array.
133 /// We repeat writing length up to 255 and the next 255 bytes until we reach
134 /// the end of the byte array. After reaching the end of the buffer we
135 /// write a 0 byte. Only write 0 if the byte array is empty.
136 pub fn byte_array(&mut self, arr: &[u8]) -> Result<&mut Self, FlatEncodeError> {
137 if self.used_bits != 0 {
138 return Err(FlatEncodeError::BufferNotByteAligned);
139 }
140
141 self.write_blk(arr);
142
143 Ok(self)
144 }
145
146 /// Encode a unsigned integer of 128 bits size.
147 /// This is byte alignment agnostic.
148 /// We encode the 7 least significant bits of the unsigned byte. If the char
149 /// value is greater than 127 we encode a leading 1 followed by
150 /// repeating the above for the next 7 bits and so on.
151 pub fn big_word(&mut self, c: Integer) -> &mut Self {
152 let mut d = c;
153
154 loop {
155 let temp: Integer = d.clone() % 128;
156 let mut w = temp.to_u8().unwrap();
157
158 d >>= 7;
159
160 if d != 0 {
161 w |= 128;
162 }
163 self.bits(8, w);
164
165 if d == 0 {
166 break;
167 }
168 }
169
170 self
171 }
172
173 /// Encode a string.
174 /// Convert to byte array and then use byte array encoding.
175 /// Uses filler to byte align the buffer, then writes byte array length up
176 /// to 255. Following that it writes the next 255 bytes from the array.
177 /// After reaching the end of the buffer we write a 0 byte. Only write 0
178 /// byte if the byte array is empty.
179 pub fn utf8(&mut self, s: &str) -> Result<&mut Self, FlatEncodeError> {
180 self.bytes(s.as_bytes())
181 }
182
183 /// Encode a list of bytes with a function
184 /// This is byte alignment agnostic.
185 /// If there are bytes in a list then write 1 bit followed by the functions
186 /// encoding. After the last item write a 0 bit. If the list is empty
187 /// only encode a 0 bit.
188 pub fn list_with<T>(
189 &mut self,
190 list: &[T],
191 encoder_func: for<'r> fn(&'r mut Encoder, &T) -> Result<(), FlatEncodeError>,
192 ) -> Result<&mut Self, FlatEncodeError> {
193 for item in list {
194 self.one();
195
196 encoder_func(self, item)?;
197 }
198
199 self.zero();
200
201 Ok(self)
202 }
203
204 /// A filler amount of end 0's followed by a 1 at the end of a byte.
205 /// Used to byte align the buffer by padding out the rest of the byte.
206 pub fn filler(&mut self) -> &mut Self {
207 self.current_byte |= 1;
208 self.next_word();
209
210 self
211 }
212
213 /// Write a 0 bit into the current byte.
214 /// Write out to buffer if last used bit in the current byte.
215 fn zero(&mut self) {
216 if self.used_bits == 7 {
217 self.next_word();
218 } else {
219 self.used_bits += 1;
220 }
221 }
222
223 /// Write a 1 bit into the current byte.
224 /// Write out to buffer if last used bit in the current byte.
225 fn one(&mut self) {
226 if self.used_bits == 7 {
227 self.current_byte |= 1;
228 self.next_word();
229 } else {
230 self.current_byte |= 128 >> self.used_bits;
231 self.used_bits += 1;
232 }
233 }
234
235 /// Write the current byte out to the buffer and begin next byte to write
236 /// out. Add current byte to the buffer and set current byte and used
237 /// bits to 0.
238 fn next_word(&mut self) {
239 self.buffer.push(self.current_byte);
240
241 self.current_byte = 0;
242 self.used_bits = 0;
243 }
244
245 /// Writes byte array length up to 255
246 /// Following that it writes the next 255 bytes from the array.
247 /// After reaching the end of the buffer we write a 0 byte. Only write 0 if
248 /// the byte array is empty. This is byte alignment agnostic.
249 fn write_blk(&mut self, arr: &[u8]) {
250 let chunks = arr.chunks(255);
251
252 for chunk in chunks {
253 self.buffer.push(chunk.len() as u8);
254 self.buffer.extend(chunk);
255 }
256 self.buffer.push(0_u8);
257 }
258}