1use crate::rle::RleReader;
2use crate::varint::{read_uvarint, read_varint};
3
/// One entry of the decoder's string dictionary.
///
/// Entries are appended in the order strings are encountered and are later
/// looked up by index when a back-reference is decoded.
#[derive(Debug)]
enum DictEntry {
    /// A string stored in the input buffer, recorded as `(start, len)` in bytes.
    Pos(usize, usize),
    /// A string that is not taken from the input buffer and is therefore owned.
    Owned(String),
}
9
10pub struct Decoder<'a> {
12 buffer: &'a [u8],
13 pos: usize,
14 dict: Vec<DictEntry>,
15 rle: RleReader<'a>,
16}
17
18impl<'a> Decoder<'a> {
19 pub fn new(buf: &'a [u8]) -> Self {
20 Self {
21 buffer: buf,
22 pos: 0,
23 dict: Vec::new(),
24 rle: RleReader::new(buf),
25 }
26 }
27
28 #[inline]
31 pub fn decode<F, R>(buf: &[u8], f: F) -> R
32 where
33 F: FnOnce(&mut Decoder) -> R,
34 {
35 use std::cell::RefCell;
36 thread_local! {
37 static DICT: RefCell<Vec<DictEntry>> = const { RefCell::new(Vec::new()) };
38 }
39 DICT.with(|dict| {
40 let mut dict = dict.borrow_mut();
41 dict.clear();
42 let mut dec = Decoder {
43 buffer: buf,
44 pos: 0,
45 dict: std::mem::take(&mut *dict),
46 rle: RleReader::new(buf),
47 };
48 let result = f(&mut dec);
49 *dict = dec.dict;
50 result
51 })
52 }
53
54 #[inline]
57 pub fn next_string(&mut self) -> String {
58 let (len_or_idx, bytes_read) = read_varint(self.buffer, self.pos);
59 self.pos += bytes_read;
60
61 if len_or_idx == 0 {
62 return String::new();
63 }
64
65 if len_or_idx > 0 {
66 let len = len_or_idx as usize;
67 let start = self.pos;
68 self.pos += len;
69 self.dict.push(DictEntry::Pos(start, len));
70 return unsafe {
71 String::from(std::str::from_utf8_unchecked(
72 &self.buffer[start..start + len],
73 ))
74 };
75 }
76
77 match &self.dict[(-len_or_idx - 1) as usize] {
79 DictEntry::Pos(start, len) => unsafe {
80 String::from(std::str::from_utf8_unchecked(
81 &self.buffer[*start..*start + *len],
82 ))
83 },
84 DictEntry::Owned(s) => s.clone(),
85 }
86 }
87
88 #[inline]
89 pub fn next_int(&mut self) -> i64 {
90 let (val, bytes_read) = read_varint(self.buffer, self.pos);
91 self.pos += bytes_read;
92 val
93 }
94
95 #[inline]
96 pub fn next_uint(&mut self) -> u64 {
97 let (val, bytes_read) = read_uvarint(self.buffer, self.pos);
98 self.pos += bytes_read;
99 val
100 }
101
102 #[inline]
103 pub fn next_bounded_int(&mut self, min: i64) -> i64 {
104 self.next_uint() as i64 + min
105 }
106
107 #[inline]
108 pub fn next_float(&mut self) -> f32 {
109 let bytes: [u8; 4] = self.buffer[self.pos..self.pos + 4].try_into().unwrap();
110 self.pos += 4;
111 f32::from_le_bytes(bytes)
112 }
113
114 #[inline]
115 pub fn next_float_quantized(&mut self, precision: f32) -> f32 {
116 self.next_int() as f32 * precision
117 }
118
119 #[inline]
120 pub fn next_boolean(&mut self) -> bool {
121 self.rle.next_bit()
122 }
123
124 #[inline]
125 pub fn next_enum(&mut self, num_bits: u8) -> u32 {
126 self.rle.next_bits(num_bits)
127 }
128
129 #[inline]
132 pub fn next_string_diff(&mut self, a: &str) -> String {
133 let already_in_dict = self.dict.iter().any(|entry| match entry {
135 DictEntry::Pos(start, len) => unsafe {
136 std::str::from_utf8_unchecked(&self.buffer[*start..*start + *len]) == a
137 },
138 DictEntry::Owned(s) => s == a,
139 });
140 if !already_in_dict {
141 self.dict.push(DictEntry::Owned(a.to_string()));
142 }
143 self.next_string()
144 }
145
146 #[inline]
147 pub fn next_int_diff(&mut self, _a: i64) -> i64 {
148 self.next_int()
149 }
150
151 #[inline]
152 pub fn next_uint_diff(&mut self, _a: u64) -> u64 {
153 self.next_uint()
154 }
155
156 #[inline]
157 pub fn next_bounded_int_diff(&mut self, _a: i64, min: i64) -> i64 {
158 self.next_bounded_int(min)
159 }
160
161 #[inline]
162 pub fn next_float_diff(&mut self, _a: f32) -> f32 {
163 self.next_float()
164 }
165
166 #[inline]
167 pub fn next_float_quantized_diff(&mut self, _a: f32, precision: f32) -> f32 {
168 self.next_float_quantized(precision)
169 }
170
171 #[inline]
172 pub fn next_boolean_diff(&mut self, a: bool) -> bool {
173 a ^ self.next_boolean()
175 }
176
177 #[inline]
178 pub fn next_enum_diff(&mut self, _a: u32, num_bits: u8) -> u32 {
179 self.next_enum(num_bits)
180 }
181
182 #[inline]
185 pub fn next_object_diff<T, F>(&mut self, a: &T, decode_diff: F) -> T
186 where
187 T: Clone,
188 F: FnOnce(&mut Self) -> T,
189 {
190 if self.next_boolean() {
191 decode_diff(self)
192 } else {
193 a.clone()
194 }
195 }
196
197 #[inline]
200 pub fn next_field_diff<T, F>(&mut self, a: &T, decode_diff: F) -> T
201 where
202 T: Clone,
203 F: FnOnce(&mut Self, &T) -> T,
204 {
205 if self.next_boolean() {
206 decode_diff(self, a)
207 } else {
208 a.clone()
209 }
210 }
211
212 #[inline]
216 pub fn next_array<T, F>(&mut self, mut inner_read: F) -> Vec<T>
217 where
218 F: FnMut(&mut Self) -> T,
219 {
220 let len = self.next_uint() as usize;
221 let mut arr = Vec::with_capacity(len);
222 for _ in 0..len {
223 arr.push(inner_read(self));
224 }
225 arr
226 }
227
228 #[inline]
231 pub fn next_array_diff<T, F, FD>(
232 &mut self,
233 a: &[T],
234 mut inner_read: F,
235 mut inner_diff: FD,
236 ) -> Vec<T>
237 where
238 T: Clone,
239 F: FnMut(&mut Self) -> T,
240 FD: FnMut(&mut Self, &T) -> T,
241 {
242 let new_len = self.next_uint() as usize;
243
244 let mut arr: Vec<T> = a.iter().take(new_len.min(a.len())).cloned().collect();
246
247 let num_updates = self.next_uint() as usize;
249 for _ in 0..num_updates {
250 let idx = self.next_uint() as usize;
251 arr[idx] = inner_diff(self, &a[idx]);
252 }
253
254 for _ in a.len()..new_len {
256 arr.push(inner_read(self));
257 }
258
259 arr
260 }
261
262 #[inline]
265 pub fn next_optional<T, F>(&mut self, mut inner_read: F) -> Option<T>
266 where
267 F: FnMut(&mut Self) -> T,
268 {
269 self.next_boolean().then(|| inner_read(self))
270 }
271
272 #[inline]
276 pub fn next_optional_diff<T, F, FD>(
277 &mut self,
278 a: &Option<T>,
279 mut inner_read: F,
280 mut inner_diff: FD,
281 ) -> Option<T>
282 where
283 T: Clone,
284 F: FnMut(&mut Self) -> T,
285 FD: FnMut(&mut Self, &T) -> T,
286 {
287 match a {
288 None => {
289 Some(inner_read(self))
291 }
292 Some(av) => {
293 if self.next_boolean() {
294 Some(inner_diff(self, av)) } else {
296 None }
298 }
299 }
300 }
301
302 #[inline]
306 pub fn next_record<K, V, FK, FV>(
307 &mut self,
308 mut key_read: FK,
309 mut val_read: FV,
310 ) -> indexmap::IndexMap<K, V>
311 where
312 K: Eq + std::hash::Hash,
313 FK: FnMut(&mut Self) -> K,
314 FV: FnMut(&mut Self) -> V,
315 {
316 let len = self.next_uint() as usize;
317 let mut map = indexmap::IndexMap::with_capacity(len);
318 for _ in 0..len {
319 let k = key_read(self);
320 let v = val_read(self);
321 map.insert(k, v);
322 }
323 map
324 }
325
326 #[inline]
330 pub fn next_record_diff<K, V, FK, FV, FVD>(
331 &mut self,
332 a: &indexmap::IndexMap<K, V>,
333 mut key_read: FK,
334 mut val_read: FV,
335 mut val_diff: FVD,
336 ) -> indexmap::IndexMap<K, V>
337 where
338 K: Clone + Eq + std::hash::Hash,
339 V: Clone,
340 FK: FnMut(&mut Self) -> K,
341 FV: FnMut(&mut Self) -> V,
342 FVD: FnMut(&mut Self, &V) -> V,
343 {
344 let mut result = a.clone();
345
346 if !a.is_empty() {
348 let num_deletions = self.next_uint() as usize;
349 for _ in 0..num_deletions {
350 let key = key_read(self);
351 result.shift_remove(&key);
352 }
353 let num_updates = self.next_uint() as usize;
354 for _ in 0..num_updates {
355 let key = key_read(self);
356 let new_val = val_diff(self, result.get(&key).unwrap());
357 result.insert(key, new_val);
358 }
359 }
360
361 let num_additions = self.next_uint() as usize;
363 for _ in 0..num_additions {
364 let k = key_read(self);
365 let v = val_read(self);
366 result.insert(k, v);
367 }
368
369 result
370 }
371}
372
#[cfg(test)]
mod tests {
    // Round-trip tests: each value is written with `Encoder` and read back
    // with `Decoder`. Float literals deliberately avoid values close to
    // well-known constants (such as 3.14) so Clippy's `approx_constant` lint
    // stays quiet.
    use super::*;
    use crate::Encoder;

    #[test]
    fn test_encode_decode_string() {
        let mut encoder = Encoder::new();
        encoder.push_string("hello");
        encoder.push_string("world");
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_string(), "hello");
        assert_eq!(decoder.next_string(), "world");
    }

    #[test]
    fn test_encode_decode_string_dictionary() {
        let mut encoder = Encoder::new();
        encoder.push_string("hello");
        // Same string again; presumably written as a dictionary back-reference.
        encoder.push_string("hello");
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_string(), "hello");
        assert_eq!(decoder.next_string(), "hello");
    }

    #[test]
    fn test_encode_decode_int() {
        let mut encoder = Encoder::new();
        encoder.push_int(42);
        encoder.push_int(-100);
        encoder.push_int(0);
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_int(), 42);
        assert_eq!(decoder.next_int(), -100);
        assert_eq!(decoder.next_int(), 0);
    }

    #[test]
    fn test_encode_decode_uint() {
        let mut encoder = Encoder::new();
        encoder.push_uint(0);
        encoder.push_uint(127);
        encoder.push_uint(128);
        encoder.push_uint(16383);
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_uint(), 0);
        assert_eq!(decoder.next_uint(), 127);
        assert_eq!(decoder.next_uint(), 128);
        assert_eq!(decoder.next_uint(), 16383);
    }

    #[test]
    fn test_encode_decode_float() {
        let mut encoder = Encoder::new();
        encoder.push_float(1.25);
        encoder.push_float(-2.5);
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert!((decoder.next_float() - 1.25).abs() < 0.001);
        assert!((decoder.next_float() - (-2.5)).abs() < 0.001);
    }

    #[test]
    fn test_encode_decode_float_quantized() {
        let mut encoder = Encoder::new();
        encoder.push_float_quantized(1.23456, 0.01);
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        let val = decoder.next_float_quantized(0.01);
        assert!((val - 1.23).abs() < 0.01);
    }

    #[test]
    fn test_encode_decode_boolean() {
        let mut encoder = Encoder::new();
        encoder.push_boolean(true);
        encoder.push_boolean(false);
        encoder.push_boolean(true);
        encoder.push_boolean(true);
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert!(decoder.next_boolean());
        assert!(!decoder.next_boolean());
        assert!(decoder.next_boolean());
        assert!(decoder.next_boolean());
    }

    #[test]
    fn test_encode_decode_mixed() {
        let mut encoder = Encoder::new();
        encoder.push_string("test");
        encoder.push_int(42);
        encoder.push_boolean(true);
        encoder.push_float(1.25);
        encoder.push_boolean(false);
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_string(), "test");
        assert_eq!(decoder.next_int(), 42);
        assert!(decoder.next_boolean());
        assert!((decoder.next_float() - 1.25).abs() < 0.001);
        assert!(!decoder.next_boolean());
    }

    #[test]
    fn test_diff_string() {
        let mut encoder = Encoder::new();
        // Unchanged value, then a changed one.
        encoder.push_string_diff("hello", "hello");
        encoder.push_string_diff("hello", "world");
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_string_diff("hello"), "hello");
        assert_eq!(decoder.next_string_diff("hello"), "world");
    }

    #[test]
    fn test_diff_int() {
        let mut encoder = Encoder::new();
        // Unchanged value, then a changed one.
        encoder.push_int_diff(10, 10);
        encoder.push_int_diff(10, 20);
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_int_diff(10), 10);
        assert_eq!(decoder.next_int_diff(10), 20);
    }

    #[test]
    fn test_array_encode_decode() {
        let mut encoder = Encoder::new();
        let arr = vec![1i64, 2, 3, 4, 5];
        encoder.push_array(&arr, |enc, &x| enc.push_int(x));
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        let result: Vec<i64> = decoder.next_array(|dec| dec.next_int());
        assert_eq!(result, arr);
    }

    #[test]
    fn test_array_diff() {
        let a = vec![1i64, 2, 3];
        // One element changed (index 1) and one appended.
        let b = vec![1i64, 5, 3, 4];

        let mut encoder = Encoder::new();
        encoder.push_array_diff(
            &a,
            &b,
            |x, y| x == y,
            |enc: &mut Encoder, &x| enc.push_int(x),
            |enc: &mut Encoder, _, &x| enc.push_int(x),
        );
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        let result: Vec<i64> = decoder.next_array_diff(
            &a,
            |dec| dec.next_int(),
            |dec, _| dec.next_int(),
        );
        assert_eq!(result, b);
    }

    #[test]
    fn test_optional_encode_decode() {
        let mut encoder = Encoder::new();
        encoder.push_optional(&Some(42i64), |enc, &x| enc.push_int(x));
        encoder.push_optional(&None::<i64>, |enc, &x| enc.push_int(x));
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_optional(|dec| dec.next_int()), Some(42));
        assert_eq!(decoder.next_optional(|dec| dec.next_int()), None);
    }

    #[test]
    fn test_optional_diff() {
        let mut encoder = Encoder::new();
        // Transition from `None` to `Some(42)`.
        encoder.push_optional_diff(
            &None::<i64>,
            &Some(42i64),
            |enc: &mut Encoder, &x| enc.push_int(x),
            |enc: &mut Encoder, _, &x| enc.push_int(x),
        );
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        let result =
            decoder.next_optional_diff(&None, |dec| dec.next_int(), |dec, _| dec.next_int());
        assert_eq!(result, Some(42));
    }

    #[test]
    fn test_record_encode_decode() {
        let mut encoder = Encoder::new();
        let mut map = indexmap::IndexMap::new();
        map.insert("a".to_string(), 1i64);
        map.insert("b".to_string(), 2i64);
        encoder.push_record(&map, |enc, k| enc.push_string(k), |enc, &v| enc.push_int(v));
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        let result: indexmap::IndexMap<String, i64> =
            decoder.next_record(|dec| dec.next_string(), |dec| dec.next_int());
        assert_eq!(result, map);
    }
}