1use crate::rle::RleReader;
2use crate::varint::{read_uvarint, read_varint};
3
/// One entry of the decoder's string dictionary.
///
/// Entries are appended in the order strings are first seen, and later
/// strings may refer back to them by index.
#[derive(Debug, Clone, PartialEq, Eq)]
enum DictEntry {
    /// A string stored inline in the input buffer, as `(start, len)` in bytes.
    Pos(usize, usize),
    /// A string that is not backed by the input buffer and is therefore owned.
    Owned(String),
}
9
10pub struct Decoder<'a> {
12 buffer: &'a [u8],
13 pos: usize,
14 dict: Vec<DictEntry>,
15 rle: RleReader<'a>,
16}
17
18impl<'a> Decoder<'a> {
19 pub fn new(buf: &'a [u8]) -> Self {
20 Self {
21 buffer: buf,
22 pos: 0,
23 dict: Vec::new(),
24 rle: RleReader::new(buf),
25 }
26 }
27
28 #[inline]
31 pub fn decode<F, R>(buf: &[u8], f: F) -> R
32 where
33 F: FnOnce(&mut Decoder) -> R,
34 {
35 use std::cell::RefCell;
36 thread_local! {
37 static DICT: RefCell<Vec<DictEntry>> = const { RefCell::new(Vec::new()) };
38 }
39 DICT.with(|dict| {
40 let mut dict = dict.borrow_mut();
41 dict.clear();
42 let mut dec = Decoder {
43 buffer: buf,
44 pos: 0,
45 dict: std::mem::take(&mut *dict),
46 rle: RleReader::new(buf),
47 };
48 let result = f(&mut dec);
49 *dict = dec.dict;
50 result
51 })
52 }
53
54 #[inline]
57 pub fn next_string(&mut self) -> String {
58 let (len_or_idx, bytes_read) = read_varint(self.buffer, self.pos);
59 self.pos += bytes_read;
60
61 if len_or_idx == 0 {
62 return String::new();
63 }
64
65 if len_or_idx > 0 {
66 let len = len_or_idx as usize;
67 let start = self.pos;
68 self.pos += len;
69 self.dict.push(DictEntry::Pos(start, len));
70 return String::from(
71 std::str::from_utf8(&self.buffer[start..start + len])
72 .expect("invalid UTF-8 in buffer"),
73 );
74 }
75
76 match &self.dict[(-len_or_idx - 1) as usize] {
78 DictEntry::Pos(start, len) => String::from(
79 std::str::from_utf8(&self.buffer[*start..*start + *len])
80 .expect("invalid UTF-8 in buffer"),
81 ),
82 DictEntry::Owned(s) => s.clone(),
83 }
84 }
85
86 #[inline]
87 pub fn next_int(&mut self) -> i64 {
88 let (val, bytes_read) = read_varint(self.buffer, self.pos);
89 self.pos += bytes_read;
90 val
91 }
92
93 #[inline]
94 pub fn next_uint(&mut self) -> u64 {
95 let (val, bytes_read) = read_uvarint(self.buffer, self.pos);
96 self.pos += bytes_read;
97 val
98 }
99
100 #[inline]
101 pub fn next_bounded_int(&mut self, min: i64) -> i64 {
102 self.next_uint() as i64 + min
103 }
104
105 #[inline]
106 pub fn next_float(&mut self) -> f32 {
107 let bytes: [u8; 4] = self.buffer[self.pos..self.pos + 4].try_into().unwrap();
108 self.pos += 4;
109 f32::from_le_bytes(bytes)
110 }
111
112 #[inline]
113 pub fn next_float_quantized(&mut self, precision: f32) -> f32 {
114 self.next_int() as f32 * precision
115 }
116
117 #[inline]
118 pub fn next_boolean(&mut self) -> bool {
119 self.rle.next_bit()
120 }
121
122 #[inline]
123 pub fn next_enum(&mut self, num_bits: u8) -> u32 {
124 self.rle.next_bits(num_bits)
125 }
126
127 #[inline]
130 pub fn next_string_diff(&mut self, a: &str) -> String {
131 let already_in_dict = self.dict.iter().any(|entry| match entry {
133 DictEntry::Pos(start, len) => std::str::from_utf8(&self.buffer[*start..*start + *len])
134 .map(|s| s == a)
135 .unwrap_or(false),
136 DictEntry::Owned(s) => s == a,
137 });
138 if !already_in_dict {
139 self.dict.push(DictEntry::Owned(a.to_string()));
140 }
141 self.next_string()
142 }
143
144 #[inline]
145 pub fn next_int_diff(&mut self, _a: i64) -> i64 {
146 self.next_int()
147 }
148
149 #[inline]
150 pub fn next_uint_diff(&mut self, _a: u64) -> u64 {
151 self.next_uint()
152 }
153
154 #[inline]
155 pub fn next_bounded_int_diff(&mut self, _a: i64, min: i64) -> i64 {
156 self.next_bounded_int(min)
157 }
158
159 #[inline]
160 pub fn next_float_diff(&mut self, _a: f32) -> f32 {
161 self.next_float()
162 }
163
164 #[inline]
165 pub fn next_float_quantized_diff(&mut self, _a: f32, precision: f32) -> f32 {
166 self.next_float_quantized(precision)
167 }
168
169 #[inline]
170 pub fn next_boolean_diff(&mut self, a: bool) -> bool {
171 a ^ self.next_boolean()
173 }
174
175 #[inline]
176 pub fn next_enum_diff(&mut self, _a: u32, num_bits: u8) -> u32 {
177 self.next_enum(num_bits)
178 }
179
180 #[inline]
183 pub fn next_object_diff<T, F>(&mut self, a: &T, decode_diff: F) -> T
184 where
185 T: Clone,
186 F: FnOnce(&mut Self) -> T,
187 {
188 if self.next_boolean() {
189 decode_diff(self)
190 } else {
191 a.clone()
192 }
193 }
194
195 #[inline]
198 pub fn next_field_diff<T, F>(&mut self, a: &T, decode_diff: F) -> T
199 where
200 T: Clone,
201 F: FnOnce(&mut Self, &T) -> T,
202 {
203 if self.next_boolean() {
204 decode_diff(self, a)
205 } else {
206 a.clone()
207 }
208 }
209
210 #[inline]
214 pub fn next_array<T, F>(&mut self, mut inner_read: F) -> Vec<T>
215 where
216 F: FnMut(&mut Self) -> T,
217 {
218 let len = self.next_uint() as usize;
219 let mut arr = Vec::with_capacity(len);
220 for _ in 0..len {
221 arr.push(inner_read(self));
222 }
223 arr
224 }
225
226 #[inline]
229 pub fn next_array_diff<T, F, FD>(
230 &mut self,
231 a: &[T],
232 mut inner_read: F,
233 mut inner_diff: FD,
234 ) -> Vec<T>
235 where
236 T: Clone,
237 F: FnMut(&mut Self) -> T,
238 FD: FnMut(&mut Self, &T) -> T,
239 {
240 let new_len = self.next_uint() as usize;
241
242 let mut arr: Vec<T> = a.iter().take(new_len.min(a.len())).cloned().collect();
244
245 let num_updates = self.next_uint() as usize;
247 for _ in 0..num_updates {
248 let idx = self.next_uint() as usize;
249 arr[idx] = inner_diff(self, &a[idx]);
250 }
251
252 for _ in a.len()..new_len {
254 arr.push(inner_read(self));
255 }
256
257 arr
258 }
259
260 #[inline]
263 pub fn next_optional<T, F>(&mut self, mut inner_read: F) -> Option<T>
264 where
265 F: FnMut(&mut Self) -> T,
266 {
267 self.next_boolean().then(|| inner_read(self))
268 }
269
270 #[inline]
274 pub fn next_optional_diff<T, F, FD>(
275 &mut self,
276 a: &Option<T>,
277 mut inner_read: F,
278 mut inner_diff: FD,
279 ) -> Option<T>
280 where
281 T: Clone,
282 F: FnMut(&mut Self) -> T,
283 FD: FnMut(&mut Self, &T) -> T,
284 {
285 match a {
286 None => {
287 Some(inner_read(self))
289 }
290 Some(av) => {
291 if self.next_boolean() {
292 Some(inner_diff(self, av)) } else {
294 None }
296 }
297 }
298 }
299
300 #[inline]
304 pub fn next_record<K, V, FK, FV>(
305 &mut self,
306 mut key_read: FK,
307 mut val_read: FV,
308 ) -> indexmap::IndexMap<K, V>
309 where
310 K: Eq + std::hash::Hash,
311 FK: FnMut(&mut Self) -> K,
312 FV: FnMut(&mut Self) -> V,
313 {
314 let len = self.next_uint() as usize;
315 let mut map = indexmap::IndexMap::with_capacity(len);
316 for _ in 0..len {
317 let k = key_read(self);
318 let v = val_read(self);
319 map.insert(k, v);
320 }
321 map
322 }
323
324 #[inline]
328 pub fn next_record_diff<K, V, FK, FV, FVD>(
329 &mut self,
330 a: &indexmap::IndexMap<K, V>,
331 mut key_read: FK,
332 mut val_read: FV,
333 mut val_diff: FVD,
334 ) -> indexmap::IndexMap<K, V>
335 where
336 K: Clone + Eq + std::hash::Hash,
337 V: Clone,
338 FK: FnMut(&mut Self) -> K,
339 FV: FnMut(&mut Self) -> V,
340 FVD: FnMut(&mut Self, &V) -> V,
341 {
342 let mut result = a.clone();
343
344 if !a.is_empty() {
346 let num_deletions = self.next_uint() as usize;
347 for _ in 0..num_deletions {
348 let idx = self.next_uint() as usize;
349 let key = a.get_index(idx).unwrap().0.clone();
350 result.shift_remove(&key);
351 }
352 let num_updates = self.next_uint() as usize;
353 for _ in 0..num_updates {
354 let idx = self.next_uint() as usize;
355 let key = a.get_index(idx).unwrap().0.clone();
356 let new_val = val_diff(self, result.get(&key).unwrap());
357 result.insert(key, new_val);
358 }
359 }
360
361 let num_additions = self.next_uint() as usize;
363 for _ in 0..num_additions {
364 let k = key_read(self);
365 let v = val_read(self);
366 result.insert(k, v);
367 }
368
369 result
370 }
371}
372
/// Round-trip tests: values are written with `Encoder` and read back with
/// `Decoder`.
#[cfg(test)]
// 3.14 and 3.14159 below are arbitrary sample values, not approximations of
// PI, so the deny-by-default `approx_constant` lint does not apply.
#[allow(clippy::approx_constant)]
mod tests {
    use super::*;
    use crate::Encoder;

    /// Two distinct strings round-trip in order.
    #[test]
    fn test_encode_decode_string() {
        let mut encoder = Encoder::new();
        encoder.push_string("hello");
        encoder.push_string("world");
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_string(), "hello");
        assert_eq!(decoder.next_string(), "world");
    }

    /// The same string pushed twice decodes to the same text both times.
    #[test]
    fn test_encode_decode_string_dictionary() {
        let mut encoder = Encoder::new();
        encoder.push_string("hello");
        encoder.push_string("hello");
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_string(), "hello");
        assert_eq!(decoder.next_string(), "hello");
    }

    /// Positive, negative and zero signed integers round-trip.
    #[test]
    fn test_encode_decode_int() {
        let mut encoder = Encoder::new();
        encoder.push_int(42);
        encoder.push_int(-100);
        encoder.push_int(0);
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_int(), 42);
        assert_eq!(decoder.next_int(), -100);
        assert_eq!(decoder.next_int(), 0);
    }

    /// Unsigned integers round-trip, including 127/128 and 16383.
    #[test]
    fn test_encode_decode_uint() {
        let mut encoder = Encoder::new();
        encoder.push_uint(0);
        encoder.push_uint(127);
        encoder.push_uint(128);
        encoder.push_uint(16383);
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_uint(), 0);
        assert_eq!(decoder.next_uint(), 127);
        assert_eq!(decoder.next_uint(), 128);
        assert_eq!(decoder.next_uint(), 16383);
    }

    /// Floats round-trip within a small tolerance.
    #[test]
    fn test_encode_decode_float() {
        let mut encoder = Encoder::new();
        encoder.push_float(3.14);
        encoder.push_float(-2.5);
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert!((decoder.next_float() - 3.14).abs() < 0.001);
        assert!((decoder.next_float() - (-2.5)).abs() < 0.001);
    }

    /// A quantized float decodes to within one quantization step.
    #[test]
    fn test_encode_decode_float_quantized() {
        let mut encoder = Encoder::new();
        encoder.push_float_quantized(3.14159, 0.01);
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        let val = decoder.next_float_quantized(0.01);
        assert!((val - 3.14).abs() < 0.01);
    }

    /// A sequence of booleans round-trips in order.
    #[test]
    fn test_encode_decode_boolean() {
        let mut encoder = Encoder::new();
        encoder.push_boolean(true);
        encoder.push_boolean(false);
        encoder.push_boolean(true);
        encoder.push_boolean(true);
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert!(decoder.next_boolean());
        assert!(!decoder.next_boolean());
        assert!(decoder.next_boolean());
        assert!(decoder.next_boolean());
    }

    /// Strings, integers, floats and booleans can be interleaved.
    #[test]
    fn test_encode_decode_mixed() {
        let mut encoder = Encoder::new();
        encoder.push_string("test");
        encoder.push_int(42);
        encoder.push_boolean(true);
        encoder.push_float(3.14);
        encoder.push_boolean(false);
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_string(), "test");
        assert_eq!(decoder.next_int(), 42);
        assert!(decoder.next_boolean());
        assert!((decoder.next_float() - 3.14).abs() < 0.001);
        assert!(!decoder.next_boolean());
    }

    /// String diffs round-trip for both an unchanged and a changed value.
    #[test]
    fn test_diff_string() {
        let mut encoder = Encoder::new();
        encoder.push_string_diff("hello", "hello");
        encoder.push_string_diff("hello", "world");
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_string_diff("hello"), "hello");
        assert_eq!(decoder.next_string_diff("hello"), "world");
    }

    /// Integer diffs round-trip for both an unchanged and a changed value.
    #[test]
    fn test_diff_int() {
        let mut encoder = Encoder::new();
        encoder.push_int_diff(10, 10);
        encoder.push_int_diff(10, 20);
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_int_diff(10), 10);
        assert_eq!(decoder.next_int_diff(10), 20);
    }

    /// An array of integers round-trips.
    #[test]
    fn test_array_encode_decode() {
        let mut encoder = Encoder::new();
        let arr = vec![1i64, 2, 3, 4, 5];
        encoder.push_array(&arr, |enc, &x| enc.push_int(x));
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        let result: Vec<i64> = decoder.next_array(|dec| dec.next_int());
        assert_eq!(result, arr);
    }

    /// An array diff with one changed element and one appended element.
    #[test]
    fn test_array_diff() {
        let a = vec![1i64, 2, 3];
        let b = vec![1i64, 5, 3, 4];

        let mut encoder = Encoder::new();
        encoder.push_array_diff(
            &a,
            &b,
            |x, y| x == y,
            |enc: &mut Encoder, &x| enc.push_int(x),
            |enc: &mut Encoder, _, &x| enc.push_int(x),
        );
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        let result: Vec<i64> = decoder.next_array_diff(
            &a,
            |dec| dec.next_int(),
            |dec, _| dec.next_int(),
        );
        assert_eq!(result, b);
    }

    /// `Some` and `None` optionals round-trip.
    #[test]
    fn test_optional_encode_decode() {
        let mut encoder = Encoder::new();
        encoder.push_optional(&Some(42i64), |enc, &x| enc.push_int(x));
        encoder.push_optional(&None::<i64>, |enc, &x| enc.push_int(x));
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_optional(|dec| dec.next_int()), Some(42));
        assert_eq!(decoder.next_optional(|dec| dec.next_int()), None);
    }

    /// An optional diff from `None` to `Some` round-trips.
    #[test]
    fn test_optional_diff() {
        let mut encoder = Encoder::new();
        encoder.push_optional_diff(
            &None::<i64>,
            &Some(42i64),
            |enc: &mut Encoder, &x| enc.push_int(x),
            |enc: &mut Encoder, _, &x| enc.push_int(x),
        );
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        let result =
            decoder.next_optional_diff(&None, |dec| dec.next_int(), |dec, _| dec.next_int());
        assert_eq!(result, Some(42));
    }

    /// A string-keyed record round-trips with its insertion order.
    #[test]
    fn test_record_encode_decode() {
        let mut encoder = Encoder::new();
        let mut map = indexmap::IndexMap::new();
        map.insert("a".to_string(), 1i64);
        map.insert("b".to_string(), 2i64);
        encoder.push_record(&map, |enc, k| enc.push_string(k), |enc, &v| enc.push_int(v));
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        let result: indexmap::IndexMap<String, i64> =
            decoder.next_record(|dec| dec.next_string(), |dec| dec.next_int());
        assert_eq!(result, map);
    }
}