Skip to main content

delta_pack/
decoder.rs

1use crate::rle::RleReader;
2use crate::varint::{read_uvarint, read_varint};
3
/// Dictionary entry - either a buffer position or an owned string (for diff entries).
///
/// The standard derives are provided so entries can be printed in diagnostics and
/// compared in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
enum DictEntry {
    /// `(start, len)`: the string occupies `buffer[start..start + len]` of the
    /// decoder's input, so no copy is stored.
    Pos(usize, usize),
    /// A string held by value, registered by `next_string_diff` from the caller's
    /// previous value, which does not live in the input buffer.
    Owned(String),
}
9
10/// Binary decoder with string dictionary and RLE bit unpacking.
11pub struct Decoder<'a> {
12    buffer: &'a [u8],
13    pos: usize,
14    dict: Vec<DictEntry>,
15    rle: RleReader<'a>,
16}
17
18impl<'a> Decoder<'a> {
19    pub fn new(buf: &'a [u8]) -> Self {
20        Self {
21            buffer: buf,
22            pos: 0,
23            dict: Vec::new(),
24            rle: RleReader::new(buf),
25        }
26    }
27
28    /// Decode using a thread-local decoder for optimal performance.
29    /// This avoids allocation overhead by reusing the dictionary Vec across calls.
30    #[inline]
31    pub fn decode<F, R>(buf: &[u8], f: F) -> R
32    where
33        F: FnOnce(&mut Decoder) -> R,
34    {
35        use std::cell::RefCell;
36        thread_local! {
37            static DICT: RefCell<Vec<DictEntry>> = const { RefCell::new(Vec::new()) };
38        }
39        DICT.with(|dict| {
40            let mut dict = dict.borrow_mut();
41            dict.clear();
42            let mut dec = Decoder {
43                buffer: buf,
44                pos: 0,
45                dict: std::mem::take(&mut *dict),
46                rle: RleReader::new(buf),
47            };
48            let result = f(&mut dec);
49            *dict = dec.dict;
50            result
51        })
52    }
53
54    // Primitive methods
55
56    #[inline]
57    pub fn next_string(&mut self) -> String {
58        let (len_or_idx, bytes_read) = read_varint(self.buffer, self.pos);
59        self.pos += bytes_read;
60
61        if len_or_idx == 0 {
62            return String::new();
63        }
64
65        if len_or_idx > 0 {
66            let len = len_or_idx as usize;
67            let start = self.pos;
68            self.pos += len;
69            self.dict.push(DictEntry::Pos(start, len));
70            return String::from(
71                std::str::from_utf8(&self.buffer[start..start + len])
72                    .expect("invalid UTF-8 in buffer"),
73            );
74        }
75
76        // Negative = dictionary index
77        match &self.dict[(-len_or_idx - 1) as usize] {
78            DictEntry::Pos(start, len) => String::from(
79                std::str::from_utf8(&self.buffer[*start..*start + *len])
80                    .expect("invalid UTF-8 in buffer"),
81            ),
82            DictEntry::Owned(s) => s.clone(),
83        }
84    }
85
86    #[inline]
87    pub fn next_int(&mut self) -> i64 {
88        let (val, bytes_read) = read_varint(self.buffer, self.pos);
89        self.pos += bytes_read;
90        val
91    }
92
93    #[inline]
94    pub fn next_uint(&mut self) -> u64 {
95        let (val, bytes_read) = read_uvarint(self.buffer, self.pos);
96        self.pos += bytes_read;
97        val
98    }
99
100    #[inline]
101    pub fn next_bounded_int(&mut self, min: i64) -> i64 {
102        self.next_uint() as i64 + min
103    }
104
105    #[inline]
106    pub fn next_float(&mut self) -> f32 {
107        let bytes: [u8; 4] = self.buffer[self.pos..self.pos + 4].try_into().unwrap();
108        self.pos += 4;
109        f32::from_le_bytes(bytes)
110    }
111
112    #[inline]
113    pub fn next_float_quantized(&mut self, precision: f32) -> f32 {
114        self.next_int() as f32 * precision
115    }
116
117    #[inline]
118    pub fn next_boolean(&mut self) -> bool {
119        self.rle.next_bit()
120    }
121
122    #[inline]
123    pub fn next_enum(&mut self, num_bits: u8) -> u32 {
124        self.rle.next_bits(num_bits)
125    }
126
127    // Diff methods (value-only - caller handles change bit for object fields)
128
129    #[inline]
130    pub fn next_string_diff(&mut self, a: &str) -> String {
131        // Only add to dictionary if not already present (for decoder sync)
132        let already_in_dict = self.dict.iter().any(|entry| match entry {
133            DictEntry::Pos(start, len) => std::str::from_utf8(&self.buffer[*start..*start + *len])
134                .map(|s| s == a)
135                .unwrap_or(false),
136            DictEntry::Owned(s) => s == a,
137        });
138        if !already_in_dict {
139            self.dict.push(DictEntry::Owned(a.to_string()));
140        }
141        self.next_string()
142    }
143
144    #[inline]
145    pub fn next_int_diff(&mut self, _a: i64) -> i64 {
146        self.next_int()
147    }
148
149    #[inline]
150    pub fn next_uint_diff(&mut self, _a: u64) -> u64 {
151        self.next_uint()
152    }
153
154    #[inline]
155    pub fn next_bounded_int_diff(&mut self, _a: i64, min: i64) -> i64 {
156        self.next_bounded_int(min)
157    }
158
159    #[inline]
160    pub fn next_float_diff(&mut self, _a: f32) -> f32 {
161        self.next_float()
162    }
163
164    #[inline]
165    pub fn next_float_quantized_diff(&mut self, _a: f32, precision: f32) -> f32 {
166        self.next_float_quantized(precision)
167    }
168
169    #[inline]
170    pub fn next_boolean_diff(&mut self, a: bool) -> bool {
171        // Boolean diff is special - change bit IS the diff
172        a ^ self.next_boolean()
173    }
174
175    #[inline]
176    pub fn next_enum_diff(&mut self, _a: u32, num_bits: u8) -> u32 {
177        self.next_enum(num_bits)
178    }
179
180    // Object diff helper (read change bit, decode if changed)
181
182    #[inline]
183    pub fn next_object_diff<T, F>(&mut self, a: &T, decode_diff: F) -> T
184    where
185        T: Clone,
186        F: FnOnce(&mut Self) -> T,
187    {
188        if self.next_boolean() {
189            decode_diff(self)
190        } else {
191            a.clone()
192        }
193    }
194
195    // Field diff helper (read change bit, decode if changed)
196
197    #[inline]
198    pub fn next_field_diff<T, F>(&mut self, a: &T, decode_diff: F) -> T
199    where
200        T: Clone,
201        F: FnOnce(&mut Self, &T) -> T,
202    {
203        if self.next_boolean() {
204            decode_diff(self, a)
205        } else {
206            a.clone()
207        }
208    }
209
210    // Array helpers
211
212    /// Decode an array by reading length followed by each element.
213    #[inline]
214    pub fn next_array<T, F>(&mut self, mut inner_read: F) -> Vec<T>
215    where
216        F: FnMut(&mut Self) -> T,
217    {
218        let len = self.next_uint() as usize;
219        let mut arr = Vec::with_capacity(len);
220        for _ in 0..len {
221            arr.push(inner_read(self));
222        }
223        arr
224    }
225
226    /// Decode array diff, using sparse format with index-based updates.
227    /// Caller handles change bit.
228    #[inline]
229    pub fn next_array_diff<T, F, FD>(
230        &mut self,
231        a: &[T],
232        mut inner_read: F,
233        mut inner_diff: FD,
234    ) -> Vec<T>
235    where
236        T: Clone,
237        F: FnMut(&mut Self) -> T,
238        FD: FnMut(&mut Self, &T) -> T,
239    {
240        let new_len = self.next_uint() as usize;
241
242        // Start with copy of old array (truncated to new length)
243        let mut arr: Vec<T> = a.iter().take(new_len.min(a.len())).cloned().collect();
244
245        // Apply updates (sparse)
246        let num_updates = self.next_uint() as usize;
247        for _ in 0..num_updates {
248            let idx = self.next_uint() as usize;
249            arr[idx] = inner_diff(self, &a[idx]);
250        }
251
252        // Read additions
253        for _ in a.len()..new_len {
254            arr.push(inner_read(self));
255        }
256
257        arr
258    }
259
260    // Optional helpers
261
262    #[inline]
263    pub fn next_optional<T, F>(&mut self, mut inner_read: F) -> Option<T>
264    where
265        F: FnMut(&mut Self) -> T,
266    {
267        self.next_boolean().then(|| inner_read(self))
268    }
269
270    /// Decode optional diff, matching TS/C# format.
271    /// Optimization: if a was None, we know b must be Some (else unchanged).
272    /// So no present bit in None→Some case.
273    #[inline]
274    pub fn next_optional_diff<T, F, FD>(
275        &mut self,
276        a: &Option<T>,
277        mut inner_read: F,
278        mut inner_diff: FD,
279    ) -> Option<T>
280    where
281        T: Clone,
282        F: FnMut(&mut Self) -> T,
283        FD: FnMut(&mut Self, &T) -> T,
284    {
285        match a {
286            None => {
287                // None → Some (guaranteed Some by caller)
288                Some(inner_read(self))
289            }
290            Some(av) => {
291                if self.next_boolean() {
292                    Some(inner_diff(self, av)) // Some → Some
293                } else {
294                    None // Some → None
295                }
296            }
297        }
298    }
299
300    // Record (map) helpers
301
302    /// Decode a record (map) by reading length followed by key-value pairs.
303    #[inline]
304    pub fn next_record<K, V, FK, FV>(
305        &mut self,
306        mut key_read: FK,
307        mut val_read: FV,
308    ) -> indexmap::IndexMap<K, V>
309    where
310        K: Eq + std::hash::Hash,
311        FK: FnMut(&mut Self) -> K,
312        FV: FnMut(&mut Self) -> V,
313    {
314        let len = self.next_uint() as usize;
315        let mut map = indexmap::IndexMap::with_capacity(len);
316        for _ in 0..len {
317            let k = key_read(self);
318            let v = val_read(self);
319            map.insert(k, v);
320        }
321        map
322    }
323
324    /// Decode record diff, matching TS/C# format.
325    /// Caller handles change bit.
326    /// Format: if a.len > 0: deletions, updates; then additions
327    #[inline]
328    pub fn next_record_diff<K, V, FK, FV, FVD>(
329        &mut self,
330        a: &indexmap::IndexMap<K, V>,
331        mut key_read: FK,
332        mut val_read: FV,
333        mut val_diff: FVD,
334    ) -> indexmap::IndexMap<K, V>
335    where
336        K: Clone + Eq + std::hash::Hash,
337        V: Clone,
338        FK: FnMut(&mut Self) -> K,
339        FV: FnMut(&mut Self) -> V,
340        FVD: FnMut(&mut Self, &V) -> V,
341    {
342        let mut result = a.clone();
343
344        // Read deletions and updates (only if a was non-empty)
345        if !a.is_empty() {
346            let num_deletions = self.next_uint() as usize;
347            for _ in 0..num_deletions {
348                let idx = self.next_uint() as usize;
349                let key = a.get_index(idx).unwrap().0.clone();
350                result.shift_remove(&key);
351            }
352            let num_updates = self.next_uint() as usize;
353            for _ in 0..num_updates {
354                let idx = self.next_uint() as usize;
355                let key = a.get_index(idx).unwrap().0.clone();
356                let new_val = val_diff(self, result.get(&key).unwrap());
357                result.insert(key, new_val);
358            }
359        }
360
361        // Read additions
362        let num_additions = self.next_uint() as usize;
363        for _ in 0..num_additions {
364            let k = key_read(self);
365            let v = val_read(self);
366            result.insert(k, v);
367        }
368
369        result
370    }
371}
372
// Round-trip tests: each case encodes values with `Encoder` and checks that
// `Decoder` reads the same values back in the same order.
//
// Float literals deliberately avoid values such as 3.14 that Clippy's
// deny-by-default `approx_constant` lint treats as an approximation of PI.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Encoder;

    #[test]
    fn test_encode_decode_string() {
        let mut encoder = Encoder::new();
        encoder.push_string("hello");
        encoder.push_string("world");
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_string(), "hello");
        assert_eq!(decoder.next_string(), "world");
    }

    #[test]
    fn test_encode_decode_string_dictionary() {
        let mut encoder = Encoder::new();
        encoder.push_string("hello");
        encoder.push_string("hello"); // Uses dictionary
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_string(), "hello");
        assert_eq!(decoder.next_string(), "hello");
    }

    #[test]
    fn test_encode_decode_int() {
        let mut encoder = Encoder::new();
        encoder.push_int(42);
        encoder.push_int(-100);
        encoder.push_int(0);
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_int(), 42);
        assert_eq!(decoder.next_int(), -100);
        assert_eq!(decoder.next_int(), 0);
    }

    #[test]
    fn test_encode_decode_uint() {
        let mut encoder = Encoder::new();
        encoder.push_uint(0);
        encoder.push_uint(127);
        encoder.push_uint(128);
        encoder.push_uint(16383);
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_uint(), 0);
        assert_eq!(decoder.next_uint(), 127);
        assert_eq!(decoder.next_uint(), 128);
        assert_eq!(decoder.next_uint(), 16383);
    }

    #[test]
    fn test_encode_decode_float() {
        let mut encoder = Encoder::new();
        encoder.push_float(1.25);
        encoder.push_float(-2.5);
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert!((decoder.next_float() - 1.25).abs() < 0.001);
        assert!((decoder.next_float() - (-2.5)).abs() < 0.001);
    }

    #[test]
    fn test_encode_decode_float_quantized() {
        let mut encoder = Encoder::new();
        encoder.push_float_quantized(1.23456, 0.01);
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        let val = decoder.next_float_quantized(0.01);
        assert!((val - 1.23).abs() < 0.01);
    }

    #[test]
    fn test_encode_decode_boolean() {
        let mut encoder = Encoder::new();
        encoder.push_boolean(true);
        encoder.push_boolean(false);
        encoder.push_boolean(true);
        encoder.push_boolean(true);
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert!(decoder.next_boolean());
        assert!(!decoder.next_boolean());
        assert!(decoder.next_boolean());
        assert!(decoder.next_boolean());
    }

    #[test]
    fn test_encode_decode_mixed() {
        let mut encoder = Encoder::new();
        encoder.push_string("test");
        encoder.push_int(42);
        encoder.push_boolean(true);
        encoder.push_float(1.25);
        encoder.push_boolean(false);
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_string(), "test");
        assert_eq!(decoder.next_int(), 42);
        assert!(decoder.next_boolean());
        assert!((decoder.next_float() - 1.25).abs() < 0.001);
        assert!(!decoder.next_boolean());
    }

    #[test]
    fn test_diff_string() {
        let mut encoder = Encoder::new();
        encoder.push_string_diff("hello", "hello"); // unchanged
        encoder.push_string_diff("hello", "world"); // changed
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_string_diff("hello"), "hello");
        assert_eq!(decoder.next_string_diff("hello"), "world");
    }

    #[test]
    fn test_diff_int() {
        let mut encoder = Encoder::new();
        encoder.push_int_diff(10, 10); // unchanged
        encoder.push_int_diff(10, 20); // changed
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_int_diff(10), 10);
        assert_eq!(decoder.next_int_diff(10), 20);
    }

    #[test]
    fn test_array_encode_decode() {
        let mut encoder = Encoder::new();
        let arr = vec![1i64, 2, 3, 4, 5];
        encoder.push_array(&arr, |enc, &x| enc.push_int(x));
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        let result: Vec<i64> = decoder.next_array(|dec| dec.next_int());
        assert_eq!(result, arr);
    }

    #[test]
    fn test_array_diff() {
        let a = vec![1i64, 2, 3];
        let b = vec![1i64, 5, 3, 4]; // changed element and added element

        let mut encoder = Encoder::new();
        encoder.push_array_diff(
            &a,
            &b,
            |x, y| x == y,
            |enc: &mut Encoder, &x| enc.push_int(x),
            |enc: &mut Encoder, _, &x| enc.push_int(x), // diff takes (a, b)
        );
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        let result: Vec<i64> = decoder.next_array_diff(
            &a,
            |dec| dec.next_int(),
            |dec, _| dec.next_int(), // diff returns new value
        );
        assert_eq!(result, b);
    }

    #[test]
    fn test_optional_encode_decode() {
        let mut encoder = Encoder::new();
        encoder.push_optional(&Some(42i64), |enc, &x| enc.push_int(x));
        encoder.push_optional(&None::<i64>, |enc, &x| enc.push_int(x));
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        assert_eq!(decoder.next_optional(|dec| dec.next_int()), Some(42));
        assert_eq!(decoder.next_optional(|dec| dec.next_int()), None);
    }

    #[test]
    fn test_optional_diff() {
        // None to Some
        let mut encoder = Encoder::new();
        encoder.push_optional_diff(
            &None::<i64>,
            &Some(42i64),
            |enc: &mut Encoder, &x| enc.push_int(x),
            |enc: &mut Encoder, _, &x| enc.push_int(x), // diff takes (a, b)
        );
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        let result =
            decoder.next_optional_diff(&None, |dec| dec.next_int(), |dec, _| dec.next_int());
        assert_eq!(result, Some(42));
    }

    #[test]
    fn test_record_encode_decode() {
        let mut encoder = Encoder::new();
        let mut map = indexmap::IndexMap::new();
        map.insert("a".to_string(), 1i64);
        map.insert("b".to_string(), 2i64);
        encoder.push_record(&map, |enc, k| enc.push_string(k), |enc, &v| enc.push_int(v));
        let buf = encoder.finish();

        let mut decoder = Decoder::new(&buf);
        let result: indexmap::IndexMap<String, i64> =
            decoder.next_record(|dec| dec.next_string(), |dec| dec.next_int());
        assert_eq!(result, map);
    }
}
593}