Skip to main content

delta_pack/
encoder.rs

1use crate::rle::RleWriter;
2use crate::varint::{write_uvarint_vec, write_varint_vec};
3
4/// Fast string hash (FxHash algorithm).
5#[inline]
6fn hash_str(s: &str) -> u64 {
7    const K: u64 = 0x517cc1b727220a95;
8    let bytes = s.as_bytes();
9    let mut hash: u64 = 0;
10    for chunk in bytes.chunks(8) {
11        let mut val = 0u64;
12        for (i, &b) in chunk.iter().enumerate() {
13            val |= (b as u64) << (i * 8);
14        }
15        hash = (hash.rotate_left(5) ^ val).wrapping_mul(K);
16    }
17    hash
18}
19
20/// Binary encoder with RLE bit packing.
21pub struct Encoder {
22    buffer: Vec<u8>,
23    dict: Vec<u64>,
24    rle: RleWriter,
25}
26
27impl Encoder {
28    /// Create a new encoder.
29    #[inline]
30    pub fn new() -> Self {
31        Self {
32            buffer: Vec::with_capacity(32),
33            dict: Vec::new(),
34            rle: RleWriter::new(),
35        }
36    }
37
38    /// Encode using a thread-local encoder for optimal performance.
39    /// This avoids allocation overhead by reusing the encoder across calls.
40    #[inline]
41    pub fn encode<F, R>(f: F) -> R
42    where
43        F: FnOnce(&mut Encoder) -> R,
44    {
45        use std::cell::RefCell;
46        thread_local! {
47            static ENCODER: RefCell<Encoder> = RefCell::new(Encoder::new());
48        }
49        ENCODER.with(|enc| f(&mut enc.borrow_mut()))
50    }
51
52    // Primitive methods
53
54    /// Push a string value with dictionary deduplication.
55    #[inline]
56    pub fn push_string(&mut self, val: &str) {
57        if val.is_empty() {
58            self.push_int(0);
59            return;
60        }
61
62        // Check dictionary for deduplication
63        let hash = hash_str(val);
64        if let Some(idx) = self.dict.iter().position(|&h| h == hash) {
65            self.push_int(-(idx as i64) - 1);
66            return;
67        }
68
69        self.dict.push(hash);
70        let bytes = val.as_bytes();
71        self.push_int(bytes.len() as i64);
72        self.buffer.extend_from_slice(bytes);
73    }
74
75    #[inline]
76    pub fn push_int(&mut self, val: i64) {
77        write_varint_vec(&mut self.buffer, val);
78    }
79
80    #[inline]
81    pub fn push_bounded_int(&mut self, val: i64, min: i64) {
82        self.push_uint((val - min) as u64);
83    }
84
85    #[inline]
86    pub fn push_uint(&mut self, val: u64) {
87        write_uvarint_vec(&mut self.buffer, val);
88    }
89
90    #[inline]
91    pub fn push_float(&mut self, val: f32) {
92        self.buffer.extend_from_slice(&val.to_le_bytes());
93    }
94
95    #[inline]
96    pub fn push_float_quantized(&mut self, val: f32, precision: f32) {
97        self.push_int((val / precision).round() as i64);
98    }
99
100    #[inline]
101    pub fn push_boolean(&mut self, val: bool) {
102        self.rle.push_bit(val);
103    }
104
105    #[inline]
106    pub fn push_enum(&mut self, val: u32, num_bits: u8) {
107        self.rle.push_bits(val, num_bits);
108    }
109
110    // Diff methods (value-only - caller handles change bit for object fields)
111
112    #[inline]
113    pub fn push_string_diff(&mut self, a: &str, b: &str) {
114        // Ensure 'a' is in dictionary for decoder sync
115        let hash_a = hash_str(a);
116        if !self.dict.contains(&hash_a) {
117            self.dict.push(hash_a);
118        }
119        self.push_string(b);
120    }
121
122    #[inline]
123    pub fn push_int_diff(&mut self, _a: i64, b: i64) {
124        self.push_int(b);
125    }
126
127    #[inline]
128    pub fn push_uint_diff(&mut self, _a: u64, b: u64) {
129        self.push_uint(b);
130    }
131
132    #[inline]
133    pub fn push_bounded_int_diff(&mut self, _a: i64, b: i64, min: i64) {
134        self.push_bounded_int(b, min);
135    }
136
137    #[inline]
138    pub fn push_float_diff(&mut self, _a: f32, b: f32) {
139        self.push_float(b);
140    }
141
142    #[inline]
143    pub fn push_float_quantized_diff(&mut self, _a: f32, b: f32, precision: f32) {
144        self.push_float_quantized(b, precision);
145    }
146
147    #[inline]
148    pub fn push_boolean_diff(&mut self, a: bool, b: bool) {
149        // Boolean diff is special - change bit IS the diff
150        self.push_boolean(a != b);
151    }
152
153    #[inline]
154    pub fn push_enum_diff(&mut self, _a: u32, b: u32, num_bits: u8) {
155        self.push_enum(b, num_bits);
156    }
157
158    // Object diff helper (wrap object encoding with change bit)
159
160    #[inline]
161    pub fn push_object_diff<T, E, F>(&mut self, a: &T, b: &T, equals: E, encode_diff: F)
162    where
163        E: FnOnce(&T, &T) -> bool,
164        F: FnOnce(&mut Self),
165    {
166        let changed = !equals(a, b);
167        self.push_boolean(changed);
168        if changed {
169            encode_diff(self);
170        }
171    }
172
173    // Field diff helper (wrap value-only diff with change bit)
174
175    #[inline]
176    pub fn push_field_diff<T, E, F>(&mut self, a: &T, b: &T, equals: E, encode_diff: F)
177    where
178        E: FnOnce(&T, &T) -> bool,
179        F: FnOnce(&mut Self, &T, &T),
180    {
181        let changed = !equals(a, b);
182        self.push_boolean(changed);
183        if changed {
184            encode_diff(self, a, b);
185        }
186    }
187
188    // Array helpers
189
190    /// Encode an array by writing length followed by each element.
191    #[inline]
192    pub fn push_array<T, F>(&mut self, arr: &[T], mut inner_write: F)
193    where
194        F: FnMut(&mut Self, &T),
195    {
196        self.push_uint(arr.len() as u64);
197        for item in arr {
198            inner_write(self, item);
199        }
200    }
201
202    /// Encode array diff by comparing lengths and elements.
203    /// Caller handles change bit.
204    #[inline]
205    pub fn push_array_diff<T, FW, FD, E>(
206        &mut self,
207        a: &[T],
208        b: &[T],
209        mut equals: E,
210        mut inner_write: FW,
211        mut inner_diff: FD,
212    ) where
213        FW: FnMut(&mut Self, &T),
214        FD: FnMut(&mut Self, &T, &T),
215        E: FnMut(&T, &T) -> bool,
216    {
217        self.push_uint(b.len() as u64);
218
219        // Collect changed indices (sparse encoding)
220        let min_len = a.len().min(b.len());
221        let updates: Vec<usize> = (0..min_len).filter(|&i| !equals(&a[i], &b[i])).collect();
222
223        // Write updates (sparse)
224        self.push_uint(updates.len() as u64);
225        for i in updates {
226            self.push_uint(i as u64);
227            inner_diff(self, &a[i], &b[i]);
228        }
229
230        // Write additions
231        for item in b.iter().skip(a.len()) {
232            inner_write(self, item);
233        }
234    }
235
236    // Optional helpers
237
238    /// Encode an optional value by writing presence flag followed by value if present.
239    #[inline]
240    pub fn push_optional<T, F>(&mut self, opt: &Option<T>, mut inner_write: F)
241    where
242        F: FnMut(&mut Self, &T),
243    {
244        self.push_boolean(opt.is_some());
245        if let Some(val) = opt {
246            inner_write(self, val);
247        }
248    }
249
250    /// Encode optional diff, matching TS/C# format.
251    /// Optimization: if a was None, we know b must be Some (else unchanged).
252    /// So skip the present bit in None→Some case.
253    #[inline]
254    pub fn push_optional_diff<T, FW, FD>(
255        &mut self,
256        a: &Option<T>,
257        b: &Option<T>,
258        mut inner_write: FW,
259        mut inner_diff: FD,
260    ) where
261        FW: FnMut(&mut Self, &T),
262        FD: FnMut(&mut Self, &T, &T),
263    {
264        match a {
265            None => {
266                // None → Some (b guaranteed Some by caller)
267                inner_write(self, b.as_ref().unwrap());
268            }
269            Some(av) => {
270                self.push_boolean(b.is_some());
271                if let Some(bv) = b {
272                    inner_diff(self, av, bv); // Some → Some
273                }
274                // else Some → None
275            }
276        }
277    }
278
279    // Record (map) helpers
280
281    /// Encode a record (map) by writing length followed by key-value pairs.
282    #[inline]
283    pub fn push_record<K, V, FK, FV>(
284        &mut self,
285        map: &std::collections::HashMap<K, V>,
286        mut key_write: FK,
287        mut val_write: FV,
288    ) where
289        K: std::hash::Hash + Eq,
290        FK: FnMut(&mut Self, &K),
291        FV: FnMut(&mut Self, &V),
292    {
293        self.push_uint(map.len() as u64);
294        for (k, v) in map {
295            key_write(self, k);
296            val_write(self, v);
297        }
298    }
299
300    /// Encode record diff, matching TS/C# format.
301    /// Caller handles change bit.
302    #[inline]
303    pub fn push_record_diff<K, V, FK, FV, FVD, E>(
304        &mut self,
305        a: &std::collections::HashMap<K, V>,
306        b: &std::collections::HashMap<K, V>,
307        mut equals: E,
308        mut key_write: FK,
309        mut val_write: FV,
310        mut val_diff: FVD,
311    ) where
312        K: Clone + std::hash::Hash + Eq,
313        FK: FnMut(&mut Self, &K),
314        FV: FnMut(&mut Self, &V),
315        FVD: FnMut(&mut Self, &V, &V),
316        E: FnMut(&V, &V) -> bool,
317    {
318        let mut deletions = Vec::new();
319        let mut updates = Vec::new();
320        let mut additions = Vec::new();
321
322        for (k, av) in a {
323            if let Some(bv) = b.get(k) {
324                if !equals(av, bv) {
325                    updates.push(k.clone());
326                }
327            } else {
328                deletions.push(k.clone());
329            }
330        }
331
332        for (k, v) in b {
333            if !a.contains_key(k) {
334                additions.push((k.clone(), v));
335            }
336        }
337
338        // Write deletions and updates (only if a was non-empty)
339        if !a.is_empty() {
340            self.push_uint(deletions.len() as u64);
341            for key in &deletions {
342                key_write(self, key);
343            }
344            self.push_uint(updates.len() as u64);
345            for key in &updates {
346                key_write(self, key);
347                val_diff(self, a.get(key).unwrap(), b.get(key).unwrap());
348            }
349        }
350
351        // Write additions
352        self.push_uint(additions.len() as u64);
353        for (k, v) in additions {
354            key_write(self, &k);
355            val_write(self, v);
356        }
357    }
358
359    /// Finish encoding and return the buffer.
360    #[inline]
361    pub fn finish(&mut self) -> Vec<u8> {
362        self.rle.write_to_buffer(&mut self.buffer);
363        let result = std::mem::take(&mut self.buffer);
364        self.buffer = Vec::with_capacity(64);
365        self.dict.clear();
366        self.rle.reset();
367        result
368    }
369}
370
371impl Default for Encoder {
372    fn default() -> Self {
373        Self::new()
374    }
375}
376
377#[cfg(test)]
378mod tests {
379    use super::*;
380
381    #[test]
382    fn test_encode_string() {
383        let mut encoder = Encoder::new();
384        encoder.push_string("hello");
385        let buf = encoder.finish();
386        assert!(!buf.is_empty());
387    }
388
389    #[test]
390    fn test_encode_string_dictionary() {
391        let mut encoder = Encoder::new();
392        encoder.push_string("hello");
393        encoder.push_string("world");
394        encoder.push_string("hello"); // Should use dictionary
395        let buf = encoder.finish();
396        assert!(!buf.is_empty());
397    }
398
399    #[test]
400    fn test_encode_int() {
401        let mut encoder = Encoder::new();
402        encoder.push_int(42);
403        encoder.push_int(-100);
404        let buf = encoder.finish();
405        assert!(!buf.is_empty());
406    }
407
408    #[test]
409    fn test_encode_float() {
410        let mut encoder = Encoder::new();
411        encoder.push_float(3.14);
412        let buf = encoder.finish();
413        assert_eq!(buf.len(), 4 + 1); // 4 bytes float + 1 byte RLE (0 bits)
414    }
415
416    #[test]
417    fn test_encode_boolean() {
418        let mut encoder = Encoder::new();
419        encoder.push_boolean(true);
420        encoder.push_boolean(false);
421        encoder.push_boolean(true);
422        let buf = encoder.finish();
423        assert!(!buf.is_empty());
424    }
425}