1use crate::rle::RleWriter;
2use crate::varint::{write_uvarint_vec, write_varint_vec};
3
4#[inline]
6fn hash_str(s: &str) -> u64 {
7 const K: u64 = 0x517cc1b727220a95;
8 let bytes = s.as_bytes();
9 let mut hash: u64 = 0;
10 for chunk in bytes.chunks(8) {
11 let mut val = 0u64;
12 for (i, &b) in chunk.iter().enumerate() {
13 val |= (b as u64) << (i * 8);
14 }
15 hash = (hash.rotate_left(5) ^ val).wrapping_mul(K);
16 }
17 hash
18}
19
20pub struct Encoder {
22 buffer: Vec<u8>,
23 dict: Vec<u64>,
24 rle: RleWriter,
25}
26
27impl Encoder {
28 #[inline]
30 pub fn new() -> Self {
31 Self {
32 buffer: Vec::with_capacity(32),
33 dict: Vec::new(),
34 rle: RleWriter::new(),
35 }
36 }
37
38 #[inline]
41 pub fn encode<F, R>(f: F) -> R
42 where
43 F: FnOnce(&mut Encoder) -> R,
44 {
45 use std::cell::RefCell;
46 thread_local! {
47 static ENCODER: RefCell<Encoder> = RefCell::new(Encoder::new());
48 }
49 ENCODER.with(|enc| f(&mut enc.borrow_mut()))
50 }
51
52 #[inline]
56 pub fn push_string(&mut self, val: &str) {
57 if val.is_empty() {
58 self.push_int(0);
59 return;
60 }
61
62 let hash = hash_str(val);
64 if let Some(idx) = self.dict.iter().position(|&h| h == hash) {
65 self.push_int(-(idx as i64) - 1);
66 return;
67 }
68
69 self.dict.push(hash);
70 let bytes = val.as_bytes();
71 self.push_int(bytes.len() as i64);
72 self.buffer.extend_from_slice(bytes);
73 }
74
75 #[inline]
76 pub fn push_int(&mut self, val: i64) {
77 write_varint_vec(&mut self.buffer, val);
78 }
79
80 #[inline]
81 pub fn push_bounded_int(&mut self, val: i64, min: i64) {
82 self.push_uint((val - min) as u64);
83 }
84
85 #[inline]
86 pub fn push_uint(&mut self, val: u64) {
87 write_uvarint_vec(&mut self.buffer, val);
88 }
89
90 #[inline]
91 pub fn push_float(&mut self, val: f32) {
92 self.buffer.extend_from_slice(&val.to_le_bytes());
93 }
94
95 #[inline]
96 pub fn push_float_quantized(&mut self, val: f32, precision: f32) {
97 self.push_int((val / precision).round() as i64);
98 }
99
100 #[inline]
101 pub fn push_boolean(&mut self, val: bool) {
102 self.rle.push_bit(val);
103 }
104
105 #[inline]
106 pub fn push_enum(&mut self, val: u32, num_bits: u8) {
107 self.rle.push_bits(val, num_bits);
108 }
109
110 #[inline]
113 pub fn push_string_diff(&mut self, a: &str, b: &str) {
114 let hash_a = hash_str(a);
116 if !self.dict.contains(&hash_a) {
117 self.dict.push(hash_a);
118 }
119 self.push_string(b);
120 }
121
122 #[inline]
123 pub fn push_int_diff(&mut self, _a: i64, b: i64) {
124 self.push_int(b);
125 }
126
127 #[inline]
128 pub fn push_uint_diff(&mut self, _a: u64, b: u64) {
129 self.push_uint(b);
130 }
131
132 #[inline]
133 pub fn push_bounded_int_diff(&mut self, _a: i64, b: i64, min: i64) {
134 self.push_bounded_int(b, min);
135 }
136
137 #[inline]
138 pub fn push_float_diff(&mut self, _a: f32, b: f32) {
139 self.push_float(b);
140 }
141
142 #[inline]
143 pub fn push_float_quantized_diff(&mut self, _a: f32, b: f32, precision: f32) {
144 self.push_float_quantized(b, precision);
145 }
146
147 #[inline]
148 pub fn push_boolean_diff(&mut self, a: bool, b: bool) {
149 self.push_boolean(a != b);
151 }
152
153 #[inline]
154 pub fn push_enum_diff(&mut self, _a: u32, b: u32, num_bits: u8) {
155 self.push_enum(b, num_bits);
156 }
157
158 #[inline]
161 pub fn push_object_diff<T, E, F>(&mut self, a: &T, b: &T, equals: E, encode_diff: F)
162 where
163 E: FnOnce(&T, &T) -> bool,
164 F: FnOnce(&mut Self),
165 {
166 let changed = !equals(a, b);
167 self.push_boolean(changed);
168 if changed {
169 encode_diff(self);
170 }
171 }
172
173 #[inline]
176 pub fn push_field_diff<T, E, F>(&mut self, a: &T, b: &T, equals: E, encode_diff: F)
177 where
178 E: FnOnce(&T, &T) -> bool,
179 F: FnOnce(&mut Self, &T, &T),
180 {
181 let changed = !equals(a, b);
182 self.push_boolean(changed);
183 if changed {
184 encode_diff(self, a, b);
185 }
186 }
187
188 #[inline]
192 pub fn push_array<T, F>(&mut self, arr: &[T], mut inner_write: F)
193 where
194 F: FnMut(&mut Self, &T),
195 {
196 self.push_uint(arr.len() as u64);
197 for item in arr {
198 inner_write(self, item);
199 }
200 }
201
202 #[inline]
205 pub fn push_array_diff<T, FW, FD, E>(
206 &mut self,
207 a: &[T],
208 b: &[T],
209 mut equals: E,
210 mut inner_write: FW,
211 mut inner_diff: FD,
212 ) where
213 FW: FnMut(&mut Self, &T),
214 FD: FnMut(&mut Self, &T, &T),
215 E: FnMut(&T, &T) -> bool,
216 {
217 self.push_uint(b.len() as u64);
218
219 let min_len = a.len().min(b.len());
221 let updates: Vec<usize> = (0..min_len).filter(|&i| !equals(&a[i], &b[i])).collect();
222
223 self.push_uint(updates.len() as u64);
225 for i in updates {
226 self.push_uint(i as u64);
227 inner_diff(self, &a[i], &b[i]);
228 }
229
230 for item in b.iter().skip(a.len()) {
232 inner_write(self, item);
233 }
234 }
235
236 #[inline]
240 pub fn push_optional<T, F>(&mut self, opt: &Option<T>, mut inner_write: F)
241 where
242 F: FnMut(&mut Self, &T),
243 {
244 self.push_boolean(opt.is_some());
245 if let Some(val) = opt {
246 inner_write(self, val);
247 }
248 }
249
250 #[inline]
254 pub fn push_optional_diff<T, FW, FD>(
255 &mut self,
256 a: &Option<T>,
257 b: &Option<T>,
258 mut inner_write: FW,
259 mut inner_diff: FD,
260 ) where
261 FW: FnMut(&mut Self, &T),
262 FD: FnMut(&mut Self, &T, &T),
263 {
264 match a {
265 None => {
266 inner_write(self, b.as_ref().unwrap());
268 }
269 Some(av) => {
270 self.push_boolean(b.is_some());
271 if let Some(bv) = b {
272 inner_diff(self, av, bv); }
274 }
276 }
277 }
278
279 #[inline]
283 pub fn push_record<K, V, FK, FV>(
284 &mut self,
285 map: &indexmap::IndexMap<K, V>,
286 mut key_write: FK,
287 mut val_write: FV,
288 ) where
289 K: std::hash::Hash + Eq,
290 FK: FnMut(&mut Self, &K),
291 FV: FnMut(&mut Self, &V),
292 {
293 self.push_uint(map.len() as u64);
294 for (k, v) in map {
295 key_write(self, k);
296 val_write(self, v);
297 }
298 }
299
300 #[inline]
303 pub fn push_record_diff<K, V, FK, FV, FVD, E>(
304 &mut self,
305 a: &indexmap::IndexMap<K, V>,
306 b: &indexmap::IndexMap<K, V>,
307 mut equals: E,
308 mut key_write: FK,
309 mut val_write: FV,
310 mut val_diff: FVD,
311 ) where
312 K: Clone + std::hash::Hash + Eq,
313 FK: FnMut(&mut Self, &K),
314 FV: FnMut(&mut Self, &V),
315 FVD: FnMut(&mut Self, &V, &V),
316 E: FnMut(&V, &V) -> bool,
317 {
318 let mut deletions: Vec<usize> = Vec::new();
319 let mut updates: Vec<(usize, K)> = Vec::new();
320 let mut additions = Vec::new();
321
322 for (idx, (k, av)) in a.iter().enumerate() {
323 if let Some(bv) = b.get(k) {
324 if !equals(av, bv) {
325 updates.push((idx, k.clone()));
326 }
327 } else {
328 deletions.push(idx);
329 }
330 }
331
332 for (k, v) in b {
333 if !a.contains_key(k) {
334 additions.push((k.clone(), v));
335 }
336 }
337
338 if !a.is_empty() {
340 self.push_uint(deletions.len() as u64);
341 for del_idx in &deletions {
342 self.push_uint(*del_idx as u64);
343 }
344 self.push_uint(updates.len() as u64);
345 for (upd_idx, key) in &updates {
346 self.push_uint(*upd_idx as u64);
347 val_diff(self, a.get(key).unwrap(), b.get(key).unwrap());
348 }
349 }
350
351 self.push_uint(additions.len() as u64);
353 for (k, v) in additions {
354 key_write(self, &k);
355 val_write(self, v);
356 }
357 }
358
359 #[inline]
361 pub fn finish(&mut self) -> Vec<u8> {
362 self.rle.write_to_buffer(&mut self.buffer);
363 let result = std::mem::take(&mut self.buffer);
364 self.buffer = Vec::with_capacity(64);
365 self.dict.clear();
366 self.rle.reset();
367 result
368 }
369}
370
371impl Default for Encoder {
372 fn default() -> Self {
373 Self::new()
374 }
375}
376
377#[cfg(test)]
378mod tests {
379 use super::*;
380
381 #[test]
382 fn test_encode_string() {
383 let mut encoder = Encoder::new();
384 encoder.push_string("hello");
385 let buf = encoder.finish();
386 assert!(!buf.is_empty());
387 }
388
389 #[test]
390 fn test_encode_string_dictionary() {
391 let mut encoder = Encoder::new();
392 encoder.push_string("hello");
393 encoder.push_string("world");
394 encoder.push_string("hello"); let buf = encoder.finish();
396 assert!(!buf.is_empty());
397 }
398
399 #[test]
400 fn test_encode_int() {
401 let mut encoder = Encoder::new();
402 encoder.push_int(42);
403 encoder.push_int(-100);
404 let buf = encoder.finish();
405 assert!(!buf.is_empty());
406 }
407
408 #[test]
409 fn test_encode_float() {
410 let mut encoder = Encoder::new();
411 encoder.push_float(3.14);
412 let buf = encoder.finish();
413 assert_eq!(buf.len(), 4 + 1); }
415
416 #[test]
417 fn test_encode_boolean() {
418 let mut encoder = Encoder::new();
419 encoder.push_boolean(true);
420 encoder.push_boolean(false);
421 encoder.push_boolean(true);
422 let buf = encoder.finish();
423 assert!(!buf.is_empty());
424 }
425}