1use crate::rle::RleWriter;
2use crate::varint::{write_uvarint_vec, write_varint_vec};
3
4#[inline]
6fn hash_str(s: &str) -> u64 {
7 const K: u64 = 0x517cc1b727220a95;
8 let bytes = s.as_bytes();
9 let mut hash: u64 = 0;
10 for chunk in bytes.chunks(8) {
11 let mut val = 0u64;
12 for (i, &b) in chunk.iter().enumerate() {
13 val |= (b as u64) << (i * 8);
14 }
15 hash = (hash.rotate_left(5) ^ val).wrapping_mul(K);
16 }
17 hash
18}
19
20pub struct Encoder {
22 buffer: Vec<u8>,
23 dict: Vec<u64>,
24 rle: RleWriter,
25}
26
27impl Encoder {
28 #[inline]
30 pub fn new() -> Self {
31 Self {
32 buffer: Vec::with_capacity(32),
33 dict: Vec::new(),
34 rle: RleWriter::new(),
35 }
36 }
37
38 #[inline]
41 pub fn encode<F, R>(f: F) -> R
42 where
43 F: FnOnce(&mut Encoder) -> R,
44 {
45 use std::cell::RefCell;
46 thread_local! {
47 static ENCODER: RefCell<Encoder> = RefCell::new(Encoder::new());
48 }
49 ENCODER.with(|enc| f(&mut enc.borrow_mut()))
50 }
51
52 #[inline]
56 pub fn push_string(&mut self, val: &str) {
57 if val.is_empty() {
58 self.push_int(0);
59 return;
60 }
61
62 let hash = hash_str(val);
64 if let Some(idx) = self.dict.iter().position(|&h| h == hash) {
65 self.push_int(-(idx as i64) - 1);
66 return;
67 }
68
69 self.dict.push(hash);
70 let bytes = val.as_bytes();
71 self.push_int(bytes.len() as i64);
72 self.buffer.extend_from_slice(bytes);
73 }
74
75 #[inline]
76 pub fn push_int(&mut self, val: i64) {
77 write_varint_vec(&mut self.buffer, val);
78 }
79
80 #[inline]
81 pub fn push_bounded_int(&mut self, val: i64, min: i64) {
82 assert!(val >= min, "int {} below minimum {}", val, min);
83 self.push_uint((val - min) as u64);
84 }
85
86 #[inline]
87 pub fn push_uint(&mut self, val: u64) {
88 write_uvarint_vec(&mut self.buffer, val);
89 }
90
91 #[inline]
92 pub fn push_float(&mut self, val: f32) {
93 self.buffer.extend_from_slice(&val.to_le_bytes());
94 }
95
96 #[inline]
97 pub fn push_float_quantized(&mut self, val: f32, precision: f32) {
98 assert!(val.is_finite(), "invalid quantized float: {}", val);
99 self.push_int((val / precision).round() as i64);
100 }
101
102 #[inline]
103 pub fn push_boolean(&mut self, val: bool) {
104 self.rle.push_bit(val);
105 }
106
107 #[inline]
108 pub fn push_enum(&mut self, val: u32, num_bits: u8) {
109 assert!(
110 (val as u64) < (1u64 << num_bits),
111 "value {} out of range for {} bits",
112 val,
113 num_bits
114 );
115 self.rle.push_bits(val, num_bits);
116 }
117
118 #[inline]
119 pub fn push_bit_packed_int(&mut self, val: i64, min: i64, max: i64, num_bits: u8) {
120 assert!(
121 val >= min && val <= max,
122 "int {} outside [{}, {}]",
123 val,
124 min,
125 max
126 );
127 self.rle.push_bits((val - min) as u32, num_bits);
128 }
129
130 #[inline]
133 pub fn push_string_diff(&mut self, a: &str, b: &str) {
134 let hash_a = hash_str(a);
136 if !self.dict.contains(&hash_a) {
137 self.dict.push(hash_a);
138 }
139 self.push_string(b);
140 }
141
142 #[inline]
143 pub fn push_int_diff(&mut self, _a: i64, b: i64) {
144 self.push_int(b);
145 }
146
147 #[inline]
148 pub fn push_uint_diff(&mut self, _a: u64, b: u64) {
149 self.push_uint(b);
150 }
151
152 #[inline]
153 pub fn push_bounded_int_diff(&mut self, _a: i64, b: i64, min: i64) {
154 self.push_bounded_int(b, min);
155 }
156
157 #[inline]
158 pub fn push_float_diff(&mut self, _a: f32, b: f32) {
159 self.push_float(b);
160 }
161
162 #[inline]
163 pub fn push_float_quantized_diff(&mut self, _a: f32, b: f32, precision: f32) {
164 self.push_float_quantized(b, precision);
165 }
166
167 #[inline]
168 pub fn push_boolean_diff(&mut self, a: bool, b: bool) {
169 self.push_boolean(a != b);
171 }
172
173 #[inline]
174 pub fn push_enum_diff(&mut self, _a: u32, b: u32, num_bits: u8) {
175 self.push_enum(b, num_bits);
176 }
177
178 #[inline]
179 pub fn push_bit_packed_int_diff(&mut self, _a: i64, b: i64, min: i64, max: i64, num_bits: u8) {
180 self.push_bit_packed_int(b, min, max, num_bits);
181 }
182
183 #[inline]
186 pub fn push_object_diff<T, E, F>(&mut self, a: &T, b: &T, equals: E, encode_diff: F)
187 where
188 E: FnOnce(&T, &T) -> bool,
189 F: FnOnce(&mut Self),
190 {
191 let changed = !equals(a, b);
192 self.push_boolean(changed);
193 if changed {
194 encode_diff(self);
195 }
196 }
197
198 #[inline]
201 pub fn push_field_diff<T, E, F>(&mut self, a: &T, b: &T, equals: E, encode_diff: F)
202 where
203 E: FnOnce(&T, &T) -> bool,
204 F: FnOnce(&mut Self, &T, &T),
205 {
206 let changed = !equals(a, b);
207 self.push_boolean(changed);
208 if changed {
209 encode_diff(self, a, b);
210 }
211 }
212
213 #[inline]
217 pub fn push_array<T, F>(&mut self, arr: &[T], mut inner_write: F)
218 where
219 F: FnMut(&mut Self, &T),
220 {
221 self.push_uint(arr.len() as u64);
222 for item in arr {
223 inner_write(self, item);
224 }
225 }
226
227 #[inline]
230 pub fn push_array_diff<T, FW, FD, E>(
231 &mut self,
232 a: &[T],
233 b: &[T],
234 mut equals: E,
235 mut inner_write: FW,
236 mut inner_diff: FD,
237 ) where
238 FW: FnMut(&mut Self, &T),
239 FD: FnMut(&mut Self, &T, &T),
240 E: FnMut(&T, &T) -> bool,
241 {
242 self.push_uint(b.len() as u64);
243
244 let min_len = a.len().min(b.len());
246 let updates: Vec<usize> = (0..min_len).filter(|&i| !equals(&a[i], &b[i])).collect();
247
248 self.push_uint(updates.len() as u64);
250 for i in updates {
251 self.push_uint(i as u64);
252 inner_diff(self, &a[i], &b[i]);
253 }
254
255 for item in b.iter().skip(a.len()) {
257 inner_write(self, item);
258 }
259 }
260
261 #[inline]
265 pub fn push_optional<T, F>(&mut self, opt: &Option<T>, mut inner_write: F)
266 where
267 F: FnMut(&mut Self, &T),
268 {
269 self.push_boolean(opt.is_some());
270 if let Some(val) = opt {
271 inner_write(self, val);
272 }
273 }
274
275 #[inline]
279 pub fn push_optional_diff<T, FW, FD>(
280 &mut self,
281 a: &Option<T>,
282 b: &Option<T>,
283 mut inner_write: FW,
284 mut inner_diff: FD,
285 ) where
286 FW: FnMut(&mut Self, &T),
287 FD: FnMut(&mut Self, &T, &T),
288 {
289 match a {
290 None => {
291 inner_write(self, b.as_ref().unwrap());
293 }
294 Some(av) => {
295 self.push_boolean(b.is_some());
296 if let Some(bv) = b {
297 inner_diff(self, av, bv); }
299 }
301 }
302 }
303
304 #[inline]
308 pub fn push_record<K, V, FK, FV>(
309 &mut self,
310 map: &indexmap::IndexMap<K, V>,
311 mut key_write: FK,
312 mut val_write: FV,
313 ) where
314 K: std::hash::Hash + Eq,
315 FK: FnMut(&mut Self, &K),
316 FV: FnMut(&mut Self, &V),
317 {
318 self.push_uint(map.len() as u64);
319 for (k, v) in map {
320 key_write(self, k);
321 val_write(self, v);
322 }
323 }
324
325 #[inline]
328 pub fn push_record_diff<K, V, FK, FV, FVD, E>(
329 &mut self,
330 a: &indexmap::IndexMap<K, V>,
331 b: &indexmap::IndexMap<K, V>,
332 mut equals: E,
333 mut key_write: FK,
334 mut val_write: FV,
335 mut val_diff: FVD,
336 ) where
337 K: Clone + std::hash::Hash + Eq,
338 FK: FnMut(&mut Self, &K),
339 FV: FnMut(&mut Self, &V),
340 FVD: FnMut(&mut Self, &V, &V),
341 E: FnMut(&V, &V) -> bool,
342 {
343 let mut deletions: Vec<usize> = Vec::new();
344 let mut updates: Vec<(usize, K)> = Vec::new();
345 let mut additions = Vec::new();
346
347 for (idx, (k, av)) in a.iter().enumerate() {
348 if let Some(bv) = b.get(k) {
349 if !equals(av, bv) {
350 updates.push((idx, k.clone()));
351 }
352 } else {
353 deletions.push(idx);
354 }
355 }
356
357 for (k, v) in b {
358 if !a.contains_key(k) {
359 additions.push((k.clone(), v));
360 }
361 }
362
363 if !a.is_empty() {
365 self.push_uint(deletions.len() as u64);
366 for del_idx in &deletions {
367 self.push_uint(*del_idx as u64);
368 }
369 self.push_uint(updates.len() as u64);
370 for (upd_idx, key) in &updates {
371 self.push_uint(*upd_idx as u64);
372 val_diff(self, a.get(key).unwrap(), b.get(key).unwrap());
373 }
374 }
375
376 self.push_uint(additions.len() as u64);
378 for (k, v) in additions {
379 key_write(self, &k);
380 val_write(self, v);
381 }
382 }
383
384 #[inline]
386 pub fn finish(&mut self) -> Vec<u8> {
387 self.rle.write_to_buffer(&mut self.buffer);
388 let result = std::mem::take(&mut self.buffer);
389 self.buffer = Vec::with_capacity(64);
390 self.dict.clear();
391 self.rle.reset();
392 result
393 }
394}
395
396impl Default for Encoder {
397 fn default() -> Self {
398 Self::new()
399 }
400}
401
402#[cfg(test)]
403mod tests {
404 use super::*;
405
406 #[test]
407 fn test_encode_string() {
408 let mut encoder = Encoder::new();
409 encoder.push_string("hello");
410 let buf = encoder.finish();
411 assert!(!buf.is_empty());
412 }
413
414 #[test]
415 fn test_encode_string_dictionary() {
416 let mut encoder = Encoder::new();
417 encoder.push_string("hello");
418 encoder.push_string("world");
419 encoder.push_string("hello"); let buf = encoder.finish();
421 assert!(!buf.is_empty());
422 }
423
424 #[test]
425 fn test_encode_int() {
426 let mut encoder = Encoder::new();
427 encoder.push_int(42);
428 encoder.push_int(-100);
429 let buf = encoder.finish();
430 assert!(!buf.is_empty());
431 }
432
433 #[test]
434 fn test_encode_float() {
435 let mut encoder = Encoder::new();
436 encoder.push_float(3.14);
437 let buf = encoder.finish();
438 assert_eq!(buf.len(), 4 + 1); }
440
441 #[test]
442 fn test_encode_boolean() {
443 let mut encoder = Encoder::new();
444 encoder.push_boolean(true);
445 encoder.push_boolean(false);
446 encoder.push_boolean(true);
447 let buf = encoder.finish();
448 assert!(!buf.is_empty());
449 }
450}