Skip to main content

oxirs_vec/
delta_encoder.rs

1// Delta encoding for vector updates (v1.1.0 round 11)
2//
3// Implements incremental embedding updates via delta encoding:
4// delta = new - old, with optional compression and quantization.
5
6/// A delta (difference) between two version of a vector
7#[derive(Debug, Clone)]
8pub struct VectorDelta {
9    /// Identifier of the vector this delta belongs to
10    pub id: usize,
11    /// Per-component differences (`delta[i] = new[i] - old[i]`)
12    pub delta: Vec<f32>,
13    /// Scale factor used for quantization (max absolute value of the delta)
14    pub scale: f32,
15    /// Creation timestamp (milliseconds since epoch or arbitrary counter)
16    pub timestamp: u64,
17}
18
19impl VectorDelta {
20    /// Create a new delta directly
21    pub fn new(id: usize, delta: Vec<f32>, scale: f32, timestamp: u64) -> Self {
22        Self {
23            id,
24            delta,
25            scale,
26            timestamp,
27        }
28    }
29
30    /// True if all components are zero
31    pub fn is_zero(&self) -> bool {
32        self.delta.iter().all(|&v| v == 0.0)
33    }
34}
35
36/// Statistics about a sequence of deltas
37#[derive(Debug, Clone)]
38pub struct DeltaStats {
39    pub total_deltas: usize,
40    pub applied: usize,
41    /// RMS magnitude across all deltas
42    pub magnitude: f32,
43    /// Mean absolute change per component per delta
44    pub mean_change: f32,
45}
46
47/// Stateless delta encoding utilities
48pub struct DeltaEncoder;
49
50impl DeltaEncoder {
51    /// Encode the difference between `old` and `new` as a `VectorDelta`.
52    ///
53    /// `scale` is set to the maximum absolute value in the delta (useful for quantization).
54    /// If `old` and `new` have different lengths, the shorter is padded with 0.0.
55    pub fn encode(old: &[f32], new: &[f32]) -> VectorDelta {
56        let len = old.len().max(new.len());
57        let delta: Vec<f32> = (0..len)
58            .map(|i| {
59                let o = old.get(i).copied().unwrap_or(0.0);
60                let n = new.get(i).copied().unwrap_or(0.0);
61                n - o
62            })
63            .collect();
64
65        let scale = delta.iter().map(|v| v.abs()).fold(0.0_f32, f32::max);
66
67        VectorDelta::new(0, delta, scale, 0)
68    }
69
70    /// Apply a single delta to `base`, returning the updated vector.
71    ///
72    /// If `base` and `delta.delta` have different lengths, the shorter is padded.
73    pub fn apply(base: &[f32], delta: &VectorDelta) -> Vec<f32> {
74        let len = base.len().max(delta.delta.len());
75        (0..len)
76            .map(|i| {
77                let b = base.get(i).copied().unwrap_or(0.0);
78                let d = delta.delta.get(i).copied().unwrap_or(0.0);
79                b + d
80            })
81            .collect()
82    }
83
84    /// Apply a sequence of deltas in order to `base`.
85    pub fn apply_sequence(base: &[f32], deltas: &[VectorDelta]) -> Vec<f32> {
86        deltas
87            .iter()
88            .fold(base.to_vec(), |acc, d| Self::apply(&acc, d))
89    }
90
91    /// Zero out delta components whose absolute value is below `threshold`.
92    pub fn compress(delta: &VectorDelta, threshold: f32) -> VectorDelta {
93        let compressed: Vec<f32> = delta
94            .delta
95            .iter()
96            .map(|&v| if v.abs() < threshold { 0.0 } else { v })
97            .collect();
98
99        let new_scale = compressed.iter().map(|v| v.abs()).fold(0.0_f32, f32::max);
100
101        VectorDelta::new(delta.id, compressed, new_scale, delta.timestamp)
102    }
103
104    /// Merge multiple deltas into a single combined delta by summing components.
105    ///
106    /// Returns `None` if `deltas` is empty.
107    /// The merged delta gets `id = 0`, `timestamp = 0`, and the scale of the sum.
108    pub fn merge(deltas: &[VectorDelta]) -> Option<VectorDelta> {
109        if deltas.is_empty() {
110            return None;
111        }
112
113        let len = deltas.iter().map(|d| d.delta.len()).max().unwrap_or(0);
114        let mut sum = vec![0.0f32; len];
115
116        for d in deltas {
117            for (i, &v) in d.delta.iter().enumerate() {
118                sum[i] += v;
119            }
120        }
121
122        let scale = sum.iter().map(|v| v.abs()).fold(0.0_f32, f32::max);
123        Some(VectorDelta::new(0, sum, scale, 0))
124    }
125
126    /// Compute statistics about a sequence of deltas.
127    pub fn stats(deltas: &[VectorDelta]) -> DeltaStats {
128        let total_deltas = deltas.len();
129
130        if deltas.is_empty() {
131            return DeltaStats {
132                total_deltas: 0,
133                applied: 0,
134                magnitude: 0.0,
135                mean_change: 0.0,
136            };
137        }
138
139        // Magnitude: RMS of all delta components across all deltas
140        let all_vals: Vec<f32> = deltas
141            .iter()
142            .flat_map(|d| d.delta.iter().copied())
143            .collect();
144        let sq_sum: f32 = all_vals.iter().map(|v| v * v).sum();
145        let magnitude = if all_vals.is_empty() {
146            0.0
147        } else {
148            (sq_sum / all_vals.len() as f32).sqrt()
149        };
150
151        // Mean absolute change
152        let abs_sum: f32 = all_vals.iter().map(|v| v.abs()).sum();
153        let mean_change = if all_vals.is_empty() {
154            0.0
155        } else {
156            abs_sum / all_vals.len() as f32
157        };
158
159        DeltaStats {
160            total_deltas,
161            applied: total_deltas,
162            magnitude,
163            mean_change,
164        }
165    }
166
167    /// Compute the L2 magnitude of a single delta (Euclidean norm).
168    pub fn magnitude(delta: &VectorDelta) -> f32 {
169        let sq_sum: f32 = delta.delta.iter().map(|v| v * v).sum();
170        sq_sum.sqrt()
171    }
172
173    /// Quantize the delta to int8 range [-127, 127].
174    ///
175    /// Uses the stored `scale` as the normalisation factor.
176    /// A scale of 0.0 results in all-zero output.
177    pub fn quantize_to_i8(delta: &VectorDelta) -> Vec<i8> {
178        if delta.scale == 0.0 {
179            return vec![0i8; delta.delta.len()];
180        }
181        delta
182            .delta
183            .iter()
184            .map(|&v| {
185                let quantized = (v / delta.scale * 127.0).round();
186                quantized.clamp(-127.0, 127.0) as i8
187            })
188            .collect()
189    }
190
191    /// Reconstruct a `VectorDelta` from int8 quantized values.
192    /// `scale` must be the same scale used during `quantize_to_i8`.
193    pub fn from_i8(quantized: &[i8], scale: f32, id: usize, timestamp: u64) -> VectorDelta {
194        let delta: Vec<f32> = quantized
195            .iter()
196            .map(|&v| (v as f32) / 127.0 * scale)
197            .collect();
198        VectorDelta::new(id, delta, scale, timestamp)
199    }
200}
201
202#[cfg(test)]
203mod tests {
204    use super::*;
205
206    const EPS: f32 = 1e-5;
207
208    fn approx_eq(a: f32, b: f32) -> bool {
209        (a - b).abs() < EPS
210    }
211
212    fn approx_vec(a: &[f32], b: &[f32]) -> bool {
213        a.len() == b.len() && a.iter().zip(b.iter()).all(|(x, y)| approx_eq(*x, *y))
214    }
215
216    // ── encode / apply round-trip ─────────────────────────────────────────
217
218    #[test]
219    fn test_encode_apply_round_trip() {
220        let old = vec![1.0, 2.0, 3.0];
221        let new = vec![2.0, 3.0, 4.0];
222        let delta = DeltaEncoder::encode(&old, &new);
223        let result = DeltaEncoder::apply(&old, &delta);
224        assert!(approx_vec(&result, &new));
225    }
226
227    #[test]
228    fn test_encode_zero_delta() {
229        let v = vec![1.0, 2.0, 3.0];
230        let delta = DeltaEncoder::encode(&v, &v);
231        assert!(delta.is_zero());
232        assert!(approx_eq(delta.scale, 0.0));
233    }
234
235    #[test]
236    fn test_encode_delta_values() {
237        let old = vec![0.0, 0.0, 0.0];
238        let new = vec![1.0, -2.0, 3.0];
239        let delta = DeltaEncoder::encode(&old, &new);
240        assert!(approx_eq(delta.delta[0], 1.0));
241        assert!(approx_eq(delta.delta[1], -2.0));
242        assert!(approx_eq(delta.delta[2], 3.0));
243    }
244
245    #[test]
246    fn test_encode_scale_is_max_abs() {
247        let old = vec![0.0, 0.0, 0.0];
248        let new = vec![1.0, 5.0, -3.0];
249        let delta = DeltaEncoder::encode(&old, &new);
250        assert!(approx_eq(delta.scale, 5.0));
251    }
252
253    #[test]
254    fn test_encode_different_lengths_pads_shorter() {
255        let old = vec![1.0, 2.0];
256        let new = vec![2.0, 3.0, 4.0];
257        let delta = DeltaEncoder::encode(&old, &new);
258        assert_eq!(delta.delta.len(), 3);
259        assert!(approx_eq(delta.delta[2], 4.0)); // 4.0 - 0.0
260    }
261
262    // ── apply ─────────────────────────────────────────────────────────────
263
264    #[test]
265    fn test_apply_zero_delta_unchanged() {
266        let base = vec![1.0, 2.0, 3.0];
267        let delta = VectorDelta::new(0, vec![0.0, 0.0, 0.0], 0.0, 0);
268        let result = DeltaEncoder::apply(&base, &delta);
269        assert!(approx_vec(&result, &base));
270    }
271
272    #[test]
273    fn test_apply_positive_delta() {
274        let base = vec![1.0, 2.0];
275        let delta = VectorDelta::new(0, vec![0.5, -0.5], 0.5, 0);
276        let result = DeltaEncoder::apply(&base, &delta);
277        assert!(approx_eq(result[0], 1.5));
278        assert!(approx_eq(result[1], 1.5));
279    }
280
281    // ── apply_sequence ────────────────────────────────────────────────────
282
283    #[test]
284    fn test_apply_sequence_ordered() {
285        let base = vec![0.0, 0.0];
286        let d1 = VectorDelta::new(0, vec![1.0, 2.0], 2.0, 1);
287        let d2 = VectorDelta::new(0, vec![0.5, 0.5], 0.5, 2);
288        let result = DeltaEncoder::apply_sequence(&base, &[d1, d2]);
289        assert!(approx_eq(result[0], 1.5));
290        assert!(approx_eq(result[1], 2.5));
291    }
292
293    #[test]
294    fn test_apply_sequence_empty_deltas() {
295        let base = vec![1.0, 2.0, 3.0];
296        let result = DeltaEncoder::apply_sequence(&base, &[]);
297        assert!(approx_vec(&result, &base));
298    }
299
300    #[test]
301    fn test_apply_sequence_single_delta() {
302        let base = vec![1.0];
303        let d = VectorDelta::new(0, vec![1.0], 1.0, 0);
304        let result = DeltaEncoder::apply_sequence(&base, &[d]);
305        assert!(approx_eq(result[0], 2.0));
306    }
307
308    // ── compress ──────────────────────────────────────────────────────────
309
310    #[test]
311    fn test_compress_zeroes_below_threshold() {
312        let delta = VectorDelta::new(0, vec![0.5, 0.01, -0.001, 1.0], 1.0, 0);
313        let compressed = DeltaEncoder::compress(&delta, 0.05);
314        assert!(approx_eq(compressed.delta[1], 0.0));
315        assert!(approx_eq(compressed.delta[2], 0.0));
316        assert!(approx_eq(compressed.delta[0], 0.5));
317        assert!(approx_eq(compressed.delta[3], 1.0));
318    }
319
320    #[test]
321    fn test_compress_threshold_zero_unchanged() {
322        let delta = VectorDelta::new(0, vec![0.5, 0.01, 1.0], 1.0, 0);
323        let compressed = DeltaEncoder::compress(&delta, 0.0);
324        assert!(approx_vec(&compressed.delta, &delta.delta));
325    }
326
327    #[test]
328    fn test_compress_all_below_threshold() {
329        let delta = VectorDelta::new(0, vec![0.001, 0.002, 0.003], 0.003, 0);
330        let compressed = DeltaEncoder::compress(&delta, 0.01);
331        assert!(compressed.is_zero());
332        assert!(approx_eq(compressed.scale, 0.0));
333    }
334
335    // ── merge ─────────────────────────────────────────────────────────────
336
337    #[test]
338    fn test_merge_empty_returns_none() {
339        assert!(DeltaEncoder::merge(&[]).is_none());
340    }
341
342    #[test]
343    fn test_merge_single_delta() {
344        let d = VectorDelta::new(0, vec![1.0, 2.0], 2.0, 0);
345        let merged = DeltaEncoder::merge(&[d]).unwrap();
346        assert!(approx_eq(merged.delta[0], 1.0));
347        assert!(approx_eq(merged.delta[1], 2.0));
348    }
349
350    #[test]
351    fn test_merge_sums_deltas() {
352        let d1 = VectorDelta::new(0, vec![1.0, 2.0], 2.0, 0);
353        let d2 = VectorDelta::new(0, vec![3.0, 4.0], 4.0, 0);
354        let merged = DeltaEncoder::merge(&[d1, d2]).unwrap();
355        assert!(approx_eq(merged.delta[0], 4.0));
356        assert!(approx_eq(merged.delta[1], 6.0));
357    }
358
359    #[test]
360    fn test_merge_scale_is_max_abs_sum() {
361        let d1 = VectorDelta::new(0, vec![1.0], 1.0, 0);
362        let d2 = VectorDelta::new(0, vec![-3.0], 3.0, 0);
363        let merged = DeltaEncoder::merge(&[d1, d2]).unwrap();
364        // sum = -2.0 → scale = 2.0
365        assert!(approx_eq(merged.scale, 2.0));
366    }
367
368    // ── stats ─────────────────────────────────────────────────────────────
369
370    #[test]
371    fn test_stats_empty() {
372        let s = DeltaEncoder::stats(&[]);
373        assert_eq!(s.total_deltas, 0);
374        assert!(approx_eq(s.magnitude, 0.0));
375    }
376
377    #[test]
378    fn test_stats_total_deltas() {
379        let deltas: Vec<_> = (0..3)
380            .map(|i| VectorDelta::new(i, vec![1.0, 1.0], 1.0, 0))
381            .collect();
382        let s = DeltaEncoder::stats(&deltas);
383        assert_eq!(s.total_deltas, 3);
384    }
385
386    #[test]
387    fn test_stats_magnitude_positive() {
388        let d = VectorDelta::new(0, vec![1.0, 0.0], 1.0, 0);
389        let s = DeltaEncoder::stats(&[d]);
390        assert!(s.magnitude > 0.0);
391    }
392
393    #[test]
394    fn test_stats_mean_change_positive() {
395        let d = VectorDelta::new(0, vec![1.0, 2.0], 2.0, 0);
396        let s = DeltaEncoder::stats(&[d]);
397        assert!(s.mean_change > 0.0);
398    }
399
400    // ── quantize_to_i8 / from_i8 ─────────────────────────────────────────
401
402    #[test]
403    fn test_quantize_to_i8_range() {
404        let delta = VectorDelta::new(0, vec![1.0, -1.0, 0.5, -0.5], 1.0, 0);
405        let q = DeltaEncoder::quantize_to_i8(&delta);
406        for &v in &q {
407            assert!(v >= -127);
408        }
409    }
410
411    #[test]
412    fn test_quantize_max_value() {
413        let delta = VectorDelta::new(0, vec![1.0], 1.0, 0);
414        let q = DeltaEncoder::quantize_to_i8(&delta);
415        assert_eq!(q[0], 127);
416    }
417
418    #[test]
419    fn test_quantize_min_value() {
420        let delta = VectorDelta::new(0, vec![-1.0], 1.0, 0);
421        let q = DeltaEncoder::quantize_to_i8(&delta);
422        assert_eq!(q[0], -127);
423    }
424
425    #[test]
426    fn test_quantize_zero_scale_all_zeros() {
427        let delta = VectorDelta::new(0, vec![0.0, 0.0], 0.0, 0);
428        let q = DeltaEncoder::quantize_to_i8(&delta);
429        assert!(q.iter().all(|&v| v == 0));
430    }
431
432    #[test]
433    fn test_from_i8_reconstruction_approx() {
434        let original = vec![1.0, -1.0, 0.5];
435        let old = vec![0.0, 0.0, 0.0];
436        let delta = DeltaEncoder::encode(&old, &original);
437        let q = DeltaEncoder::quantize_to_i8(&delta);
438        let reconstructed = DeltaEncoder::from_i8(&q, delta.scale, 0, 0);
439        // Reconstruction should be close (within quantization error ≈ scale/127)
440        for (a, b) in original.iter().zip(reconstructed.delta.iter()) {
441            assert!(
442                (a - b).abs() < delta.scale / 100.0 + 1e-4,
443                "Reconstruction error too large: {a} vs {b}"
444            );
445        }
446    }
447
448    #[test]
449    fn test_from_i8_preserves_id_and_timestamp() {
450        let q = vec![10i8, -20i8];
451        let d = DeltaEncoder::from_i8(&q, 1.0, 42, 99999);
452        assert_eq!(d.id, 42);
453        assert_eq!(d.timestamp, 99999);
454    }
455
456    // ── magnitude ─────────────────────────────────────────────────────────
457
458    #[test]
459    fn test_magnitude_zero_delta() {
460        let delta = VectorDelta::new(0, vec![0.0, 0.0, 0.0], 0.0, 0);
461        assert!(approx_eq(DeltaEncoder::magnitude(&delta), 0.0));
462    }
463
464    #[test]
465    fn test_magnitude_unit_vector() {
466        let delta = VectorDelta::new(0, vec![1.0, 0.0, 0.0], 1.0, 0);
467        assert!(approx_eq(DeltaEncoder::magnitude(&delta), 1.0));
468    }
469
470    #[test]
471    fn test_magnitude_known_value() {
472        // [3, 4] → magnitude = 5
473        let delta = VectorDelta::new(0, vec![3.0, 4.0], 4.0, 0);
474        assert!(approx_eq(DeltaEncoder::magnitude(&delta), 5.0));
475    }
476
477    // ── VectorDelta helpers ───────────────────────────────────────────────
478
479    #[test]
480    fn test_vector_delta_is_zero_true() {
481        let d = VectorDelta::new(0, vec![0.0, 0.0], 0.0, 0);
482        assert!(d.is_zero());
483    }
484
485    #[test]
486    fn test_vector_delta_is_zero_false() {
487        let d = VectorDelta::new(0, vec![0.0, 1.0], 1.0, 0);
488        assert!(!d.is_zero());
489    }
490
491    #[test]
492    fn test_encode_empty_vectors() {
493        let delta = DeltaEncoder::encode(&[], &[]);
494        assert!(delta.delta.is_empty());
495        assert!(approx_eq(delta.scale, 0.0));
496    }
497
498    // ── additional coverage ───────────────────────────────────────────────
499
500    #[test]
501    fn test_encode_preserves_length_when_equal() {
502        let old = vec![1.0, 2.0, 3.0, 4.0];
503        let new = vec![5.0, 6.0, 7.0, 8.0];
504        let delta = DeltaEncoder::encode(&old, &new);
505        assert_eq!(delta.delta.len(), 4);
506    }
507
508    #[test]
509    fn test_apply_extends_base_for_longer_delta() {
510        let base = vec![1.0];
511        let delta = VectorDelta::new(0, vec![0.5, 0.5, 0.5], 0.5, 0);
512        let result = DeltaEncoder::apply(&base, &delta);
513        assert_eq!(result.len(), 3);
514        assert!(approx_eq(result[0], 1.5));
515        assert!(approx_eq(result[1], 0.5)); // 0.0 + 0.5
516        assert!(approx_eq(result[2], 0.5)); // 0.0 + 0.5
517    }
518
519    #[test]
520    fn test_apply_sequence_three_deltas() {
521        let base = vec![0.0];
522        let d1 = VectorDelta::new(0, vec![1.0], 1.0, 1);
523        let d2 = VectorDelta::new(0, vec![2.0], 2.0, 2);
524        let d3 = VectorDelta::new(0, vec![3.0], 3.0, 3);
525        let result = DeltaEncoder::apply_sequence(&base, &[d1, d2, d3]);
526        assert!(approx_eq(result[0], 6.0));
527    }
528
529    #[test]
530    fn test_merge_three_deltas_sums_all() {
531        let d1 = VectorDelta::new(1, vec![1.0, 0.0], 1.0, 0);
532        let d2 = VectorDelta::new(2, vec![2.0, 0.0], 2.0, 0);
533        let d3 = VectorDelta::new(3, vec![3.0, 0.0], 3.0, 0);
534        let merged = DeltaEncoder::merge(&[d1, d2, d3]).unwrap();
535        assert!(approx_eq(merged.delta[0], 6.0));
536        assert_eq!(merged.id, 0); // merged id is always 0
537    }
538
539    #[test]
540    fn test_quantize_mid_value() {
541        // 0.5 / 1.0 * 127 = 63.5 → rounds to 64
542        let delta = VectorDelta::new(0, vec![0.5], 1.0, 0);
543        let q = DeltaEncoder::quantize_to_i8(&delta);
544        assert_eq!(q[0], 64);
545    }
546
547    #[test]
548    fn test_from_i8_zero_values() {
549        let q = vec![0i8, 0i8, 0i8];
550        let d = DeltaEncoder::from_i8(&q, 2.0, 0, 0);
551        assert!(d.delta.iter().all(|&v| approx_eq(v, 0.0)));
552    }
553
554    #[test]
555    fn test_stats_applied_equals_total() {
556        let deltas: Vec<_> = (0..5)
557            .map(|i| VectorDelta::new(i, vec![1.0], 1.0, 0))
558            .collect();
559        let s = DeltaEncoder::stats(&deltas);
560        assert_eq!(s.applied, s.total_deltas);
561        assert_eq!(s.total_deltas, 5);
562    }
563}