Skip to main content

grafeo_core/storage/
delta.rs

1//! Delta encoding for sorted integer sequences.
2//!
3//! Instead of storing [100, 105, 107, 110], store the base (100) and the deltas
4//! [5, 2, 3]. The deltas are often tiny even when values are huge, making this
5//! a great first step before bit-packing.
6//!
7//! For signed integers, we use zig-zag encoding to map negative deltas to small
8//! positive numbers: 0→0, -1→1, 1→2, -2→3, etc.
9//!
10//! # Example
11//!
12//! ```ignore
13//! let values = vec![100u64, 105, 107, 110, 115];
14//! let encoded = DeltaEncoding::encode(&values);
15//! // base=100, deltas=[5, 2, 3, 5]
16//! assert_eq!(encoded.decode(), values);
17//! ```
18
19use std::io;
20
21/// Stores differences between consecutive values instead of the values themselves.
22///
23/// Pair this with [`BitPackedInts`](super::BitPackedInts) for maximum compression -
24/// use [`DeltaBitPacked`](super::DeltaBitPacked) for the combo.
25#[derive(Debug, Clone)]
26pub struct DeltaEncoding {
27    /// The first value in the sequence.
28    base: u64,
29    /// Deltas between consecutive values.
30    deltas: Vec<u64>,
31    /// Number of values (base + deltas.len()).
32    count: usize,
33}
34
35impl DeltaEncoding {
36    /// Encodes a slice of u64 values using delta encoding.
37    ///
38    /// For best compression, values should be sorted in ascending order.
39    #[must_use]
40    pub fn encode(values: &[u64]) -> Self {
41        if values.is_empty() {
42            return Self {
43                base: 0,
44                deltas: Vec::new(),
45                count: 0,
46            };
47        }
48
49        let base = values[0];
50        let deltas: Vec<u64> = values
51            .windows(2)
52            .map(|w| w[1].saturating_sub(w[0]))
53            .collect();
54
55        Self {
56            base,
57            deltas,
58            count: values.len(),
59        }
60    }
61
62    /// Encodes a slice of i64 values using delta encoding.
63    ///
64    /// Computes signed deltas between consecutive values, then zig-zag encodes
65    /// the deltas to store them as unsigned integers.
66    #[must_use]
67    pub fn encode_signed(values: &[i64]) -> Self {
68        if values.is_empty() {
69            return Self {
70                base: 0,
71                deltas: Vec::new(),
72                count: 0,
73            };
74        }
75
76        // Store base as zig-zag encoded
77        let base = zigzag_encode(values[0]);
78
79        // Compute signed deltas, then zig-zag encode them
80        let deltas: Vec<u64> = values
81            .windows(2)
82            .map(|w| zigzag_encode(w[1] - w[0]))
83            .collect();
84
85        Self {
86            base,
87            deltas,
88            count: values.len(),
89        }
90    }
91
92    /// Decodes the delta-encoded values back to the original sequence.
93    #[must_use]
94    pub fn decode(&self) -> Vec<u64> {
95        if self.count == 0 {
96            return Vec::new();
97        }
98
99        let mut result = Vec::with_capacity(self.count);
100        let mut current = self.base;
101        result.push(current);
102
103        for &delta in &self.deltas {
104            current = current.wrapping_add(delta);
105            result.push(current);
106        }
107
108        result
109    }
110
111    /// Decodes to signed integers using zig-zag decoding.
112    ///
113    /// Assumes the encoding was created with `encode_signed`.
114    #[must_use]
115    pub fn decode_signed(&self) -> Vec<i64> {
116        if self.count == 0 {
117            return Vec::new();
118        }
119
120        let mut result = Vec::with_capacity(self.count);
121        let mut current = zigzag_decode(self.base);
122        result.push(current);
123
124        for &delta in &self.deltas {
125            current += zigzag_decode(delta);
126            result.push(current);
127        }
128
129        result
130    }
131
132    /// Returns the number of values.
133    #[must_use]
134    pub fn len(&self) -> usize {
135        self.count
136    }
137
138    /// Returns whether the encoding is empty.
139    #[must_use]
140    pub fn is_empty(&self) -> bool {
141        self.count == 0
142    }
143
144    /// Returns the base value.
145    #[must_use]
146    pub fn base(&self) -> u64 {
147        self.base
148    }
149
150    /// Returns the deltas.
151    #[must_use]
152    pub fn deltas(&self) -> &[u64] {
153        &self.deltas
154    }
155
156    /// Returns the maximum delta value.
157    ///
158    /// Useful for determining bit width for bit-packing.
159    #[must_use]
160    pub fn max_delta(&self) -> u64 {
161        self.deltas.iter().copied().max().unwrap_or(0)
162    }
163
164    /// Returns the number of bits needed to represent the largest delta.
165    #[must_use]
166    pub fn bits_for_max_delta(&self) -> u8 {
167        let max = self.max_delta();
168        if max == 0 {
169            1
170        } else {
171            64 - max.leading_zeros() as u8
172        }
173    }
174
175    /// Estimates the compression ratio.
176    ///
177    /// Returns the ratio of original size to compressed size.
178    #[must_use]
179    pub fn compression_ratio(&self) -> f64 {
180        if self.count == 0 {
181            return 1.0;
182        }
183
184        let original_size = self.count * 8; // 8 bytes per u64
185        let compressed_size = 8 + (self.deltas.len() * 8); // base + deltas
186
187        original_size as f64 / compressed_size as f64
188    }
189
190    /// Serializes the delta encoding to bytes.
191    pub fn to_bytes(&self) -> Vec<u8> {
192        let mut buf = Vec::with_capacity(8 + 4 + self.deltas.len() * 8);
193        buf.extend_from_slice(&self.base.to_le_bytes());
194        buf.extend_from_slice(&(self.count as u32).to_le_bytes());
195        for &delta in &self.deltas {
196            buf.extend_from_slice(&delta.to_le_bytes());
197        }
198        buf
199    }
200
201    /// Deserializes a delta encoding from bytes.
202    pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
203        if bytes.len() < 12 {
204            return Err(io::Error::new(
205                io::ErrorKind::InvalidData,
206                "Delta encoding too short",
207            ));
208        }
209
210        let base = u64::from_le_bytes(bytes[0..8].try_into().unwrap());
211        let count = u32::from_le_bytes(bytes[8..12].try_into().unwrap()) as usize;
212
213        let expected_len = 12 + (count.saturating_sub(1)) * 8;
214        if bytes.len() < expected_len {
215            return Err(io::Error::new(
216                io::ErrorKind::InvalidData,
217                "Delta encoding truncated",
218            ));
219        }
220
221        let mut deltas = Vec::with_capacity(count.saturating_sub(1));
222        let mut offset = 12;
223        for _ in 1..count {
224            let delta = u64::from_le_bytes(bytes[offset..offset + 8].try_into().unwrap());
225            deltas.push(delta);
226            offset += 8;
227        }
228
229        Ok(Self {
230            base,
231            deltas,
232            count,
233        })
234    }
235}
236
237/// Zig-zag encodes a signed integer to unsigned.
238///
239/// Maps signed integers to unsigned: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, etc.
240#[inline]
241#[must_use]
242pub fn zigzag_encode(value: i64) -> u64 {
243    ((value << 1) ^ (value >> 63)) as u64
244}
245
246/// Zig-zag decodes an unsigned integer to signed.
247#[inline]
248#[must_use]
249pub fn zigzag_decode(value: u64) -> i64 {
250    ((value >> 1) as i64) ^ (-((value & 1) as i64))
251}
252
253#[cfg(test)]
254mod tests {
255    use super::*;
256
257    #[test]
258    fn test_delta_encode_decode() {
259        let values = vec![100u64, 105, 107, 110, 115, 120, 128, 130];
260        let encoded = DeltaEncoding::encode(&values);
261        let decoded = encoded.decode();
262        assert_eq!(values, decoded);
263    }
264
265    #[test]
266    fn test_delta_empty() {
267        let values: Vec<u64> = vec![];
268        let encoded = DeltaEncoding::encode(&values);
269        assert!(encoded.is_empty());
270        assert_eq!(encoded.decode(), values);
271    }
272
273    #[test]
274    fn test_delta_single_value() {
275        let values = vec![42u64];
276        let encoded = DeltaEncoding::encode(&values);
277        assert_eq!(encoded.len(), 1);
278        assert_eq!(encoded.decode(), values);
279    }
280
281    #[test]
282    fn test_delta_sequential() {
283        let values: Vec<u64> = (0..100).collect();
284        let encoded = DeltaEncoding::encode(&values);
285        assert_eq!(encoded.max_delta(), 1);
286        assert_eq!(encoded.bits_for_max_delta(), 1);
287        assert_eq!(encoded.decode(), values);
288    }
289
290    #[test]
291    fn test_delta_large_gaps() {
292        let values = vec![0u64, 1000, 2000, 3000];
293        let encoded = DeltaEncoding::encode(&values);
294        assert_eq!(encoded.max_delta(), 1000);
295        assert_eq!(encoded.decode(), values);
296    }
297
298    #[test]
299    fn test_zigzag_encode_decode() {
300        assert_eq!(zigzag_encode(0), 0);
301        assert_eq!(zigzag_encode(-1), 1);
302        assert_eq!(zigzag_encode(1), 2);
303        assert_eq!(zigzag_encode(-2), 3);
304        assert_eq!(zigzag_encode(2), 4);
305
306        for v in [-100i64, -50, -1, 0, 1, 50, 100, i64::MIN, i64::MAX] {
307            assert_eq!(zigzag_decode(zigzag_encode(v)), v);
308        }
309    }
310
311    #[test]
312    fn test_delta_signed() {
313        let values = vec![-100i64, -50, 0, 50, 100];
314        let encoded = DeltaEncoding::encode_signed(&values);
315        let decoded = encoded.decode_signed();
316        assert_eq!(values, decoded);
317    }
318
319    #[test]
320    fn test_delta_serialization() {
321        let values = vec![100u64, 105, 107, 110, 115];
322        let encoded = DeltaEncoding::encode(&values);
323        let bytes = encoded.to_bytes();
324        let restored = DeltaEncoding::from_bytes(&bytes).unwrap();
325        assert_eq!(encoded.decode(), restored.decode());
326    }
327
328    #[test]
329    fn test_compression_ratio() {
330        // Sequential values should compress well
331        let sequential: Vec<u64> = (0..1000).collect();
332        let encoded = DeltaEncoding::encode(&sequential);
333        // Each delta is 1, so compression is minimal but base + deltas is stored
334        assert!(encoded.len() == 1000);
335    }
336}