Skip to main content

grafeo_core/storage/
delta.rs

1//! Delta encoding for sorted integer sequences.
2//!
3//! Instead of storing [100, 105, 107, 110], store the base (100) and the deltas
4//! [5, 2, 3]. The deltas are often tiny even when values are huge, making this
5//! a great first step before bit-packing.
6//!
7//! For signed integers, we use zig-zag encoding to map negative deltas to small
8//! positive numbers: 0→0, -1→1, 1→2, -2→3, etc.
9//!
10//! # Example
11//!
12//! ```ignore
13//! let values = vec![100u64, 105, 107, 110, 115];
14//! let encoded = DeltaEncoding::encode(&values);
15//! // base=100, deltas=[5, 2, 3, 5]
16//! assert_eq!(encoded.decode(), values);
17//! ```
18
19use std::io;
20
21/// Stores differences between consecutive values instead of the values themselves.
22///
23/// Pair this with [`BitPackedInts`](super::BitPackedInts) for maximum compression -
24/// use [`DeltaBitPacked`](super::DeltaBitPacked) for the combo.
25#[derive(Debug, Clone)]
26pub struct DeltaEncoding {
27    /// The first value in the sequence.
28    base: u64,
29    /// Deltas between consecutive values.
30    deltas: Vec<u64>,
31    /// Number of values (base + deltas.len()).
32    count: usize,
33}
34
35impl DeltaEncoding {
36    /// Encodes a slice of u64 values using delta encoding.
37    ///
38    /// # Requirements
39    ///
40    /// Values **must** be sorted in ascending order. Unsorted input will produce
41    /// incorrect results due to `saturating_sub` mapping negative deltas to zero.
42    /// A debug assertion checks this in debug builds.
43    ///
44    /// For signed or unsorted data, use [`encode_signed`](Self::encode_signed) instead.
45    #[must_use]
46    pub fn encode(values: &[u64]) -> Self {
47        if values.is_empty() {
48            return Self {
49                base: 0,
50                deltas: Vec::new(),
51                count: 0,
52            };
53        }
54
55        // Values must be sorted for unsigned delta encoding to work correctly
56        debug_assert!(
57            values.windows(2).all(|w| w[0] <= w[1]),
58            "DeltaEncoding::encode requires sorted input; use encode_signed for unsorted data"
59        );
60
61        let base = values[0];
62        let deltas: Vec<u64> = values
63            .windows(2)
64            .map(|w| w[1].saturating_sub(w[0]))
65            .collect();
66
67        Self {
68            base,
69            deltas,
70            count: values.len(),
71        }
72    }
73
74    /// Encodes a slice of i64 values using delta encoding.
75    ///
76    /// Computes signed deltas between consecutive values, then zig-zag encodes
77    /// the deltas to store them as unsigned integers.
78    #[must_use]
79    pub fn encode_signed(values: &[i64]) -> Self {
80        if values.is_empty() {
81            return Self {
82                base: 0,
83                deltas: Vec::new(),
84                count: 0,
85            };
86        }
87
88        // Store base as zig-zag encoded
89        let base = zigzag_encode(values[0]);
90
91        // Compute signed deltas, then zig-zag encode them
92        let deltas: Vec<u64> = values
93            .windows(2)
94            .map(|w| zigzag_encode(w[1] - w[0]))
95            .collect();
96
97        Self {
98            base,
99            deltas,
100            count: values.len(),
101        }
102    }
103
104    /// Decodes the delta-encoded values back to the original sequence.
105    #[must_use]
106    pub fn decode(&self) -> Vec<u64> {
107        if self.count == 0 {
108            return Vec::new();
109        }
110
111        let mut result = Vec::with_capacity(self.count);
112        let mut current = self.base;
113        result.push(current);
114
115        for &delta in &self.deltas {
116            current = current.wrapping_add(delta);
117            result.push(current);
118        }
119
120        result
121    }
122
123    /// Decodes to signed integers using zig-zag decoding.
124    ///
125    /// Assumes the encoding was created with `encode_signed`.
126    #[must_use]
127    pub fn decode_signed(&self) -> Vec<i64> {
128        if self.count == 0 {
129            return Vec::new();
130        }
131
132        let mut result = Vec::with_capacity(self.count);
133        let mut current = zigzag_decode(self.base);
134        result.push(current);
135
136        for &delta in &self.deltas {
137            current += zigzag_decode(delta);
138            result.push(current);
139        }
140
141        result
142    }
143
144    /// Returns the number of values.
145    #[must_use]
146    pub fn len(&self) -> usize {
147        self.count
148    }
149
150    /// Returns whether the encoding is empty.
151    #[must_use]
152    pub fn is_empty(&self) -> bool {
153        self.count == 0
154    }
155
156    /// Returns the base value.
157    #[must_use]
158    pub fn base(&self) -> u64 {
159        self.base
160    }
161
162    /// Returns the deltas.
163    #[must_use]
164    pub fn deltas(&self) -> &[u64] {
165        &self.deltas
166    }
167
168    /// Returns the maximum delta value.
169    ///
170    /// Useful for determining bit width for bit-packing.
171    #[must_use]
172    pub fn max_delta(&self) -> u64 {
173        self.deltas.iter().copied().max().unwrap_or(0)
174    }
175
176    /// Returns the number of bits needed to represent the largest delta.
177    #[must_use]
178    pub fn bits_for_max_delta(&self) -> u8 {
179        let max = self.max_delta();
180        if max == 0 {
181            1
182        } else {
183            64 - max.leading_zeros() as u8
184        }
185    }
186
187    /// Estimates the compression ratio.
188    ///
189    /// Returns the ratio of original size to compressed size.
190    #[must_use]
191    pub fn compression_ratio(&self) -> f64 {
192        if self.count == 0 {
193            return 1.0;
194        }
195
196        let original_size = self.count * 8; // 8 bytes per u64
197        let compressed_size = 8 + (self.deltas.len() * 8); // base + deltas
198
199        original_size as f64 / compressed_size as f64
200    }
201
202    /// Serializes the delta encoding to bytes.
203    pub fn to_bytes(&self) -> Vec<u8> {
204        let mut buf = Vec::with_capacity(8 + 4 + self.deltas.len() * 8);
205        buf.extend_from_slice(&self.base.to_le_bytes());
206        buf.extend_from_slice(&(self.count as u32).to_le_bytes());
207        for &delta in &self.deltas {
208            buf.extend_from_slice(&delta.to_le_bytes());
209        }
210        buf
211    }
212
213    /// Deserializes a delta encoding from bytes.
214    pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
215        if bytes.len() < 12 {
216            return Err(io::Error::new(
217                io::ErrorKind::InvalidData,
218                "Delta encoding too short",
219            ));
220        }
221
222        let base = u64::from_le_bytes(bytes[0..8].try_into().unwrap());
223        let count = u32::from_le_bytes(bytes[8..12].try_into().unwrap()) as usize;
224
225        let expected_len = 12 + (count.saturating_sub(1)) * 8;
226        if bytes.len() < expected_len {
227            return Err(io::Error::new(
228                io::ErrorKind::InvalidData,
229                "Delta encoding truncated",
230            ));
231        }
232
233        let mut deltas = Vec::with_capacity(count.saturating_sub(1));
234        let mut offset = 12;
235        for _ in 1..count {
236            let delta = u64::from_le_bytes(bytes[offset..offset + 8].try_into().unwrap());
237            deltas.push(delta);
238            offset += 8;
239        }
240
241        Ok(Self {
242            base,
243            deltas,
244            count,
245        })
246    }
247}
248
249/// Zig-zag encodes a signed integer to unsigned.
250///
251/// Maps signed integers to unsigned: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, etc.
252#[inline]
253#[must_use]
254pub fn zigzag_encode(value: i64) -> u64 {
255    ((value << 1) ^ (value >> 63)) as u64
256}
257
258/// Zig-zag decodes an unsigned integer to signed.
259#[inline]
260#[must_use]
261pub fn zigzag_decode(value: u64) -> i64 {
262    ((value >> 1) as i64) ^ (-((value & 1) as i64))
263}
264
265#[cfg(test)]
266mod tests {
267    use super::*;
268
269    #[test]
270    fn test_delta_encode_decode() {
271        let values = vec![100u64, 105, 107, 110, 115, 120, 128, 130];
272        let encoded = DeltaEncoding::encode(&values);
273        let decoded = encoded.decode();
274        assert_eq!(values, decoded);
275    }
276
277    #[test]
278    fn test_delta_empty() {
279        let values: Vec<u64> = vec![];
280        let encoded = DeltaEncoding::encode(&values);
281        assert!(encoded.is_empty());
282        assert_eq!(encoded.decode(), values);
283    }
284
285    #[test]
286    fn test_delta_single_value() {
287        let values = vec![42u64];
288        let encoded = DeltaEncoding::encode(&values);
289        assert_eq!(encoded.len(), 1);
290        assert_eq!(encoded.decode(), values);
291    }
292
293    #[test]
294    fn test_delta_sequential() {
295        let values: Vec<u64> = (0..100).collect();
296        let encoded = DeltaEncoding::encode(&values);
297        assert_eq!(encoded.max_delta(), 1);
298        assert_eq!(encoded.bits_for_max_delta(), 1);
299        assert_eq!(encoded.decode(), values);
300    }
301
302    #[test]
303    fn test_delta_large_gaps() {
304        let values = vec![0u64, 1000, 2000, 3000];
305        let encoded = DeltaEncoding::encode(&values);
306        assert_eq!(encoded.max_delta(), 1000);
307        assert_eq!(encoded.decode(), values);
308    }
309
310    #[test]
311    fn test_zigzag_encode_decode() {
312        assert_eq!(zigzag_encode(0), 0);
313        assert_eq!(zigzag_encode(-1), 1);
314        assert_eq!(zigzag_encode(1), 2);
315        assert_eq!(zigzag_encode(-2), 3);
316        assert_eq!(zigzag_encode(2), 4);
317
318        for v in [-100i64, -50, -1, 0, 1, 50, 100, i64::MIN, i64::MAX] {
319            assert_eq!(zigzag_decode(zigzag_encode(v)), v);
320        }
321    }
322
323    #[test]
324    fn test_delta_signed() {
325        let values = vec![-100i64, -50, 0, 50, 100];
326        let encoded = DeltaEncoding::encode_signed(&values);
327        let decoded = encoded.decode_signed();
328        assert_eq!(values, decoded);
329    }
330
331    #[test]
332    fn test_delta_serialization() {
333        let values = vec![100u64, 105, 107, 110, 115];
334        let encoded = DeltaEncoding::encode(&values);
335        let bytes = encoded.to_bytes();
336        let restored = DeltaEncoding::from_bytes(&bytes).unwrap();
337        assert_eq!(encoded.decode(), restored.decode());
338    }
339
340    #[test]
341    fn test_compression_ratio() {
342        // Sequential values should compress well
343        let sequential: Vec<u64> = (0..1000).collect();
344        let encoded = DeltaEncoding::encode(&sequential);
345        // Each delta is 1, so compression is minimal but base + deltas is stored
346        assert!(encoded.len() == 1000);
347    }
348}