Skip to main content

graphos_core/storage/
delta.rs

1//! Delta encoding for sorted integer sequences.
2//!
3//! Delta encoding stores the differences between consecutive values rather than
4//! the values themselves. This is highly effective for sorted or sequential data
5//! where consecutive values are close to each other.
6//!
7//! # Example
8//!
9//! ```ignore
10//! // Original: [100, 105, 107, 110, 115]
11//! // Deltas:   [5, 2, 3, 5]
12//! // Stored:   base=100, deltas=[5, 2, 3, 5]
13//!
14//! let values = vec![100u64, 105, 107, 110, 115];
15//! let encoded = DeltaEncoding::encode(&values);
16//! let decoded = encoded.decode();
17//! assert_eq!(values, decoded);
18//! ```
19
20use std::io;
21
22/// Delta-encoded integer sequence.
23///
24/// Stores a base value and the deltas between consecutive values.
25/// Effective for sorted sequences where deltas are small.
26#[derive(Debug, Clone)]
27pub struct DeltaEncoding {
28    /// The first value in the sequence.
29    base: u64,
30    /// Deltas between consecutive values.
31    deltas: Vec<u64>,
32    /// Number of values (base + deltas.len()).
33    count: usize,
34}
35
36impl DeltaEncoding {
37    /// Encodes a slice of u64 values using delta encoding.
38    ///
39    /// For best compression, values should be sorted in ascending order.
40    #[must_use]
41    pub fn encode(values: &[u64]) -> Self {
42        if values.is_empty() {
43            return Self {
44                base: 0,
45                deltas: Vec::new(),
46                count: 0,
47            };
48        }
49
50        let base = values[0];
51        let deltas: Vec<u64> = values
52            .windows(2)
53            .map(|w| w[1].saturating_sub(w[0]))
54            .collect();
55
56        Self {
57            base,
58            deltas,
59            count: values.len(),
60        }
61    }
62
63    /// Encodes a slice of i64 values using delta encoding.
64    ///
65    /// Computes signed deltas between consecutive values, then zig-zag encodes
66    /// the deltas to store them as unsigned integers.
67    #[must_use]
68    pub fn encode_signed(values: &[i64]) -> Self {
69        if values.is_empty() {
70            return Self {
71                base: 0,
72                deltas: Vec::new(),
73                count: 0,
74            };
75        }
76
77        // Store base as zig-zag encoded
78        let base = zigzag_encode(values[0]);
79
80        // Compute signed deltas, then zig-zag encode them
81        let deltas: Vec<u64> = values
82            .windows(2)
83            .map(|w| zigzag_encode(w[1] - w[0]))
84            .collect();
85
86        Self {
87            base,
88            deltas,
89            count: values.len(),
90        }
91    }
92
93    /// Decodes the delta-encoded values back to the original sequence.
94    #[must_use]
95    pub fn decode(&self) -> Vec<u64> {
96        if self.count == 0 {
97            return Vec::new();
98        }
99
100        let mut result = Vec::with_capacity(self.count);
101        let mut current = self.base;
102        result.push(current);
103
104        for &delta in &self.deltas {
105            current = current.wrapping_add(delta);
106            result.push(current);
107        }
108
109        result
110    }
111
112    /// Decodes to signed integers using zig-zag decoding.
113    ///
114    /// Assumes the encoding was created with `encode_signed`.
115    #[must_use]
116    pub fn decode_signed(&self) -> Vec<i64> {
117        if self.count == 0 {
118            return Vec::new();
119        }
120
121        let mut result = Vec::with_capacity(self.count);
122        let mut current = zigzag_decode(self.base);
123        result.push(current);
124
125        for &delta in &self.deltas {
126            current += zigzag_decode(delta);
127            result.push(current);
128        }
129
130        result
131    }
132
133    /// Returns the number of values.
134    #[must_use]
135    pub fn len(&self) -> usize {
136        self.count
137    }
138
139    /// Returns whether the encoding is empty.
140    #[must_use]
141    pub fn is_empty(&self) -> bool {
142        self.count == 0
143    }
144
145    /// Returns the base value.
146    #[must_use]
147    pub fn base(&self) -> u64 {
148        self.base
149    }
150
151    /// Returns the deltas.
152    #[must_use]
153    pub fn deltas(&self) -> &[u64] {
154        &self.deltas
155    }
156
157    /// Returns the maximum delta value.
158    ///
159    /// Useful for determining bit width for bit-packing.
160    #[must_use]
161    pub fn max_delta(&self) -> u64 {
162        self.deltas.iter().copied().max().unwrap_or(0)
163    }
164
165    /// Returns the number of bits needed to represent the largest delta.
166    #[must_use]
167    pub fn bits_for_max_delta(&self) -> u8 {
168        let max = self.max_delta();
169        if max == 0 {
170            1
171        } else {
172            64 - max.leading_zeros() as u8
173        }
174    }
175
176    /// Estimates the compression ratio.
177    ///
178    /// Returns the ratio of original size to compressed size.
179    #[must_use]
180    pub fn compression_ratio(&self) -> f64 {
181        if self.count == 0 {
182            return 1.0;
183        }
184
185        let original_size = self.count * 8; // 8 bytes per u64
186        let compressed_size = 8 + (self.deltas.len() * 8); // base + deltas
187
188        original_size as f64 / compressed_size as f64
189    }
190
191    /// Serializes the delta encoding to bytes.
192    pub fn to_bytes(&self) -> Vec<u8> {
193        let mut buf = Vec::with_capacity(8 + 4 + self.deltas.len() * 8);
194        buf.extend_from_slice(&self.base.to_le_bytes());
195        buf.extend_from_slice(&(self.count as u32).to_le_bytes());
196        for &delta in &self.deltas {
197            buf.extend_from_slice(&delta.to_le_bytes());
198        }
199        buf
200    }
201
202    /// Deserializes a delta encoding from bytes.
203    pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
204        if bytes.len() < 12 {
205            return Err(io::Error::new(
206                io::ErrorKind::InvalidData,
207                "Delta encoding too short",
208            ));
209        }
210
211        let base = u64::from_le_bytes(bytes[0..8].try_into().unwrap());
212        let count = u32::from_le_bytes(bytes[8..12].try_into().unwrap()) as usize;
213
214        let expected_len = 12 + (count.saturating_sub(1)) * 8;
215        if bytes.len() < expected_len {
216            return Err(io::Error::new(
217                io::ErrorKind::InvalidData,
218                "Delta encoding truncated",
219            ));
220        }
221
222        let mut deltas = Vec::with_capacity(count.saturating_sub(1));
223        let mut offset = 12;
224        for _ in 1..count {
225            let delta = u64::from_le_bytes(bytes[offset..offset + 8].try_into().unwrap());
226            deltas.push(delta);
227            offset += 8;
228        }
229
230        Ok(Self {
231            base,
232            deltas,
233            count,
234        })
235    }
236}
237
238/// Zig-zag encodes a signed integer to unsigned.
239///
240/// Maps signed integers to unsigned: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, etc.
241#[inline]
242#[must_use]
243pub fn zigzag_encode(value: i64) -> u64 {
244    ((value << 1) ^ (value >> 63)) as u64
245}
246
247/// Zig-zag decodes an unsigned integer to signed.
248#[inline]
249#[must_use]
250pub fn zigzag_decode(value: u64) -> i64 {
251    ((value >> 1) as i64) ^ (-((value & 1) as i64))
252}
253
254#[cfg(test)]
255mod tests {
256    use super::*;
257
258    #[test]
259    fn test_delta_encode_decode() {
260        let values = vec![100u64, 105, 107, 110, 115, 120, 128, 130];
261        let encoded = DeltaEncoding::encode(&values);
262        let decoded = encoded.decode();
263        assert_eq!(values, decoded);
264    }
265
266    #[test]
267    fn test_delta_empty() {
268        let values: Vec<u64> = vec![];
269        let encoded = DeltaEncoding::encode(&values);
270        assert!(encoded.is_empty());
271        assert_eq!(encoded.decode(), values);
272    }
273
274    #[test]
275    fn test_delta_single_value() {
276        let values = vec![42u64];
277        let encoded = DeltaEncoding::encode(&values);
278        assert_eq!(encoded.len(), 1);
279        assert_eq!(encoded.decode(), values);
280    }
281
282    #[test]
283    fn test_delta_sequential() {
284        let values: Vec<u64> = (0..100).collect();
285        let encoded = DeltaEncoding::encode(&values);
286        assert_eq!(encoded.max_delta(), 1);
287        assert_eq!(encoded.bits_for_max_delta(), 1);
288        assert_eq!(encoded.decode(), values);
289    }
290
291    #[test]
292    fn test_delta_large_gaps() {
293        let values = vec![0u64, 1000, 2000, 3000];
294        let encoded = DeltaEncoding::encode(&values);
295        assert_eq!(encoded.max_delta(), 1000);
296        assert_eq!(encoded.decode(), values);
297    }
298
299    #[test]
300    fn test_zigzag_encode_decode() {
301        assert_eq!(zigzag_encode(0), 0);
302        assert_eq!(zigzag_encode(-1), 1);
303        assert_eq!(zigzag_encode(1), 2);
304        assert_eq!(zigzag_encode(-2), 3);
305        assert_eq!(zigzag_encode(2), 4);
306
307        for v in [-100i64, -50, -1, 0, 1, 50, 100, i64::MIN, i64::MAX] {
308            assert_eq!(zigzag_decode(zigzag_encode(v)), v);
309        }
310    }
311
312    #[test]
313    fn test_delta_signed() {
314        let values = vec![-100i64, -50, 0, 50, 100];
315        let encoded = DeltaEncoding::encode_signed(&values);
316        let decoded = encoded.decode_signed();
317        assert_eq!(values, decoded);
318    }
319
320    #[test]
321    fn test_delta_serialization() {
322        let values = vec![100u64, 105, 107, 110, 115];
323        let encoded = DeltaEncoding::encode(&values);
324        let bytes = encoded.to_bytes();
325        let restored = DeltaEncoding::from_bytes(&bytes).unwrap();
326        assert_eq!(encoded.decode(), restored.decode());
327    }
328
329    #[test]
330    fn test_compression_ratio() {
331        // Sequential values should compress well
332        let sequential: Vec<u64> = (0..1000).collect();
333        let encoded = DeltaEncoding::encode(&sequential);
334        // Each delta is 1, so compression is minimal but base + deltas is stored
335        assert!(encoded.len() == 1000);
336    }
337}