Skip to main content

oxigdal_sync/
delta.rs

1//! Delta encoding for bandwidth-efficient synchronization
2//!
3//! This module provides delta encoding/decoding capabilities to minimize
4//! the amount of data transmitted during synchronization.
5
6use crate::{SyncError, SyncResult};
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
10/// Delta operation
11#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
12pub enum DeltaOp {
13    /// Copy bytes from the base at offset, length
14    Copy {
15        /// Offset in the base data
16        offset: usize,
17        /// Number of bytes to copy
18        length: usize,
19    },
20    /// Insert new data
21    Insert {
22        /// Data to insert
23        data: Vec<u8>,
24    },
25}
26
27/// A delta representing the difference between two byte sequences
28#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct Delta {
30    /// The operations that transform base into target
31    ops: Vec<DeltaOp>,
32    /// Size of the base data
33    base_size: usize,
34    /// Size of the target data
35    target_size: usize,
36}
37
38impl Delta {
39    /// Creates a new delta
40    pub fn new(base_size: usize, target_size: usize) -> Self {
41        Self {
42            ops: Vec::new(),
43            base_size,
44            target_size,
45        }
46    }
47
48    /// Adds a copy operation
49    ///
50    /// # Arguments
51    ///
52    /// * `offset` - Offset in base data
53    /// * `length` - Number of bytes to copy
54    pub fn add_copy(&mut self, offset: usize, length: usize) {
55        if length == 0 {
56            return;
57        }
58
59        self.ops.push(DeltaOp::Copy { offset, length });
60    }
61
62    /// Adds an insert operation
63    ///
64    /// # Arguments
65    ///
66    /// * `data` - Data to insert
67    pub fn add_insert(&mut self, data: Vec<u8>) {
68        if data.is_empty() {
69            return;
70        }
71
72        self.ops.push(DeltaOp::Insert { data });
73    }
74
75    /// Gets the operations
76    pub fn operations(&self) -> &[DeltaOp] {
77        &self.ops
78    }
79
80    /// Gets the base size
81    pub fn base_size(&self) -> usize {
82        self.base_size
83    }
84
85    /// Gets the target size
86    pub fn target_size(&self) -> usize {
87        self.target_size
88    }
89
90    /// Applies the delta to base data to produce target data
91    ///
92    /// # Arguments
93    ///
94    /// * `base` - The base data
95    ///
96    /// # Returns
97    ///
98    /// The reconstructed target data
99    pub fn apply(&self, base: &[u8]) -> SyncResult<Vec<u8>> {
100        if base.len() != self.base_size {
101            return Err(SyncError::DeltaEncodingError(format!(
102                "Base size mismatch: expected {}, got {}",
103                self.base_size,
104                base.len()
105            )));
106        }
107
108        let mut result = Vec::with_capacity(self.target_size);
109
110        for op in &self.ops {
111            match op {
112                DeltaOp::Copy { offset, length } => {
113                    if *offset + *length > base.len() {
114                        return Err(SyncError::DeltaEncodingError(
115                            "Copy beyond base data".to_string(),
116                        ));
117                    }
118                    result.extend_from_slice(&base[*offset..*offset + *length]);
119                }
120                DeltaOp::Insert { data } => {
121                    result.extend_from_slice(data);
122                }
123            }
124        }
125
126        if result.len() != self.target_size {
127            return Err(SyncError::DeltaEncodingError(format!(
128                "Target size mismatch: expected {}, got {}",
129                self.target_size,
130                result.len()
131            )));
132        }
133
134        Ok(result)
135    }
136
137    /// Calculates the compression ratio
138    ///
139    /// # Returns
140    ///
141    /// Ratio of delta size to target size (lower is better)
142    pub fn compression_ratio(&self) -> f64 {
143        let delta_size = self.delta_size();
144        if self.target_size == 0 {
145            return 0.0;
146        }
147        delta_size as f64 / self.target_size as f64
148    }
149
150    /// Calculates the size of the delta in bytes
151    fn delta_size(&self) -> usize {
152        let mut size = 0;
153        for op in &self.ops {
154            match op {
155                DeltaOp::Copy { .. } => {
156                    // Size of offset + length (roughly 16 bytes)
157                    size += 16;
158                }
159                DeltaOp::Insert { data } => {
160                    size += data.len() + 8; // Data plus length field
161                }
162            }
163        }
164        size
165    }
166}
167
168/// Delta encoder using a simple block-based algorithm
169pub struct DeltaEncoder {
170    /// Block size for matching
171    block_size: usize,
172}
173
174impl DeltaEncoder {
175    /// Creates a new delta encoder
176    ///
177    /// # Arguments
178    ///
179    /// * `block_size` - Size of blocks for matching (larger = faster but less compression)
180    pub fn new(block_size: usize) -> Self {
181        Self { block_size }
182    }
183
184    /// Creates a delta encoder with default settings
185    pub fn default_encoder() -> Self {
186        Self::new(16) // 16-byte blocks
187    }
188
189    /// Encodes the difference between base and target
190    ///
191    /// # Arguments
192    ///
193    /// * `base` - The base data
194    /// * `target` - The target data
195    ///
196    /// # Returns
197    ///
198    /// A delta that transforms base into target
199    pub fn encode(&self, base: &[u8], target: &[u8]) -> SyncResult<Delta> {
200        let mut delta = Delta::new(base.len(), target.len());
201
202        // Build a hash table of base blocks
203        let base_blocks = self.build_block_index(base);
204
205        let mut target_pos = 0;
206        let mut pending_insert = Vec::new();
207
208        while target_pos < target.len() {
209            // Try to find a match in base
210            if let Some(match_info) = self.find_match(base, target, target_pos, &base_blocks) {
211                // Flush any pending insert
212                if !pending_insert.is_empty() {
213                    delta.add_insert(pending_insert.clone());
214                    pending_insert.clear();
215                }
216
217                // Add copy operation
218                delta.add_copy(match_info.0, match_info.1);
219                target_pos += match_info.1;
220            } else {
221                // No match found, accumulate for insert
222                if target_pos < target.len() {
223                    pending_insert.push(target[target_pos]);
224                }
225                target_pos += 1;
226            }
227        }
228
229        // Flush remaining insert
230        if !pending_insert.is_empty() {
231            delta.add_insert(pending_insert);
232        }
233
234        Ok(delta)
235    }
236
237    /// Builds an index of block positions in base data
238    fn build_block_index(&self, base: &[u8]) -> HashMap<u64, Vec<usize>> {
239        let mut index = HashMap::new();
240
241        for i in 0..base.len().saturating_sub(self.block_size - 1) {
242            let block = &base[i..i + self.block_size];
243            let hash = self.hash_block(block);
244            index.entry(hash).or_insert_with(Vec::new).push(i);
245        }
246
247        index
248    }
249
250    /// Simple hash function for blocks
251    fn hash_block(&self, block: &[u8]) -> u64 {
252        let mut hash: u64 = 0;
253        for (i, &byte) in block.iter().enumerate() {
254            hash = hash.wrapping_add((byte as u64).wrapping_mul(31_u64.wrapping_pow(i as u32)));
255        }
256        hash
257    }
258
259    /// Finds a match for target data in base
260    ///
261    /// Returns (offset, length) of the match, or None if no match found
262    fn find_match(
263        &self,
264        base: &[u8],
265        target: &[u8],
266        target_pos: usize,
267        base_blocks: &HashMap<u64, Vec<usize>>,
268    ) -> Option<(usize, usize)> {
269        if target_pos + self.block_size > target.len() {
270            return None;
271        }
272
273        let target_block = &target[target_pos..target_pos + self.block_size];
274        let hash = self.hash_block(target_block);
275
276        // Find candidates
277        let candidates = base_blocks.get(&hash)?;
278
279        // Find the longest match among candidates
280        let mut best_match: Option<(usize, usize)> = None;
281
282        for &base_pos in candidates {
283            let mut length = 0;
284
285            while base_pos + length < base.len()
286                && target_pos + length < target.len()
287                && base[base_pos + length] == target[target_pos + length]
288            {
289                length += 1;
290            }
291
292            if length >= self.block_size {
293                if let Some((_, best_len)) = best_match {
294                    if length > best_len {
295                        best_match = Some((base_pos, length));
296                    }
297                } else {
298                    best_match = Some((base_pos, length));
299                }
300            }
301        }
302
303        best_match
304    }
305}
306
307impl Default for DeltaEncoder {
308    fn default() -> Self {
309        Self::default_encoder()
310    }
311}
312
313#[cfg(test)]
314mod tests {
315    use super::*;
316
317    #[test]
318    fn test_delta_creation() {
319        let delta = Delta::new(100, 150);
320        assert_eq!(delta.base_size(), 100);
321        assert_eq!(delta.target_size(), 150);
322    }
323
324    #[test]
325    fn test_delta_add_copy() {
326        let mut delta = Delta::new(100, 100);
327        delta.add_copy(0, 50);
328        assert_eq!(delta.operations().len(), 1);
329    }
330
331    #[test]
332    fn test_delta_add_insert() {
333        let mut delta = Delta::new(100, 110);
334        delta.add_insert(b"hello".to_vec());
335        assert_eq!(delta.operations().len(), 1);
336    }
337
338    #[test]
339    fn test_delta_apply_copy() -> SyncResult<()> {
340        let base = b"hello world";
341        let mut delta = Delta::new(base.len(), 5);
342        delta.add_copy(0, 5);
343
344        let result = delta.apply(base)?;
345        assert_eq!(result, b"hello");
346
347        Ok(())
348    }
349
350    #[test]
351    fn test_delta_apply_insert() -> SyncResult<()> {
352        let base = b"";
353        let mut delta = Delta::new(0, 5);
354        delta.add_insert(b"hello".to_vec());
355
356        let result = delta.apply(base)?;
357        assert_eq!(result, b"hello");
358
359        Ok(())
360    }
361
362    #[test]
363    fn test_delta_apply_mixed() -> SyncResult<()> {
364        let base = b"hello world";
365        // "hello, world" = 12 characters
366        let mut delta = Delta::new(base.len(), 12);
367        delta.add_copy(0, 5);
368        delta.add_insert(b", ".to_vec());
369        delta.add_copy(6, 5);
370
371        let result = delta.apply(base)?;
372        assert_eq!(result, b"hello, world");
373
374        Ok(())
375    }
376
377    #[test]
378    fn test_delta_encoder() -> SyncResult<()> {
379        let encoder = DeltaEncoder::default_encoder();
380        let base = b"hello world, this is a test";
381        let target = b"hello world, this is a test!";
382
383        let delta = encoder.encode(base, target)?;
384        let result = delta.apply(base)?;
385
386        assert_eq!(result, target);
387
388        Ok(())
389    }
390
391    #[test]
392    fn test_delta_encoder_identical() -> SyncResult<()> {
393        let encoder = DeltaEncoder::default_encoder();
394        let base = b"hello world";
395        let target = b"hello world";
396
397        let delta = encoder.encode(base, target)?;
398        let result = delta.apply(base)?;
399
400        assert_eq!(result, target);
401
402        Ok(())
403    }
404
405    #[test]
406    fn test_delta_encoder_no_match() -> SyncResult<()> {
407        let encoder = DeltaEncoder::default_encoder();
408        let base = b"hello world";
409        let target = b"completely different";
410
411        let delta = encoder.encode(base, target)?;
412        let result = delta.apply(base)?;
413
414        assert_eq!(result, target);
415
416        Ok(())
417    }
418}