oxigdal-sync 0.1.4

Multi-device synchronization with CRDTs, vector clocks, and operational transformation for OxiGDAL
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
//! Delta encoding for bandwidth-efficient synchronization
//!
//! This module provides delta encoding/decoding capabilities to minimize
//! the amount of data transmitted during synchronization.

use crate::{SyncError, SyncResult};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// Delta operation
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum DeltaOp {
    /// Copy bytes from the base at offset, length
    Copy {
        /// Offset in the base data
        offset: usize,
        /// Number of bytes to copy
        length: usize,
    },
    /// Insert new data
    Insert {
        /// Data to insert
        data: Vec<u8>,
    },
}

/// A delta representing the difference between two byte sequences
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Delta {
    /// The operations that transform base into target
    ops: Vec<DeltaOp>,
    /// Size of the base data
    base_size: usize,
    /// Size of the target data
    target_size: usize,
}

impl Delta {
    /// Creates a new delta
    pub fn new(base_size: usize, target_size: usize) -> Self {
        Self {
            ops: Vec::new(),
            base_size,
            target_size,
        }
    }

    /// Adds a copy operation
    ///
    /// # Arguments
    ///
    /// * `offset` - Offset in base data
    /// * `length` - Number of bytes to copy
    pub fn add_copy(&mut self, offset: usize, length: usize) {
        if length == 0 {
            return;
        }

        self.ops.push(DeltaOp::Copy { offset, length });
    }

    /// Adds an insert operation
    ///
    /// # Arguments
    ///
    /// * `data` - Data to insert
    pub fn add_insert(&mut self, data: Vec<u8>) {
        if data.is_empty() {
            return;
        }

        self.ops.push(DeltaOp::Insert { data });
    }

    /// Gets the operations
    pub fn operations(&self) -> &[DeltaOp] {
        &self.ops
    }

    /// Gets the base size
    pub fn base_size(&self) -> usize {
        self.base_size
    }

    /// Gets the target size
    pub fn target_size(&self) -> usize {
        self.target_size
    }

    /// Applies the delta to base data to produce target data
    ///
    /// # Arguments
    ///
    /// * `base` - The base data
    ///
    /// # Returns
    ///
    /// The reconstructed target data
    pub fn apply(&self, base: &[u8]) -> SyncResult<Vec<u8>> {
        if base.len() != self.base_size {
            return Err(SyncError::DeltaEncodingError(format!(
                "Base size mismatch: expected {}, got {}",
                self.base_size,
                base.len()
            )));
        }

        let mut result = Vec::with_capacity(self.target_size);

        for op in &self.ops {
            match op {
                DeltaOp::Copy { offset, length } => {
                    if *offset + *length > base.len() {
                        return Err(SyncError::DeltaEncodingError(
                            "Copy beyond base data".to_string(),
                        ));
                    }
                    result.extend_from_slice(&base[*offset..*offset + *length]);
                }
                DeltaOp::Insert { data } => {
                    result.extend_from_slice(data);
                }
            }
        }

        if result.len() != self.target_size {
            return Err(SyncError::DeltaEncodingError(format!(
                "Target size mismatch: expected {}, got {}",
                self.target_size,
                result.len()
            )));
        }

        Ok(result)
    }

    /// Calculates the compression ratio
    ///
    /// # Returns
    ///
    /// Ratio of delta size to target size (lower is better)
    pub fn compression_ratio(&self) -> f64 {
        let delta_size = self.delta_size();
        if self.target_size == 0 {
            return 0.0;
        }
        delta_size as f64 / self.target_size as f64
    }

    /// Calculates the size of the delta in bytes
    fn delta_size(&self) -> usize {
        let mut size = 0;
        for op in &self.ops {
            match op {
                DeltaOp::Copy { .. } => {
                    // Size of offset + length (roughly 16 bytes)
                    size += 16;
                }
                DeltaOp::Insert { data } => {
                    size += data.len() + 8; // Data plus length field
                }
            }
        }
        size
    }
}

/// Delta encoder using a simple block-based algorithm
pub struct DeltaEncoder {
    /// Block size for matching
    block_size: usize,
}

impl DeltaEncoder {
    /// Creates a new delta encoder
    ///
    /// # Arguments
    ///
    /// * `block_size` - Size of blocks for matching (larger = faster but less compression)
    pub fn new(block_size: usize) -> Self {
        Self { block_size }
    }

    /// Creates a delta encoder with default settings
    pub fn default_encoder() -> Self {
        Self::new(16) // 16-byte blocks
    }

    /// Encodes the difference between base and target
    ///
    /// # Arguments
    ///
    /// * `base` - The base data
    /// * `target` - The target data
    ///
    /// # Returns
    ///
    /// A delta that transforms base into target
    pub fn encode(&self, base: &[u8], target: &[u8]) -> SyncResult<Delta> {
        let mut delta = Delta::new(base.len(), target.len());

        // Build a hash table of base blocks
        let base_blocks = self.build_block_index(base);

        let mut target_pos = 0;
        let mut pending_insert = Vec::new();

        while target_pos < target.len() {
            // Try to find a match in base
            if let Some(match_info) = self.find_match(base, target, target_pos, &base_blocks) {
                // Flush any pending insert
                if !pending_insert.is_empty() {
                    delta.add_insert(pending_insert.clone());
                    pending_insert.clear();
                }

                // Add copy operation
                delta.add_copy(match_info.0, match_info.1);
                target_pos += match_info.1;
            } else {
                // No match found, accumulate for insert
                if target_pos < target.len() {
                    pending_insert.push(target[target_pos]);
                }
                target_pos += 1;
            }
        }

        // Flush remaining insert
        if !pending_insert.is_empty() {
            delta.add_insert(pending_insert);
        }

        Ok(delta)
    }

    /// Builds an index of block positions in base data
    fn build_block_index(&self, base: &[u8]) -> HashMap<u64, Vec<usize>> {
        let mut index = HashMap::new();

        for i in 0..base.len().saturating_sub(self.block_size - 1) {
            let block = &base[i..i + self.block_size];
            let hash = self.hash_block(block);
            index.entry(hash).or_insert_with(Vec::new).push(i);
        }

        index
    }

    /// Simple hash function for blocks
    fn hash_block(&self, block: &[u8]) -> u64 {
        let mut hash: u64 = 0;
        for (i, &byte) in block.iter().enumerate() {
            hash = hash.wrapping_add((byte as u64).wrapping_mul(31_u64.wrapping_pow(i as u32)));
        }
        hash
    }

    /// Finds a match for target data in base
    ///
    /// Returns (offset, length) of the match, or None if no match found
    fn find_match(
        &self,
        base: &[u8],
        target: &[u8],
        target_pos: usize,
        base_blocks: &HashMap<u64, Vec<usize>>,
    ) -> Option<(usize, usize)> {
        if target_pos + self.block_size > target.len() {
            return None;
        }

        let target_block = &target[target_pos..target_pos + self.block_size];
        let hash = self.hash_block(target_block);

        // Find candidates
        let candidates = base_blocks.get(&hash)?;

        // Find the longest match among candidates
        let mut best_match: Option<(usize, usize)> = None;

        for &base_pos in candidates {
            let mut length = 0;

            while base_pos + length < base.len()
                && target_pos + length < target.len()
                && base[base_pos + length] == target[target_pos + length]
            {
                length += 1;
            }

            if length >= self.block_size {
                if let Some((_, best_len)) = best_match {
                    if length > best_len {
                        best_match = Some((base_pos, length));
                    }
                } else {
                    best_match = Some((base_pos, length));
                }
            }
        }

        best_match
    }
}

impl Default for DeltaEncoder {
    fn default() -> Self {
        Self::default_encoder()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_delta_creation() {
        let delta = Delta::new(100, 150);
        assert_eq!(delta.base_size(), 100);
        assert_eq!(delta.target_size(), 150);
    }

    #[test]
    fn test_delta_add_copy() {
        let mut delta = Delta::new(100, 100);
        delta.add_copy(0, 50);
        assert_eq!(delta.operations().len(), 1);
    }

    #[test]
    fn test_delta_add_insert() {
        let mut delta = Delta::new(100, 110);
        delta.add_insert(b"hello".to_vec());
        assert_eq!(delta.operations().len(), 1);
    }

    #[test]
    fn test_delta_apply_copy() -> SyncResult<()> {
        let base = b"hello world";
        let mut delta = Delta::new(base.len(), 5);
        delta.add_copy(0, 5);

        let result = delta.apply(base)?;
        assert_eq!(result, b"hello");

        Ok(())
    }

    #[test]
    fn test_delta_apply_insert() -> SyncResult<()> {
        let base = b"";
        let mut delta = Delta::new(0, 5);
        delta.add_insert(b"hello".to_vec());

        let result = delta.apply(base)?;
        assert_eq!(result, b"hello");

        Ok(())
    }

    #[test]
    fn test_delta_apply_mixed() -> SyncResult<()> {
        let base = b"hello world";
        // "hello, world" = 12 characters
        let mut delta = Delta::new(base.len(), 12);
        delta.add_copy(0, 5);
        delta.add_insert(b", ".to_vec());
        delta.add_copy(6, 5);

        let result = delta.apply(base)?;
        assert_eq!(result, b"hello, world");

        Ok(())
    }

    #[test]
    fn test_delta_encoder() -> SyncResult<()> {
        let encoder = DeltaEncoder::default_encoder();
        let base = b"hello world, this is a test";
        let target = b"hello world, this is a test!";

        let delta = encoder.encode(base, target)?;
        let result = delta.apply(base)?;

        assert_eq!(result, target);

        Ok(())
    }

    #[test]
    fn test_delta_encoder_identical() -> SyncResult<()> {
        let encoder = DeltaEncoder::default_encoder();
        let base = b"hello world";
        let target = b"hello world";

        let delta = encoder.encode(base, target)?;
        let result = delta.apply(base)?;

        assert_eq!(result, target);

        Ok(())
    }

    #[test]
    fn test_delta_encoder_no_match() -> SyncResult<()> {
        let encoder = DeltaEncoder::default_encoder();
        let base = b"hello world";
        let target = b"completely different";

        let delta = encoder.encode(base, target)?;
        let result = delta.apply(base)?;

        assert_eq!(result, target);

        Ok(())
    }
}