1use crate::{SyncError, SyncResult};
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
10#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
12pub enum DeltaOp {
13 Copy {
15 offset: usize,
17 length: usize,
19 },
20 Insert {
22 data: Vec<u8>,
24 },
25}
26
27#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct Delta {
30 ops: Vec<DeltaOp>,
32 base_size: usize,
34 target_size: usize,
36}
37
38impl Delta {
39 pub fn new(base_size: usize, target_size: usize) -> Self {
41 Self {
42 ops: Vec::new(),
43 base_size,
44 target_size,
45 }
46 }
47
48 pub fn add_copy(&mut self, offset: usize, length: usize) {
55 if length == 0 {
56 return;
57 }
58
59 self.ops.push(DeltaOp::Copy { offset, length });
60 }
61
62 pub fn add_insert(&mut self, data: Vec<u8>) {
68 if data.is_empty() {
69 return;
70 }
71
72 self.ops.push(DeltaOp::Insert { data });
73 }
74
75 pub fn operations(&self) -> &[DeltaOp] {
77 &self.ops
78 }
79
80 pub fn base_size(&self) -> usize {
82 self.base_size
83 }
84
85 pub fn target_size(&self) -> usize {
87 self.target_size
88 }
89
90 pub fn apply(&self, base: &[u8]) -> SyncResult<Vec<u8>> {
100 if base.len() != self.base_size {
101 return Err(SyncError::DeltaEncodingError(format!(
102 "Base size mismatch: expected {}, got {}",
103 self.base_size,
104 base.len()
105 )));
106 }
107
108 let mut result = Vec::with_capacity(self.target_size);
109
110 for op in &self.ops {
111 match op {
112 DeltaOp::Copy { offset, length } => {
113 if *offset + *length > base.len() {
114 return Err(SyncError::DeltaEncodingError(
115 "Copy beyond base data".to_string(),
116 ));
117 }
118 result.extend_from_slice(&base[*offset..*offset + *length]);
119 }
120 DeltaOp::Insert { data } => {
121 result.extend_from_slice(data);
122 }
123 }
124 }
125
126 if result.len() != self.target_size {
127 return Err(SyncError::DeltaEncodingError(format!(
128 "Target size mismatch: expected {}, got {}",
129 self.target_size,
130 result.len()
131 )));
132 }
133
134 Ok(result)
135 }
136
137 pub fn compression_ratio(&self) -> f64 {
143 let delta_size = self.delta_size();
144 if self.target_size == 0 {
145 return 0.0;
146 }
147 delta_size as f64 / self.target_size as f64
148 }
149
150 fn delta_size(&self) -> usize {
152 let mut size = 0;
153 for op in &self.ops {
154 match op {
155 DeltaOp::Copy { .. } => {
156 size += 16;
158 }
159 DeltaOp::Insert { data } => {
160 size += data.len() + 8; }
162 }
163 }
164 size
165 }
166}
167
168pub struct DeltaEncoder {
170 block_size: usize,
172}
173
174impl DeltaEncoder {
175 pub fn new(block_size: usize) -> Self {
181 Self { block_size }
182 }
183
184 pub fn default_encoder() -> Self {
186 Self::new(16) }
188
189 pub fn encode(&self, base: &[u8], target: &[u8]) -> SyncResult<Delta> {
200 let mut delta = Delta::new(base.len(), target.len());
201
202 let base_blocks = self.build_block_index(base);
204
205 let mut target_pos = 0;
206 let mut pending_insert = Vec::new();
207
208 while target_pos < target.len() {
209 if let Some(match_info) = self.find_match(base, target, target_pos, &base_blocks) {
211 if !pending_insert.is_empty() {
213 delta.add_insert(pending_insert.clone());
214 pending_insert.clear();
215 }
216
217 delta.add_copy(match_info.0, match_info.1);
219 target_pos += match_info.1;
220 } else {
221 if target_pos < target.len() {
223 pending_insert.push(target[target_pos]);
224 }
225 target_pos += 1;
226 }
227 }
228
229 if !pending_insert.is_empty() {
231 delta.add_insert(pending_insert);
232 }
233
234 Ok(delta)
235 }
236
237 fn build_block_index(&self, base: &[u8]) -> HashMap<u64, Vec<usize>> {
239 let mut index = HashMap::new();
240
241 for i in 0..base.len().saturating_sub(self.block_size - 1) {
242 let block = &base[i..i + self.block_size];
243 let hash = self.hash_block(block);
244 index.entry(hash).or_insert_with(Vec::new).push(i);
245 }
246
247 index
248 }
249
250 fn hash_block(&self, block: &[u8]) -> u64 {
252 let mut hash: u64 = 0;
253 for (i, &byte) in block.iter().enumerate() {
254 hash = hash.wrapping_add((byte as u64).wrapping_mul(31_u64.wrapping_pow(i as u32)));
255 }
256 hash
257 }
258
259 fn find_match(
263 &self,
264 base: &[u8],
265 target: &[u8],
266 target_pos: usize,
267 base_blocks: &HashMap<u64, Vec<usize>>,
268 ) -> Option<(usize, usize)> {
269 if target_pos + self.block_size > target.len() {
270 return None;
271 }
272
273 let target_block = &target[target_pos..target_pos + self.block_size];
274 let hash = self.hash_block(target_block);
275
276 let candidates = base_blocks.get(&hash)?;
278
279 let mut best_match: Option<(usize, usize)> = None;
281
282 for &base_pos in candidates {
283 let mut length = 0;
284
285 while base_pos + length < base.len()
286 && target_pos + length < target.len()
287 && base[base_pos + length] == target[target_pos + length]
288 {
289 length += 1;
290 }
291
292 if length >= self.block_size {
293 if let Some((_, best_len)) = best_match {
294 if length > best_len {
295 best_match = Some((base_pos, length));
296 }
297 } else {
298 best_match = Some((base_pos, length));
299 }
300 }
301 }
302
303 best_match
304 }
305}
306
307impl Default for DeltaEncoder {
308 fn default() -> Self {
309 Self::default_encoder()
310 }
311}
312
313#[cfg(test)]
314mod tests {
315 use super::*;
316
317 #[test]
318 fn test_delta_creation() {
319 let delta = Delta::new(100, 150);
320 assert_eq!(delta.base_size(), 100);
321 assert_eq!(delta.target_size(), 150);
322 }
323
324 #[test]
325 fn test_delta_add_copy() {
326 let mut delta = Delta::new(100, 100);
327 delta.add_copy(0, 50);
328 assert_eq!(delta.operations().len(), 1);
329 }
330
331 #[test]
332 fn test_delta_add_insert() {
333 let mut delta = Delta::new(100, 110);
334 delta.add_insert(b"hello".to_vec());
335 assert_eq!(delta.operations().len(), 1);
336 }
337
338 #[test]
339 fn test_delta_apply_copy() -> SyncResult<()> {
340 let base = b"hello world";
341 let mut delta = Delta::new(base.len(), 5);
342 delta.add_copy(0, 5);
343
344 let result = delta.apply(base)?;
345 assert_eq!(result, b"hello");
346
347 Ok(())
348 }
349
350 #[test]
351 fn test_delta_apply_insert() -> SyncResult<()> {
352 let base = b"";
353 let mut delta = Delta::new(0, 5);
354 delta.add_insert(b"hello".to_vec());
355
356 let result = delta.apply(base)?;
357 assert_eq!(result, b"hello");
358
359 Ok(())
360 }
361
362 #[test]
363 fn test_delta_apply_mixed() -> SyncResult<()> {
364 let base = b"hello world";
365 let mut delta = Delta::new(base.len(), 12);
367 delta.add_copy(0, 5);
368 delta.add_insert(b", ".to_vec());
369 delta.add_copy(6, 5);
370
371 let result = delta.apply(base)?;
372 assert_eq!(result, b"hello, world");
373
374 Ok(())
375 }
376
377 #[test]
378 fn test_delta_encoder() -> SyncResult<()> {
379 let encoder = DeltaEncoder::default_encoder();
380 let base = b"hello world, this is a test";
381 let target = b"hello world, this is a test!";
382
383 let delta = encoder.encode(base, target)?;
384 let result = delta.apply(base)?;
385
386 assert_eq!(result, target);
387
388 Ok(())
389 }
390
391 #[test]
392 fn test_delta_encoder_identical() -> SyncResult<()> {
393 let encoder = DeltaEncoder::default_encoder();
394 let base = b"hello world";
395 let target = b"hello world";
396
397 let delta = encoder.encode(base, target)?;
398 let result = delta.apply(base)?;
399
400 assert_eq!(result, target);
401
402 Ok(())
403 }
404
405 #[test]
406 fn test_delta_encoder_no_match() -> SyncResult<()> {
407 let encoder = DeltaEncoder::default_encoder();
408 let base = b"hello world";
409 let target = b"completely different";
410
411 let delta = encoder.encode(base, target)?;
412 let result = delta.apply(base)?;
413
414 assert_eq!(result, target);
415
416 Ok(())
417 }
418}