files_diff/
zip.rs

1use crate::{
2    apply,
3    compress::CompressAlgorithm,
4    diff, hash,
5    patch::{DiffAlgorithm, Operation, Operations, PatchSet},
6    Error,
7};
8use log::{debug, info, trace, warn};
9use std::io::{Read as _, Write};
10
11// Process all files in both archives without recursion
12fn process_directory(
13    dir_path: &str,
14    files_before: &mut zip::ZipArchive<std::io::Cursor<Vec<u8>>>,
15    files_after: &mut zip::ZipArchive<std::io::Cursor<Vec<u8>>>,
16    processed_files: &mut std::collections::HashSet<String>,
17    patches: &mut Vec<(String, Operation)>,
18    diff_algorithm: DiffAlgorithm,
19    compress_algorithm: CompressAlgorithm,
20) -> Result<(), Error> {
21    debug!("Processing files starting from: `{}`", dir_path);
22
23    // Get all files from both archives
24    let mut all_files = std::collections::HashSet::new();
25
26    // Add files from before archive
27    for i in 0..files_before.len() {
28        let file = files_before
29            .by_index(i)
30            .map_err(|e| Error::ZipError(e.to_string()))?;
31        all_files.insert(file.name().to_string());
32    }
33
34    // Add files from after archive
35    for i in 0..files_after.len() {
36        let file = files_after
37            .by_index(i)
38            .map_err(|e| Error::ZipError(e.to_string()))?;
39        all_files.insert(file.name().to_string());
40    }
41
42    // Process all files
43    for path in all_files {
44        if processed_files.contains(&path) {
45            continue;
46        }
47        processed_files.insert(path.clone());
48
49        // Check if file exists in before archive
50        let before_exists = files_before.by_name(&path).is_ok();
51        let after_exists = files_after.by_name(&path).is_ok();
52
53        match (before_exists, after_exists) {
54            (true, true) => {
55                // File exists in both archives
56                let before_contents = read_file_contents(files_before, &path)?
57                    .ok_or_else(|| Error::ZipError("Failed to read before contents".to_string()))?;
58                let after_contents = read_file_contents(files_after, &path)?
59                    .ok_or_else(|| Error::ZipError("Failed to read after contents".to_string()))?;
60
61                if before_contents != after_contents {
62                    debug!("File modified: {}", path);
63                    let patch = diff(
64                        &before_contents,
65                        &after_contents,
66                        diff_algorithm,
67                        compress_algorithm,
68                    )?;
69                    patches.push((path, Operation::Patch(patch)));
70                } else {
71                    trace!("File unchanged: {}", path);
72                    patches.push((path, Operation::FileStaysSame));
73                }
74            }
75            (true, false) => {
76                // File was deleted
77                debug!("File deleted: {}", path);
78                patches.push((path, Operation::DeleteFile));
79            }
80            (false, true) => {
81                // New file
82                debug!("New file: {}", path);
83                if let Some(contents) = read_file_contents(files_after, &path)? {
84                    patches.push((path, Operation::PutFile(contents)));
85                }
86            }
87            (false, false) => {
88                // This shouldn't happen as the file must exist in at least one archive
89                warn!("File {} not found in either archive", path);
90            }
91        }
92    }
93
94    Ok(())
95}
96
97// Helper function to read file contents
98fn read_file_contents(
99    archive: &mut zip::ZipArchive<std::io::Cursor<Vec<u8>>>,
100    path: &str,
101) -> Result<Option<Vec<u8>>, Error> {
102    match archive.by_name(path) {
103        Ok(mut file) => {
104            let mut contents = Vec::new();
105            file.read_to_end(&mut contents)
106                .map_err(|e| Error::IoError(e.to_string()))?;
107            Ok(Some(contents))
108        }
109        Err(_) => Ok(None),
110    }
111}
112
/// Lists every ancestor directory of `path`, shortest prefix first.
///
/// The text after the final `/` is treated as the file name and ignored.
/// Empty components (leading or doubled slashes) are skipped, and the
/// returned prefixes are joined with a single `/` and carry no trailing
/// slash. A path without any `/` yields an empty list.
fn get_directories_of_file(path: &str) -> Vec<String> {
    // Everything before the last separator is the directory part.
    let parent_part = match path.rsplit_once('/') {
        Some((parents, _file_name)) => parents,
        None => return Vec::new(),
    };

    parent_part
        .split('/')
        .filter(|component| !component.is_empty())
        // The accumulator is the prefix built so far; each step extends it by
        // one component and emits a snapshot.
        .scan(String::new(), |prefix, component| {
            if !prefix.is_empty() {
                prefix.push('/');
            }
            prefix.push_str(component);
            Some(prefix.clone())
        })
        .collect()
}
131
132/// Generates a patch set that can transform one zip archive into another.
133///
134/// Creates a set of operations that describe how to transform the contents of
135/// one zip archive into another, handling file additions, deletions, and
136/// modifications efficiently.
137///
138/// # Example
139/// ```no_run
140/// use files_diff::{diff_zip, DiffAlgorithm, CompressAlgorithm};
141///
142/// // Generate patches for transforming a zip archive
143/// let patch_set = diff_zip(
144///     "v1.zip".to_string(),
145///     "v2.zip".to_string(),
146///     DiffAlgorithm::Rsync020,
147///     CompressAlgorithm::Zstd
148/// )?;
149///
150/// // Check the total size of all patches
151/// println!("Patch set size: {} bytes", patch_set.get_size());
152/// # Ok::<(), files_diff::Error>(())
153/// ```
154///
155/// The function handles:
156/// - Nested directory structures
157/// - File additions and deletions
158/// - File modifications using the specified diff algorithm
159/// - Directory creation and deletion
160pub fn diff_zip(
161    path_before: String,
162    path_after: String,
163    diff_algorithm: DiffAlgorithm,
164    compress_algorithm: CompressAlgorithm,
165) -> Result<PatchSet, Error> {
166    info!("Generating diff between {} and {}", path_before, path_after);
167    debug!("Using diff algorithm: {:?}", diff_algorithm);
168    debug!("Using compression algorithm: {:?}", compress_algorithm);
169
170    let before = std::fs::read(path_before).map_err(|e| Error::IoError(e.to_string()))?;
171    info!("before size: {}", before.len());
172    let after = std::fs::read(path_after).map_err(|e| Error::IoError(e.to_string()))?;
173    info!("after size: {}", after.len());
174
175    let hash_before = hash(&before);
176
177    trace!("Before archive size: {} bytes", before.len());
178    trace!("After archive size: {} bytes", after.len());
179
180    let mut files_before = zip::ZipArchive::new(std::io::Cursor::new(before))
181        .map_err(|e| Error::ZipError(e.to_string()))?;
182    let mut files_after = zip::ZipArchive::new(std::io::Cursor::new(after))
183        .map_err(|e| Error::ZipError(e.to_string()))?;
184
185    let mut patches = Vec::new();
186    let mut processed_files = std::collections::HashSet::new();
187
188    // Start processing from root
189    process_directory(
190        "",
191        &mut files_before,
192        &mut files_after,
193        &mut processed_files,
194        &mut patches,
195        diff_algorithm,
196        compress_algorithm,
197    )?;
198
199    let operations = Operations(patches);
200    let operations_hash = operations.hash()?;
201
202    debug!("Generated {} patch operations", operations.0.len());
203    Ok(PatchSet {
204        operations,
205        hash_before,
206        operations_hash,
207    })
208}
209
210/// Applies a patch set to transform a zip archive into a new version.
211///
212/// Takes a source zip archive and a patch set, and creates a new zip archive
213/// that represents the target version. Validates all operations and maintains
214/// the integrity of the archive structure.
215///
216/// # Example
217/// ```no_run
218/// use files_diff::{diff_zip, apply_zip, DiffAlgorithm, CompressAlgorithm};
219///
220/// // First generate a patch set
221/// let patch_set = diff_zip(
222///     "source.zip".to_string(),
223///     "target.zip".to_string(),
224///     DiffAlgorithm::Rsync020,
225///     CompressAlgorithm::Zstd
226/// )?;
227///
228/// // Apply the patches to create a new version
229/// apply_zip(
230///     "source.zip",
231///     patch_set,
232///     "result.zip".to_string()
233/// )?;
234/// # Ok::<(), files_diff::Error>(())
235/// ```
236///
237/// The function:
238/// - Preserves directory structure
239/// - Handles file additions, deletions, and modifications
240/// - Maintains file metadata
241/// - Validates all operations during application
242pub fn apply_zip(path_base: &str, delta: PatchSet, path_after: String) -> Result<(), Error> {
243    info!("Applying patch to {} to create {}", path_base, path_after);
244    debug!("Patch contains {} operations", delta.operations.0.len());
245
246    let base_data = std::fs::read(path_base).map_err(|e| Error::IoError(e.to_string()))?;
247
248    let base_hash = hash(&base_data);
249    if base_hash != delta.hash_before {
250        return Err(Error::BeforeHashMismatch);
251    }
252
253    if delta.operations_hash != delta.operations.hash()? {
254        return Err(Error::OperationsHashMismatch);
255    }
256
257    let mut base_archive = zip::ZipArchive::new(std::io::Cursor::new(base_data))
258        .map_err(|e| Error::ZipError(e.to_string()))?;
259
260    let file = std::fs::File::create(&path_after).map_err(|e| Error::IoError(e.to_string()))?;
261
262    let mut new_archive = zip::ZipWriter::new(file);
263    let options =
264        zip::write::FileOptions::default().compression_method(zip::CompressionMethod::Stored);
265
266    // Track processed files to handle deletions
267    let mut processed_files = std::collections::HashSet::new();
268    let mut directories_to_create: std::collections::HashSet<String> =
269        std::collections::HashSet::new();
270
271    // First, apply all patches
272    for (path, operation) in delta.operations.0 {
273        processed_files.insert(path.clone());
274
275        match operation {
276            Operation::Patch(patch) => {
277                debug!("Applying patch to file: {}", path);
278                // Read original file
279                let mut base_file = base_archive
280                    .by_name(&path)
281                    .map_err(|e| Error::ZipError(e.to_string()))?;
282                let mut original_contents = Vec::new();
283                base_file
284                    .read_to_end(&mut original_contents)
285                    .map_err(|e| Error::IoError(e.to_string()))?;
286
287                // Apply patch to get new contents
288                let new_contents = apply(&original_contents, &patch)?;
289
290                // Write new file
291                new_archive
292                    .start_file(&path, options)
293                    .map_err(|e| Error::ZipError(e.to_string()))?;
294                new_archive
295                    .write_all(&new_contents)
296                    .map_err(|e| Error::IoError(e.to_string()))?;
297                directories_to_create.extend(get_directories_of_file(&path));
298            }
299            Operation::PutFile(contents) => {
300                debug!("Adding new file: {}", path);
301                // Write new file directly
302                new_archive
303                    .start_file(&path, options)
304                    .map_err(|e| Error::ZipError(e.to_string()))?;
305                new_archive
306                    .write_all(&contents)
307                    .map_err(|e| Error::IoError(e.to_string()))?;
308                directories_to_create.extend(get_directories_of_file(&path));
309            }
310            Operation::DeleteFile => {
311                debug!("Deleting file: {}", path);
312                // Skip this file - don't copy it to new archive
313                continue;
314            }
315            Operation::FileStaysSame => {
316                debug!("File stays same: {}", path);
317                // Copy file from base archive
318                // Copy file contents in a single operation
319                let mut contents = Vec::new();
320                base_archive
321                    .by_name(&path)
322                    .map_err(|e| Error::ZipError(e.to_string()))?
323                    .read_to_end(&mut contents)
324                    .map_err(|e| Error::IoError(e.to_string()))?;
325
326                new_archive
327                    .start_file(&path, options)
328                    .map_err(|e| Error::ZipError(e.to_string()))?;
329                new_archive
330                    .write_all(&contents)
331                    .map_err(|e| Error::IoError(e.to_string()))?;
332                directories_to_create.extend(get_directories_of_file(&path));
333            }
334        }
335    }
336
337    for dir in directories_to_create {
338        trace!("creating directory {}", dir);
339
340        new_archive
341            .add_directory(dir, options)
342            .map_err(|e| Error::ZipError(e.to_string()))?;
343    }
344
345    // Finalize the ZIP file
346    new_archive
347        .finish()
348        .map_err(|e| Error::ZipError(e.to_string()))?;
349
350    info!("Successfully created patched archive: {}", path_after);
351    Ok(())
352}
353
354#[cfg(test)]
355mod tests {
356    use crate::patch::Patch;
357
358    use super::*;
359    use pretty_assertions::assert_eq;
360    use std::fs;
361    use std::io::{Read, Write};
362    use tempfile::TempDir;
363
364    use std::sync::Once;
365
366    static INIT: Once = Once::new();
367
368    fn setup_logger() {
369        INIT.call_once(|| {
370            unsafe {
371                std::env::set_var("RUST_LOG", "trace");
372            }
373            pretty_env_logger::init()
374        });
375    }
376
377    fn create_test_zip(files: &[(&str, Vec<u8>)]) -> Result<Vec<u8>, Error> {
378        let cursor = std::io::Cursor::new(Vec::new());
379        let mut zip = zip::ZipWriter::new(cursor);
380        let options =
381            zip::write::FileOptions::default().compression_method(zip::CompressionMethod::Stored);
382
383        for (name, contents) in files {
384            if *name == "" {
385                continue;
386            }
387            zip.start_file(*name, options)
388                .map_err(|e| Error::ZipError(e.to_string()))?;
389            zip.write_all(contents)
390                .map_err(|e| Error::IoError(e.to_string()))?;
391        }
392
393        Ok(zip
394            .finish()
395            .map_err(|e| Error::ZipError(e.to_string()))?
396            .into_inner())
397    }
398
399    #[test]
400    fn test_diff_and_apply_basic() -> Result<(), Error> {
401        setup_logger();
402
403        let temp_dir = TempDir::new().map_err(|e| Error::IoError(e.to_string()))?;
404
405        // Create before.zip with a single file
406        let before_zip = create_test_zip(&[("test.txt", b"Hello World".into())])?;
407        let before_path = temp_dir.path().join("before.zip");
408        fs::write(&before_path, before_zip).map_err(|e| Error::IoError(e.to_string()))?;
409
410        // Create after.zip with modified content
411        let after_zip = create_test_zip(&[("test.txt", b"Hello Modified World".into())])?;
412        let after_path = temp_dir.path().join("after.zip");
413        fs::write(&after_path, after_zip).map_err(|e| Error::IoError(e.to_string()))?;
414
415        // Generate diff
416        let patch_set = diff_zip(
417            before_path.to_string_lossy().to_string(),
418            after_path.to_string_lossy().to_string(),
419            DiffAlgorithm::Bidiff1,
420            CompressAlgorithm::None,
421        )?;
422
423        assert_eq!(patch_set.operations.0.len(), 1);
424        assert_eq!(
425            patch_set.operations.0[0].1,
426            Operation::Patch(Patch {
427                diff_algorithm: DiffAlgorithm::Bidiff1,
428                compress_algorithm: CompressAlgorithm::None,
429                before_hash: "b10a8db164e0754105b7a99be72e3fe5".to_string(),
430                after_hash: "77a55ec2b0808d5a1ef1173fcfce9763".to_string(),
431                patch: vec![
432                    223, 177, 0, 0, 0, 16, 0, 0, 6, 0, 0, 0, 0, 0, 0, 14, 77, 111, 100, 105, 102,
433                    105, 101, 100, 32, 87, 111, 114, 108, 100, 0,
434                ],
435            })
436        );
437
438        // Create output path for patched zip
439        let output_path = temp_dir.path().join("output.zip");
440
441        // Apply patch
442        apply_zip(
443            &before_path.to_string_lossy(),
444            patch_set,
445            output_path.to_string_lossy().to_string(),
446        )?;
447
448        // Verify the contents
449        let mut output_archive = zip::ZipArchive::new(std::io::Cursor::new(
450            fs::read(&output_path).map_err(|e| Error::IoError(e.to_string()))?,
451        ))
452        .map_err(|e| Error::ZipError(e.to_string()))?;
453
454        let mut file = output_archive
455            .by_name("test.txt")
456            .map_err(|e| Error::ZipError(e.to_string()))?;
457        let mut contents = Vec::new();
458        file.read_to_end(&mut contents)
459            .map_err(|e| Error::IoError(e.to_string()))?;
460
461        assert_eq!(contents, b"Hello Modified World");
462        Ok(())
463    }
464
465    #[test]
466    fn test_diff_and_apply_with_deletions() -> Result<(), Error> {
467        setup_logger();
468
469        let temp_dir = TempDir::new().map_err(|e| Error::IoError(e.to_string()))?;
470
471        // Create before.zip with multiple files
472        let before_zip = create_test_zip(&[
473            ("file1.txt", b"File 1 content".into()),
474            ("file2.txt", b"File 2 content".into()),
475        ])?;
476        let before_hash = hash(&before_zip);
477        let before_path = temp_dir.path().join("before.zip");
478        fs::write(&before_path, before_zip).map_err(|e| Error::IoError(e.to_string()))?;
479
480        // Create after.zip with one file deleted
481        let after_zip = create_test_zip(&[("file1.txt", b"File 1 content".into())])?;
482        let after_path = temp_dir.path().join("after.zip");
483        fs::write(&after_path, after_zip).map_err(|e| Error::IoError(e.to_string()))?;
484
485        // Generate and apply patch
486        let patch_set = diff_zip(
487            before_path.to_string_lossy().to_string(),
488            after_path.to_string_lossy().to_string(),
489            DiffAlgorithm::Bidiff1,
490            CompressAlgorithm::None,
491        )?;
492
493        assert_eq!(
494            patch_set,
495            PatchSet {
496                operations: Operations(vec![
497                    ("file1.txt".to_string(), Operation::FileStaysSame),
498                    ("file2.txt".to_string(), Operation::DeleteFile),
499                ]),
500                hash_before: before_hash,
501                operations_hash: "2a8a469ad35c75f628e7c1ebe37afbf0".to_string(),
502            }
503        );
504
505        let output_path = temp_dir.path().join("output.zip");
506        apply_zip(
507            &before_path.to_string_lossy(),
508            patch_set,
509            output_path.to_string_lossy().to_string(),
510        )?;
511
512        // Verify the contents
513        let mut output_archive = zip::ZipArchive::new(std::io::Cursor::new(
514            fs::read(&output_path).map_err(|e| Error::IoError(e.to_string()))?,
515        ))
516        .map_err(|e| Error::ZipError(e.to_string()))?;
517
518        assert_eq!(output_archive.len(), 1);
519        assert!(output_archive.by_name("file1.txt").is_ok());
520        assert!(output_archive.by_name("file2.txt").is_err());
521
522        Ok(())
523    }
524
525    #[test]
526    fn test_diff_and_apply_with_directories() -> Result<(), Error> {
527        setup_logger();
528
529        let temp_dir = TempDir::new().map_err(|e| Error::IoError(e.to_string()))?;
530
531        // Create before.zip with nested structure
532        let before_zip = create_test_zip(&[
533            ("dir1/", vec![]),
534            ("dir1/file1.txt", b"File 1".into()),
535            ("dir2/", vec![]),
536            ("dir2/file2.txt", b"File 2".into()),
537        ])?;
538        let before_hash = hash(&before_zip);
539        let before_path = temp_dir.path().join("before.zip");
540        fs::write(&before_path, before_zip).map_err(|e| Error::IoError(e.to_string()))?;
541
542        // Create after.zip with modified structure
543        let after_zip = create_test_zip(&[
544            ("dir1/", vec![]),
545            ("dir1/file1.txt", b"File 1 Modified".into()),
546            ("dir3/", vec![]),
547            ("dir3/file3.txt", b"File 3".into()),
548        ])?;
549        let after_path = temp_dir.path().join("after.zip");
550        fs::write(&after_path, after_zip).map_err(|e| Error::IoError(e.to_string()))?;
551
552        // Generate and apply patch
553        let patch_set = diff_zip(
554            before_path.to_string_lossy().to_string(),
555            after_path.to_string_lossy().to_string(),
556            DiffAlgorithm::Bidiff1,
557            CompressAlgorithm::None,
558        )?;
559
560        assert_eq!(
561            patch_set,
562            PatchSet {
563                operations: Operations(vec![
564                    (
565                        "dir1/file1.txt".to_string(),
566                        Operation::Patch(Patch {
567                            diff_algorithm: DiffAlgorithm::Bidiff1,
568                            compress_algorithm: CompressAlgorithm::None,
569                            before_hash: "2f03b03637bf162937793f756f0f1583".to_string(),
570                            after_hash: "15b8181404e3a6b2e046de781b702654".to_string(),
571                            patch: vec![
572                                223, 177, 0, 0, 0, 16, 0, 0, 6, 0, 0, 0, 0, 0, 0, 9, 32, 77, 111,
573                                100, 105, 102, 105, 101, 100, 0,
574                            ],
575                        }),
576                    ),
577                    ("dir2/".to_string(), Operation::DeleteFile),
578                    (
579                        "dir3/file3.txt".to_string(),
580                        Operation::PutFile(vec![70, 105, 108, 101, 32, 51]),
581                    ),
582                ]),
583                hash_before: before_hash,
584                operations_hash: "c52153314592d31ddfda9bbf6390a991".to_string(),
585            }
586        );
587
588        let output_path = temp_dir.path().join("output.zip");
589        apply_zip(
590            &before_path.to_string_lossy(),
591            patch_set,
592            output_path.to_string_lossy().to_string(),
593        )?;
594
595        // Verify the contents
596        let mut output_archive = zip::ZipArchive::new(std::io::Cursor::new(
597            fs::read(&output_path).map_err(|e| Error::IoError(e.to_string()))?,
598        ))
599        .map_err(|e| Error::ZipError(e.to_string()))?;
600
601        // Check dir1/file1.txt was modified
602        let mut file1_contents = Vec::new();
603        output_archive
604            .by_name("dir1/file1.txt")
605            .map_err(|e| Error::ZipError(e.to_string()))?
606            .read_to_end(&mut file1_contents)
607            .map_err(|e| Error::IoError(e.to_string()))?;
608        assert_eq!(file1_contents, b"File 1 Modified");
609
610        // Check dir2 was deleted
611        assert!(output_archive.by_name("dir2/file2.txt").is_err());
612
613        // Check dir3 was added
614        let mut file3_contents = Vec::new();
615        output_archive
616            .by_name("dir3/file3.txt")
617            .map_err(|e| Error::ZipError(e.to_string()))?
618            .read_to_end(&mut file3_contents)
619            .map_err(|e| Error::IoError(e.to_string()))?;
620        assert_eq!(file3_contents, b"File 3");
621
622        Ok(())
623    }
624
625    #[test]
626    fn test_complex_roundtrip_diff_and_apply() -> Result<(), Error> {
627        setup_logger();
628        let temp_dir = TempDir::new().map_err(|e| Error::IoError(e.to_string()))?;
629
630        // Initial state (version 1)
631        let v1_files = vec![
632            ("root1.txt", b"Root file 1".into()),
633            ("root2.txt", b"Root file 2".into()),
634            ("parent1/", vec![]),
635            ("parent1/file1.txt", b"Parent 1 file".into()),
636            ("parent1/child1/", vec![]),
637            ("parent1/child1/deep1.txt", b"Deep file 1".into()),
638            ("parent1/child1/deep2.txt", b"Deep file 2".into()),
639            ("parent2/", vec![]),
640            ("parent2/file2.txt", b"Parent 2 file".into()),
641            ("parent2/child2/", vec![]),
642            ("parent2/child2/deep3.txt", b"Deep file 3".into()),
643        ];
644        let v1_zip = create_test_zip(&v1_files)?;
645        let v1_hash = hash(&v1_zip);
646        let v1_path = temp_dir.path().join("v1.zip");
647        fs::write(&v1_path, v1_zip).map_err(|e| Error::IoError(e.to_string()))?;
648
649        // Version 2: modify some files, add new ones, delete some
650        let v2_files = vec![
651            ("root1.txt", b"Root file 1 modified".into()), // modified
652            // root2.txt deleted
653            ("parent1/", vec![]),
654            ("parent1/file1.txt", b"Parent 1 file modified".into()), // modified
655            ("parent1/child1/", vec![]),
656            ("parent1/child1/deep1.txt", b"Deep file 1".into()), // unchanged
657            // deep2.txt deleted
658            ("parent1/child1/deep3.txt", b"New deep file".into()), // added
659            ("parent2/", vec![]),
660            ("parent2/file2.txt", b"Parent 2 file".into()), // unchanged
661            ("parent2/child2/", vec![]),
662            ("parent2/child2/deep3.txt", b"Deep file 3 modified".into()), // modified
663            ("parent3/", vec![]),                                         // new directory
664            ("parent3/newfile.txt", b"Brand new file".into()),
665        ];
666        let v2_zip = create_test_zip(&v2_files)?;
667        let v2_hash = hash(&v2_zip);
668        let v2_path = temp_dir.path().join("v2.zip");
669        fs::write(&v2_path, v2_zip).map_err(|e| Error::IoError(e.to_string()))?;
670
671        // Version 3: more changes
672        let v3_files = vec![
673            ("root1.txt", b"Root file 1 modified again".into()), // modified again
674            ("parent1/", vec![]),
675            ("parent1/file1.txt", b"Parent 1 file modified".into()), // unchanged
676            ("parent1/child1/", vec![]),
677            // deep1.txt deleted
678            ("parent1/child1/deep3.txt", b"New deep file modified".into()), /* modified */
679            // parent2 directory completely deleted
680            ("parent3/", vec![]),
681            ("parent3/newfile.txt", b"Brand new file modified".into()), // modified
682            ("parent3/another.txt", b"Another new file".into()),        // added
683        ];
684        let v3_zip = create_test_zip(&v3_files)?;
685        let v3_path = temp_dir.path().join("v3.zip");
686        fs::write(&v3_path, v3_zip).map_err(|e| Error::IoError(e.to_string()))?;
687
688        // First roundtrip: v1 -> v2
689        let patch_v1_to_v2 = diff_zip(
690            v1_path.to_string_lossy().to_string(),
691            v2_path.to_string_lossy().to_string(),
692            DiffAlgorithm::Bidiff1,
693            CompressAlgorithm::None,
694        )?;
695
696        assert_eq!(
697            patch_v1_to_v2,
698            PatchSet {
699                operations: Operations(vec![
700                    // Root directory changes
701                    (
702                        "root1.txt".to_string(),
703                        Operation::Patch(Patch {
704                            diff_algorithm: DiffAlgorithm::Bidiff1,
705                            compress_algorithm: CompressAlgorithm::None,
706                            before_hash: "f675e8894edcf33ae7097dcc4bfb89f9".to_string(),
707                            after_hash: "3468f9d6535a07b35c8acb8aa6aac781".to_string(),
708                            patch: vec![
709                                223, 177, 0, 0, 0, 16, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
710                                9, 32, 109, 111, 100, 105, 102, 105, 101, 100, 0,
711                            ],
712                        })
713                    ),
714                    ("root2.txt".to_string(), Operation::DeleteFile),
715                    (
716                        "parent1/file1.txt".to_string(),
717                        Operation::Patch(Patch {
718                            diff_algorithm: DiffAlgorithm::Bidiff1,
719                            compress_algorithm: CompressAlgorithm::None,
720                            before_hash: "a138a74adecabef6294b55d2b28d3ea1".to_string(),
721                            after_hash: "710d2bbb6df79b88d7b75bdefdcf28aa".to_string(),
722                            patch: vec![
723                                223, 177, 0, 0, 0, 16, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
724                                0, 0, 9, 32, 109, 111, 100, 105, 102, 105, 101, 100, 0,
725                            ],
726                        })
727                    ),
728                    (
729                        "parent1/child1/deep1.txt".to_string(),
730                        Operation::FileStaysSame
731                    ),
732                    (
733                        "parent1/child1/deep2.txt".to_string(),
734                        Operation::DeleteFile
735                    ),
736                    (
737                        "parent1/child1/deep3.txt".to_string(),
738                        Operation::PutFile(b"New deep file".to_vec())
739                    ),
740                    // parent2/ directory changes
741                    ("parent2/file2.txt".to_string(), Operation::FileStaysSame),
742                    (
743                        "parent2/child2/deep3.txt".to_string(),
744                        Operation::Patch(Patch {
745                            diff_algorithm: DiffAlgorithm::Bidiff1,
746                            compress_algorithm: CompressAlgorithm::None,
747                            before_hash: "15bf70eee30b1805ab0e11510d30b41e".to_string(),
748                            after_hash: "804237ac129569f027a2b55f8cf8d7db".to_string(),
749                            patch: vec![
750                                223, 177, 0, 0, 0, 16, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
751                                9, 32, 109, 111, 100, 105, 102, 105, 101, 100, 0,
752                            ],
753                        })
754                    ),
755                    (
756                        "parent3/newfile.txt".to_string(),
757                        Operation::PutFile(b"Brand new file".to_vec())
758                    ),
759                ]),
760                hash_before: v1_hash,
761                operations_hash: "caf887830891091723fe5ada783f48b6".to_string(),
762            }
763        );
764
765        let v2_patched_path = temp_dir.path().join("v2_patched.zip");
766        apply_zip(
767            &v1_path.to_string_lossy(),
768            patch_v1_to_v2,
769            v2_patched_path.to_string_lossy().to_string(),
770        )?;
771
772        // Verify v2_patched matches v2
773        let v2_original = fs::read(&v2_path).map_err(|e| Error::IoError(e.to_string()))?;
774        let v2_patched = fs::read(&v2_patched_path).map_err(|e| Error::IoError(e.to_string()))?;
775        verify_archives_match(&v2_original, &v2_patched)?;
776
777        // Second roundtrip: v2 -> v3
778        let patch_v2_to_v3 = diff_zip(
779            v2_path.to_string_lossy().to_string(),
780            v3_path.to_string_lossy().to_string(),
781            DiffAlgorithm::Bidiff1,
782            CompressAlgorithm::None,
783        )?;
784
785        assert_eq!(
786            patch_v2_to_v3,
787            PatchSet {
788                operations: Operations(vec![
789                    (
790                        "root1.txt".to_string(),
791                        Operation::Patch(Patch {
792                            diff_algorithm: DiffAlgorithm::Bidiff1,
793                            compress_algorithm: CompressAlgorithm::None,
794                            before_hash: "3468f9d6535a07b35c8acb8aa6aac781".to_string(),
795                            after_hash: "2ad3c7437786d6625776f0583bc3d6b2".to_string(),
796                            patch: vec![
797                                223, 177, 0, 0, 0, 16, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
798                                0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 32, 97, 103, 97, 105, 110, 0
799                            ],
800                        })
801                    ),
802                    ("parent1/file1.txt".to_string(), Operation::FileStaysSame),
803                    (
804                        "parent1/child1/deep1.txt".to_string(),
805                        Operation::DeleteFile
806                    ),
807                    (
808                        "parent1/child1/deep3.txt".to_string(),
809                        Operation::Patch(Patch {
810                            diff_algorithm: DiffAlgorithm::Bidiff1,
811                            compress_algorithm: CompressAlgorithm::None,
812                            before_hash: "eb60615cbd4f6c8befc5dc7b387e77b9".to_string(),
813                            after_hash: "ad96d84598d4994a819489d1762967e3".to_string(),
814                            patch: vec![
815                                223, 177, 0, 0, 0, 16, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
816                                0, 0, 9, 32, 109, 111, 100, 105, 102, 105, 101, 100, 0
817                            ]
818                        })
819                    ),
820                    ("parent2/".to_string(), Operation::DeleteFile),
821                    (
822                        "parent3/newfile.txt".to_string(),
823                        Operation::Patch(Patch {
824                            diff_algorithm: DiffAlgorithm::Bidiff1,
825                            compress_algorithm: CompressAlgorithm::None,
826                            before_hash: "98de949196bc048ff94069ea5e1c4446".to_string(),
827                            after_hash: "0afd1f99b76a45e02719a43715c7071b".to_string(),
828                            patch: vec![
829                                223, 177, 0, 0, 0, 16, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
830                                0, 0, 0, 9, 32, 109, 111, 100, 105, 102, 105, 101, 100, 0
831                            ]
832                        })
833                    ),
834                    (
835                        "parent3/another.txt".to_string(),
836                        Operation::PutFile(vec![
837                            65, 110, 111, 116, 104, 101, 114, 32, 110, 101, 119, 32, 102, 105, 108,
838                            101
839                        ])
840                    )
841                ]),
842                hash_before: v2_hash,
843                operations_hash: "772e8078384f8a99cda819d2d3807864".to_string(),
844            }
845        );
846
847        let v3_patched_path = temp_dir.path().join("v3_patched.zip");
848        apply_zip(
849            &v2_path.to_string_lossy(),
850            patch_v2_to_v3,
851            v3_patched_path.to_string_lossy().to_string(),
852        )?;
853
854        // Verify v3_patched matches v3
855        let v3_original = fs::read(&v3_path).map_err(|e| Error::IoError(e.to_string()))?;
856        let v3_patched = fs::read(&v3_patched_path).map_err(|e| Error::IoError(e.to_string()))?;
857        verify_archives_match(&v3_original, &v3_patched)?;
858
859        Ok(())
860    }
861
862    // Helper function to verify two ZIP archives have identical contents
863    fn verify_archives_match(data1: &[u8], data2: &[u8]) -> Result<(), Error> {
864        let mut archive1 = zip::ZipArchive::new(std::io::Cursor::new(data1))
865            .map_err(|e| Error::ZipError(e.to_string()))?;
866        let mut archive2 = zip::ZipArchive::new(std::io::Cursor::new(data2))
867            .map_err(|e| Error::ZipError(e.to_string()))?;
868
869        if archive1.len() != archive2.len() {
870            return Err(Error::ZipError(
871                "Archives have different number of files".to_string(),
872            ));
873        }
874
875        for i in 0..archive1.len() {
876            let mut file1 = archive1
877                .by_index(i)
878                .map_err(|e| Error::ZipError(e.to_string()))?;
879            let file1_name = file1.name().to_string();
880
881            let mut file2 = match archive2.by_name(file1.name()) {
882                Ok(file) => file,
883                Err(_) => {
884                    return Err(Error::ZipError(format!(
885                        "File {} not found in second archive",
886                        file1_name
887                    )));
888                }
889            };
890
891            if file1.is_dir() != file2.is_dir() {
892                return Err(Error::ZipError(format!(
893                    "Directory status mismatch for {}",
894                    file1_name
895                )));
896            }
897
898            if !file1.is_dir() {
899                let mut contents1 = Vec::new();
900                let mut contents2 = Vec::new();
901                file1
902                    .read_to_end(&mut contents1)
903                    .map_err(|e| Error::IoError(e.to_string()))?;
904                file2
905                    .read_to_end(&mut contents2)
906                    .map_err(|e| Error::IoError(e.to_string()))?;
907
908                if contents1 != contents2 {
909                    return Err(Error::ZipError(format!(
910                        "Contents mismatch for file {}",
911                        file1_name
912                    )));
913                }
914            }
915        }
916
917        Ok(())
918    }
919
920    #[test]
921    fn test_realword_archive_diff() -> Result<(), Error> {
922        setup_logger();
923
924        let before = "/home/eli/darkwing/tests/data/lot-of-datadirs/24132775.datadir.zip.before";
925        let after = "/home/eli/darkwing/tests/data/lot-of-datadirs/24132775.datadir.zip.after";
926
927        let diff = diff_zip(
928            before.to_string(),
929            after.to_string(),
930            DiffAlgorithm::Rsync020,
931            CompressAlgorithm::None,
932        )?;
933
934        debug!("total size: {}", diff.get_size());
935
936        for op in diff.operations.0 {
937            debug!("file: {:?}", op.0);
938        }
939
940        Ok(())
941    }
942}