files_diff/
lib.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
// TODO: set timeouts for patching (in `DiffMachine`); pass a reader from the
// file for zips; add fuzz tests and proptest regressions; add tests for
// switching between compression and diff algorithms.

//! A library for generating and applying binary diffs between files and
//! archives.
//!
//! This library provides efficient diffing algorithms to generate patches
//! between binary files or zip archives. It supports multiple diffing
//! algorithms (rsync and bidiff) and compression methods to optimize patch
//! sizes.
//!
//! # Basic Usage
//! ```rust
//! use files_diff::{diff, apply, DEFAULT_ALGO};
//!
//! let before = b"hello world";
//! let after = b"hello darkness my old friend";
//!
//! // Generate a patch using default algorithms
//! let (diff_algo, compress_algo) = DEFAULT_ALGO;
//! let patch = diff(before, after, diff_algo, compress_algo)?;
//!
//! // Apply the patch to recreate the target file
//! let result = apply(before, &patch)?;
//! assert_eq!(&result, after);
//! # Ok::<(), files_diff::Error>(())
//! ```
//!
//! # Advanced Usage
//! ```no_run
//! use files_diff::{diff_zip, apply_zip, DiffAlgorithm, CompressAlgorithm};
//!
//! // For zip archives, use the specialized zip functions
//! let patch_set = diff_zip(
//!     "before.zip".to_string(),
//!     "after.zip".to_string(),
//!     DiffAlgorithm::Bidiff1,
//!     CompressAlgorithm::Zstd
//! )?;
//!
//! // Apply patches to transform the original zip
//! apply_zip("before.zip", patch_set, "result.zip".to_string())?;
//! # Ok::<(), files_diff::Error>(())
//! ```
//!
//! The library uses [fast-rsync] for the rsync algorithm and [bidiff] for the
//! bidiff algorithm. Each patch includes hash validation to ensure data
//! integrity during the patching process.
//!
//! [fast-rsync]: https://github.com/dropbox/fast-rsync
//! [bidiff]: https://github.com/divvun/bidiff

#![forbid(clippy::unwrap_used)]
#![forbid(clippy::expect_used)]
#![warn(missing_docs)]

mod bd;
mod compress;
mod error;
mod patch;
mod rsync;
mod zip;

use bd::BidiffDiffMachine;
pub use compress::CompressAlgorithm;
pub use error::Error;
pub use patch::{DiffAlgorithm, Patch, PatchSet};
use rsync::RsyncDiffMachine;
pub use zip::{apply_zip, diff_zip};

/// The default diff and compression algorithm pair.
///
/// The tuple is ordered `(diff algorithm, compression algorithm)` and holds
/// [`DiffAlgorithm::Rsync020`] together with [`CompressAlgorithm::None`].
/// Destructure it and pass both halves to [`diff`] when no specific
/// algorithms are required.
pub const DEFAULT_ALGO: (DiffAlgorithm, CompressAlgorithm) =
  (DiffAlgorithm::Rsync020, CompressAlgorithm::None);

/// A trait that implements diffing and patching operations.
///
/// This trait defines the core operations needed to generate and apply patches
/// between binary data. Implementations provide specific diffing algorithms.
///
/// Both operations are associated functions (they take no `self`), so an
/// implementation is selected by naming its type rather than by constructing
/// a value.
///
/// # Example
/// ```no_run
/// use files_diff::{DiffMachine, CompressAlgorithm, Error, Patch};
/// struct MyDiffMachine;
///
/// impl DiffMachine for MyDiffMachine {
///     fn diff(before: &[u8], after: &[u8], compress: CompressAlgorithm) -> Result<Patch, Error> {
///         // Implementation details...
///         # todo!()
///     }
///
///     fn apply(base: &[u8], patch: &Patch) -> Result<Vec<u8>, Error> {
///         // Implementation details...
///         # todo!()
///     }
/// }
/// ```
pub trait DiffMachine {
  /// Diff two byte slices using the given compression algorithm.
  ///
  /// `before` is the original data and `after` is the data the resulting
  /// [`Patch`] should reproduce. `compress_algorithm` is the compression
  /// method requested for the patch.
  ///
  /// # Errors
  /// Returns an [`Error`] when the implementation cannot produce a patch.
  fn diff(
    before: &[u8],
    after: &[u8],
    compress_algorithm: CompressAlgorithm,
  ) -> Result<Patch, Error>;

  /// Apply a patch to a byte slice.
  ///
  /// `base` is the data the patch is applied to and `delta` is the patch
  /// itself. On success the patched bytes are returned as a new `Vec<u8>`.
  ///
  /// # Errors
  /// Returns an [`Error`] when the implementation cannot apply `delta` to
  /// `base`.
  fn apply(base: &[u8], delta: &Patch) -> Result<Vec<u8>, Error>;
}

/// Computes the MD5 digest of `data` and returns it as a hexadecimal string.
///
/// The digest bytes are hex-encoded, so the returned string always has the
/// same length regardless of how much input is supplied.
///
/// # Example
/// ```rust
/// use files_diff::hash;
///
/// let payload = b"Hello, world!";
/// let digest_hex = hash(payload);
/// assert_eq!(digest_hex.len(), 32); // 16 digest bytes -> 32 hex chars
/// ```
pub fn hash(data: &[u8]) -> String {
  // Unpack the raw digest bytes from the newtype before hex-encoding them.
  let md5::Digest(digest_bytes) = md5::compute(data);
  hex::encode(digest_bytes)
}

/// Generates a patch between two byte slices using specified algorithms.
///
/// Creates a patch that can transform the `before` data into the `after` data,
/// using the specified diffing and compression algorithms.
///
/// # Example
/// ```rust
/// use files_diff::{diff, DiffAlgorithm, CompressAlgorithm};
///
/// let before = b"original data";
/// let after = b"modified data";
///
/// let patch = diff(
///     before,
///     after,
///     DiffAlgorithm::Rsync020,
///     CompressAlgorithm::Zstd
/// )?;
/// # Ok::<(), files_diff::Error>(())
/// ```
pub fn diff(
  before: &[u8],
  after: &[u8],
  diff_algorithm: DiffAlgorithm,
  compress_algorithm: CompressAlgorithm,
) -> Result<Patch, Error> {
  match diff_algorithm {
    DiffAlgorithm::Rsync020 => {
      RsyncDiffMachine::diff(before, after, compress_algorithm)
    }
    DiffAlgorithm::Bidiff1 => {
      BidiffDiffMachine::diff(before, after, compress_algorithm)
    }
  }
}

/// Applies a patch to transform the base data.
///
/// Takes a patch generated by `diff()` and applies it to the base data
/// to recreate the target data.
///
/// # Example
/// ```rust
/// use files_diff::{diff, apply, DEFAULT_ALGO};
///
/// let base = b"original data";
/// let target = b"modified data";
///
/// let (diff_algo, compress_algo) = DEFAULT_ALGO;
/// let patch = diff(base, target, diff_algo, compress_algo)?;
/// let result = apply(base, &patch)?;
///
/// assert_eq!(result, target);
/// # Ok::<(), files_diff::Error>(())
/// ```
pub fn apply(base: &[u8], delta: &Patch) -> Result<Vec<u8>, Error> {
  match delta.diff_algorithm {
    DiffAlgorithm::Rsync020 => RsyncDiffMachine::apply(base, delta),
    DiffAlgorithm::Bidiff1 => BidiffDiffMachine::apply(base, delta),
  }
}

#[cfg(test)]
mod tests {
  use super::*;

  /// Diffs `before` against `after` with the given algorithms, applies the
  /// resulting patch to `before`, and returns the reconstructed bytes.
  ///
  /// Failures are propagated with `?` instead of `expect`: the crate root
  /// declares `#![forbid(clippy::expect_used)]`, and a `forbid` cannot be
  /// relaxed with a local `#[allow]`.
  fn round_trip(
    before: &[u8],
    after: &[u8],
    diff_algorithm: DiffAlgorithm,
    compress_algorithm: CompressAlgorithm,
  ) -> Result<Vec<u8>, Error> {
    let patch = diff(before, after, diff_algorithm, compress_algorithm)?;
    apply(before, &patch)
  }

  /// Round-trips a small payload through both diff algorithms and checks
  /// that applying each patch reproduces the target bytes.
  #[test]
  fn test_diff() -> Result<(), Error> {
    let before = b"hello world";
    let after = b"hello darkness my old friend";

    // rsync without compression.
    let after_rsync = round_trip(
      before,
      after,
      DiffAlgorithm::Rsync020,
      CompressAlgorithm::None,
    )?;
    assert_eq!(after_rsync, after);

    // bidiff with zstd compression.
    let after_bidiff = round_trip(
      before,
      after,
      DiffAlgorithm::Bidiff1,
      CompressAlgorithm::Zstd,
    )?;
    assert_eq!(after_bidiff, after);

    Ok(())
  }
}