files_diff/
lib.rs

// TODO: set timeouts for patching (in DiffMachine); pass a reader from the
// file for zips; add fuzz tests and proptest regressions; add tests for
// changing the compression and diff algorithms.
4
5//! A library for generating and applying binary diffs between files and
6//! archives.
7//!
//! This library provides efficient diffing algorithms to generate patches
//! between binary files or zip archives. It supports multiple diffing
//! algorithms (rsync and bidiff) and compression methods to optimize patch
//! sizes.
12//!
13//! # Basic Usage
14//! ```rust
15//! use files_diff::{diff, apply, DEFAULT_ALGO};
16//!
17//! let before = b"hello world";
18//! let after = b"hello darkness my old friend";
19//!
20//! // Generate a patch using default algorithms
21//! let (diff_algo, compress_algo) = DEFAULT_ALGO;
22//! let patch = diff(before, after, diff_algo, compress_algo)?;
23//!
24//! // Apply the patch to recreate the target file
25//! let result = apply(before, &patch)?;
26//! assert_eq!(&result, after);
27//! # Ok::<(), files_diff::Error>(())
28//! ```
29//!
30//! # Advanced Usage
31//! ```no_run
32//! use files_diff::{diff_zip, apply_zip, DiffAlgorithm, CompressAlgorithm};
33//!
34//! // For zip archives, use the specialized zip functions
35//! let patch_set = diff_zip(
36//!     "before.zip".to_string(),
37//!     "after.zip".to_string(),
38//!     DiffAlgorithm::Bidiff1,
39//!     CompressAlgorithm::Zstd
40//! )?;
41//!
42//! // Apply patches to transform the original zip
43//! apply_zip("before.zip", patch_set, "result.zip".to_string())?;
44//! # Ok::<(), files_diff::Error>(())
45//! ```
46//!
47//! The library uses [fast-rsync] for the rsync algorithm and [bidiff] for the
48//! bidiff algorithm. Each patch includes hash validation to ensure data
49//! integrity during the patching process.
50//!
51//! [fast-rsync]: https://github.com/dropbox/fast-rsync
52//! [bidiff]: https://github.com/divvun/bidiff
53
54#![forbid(clippy::unwrap_used)]
55#![forbid(clippy::expect_used)]
56#![warn(missing_docs)]
57
58mod bd;
59mod compress;
60mod error;
61mod patch;
62mod rsync;
63mod zip;
64
65use bd::BidiffDiffMachine;
66pub use compress::CompressAlgorithm;
67pub use error::Error;
68pub use patch::{DiffAlgorithm, Patch, PatchSet};
69use rsync::RsyncDiffMachine;
70pub use zip::{apply_zip, diff_zip};
71
72/// The default diff and compression algorithm.
73pub const DEFAULT_ALGO: (DiffAlgorithm, CompressAlgorithm) =
74    (DiffAlgorithm::Rsync020, CompressAlgorithm::None);
75
76/// A trait that implements differing and patching operations.
77///
78/// This trait defines the core operations needed to generate and apply patches
79/// between binary data. Implementations provide specific differing algorithms.
80///
81/// # Example
82/// ```no_run
83/// use files_diff::{DiffMachine, CompressAlgorithm, Error, Patch};
84/// struct MyDiffMachine;
85///
86/// impl DiffMachine for MyDiffMachine {
87///     fn diff(before: &[u8], after: &[u8], compress: CompressAlgorithm) -> Result<Patch, Error> {
88///         // Implementation details...
89///         # todo!()
90///     }
91///     
92///     fn apply(base: &[u8], patch: &Patch) -> Result<Vec<u8>, Error> {
93///         // Implementation details...
94///         # todo!()
95///     }
96/// }
97/// ```
98pub trait DiffMachine {
99    /// Diff two byte slices using the given compression algorithm.
100    fn diff(
101        before: &[u8],
102        after: &[u8],
103        compress_algorithm: CompressAlgorithm,
104    ) -> Result<Patch, Error>;
105
106    /// Apply a patch to a byte slice.
107    fn apply(base: &[u8], delta: &Patch) -> Result<Vec<u8>, Error>;
108}
109
110/// Generates an MD5 hash of the provided data as a hexadecimal string.
111///
112/// This function is used internally for patch validation to ensure data
113/// integrity.
114///
115/// # Example
116/// ```rust
117/// use files_diff::hash;
118///
119/// let data = b"Hello, world!";
120/// let hash_str = hash(data);
121/// assert_eq!(hash_str.len(), 32); // MD5 hash is always 32 hex chars
122/// ```
123pub fn hash(data: &[u8]) -> String {
124    let hash = md5::compute(data);
125    hex::encode(hash.0)
126}
127
128/// Generates a patch between two byte slices using specified algorithms.
129///
130/// Creates a patch that can transform the `before` data into the `after` data,
131/// using the specified diff and compression algorithms.
132///
133/// # Example
134/// ```rust
135/// use files_diff::{diff, DiffAlgorithm, CompressAlgorithm};
136///
137/// let before = b"original data";
138/// let after = b"modified data";
139///
140/// let patch = diff(
141///     before,
142///     after,
143///     DiffAlgorithm::Rsync020,
144///     CompressAlgorithm::Zstd
145/// )?;
146/// # Ok::<(), files_diff::Error>(())
147/// ```
148pub fn diff(
149    before: &[u8],
150    after: &[u8],
151    diff_algorithm: DiffAlgorithm,
152    compress_algorithm: CompressAlgorithm,
153) -> Result<Patch, Error> {
154    match diff_algorithm {
155        DiffAlgorithm::Rsync020 => RsyncDiffMachine::diff(before, after, compress_algorithm),
156        DiffAlgorithm::Bidiff1 => BidiffDiffMachine::diff(before, after, compress_algorithm),
157    }
158}
159
160/// Applies a patch to transform the base data.
161///
162/// Takes a patch generated by `diff()` and applies it to the base data
163/// to recreate the target data.
164///
165/// # Example
166/// ```rust
167/// use files_diff::{diff, apply, DEFAULT_ALGO};
168///
169/// let base = b"original data";
170/// let target = b"modified data";
171///
172/// let (diff_algo, compress_algo) = DEFAULT_ALGO;
173/// let patch = diff(base, target, diff_algo, compress_algo)?;
174/// let result = apply(base, &patch)?;
175///
176/// assert_eq!(result, target);
177/// # Ok::<(), files_diff::Error>(())
178/// ```
179pub fn apply(base: &[u8], delta: &Patch) -> Result<Vec<u8>, Error> {
180    match delta.diff_algorithm {
181        DiffAlgorithm::Rsync020 => RsyncDiffMachine::apply(base, delta),
182        DiffAlgorithm::Bidiff1 => BidiffDiffMachine::apply(base, delta),
183    }
184}
185
#[cfg(test)]
mod tests {
    use super::*;

    /// Round-trips a small input through both diff algorithms and checks
    /// that applying each patch to `before` reproduces `after`.
    ///
    /// Failures are propagated with `?` instead of `.expect(...)`: the crate
    /// root sets `#![forbid(clippy::expect_used)]`, and a `forbid` level
    /// cannot be relaxed by a local `#[allow]`, so `.expect` in test code
    /// would make clippy fail on the test target.
    #[test]
    fn test_diff() -> Result<(), Error> {
        let before = b"hello world";
        let after = b"hello darkness my old friend";

        // rsync, uncompressed
        let patch_rsync = diff(
            before,
            after,
            DiffAlgorithm::Rsync020,
            CompressAlgorithm::None,
        )?;
        let after_rsync = apply(before, &patch_rsync)?;
        assert_eq!(after_rsync, after);

        // bidiff, zstd-compressed
        let patch_bidiff = diff(
            before,
            after,
            DiffAlgorithm::Bidiff1,
            CompressAlgorithm::Zstd,
        )?;
        let after_bidiff = apply(before, &patch_bidiff)?;
        assert_eq!(after_bidiff, after);

        Ok(())
    }
}
217}