files_diff/
patch.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
use rkyv::Archive;

use crate::{Error, compress::CompressAlgorithm, hash};

/// Algorithms available for generating binary diffs.
///
/// Each algorithm offers different tradeoffs between patch size, generation
/// speed, and application speed.
///
/// The chosen algorithm is recorded in [`Patch::diff_algorithm`], so every
/// patch carries the identity of the algorithm that produced it.
///
/// The enum is `Copy`, so it can be passed by value.
///
/// # Example
/// ```rust
/// use files_diff::{diff, DiffAlgorithm, CompressAlgorithm};
///
/// // Use rsync for fast diffing of similar files
/// let rsync_patch = diff(
///     b"original",
///     b"modified",
///     DiffAlgorithm::Rsync020,
///     CompressAlgorithm::None
/// )?;
///
/// // Use bidiff for potentially smaller patches
/// let bidiff_patch = diff(
///     b"original",
///     b"modified",
///     DiffAlgorithm::Bidiff1,
///     CompressAlgorithm::Zstd
/// )?;
/// # Ok::<(), files_diff::Error>(())
/// ```
// `#[rkyv(derive(..))]` below asks rkyv to derive `Debug` and `PartialEq` on
// the generated archived counterpart of this enum as well.
#[derive(
  Archive,
  rkyv::Deserialize,
  rkyv::Serialize,
  Debug,
  PartialEq,
  Clone,
  Copy,
  Eq,
  Hash,
)]
#[rkyv(derive(Debug, PartialEq))]
pub enum DiffAlgorithm {
  /// Fast-rsync algorithm version 0.2.0.
  /// Optimized for files that are mostly similar.
  Rsync020,

  /// Bidirectional diff algorithm version 1.
  /// May produce smaller patches for very different files.
  Bidiff1,
  // NOTE(review): previously serialized patches presumably depend on the
  // declaration order of these variants — confirm before reordering, and
  // append new variants at the end.
}

impl std::fmt::Display for DiffAlgorithm {
  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    write!(f, "{:?}", self)
  }
}

/// A patch that can transform one file into another.
///
/// Contains all the information needed to verify and apply a patch,
/// including source and target file hashes for integrity validation.
///
/// The struct is serializable with rkyv; see `Patch::to_bytes` and
/// `Patch::from_bytes` for the byte-level round trip.
///
/// # Example
/// ```rust
/// use files_diff::{diff, apply, DiffAlgorithm, CompressAlgorithm};
///
/// let source = b"original content";
/// let target = b"modified content";
///
/// // Generate a patch
/// let patch = diff(
///     source,
///     target,
///     DiffAlgorithm::Rsync020,
///     CompressAlgorithm::Zstd
/// )?;
///
/// // Verify source hash matches
/// assert_eq!(files_diff::hash(source), patch.before_hash);
///
/// // Apply patch and verify result
/// let result = apply(source, &patch)?;
/// assert_eq!(files_diff::hash(&result), patch.after_hash);
/// # Ok::<(), files_diff::Error>(())
/// ```
// `#[rkyv(derive(Debug))]` asks rkyv to derive `Debug` on the generated
// archived counterpart of this struct.
// NOTE(review): field order is presumably part of the archived layout —
// confirm before reordering fields.
#[derive(Archive, rkyv::Deserialize, rkyv::Serialize, Debug, PartialEq)]
#[rkyv(derive(Debug))]
pub struct Patch {
  /// Algorithm used to generate this patch
  pub diff_algorithm: DiffAlgorithm,
  /// Compression method used for the patch data
  pub compress_algorithm: CompressAlgorithm,
  /// MD5 hash of the source file
  // NOTE(review): the example above compares this against
  // `files_diff::hash(source)`; the "MD5" wording cannot be verified from
  // this file — confirm against the `hash` implementation.
  pub before_hash: String,
  /// MD5 hash of the target file
  pub after_hash: String,
  /// The actual patch data
  pub patch: Vec<u8>,
}

impl Patch {
  /// Returns the total size in bytes of this patch.
  pub fn get_size(&self) -> usize {
    self.patch.len()
      + self.before_hash.len()
      + self.after_hash.len()
      + std::mem::size_of::<CompressAlgorithm>()
      + std::mem::size_of::<DiffAlgorithm>()
  }

  /// Serializes this patch to a byte vector.
  pub fn to_bytes(&self) -> Result<Vec<u8>, Error> {
    Ok(
      rkyv::to_bytes::<rkyv::rancor::Error>(self)
        .map_err(Error::SerializeError)?
        .to_vec(),
    )
  }

  /// Deserializes a patch from a byte vector.
  pub fn from_bytes(bytes: &[u8]) -> Result<Self, Error> {
    rkyv::from_bytes::<_, rkyv::rancor::Error>(bytes)
      .map_err(Error::DeserializeError)
  }
}

/// Type alias for filenames in patch sets.
///
/// Used as the first element of each `(Filename, Operation)` pair stored in
/// [`Operations`].
pub type Filename = String;

/// Operations that can be performed on a file in a patch set.
///
/// Used primarily for zip archive diffing to track changes to individual files
/// within the archive.
///
/// Each operation is stored next to the [`Filename`] it applies to inside
/// [`Operations`].
///
/// # Example
/// ```no_run
/// use files_diff::{diff_zip, DiffAlgorithm, CompressAlgorithm};
///
/// let patch_set = diff_zip(
///     "before.zip".to_string(),
///     "after.zip".to_string(),
///     DiffAlgorithm::Rsync020,
///     CompressAlgorithm::Zstd
/// )?;
///
/// # Ok::<(), files_diff::Error>(())
/// ```
// NOTE(review): variant order is presumably part of the archived encoding —
// confirm before reordering, and append new variants at the end.
#[derive(Archive, rkyv::Deserialize, rkyv::Serialize, Debug, PartialEq)]
#[rkyv(derive(Debug))]
pub enum Operation {
  /// File was modified - contains patch to transform it
  Patch(Patch),
  /// File is new or completely different - contains full file contents
  PutFile(Vec<u8>),
  /// File was removed in the target
  DeleteFile,
  /// File is identical in source and target
  FileStaysSame,
}

impl Operation {
  /// Returns the size in bytes of this operation's data.
  pub fn get_size(&self) -> usize {
    match self {
      Operation::Patch(patch) => patch.get_size(),
      Operation::PutFile(file) => file.len(),
      Operation::DeleteFile => 0,
      Operation::FileStaysSame => 0,
    }
  }
}

/// Ordered list of `(filename, operation)` pairs.
///
/// Newtype around `Vec<(Filename, Operation)>`. The type itself is public,
/// but the inner vector is only accessible within this crate.
#[derive(Archive, rkyv::Deserialize, rkyv::Serialize, Debug, PartialEq)]
#[rkyv(derive(Debug))]
pub struct Operations(pub(crate) Vec<(Filename, Operation)>);

impl Operations {
  pub(crate) fn to_bytes(&self) -> Result<Vec<u8>, Error> {
    Ok(
      rkyv::to_bytes::<rkyv::rancor::Error>(self)
        .map_err(Error::SerializeError)?
        .to_vec(),
    )
  }

  pub(crate) fn hash(&self) -> Result<String, Error> {
    Ok(hash(&self.to_bytes()?))
  }
}

/// A collection of file operations that transform one archive into another.
///
/// Contains all the operations needed to transform a zip archive into a
/// target version, tracking changes to individual files within the archive.
///
/// The struct is serializable with rkyv; see `PatchSet::to_bytes` and
/// `PatchSet::from_bytes` for the byte-level round trip.
///
/// # Example
/// ```no_run
/// use files_diff::{diff_zip, apply_zip, DiffAlgorithm, CompressAlgorithm};
///
/// // Generate patches for all files in the zip
/// let patch_set = diff_zip(
///     "source.zip".to_string(),
///     "target.zip".to_string(),
///     DiffAlgorithm::Rsync020,
///     CompressAlgorithm::Zstd
/// )?;
///
/// // Apply all patches to transform the zip
/// apply_zip("source.zip", patch_set, "result.zip".to_string())?;
/// # Ok::<(), files_diff::Error>(())
/// ```
// NOTE(review): field order is presumably part of the archived layout —
// confirm before reordering fields.
#[derive(Archive, rkyv::Deserialize, rkyv::Serialize, Debug, PartialEq)]
#[rkyv(derive(Debug))]
pub struct PatchSet {
  /// The operations that transform the source zip into the target zip
  pub operations: Operations,
  /// The hash of the source zip
  pub hash_before: String,
  /// The hash of the operations
  // NOTE(review): presumably the value returned by `Operations::hash` —
  // confirm where the patch set is constructed.
  pub operations_hash: String,
}

impl PatchSet {
  /// Returns the total size in bytes of all operations in this patch set.
  pub fn get_size(&self) -> usize {
    self
      .operations
      .0
      .iter()
      .map(|(filename, op)| filename.len() + op.get_size())
      .sum::<usize>()
      + self.hash_before.len()
      + self.operations_hash.len()
  }

  /// Serializes this patch set to a byte vector.
  pub fn to_bytes(&self) -> Result<Vec<u8>, Error> {
    Ok(
      rkyv::to_bytes::<rkyv::rancor::Error>(self)
        .map_err(Error::SerializeError)?
        .to_vec(),
    )
  }

  /// Deserializes a patch set from a byte vector.
  pub fn from_bytes(bytes: &[u8]) -> Result<Self, Error> {
    rkyv::from_bytes::<_, rkyv::rancor::Error>(bytes)
      .map_err(Error::DeserializeError)
  }
}