rdiff/
lib.rs

1//! Finds the difference between sequential versions of files.
2//!
3//! Based on the rsync algorithm.
4//! The `BlockHashes` struct will find the differences between versions of the same file.
5//! It does this through the [`diff_and_update()`](struct.BlockHashes.html#method.diff_and_update) method.
6//!
7//! # Example
8//!
9//! ```
10//! use std::io::Cursor;
11//! use rdiff::BlockHashes;
12//!
13//! let mut hash = BlockHashes::new(Cursor::new("The initial version"), 8).unwrap();
14//! let diffs = hash.diff_and_update(Cursor::new("The next version")).unwrap();
15//! println!("Diffs: {:?}", diffs);
16//! // Outputs "Diffs: Diff{inserts: [Insert(0, The next vers)], deletes:[Delete(13, 16)]}"
17//! ```
18//!
19//! This crate also contains methods relating to finding the differences between two strings, in the [string_diff](string_diff/index.html) module.
20//! These methods can be used to refine the course differences found through the rsync method.
21
22#![deny(missing_docs)]
23extern crate crypto;
24extern crate byteorder;
25#[macro_use]
26extern crate log;
27
28mod window;
29mod hashing;
30pub mod string_diff;
31
32use std::collections::HashMap;
33use std::fs::File;
34use std::io::{self, Read, Write, Seek, SeekFrom};
35use std::slice::Iter;
36use std::fmt;
37use std::mem;
38use std::string::FromUtf8Error;
39
40use byteorder::{NetworkEndian, ByteOrder};
41
42/// Used for calculating and re-calculating the differences between two versions of the same file
43///
44/// See the [module level documentation](index.html) for examples on how to use this
45#[derive(Debug, PartialEq)]
46pub struct BlockHashes {
47    hashes: HashMap<u32, Vec<(usize, [u8; 16])>>,
48    block_size: usize,
49    file_size: usize
50}
51
52/// Represents an operation to insert bytes at a particular position into a file
53#[derive(PartialEq)]
54pub struct Insert {
55    position: usize,
56    data: Vec<u8>
57}
58
59/// Represents an operation to delete a certain number of bytes at a particular position in a file
60#[derive(PartialEq)]
61pub struct Delete {
62    position: usize,
63    len: usize
64}
65
66/// Represents a series of operations that were performed on a file to transform it into a new
67/// version.
68///
69/// The operations are stored in file order, which means that every operation that affects
70/// an earlier part of the file must be stored before an operation that affects a later part.
71/// The diff also assumes that insert operations are performed prior to delete operations.
72#[derive(Debug, PartialEq)]
73pub struct Diff {
74    inserts: Vec<Insert>,
75    deletes: Vec<Delete>
76}
77
78/// A sliding window over a reader.  This monatins an internal buffer read from the file,
79/// which can be read from at any time.
80struct Window<R: Read> {
81    front: Vec<u8>,
82    back: Vec<u8>,
83    block_size: usize,
84    offset: usize,
85    bytes_read: usize,
86    reader: R
87}
88
89impl Diff {
90    /// Creates a new `Diff`
91    #[inline]
92    pub fn new() -> Diff {
93        Diff {
94            inserts: Vec::new(),
95            deletes: Vec::new()
96        }
97    }
98
99    /// Adds an insert operation into this diff.  The operation must occur after
100    /// all previously added insert operations in file order.  If the operation
101    /// can be merged with the previous operation, then it is.
102    ///
103    /// Consumes the data that is passed in
104    fn add_insert(&mut self, position: usize, mut data: Vec<u8>) {
105        if let Some(tail) = self.inserts.last_mut() {
106            if tail.position + tail.data.len() == position {
107                tail.data.append(&mut data);
108                return;
109            }
110        }
111        self.inserts.push(Insert {
112            position: position,
113            data: data
114        });
115    }
116
117    // Adds an delete operation into this diff.  The operation must occur after
118    /// all previously added insert and delete operations in file order.  If the operation
119    /// can be merged with the previous operation, then it is.
120    fn add_delete(&mut self, position: usize, len: usize) {
121        if let Some(tail) = self.deletes.last_mut() {
122            if tail.position  == position {
123                tail.len += len;
124                return;
125            }
126        }
127        self.deletes.push(Delete {
128            position: position,
129            len: len
130        });
131    }
132
133    /// Gets an iterator over all insert operations
134    pub fn inserts(&self) -> Iter<Insert> {
135        self.inserts.iter()
136    }
137
138    /// Gets an iterator over all delete operations
139    pub fn deletes(&self) -> Iter<Delete> {
140        self.deletes.iter()
141    }
142
143    /// Checks if this set of diffs has any actual content
144    pub fn is_empty(&self) -> bool {
145        self.deletes.is_empty() && self.inserts.is_empty()
146    }
147
148    /// Applies all of the operations in the diff to the given string.
149    /// Gives an error if the resulting string can't be represented by utf8.
150    ///
151    /// # Panics
152    /// When the operations refer to positions that are not represented by the string.
153    pub fn apply_to_string(&self, string: &str) -> Result<String, FromUtf8Error> {
154        let mut old_bytes = string.bytes();
155        let mut new_bytes = Vec::new();
156        let mut index = 0;
157        for insert in self.inserts() {
158            while index < insert.position {
159                new_bytes.push(old_bytes.next().unwrap().clone());
160                index += 1;
161            }
162            new_bytes.append(&mut insert.data.clone());
163            index += insert.data.len();
164        }
165        while let Some(byte) = old_bytes.next() {
166            new_bytes.push(byte);
167        }
168        let old_bytes = mem::replace(&mut new_bytes, Vec::new());
169        let mut  old_bytes = old_bytes.into_iter();
170        index = 0;
171        for delete in self.deletes() {
172            while index < delete.position {
173                new_bytes.push(old_bytes.next().unwrap());
174                index += 1;
175            }
176            for _ in 0..delete.len {
177                old_bytes.next();
178            }
179        }
180        while let Some(byte) = old_bytes.next() {
181            new_bytes.push(byte);
182        }
183        String::from_utf8(new_bytes)
184    }
185
186    /// Apply the operations in this sequence to a file.  This should not be called until after
187    /// the sequence has been integrated via [`Engine::integrate_remote`](struct.Engine.html#method.integrate_remote)
188    /// The file must have been opened on both read and write mode (see [OpenOptions](https://doc.rust-lang.org/nightly/std/fs/struct.OpenOptions.html)).
189    pub fn apply(&self, file: &mut File) -> io::Result<()> {
190        let mut new_bytes = Vec::new();
191        try!(file.seek(SeekFrom::Start(0)));
192        let mut old_bytes = file.try_clone().unwrap().bytes();
193        let mut index = 0;
194        for insert in self.inserts.iter() {
195            while index < insert.position {
196                new_bytes.push(try!(old_bytes.next().unwrap()).clone());
197                index += 1;
198            }
199            new_bytes.extend_from_slice(&insert.data[..]);
200            index += insert.data.len();
201        }
202        while let Some(byte) = old_bytes.next() {
203            new_bytes.push(try!(byte));
204        }
205        let old_bytes = mem::replace(&mut new_bytes, Vec::new());
206        let mut old_bytes = old_bytes.into_iter();
207        index = 0;
208        for delete in self.deletes.iter() {
209            while index < delete.position {
210                new_bytes.push(old_bytes.next().unwrap());
211                index += 1;
212            }
213            for _ in 0..delete.len {
214                old_bytes.next();
215            }
216        }
217        while let Some(byte) = old_bytes.next() {
218            new_bytes.push(byte);
219        }
220
221        try!(file.seek(SeekFrom::Start(0)));
222        try!(file.set_len(new_bytes.len() as u64));
223        file.write_all(new_bytes.as_slice())
224    }
225
226    /// Compress this diff and write to `writer`.  The output can then be expanded
227    /// back into an equivilent Diff using `expand_from()`
228    pub fn compress_to<W: Write>(&self, writer: &mut W) -> io::Result<()> {
229
230        let mut int_buf = [0;4];
231        NetworkEndian::write_u32(&mut int_buf, self.inserts.len() as u32);
232        try!(writer.write(&mut int_buf));
233        for insert in self.inserts.iter() {
234            try!(insert.compress_to(writer));
235        }
236        NetworkEndian::write_u32(&mut int_buf, self.deletes.len() as u32);
237        try!(writer.write(&mut int_buf));
238        for delete in self.deletes.iter() {
239            try!(delete.compress_to(writer));
240        }
241        Ok(())
242    }
243
244    /// Expand this diff from previously compressed data in `reader`.  The data in reader
245    /// should have been written using `compress_to()`
246    pub fn expand_from<R: Read>(reader: &mut R) -> io::Result<Diff> {
247        let mut int_buf = [0;4];
248
249        trace!("Reading insert length");
250        try!(reader.read_exact(&mut int_buf));
251        let insert_len = NetworkEndian::read_u32(&int_buf);
252        trace!("Insert length was: {}", insert_len);
253        let inserts = (0..insert_len).map(|_|Insert::expand_from(reader).unwrap()).collect();
254        trace!("Read inserts");
255        trace!("Reading delete length");
256        try!(reader.read_exact(&mut int_buf));
257        let delete_len = NetworkEndian::read_u32(&int_buf);
258        trace!("Delete length was: {}", delete_len);
259        let deletes = (0..delete_len).map(|_|Delete::expand_from(reader).unwrap()).collect();
260        trace!("Read deletes");
261        Ok(Diff {
262            inserts: inserts,
263            deletes: deletes
264        })
265    }
266}
267
268impl fmt::Debug for Insert {
269    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
270        write!(fmt, "Insert({}, '{}')", self.position, String::from_utf8_lossy(&self.data).replace('\r', "").replace('\n', "\\n"))
271    }
272}
273
274impl fmt::Debug for Delete {
275    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
276        write!(fmt, "Delete({}, {})", self.position, self.len)
277    }
278}
279
280impl Insert {
281    /// Gets the byte position of this insert operation in its file
282    #[inline]
283    pub fn get_position(&self) -> usize {
284        self.position
285    }
286
287    /// Gets the data this insert operation will insert
288    #[inline]
289    pub fn get_data(&self) -> &Vec<u8> {
290        &self.data
291    }
292
293    /// Compress this operation and write to `writer`.  The output can then be expanded
294    /// back into an equivilent operation using `expand_from()`
295    pub fn compress_to<W: Write>(&self, writer: &mut W) -> io::Result<()> {
296
297        let mut int_buf = [0;4];
298        NetworkEndian::write_u32(&mut int_buf, self.position as u32);
299        try!(writer.write(&int_buf));
300        NetworkEndian::write_u32(&mut int_buf, self.data.len() as u32);
301        try!(writer.write(&int_buf));
302        try!(writer.write(&self.data));
303        Ok(())
304    }
305
306    /// Expand this operation from previously compressed data in `reader`.  The data in reader
307    /// should have been written using `compress_to()`
308    pub fn expand_from<R: Read>(reader: &mut R) -> io::Result<Insert> {
309        let mut int_buf = [0;4];
310        try!(reader.read_exact(&mut int_buf));
311        let position = NetworkEndian::read_u32(&int_buf);
312        try!(reader.read_exact(&mut int_buf));
313        let data_len = NetworkEndian::read_u32(&int_buf) as usize;
314        let mut data = Vec::with_capacity(data_len);
315        data.resize(data_len, 0);
316        try!(reader.read_exact(&mut data));
317        Ok(Insert{
318            position: position as usize,
319            data: data
320        })
321    }
322
323}
324
325impl Delete {
326    /// Gets the byte position of this delete operation in its file
327    #[inline]
328    pub fn get_position(&self) -> usize {
329        self.position
330    }
331
332    /// Gets the length in bytes of this delete operation
333    #[inline]
334    pub fn get_length(&self) -> usize {
335        self.len
336    }
337
338    /// Compress this operation and write to `writer`.  The output can then be expanded
339    /// back into an equivilent operation using `expand_from()`
340    pub fn compress_to<W: Write>(&self, writer: &mut W) -> io::Result<()> {
341
342        let mut int_buf = [0;4];
343        NetworkEndian::write_u32(&mut int_buf, self.position as u32);
344        try!(writer.write(&int_buf));
345        NetworkEndian::write_u32(&mut int_buf, self.len as u32);
346        try!(writer.write(&int_buf));
347        Ok(())
348    }
349
350    /// Expand this operation from previously compressed data in `reader`.  The data in reader
351    /// should have been written using `compress_to()`
352    pub fn expand_from<R: Read>(reader: &mut R) -> io::Result<Delete> {
353        let mut int_buf = [0;4];
354        try!(reader.read_exact(&mut int_buf));
355        let position = NetworkEndian::read_u32(&int_buf);
356        try!(reader.read_exact(&mut int_buf));
357        let len = NetworkEndian::read_u32(&int_buf);
358        Ok(Delete{
359            position: position as usize,
360            len: len as usize,
361        })
362    }
363
364}
365
366#[cfg(test)]
367mod test {
368    use super::Diff;
369
370
371
372
373    #[test]
374    fn applying_diff_to_string() {
375        let string = "Mr. and Mrs. Dursley, of number four, Privet Drive, were proud to say that they were perfectly normal, thank you very much. They were the last people you'd expect to be involved in anything strange or mysterious, because they just didn't hold with such nonsense.";
376        let mut diff = Diff::new();
377        diff.add_insert(2, vec![115]); // 's'
378        diff.add_insert(37, vec![116, 121]); //'ty'
379        diff.add_insert(98, vec![97, 98]); // ab
380        diff.add_insert(253, vec![109]); // m
381        diff.add_delete(35, 1); // 'u'
382        diff.add_delete(181, 34);
383        diff.add_delete(219, 1);
384        let result = diff.apply_to_string(string).unwrap();
385        assert_eq!(result, "Mrs. and Mrs. Dursley, of number forty, Privet Drive, were proud to say that they were perfectly abnormal, thank you very much. They were the last people you'd expect to be involved, because they just didn't hold with much nonsense.".to_string());
386    }
387}