1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
/*
 * Copyright 2018 Jordan Miner
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 */

use std::fmt::{self, Debug};
use std::io::{self, BufReader, BufWriter, Read, Seek, SeekFrom, Write};

use blake2_rfc::blake2b::Blake2b;
use byteorder::{BE, ReadBytesExt, WriteBytesExt};

use delta_generator::DeltaGenerator;
use rolling_checksum::RollingChecksum;
use signature_lookup::SignatureLookup;

mod delta_generator;
mod rolling_checksum;
mod signature_lookup;

// TODO: move ProgressReader to its own crate

///  A `Read` adaptor that calls a callback every so often as data is read from it so that the
///  progress of an operation can be reported.
pub struct ProgressReader<R: Read> {
    inner: R,
    total_len: u64,
    read_len: u64,
    // The number of bytes to read between calls to the callback. Precalculated to avoid a division
    // every read call.
    progress_step: u64,
    // The value of `read_len` the last time the callback was called
    last_read_len: u64,
    callback: Box<dyn FnMut(f32)>,
}

impl<R: Read> ProgressReader<R> {
    /// Creates a new `ProgressReader`. The `total_len` is the total number of bytes in the
    /// operation and the `callback` is the function to be called after every (currently) 1% has
    /// been read.
    pub fn new(inner: R, total_len: u64, callback: Box<dyn FnMut(f32)>) -> Self {
        ProgressReader {
            inner,
            total_len,
            read_len: 0,
            progress_step: total_len / 100,
            last_read_len: 0,
            callback,
        }
    }

    /// Returns the number of bytes that have been read so far.
    pub fn read_len(&self) -> u64 {
        self.read_len
    }
}

impl<R: Read> Read for ProgressReader<R> {
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // Call the callback first because now the previously read data has been processed.
        if self.read_len - self.last_read_len >= self.progress_step {
            (self.callback)(self.read_len as f32 / self.total_len as f32);
            self.last_read_len = self.read_len;
        }

        let res = self.inner.read(buf);
        if let Ok(len) = res {
            self.read_len += len as u64;
        }
        res
    }
}

#[derive(Eq, PartialEq)]
struct HexPrinter<'a>(&'a [u8]);

impl<'a> Debug for HexPrinter<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        write!(f, "[")?;
        let mut first = true;
        for b in self.0 {
            if !first {
                write!(f, " ")?;
            }
            write!(f, "{:02X}", b)?;
            first = false;
        }
        write!(f, "]")?;
        Ok(())
    }
}


// It should be fine to truncate the strong hash to 128 bits. Even 128 bits is overkill. I don't
// know why they used 256. If you had a 2 GB file (using 2KB blocks) containing 1,000,000 blocks and
// you synced 1 billion such files, there is a 99.999999999999999+% chance that there would never be
// a collision.
//
// 1 - e^(-1000000^2/(2*2^128)) = 1.47 * 10^-27
// (1.47 * 10^-27) ^ 1000000000 = 99.999999+%

//const SIG_MAGIC_MD4: u32 = 0x72730136;
const SIG_MAGIC_BLAKE2: u32 = 0x72730137;
const DELTA_MAGIC: u32 = 0x72730236;

//const MD4_SUM_SIZE: u32 = 16;
const BLAKE2_SUM_SIZE: u32 = 32;

#[derive(Debug)]
pub enum Error {
    UnsupportedSignatureFormat(u32),
    UnsupportedDeltaFormat(u32),
    UnsupportedDeltaCommand(u8),
    Io(io::Error),
}

impl From<io::Error> for Error {
    fn from(err: io::Error) -> Self {
        Error::Io(err)
    }
}

/// Calculates the signature file from an old version of a file using the default block size
/// (currently 2048) and strong sum size (currently 32).
pub fn calculate_signature<R: Read, W: Write>(old_file: R, sig_file: W) -> io::Result<()> {
    calculate_signature_with_sizes(old_file, sig_file, 2048, None)
}

/// Calculates the signature file from an old version of a file.
///
/// The signature file contains a weak sum and strong sum (currently Blake2b) for each block in the
/// old file.
///
/// Using a smaller `block_size` results in more blocks and a larger signature file. However, with
/// randomly distributed differences, it will also make it more likely that matching blocks can be
/// found when generating the delta file, producing a smaller delta file.
///
/// Using a smaller `strong_sum_size` results in a smaller signature file, but if it is small enough
/// that it results in a collision, then an invalid delta file could be produced that results in a
/// corrupt new file. If `None`, a default of 32 is used. Using 16 is fine, but you should check the
/// odds of a collision before using smaller than that.
pub fn calculate_signature_with_sizes<R: Read, W: Write>(
    mut old_file: R,
    sig_file: W,
    block_size: u32,
    strong_sum_size: Option<u32>
) -> io::Result<()> {
    let strong_sum_size = strong_sum_size.unwrap_or(BLAKE2_SUM_SIZE);
    debug_assert!(strong_sum_size >= 1 && strong_sum_size <= BLAKE2_SUM_SIZE);

    // No BufReader for old_file because we're already buffering it to a block size below.
    let mut sig_file = BufWriter::new(sig_file);

    sig_file.write_u32::<BE>(SIG_MAGIC_BLAKE2)?;
    sig_file.write_u32::<BE>(block_size)?;
    sig_file.write_u32::<BE>(strong_sum_size)?;

    let block_size = block_size as usize;
    let strong_sum_size = strong_sum_size as usize;

    let mut buffer = vec![0u8; block_size];
    let mut buffer_end = 0;
    loop {
        let num_read = old_file.read(&mut buffer[buffer_end..block_size])?;
        buffer_end += num_read;
        if num_read == 0 || buffer_end == block_size {
            let mut rolling_checksum = RollingChecksum::new();
            rolling_checksum.add_slice(&buffer[0..buffer_end]);
            sig_file.write_u32::<BE>(rolling_checksum.get())?;

            // Setting the Blake2b digest size changes the hash, so it has to be 32 for
            // compatibility with librsync.
            let mut hash = Blake2b::new(32);
            hash.update(&buffer[0..buffer_end]);
            let res = hash.finalize();
            sig_file.write(&res.as_bytes()[0..strong_sum_size])?;

            buffer_end = 0;
        }
        if num_read == 0 {
            break;
        }
    }

    Ok(())
}

/// Calculates the delta file from the new file, referencing the signature file.
pub fn calculate_delta<SR: Read, NR: Read, W: Write>(
    sig_file: SR,
    new_file: NR,
    delta_file: W
) -> Result<(), Error> {
    let sig_lookup = SignatureLookup::new(sig_file)?;

    let delta_gen = DeltaGenerator::new(delta_file, sig_lookup)?;
    delta_gen.generate(new_file)?;

    Ok(())
}

/// Creates the new file from the old file and the delta file.
pub fn apply_delta<OR: Read + Seek, DR: Read, W: Write>(
    mut old_file: OR,
    delta_file: DR,
    new_file: W
) -> Result<(), Error> {
    let mut delta_file = BufReader::new(delta_file);
    let mut new_file = BufWriter::new(new_file);

    fn read_uint<DR: Read>(mut reader: DR, size_class: u8) -> io::Result<u64> {
        Ok(match size_class {
            0 => reader.read_u8()? as u64,
            1 => reader.read_u16::<BE>()? as u64,
            2 => reader.read_u32::<BE>()? as u64,
            3 => reader.read_u64::<BE>()?,
            _ => panic!(),
        })
    };

    let magic = delta_file.read_u32::<BE>()?;
    if magic != DELTA_MAGIC {
        return Err(Error::UnsupportedDeltaFormat(magic));
    }
    loop {
        let cmd = delta_file.read_u8()?;
        match cmd {
            0 => break,
            1..=64 => { // literal
                let literal_len = cmd as u64;

                let mut take = delta_file.take(literal_len);
                io::copy(&mut take, &mut new_file)?;
                delta_file = take.into_inner();
            }
            65..=68 => { // literal
                let len_size_class = cmd - 65;
                let literal_len = read_uint(&mut delta_file, len_size_class)?;

                let mut take = delta_file.take(literal_len);
                io::copy(&mut take, &mut new_file)?;
                delta_file = take.into_inner();
            }
            69..=84 => { // copy
                let cmd_offset = cmd - 69;
                let start_size_class = cmd_offset / 4;
                let len_size_class = cmd_offset % 4;
                let copy_start = read_uint(&mut delta_file, start_size_class)?;
                let copy_len = read_uint(&mut delta_file, len_size_class)?;

                old_file.seek(SeekFrom::Start(copy_start))?;
                let mut take = old_file.take(copy_len);
                io::copy(&mut take, &mut new_file)?;
                old_file = take.into_inner();
            }
            _ => {
                return Err(Error::UnsupportedDeltaCommand(cmd));
            }
        }
    }

    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    #[test]
    fn test_small_signature() {
        let old_file: &[u8] = &[
            // "Red car"
            0x52, 0x65, 0x64, 0x20, 0x63, 0x61, 0x72
        ];
        let mut sig_file = vec![];
        calculate_signature_with_sizes(old_file, &mut sig_file, 2, Some(8)).unwrap();
        assert_eq!(sig_file, &[
            // Copied from rdiff's output.
            0x72, 0x73, 0x01, 0x37, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, // header
            0x01, 0x66, 0x00, 0xF5, 0xAF, 0x6B, 0x83, 0x63, 0xC7, 0x6D, 0x47, 0x51, // block 0
            0x01, 0x45, 0x00, 0xC2, 0x84, 0x10, 0xCD, 0x9D, 0xB3, 0x5F, 0x5F, 0x04, // block 1
            0x01, 0x84, 0x01, 0x02, 0xE4, 0xCF, 0x4F, 0xC9, 0xCE, 0xB1, 0x3D, 0xB0, // block 2
            0x00, 0x91, 0x00, 0x91, 0xCE, 0xA4, 0x07, 0xCA, 0x10, 0x6D, 0xCF, 0x64, // block 3
        ][..]);
    }

    #[test]
    fn test_signature_lookup() {
        let sig_file: &[u8] = &[
            // Copied from the test above.
            0x72, 0x73, 0x01, 0x37, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, // header
            0x01, 0x66, 0x00, 0xF5, 0xAF, 0x6B, 0x83, 0x63, 0xC7, 0x6D, 0x47, 0x51, // block 0
            0x01, 0x45, 0x00, 0xC2, 0x84, 0x10, 0xCD, 0x9D, 0xB3, 0x5F, 0x5F, 0x04, // block 1
            0x01, 0x84, 0x01, 0x02, 0xE4, 0xCF, 0x4F, 0xC9, 0xCE, 0xB1, 0x3D, 0xB0, // block 2
            0x00, 0x91, 0x00, 0x91, 0xCE, 0xA4, 0x07, 0xCA, 0x10, 0x6D, 0xCF, 0x64, // block 3
        ];

        let sig_lookup = SignatureLookup::new(sig_file).expect("failed to create SignatureLookup");

        let blocks0 = sig_lookup.get_blocks(0x016600F5);
        assert!(blocks0.is_some());
        let blocks0 = blocks0.unwrap();
        assert_eq!(blocks0.len(), 1);
        assert_eq!(blocks0[0].strong_sum, &[
            0xAF, 0x6B, 0x83, 0x63, 0xC7, 0x6D, 0x47, 0x51
        ]);

        let blocks3 = sig_lookup.get_blocks(0x00910091);
        assert!(blocks3.is_some());
        let blocks3 = blocks3.unwrap();
        assert_eq!(blocks3.len(), 1);
        assert_eq!(blocks3[0].strong_sum, &[
            0xCE, 0xA4, 0x07, 0xCA, 0x10, 0x6D, 0xCF, 0x64
        ]);
    }

    #[test]
    fn test_small_delta_gen() {
        let old_file: &[u8] = &[
            // "car ice "
            0x63, 0x61, 0x72, 0x20, 0x69, 0x63, 0x65, 0x20
        ];
        let new_file: &[u8] = &[
            // "mug nice jog car try lip "
            0x6D, 0x75, 0x67, 0x20,
            0x6E, 0x69, 0x63, 0x65, 0x20, // "nice "
            0x6A, 0x6F, 0x67, 0x20, // "jog "
            0x63, 0x61, 0x72, 0x20,
            0x74, 0x72, 0x79, 0x20, // "try "
            0x6C, 0x69, 0x70, 0x20
        ];
        let mut sig_file = vec![];
        calculate_signature_with_sizes(old_file, &mut sig_file, 4, Some(8)).unwrap();
        let mut delta_file = vec![];
        calculate_delta(&sig_file[..], new_file, &mut delta_file).unwrap();
        assert_eq!(HexPrinter(&delta_file[..]), HexPrinter(&[
            0x72, 0x73, 0x02, 0x36, // header
            0x05, 0x6D, 0x75, 0x67, 0x20, 0x6E, // literal cmd "mug n"
            0x45, 0x04, 0x04, // copy cmd for block 1 "ice "
            0x04, 0x6A, 0x6F, 0x67, 0x20, // literal cmd "jog "
            0x45, 0x00, 0x04, // copy cmd for block 0 "car "
            0x08, 0x74, 0x72, 0x79, 0x20, 0x6C, 0x69, 0x70, 0x20, // literal cmd "try lip "
            0x00, // end of file
        ]));
    }

    #[test]
    fn test_applying_delta() {
        // This is the same data as the above test.
        let old_file: &[u8] = &[
            // "car ice "
            0x63, 0x61, 0x72, 0x20, 0x69, 0x63, 0x65, 0x20
        ];
        let delta_file: &[u8] = &[
            0x72, 0x73, 0x02, 0x36, // header
            0x05, 0x6D, 0x75, 0x67, 0x20, 0x6E, // literal cmd "mug n"
            0x45, 0x04, 0x04, // copy cmd for block 1 "ice "
            0x04, 0x6A, 0x6F, 0x67, 0x20, // literal cmd "jog "
            0x45, 0x00, 0x04, // copy cmd for block 0 "car "
            0x08, 0x74, 0x72, 0x79, 0x20, 0x6C, 0x69, 0x70, 0x20, // literal cmd "try lip "
            0x00, // end of file
        ];
        let mut new_file = vec![];
        apply_delta(Cursor::new(old_file), delta_file, &mut new_file).unwrap();
        assert_eq!(HexPrinter(&new_file[..]), HexPrinter(&[
            // "mug nice jog car try lip "
            0x6D, 0x75, 0x67, 0x20,
            0x6E, 0x69, 0x63, 0x65, 0x20, // "nice "
            0x6A, 0x6F, 0x67, 0x20, // "jog "
            0x63, 0x61, 0x72, 0x20,
            0x74, 0x72, 0x79, 0x20, // "try "
            0x6C, 0x69, 0x70, 0x20
        ]));
    }
}