backpak 0.3.0

A content-addressed backup system with deduplication and compression
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
//! Build, read, and write compressed packs of [blobs](blob::Blob),
//! suitable for storing in a [backend]
//!
//! A pack file contains:
//! 1. Magic bytes
//! 2. A zstd-compressed stream of all blobs in the file
//! 3. A *separate* zstd stream of the CBOR-encoded manifest.
//!    Each manifest entry contains its blob's type, length, and ID.
//! 4. A 32-bit, big-endian manifest length.
//!
//! Compressing the manifest separately and ending with its length makes it trivial to read
//! without having to decompress or read any blobs first.
//!
//! The hash of the manifest is the pack's ID, since it uniquely describes the file.
//! At the end of a backup, each pack's manifest is stored in an index.
//! This means future readers don't need to reference the manifest unless rebuilding an index
//! or verifying the pack.

use std::fs::File;
use std::io::prelude::*;
use std::io::{self, SeekFrom};
use std::sync::{
    atomic::{AtomicU64, Ordering},
    mpsc::{Receiver, SyncSender},
};

use anyhow::{Context, Result, ensure};
use byte_unit::Byte;
use serde_derive::{Deserialize, Serialize};
use tempfile::NamedTempFile;
use tracing::*;

use crate::backend;
use crate::blob::{self, Blob};
use crate::file_util::{self, nice_size};
use crate::hashing::{HashingReader, ObjectId};
use crate::progress::AtomicCountWrite;
use crate::tree;

pub const MAGIC_BYTES: &[u8] = b"MKBAKPAK1";

/// The desired size of [crate::pack] files
pub const DEFAULT_PACK_SIZE: Byte = Byte::from_u64(100_000_000); // 100 MB

#[derive(Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize)]
pub struct PackManifestEntry {
    #[serde(rename = "type")]
    pub blob_type: blob::Type,
    pub length: u32,
    pub id: ObjectId,
}

pub type PackManifest = Vec<PackManifestEntry>;

#[derive(Debug, Clone)]
pub struct PackMetadata {
    pub id: ObjectId,
    pub manifest: PackManifest,
}

/// Serializes a pack's manifest and get its ID.
///
/// A pack file is identified by the hash of its (uncompressed) manifest.
fn serialize_and_hash(manifest: &[PackManifestEntry]) -> Result<(Vec<u8>, ObjectId)> {
    let mut manifest_cbor = Vec::new();
    ciborium::into_writer(&manifest, &mut manifest_cbor)?;
    let id = ObjectId::hash(&manifest_cbor);

    Ok((manifest_cbor, id))
}

/// Packs blobs received from the given channel.
/// Returns the number of bytes packed
pub fn pack(
    target_size: Byte,
    rx: Receiver<Blob>,
    to_index: SyncSender<PackMetadata>,
    to_upload: SyncSender<(String, File)>,
    total_bytes_packed: &AtomicU64,
    total_bytes_compressed: &AtomicU64,
) -> Result<()> {
    let target_size = target_size.as_u64();
    let mut writer = PackfileWriter::new(total_bytes_compressed)?;

    let mut pass_bytes_written: u64 = 0; // Bytes written since the last size check
    let mut bytes_in_pack: u64 = 0;
    let mut bytes_before_next_check = target_size;

    // For each blob...
    while let Ok(blob) = rx.recv() {
        // Each blob arriving here is assumed to be unique per backup run,
        // i.e., the backup/prune/copy main thread handles deduplication.
        // Handling it here seems better at first (DRY) but would only complicate things:
        //
        // 1. `backup` prints out the path of each chunk and whether it was deduped.
        //    To do that here we'd have to pass each blob's path (or a closure?)
        //    through the rx channel.
        //
        // 2. This set of "already backed up blobs" is prepopulated,
        //    and in different ways for different commands.
        //    `backup` and `copy` just use all blobs in the master index.
        //    `prune` uses all blobs we *don't need to repack*.
        //    We'd also have to pass that here, and then...
        //
        // 3. We're running two instances of this function in two threads,
        //    one for trees and one for chunks.
        //    They'd either have to share the set via a mutex,
        //    or we'd have to write code here to figure out which packer we are and
        //    split the set accordingly.
        //    Please no! All the code so far is agnostic to which packer it is.
        //
        // We don't check this invariant here to avoid extra O(N) RAM usage,
        // where N is every single blob in the backup run.

        // Write a blob and check how many (uncompressed) bytes we've written to the file so far.
        let blob_size = writer.write_blob(blob)?;
        pass_bytes_written += blob_size;
        bytes_in_pack += blob_size;
        total_bytes_packed.fetch_add(blob_size, Ordering::Relaxed);

        // If we exceed our target size, stop.
        // We'll probably overshoot since this isn't flushing the Zstd buffer,
        // but it's quick and keeps us from running away.
        let mut end_pack = false;
        if writer.check_size()? >= target_size {
            let compressed_size = writer.flush_and_check_size()?;
            trace!(
                "Compressed pack is {} (>= target of {}). Starting next pack.",
                nice_size(compressed_size),
                nice_size(target_size)
            );
            end_pack = true;
        }
        // We've written as many bytes as we want the pack size to to be,
        // but we don't know how much they've compressed to.
        // Flush the compressor to see how much space we've actually taken up.
        else if pass_bytes_written >= bytes_before_next_check {
            trace!(
                "Wrote {} bytes into pack, checking compressed size...",
                nice_size(bytes_in_pack)
            );

            let compressed_size = writer.flush_and_check_size()?;

            // If we pass our target size, stop
            if compressed_size >= target_size {
                end_pack = true;
                trace!(
                    "Compressed pack is {} (>= target of {}). Starting next pack.",
                    nice_size(compressed_size),
                    nice_size(target_size)
                );
            }
            // Otherwise, write some more
            else {
                // Take our current compression ratio to estimate how much more
                // we need to write to hit the target pack size.
                let current_ratio = bytes_in_pack as f64 / compressed_size as f64;
                bytes_before_next_check =
                    (current_ratio * (target_size - compressed_size) as f64) as u64;
                pass_bytes_written = 0;
                trace!(
                    "Compressed pack is {} (ratio {:.02}). Writing {} before next flush",
                    nice_size(compressed_size),
                    current_ratio,
                    nice_size(bytes_before_next_check)
                );
            }
        }

        if end_pack {
            let (metadata, persisted) = writer.finalize()?;
            let finalized_path = format!("{}.pack", metadata.id);

            to_upload
                .send((finalized_path, persisted))
                .context("packer -> uploader channel exited early")?;
            to_index
                .send(metadata)
                .context("packer -> indexer channel exited early")?;

            writer = PackfileWriter::new(total_bytes_compressed)?;
            pass_bytes_written = 0;
            bytes_in_pack = 0;
            bytes_before_next_check = target_size;
        }
    }
    if bytes_in_pack > 0 {
        let (metadata, persisted) = writer.finalize()?;
        let finalized_path = format!("{}.pack", metadata.id);
        to_upload
            .send((finalized_path, persisted))
            .context("packer -> uploader channel exited early")?;
        to_index
            .send(metadata)
            .context("packer -> indexer channel exited early")?;
    }
    Ok(())
}

type ZstdEncoder<W> = zstd::stream::write::Encoder<'static, W>;
type ZstdDecoder<R> = zstd::stream::read::Decoder<'static, R>;

struct PackfileWriter<'a> {
    writer: ZstdEncoder<AtomicCountWrite<'a, NamedTempFile>>,
    manifest: PackManifest,
}

// TODO: Obviously this should all take place in a configurable temp directory

impl<'a> PackfileWriter<'a> {
    fn new(byte_count: &'a AtomicU64) -> Result<Self> {
        let mut fh = tempfile::Builder::new()
            .prefix("temp-backpak-")
            .suffix(".pack")
            .tempfile_in(".")
            .context("Couldn't open temporary packfile for writing")?;

        fh.write_all(MAGIC_BYTES)?;
        let acw = AtomicCountWrite::new(fh, byte_count);
        let mut zstd = ZstdEncoder::new(acw, 0)?;
        zstd.multithread(num_cpus::get_physical() as u32)?;
        Ok(Self {
            writer: zstd,
            manifest: Vec::new(),
        })
    }

    /// Write the given file chunk or tree to the packfile and add it to the manifest.
    fn write_blob(&mut self, blob: Blob) -> io::Result<u64> {
        let blob_bytes: &[u8] = blob.bytes();

        let blob_length = blob_bytes.len();
        assert!(blob_length <= u32::MAX as usize);

        self.writer.write_all(blob_bytes)?;
        self.manifest.push(PackManifestEntry {
            blob_type: blob.kind,
            length: blob_length as u32,
            id: blob.id,
        });
        Ok(blob_length as u64)
    }

    /// Flush the compressor and check the size of the packfile so far.
    ///
    /// **Warning:** Doing this too frequently hurts the compression ratio, at least a little.
    fn flush_and_check_size(&mut self) -> Result<u64> {
        self.writer.flush()?;
        let pos = self.writer.get_ref().get_ref().stream_position()?;
        Ok(pos)
    }

    /// Check the size of the underlying compressed file *without* flushing.
    ///
    /// Doesn't account for whatever data is in the Zstd buffer,
    /// but doesn't change compression ratios either.
    fn check_size(&self) -> Result<u64> {
        let pos = self.writer.get_ref().get_ref().stream_position()?;
        Ok(pos)
    }

    /// Finalize the packfile, returning the manifest & ID with a handle to
    /// the persisted file (so that the uploader doesn't have to reopen it).
    fn finalize(self) -> Result<(PackMetadata, File)> {
        let (manifest, id) = serialize_and_hash(&self.manifest)?;

        // Finish the compression stream for blobs and trees.
        // We'll compress the manifest separately so we can decompress it
        // without reading everything before it.
        let mut fh: NamedTempFile = self.writer.finish()?.into_inner();

        // The manifest CBOR will have lots of redundant data - compress it down.
        // TODO: Is multithreading worth it here?
        // This shouldn't be much data compared to blobs and trees.
        let mut manifest = zstd::bulk::compress(&manifest, 0)?;

        // Write the length of the (compressed) manifest to the end of the file,
        // making it simple and fast to examine the manifest:
        // read the last four bytes, seek to the start of the manifest,
        // and decompress it.
        let manifest_length = manifest.len() as u32;
        manifest.extend_from_slice(&manifest_length.to_be_bytes());
        fh.write_all(&manifest)?;

        // All done! Sync, persist, and go home.
        fh.as_file().sync_all()?;
        let pack_name = format!("{}.pack", id);
        let persisted = fh
            .persist(&pack_name)
            .with_context(|| format!("Couldn't persist finished pack to {}", pack_name))?;

        Ok((
            PackMetadata {
                id,
                manifest: self.manifest,
            },
            persisted,
        ))
    }
}

/// Verifies everything in the packfile matches the given manifest from the index.
pub fn verify<R: Read + Seek>(
    packfile: &mut R,
    manifest_from_index: &[PackManifestEntry],
    blobs_read: &AtomicU64,
) -> Result<()> {
    check_magic(packfile)?;

    let mut decoder = ZstdDecoder::new(packfile).context("Decompression of blob stream failed")?;

    for entry in manifest_from_index {
        let mut hashing_decoder = HashingReader::new((&mut decoder).take(entry.length as u64));

        io::copy(&mut hashing_decoder, &mut io::sink())?;

        let (hash, _) = hashing_decoder.finalize();
        ensure!(
            entry.id == hash,
            "Calculated hash of blob ({}) doesn't match its ID ({})",
            hash,
            entry.id
        );
        blobs_read.fetch_add(1, Ordering::Relaxed);
    }

    // Attempting to read the manifest from CBOR
    // without the correct `take()` length produces errors.
    // Should we rearrange the file so that isn't a problem?
    // Or is that fine, since verification isn't as performance critical
    // as other interactions?
    let mut packfile = decoder.finish();
    let (manifest_from_file, _id) = manifest_from_reader(&mut packfile)?;

    ensure!(
        manifest_from_index == manifest_from_file,
        "Pack manifest doesn't match its index entry and file contents"
    );

    Ok(())
}

/// Reads the pack manifest from the back of the given reader,
/// also returning its calculated ID.
///
/// _Does not_ check the pack's magic bytes or anything besides the manifest.
fn manifest_from_reader<R: Seek + Read>(r: &mut R) -> Result<(PackManifest, ObjectId)> {
    r.seek(SeekFrom::End(-4))?;
    let mut manifest_length: [u8; 4] = [0; 4];
    r.read_exact(&mut manifest_length)?;

    let manifest_length = u32::from_be_bytes(manifest_length);
    let manifest_location = -(manifest_length as i64) - 4;
    r.seek(SeekFrom::End(manifest_location)).with_context(|| {
        format!(
            "Couldn't seek {} bytes from the end of the pack to find the manifest",
            manifest_location
        )
    })?;
    let decoder = ZstdDecoder::new(r.take(manifest_length as u64))
        .context("Decompression of pack manifest failed")?;
    let mut hasher = HashingReader::new(decoder);

    let manifest: PackManifest =
        ciborium::from_reader(&mut hasher).context("CBOR decoding of the pack manifest failed")?;
    let (id, _) = hasher.finalize();
    Ok((manifest, id))
}

/// Loads the manifest of the pack with the given ID from the backend,
/// verifying its contents match its ID.
pub fn load_manifest(
    id: &ObjectId,
    cached_backend: &backend::CachedBackend,
) -> Result<PackManifest> {
    debug!("Loading pack manifest {}", id);
    let mut fh = cached_backend.read_pack(id)?;
    check_magic(&mut fh)?;

    let (manifest, calculated_id) =
        manifest_from_reader(&mut fh).with_context(|| format!("Couldn't load pack {}", id))?;
    ensure!(
        *id == calculated_id,
        "Pack {}'s manifest changed! Now hashes to {}",
        id,
        calculated_id
    );
    Ok(manifest)
}

/// Extracts a single blob from a packfile.
/// Useful for `cat blob`.
pub fn extract_blob<R: Read>(
    packfile: &mut R,
    blob_id: &ObjectId,
    manifest_from_index: &[PackManifestEntry],
) -> Result<(PackManifestEntry, Vec<u8>)> {
    assert!(
        manifest_from_index.iter().any(|entry| entry.id == *blob_id),
        "Given blob ID isn't in the given index"
    );

    check_magic(packfile)?;

    let mut decoder = ZstdDecoder::new(packfile).context("Decompression of blob stream failed")?;

    let mut sink = io::sink();

    for entry in manifest_from_index {
        if entry.id == *blob_id {
            let mut hashing_decoder = HashingReader::new(decoder.take(entry.length as u64));

            let mut buf = Vec::with_capacity(entry.length as usize);
            hashing_decoder.read_to_end(&mut buf)?;

            let (hash, _) = hashing_decoder.finalize();
            ensure!(
                *blob_id == hash,
                "Calculated hash of blob ({}) doesn't match ID {}",
                hash,
                *blob_id
            );

            return Ok((*entry, buf));
        } else {
            io::copy(&mut (&mut decoder).take(entry.length as u64), &mut sink)?;
        }
    }

    unreachable!();
}

/// Reads in a packfile (presumably of all trees) and appends them to the given forest
pub fn append_to_forest<R: Read + Seek>(
    packfile: &mut R,
    manifest_from_index: &[PackManifestEntry],
    forest: &mut tree::Forest,
) -> Result<()> {
    check_magic(packfile)?;

    let mut decoder = ZstdDecoder::new(packfile).context("Decompression of blob stream failed")?;

    for entry in manifest_from_index {
        // If it's not a tree, or if we have it already, skip it!
        let skip = if entry.blob_type != blob::Type::Tree {
            warn!(
                "Chunk {} found in pack where we expected only trees",
                entry.id
            );
            true
        } else {
            forest.contains_key(&entry.id)
        };
        let entry_length = entry.length as u64;
        if skip {
            assert_eq!(
                entry_length,
                io::copy(&mut (&mut decoder).take(entry_length), &mut io::sink())?
            );
            continue;
        }

        let mut hashing_decoder = HashingReader::new((&mut decoder).take(entry_length));

        let to_add: tree::Tree = ciborium::from_reader(&mut hashing_decoder)
            .with_context(|| format!("CBOR decoding of tree {} failed", entry.id))?;

        let (hash, _) = hashing_decoder.finalize();
        ensure!(
            entry.id == hash,
            "Calculated hash of tree ({}) doesn't match its ID ({})",
            hash,
            entry.id
        );

        assert!(
            forest
                .insert(entry.id, std::sync::Arc::new(to_add))
                .is_none()
        );
    }
    Ok(())
}

pub fn check_magic<R: Read>(r: &mut R) -> Result<()> {
    file_util::check_magic(r, MAGIC_BYTES).context("Wrong magic bytes for packfile")
}

#[cfg(test)]
mod test {
    use super::*;

    use std::fs;
    use std::sync::mpsc::sync_channel;

    use crate::chunk;

    #[test]
    /// Pack manifest and ID remains stable from build to build.
    fn stability() -> Result<()> {
        let manifest = vec![
            PackManifestEntry {
                blob_type: blob::Type::Chunk,
                length: 42,
                id: ObjectId::hash(b"first"),
            },
            PackManifestEntry {
                blob_type: blob::Type::Tree,
                length: 22,
                id: ObjectId::hash(b"second"),
            },
            PackManifestEntry {
                blob_type: blob::Type::Chunk,
                length: 42,
                id: ObjectId::hash(b"third"),
            },
        ];

        let (manifest, id) = serialize_and_hash(&manifest)?;

        // ID remains stable
        assert_eq!(
            format!("{}", id),
            "7c3gq0qmkioptdivd2iid0j3s7u7coci7vmluicktm20q"
        );
        // Contents remain stable
        // (We could just use the ID and length,
        // but having some example CBOR in the repo seems helpful.)
        let from_example = fs::read("tests/references/pack.stability")?;
        assert_eq!(manifest, from_example);
        Ok(())
    }

    #[test]
    fn smoke() -> Result<()> {
        let chunks: Vec<_> = chunk::chunk_file("tests/references/sr71.txt")
            .context("Couldn't chunk reference file")?
            .collect();
        let (chunk_tx, chunk_rx) = sync_channel(0);
        let (pack_tx, pack_rx) = sync_channel(0);
        let (upload_tx, upload_rx) = sync_channel(0);

        let unused_byte_count = std::sync::atomic::AtomicU64::new(0);
        let chunk_packer = std::thread::spawn(move || {
            pack(
                DEFAULT_PACK_SIZE,
                chunk_rx,
                pack_tx,
                upload_tx,
                &unused_byte_count,
                &unused_byte_count,
            )
        });

        let upload_chucker = std::thread::spawn(move || -> Result<()> {
            // This test doesn't actually care about the files themselves,
            // at least for now. Axe em!
            while let Ok((to_upload, _)) = upload_rx.recv() {
                fs::remove_file(&to_upload)
                    .with_context(|| format!("Couldn't remove completed packfile {}", to_upload))?;
            }
            Ok(())
        });

        for chunk in &chunks {
            chunk_tx.send(chunk.clone())?
        }
        drop(chunk_tx);

        let mut merged_manifest: PackManifest = Vec::new();
        while let Ok(mut metadata) = pack_rx.recv() {
            merged_manifest.append(&mut metadata.manifest);
        }

        chunk_packer.join().unwrap()?;
        upload_chucker.join().unwrap()?;

        assert_eq!(chunks.len(), merged_manifest.len());
        for (chunk, manifest_entry) in chunks.iter().zip(merged_manifest.iter()) {
            assert_eq!(manifest_entry.blob_type, blob::Type::Chunk);
            assert_eq!(manifest_entry.id, chunk.id);
            assert_eq!(manifest_entry.length as usize, chunk.bytes().len());
        }
        Ok(())
    }
}