// bitbottle 0.10.0
//
// a modern archive file format
// Documentation
use std::cell::RefCell;
use std::collections::HashSet;
use std::fs;
use std::io::{Read, Seek, SeekFrom, Write};
use std::path::PathBuf;
use std::rc::Rc;
use crate::asymmetric::{Ed25519PublicKey, Ed25519SecretKey};
use crate::bottle_error::BottleResult;
use crate::compressed_bottle::CompressedBottleWriter;
use crate::compression::CompressionAlgorithm;
use crate::counting::CountingWriter;
use crate::encrypted_bottle::{EncryptedBottleWriter, EncryptedBottleWriterOptions, EncryptionKey};
use crate::encryption::EncryptionAlgorithm;
use crate::file_atlas::FileAtlasRef;
use crate::file_bottle::{FileBottleOptions, FileListBottleWriter};
use crate::file_list::{BlockHash, FileList, Symlink};
use crate::file_scanner::{FileScanner, ScanState};
use crate::hashing::HashType;
use crate::signed_bottle::{SignatureAlgorithm, SignedBottleWriter};


/// Progress events reported while an archive is being built.
///
/// Every variant except `Done` carries `bytes_written`: how many bytes have
/// actually been written into the output stream so far (after compression
/// and encryption, and including metadata overhead).
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum CreateArchiveState {
    /// Building the list of files to scan, by traversing recursively into
    /// the given paths.
    FileList {
        /// Each new file or folder as it's unearthed.
        atlas: FileAtlasRef,
        /// The (growing) number of files found so far.
        file_count: usize,
        /// The summed sizes of the files found so far.
        bytes: u64,
        bytes_written: u64,
    },

    /// Warning that a symlink is invalid and not being added to the archive.
    SymlinkIgnoredWarning {
        /// The symlink that is being left out.
        symlink: Symlink,
        bytes_written: u64,
    },

    /// Scanning each file to break it into blocks, compute the hash of
    /// each block, and the hash of the overall file.
    FileScan {
        /// Number of blocks processed so far.
        blocks: usize,
        /// Total number of files to process.
        file_count: usize,
        /// Number of bytes processed so far.
        bytes: u64,
        /// Total number of bytes to process.
        total_bytes: u64,
        /// Number of bytes in *unique* blocks discovered so far.
        unique_bytes: u64,
        bytes_written: u64,
    },

    /// Writing the table of contents (filenames, metadata, and block maps)
    /// into the archive.
    TableOfContents {
        /// Number of files completed so far.
        file_count: usize,
        /// Number of files the table of contents will hold when finished.
        total_file_count: usize,
        bytes_written: u64,
    },

    /// Writing block data to the archive.
    BlockData {
        /// Number of blocks written so far.
        blocks: usize,
        /// Number of blocks to write in total.
        total_blocks: usize,
        /// Bytes of block data written so far.
        size: u64,
        /// Bytes of block data to write in total.
        total_size: u64,
        bytes_written: u64,
    },

    /// The file-list bottle has been closed; nothing more will be reported.
    Done {
        /// The list of files that went into the archive.
        file_list: FileList,
    },
}


/// Specify any compression or encryption you want around a bitbottle file
/// archive, as well as details like hashing algorithm and buzscan parameters.
///
/// Each wrapper layer is optional: signing is enabled by `signing_key`,
/// encryption by `password` and/or `public_keys`, and compression by
/// `compression`.
pub struct CreateArchiveOptions {
    /// Hash algorithm handed to the file scanner and to the file-list
    /// bottle writer.
    pub hash_type: HashType,

    // sign it?
    /// Desired signature algorithm for a signed archive (only relevant when
    /// `signing_key` is set).
    pub sign: SignatureAlgorithm,
    /// Secret key to sign with. `None` means the archive is not signed.
    pub signing_key: Option<Ed25519SecretKey>,

    // encrypt it?
    /// Encryption algorithm, used only when a password or at least one
    /// public key is supplied.
    pub encryption: EncryptionAlgorithm,
    /// If set, becomes the password key of the encrypted wrapper.
    pub password: Option<String>,
    /// If non-empty, the archive is encrypted for these recipients.
    pub public_keys: Vec<Ed25519PublicKey>,

    // compress it?
    /// Compression algorithm; `None` means the archive is not compressed.
    pub compression: Option<CompressionAlgorithm>,

    // for buzscan:
    // These four values are passed straight through to `FileScanner::new`.
    // NOTE(review): presumably log2 sizes for the minimum / preferred /
    // maximum block and the rolling-hash window -- confirm against
    // `FileScanner`.
    pub min_bits: u8,
    pub pref_bits: u8,
    pub max_bits: u8,
    pub window_bits: u8,

    // file attributes:
    /// Forwarded to `FileBottleOptions::include_user_group`.
    pub include_user_group: bool,
}

impl Default for CreateArchiveOptions {
    /// Build the default option set: BLAKE3 hashing, no signing key, no
    /// password or recipient keys (so no encryption), no compression,
    /// buzscan bit sizes of 18 / 20 / 22 with a 10-bit window, and
    /// user/group attributes included.
    fn default() -> Self {
        Self {
            // hashing
            hash_type: HashType::BLAKE3,

            // signing: algorithm is preselected, but there is no key
            sign: SignatureAlgorithm::SHA_512_ED25519,
            signing_key: None,

            // encryption: algorithm is preselected, but there are no keys
            encryption: EncryptionAlgorithm::XCHACHA20_POLY1305,
            password: None,
            public_keys: Vec::new(),

            // compression
            compression: None,

            // buzscan
            min_bits: 18,
            pref_bits: 20,
            max_bits: 22,
            window_bits: 10,

            // file attributes
            include_user_group: true,
        }
    }
}


/// Top-level API for creating bitbottle archives: scan `paths` and write a
/// complete file archive into `writer`, optionally signed, encrypted and/or
/// compressed as requested by `options`.
///
/// `updater` is called with a [`CreateArchiveState`] for each progress event.
///
/// Returns the total handed back by the counting writer's `into_inner()`
/// once the archive is finished. The underlying writer is flushed before
/// returning.
///
/// # Errors
///
/// Propagates any error from building the archive layers or from the final
/// flush.
pub fn create_archive<W: Write, Callback>(
    writer: W,
    paths: &[PathBuf],
    options: CreateArchiveOptions,
    updater: Callback,
) -> BottleResult<u64>
    where Callback: FnMut (CreateArchiveState)
{
    // conceptually the archive is a file list, wrapped in compression, then
    // encryption, then a signature, and the final output bytes are counted
    // so status can be reported. the wrappers have to be constructed from
    // the outside in, so the counting layer comes first and each helper
    // adds the next layer: counting, signing, encryption, compression,
    // archive.
    let (counting_writer, bytes_written) = CountingWriter::new(writer);
    let finished = create_signed_archive(counting_writer, paths, &options, bytes_written, updater)?;

    // unwrap the counter to recover the caller's writer and the byte total.
    let (mut inner, total_bytes) = finished.into_inner();
    inner.flush()?;
    Ok(total_bytes)
}

/// Add the (optional) signing layer, then delegate to
/// `create_encrypted_archive` for the inner layers.
///
/// When `options.signing_key` is set, `writer` is wrapped in a
/// `SignedBottleWriter` using the algorithm selected in `options.sign`, the
/// rest of the archive is written through it, and the wrapper is closed to
/// hand back the original writer. Without a signing key the writer is passed
/// through untouched.
///
/// # Errors
///
/// Propagates errors from creating or closing the signed wrapper and from
/// writing the inner layers.
fn create_signed_archive<W: Write, Callback: FnMut (CreateArchiveState)>(
    writer: W,
    paths: &[PathBuf],
    options: &CreateArchiveOptions,
    bytes_written: Rc<RefCell<u64>>,
    updater: Callback,
) -> BottleResult<W> {
    if let Some(ref secret_key) = options.signing_key {
        // sign! honor the caller's chosen algorithm rather than a fixed one,
        // the same way the encryption layer honors `options.encryption`.
        let writer = SignedBottleWriter::new(writer, options.sign, secret_key.clone())?;
        create_encrypted_archive(writer, paths, options, bytes_written, updater)?.close()
    } else {
        create_encrypted_archive(writer, paths, options, bytes_written, updater)
    }
}

fn create_encrypted_archive<W: Write, Callback: FnMut (CreateArchiveState)>(
    writer: W,
    paths: &[PathBuf],
    options: &CreateArchiveOptions,
    bytes_written: Rc<RefCell<u64>>,
    updater: Callback,
) -> BottleResult<W> {
    if !options.public_keys.is_empty() || options.password.is_some() {
        // let's encrypt!
        let mut encryption_options = EncryptedBottleWriterOptions::new();
        if let Some(ref password) = options.password {
            encryption_options.key = EncryptionKey::Password(password.to_string());
        }
        encryption_options.algorithm = options.encryption;

        let writer = if options.public_keys.is_empty() {
            EncryptedBottleWriter::new(writer, encryption_options)?
        } else {
            EncryptedBottleWriter::for_recipients(writer, encryption_options, options.public_keys.as_slice())?
        };
        create_compressed_archive(writer, paths, options, bytes_written, updater)?.close()
    } else {
        create_compressed_archive(writer, paths, options, bytes_written, updater)
    }
}

/// Add the (optional) compression layer, then delegate to
/// `create_file_archive` for the archive contents.
///
/// When `options.compression` names an algorithm, `writer` is wrapped in a
/// `CompressedBottleWriter`, which is closed afterwards to hand back the
/// original writer. Otherwise the writer is passed through untouched.
///
/// # Errors
///
/// Propagates errors from creating or closing the compressed wrapper and
/// from writing the archive contents.
fn create_compressed_archive<W: Write, Callback: FnMut (CreateArchiveState)>(
    writer: W,
    paths: &[PathBuf],
    options: &CreateArchiveOptions,
    bytes_written: Rc<RefCell<u64>>,
    updater: Callback,
) -> BottleResult<W> {
    match options.compression {
        None => create_file_archive(writer, paths, options, bytes_written, updater),
        Some(algorithm) => {
            let compressed = CompressedBottleWriter::new(writer, algorithm)?;
            create_file_archive(compressed, paths, options, bytes_written, updater)?.close()
        }
    }
}

/// Innermost layer: write the file archive itself into `writer` and give
/// the writer back, so the enclosing wrapper layers can be closed by their
/// creators.
///
/// # Errors
///
/// Propagates any error from `write_archive_files`.
fn create_file_archive<W: Write, Callback: FnMut (CreateArchiveState)>(
    mut writer: W,
    paths: &[PathBuf],
    options: &CreateArchiveOptions,
    bytes_written: Rc<RefCell<u64>>,
    updater: Callback,
) -> BottleResult<W> {
    // lend the writer out for the duration of the write, then return
    // ownership to the caller on success.
    let outcome = write_archive_files(&mut writer, paths, options, bytes_written, updater);
    outcome.map(|()| writer)
}

/// Given paths to scan and a `Writer` to write to, build a file list, create
/// a `FileList` bottle, and write all of the nested `File` and `FileBlock`
/// inner bottles, to create a complete archive.
///
/// The work happens in three phases, each reported through `updater`:
///
/// 1. scan `paths` and split the files into hashed blocks (`FileList` and
///    `FileScan` events), dropping stray symlinks with a
///    `SymlinkIgnoredWarning` for each;
/// 2. write the table of contents (`TableOfContents` events);
/// 3. write each distinct block once, the first time it is encountered
///    (`BlockData` events), followed by a final `Done` event.
///
/// `bytes_written` is a shared counter that is only read here, to stamp each
/// progress event with the current output size.
///
/// # Errors
///
/// Propagates errors from scanning, from opening, seeking or reading a
/// source file (including a file that can no longer supply a full block at
/// the recorded offset), and from writing to or closing the bottle.
pub fn write_archive_files<W: Write, Callback: FnMut (CreateArchiveState)>(
    writer: W,
    paths: &[PathBuf],
    options: &CreateArchiveOptions,
    bytes_written: Rc<RefCell<u64>>,
    mut updater: Callback,
) -> BottleResult<()> {
    // first, build a file & block list, translating the scanner's progress
    // events into our own.
    let mut file_scanner = FileScanner::new(
        options.hash_type,
        options.min_bits,
        options.pref_bits,
        options.max_bits,
        options.window_bits,
        vec![0u8; 0x1_0000],
        |scan_state| {
            let bytes_written = *bytes_written.borrow();
            match scan_state {
                ScanState::FileList { atlas, file_count, bytes } => {
                    updater(CreateArchiveState::FileList { atlas, file_count, bytes, bytes_written })
                },
                ScanState::Blocks { blocks, file_count, bytes, total_bytes, unique_bytes }=> {
                    updater(CreateArchiveState::FileScan {
                        blocks, file_count, bytes, total_bytes, unique_bytes, bytes_written
                    });
                },
            }
        },
    );

    file_scanner.scan_paths(paths)?;
    file_scanner.build_block_list(options.hash_type)?;
    let mut file_list: FileList = file_scanner.into();

    for symlink in file_list.drop_stray_symlinks() {
        let bytes_written = *bytes_written.borrow();
        updater(CreateArchiveState::SymlinkIgnoredWarning { symlink: symlink.clone(), bytes_written });
    }

    // write the table of contents.
    let file_options = FileBottleOptions {
        include_user_group: options.include_user_group,
    };
    let mut bottle_writer = FileListBottleWriter::new(writer, options.hash_type, &file_list, file_options, |file_count| {
        let total_file_count = file_list.total_file_count();
        let bytes_written = *bytes_written.borrow();
        updater(CreateArchiveState::TableOfContents { file_count, total_file_count, bytes_written });
    })?;

    // the file list is only read from here on, so the totals reported with
    // every block are computed once instead of once per block.
    let total_blocks = file_list.blocks.len();
    let total_size = file_list.total_block_size();

    // write the individual blocks, whenever we come across one we haven't written yet.
    let mut seen_blocks: HashSet<BlockHash> = HashSet::new();
    let mut block_bytes_written: u64 = 0;
    // one read buffer, resized per block, instead of a fresh allocation each time.
    let mut buffer: Vec<u8> = Vec::new();
    for atlas in &file_list.files {
        // the file is opened lazily, and at most once, the first time one of
        // its blocks actually needs to be read.
        let mut open_file: Option<fs::File> = None;
        // offset of the current block within the file.
        let mut seek: u64 = 0;
        let atlas = atlas.borrow();
        for block in &atlas.contents.blocks {
            // `insert` reports whether the hash was new, so a single lookup
            // both tests and records it.
            if seen_blocks.insert(block.hash) {
                // read this block from the file (opening if necessary)
                let mut f = match open_file.take() {
                    Some(f) => f,
                    None => fs::File::open(&atlas.path)?,
                };
                f.seek(SeekFrom::Start(seek))?;
                // `read_exact` overwrites the whole buffer, so stale bytes
                // from a previous block never leak through.
                buffer.resize(block.size, 0);
                f.read_exact(&mut buffer)?;

                // just another block in the wall
                bottle_writer.add_block(block, &buffer)?;
                block_bytes_written += block.size as u64;
                open_file = Some(f);

                let blocks = seen_blocks.len();
                let size = block_bytes_written;
                let bytes_written = *bytes_written.borrow();
                updater(CreateArchiveState::BlockData { blocks, total_blocks, size, total_size, bytes_written });
            }
            // advance past this block whether or not it was written.
            seek += block.size as u64;
        }
    }

    // bye!
    bottle_writer.close()?;
    updater(CreateArchiveState::Done { file_list });
    Ok(())
}