bitbottle 0.9.1

a modern archive file format
Documentation
use std::collections::{HashMap, HashSet};
use std::fs;
use std::io::Read;
use std::os::unix::prelude::{FileExt, PermissionsExt};
use std::path::{Component, Path, PathBuf};
use crate::FileAtlasRef;
use crate::hashing::HashType;
use crate::bottle::BottleReader;
use crate::bottle_error::{BottleError, BottleResult};
use crate::file_atlas::FileAtlas;
use crate::file_bottle::FileListBottleReader;
use crate::file_list::{Block, BlockData, FileList, FileListRef, Symlink};


/// Which stage of reading or expanding an archive are we at?
///
/// Values of this enum are passed to the progress callback given to
/// `read_archive` and `expand_archive`.
#[derive(Clone, Debug)]
pub enum ReadArchiveState {
    /// Reading the table of contents (filenames, metadata, and block maps)
    /// from the archive. `index` is the number of the file being reported
    /// (counting from 0), and `file_count` is the total reported by the
    /// archive header. `hash_type` is the hash algorithm used for blocking.
    /// `atlas` is the current file's atlas.
    TableOfContents { index: usize, file_count: usize, hash_type: HashType, atlas: FileAtlasRef },

    /// If we encounter a path with illegal segments (`.`, `..`, or an
    /// absolute path), we emit this warning, with the bad path and our
    /// attempt to correct it. We will use the corrected path. You are free
    /// to ignore this warning.
    BadPathWarning { bad_path: PathBuf, new_path: PathBuf },

    /// Warning that the archive contains an invalid symlink that will be
    /// ignored.
    SymlinkIgnoredWarning { symlink: Symlink },

    /// The entire file list has been read and filtered for illegal entries.
    /// `file_count`, `block_count`, and `hash_type` are the values reported
    /// by the archive header; `file_list` is the finished list.
    FileList { file_count: usize, block_count: usize, hash_type: HashType, file_list: FileListRef },

    /// Reading blocks from the archive and writing them to disk.
    /// `blocks_read` may be less than `blocks_written` because a single
    /// block may appear in multiple places. `bytes_read` and `bytes_written`
    /// are the matching running totals of block payload sizes.
    BlockData { blocks_read: usize, blocks_written: usize, bytes_read: u64, bytes_written: u64 },
}


/// Options that control how `expand_archive` unpacks an archive.
pub struct ReadArchiveOptions {
    /// Predicate called with a file's atlas while block data is being
    /// written; when it returns `false`, that file's blocks are skipped.
    file_filter: Box<dyn FnMut (&FileAtlas) -> bool>,
}

impl ReadArchiveOptions {
    // good defaults
    pub fn new() -> ReadArchiveOptions {
        ReadArchiveOptions {
            file_filter: Box::new(|_| true),
        }
    }
}

impl Default for ReadArchiveOptions {
    fn default() -> Self {
        Self::new()
    }
}


/// Read the archive's file list, but don't expand the files.
///
/// Each file entry is reported to `updater` as a
/// [`ReadArchiveState::TableOfContents`] event. Before that, any path that
/// contains a component other than a plain name (a root, a prefix, `.` or
/// `..`) is rewritten to keep only its plain-name components, in their
/// original order, and a [`ReadArchiveState::BadPathWarning`] is emitted.
/// Once every entry has been read, stray symlinks are dropped (each one is
/// reported as a [`ReadArchiveState::SymlinkIgnoredWarning`]) and a final
/// [`ReadArchiveState::FileList`] event is emitted.
///
/// Returns the reader, ready for its blocks to be iterated, together with
/// the finished file list.
///
/// # Errors
///
/// Fails if the file-list bottle cannot be opened, or if any file entry
/// cannot be read.
pub fn read_archive<R: Read, Callback: FnMut (ReadArchiveState)>(
    bottle_reader: BottleReader<R>,
    updater: &mut Callback,
) -> BottleResult<(FileListBottleReader<R>, FileListRef)> {
    // read the table of contents.
    let mut bottle_reader = FileListBottleReader::new(bottle_reader)?;
    let hash_type = bottle_reader.hash_type;
    let file_count = bottle_reader.file_count;
    let block_count = bottle_reader.block_count;

    let mut file_list = FileList::new();
    for (index, atlas) in bottle_reader.iter_files().enumerate() {
        let atlas = atlas?;
        // check for bad path that must be corrected
        {
            let mut atlas = atlas.borrow_mut();
            let is_clean = atlas.normalized_path
                .components()
                .all(|c| matches!(c, Component::Normal(_)));
            if !is_clean {
                // `all` short-circuits and leaves its iterator positioned
                // after the first illegal component, so filter a *fresh*
                // iterator: otherwise the legal components that precede the
                // illegal one would be lost from the corrected path.
                let new_path: PathBuf = atlas.normalized_path
                    .components()
                    .filter(|c| matches!(c, Component::Normal(_)))
                    .collect();
                let bad_path = atlas.normalized_path.clone();
                atlas.normalized_path = new_path.clone();
                updater(ReadArchiveState::BadPathWarning { bad_path, new_path });
            }
        }

        file_list.files.push(atlas.clone());
        updater(ReadArchiveState::TableOfContents { index, file_count, hash_type, atlas });
    }

    file_list.build_file_map();

    // for safety:
    for symlink in file_list.drop_stray_symlinks() {
        updater(ReadArchiveState::SymlinkIgnoredWarning { symlink: symlink.clone() });
    }

    let file_list = file_list.bobble();
    updater(ReadArchiveState::FileList { file_count, block_count, hash_type, file_list: file_list.clone() });
    Ok((bottle_reader, file_list))
}


/// Create the parent directory of `path` (and any missing ancestors), if it
/// has one. A path that was rewritten by defanging may point into a folder
/// that the archive never listed, so nothing else would have created it.
fn ensure_parent_dir(path: &Path) -> std::io::Result<()> {
    match path.parent() {
        Some(parent) => fs::create_dir_all(parent),
        None => Ok(()),
    }
}

/// Read an archive and expand its contents into `dest_path`.
///
/// The file list is read with `read_archive` (so `updater` receives all of
/// its events), then:
///
/// 1. every folder in the list is created under `dest_path`;
/// 2. each block read from the archive is written at every offset where it
///    occurs, in every file that contains it and passes
///    `options.file_filter`, with a [`ReadArchiveState::BlockData`] event
///    emitted after each write;
/// 3. zero-length files, which have no block data, are created;
/// 4. symlinks are created;
/// 5. folder permissions are applied, deepest folder first.
///
/// Returns whatever closing the reader returns.
///
/// # Errors
///
/// Fails on any I/O error, on any error reading the archive, and with
/// [`BottleError::IncompleteFileArchive`] if some file is still waiting for
/// blocks after the archive's blocks have all been read.
pub fn expand_archive<R: Read, Callback: FnMut (ReadArchiveState)>(
    bottle_reader: BottleReader<R>,
    mut options: ReadArchiveOptions,
    dest_path: &Path,
    mut updater: Callback,
) -> BottleResult<R> {
    fs::create_dir_all(dest_path)?;

    let (mut bottle_reader, file_list) = read_archive(bottle_reader, &mut updater)?;

    let mut blocks_read = 0;
    let mut blocks_written = 0;
    let mut bytes_read = 0u64;
    let mut bytes_written = 0u64;

    // cache open files, and which blocks are missing
    let mut file_cache: HashMap<PathBuf, (fs::File, HashSet<Block>)> = HashMap::new();

    // first, create folders. their permissions are remembered and applied
    // only once everything has been written: a folder archived without
    // write permission would otherwise reject the files and sub-folders
    // that belong inside it.
    let mut folders = Vec::new();
    for atlas in file_list.borrow().files.iter() {
        let atlas = atlas.borrow();
        if !atlas.is_folder { continue; }
        let path = dest_path.join(&atlas.normalized_path);
        fs::create_dir_all(&path)?;
        folders.push((path, atlas.perms));
    }

    // NOTE(review): `options.file_filter` is only consulted here, for files
    // that have block data. Folders, zero-length files and symlinks are
    // always created -- confirm that this is intended.
    for block_data in bottle_reader.iter_blocks() {
        let BlockData { block, data } = block_data?;
        blocks_read += 1;
        bytes_read += data.len() as u64;

        for atlas in file_list.borrow().file_map.get(&block.hash).into_iter().flatten() {
            let atlas = atlas.borrow();
            if !(options.file_filter)(&atlas) { continue; }
            let path = dest_path.join(&atlas.normalized_path);

            // find (or create) the file handle, and set of remaining blocks for this file
            if !file_cache.contains_key(&path) {
                ensure_parent_dir(&path)?;

                let file = fs::File::create(&path)?;
                file.set_len(atlas.size)?;
                file.set_permissions(PermissionsExt::from_mode(atlas.perms))?;
                let block_set: HashSet<Block> = atlas.contents.blocks.iter().cloned().collect();
                file_cache.insert(path.clone(), (file, block_set));
            }
            // fine to assert since we just added it:
            let (file, block_set) = file_cache.get_mut(&path).unwrap();

            // write this block at every offset where it occurs in the file
            for (offset, _block) in atlas.contents.offsets_of(&block.hash) {
                file.write_all_at(&data, offset)?;
                blocks_written += 1;
                bytes_written += data.len() as u64;
                updater(ReadArchiveState::BlockData { blocks_read, blocks_written, bytes_read, bytes_written });
            }

            // close the file if that was the last block it needed.
            block_set.remove(&block);
            if block_set.is_empty() {
                file.sync_all()?;
                file_cache.remove(&path);
            }
        }
    }

    // create any zero-length files that have no block data
    for atlas in file_list.borrow().files.iter() {
        let atlas = atlas.borrow();
        if atlas.is_folder || atlas.symlink_target.is_some() || atlas.size != 0 { continue; }
        let path = dest_path.join(&atlas.normalized_path);
        ensure_parent_dir(&path)?;

        // `File::create` already leaves the file empty; it only needs the
        // same permissions that files with block data are given.
        let file = fs::File::create(&path)?;
        file.set_permissions(PermissionsExt::from_mode(atlas.perms))?;
    }

    // create symlinks
    for atlas in file_list.borrow().files.iter() {
        let atlas = atlas.borrow();
        if let Some(target) = atlas.symlink_target.as_ref() {
            let path = dest_path.join(&atlas.normalized_path);
            ensure_parent_dir(&path)?;
            std::os::unix::fs::symlink(target, path)?;
        }
    }

    // now that nothing else will be written, apply folder permissions.
    // deepest folders go first, so that restricting a parent can't stop us
    // from reaching its children.
    folders.sort_by_key(|entry| std::cmp::Reverse(entry.0.components().count()));
    for (path, perms) in folders {
        fs::set_permissions(&path, PermissionsExt::from_mode(perms))?;
    }

    if !file_cache.is_empty() {
        // this really can't happen unless someone made a logic error
        return Err(BottleError::IncompleteFileArchive);
    }

    bottle_reader.close()
}