// bitbottle 0.10.0
//
// a modern archive file format
// Documentation
use digest::generic_array::GenericArray;
use std::collections::{HashMap, HashSet};
use std::fs;
use std::io::Read;
use std::os::unix::fs::{FileExt, PermissionsExt};
use std::path::{Path, PathBuf};

use crate::bottle_error::{BottleError, BottleResult};
use crate::file_atlas::FileAtlas;
use crate::file_list::FileListRef;
use crate::hashing::HashingOutput;
use crate::read_archive::{ArchiveReader, ArchiveReaderEvent, ArchiveReaderOptions};


/// An `ArchiveExpander` whose filter type is a plain function pointer.
/// This is the type that `ArchiveExpander::new` returns, with no filter set.
pub type SimpleArchiveExpander = ArchiveExpander<fn (&FileAtlas) -> bool>;

/// Wrap an ArchiveReader so that it also expands the archive into a folder.
/// Works by handling ArchiveReaderEvents as they occur, but still emitting
/// them as it goes.
///
/// `F` is the type of the optional file filter: a predicate that is handed
/// a `FileAtlas` and returns whether block data should be written for it.
pub struct ArchiveExpander<F> where F: FnMut (&FileAtlas) -> bool {
    // folder that every path from the archive is joined onto
    dest_path: PathBuf,
    // optional predicate; when present, blocks are only written to files
    // for which it returns true
    file_filter: Option<F>,

    // the wrapped reader that produces the events we handle and re-emit
    reader: ArchiveReader,

    // copy of the file list from ArchiveReader
    // (None until the list arrives, and again once expansion has finished)
    file_list: Option<FileListRef>,
    // cache open files, and which blocks are missing
    // (keyed by destination path; an entry is dropped once its set is empty)
    file_cache: HashMap<PathBuf, (fs::File, HashSet<HashingOutput>)>,
}

impl<F: FnMut (&FileAtlas) -> bool> ArchiveExpander<F> {
    pub fn new(
        reader: Box<dyn Read>,
        dest_path: &Path,
        options: ArchiveReaderOptions,
    ) -> BottleResult<SimpleArchiveExpander> {
        let reader = ArchiveReader::new(reader, options)?;
        Ok(ArchiveExpander {
            dest_path: dest_path.to_path_buf(),
            file_filter: None,
            reader,
            file_list: None,
            file_cache: HashMap::new(),
        })
    }

    /// Build an expander that consults `file_filter` (when it is `Some`)
    /// before writing block data to a file.
    ///
    /// `reader` supplies the archive bytes, `dest_path` is the folder to
    /// expand into, and `options` is passed on to the `ArchiveReader`.
    ///
    /// # Errors
    ///
    /// Fails if the underlying `ArchiveReader` cannot be constructed.
    pub fn new_with_filter(
        reader: Box<dyn Read>,
        dest_path: &Path,
        file_filter: Option<F>,
        options: ArchiveReaderOptions,
    ) -> BottleResult<ArchiveExpander<F>> {
        let expander = ArchiveExpander {
            // the only fallible step, so it comes first
            reader: ArchiveReader::new(reader, options)?,
            dest_path: dest_path.to_owned(),
            file_filter,
            // nothing is known about the archive contents yet
            file_list: None,
            file_cache: HashMap::new(),
        };
        Ok(expander)
    }

    /// Handle the archive's file list, which is the first thing we get.
    ///
    /// Creates every folder named in the list under `dest_path` and applies
    /// its recorded permissions, creates every empty regular file (those
    /// will never receive block data), and then keeps the list so that later
    /// blocks can be matched to the files containing them.
    ///
    /// # Errors
    ///
    /// Stops at the first filesystem operation that fails and returns its
    /// error; `self.file_list` is left untouched in that case.
    fn expand_file_list(&mut self, file_list: FileListRef) -> BottleResult<()> {
        // create folders
        // NOTE(review): permissions are applied immediately, so a folder
        // recorded with a non-writable mode could block creation of the
        // files inside it later on -- confirm whether this needs deferring.
        for atlas in file_list.borrow().files.iter().filter(|atlas| atlas.borrow().is_folder) {
            let atlas = atlas.borrow();
            let path = self.dest_path.join(&atlas.normalized_path);
            fs::create_dir_all(&path)?;
            fs::set_permissions(&path, PermissionsExt::from_mode(atlas.perms))?;
        }

        // create any zero-length files that have no block data
        // NOTE(review): `file_filter` is not consulted here, only when
        // blocks are written -- confirm that this is intended.
        for atlas in file_list.borrow().files.iter().filter(|atlas| {
            let atlas = atlas.borrow();
            !atlas.is_folder && atlas.symlink_target.is_none() && atlas.size == 0
        }) {
            let atlas = atlas.borrow();
            let path = self.dest_path.join(&atlas.normalized_path);

            // in case we had to create this path by defanging:
            if let Some(p) = path.parent() {
                fs::create_dir_all(p)?;
            }

            // `File::create` already leaves the file empty, so no `set_len`
            // is needed. Apply the recorded permissions, exactly as is done
            // for files that do have block data.
            let file = fs::File::create(&path)?;
            file.set_permissions(PermissionsExt::from_mode(atlas.perms))?;
        }

        // we own this handle, so it can be stored without another clone
        self.file_list = Some(file_list);
        Ok(())
    }

    /// Write one block of data into every file that contains it.
    ///
    /// Looks `hash` up in the file list's block map and, for each file found
    /// there that the filter (if any) accepts, writes `data` into it.
    /// Returns how many files the block was written to; this is 0 when no
    /// file list has been received yet or when no file uses the block.
    ///
    /// # Errors
    ///
    /// Returns the first error reported by `write_block`.
    fn process_block(&mut self, hash: &HashingOutput, data: &[u8]) -> BottleResult<usize> {
        // Take our own handle to the list so that `self` stays free to be
        // borrowed mutably by `write_block` inside the loop.
        let Some(file_list) = self.file_list.clone() else {
            return Ok(0);
        };

        let mut written = 0;
        for entry in file_list.borrow().file_map.get(hash).into_iter().flatten() {
            let atlas = entry.borrow();

            // with no filter installed, every file is wanted
            let wanted = match self.file_filter.as_mut() {
                Some(accepts) => accepts(&atlas),
                None => true,
            };

            if wanted {
                self.write_block(&atlas, hash, data)?;
                written += 1;
            }
        }
        Ok(written)
    }

    /// Write `data` into the file described by `atlas`, at every offset
    /// where the block identified by `hash` occurs in that file.
    ///
    /// The first block written to a file creates it (and its parent folder),
    /// sizes it to `atlas.size`, applies `atlas.perms`, and caches the open
    /// handle together with the set of block hashes the file still needs.
    /// When that set becomes empty the file is synced and dropped from the
    /// cache.
    ///
    /// # Errors
    ///
    /// Returns the first filesystem error encountered.
    fn write_block(&mut self, atlas: &FileAtlas, hash: &HashingOutput, data: &[u8]) -> BottleResult<()> {
        use std::collections::hash_map::Entry;

        let target = self.dest_path.join(&atlas.normalized_path);

        // reuse the cached handle for this file, or open it on first contact
        let (handle, pending) = match self.file_cache.entry(target.clone()) {
            Entry::Occupied(slot) => slot.into_mut(),
            Entry::Vacant(slot) => {
                // the parent may be missing if the path was produced by defanging
                if let Some(parent) = target.parent() {
                    fs::create_dir_all(parent)?;
                }

                let handle = fs::File::create(&target)?;
                handle.set_len(atlas.size)?;
                handle.set_permissions(PermissionsExt::from_mode(atlas.perms))?;

                // every block hash this file is made of; we tick them off as they arrive
                let pending: HashSet<HashingOutput> = atlas.contents.blocks.iter().map(|b| b.hash).collect();
                slot.insert((handle, pending))
            }
        };

        // the same block may occur at several places in one file
        for (offset, _block) in atlas.contents.offsets_of(hash) {
            handle.write_all_at(data, offset)?;
        }

        // once nothing is outstanding, flush the file and let the handle go
        pending.remove(hash);
        if pending.is_empty() {
            handle.sync_all()?;
            self.file_cache.remove(&target);
        }

        Ok(())
    }

    fn finish_expanding(&mut self) -> BottleResult<()> {
        let file_list = self.file_list.as_ref().unwrap().clone();

        // create symlinks
        for atlas in file_list.borrow().files.iter().filter(|atlas| atlas.borrow().symlink_target.is_some()) {
            let atlas = atlas.borrow();
            let path = self.dest_path.join(&atlas.normalized_path);
            std::os::unix::fs::symlink(atlas.symlink_target.as_ref().unwrap(), path)?;
        }

        if !self.file_cache.is_empty() {
            // this really can't happen unless someone made a logic error
            return Err(BottleError::IncompleteFileArchive);
        }

        // clear out state, we're done
        self.file_list = None;

        Ok(())
    }
}

impl<F: FnMut (&FileAtlas) -> bool> Iterator for ArchiveExpander<F> {
    type Item = ArchiveReaderEvent;

    // pass events through, but handle file & block events by unpacking them into a folder
    fn next(&mut self) -> Option<ArchiveReaderEvent> {
        let event = self.reader.next()?;
        let event = match event {
            ArchiveReaderEvent::FileListDone { ref file_list, .. } => {
                self.expand_file_list(file_list.clone()).map(|_| event)
            },
            ArchiveReaderEvent::FileBlock { ref hash, ref data, size, .. } if data.is_some() => {
                self.process_block(GenericArray::from_slice(hash), data.as_ref().unwrap()).map(|write_count| {
                    ArchiveReaderEvent::FileBlockWritten { size, write_count }
                })
            },
            ArchiveReaderEvent::BottleEnd(_) if self.file_list.is_some() => {
                self.finish_expanding().map(|_| event)
            }

            _ => Ok(event),
        };
        Some(event.unwrap_or_else(ArchiveReaderEvent::Error))
    }
}