//! sett 0.4.0
//!
//! Rust port of sett (data compression, encryption and transfer tool).
//! See the project documentation for details.
use std::io;

use super::{ZipFile, error, spec};
use crate::package::source::{Either, IntoAsyncRead, PackageStream};

/// Reader over the files contained in a ZIP archive backed by a source `S`.
#[derive(Clone, Debug)]
pub(crate) struct ZipReader<S> {
    // Entries parsed from the archive's central directory.
    files: Vec<ZipFile>,
    // Offset (in bytes) at which the ZIP archive starts in the original file
    // (ZIP file may be prepended with arbitrary data).
    zip_offset: u64,
    // Underlying data source the archive entries are read from.
    pub(crate) source: S,
}

/// Parses the local file header at the reader's current position and
/// returns the header's size in bytes.
pub(crate) async fn local_header_size<R: tokio::io::AsyncBufRead + Unpin>(
    reader: &mut R,
) -> Result<u64, error::HeaderParseError> {
    spec::LocalFileHeader::parse(reader)
        .await
        .map(|header| header.header_size as u64)
}

impl<A> From<std::io::Error> for Either<A, std::io::Error> {
    fn from(value: std::io::Error) -> Self {
        Self::Or(value)
    }
}

impl PackageStream for std::path::PathBuf {
    type Error = std::io::Error;
    type FileStream = tokio::io::Take<tokio::io::BufReader<tokio::fs::File>>;

    /// Opens the whole file as a synchronous buffered reader. The returned
    /// offset is 0 because the reader covers the entire file.
    async fn open_central_directory_stream(
        &self,
    ) -> Result<(impl std::io::Read + std::io::Seek, u64), Self::Error> {
        Ok((io::BufReader::new(std::fs::File::open(self)?), 0))
    }

    /// Opens a stream over a single archive member.
    ///
    /// `offset` points at the member's local file header; the header is
    /// parsed to find where the data begins, and the returned stream is
    /// limited to `total_size` bytes of that data.
    async fn open_file_header_stream(
        &self,
        offset: u64,
        total_size: u64,
    ) -> Result<Self::FileStream, Either<crate::zip::error::HeaderParseError, Self::Error>> {
        // Read the local file header to find where the file contents starts
        let mut reader = tokio::io::BufReader::new(tokio::fs::File::open(self).await?);
        use tokio::io::AsyncSeekExt as _;
        reader.seek(io::SeekFrom::Start(offset)).await?;
        // NOTE(review): assumes `local_header_size` is provided by the
        // `PackageStream` trait (likely delegating to the free function in
        // this module) — confirm against the trait definition.
        let header_size = Self::local_header_size(&mut reader).await?;
        // Skip past the local header to the first byte of the file contents.
        let offset = offset + header_size;
        reader.seek(io::SeekFrom::Start(offset)).await?;
        use tokio::io::AsyncReadExt as _;
        Ok(reader.take(total_size))
    }

    /// Opens the file as an async stream over its full contents.
    async fn open_raw(&self) -> Result<Self::FileStream, Self::Error> {
        // Take the length from the already-open handle instead of calling
        // `size()` (which would open the file a second time and introduce a
        // TOCTOU window between the two opens).
        let file = tokio::fs::File::open(self).await?;
        let len = file.metadata().await?.len();
        use tokio::io::AsyncReadExt as _;
        Ok(tokio::io::BufReader::new(file).take(len))
    }

    /// Returns the file size in bytes.
    async fn size(&self) -> Result<u64, Self::Error> {
        // Stat the path directly; no need to open the file just for metadata.
        Ok(tokio::fs::metadata(self).await?.len())
    }

    /// Returns the final path component, falling back to the whole path when
    /// there is no file name (e.g. a path ending in `..`).
    fn name(&self) -> String {
        self.file_name()
            .unwrap_or(self.as_os_str())
            .to_string_lossy()
            .to_string()
    }
}

impl<S> ZipReader<S> {
    /// Iterates over the names of all files stored in the archive.
    pub(crate) fn file_names(&self) -> impl Iterator<Item = &str> {
        self.files.iter().map(|entry| entry.name.as_str())
    }
}

/// Splits the combined header/source error into the dedicated
/// `ReadStreamError` variants.
impl<E> From<Either<error::HeaderParseError, E>> for error::ReadStreamError<E> {
    fn from(value: Either<error::HeaderParseError, E>) -> Self {
        match value {
            Either::Either(header_error) => Self::Header(header_error),
            Either::Or(source_error) => Self::Source(source_error),
        }
    }
}

impl<Src> ZipReader<Src>
where
    Src: PackageStream,
{
    pub(crate) async fn open(source: Src) -> Result<Self, error::OpenStreamError<Src::Error>> {
        let (files, zip_offset) = {
            let (mut reader, offset) = source.open_central_directory_stream().await?;
            parse_zip_central_directory(&mut reader, offset).map_err(error::OpenStreamError::Zip)?
        };
        Ok(Self {
            files,
            zip_offset,
            source,
        })
    }

    /// Opens a file from the ZIP archive as data stream.
    ///
    /// Returns a tuple of a reader and a file size.
    pub(crate) async fn open_file(
        &self,
        name: &str,
    ) -> Result<(Src::FileStream, u64), error::ReadStreamError<Src::Error>> {
        let file_info = self
            .files
            .iter()
            .find(|f| f.name == name)
            .ok_or_else(|| error::ReadStreamError::NotFound(name.to_string()))?;
        if file_info.compression_method != 0 {
            return Err(error::ReadStreamError::InvalidCompressionMethod);
        }
        let offset = file_info.offset + self.zip_offset;
        let file_stream = self
            .source
            .open_file_header_stream(offset, file_info.size)
            .await?;
        Ok((file_stream, file_info.size))
    }

    pub(crate) async fn read_file(
        &self,
        name: &str,
    ) -> Result<Vec<u8>, error::ReadStreamError<Src::Error>> {
        let (reader, size) = self.open_file(name).await?;
        let mut reader = IntoAsyncRead::into_async_reader(reader);
        let mut buffer = Vec::with_capacity(size as usize);
        use tokio::io::AsyncReadExt;
        reader.read_to_end(&mut buffer).await?;
        Ok(buffer)
    }
}

/// Finds and parses ZIP central directory
///
/// The provided `reader` doesn't need to contain the entire ZIP archive.
/// It can be limited only to the end part of the archive (containing all
/// ZIP data structures that follow the last data file). In such a case
/// `buffer_offset` must specify reader's position relative to the beginning
/// of the ZIP file.
///
/// Returns the parsed file entries together with the offset (in bytes) at
/// which the ZIP data starts within the original file (non-zero when the
/// archive is prepended with arbitrary data).
fn parse_zip_central_directory<R: io::Read + io::Seek>(
    reader: &mut R,
    buffer_offset: u64,
) -> Result<(Vec<ZipFile>, u64), error::ZipError> {
    // Translates an offset relative to the ZIP file start into a position
    // within `reader`; fails if the offset lies before the buffered region.
    let with_buffer_offset = |offset: u64| {
        offset
            .checked_sub(buffer_offset)
            .ok_or(error::ZipError::InvalidOffset)
    };

    // Locate, then parse, the end-of-central-directory record.
    spec::CentralDirectoryEnd::find(reader)?;
    let cde = spec::CentralDirectoryEnd::parse(reader)?;

    // If present the Zip64 central directory end locator is at 20 bytes
    // before the central directory end.
    reader.seek(io::SeekFrom::End(
        -((cde.size() + spec::Zip64CentralDirectoryEndLocator::SIZE) as i64),
    ))?;
    let (number_of_records, central_directory_offset, zip_offset) = if let Some(zip64_end) =
        spec::Zip64CentralDirectoryEndLocator::parse(reader)?
    {
        // Zip64 archive: seek to where the locator says the Zip64
        // end-of-central-directory record is, then scan for its actual
        // position (the two differ when data is prepended to the archive).
        reader.seek(io::SeekFrom::Start(with_buffer_offset(
            zip64_end.zip64_central_directory_end_offset,
        )?))?;
        let zip64cde_actual_position =
            spec::Zip64CentralDirectoryEnd::find(reader)? + buffer_offset;
        let zip64cde = spec::Zip64CentralDirectoryEnd::parse(reader)?;
        if zip64cde.disk_number_of_records != zip64cde.total_number_of_records {
            return Err(error::ZipError::UnimplementedMultiDisk);
        }
        (
            zip64cde.total_number_of_records,
            zip64cde.central_directory_offset,
            // Actual minus recorded position = amount of prepended data.
            // NOTE(review): this subtraction underflows if the record is
            // found before its recorded offset — presumed impossible for a
            // well-formed archive; confirm.
            zip64cde_actual_position - zip64_end.zip64_central_directory_end_offset,
        )
    } else {
        // Classic (non-Zip64) archive: use the 32-bit fields of the
        // end-of-central-directory record instead.
        reader.seek(io::SeekFrom::Start(with_buffer_offset(
            cde.central_directory_offset as u64,
        )?))?;
        let central_directory_actual_offset = spec::CentralDirectoryHeader::find(reader)?;
        (
            cde.total_number_of_records as u64,
            cde.central_directory_offset as u64,
            // Same prepended-data computation as the Zip64 branch, based on
            // the actual position of the first central directory header.
            central_directory_actual_offset + buffer_offset - (cde.central_directory_offset as u64),
        )
    };
    // Seek to the first central directory header; recorded offsets are
    // relative to the ZIP start, hence the zip_offset shift.
    reader.seek(io::SeekFrom::Start(with_buffer_offset(
        zip_offset + central_directory_offset,
    )?))?;
    // Parse one header per recorded entry into our in-memory file list.
    let mut files = Vec::with_capacity(number_of_records as usize);
    for _ in 0..number_of_records {
        let header = spec::CentralDirectoryHeader::parse(reader)?;
        files.push(ZipFile {
            name: header.name.into_owned(),
            offset: header.offset,
            size: header.size,
            hasher: crc32fast::Hasher::new(),
            crc32: header.crc32,
            flags: header.flags,
            external_attributes: header.external_attributes,
            timestamp: header.modified,
            compression_method: header.compression_method,
        });
    }
    Ok((files, zip_offset))
}