async-mtzip 0.0.3

A library for making zip archives with multithreaded async compression
Documentation
use std::io::{Seek, Write};
use std::path::PathBuf;

use cfg_if::cfg_if;
use tokio::fs::read_dir;
use tokio::io::{AsyncSeek, AsyncWrite};

use super::extra_field::ExtraFields;
use crate::CompressionType;

/// Magic number that opens every local file header; its little-endian bytes are `PK\x03\x04`.
const LOCAL_FILE_HEADER_SIGNATURE: u32 = 0x04034B50;
/// Magic number that opens every central directory entry; its little-endian bytes are
/// `PK\x01\x02`.
const CENTRAL_FILE_HEADER_SIGNATURE: u32 = 0x02014B50;

/// Value written to the "version needed to extract" field of both header kinds.
// NOTE(review): presumably 20 means specification version 2.0, using the same encoding as the
// "62" (= 6.2) in `VERSION_MADE_BY` below; confirm against the ZIP specification.
const VERSION_NEEDED_TO_EXTRACT: u16 = 20;
#[cfg(not(target_os = "windows"))]
/// OS - Unix assumed, id 3
/// Specification version 6.2
///
/// The OS id occupies the high byte and the specification version the low byte.
const VERSION_MADE_BY: u16 = (3 << 8) + 62;
#[cfg(target_os = "windows")]
/// OS - Windows, id 11 per Info-Zip spec
/// Specification version 6.2
///
/// The OS id occupies the high byte and the specification version the low byte.
const VERSION_MADE_BY: u16 = (11 << 8) + 62;

/// Default Unix mode for a file entry: regular-file type bits plus permissions `rw-r--r--`.
#[cfg(any(target_os = "linux", unix))]
#[allow(unused)]
pub(crate) const DEFAULT_UNIX_FILE_ATTRS: u16 = 0o100644;
/// Default Unix mode for a directory entry: directory type bits plus permissions `rwxr-xr-x`.
#[cfg(any(target_os = "linux", unix))]
#[allow(unused)]
pub(crate) const DEFAULT_UNIX_DIR_ATTRS: u16 = 0o040755;

/// Default Windows attribute bits for a file entry (128 = 0x80).
// NOTE(review): 0x80 matches the Win32 `FILE_ATTRIBUTE_NORMAL` value; confirm that is the intent.
#[cfg(target_os = "windows")]
pub(crate) const DEFAULT_WINDOWS_FILE_ATTRS: u16 = 128;
/// Default Windows attribute bits for a directory entry (16 = 0x10).
// NOTE(review): 0x10 matches the Win32 `FILE_ATTRIBUTE_DIRECTORY` value; confirm that is the
// intent.
#[cfg(target_os = "windows")]
pub(crate) const DEFAULT_WINDOWS_DIR_ATTRS: u16 = 16;

/// Set bit 11 to indicate that the file names are in UTF-8, because all strings in rust are valid
/// UTF-8
///
/// This value is written unchanged into both the local file header and the central directory
/// entry of every file.
const GENERAL_PURPOSE_BIT_FLAG: u16 = 1 << 11;

/// One archive entry that is ready to be written: header metadata plus the payload bytes.
///
/// The length of `data` is what the writers in this file record as the entry's compressed size,
/// so `data` is expected to already be in its final (compressed or stored) form.
#[derive(Debug)]
pub struct ZipFile {
    /// Metadata serialized into the local file header.
    pub header: ZipFileHeader,
    /// Payload bytes written verbatim right after the local header, file name and extra fields.
    pub data: Vec<u8>,
}

/// Newtype around the receiving half of a Tokio mpsc channel that yields [`ZipFile`] values.
#[derive(Debug)]
pub struct TokioReceiveZipFile(pub tokio::sync::mpsc::Receiver<ZipFile>);

/// Per-entry metadata shared by the local file header and the central directory entry.
#[derive(Debug)]
pub struct ZipFileHeader {
    /// Compression method; serialized as a 16-bit value via an `as u16` cast.
    pub compression_type: CompressionType,
    /// CRC value written to the headers.
    // NOTE(review): presumably the CRC-32 of the uncompressed contents; confirm at the producer.
    pub crc: u32,
    /// Size of the entry before compression, in bytes.
    pub uncompressed_size: u32,
    /// Entry name; its UTF-8 bytes are written directly after the fixed-size header.
    pub filename: String,
    /// External attributes; only written to the central directory entry, not the local header.
    pub external_file_attributes: u32,
    /// Extra fields appended after the file name in both header kinds.
    pub extra_fields: ExtraFields,
}

/// What remains of a [`ZipFile`] after its payload has been written out: everything needed to
/// emit the matching central directory entry later.
#[derive(Debug)]
pub struct ZipFileNoData {
    /// Metadata of the entry, carried over unchanged from the original [`ZipFile`].
    pub header: ZipFileHeader,
    /// Stream position at which the entry's local file header was written.
    pub local_header_offset: u32,
    /// Number of payload bytes that were written for the entry.
    pub compressed_size: u32,
}

pub async fn dirs(dir: PathBuf) -> Result<Vec<PathBuf>, String> {
    let mut dirs = vec![dir];
    let mut files = vec![];
    while !dirs.is_empty() {
        let mut dir_iter = read_dir(dirs.remove(0))
            .await
            .map_err(|e| format!("read_dir error: {}", e))?;
        while let Some(entry) = dir_iter
            .next_entry()
            .await
            .map_err(|e| format!("next_entry error: {}", e))?
        {
            let entry_path_buf = entry.path();
            if entry_path_buf.is_dir() {
                dirs.push(entry_path_buf);
            } else {
                files.push(entry_path_buf);
            }
        }
    }
    Ok(files)
}

impl ZipFile {
    /// Default external attribute bits for a directory entry on the compilation target.
    ///
    /// Evaluates to `DEFAULT_WINDOWS_DIR_ATTRS` on Windows, to `DEFAULT_UNIX_DIR_ATTRS` on
    /// Linux/Unix targets and to `0` on every other target.
    pub(crate) const fn default_dir_attrs() -> u16 {
        cfg_if! {
            if #[cfg(target_os = "windows")] {
                DEFAULT_WINDOWS_DIR_ATTRS
            } else if #[cfg(any(target_os = "linux", unix))] {
                DEFAULT_UNIX_DIR_ATTRS
            } else {
                0
            }
        }
    }

    /// Writes the local file header and payload to `buf`, consuming the entry.
    ///
    /// Returns the [`ZipFileNoData`] needed to emit the central directory entry later; it
    /// records the stream position at which the local header started and the payload length.
    ///
    /// # Errors
    ///
    /// Fails with `InvalidInput` (before anything is written) when the payload or file name is
    /// too long for its header field, and propagates any error from querying the stream
    /// position or writing to `buf`.
    pub fn write_local_file_header_with_data_consuming<W: Write + Seek>(
        self,
        buf: &mut W,
    ) -> std::io::Result<ZipFileNoData> {
        // Validate up front so an oversized entry never reaches the output stream.
        let compressed_size = self.checked_compressed_size()?;
        let local_header_offset = super::stream_position_u32(buf)?;
        self.write_local_file_header_and_data(buf)?;
        Ok(ZipFileNoData {
            header: self.header,
            local_header_offset,
            compressed_size,
        })
    }

    /// Async counterpart of [`Self::write_local_file_header_with_data_consuming`] for Tokio
    /// writers.
    ///
    /// # Errors
    ///
    /// Fails with `InvalidInput` (before anything is written) when the payload or file name is
    /// too long for its header field, and propagates any error from querying the stream
    /// position or writing to `buf`.
    pub async fn write_local_file_header_with_data_consuming_with_tokio<
        W: AsyncWrite + AsyncSeek + Unpin,
    >(
        self,
        buf: &mut W,
    ) -> std::io::Result<ZipFileNoData> {
        // Validate up front so an oversized entry never reaches the output stream.
        let compressed_size = self.checked_compressed_size()?;
        let local_header_offset = super::stream_position_u32_with_tokio(buf).await?;
        self.write_local_file_header_and_data_with_tokio(buf)
            .await?;
        Ok(ZipFileNoData {
            header: self.header,
            local_header_offset,
            compressed_size,
        })
    }

    /// Size in bytes of the fixed part of a local file header.
    const LOCAL_FILE_HEADER_LEN: usize = 30;

    /// Returns the payload length as the 32-bit value stored in the headers.
    ///
    /// A plain `as u32` cast would silently truncate in release builds and produce a corrupt
    /// archive, so an oversized payload is reported as an `InvalidInput` error instead.
    fn checked_compressed_size(&self) -> std::io::Result<u32> {
        if self.data.len() > u32::MAX as usize {
            return Err(std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                "compressed data length does not fit in the 32-bit ZIP header field",
            ));
        }
        Ok(self.data.len() as u32)
    }

    /// Serializes the fixed 30-byte local file header into a stack buffer.
    ///
    /// Shared by the blocking and the Tokio writer so both emit identical bytes.
    fn build_local_file_header(&self) -> std::io::Result<[u8; Self::LOCAL_FILE_HEADER_LEN]> {
        let compressed_size = self.checked_compressed_size()?;
        if self.header.filename.len() > u16::MAX as usize {
            return Err(std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                "file name length does not fit in the 16-bit ZIP header field",
            ));
        }
        let filename_len = self.header.filename.len() as u16;

        let mut header = [0_u8; Self::LOCAL_FILE_HEADER_LEN];
        {
            // Writing through a `&mut [u8]` advances the slice, so every field lands right
            // behind the previous one.
            let mut cursor: &mut [u8] = &mut header;

            // signature
            cursor.write_all(&LOCAL_FILE_HEADER_SIGNATURE.to_le_bytes())?;
            // version needed to extract
            cursor.write_all(&VERSION_NEEDED_TO_EXTRACT.to_le_bytes())?;
            // general purpose bit flag
            cursor.write_all(&GENERAL_PURPOSE_BIT_FLAG.to_le_bytes())?;
            // compression type
            cursor.write_all(&(self.header.compression_type as u16).to_le_bytes())?;
            // Last modification time // moved to extra fields
            cursor.write_all(&0_u16.to_le_bytes())?;
            // Last modification date // moved to extra fields
            cursor.write_all(&0_u16.to_le_bytes())?;
            // crc
            cursor.write_all(&self.header.crc.to_le_bytes())?;
            // Compressed size
            cursor.write_all(&compressed_size.to_le_bytes())?;
            // Uncompressed size
            cursor.write_all(&self.header.uncompressed_size.to_le_bytes())?;
            // Filename size
            cursor.write_all(&filename_len.to_le_bytes())?;
            // extra field size
            cursor.write_all(
                &self
                    .header
                    .extra_fields
                    .data_length::<false>()
                    .to_le_bytes(),
            )?;
        }

        Ok(header)
    }

    /// Writes the local file header, file name, extra fields and payload to `buf`, in that order.
    ///
    /// # Errors
    ///
    /// Fails with `InvalidInput` (before anything is written) when the payload or file name is
    /// too long for its header field, and propagates any write error from `buf`.
    pub fn write_local_file_header_and_data<W: Write>(&self, buf: &mut W) -> std::io::Result<()> {
        // The fixed part is assembled in memory first and written with a single call.
        let header = self.build_local_file_header()?;
        buf.write_all(&header)?;

        // Filename
        buf.write_all(self.header.filename.as_bytes())?;
        // Extra field
        self.header.extra_fields.write::<_, false>(buf)?;

        // Data
        buf.write_all(&self.data)?;

        Ok(())
    }

    /// Async counterpart of [`Self::write_local_file_header_and_data`] for Tokio writers.
    ///
    /// # Errors
    ///
    /// Fails with `InvalidInput` (before anything is written) when the payload or file name is
    /// too long for its header field, and propagates any write error from `buf`.
    pub async fn write_local_file_header_and_data_with_tokio<W: AsyncWrite + Unpin>(
        &self,
        buf: &mut W,
    ) -> std::io::Result<()> {
        use tokio::io::AsyncWriteExt;

        // The fixed part is assembled in memory first and written with a single call.
        let header = self.build_local_file_header()?;
        buf.write_all(&header).await?;

        // Filename
        buf.write_all(self.header.filename.as_bytes()).await?;

        // Extra field
        self.header
            .extra_fields
            .write_with_tokio::<_, false>(buf)
            .await?;

        // Data
        buf.write_all(&self.data).await?;

        Ok(())
    }

    /// Creates an entry that represents a directory.
    ///
    /// `name` gets a trailing `/` appended unless it already ends with `/` or `\`. The entry is
    /// stored uncompressed with no payload, a zero CRC and a zero size. `external_attributes`
    /// is placed in the upper 16 bits of the external file attributes field.
    #[inline]
    pub fn directory(
        mut name: String,
        extra_fields: ExtraFields,
        external_attributes: u16,
    ) -> Self {
        if !(name.ends_with('/') || name.ends_with('\\')) {
            name.push('/');
        }
        Self {
            header: ZipFileHeader {
                compression_type: CompressionType::Stored,
                crc: 0,
                uncompressed_size: 0,
                filename: name,
                external_file_attributes: (external_attributes as u32) << 16,
                extra_fields,
            },
            data: vec![],
        }
    }
}

impl ZipFileNoData {
    /// Size in bytes of the fixed part of a central directory entry.
    const CENTRAL_DIR_ENTRY_LEN: usize = 46;

    /// Serializes the fixed 46-byte central directory entry into a stack buffer.
    ///
    /// Shared by the blocking and the Tokio writer so both emit identical bytes.
    fn build_central_directory_entry(
        &self,
    ) -> std::io::Result<[u8; Self::CENTRAL_DIR_ENTRY_LEN]> {
        // A plain `as u16` cast would silently truncate in release builds and corrupt the
        // archive, so an over-long name is reported as an error instead.
        if self.header.filename.len() > u16::MAX as usize {
            return Err(std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                "file name length does not fit in the 16-bit ZIP header field",
            ));
        }
        let filename_len = self.header.filename.len() as u16;

        let mut entry = [0_u8; Self::CENTRAL_DIR_ENTRY_LEN];
        {
            // Writing through a `&mut [u8]` advances the slice, so every field lands right
            // behind the previous one.
            let mut cursor: &mut [u8] = &mut entry;

            // signature
            cursor.write_all(&CENTRAL_FILE_HEADER_SIGNATURE.to_le_bytes())?;
            // version made by
            cursor.write_all(&VERSION_MADE_BY.to_le_bytes())?;
            // version needed to extract
            cursor.write_all(&VERSION_NEEDED_TO_EXTRACT.to_le_bytes())?;
            // general purpose bit flag
            cursor.write_all(&GENERAL_PURPOSE_BIT_FLAG.to_le_bytes())?;
            // compression type
            cursor.write_all(&(self.header.compression_type as u16).to_le_bytes())?;
            // Last modification time // moved to extra fields
            cursor.write_all(&0_u16.to_le_bytes())?;
            // Last modification date // moved to extra fields
            cursor.write_all(&0_u16.to_le_bytes())?;
            // crc
            cursor.write_all(&self.header.crc.to_le_bytes())?;
            // Compressed size
            cursor.write_all(&self.compressed_size.to_le_bytes())?;
            // Uncompressed size
            cursor.write_all(&self.header.uncompressed_size.to_le_bytes())?;
            // Filename size
            cursor.write_all(&filename_len.to_le_bytes())?;
            // extra field size
            cursor.write_all(&self.header.extra_fields.data_length::<true>().to_le_bytes())?;
            // comment size
            cursor.write_all(&0_u16.to_le_bytes())?;
            // disk number start
            cursor.write_all(&0_u16.to_le_bytes())?;
            // internal file attributes
            cursor.write_all(&0_u16.to_le_bytes())?;
            // external file attributes
            cursor.write_all(&self.header.external_file_attributes.to_le_bytes())?;
            // relative offset of local header
            cursor.write_all(&self.local_header_offset.to_le_bytes())?;
        }

        Ok(entry)
    }

    /// Writes this entry's central directory record to `buf`: the fixed 46-byte part, then the
    /// file name, then the extra fields.
    ///
    /// # Errors
    ///
    /// Fails with `InvalidInput` (before anything is written) when the file name is longer than
    /// `u16::MAX` bytes, and propagates any write error from `buf`.
    pub fn write_central_directory_entry<W: Write>(&self, buf: &mut W) -> std::io::Result<()> {
        // The fixed part is assembled in memory first and written with a single call.
        let entry = self.build_central_directory_entry()?;
        buf.write_all(&entry)?;

        // Filename
        buf.write_all(self.header.filename.as_bytes())?;
        // Extra field
        self.header.extra_fields.write::<_, true>(buf)?;

        Ok(())
    }

    /// Async counterpart of [`Self::write_central_directory_entry`] for Tokio writers.
    ///
    /// # Errors
    ///
    /// Fails with `InvalidInput` (before anything is written) when the file name is longer than
    /// `u16::MAX` bytes, and propagates any write error from `buf`.
    pub async fn write_central_directory_entry_with_tokio<W: AsyncWrite + Unpin>(
        &self,
        buf: &mut W,
    ) -> std::io::Result<()> {
        use tokio::io::AsyncWriteExt;

        // The fixed part is assembled in memory first and written with a single call.
        let entry = self.build_central_directory_entry()?;
        buf.write_all(&entry).await?;

        // Filename
        buf.write_all(self.header.filename.as_bytes()).await?;
        // Extra field
        self.header
            .extra_fields
            .write_with_tokio::<_, true>(buf)
            .await?;

        Ok(())
    }
}