remozipsy 0.2.0

Remote Zip Sync - sync remote zip to local fs
Documentation
use std::{fmt::Debug, future::Future, ops::RangeInclusive, pin::Pin};

use bytes::Bytes;
use zip_core::{
    Signature,
    raw::{
        CentralDirectoryHeader, EndOfCentralDirectory, EndOfCentralDirectoryFixed,
        parse::{Parse, find_next_signature},
    },
};

use crate::model::RemoteFileInfo;

/// Errors returned while locating and parsing the central directory of a
/// remote zip archive.
///
/// `E` is the error type produced by the caller-supplied fetch function.
#[derive(Debug, thiserror::Error)]
pub enum RemoteFetchError<E: Debug> {
    /// The caller-supplied fetch function returned an error; the original
    /// error is wrapped unchanged.
    #[error("Error during fetch: {0}")]
    Fetch(#[from] E),
    /// A file name in the central directory is not valid UTF-8 or contains a
    /// NUL (`'\0'`) character.
    #[error("Filename contains invalid UTF-8 or zero byte")]
    InvalidFileName,
    /// The End Of Central Directory signature was not found in the fetched
    /// bytes, or the record at that position could not be parsed.
    #[error("Remote Zip or configuration invalid, no EOCD found")]
    NoEocdFound,
    /// A central directory header was parsed but its signature check failed.
    #[error("Remote Zip invalid, invalid CentralDirectoryHeader signature")]
    InvalidCentralDirectoryHeaderSignature,
    /// Not a single central directory header could be read.
    #[error("Remote Zip invalid, no CentralDirectoryHeaders found")]
    NoCentralDirectoryHeaderFound,
}

/// Helper function to read [`RemoteFileInfo`] if you can provide zip size and a
/// fetch function.
///
/// The End Of Central Directory (EOCD) record is fetched first, then the
/// central directory it points to; every central directory entry with a
/// non-zero compressed size is converted into a [`RemoteFileInfo`].
///
/// # Parameters
///
/// * `zip_size`: total size of the remote zip in bytes.
/// * `max_eocd_size`: We have to guess the start of the EOCD, increasing this
///   number means we might fetch more unnecessary bytes, however if it's too
///   small we might miss it.
/// * `fetch`: called with an inclusive byte range of the remote zip and
///   expected to resolve to exactly those bytes.
///
/// # Errors
///
/// * [`RemoteFetchError::Fetch`] if `fetch` fails.
/// * [`RemoteFetchError::NoEocdFound`] if no EOCD record could be located.
/// * [`RemoteFetchError::InvalidCentralDirectoryHeaderSignature`] or
///   [`RemoteFetchError::NoCentralDirectoryHeaderFound`] if the central
///   directory is unusable.
/// * [`RemoteFetchError::InvalidFileName`] if an entry name is not valid UTF-8
///   or contains a NUL character.
pub async fn fetch_remote_file_info<F, FE>(
    zip_size: usize,
    max_eocd_size: usize,
    mut fetch: F,
) -> Result<Vec<RemoteFileInfo>, RemoteFetchError<FE>>
where
    FE: Debug,
    F: FnMut(RangeInclusive<usize>) -> Pin<Box<dyn Future<Output = Result<Bytes, FE>> + Send>>,
{
    let eocd = download_eocd::<F, FE>(zip_size, max_eocd_size, &mut fetch).await?;

    // Trace before the second request so the parsed EOCD is still visible in
    // the logs when fetching or parsing the central directory fails.
    tracing::trace!(?eocd, "eocd information");

    let cds = download_cds::<F, FE>(&mut fetch, &eocd).await?;

    let central_directory_offset = eocd
        .fixed
        .offset_of_start_of_central_directory_with_respect_to_the_starting_disk_number;

    cds.into_iter()
        // Ignore directories. Note that this skips every entry whose
        // compressed size is zero, whatever its name looks like.
        .filter(|cd| cd.fixed.compressed_size != 0)
        .map(|mut cd| {
            // `cd` is owned here, so move the name bytes out instead of
            // cloning them for every entry.
            let file_name = String::from_utf8(std::mem::take(&mut cd.file_name))
                .map_err(|_| RemoteFetchError::InvalidFileName)?;
            if file_name.contains('\0') {
                return Err(RemoteFetchError::InvalidFileName);
            }

            Ok(RemoteFileInfo {
                crc32: cd.fixed.crc_32,
                compressed_size: cd.fixed.compressed_size,
                uncompressed_size: cd.fixed.uncompressed_size,
                compression_method: cd.fixed.compression_method,
                file_name,
                start_offset: cd.fixed.relative_offset_of_local_header,
                extra_field_length: cd.fixed.extra_field_length,
                file_name_length: cd.fixed.file_name_length,
                offset_of_start_of_central_directory_with_respect_to_the_starting_disk_number:
                    central_directory_offset,
            })
        })
        .collect::<Result<Vec<_>, RemoteFetchError<FE>>>()
}

/// Fetches the tail of the remote zip and parses the End Of Central Directory
/// (EOCD) record out of it.
///
/// The last `max_eocd_size` bytes of the archive (or the whole archive when it
/// is smaller than that) are requested through `fetch`. The EOCD is the final
/// record of a zip, so when the signature occurs several times inside the
/// fetched window (e.g. by coincidence inside file data or central directory
/// bytes), the last occurrence that parses successfully wins.
///
/// # Errors
///
/// * [`RemoteFetchError::Fetch`] if `fetch` fails.
/// * [`RemoteFetchError::NoEocdFound`] if `zip_size` or `max_eocd_size` is
///   zero, if the signature does not occur in the fetched bytes, or if no
///   occurrence can be parsed as an EOCD record.
async fn download_eocd<F, FE>(
    zip_size: usize,
    max_eocd_size: usize,
    fetch: &mut F,
) -> Result<EndOfCentralDirectory, RemoteFetchError<FE>>
where
    FE: Debug,
    F: FnMut(RangeInclusive<usize>) -> Pin<Box<dyn Future<Output = Result<Bytes, FE>> + Send>>,
{
    // Neither an empty archive nor an empty search window can contain an
    // EOCD. Bail out instead of asking `fetch` for a byte that does not exist
    // (`0..=0` on an empty file) or for an inverted range.
    if zip_size == 0 || max_eocd_size == 0 {
        return Err(RemoteFetchError::NoEocdFound);
    }

    let approx_eocd_start = zip_size.saturating_sub(max_eocd_size);
    // `zip_size` is non-zero here, so the subtraction cannot underflow.
    let eocd_bytes = fetch(approx_eocd_start..=zip_size - 1).await?;

    let signature = EndOfCentralDirectoryFixed::END_OF_CENTRAL_DIR_SIGNATURE.to_le_bytes();

    // Walk over every signature occurrence in the window and remember the
    // last one that parses; earlier matches are overwritten by later ones.
    let mut found = None;
    let mut search_from = 0usize;
    while search_from.saturating_add(signature.len()) <= eocd_bytes.len() {
        // The returned position is relative to the slice handed in.
        let pos = match find_next_signature(&eocd_bytes.slice(search_from..), signature) {
            Some(relative) => search_from + relative,
            None => break,
        };

        let mut buf = &eocd_bytes[pos..];
        if let Ok(eocd) = EndOfCentralDirectory::from_buf(&mut buf) {
            found = Some(eocd);
        }

        // Continue right behind the start of this match.
        search_from = pos + 1;
    }

    found.ok_or(RemoteFetchError::NoEocdFound)
}

/// Fetches the central directory described by `eocd` and parses its headers.
///
/// The byte range is derived from the central directory offset and size
/// stored in the EOCD record. Headers are parsed one after another until
/// parsing fails or the fetched bytes are exhausted; whatever follows the
/// first header that fails to parse is ignored.
///
/// # Errors
///
/// * [`RemoteFetchError::Fetch`] if `fetch` fails.
/// * [`RemoteFetchError::InvalidCentralDirectoryHeaderSignature`] if a parsed
///   header fails its signature check.
/// * [`RemoteFetchError::NoCentralDirectoryHeaderFound`] if the EOCD reports
///   an empty central directory or no header could be parsed.
async fn download_cds<F, FE>(
    fetch: &mut F,
    eocd: &EndOfCentralDirectory,
) -> Result<Vec<CentralDirectoryHeader>, RemoteFetchError<FE>>
where
    FE: Debug,
    F: FnMut(RangeInclusive<usize>) -> Pin<Box<dyn Future<Output = Result<Bytes, FE>> + Send>>,
{
    let cd_start = eocd
        .fixed
        .offset_of_start_of_central_directory_with_respect_to_the_starting_disk_number as usize;
    let cd_size = eocd.fixed.size_of_the_central_directory as usize;

    // An empty central directory holds no headers. Return the same error the
    // parse loop below would end in, without sending `fetch` an inverted
    // range (`cd_start..=cd_start - 1`) or a bogus one-byte range (`0..=0`).
    if cd_size == 0 {
        return Err(RemoteFetchError::NoCentralDirectoryHeaderFound);
    }

    // The range is inclusive, hence the `- 1` on the end position.
    let cd_end = cd_start.saturating_add(cd_size).saturating_sub(1);

    let cds_bytes = fetch(cd_start..=cd_end).await?;

    // `from_buf` advances `buf`, so each iteration starts at the next header.
    let mut buf = &cds_bytes[..];
    let mut cds = Vec::new();
    while let Ok(cd) = CentralDirectoryHeader::from_buf(&mut buf) {
        if !cd.is_valid_signature() {
            return Err(RemoteFetchError::InvalidCentralDirectoryHeaderSignature);
        }
        cds.push(cd);
    }

    if cds.is_empty() {
        return Err(RemoteFetchError::NoCentralDirectoryHeaderFound);
    }

    Ok(cds)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture archive, embedded at compile time.
    const ZIPFILE: &[u8] = include_bytes!("../../tests/testfiles/example1.zip");

    /// Runs the complete lookup against the in-memory fixture; `fetch` is
    /// implemented by slicing the embedded bytes.
    #[test]
    fn test_helper_fetch_remote_file_info() {
        let archive = Bytes::from_static(ZIPFILE);
        let archive_len = archive.len();

        // The EOCD search window must not exceed the archive itself.
        let eocd_window: usize = archive_len.min(10_000);

        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .unwrap();

        let outcome = runtime.block_on(async move {
            fetch_remote_file_info(archive_len, eocd_window, move |range| {
                let chunk = archive.slice(range);
                Box::pin(async move { Result::<_, ()>::Ok(chunk) })
            })
            .await
        });

        assert!(outcome.is_ok());
        let entries = outcome.unwrap();

        // The first entry is checked field by field.
        let expected_first = RemoteFileInfo {
            crc32: 1320751325,
            compressed_size: 106,
            uncompressed_size: 561,
            compression_method: 8,
            file_name: "directory/subfile.txt".to_string(),
            start_offset: 68,
            file_name_length: 21,
            extra_field_length: 24,
            offset_of_start_of_central_directory_with_respect_to_the_starting_disk_number: 13091,
        };
        assert_eq!(entries[0], expected_first);

        // For the next two entries only name and compression method matter.
        for (index, name, method) in [(1usize, "stored.txt", 0), (2usize, "deflate.txt", 8)] {
            assert_eq!(entries[index].file_name.as_str(), name);
            assert_eq!(entries[index].compression_method, method);
        }

        assert_eq!(entries.len(), 5);
    }
}