remozipsy 0.2.0

Remote Zip Sync - sync remote zip to local fs
Documentation
use zip_core::{raw::LocalFileHeaderFixed, structs::CompressionMethod};

use crate::model::{Config, Error, RemoteFileInfo};

use super::{FileSystem, RemoteZip};

/// Compression methods that can be handled during file extraction.
///
/// A separate enum for the supported compression methods lets us rely on the
/// check made during construction later on, during file extraction: a value
/// of this type can only be obtained through the fallible conversion from
/// `CompressionMethod`, so holding one means the method was accepted.
#[cfg_attr(test, derive(Clone))]
#[derive(Debug)]
pub(super) enum SupportedCompressionMethod {
    /// Counterpart of `CompressionMethod::Stored`.
    Stored,
    /// Counterpart of `CompressionMethod::Deflated`; only compiled in when the
    /// `deflate` cargo feature is enabled.
    #[cfg(feature = "deflate")]
    Deflated,
}

impl TryFrom<CompressionMethod> for SupportedCompressionMethod {
    /// A rejected method is handed back unchanged so the caller can report it.
    type Error = CompressionMethod;

    /// Narrows a parsed `CompressionMethod` down to one of the supported
    /// methods.
    ///
    /// # Errors
    ///
    /// Returns the original `value` when no matching supported variant is
    /// compiled in (`Deflated` requires the `deflate` feature).
    fn try_from(value: CompressionMethod) -> Result<Self, Self::Error> {
        let supported = match value {
            CompressionMethod::Stored => Self::Stored,
            #[cfg(feature = "deflate")]
            CompressionMethod::Deflated => Self::Deflated,
            unsupported => return Err(unsupported),
        };

        Ok(supported)
    }
}

/// Owned, validated copy of a `RemoteFileInfo` entry, extended with the byte
/// offset up to which the entry has to be downloaded.
///
/// We need to calculate the `end_offset_inclusive` ourselves, so the relevant
/// fields are copied into this struct instead of using the remote listing
/// entry directly.
#[cfg_attr(test, derive(Clone))]
#[derive(Debug)]
pub(super) struct ProcessedRemoteFileInfo {
    /// Copied from the `crc32` field of the remote listing entry.
    pub(super) crc32: u32,
    /// Copied from the `compressed_size` field of the remote listing entry.
    pub(super) compressed_size: u32,
    /// Copied from the `uncompressed_size` field of the remote listing entry.
    pub(super) uncompressed_size: u32,
    /// Compression method of the entry, already checked to be supported.
    pub(super) compression_method: SupportedCompressionMethod,
    /// Clone of the `file_name` field of the remote listing entry.
    pub(super) file_name: String,
    /// Offset at which the entry starts; this points to the LocalHeader, not
    /// to the compressed data itself.
    pub(super) start_offset: u32,
    /// Calculated. It is not simply `start_offset + compressed_size`, because
    /// the `start_offset` points to the LocalHeader, which is of unknown size.
    /// This is an inclusive end!
    /// It's permitted for this to include too many bytes, but never too few.
    pub(super) end_offset_inclusive: u64,
}

/// Fetches the remote file listing and determines, for every entry that has
/// compressed data, the inclusive byte range end needed to download it.
///
/// The listing is sorted by `start_offset`, so the start of the following
/// entry (or the start of the central directory for the last entry) is an
/// upper bound for where the current entry ends. When
/// `config.assume_cd_contains_lh_content` is set, the end is instead computed
/// from the fixed local header size plus the name length, extra field length
/// and compressed size taken from the listing, capped by the start of the
/// following entry where there is one.
///
/// Entries whose `compressed_size` is zero are skipped. The intent is to
/// ignore directories; NOTE(review): this also skips zero-length stored
/// files, confirm that this is handled (or not needed) elsewhere.
///
/// # Errors
///
/// * `Error::Remote` if fetching the remote file listing fails.
/// * `Error::InvalidCompressionMethod` if an entry's raw compression method
///   cannot be converted into a `CompressionMethod`.
/// * `Error::UnsupportedCompressionMethod` if the method is valid but has no
///   `SupportedCompressionMethod` counterpart.
pub(super) async fn rfile_infos<R, F>(
    remote: &R,
    config: &Config,
) -> Result<Vec<ProcessedRemoteFileInfo>, Error<R::Error, F::Error>>
where
    R: RemoteZip,
    F: FileSystem,
{
    let mut rfi = remote.fetch_remote_file_info().await.map_err(Error::Remote)?;

    // The end calculation below relies on neighbouring entries being adjacent
    // in the archive, so order them by their position first.
    rfi.sort_by_key(|e| e.start_offset);

    // Copies one listing entry into its processed form, validating the
    // compression method on the way.
    let build_rfi =
        |rfi: &RemoteFileInfo, end_offset: u64| -> Result<ProcessedRemoteFileInfo, Error<R::Error, F::Error>> {
            Ok(ProcessedRemoteFileInfo {
                crc32: rfi.crc32,
                compressed_size: rfi.compressed_size,
                uncompressed_size: rfi.uncompressed_size,
                compression_method: CompressionMethod::try_from(rfi.compression_method)
                    .map_err(Error::InvalidCompressionMethod)?
                    .try_into()
                    .map_err(|e| Error::UnsupportedCompressionMethod(format!("{:?}", e), rfi.compression_method))?,
                file_name: rfi.file_name.clone(),
                start_offset: rfi.start_offset,
                end_offset_inclusive: end_offset,
            })
        };

    // Inclusive end of an entry under the assumption that the lengths stored
    // in the listing also describe the local header. The trailing `- 1` turns
    // the exclusive end into an inclusive one.
    let assume_size = |rfi: &RemoteFileInfo| -> u64 {
        rfi.start_offset as u64
            + LocalFileHeaderFixed::SIZE_IN_BYTES as u64
            + rfi.file_name_length as u64
            + rfi.extra_field_length as u64
            + rfi.compressed_size as u64
            - 1
    };

    let mut rfiles = rfi
        .windows(2)
        .filter_map(|slice| {
            let rfi = &slice[0];
            if rfi.compressed_size == 0 {
                None // ignore directories
            } else {
                // The byte right before the next entry is the last one that
                // can possibly belong to this entry.
                let next_rfi = &slice[1];
                let max_possible_end = (next_rfi.start_offset as u64).saturating_sub(1);
                let end = if config.assume_cd_contains_lh_content {
                    assume_size(rfi).min(max_possible_end)
                } else {
                    max_possible_end
                };
                Some(build_rfi(rfi, end))
            }
        })
        .collect::<Result<Vec<_>, Error<R::Error, F::Error>>>()?;

    // windows(2) never yields the last element as `slice[0]`, so it has to be
    // handled separately (and is skipped like any other entry without data).
    if let Some(last) = rfi.last()
        && last.compressed_size != 0
    {
        let end = if config.assume_cd_contains_lh_content {
            assume_size(last)
        } else {
            // The central directory starts right *after* the last byte that
            // can belong to an entry, so subtract one to get an inclusive end,
            // mirroring `next_rfi.start_offset - 1` above.
            (last.offset_of_start_of_central_directory_with_respect_to_the_starting_disk_number as u64)
                .saturating_sub(1)
        };
        rfiles.push(build_rfi(last, end)?);
    }

    Ok(rfiles)
}

#[cfg(test)]
mod tests {

    use super::*;
    use crate::RemoteFileInfo;
    use bytes::Bytes;
    use std::ops::RangeInclusive;

    /// Runs `rfile_infos` against the bundled example archive, served from
    /// memory, and checks the start and inclusive end offset of every entry.
    #[test]
    fn test_rfile_infos_end_correct() {
        const ZIPFILE: &[u8] = include_bytes!("../../../tests/testfiles/example1.zip");

        /// In-memory stand-in that serves `ZIPFILE` as the remote archive.
        pub(crate) struct DummyRemoteZip {}

        impl RemoteZip for DummyRemoteZip {
            type Error = ();

            async fn fetch_remote_file_info(&self) -> Result<Vec<RemoteFileInfo>, ()> {
                // Hands out the requested byte range of the embedded archive.
                async fn slice_of_zip(range: RangeInclusive<usize>) -> Result<Bytes, ()> {
                    Ok(Bytes::from_static(ZIPFILE).slice(range))
                }

                let listing =
                    super::super::super::remote_file_info::fetch_remote_file_info(ZIPFILE.len(), 10_000, |range| {
                        Box::pin(slice_of_zip(range))
                    })
                    .await;

                listing.map_err(|_| ())
            }

            async fn fetch_bytes_stream(
                &self,
                range: RangeInclusive<usize>,
            ) -> Result<impl futures_lite::Stream<Item = Result<Bytes, ()>> + std::marker::Send, ()> {
                let chunk = Bytes::from_static(ZIPFILE).slice(range);
                Ok(futures_lite::stream::once(Ok(chunk)))
            }
        }

        // `rfile_infos` only needs the `FileSystem` error type, so none of
        // these methods is expected to be called.
        impl FileSystem for DummyRemoteZip {
            type Error = ();
            type StorePrepare = ();

            async fn all_files(&mut self) -> Result<Vec<crate::FileInfo>, Self::Error> {
                unimplemented!()
            }

            async fn delete_file(&self, _: crate::FileInfo) -> Result<(), Self::Error> {
                unimplemented!()
            }

            async fn prepare_store_file(&self, _: crate::FileInfo) -> Result<Self::StorePrepare, Self::Error> {
                unimplemented!()
            }

            async fn store_file(&self, _: Self::StorePrepare, _: Bytes) -> Result<(), Self::Error> {
                unimplemented!()
            }
        }

        let remote = DummyRemoteZip {};
        let config = Config {
            assume_cd_contains_lh_content: true,
            ..Config::default()
        };
        let rt = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .unwrap();

        let pending = rfile_infos::<DummyRemoteZip, DummyRemoteZip>(&remote, &config);
        let rfi = rt
            .block_on(pending)
            .expect("could not extract rfile_infos from integrated test zip");

        assert_eq!(rfi.len(), 5);

        let start_offsets: Vec<u32> = rfi.iter().map(|info| info.start_offset).collect();
        let end_offsets_inclusive: Vec<u64> = rfi.iter().map(|info| info.end_offset_inclusive).collect();

        assert_eq!(start_offsets, [68, 253, 333, 508, 820]);
        assert_eq!(end_offsets_inclusive, [248, 328, 503, 815, 13086]);
    }
}