remozipsy 0.0.1

Zip-implementation-independent structs and helpers
Documentation
use std::collections::{HashMap, VecDeque};

use crate::model::{LocalFileInfo, RemoteFileInfo};

use super::download::Batch;

/// Outcome of comparing the remote file listing against the local files, as
/// assembled by `build_compared`.
#[derive(Debug)]
pub(super) struct Compared {
    /// Remote files that have to be downloaded, ordered by `start_offset` and
    /// grouped into batches. Within a batch every file starts exactly one
    /// byte after the previous file's `end_offset_inclusive`.
    pub needs_download_batches: VecDeque<Batch>,
    /// Local files for which the remote listing has no entry with the same
    /// path.
    pub needs_deletion:         Vec<LocalFileInfo>,
    /// Sum over all batches of the byte range spanned from the first file's
    /// `start_offset` through the last file's `end_offset_inclusive`.
    pub needs_download_bytes:   u64,
    /// Sum of `uncompressed_size` over every file contained in
    /// `needs_download_batches`.
    pub needs_persistent_bytes: u64,
}

/// Compares the remote file listing against the local files and works out
/// which remote files must be downloaded and which local files must be
/// deleted.
///
/// Entries are matched by path: `LocalFileInfo::local_unix_path` against
/// `RemoteFileInfo::file_name`. If several local entries share a path the
/// first one is kept; if several remote entries share a name the last one is
/// kept.
///
/// * remote only: scheduled for download
/// * local only: scheduled for deletion
/// * both present: downloaded only when the `crc32` values differ
///
/// Files to download are sorted by `start_offset` and grouped into batches in
/// which every file starts exactly one byte after the previous file's
/// `end_offset_inclusive`.
///
/// # Returns
///
/// A [`Compared`] holding the download batches, the local files to delete,
/// the number of bytes spanned by all batches (`needs_download_bytes`) and
/// the summed uncompressed size of all files to download
/// (`needs_persistent_bytes`).
pub(super) fn build_compared(remote: Vec<RemoteFileInfo>, local: Vec<LocalFileInfo>) -> Compared {
    // Join both listings on their path. Every value ends up with at least one
    // side set to `Some`.
    let mut compare_map: HashMap<String, (Option<LocalFileInfo>, Option<RemoteFileInfo>)> =
        HashMap::with_capacity(local.len().max(remote.len()));

    for l in local {
        // `or_insert` keeps an already present entry, so the first local file wins.
        let _ = compare_map.entry(l.local_unix_path.clone()).or_insert((Some(l), None));
    }
    for r in remote {
        // Overwrites any earlier remote entry with the same name.
        let slot = compare_map.entry(r.file_name.clone()).or_default();
        slot.1 = Some(r);
    }

    let mut needs_download = Vec::new();
    let mut needs_deletion = Vec::new();
    // Only used for the debug log below: compressed bytes that are already up to date.
    let mut clean_bytes_total = 0_u64;

    for entry in compare_map.into_values() {
        match entry {
            // Present on both sides with identical checksum: nothing to do.
            (Some(local), Some(remote)) if local.crc32 == remote.crc32 => {
                clean_bytes_total += remote.compressed_size as u64;
            },
            // Missing locally, or present with a different checksum.
            (_, Some(remote)) => needs_download.push(remote),
            (Some(local), None) => needs_deletion.push(local),
            // Entries are only ever created together with one of their sides.
            (None, None) => unreachable!(),
        }
    }

    // Entries with equal offsets arrive in arbitrary `HashMap` order anyway,
    // so a stable sort would not buy anything.
    needs_download.sort_unstable_by_key(|e| e.start_offset);

    let mut current_batch = VecDeque::new();
    let mut needs_download_batches = VecDeque::new();

    for rfi in needs_download {
        let start = rfi.start_offset;
        let end = rfi.end_offset_inclusive;
        let file = &rfi.file_name;
        // An empty batch accepts anything; otherwise the file has to start
        // directly behind the last file of the batch.
        let fits_in_current_batch = current_batch
            .back()
            .is_none_or(|last: &RemoteFileInfo| start as u64 == last.end_offset_inclusive.saturating_add(1));
        tracing::trace!(?start, ?end, ?fits_in_current_batch, ?file, "evaluating");
        if !fits_in_current_batch {
            // Gap detected: close the running batch and start a new one.
            needs_download_batches.push_back(current_batch);
            current_batch = VecDeque::with_capacity(1);
        }
        current_batch.push_back(rfi);
    }

    // add last batch
    if !current_batch.is_empty() {
        needs_download_batches.push_back(current_batch);
    }

    // Batches are sorted, so the first and last element delimit the byte
    // range; no min/max scan is needed. `saturating_sub` keeps inconsistent
    // offsets (end before start) from panicking or wrapping around.
    let needs_download_bytes: u64 = needs_download_batches
        .iter()
        .map(|batch| match (batch.front(), batch.back()) {
            (Some(first), Some(last)) => last
                .end_offset_inclusive
                .saturating_add(1)
                .saturating_sub(first.start_offset as u64),
            _ => 0,
        })
        .sum();

    let needs_persistent_bytes: u64 = needs_download_batches
        .iter()
        .flatten()
        .map(|f| f.uncompressed_size as u64)
        .sum();

    tracing::debug!(?clean_bytes_total, ?needs_persistent_bytes, "compare bytes size");

    Compared {
        needs_download_batches,
        needs_deletion,
        needs_download_bytes,
        needs_persistent_bytes,
    }
}