use super::constants::{PACK_SMALL_FILE_THRESHOLD, PACK_TARGET};
#[derive(Debug, Clone)]
pub struct Pack {
pub data: Vec<u8>,
pub members: Vec<(String, u64, u64)>,
}
pub fn is_small(len: u64) -> bool {
(len as usize) < PACK_SMALL_FILE_THRESHOLD
}
pub fn assemble_packs(mut small: Vec<(String, Vec<u8>)>) -> Vec<Pack> {
small.sort_by(|a, b| a.0.cmp(&b.0));
let mut packs = Vec::new();
let mut data: Vec<u8> = Vec::new();
let mut members: Vec<(String, u64, u64)> = Vec::new();
for (path, bytes) in &small {
let start = data.len() as u64;
data.extend_from_slice(bytes);
let len = bytes.len() as u64;
members.push((path.clone(), start, len));
if data.len() >= PACK_TARGET {
packs.push(Pack {
data: std::mem::take(&mut data),
members: std::mem::take(&mut members),
});
}
}
if !members.is_empty() {
packs.push(Pack { data, members });
}
packs
}
pub fn cover(
chunk_ranges: &[(usize, usize)],
fstart: u64,
flen: u64,
) -> Result<(Vec<usize>, u64), String> {
if flen == 0 {
return Ok((Vec::new(), 0));
}
let fend = fstart + flen;
let mut cov = Vec::new();
let mut first: Option<u64> = None;
for (i, (cstart, cend)) in chunk_ranges.iter().enumerate() {
let (cs, ce) = (*cstart as u64, *cend as u64);
if cs < fend && fstart < ce {
if first.is_none() {
first = Some(cs);
}
cov.push(i);
}
}
let fc = first.ok_or_else(|| "Packing failed to cover file".to_string())?;
Ok((cov, fstart - fc))
}
#[cfg(test)]
mod tests {
use super::*;
use crate::v3::chunking::{chunk_ranges_with, CdcBounds};
#[test]
fn small_threshold() {
assert!(is_small(0));
assert!(is_small((PACK_SMALL_FILE_THRESHOLD - 1) as u64));
assert!(!is_small(PACK_SMALL_FILE_THRESHOLD as u64));
}
#[test]
fn packs_are_path_sorted_and_offsets_contiguous() {
let small = vec![
("b.txt".to_string(), vec![1u8; 10]),
("a.txt".to_string(), vec![2u8; 20]),
("c.txt".to_string(), vec![3u8; 5]),
];
let packs = assemble_packs(small);
assert_eq!(packs.len(), 1, "well under PACK_TARGET -> one pack");
let p = &packs[0];
assert_eq!(p.members[0].0, "a.txt");
assert_eq!(p.members[1].0, "b.txt");
assert_eq!(p.members[2].0, "c.txt");
assert_eq!(p.members[0], ("a.txt".to_string(), 0, 20));
assert_eq!(p.members[1], ("b.txt".to_string(), 20, 10));
assert_eq!(p.members[2], ("c.txt".to_string(), 30, 5));
assert_eq!(p.data.len(), 35);
}
#[test]
fn pack_flushes_at_target() {
let chunk = PACK_SMALL_FILE_THRESHOLD - 1; let small = vec![
("a".to_string(), vec![0u8; chunk]),
("b".to_string(), vec![0u8; chunk]),
("c".to_string(), vec![0u8; chunk]),
];
let total = 3 * chunk;
let packs = assemble_packs(small);
let summed: usize = packs.iter().map(|p| p.data.len()).sum();
assert_eq!(summed, total, "no bytes lost across flush boundary");
for p in &packs[..packs.len() - 1] {
assert!(p.data.len() >= PACK_TARGET);
}
}
#[test]
fn cover_maps_spans_to_chunks() {
let mut data = vec![0u8; 6 * 1024 * 1024];
let mut x = 0x1234_5678_9abc_def0u64;
for b in data.iter_mut() {
x ^= x << 13;
x ^= x >> 7;
x ^= x << 17;
*b = (x & 0xff) as u8;
}
let ranges = chunk_ranges_with(&data, &CdcBounds::defaults());
assert!(ranges.len() > 1);
assert_eq!(cover(&ranges, 100, 0).unwrap(), (Vec::new(), 0));
let (first_s, first_e) = ranges[0];
let fstart = (first_s + 10) as u64;
let flen = ((first_e - first_s) / 4) as u64;
let (cov, off) = cover(&ranges, fstart, flen).unwrap();
assert_eq!(cov, vec![0]);
assert_eq!(off, 10);
let fstart2 = (first_e - 5) as u64;
let flen2 = 10u64;
let (cov2, off2) = cover(&ranges, fstart2, flen2).unwrap();
assert_eq!(cov2, vec![0, 1]);
assert_eq!(off2, fstart2 - first_s as u64);
}
}