use crate::vfs::VFS;
use rayon::prelude::*;
use std::{
collections::HashMap,
hash::BuildHasher,
io::{self, BufReader, Read},
path::{Path, PathBuf},
time::SystemTime,
};
use walkdir::WalkDir;
/// Content snapshot of a directory tree, as built by [`snapshot_directory`]:
/// each key is a file path relative to the snapshot root and each value is the
/// 32-byte digest returned by [`hash_file`] for that file.
pub type Snapshot = HashMap<PathBuf, [u8; 32]>;
/// Per-file record stored in a [`MetadataSnapshot`]; populated by `snapshot_entry`.
#[derive(Debug, Clone)]
pub struct SnapshotEntry {
/// Digest of the file contents, as returned by [`hash_file`].
pub hash: [u8; 32],
/// File length in bytes, taken from the file's metadata.
pub size: u64,
/// Last-modification time from the file's metadata, or `None` when the
/// metadata query for it failed.
pub modified: Option<SystemTime>,
}
/// Snapshot of a directory tree, as built by [`snapshot_directory_metadata`]:
/// each key is a file path relative to the snapshot root and each value is the
/// [`SnapshotEntry`] (hash, size, modification time) recorded for that file.
pub type MetadataSnapshot = HashMap<PathBuf, SnapshotEntry>;
/// Rebuilds `merged_dir` from `vfs` and records a hash baseline of the result.
///
/// Any existing `merged_dir` is removed first, then recreated empty, so the
/// baseline only ever describes what this call wrote.
///
/// Returns the count reported by `VFS::dump_to_directory_strict` together with
/// the [`Snapshot`] of `merged_dir` taken right after the dump.
///
/// # Errors
///
/// Propagates any failure from clearing/creating the directory, from the dump,
/// or from snapshotting.
pub fn run_setup(
    vfs: &VFS,
    merged_dir: &Path,
    use_hardlinks: bool,
) -> io::Result<(usize, Snapshot)> {
    use std::fs;

    // Wipe leftovers from a previous run before materializing anything.
    if merged_dir.exists() {
        fs::remove_dir_all(merged_dir)?;
    }
    fs::create_dir_all(merged_dir)?;

    let written = vfs.dump_to_directory_strict(merged_dir, use_hardlinks)?;
    let snapshot = snapshot_directory(merged_dir)?;
    Ok((written, snapshot))
}
/// Rebuilds `merged_dir` from `vfs` and records a metadata baseline of the result.
///
/// Any existing `merged_dir` is removed first, then recreated empty, so the
/// baseline only ever describes what this call wrote.
///
/// Returns the count reported by `VFS::dump_to_directory_strict` together with
/// the [`MetadataSnapshot`] of `merged_dir` taken right after the dump.
///
/// # Errors
///
/// Propagates any failure from clearing/creating the directory, from the dump,
/// or from snapshotting.
pub fn run_setup_tracked(
    vfs: &VFS,
    merged_dir: &Path,
    use_hardlinks: bool,
) -> io::Result<(usize, MetadataSnapshot)> {
    use std::fs;

    // Wipe leftovers from a previous run before materializing anything.
    if merged_dir.exists() {
        fs::remove_dir_all(merged_dir)?;
    }
    fs::create_dir_all(merged_dir)?;

    let written = vfs.dump_to_directory_strict(merged_dir, use_hardlinks)?;
    let snapshot = snapshot_directory_metadata(merged_dir)?;
    Ok((written, snapshot))
}
/// Copies every file in `merged_dir` that differs from `baseline` into `output_dir`.
///
/// The set of files comes from [`changed_files`]; each one is copied to the
/// same relative location under `output_dir`, creating parent directories as
/// needed.
///
/// Returns one `(relative path, destination path)` pair per copied file, in
/// the order produced by [`changed_files`].
///
/// # Errors
///
/// Stops at the first failure (change detection, directory creation, or copy)
/// and returns it; files copied before that point are left in place.
pub fn run_finalize(
    merged_dir: &Path,
    baseline: &Snapshot,
    output_dir: &Path,
) -> io::Result<Vec<(PathBuf, PathBuf)>> {
    changed_files(merged_dir, baseline)?
        .into_iter()
        .map(|relative| -> io::Result<(PathBuf, PathBuf)> {
            let target = output_dir.join(&relative);
            if let Some(parent_dir) = target.parent() {
                std::fs::create_dir_all(parent_dir)?;
            }
            std::fs::copy(merged_dir.join(&relative), &target)?;
            Ok((relative, target))
        })
        .collect()
}
/// Copies every file in `merged_dir` that differs from `baseline` into `output_dir`.
///
/// The set of files comes from [`changed_files_metadata`]; each one is copied
/// to the same relative location under `output_dir`, creating parent
/// directories as needed.
///
/// Returns one `(relative path, destination path)` pair per copied file, in
/// the order produced by [`changed_files_metadata`].
///
/// # Errors
///
/// Stops at the first failure (change detection, directory creation, or copy)
/// and returns it; files copied before that point are left in place.
pub fn run_finalize_tracked(
    merged_dir: &Path,
    baseline: &MetadataSnapshot,
    output_dir: &Path,
) -> io::Result<Vec<(PathBuf, PathBuf)>> {
    changed_files_metadata(merged_dir, baseline)?
        .into_iter()
        .map(|relative| -> io::Result<(PathBuf, PathBuf)> {
            let target = output_dir.join(&relative);
            if let Some(parent_dir) = target.parent() {
                std::fs::create_dir_all(parent_dir)?;
            }
            std::fs::copy(merged_dir.join(&relative), &target)?;
            Ok((relative, target))
        })
        .collect()
}
/// Computes the BLAKE3 digest of the file at `path`.
///
/// The file is streamed through a fixed 64 KiB buffer, so memory use does not
/// grow with file size.
///
/// # Errors
///
/// Returns the underlying I/O error if the file cannot be opened or read.
/// Reads that fail with [`io::ErrorKind::Interrupted`] are retried rather than
/// reported.
pub fn hash_file(path: &Path) -> io::Result<[u8; 32]> {
    let file = std::fs::File::open(path)?;
    let mut reader = BufReader::new(file);
    let mut hasher = blake3::Hasher::new();
    let mut buf = vec![0u8; 65536];
    loop {
        match reader.read(&mut buf) {
            // A zero-length read signals end of file.
            Ok(0) => break,
            Ok(n) => {
                hasher.update(&buf[..n]);
            }
            // An interrupted read transferred no data; retrying is the
            // documented response and keeps a stray signal from failing the hash.
            Err(err) if err.kind() == io::ErrorKind::Interrupted => continue,
            Err(err) => return Err(err),
        }
    }
    Ok(*hasher.finalize().as_bytes())
}
/// Builds the [`SnapshotEntry`] for a single file: content hash, length in
/// bytes, and last-modification time (`None` if that query fails).
///
/// # Errors
///
/// Returns the I/O error from the metadata lookup or from hashing the file.
fn snapshot_entry(path: &Path) -> io::Result<SnapshotEntry> {
    // Metadata is queried before hashing, so a missing file fails here first.
    let meta = std::fs::metadata(path)?;
    let hash = hash_file(path)?;
    let size = meta.len();
    let modified = meta.modified().ok();
    Ok(SnapshotEntry {
        hash,
        size,
        modified,
    })
}
/// Hashes every regular file under `dir` and returns the result as a [`Snapshot`].
///
/// Keys are paths relative to `dir`. Entries that are not regular files
/// (directories and the like) are skipped. The walk is fed to rayon via
/// `par_bridge`, so files are hashed on the rayon thread pool.
///
/// # Errors
///
/// Returns an error if the directory walk fails, if a walked path is
/// unexpectedly not under `dir`, or if any file cannot be hashed.
pub fn snapshot_directory(dir: &Path) -> io::Result<Snapshot> {
    // Keep regular files and walk errors; drop every other kind of entry.
    let files = WalkDir::new(dir).into_iter().filter_map(|item| {
        item.map_err(io::Error::other)
            .map(|found| found.file_type().is_file().then_some(found))
            .transpose()
    });

    files
        .par_bridge()
        .map(|item| -> io::Result<(PathBuf, [u8; 32])> {
            let found = item?;
            let relative = match found.path().strip_prefix(dir) {
                Ok(stripped) => stripped.to_path_buf(),
                Err(_) => return Err(io::Error::other("walkdir entry should be under root")),
            };
            let digest = hash_file(found.path())?;
            Ok((relative, digest))
        })
        .collect::<io::Result<Snapshot>>()
}
pub fn snapshot_directory_metadata(dir: &Path) -> io::Result<MetadataSnapshot> {
WalkDir::new(dir)
.into_iter()
.filter_map(|entry| match entry.map_err(io::Error::other) {
Ok(entry) if entry.file_type().is_file() => Some(Ok(entry)),
Ok(_) => None,
Err(err) => Some(Err(err)),
})
.par_bridge()
.map(|entry| {
let entry = entry?;
let rel = entry
.path()
.strip_prefix(dir)
.map_err(|_| io::Error::other("walkdir entry should be under root"))?
.to_path_buf();
let snapshot = snapshot_entry(entry.path())?;
Ok((rel, snapshot))
})
.collect::<io::Result<MetadataSnapshot>>()
}
/// Lists the regular files under `dir` that are new or whose content hash
/// differs from `baseline`.
///
/// Paths in the result are relative to `dir` and sorted. Files that exist in
/// `baseline` but no longer exist on disk are not reported.
///
/// A file with no baseline entry is reported as changed without being read:
/// there is no recorded hash to compare against, so hashing it would be wasted
/// work. Only files present in `baseline` are hashed.
///
/// # Errors
///
/// Returns an error if the directory walk fails, if a walked path is
/// unexpectedly not under `dir`, or if a file present in `baseline` cannot be
/// hashed.
pub fn changed_files<S: BuildHasher + Sync>(
    dir: &Path,
    baseline: &HashMap<PathBuf, [u8; 32], S>,
) -> io::Result<Vec<PathBuf>> {
    let mut changed = WalkDir::new(dir)
        .into_iter()
        // Keep regular files and walk errors; drop every other kind of entry.
        .filter_map(|entry| match entry.map_err(io::Error::other) {
            Ok(entry) if entry.file_type().is_file() => Some(Ok(entry)),
            Ok(_) => None,
            Err(err) => Some(Err(err)),
        })
        .par_bridge()
        .map(|entry| -> io::Result<Option<PathBuf>> {
            let entry = entry?;
            let rel = entry
                .path()
                .strip_prefix(dir)
                .map_err(|_| io::Error::other("walkdir entry should be under root"))?
                .to_path_buf();
            // New file: nothing to compare against, so skip the hash entirely.
            let Some(baseline_hash) = baseline.get(&rel) else {
                return Ok(Some(rel));
            };
            let hash = hash_file(entry.path())?;
            Ok((hash != *baseline_hash).then_some(rel))
        })
        // `Ok(None)` means "unchanged"; keep changed paths and errors only.
        .filter_map(|outcome| outcome.transpose())
        .collect::<io::Result<Vec<_>>>()?;
    // Parallel collection order is arbitrary; paths are distinct, so an
    // unstable sort yields the same deterministic order as a stable one.
    changed.sort_unstable();
    Ok(changed)
}
/// Lists the regular files under `dir` that are new or whose content differs
/// from the entry recorded in `baseline`.
///
/// Paths in the result are relative to `dir` and sorted. Files that exist in
/// `baseline` but no longer exist on disk are not reported.
///
/// Checks run from cheapest to most expensive:
/// 1. no baseline entry: reported as changed without reading the file;
/// 2. current length differs from the recorded `size`: reported as changed
///    without hashing, since a different length implies different content;
/// 3. otherwise the file is hashed and compared with the recorded `hash`.
///
/// The recorded `modified` time is not consulted.
///
/// # Errors
///
/// Returns an error if the directory walk fails, if a walked path is
/// unexpectedly not under `dir`, or if a file present in `baseline` cannot be
/// inspected or hashed.
pub fn changed_files_metadata<S: BuildHasher + Sync>(
    dir: &Path,
    baseline: &HashMap<PathBuf, SnapshotEntry, S>,
) -> io::Result<Vec<PathBuf>> {
    let mut changed = WalkDir::new(dir)
        .into_iter()
        // Keep regular files and walk errors; drop every other kind of entry.
        .filter_map(|entry| match entry.map_err(io::Error::other) {
            Ok(entry) if entry.file_type().is_file() => Some(Ok(entry)),
            Ok(_) => None,
            Err(err) => Some(Err(err)),
        })
        .par_bridge()
        .map(|entry| -> io::Result<Option<PathBuf>> {
            let entry = entry?;
            let rel = entry
                .path()
                .strip_prefix(dir)
                .map_err(|_| io::Error::other("walkdir entry should be under root"))?
                .to_path_buf();
            // New file: nothing to compare against.
            let Some(baseline_entry) = baseline.get(&rel) else {
                return Ok(Some(rel));
            };
            // A length mismatch already proves the content changed, so the
            // (much more expensive) full read and hash can be skipped.
            if std::fs::metadata(entry.path())?.len() != baseline_entry.size {
                return Ok(Some(rel));
            }
            let hash = hash_file(entry.path())?;
            Ok((hash != baseline_entry.hash).then_some(rel))
        })
        // `Ok(None)` means "unchanged"; keep changed paths and errors only.
        .filter_map(|outcome| outcome.transpose())
        .collect::<io::Result<Vec<_>>>()?;
    // Parallel collection order is arbitrary; paths are distinct, so an
    // unstable sort yields the same deterministic order as a stable one.
    changed.sort_unstable();
    Ok(changed)
}
#[cfg(test)]
mod tests {
// Unit tests for hashing, snapshotting, change detection, and the
// setup/finalize helpers. Every test works inside its own scratch directory.
use super::*;
use std::fs;
#[cfg(feature = "zip")]
use std::io::Write as IoWrite;
/// Scratch directory under the system temp dir; removed (best effort) on drop.
struct TempDir(PathBuf);
impl TempDir {
/// Creates a fresh directory named `{name}_{pid}_{nanoseconds since UNIX epoch}`.
fn new(name: &str) -> Self {
let dir = std::env::temp_dir().join(format!(
"{name}_{}_{}",
std::process::id(),
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_nanos()
));
fs::create_dir_all(&dir).unwrap();
Self(dir)
}
/// Root path of the scratch directory.
fn path(&self) -> &Path {
&self.0
}
/// Writes `data` to `rel` under the scratch root, creating parent
/// directories as needed, and returns the full path written.
fn write(&self, rel: &str, data: &[u8]) -> PathBuf {
let target = self.0.join(rel);
fs::create_dir_all(target.parent().unwrap()).unwrap();
fs::write(&target, data).unwrap();
target
}
/// Creates an uncompressed (stored) zip archive named `filename` in the
/// scratch root containing the given `(name, data)` entries.
#[cfg(feature = "zip")]
fn create_zip(&self, filename: &str, entries: &[(&str, &[u8])]) -> PathBuf {
let path = self.0.join(filename);
let file = fs::File::create(&path).unwrap();
let mut zip = zip::ZipWriter::new(file);
let options = zip::write::SimpleFileOptions::default()
.compression_method(zip::CompressionMethod::Stored);
for (name, data) in entries {
zip.start_file(*name, options).unwrap();
zip.write_all(data).unwrap();
}
zip.finish().unwrap();
path
}
}
impl Drop for TempDir {
fn drop(&mut self) {
// Cleanup is best effort: a failure to remove must not panic in drop.
let _ = fs::remove_dir_all(&self.0);
}
}
/// An empty directory yields an empty snapshot.
#[test]
fn snapshot_empty_dir() {
let dir = TempDir::new("runtest_snapshot_empty");
let map = snapshot_directory(dir.path()).unwrap();
assert!(map.is_empty());
}
/// Files at the root and in subdirectories are both captured, keyed by
/// their path relative to the snapshot root.
#[test]
fn snapshot_captures_all_files() {
let dir = TempDir::new("runtest_snapshot_all");
dir.write("a.txt", b"hello");
dir.write("sub/b.txt", b"world");
let map = snapshot_directory(dir.path()).unwrap();
assert_eq!(map.len(), 2);
assert!(map.contains_key(Path::new("a.txt")));
assert!(map.contains_key(&PathBuf::from("sub").join("b.txt")));
}
/// Two files of equal length but different content hash differently.
#[test]
fn snapshot_hash_is_content_based() {
let dir = TempDir::new("runtest_snapshot_content");
let p1 = dir.write("file1.txt", b"aaa");
let p2 = dir.write("file2.txt", b"bbb");
// Guard the premise: the sizes really are identical.
assert_eq!(
fs::metadata(&p1).unwrap().len(),
fs::metadata(&p2).unwrap().len(),
);
let h1 = hash_file(&p1).unwrap();
let h2 = hash_file(&p2).unwrap();
assert_ne!(h1, h2);
}
/// A file created after the baseline is reported as changed.
#[test]
fn changed_files_new_file() {
let dir = TempDir::new("runtest_changed_new");
let baseline = snapshot_directory(dir.path()).unwrap();
dir.write("new.txt", b"hello");
let changed = changed_files(dir.path(), &baseline).unwrap();
assert_eq!(changed, vec![PathBuf::from("new.txt")]);
}
/// A file rewritten with different content is reported as changed.
#[test]
fn changed_files_modified_content() {
let dir = TempDir::new("runtest_changed_modified");
dir.write("f.txt", b"aaa");
let baseline = snapshot_directory(dir.path()).unwrap();
dir.write("f.txt", b"bbb");
let changed = changed_files(dir.path(), &baseline).unwrap();
assert!(changed.contains(&PathBuf::from("f.txt")));
}
/// An untouched file is not reported.
#[test]
fn changed_files_unchanged() {
let dir = TempDir::new("runtest_changed_unchanged");
dir.write("f.txt", b"hello");
let baseline = snapshot_directory(dir.path()).unwrap();
let changed = changed_files(dir.path(), &baseline).unwrap();
assert!(changed.is_empty());
}
/// A file deleted after the baseline is not reported.
#[test]
fn changed_files_deleted_not_reported() {
let dir = TempDir::new("runtest_changed_deleted");
dir.write("to_delete.txt", b"x");
let baseline = snapshot_directory(dir.path()).unwrap();
fs::remove_file(dir.path().join("to_delete.txt")).unwrap();
let changed = changed_files(dir.path(), &baseline).unwrap();
assert!(!changed.contains(&PathBuf::from("to_delete.txt")));
}
/// With an empty baseline every file counts as changed.
#[test]
fn changed_files_empty_baseline() {
let dir = TempDir::new("runtest_changed_empty_baseline");
dir.write("a.txt", b"x");
dir.write("b.txt", b"y");
let changed = changed_files(dir.path(), &HashMap::new()).unwrap();
assert_eq!(changed.len(), 2);
}
/// The result is sorted regardless of the order files were created in.
#[test]
fn changed_files_are_sorted() {
let dir = TempDir::new("runtest_changed_sorted");
dir.write("z.txt", b"z");
dir.write("a.txt", b"a");
dir.write("m.txt", b"m");
let changed = changed_files(dir.path(), &HashMap::new()).unwrap();
assert_eq!(
changed,
vec![
PathBuf::from("a.txt"),
PathBuf::from("m.txt"),
PathBuf::from("z.txt")
]
);
}
/// The metadata snapshot records both the byte length and the content hash.
#[test]
fn metadata_snapshot_captures_hash_and_size() {
let dir = TempDir::new("runtest_metadata_snapshot");
dir.write("a.txt", b"hello");
let snapshot = snapshot_directory_metadata(dir.path()).unwrap();
let entry = snapshot.get(Path::new("a.txt")).unwrap();
assert_eq!(entry.size, 5);
assert_eq!(entry.hash, hash_file(&dir.path().join("a.txt")).unwrap());
}
/// A file created after the metadata baseline is reported as changed.
#[test]
fn changed_files_metadata_reports_new_files() {
let dir = TempDir::new("runtest_metadata_changed_new");
let baseline = snapshot_directory_metadata(dir.path()).unwrap();
dir.write("new.txt", b"hello");
let changed = changed_files_metadata(dir.path(), &baseline).unwrap();
assert_eq!(changed, vec![PathBuf::from("new.txt")]);
}
/// An untouched file is not reported by the metadata variant.
#[test]
fn changed_files_metadata_ignores_unchanged_files() {
let dir = TempDir::new("runtest_metadata_changed_unchanged");
dir.write("same.txt", b"hello");
let baseline = snapshot_directory_metadata(dir.path()).unwrap();
let changed = changed_files_metadata(dir.path(), &baseline).unwrap();
assert!(changed.is_empty());
}
/// A rewrite that changes the file length is reported.
#[test]
fn changed_files_metadata_reports_modified_size() {
let dir = TempDir::new("runtest_metadata_changed_size");
dir.write("f.txt", b"hello");
let baseline = snapshot_directory_metadata(dir.path()).unwrap();
dir.write("f.txt", b"hello world");
let changed = changed_files_metadata(dir.path(), &baseline).unwrap();
assert_eq!(changed, vec![PathBuf::from("f.txt")]);
}
/// A rewrite that keeps the length but alters the bytes is still reported.
#[test]
fn changed_files_metadata_reports_same_size_content_change() {
let dir = TempDir::new("runtest_metadata_changed_same_size");
dir.write("f.txt", b"hello");
let baseline = snapshot_directory_metadata(dir.path()).unwrap();
dir.write("f.txt", b"jello");
let changed = changed_files_metadata(dir.path(), &baseline).unwrap();
assert_eq!(changed, vec![PathBuf::from("f.txt")]);
}
/// `run_setup` creates the merged directory when it does not exist yet.
#[test]
fn run_setup_creates_merged_dir() {
let src = TempDir::new("run_new_setup_src");
src.write("file.txt", b"hello");
let vfs = VFS::from_directories(vec![src.path()], None);
let base = TempDir::new("run_new_setup_base");
let merged = base.path().join("merged_does_not_exist");
assert!(!merged.exists());
run_setup(&vfs, &merged, false).unwrap();
assert!(merged.exists(), "run_setup should create merged_dir");
}
/// The count returned by `run_setup` equals the number of source files.
#[test]
fn run_setup_count_matches_vfs_size() {
let src = TempDir::new("run_new_setup_count_src");
src.write("a.txt", b"1");
src.write("b.txt", b"2");
src.write("sub/c.txt", b"3");
let vfs = VFS::from_directories(vec![src.path()], None);
let merged = TempDir::new("run_new_setup_count_merged");
let (count, _) = run_setup(&vfs, merged.path(), false).unwrap();
assert_eq!(count, 3);
}
/// The baseline returned by `run_setup` is non-empty when the source has files.
#[test]
fn run_setup_returns_non_empty_snapshot() {
let src = TempDir::new("run_new_setup_snap_src");
src.write("file.txt", b"data");
let vfs = VFS::from_directories(vec![src.path()], None);
let merged = TempDir::new("run_new_setup_snap_merged");
let (_, snapshot) = run_setup(&vfs, merged.path(), false).unwrap();
assert!(
!snapshot.is_empty(),
"snapshot should contain entries after setup"
);
}
/// The tracked setup returns a metadata snapshot keyed by relative path.
#[test]
fn run_setup_tracked_returns_metadata_snapshot() {
let src = TempDir::new("run_new_setup_tracked_src");
src.write("file.txt", b"data");
let vfs = VFS::from_directories(vec![src.path()], None);
let merged = TempDir::new("run_new_setup_tracked_merged");
let (_, snapshot) = run_setup_tracked(&vfs, merged.path(), false).unwrap();
assert!(snapshot.contains_key(Path::new("file.txt")));
}
/// A failure to materialize an archive entry aborts setup with an error.
// NOTE(review): presumably the VFS caps archive entry size at a small value
// in test builds, which the 65-byte entry exceeds — confirm against `crate::vfs`.
#[test]
#[cfg(feature = "zip")]
fn run_setup_tracked_fails_when_archive_entry_cannot_be_materialized() {
let src = TempDir::new("run_setup_tracked_archive_failure_src");
let oversized = [b'x'; 65];
src.create_zip("data.zip", &[("big.bin", oversized.as_slice())]);
let vfs = VFS::from_directories(vec![src.path()], Some(vec!["data.zip"]));
let merged = TempDir::new("run_setup_tracked_archive_failure_merged");
let err = run_setup_tracked(&vfs, merged.path(), false)
.expect_err("archive read failure must abort run setup");
assert_eq!(err.kind(), io::ErrorKind::OutOfMemory);
}
/// Files already present in the merged directory are removed by `run_setup`
/// and do not appear in the baseline.
#[test]
fn run_setup_clears_preexisting_merged_files() {
let src = TempDir::new("run_new_setup_existing_src");
src.write("file.txt", b"data");
let vfs = VFS::from_directories(vec![src.path()], None);
let merged = TempDir::new("run_new_setup_existing_merged");
merged.write("preexisting.txt", b"keep");
let (_, snapshot) = run_setup(&vfs, merged.path(), false).unwrap();
assert!(snapshot.contains_key(Path::new("file.txt")));
assert!(!snapshot.contains_key(Path::new("preexisting.txt")));
assert!(!merged.path().join("preexisting.txt").exists());
}
/// With no modifications after setup, finalize copies nothing.
#[test]
fn run_finalize_empty_when_nothing_changed() {
let src = TempDir::new("run_new_finalize_nochange_src");
src.write("file.txt", b"data");
let vfs = VFS::from_directories(vec![src.path()], None);
let merged = TempDir::new("run_new_finalize_nochange_merged");
let (_, baseline) = run_setup(&vfs, merged.path(), false).unwrap();
let output = TempDir::new("run_new_finalize_nochange_out");
let copied = run_finalize(merged.path(), &baseline, output.path()).unwrap();
assert!(
copied.is_empty(),
"nothing changed so nothing should be copied"
);
}
/// A file modified after setup is copied to the output with its new content.
#[test]
fn run_finalize_copies_modified_file() {
let src = TempDir::new("run_new_finalize_mod_src");
src.write("file.txt", b"original");
let vfs = VFS::from_directories(vec![src.path()], None);
let merged = TempDir::new("run_new_finalize_mod_merged");
let (_, baseline) = run_setup(&vfs, merged.path(), false).unwrap();
fs::write(merged.path().join("file.txt"), b"modified").unwrap();
let output = TempDir::new("run_new_finalize_mod_out");
let copied = run_finalize(merged.path(), &baseline, output.path()).unwrap();
assert!(!copied.is_empty(), "modified file should be copied");
let (rel, dest) = &copied[0];
assert_eq!(rel, &PathBuf::from("file.txt"));
assert_eq!(fs::read(dest).unwrap(), b"modified");
}
/// The tracked finalize copies exactly the one modified file.
#[test]
fn run_finalize_tracked_copies_modified_file() {
let src = TempDir::new("run_new_finalize_tracked_mod_src");
src.write("file.txt", b"original");
let vfs = VFS::from_directories(vec![src.path()], None);
let merged = TempDir::new("run_new_finalize_tracked_mod_merged");
let (_, baseline) = run_setup_tracked(&vfs, merged.path(), false).unwrap();
fs::write(merged.path().join("file.txt"), b"modified with new size").unwrap();
let output = TempDir::new("run_new_finalize_tracked_mod_out");
let copied = run_finalize_tracked(merged.path(), &baseline, output.path()).unwrap();
assert_eq!(copied.len(), 1);
let (rel, dest) = &copied[0];
assert_eq!(rel, &PathBuf::from("file.txt"));
assert_eq!(fs::read(dest).unwrap(), b"modified with new size");
}
}