use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use std::time::SystemTime;
/// Stat fingerprint plus content hash recorded for a single file.
///
/// The `mtime`/`size`/`ino` triple is the cheap check used by
/// `diff_against_walk`; `blake3` is only recomputed when that triple differs.
#[derive(Debug, Clone)]
pub struct FileEntry {
/// Modification time; `FileEntry::from_bytes` stores `UNIX_EPOCH` when the
/// platform cannot report one.
pub mtime: SystemTime,
/// File length in bytes as reported by the file's metadata.
pub size: u64,
/// Inode number on Unix targets; always `0` on other targets.
pub ino: u64,
/// BLAKE3 digest of the file contents.
pub blake3: [u8; 32],
}
impl FileEntry {
    /// Builds an entry from metadata and contents the caller already holds.
    ///
    /// `metadata` supplies the modification time, size and inode; `bytes` is
    /// hashed with BLAKE3. When the platform cannot report a modification
    /// time, `UNIX_EPOCH` is stored instead.
    #[must_use]
    pub fn from_bytes(metadata: &std::fs::Metadata, bytes: &[u8]) -> Self {
        Self {
            mtime: metadata.modified().unwrap_or(SystemTime::UNIX_EPOCH),
            size: metadata.len(),
            ino: inode(metadata),
            blake3: *blake3::hash(bytes).as_bytes(),
        }
    }

    /// Builds an entry by reading the file at `path`.
    ///
    /// The file is opened once and both the metadata and the contents are
    /// taken from that single handle, so the recorded stat data and the hash
    /// always describe the same file even if `path` is replaced concurrently.
    ///
    /// # Errors
    ///
    /// Returns the underlying I/O error when the file cannot be opened,
    /// inspected or read.
    pub fn from_path(path: &Path) -> std::io::Result<Self> {
        let mut file = std::fs::File::open(path)?;
        // Stat before reading: if the file is modified while it is being
        // read, the stale mtime forces a re-hash on the next diff instead of
        // silently hiding the change.
        let metadata = file.metadata()?;
        let mut bytes = Vec::new();
        std::io::Read::read_to_end(&mut file, &mut bytes)?;
        Ok(Self::from_bytes(&metadata, &bytes))
    }
}
/// Collection of the files known from a previous scan, keyed by path.
#[derive(Debug, Clone, Default)]
pub struct Manifest {
/// Recorded `FileEntry` for each tracked path.
pub files: HashMap<PathBuf, FileEntry>,
}
impl Manifest {
#[must_use]
pub fn new() -> Self {
Self {
files: HashMap::new(),
}
}
#[must_use]
pub fn len(&self) -> usize {
self.files.len()
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.files.is_empty()
}
pub fn insert(&mut self, path: PathBuf, entry: FileEntry) {
self.files.insert(path, entry);
}
#[must_use]
pub fn get(&self, path: &Path) -> Option<&FileEntry> {
self.files.get(path)
}
}
/// Result of comparing a `Manifest` with a fresh list of files, as produced
/// by `diff_against_walk`.
#[derive(Debug, Default)]
pub struct Diff {
/// Tracked paths whose content hash no longer matches the manifest, or whose
/// contents could not be read for hashing.
pub dirty: Vec<PathBuf>,
/// Walked paths that have no manifest entry.
pub new: Vec<PathBuf>,
/// Manifest paths that were absent from the walk.
pub deleted: Vec<PathBuf>,
/// Tracked paths whose stat data changed while the content hash stayed the
/// same, paired with the refreshed entry. Not counted by `is_empty`/`total`.
pub touched_clean: Vec<(PathBuf, FileEntry)>,
}
impl Diff {
    /// Returns `true` when there are no dirty, new or deleted paths.
    ///
    /// `touched_clean` entries are deliberately not considered.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.total() == 0
    }

    /// Combined number of dirty, new and deleted paths.
    ///
    /// `touched_clean` entries are deliberately not counted.
    #[must_use]
    pub fn total(&self) -> usize {
        [&self.dirty, &self.new, &self.deleted]
            .iter()
            .map(|paths| paths.len())
            .sum()
    }
}
/// Compares `manifest` with `current_files`, the paths found by a fresh walk.
///
/// Each walked path is classified once, even if the walk lists it repeatedly:
///
/// * no manifest entry: reported in `Diff::new`;
/// * same `mtime`/`size`/`ino` as the manifest: skipped without reading;
/// * stat data differs but the BLAKE3 hash matches: reported in
///   `Diff::touched_clean`, and the manifest entry's stat data is refreshed in
///   place so the next call takes the cheap path;
/// * hash differs, or the file cannot be read: reported in `Diff::dirty`.
///
/// Manifest paths missing from the walk are reported in `Diff::deleted`, in
/// sorted order so the result does not depend on hash-map iteration order.
///
/// A walked path whose metadata cannot be fetched is skipped: it is reported
/// in no category, and it is not reported as deleted either.
///
/// Apart from the in-place refresh described above, `manifest` is not
/// modified; applying new/dirty/deleted changes is left to the caller.
pub fn diff_against_walk(manifest: &mut Manifest, current_files: &[PathBuf]) -> Diff {
    let mut diff = Diff::default();
    let mut seen: HashSet<&Path> = HashSet::with_capacity(current_files.len());

    for path in current_files {
        // `insert` returns false for a repeat; classify every path only once.
        if !seen.insert(path.as_path()) {
            continue;
        }
        let Ok(metadata) = std::fs::metadata(path) else {
            continue;
        };
        let mtime = metadata.modified().unwrap_or(SystemTime::UNIX_EPOCH);
        let size = metadata.len();
        let ino = inode(&metadata);

        // One mutable lookup serves both the comparison and the refresh.
        let Some(entry) = manifest.files.get_mut(path) else {
            diff.new.push(path.clone());
            continue;
        };
        if entry.mtime == mtime && entry.size == size && entry.ino == ino {
            continue;
        }

        // Stat data changed: fall back to comparing content hashes.
        let Ok(bytes) = std::fs::read(path) else {
            diff.dirty.push(path.clone());
            continue;
        };
        let new_hash = *blake3::hash(&bytes).as_bytes();
        if new_hash != entry.blake3 {
            diff.dirty.push(path.clone());
            continue;
        }

        // Same content, new stat data: refresh the manifest so the next call
        // does not have to hash this file again.
        entry.mtime = mtime;
        entry.size = size;
        entry.ino = ino;
        diff.touched_clean.push((
            path.clone(),
            FileEntry {
                mtime,
                size,
                ino,
                blake3: new_hash,
            },
        ));
    }

    diff.deleted.extend(
        manifest
            .files
            .keys()
            .filter(|known| !seen.contains(known.as_path()))
            .cloned(),
    );
    diff.deleted.sort_unstable();
    diff
}
/// Returns the inode number recorded in `metadata` (Unix targets).
#[cfg(unix)]
fn inode(metadata: &std::fs::Metadata) -> u64 {
    std::os::unix::fs::MetadataExt::ino(metadata)
}

/// Fallback for non-Unix targets: always returns `0`, so the inode never
/// distinguishes two files there.
#[cfg(not(unix))]
fn inode(_metadata: &std::fs::Metadata) -> u64 {
    const NO_INODE: u64 = 0;
    NO_INODE
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::TempDir;

    /// Creates (or truncates) `name` inside `dir`, writes `content` and
    /// returns the file's path.
    fn write_file(dir: &Path, name: &str, content: &[u8]) -> PathBuf {
        let path = dir.join(name);
        let mut f = std::fs::File::create(&path).unwrap();
        f.write_all(content).unwrap();
        path
    }

    /// Rewrites the existing file `name` inside `dir` with `content` and
    /// makes sure its modification time differs from the one it had before.
    ///
    /// A single 20 ms pause is enough on most filesystems; the retry loop
    /// keeps the tests reliable where timestamps are coarser (roughly three
    /// seconds are allowed before giving up).
    fn rewrite_with_new_mtime(dir: &Path, name: &str, content: &[u8]) {
        let path = dir.join(name);
        let before = std::fs::metadata(&path).unwrap().modified().unwrap();
        for _ in 0..150 {
            std::thread::sleep(std::time::Duration::from_millis(20));
            write_file(dir, name, content);
            let after = std::fs::metadata(&path).unwrap().modified().unwrap();
            if after != before {
                return;
            }
        }
        panic!("mtime of {} never changed after rewriting", path.display());
    }

    /// Builds a manifest holding a single entry for `path`, hashed from
    /// `content` and stamped with the file's current metadata.
    fn manifest_with(path: PathBuf, content: &[u8]) -> Manifest {
        let metadata = std::fs::metadata(&path).unwrap();
        let entry = FileEntry::from_bytes(&metadata, content);
        let mut m = Manifest::new();
        m.insert(path, entry);
        m
    }

    #[test]
    fn empty_diff_against_empty_walk() {
        let mut m = Manifest::new();
        let diff = diff_against_walk(&mut m, &[]);
        assert!(diff.is_empty());
        assert_eq!(diff.total(), 0);
    }

    #[test]
    fn detects_new_file() {
        let dir = TempDir::new().unwrap();
        let p1 = write_file(dir.path(), "a.txt", b"hello");
        let mut m = Manifest::new();
        let diff = diff_against_walk(&mut m, std::slice::from_ref(&p1));
        assert_eq!(diff.new, vec![p1]);
        assert!(diff.dirty.is_empty());
        assert!(diff.deleted.is_empty());
    }

    #[test]
    fn detects_deleted_file_via_missing_from_walk() {
        let dir = TempDir::new().unwrap();
        let p1 = write_file(dir.path(), "gone.txt", b"hello");
        let mut m = manifest_with(p1.clone(), b"hello");
        std::fs::remove_file(&p1).unwrap();
        let diff = diff_against_walk(&mut m, &[]);
        assert_eq!(diff.deleted, vec![p1]);
        assert!(diff.dirty.is_empty());
        assert!(diff.new.is_empty());
    }

    #[test]
    fn unchanged_file_skipped_via_stat_tuple() {
        let dir = TempDir::new().unwrap();
        let p1 = write_file(dir.path(), "stable.txt", b"hello");
        let mut m = manifest_with(p1.clone(), b"hello");
        let diff = diff_against_walk(&mut m, &[p1]);
        assert!(diff.is_empty(), "stat tuple match must skip blake3");
    }

    #[test]
    fn detects_content_change_when_size_changes() {
        let dir = TempDir::new().unwrap();
        let p1 = write_file(dir.path(), "edit.txt", b"hello");
        let mut m = manifest_with(p1.clone(), b"hello");
        rewrite_with_new_mtime(dir.path(), "edit.txt", b"hello world");
        let diff = diff_against_walk(&mut m, std::slice::from_ref(&p1));
        assert_eq!(diff.dirty, vec![p1]);
    }

    #[test]
    fn detects_content_change_when_size_unchanged() {
        let dir = TempDir::new().unwrap();
        let p1 = write_file(dir.path(), "rename-vars.rs", b"let foo = 1;");
        let mut m = manifest_with(p1.clone(), b"let foo = 1;");
        rewrite_with_new_mtime(dir.path(), "rename-vars.rs", b"let bar = 1;");
        let diff = diff_against_walk(&mut m, std::slice::from_ref(&p1));
        assert_eq!(diff.dirty, vec![p1], "blake3 must catch same-size change");
    }

    #[test]
    fn touched_but_unchanged_does_not_appear_in_diff() {
        let dir = TempDir::new().unwrap();
        let p1 = write_file(dir.path(), "touched.txt", b"identical");
        let mut m = manifest_with(p1.clone(), b"identical");
        let original_mtime = m.get(&p1).unwrap().mtime;
        rewrite_with_new_mtime(dir.path(), "touched.txt", b"identical");
        let new_mtime_on_disk = std::fs::metadata(&p1).unwrap().modified().unwrap();
        assert_ne!(
            original_mtime, new_mtime_on_disk,
            "setup: mtime must differ for this test to mean anything"
        );
        let diff = diff_against_walk(&mut m, std::slice::from_ref(&p1));
        assert!(
            diff.is_empty(),
            "touch-without-content-change must not appear in diff"
        );
        let refreshed = m.get(&p1).unwrap();
        assert_eq!(
            refreshed.mtime, new_mtime_on_disk,
            "manifest mtime must be refreshed on touch-without-change"
        );
    }

    #[test]
    fn diff_against_walk_records_touched_clean() {
        let dir = TempDir::new().unwrap();
        let p1 = write_file(dir.path(), "touched_clean.txt", b"same content");
        let mut m = manifest_with(p1.clone(), b"same content");
        rewrite_with_new_mtime(dir.path(), "touched_clean.txt", b"same content");
        let diff = diff_against_walk(&mut m, std::slice::from_ref(&p1));
        assert!(
            diff.is_empty(),
            "touched_clean file must not appear in dirty/new/deleted"
        );
        assert_eq!(
            diff.touched_clean.len(),
            1,
            "touched_clean must have exactly one entry; got {:?}",
            diff.touched_clean
                .iter()
                .map(|(p, _)| p)
                .collect::<Vec<_>>()
        );
        let (tc_path, tc_entry) = &diff.touched_clean[0];
        assert_eq!(
            tc_path, &p1,
            "touched_clean path must match the touched file"
        );
        let expected_hash = *blake3::hash(b"same content").as_bytes();
        assert_eq!(
            tc_entry.blake3, expected_hash,
            "touched_clean entry must carry the correct (unchanged) blake3"
        );
    }

    #[test]
    fn repeated_touch_without_edit_pays_one_blake3_then_zero() {
        let dir = TempDir::new().unwrap();
        let p1 = write_file(dir.path(), "repeated_touch.txt", b"constant");
        let mut m = manifest_with(p1.clone(), b"constant");
        rewrite_with_new_mtime(dir.path(), "repeated_touch.txt", b"constant");
        let diff1 = diff_against_walk(&mut m, std::slice::from_ref(&p1));
        assert!(diff1.is_empty(), "first pass: no structural changes");
        assert_eq!(
            diff1.touched_clean.len(),
            1,
            "first pass: touched_clean must have one entry"
        );
        let diff2 = diff_against_walk(&mut m, std::slice::from_ref(&p1));
        assert!(diff2.is_empty(), "second pass: no structural changes");
        assert!(
            diff2.touched_clean.is_empty(),
            "second pass: touched_clean must be empty after in-place refresh; \
             got {} entries",
            diff2.touched_clean.len()
        );
    }

    #[test]
    fn touched_then_real_edit_still_detected() {
        let dir = TempDir::new().unwrap();
        let p1 = write_file(dir.path(), "touch_then_edit.txt", b"v1");
        let mut m = manifest_with(p1.clone(), b"v1");
        rewrite_with_new_mtime(dir.path(), "touch_then_edit.txt", b"v1");
        let diff1 = diff_against_walk(&mut m, std::slice::from_ref(&p1));
        assert!(diff1.is_empty(), "pass 1: touch only, no structural diff");
        assert_eq!(diff1.touched_clean.len(), 1, "pass 1: one touched_clean");
        rewrite_with_new_mtime(dir.path(), "touch_then_edit.txt", b"v2 changed");
        let diff2 = diff_against_walk(&mut m, std::slice::from_ref(&p1));
        assert_eq!(
            diff2.dirty,
            vec![p1.clone()],
            "pass 2: real edit must appear in dirty"
        );
        assert!(
            diff2.touched_clean.is_empty(),
            "pass 2: touched_clean must be empty when content changed"
        );
    }

    #[test]
    fn touched_unchanged_then_real_change_still_detected() {
        let dir = TempDir::new().unwrap();
        let p1 = write_file(dir.path(), "twice.txt", b"original");
        let mut m = manifest_with(p1.clone(), b"original");
        rewrite_with_new_mtime(dir.path(), "twice.txt", b"original");
        let diff1 = diff_against_walk(&mut m, std::slice::from_ref(&p1));
        assert!(diff1.is_empty(), "first pass: touch only");
        rewrite_with_new_mtime(dir.path(), "twice.txt", b"modified");
        let diff2 = diff_against_walk(&mut m, std::slice::from_ref(&p1));
        assert_eq!(diff2.dirty, vec![p1], "second pass: real edit detected");
    }

    #[test]
    fn new_plus_deleted_plus_dirty_simultaneously() {
        let dir = TempDir::new().unwrap();
        let keep = write_file(dir.path(), "keep.txt", b"keep");
        let edit = write_file(dir.path(), "edit.txt", b"orig");
        let gone = write_file(dir.path(), "gone.txt", b"gone");
        let added_path = dir.path().join("added.txt");
        let mut m = Manifest::new();
        let keep_meta = std::fs::metadata(&keep).unwrap();
        let edit_meta = std::fs::metadata(&edit).unwrap();
        let gone_meta = std::fs::metadata(&gone).unwrap();
        m.insert(keep.clone(), FileEntry::from_bytes(&keep_meta, b"keep"));
        m.insert(edit.clone(), FileEntry::from_bytes(&edit_meta, b"orig"));
        m.insert(gone.clone(), FileEntry::from_bytes(&gone_meta, b"gone"));
        rewrite_with_new_mtime(dir.path(), "edit.txt", b"changed");
        std::fs::remove_file(&gone).unwrap();
        write_file(dir.path(), "added.txt", b"added");
        let walk = vec![keep.clone(), edit.clone(), added_path.clone()];
        let diff = diff_against_walk(&mut m, &walk);
        assert_eq!(diff.dirty, vec![edit]);
        assert_eq!(diff.new, vec![added_path]);
        assert_eq!(diff.deleted, vec![gone]);
        assert!(!diff.is_empty());
        assert_eq!(diff.total(), 3);
    }

    #[test]
    fn file_entry_from_path_round_trips_from_bytes() {
        let dir = TempDir::new().unwrap();
        let p = write_file(dir.path(), "x.txt", b"some content");
        let from_path = FileEntry::from_path(&p).unwrap();
        let metadata = std::fs::metadata(&p).unwrap();
        let from_bytes = FileEntry::from_bytes(&metadata, b"some content");
        assert_eq!(from_path.blake3, from_bytes.blake3);
        assert_eq!(from_path.size, from_bytes.size);
    }

    #[test]
    fn manifest_default_is_empty() {
        let m = Manifest::default();
        assert!(m.is_empty());
        assert_eq!(m.len(), 0);
    }

    #[cfg(unix)]
    #[test]
    fn inode_is_non_zero_on_unix() {
        let dir = TempDir::new().unwrap();
        let p = write_file(dir.path(), "x", b"data");
        let entry = FileEntry::from_path(&p).unwrap();
        assert!(entry.ino > 0, "Unix metadata must produce a non-zero inode");
    }
}