ripvec_core/encoder/ripvec/
manifest.rs1use std::collections::{HashMap, HashSet};
48use std::path::{Path, PathBuf};
49use std::time::SystemTime;
50
51#[derive(Debug, Clone)]
57pub struct FileEntry {
58 pub mtime: SystemTime,
61 pub size: u64,
63 pub ino: u64,
67 pub blake3: [u8; 32],
70}
71
72impl FileEntry {
73 #[must_use]
78 pub fn from_bytes(metadata: &std::fs::Metadata, bytes: &[u8]) -> Self {
79 Self {
80 mtime: metadata.modified().unwrap_or(SystemTime::UNIX_EPOCH),
81 size: metadata.len(),
82 ino: inode(metadata),
83 blake3: *blake3::hash(bytes).as_bytes(),
84 }
85 }
86
87 pub fn from_path(path: &Path) -> std::io::Result<Self> {
96 let metadata = std::fs::metadata(path)?;
97 let bytes = std::fs::read(path)?;
98 Ok(Self::from_bytes(&metadata, &bytes))
99 }
100}
101
102#[derive(Debug, Clone, Default)]
107pub struct Manifest {
108 pub files: HashMap<PathBuf, FileEntry>,
109}
110
111impl Manifest {
112 #[must_use]
114 pub fn new() -> Self {
115 Self {
116 files: HashMap::new(),
117 }
118 }
119
120 #[must_use]
122 pub fn len(&self) -> usize {
123 self.files.len()
124 }
125
126 #[must_use]
128 pub fn is_empty(&self) -> bool {
129 self.files.is_empty()
130 }
131
132 pub fn insert(&mut self, path: PathBuf, entry: FileEntry) {
134 self.files.insert(path, entry);
135 }
136
137 #[must_use]
139 pub fn get(&self, path: &Path) -> Option<&FileEntry> {
140 self.files.get(path)
141 }
142}
143
/// Three lists of paths describing how a set of files differs from a manifest.
#[derive(Debug, Default)]
pub struct Diff {
    /// Paths reported as changed.
    pub dirty: Vec<PathBuf>,
    /// Paths reported as newly added.
    pub new: Vec<PathBuf>,
    /// Paths reported as removed.
    pub deleted: Vec<PathBuf>,
}

impl Diff {
    /// The three lists as slices, so the aggregate queries can share one loop.
    fn buckets(&self) -> [&[PathBuf]; 3] {
        [
            self.dirty.as_slice(),
            self.new.as_slice(),
            self.deleted.as_slice(),
        ]
    }

    /// `true` when all three lists are empty.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.buckets().iter().all(|bucket| bucket.is_empty())
    }

    /// Combined number of paths across the three lists.
    #[must_use]
    pub fn total(&self) -> usize {
        self.buckets().iter().map(|bucket| bucket.len()).sum()
    }
}
173
174pub fn diff_against_walk(manifest: &mut Manifest, current_files: &[PathBuf]) -> Diff {
199 let mut diff = Diff::default();
200 let mut seen: HashSet<&Path> = HashSet::with_capacity(current_files.len());
201
202 for path in current_files {
203 seen.insert(path.as_path());
204 let Ok(metadata) = std::fs::metadata(path) else {
205 continue;
208 };
209 let mtime = metadata.modified().unwrap_or(SystemTime::UNIX_EPOCH);
210 let size = metadata.len();
211 let ino = inode(&metadata);
212
213 match manifest.files.get(path) {
214 None => {
215 diff.new.push(path.clone());
216 }
217 Some(entry) => {
218 if entry.mtime == mtime && entry.size == size && entry.ino == ino {
219 continue;
222 }
223 let Ok(bytes) = std::fs::read(path) else {
226 diff.dirty.push(path.clone());
229 continue;
230 };
231 let new_hash = *blake3::hash(&bytes).as_bytes();
232 if new_hash == entry.blake3 {
233 if let Some(entry_mut) = manifest.files.get_mut(path) {
236 entry_mut.mtime = mtime;
237 entry_mut.size = size;
238 entry_mut.ino = ino;
239 }
240 } else {
241 diff.dirty.push(path.clone());
242 }
243 }
244 }
245 }
246
247 for path in manifest.files.keys() {
251 if !seen.contains(path.as_path()) {
252 diff.deleted.push(path.clone());
253 }
254 }
255
256 diff
257}
258
/// Returns the inode number stored in `metadata` (Unix builds only).
#[cfg(unix)]
fn inode(metadata: &std::fs::Metadata) -> u64 {
    // Call the Unix extension trait by its full path instead of importing it.
    std::os::unix::fs::MetadataExt::ino(metadata)
}
264
/// Fallback for non-Unix builds: always returns `0`, ignoring `metadata`.
#[cfg(not(unix))]
fn inode(_: &std::fs::Metadata) -> u64 {
    // A constant keeps the inode part of the stat comparison always equal.
    const NO_INODE: u64 = 0;
    NO_INODE
}
269
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Pause inserted before a rewrite so the file's mtime can move on.
    const MTIME_GAP: std::time::Duration = std::time::Duration::from_millis(20);

    /// Creates or overwrites `name` inside `dir` and returns its full path.
    fn write_file(dir: &Path, name: &str, content: &[u8]) -> PathBuf {
        let target = dir.join(name);
        std::fs::write(&target, content).unwrap();
        target
    }

    /// Manifest holding a single entry built from `path`'s current metadata
    /// and the supplied `content`.
    fn manifest_with(path: PathBuf, content: &[u8]) -> Manifest {
        let entry = std::fs::metadata(&path)
            .map(|meta| FileEntry::from_bytes(&meta, content))
            .unwrap();
        let mut manifest = Manifest::new();
        manifest.insert(path, entry);
        manifest
    }

    /// Sleeps for `MTIME_GAP`, then rewrites `name` with `content`.
    fn rewrite_after_gap(dir: &Path, name: &str, content: &[u8]) {
        std::thread::sleep(MTIME_GAP);
        write_file(dir, name, content);
    }

    #[test]
    fn empty_diff_against_empty_walk() {
        let mut manifest = Manifest::new();
        let diff = diff_against_walk(&mut manifest, &[]);
        assert!(diff.is_empty());
        assert_eq!(diff.total(), 0);
    }

    #[test]
    fn detects_new_file() {
        let dir = TempDir::new().unwrap();
        let fresh = write_file(dir.path(), "a.txt", b"hello");
        let mut manifest = Manifest::new();
        let diff = diff_against_walk(&mut manifest, &[fresh.clone()]);
        assert_eq!(diff.new, vec![fresh]);
        assert!(diff.dirty.is_empty());
        assert!(diff.deleted.is_empty());
    }

    #[test]
    fn detects_deleted_file_via_missing_from_walk() {
        let dir = TempDir::new().unwrap();
        let removed = write_file(dir.path(), "gone.txt", b"hello");
        let mut manifest = manifest_with(removed.clone(), b"hello");
        std::fs::remove_file(&removed).unwrap();
        // The walk no longer lists the file at all.
        let diff = diff_against_walk(&mut manifest, &[]);
        assert_eq!(diff.deleted, vec![removed]);
        assert!(diff.dirty.is_empty());
        assert!(diff.new.is_empty());
    }

    #[test]
    fn unchanged_file_skipped_via_stat_tuple() {
        let dir = TempDir::new().unwrap();
        let stable = write_file(dir.path(), "stable.txt", b"hello");
        let mut manifest = manifest_with(stable.clone(), b"hello");
        let diff = diff_against_walk(&mut manifest, &[stable]);
        assert!(diff.is_empty(), "stat tuple match must skip blake3");
    }

    #[test]
    fn detects_content_change_when_size_changes() {
        let dir = TempDir::new().unwrap();
        let edited = write_file(dir.path(), "edit.txt", b"hello");
        let mut manifest = manifest_with(edited.clone(), b"hello");
        rewrite_after_gap(dir.path(), "edit.txt", b"hello world");
        let diff = diff_against_walk(&mut manifest, std::slice::from_ref(&edited));
        assert_eq!(diff.dirty, vec![edited]);
    }

    #[test]
    fn detects_content_change_when_size_unchanged() {
        let dir = TempDir::new().unwrap();
        let edited = write_file(dir.path(), "rename-vars.rs", b"let foo = 1;");
        let mut manifest = manifest_with(edited.clone(), b"let foo = 1;");
        // Replacement has the same length, so only the hash can tell them apart.
        rewrite_after_gap(dir.path(), "rename-vars.rs", b"let bar = 1;");
        let diff = diff_against_walk(&mut manifest, std::slice::from_ref(&edited));
        assert_eq!(
            diff.dirty,
            vec![edited],
            "blake3 must catch same-size change"
        );
    }

    #[test]
    fn touched_but_unchanged_does_not_appear_in_diff() {
        let dir = TempDir::new().unwrap();
        let touched = write_file(dir.path(), "touched.txt", b"identical");
        let mut manifest = manifest_with(touched.clone(), b"identical");
        let original_mtime = manifest.get(&touched).unwrap().mtime;

        // Rewrite the same bytes: new mtime, same content.
        rewrite_after_gap(dir.path(), "touched.txt", b"identical");
        let new_mtime_on_disk = std::fs::metadata(&touched).unwrap().modified().unwrap();
        assert_ne!(
            original_mtime, new_mtime_on_disk,
            "setup: mtime must differ for this test to mean anything"
        );

        let diff = diff_against_walk(&mut manifest, std::slice::from_ref(&touched));
        assert!(
            diff.is_empty(),
            "touch-without-content-change must not appear in diff"
        );

        let refreshed_mtime = manifest.get(&touched).map(|entry| entry.mtime).unwrap();
        assert_eq!(
            refreshed_mtime, new_mtime_on_disk,
            "manifest mtime must be refreshed on touch-without-change"
        );
    }

    #[test]
    fn touched_unchanged_then_real_change_still_detected() {
        let dir = TempDir::new().unwrap();
        let target = write_file(dir.path(), "twice.txt", b"original");
        let mut manifest = manifest_with(target.clone(), b"original");
        let walk = std::slice::from_ref(&target);

        rewrite_after_gap(dir.path(), "twice.txt", b"original");
        let first_pass = diff_against_walk(&mut manifest, walk);
        assert!(first_pass.is_empty(), "first pass: touch only");

        rewrite_after_gap(dir.path(), "twice.txt", b"modified");
        let second_pass = diff_against_walk(&mut manifest, walk);
        assert_eq!(
            second_pass.dirty,
            vec![target.clone()],
            "second pass: real edit detected"
        );
    }

    #[test]
    fn new_plus_deleted_plus_dirty_simultaneously() {
        let dir = TempDir::new().unwrap();
        let root = dir.path();
        let keep = write_file(root, "keep.txt", b"keep");
        let edit = write_file(root, "edit.txt", b"orig");
        let gone = write_file(root, "gone.txt", b"gone");
        let added_path = root.join("added.txt");

        // Record the three pre-existing files as they are right now.
        let snapshot = |path: &Path, content: &[u8]| {
            FileEntry::from_bytes(&std::fs::metadata(path).unwrap(), content)
        };
        let mut manifest = Manifest::new();
        manifest.insert(keep.clone(), snapshot(&keep, b"keep"));
        manifest.insert(edit.clone(), snapshot(&edit, b"orig"));
        manifest.insert(gone.clone(), snapshot(&gone, b"gone"));

        // One edit, one removal and one addition.
        rewrite_after_gap(root, "edit.txt", b"changed");
        std::fs::remove_file(&gone).unwrap();
        write_file(root, "added.txt", b"added");

        let walk = vec![keep, edit.clone(), added_path.clone()];
        let diff = diff_against_walk(&mut manifest, &walk);
        assert_eq!(diff.dirty, vec![edit]);
        assert_eq!(diff.new, vec![added_path]);
        assert_eq!(diff.deleted, vec![gone]);
        assert!(!diff.is_empty());
        assert_eq!(diff.total(), 3);
    }

    #[test]
    fn file_entry_from_path_round_trips_from_bytes() {
        let dir = TempDir::new().unwrap();
        let file = write_file(dir.path(), "x.txt", b"some content");
        let via_path = FileEntry::from_path(&file).unwrap();
        let via_bytes = std::fs::metadata(&file)
            .map(|meta| FileEntry::from_bytes(&meta, b"some content"))
            .unwrap();
        assert_eq!(via_path.blake3, via_bytes.blake3);
        assert_eq!(via_path.size, via_bytes.size);
    }

    #[test]
    fn manifest_default_is_empty() {
        let manifest = Manifest::default();
        assert!(manifest.is_empty());
        assert_eq!(manifest.len(), 0);
    }

    #[cfg(unix)]
    #[test]
    fn inode_is_non_zero_on_unix() {
        let dir = TempDir::new().unwrap();
        let file = write_file(dir.path(), "x", b"data");
        let ino = FileEntry::from_path(&file).unwrap().ino;
        assert!(ino > 0, "Unix metadata must produce a non-zero inode");
    }
}