1use crate::{
2 ColumnFormatter, DirectoryComparer, FileComparer, FileHashCache, FileIterator, Progress,
3 ProgressBuilder,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use std::{
8 collections::HashMap,
9 fs,
10 io::{self, Read, stdout},
11 path::{Path, PathBuf},
12 sync::{
13 Arc,
14 atomic::{AtomicUsize, Ordering},
15 mpsc,
16 },
17 time,
18};
19
/// Progress events streamed from `FileHasher::find_duplicates_streaming` to
/// the consumer loop in `FileHasher::find_duplicates`.
#[derive(Debug, Clone)]
enum HashProgress {
    /// Sent once, before the directory iterators are started.
    StartDiscovering,
    /// Number of files submitted for hashing; sent after every discovered
    /// path has been dispatched.
    TotalFiles(usize),
    /// Hash of one file: its path, its size in bytes, the hash, and whether
    /// the hash was served from the cache (`true`) or freshly computed.
    Result(PathBuf, u64, blake3::Hash, bool),
    /// Hashing one file failed.
    Error,
}
27
/// Outcome of comparing a file's freshly computed hash with the hash stored
/// in the cache for the same path (see `FileHasher::check_file`).
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
enum CheckStatus {
    /// The computed hash equals the cached hash.
    Unchanged,
    /// The cache holds no hash for the path.
    New,
    /// The computed hash differs from the cached hash.
    Modified,
}
34
/// Events streamed from `FileHasher::check_streaming` to its consumer.
#[derive(Debug, PartialEq)]
enum CheckEvent {
    /// Sent once, after the directory iterator has been started.
    StartChecking,
    /// Number of files dispatched for checking; sent after the last path has
    /// been handed to a worker.
    TotalFiles(usize),
    /// A file that is `New` or `Modified`; the path is relative to the
    /// checked directory.
    Result(PathBuf, CheckStatus),
    /// A file whose status was `Unchanged`.
    FileDone,
    /// Checking one file failed.
    Error,
}
43
/// Per-size bookkeeping used while searching for duplicates: files are only
/// hashed once a second file of the same size shows up.
enum EntryState {
    /// Exactly one file of this size has been seen so far; holds its path and
    /// modification time so it can be hashed later if a second one appears.
    Single(PathBuf, time::SystemTime),
    /// At least two files of this size have been seen; every file of this
    /// size is now submitted for hashing as soon as it is discovered.
    Hashing,
}
48
/// Hashes files under one or more directories, backed by a hash cache, to
/// find duplicates (`run` / `find_duplicates`) or detect changes (`check`).
pub struct FileHasher {
    /// Directories to process; `new` rejects an empty list.
    dirs: Vec<PathBuf>,
    /// Size in bytes of the read buffer used when hashing; `0` makes
    /// `compute_hash` memory-map the file instead.
    pub buffer_size: usize,
    /// Hash cache obtained from `FileHashCache::find_or_new` for the common
    /// ancestor of `dirs`.
    pub(crate) cache: Arc<FileHashCache>,
    /// Number of hashes computed by `compute_hash`.
    pub(crate) num_hashed: AtomicUsize,
    /// Number of hashes served from the cache by `get_hash_from_cache`.
    pub(crate) num_hash_looked_up: AtomicUsize,
    /// Optional glob set handed to the directory iterator as its `exclude`.
    pub exclude: Option<GlobSet>,
    /// Optional progress reporter; `Progress::none()` is used when absent.
    pub progress: Option<Arc<ProgressBuilder>>,
    /// When `true`, `run` prints each duplicate group as YAML instead of the
    /// human-readable format.
    pub is_yaml_format: bool,
    /// Passed to `crate::build_thread_pool` when creating the worker pool
    /// (presumably the number of worker threads — confirm against that helper).
    pub jobs: usize,
}
61
62impl FileHasher {
    /// Default for the `jobs` field; mirrors `DirectoryComparer::DEFAULT_JOBS`.
    const DEFAULT_JOBS: usize = DirectoryComparer::DEFAULT_JOBS;
64
65 pub fn new<P: AsRef<Path>>(dirs: &[P]) -> anyhow::Result<Self> {
67 if dirs.is_empty() {
68 anyhow::bail!("At least one directory must be specified.");
69 }
70 let common_ancestor = crate::common_ancestor(dirs)
71 .ok_or_else(|| anyhow::anyhow!("No common ancestor found"))?;
72 Ok(Self {
73 dirs: dirs.iter().map(|p| p.as_ref().to_path_buf()).collect(),
74 buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
75 cache: FileHashCache::find_or_new(&common_ancestor),
76 num_hashed: AtomicUsize::new(0),
77 num_hash_looked_up: AtomicUsize::new(0),
78 exclude: None,
79 progress: None,
80 is_yaml_format: false,
81 jobs: Self::DEFAULT_JOBS,
82 })
83 }
84
85 pub fn remove_cache_entry(&self, path: &Path) -> anyhow::Result<()> {
87 let relative = crate::strip_prefix(path, self.cache.base_dir())?;
88 self.cache.remove(relative);
89 Ok(())
90 }
91
92 pub fn save_cache(&self) -> anyhow::Result<()> {
94 log::info!(
95 "Hash stats for {:?}: {} computed, {} looked up",
96 self.dirs,
97 self.num_hashed.load(Ordering::Relaxed),
98 self.num_hash_looked_up.load(Ordering::Relaxed)
99 );
100 Ok(self.cache.save()?)
101 }
102
    /// Merges `other_cache` into this hasher's cache by delegating to the
    /// cache's own `merge`.
    pub(crate) fn merge_cache(&self, other_cache: &FileHashCache) {
        self.cache.merge(other_cache);
    }
107
108 pub fn clear_cache(&self) -> anyhow::Result<()> {
110 for dir in &self.dirs {
111 let relative = crate::strip_prefix(dir, self.cache.base_dir())?;
112 self.cache.clear(relative);
113 }
114 Ok(())
115 }
116
117 pub fn check(&self, update: bool) -> anyhow::Result<()> {
119 if self.dirs.len() > 1 {
120 anyhow::bail!("Check mode only supports one directory.");
121 }
122 let start_time = time::Instant::now();
123 let progress = self
124 .progress
125 .as_ref()
126 .map(|progress| progress.add_spinner())
127 .unwrap_or_else(Progress::none);
128 progress.set_message("Scanning directory...");
129 let mut num_new = 0;
130 let mut num_modified = 0;
131 let mut num_error = 0;
132 std::thread::scope(|scope| {
133 let (tx, rx) = mpsc::channel();
134 scope.spawn(|| {
135 if let Err(e) = self.check_streaming(tx, update) {
136 log::error!("Error during check: {}", e);
137 }
138 });
139 while let Ok(event) = rx.recv() {
140 match event {
141 CheckEvent::StartChecking => {
142 progress.set_message("Checking files...");
143 }
144 CheckEvent::TotalFiles(total) => {
145 progress.set_length(total as u64);
146 progress.set_message("");
147 }
148 CheckEvent::Result(path, status) => {
149 let symbol = match status {
150 CheckStatus::New => {
151 num_new += 1;
152 '+'
153 }
154 CheckStatus::Modified => {
155 num_modified += 1;
156 '!'
157 }
158 CheckStatus::Unchanged => unreachable!(),
159 };
160 progress.inc(1);
161 progress.suspend_for(stdout(), || {
162 println!("{} {}", symbol, path.display());
163 });
164 }
165 CheckEvent::FileDone => {
166 progress.inc(1);
167 }
168 CheckEvent::Error => {
169 progress.inc(1);
170 num_error += 1;
171 }
172 }
173 }
174 });
175 progress.finish();
176 self.print_check_summary(&start_time, num_new, num_modified, num_error)?;
177 Ok(())
178 }
179
180 fn print_check_summary(
181 &self,
182 start_time: &time::Instant,
183 num_new: usize,
184 num_modified: usize,
185 num_error: usize,
186 ) -> io::Result<()> {
187 let summary = [
188 ("Elapsed:", 0),
189 ("Hash computed:", self.num_hashed.load(Ordering::Relaxed)),
190 ("New files:", num_new),
191 ("Modified files:", num_modified),
192 ("Errors:", num_error),
193 ];
194 let formatter = ColumnFormatter::new(summary.iter().map(|(s, _)| *s));
195 let mut writer = std::io::stderr();
196 formatter.write_value(
197 &mut writer,
198 summary[0].0,
199 FormattedDuration(start_time.elapsed()),
200 )?;
201 formatter.write_values(&mut writer, &summary[1..])
202 }
203
204 fn check_streaming(&self, tx: mpsc::Sender<CheckEvent>, update: bool) -> anyhow::Result<()> {
205 let base_dir = &self.dirs[0];
206 let relative = crate::strip_prefix(base_dir, self.cache.base_dir())?;
207 self.cache.set_remove_if_no_access(relative);
208 std::thread::scope(|global_scope| {
209 let mut it = FileIterator::new(base_dir.clone());
210 it.hasher = Some(self);
211 it.exclude = self.exclude.as_ref();
212 let it_rx = it.spawn_in_scope(global_scope);
213 tx.send(CheckEvent::StartChecking)?;
214 let pool = crate::build_thread_pool(self.jobs)?;
215 pool.scope(move |scope| -> anyhow::Result<()> {
216 let mut total_files = 0;
217 for path in it_rx {
218 total_files += 1;
219 let tx = tx.clone();
220 scope.spawn(move |_| {
221 let status = self.check_file(&path, update);
222 let event = match status {
223 Ok(CheckStatus::New) | Ok(CheckStatus::Modified) => {
224 let rel_path = crate::strip_prefix(&path, base_dir).unwrap();
225 CheckEvent::Result(rel_path.into(), status.unwrap())
226 }
227 Ok(CheckStatus::Unchanged) => CheckEvent::FileDone,
228 Err(e) => {
229 log::error!("Failed to check file {:?}: {}", path, e);
230 CheckEvent::Error
231 }
232 };
233 if tx.send(event).is_err() {
234 log::error!("Send failed");
235 }
236 });
237 }
238 tx.send(CheckEvent::TotalFiles(total_files))?;
239 Ok(())
240 })
241 })?;
242 self.save_cache()?;
243 Ok(())
244 }
245
246 fn check_file(&self, abs_path: &Path, update: bool) -> anyhow::Result<CheckStatus> {
247 assert!(abs_path.is_absolute());
248 let computed_hash = self.compute_hash(abs_path)?;
249 let rel_path = crate::strip_prefix(abs_path, self.cache.base_dir())?;
250 let cached_hash = self.cache.get_by_path(rel_path);
251 let status = match cached_hash {
252 None => CheckStatus::New,
253 Some(cached) => {
254 if computed_hash != cached {
255 CheckStatus::Modified
256 } else {
257 CheckStatus::Unchanged
258 }
259 }
260 };
261 if update {
262 let modified = fs::metadata(abs_path)?.modified()?;
263 match status {
264 CheckStatus::New | CheckStatus::Modified => {
265 self.cache.insert(rel_path, modified, computed_hash);
266 }
267 CheckStatus::Unchanged => {
268 if self.cache.get(rel_path, modified).is_none() {
269 self.cache.insert(rel_path, modified, computed_hash);
270 }
271 }
272 }
273 }
274 Ok(status)
275 }
276
277 pub fn run(&self) -> anyhow::Result<()> {
279 let start_time = time::Instant::now();
280 let mut duplicates = self.find_duplicates()?;
281 let mut total_wasted_space = 0;
282 if !duplicates.is_empty() {
283 duplicates.sort_by_key(|a| a.size);
284 for dupes in &duplicates {
285 if self.is_yaml_format {
286 dupes.write_yaml(std::io::stdout())?;
287 } else {
288 dupes.write_human(std::io::stdout())?;
289 }
290 total_wasted_space += dupes.wasted_size();
291 }
292 }
293 self.print_duplicates_summary(&start_time, total_wasted_space)?;
294 Ok(())
295 }
296
297 fn print_duplicates_summary(
298 &self,
299 start_time: &time::Instant,
300 total_wasted_space: u64,
301 ) -> io::Result<()> {
302 let elapsed = FormattedDuration(start_time.elapsed()).to_string();
303 let num_hashed = self.num_hashed.load(Ordering::Relaxed).to_string();
304 let total_wasted_space = crate::human_readable_size(total_wasted_space);
305 let summary = [
306 ("Elapsed:", elapsed),
307 ("Hash computed:", num_hashed),
308 ("Total wasted space:", total_wasted_space),
309 ];
310 let formatter = ColumnFormatter::new(summary.iter().map(|(s, _)| *s));
311 formatter.write_values(&mut io::stderr(), &summary)
312 }
313
314 pub fn find_duplicates(&self) -> anyhow::Result<Vec<DuplicatedFiles>> {
316 let progress = self
317 .progress
318 .as_ref()
319 .map(|progress| progress.add_spinner())
320 .unwrap_or_else(Progress::none);
321 progress.set_message("Scanning directories...");
322
323 let (tx, rx) = mpsc::channel();
324 let mut by_hash: HashMap<blake3::Hash, DuplicatedFiles> = HashMap::new();
325 let mut num_cache_hits = 0;
326 std::thread::scope(|scope| {
327 scope.spawn(|| {
328 if let Err(e) = self.find_duplicates_streaming(tx) {
329 log::error!("Error during duplicate finding: {}", e);
330 }
331 });
332
333 while let Ok(event) = rx.recv() {
334 match event {
335 HashProgress::StartDiscovering => {
336 progress.set_message("Hashing files...");
337 }
338 HashProgress::TotalFiles(total) => {
339 progress.set_length(total as u64);
340 if num_cache_hits > 0 {
341 progress.set_message(format!(" ({} cache hits)", num_cache_hits));
342 }
343 }
344 HashProgress::Result(path, size, hash, is_cache_hit) => {
345 if is_cache_hit {
346 num_cache_hits += 1;
347 if progress.length().is_none() {
348 progress.set_message(format!(
349 "Hashing files... ({} cache hits)",
350 num_cache_hits
351 ));
352 } else {
353 progress.set_message(format!(" ({} cache hits)", num_cache_hits));
354 }
355 }
356
357 progress.inc(1);
358 let entry = by_hash.entry(hash).or_insert_with(|| DuplicatedFiles {
359 paths: Vec::new(),
360 size,
361 });
362 assert_eq!(entry.size, size, "Hash collision: sizes do not match");
364 entry.paths.push(path);
365 }
366 HashProgress::Error => {
367 progress.inc(1);
368 }
369 }
370 }
371 });
372 progress.finish();
373
374 let mut duplicates = Vec::new();
375 for (_, mut dupes) in by_hash {
376 if dupes.paths.len() > 1 {
377 dupes.paths.sort();
378 duplicates.push(dupes);
379 }
380 }
381 Ok(duplicates)
382 }
383
384 fn find_duplicates_streaming(&self, tx: mpsc::Sender<HashProgress>) -> anyhow::Result<()> {
385 tx.send(HashProgress::StartDiscovering)?;
386 let mut by_size: HashMap<u64, EntryState> = HashMap::new();
387 let mut total_hashed = 0;
388 std::thread::scope(|global_scope| {
389 let (it_tx, it_rx) = mpsc::channel();
390 for dir in &self.dirs {
391 let it_tx = it_tx.clone();
392 let mut it = FileIterator::new(dir.clone());
393 it.hasher = Some(self);
394 it.exclude = self.exclude.as_ref();
395 it.spawn_in_scope_with_sender(global_scope, it_tx);
396 }
397 drop(it_tx);
398
399 let pool = crate::build_thread_pool(self.jobs)?;
400 pool.scope(move |scope| -> anyhow::Result<()> {
401 for current_path in it_rx {
402 let meta = fs::metadata(¤t_path)?;
403 let size = meta.len();
404 let modified = meta.modified()?;
405
406 match by_size.entry(size) {
409 std::collections::hash_map::Entry::Occupied(mut occ) => match occ.get_mut()
410 {
411 EntryState::Single(first_path, first_modified) => {
412 self.spawn_hash_task(first_path, size, *first_modified, scope, &tx);
415 self.spawn_hash_task(¤t_path, size, modified, scope, &tx);
416
417 *occ.get_mut() = EntryState::Hashing;
419 total_hashed += 2;
420 }
421 EntryState::Hashing => {
422 self.spawn_hash_task(¤t_path, size, modified, scope, &tx);
424 total_hashed += 1;
425 }
426 },
427 std::collections::hash_map::Entry::Vacant(vac) => {
428 vac.insert(EntryState::Single(current_path, modified));
429 }
430 }
431 }
432 tx.send(HashProgress::TotalFiles(total_hashed))?;
433 Ok(())
434 })
435 })?;
436
437 self.save_cache()
440 }
441
442 fn spawn_hash_task<'scope>(
443 &'scope self,
444 path: &Path,
445 size: u64,
446 modified: time::SystemTime,
447 scope: &rayon::Scope<'scope>,
448 tx: &mpsc::Sender<HashProgress>,
449 ) {
450 let (hash, relative) = self
451 .get_hash_from_cache(path, modified)
452 .expect("path should be in cache base_dir");
453 if let Some(hash) = hash {
454 let _ = tx.send(HashProgress::Result(path.to_path_buf(), size, hash, true));
455 return;
456 }
457
458 let path = path.to_path_buf();
459 let relative = relative.to_path_buf();
460 let tx = tx.clone();
461 scope.spawn(move |_| {
462 if let Ok(hash) = self.compute_hash(&path) {
463 self.cache.insert(&relative, modified, hash);
464 let _ = tx.send(HashProgress::Result(path, size, hash, false));
465 } else {
466 log::error!("Failed to hash file: {:?}", path);
467 let _ = tx.send(HashProgress::Error);
468 }
469 });
470 }
471
472 pub fn get_hash(&self, path: &Path) -> io::Result<blake3::Hash> {
474 let meta = fs::metadata(path)?;
475 let modified = meta.modified()?;
476 let (hash, relative) = self.get_hash_from_cache(path, modified)?;
477 if let Some(hash) = hash {
478 return Ok(hash);
479 }
480
481 let hash = self.compute_hash(path)?;
482 self.cache.insert(relative, modified, hash);
483 Ok(hash)
484 }
485
486 fn get_hash_from_cache<'a>(
487 &self,
488 path: &'a Path,
489 modified: time::SystemTime,
490 ) -> io::Result<(Option<blake3::Hash>, &'a Path)> {
491 let relative = crate::strip_prefix(path, self.cache.base_dir())
492 .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
493 if let Some(hash) = self.cache.get(relative, modified) {
494 self.num_hash_looked_up.fetch_add(1, Ordering::Relaxed);
495 return Ok((Some(hash), relative));
496 }
497 Ok((None, relative))
498 }
499
500 fn compute_hash(&self, path: &Path) -> io::Result<blake3::Hash> {
501 let start_time = time::Instant::now();
502 let mut f = fs::File::open(path)?;
503 let len = f.metadata()?.len();
504 let progress = self
505 .progress
506 .as_ref()
507 .map(|progress| progress.add_file(path, len))
508 .unwrap_or_else(Progress::none);
509 let mut hasher = blake3::Hasher::new();
510 if self.buffer_size == 0 {
511 if len > 0 {
512 let mmap = unsafe { memmap2::MmapOptions::new().map(&f)? };
513 hasher.update(&mmap[..]);
514 progress.inc(len);
515 }
516 } else {
517 let mut buf = vec![0u8; self.buffer_size];
518 loop {
519 let n = f.read(&mut buf)?;
520 if n == 0 {
521 break;
522 }
523 hasher.update(&buf[..n]);
524 progress.inc(n as u64);
525 }
526 }
527 progress.finish();
528 self.num_hashed.fetch_add(1, Ordering::Relaxed);
529 let hash = hasher.finalize();
530 log::debug!(
531 "Computed hash in {}: {:?}",
532 FormattedDuration(start_time.elapsed()),
533 path
534 );
535 Ok(hash)
536 }
537}
538
/// A group of files that share the same hash, as produced by
/// `FileHasher::find_duplicates`.
#[derive(Clone, Debug)]
pub struct DuplicatedFiles {
    /// Paths of the files in the group.
    pub paths: Vec<PathBuf>,
    /// Size in bytes of each file in the group.
    pub size: u64,
}
545
546impl DuplicatedFiles {
547 fn write_human(&self, mut writer: impl io::Write) -> anyhow::Result<()> {
548 writeln!(
549 writer,
550 "Identical {} files of {}:",
551 self.paths.len(),
552 crate::human_readable_size(self.size)
553 )?;
554 for path in &self.paths {
555 writeln!(writer, " {}", path.display())?;
556 }
557 Ok(())
558 }
559
560 fn write_yaml(&self, mut writer: impl io::Write) -> anyhow::Result<()> {
561 writeln!(writer, "- paths:")?;
562 for path in &self.paths {
563 writeln!(writer, " - {:?}", path)?;
564 }
565 writeln!(writer, " size: {}", self.size)?;
566 Ok(())
567 }
568
569 fn wasted_size(&self) -> u64 {
570 self.size * (self.paths.len() as u64 - 1)
571 }
572}
573
574#[cfg(test)]
575mod tests {
576 use super::*;
577
578 fn default_exclude() -> globset::GlobSet {
579 let mut builder = globset::GlobSetBuilder::new();
580 builder.add(
581 globset::GlobBuilder::new(".hash_cache")
582 .case_insensitive(true)
583 .build()
584 .unwrap(),
585 );
586 builder.build().unwrap()
587 }
588
589 #[test]
590 fn find_duplicates() -> anyhow::Result<()> {
591 let dir = tempfile::tempdir()?;
592
593 let file1_path = dir.path().join("same1.txt");
594 fs::write(&file1_path, "same content")?;
595
596 let file2_path = dir.path().join("same2.txt");
597 fs::write(&file2_path, "same content")?;
598
599 let diff_path = dir.path().join("diff.txt");
600 fs::write(&diff_path, "different content")?;
601
602 let mut hasher = FileHasher::new(&[dir.path()])?;
603 hasher.buffer_size = 8192;
604 let duplicates = hasher.find_duplicates()?;
605
606 assert_eq!(hasher.num_hashed.load(Ordering::Relaxed), 2);
607 assert_eq!(hasher.num_hash_looked_up.load(Ordering::Relaxed), 0);
608
609 assert_eq!(duplicates.len(), 1);
610 let group = &duplicates[0];
611 assert_eq!(group.paths.len(), 2);
612 assert_eq!(group.size, 12); assert!(group.paths.contains(&file1_path));
615 assert!(group.paths.contains(&file2_path));
616
617 Ok(())
618 }
619
620 #[test]
621 fn find_duplicates_merge_cache() -> anyhow::Result<()> {
622 let dir = tempfile::tempdir()?;
623 let dir_path = dir.path();
624
625 let sub_dir = dir_path.join("a").join("a");
626 fs::create_dir_all(&sub_dir)?;
627
628 let file1_path = sub_dir.join("1");
629 fs::write(&file1_path, "same content")?;
630
631 let file2_path = sub_dir.join("2");
632 fs::write(&file2_path, "same content")?;
633
634 let cache_aa_path = sub_dir.join(FileHashCache::FILE_NAME);
636 fs::File::create(&cache_aa_path)?;
637
638 let hasher_aa = FileHasher::new(&[&sub_dir])?;
640 let duplicates_aa = hasher_aa.find_duplicates()?;
641 assert_eq!(duplicates_aa.len(), 1);
642 assert!(cache_aa_path.exists());
643 assert_eq!(hasher_aa.num_hashed.load(Ordering::Relaxed), 2);
644 assert_eq!(hasher_aa.num_hash_looked_up.load(Ordering::Relaxed), 0);
645
646 let root_a = dir_path.join("a");
648 let cache_a_path = root_a.join(FileHashCache::FILE_NAME);
649 fs::File::create(&cache_a_path)?;
650
651 let hasher_a = FileHasher::new(&[&root_a])?;
653 let duplicates_a = hasher_a.find_duplicates()?;
654 assert_eq!(duplicates_a.len(), 1);
655 assert_eq!(hasher_a.num_hashed.load(Ordering::Relaxed), 0);
656 assert_eq!(hasher_a.num_hash_looked_up.load(Ordering::Relaxed), 2);
657
658 assert!(cache_a_path.exists());
660 assert!(!cache_aa_path.exists());
661
662 Ok(())
663 }
664
665 #[test]
666 fn find_duplicates_with_exclude() -> anyhow::Result<()> {
667 let dir = tempfile::tempdir()?;
668
669 let file1_path = dir.path().join("same1.txt");
670 fs::write(&file1_path, "same content")?;
671
672 let file2_path = dir.path().join("same2.txt");
673 fs::write(&file2_path, "same content")?;
674
675 let exclude_path = dir.path().join("exclude.txt");
676 fs::write(&exclude_path, "same content")?;
677
678 let mut hasher = FileHasher::new(&[dir.path()])?;
679 hasher.buffer_size = 8192;
680 let mut builder = globset::GlobSetBuilder::new();
681 builder.add(
682 globset::GlobBuilder::new("exclude.txt")
683 .case_insensitive(true)
684 .build()?,
685 );
686 let filter = builder.build()?;
687 hasher.exclude = Some(filter);
688
689 let duplicates = hasher.find_duplicates()?;
690 assert_eq!(duplicates.len(), 1);
691 let group = &duplicates[0];
692 assert_eq!(group.paths.len(), 2);
693 assert!(group.paths.contains(&file1_path));
694 assert!(group.paths.contains(&file2_path));
695 assert!(!group.paths.contains(&exclude_path));
696 Ok(())
697 }
698
699 #[test]
700 fn check_mode_empty_cache() -> anyhow::Result<()> {
701 let dir = tempfile::tempdir()?;
702 let dir_path = dir.path().to_path_buf();
703 println!("{:?}", dir_path);
704 let file1_path = dir.path().join("file1.txt");
705 fs::write(&file1_path, "content 1")?;
706 let file2_path = dir.path().join("file2.txt");
707 fs::write(&file2_path, "content 2")?;
708
709 let mut hasher = FileHasher::new(&[&dir_path])?;
710 hasher.exclude = Some(default_exclude());
711 let (tx, rx) = mpsc::channel();
712 hasher.check_streaming(tx, false)?;
713 let mut results = Vec::new();
714 let mut start_seen = false;
715 let mut total_files = None;
716 let mut file_done_count = 0;
717 let mut num_error = 0;
718 while let Ok(event) = rx.recv() {
719 match event {
720 CheckEvent::StartChecking => start_seen = true,
721 CheckEvent::TotalFiles(total) => total_files = Some(total),
722 CheckEvent::Result(path, status) => results.push((path, status)),
723 CheckEvent::FileDone => file_done_count += 1,
724 CheckEvent::Error => num_error += 1,
725 }
726 }
727 assert!(start_seen);
728 assert_eq!(total_files, Some(2));
729 assert_eq!(file_done_count, 0);
730 assert_eq!(num_error, 0);
731
732 results.sort_by(|a, b| a.0.cmp(&b.0));
733 assert_eq!(results.len(), 2);
734 assert_eq!(results[0], (PathBuf::from("file1.txt"), CheckStatus::New));
735 assert_eq!(results[1], (PathBuf::from("file2.txt"), CheckStatus::New));
736
737 assert!(!dir.path().join(FileHashCache::FILE_NAME).exists());
738 Ok(())
739 }
740
    /// Checks against a saved cache: untouched files are reported as
    /// `FileDone`, a file with new content as `Modified`, and a file rewritten
    /// with identical content (only its mtime changes) still as `FileDone`.
    #[test]
    fn check_mode_with_cache() -> anyhow::Result<()> {
        let dir = tempfile::tempdir()?;
        let dir_path = dir.path().to_path_buf();
        let file1_path = dir.path().join("file1.txt");
        fs::write(&file1_path, "content 1")?;
        let file2_path = dir.path().join("file2.txt");
        fs::write(&file2_path, "content 2")?;

        // Populate and save the cache with the hashes of both files.
        let mut hasher = FileHasher::new(&[&dir_path])?;
        hasher.exclude = Some(default_exclude());
        let _hash1 = hasher.get_hash(&file1_path)?;
        let _hash2 = hasher.get_hash(&file2_path)?;
        hasher.save_cache()?;
        assert!(dir.path().join(FileHashCache::FILE_NAME).exists());

        // First check: nothing changed, so both files come back as `FileDone`.
        let mut hasher = FileHasher::new(&[&dir_path])?;
        hasher.exclude = Some(default_exclude());
        let (tx, rx) = mpsc::channel();
        hasher.check_streaming(tx, false)?;
        let mut results = Vec::new();
        let mut file_done_count = 0;
        while let Ok(event) = rx.recv() {
            match event {
                CheckEvent::Result(path, status) => results.push((path, status)),
                CheckEvent::FileDone => file_done_count += 1,
                _ => {}
            }
        }
        assert_eq!(results.len(), 0);
        assert_eq!(file_done_count, 2);

        // Change the content of file1.
        fs::write(&file1_path, "content 1 modified")?;

        // Rewrite file2 with the same content so that only its mtime changes;
        // the sleep makes the new mtime strictly later.
        let file2_meta_before = fs::metadata(&file2_path)?;
        let mtime_before = file2_meta_before.modified()?;
        std::thread::sleep(time::Duration::from_millis(10));
        fs::write(&file2_path, "content 2")?;
        let file2_meta_after = fs::metadata(&file2_path)?;
        let mtime_after = file2_meta_after.modified()?;
        assert!(mtime_after > mtime_before);

        // Second check: only file1 is reported, as `Modified`.
        let mut hasher = FileHasher::new(&[&dir_path])?;
        hasher.exclude = Some(default_exclude());
        let (tx, rx) = mpsc::channel();
        hasher.check_streaming(tx, false)?;
        let mut results = Vec::new();
        let mut file_done_count = 0;
        while let Ok(event) = rx.recv() {
            match event {
                CheckEvent::Result(path, status) => results.push((path, status)),
                CheckEvent::FileDone => file_done_count += 1,
                _ => {}
            }
        }
        assert_eq!(results.len(), 1);
        assert_eq!(
            results[0],
            (PathBuf::from("file1.txt"), CheckStatus::Modified)
        );
        assert_eq!(file_done_count, 1);
        Ok(())
    }
804
    /// With `update == true` the cache is written for new files, replaced for
    /// modified files, and re-keyed to the new mtime for files whose content
    /// is unchanged but whose mtime moved.
    #[test]
    fn check_update_mode() -> anyhow::Result<()> {
        let dir = tempfile::tempdir()?;
        let dir_path = dir.path().to_path_buf();
        let file1_path = dir.path().join("file1.txt");
        fs::write(&file1_path, "content 1")?;

        // Pass 1: the file is new, so the update run creates the cache file.
        let mut hasher = FileHasher::new(&[&dir_path])?;
        hasher.exclude = Some(default_exclude());
        let (tx, rx) = mpsc::channel();
        hasher.check_streaming(tx, true)?;
        while rx.recv().is_ok() {}
        hasher.save_cache()?;
        assert!(dir.path().join(FileHashCache::FILE_NAME).exists());

        // A cache created for the same directory has an entry for the file at
        // its current mtime.
        let cache = FileHashCache::new(&dir_path);
        let mtime1 = fs::metadata(&file1_path)?.modified()?;
        let hash1 = cache.get(&PathBuf::from("file1.txt"), mtime1);
        assert!(hash1.is_some());

        // Pass 2: change the content; the sleep separates the mtimes.
        std::thread::sleep(time::Duration::from_millis(10));
        fs::write(&file1_path, "content 1 modified")?;
        let mtime1_mod = fs::metadata(&file1_path)?.modified()?;

        let mut hasher = FileHasher::new(&[&dir_path])?;
        hasher.exclude = Some(default_exclude());
        let (tx, rx) = mpsc::channel();
        hasher.check_streaming(tx, true)?;
        while rx.recv().is_ok() {}
        hasher.save_cache()?;

        // The entry for the new mtime exists and holds a different hash.
        let cache = FileHashCache::new(&dir_path);
        let hash_mod = cache.get(&PathBuf::from("file1.txt"), mtime1_mod);
        assert!(hash_mod.is_some());
        assert_ne!(hash1, hash_mod);

        // Pass 3: rewrite the same content so that only the mtime changes.
        std::thread::sleep(time::Duration::from_millis(10));
        fs::write(&file1_path, "content 1 modified")?;
        let mtime1_mod2 = fs::metadata(&file1_path)?.modified()?;
        assert!(mtime1_mod2 > mtime1_mod);

        // Before the update run there is no entry for the newest mtime.
        assert!(
            cache
                .get(&PathBuf::from("file1.txt"), mtime1_mod2)
                .is_none()
        );

        let mut hasher = FileHasher::new(&[&dir_path])?;
        hasher.exclude = Some(default_exclude());
        let (tx, rx) = mpsc::channel();
        hasher.check_streaming(tx, true)?;
        while rx.recv().is_ok() {}
        hasher.save_cache()?;

        // After the update run the entry is keyed by the newest mtime.
        let cache = FileHashCache::new(&dir_path);
        assert!(
            cache
                .get(&PathBuf::from("file1.txt"), mtime1_mod2)
                .is_some()
        );
        Ok(())
    }
867
    /// After a file is deleted, the next update run drops its cache entry
    /// while keeping the entry of the file that still exists.
    #[test]
    fn check_cleanup_deleted_files() -> anyhow::Result<()> {
        let dir = tempfile::tempdir()?;
        let dir_path = dir.path().to_path_buf();
        let file1_path = dir.path().join("file1.txt");
        let file2_path = dir.path().join("file2.txt");
        fs::write(&file1_path, "content 1")?;
        fs::write(&file2_path, "content 2")?;
        let mtime1 = fs::metadata(&file1_path)?.modified()?;
        let mtime2 = fs::metadata(&file2_path)?.modified()?;

        // First update run: both files end up in the cache.
        let mut hasher = FileHasher::new(&[&dir_path])?;
        hasher.exclude = Some(default_exclude());
        let (tx, rx) = mpsc::channel();
        hasher.check_streaming(tx, true)?;
        while rx.recv().is_ok() {}
        hasher.save_cache()?;

        let cache = FileHashCache::new(&dir_path);
        assert!(cache.get(&PathBuf::from("file1.txt"), mtime1).is_some());
        assert!(cache.get(&PathBuf::from("file2.txt"), mtime2).is_some());

        // Delete file2, then run the update check again.
        fs::remove_file(&file2_path)?;

        let mut hasher = FileHasher::new(&[&dir_path])?;
        hasher.exclude = Some(default_exclude());
        let (tx, rx) = mpsc::channel();
        hasher.check_streaming(tx, true)?;
        while rx.recv().is_ok() {}
        hasher.save_cache()?;

        // The deleted file's entry is gone; the surviving file's entry remains.
        let cache = FileHashCache::new(&dir_path);
        assert!(cache.get(&PathBuf::from("file2.txt"), mtime2).is_none());
        assert!(cache.get(&PathBuf::from("file1.txt"), mtime1).is_some());
        Ok(())
    }
908
909 #[test]
910 fn find_duplicates_multiple_dirs() -> anyhow::Result<()> {
911 let tmp = tempfile::tempdir()?;
912 let dir1 = tmp.path().join("dir1");
913 let dir2 = tmp.path().join("dir2");
914 fs::create_dir(&dir1)?;
915 fs::create_dir(&dir2)?;
916 let file1_path = dir1.join("file1.txt");
917 fs::write(&file1_path, "same content")?;
918 let file2_path = dir2.join("file2.txt");
919 fs::write(&file2_path, "same content")?;
920 let hasher = FileHasher::new(&[&dir1, &dir2])?;
921 let duplicates = hasher.find_duplicates()?;
922 assert_eq!(duplicates.len(), 1);
923 let group = &duplicates[0];
924 assert_eq!(group.paths.len(), 2);
925 assert_eq!(group.size, 12);
926 assert!(group.paths.contains(&file1_path));
927 assert!(group.paths.contains(&file2_path));
928
929 Ok(())
930 }
931
932 #[test]
933 fn check_fails_with_multiple_dirs() -> anyhow::Result<()> {
934 let tmp = tempfile::tempdir()?;
935 let dir1 = tmp.path().join("dir1");
936 let dir2 = tmp.path().join("dir2");
937 fs::create_dir(&dir1)?;
938 fs::create_dir(&dir2)?;
939 let hasher = FileHasher::new(&[&dir1, &dir2])?;
940 assert!(hasher.check(false).is_err());
941 Ok(())
942 }
943}