Skip to main content

compare_dir/
file_hasher.rs

1use crate::{
2    DirectoryComparer, FileComparer, FileHashCache, FileIterator, Progress, ProgressBuilder,
3};
4use globset::GlobSet;
5use indicatif::FormattedDuration;
6use std::collections::HashMap;
7use std::fs;
8use std::io::{self, Read};
9use std::path::{Path, PathBuf};
10use std::sync::atomic::{AtomicUsize, Ordering};
11use std::sync::{Arc, mpsc};
12
13#[derive(Debug, Clone)]
14enum HashProgress {
15    StartDiscovering,
16    TotalFiles(usize),
17    Result(PathBuf, u64, blake3::Hash, bool),
18}
19
/// Outcome of comparing a freshly computed file hash against the cached one.
///
/// The variant order is significant: the derived `Ord` sorts
/// `Unchanged < New < Modified`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum CheckStatus {
    /// The computed hash equals the cached hash.
    Unchanged,
    /// The cache holds no hash for the path.
    New,
    /// The computed hash differs from the cached hash.
    Modified,
}
26
27#[derive(Debug, PartialEq)]
28enum CheckEvent {
29    StartChecking,
30    TotalFiles(usize),
31    Result(PathBuf, CheckStatus),
32    FileDone,
33}
34
/// Per-file-size bookkeeping used while scanning for duplicates.
///
/// Files are only hashed once a second file of the same size shows up, so the
/// first file of each size is parked in `Single` until then.
#[derive(Debug)]
enum EntryState {
    /// Exactly one file of this size has been seen so far; its path and
    /// modification time are kept so that it can still be hashed later.
    Single(PathBuf, std::time::SystemTime),
    /// At least two files of this size were seen; every file of this size is
    /// handed off for hashing as soon as it is discovered.
    Hashing,
}
39
/// A group of files with identical content, together with their common size.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DuplicatedFiles {
    /// Paths of all files in the group.
    pub paths: Vec<PathBuf>,
    /// Size in bytes of each file in the group.
    pub size: u64,
}
46
47/// A tool for finding duplicated files in a directory.
48pub struct FileHasher {
49    dir: PathBuf,
50    pub buffer_size: usize,
51    cache: Arc<FileHashCache>,
52    pub(crate) num_hashed: AtomicUsize,
53    pub(crate) num_hash_looked_up: AtomicUsize,
54    pub exclude: Option<GlobSet>,
55    pub progress: Option<Arc<ProgressBuilder>>,
56    pub jobs: usize,
57}
58
59impl FileHasher {
60    const DEFAULT_JOBS: usize = DirectoryComparer::DEFAULT_JOBS;
61
62    /// Creates a new `FileHasher` for the given directory.
63    pub fn new(dir: PathBuf) -> Self {
64        let cache = FileHashCache::find_or_new(&dir);
65        Self {
66            dir,
67            buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
68            cache,
69            num_hashed: AtomicUsize::new(0),
70            num_hash_looked_up: AtomicUsize::new(0),
71            exclude: None,
72            progress: None,
73            jobs: Self::DEFAULT_JOBS,
74        }
75    }
76
77    /// Remove a cache entry if it exists.
78    pub fn remove_cache_entry(&self, path: &Path) -> anyhow::Result<()> {
79        let relative = crate::strip_prefix(path, self.cache.base_dir())?;
80        self.cache.remove(relative);
81        Ok(())
82    }
83
84    /// Save the hash cache if it is dirty.
85    pub fn save_cache(&self) -> anyhow::Result<()> {
86        log::info!(
87            "Hash stats for {:?}: {} computed, {} looked up",
88            self.dir,
89            self.num_hashed.load(Ordering::Relaxed),
90            self.num_hash_looked_up.load(Ordering::Relaxed)
91        );
92        Ok(self.cache.save()?)
93    }
94
95    /// Merges another cache into this hasher's cache.
96    pub(crate) fn merge_cache(&self, other_cache: &FileHashCache) {
97        self.cache.merge(other_cache);
98    }
99
100    /// Clears the loaded hashes in the cache.
101    pub fn clear_cache(&self) -> anyhow::Result<()> {
102        let relative = crate::strip_prefix(&self.dir, self.cache.base_dir())?;
103        self.cache.clear(relative);
104        Ok(())
105    }
106
107    /// Executes the check/update process.
108    pub fn check(&self, update: bool) -> anyhow::Result<()> {
109        let start_time = std::time::Instant::now();
110        let progress = self
111            .progress
112            .as_ref()
113            .map(|progress| progress.add_spinner())
114            .unwrap_or_else(Progress::none);
115        progress.set_message("Scanning directory...");
116        std::thread::scope(|scope| {
117            let (tx, rx) = mpsc::channel();
118            scope.spawn(|| {
119                if let Err(e) = self.check_streaming(tx, update) {
120                    log::error!("Error during check: {}", e);
121                }
122            });
123            while let Ok(event) = rx.recv() {
124                match event {
125                    CheckEvent::StartChecking => {
126                        progress.set_message("Checking files...");
127                    }
128                    CheckEvent::TotalFiles(total) => {
129                        progress.set_length(total as u64);
130                        progress.set_message("");
131                    }
132                    CheckEvent::Result(path, status) => {
133                        progress.inc(1);
134                        progress.suspend(|| {
135                            println!(
136                                "{} {}",
137                                match status {
138                                    CheckStatus::New => '+',
139                                    CheckStatus::Modified => '!',
140                                    CheckStatus::Unchanged => unreachable!(),
141                                },
142                                path.display()
143                            );
144                        });
145                    }
146                    CheckEvent::FileDone => {
147                        progress.inc(1);
148                    }
149                }
150            }
151        });
152        progress.finish();
153        if update {
154            self.save_cache()?;
155        }
156        eprintln!("Finished in {}.", FormattedDuration(start_time.elapsed()));
157        Ok(())
158    }
159
160    fn check_streaming(&self, tx: mpsc::Sender<CheckEvent>, update: bool) -> anyhow::Result<()> {
161        std::thread::scope(|global_scope| {
162            let mut it = FileIterator::new(self.dir.clone());
163            it.hasher = Some(self);
164            it.exclude = self.exclude.as_ref();
165            let it_rx = it.spawn_in_scope(global_scope);
166            tx.send(CheckEvent::StartChecking)?;
167            let pool = crate::build_thread_pool(self.jobs)?;
168            pool.scope(move |scope| -> anyhow::Result<()> {
169                let mut total_files = 0;
170                for (rel_path, abs_path) in it_rx {
171                    total_files += 1;
172                    let tx = tx.clone();
173                    scope.spawn(move |_| {
174                        let status = self.check_file(&abs_path, update);
175                        let event = match status {
176                            Ok(CheckStatus::New) | Ok(CheckStatus::Modified) => {
177                                CheckEvent::Result(rel_path, status.unwrap())
178                            }
179                            Ok(CheckStatus::Unchanged) => CheckEvent::FileDone,
180                            Err(e) => {
181                                log::warn!("Failed to check file {:?}: {}", rel_path, e);
182                                CheckEvent::FileDone
183                            }
184                        };
185                        if tx.send(event).is_err() {
186                            log::error!("Send failed");
187                        }
188                    });
189                }
190                tx.send(CheckEvent::TotalFiles(total_files))?;
191                Ok(())
192            })
193        })?;
194        Ok(())
195    }
196
197    fn check_file(&self, abs_path: &Path, update: bool) -> anyhow::Result<CheckStatus> {
198        assert!(abs_path.is_absolute());
199        let computed_hash = self.compute_hash(abs_path)?;
200        let rel_path = crate::strip_prefix(abs_path, self.cache.base_dir())?;
201        let cached_hash = self.cache.get_by_path(rel_path);
202        let status = match cached_hash {
203            None => CheckStatus::New,
204            Some(cached) => {
205                if computed_hash != cached {
206                    CheckStatus::Modified
207                } else {
208                    CheckStatus::Unchanged
209                }
210            }
211        };
212        if update {
213            let modified = fs::metadata(abs_path)?.modified()?;
214            match status {
215                CheckStatus::New | CheckStatus::Modified => {
216                    self.cache.insert(rel_path, modified, computed_hash);
217                }
218                CheckStatus::Unchanged => {
219                    if self.cache.get(rel_path, modified).is_none() {
220                        self.cache.insert(rel_path, modified, computed_hash);
221                    }
222                }
223            }
224        }
225        Ok(status)
226    }
227
228    /// Executes the duplicate file finding process and prints results.
229    pub fn run(&self) -> anyhow::Result<()> {
230        let start_time = std::time::Instant::now();
231        let mut duplicates = self.find_duplicates()?;
232        if duplicates.is_empty() {
233            println!("No duplicates found.");
234        } else {
235            duplicates.sort_by_key(|a| a.size);
236            let mut total_wasted_space = 0;
237            for dupes in &duplicates {
238                let paths = &dupes.paths;
239                let file_size = dupes.size;
240                println!(
241                    "Identical {} files of {}:",
242                    paths.len(),
243                    crate::human_readable_size(file_size)
244                );
245                for path in paths {
246                    println!("  {}", path.display());
247                }
248                total_wasted_space += file_size * (paths.len() as u64 - 1);
249            }
250            eprintln!(
251                "Total wasted space: {}",
252                crate::human_readable_size(total_wasted_space)
253            );
254        }
255        eprintln!("Finished in {}.", FormattedDuration(start_time.elapsed()));
256        Ok(())
257    }
258
259    /// Finds duplicated files and returns a list of duplicate groups.
260    pub fn find_duplicates(&self) -> anyhow::Result<Vec<DuplicatedFiles>> {
261        let progress = self
262            .progress
263            .as_ref()
264            .map(|progress| progress.add_spinner())
265            .unwrap_or_else(Progress::none);
266        progress.set_message("Scanning directories...");
267
268        let (tx, rx) = mpsc::channel();
269        let mut by_hash: HashMap<blake3::Hash, DuplicatedFiles> = HashMap::new();
270        let mut num_cache_hits = 0;
271        std::thread::scope(|scope| {
272            scope.spawn(|| {
273                if let Err(e) = self.find_duplicates_streaming(tx) {
274                    log::error!("Error during duplicate finding: {}", e);
275                }
276            });
277
278            while let Ok(event) = rx.recv() {
279                match event {
280                    HashProgress::StartDiscovering => {
281                        progress.set_message("Hashing files...");
282                    }
283                    HashProgress::TotalFiles(total) => {
284                        progress.set_length(total as u64);
285                        if num_cache_hits > 0 {
286                            progress.set_message(format!(" ({} cache hits)", num_cache_hits));
287                        }
288                    }
289                    HashProgress::Result(path, size, hash, is_cache_hit) => {
290                        if is_cache_hit {
291                            num_cache_hits += 1;
292                            if progress.length().is_none() {
293                                progress.set_message(format!(
294                                    "Hashing files... ({} cache hits)",
295                                    num_cache_hits
296                                ));
297                            } else {
298                                progress.set_message(format!(" ({} cache hits)", num_cache_hits));
299                            }
300                        }
301
302                        progress.inc(1);
303                        let entry = by_hash.entry(hash).or_insert_with(|| DuplicatedFiles {
304                            paths: Vec::new(),
305                            size,
306                        });
307                        // Hash collisions shouldn't happen, but if they do, sizes shouldn't mismatch.
308                        assert_eq!(entry.size, size, "Hash collision: sizes do not match");
309                        entry.paths.push(path);
310                    }
311                }
312            }
313        });
314        progress.finish();
315
316        let mut duplicates = Vec::new();
317        for (_, mut dupes) in by_hash {
318            if dupes.paths.len() > 1 {
319                dupes.paths.sort();
320                duplicates.push(dupes);
321            }
322        }
323        Ok(duplicates)
324    }
325
326    fn find_duplicates_streaming(&self, tx: mpsc::Sender<HashProgress>) -> anyhow::Result<()> {
327        tx.send(HashProgress::StartDiscovering)?;
328        let mut by_size: HashMap<u64, EntryState> = HashMap::new();
329        let mut total_hashed = 0;
330        std::thread::scope(|global_scope| {
331            let mut it = FileIterator::new(self.dir.clone());
332            it.hasher = Some(self);
333            it.exclude = self.exclude.as_ref();
334            let it_rx = it.spawn_in_scope(global_scope);
335            let pool = crate::build_thread_pool(self.jobs)?;
336            pool.scope(move |scope| -> anyhow::Result<()> {
337                for (_, current_path) in it_rx {
338                    let meta = fs::metadata(&current_path)?;
339                    let size = meta.len();
340                    let modified = meta.modified()?;
341
342                    // Small optimization: If file size is 0, it's not really worth treating
343                    // as wasted space duplicates in the same way, but keeping it unified for now.
344                    match by_size.entry(size) {
345                        std::collections::hash_map::Entry::Occupied(mut occ) => match occ.get_mut()
346                        {
347                            EntryState::Single(first_path, first_modified) => {
348                                // We found a second file of identical size.
349                                // Time to start hashing both the *original* matching file and the *new* one!
350                                self.spawn_hash_task(scope, first_path, size, *first_modified, &tx);
351                                self.spawn_hash_task(scope, &current_path, size, modified, &tx);
352
353                                // Modify the state to indicate we are now fully hashing this size bucket.
354                                *occ.get_mut() = EntryState::Hashing;
355                                total_hashed += 2;
356                            }
357                            EntryState::Hashing => {
358                                // File size bucket already hashing; just dynamically spawn the new file immediately.
359                                self.spawn_hash_task(scope, &current_path, size, modified, &tx);
360                                total_hashed += 1;
361                            }
362                        },
363                        std::collections::hash_map::Entry::Vacant(vac) => {
364                            vac.insert(EntryState::Single(current_path, modified));
365                        }
366                    }
367                }
368                tx.send(HashProgress::TotalFiles(total_hashed))?;
369                Ok(())
370            })
371        })?;
372
373        // The scope waits for all spawned tasks to complete.
374        // Channel `tx` gets naturally closed when it drops at the end of this function.
375        self.save_cache()
376    }
377
378    fn spawn_hash_task<'scope>(
379        &'scope self,
380        scope: &rayon::Scope<'scope>,
381        path: &Path,
382        size: u64,
383        modified: std::time::SystemTime,
384        tx: &mpsc::Sender<HashProgress>,
385    ) {
386        let relative = crate::strip_prefix(path, self.cache.base_dir())
387            .expect("path should be in cache base_dir");
388        if let Some(hash) = self.cache.get(relative, modified) {
389            self.num_hash_looked_up.fetch_add(1, Ordering::Relaxed);
390            let _ = tx.send(HashProgress::Result(path.to_path_buf(), size, hash, true));
391            return;
392        }
393
394        let path_owned = path.to_path_buf();
395        let relative_owned = relative.to_path_buf();
396        let tx_owned = tx.clone();
397        scope.spawn(move |_| {
398            if let Ok(hash) = self.compute_hash(&path_owned) {
399                self.num_hashed.fetch_add(1, Ordering::Relaxed);
400                self.cache.insert(&relative_owned, modified, hash);
401                let _ = tx_owned.send(HashProgress::Result(path_owned, size, hash, false));
402            } else {
403                log::warn!("Failed to hash file: {:?}", path_owned);
404            }
405        });
406    }
407
408    /// Gets the hash of a file, using the cache if available.
409    pub fn get_hash(&self, path: &Path) -> io::Result<blake3::Hash> {
410        let meta = fs::metadata(path)?;
411        let modified = meta.modified()?;
412        let relative = crate::strip_prefix(path, self.cache.base_dir())
413            .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
414        if let Some(hash) = self.cache.get(relative, modified) {
415            self.num_hash_looked_up.fetch_add(1, Ordering::Relaxed);
416            return Ok(hash);
417        }
418
419        let hash = self.compute_hash(path)?;
420        self.num_hashed.fetch_add(1, Ordering::Relaxed);
421        self.cache.insert(relative, modified, hash);
422        Ok(hash)
423    }
424
425    fn compute_hash(&self, path: &Path) -> io::Result<blake3::Hash> {
426        let start_time = std::time::Instant::now();
427        let mut f = fs::File::open(path)?;
428        let len = f.metadata()?.len();
429        let progress = self
430            .progress
431            .as_ref()
432            .map(|progress| progress.add_file(path, len))
433            .unwrap_or_else(Progress::none);
434        let mut hasher = blake3::Hasher::new();
435        if self.buffer_size == 0 {
436            if len > 0 {
437                let mmap = unsafe { memmap2::MmapOptions::new().map(&f)? };
438                hasher.update(&mmap[..]);
439                progress.inc(len);
440            }
441        } else {
442            let mut buf = vec![0u8; self.buffer_size];
443            loop {
444                let n = f.read(&mut buf)?;
445                if n == 0 {
446                    break;
447                }
448                hasher.update(&buf[..n]);
449                progress.inc(n as u64);
450            }
451        }
452        progress.finish();
453        log::debug!(
454            "Computed hash in {}: {:?}",
455            FormattedDuration(start_time.elapsed()),
456            path
457        );
458        Ok(hasher.finalize())
459    }
460}
461
462#[cfg(test)]
463mod tests {
464    use super::*;
465
466    fn default_exclude() -> globset::GlobSet {
467        let mut builder = globset::GlobSetBuilder::new();
468        builder.add(
469            globset::GlobBuilder::new(".hash_cache")
470                .case_insensitive(true)
471                .build()
472                .unwrap(),
473        );
474        builder.build().unwrap()
475    }
476
477    #[test]
478    fn find_duplicates() -> anyhow::Result<()> {
479        let dir = tempfile::tempdir()?;
480
481        let file1_path = dir.path().join("same1.txt");
482        fs::write(&file1_path, "same content")?;
483
484        let file2_path = dir.path().join("same2.txt");
485        fs::write(&file2_path, "same content")?;
486
487        let diff_path = dir.path().join("diff.txt");
488        fs::write(&diff_path, "different content")?;
489
490        let mut hasher = FileHasher::new(dir.path().to_path_buf());
491        hasher.buffer_size = 8192;
492        let duplicates = hasher.find_duplicates()?;
493
494        assert_eq!(hasher.num_hashed.load(Ordering::Relaxed), 2);
495        assert_eq!(hasher.num_hash_looked_up.load(Ordering::Relaxed), 0);
496
497        assert_eq!(duplicates.len(), 1);
498        let group = &duplicates[0];
499        assert_eq!(group.paths.len(), 2);
500        assert_eq!(group.size, 12); // "same content" is 12 bytes
501
502        assert!(group.paths.contains(&file1_path));
503        assert!(group.paths.contains(&file2_path));
504
505        Ok(())
506    }
507
508    #[test]
509    fn find_duplicates_merge_cache() -> anyhow::Result<()> {
510        let dir = tempfile::tempdir()?;
511        let dir_path = dir.path();
512
513        let sub_dir = dir_path.join("a").join("a");
514        fs::create_dir_all(&sub_dir)?;
515
516        let file1_path = sub_dir.join("1");
517        fs::write(&file1_path, "same content")?;
518
519        let file2_path = sub_dir.join("2");
520        fs::write(&file2_path, "same content")?;
521
522        // Create empty cache file in a/a to force it to be the cache base
523        let cache_aa_path = sub_dir.join(FileHashCache::FILE_NAME);
524        fs::File::create(&cache_aa_path)?;
525
526        // Run find_duplicates on a/a
527        let hasher_aa = FileHasher::new(sub_dir.clone());
528        let duplicates_aa = hasher_aa.find_duplicates()?;
529        assert_eq!(duplicates_aa.len(), 1);
530        assert!(cache_aa_path.exists());
531        assert_eq!(hasher_aa.num_hashed.load(Ordering::Relaxed), 2);
532        assert_eq!(hasher_aa.num_hash_looked_up.load(Ordering::Relaxed), 0);
533
534        // Create empty cache file in a to force it to be the cache base
535        let root_a = dir_path.join("a");
536        let cache_a_path = root_a.join(FileHashCache::FILE_NAME);
537        fs::File::create(&cache_a_path)?;
538
539        // Run find_duplicates on a
540        let hasher_a = FileHasher::new(root_a.clone());
541        let duplicates_a = hasher_a.find_duplicates()?;
542        assert_eq!(duplicates_a.len(), 1);
543        assert_eq!(hasher_a.num_hashed.load(Ordering::Relaxed), 0);
544        assert_eq!(hasher_a.num_hash_looked_up.load(Ordering::Relaxed), 2);
545
546        // The merged child cache should be removed.
547        assert!(cache_a_path.exists());
548        assert!(!cache_aa_path.exists());
549
550        Ok(())
551    }
552
553    #[test]
554    fn find_duplicates_with_exclude() -> anyhow::Result<()> {
555        let dir = tempfile::tempdir()?;
556
557        let file1_path = dir.path().join("same1.txt");
558        fs::write(&file1_path, "same content")?;
559
560        let file2_path = dir.path().join("same2.txt");
561        fs::write(&file2_path, "same content")?;
562
563        let exclude_path = dir.path().join("exclude.txt");
564        fs::write(&exclude_path, "same content")?;
565
566        let mut hasher = FileHasher::new(dir.path().to_path_buf());
567        hasher.buffer_size = 8192;
568        let mut builder = globset::GlobSetBuilder::new();
569        builder.add(
570            globset::GlobBuilder::new("exclude.txt")
571                .case_insensitive(true)
572                .build()?,
573        );
574        let filter = builder.build()?;
575        hasher.exclude = Some(filter);
576
577        let duplicates = hasher.find_duplicates()?;
578        assert_eq!(duplicates.len(), 1);
579        let group = &duplicates[0];
580        assert_eq!(group.paths.len(), 2);
581        assert!(group.paths.contains(&file1_path));
582        assert!(group.paths.contains(&file2_path));
583        assert!(!group.paths.contains(&exclude_path));
584        Ok(())
585    }
586
587    #[test]
588    fn check_mode_empty_cache() -> anyhow::Result<()> {
589        let dir = tempfile::tempdir()?;
590        let dir_path = dir.path().to_path_buf();
591        println!("{:?}", dir_path);
592        let file1_path = dir.path().join("file1.txt");
593        fs::write(&file1_path, "content 1")?;
594        let file2_path = dir.path().join("file2.txt");
595        fs::write(&file2_path, "content 2")?;
596
597        let mut hasher = FileHasher::new(dir_path.clone());
598        hasher.exclude = Some(default_exclude());
599        let (tx, rx) = mpsc::channel();
600        hasher.check_streaming(tx, false)?;
601        let mut results = Vec::new();
602        let mut start_seen = false;
603        let mut total_files = None;
604        let mut file_done_count = 0;
605        while let Ok(event) = rx.recv() {
606            match event {
607                CheckEvent::StartChecking => start_seen = true,
608                CheckEvent::TotalFiles(total) => total_files = Some(total),
609                CheckEvent::Result(path, status) => results.push((path, status)),
610                CheckEvent::FileDone => file_done_count += 1,
611            }
612        }
613        assert!(start_seen);
614        assert_eq!(total_files, Some(2));
615        assert_eq!(file_done_count, 0);
616
617        results.sort_by(|a, b| a.0.cmp(&b.0));
618        assert_eq!(results.len(), 2);
619        assert_eq!(results[0], (PathBuf::from("file1.txt"), CheckStatus::New));
620        assert_eq!(results[1], (PathBuf::from("file2.txt"), CheckStatus::New));
621
622        assert!(!dir.path().join(FileHashCache::FILE_NAME).exists());
623        Ok(())
624    }
625
626    #[test]
627    fn check_mode_with_cache() -> anyhow::Result<()> {
628        let dir = tempfile::tempdir()?;
629        let dir_path = dir.path().to_path_buf();
630        let file1_path = dir.path().join("file1.txt");
631        fs::write(&file1_path, "content 1")?;
632        let file2_path = dir.path().join("file2.txt");
633        fs::write(&file2_path, "content 2")?;
634
635        let mut hasher = FileHasher::new(dir_path.clone());
636        hasher.exclude = Some(default_exclude());
637        let _hash1 = hasher.get_hash(&file1_path)?;
638        let _hash2 = hasher.get_hash(&file2_path)?;
639        hasher.save_cache()?;
640        assert!(dir.path().join(FileHashCache::FILE_NAME).exists());
641
642        let mut hasher = FileHasher::new(dir_path.clone());
643        hasher.exclude = Some(default_exclude());
644        let (tx, rx) = mpsc::channel();
645        hasher.check_streaming(tx, false)?;
646        let mut results = Vec::new();
647        let mut file_done_count = 0;
648        while let Ok(event) = rx.recv() {
649            match event {
650                CheckEvent::Result(path, status) => results.push((path, status)),
651                CheckEvent::FileDone => file_done_count += 1,
652                _ => {}
653            }
654        }
655        assert_eq!(results.len(), 0);
656        assert_eq!(file_done_count, 2);
657
658        fs::write(&file1_path, "content 1 modified")?;
659
660        let file2_meta_before = fs::metadata(&file2_path)?;
661        let mtime_before = file2_meta_before.modified()?;
662        std::thread::sleep(std::time::Duration::from_millis(10));
663        fs::write(&file2_path, "content 2")?;
664        let file2_meta_after = fs::metadata(&file2_path)?;
665        let mtime_after = file2_meta_after.modified()?;
666        assert!(mtime_after > mtime_before);
667
668        let mut hasher = FileHasher::new(dir_path.clone());
669        hasher.exclude = Some(default_exclude());
670        let (tx, rx) = mpsc::channel();
671        hasher.check_streaming(tx, false)?;
672        let mut results = Vec::new();
673        let mut file_done_count = 0;
674        while let Ok(event) = rx.recv() {
675            match event {
676                CheckEvent::Result(path, status) => results.push((path, status)),
677                CheckEvent::FileDone => file_done_count += 1,
678                _ => {}
679            }
680        }
681        assert_eq!(results.len(), 1);
682        assert_eq!(
683            results[0],
684            (PathBuf::from("file1.txt"), CheckStatus::Modified)
685        );
686        assert_eq!(file_done_count, 1);
687        Ok(())
688    }
689
690    #[test]
691    fn check_update_mode() -> anyhow::Result<()> {
692        let dir = tempfile::tempdir()?;
693        let dir_path = dir.path().to_path_buf();
694        let file1_path = dir.path().join("file1.txt");
695        fs::write(&file1_path, "content 1")?;
696
697        let mut hasher = FileHasher::new(dir_path.clone());
698        hasher.exclude = Some(default_exclude());
699        let (tx, rx) = mpsc::channel();
700        hasher.check_streaming(tx, true)?;
701        while rx.recv().is_ok() {}
702        hasher.save_cache()?;
703        assert!(dir.path().join(FileHashCache::FILE_NAME).exists());
704
705        let cache = FileHashCache::new(&dir_path);
706        let mtime1 = fs::metadata(&file1_path)?.modified()?;
707        let hash1 = cache.get(&PathBuf::from("file1.txt"), mtime1);
708        assert!(hash1.is_some());
709
710        std::thread::sleep(std::time::Duration::from_millis(10));
711        fs::write(&file1_path, "content 1 modified")?;
712        let mtime1_mod = fs::metadata(&file1_path)?.modified()?;
713
714        let mut hasher = FileHasher::new(dir_path.clone());
715        hasher.exclude = Some(default_exclude());
716        let (tx, rx) = mpsc::channel();
717        hasher.check_streaming(tx, true)?;
718        while rx.recv().is_ok() {}
719        hasher.save_cache()?;
720
721        let cache = FileHashCache::new(&dir_path);
722        let hash_mod = cache.get(&PathBuf::from("file1.txt"), mtime1_mod);
723        assert!(hash_mod.is_some());
724        assert_ne!(hash1, hash_mod);
725
726        std::thread::sleep(std::time::Duration::from_millis(10));
727        fs::write(&file1_path, "content 1 modified")?;
728        let mtime1_mod2 = fs::metadata(&file1_path)?.modified()?;
729        assert!(mtime1_mod2 > mtime1_mod);
730
731        assert!(
732            cache
733                .get(&PathBuf::from("file1.txt"), mtime1_mod2)
734                .is_none()
735        );
736
737        let mut hasher = FileHasher::new(dir_path.clone());
738        hasher.exclude = Some(default_exclude());
739        let (tx, rx) = mpsc::channel();
740        hasher.check_streaming(tx, true)?;
741        while rx.recv().is_ok() {}
742        hasher.save_cache()?;
743
744        let cache = FileHashCache::new(&dir_path);
745        assert!(
746            cache
747                .get(&PathBuf::from("file1.txt"), mtime1_mod2)
748                .is_some()
749        );
750        Ok(())
751    }
752}