Skip to main content

compare_dir/
file_hasher.rs

1use crate::{
2    DirectoryComparer, FileComparer, FileHashCache, FileIterator, Progress, ProgressBuilder,
3};
4use globset::GlobSet;
5use indicatif::FormattedDuration;
6use std::collections::HashMap;
7use std::fs;
8use std::io::{self, Read};
9use std::path::{Path, PathBuf};
10use std::sync::atomic::{AtomicUsize, Ordering};
11use std::sync::{Arc, mpsc};
12
/// Events sent by the duplicate-finding worker to the thread that drives progress reporting
/// and collects results.
#[derive(Debug, Clone)]
enum HashProgress {
    /// Sent once, before the directory walk starts.
    StartDiscovering,
    /// Number of files that were submitted for hashing; sent after the walk has finished.
    TotalFiles(usize),
    /// One hashed file: its path, its size in bytes, its hash, and whether the hash was served
    /// from the cache (`true`) or freshly computed (`false`).
    Result(PathBuf, u64, blake3::Hash, bool),
}
19
/// Outcome of comparing a file's freshly computed hash against the cached one.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
enum CheckStatus {
    /// The computed hash equals the cached hash.
    Unchanged,
    /// The cache holds no hash for the file.
    New,
    /// The computed hash differs from the cached hash.
    Modified,
}
26
/// Events sent by the check worker to the thread that prints results and updates progress.
#[derive(Debug, PartialEq)]
enum CheckEvent {
    /// Sent once, before any file is checked.
    StartChecking,
    /// Number of files that were dispatched for checking; sent after the walk has finished.
    TotalFiles(usize),
    /// A file (path relative to the scanned directory) that is new or modified.
    Result(PathBuf, CheckStatus),
    /// A file finished without anything to report: it was unchanged, or checking it failed.
    FileDone,
}
34
/// State of one file-size bucket while searching for duplicates.
enum EntryState {
    /// Only one file of this size has been seen so far; holds its path and modification time
    /// so it can be hashed later if a second file of the same size appears.
    Single(PathBuf, std::time::SystemTime),
    /// At least two files of this size have been seen; every file of this size gets hashed.
    Hashing,
}
39
/// A group of duplicated files and their size.
///
/// All paths in one group produced the same content hash.
#[derive(Debug, Clone)]
pub struct DuplicatedFiles {
    /// Paths of the files that share the same content.
    pub paths: Vec<PathBuf>,
    /// Size in bytes of each file in the group.
    pub size: u64,
}
46
/// A tool for finding duplicated files in a directory.
///
/// It also backs the check/update mode, which compares file hashes against the hash cache.
pub struct FileHasher {
    /// Directory that is scanned.
    dir: PathBuf,
    /// Size in bytes of the read buffer used while hashing; `0` selects memory-mapped reads.
    pub buffer_size: usize,
    /// Hash cache consulted before hashing and updated with newly computed hashes.
    cache: Arc<FileHashCache>,
    /// Number of hashes that were computed from file content.
    pub(crate) num_hashed: AtomicUsize,
    /// Number of hashes that were served from the cache.
    pub(crate) num_hash_looked_up: AtomicUsize,
    /// Optional glob set handed to the file iterator as its exclusion filter.
    pub exclude: Option<GlobSet>,
    /// Optional progress reporting; when `None`, a no-op progress is used.
    pub progress: Option<Arc<ProgressBuilder>>,
    /// Job count handed to `crate::build_thread_pool`.
    pub jobs: usize,
}
58
59impl FileHasher {
60    const DEFAULT_JOBS: usize = DirectoryComparer::DEFAULT_JOBS;
61
62    /// Creates a new `FileHasher` for the given directory.
63    pub fn new(dir: PathBuf) -> Self {
64        let cache = FileHashCache::find_or_new(&dir);
65        Self {
66            dir,
67            buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
68            cache,
69            num_hashed: AtomicUsize::new(0),
70            num_hash_looked_up: AtomicUsize::new(0),
71            exclude: None,
72            progress: None,
73            jobs: Self::DEFAULT_JOBS,
74        }
75    }
76
77    /// Remove a cache entry if it exists.
78    pub fn remove_cache_entry(&self, path: &Path) -> anyhow::Result<()> {
79        let relative = crate::strip_prefix(path, self.cache.base_dir())?;
80        self.cache.remove(relative);
81        Ok(())
82    }
83
84    /// Save the hash cache if it is dirty.
85    pub fn save_cache(&self) -> anyhow::Result<()> {
86        log::info!(
87            "Hash stats for {:?}: {} computed, {} looked up",
88            self.dir,
89            self.num_hashed.load(Ordering::Relaxed),
90            self.num_hash_looked_up.load(Ordering::Relaxed)
91        );
92        Ok(self.cache.save()?)
93    }
94
95    /// Merges another cache into this hasher's cache.
96    pub(crate) fn merge_cache(&self, other_cache: &FileHashCache) {
97        self.cache.merge(other_cache);
98    }
99
100    /// Clears the loaded hashes in the cache.
101    pub fn clear_cache(&self) -> anyhow::Result<()> {
102        let relative = crate::strip_prefix(&self.dir, self.cache.base_dir())?;
103        self.cache.clear(relative);
104        Ok(())
105    }
106
107    /// Executes the duplicate file finding process and prints results.
108    pub fn run(&self) -> anyhow::Result<()> {
109        let start_time = std::time::Instant::now();
110        let mut duplicates = self.find_duplicates()?;
111        if duplicates.is_empty() {
112            println!("No duplicates found.");
113        } else {
114            duplicates.sort_by_key(|a| a.size);
115            let mut total_wasted_space = 0;
116            for dupes in &duplicates {
117                let paths = &dupes.paths;
118                let file_size = dupes.size;
119                println!(
120                    "Identical {} files of {}:",
121                    paths.len(),
122                    crate::human_readable_size(file_size)
123                );
124                for path in paths {
125                    println!("  {}", path.display());
126                }
127                total_wasted_space += file_size * (paths.len() as u64 - 1);
128            }
129            eprintln!(
130                "Total wasted space: {}",
131                crate::human_readable_size(total_wasted_space)
132            );
133        }
134        eprintln!("Finished in {}.", FormattedDuration(start_time.elapsed()));
135        Ok(())
136    }
137
138    /// Finds duplicated files and returns a list of duplicate groups.
139    pub fn find_duplicates(&self) -> anyhow::Result<Vec<DuplicatedFiles>> {
140        let progress = self
141            .progress
142            .as_ref()
143            .map(|progress| progress.add_spinner())
144            .unwrap_or_else(Progress::none);
145        progress.set_message("Scanning directories...");
146
147        let (tx, rx) = mpsc::channel();
148        let mut by_hash: HashMap<blake3::Hash, DuplicatedFiles> = HashMap::new();
149        let mut num_cache_hits = 0;
150        std::thread::scope(|scope| {
151            scope.spawn(|| {
152                if let Err(e) = self.find_duplicates_internal(tx) {
153                    log::error!("Error during duplicate finding: {}", e);
154                }
155            });
156
157            while let Ok(event) = rx.recv() {
158                match event {
159                    HashProgress::StartDiscovering => {
160                        progress.set_message("Hashing files...");
161                    }
162                    HashProgress::TotalFiles(total) => {
163                        progress.set_length(total as u64);
164                        if num_cache_hits > 0 {
165                            progress.set_message(format!(" ({} cache hits)", num_cache_hits));
166                        }
167                    }
168                    HashProgress::Result(path, size, hash, is_cache_hit) => {
169                        if is_cache_hit {
170                            num_cache_hits += 1;
171                            if progress.length().is_none() {
172                                progress.set_message(format!(
173                                    "Hashing files... ({} cache hits)",
174                                    num_cache_hits
175                                ));
176                            } else {
177                                progress.set_message(format!(" ({} cache hits)", num_cache_hits));
178                            }
179                        }
180
181                        progress.inc(1);
182                        let entry = by_hash.entry(hash).or_insert_with(|| DuplicatedFiles {
183                            paths: Vec::new(),
184                            size,
185                        });
186                        // Hash collisions shouldn't happen, but if they do, sizes shouldn't mismatch.
187                        assert_eq!(entry.size, size, "Hash collision: sizes do not match");
188                        entry.paths.push(path);
189                    }
190                }
191            }
192        });
193        progress.finish();
194
195        let mut duplicates = Vec::new();
196        for (_, mut dupes) in by_hash {
197            if dupes.paths.len() > 1 {
198                dupes.paths.sort();
199                duplicates.push(dupes);
200            }
201        }
202        Ok(duplicates)
203    }
204
205    /// Executes the check/update process.
206    pub fn check(&self, update: bool) -> anyhow::Result<()> {
207        let start_time = std::time::Instant::now();
208        let progress = self
209            .progress
210            .as_ref()
211            .map(|progress| progress.add_spinner())
212            .unwrap_or_else(Progress::none);
213        progress.set_message("Scanning directory...");
214        std::thread::scope(|scope| {
215            let (tx, rx) = mpsc::channel();
216            scope.spawn(|| {
217                if let Err(e) = self.check_internal(tx, update) {
218                    log::error!("Error during check: {}", e);
219                }
220            });
221            while let Ok(event) = rx.recv() {
222                match event {
223                    CheckEvent::StartChecking => {
224                        progress.set_message("Checking files...");
225                    }
226                    CheckEvent::TotalFiles(total) => {
227                        progress.set_length(total as u64);
228                        progress.set_message("");
229                    }
230                    CheckEvent::Result(path, status) => {
231                        progress.inc(1);
232                        progress.suspend(|| {
233                            println!(
234                                "{} {}",
235                                match status {
236                                    CheckStatus::New => '+',
237                                    CheckStatus::Modified => '!',
238                                    CheckStatus::Unchanged => unreachable!(),
239                                },
240                                path.display()
241                            );
242                        });
243                    }
244                    CheckEvent::FileDone => {
245                        progress.inc(1);
246                    }
247                }
248            }
249        });
250        progress.finish();
251        if update {
252            self.save_cache()?;
253        }
254        eprintln!("Finished in {}.", FormattedDuration(start_time.elapsed()));
255        Ok(())
256    }
257
258    fn check_internal(&self, tx: mpsc::Sender<CheckEvent>, update: bool) -> anyhow::Result<()> {
259        std::thread::scope(|global_scope| {
260            let mut it = FileIterator::new(self.dir.clone());
261            it.hasher = Some(self);
262            it.exclude = self.exclude.as_ref();
263            let it_rx = it.spawn_in_scope(global_scope);
264            tx.send(CheckEvent::StartChecking)?;
265            let pool = crate::build_thread_pool(self.jobs)?;
266            pool.scope(move |scope| -> anyhow::Result<()> {
267                let mut total_files = 0;
268                for (rel_path, abs_path) in it_rx {
269                    total_files += 1;
270                    let tx_clone = tx.clone();
271                    let cache_clone = self.cache.clone();
272                    let abs_path_owned = abs_path.clone();
273                    let rel_path_owned = rel_path.clone();
274                    scope.spawn(move |_| {
275                        let status = self.check_file(&abs_path_owned, &cache_clone, update);
276                        let event = match status {
277                            Ok(CheckStatus::New) | Ok(CheckStatus::Modified) => {
278                                CheckEvent::Result(rel_path_owned, status.unwrap())
279                            }
280                            Ok(CheckStatus::Unchanged) => CheckEvent::FileDone,
281                            Err(e) => {
282                                log::warn!("Failed to check file {:?}: {}", rel_path_owned, e);
283                                CheckEvent::FileDone
284                            }
285                        };
286                        if tx_clone.send(event).is_err() {
287                            log::error!("Send failed");
288                        }
289                    });
290                }
291                tx.send(CheckEvent::TotalFiles(total_files))?;
292                Ok(())
293            })
294        })?;
295        Ok(())
296    }
297
298    fn check_file(
299        &self,
300        abs_path: &Path,
301        cache: &FileHashCache,
302        update: bool,
303    ) -> anyhow::Result<CheckStatus> {
304        assert!(abs_path.is_absolute());
305        let computed_hash = self.compute_hash(abs_path)?;
306        let rel_path = crate::strip_prefix(abs_path, self.cache.base_dir())?;
307        let cached_hash = cache.get_path(rel_path);
308        let status = match cached_hash {
309            None => CheckStatus::New,
310            Some(cached) => {
311                if computed_hash != cached {
312                    CheckStatus::Modified
313                } else {
314                    CheckStatus::Unchanged
315                }
316            }
317        };
318        if update {
319            let modified = fs::metadata(abs_path)?.modified()?;
320            match status {
321                CheckStatus::New | CheckStatus::Modified => {
322                    cache.insert(rel_path, modified, computed_hash);
323                }
324                CheckStatus::Unchanged => {
325                    if cache.get_path_time(rel_path, modified).is_none() {
326                        cache.insert(rel_path, modified, computed_hash);
327                    }
328                }
329            }
330        }
331        Ok(status)
332    }
333
334    fn find_duplicates_internal(&self, tx: mpsc::Sender<HashProgress>) -> anyhow::Result<()> {
335        tx.send(HashProgress::StartDiscovering)?;
336        let mut by_size: HashMap<u64, EntryState> = HashMap::new();
337        let mut total_hashed = 0;
338        std::thread::scope(|global_scope| {
339            let mut it = FileIterator::new(self.dir.clone());
340            it.hasher = Some(self);
341            it.exclude = self.exclude.as_ref();
342            let it_rx = it.spawn_in_scope(global_scope);
343            let pool = crate::build_thread_pool(self.jobs)?;
344            pool.scope(move |scope| -> anyhow::Result<()> {
345                for (_, current_path) in it_rx {
346                    let meta = fs::metadata(&current_path)?;
347                    let size = meta.len();
348                    let modified = meta.modified()?;
349
350                    // Small optimization: If file size is 0, it's not really worth treating
351                    // as wasted space duplicates in the same way, but keeping it unified for now.
352                    match by_size.entry(size) {
353                        std::collections::hash_map::Entry::Occupied(mut occ) => match occ.get_mut()
354                        {
355                            EntryState::Single(first_path, first_modified) => {
356                                // We found a second file of identical size.
357                                // Time to start hashing both the *original* matching file and the *new* one!
358                                self.spawn_hash_task(scope, first_path, size, *first_modified, &tx);
359                                self.spawn_hash_task(scope, &current_path, size, modified, &tx);
360
361                                // Modify the state to indicate we are now fully hashing this size bucket.
362                                *occ.get_mut() = EntryState::Hashing;
363                                total_hashed += 2;
364                            }
365                            EntryState::Hashing => {
366                                // File size bucket already hashing; just dynamically spawn the new file immediately.
367                                self.spawn_hash_task(scope, &current_path, size, modified, &tx);
368                                total_hashed += 1;
369                            }
370                        },
371                        std::collections::hash_map::Entry::Vacant(vac) => {
372                            vac.insert(EntryState::Single(current_path, modified));
373                        }
374                    }
375                }
376                tx.send(HashProgress::TotalFiles(total_hashed))?;
377                Ok(())
378            })
379        })?;
380
381        // The scope waits for all spawned tasks to complete.
382        // Channel `tx` gets naturally closed when it drops at the end of this function.
383        self.save_cache()
384    }
385
386    fn spawn_hash_task<'scope>(
387        &'scope self,
388        scope: &rayon::Scope<'scope>,
389        path: &Path,
390        size: u64,
391        modified: std::time::SystemTime,
392        tx: &mpsc::Sender<HashProgress>,
393    ) {
394        let relative = crate::strip_prefix(path, self.cache.base_dir())
395            .expect("path should be in cache base_dir");
396        if let Some(hash) = self.cache.get_path_time(relative, modified) {
397            self.num_hash_looked_up.fetch_add(1, Ordering::Relaxed);
398            let _ = tx.send(HashProgress::Result(path.to_path_buf(), size, hash, true));
399            return;
400        }
401
402        let path_owned = path.to_path_buf();
403        let relative_owned = relative.to_path_buf();
404        let tx_owned = tx.clone();
405        let cache_owned = self.cache.clone();
406        scope.spawn(move |_| {
407            if let Ok(hash) = self.compute_hash(&path_owned) {
408                self.num_hashed.fetch_add(1, Ordering::Relaxed);
409                cache_owned.insert(&relative_owned, modified, hash);
410                let _ = tx_owned.send(HashProgress::Result(path_owned, size, hash, false));
411            } else {
412                log::warn!("Failed to hash file: {:?}", path_owned);
413            }
414        });
415    }
416
417    /// Gets the hash of a file, using the cache if available.
418    pub fn get_hash(&self, path: &Path) -> io::Result<blake3::Hash> {
419        let meta = fs::metadata(path)?;
420        let modified = meta.modified()?;
421        let relative = crate::strip_prefix(path, self.cache.base_dir())
422            .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
423        if let Some(hash) = self.cache.get_path_time(relative, modified) {
424            self.num_hash_looked_up.fetch_add(1, Ordering::Relaxed);
425            return Ok(hash);
426        }
427
428        let hash = self.compute_hash(path)?;
429        self.num_hashed.fetch_add(1, Ordering::Relaxed);
430        self.cache.insert(relative, modified, hash);
431        Ok(hash)
432    }
433
434    fn compute_hash(&self, path: &Path) -> io::Result<blake3::Hash> {
435        let start_time = std::time::Instant::now();
436        let mut f = fs::File::open(path)?;
437        let len = f.metadata()?.len();
438        let progress = self
439            .progress
440            .as_ref()
441            .map(|progress| progress.add_file(path, len))
442            .unwrap_or_else(Progress::none);
443        let mut hasher = blake3::Hasher::new();
444        if self.buffer_size == 0 {
445            if len > 0 {
446                let mmap = unsafe { memmap2::MmapOptions::new().map(&f)? };
447                hasher.update(&mmap[..]);
448                progress.inc(len);
449            }
450        } else {
451            let mut buf = vec![0u8; self.buffer_size];
452            loop {
453                let n = f.read(&mut buf)?;
454                if n == 0 {
455                    break;
456                }
457                hasher.update(&buf[..n]);
458                progress.inc(n as u64);
459            }
460        }
461        progress.finish();
462        log::debug!(
463            "Computed hash in {}: {:?}",
464            FormattedDuration(start_time.elapsed()),
465            path
466        );
467        Ok(hasher.finalize())
468    }
469}
470
/// Tests for duplicate finding and for the check/update mode.
#[cfg(test)]
mod tests {
    use super::*;

    /// Glob set that excludes the `.hash_cache` file (case-insensitively).
    fn default_exclude() -> globset::GlobSet {
        let mut builder = globset::GlobSetBuilder::new();
        builder.add(
            globset::GlobBuilder::new(".hash_cache")
                .case_insensitive(true)
                .build()
                .unwrap(),
        );
        builder.build().unwrap()
    }

    /// Creates a hasher for `dir` with the default exclusions applied.
    fn new_hasher(dir: &Path) -> FileHasher {
        let mut hasher = FileHasher::new(dir.to_path_buf());
        hasher.exclude = Some(default_exclude());
        hasher
    }

    /// Runs `check_internal` on a fresh hasher for `dir` and returns every emitted event.
    /// In update mode the cache is saved afterwards.
    fn run_check(dir: &Path, update: bool) -> anyhow::Result<Vec<CheckEvent>> {
        let hasher = new_hasher(dir);
        let (tx, rx) = mpsc::channel();
        hasher.check_internal(tx, update)?;
        // All senders are gone once `check_internal` has returned, so this terminates.
        let events = rx.iter().collect();
        if update {
            hasher.save_cache()?;
        }
        Ok(events)
    }

    /// Splits events into the reported `(path, status)` pairs and the `FileDone` count.
    fn partition_events(events: Vec<CheckEvent>) -> (Vec<(PathBuf, CheckStatus)>, usize) {
        let mut results = Vec::new();
        let mut file_done_count = 0;
        for event in events {
            match event {
                CheckEvent::Result(path, status) => results.push((path, status)),
                CheckEvent::FileDone => file_done_count += 1,
                CheckEvent::StartChecking | CheckEvent::TotalFiles(_) => {}
            }
        }
        (results, file_done_count)
    }

    #[test]
    fn find_duplicates() -> anyhow::Result<()> {
        let dir = tempfile::tempdir()?;

        let file1_path = dir.path().join("same1.txt");
        fs::write(&file1_path, "same content")?;

        let file2_path = dir.path().join("same2.txt");
        fs::write(&file2_path, "same content")?;

        let diff_path = dir.path().join("diff.txt");
        fs::write(&diff_path, "different content")?;

        let mut hasher = FileHasher::new(dir.path().to_path_buf());
        hasher.buffer_size = 8192;
        let duplicates = hasher.find_duplicates()?;

        // Only the two files that share a size are hashed.
        assert_eq!(hasher.num_hashed.load(Ordering::Relaxed), 2);
        assert_eq!(hasher.num_hash_looked_up.load(Ordering::Relaxed), 0);

        assert_eq!(duplicates.len(), 1);
        let group = &duplicates[0];
        assert_eq!(group.paths.len(), 2);
        assert_eq!(group.size, 12); // "same content" is 12 bytes

        assert!(group.paths.contains(&file1_path));
        assert!(group.paths.contains(&file2_path));

        Ok(())
    }

    #[test]
    fn find_duplicates_merge_cache() -> anyhow::Result<()> {
        let dir = tempfile::tempdir()?;
        let dir_path = dir.path();

        let sub_dir = dir_path.join("a").join("a");
        fs::create_dir_all(&sub_dir)?;

        let file1_path = sub_dir.join("1");
        fs::write(&file1_path, "same content")?;

        let file2_path = sub_dir.join("2");
        fs::write(&file2_path, "same content")?;

        // Create empty cache file in a/a to force it to be the cache base
        let cache_aa_path = sub_dir.join(FileHashCache::FILE_NAME);
        fs::File::create(&cache_aa_path)?;

        // Run find_duplicates on a/a
        let hasher_aa = FileHasher::new(sub_dir.clone());
        let duplicates_aa = hasher_aa.find_duplicates()?;
        assert_eq!(duplicates_aa.len(), 1);
        assert!(cache_aa_path.exists());
        assert_eq!(hasher_aa.num_hashed.load(Ordering::Relaxed), 2);
        assert_eq!(hasher_aa.num_hash_looked_up.load(Ordering::Relaxed), 0);

        // Create empty cache file in a to force it to be the cache base
        let root_a = dir_path.join("a");
        let cache_a_path = root_a.join(FileHashCache::FILE_NAME);
        fs::File::create(&cache_a_path)?;

        // Run find_duplicates on a
        let hasher_a = FileHasher::new(root_a.clone());
        let duplicates_a = hasher_a.find_duplicates()?;
        assert_eq!(duplicates_a.len(), 1);
        assert_eq!(hasher_a.num_hashed.load(Ordering::Relaxed), 0);
        assert_eq!(hasher_a.num_hash_looked_up.load(Ordering::Relaxed), 2);

        // The merged child cache should be removed.
        assert!(cache_a_path.exists());
        assert!(!cache_aa_path.exists());

        Ok(())
    }

    #[test]
    fn find_duplicates_with_exclude() -> anyhow::Result<()> {
        let dir = tempfile::tempdir()?;

        let file1_path = dir.path().join("same1.txt");
        fs::write(&file1_path, "same content")?;

        let file2_path = dir.path().join("same2.txt");
        fs::write(&file2_path, "same content")?;

        let exclude_path = dir.path().join("exclude.txt");
        fs::write(&exclude_path, "same content")?;

        let mut hasher = FileHasher::new(dir.path().to_path_buf());
        hasher.buffer_size = 8192;
        let mut builder = globset::GlobSetBuilder::new();
        builder.add(
            globset::GlobBuilder::new("exclude.txt")
                .case_insensitive(true)
                .build()?,
        );
        let filter = builder.build()?;
        hasher.exclude = Some(filter);

        let duplicates = hasher.find_duplicates()?;
        assert_eq!(duplicates.len(), 1);
        let group = &duplicates[0];
        assert_eq!(group.paths.len(), 2);
        assert!(group.paths.contains(&file1_path));
        assert!(group.paths.contains(&file2_path));
        assert!(!group.paths.contains(&exclude_path));
        Ok(())
    }

    #[test]
    fn test_check_mode_empty_cache() -> anyhow::Result<()> {
        let dir = tempfile::tempdir()?;
        fs::write(dir.path().join("file1.txt"), "content 1")?;
        fs::write(dir.path().join("file2.txt"), "content 2")?;

        let events = run_check(dir.path(), false)?;
        assert!(events.contains(&CheckEvent::StartChecking));
        assert!(events.contains(&CheckEvent::TotalFiles(2)));

        let (mut results, file_done_count) = partition_events(events);
        assert_eq!(file_done_count, 0);

        results.sort_by(|a, b| a.0.cmp(&b.0));
        assert_eq!(results.len(), 2);
        assert_eq!(results[0], (PathBuf::from("file1.txt"), CheckStatus::New));
        assert_eq!(results[1], (PathBuf::from("file2.txt"), CheckStatus::New));

        // Check mode without update must not create a cache file.
        assert!(!dir.path().join(FileHashCache::FILE_NAME).exists());
        Ok(())
    }

    #[test]
    fn test_check_mode_with_cache() -> anyhow::Result<()> {
        let dir = tempfile::tempdir()?;
        let file1_path = dir.path().join("file1.txt");
        fs::write(&file1_path, "content 1")?;
        let file2_path = dir.path().join("file2.txt");
        fs::write(&file2_path, "content 2")?;

        // Populate and persist the cache.
        let hasher = new_hasher(dir.path());
        let _hash1 = hasher.get_hash(&file1_path)?;
        let _hash2 = hasher.get_hash(&file2_path)?;
        hasher.save_cache()?;
        assert!(dir.path().join(FileHashCache::FILE_NAME).exists());

        // Nothing changed: every file is reported as done, none as a result.
        let (results, file_done_count) = partition_events(run_check(dir.path(), false)?);
        assert_eq!(results.len(), 0);
        assert_eq!(file_done_count, 2);

        fs::write(&file1_path, "content 1 modified")?;

        // Rewrite file2 with identical content so that only its timestamp changes.
        let mtime_before = fs::metadata(&file2_path)?.modified()?;
        std::thread::sleep(std::time::Duration::from_millis(10));
        fs::write(&file2_path, "content 2")?;
        let mtime_after = fs::metadata(&file2_path)?.modified()?;
        assert!(mtime_after > mtime_before);

        let (results, file_done_count) = partition_events(run_check(dir.path(), false)?);
        assert_eq!(results.len(), 1);
        assert_eq!(
            results[0],
            (PathBuf::from("file1.txt"), CheckStatus::Modified)
        );
        assert_eq!(file_done_count, 1);
        Ok(())
    }

    #[test]
    fn test_update_mode() -> anyhow::Result<()> {
        let dir = tempfile::tempdir()?;
        let dir_path = dir.path().to_path_buf();
        let file1_path = dir.path().join("file1.txt");
        fs::write(&file1_path, "content 1")?;

        // First update run creates the cache with the file's hash.
        run_check(&dir_path, true)?;
        assert!(dir.path().join(FileHashCache::FILE_NAME).exists());

        let cache = FileHashCache::new(&dir_path);
        let mtime1 = fs::metadata(&file1_path)?.modified()?;
        let hash1 = cache.get_path_time(&PathBuf::from("file1.txt"), mtime1);
        assert!(hash1.is_some());

        // Changed content: the update run stores the new hash.
        std::thread::sleep(std::time::Duration::from_millis(10));
        fs::write(&file1_path, "content 1 modified")?;
        let mtime1_mod = fs::metadata(&file1_path)?.modified()?;

        run_check(&dir_path, true)?;

        let cache = FileHashCache::new(&dir_path);
        let hash_mod = cache.get_path_time(&PathBuf::from("file1.txt"), mtime1_mod);
        assert!(hash_mod.is_some());
        assert_ne!(hash1, hash_mod);

        // Same content, newer timestamp: the update run refreshes the stored time.
        std::thread::sleep(std::time::Duration::from_millis(10));
        fs::write(&file1_path, "content 1 modified")?;
        let mtime1_mod2 = fs::metadata(&file1_path)?.modified()?;
        assert!(mtime1_mod2 > mtime1_mod);

        assert!(
            cache
                .get_path_time(&PathBuf::from("file1.txt"), mtime1_mod2)
                .is_none()
        );

        run_check(&dir_path, true)?;

        let cache = FileHashCache::new(&dir_path);
        assert!(
            cache
                .get_path_time(&PathBuf::from("file1.txt"), mtime1_mod2)
                .is_some()
        );
        Ok(())
    }
}
761}