Skip to main content

compare_dir/
file_hasher.rs

1use crate::{
2    ColumnFormatter, DirectoryComparer, FileComparer, FileHashCache, FileIterator, Progress,
3    ProgressBuilder,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use std::collections::HashMap;
8use std::fs;
9use std::io::{self, Read};
10use std::path::{Path, PathBuf};
11use std::sync::atomic::{AtomicUsize, Ordering};
12use std::sync::{Arc, mpsc};
13
14#[derive(Debug, Clone)]
15enum HashProgress {
16    StartDiscovering,
17    TotalFiles(usize),
18    Result(PathBuf, u64, blake3::Hash, bool),
19}
20
/// Outcome of comparing a file's freshly computed hash with the cached one.
///
/// The derived ordering follows declaration order, so the variants must not
/// be rearranged.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
enum CheckStatus {
    /// The cached hash equals the computed hash.
    Unchanged,
    /// The cache holds no hash for the file.
    New,
    /// The cached hash differs from the computed hash.
    Modified,
}
27
28#[derive(Debug, PartialEq)]
29enum CheckEvent {
30    StartChecking,
31    TotalFiles(usize),
32    Result(PathBuf, CheckStatus),
33    FileDone,
34}
35
/// Per-file-size bookkeeping used while looking for duplicate candidates.
enum EntryState {
    /// Exactly one file of this size has been seen so far; its path and
    /// modification time are kept so it can be hashed later if a second file
    /// of the same size shows up.
    Single(PathBuf, std::time::SystemTime),
    /// At least two files of this size were seen; every file of this size is
    /// now hashed as soon as it is encountered.
    Hashing,
}
40
/// One set of files that share the same content hash.
#[derive(Debug, Clone)]
pub struct DuplicatedFiles {
    /// Paths of all files in the set.
    pub paths: Vec<PathBuf>,
    /// Size in bytes shared by every file in the set.
    pub size: u64,
}
47
48/// A tool for finding duplicated files in a directory.
49pub struct FileHasher {
50    dir: PathBuf,
51    pub buffer_size: usize,
52    cache: Arc<FileHashCache>,
53    pub(crate) num_hashed: AtomicUsize,
54    pub(crate) num_hash_looked_up: AtomicUsize,
55    pub exclude: Option<GlobSet>,
56    pub progress: Option<Arc<ProgressBuilder>>,
57    pub jobs: usize,
58}
59
60impl FileHasher {
61    const DEFAULT_JOBS: usize = DirectoryComparer::DEFAULT_JOBS;
62
63    /// Creates a new `FileHasher` for the given directory.
64    pub fn new(dir: PathBuf) -> Self {
65        let cache = FileHashCache::find_or_new(&dir);
66        Self {
67            dir,
68            buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
69            cache,
70            num_hashed: AtomicUsize::new(0),
71            num_hash_looked_up: AtomicUsize::new(0),
72            exclude: None,
73            progress: None,
74            jobs: Self::DEFAULT_JOBS,
75        }
76    }
77
78    /// Remove a cache entry if it exists.
79    pub fn remove_cache_entry(&self, path: &Path) -> anyhow::Result<()> {
80        let relative = crate::strip_prefix(path, self.cache.base_dir())?;
81        self.cache.remove(relative);
82        Ok(())
83    }
84
85    /// Save the hash cache if it is dirty.
86    pub fn save_cache(&self) -> anyhow::Result<()> {
87        log::info!(
88            "Hash stats for {:?}: {} computed, {} looked up",
89            self.dir,
90            self.num_hashed.load(Ordering::Relaxed),
91            self.num_hash_looked_up.load(Ordering::Relaxed)
92        );
93        Ok(self.cache.save()?)
94    }
95
96    /// Merges another cache into this hasher's cache.
97    pub(crate) fn merge_cache(&self, other_cache: &FileHashCache) {
98        self.cache.merge(other_cache);
99    }
100
101    /// Clears the loaded hashes in the cache.
102    pub fn clear_cache(&self) -> anyhow::Result<()> {
103        let relative = crate::strip_prefix(&self.dir, self.cache.base_dir())?;
104        self.cache.clear(relative);
105        Ok(())
106    }
107
108    /// Executes the check/update process.
109    pub fn check(&self, update: bool) -> anyhow::Result<()> {
110        let start_time = std::time::Instant::now();
111        let progress = self
112            .progress
113            .as_ref()
114            .map(|progress| progress.add_spinner())
115            .unwrap_or_else(Progress::none);
116        progress.set_message("Scanning directory...");
117        let mut num_new = 0;
118        let mut num_modified = 0;
119        std::thread::scope(|scope| {
120            let (tx, rx) = mpsc::channel();
121            scope.spawn(|| {
122                if let Err(e) = self.check_streaming(tx, update) {
123                    log::error!("Error during check: {}", e);
124                }
125            });
126            while let Ok(event) = rx.recv() {
127                match event {
128                    CheckEvent::StartChecking => {
129                        progress.set_message("Checking files...");
130                    }
131                    CheckEvent::TotalFiles(total) => {
132                        progress.set_length(total as u64);
133                        progress.set_message("");
134                    }
135                    CheckEvent::Result(path, status) => {
136                        let symbol = match status {
137                            CheckStatus::New => {
138                                num_new += 1;
139                                '+'
140                            }
141                            CheckStatus::Modified => {
142                                num_modified += 1;
143                                '!'
144                            }
145                            CheckStatus::Unchanged => unreachable!(),
146                        };
147                        progress.inc(1);
148                        progress.suspend(|| {
149                            println!("{} {}", symbol, path.display());
150                        });
151                    }
152                    CheckEvent::FileDone => {
153                        progress.inc(1);
154                    }
155                }
156            }
157        });
158        progress.finish();
159        if update {
160            self.save_cache()?;
161        }
162        let summary = [
163            ("Elapsed:", 0),
164            ("Hash computed:", self.num_hashed.load(Ordering::Relaxed)),
165            ("New files:", num_new),
166            ("Modified files:", num_modified),
167        ];
168        let formatter = ColumnFormatter::new(summary.iter().map(|(s, _)| *s));
169        let mut writer = std::io::stderr();
170        formatter.write_value(
171            &mut writer,
172            summary[0].0,
173            FormattedDuration(start_time.elapsed()),
174        )?;
175        formatter.write_values(&mut writer, &summary[1..])?;
176        Ok(())
177    }
178
179    fn check_streaming(&self, tx: mpsc::Sender<CheckEvent>, update: bool) -> anyhow::Result<()> {
180        std::thread::scope(|global_scope| {
181            let mut it = FileIterator::new(self.dir.clone());
182            it.hasher = Some(self);
183            it.exclude = self.exclude.as_ref();
184            let it_rx = it.spawn_in_scope(global_scope);
185            tx.send(CheckEvent::StartChecking)?;
186            let pool = crate::build_thread_pool(self.jobs)?;
187            pool.scope(move |scope| -> anyhow::Result<()> {
188                let mut total_files = 0;
189                for (rel_path, abs_path) in it_rx {
190                    total_files += 1;
191                    let tx = tx.clone();
192                    scope.spawn(move |_| {
193                        let status = self.check_file(&abs_path, update);
194                        let event = match status {
195                            Ok(CheckStatus::New) | Ok(CheckStatus::Modified) => {
196                                CheckEvent::Result(rel_path, status.unwrap())
197                            }
198                            Ok(CheckStatus::Unchanged) => CheckEvent::FileDone,
199                            Err(e) => {
200                                log::warn!("Failed to check file {:?}: {}", rel_path, e);
201                                CheckEvent::FileDone
202                            }
203                        };
204                        if tx.send(event).is_err() {
205                            log::error!("Send failed");
206                        }
207                    });
208                }
209                tx.send(CheckEvent::TotalFiles(total_files))?;
210                Ok(())
211            })
212        })?;
213        Ok(())
214    }
215
216    fn check_file(&self, abs_path: &Path, update: bool) -> anyhow::Result<CheckStatus> {
217        assert!(abs_path.is_absolute());
218        let computed_hash = self.compute_hash(abs_path)?;
219        let rel_path = crate::strip_prefix(abs_path, self.cache.base_dir())?;
220        let cached_hash = self.cache.get_by_path(rel_path);
221        let status = match cached_hash {
222            None => CheckStatus::New,
223            Some(cached) => {
224                if computed_hash != cached {
225                    CheckStatus::Modified
226                } else {
227                    CheckStatus::Unchanged
228                }
229            }
230        };
231        if update {
232            let modified = fs::metadata(abs_path)?.modified()?;
233            match status {
234                CheckStatus::New | CheckStatus::Modified => {
235                    self.cache.insert(rel_path, modified, computed_hash);
236                }
237                CheckStatus::Unchanged => {
238                    if self.cache.get(rel_path, modified).is_none() {
239                        self.cache.insert(rel_path, modified, computed_hash);
240                    }
241                }
242            }
243        }
244        Ok(status)
245    }
246
247    /// Executes the duplicate file finding process and prints results.
248    pub fn run(&self) -> anyhow::Result<()> {
249        let start_time = std::time::Instant::now();
250        let mut duplicates = self.find_duplicates()?;
251        if duplicates.is_empty() {
252            println!("No duplicates found.");
253        } else {
254            duplicates.sort_by_key(|a| a.size);
255            let mut total_wasted_space = 0;
256            for dupes in &duplicates {
257                let paths = &dupes.paths;
258                let file_size = dupes.size;
259                println!(
260                    "Identical {} files of {}:",
261                    paths.len(),
262                    crate::human_readable_size(file_size)
263                );
264                for path in paths {
265                    println!("  {}", path.display());
266                }
267                total_wasted_space += file_size * (paths.len() as u64 - 1);
268            }
269            eprintln!(
270                "Total wasted space: {}",
271                crate::human_readable_size(total_wasted_space)
272            );
273        }
274        eprintln!("Finished in {}.", FormattedDuration(start_time.elapsed()));
275        Ok(())
276    }
277
278    /// Finds duplicated files and returns a list of duplicate groups.
279    pub fn find_duplicates(&self) -> anyhow::Result<Vec<DuplicatedFiles>> {
280        let progress = self
281            .progress
282            .as_ref()
283            .map(|progress| progress.add_spinner())
284            .unwrap_or_else(Progress::none);
285        progress.set_message("Scanning directories...");
286
287        let (tx, rx) = mpsc::channel();
288        let mut by_hash: HashMap<blake3::Hash, DuplicatedFiles> = HashMap::new();
289        let mut num_cache_hits = 0;
290        std::thread::scope(|scope| {
291            scope.spawn(|| {
292                if let Err(e) = self.find_duplicates_streaming(tx) {
293                    log::error!("Error during duplicate finding: {}", e);
294                }
295            });
296
297            while let Ok(event) = rx.recv() {
298                match event {
299                    HashProgress::StartDiscovering => {
300                        progress.set_message("Hashing files...");
301                    }
302                    HashProgress::TotalFiles(total) => {
303                        progress.set_length(total as u64);
304                        if num_cache_hits > 0 {
305                            progress.set_message(format!(" ({} cache hits)", num_cache_hits));
306                        }
307                    }
308                    HashProgress::Result(path, size, hash, is_cache_hit) => {
309                        if is_cache_hit {
310                            num_cache_hits += 1;
311                            if progress.length().is_none() {
312                                progress.set_message(format!(
313                                    "Hashing files... ({} cache hits)",
314                                    num_cache_hits
315                                ));
316                            } else {
317                                progress.set_message(format!(" ({} cache hits)", num_cache_hits));
318                            }
319                        }
320
321                        progress.inc(1);
322                        let entry = by_hash.entry(hash).or_insert_with(|| DuplicatedFiles {
323                            paths: Vec::new(),
324                            size,
325                        });
326                        // Hash collisions shouldn't happen, but if they do, sizes shouldn't mismatch.
327                        assert_eq!(entry.size, size, "Hash collision: sizes do not match");
328                        entry.paths.push(path);
329                    }
330                }
331            }
332        });
333        progress.finish();
334
335        let mut duplicates = Vec::new();
336        for (_, mut dupes) in by_hash {
337            if dupes.paths.len() > 1 {
338                dupes.paths.sort();
339                duplicates.push(dupes);
340            }
341        }
342        Ok(duplicates)
343    }
344
345    fn find_duplicates_streaming(&self, tx: mpsc::Sender<HashProgress>) -> anyhow::Result<()> {
346        tx.send(HashProgress::StartDiscovering)?;
347        let mut by_size: HashMap<u64, EntryState> = HashMap::new();
348        let mut total_hashed = 0;
349        std::thread::scope(|global_scope| {
350            let mut it = FileIterator::new(self.dir.clone());
351            it.hasher = Some(self);
352            it.exclude = self.exclude.as_ref();
353            let it_rx = it.spawn_in_scope(global_scope);
354            let pool = crate::build_thread_pool(self.jobs)?;
355            pool.scope(move |scope| -> anyhow::Result<()> {
356                for (_, current_path) in it_rx {
357                    let meta = fs::metadata(&current_path)?;
358                    let size = meta.len();
359                    let modified = meta.modified()?;
360
361                    // Small optimization: If file size is 0, it's not really worth treating
362                    // as wasted space duplicates in the same way, but keeping it unified for now.
363                    match by_size.entry(size) {
364                        std::collections::hash_map::Entry::Occupied(mut occ) => match occ.get_mut()
365                        {
366                            EntryState::Single(first_path, first_modified) => {
367                                // We found a second file of identical size.
368                                // Time to start hashing both the *original* matching file and the *new* one!
369                                self.spawn_hash_task(scope, first_path, size, *first_modified, &tx);
370                                self.spawn_hash_task(scope, &current_path, size, modified, &tx);
371
372                                // Modify the state to indicate we are now fully hashing this size bucket.
373                                *occ.get_mut() = EntryState::Hashing;
374                                total_hashed += 2;
375                            }
376                            EntryState::Hashing => {
377                                // File size bucket already hashing; just dynamically spawn the new file immediately.
378                                self.spawn_hash_task(scope, &current_path, size, modified, &tx);
379                                total_hashed += 1;
380                            }
381                        },
382                        std::collections::hash_map::Entry::Vacant(vac) => {
383                            vac.insert(EntryState::Single(current_path, modified));
384                        }
385                    }
386                }
387                tx.send(HashProgress::TotalFiles(total_hashed))?;
388                Ok(())
389            })
390        })?;
391
392        // The scope waits for all spawned tasks to complete.
393        // Channel `tx` gets naturally closed when it drops at the end of this function.
394        self.save_cache()
395    }
396
397    fn spawn_hash_task<'scope>(
398        &'scope self,
399        scope: &rayon::Scope<'scope>,
400        path: &Path,
401        size: u64,
402        modified: std::time::SystemTime,
403        tx: &mpsc::Sender<HashProgress>,
404    ) {
405        let relative = crate::strip_prefix(path, self.cache.base_dir())
406            .expect("path should be in cache base_dir");
407        if let Some(hash) = self.cache.get(relative, modified) {
408            self.num_hash_looked_up.fetch_add(1, Ordering::Relaxed);
409            let _ = tx.send(HashProgress::Result(path.to_path_buf(), size, hash, true));
410            return;
411        }
412
413        let path_owned = path.to_path_buf();
414        let relative_owned = relative.to_path_buf();
415        let tx_owned = tx.clone();
416        scope.spawn(move |_| {
417            if let Ok(hash) = self.compute_hash(&path_owned) {
418                self.cache.insert(&relative_owned, modified, hash);
419                let _ = tx_owned.send(HashProgress::Result(path_owned, size, hash, false));
420            } else {
421                log::warn!("Failed to hash file: {:?}", path_owned);
422            }
423        });
424    }
425
426    /// Gets the hash of a file, using the cache if available.
427    pub fn get_hash(&self, path: &Path) -> io::Result<blake3::Hash> {
428        let meta = fs::metadata(path)?;
429        let modified = meta.modified()?;
430        let relative = crate::strip_prefix(path, self.cache.base_dir())
431            .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
432        if let Some(hash) = self.cache.get(relative, modified) {
433            self.num_hash_looked_up.fetch_add(1, Ordering::Relaxed);
434            return Ok(hash);
435        }
436
437        let hash = self.compute_hash(path)?;
438        self.cache.insert(relative, modified, hash);
439        Ok(hash)
440    }
441
442    fn compute_hash(&self, path: &Path) -> io::Result<blake3::Hash> {
443        let start_time = std::time::Instant::now();
444        let mut f = fs::File::open(path)?;
445        let len = f.metadata()?.len();
446        let progress = self
447            .progress
448            .as_ref()
449            .map(|progress| progress.add_file(path, len))
450            .unwrap_or_else(Progress::none);
451        let mut hasher = blake3::Hasher::new();
452        if self.buffer_size == 0 {
453            if len > 0 {
454                let mmap = unsafe { memmap2::MmapOptions::new().map(&f)? };
455                hasher.update(&mmap[..]);
456                progress.inc(len);
457            }
458        } else {
459            let mut buf = vec![0u8; self.buffer_size];
460            loop {
461                let n = f.read(&mut buf)?;
462                if n == 0 {
463                    break;
464                }
465                hasher.update(&buf[..n]);
466                progress.inc(n as u64);
467            }
468        }
469        progress.finish();
470        self.num_hashed.fetch_add(1, Ordering::Relaxed);
471        let hash = hasher.finalize();
472        log::debug!(
473            "Computed hash in {}: {:?}",
474            FormattedDuration(start_time.elapsed()),
475            path
476        );
477        Ok(hash)
478    }
479}
480
481#[cfg(test)]
482mod tests {
483    use super::*;
484
485    fn default_exclude() -> globset::GlobSet {
486        let mut builder = globset::GlobSetBuilder::new();
487        builder.add(
488            globset::GlobBuilder::new(".hash_cache")
489                .case_insensitive(true)
490                .build()
491                .unwrap(),
492        );
493        builder.build().unwrap()
494    }
495
496    #[test]
497    fn find_duplicates() -> anyhow::Result<()> {
498        let dir = tempfile::tempdir()?;
499
500        let file1_path = dir.path().join("same1.txt");
501        fs::write(&file1_path, "same content")?;
502
503        let file2_path = dir.path().join("same2.txt");
504        fs::write(&file2_path, "same content")?;
505
506        let diff_path = dir.path().join("diff.txt");
507        fs::write(&diff_path, "different content")?;
508
509        let mut hasher = FileHasher::new(dir.path().to_path_buf());
510        hasher.buffer_size = 8192;
511        let duplicates = hasher.find_duplicates()?;
512
513        assert_eq!(hasher.num_hashed.load(Ordering::Relaxed), 2);
514        assert_eq!(hasher.num_hash_looked_up.load(Ordering::Relaxed), 0);
515
516        assert_eq!(duplicates.len(), 1);
517        let group = &duplicates[0];
518        assert_eq!(group.paths.len(), 2);
519        assert_eq!(group.size, 12); // "same content" is 12 bytes
520
521        assert!(group.paths.contains(&file1_path));
522        assert!(group.paths.contains(&file2_path));
523
524        Ok(())
525    }
526
527    #[test]
528    fn find_duplicates_merge_cache() -> anyhow::Result<()> {
529        let dir = tempfile::tempdir()?;
530        let dir_path = dir.path();
531
532        let sub_dir = dir_path.join("a").join("a");
533        fs::create_dir_all(&sub_dir)?;
534
535        let file1_path = sub_dir.join("1");
536        fs::write(&file1_path, "same content")?;
537
538        let file2_path = sub_dir.join("2");
539        fs::write(&file2_path, "same content")?;
540
541        // Create empty cache file in a/a to force it to be the cache base
542        let cache_aa_path = sub_dir.join(FileHashCache::FILE_NAME);
543        fs::File::create(&cache_aa_path)?;
544
545        // Run find_duplicates on a/a
546        let hasher_aa = FileHasher::new(sub_dir.clone());
547        let duplicates_aa = hasher_aa.find_duplicates()?;
548        assert_eq!(duplicates_aa.len(), 1);
549        assert!(cache_aa_path.exists());
550        assert_eq!(hasher_aa.num_hashed.load(Ordering::Relaxed), 2);
551        assert_eq!(hasher_aa.num_hash_looked_up.load(Ordering::Relaxed), 0);
552
553        // Create empty cache file in a to force it to be the cache base
554        let root_a = dir_path.join("a");
555        let cache_a_path = root_a.join(FileHashCache::FILE_NAME);
556        fs::File::create(&cache_a_path)?;
557
558        // Run find_duplicates on a
559        let hasher_a = FileHasher::new(root_a.clone());
560        let duplicates_a = hasher_a.find_duplicates()?;
561        assert_eq!(duplicates_a.len(), 1);
562        assert_eq!(hasher_a.num_hashed.load(Ordering::Relaxed), 0);
563        assert_eq!(hasher_a.num_hash_looked_up.load(Ordering::Relaxed), 2);
564
565        // The merged child cache should be removed.
566        assert!(cache_a_path.exists());
567        assert!(!cache_aa_path.exists());
568
569        Ok(())
570    }
571
572    #[test]
573    fn find_duplicates_with_exclude() -> anyhow::Result<()> {
574        let dir = tempfile::tempdir()?;
575
576        let file1_path = dir.path().join("same1.txt");
577        fs::write(&file1_path, "same content")?;
578
579        let file2_path = dir.path().join("same2.txt");
580        fs::write(&file2_path, "same content")?;
581
582        let exclude_path = dir.path().join("exclude.txt");
583        fs::write(&exclude_path, "same content")?;
584
585        let mut hasher = FileHasher::new(dir.path().to_path_buf());
586        hasher.buffer_size = 8192;
587        let mut builder = globset::GlobSetBuilder::new();
588        builder.add(
589            globset::GlobBuilder::new("exclude.txt")
590                .case_insensitive(true)
591                .build()?,
592        );
593        let filter = builder.build()?;
594        hasher.exclude = Some(filter);
595
596        let duplicates = hasher.find_duplicates()?;
597        assert_eq!(duplicates.len(), 1);
598        let group = &duplicates[0];
599        assert_eq!(group.paths.len(), 2);
600        assert!(group.paths.contains(&file1_path));
601        assert!(group.paths.contains(&file2_path));
602        assert!(!group.paths.contains(&exclude_path));
603        Ok(())
604    }
605
606    #[test]
607    fn check_mode_empty_cache() -> anyhow::Result<()> {
608        let dir = tempfile::tempdir()?;
609        let dir_path = dir.path().to_path_buf();
610        println!("{:?}", dir_path);
611        let file1_path = dir.path().join("file1.txt");
612        fs::write(&file1_path, "content 1")?;
613        let file2_path = dir.path().join("file2.txt");
614        fs::write(&file2_path, "content 2")?;
615
616        let mut hasher = FileHasher::new(dir_path.clone());
617        hasher.exclude = Some(default_exclude());
618        let (tx, rx) = mpsc::channel();
619        hasher.check_streaming(tx, false)?;
620        let mut results = Vec::new();
621        let mut start_seen = false;
622        let mut total_files = None;
623        let mut file_done_count = 0;
624        while let Ok(event) = rx.recv() {
625            match event {
626                CheckEvent::StartChecking => start_seen = true,
627                CheckEvent::TotalFiles(total) => total_files = Some(total),
628                CheckEvent::Result(path, status) => results.push((path, status)),
629                CheckEvent::FileDone => file_done_count += 1,
630            }
631        }
632        assert!(start_seen);
633        assert_eq!(total_files, Some(2));
634        assert_eq!(file_done_count, 0);
635
636        results.sort_by(|a, b| a.0.cmp(&b.0));
637        assert_eq!(results.len(), 2);
638        assert_eq!(results[0], (PathBuf::from("file1.txt"), CheckStatus::New));
639        assert_eq!(results[1], (PathBuf::from("file2.txt"), CheckStatus::New));
640
641        assert!(!dir.path().join(FileHashCache::FILE_NAME).exists());
642        Ok(())
643    }
644
645    #[test]
646    fn check_mode_with_cache() -> anyhow::Result<()> {
647        let dir = tempfile::tempdir()?;
648        let dir_path = dir.path().to_path_buf();
649        let file1_path = dir.path().join("file1.txt");
650        fs::write(&file1_path, "content 1")?;
651        let file2_path = dir.path().join("file2.txt");
652        fs::write(&file2_path, "content 2")?;
653
654        let mut hasher = FileHasher::new(dir_path.clone());
655        hasher.exclude = Some(default_exclude());
656        let _hash1 = hasher.get_hash(&file1_path)?;
657        let _hash2 = hasher.get_hash(&file2_path)?;
658        hasher.save_cache()?;
659        assert!(dir.path().join(FileHashCache::FILE_NAME).exists());
660
661        let mut hasher = FileHasher::new(dir_path.clone());
662        hasher.exclude = Some(default_exclude());
663        let (tx, rx) = mpsc::channel();
664        hasher.check_streaming(tx, false)?;
665        let mut results = Vec::new();
666        let mut file_done_count = 0;
667        while let Ok(event) = rx.recv() {
668            match event {
669                CheckEvent::Result(path, status) => results.push((path, status)),
670                CheckEvent::FileDone => file_done_count += 1,
671                _ => {}
672            }
673        }
674        assert_eq!(results.len(), 0);
675        assert_eq!(file_done_count, 2);
676
677        fs::write(&file1_path, "content 1 modified")?;
678
679        let file2_meta_before = fs::metadata(&file2_path)?;
680        let mtime_before = file2_meta_before.modified()?;
681        std::thread::sleep(std::time::Duration::from_millis(10));
682        fs::write(&file2_path, "content 2")?;
683        let file2_meta_after = fs::metadata(&file2_path)?;
684        let mtime_after = file2_meta_after.modified()?;
685        assert!(mtime_after > mtime_before);
686
687        let mut hasher = FileHasher::new(dir_path.clone());
688        hasher.exclude = Some(default_exclude());
689        let (tx, rx) = mpsc::channel();
690        hasher.check_streaming(tx, false)?;
691        let mut results = Vec::new();
692        let mut file_done_count = 0;
693        while let Ok(event) = rx.recv() {
694            match event {
695                CheckEvent::Result(path, status) => results.push((path, status)),
696                CheckEvent::FileDone => file_done_count += 1,
697                _ => {}
698            }
699        }
700        assert_eq!(results.len(), 1);
701        assert_eq!(
702            results[0],
703            (PathBuf::from("file1.txt"), CheckStatus::Modified)
704        );
705        assert_eq!(file_done_count, 1);
706        Ok(())
707    }
708
709    #[test]
710    fn check_update_mode() -> anyhow::Result<()> {
711        let dir = tempfile::tempdir()?;
712        let dir_path = dir.path().to_path_buf();
713        let file1_path = dir.path().join("file1.txt");
714        fs::write(&file1_path, "content 1")?;
715
716        let mut hasher = FileHasher::new(dir_path.clone());
717        hasher.exclude = Some(default_exclude());
718        let (tx, rx) = mpsc::channel();
719        hasher.check_streaming(tx, true)?;
720        while rx.recv().is_ok() {}
721        hasher.save_cache()?;
722        assert!(dir.path().join(FileHashCache::FILE_NAME).exists());
723
724        let cache = FileHashCache::new(&dir_path);
725        let mtime1 = fs::metadata(&file1_path)?.modified()?;
726        let hash1 = cache.get(&PathBuf::from("file1.txt"), mtime1);
727        assert!(hash1.is_some());
728
729        std::thread::sleep(std::time::Duration::from_millis(10));
730        fs::write(&file1_path, "content 1 modified")?;
731        let mtime1_mod = fs::metadata(&file1_path)?.modified()?;
732
733        let mut hasher = FileHasher::new(dir_path.clone());
734        hasher.exclude = Some(default_exclude());
735        let (tx, rx) = mpsc::channel();
736        hasher.check_streaming(tx, true)?;
737        while rx.recv().is_ok() {}
738        hasher.save_cache()?;
739
740        let cache = FileHashCache::new(&dir_path);
741        let hash_mod = cache.get(&PathBuf::from("file1.txt"), mtime1_mod);
742        assert!(hash_mod.is_some());
743        assert_ne!(hash1, hash_mod);
744
745        std::thread::sleep(std::time::Duration::from_millis(10));
746        fs::write(&file1_path, "content 1 modified")?;
747        let mtime1_mod2 = fs::metadata(&file1_path)?.modified()?;
748        assert!(mtime1_mod2 > mtime1_mod);
749
750        assert!(
751            cache
752                .get(&PathBuf::from("file1.txt"), mtime1_mod2)
753                .is_none()
754        );
755
756        let mut hasher = FileHasher::new(dir_path.clone());
757        hasher.exclude = Some(default_exclude());
758        let (tx, rx) = mpsc::channel();
759        hasher.check_streaming(tx, true)?;
760        while rx.recv().is_ok() {}
761        hasher.save_cache()?;
762
763        let cache = FileHashCache::new(&dir_path);
764        assert!(
765            cache
766                .get(&PathBuf::from("file1.txt"), mtime1_mod2)
767                .is_some()
768        );
769        Ok(())
770    }
771}