1use crate::{
2 ColumnFormatter, DirectoryComparer, FileComparer, FileHashCache, FileIterator, Progress,
3 ProgressBuilder,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use std::{
8 collections::HashMap,
9 fs,
10 io::{self, Read, stdout},
11 path::{Path, PathBuf},
12 sync::{
13 Arc,
14 atomic::{AtomicUsize, Ordering},
15 mpsc,
16 },
17 time,
18};
19
/// Progress events sent by `find_duplicates_streaming` to the loop in `find_duplicates`.
#[derive(Debug, Clone)]
enum HashProgress {
    /// Sent once, before any directory is walked.
    StartDiscovering,
    /// Number of files that were scheduled for hashing (cache hits included); sent after
    /// the last file has been received from the directory walkers.
    TotalFiles(usize),
    /// A hash is available: file path, size in bytes, hash, and whether the hash came from
    /// the cache (`true`) or was freshly computed (`false`).
    Result(PathBuf, u64, blake3::Hash, bool),
    /// Hashing one file failed.
    Error,
}
27
/// Outcome of comparing a file's freshly computed hash with the hash stored in the cache.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
enum CheckStatus {
    /// The computed hash equals the cached hash.
    Unchanged,
    /// The cache holds no hash for the file's path.
    New,
    /// The computed hash differs from the cached hash.
    Modified,
}
34
/// Events sent by `check_streaming` to whoever drives the check (the loop in `check`).
#[derive(Debug, PartialEq)]
enum CheckEvent {
    /// Sent once, after the directory walker has been started.
    StartChecking,
    /// Number of files handed to the worker pool; sent after the walker is exhausted.
    TotalFiles(usize),
    /// A file was found to be `New` or `Modified`; carries the path reported by the walker
    /// as the first element of its item (bound as `rel_path` in `check_streaming`).
    Result(PathBuf, CheckStatus),
    /// A file was checked and is unchanged.
    FileDone,
    /// Checking one file failed.
    Error,
}
43
/// Per-file-size bookkeeping used by `find_duplicates_streaming`.
enum EntryState {
    /// Exactly one file of this size has been seen so far; its path and modification time
    /// are kept so it can be hashed later if a second file of the same size shows up.
    Single(PathBuf, time::SystemTime),
    /// At least two files of this size have been seen; every further file of this size is
    /// hashed as soon as it is discovered.
    Hashing,
}
48
/// Hashes files below one or more directories, backed by a [`FileHashCache`], to find
/// duplicated files (`run` / `find_duplicates`) or to detect new and modified files (`check`).
pub struct FileHasher {
    /// Directories to scan.
    dirs: Vec<PathBuf>,
    /// Size in bytes of the read buffer used while hashing; `0` selects memory-mapped hashing.
    pub buffer_size: usize,
    /// Hash cache, created by `new` for the common ancestor of `dirs`.
    pub(crate) cache: Arc<FileHashCache>,
    /// Number of hashes computed from file contents.
    pub(crate) num_hashed: AtomicUsize,
    /// Number of hashes served from the cache.
    pub(crate) num_hash_looked_up: AtomicUsize,
    /// Optional glob set handed to the directory walker as its `exclude` filter.
    pub exclude: Option<GlobSet>,
    /// Optional progress reporting; when `None`, `Progress::none()` is used instead.
    pub progress: Option<Arc<ProgressBuilder>>,
    /// When `true`, `run` prints duplicate groups as YAML instead of the human-readable form.
    pub is_yaml_format: bool,
    /// Value passed to `crate::build_thread_pool` when creating the worker pool.
    pub jobs: usize,
}
61
62impl FileHasher {
    /// Initial value of the `jobs` field set by `new`; mirrors `DirectoryComparer::DEFAULT_JOBS`.
    const DEFAULT_JOBS: usize = DirectoryComparer::DEFAULT_JOBS;
64
65 pub fn new<P: AsRef<Path>>(dirs: &[P]) -> anyhow::Result<Self> {
67 if dirs.is_empty() {
68 anyhow::bail!("At least one directory must be specified.");
69 }
70 let common_ancestor = crate::common_ancestor(dirs)
71 .ok_or_else(|| anyhow::anyhow!("No common ancestor found"))?;
72 Ok(Self {
73 dirs: dirs.iter().map(|p| p.as_ref().to_path_buf()).collect(),
74 buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
75 cache: FileHashCache::find_or_new(&common_ancestor),
76 num_hashed: AtomicUsize::new(0),
77 num_hash_looked_up: AtomicUsize::new(0),
78 exclude: None,
79 progress: None,
80 is_yaml_format: false,
81 jobs: Self::DEFAULT_JOBS,
82 })
83 }
84
85 pub fn remove_cache_entry(&self, path: &Path) -> anyhow::Result<()> {
87 let relative = crate::strip_prefix(path, self.cache.base_dir())?;
88 self.cache.remove(relative);
89 Ok(())
90 }
91
92 pub fn save_cache(&self) -> anyhow::Result<()> {
94 log::info!(
95 "Hash stats for {:?}: {} computed, {} looked up",
96 self.dirs,
97 self.num_hashed.load(Ordering::Relaxed),
98 self.num_hash_looked_up.load(Ordering::Relaxed)
99 );
100 Ok(self.cache.save()?)
101 }
102
    /// Merges `other_cache` into this hasher's cache by delegating to the cache's `merge`.
    pub(crate) fn merge_cache(&self, other_cache: &FileHashCache) {
        self.cache.merge(other_cache);
    }
107
108 pub fn clear_cache(&self) -> anyhow::Result<()> {
110 for dir in &self.dirs {
111 let relative = crate::strip_prefix(dir, self.cache.base_dir())?;
112 self.cache.clear(relative);
113 }
114 Ok(())
115 }
116
117 pub fn check(&self, update: bool) -> anyhow::Result<()> {
119 if self.dirs.len() > 1 {
120 anyhow::bail!("Check mode only supports one directory.");
121 }
122 let start_time = time::Instant::now();
123 let progress = self
124 .progress
125 .as_ref()
126 .map(|progress| progress.add_spinner())
127 .unwrap_or_else(Progress::none);
128 progress.set_message("Scanning directory...");
129 let mut num_new = 0;
130 let mut num_modified = 0;
131 let mut num_error = 0;
132 std::thread::scope(|scope| {
133 let (tx, rx) = mpsc::channel();
134 scope.spawn(|| {
135 if let Err(e) = self.check_streaming(tx, update) {
136 log::error!("Error during check: {}", e);
137 }
138 });
139 while let Ok(event) = rx.recv() {
140 match event {
141 CheckEvent::StartChecking => {
142 progress.set_message("Checking files...");
143 }
144 CheckEvent::TotalFiles(total) => {
145 progress.set_length(total as u64);
146 progress.set_message("");
147 }
148 CheckEvent::Result(path, status) => {
149 let symbol = match status {
150 CheckStatus::New => {
151 num_new += 1;
152 '+'
153 }
154 CheckStatus::Modified => {
155 num_modified += 1;
156 '!'
157 }
158 CheckStatus::Unchanged => unreachable!(),
159 };
160 progress.inc(1);
161 progress.suspend_for(stdout(), || {
162 println!("{} {}", symbol, path.display());
163 });
164 }
165 CheckEvent::FileDone => {
166 progress.inc(1);
167 }
168 CheckEvent::Error => {
169 progress.inc(1);
170 num_error += 1;
171 }
172 }
173 }
174 });
175 progress.finish();
176 if update {
177 self.save_cache()?;
178 }
179 self.print_check_summary(&start_time, num_new, num_modified, num_error)?;
180 Ok(())
181 }
182
183 fn print_check_summary(
184 &self,
185 start_time: &time::Instant,
186 num_new: usize,
187 num_modified: usize,
188 num_error: usize,
189 ) -> io::Result<()> {
190 let summary = [
191 ("Elapsed:", 0),
192 ("Hash computed:", self.num_hashed.load(Ordering::Relaxed)),
193 ("New files:", num_new),
194 ("Modified files:", num_modified),
195 ("Errors:", num_error),
196 ];
197 let formatter = ColumnFormatter::new(summary.iter().map(|(s, _)| *s));
198 let mut writer = std::io::stderr();
199 formatter.write_value(
200 &mut writer,
201 summary[0].0,
202 FormattedDuration(start_time.elapsed()),
203 )?;
204 formatter.write_values(&mut writer, &summary[1..])
205 }
206
207 fn check_streaming(&self, tx: mpsc::Sender<CheckEvent>, update: bool) -> anyhow::Result<()> {
208 std::thread::scope(|global_scope| {
209 let mut it = FileIterator::new(self.dirs[0].clone());
210 it.hasher = Some(self);
211 it.exclude = self.exclude.as_ref();
212 let it_rx = it.spawn_in_scope(global_scope);
213 tx.send(CheckEvent::StartChecking)?;
214 let pool = crate::build_thread_pool(self.jobs)?;
215 pool.scope(move |scope| -> anyhow::Result<()> {
216 let mut total_files = 0;
217 for (rel_path, abs_path) in it_rx {
218 total_files += 1;
219 let tx = tx.clone();
220 scope.spawn(move |_| {
221 let status = self.check_file(&abs_path, update);
222 let event = match status {
223 Ok(CheckStatus::New) | Ok(CheckStatus::Modified) => {
224 CheckEvent::Result(rel_path, status.unwrap())
225 }
226 Ok(CheckStatus::Unchanged) => CheckEvent::FileDone,
227 Err(e) => {
228 log::error!("Failed to check file {:?}: {}", rel_path, e);
229 CheckEvent::Error
230 }
231 };
232 if tx.send(event).is_err() {
233 log::error!("Send failed");
234 }
235 });
236 }
237 tx.send(CheckEvent::TotalFiles(total_files))?;
238 Ok(())
239 })
240 })?;
241 Ok(())
242 }
243
244 fn check_file(&self, abs_path: &Path, update: bool) -> anyhow::Result<CheckStatus> {
245 assert!(abs_path.is_absolute());
246 let computed_hash = self.compute_hash(abs_path)?;
247 let rel_path = crate::strip_prefix(abs_path, self.cache.base_dir())?;
248 let cached_hash = self.cache.get_by_path(rel_path);
249 let status = match cached_hash {
250 None => CheckStatus::New,
251 Some(cached) => {
252 if computed_hash != cached {
253 CheckStatus::Modified
254 } else {
255 CheckStatus::Unchanged
256 }
257 }
258 };
259 if update {
260 let modified = fs::metadata(abs_path)?.modified()?;
261 match status {
262 CheckStatus::New | CheckStatus::Modified => {
263 self.cache.insert(rel_path, modified, computed_hash);
264 }
265 CheckStatus::Unchanged => {
266 if self.cache.get(rel_path, modified).is_none() {
267 self.cache.insert(rel_path, modified, computed_hash);
268 }
269 }
270 }
271 }
272 Ok(status)
273 }
274
275 pub fn run(&self) -> anyhow::Result<()> {
277 let start_time = time::Instant::now();
278 let mut duplicates = self.find_duplicates()?;
279 let mut total_wasted_space = 0;
280 if !duplicates.is_empty() {
281 duplicates.sort_by_key(|a| a.size);
282 for dupes in &duplicates {
283 if self.is_yaml_format {
284 dupes.write_yaml(std::io::stdout())?;
285 } else {
286 dupes.write_human(std::io::stdout())?;
287 }
288 total_wasted_space += dupes.wasted_size();
289 }
290 }
291 self.print_duplicates_summary(&start_time, total_wasted_space)?;
292 Ok(())
293 }
294
295 fn print_duplicates_summary(
296 &self,
297 start_time: &time::Instant,
298 total_wasted_space: u64,
299 ) -> io::Result<()> {
300 let summary = [
301 (
302 "Elapsed:",
303 FormattedDuration(start_time.elapsed()).to_string(),
304 ),
305 (
306 "Total wasted space:",
307 crate::human_readable_size(total_wasted_space),
308 ),
309 ];
310 let formatter = ColumnFormatter::new(summary.iter().map(|(s, _)| *s));
311 formatter.write_values(&mut io::stderr(), &summary)
312 }
313
314 pub fn find_duplicates(&self) -> anyhow::Result<Vec<DuplicatedFiles>> {
316 let progress = self
317 .progress
318 .as_ref()
319 .map(|progress| progress.add_spinner())
320 .unwrap_or_else(Progress::none);
321 progress.set_message("Scanning directories...");
322
323 let (tx, rx) = mpsc::channel();
324 let mut by_hash: HashMap<blake3::Hash, DuplicatedFiles> = HashMap::new();
325 let mut num_cache_hits = 0;
326 std::thread::scope(|scope| {
327 scope.spawn(|| {
328 if let Err(e) = self.find_duplicates_streaming(tx) {
329 log::error!("Error during duplicate finding: {}", e);
330 }
331 });
332
333 while let Ok(event) = rx.recv() {
334 match event {
335 HashProgress::StartDiscovering => {
336 progress.set_message("Hashing files...");
337 }
338 HashProgress::TotalFiles(total) => {
339 progress.set_length(total as u64);
340 if num_cache_hits > 0 {
341 progress.set_message(format!(" ({} cache hits)", num_cache_hits));
342 }
343 }
344 HashProgress::Result(path, size, hash, is_cache_hit) => {
345 if is_cache_hit {
346 num_cache_hits += 1;
347 if progress.length().is_none() {
348 progress.set_message(format!(
349 "Hashing files... ({} cache hits)",
350 num_cache_hits
351 ));
352 } else {
353 progress.set_message(format!(" ({} cache hits)", num_cache_hits));
354 }
355 }
356
357 progress.inc(1);
358 let entry = by_hash.entry(hash).or_insert_with(|| DuplicatedFiles {
359 paths: Vec::new(),
360 size,
361 });
362 assert_eq!(entry.size, size, "Hash collision: sizes do not match");
364 entry.paths.push(path);
365 }
366 HashProgress::Error => {
367 progress.inc(1);
368 }
369 }
370 }
371 });
372 progress.finish();
373
374 let mut duplicates = Vec::new();
375 for (_, mut dupes) in by_hash {
376 if dupes.paths.len() > 1 {
377 dupes.paths.sort();
378 duplicates.push(dupes);
379 }
380 }
381 Ok(duplicates)
382 }
383
384 fn find_duplicates_streaming(&self, tx: mpsc::Sender<HashProgress>) -> anyhow::Result<()> {
385 tx.send(HashProgress::StartDiscovering)?;
386 let mut by_size: HashMap<u64, EntryState> = HashMap::new();
387 let mut total_hashed = 0;
388 std::thread::scope(|global_scope| {
389 let (it_tx, it_rx) = mpsc::channel();
390 for dir in &self.dirs {
391 let it_tx = it_tx.clone();
392 let mut it = FileIterator::new(dir.clone());
393 it.hasher = Some(self);
394 it.exclude = self.exclude.as_ref();
395 it.spawn_in_scope_with_sender(global_scope, it_tx);
396 }
397 drop(it_tx);
398
399 let pool = crate::build_thread_pool(self.jobs)?;
400 pool.scope(move |scope| -> anyhow::Result<()> {
401 for (_, current_path) in it_rx {
402 let meta = fs::metadata(¤t_path)?;
403 let size = meta.len();
404 let modified = meta.modified()?;
405
406 match by_size.entry(size) {
409 std::collections::hash_map::Entry::Occupied(mut occ) => match occ.get_mut()
410 {
411 EntryState::Single(first_path, first_modified) => {
412 self.spawn_hash_task(scope, first_path, size, *first_modified, &tx);
415 self.spawn_hash_task(scope, ¤t_path, size, modified, &tx);
416
417 *occ.get_mut() = EntryState::Hashing;
419 total_hashed += 2;
420 }
421 EntryState::Hashing => {
422 self.spawn_hash_task(scope, ¤t_path, size, modified, &tx);
424 total_hashed += 1;
425 }
426 },
427 std::collections::hash_map::Entry::Vacant(vac) => {
428 vac.insert(EntryState::Single(current_path, modified));
429 }
430 }
431 }
432 tx.send(HashProgress::TotalFiles(total_hashed))?;
433 Ok(())
434 })
435 })?;
436
437 self.save_cache()
440 }
441
442 fn spawn_hash_task<'scope>(
443 &'scope self,
444 scope: &rayon::Scope<'scope>,
445 path: &Path,
446 size: u64,
447 modified: time::SystemTime,
448 tx: &mpsc::Sender<HashProgress>,
449 ) {
450 let relative = crate::strip_prefix(path, self.cache.base_dir())
451 .expect("path should be in cache base_dir");
452 if let Some(hash) = self.cache.get(relative, modified) {
453 self.num_hash_looked_up.fetch_add(1, Ordering::Relaxed);
454 let _ = tx.send(HashProgress::Result(path.to_path_buf(), size, hash, true));
455 return;
456 }
457
458 let path_owned = path.to_path_buf();
459 let relative_owned = relative.to_path_buf();
460 let tx_owned = tx.clone();
461 scope.spawn(move |_| {
462 if let Ok(hash) = self.compute_hash(&path_owned) {
463 self.cache.insert(&relative_owned, modified, hash);
464 let _ = tx_owned.send(HashProgress::Result(path_owned, size, hash, false));
465 } else {
466 log::error!("Failed to hash file: {:?}", path_owned);
467 let _ = tx_owned.send(HashProgress::Error);
468 }
469 });
470 }
471
472 pub fn get_hash(&self, path: &Path) -> io::Result<blake3::Hash> {
474 let meta = fs::metadata(path)?;
475 let modified = meta.modified()?;
476 let relative = crate::strip_prefix(path, self.cache.base_dir())
477 .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
478 if let Some(hash) = self.cache.get(relative, modified) {
479 self.num_hash_looked_up.fetch_add(1, Ordering::Relaxed);
480 return Ok(hash);
481 }
482
483 let hash = self.compute_hash(path)?;
484 self.cache.insert(relative, modified, hash);
485 Ok(hash)
486 }
487
488 fn compute_hash(&self, path: &Path) -> io::Result<blake3::Hash> {
489 let start_time = time::Instant::now();
490 let mut f = fs::File::open(path)?;
491 let len = f.metadata()?.len();
492 let progress = self
493 .progress
494 .as_ref()
495 .map(|progress| progress.add_file(path, len))
496 .unwrap_or_else(Progress::none);
497 let mut hasher = blake3::Hasher::new();
498 if self.buffer_size == 0 {
499 if len > 0 {
500 let mmap = unsafe { memmap2::MmapOptions::new().map(&f)? };
501 hasher.update(&mmap[..]);
502 progress.inc(len);
503 }
504 } else {
505 let mut buf = vec![0u8; self.buffer_size];
506 loop {
507 let n = f.read(&mut buf)?;
508 if n == 0 {
509 break;
510 }
511 hasher.update(&buf[..n]);
512 progress.inc(n as u64);
513 }
514 }
515 progress.finish();
516 self.num_hashed.fetch_add(1, Ordering::Relaxed);
517 let hash = hasher.finalize();
518 log::debug!(
519 "Computed hash in {}: {:?}",
520 FormattedDuration(start_time.elapsed()),
521 path
522 );
523 Ok(hash)
524 }
525}
526
527#[derive(Clone, Debug)]
529pub struct DuplicatedFiles {
530 pub paths: Vec<PathBuf>,
531 pub size: u64,
532}
533
534impl DuplicatedFiles {
535 fn write_human(&self, mut writer: impl io::Write) -> anyhow::Result<()> {
536 writeln!(
537 writer,
538 "Identical {} files of {}:",
539 self.paths.len(),
540 crate::human_readable_size(self.size)
541 )?;
542 for path in &self.paths {
543 writeln!(writer, " {}", path.display())?;
544 }
545 Ok(())
546 }
547
548 fn write_yaml(&self, mut writer: impl io::Write) -> anyhow::Result<()> {
549 writeln!(writer, "- paths:")?;
550 for path in &self.paths {
551 writeln!(writer, " - {:?}", path)?;
552 }
553 writeln!(writer, " size: {}", self.size)?;
554 Ok(())
555 }
556
557 fn wasted_size(&self) -> u64 {
558 self.size * (self.paths.len() as u64 - 1)
559 }
560}
561
#[cfg(test)]
mod tests {
    use super::*;

    /// Events emitted by one `check_streaming` run, grouped by kind.
    #[derive(Debug, Default)]
    struct CheckOutcome {
        start_seen: bool,
        total_files: Option<usize>,
        /// `Result` events, sorted by path.
        results: Vec<(PathBuf, CheckStatus)>,
        file_done_count: usize,
        num_error: usize,
    }

    /// Glob set that excludes the hash cache file itself.
    fn default_exclude() -> globset::GlobSet {
        let mut builder = globset::GlobSetBuilder::new();
        builder.add(
            globset::GlobBuilder::new(".hash_cache")
                .case_insensitive(true)
                .build()
                .unwrap(),
        );
        builder.build().unwrap()
    }

    /// Creates a hasher for `dir` that skips the cache file, as the check tests need.
    fn check_hasher(dir: &Path) -> anyhow::Result<FileHasher> {
        let mut hasher = FileHasher::new(&[dir])?;
        hasher.exclude = Some(default_exclude());
        Ok(hasher)
    }

    /// Runs `check_streaming` to completion and collects every event it sent.
    fn run_check(hasher: &FileHasher, update: bool) -> anyhow::Result<CheckOutcome> {
        let (tx, rx) = mpsc::channel();
        hasher.check_streaming(tx, update)?;
        let mut outcome = CheckOutcome::default();
        // All senders are gone once `check_streaming` has returned, so this loop terminates.
        for event in rx {
            match event {
                CheckEvent::StartChecking => outcome.start_seen = true,
                CheckEvent::TotalFiles(total) => outcome.total_files = Some(total),
                CheckEvent::Result(path, status) => outcome.results.push((path, status)),
                CheckEvent::FileDone => outcome.file_done_count += 1,
                CheckEvent::Error => outcome.num_error += 1,
            }
        }
        // Workers finish in arbitrary order; sort for deterministic assertions.
        outcome.results.sort_by(|a, b| a.0.cmp(&b.0));
        Ok(outcome)
    }

    #[test]
    fn find_duplicates() -> anyhow::Result<()> {
        let dir = tempfile::tempdir()?;

        let file1_path = dir.path().join("same1.txt");
        fs::write(&file1_path, "same content")?;

        let file2_path = dir.path().join("same2.txt");
        fs::write(&file2_path, "same content")?;

        let diff_path = dir.path().join("diff.txt");
        fs::write(&diff_path, "different content")?;

        let mut hasher = FileHasher::new(&[dir.path()])?;
        hasher.buffer_size = 8192;
        let duplicates = hasher.find_duplicates()?;

        // Only the two same-sized files are hashed; the file with a unique size is skipped.
        assert_eq!(hasher.num_hashed.load(Ordering::Relaxed), 2);
        assert_eq!(hasher.num_hash_looked_up.load(Ordering::Relaxed), 0);

        assert_eq!(duplicates.len(), 1);
        let group = &duplicates[0];
        assert_eq!(group.paths.len(), 2);
        assert_eq!(group.size, 12);
        assert!(group.paths.contains(&file1_path));
        assert!(group.paths.contains(&file2_path));

        Ok(())
    }

    #[test]
    fn find_duplicates_merge_cache() -> anyhow::Result<()> {
        let dir = tempfile::tempdir()?;
        let dir_path = dir.path();

        let sub_dir = dir_path.join("a").join("a");
        fs::create_dir_all(&sub_dir)?;

        let file1_path = sub_dir.join("1");
        fs::write(&file1_path, "same content")?;

        let file2_path = sub_dir.join("2");
        fs::write(&file2_path, "same content")?;

        let cache_aa_path = sub_dir.join(FileHashCache::FILE_NAME);
        fs::File::create(&cache_aa_path)?;

        // First run, rooted at the inner directory: everything has to be hashed.
        let hasher_aa = FileHasher::new(&[&sub_dir])?;
        let duplicates_aa = hasher_aa.find_duplicates()?;
        assert_eq!(duplicates_aa.len(), 1);
        assert!(cache_aa_path.exists());
        assert_eq!(hasher_aa.num_hashed.load(Ordering::Relaxed), 2);
        assert_eq!(hasher_aa.num_hash_looked_up.load(Ordering::Relaxed), 0);

        let root_a = dir_path.join("a");
        let cache_a_path = root_a.join(FileHashCache::FILE_NAME);
        fs::File::create(&cache_a_path)?;

        // Second run, rooted one level up: the hashes come from the cache.
        let hasher_a = FileHasher::new(&[&root_a])?;
        let duplicates_a = hasher_a.find_duplicates()?;
        assert_eq!(duplicates_a.len(), 1);
        assert_eq!(hasher_a.num_hashed.load(Ordering::Relaxed), 0);
        assert_eq!(hasher_a.num_hash_looked_up.load(Ordering::Relaxed), 2);

        // The inner cache file is gone after the second run; only the outer one remains.
        assert!(cache_a_path.exists());
        assert!(!cache_aa_path.exists());

        Ok(())
    }

    #[test]
    fn find_duplicates_with_exclude() -> anyhow::Result<()> {
        let dir = tempfile::tempdir()?;

        let file1_path = dir.path().join("same1.txt");
        fs::write(&file1_path, "same content")?;

        let file2_path = dir.path().join("same2.txt");
        fs::write(&file2_path, "same content")?;

        let exclude_path = dir.path().join("exclude.txt");
        fs::write(&exclude_path, "same content")?;

        let mut hasher = FileHasher::new(&[dir.path()])?;
        hasher.buffer_size = 8192;
        let mut builder = globset::GlobSetBuilder::new();
        builder.add(
            globset::GlobBuilder::new("exclude.txt")
                .case_insensitive(true)
                .build()?,
        );
        let filter = builder.build()?;
        hasher.exclude = Some(filter);

        let duplicates = hasher.find_duplicates()?;
        assert_eq!(duplicates.len(), 1);
        let group = &duplicates[0];
        assert_eq!(group.paths.len(), 2);
        assert!(group.paths.contains(&file1_path));
        assert!(group.paths.contains(&file2_path));
        assert!(!group.paths.contains(&exclude_path));
        Ok(())
    }

    #[test]
    fn check_mode_empty_cache() -> anyhow::Result<()> {
        let dir = tempfile::tempdir()?;
        let dir_path = dir.path().to_path_buf();
        let file1_path = dir.path().join("file1.txt");
        fs::write(&file1_path, "content 1")?;
        let file2_path = dir.path().join("file2.txt");
        fs::write(&file2_path, "content 2")?;

        let hasher = check_hasher(&dir_path)?;
        let outcome = run_check(&hasher, false)?;
        assert!(outcome.start_seen);
        assert_eq!(outcome.total_files, Some(2));
        assert_eq!(outcome.file_done_count, 0);
        assert_eq!(outcome.num_error, 0);

        assert_eq!(outcome.results.len(), 2);
        assert_eq!(
            outcome.results[0],
            (PathBuf::from("file1.txt"), CheckStatus::New)
        );
        assert_eq!(
            outcome.results[1],
            (PathBuf::from("file2.txt"), CheckStatus::New)
        );

        // Without `update` nothing is written to disk.
        assert!(!dir.path().join(FileHashCache::FILE_NAME).exists());
        Ok(())
    }

    #[test]
    fn check_mode_with_cache() -> anyhow::Result<()> {
        let dir = tempfile::tempdir()?;
        let dir_path = dir.path().to_path_buf();
        let file1_path = dir.path().join("file1.txt");
        fs::write(&file1_path, "content 1")?;
        let file2_path = dir.path().join("file2.txt");
        fs::write(&file2_path, "content 2")?;

        // Populate and persist the cache.
        let hasher = check_hasher(&dir_path)?;
        let _hash1 = hasher.get_hash(&file1_path)?;
        let _hash2 = hasher.get_hash(&file2_path)?;
        hasher.save_cache()?;
        assert!(dir.path().join(FileHashCache::FILE_NAME).exists());

        // Nothing changed: both files are reported as done, none as new or modified.
        let hasher = check_hasher(&dir_path)?;
        let outcome = run_check(&hasher, false)?;
        assert_eq!(outcome.results.len(), 0);
        assert_eq!(outcome.file_done_count, 2);

        // Change the content of file1 ...
        fs::write(&file1_path, "content 1 modified")?;

        // ... and only the modification time of file2.
        let file2_meta_before = fs::metadata(&file2_path)?;
        let mtime_before = file2_meta_before.modified()?;
        std::thread::sleep(time::Duration::from_millis(10));
        fs::write(&file2_path, "content 2")?;
        let file2_meta_after = fs::metadata(&file2_path)?;
        let mtime_after = file2_meta_after.modified()?;
        assert!(mtime_after > mtime_before);

        let hasher = check_hasher(&dir_path)?;
        let outcome = run_check(&hasher, false)?;
        assert_eq!(outcome.results.len(), 1);
        assert_eq!(
            outcome.results[0],
            (PathBuf::from("file1.txt"), CheckStatus::Modified)
        );
        assert_eq!(outcome.file_done_count, 1);
        Ok(())
    }

    #[test]
    fn check_update_mode() -> anyhow::Result<()> {
        let dir = tempfile::tempdir()?;
        let dir_path = dir.path().to_path_buf();
        let file1_path = dir.path().join("file1.txt");
        fs::write(&file1_path, "content 1")?;

        // A new file is added to the cache.
        let hasher = check_hasher(&dir_path)?;
        run_check(&hasher, true)?;
        hasher.save_cache()?;
        assert!(dir.path().join(FileHashCache::FILE_NAME).exists());

        let cache = FileHashCache::new(&dir_path);
        let mtime1 = fs::metadata(&file1_path)?.modified()?;
        let hash1 = cache.get(&PathBuf::from("file1.txt"), mtime1);
        assert!(hash1.is_some());

        // Modified content replaces the cached hash.
        std::thread::sleep(time::Duration::from_millis(10));
        fs::write(&file1_path, "content 1 modified")?;
        let mtime1_mod = fs::metadata(&file1_path)?.modified()?;

        let hasher = check_hasher(&dir_path)?;
        run_check(&hasher, true)?;
        hasher.save_cache()?;

        let cache = FileHashCache::new(&dir_path);
        let hash_mod = cache.get(&PathBuf::from("file1.txt"), mtime1_mod);
        assert!(hash_mod.is_some());
        assert_ne!(hash1, hash_mod);

        // Same content with a newer modification time: the entry is stale until re-checked.
        std::thread::sleep(time::Duration::from_millis(10));
        fs::write(&file1_path, "content 1 modified")?;
        let mtime1_mod2 = fs::metadata(&file1_path)?.modified()?;
        assert!(mtime1_mod2 > mtime1_mod);

        assert!(
            cache
                .get(&PathBuf::from("file1.txt"), mtime1_mod2)
                .is_none()
        );

        let hasher = check_hasher(&dir_path)?;
        run_check(&hasher, true)?;
        hasher.save_cache()?;

        let cache = FileHashCache::new(&dir_path);
        assert!(
            cache
                .get(&PathBuf::from("file1.txt"), mtime1_mod2)
                .is_some()
        );
        Ok(())
    }

    #[test]
    fn find_duplicates_multiple_dirs() -> anyhow::Result<()> {
        let tmp = tempfile::tempdir()?;
        let dir1 = tmp.path().join("dir1");
        let dir2 = tmp.path().join("dir2");
        fs::create_dir(&dir1)?;
        fs::create_dir(&dir2)?;
        let file1_path = dir1.join("file1.txt");
        fs::write(&file1_path, "same content")?;
        let file2_path = dir2.join("file2.txt");
        fs::write(&file2_path, "same content")?;
        let hasher = FileHasher::new(&[&dir1, &dir2])?;
        let duplicates = hasher.find_duplicates()?;
        assert_eq!(duplicates.len(), 1);
        let group = &duplicates[0];
        assert_eq!(group.paths.len(), 2);
        assert_eq!(group.size, 12);
        assert!(group.paths.contains(&file1_path));
        assert!(group.paths.contains(&file2_path));

        Ok(())
    }

    #[test]
    fn check_fails_with_multiple_dirs() -> anyhow::Result<()> {
        let tmp = tempfile::tempdir()?;
        let dir1 = tmp.path().join("dir1");
        let dir2 = tmp.path().join("dir2");
        fs::create_dir(&dir1)?;
        fs::create_dir(&dir2)?;
        let hasher = FileHasher::new(&[&dir1, &dir2])?;
        assert!(hasher.check(false).is_err());
        Ok(())
    }
}