1use crate::{
2 ColumnFormatter, DirectoryComparer, FileComparer, FileHashCache, FileIterator, Progress,
3 ProgressBuilder,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use std::collections::HashMap;
8use std::fs;
9use std::io::{self, Read, stdout};
10use std::path::{Path, PathBuf};
11use std::sync::atomic::{AtomicUsize, Ordering};
12use std::sync::{Arc, mpsc};
13
/// Progress events streamed from the duplicate-finding worker to the thread
/// that drives the progress display.
#[derive(Debug, Clone)]
enum HashProgress {
    /// Sent once, before any file is examined.
    StartDiscovering,
    /// Number of files that were scheduled for hashing (cache hits included);
    /// sent after directory iteration has finished.
    TotalFiles(usize),
    /// One file's outcome: path, size in bytes, BLAKE3 hash, and whether the
    /// hash was served from the cache (`true`) or freshly computed (`false`).
    Result(PathBuf, u64, blake3::Hash, bool),
}
20
/// Outcome of comparing a file's freshly computed hash with the cached one.
///
/// The derived ordering follows declaration order:
/// `Unchanged < New < Modified`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum CheckStatus {
    /// The cached hash matches the computed hash.
    Unchanged,
    /// No hash is cached for the file.
    New,
    /// A cached hash exists but differs from the computed hash.
    Modified,
}
27
28#[derive(Debug, PartialEq)]
29enum CheckEvent {
30 StartChecking,
31 TotalFiles(usize),
32 Result(PathBuf, CheckStatus),
33 FileDone,
34}
35
/// Per-file-size bookkeeping used while looking for duplicate candidates.
#[derive(Debug)]
enum EntryState {
    /// Exactly one file of this size has been seen so far; it has not been
    /// hashed yet. Holds its path and modification time.
    Single(PathBuf, std::time::SystemTime),
    /// At least two files of this size have been seen; every file of this
    /// size is now sent to hashing as it is discovered.
    Hashing,
}
40
/// A group of files that share the same content hash.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DuplicatedFiles {
    /// Paths of the files in the group.
    pub paths: Vec<PathBuf>,
    /// Size in bytes of each file in the group.
    pub size: u64,
}
47
/// Hashes files under one or more directories, backed by a hash cache, to
/// find duplicated files or to check files against previously cached hashes.
pub struct FileHasher {
    /// Directories to scan, as given to [`FileHasher::new`].
    dirs: Vec<PathBuf>,
    /// Read buffer size in bytes used when hashing; `0` selects a
    /// memory-mapped read instead of buffered reads.
    pub buffer_size: usize,
    /// Hash cache obtained from the common ancestor of `dirs`.
    pub(crate) cache: Arc<FileHashCache>,
    /// Number of hashes computed from file contents.
    pub(crate) num_hashed: AtomicUsize,
    /// Number of hashes served from the cache.
    pub(crate) num_hash_looked_up: AtomicUsize,
    /// Optional glob set handed to the file iterator as its exclude filter.
    pub exclude: Option<GlobSet>,
    /// Optional progress reporter factory; `Progress::none()` is used when
    /// this is `None`.
    pub progress: Option<Arc<ProgressBuilder>>,
    /// Job count passed to `crate::build_thread_pool`.
    pub jobs: usize,
}
59
60impl FileHasher {
/// Default value of `jobs`; shares the value defined by `DirectoryComparer`.
const DEFAULT_JOBS: usize = DirectoryComparer::DEFAULT_JOBS;
62
63 pub fn new<P: AsRef<Path>>(dirs: &[P]) -> anyhow::Result<Self> {
65 if dirs.is_empty() {
66 anyhow::bail!("At least one directory must be specified.");
67 }
68 let common_ancestor = crate::common_ancestor(dirs)
69 .ok_or_else(|| anyhow::anyhow!("No common ancestor found"))?;
70 Ok(Self {
71 dirs: dirs.iter().map(|p| p.as_ref().to_path_buf()).collect(),
72 buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
73 cache: FileHashCache::find_or_new(&common_ancestor),
74 num_hashed: AtomicUsize::new(0),
75 num_hash_looked_up: AtomicUsize::new(0),
76 exclude: None,
77 progress: None,
78 jobs: Self::DEFAULT_JOBS,
79 })
80 }
81
82 pub fn remove_cache_entry(&self, path: &Path) -> anyhow::Result<()> {
84 let relative = crate::strip_prefix(path, self.cache.base_dir())?;
85 self.cache.remove(relative);
86 Ok(())
87 }
88
89 pub fn save_cache(&self) -> anyhow::Result<()> {
91 log::info!(
92 "Hash stats for {:?}: {} computed, {} looked up",
93 self.dirs,
94 self.num_hashed.load(Ordering::Relaxed),
95 self.num_hash_looked_up.load(Ordering::Relaxed)
96 );
97 Ok(self.cache.save()?)
98 }
99
100 pub(crate) fn merge_cache(&self, other_cache: &FileHashCache) {
102 self.cache.merge(other_cache);
103 }
104
105 pub fn clear_cache(&self) -> anyhow::Result<()> {
107 for dir in &self.dirs {
108 let relative = crate::strip_prefix(dir, self.cache.base_dir())?;
109 self.cache.clear(relative);
110 }
111 Ok(())
112 }
113
114 pub fn check(&self, update: bool) -> anyhow::Result<()> {
116 if self.dirs.len() > 1 {
117 anyhow::bail!("Check mode only supports one directory.");
118 }
119 let start_time = std::time::Instant::now();
120 let progress = self
121 .progress
122 .as_ref()
123 .map(|progress| progress.add_spinner())
124 .unwrap_or_else(Progress::none);
125 progress.set_message("Scanning directory...");
126 let mut num_new = 0;
127 let mut num_modified = 0;
128 std::thread::scope(|scope| {
129 let (tx, rx) = mpsc::channel();
130 scope.spawn(|| {
131 if let Err(e) = self.check_streaming(tx, update) {
132 log::error!("Error during check: {}", e);
133 }
134 });
135 while let Ok(event) = rx.recv() {
136 match event {
137 CheckEvent::StartChecking => {
138 progress.set_message("Checking files...");
139 }
140 CheckEvent::TotalFiles(total) => {
141 progress.set_length(total as u64);
142 progress.set_message("");
143 }
144 CheckEvent::Result(path, status) => {
145 let symbol = match status {
146 CheckStatus::New => {
147 num_new += 1;
148 '+'
149 }
150 CheckStatus::Modified => {
151 num_modified += 1;
152 '!'
153 }
154 CheckStatus::Unchanged => unreachable!(),
155 };
156 progress.inc(1);
157 progress.suspend_for(stdout(), || {
158 println!("{} {}", symbol, path.display());
159 });
160 }
161 CheckEvent::FileDone => {
162 progress.inc(1);
163 }
164 }
165 }
166 });
167 progress.finish();
168 if update {
169 self.save_cache()?;
170 }
171 let summary = [
172 ("Elapsed:", 0),
173 ("Hash computed:", self.num_hashed.load(Ordering::Relaxed)),
174 ("New files:", num_new),
175 ("Modified files:", num_modified),
176 ];
177 let formatter = ColumnFormatter::new(summary.iter().map(|(s, _)| *s));
178 let mut writer = std::io::stderr();
179 formatter.write_value(
180 &mut writer,
181 summary[0].0,
182 FormattedDuration(start_time.elapsed()),
183 )?;
184 formatter.write_values(&mut writer, &summary[1..])?;
185 Ok(())
186 }
187
188 fn check_streaming(&self, tx: mpsc::Sender<CheckEvent>, update: bool) -> anyhow::Result<()> {
189 std::thread::scope(|global_scope| {
190 let mut it = FileIterator::new(self.dirs[0].clone());
191 it.hasher = Some(self);
192 it.exclude = self.exclude.as_ref();
193 let it_rx = it.spawn_in_scope(global_scope);
194 tx.send(CheckEvent::StartChecking)?;
195 let pool = crate::build_thread_pool(self.jobs)?;
196 pool.scope(move |scope| -> anyhow::Result<()> {
197 let mut total_files = 0;
198 for (rel_path, abs_path) in it_rx {
199 total_files += 1;
200 let tx = tx.clone();
201 scope.spawn(move |_| {
202 let status = self.check_file(&abs_path, update);
203 let event = match status {
204 Ok(CheckStatus::New) | Ok(CheckStatus::Modified) => {
205 CheckEvent::Result(rel_path, status.unwrap())
206 }
207 Ok(CheckStatus::Unchanged) => CheckEvent::FileDone,
208 Err(e) => {
209 log::warn!("Failed to check file {:?}: {}", rel_path, e);
210 CheckEvent::FileDone
211 }
212 };
213 if tx.send(event).is_err() {
214 log::error!("Send failed");
215 }
216 });
217 }
218 tx.send(CheckEvent::TotalFiles(total_files))?;
219 Ok(())
220 })
221 })?;
222 Ok(())
223 }
224
225 fn check_file(&self, abs_path: &Path, update: bool) -> anyhow::Result<CheckStatus> {
226 assert!(abs_path.is_absolute());
227 let computed_hash = self.compute_hash(abs_path)?;
228 let rel_path = crate::strip_prefix(abs_path, self.cache.base_dir())?;
229 let cached_hash = self.cache.get_by_path(rel_path);
230 let status = match cached_hash {
231 None => CheckStatus::New,
232 Some(cached) => {
233 if computed_hash != cached {
234 CheckStatus::Modified
235 } else {
236 CheckStatus::Unchanged
237 }
238 }
239 };
240 if update {
241 let modified = fs::metadata(abs_path)?.modified()?;
242 match status {
243 CheckStatus::New | CheckStatus::Modified => {
244 self.cache.insert(rel_path, modified, computed_hash);
245 }
246 CheckStatus::Unchanged => {
247 if self.cache.get(rel_path, modified).is_none() {
248 self.cache.insert(rel_path, modified, computed_hash);
249 }
250 }
251 }
252 }
253 Ok(status)
254 }
255
256 pub fn run(&self) -> anyhow::Result<()> {
258 let start_time = std::time::Instant::now();
259 let mut duplicates = self.find_duplicates()?;
260 if duplicates.is_empty() {
261 println!("No duplicates found.");
262 } else {
263 duplicates.sort_by_key(|a| a.size);
264 let mut total_wasted_space = 0;
265 for dupes in &duplicates {
266 let paths = &dupes.paths;
267 let file_size = dupes.size;
268 println!(
269 "Identical {} files of {}:",
270 paths.len(),
271 crate::human_readable_size(file_size)
272 );
273 for path in paths {
274 println!(" {}", path.display());
275 }
276 total_wasted_space += file_size * (paths.len() as u64 - 1);
277 }
278 eprintln!(
279 "Total wasted space: {}",
280 crate::human_readable_size(total_wasted_space)
281 );
282 }
283 eprintln!("Finished in {}.", FormattedDuration(start_time.elapsed()));
284 Ok(())
285 }
286
287 pub fn find_duplicates(&self) -> anyhow::Result<Vec<DuplicatedFiles>> {
289 let progress = self
290 .progress
291 .as_ref()
292 .map(|progress| progress.add_spinner())
293 .unwrap_or_else(Progress::none);
294 progress.set_message("Scanning directories...");
295
296 let (tx, rx) = mpsc::channel();
297 let mut by_hash: HashMap<blake3::Hash, DuplicatedFiles> = HashMap::new();
298 let mut num_cache_hits = 0;
299 std::thread::scope(|scope| {
300 scope.spawn(|| {
301 if let Err(e) = self.find_duplicates_streaming(tx) {
302 log::error!("Error during duplicate finding: {}", e);
303 }
304 });
305
306 while let Ok(event) = rx.recv() {
307 match event {
308 HashProgress::StartDiscovering => {
309 progress.set_message("Hashing files...");
310 }
311 HashProgress::TotalFiles(total) => {
312 progress.set_length(total as u64);
313 if num_cache_hits > 0 {
314 progress.set_message(format!(" ({} cache hits)", num_cache_hits));
315 }
316 }
317 HashProgress::Result(path, size, hash, is_cache_hit) => {
318 if is_cache_hit {
319 num_cache_hits += 1;
320 if progress.length().is_none() {
321 progress.set_message(format!(
322 "Hashing files... ({} cache hits)",
323 num_cache_hits
324 ));
325 } else {
326 progress.set_message(format!(" ({} cache hits)", num_cache_hits));
327 }
328 }
329
330 progress.inc(1);
331 let entry = by_hash.entry(hash).or_insert_with(|| DuplicatedFiles {
332 paths: Vec::new(),
333 size,
334 });
335 assert_eq!(entry.size, size, "Hash collision: sizes do not match");
337 entry.paths.push(path);
338 }
339 }
340 }
341 });
342 progress.finish();
343
344 let mut duplicates = Vec::new();
345 for (_, mut dupes) in by_hash {
346 if dupes.paths.len() > 1 {
347 dupes.paths.sort();
348 duplicates.push(dupes);
349 }
350 }
351 Ok(duplicates)
352 }
353
354 fn find_duplicates_streaming(&self, tx: mpsc::Sender<HashProgress>) -> anyhow::Result<()> {
355 tx.send(HashProgress::StartDiscovering)?;
356 let mut by_size: HashMap<u64, EntryState> = HashMap::new();
357 let mut total_hashed = 0;
358 std::thread::scope(|global_scope| {
359 let (it_tx, it_rx) = mpsc::channel();
360 for dir in &self.dirs {
361 let it_tx = it_tx.clone();
362 let mut it = FileIterator::new(dir.clone());
363 it.hasher = Some(self);
364 it.exclude = self.exclude.as_ref();
365 it.spawn_in_scope_with_sender(global_scope, it_tx);
366 }
367 drop(it_tx);
368
369 let pool = crate::build_thread_pool(self.jobs)?;
370 pool.scope(move |scope| -> anyhow::Result<()> {
371 for (_, current_path) in it_rx {
372 let meta = fs::metadata(¤t_path)?;
373 let size = meta.len();
374 let modified = meta.modified()?;
375
376 match by_size.entry(size) {
379 std::collections::hash_map::Entry::Occupied(mut occ) => match occ.get_mut()
380 {
381 EntryState::Single(first_path, first_modified) => {
382 self.spawn_hash_task(scope, first_path, size, *first_modified, &tx);
385 self.spawn_hash_task(scope, ¤t_path, size, modified, &tx);
386
387 *occ.get_mut() = EntryState::Hashing;
389 total_hashed += 2;
390 }
391 EntryState::Hashing => {
392 self.spawn_hash_task(scope, ¤t_path, size, modified, &tx);
394 total_hashed += 1;
395 }
396 },
397 std::collections::hash_map::Entry::Vacant(vac) => {
398 vac.insert(EntryState::Single(current_path, modified));
399 }
400 }
401 }
402 tx.send(HashProgress::TotalFiles(total_hashed))?;
403 Ok(())
404 })
405 })?;
406
407 self.save_cache()
410 }
411
412 fn spawn_hash_task<'scope>(
413 &'scope self,
414 scope: &rayon::Scope<'scope>,
415 path: &Path,
416 size: u64,
417 modified: std::time::SystemTime,
418 tx: &mpsc::Sender<HashProgress>,
419 ) {
420 let relative = crate::strip_prefix(path, self.cache.base_dir())
421 .expect("path should be in cache base_dir");
422 if let Some(hash) = self.cache.get(relative, modified) {
423 self.num_hash_looked_up.fetch_add(1, Ordering::Relaxed);
424 let _ = tx.send(HashProgress::Result(path.to_path_buf(), size, hash, true));
425 return;
426 }
427
428 let path_owned = path.to_path_buf();
429 let relative_owned = relative.to_path_buf();
430 let tx_owned = tx.clone();
431 scope.spawn(move |_| {
432 if let Ok(hash) = self.compute_hash(&path_owned) {
433 self.cache.insert(&relative_owned, modified, hash);
434 let _ = tx_owned.send(HashProgress::Result(path_owned, size, hash, false));
435 } else {
436 log::warn!("Failed to hash file: {:?}", path_owned);
437 }
438 });
439 }
440
441 pub fn get_hash(&self, path: &Path) -> io::Result<blake3::Hash> {
443 let meta = fs::metadata(path)?;
444 let modified = meta.modified()?;
445 let relative = crate::strip_prefix(path, self.cache.base_dir())
446 .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
447 if let Some(hash) = self.cache.get(relative, modified) {
448 self.num_hash_looked_up.fetch_add(1, Ordering::Relaxed);
449 return Ok(hash);
450 }
451
452 let hash = self.compute_hash(path)?;
453 self.cache.insert(relative, modified, hash);
454 Ok(hash)
455 }
456
457 fn compute_hash(&self, path: &Path) -> io::Result<blake3::Hash> {
458 let start_time = std::time::Instant::now();
459 let mut f = fs::File::open(path)?;
460 let len = f.metadata()?.len();
461 let progress = self
462 .progress
463 .as_ref()
464 .map(|progress| progress.add_file(path, len))
465 .unwrap_or_else(Progress::none);
466 let mut hasher = blake3::Hasher::new();
467 if self.buffer_size == 0 {
468 if len > 0 {
469 let mmap = unsafe { memmap2::MmapOptions::new().map(&f)? };
470 hasher.update(&mmap[..]);
471 progress.inc(len);
472 }
473 } else {
474 let mut buf = vec![0u8; self.buffer_size];
475 loop {
476 let n = f.read(&mut buf)?;
477 if n == 0 {
478 break;
479 }
480 hasher.update(&buf[..n]);
481 progress.inc(n as u64);
482 }
483 }
484 progress.finish();
485 self.num_hashed.fetch_add(1, Ordering::Relaxed);
486 let hash = hasher.finalize();
487 log::debug!(
488 "Computed hash in {}: {:?}",
489 FormattedDuration(start_time.elapsed()),
490 path
491 );
492 Ok(hash)
493 }
494}
495
496#[cfg(test)]
497mod tests {
498 use super::*;
499
500 fn default_exclude() -> globset::GlobSet {
501 let mut builder = globset::GlobSetBuilder::new();
502 builder.add(
503 globset::GlobBuilder::new(".hash_cache")
504 .case_insensitive(true)
505 .build()
506 .unwrap(),
507 );
508 builder.build().unwrap()
509 }
510
/// Two identical files and one different file in one directory: exactly one
/// group is reported and only the two same-sized files are hashed.
#[test]
fn find_duplicates() -> anyhow::Result<()> {
    let dir = tempfile::tempdir()?;
    let root = dir.path();

    let twin_a = root.join("same1.txt");
    let twin_b = root.join("same2.txt");
    let odd_one = root.join("diff.txt");
    for (path, body) in [
        (&twin_a, "same content"),
        (&twin_b, "same content"),
        (&odd_one, "different content"),
    ] {
        fs::write(path, body)?;
    }

    let mut hasher = FileHasher::new(&[root])?;
    hasher.buffer_size = 8192;
    let groups = hasher.find_duplicates()?;

    // The file with a unique size is never hashed.
    assert_eq!(hasher.num_hashed.load(Ordering::Relaxed), 2);
    assert_eq!(hasher.num_hash_looked_up.load(Ordering::Relaxed), 0);

    assert_eq!(groups.len(), 1);
    let only = &groups[0];
    assert_eq!(only.paths.len(), 2);
    assert_eq!(only.size, 12);
    for expected in [&twin_a, &twin_b] {
        assert!(only.paths.contains(expected));
    }

    Ok(())
}
541
/// A cache created in a sub-directory is reused (and its file removed) when
/// a later run uses a cache located in the parent directory.
#[test]
fn find_duplicates_merge_cache() -> anyhow::Result<()> {
    let tmp = tempfile::tempdir()?;
    let outer = tmp.path().join("a");
    let inner = outer.join("a");
    fs::create_dir_all(&inner)?;

    for name in ["1", "2"] {
        fs::write(inner.join(name), "same content")?;
    }

    // First run: the cache file lives in the inner directory.
    let inner_cache = inner.join(FileHashCache::FILE_NAME);
    fs::File::create(&inner_cache)?;

    let inner_hasher = FileHasher::new(&[&inner])?;
    let inner_groups = inner_hasher.find_duplicates()?;
    assert_eq!(inner_groups.len(), 1);
    assert!(inner_cache.exists());
    assert_eq!(inner_hasher.num_hashed.load(Ordering::Relaxed), 2);
    assert_eq!(inner_hasher.num_hash_looked_up.load(Ordering::Relaxed), 0);

    // Second run: a cache file in the outer directory takes over.
    let outer_cache = outer.join(FileHashCache::FILE_NAME);
    fs::File::create(&outer_cache)?;

    let outer_hasher = FileHasher::new(&[&outer])?;
    let outer_groups = outer_hasher.find_duplicates()?;
    assert_eq!(outer_groups.len(), 1);
    // Both hashes come from the cache; nothing is recomputed.
    assert_eq!(outer_hasher.num_hashed.load(Ordering::Relaxed), 0);
    assert_eq!(outer_hasher.num_hash_looked_up.load(Ordering::Relaxed), 2);

    assert!(outer_cache.exists());
    assert!(!inner_cache.exists());

    Ok(())
}
586
/// A file matching the exclude glob is left out of the duplicate group even
/// though its content is identical.
#[test]
fn find_duplicates_with_exclude() -> anyhow::Result<()> {
    let dir = tempfile::tempdir()?;
    let root = dir.path();

    let kept_a = root.join("same1.txt");
    let kept_b = root.join("same2.txt");
    let skipped = root.join("exclude.txt");
    for path in [&kept_a, &kept_b, &skipped] {
        fs::write(path, "same content")?;
    }

    let mut hasher = FileHasher::new(&[root])?;
    hasher.buffer_size = 8192;
    let skip_glob = globset::GlobBuilder::new("exclude.txt")
        .case_insensitive(true)
        .build()?;
    let mut set = globset::GlobSetBuilder::new();
    set.add(skip_glob);
    hasher.exclude = Some(set.build()?);

    let groups = hasher.find_duplicates()?;
    assert_eq!(groups.len(), 1);
    let only = &groups[0];
    assert_eq!(only.paths.len(), 2);
    assert!(only.paths.contains(&kept_a));
    assert!(only.paths.contains(&kept_b));
    assert!(!only.paths.contains(&skipped));
    Ok(())
}
620
/// With no cache present, every file is reported as new and no cache file is
/// written when `update` is false.
#[test]
fn check_mode_empty_cache() -> anyhow::Result<()> {
    let dir = tempfile::tempdir()?;
    let dir_path = dir.path().to_path_buf();
    println!("{:?}", dir_path);
    for (name, body) in [("file1.txt", "content 1"), ("file2.txt", "content 2")] {
        fs::write(dir.path().join(name), body)?;
    }

    let mut hasher = FileHasher::new(&[&dir_path])?;
    hasher.exclude = Some(default_exclude());
    let (tx, rx) = mpsc::channel();
    hasher.check_streaming(tx, false)?;

    let mut saw_start = false;
    let mut announced_total = None;
    let mut done_events = 0;
    let mut reported = Vec::new();
    for event in rx {
        match event {
            CheckEvent::StartChecking => saw_start = true,
            CheckEvent::TotalFiles(total) => announced_total = Some(total),
            CheckEvent::Result(path, status) => reported.push((path, status)),
            CheckEvent::FileDone => done_events += 1,
        }
    }
    assert!(saw_start);
    assert_eq!(announced_total, Some(2));
    assert_eq!(done_events, 0);

    // Results arrive in worker order; sort before comparing.
    reported.sort_by(|a, b| a.0.cmp(&b.0));
    assert_eq!(
        reported,
        vec![
            (PathBuf::from("file1.txt"), CheckStatus::New),
            (PathBuf::from("file2.txt"), CheckStatus::New),
        ]
    );

    assert!(!dir.path().join(FileHashCache::FILE_NAME).exists());
    Ok(())
}
659
/// With a populated cache, unchanged files are silent, a content change is
/// reported as modified, and a rewrite with identical content (new mtime
/// only) is not reported.
#[test]
fn check_mode_with_cache() -> anyhow::Result<()> {
    /// Drains the channel; returns the reported changes and the number of
    /// `FileDone` events.
    fn drain(rx: mpsc::Receiver<CheckEvent>) -> (Vec<(PathBuf, CheckStatus)>, usize) {
        let mut changed = Vec::new();
        let mut done = 0;
        for event in rx {
            match event {
                CheckEvent::Result(path, status) => changed.push((path, status)),
                CheckEvent::FileDone => done += 1,
                CheckEvent::StartChecking | CheckEvent::TotalFiles(_) => {}
            }
        }
        (changed, done)
    }

    let dir = tempfile::tempdir()?;
    let dir_path = dir.path().to_path_buf();
    let file1_path = dir.path().join("file1.txt");
    fs::write(&file1_path, "content 1")?;
    let file2_path = dir.path().join("file2.txt");
    fs::write(&file2_path, "content 2")?;

    // Populate the cache and persist it.
    let mut seeding = FileHasher::new(&[&dir_path])?;
    seeding.exclude = Some(default_exclude());
    let _ = seeding.get_hash(&file1_path)?;
    let _ = seeding.get_hash(&file2_path)?;
    seeding.save_cache()?;
    assert!(dir.path().join(FileHashCache::FILE_NAME).exists());

    // Nothing changed: both files are merely "done".
    let mut untouched = FileHasher::new(&[&dir_path])?;
    untouched.exclude = Some(default_exclude());
    let (tx, rx) = mpsc::channel();
    untouched.check_streaming(tx, false)?;
    let (changed, done) = drain(rx);
    assert_eq!(changed.len(), 0);
    assert_eq!(done, 2);

    // Change the content of file1 ...
    fs::write(&file1_path, "content 1 modified")?;

    // ... and only the mtime of file2.
    let mtime_before = fs::metadata(&file2_path)?.modified()?;
    std::thread::sleep(std::time::Duration::from_millis(10));
    fs::write(&file2_path, "content 2")?;
    let mtime_after = fs::metadata(&file2_path)?.modified()?;
    assert!(mtime_after > mtime_before);

    let mut after_edit = FileHasher::new(&[&dir_path])?;
    after_edit.exclude = Some(default_exclude());
    let (tx, rx) = mpsc::channel();
    after_edit.check_streaming(tx, false)?;
    let (changed, done) = drain(rx);
    assert_eq!(changed.len(), 1);
    assert_eq!(
        changed[0],
        (PathBuf::from("file1.txt"), CheckStatus::Modified)
    );
    assert_eq!(done, 1);
    Ok(())
}
723
/// In update mode the cache follows the file: a new file is stored, a
/// content change replaces the hash, and an mtime-only change refreshes the
/// entry.
#[test]
fn check_update_mode() -> anyhow::Result<()> {
    // Runs an updating check on `hasher`, drains all events and saves.
    let refresh = |hasher: &FileHasher| -> anyhow::Result<()> {
        let (tx, rx) = mpsc::channel();
        hasher.check_streaming(tx, true)?;
        for _ in rx {}
        hasher.save_cache()
    };

    let dir = tempfile::tempdir()?;
    let dir_path = dir.path().to_path_buf();
    let file1_path = dir.path().join("file1.txt");
    let cache_key = PathBuf::from("file1.txt");
    fs::write(&file1_path, "content 1")?;

    // Round 1: the new file ends up in the saved cache.
    let mut first = FileHasher::new(&[&dir_path])?;
    first.exclude = Some(default_exclude());
    refresh(&first)?;
    assert!(dir.path().join(FileHashCache::FILE_NAME).exists());

    let cache = FileHashCache::new(&dir_path);
    let mtime1 = fs::metadata(&file1_path)?.modified()?;
    let hash1 = cache.get(&cache_key, mtime1);
    assert!(hash1.is_some());

    // Round 2: a content change replaces the cached hash.
    std::thread::sleep(std::time::Duration::from_millis(10));
    fs::write(&file1_path, "content 1 modified")?;
    let mtime1_mod = fs::metadata(&file1_path)?.modified()?;

    let mut second = FileHasher::new(&[&dir_path])?;
    second.exclude = Some(default_exclude());
    refresh(&second)?;

    let cache = FileHashCache::new(&dir_path);
    let hash_mod = cache.get(&cache_key, mtime1_mod);
    assert!(hash_mod.is_some());
    assert_ne!(hash1, hash_mod);

    // Round 3: same content, newer mtime; the entry is refreshed.
    std::thread::sleep(std::time::Duration::from_millis(10));
    fs::write(&file1_path, "content 1 modified")?;
    let mtime1_mod2 = fs::metadata(&file1_path)?.modified()?;
    assert!(mtime1_mod2 > mtime1_mod);

    assert!(cache.get(&cache_key, mtime1_mod2).is_none());

    let mut third = FileHasher::new(&[&dir_path])?;
    third.exclude = Some(default_exclude());
    refresh(&third)?;

    let cache = FileHashCache::new(&dir_path);
    assert!(cache.get(&cache_key, mtime1_mod2).is_some());
    Ok(())
}
786
/// Identical files in two sibling directories are reported as one group.
#[test]
fn find_duplicates_multiple_dirs() -> anyhow::Result<()> {
    let tmp = tempfile::tempdir()?;
    let left = tmp.path().join("dir1");
    let right = tmp.path().join("dir2");
    for dir in [&left, &right] {
        fs::create_dir(dir)?;
    }
    let left_file = left.join("file1.txt");
    fs::write(&left_file, "same content")?;
    let right_file = right.join("file2.txt");
    fs::write(&right_file, "same content")?;

    let hasher = FileHasher::new(&[&left, &right])?;
    let groups = hasher.find_duplicates()?;
    assert_eq!(groups.len(), 1);
    let only = &groups[0];
    assert_eq!(only.paths.len(), 2);
    assert_eq!(only.size, 12);
    for expected in [&left_file, &right_file] {
        assert!(only.paths.contains(expected));
    }

    Ok(())
}
809
/// Check mode rejects a hasher configured with more than one directory.
#[test]
fn check_fails_with_multiple_dirs() -> anyhow::Result<()> {
    let tmp = tempfile::tempdir()?;
    let left = tmp.path().join("dir1");
    let right = tmp.path().join("dir2");
    for dir in [&left, &right] {
        fs::create_dir(dir)?;
    }
    let hasher = FileHasher::new(&[&left, &right])?;
    let outcome = hasher.check(false);
    assert!(outcome.is_err());
    Ok(())
}
821}