1use crate::{
2 ColumnFormatter, DirectoryComparer, FileComparer, FileHashCache, FileIterator, Progress,
3 ProgressBuilder,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use rayon::prelude::*;
8use std::{
9 collections::HashMap,
10 fs,
11 io::{self, Read, stdout},
12 path::{Path, PathBuf},
13 sync::{
14 Arc,
15 atomic::{AtomicUsize, Ordering},
16 mpsc,
17 },
18 time,
19};
20
/// A discovered file path paired with the index (into `FileHasher::dirs`) of the
/// directory whose scan produced it.
type FileItem = (PathBuf, usize);
22
/// Messages sent by the duplicate-search worker to the consumer in `find_duplicates`.
#[derive(Debug, Clone)]
enum DupEvent {
    /// Sent once, before incoming files start being examined.
    StartHashing,
    /// Number of files that were submitted for hashing (sent after the scan finished).
    NumFiles(usize),
    /// A hash became available: file path, file size in bytes, and its BLAKE3 hash.
    Result(PathBuf, u64, blake3::Hash),
    /// Hashing a file failed.
    Error,
}
30
/// Outcome of comparing a file's freshly computed hash with the cached one.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
enum CheckStatus {
    /// The cache holds the same hash for this path.
    Unchanged,
    /// The cache has no hash for this path.
    New,
    /// The cache holds a different hash for this path.
    Modified,
}
37
/// Messages sent by `check_streaming` to whoever drains the channel.
#[derive(Debug, PartialEq)]
enum CheckEvent {
    /// Sent once, after the directory scan has been started.
    StartChecking,
    /// Total number of files that were queued for checking.
    TotalFiles(usize),
    /// A new or modified file, with its path relative to the checked directory.
    Result(PathBuf, CheckStatus),
    /// A file was checked and found unchanged.
    FileDone,
    /// Checking a file failed.
    Error,
}
46
/// Per-file-size bookkeeping used while looking for duplicate candidates.
enum DupState {
    /// Exactly one file of this size has been seen so far: its path, modification
    /// time, and directory index. It is not hashed until a second file shows up.
    Single(PathBuf, time::SystemTime, usize),
    /// At least two files of this size were seen; every such file gets hashed.
    Hashing,
}
51
/// Hashes files under one or more directories to find duplicates or to check
/// files against previously cached hashes.
pub struct FileHasher {
    /// Directories to process; never empty when built through `new`.
    dirs: Vec<PathBuf>,
    /// Read buffer size in bytes used when hashing; `0` makes `compute_hash`
    /// memory-map the file instead of reading it through a buffer.
    pub buffer_size: usize,
    /// Hash cache used by `get_hash`; created on demand by `cache()`.
    cache: Option<Arc<FileHashCache>>,
    /// Number of hashes computed from file contents.
    num_hashed: AtomicUsize,
    /// Number of hashes served from a cache.
    num_hash_looked_up: AtomicUsize,
    /// Glob set handed to the file iterators (presumably matching paths are
    /// skipped; the filtering itself lives in `FileIterator`).
    pub exclude: Option<GlobSet>,
    /// Optional progress reporting; `None` disables progress output.
    pub progress: Option<Arc<ProgressBuilder>>,
    /// When `true`, `run` prints duplicate groups as YAML instead of plain text.
    pub is_yaml_format: bool,
    /// Value passed to `crate::build_thread_pool` (presumably the worker count).
    pub jobs: usize,
}
64
65impl FileHasher {
/// Default for `jobs`; shared with `DirectoryComparer` so both use the same value.
const DEFAULT_JOBS: usize = DirectoryComparer::DEFAULT_JOBS;
67
68 pub fn new<P: AsRef<Path>>(dirs: &[P]) -> anyhow::Result<Self> {
70 if dirs.is_empty() {
71 anyhow::bail!("At least one directory must be specified.");
72 }
73 Ok(Self {
74 dirs: dirs.iter().map(|p| p.as_ref().to_path_buf()).collect(),
75 buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
76 cache: None,
77 num_hashed: AtomicUsize::new(0),
78 num_hash_looked_up: AtomicUsize::new(0),
79 exclude: None,
80 progress: None,
81 is_yaml_format: false,
82 jobs: Self::DEFAULT_JOBS,
83 })
84 }
85
86 pub(crate) fn new_with_cache<P: AsRef<Path>>(dirs: &[P]) -> anyhow::Result<Self> {
87 let mut hasher = Self::new(dirs)?;
88 hasher.cache = Some(hasher.new_cache()?);
89 Ok(hasher)
90 }
91
92 fn new_cache(&self) -> anyhow::Result<Arc<FileHashCache>> {
93 let common_ancestor = crate::common_ancestor(&self.dirs)
94 .ok_or_else(|| anyhow::anyhow!("No common ancestor found"))?;
95 Ok(FileHashCache::find_or_new(&common_ancestor))
96 }
97
98 pub(crate) fn cache(&mut self) -> anyhow::Result<Arc<FileHashCache>> {
100 if self.cache.is_none() {
101 self.cache = Some(self.new_cache()?);
102 }
103 Ok(Arc::clone(self.cache.as_ref().unwrap()))
104 }
105
106 pub(crate) fn remove_cache_entry(&mut self, path: &Path) -> anyhow::Result<()> {
108 let cache = self.cache()?;
109 let relative = crate::strip_prefix(path, cache.base_dir())?;
110 cache.remove(relative);
111 Ok(())
112 }
113
114 pub fn save_cache(&self) -> anyhow::Result<()> {
116 log::info!(
117 "Hash stats for {:?}: {} computed, {} looked up",
118 self.dirs,
119 self.num_hashed.load(Ordering::Relaxed),
120 self.num_hash_looked_up.load(Ordering::Relaxed)
121 );
122 if let Some(cache) = &self.cache {
123 cache.save()?;
124 }
125 Ok(())
126 }
127
128 pub(crate) fn clear_cache(&mut self) -> anyhow::Result<()> {
130 let cache = self.cache()?;
131 for dir in &self.dirs {
132 let relative = crate::strip_prefix(dir, cache.base_dir())?;
133 cache.clear(relative);
134 }
135 Ok(())
136 }
137
138 pub fn check(&self, update: bool) -> anyhow::Result<()> {
140 if self.dirs.len() > 1 {
141 anyhow::bail!("Check mode only supports one directory.");
142 }
143 let start_time = time::Instant::now();
144 let progress = self
145 .progress
146 .as_ref()
147 .map(|progress| progress.add_spinner())
148 .unwrap_or_else(Progress::none);
149 progress.set_message("Scanning directory...");
150 let mut num_new = 0;
151 let mut num_modified = 0;
152 let mut num_error = 0;
153 std::thread::scope(|scope| {
154 let (tx, rx) = mpsc::channel();
155 scope.spawn(|| {
156 if let Err(e) = self.check_streaming(tx, update) {
157 log::error!("Error during check: {}", e);
158 }
159 });
160 while let Ok(event) = rx.recv() {
161 match event {
162 CheckEvent::StartChecking => {
163 progress.set_message("Checking files...");
164 }
165 CheckEvent::TotalFiles(total) => {
166 progress.set_length(total as u64);
167 progress.set_message("");
168 }
169 CheckEvent::Result(path, status) => {
170 let symbol = match status {
171 CheckStatus::New => {
172 num_new += 1;
173 '+'
174 }
175 CheckStatus::Modified => {
176 num_modified += 1;
177 '!'
178 }
179 CheckStatus::Unchanged => unreachable!(),
180 };
181 progress.inc(1);
182 progress.suspend_for(stdout(), || {
183 println!("{} {}", symbol, path.display());
184 });
185 }
186 CheckEvent::FileDone => {
187 progress.inc(1);
188 }
189 CheckEvent::Error => {
190 progress.inc(1);
191 num_error += 1;
192 }
193 }
194 }
195 });
196 progress.finish();
197 self.print_check_summary(&start_time, num_new, num_modified, num_error)?;
198 Ok(())
199 }
200
201 fn print_check_summary(
202 &self,
203 start_time: &time::Instant,
204 num_new: usize,
205 num_modified: usize,
206 num_error: usize,
207 ) -> io::Result<()> {
208 let summary = [
209 ("Elapsed:", 0),
210 ("Hash computed:", self.num_hashed.load(Ordering::Relaxed)),
211 ("New files:", num_new),
212 ("Modified files:", num_modified),
213 ("Errors:", num_error),
214 ];
215 let formatter = ColumnFormatter::new(summary.iter().map(|(s, _)| *s));
216 let mut writer = std::io::stderr();
217 formatter.write_value(
218 &mut writer,
219 summary[0].0,
220 FormattedDuration(start_time.elapsed()),
221 )?;
222 formatter.write_values(&mut writer, &summary[1..])
223 }
224
225 fn check_streaming(&self, tx: mpsc::Sender<CheckEvent>, update: bool) -> anyhow::Result<()> {
226 assert_eq!(self.dirs.len(), 1);
227 let cache = self.new_cache()?;
228 let base_dir = &self.dirs[0];
229 let relative = crate::strip_prefix(base_dir, cache.base_dir())?;
230 cache.set_remove_if_no_access(relative);
231 let cache_clone = Arc::clone(&cache);
232 std::thread::scope(|global_scope| {
233 let mut it = FileIterator::new(base_dir);
234 it.cache = Some(Arc::clone(&cache));
235 it.exclude = self.exclude.as_ref();
236 let it_rx = it.spawn_in_scope(global_scope);
237 tx.send(CheckEvent::StartChecking)?;
238 let pool = crate::build_thread_pool(self.jobs)?;
239 pool.scope(move |scope| -> anyhow::Result<()> {
240 let mut total_files = 0;
241 for path in it_rx {
242 total_files += 1;
243 let tx = tx.clone();
244 let cache = Arc::clone(&cache);
245 scope.spawn(move |_| {
246 let status = self.check_file(&path, &cache, update);
247 let event = match status {
248 Ok(CheckStatus::New) | Ok(CheckStatus::Modified) => {
249 let rel_path = crate::strip_prefix(&path, base_dir).unwrap();
250 CheckEvent::Result(rel_path.into(), status.unwrap())
251 }
252 Ok(CheckStatus::Unchanged) => CheckEvent::FileDone,
253 Err(e) => {
254 log::error!("Failed to check file {:?}: {}", path, e);
255 CheckEvent::Error
256 }
257 };
258 if tx.send(event).is_err() {
259 log::error!("Send failed");
260 }
261 });
262 }
263 tx.send(CheckEvent::TotalFiles(total_files))?;
264 Ok(())
265 })
266 })?;
267 cache_clone.save()?;
268 Ok(())
269 }
270
271 fn check_file(
272 &self,
273 abs_path: &Path,
274 cache: &FileHashCache,
275 update: bool,
276 ) -> anyhow::Result<CheckStatus> {
277 assert!(abs_path.is_absolute());
278 let computed_hash = self.compute_hash(abs_path)?;
279 let rel_path = crate::strip_prefix(abs_path, cache.base_dir())?;
280 let cached_hash = cache.get_by_path(rel_path);
281 let status = match cached_hash {
282 None => CheckStatus::New,
283 Some(cached) => {
284 if computed_hash != cached {
285 CheckStatus::Modified
286 } else {
287 CheckStatus::Unchanged
288 }
289 }
290 };
291 if update {
292 let modified = fs::metadata(abs_path)?.modified()?;
293 match status {
294 CheckStatus::New | CheckStatus::Modified => {
295 cache.insert(rel_path, modified, computed_hash);
296 }
297 CheckStatus::Unchanged => {
298 if cache.get(rel_path, modified).is_none() {
299 cache.insert(rel_path, modified, computed_hash);
300 }
301 }
302 }
303 }
304 Ok(status)
305 }
306
307 pub fn run(&self) -> anyhow::Result<()> {
309 let start_time = time::Instant::now();
310 let mut duplicates = self.find_duplicates()?;
311 let mut total_wasted_space = 0;
312 if !duplicates.is_empty() {
313 duplicates.sort_by_key(|a| a.size);
314 for dupes in &duplicates {
315 if self.is_yaml_format {
316 dupes.write_yaml(std::io::stdout())?;
317 } else {
318 dupes.write_human(std::io::stdout())?;
319 }
320 total_wasted_space += dupes.wasted_size();
321 }
322 }
323 self.print_duplicates_summary(&start_time, total_wasted_space)?;
324 Ok(())
325 }
326
327 fn print_duplicates_summary(
328 &self,
329 start_time: &time::Instant,
330 total_wasted_space: u64,
331 ) -> io::Result<()> {
332 let elapsed = FormattedDuration(start_time.elapsed()).to_string();
333 let num_hashed = self.num_hashed.load(Ordering::Relaxed).to_string();
334 let total_wasted_space = crate::human_readable_size(total_wasted_space);
335 let summary = [
336 ("Elapsed:", elapsed),
337 ("Hash computed:", num_hashed),
338 ("Total wasted space:", total_wasted_space),
339 ];
340 let formatter = ColumnFormatter::new(summary.iter().map(|(s, _)| *s));
341 formatter.write_values(&mut io::stderr(), &summary)
342 }
343
344 pub fn find_duplicates(&self) -> anyhow::Result<Vec<DuplicatedFiles>> {
346 let progress = self
347 .progress
348 .as_ref()
349 .map(|progress| progress.add_spinner())
350 .unwrap_or_else(Progress::none);
351 progress.set_message("Scanning directories...");
352
353 let (tx, rx) = mpsc::channel();
354 let mut by_hash: HashMap<blake3::Hash, DuplicatedFiles> = HashMap::new();
355 std::thread::scope(|scope| {
356 scope.spawn(|| {
357 if let Err(e) = self.find_duplicates_streaming(tx) {
358 log::error!("Error during duplicate finding: {}", e);
359 }
360 });
361
362 while let Ok(event) = rx.recv() {
363 match event {
364 DupEvent::StartHashing => progress.set_message("Hashing files..."),
365 DupEvent::NumFiles(num) => progress.set_length(num as u64),
366 DupEvent::Result(path, size, hash) => {
367 progress.inc(1);
368 let entry = by_hash.entry(hash).or_insert_with(|| DuplicatedFiles {
369 paths: Vec::new(),
370 size,
371 });
372 assert_eq!(entry.size, size, "Hash collision: sizes do not match");
374 entry.paths.push(path);
375 }
376 DupEvent::Error => progress.inc(1),
377 }
378 }
379 });
380 progress.finish();
381
382 let mut duplicates = Vec::new();
383 for (_, mut dupes) in by_hash {
384 if dupes.paths.len() > 1 {
385 dupes.paths.sort();
386 duplicates.push(dupes);
387 }
388 }
389 Ok(duplicates)
390 }
391
392 fn find_duplicates_streaming(&self, tx: mpsc::Sender<DupEvent>) -> anyhow::Result<()> {
393 std::thread::scope(|global_scope| {
394 let (it_rx, caches) = self.stream_file_items(global_scope)?;
395 let caches = &caches;
396 let pool = crate::build_thread_pool(self.jobs)?;
397 pool.scope(move |scope| -> anyhow::Result<()> {
398 let mut by_size: HashMap<u64, DupState> = HashMap::new();
399 let mut num_hashed = 0;
400 tx.send(DupEvent::StartHashing)?;
401 for (path, dir_index) in it_rx {
402 let meta = fs::metadata(&path)?;
403 let size = meta.len();
404 if size == 0 {
405 continue;
406 }
407 let modified = meta.modified()?;
408 let cache = &caches[dir_index];
409 match by_size.entry(size) {
410 std::collections::hash_map::Entry::Occupied(mut occ) => match occ.get_mut()
411 {
412 DupState::Single(path0, modified0, dir_index0) => {
413 let cache0 = &caches[*dir_index0];
416 self.send_hash(path0, size, *modified0, cache0, &tx, scope);
417 self.send_hash(&path, size, modified, cache, &tx, scope);
418
419 *occ.get_mut() = DupState::Hashing;
421 num_hashed += 2;
422 }
423 DupState::Hashing => {
424 self.send_hash(&path, size, modified, cache, &tx, scope);
426 num_hashed += 1;
427 }
428 },
429 std::collections::hash_map::Entry::Vacant(vac) => {
430 vac.insert(DupState::Single(path, modified, dir_index));
431 }
432 }
433 }
434 tx.send(DupEvent::NumFiles(num_hashed))?;
435 Ok(())
436 })?;
437 pool.install(|| caches.into_par_iter().try_for_each(|cache| cache.save()))?;
438 Ok::<(), anyhow::Error>(())
439 })?;
440 Ok(())
441 }
442
443 fn stream_file_items<'scope, 'env>(
444 &'env self,
445 scope: &'scope std::thread::Scope<'scope, 'env>,
446 ) -> anyhow::Result<(mpsc::Receiver<FileItem>, Vec<Arc<FileHashCache>>)> {
447 let (it_tx, it_rx) = mpsc::channel();
448 let mut caches = Vec::with_capacity(self.dirs.len());
449 for (dir_index, dir) in self.dirs.iter().enumerate() {
450 let mut it = FileIterator::new(dir);
451 let cache = FileHashCache::find_or_new(dir);
452 it.cache = Some(Arc::clone(&cache));
453 it.exclude = self.exclude.as_ref();
454 let it_tx = it_tx.clone();
455 scope.spawn(move || it.send_to_as(it_tx, |path| (path, dir_index)));
456 caches.push(cache);
457 }
458 Ok((it_rx, caches))
459 }
460
461 fn send_hash<'scope>(
462 &'scope self,
463 path: &Path,
464 size: u64,
465 modified: time::SystemTime,
466 cache: &Arc<FileHashCache>,
467 tx: &mpsc::Sender<DupEvent>,
468 scope: &rayon::Scope<'scope>,
469 ) {
470 let (hash, relative) = self
471 .get_hash_from_cache(path, modified, cache)
472 .expect("path should be in cache base_dir");
473 if let Some(hash) = hash {
474 let _ = tx.send(DupEvent::Result(path.to_path_buf(), size, hash));
475 return;
476 }
477
478 let path = path.to_path_buf();
479 let relative = relative.to_path_buf();
480 let tx = tx.clone();
481 let cache = Arc::clone(cache);
482 scope.spawn(move |_| {
483 if let Ok(hash) = self.compute_hash(&path) {
484 cache.insert(&relative, modified, hash);
485 let _ = tx.send(DupEvent::Result(path, size, hash));
486 } else {
487 log::error!("Failed to hash file: {:?}", path);
488 let _ = tx.send(DupEvent::Error);
489 }
490 });
491 }
492
493 pub fn get_hash(&self, path: &Path) -> io::Result<blake3::Hash> {
495 let cache = self.cache.as_ref().expect("cache should be initialized");
496 let meta = fs::metadata(path)?;
497 let modified = meta.modified()?;
498 let (hash, relative) = self.get_hash_from_cache(path, modified, cache)?;
499 if let Some(hash) = hash {
500 return Ok(hash);
501 }
502
503 let hash = self.compute_hash(path)?;
504 cache.insert(relative, modified, hash);
505 Ok(hash)
506 }
507
508 fn get_hash_from_cache<'a>(
509 &self,
510 path: &'a Path,
511 modified: time::SystemTime,
512 cache: &FileHashCache,
513 ) -> io::Result<(Option<blake3::Hash>, &'a Path)> {
514 let relative = crate::strip_prefix(path, cache.base_dir())
515 .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
516 if let Some(hash) = cache.get(relative, modified) {
517 self.num_hash_looked_up.fetch_add(1, Ordering::Relaxed);
518 return Ok((Some(hash), relative));
519 }
520 Ok((None, relative))
521 }
522
523 fn compute_hash(&self, path: &Path) -> io::Result<blake3::Hash> {
524 let start_time = time::Instant::now();
525 let mut f = fs::File::open(path)?;
526 let len = f.metadata()?.len();
527 let progress = self
528 .progress
529 .as_ref()
530 .map(|progress| progress.add_file(path, len))
531 .unwrap_or_else(Progress::none);
532 let mut hasher = blake3::Hasher::new();
533 if self.buffer_size == 0 {
534 if len > 0 {
535 let mmap = unsafe { memmap2::MmapOptions::new().map(&f)? };
536 hasher.update(&mmap[..]);
537 progress.inc(len);
538 }
539 } else {
540 let mut buf = vec![0u8; self.buffer_size];
541 loop {
542 let n = f.read(&mut buf)?;
543 if n == 0 {
544 break;
545 }
546 hasher.update(&buf[..n]);
547 progress.inc(n as u64);
548 }
549 }
550 progress.finish();
551 self.num_hashed.fetch_add(1, Ordering::Relaxed);
552 let hash = hasher.finalize();
553 log::debug!(
554 "Computed hash in {}: {:?}",
555 FormattedDuration(start_time.elapsed()),
556 path
557 );
558 Ok(hash)
559 }
560}
561
/// A group of files that share the same content hash.
#[derive(Clone, Debug)]
pub struct DuplicatedFiles {
    /// Paths of the files in the group.
    pub paths: Vec<PathBuf>,
    /// Size in bytes of each file in the group.
    pub size: u64,
}
568
569impl DuplicatedFiles {
570 fn write_human(&self, mut writer: impl io::Write) -> anyhow::Result<()> {
571 writeln!(
572 writer,
573 "Identical {} files of {}:",
574 self.paths.len(),
575 crate::human_readable_size(self.size)
576 )?;
577 for path in &self.paths {
578 writeln!(writer, " {}", path.display())?;
579 }
580 Ok(())
581 }
582
583 fn write_yaml(&self, mut writer: impl io::Write) -> anyhow::Result<()> {
584 writeln!(writer, "- paths:")?;
585 for path in &self.paths {
586 writeln!(writer, " - {:?}", path)?;
587 }
588 writeln!(writer, " size: {}", self.size)?;
589 Ok(())
590 }
591
592 fn wasted_size(&self) -> u64 {
593 self.size * (self.paths.len() as u64 - 1)
594 }
595}
596
597#[cfg(test)]
598mod tests {
599 use super::*;
600
601 fn default_exclude() -> globset::GlobSet {
602 let mut builder = globset::GlobSetBuilder::new();
603 builder.add(
604 globset::GlobBuilder::new(".hash_cache")
605 .case_insensitive(true)
606 .build()
607 .unwrap(),
608 );
609 builder.build().unwrap()
610 }
611
// Two identical files and one different file: exactly one group of two is found,
// and only the two same-sized files get hashed.
#[test]
fn find_duplicates() -> anyhow::Result<()> {
    let dir = tempfile::tempdir()?;
    let root = dir.path();

    let same1 = root.join("same1.txt");
    let same2 = root.join("same2.txt");
    let different = root.join("diff.txt");
    fs::write(&same1, "same content")?;
    fs::write(&same2, "same content")?;
    fs::write(&different, "different content")?;

    let mut hasher = FileHasher::new(&[root])?;
    hasher.buffer_size = 8192;
    let duplicates = hasher.find_duplicates()?;

    assert_eq!(hasher.num_hashed.load(Ordering::Relaxed), 2);
    assert_eq!(hasher.num_hash_looked_up.load(Ordering::Relaxed), 0);

    assert_eq!(duplicates.len(), 1);
    let group = &duplicates[0];
    assert_eq!(group.paths.len(), 2);
    assert_eq!(group.size, 12);
    for expected in [&same1, &same2] {
        assert!(group.paths.contains(expected));
    }

    Ok(())
}
642
// A cache file in `a/a` is filled by a first run; after a cache file appears in
// the parent `a`, a second run serves both hashes from cache and the nested
// cache file is gone.
#[test]
fn find_duplicates_merge_cache() -> anyhow::Result<()> {
    let dir = tempfile::tempdir()?;
    let root_a = dir.path().join("a");
    let sub_dir = root_a.join("a");
    fs::create_dir_all(&sub_dir)?;

    for name in ["1", "2"] {
        fs::write(sub_dir.join(name), "same content")?;
    }

    let cache_aa_path = sub_dir.join(FileHashCache::FILE_NAME);
    fs::File::create(&cache_aa_path)?;

    let hasher_aa = FileHasher::new(&[&sub_dir])?;
    let duplicates_aa = hasher_aa.find_duplicates()?;
    assert_eq!(duplicates_aa.len(), 1);
    assert!(cache_aa_path.exists());
    assert_eq!(hasher_aa.num_hashed.load(Ordering::Relaxed), 2);
    assert_eq!(hasher_aa.num_hash_looked_up.load(Ordering::Relaxed), 0);

    let cache_a_path = root_a.join(FileHashCache::FILE_NAME);
    fs::File::create(&cache_a_path)?;

    let hasher_a = FileHasher::new(&[&root_a])?;
    let duplicates_a = hasher_a.find_duplicates()?;
    assert_eq!(duplicates_a.len(), 1);
    assert_eq!(hasher_a.num_hashed.load(Ordering::Relaxed), 0);
    assert_eq!(hasher_a.num_hash_looked_up.load(Ordering::Relaxed), 2);

    assert!(cache_a_path.exists());
    assert!(!cache_aa_path.exists());

    Ok(())
}
687
// A third identical file matching the exclude glob must not show up in the group.
#[test]
fn find_duplicates_with_exclude() -> anyhow::Result<()> {
    let dir = tempfile::tempdir()?;
    let root = dir.path();

    let kept1 = root.join("same1.txt");
    let kept2 = root.join("same2.txt");
    let excluded = root.join("exclude.txt");
    fs::write(&kept1, "same content")?;
    fs::write(&kept2, "same content")?;
    fs::write(&excluded, "same content")?;

    let mut hasher = FileHasher::new(&[root])?;
    hasher.buffer_size = 8192;
    let exclude_glob = globset::GlobBuilder::new("exclude.txt")
        .case_insensitive(true)
        .build()?;
    let mut set_builder = globset::GlobSetBuilder::new();
    set_builder.add(exclude_glob);
    hasher.exclude = Some(set_builder.build()?);

    let duplicates = hasher.find_duplicates()?;
    assert_eq!(duplicates.len(), 1);
    let group = &duplicates[0];
    assert_eq!(group.paths.len(), 2);
    assert!(group.paths.contains(&kept1));
    assert!(group.paths.contains(&kept2));
    assert!(!group.paths.contains(&excluded));
    Ok(())
}
721
// Without any cache, every file is reported as new, and a non-updating check
// leaves no cache file behind.
#[test]
fn check_mode_empty_cache() -> anyhow::Result<()> {
    let dir = tempfile::tempdir()?;
    let dir_path = dir.path().to_path_buf();
    println!("{:?}", dir_path);
    fs::write(dir_path.join("file1.txt"), "content 1")?;
    fs::write(dir_path.join("file2.txt"), "content 2")?;

    let mut hasher = FileHasher::new(&[&dir_path])?;
    hasher.exclude = Some(default_exclude());
    let (tx, rx) = mpsc::channel();
    // The sender is consumed by the call, so draining `rx` afterwards terminates.
    hasher.check_streaming(tx, false)?;

    let mut results = Vec::new();
    let mut start_seen = false;
    let mut total_files = None;
    let (mut file_done_count, mut num_error) = (0, 0);
    for event in rx {
        match event {
            CheckEvent::StartChecking => start_seen = true,
            CheckEvent::TotalFiles(total) => total_files = Some(total),
            CheckEvent::Result(path, status) => results.push((path, status)),
            CheckEvent::FileDone => file_done_count += 1,
            CheckEvent::Error => num_error += 1,
        }
    }
    assert!(start_seen);
    assert_eq!(total_files, Some(2));
    assert_eq!(file_done_count, 0);
    assert_eq!(num_error, 0);

    results.sort_by(|(left, _), (right, _)| left.cmp(right));
    let expected = vec![
        (PathBuf::from("file1.txt"), CheckStatus::New),
        (PathBuf::from("file2.txt"), CheckStatus::New),
    ];
    assert_eq!(results, expected);

    assert!(!dir_path.join(FileHashCache::FILE_NAME).exists());
    Ok(())
}
763
// With a populated cache nothing is reported; afterwards a content change is
// reported as modified while a rewrite with identical content is not.
#[test]
fn check_mode_with_cache() -> anyhow::Result<()> {
    let dir = tempfile::tempdir()?;
    let dir_path = dir.path().to_path_buf();
    let file1_path = dir_path.join("file1.txt");
    let file2_path = dir_path.join("file2.txt");
    fs::write(&file1_path, "content 1")?;
    fs::write(&file2_path, "content 2")?;

    // Populate the cache through `get_hash` and persist it.
    let mut hasher = FileHasher::new_with_cache(&[&dir_path])?;
    hasher.exclude = Some(default_exclude());
    let _hash1 = hasher.get_hash(&file1_path)?;
    let _hash2 = hasher.get_hash(&file2_path)?;
    hasher.save_cache()?;
    assert!(dir_path.join(FileHashCache::FILE_NAME).exists());

    // Runs a non-updating check and returns (reported results, unchanged count).
    let run_check = || -> anyhow::Result<(Vec<(PathBuf, CheckStatus)>, usize)> {
        let mut hasher = FileHasher::new(&[&dir_path])?;
        hasher.exclude = Some(default_exclude());
        let (tx, rx) = mpsc::channel();
        hasher.check_streaming(tx, false)?;
        let mut results = Vec::new();
        let mut file_done_count = 0;
        for event in rx {
            match event {
                CheckEvent::Result(path, status) => results.push((path, status)),
                CheckEvent::FileDone => file_done_count += 1,
                _ => {}
            }
        }
        Ok((results, file_done_count))
    };

    let (results, file_done_count) = run_check()?;
    assert_eq!(results.len(), 0);
    assert_eq!(file_done_count, 2);

    fs::write(&file1_path, "content 1 modified")?;

    // Rewrite file2 with the same content but a later modification time.
    let mtime_before = fs::metadata(&file2_path)?.modified()?;
    std::thread::sleep(time::Duration::from_millis(10));
    fs::write(&file2_path, "content 2")?;
    let mtime_after = fs::metadata(&file2_path)?.modified()?;
    assert!(mtime_after > mtime_before);

    let (results, file_done_count) = run_check()?;
    assert_eq!(results.len(), 1);
    assert_eq!(
        results[0],
        (PathBuf::from("file1.txt"), CheckStatus::Modified)
    );
    assert_eq!(file_done_count, 1);
    Ok(())
}
827
// An updating check stores hashes, replaces them when content changes, and
// re-keys an unchanged file's entry when only its modification time changed.
#[test]
fn check_update_mode() -> anyhow::Result<()> {
    let dir = tempfile::tempdir()?;
    let dir_path = dir.path().to_path_buf();
    let file1_path = dir_path.join("file1.txt");
    let file1_rel = PathBuf::from("file1.txt");
    fs::write(&file1_path, "content 1")?;

    // Runs one updating check to completion and saves the hasher's cache.
    let run_update = || -> anyhow::Result<()> {
        let mut hasher = FileHasher::new(&[&dir_path])?;
        hasher.exclude = Some(default_exclude());
        let (tx, rx) = mpsc::channel();
        hasher.check_streaming(tx, true)?;
        rx.into_iter().for_each(drop);
        hasher.save_cache()
    };

    run_update()?;
    assert!(dir_path.join(FileHashCache::FILE_NAME).exists());

    let cache = FileHashCache::new(&dir_path);
    let mtime1 = fs::metadata(&file1_path)?.modified()?;
    let hash1 = cache.get(&file1_rel, mtime1);
    assert!(hash1.is_some());

    // Change the content.
    std::thread::sleep(time::Duration::from_millis(10));
    fs::write(&file1_path, "content 1 modified")?;
    let mtime1_mod = fs::metadata(&file1_path)?.modified()?;

    run_update()?;

    let cache = FileHashCache::new(&dir_path);
    let hash_mod = cache.get(&file1_rel, mtime1_mod);
    assert!(hash_mod.is_some());
    assert_ne!(hash1, hash_mod);

    // Rewrite the same content: only the modification time moves forward.
    std::thread::sleep(time::Duration::from_millis(10));
    fs::write(&file1_path, "content 1 modified")?;
    let mtime1_mod2 = fs::metadata(&file1_path)?.modified()?;
    assert!(mtime1_mod2 > mtime1_mod);

    assert!(cache.get(&file1_rel, mtime1_mod2).is_none());

    run_update()?;

    let cache = FileHashCache::new(&dir_path);
    assert!(cache.get(&file1_rel, mtime1_mod2).is_some());
    Ok(())
}
890
// After a file is deleted, the next updating check drops its cache entry while
// keeping the entry of the file that still exists.
#[test]
fn check_cleanup_deleted_files() -> anyhow::Result<()> {
    let dir = tempfile::tempdir()?;
    let dir_path = dir.path().to_path_buf();
    let file1_path = dir_path.join("file1.txt");
    let file2_path = dir_path.join("file2.txt");
    fs::write(&file1_path, "content 1")?;
    fs::write(&file2_path, "content 2")?;
    let mtime1 = fs::metadata(&file1_path)?.modified()?;
    let mtime2 = fs::metadata(&file2_path)?.modified()?;
    let file1_rel = PathBuf::from("file1.txt");
    let file2_rel = PathBuf::from("file2.txt");

    // Runs one updating check to completion and saves the hasher's cache.
    let run_update = || -> anyhow::Result<()> {
        let mut hasher = FileHasher::new(&[&dir_path])?;
        hasher.exclude = Some(default_exclude());
        let (tx, rx) = mpsc::channel();
        hasher.check_streaming(tx, true)?;
        rx.into_iter().for_each(drop);
        hasher.save_cache()
    };

    run_update()?;

    let cache = FileHashCache::new(&dir_path);
    assert!(cache.get(&file1_rel, mtime1).is_some());
    assert!(cache.get(&file2_rel, mtime2).is_some());

    fs::remove_file(&file2_path)?;

    run_update()?;

    let cache = FileHashCache::new(&dir_path);
    assert!(cache.get(&file2_rel, mtime2).is_none());
    assert!(cache.get(&file1_rel, mtime1).is_some());
    Ok(())
}
931
// Identical files living in two different scanned directories form one group.
#[test]
fn find_duplicates_multiple_dirs() -> anyhow::Result<()> {
    let tmp = tempfile::tempdir()?;
    let first_dir = tmp.path().join("dir1");
    let second_dir = tmp.path().join("dir2");
    fs::create_dir(&first_dir)?;
    fs::create_dir(&second_dir)?;

    let first_file = first_dir.join("file1.txt");
    fs::write(&first_file, "same content")?;
    let second_file = second_dir.join("file2.txt");
    fs::write(&second_file, "same content")?;

    let hasher = FileHasher::new(&[&first_dir, &second_dir])?;
    let duplicates = hasher.find_duplicates()?;

    assert_eq!(duplicates.len(), 1);
    let group = &duplicates[0];
    assert_eq!(group.paths.len(), 2);
    assert_eq!(group.size, 12);
    for expected in [&first_file, &second_file] {
        assert!(group.paths.contains(expected));
    }

    Ok(())
}
954
// Check mode rejects a hasher configured with more than one directory.
#[test]
fn check_fails_with_multiple_dirs() -> anyhow::Result<()> {
    let tmp = tempfile::tempdir()?;
    let dirs = [tmp.path().join("dir1"), tmp.path().join("dir2")];
    fs::create_dir(&dirs[0])?;
    fs::create_dir(&dirs[1])?;
    let hasher = FileHasher::new(&[&dirs[0], &dirs[1]])?;
    let outcome = hasher.check(false);
    assert!(outcome.is_err());
    Ok(())
}
966}