1use std::cmp::{max, min, Reverse};
4use std::collections::HashMap;
5use std::fmt::{Display, Formatter};
6use std::hash::{Hash, Hasher};
7use std::io::{ErrorKind, Write};
8use std::ops::{Add, AddAssign};
9use std::sync::mpsc::channel;
10use std::sync::Arc;
11use std::time::SystemTime;
12use std::{fmt, fs, io};
13
14use chrono::{DateTime, FixedOffset, Local};
15use priority_queue::PriorityQueue;
16use rand::distributions::Alphanumeric;
17use rand::Rng;
18use rayon::iter::{IntoParallelIterator, ParallelBridge, ParallelIterator};
19
20use crate::config::{DedupeConfig, Priority};
21use crate::device::DiskDevices;
22use crate::file::{FileId, FileLen, FileMetadata};
23use crate::group::{FileGroup, FileSubGroup};
24use crate::lock::FileLock;
25use crate::log::{Log, LogExt};
26use crate::path::Path;
27use crate::util::{max_result, min_result, try_sort_by_key};
28use crate::{Error, TIMESTAMP_FMT};
29
30#[derive(Clone, Debug, PartialEq, Eq)]
32pub enum DedupeOp {
33 Remove,
35 Move(Arc<Path>),
37 SymbolicLink,
39 HardLink,
41 RefLink,
43}
44
45#[derive(Clone, Debug)]
47pub struct PathAndMetadata {
48 pub path: Path,
49 pub metadata: FileMetadata,
50}
51
52impl PathAndMetadata {
53 pub fn new(path: Path) -> io::Result<PathAndMetadata> {
54 let metadata = FileMetadata::new(&path).map_err(|e| {
55 io::Error::new(
56 e.kind(),
57 format!("Failed to read metadata of {}: {}", path.display(), e),
58 )
59 })?;
60 Ok(PathAndMetadata { metadata, path })
61 }
62}
63
64impl AsRef<PathAndMetadata> for PathAndMetadata {
65 fn as_ref(&self) -> &PathAndMetadata {
66 self
67 }
68}
69
70impl AsRef<Path> for PathAndMetadata {
71 fn as_ref(&self) -> &Path {
72 &self.path
73 }
74}
75
76impl AsRef<FileId> for PathAndMetadata {
77 fn as_ref(&self) -> &FileId {
78 self.metadata.as_ref()
79 }
80}
81
82impl From<PathAndMetadata> for Path {
83 fn from(value: PathAndMetadata) -> Self {
84 value.path
85 }
86}
87
88impl Display for PathAndMetadata {
89 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
90 f.pad(self.path.display().as_str())
91 }
92}
93
94#[derive(Debug)]
96pub enum FsCommand {
97 Remove {
98 file: PathAndMetadata,
99 },
100 Move {
101 source: PathAndMetadata,
102 target: Path,
103 use_rename: bool, },
105 SoftLink {
106 target: Arc<PathAndMetadata>,
107 link: PathAndMetadata,
108 },
109 HardLink {
110 target: Arc<PathAndMetadata>,
111 link: PathAndMetadata,
112 },
113 RefLink {
114 target: Arc<PathAndMetadata>,
115 link: PathAndMetadata,
116 },
117}
118
119impl FsCommand {
120 fn maybe_lock(path: &Path, lock: bool) -> io::Result<Option<FileLock>> {
122 if lock {
123 match FileLock::new(path) {
124 Ok(lock) => Ok(Some(lock)),
125 Err(e) if e.kind() == ErrorKind::Unsupported => Ok(None),
126 Err(e) => Err(e),
127 }
128 } else {
129 Ok(None)
130 }
131 }
132
133 pub fn remove(path: &Path) -> io::Result<()> {
134 fs::remove_file(path.to_path_buf()).map_err(|e| {
135 io::Error::new(
136 e.kind(),
137 format!("Failed to remove file {}: {}", path.display(), e),
138 )
139 })
140 }
141
142 #[cfg(unix)]
143 fn symlink_internal(target: &std::path::Path, link: &std::path::Path) -> io::Result<()> {
144 std::os::unix::fs::symlink(target, link)
145 }
146
147 #[cfg(windows)]
148 fn symlink_internal(target: &std::path::Path, link: &std::path::Path) -> io::Result<()> {
149 std::os::windows::fs::symlink_file(target, link)
150 }
151
152 fn symlink(target: &Path, link: &Path) -> io::Result<()> {
153 Self::symlink_internal(&target.to_path_buf(), &link.to_path_buf()).map_err(|e| {
154 io::Error::new(
155 e.kind(),
156 format!(
157 "Failed to create symbolic link {} -> {}: {}",
158 link.display(),
159 target.display(),
160 e
161 ),
162 )
163 })
164 }
165
166 fn hardlink(target: &Path, link: &Path) -> io::Result<()> {
167 fs::hard_link(target.to_path_buf(), link.to_path_buf()).map_err(|e| {
168 io::Error::new(
169 e.kind(),
170 format!(
171 "Failed to create hard link {} -> {}: {}",
172 link.display(),
173 target.display(),
174 e
175 ),
176 )
177 })
178 }
179
180 fn check_can_rename(source: &Path, target: &Path) -> io::Result<()> {
181 if target.to_path_buf().exists() {
182 return Err(io::Error::new(
183 ErrorKind::AlreadyExists,
184 format!(
185 "Cannot move {} to {}: Target already exists",
186 source.display(),
187 target.display()
188 ),
189 ));
190 }
191 Ok(())
192 }
193
194 fn mkdirs(path: &Path) -> io::Result<()> {
195 fs::create_dir_all(path.to_path_buf()).map_err(|e| {
196 io::Error::new(
197 e.kind(),
198 format!("Failed to create directory {}: {}", path.display(), e),
199 )
200 })
201 }
202
203 pub fn unsafe_rename(source: &Path, target: &Path) -> io::Result<()> {
206 fs::rename(source.to_path_buf(), target.to_path_buf()).map_err(|e| {
207 io::Error::new(
208 e.kind(),
209 format!(
210 "Failed to rename file from {} to {}: {}",
211 source.display(),
212 target.display(),
213 e
214 ),
215 )
216 })
217 }
218
219 fn unsafe_copy(source: &Path, target: &Path) -> io::Result<()> {
222 fs::copy(source.to_path_buf(), target.to_path_buf()).map_err(|e| {
223 io::Error::new(
224 e.kind(),
225 format!(
226 "Failed to copy file from {} to {}: {}",
227 source.display(),
228 target.display(),
229 e
230 ),
231 )
232 })?;
233 Ok(())
234 }
235
236 fn move_rename(source: &Path, target: &Path) -> io::Result<()> {
239 Self::check_can_rename(source, target)?;
240 Self::mkdirs(target.parent().unwrap())?;
241 Self::unsafe_rename(source, target)?;
242 Ok(())
243 }
244
245 fn move_copy(source: &Path, target: &Path) -> io::Result<()> {
248 Self::check_can_rename(source, target)?;
249 Self::mkdirs(target.parent().unwrap())?;
250 Self::unsafe_copy(source, target)?;
251 Self::remove(source)?;
252 Ok(())
253 }
254
255 pub fn temp_file(path: &Path) -> Path {
258 let mut name = path
259 .file_name()
260 .expect("must be a regular file with a name");
261 name.push(".");
262 name.push(
263 rand::thread_rng()
264 .sample_iter(&Alphanumeric)
265 .take(24)
266 .map(char::from)
267 .collect::<String>(),
268 );
269 match path.parent() {
270 Some(parent) => parent.join(Path::from(name)),
271 None => Path::from(name),
272 }
273 }
274
275 pub fn safe_remove<R>(
279 path: &Path,
280 f: impl FnOnce(&Path) -> io::Result<R>,
281 log: &dyn Log,
282 ) -> io::Result<R> {
283 let tmp = Self::temp_file(path);
284 Self::unsafe_rename(path, &tmp)?;
285 let result = match f(path) {
286 Ok(result) => result,
287 Err(e) => {
288 if let Err(remove_err) = Self::unsafe_rename(&tmp, path) {
290 log.warn(format!(
291 "Failed to undo move from {} to {}: {}",
292 &path.display(),
293 &tmp.display(),
294 remove_err
295 ))
296 }
297 return Err(e);
298 }
299 };
300 if let Err(e) = Self::remove(&tmp) {
302 log.warn(format!(
303 "Failed to remove temporary {}: {}",
304 &tmp.display(),
305 e
306 ))
307 }
308 Ok(result)
309 }
310
311 pub fn execute(&self, should_lock: bool, log: &dyn Log) -> io::Result<FileLen> {
313 match self {
314 FsCommand::Remove { file } => {
315 let _ = Self::maybe_lock(&file.path, should_lock)?;
316 Self::remove(&file.path)?;
317 Ok(file.metadata.len())
318 }
319 FsCommand::SoftLink { target, link } => {
320 let _ = Self::maybe_lock(&link.path, should_lock)?;
321 Self::safe_remove(&link.path, |link| Self::symlink(&target.path, link), log)?;
322 Ok(link.metadata.len())
323 }
324 FsCommand::HardLink { target, link } => {
325 let _ = Self::maybe_lock(&link.path, should_lock)?;
326 Self::safe_remove(&link.path, |link| Self::hardlink(&target.path, link), log)?;
327 Ok(link.metadata.len())
328 }
329 FsCommand::RefLink { target, link } => {
330 let _ = Self::maybe_lock(&link.path, should_lock)?;
331 crate::reflink::reflink(target, link, log)?;
332 Ok(link.metadata.len())
333 }
334 FsCommand::Move {
335 source,
336 target,
337 use_rename,
338 } => {
339 let _ = Self::maybe_lock(&source.path, should_lock);
340 let len = source.metadata.len();
341 if *use_rename && Self::move_rename(&source.path, target).is_ok() {
342 return Ok(len);
343 }
344 Self::move_copy(&source.path, target)?;
345 Ok(len)
346 }
347 }
348 }
349
350 pub fn file_to_remove(&self) -> &Path {
354 match self {
355 FsCommand::Remove { file, .. }
356 | FsCommand::SoftLink { link: file, .. }
357 | FsCommand::HardLink { link: file, .. }
358 | FsCommand::RefLink { link: file, .. }
359 | FsCommand::Move { source: file, .. } => &file.path,
360 }
361 }
362
363 pub fn space_to_reclaim(&self) -> FileLen {
365 match self {
366 FsCommand::Remove { file, .. }
367 | FsCommand::SoftLink { link: file, .. }
368 | FsCommand::HardLink { link: file, .. }
369 | FsCommand::RefLink { link: file, .. }
370 | FsCommand::Move { source: file, .. } => file.metadata.len(),
371 }
372 }
373
374 #[cfg(unix)]
376 pub fn to_shell_str(&self) -> Vec<String> {
377 let mut result = Vec::new();
378 match self {
379 FsCommand::Remove { file, .. } => {
380 let path = file.path.quote();
381 result.push(format!("rm {path}"));
382 }
383 FsCommand::SoftLink { target, link, .. } => {
384 let tmp = Self::temp_file(&link.path);
385 let target = target.path.quote();
386 let link = link.path.quote();
387 result.push(format!("mv {} {}", link, tmp.quote()));
388 result.push(format!("ln -s {target} {link}"));
389 result.push(format!("rm {}", tmp.quote()));
390 }
391 FsCommand::HardLink { target, link, .. } => {
392 let tmp = Self::temp_file(&link.path);
393 let target = target.path.quote();
394 let link = link.path.quote();
395 result.push(format!("mv {} {}", link, tmp.quote()));
396 result.push(format!("ln {target} {link}"));
397 result.push(format!("rm {}", tmp.quote()));
398 }
399 FsCommand::RefLink { target, link, .. } => {
400 let tmp = Self::temp_file(&link.path);
401 let target = target.path.quote();
402 let link = link.path.quote();
403 result.push(format!("mv {} {}", link, tmp.quote()));
405 if cfg!(target_os = "macos") {
406 result.push(format!("cp -c {target} {link}"));
407 } else {
408 result.push(format!("cp --reflink=always {target} {link}"));
409 };
410 result.push(format!("rm {}", tmp.quote()));
411 }
412 FsCommand::Move {
413 source,
414 target,
415 use_rename,
416 } => {
417 let source = source.path.quote();
418 let target = target.quote();
419 if *use_rename {
420 result.push(format!("mv {} {}", &source, &target));
421 } else {
422 result.push(format!("cp {} {}", &source, &target));
423 result.push(format!("rm {}", &source));
424 }
425 }
426 }
427 result
428 }
429
430 #[cfg(windows)]
431 pub fn to_shell_str(&self) -> Vec<String> {
432 let mut result = Vec::new();
433 match self {
434 FsCommand::Remove { file, .. } => {
435 let path = file.path.quote();
436 result.push(format!("del {}", path));
437 }
438 FsCommand::SoftLink { target, link, .. } => {
439 let tmp = Self::temp_file(&link.path);
440 let target = target.path.quote();
441 let link = link.path.quote();
442 result.push(format!("move {} {}", link, tmp.quote()));
443 result.push(format!("mklink {} {}", target, link));
444 result.push(format!("del {}", tmp.quote()));
445 }
446 FsCommand::HardLink { target, link, .. } => {
447 let tmp = Self::temp_file(&link.path);
448 let target = target.path.quote();
449 let link = link.path.quote();
450 result.push(format!("move {} {}", link, tmp.quote()));
451 result.push(format!("mklink /H {} {}", target, link));
452 result.push(format!("del {}", tmp.quote()));
453 }
454 FsCommand::RefLink { target, link, .. } => {
455 result.push(format!(":: deduplicate {} {}", link, target));
456 }
457 FsCommand::Move {
458 source,
459 target,
460 use_rename,
461 } => {
462 let source = source.path.quote();
463 let target = target.quote();
464 if *use_rename {
465 result.push(format!("move {} {}", &source, &target));
466 } else {
467 result.push(format!("copy {} {}", &source, &target));
468 result.push(format!("del {}", &source));
469 }
470 }
471 }
472 result
473 }
474}
475
476#[derive(Default)]
478pub struct DedupeResult {
479 pub processed_count: u64,
480 pub reclaimed_space: FileLen,
481}
482
483impl Add<DedupeResult> for DedupeResult {
484 type Output = DedupeResult;
485
486 fn add(self, rhs: Self) -> Self::Output {
487 DedupeResult {
488 processed_count: self.processed_count + rhs.processed_count,
489 reclaimed_space: self.reclaimed_space + rhs.reclaimed_space,
490 }
491 }
492}
493
494impl AddAssign for DedupeResult {
495 fn add_assign(&mut self, rhs: Self) {
496 self.processed_count += rhs.processed_count;
497 self.reclaimed_space += rhs.reclaimed_space;
498 }
499}
500
501fn was_modified(files: &[PathAndMetadata], after: DateTime<FixedOffset>, log: &dyn Log) -> bool {
504 let mut result = false;
505 let after: DateTime<Local> = after.into();
506 for PathAndMetadata {
507 path: p,
508 metadata: m,
509 ..
510 } in files.iter()
511 {
512 match m.modified() {
513 Ok(file_timestamp) => {
514 let file_timestamp: DateTime<Local> = file_timestamp.into();
515 if file_timestamp > after {
516 log.warn(format!(
517 "File {} was updated after {} (at {})",
518 p.display(),
519 after.format(TIMESTAMP_FMT),
520 file_timestamp.format(TIMESTAMP_FMT)
521 ));
522 result = true;
523 }
524 }
525 Err(e) => {
526 log.warn(format!(
527 "Failed to read modification time of file {}: {}",
528 p.display(),
529 e
530 ));
531 result = true;
532 }
533 }
534 }
535 result
536}
537
538fn should_keep(path: &Path, config: &DedupeConfig) -> bool {
540 let matches_any_name = config
541 .keep_name_patterns
542 .iter()
543 .any(|p| match path.file_name_cstr() {
544 Some(name) => p.matches(name.to_string_lossy().as_ref()),
545 None => false,
546 });
547 let matches_any_path = || {
548 config
549 .keep_path_patterns
550 .iter()
551 .any(|p| p.matches_path(&path.to_path_buf()))
552 };
553
554 matches_any_name || matches_any_path()
555}
556
557fn may_drop(path: &Path, config: &DedupeConfig) -> bool {
560 let matches_any_name = || {
561 config
562 .name_patterns
563 .iter()
564 .any(|p| match path.file_name_cstr() {
565 Some(name) => p.matches(name.to_string_lossy().as_ref()),
566 None => false,
567 })
568 };
569 let matches_any_path = || {
570 config
571 .path_patterns
572 .iter()
573 .any(|p| p.matches_path(&path.to_path_buf()))
574 };
575
576 (config.name_patterns.is_empty() && config.path_patterns.is_empty())
577 || matches_any_name()
578 || matches_any_path()
579}
580
581impl<P: AsRef<PathAndMetadata>> FileSubGroup<P> {
582 pub fn created(&self) -> Result<SystemTime, Error> {
584 Ok(min_result(self.files.iter().map(|f| {
585 let f = f.as_ref();
586 f.metadata.created().map_err(|e| {
587 format!(
588 "Failed to read creation time of file {}: {}",
589 f.path.display(),
590 e
591 )
592 })
593 }))?
594 .unwrap())
595 }
596
597 pub fn modified(&self) -> Result<SystemTime, Error> {
599 Ok(max_result(self.files.iter().map(|f| {
600 let f = f.as_ref();
601 f.metadata.modified().map_err(|e| {
602 format!(
603 "Failed to read modification time of file {}: {}",
604 f.path.display(),
605 e
606 )
607 })
608 }))?
609 .unwrap())
610 }
611
612 pub fn accessed(&self) -> Result<SystemTime, Error> {
614 Ok(max_result(self.files.iter().map(|f| {
615 let f = f.as_ref();
616 f.metadata.accessed().map_err(|e| {
617 format!(
618 "Failed to read access time of file {}: {}",
619 f.path.display(),
620 e
621 )
622 })
623 }))?
624 .unwrap())
625 }
626
627 #[cfg(unix)]
629 pub fn status_changed(&self) -> Result<(i64, i64), Error> {
630 use std::os::unix::fs::MetadataExt;
631 Ok(self
632 .files
633 .iter()
634 .map(|f| {
635 let f = f.as_ref();
636 (f.metadata.ctime(), f.metadata.ctime_nsec())
637 })
638 .max()
639 .unwrap())
640 }
641
642 pub fn should_keep(&self, config: &DedupeConfig) -> bool {
644 self.files
645 .iter()
646 .any(|f| should_keep(&f.as_ref().path, config))
647 }
648
649 pub fn may_drop(&self, config: &DedupeConfig) -> bool {
651 self.files
652 .iter()
653 .all(|f| may_drop(&f.as_ref().path, config))
654 }
655
656 pub fn min_nesting(&self) -> usize {
658 self.files
659 .iter()
660 .map(|f| f.as_ref().path.component_count())
661 .min()
662 .unwrap()
663 }
664
665 pub fn max_nesting(&self) -> usize {
667 self.files
668 .iter()
669 .map(|f| f.as_ref().path.component_count())
670 .max()
671 .unwrap()
672 }
673}
674
675pub fn sort_by_priority<P>(files: &mut [FileSubGroup<P>], priority: &Priority) -> Vec<Error>
680where
681 P: AsRef<PathAndMetadata>,
682{
683 match priority {
684 Priority::Top => {
685 files.reverse();
686 vec![]
687 }
688 Priority::Bottom => vec![],
689 Priority::Newest => try_sort_by_key(files, |m| m.created()),
690 Priority::Oldest => try_sort_by_key(files, |m| m.created().map(Reverse)),
691 Priority::MostRecentlyModified => try_sort_by_key(files, |m| m.modified()),
692 Priority::LeastRecentlyModified => try_sort_by_key(files, |m| m.modified().map(Reverse)),
693 Priority::MostRecentlyAccessed => try_sort_by_key(files, |m| m.accessed()),
694 Priority::LeastRecentlyAccessed => try_sort_by_key(files, |m| m.accessed().map(Reverse)),
695 #[cfg(unix)]
696 Priority::MostRecentStatusChange => try_sort_by_key(files, |m| m.status_changed()),
697 #[cfg(unix)]
698 Priority::LeastRecentStatusChange => {
699 try_sort_by_key(files, |m| m.status_changed().map(Reverse))
700 }
701 Priority::MostNested => {
702 files.sort_by_key(|m| m.max_nesting());
703 vec![]
704 }
705 Priority::LeastNested => {
706 files.sort_by_key(|m| Reverse(m.min_nesting()));
707 vec![]
708 }
709 }
710}
711
712#[derive(Debug)]
713pub struct PartitionedFileGroup {
714 pub to_keep: Vec<PathAndMetadata>,
715 pub to_drop: Vec<PathAndMetadata>,
716}
717
718impl PartitionedFileGroup {
719 fn move_target(target_dir: &Arc<Path>, source_path: &Path) -> Path {
722 let root = source_path
723 .root()
724 .map(|p| p.to_string_lossy().replace(['/', '\\', ':'], ""));
725 let suffix = source_path.strip_root();
726 match root {
727 None => target_dir.join(suffix),
728 Some(root) => Arc::new(target_dir.join(Path::from(root))).join(suffix),
729 }
730 }
731
732 fn are_on_same_mount(devices: &DiskDevices, file1: &Path, file2: &Path) -> bool {
733 let mount1 = devices.get_mount_point(file1);
734 let mount2 = devices.get_mount_point(file2);
735 mount1 == mount2
736 }
737
738 pub fn dedupe_script(mut self, strategy: &DedupeOp, devices: &DiskDevices) -> Vec<FsCommand> {
740 if self.to_drop.is_empty() {
741 return vec![];
742 }
743 assert!(
744 !self.to_keep.is_empty(),
745 "No files would be left after deduplicating"
746 );
747 let mut commands = Vec::new();
748 let retained_file = Arc::new(self.to_keep.swap_remove(0));
749 for dropped_file in self.to_drop {
750 match strategy {
751 DedupeOp::SymbolicLink => commands.push(FsCommand::SoftLink {
752 target: retained_file.clone(),
753 link: dropped_file,
754 }),
755 DedupeOp::HardLink => commands.push(FsCommand::HardLink {
756 target: retained_file.clone(),
757 link: dropped_file,
758 }),
759 DedupeOp::RefLink => commands.push(FsCommand::RefLink {
760 target: retained_file.clone(),
761 link: dropped_file,
762 }),
763 DedupeOp::Remove => commands.push(FsCommand::Remove { file: dropped_file }),
764 DedupeOp::Move(target_dir) => {
765 let source = dropped_file;
766 let source_path = &source.path;
767 let use_rename = Self::are_on_same_mount(devices, source_path, target_dir);
768 let target = Self::move_target(target_dir, source_path);
769 commands.push(FsCommand::Move {
770 source,
771 target,
772 use_rename,
773 })
774 }
775 }
776 }
777 commands
778 }
779}
780
781fn fetch_files_metadata<P>(group: FileGroup<P>, log: &dyn Log) -> Option<FileGroup<PathAndMetadata>>
784where
785 P: Into<Path>,
786{
787 group
788 .try_map_all(|p| {
789 PathAndMetadata::new(p.into()).map_err(|e| {
790 log.warn(&e);
791 })
792 })
793 .ok()
794}
795
796fn partition(
799 group: FileGroup<PathAndMetadata>,
800 config: &DedupeConfig,
801 log: &dyn Log,
802) -> Result<PartitionedFileGroup, Error> {
803 let file_len = group.file_len;
804 let file_hash = group.file_hash.clone();
805 let mut files = group.files;
806
807 let error = |msg: &str| {
808 Err(Error::from(format!(
809 "Could not determine files to drop in group with hash {} and len {}: {}",
810 file_hash, file_len.0, msg
811 )))
812 };
813
814 files.retain(|m| {
816 let is_file = m.metadata.is_file();
817 if !is_file {
818 log.warn(format!(
819 "Skipping file {}: Not a regular file",
820 m.path.display()
821 ));
822 }
823 is_file
824 });
825
826 if !config.no_check_size {
829 files.retain(|m| {
830 let len_ok = m.metadata.len() == file_len;
831 if !len_ok {
832 log.warn(format!(
833 "Skipping file {} with length {} different than the group length {}",
834 m.path.display(),
835 m.metadata.len(),
836 file_len.0,
837 ));
838 }
839 len_ok
840 });
841 }
842
843 if let Some(max_timestamp) = config.modified_before {
846 if was_modified(&files, max_timestamp, log) {
847 return error("Some files could be updated since the previous run of fclones");
848 }
849 }
850
851 let mut file_sub_groups =
852 FileSubGroup::group(files, &config.isolated_roots, !config.match_links);
853
854 let mut sort_errors = Vec::new();
859 for priority in config.priority.iter().rev() {
860 sort_errors.extend(sort_by_priority(&mut file_sub_groups, priority));
861 }
862
863 if !sort_errors.is_empty() {
864 for e in sort_errors {
865 log.warn(e);
866 }
867 return error("Metadata of some files could not be read.");
868 }
869
870 let (mut to_retain, mut to_drop): (Vec<_>, Vec<_>) = file_sub_groups
873 .into_iter()
874 .partition(|m| m.should_keep(config) || !m.may_drop(config));
875
876 let n = max(1, config.rf_over.unwrap_or(1));
879 let missing_count = min(to_drop.len(), n.saturating_sub(to_retain.len()));
880 to_retain.extend(to_drop.drain(0..missing_count));
881
882 assert!(to_retain.len() >= n || to_drop.is_empty());
883 Ok(PartitionedFileGroup {
884 to_keep: to_retain.into_iter().flat_map(|g| g.files).collect(),
885 to_drop: to_drop.into_iter().flat_map(|g| g.files).collect(),
886 })
887}
888
889pub fn dedupe<'a, I, P>(
916 groups: I,
917 op: DedupeOp,
918 config: &'a DedupeConfig,
919 log: &'a dyn Log,
920) -> impl ParallelIterator<Item = (usize, Vec<FsCommand>)> + 'a
921where
922 I: IntoIterator<Item = FileGroup<P>> + 'a,
923 I::IntoIter: Send,
924 P: Into<Path> + AsRef<Path> + fmt::Debug + Send + 'a,
925{
926 let devices = DiskDevices::new(&HashMap::new());
927 let disallow_cross_device = op == DedupeOp::HardLink || op == DedupeOp::RefLink;
928 groups
929 .into_iter()
930 .enumerate()
931 .par_bridge()
932 .map(move |(i, group)| {
933 let mut commands = Vec::new();
934 if let Some(group) = fetch_files_metadata(group, log) {
935 let groups = if disallow_cross_device {
936 group.partition_by_key(|p| p.metadata.device_id())
937 } else {
938 vec![group]
939 };
940 for group in groups {
941 match partition(group, config, log) {
942 Ok(group) => commands.extend(group.dedupe_script(&op, &devices)),
943 Err(e) => log.warn(e),
944 }
945 }
946 }
947 (i, commands)
948 })
949}
950
951pub fn run_script<I>(script: I, should_lock: bool, log: &dyn Log) -> DedupeResult
960where
961 I: IntoParallelIterator<Item = (usize, Vec<FsCommand>)>,
962{
963 script
964 .into_par_iter()
965 .flat_map(|(_, cmd_vec)| cmd_vec)
966 .map(|cmd| cmd.execute(should_lock, log))
967 .inspect(|res| {
968 if let Err(e) = res {
969 log.warn(e);
970 }
971 })
972 .filter_map(|res| res.ok())
973 .map(|len| DedupeResult {
974 processed_count: 1,
975 reclaimed_space: len,
976 })
977 .reduce(DedupeResult::default, |a, b| a + b)
978}
979
980struct FsCommandGroup {
982 index: usize,
983 commands: Vec<FsCommand>,
984}
985
986impl FsCommandGroup {
987 pub fn new(index: usize, commands: Vec<FsCommand>) -> FsCommandGroup {
988 FsCommandGroup { index, commands }
989 }
990}
991
992impl PartialEq<Self> for FsCommandGroup {
993 fn eq(&self, other: &Self) -> bool {
994 self.index == other.index
995 }
996}
997
998impl Eq for FsCommandGroup {}
999
1000impl Hash for FsCommandGroup {
1001 fn hash<H: Hasher>(&self, state: &mut H) {
1002 self.index.hash(state)
1003 }
1004}
1005
1006pub fn log_script(
1012 script: impl IntoParallelIterator<Item = (usize, Vec<FsCommand>)> + Send,
1013 mut out: impl Write + Send,
1014) -> io::Result<DedupeResult> {
1015 let (tx, rx) = channel();
1024
1025 crossbeam_utils::thread::scope(move |s| {
1029 s.spawn(move |_| {
1032 script
1033 .into_par_iter()
1034 .for_each_with(tx, |tx, item| tx.send(item).unwrap())
1035 });
1036
1037 let mut queue = PriorityQueue::new();
1038 let mut next_group_index = 0;
1039 let mut processed_count = 0;
1040 let mut reclaimed_space = FileLen(0);
1041
1042 while let Ok((group_index, commands)) = rx.recv() {
1043 queue.push(
1046 FsCommandGroup::new(group_index, commands),
1047 Reverse(group_index), );
1049
1050 while let Some((group, _)) = queue.peek() {
1052 if group.index != next_group_index {
1056 break;
1057 }
1058 next_group_index += 1;
1060 let cmd_vec = queue.pop().unwrap().0.commands;
1061 for cmd in cmd_vec {
1062 processed_count += 1;
1063 reclaimed_space += cmd.space_to_reclaim();
1064 for line in cmd.to_shell_str() {
1065 writeln!(out, "{line}")?;
1066 }
1067 }
1068 }
1069 }
1070
1071 Ok(DedupeResult {
1072 processed_count,
1073 reclaimed_space,
1074 })
1075 })
1076 .unwrap()
1077}
1078
1079#[cfg(test)]
1080mod test {
1081 use std::collections::HashSet;
1082 use std::default::Default;
1083 use std::fs::{create_dir, create_dir_all};
1084 use std::path::PathBuf;
1085 use std::str::FromStr;
1086 use std::{thread, time};
1087
1088 use chrono::Duration;
1089 use itertools::Itertools;
1090
1091 use crate::config::GroupConfig;
1092 use crate::file::FileHash;
1093 use crate::group_files;
1094 use crate::log::StdLog;
1095 use crate::pattern::Pattern;
1096 use crate::util::test::{create_file, create_file_newer_than, read_file, with_dir, write_file};
1097
1098 use super::*;
1099
1100 #[test]
1101 fn test_temp_file_name_generation() {
1102 let path = Path::from("/foo/bar");
1103 let temp = FsCommand::temp_file(&path);
1104 assert_ne!(path, temp);
1105 assert_ne!(
1106 path.file_name().unwrap().len(),
1107 temp.file_name().unwrap().len()
1108 );
1109 assert_eq!(path.parent(), temp.parent());
1110 }
1111
1112 #[test]
1113 fn test_remove_command_removes_file() {
1114 with_dir("dedupe/remove_cmd", |root| {
1115 let log = StdLog::new();
1116 let file_path = root.join("file");
1117 create_file(&file_path);
1118 let file = PathAndMetadata::new(Path::from(&file_path)).unwrap();
1119 let cmd = FsCommand::Remove { file };
1120 cmd.execute(true, &log).unwrap();
1121 assert!(!file_path.exists())
1122 })
1123 }
1124
1125 #[test]
1126 fn test_move_command_moves_file_by_rename() {
1127 with_dir("dedupe/move_rename_cmd", |root| {
1128 let log = StdLog::new();
1129 let file_path = root.join("file");
1130 let target = Path::from(root.join("target"));
1131 create_file(&file_path);
1132 let file = PathAndMetadata::new(Path::from(&file_path)).unwrap();
1133 let cmd = FsCommand::Move {
1134 source: file,
1135 target: target.clone(),
1136 use_rename: true,
1137 };
1138 cmd.execute(true, &log).unwrap();
1139 assert!(!file_path.exists());
1140 assert!(target.to_path_buf().exists());
1141 })
1142 }
1143
1144 #[test]
1145 fn test_move_command_moves_file_by_copy() {
1146 with_dir("dedupe/move_copy_cmd", |root| {
1147 let log = StdLog::new();
1148 let file_path = root.join("file");
1149 let target = Path::from(root.join("target"));
1150 create_file(&file_path);
1151 let file = PathAndMetadata::new(Path::from(&file_path)).unwrap();
1152 let cmd = FsCommand::Move {
1153 source: file,
1154 target: target.clone(),
1155 use_rename: false,
1156 };
1157 cmd.execute(true, &log).unwrap();
1158 assert!(!file_path.exists());
1159 assert!(target.to_path_buf().exists());
1160 })
1161 }
1162
1163 #[test]
1164 fn test_move_fails_if_target_exists() {
1165 with_dir("dedupe/move_target_exists", |root| {
1166 let log = StdLog::new();
1167 let file_path = root.join("file");
1168 let target = root.join("target");
1169 create_file(&file_path);
1170 create_file(&target);
1171 let file = PathAndMetadata::new(Path::from(&file_path)).unwrap();
1172 let cmd = FsCommand::Move {
1173 source: file,
1174 target: Path::from(&target),
1175 use_rename: false,
1176 };
1177 assert!(cmd.execute(true, &log).is_err());
1178 })
1179 }
1180
1181 #[test]
1182 fn test_soft_link_command_replaces_file_with_a_link() {
1183 with_dir("dedupe/soft_link_cmd", |root| {
1184 let log = StdLog::new();
1185 let file_path_1 = root.join("file_1");
1186 let file_path_2 = root.join("file_2");
1187 write_file(&file_path_1, "foo");
1188 write_file(&file_path_2, "");
1189
1190 let file_1 = PathAndMetadata::new(Path::from(&file_path_1)).unwrap();
1191 let file_2 = PathAndMetadata::new(Path::from(&file_path_2)).unwrap();
1192 let cmd = FsCommand::SoftLink {
1193 target: Arc::new(file_1),
1194 link: file_2,
1195 };
1196 cmd.execute(true, &log).unwrap();
1197
1198 assert!(file_path_1.exists());
1199 assert!(file_path_2.exists());
1200 assert!(fs::symlink_metadata(&file_path_2)
1201 .unwrap()
1202 .file_type()
1203 .is_symlink());
1204 assert_eq!(read_file(&file_path_2), "foo");
1205 })
1206 }
1207
1208 #[test]
1209 fn test_hard_link_command_replaces_file_with_a_link() {
1210 with_dir("dedupe/hard_link_cmd", |root| {
1211 let log = StdLog::new();
1212 let file_path_1 = root.join("file_1");
1213 let file_path_2 = root.join("file_2");
1214
1215 write_file(&file_path_1, "foo");
1216 write_file(&file_path_2, "");
1217
1218 let file_1 = PathAndMetadata::new(Path::from(&file_path_1)).unwrap();
1219 let file_2 = PathAndMetadata::new(Path::from(&file_path_2)).unwrap();
1220 let cmd = FsCommand::HardLink {
1221 target: Arc::new(file_1),
1222 link: file_2,
1223 };
1224 cmd.execute(true, &log).unwrap();
1225
1226 assert!(file_path_1.exists());
1227 assert!(file_path_2.exists());
1228 assert_eq!(read_file(&file_path_2), "foo");
1229 })
1230 }
1231
1232 fn make_group(root: &PathBuf, file_hash: FileHash) -> FileGroup<Path> {
1234 create_dir_all(root).unwrap();
1235 let file_1 = root.join("file_1");
1236 let file_2 = root.join("file_2");
1237 let file_3 = root.join("file_3");
1238 create_file(&file_1);
1239 let ctime_1 = fs::metadata(&file_1).unwrap().modified().unwrap();
1240 let ctime_2 = create_file_newer_than(&file_2, ctime_1);
1241 create_file_newer_than(&file_3, ctime_2);
1242
1243 FileGroup {
1244 file_len: FileLen(0),
1245 file_hash,
1246 files: vec![
1247 Path::from(&file_1),
1248 Path::from(&file_2),
1249 Path::from(&file_3),
1250 ],
1251 }
1252 }
1253
1254 #[test]
1255 fn test_partition_selects_files_for_removal() {
1256 with_dir("dedupe/partition/basic", |root| {
1257 let group = make_group(root, FileHash::from_str("00").unwrap());
1258 let group = group.map(|p| PathAndMetadata::new(p).unwrap());
1259 let config = DedupeConfig::default();
1260 let partitioned = partition(group, &config, &StdLog::new()).unwrap();
1261 assert_eq!(partitioned.to_keep.len(), 1);
1262 assert_eq!(partitioned.to_drop.len(), 2);
1263 })
1264 }
1265
1266 #[test]
1267 fn test_partition_bails_out_if_file_modified_too_late() {
1268 with_dir("dedupe/partition/modification", |root| {
1269 let group = make_group(root, FileHash::from_str("00").unwrap());
1270 let group = group.map(|p| PathAndMetadata::new(p).unwrap());
1271 let config = DedupeConfig {
1272 modified_before: Some(DateTime::from(Local::now() - Duration::days(1))),
1273 ..DedupeConfig::default()
1274 };
1275 let partitioned = partition(group, &config, &StdLog::new());
1276 assert!(partitioned.is_err());
1277 })
1278 }
1279
1280 #[test]
1281 fn test_partition_skips_file_with_different_len() {
1282 with_dir("dedupe/partition/file_len", |root| {
1283 let group = make_group(root, FileHash::from_str("00").unwrap());
1284 let path = group.files[0].clone();
1285 write_file(&path.to_path_buf(), "foo");
1286 let config = DedupeConfig {
1287 priority: vec![Priority::MostRecentlyModified],
1288 ..DedupeConfig::default()
1289 };
1290 let group = group.map(|p| PathAndMetadata::new(p).unwrap());
1291 let partitioned = partition(group, &config, &StdLog::new()).unwrap();
1292 assert!(!partitioned.to_drop.iter().any(|m| m.path == path));
1293 assert!(!partitioned.to_keep.iter().any(|m| m.path == path));
1294 })
1295 }
1296
1297 fn path_set(v: &[PathAndMetadata]) -> HashSet<&Path> {
1298 v.iter().map(|f| &f.path).collect()
1299 }
1300
1301 #[test]
1302 fn test_partition_respects_creation_time_priority() {
1303 with_dir("dedupe/partition/ctime_priority", |root| {
1304 if fs::metadata(root).unwrap().created().is_err() {
1305 return;
1308 }
1309 let group = make_group(root, FileHash::from_str("00").unwrap());
1310 let mut config = DedupeConfig {
1311 priority: vec![Priority::Newest],
1312 ..DedupeConfig::default()
1313 };
1314 let group = group.map(|p| PathAndMetadata::new(p).unwrap());
1315 let partitioned_1 = partition(group.clone(), &config, &StdLog::new()).unwrap();
1316 config.priority = vec![Priority::Oldest];
1317 let partitioned_2 = partition(group, &config, &StdLog::new()).unwrap();
1318
1319 assert_ne!(
1320 path_set(&partitioned_1.to_keep),
1321 path_set(&partitioned_2.to_keep)
1322 );
1323 assert_ne!(
1324 path_set(&partitioned_1.to_drop),
1325 path_set(&partitioned_2.to_drop)
1326 );
1327 });
1328 }
1329
1330 #[test]
1331 fn test_partition_respects_modification_time_priority() {
1332 with_dir("dedupe/partition/mtime_priority", |root| {
1333 let group = make_group(root, FileHash::from_str("00").unwrap());
1334
1335 thread::sleep(time::Duration::from_millis(10));
1336 let path = group.files[0].clone();
1337 write_file(&path.to_path_buf(), "foo");
1338
1339 let group = group.map(|p| PathAndMetadata::new(p).unwrap());
1341 let config = DedupeConfig {
1342 priority: vec![Priority::MostRecentlyModified],
1343 ..DedupeConfig::default()
1344 };
1345 let partitioned_1 = partition(group.clone(), &config, &StdLog::new()).unwrap();
1346
1347 let config = DedupeConfig {
1348 priority: vec![Priority::LeastRecentlyModified],
1349 ..DedupeConfig::default()
1350 };
1351 let partitioned_2 = partition(group, &config, &StdLog::new()).unwrap();
1352
1353 assert_ne!(
1354 path_set(&partitioned_1.to_keep),
1355 path_set(&partitioned_2.to_keep)
1356 );
1357 assert_ne!(
1358 path_set(&partitioned_1.to_drop),
1359 path_set(&partitioned_2.to_drop)
1360 );
1361 });
1362 }
1363
1364 #[test]
1365 fn test_partition_respects_keep_patterns() {
1366 with_dir("dedupe/partition/keep", |root| {
1367 let group = make_group(root, FileHash::from_str("00").unwrap());
1368 let group = group.map(|p| PathAndMetadata::new(p).unwrap());
1369 let mut config = DedupeConfig {
1370 priority: vec![Priority::LeastRecentlyModified],
1371 keep_name_patterns: vec![Pattern::glob("*_1").unwrap()],
1372 ..DedupeConfig::default()
1373 };
1374
1375 let p = partition(group.clone(), &config, &StdLog::new()).unwrap();
1376 assert_eq!(p.to_keep.len(), 1);
1377 assert_eq!(&p.to_keep[0].path, &group.files[0].path);
1378
1379 config.keep_name_patterns = vec![];
1380 config.keep_path_patterns = vec![Pattern::glob("**/file_1").unwrap()];
1381 let p = partition(group.clone(), &config, &StdLog::new()).unwrap();
1382 assert_eq!(p.to_keep.len(), 1);
1383 assert_eq!(&p.to_keep[0].path, &group.files[0].path);
1384 })
1385 }
1386
1387 #[test]
1388 fn test_partition_respects_drop_patterns() {
1389 with_dir("dedupe/partition/drop", |root| {
1390 let group = make_group(root, FileHash::from_str("00").unwrap());
1391 let group = group.map(|p| PathAndMetadata::new(p).unwrap());
1392 let mut config = DedupeConfig {
1393 priority: vec![Priority::LeastRecentlyModified],
1394 name_patterns: vec![Pattern::glob("*_3").unwrap()],
1395 ..DedupeConfig::default()
1396 };
1397 let p = partition(group.clone(), &config, &StdLog::new()).unwrap();
1398 assert_eq!(p.to_drop.len(), 1);
1399 assert_eq!(&p.to_drop[0].path, &group.files[2].path);
1400
1401 config.name_patterns = vec![];
1402 config.path_patterns = vec![Pattern::glob("**/file_3").unwrap()];
1403 let p = partition(group.clone(), &config, &StdLog::new()).unwrap();
1404 assert_eq!(p.to_drop.len(), 1);
1405 assert_eq!(&p.to_drop[0].path, &group.files[2].path);
1406 })
1407 }
1408
1409 #[test]
1410 fn test_partition_respects_isolated_roots() {
1411 with_dir("dedupe/partition/isolated_roots", |root| {
1412 let root1 = root.join("root1");
1413 let root2 = root.join("root2");
1414 create_dir(&root1).unwrap();
1415 create_dir(&root2).unwrap();
1416
1417 let group1 = make_group(&root1, FileHash::from_str("00").unwrap());
1418 let group2 = make_group(&root2, FileHash::from_str("00").unwrap());
1419 let group = FileGroup {
1420 file_len: group1.file_len,
1421 file_hash: group1.file_hash,
1422 files: group1.files.into_iter().chain(group2.files).collect(),
1423 };
1424
1425 let config = DedupeConfig {
1426 isolated_roots: vec![Path::from(&root1), Path::from(&root2)],
1427 ..DedupeConfig::default()
1428 };
1429
1430 let group = group.map(|p| PathAndMetadata::new(p).unwrap());
1431 let p = partition(group, &config, &StdLog::new()).unwrap();
1432 assert_eq!(p.to_drop.len(), 3);
1433 assert!(p
1434 .to_drop
1435 .iter()
1436 .all(|f| f.path.to_path_buf().starts_with(&root2)));
1437 assert_eq!(p.to_keep.len(), 3);
1438 assert!(p
1439 .to_keep
1440 .iter()
1441 .all(|f| f.path.to_path_buf().starts_with(&root1)));
1442 })
1443 }
1444
1445 #[test]
1446 fn test_partition_respects_links() {
1447 with_dir("dedupe/partition/links", |root| {
1448 let root_a = root.join("root_a");
1449 let root_b = root.join("root_b");
1450 create_dir(&root_a).unwrap();
1451 create_dir(&root_b).unwrap();
1452
1453 let file_a1 = root_a.join("file_a1");
1454 let file_a2 = root_a.join("file_a2");
1455 write_file(&file_a1, "aaa");
1456 fs::hard_link(&file_a1, &file_a2).unwrap();
1457
1458 let file_b1 = root_b.join("file_b1");
1459 let file_b2 = root_b.join("file_b2");
1460 write_file(&file_b1, "aaa");
1461 fs::hard_link(&file_b1, &file_b2).unwrap();
1462
1463 let group = FileGroup {
1464 file_len: FileLen(3),
1465 file_hash: FileHash::from_str("00").unwrap(),
1466 files: vec![
1467 Path::from(&file_b1),
1468 Path::from(&file_a2),
1469 Path::from(&file_a1),
1470 Path::from(&file_b2),
1471 ],
1472 };
1473
1474 let config = DedupeConfig::default();
1475 let group = group.map(|p| PathAndMetadata::new(p).unwrap());
1476 let p = partition(group, &config, &StdLog::new()).unwrap();
1477
1478 assert_eq!(p.to_drop.len(), 2);
1480 assert!(p
1481 .to_drop
1482 .iter()
1483 .all(|f| f.path.to_path_buf().starts_with(&root_a)));
1484 assert_eq!(p.to_keep.len(), 2);
1485 assert!(p
1486 .to_keep
1487 .iter()
1488 .all(|f| f.path.to_path_buf().starts_with(&root_b)));
1489 })
1490 }
1491
1492 #[test]
1493 fn test_run_dedupe_script() {
1494 with_dir("dedupe/partition/run_dedupe_script", |root| {
1495 let mut log = StdLog::new();
1496 log.no_progress = true;
1497 log.log_stderr_to_stdout = true;
1498
1499 let group = make_group(root, FileHash::from_str("00").unwrap());
1500 let config = DedupeConfig {
1501 priority: vec![Priority::LeastRecentlyModified],
1502 ..DedupeConfig::default()
1503 };
1504 let script = dedupe(vec![group], DedupeOp::Remove, &config, &log);
1505 let dedupe_result = run_script(script, !config.no_lock, &log);
1506 assert_eq!(dedupe_result.processed_count, 2);
1507 assert!(!root.join("file_1").exists());
1508 assert!(!root.join("file_2").exists());
1509 assert!(root.join("file_3").exists());
1510 });
1511 }
1512
1513 #[test]
1514 fn test_log_dedupe_script() {
1515 with_dir("dedupe/partition/log_dedupe_script", |root| {
1516 let mut log = StdLog::new();
1517 log.no_progress = true;
1518 log.log_stderr_to_stdout = true;
1519
1520 let group_1 = make_group(&root.join("group_1"), FileHash::from_str("00").unwrap());
1521 let group_2 = make_group(&root.join("group_2"), FileHash::from_str("01").unwrap());
1522 let group_3 = make_group(&root.join("group_3"), FileHash::from_str("02").unwrap());
1523 let groups = vec![group_1, group_2, group_3];
1524
1525 let config = DedupeConfig {
1526 priority: vec![Priority::LeastRecentlyModified],
1527 ..DedupeConfig::default()
1528 };
1529
1530 let script = dedupe(groups, DedupeOp::Remove, &config, &log);
1531
1532 let mut out = Vec::new();
1533 let dedupe_result = log_script(script, &mut out).unwrap();
1534 assert_eq!(dedupe_result.processed_count, 6);
1535
1536 let out = String::from_utf8(out).unwrap();
1537 let out_lines = out.lines().collect_vec();
1538 assert_eq!(out_lines.len(), 6);
1539 assert!(out_lines[0].contains("group_1"));
1540 assert!(out_lines[1].contains("group_1"));
1541 assert!(out_lines[2].contains("group_2"));
1542 assert!(out_lines[3].contains("group_2"));
1543 assert!(out_lines[4].contains("group_3"));
1544 assert!(out_lines[5].contains("group_3"));
1545 });
1546 }
1547
1548 #[test]
1549 fn test_hard_link_merges_subgroups_of_hard_links() {
1550 with_dir("dedupe/merge_subgroups_of_hardlinks", |root| {
1551 let mut log = StdLog::new();
1552 log.no_progress = true;
1553 log.log_stderr_to_stdout = true;
1554
1555 let file_a1 = root.join("file_a1");
1556 let file_a2 = root.join("file_a2");
1557 let file_b1 = root.join("file_b1");
1558 let file_b2 = root.join("file_b2");
1559
1560 write_file(&file_a1, "foo");
1561 write_file(&file_b1, "foo");
1562
1563 let file_id = FileId::new(&Path::from(&file_a1)).unwrap();
1564
1565 fs::hard_link(&file_a1, &file_a2).unwrap();
1566 fs::hard_link(&file_b1, &file_b2).unwrap();
1567
1568 let group_config = GroupConfig {
1569 paths: vec![Path::from(root)],
1570 ..GroupConfig::default()
1571 };
1572
1573 let groups = group_files(&group_config, &log).unwrap();
1574 let dedupe_config = DedupeConfig::default();
1575 let script = dedupe(groups, DedupeOp::HardLink, &dedupe_config, &log);
1576 let dedupe_result = run_script(script, false, &log);
1577 assert_eq!(dedupe_result.processed_count, 2);
1578 assert!(file_a1.exists());
1579 assert!(file_a2.exists());
1580 assert!(file_b1.exists());
1581 assert!(file_b2.exists());
1582
1583 assert_eq!(read_file(&file_a1), "foo");
1584
1585 assert_eq!(FileId::new(&Path::from(&file_a2)).unwrap(), file_id);
1586 assert_eq!(FileId::new(&Path::from(&file_b1)).unwrap(), file_id);
1587 assert_eq!(FileId::new(&Path::from(&file_b2)).unwrap(), file_id);
1588 })
1589 }
1590
1591 #[test]
1592 #[cfg(unix)]
1593 fn test_remove_removes_subgroups_of_soft_links() {
1594 use std::os::unix::fs;
1595
1596 with_dir("dedupe/remove_subgroups_with_symlinks", |root| {
1597 let mut log = StdLog::new();
1598 log.no_progress = true;
1599 log.log_stderr_to_stdout = true;
1600
1601 let file_a1 = root.join("file_a1");
1602 let file_a2 = root.join("file_a2");
1603 let file_b1 = root.join("file_b1");
1604 let file_b2 = root.join("file_b2");
1605
1606 write_file(&file_a1, "foo");
1607 write_file(&file_b1, "foo");
1608
1609 fs::symlink(&file_a1, &file_a2).unwrap();
1610 fs::symlink(&file_b1, &file_b2).unwrap();
1611
1612 let group_config = GroupConfig {
1613 paths: vec![Path::from(root)],
1614 symbolic_links: true,
1615 ..GroupConfig::default()
1616 };
1617
1618 let groups = group_files(&group_config, &log).unwrap();
1619 let dedupe_config = DedupeConfig::default();
1620 let script = dedupe(groups, DedupeOp::Remove, &dedupe_config, &log);
1621 let dedupe_result = run_script(script, false, &log);
1622 assert_eq!(dedupe_result.processed_count, 2);
1623
1624 assert!(file_a1.exists());
1625 assert!(file_a2.exists());
1626 assert!(!file_b1.exists());
1627 assert!(!file_b2.exists());
1628
1629 assert_eq!(read_file(&file_a1), "foo");
1630 assert_eq!(read_file(&file_a2), "foo");
1631 })
1632 }
1633}