Skip to main content

oximedia_dedup/
merge_strategy.rs

//! Merge strategies for resolving duplicate file groups.
//!
//! When duplicates are found, this module decides which file to keep and
//! which files to remove (or replace with a link). Strategies include:
//! - **`KeepNewest`**: keep the file with the latest modification time
//! - **`KeepOldest`**: keep the file with the earliest modification time
//! - **`KeepLargest`**: keep the largest file (e.g. highest-quality encode)
//! - **`KeepSmallest`**: keep the smallest file (e.g. most efficient encode)
//! - **`KeepHighestQuality`**: keep the file with the highest quality score
//! - **`KeepByPath`**: keep the file in a preferred directory hierarchy
//! - **`Custom`**: user-supplied scoring function
11
12#![allow(dead_code)]
13#![allow(clippy::cast_precision_loss)]
14
15use std::path::{Path, PathBuf};
16
17// ---------------------------------------------------------------------------
18// FileCandidate
19// ---------------------------------------------------------------------------
20
/// Describes one file that belongs to a group of detected duplicates.
///
/// Holds the facts the merge strategies compare: size, modification time and
/// an optional quality score, plus the path and creation time.
#[derive(Debug, Clone)]
pub struct FileCandidate {
    /// Location of the file.
    pub path: PathBuf,
    /// Size of the file, in bytes.
    pub size: u64,
    /// Last-modified time (Unix seconds).
    pub modified: u64,
    /// Creation time (Unix seconds).
    pub created: u64,
    /// Quality score (0.0 - 1.0), when one is known.
    pub quality_score: Option<f64>,
}

impl FileCandidate {
    /// Build a candidate from its path, size and timestamps.
    ///
    /// The quality score starts out unset; attach one with
    /// [`with_quality`](Self::with_quality).
    pub fn new(path: PathBuf, size: u64, modified: u64, created: u64) -> Self {
        let quality_score = None;
        Self {
            path,
            size,
            modified,
            created,
            quality_score,
        }
    }

    /// Builder: return this candidate with `score` stored as its quality
    /// score. All other fields are carried over unchanged.
    #[must_use]
    pub fn with_quality(self, score: f64) -> Self {
        Self {
            quality_score: Some(score),
            ..self
        }
    }
}
55
56// ---------------------------------------------------------------------------
57// MergeStrategy
58// ---------------------------------------------------------------------------
59
/// Rule used to decide which member of a duplicate group survives.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MergeStrategy {
    /// The file with the latest modification time survives.
    KeepNewest,
    /// The file with the earliest modification time survives.
    KeepOldest,
    /// The file with the greatest size survives.
    KeepLargest,
    /// The file with the smallest size survives.
    KeepSmallest,
    /// The file with the highest quality score survives.
    KeepHighestQuality,
}

impl MergeStrategy {
    /// Short kebab-case name of the strategy, suitable for display.
    #[must_use]
    pub fn label(self) -> &'static str {
        use MergeStrategy::{KeepHighestQuality, KeepLargest, KeepNewest, KeepOldest, KeepSmallest};

        let text = match self {
            KeepNewest => "keep-newest",
            KeepOldest => "keep-oldest",
            KeepLargest => "keep-largest",
            KeepSmallest => "keep-smallest",
            KeepHighestQuality => "keep-highest-quality",
        };
        text
    }
}
88
89// ---------------------------------------------------------------------------
90// MergeAction
91// ---------------------------------------------------------------------------
92
/// What should happen to one file once a duplicate group has been resolved.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MergeAction {
    /// This file is the canonical copy and stays as it is.
    Keep,
    /// This file is to be removed.
    Remove,
    /// This file is to be replaced by a symlink to the kept file.
    Symlink {
        /// Path of the kept file the symlink should point to.
        target: PathBuf,
    },
    /// This file is to be replaced by a hardlink to the kept file.
    Hardlink {
        /// Path of the kept file the hardlink should refer to.
        target: PathBuf,
    },
}

impl MergeAction {
    /// `true` only for [`MergeAction::Keep`].
    #[must_use]
    pub fn is_keep(&self) -> bool {
        match self {
            Self::Keep => true,
            Self::Remove | Self::Symlink { .. } | Self::Hardlink { .. } => false,
        }
    }

    /// `true` only for [`MergeAction::Remove`]; link actions do not count.
    #[must_use]
    pub fn is_remove(&self) -> bool {
        match self {
            Self::Remove => true,
            Self::Keep | Self::Symlink { .. } | Self::Hardlink { .. } => false,
        }
    }
}
125
126// ---------------------------------------------------------------------------
127// MergeResolution
128// ---------------------------------------------------------------------------
129
130/// A single file's resolution after merge.
131#[derive(Debug, Clone)]
132pub struct FileResolution {
133    /// The candidate file.
134    pub candidate: FileCandidate,
135    /// The action to take.
136    pub action: MergeAction,
137}
138
139/// The full resolution of a duplicate group.
140#[derive(Debug, Clone)]
141pub struct MergeResolution {
142    /// Per-file resolutions.
143    pub files: Vec<FileResolution>,
144    /// The strategy used.
145    pub strategy: MergeStrategy,
146    /// Estimated bytes recoverable by removing duplicates.
147    pub bytes_saved: u64,
148}
149
150// ---------------------------------------------------------------------------
151// Resolver
152// ---------------------------------------------------------------------------
153
154/// Resolve a group of duplicate candidates using a strategy.
155///
156/// Returns a [`MergeResolution`] specifying which file to keep and what
157/// to do with the rest.
158pub fn resolve(
159    candidates: &[FileCandidate],
160    strategy: MergeStrategy,
161    link_mode: LinkMode,
162) -> MergeResolution {
163    if candidates.is_empty() {
164        return MergeResolution {
165            files: Vec::new(),
166            strategy,
167            bytes_saved: 0,
168        };
169    }
170
171    let winner_idx = pick_winner(candidates, strategy);
172    let winner_path = candidates[winner_idx].path.clone();
173    let mut bytes_saved = 0u64;
174
175    let files = candidates
176        .iter()
177        .enumerate()
178        .map(|(i, c)| {
179            if i == winner_idx {
180                FileResolution {
181                    candidate: c.clone(),
182                    action: MergeAction::Keep,
183                }
184            } else {
185                bytes_saved += c.size;
186                let action = match link_mode {
187                    LinkMode::Delete => MergeAction::Remove,
188                    LinkMode::Symlink => MergeAction::Symlink {
189                        target: winner_path.clone(),
190                    },
191                    LinkMode::Hardlink => MergeAction::Hardlink {
192                        target: winner_path.clone(),
193                    },
194                };
195                FileResolution {
196                    candidate: c.clone(),
197                    action,
198                }
199            }
200        })
201        .collect();
202
203    MergeResolution {
204        files,
205        strategy,
206        bytes_saved,
207    }
208}
209
/// What to do with the files of a duplicate group that were not chosen as
/// the one to keep.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LinkMode {
    /// Non-kept files are deleted.
    Delete,
    /// Non-kept files are replaced with symlinks.
    Symlink,
    /// Non-kept files are replaced with hardlinks.
    Hardlink,
}
220
221/// Pick the winner index based on strategy.
222fn pick_winner(candidates: &[FileCandidate], strategy: MergeStrategy) -> usize {
223    match strategy {
224        MergeStrategy::KeepNewest => candidates
225            .iter()
226            .enumerate()
227            .max_by_key(|(_, c)| c.modified)
228            .map(|(i, _)| i)
229            .unwrap_or(0),
230        MergeStrategy::KeepOldest => candidates
231            .iter()
232            .enumerate()
233            .min_by_key(|(_, c)| c.modified)
234            .map(|(i, _)| i)
235            .unwrap_or(0),
236        MergeStrategy::KeepLargest => candidates
237            .iter()
238            .enumerate()
239            .max_by_key(|(_, c)| c.size)
240            .map(|(i, _)| i)
241            .unwrap_or(0),
242        MergeStrategy::KeepSmallest => candidates
243            .iter()
244            .enumerate()
245            .min_by_key(|(_, c)| c.size)
246            .map(|(i, _)| i)
247            .unwrap_or(0),
248        MergeStrategy::KeepHighestQuality => candidates
249            .iter()
250            .enumerate()
251            .max_by(|(_, a), (_, b)| {
252                let qa = a.quality_score.unwrap_or(0.0);
253                let qb = b.quality_score.unwrap_or(0.0);
254                qa.partial_cmp(&qb).unwrap_or(std::cmp::Ordering::Equal)
255            })
256            .map(|(i, _)| i)
257            .unwrap_or(0),
258    }
259}
260
/// Report whether `path` lies at or below `preferred_prefix`.
///
/// The comparison is made on whole path components, so `/archive2/a.mp4` is
/// not considered to be under `/archive`.
#[must_use]
pub fn is_preferred_path(path: &Path, preferred_prefix: &Path) -> bool {
    // Stripping the prefix succeeds exactly when the path starts with it.
    path.strip_prefix(preferred_prefix).is_ok()
}
266
267// ---------------------------------------------------------------------------
268// Tests
269// ---------------------------------------------------------------------------
270
#[cfg(test)]
mod tests {
    use super::*;

    /// Three-file fixture: `/b.mp4` (index 1) is both the newest and the
    /// largest, `/c.mp4` (index 2) is both the oldest and the smallest.
    fn sample_group() -> Vec<FileCandidate> {
        [
            ("/a.mp4", 1000, 100, 90),
            ("/b.mp4", 2000, 200, 80),
            ("/c.mp4", 500, 50, 100),
        ]
        .iter()
        .map(|&(path, size, modified, created)| {
            FileCandidate::new(PathBuf::from(path), size, modified, created)
        })
        .collect()
    }

    #[test]
    fn test_keep_newest() {
        let outcome = resolve(&sample_group(), MergeStrategy::KeepNewest, LinkMode::Delete);
        assert_eq!(outcome.files.len(), 3);
        // /b.mp4 carries modified=200.
        assert!(matches!(outcome.files[1].action, MergeAction::Keep));
    }

    #[test]
    fn test_keep_oldest() {
        let outcome = resolve(&sample_group(), MergeStrategy::KeepOldest, LinkMode::Delete);
        // /c.mp4 carries modified=50.
        assert!(matches!(outcome.files[2].action, MergeAction::Keep));
    }

    #[test]
    fn test_keep_largest() {
        let outcome = resolve(&sample_group(), MergeStrategy::KeepLargest, LinkMode::Delete);
        // /b.mp4 carries size=2000.
        assert!(matches!(outcome.files[1].action, MergeAction::Keep));
    }

    #[test]
    fn test_keep_smallest() {
        let outcome = resolve(&sample_group(), MergeStrategy::KeepSmallest, LinkMode::Delete);
        // /c.mp4 carries size=500.
        assert!(matches!(outcome.files[2].action, MergeAction::Keep));
    }

    #[test]
    fn test_keep_highest_quality() {
        let scored: Vec<FileCandidate> = [("/a.mp4", 0.6), ("/b.mp4", 0.9), ("/c.mp4", 0.3)]
            .iter()
            .map(|&(path, quality)| {
                FileCandidate::new(PathBuf::from(path), 100, 10, 10).with_quality(quality)
            })
            .collect();
        let outcome = resolve(&scored, MergeStrategy::KeepHighestQuality, LinkMode::Delete);
        // 0.9 is the top score.
        assert!(matches!(outcome.files[1].action, MergeAction::Keep));
    }

    #[test]
    fn test_bytes_saved() {
        let outcome = resolve(&sample_group(), MergeStrategy::KeepLargest, LinkMode::Delete);
        // /b.mp4 (2000) is kept; /a.mp4 (1000) + /c.mp4 (500) are freed.
        assert_eq!(outcome.bytes_saved, 1500);
    }

    #[test]
    fn test_symlink_mode() {
        let outcome = resolve(&sample_group(), MergeStrategy::KeepNewest, LinkMode::Symlink);
        let expected = PathBuf::from("/b.mp4");
        for entry in outcome.files.iter().filter(|f| !f.action.is_keep()) {
            let MergeAction::Symlink { target } = &entry.action else {
                panic!("expected symlink action");
            };
            assert_eq!(target, &expected);
        }
    }

    #[test]
    fn test_hardlink_mode() {
        let outcome = resolve(&sample_group(), MergeStrategy::KeepNewest, LinkMode::Hardlink);
        let expected = PathBuf::from("/b.mp4");
        for entry in outcome.files.iter().filter(|f| !f.action.is_keep()) {
            let MergeAction::Hardlink { target } = &entry.action else {
                panic!("expected hardlink action");
            };
            assert_eq!(target, &expected);
        }
    }

    #[test]
    fn test_empty_candidates() {
        let outcome = resolve(&[], MergeStrategy::KeepNewest, LinkMode::Delete);
        assert!(outcome.files.is_empty());
        assert_eq!(outcome.bytes_saved, 0);
    }

    #[test]
    fn test_single_candidate() {
        let lone = vec![FileCandidate::new(PathBuf::from("/only.mp4"), 999, 10, 10)];
        let outcome = resolve(&lone, MergeStrategy::KeepNewest, LinkMode::Delete);
        assert_eq!(outcome.files.len(), 1);
        assert!(matches!(outcome.files[0].action, MergeAction::Keep));
        assert_eq!(outcome.bytes_saved, 0);
    }

    #[test]
    fn test_is_preferred_path() {
        let prefix = Path::new("/archive");
        assert!(is_preferred_path(Path::new("/archive/media/a.mp4"), prefix));
        assert!(!is_preferred_path(Path::new("/other/a.mp4"), prefix));
    }

    #[test]
    fn test_strategy_label() {
        assert_eq!(MergeStrategy::KeepNewest.label(), "keep-newest");
        assert_eq!(MergeStrategy::KeepSmallest.label(), "keep-smallest");
    }
}
390
391// ---------------------------------------------------------------------------
392// Real FS Executor — MergeExecutor
393// ---------------------------------------------------------------------------
394
/// What [`MergeExecutor::apply`] did to one duplicate file, or what
/// [`MergeExecutor::dry_run`] reports it would do.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AppliedAction {
    /// A symbolic link to `target` now stands in place of the file.
    Symlinked {
        /// The primary (canonical) file the symlink points to.
        target: PathBuf,
    },
    /// A hard link sharing `target`'s inode now stands in place of the file.
    Hardlinked {
        /// The primary (canonical) file whose inode is shared.
        target: PathBuf,
    },
    /// The file was deleted.
    Deleted,
    /// Nothing was done to the file because it is the primary copy.
    Kept,
    /// Nothing was done to the file; the string gives the reason.
    Skipped(String),
}
416
417/// A per-file action record returned by [`MergeExecutor`].
418#[derive(Debug, Clone)]
419pub struct MergeReport {
420    /// The file that was chosen as the canonical copy to retain.
421    pub primary_path: PathBuf,
422    /// Actions taken (or planned, in dry-run mode) for each duplicate file.
423    ///
424    /// The first element of each tuple is the duplicate path; the second is
425    /// what was (or would be) done to it.
426    pub actions: Vec<(PathBuf, AppliedAction)>,
427}
428
429impl MergeReport {
430    /// Returns the number of files that were actually modified (not Kept/Skipped).
431    #[must_use]
432    pub fn modified_count(&self) -> usize {
433        self.actions
434            .iter()
435            .filter(|(_, a)| !matches!(a, AppliedAction::Kept | AppliedAction::Skipped(_)))
436            .count()
437    }
438
439    /// Returns `true` if any action was skipped.
440    #[must_use]
441    pub fn has_skipped(&self) -> bool {
442        self.actions
443            .iter()
444            .any(|(_, a)| matches!(a, AppliedAction::Skipped(_)))
445    }
446}
447
448/// Executes real filesystem mutations (symlink / hardlink / delete) for a set
449/// of duplicate files.
450///
451/// ```rust
452/// use oximedia_dedup::merge_strategy::{MergeExecutor, LinkMode};
453/// use std::path::PathBuf;
454///
455/// // Build an executor that replaces duplicates with hard links.
456/// let executor = MergeExecutor::new(LinkMode::Hardlink);
457///
458/// // Dry-run: returns what *would* happen without touching the filesystem.
459/// let primary = PathBuf::from("/media/archive/original.mp4");
460/// let dups = vec![PathBuf::from("/media/inbox/copy.mp4")];
461/// let report = executor.dry_run(&primary, &dups);
462/// ```
463#[derive(Debug, Clone, Copy)]
464pub struct MergeExecutor {
465    link_mode: LinkMode,
466}
467
468impl MergeExecutor {
469    /// Create a new executor with the given [`LinkMode`].
470    #[must_use]
471    pub fn new(link_mode: LinkMode) -> Self {
472        Self { link_mode }
473    }
474
475    /// Apply the configured action to every path in `duplicates`, treating
476    /// `primary_path` as the canonical file to keep.
477    ///
478    /// The primary path itself is silently skipped if it appears in
479    /// `duplicates` (path equality check using [`Path::canonicalize`] if both
480    /// exist, falling back to byte comparison otherwise).
481    ///
482    /// # Errors
483    ///
484    /// Returns [`crate::DedupError::Io`] for unrecoverable I/O failures.
485    /// Cross-device hardlink failures are NOT errors — they produce
486    /// [`AppliedAction::Skipped`] instead.
487    pub fn apply(
488        &self,
489        primary_path: &Path,
490        duplicates: &[PathBuf],
491    ) -> Result<MergeReport, crate::DedupError> {
492        self.execute(primary_path, duplicates, false)
493    }
494
495    /// Preview what [`apply`](Self::apply) would do without touching the
496    /// filesystem.
497    ///
498    /// The returned [`MergeReport`] describes the intended actions.  No files
499    /// are created, removed, or modified.
500    ///
501    /// # Errors
502    ///
503    /// Currently infallible (always returns `Ok`), but uses the same error
504    /// type as [`apply`](Self::apply) for API symmetry.
505    pub fn dry_run(
506        &self,
507        primary_path: &Path,
508        duplicates: &[PathBuf],
509    ) -> Result<MergeReport, crate::DedupError> {
510        self.execute(primary_path, duplicates, true)
511    }
512
513    /// Execute a pre-computed [`MergeResolution`] produced by [`MergeResolver`].
514    ///
515    /// This bridges the selection phase (which file to keep) with the
516    /// execution phase (actually modifying the filesystem).
517    ///
518    /// # Errors
519    ///
520    /// Returns [`crate::DedupError::Io`] for unrecoverable I/O failures.
521    pub fn apply_resolution(
522        &self,
523        resolution: &MergeResolution,
524    ) -> Result<MergeReport, crate::DedupError> {
525        // The primary is the one file whose action is Keep.
526        let primary = resolution
527            .files
528            .iter()
529            .find(|f| f.action.is_keep())
530            .map(|f| f.candidate.path.clone());
531
532        let Some(primary_path) = primary else {
533            // Degenerate resolution (e.g., empty group) — nothing to do.
534            return Ok(MergeReport {
535                primary_path: PathBuf::new(),
536                actions: Vec::new(),
537            });
538        };
539
540        let duplicates: Vec<PathBuf> = resolution
541            .files
542            .iter()
543            .filter(|f| !f.action.is_keep())
544            .map(|f| f.candidate.path.clone())
545            .collect();
546
547        self.execute(&primary_path, &duplicates, false)
548    }
549
550    // --- internal ---
551
552    fn execute(
553        &self,
554        primary_path: &Path,
555        duplicates: &[PathBuf],
556        dry: bool,
557    ) -> Result<MergeReport, crate::DedupError> {
558        let mut report = MergeReport {
559            primary_path: primary_path.to_path_buf(),
560            actions: Vec::new(),
561        };
562
563        for dup in duplicates {
564            // Skip if `dup` and `primary_path` refer to the same file.
565            if Self::same_path(primary_path, dup) {
566                report.actions.push((dup.clone(), AppliedAction::Kept));
567                continue;
568            }
569
570            let action = self.apply_to_one(primary_path, dup, dry)?;
571            report.actions.push((dup.clone(), action));
572        }
573
574        Ok(report)
575    }
576
577    fn apply_to_one(
578        &self,
579        primary: &Path,
580        dup: &Path,
581        dry: bool,
582    ) -> Result<AppliedAction, crate::DedupError> {
583        match self.link_mode {
584            LinkMode::Symlink => {
585                if dry {
586                    return Ok(AppliedAction::Symlinked {
587                        target: primary.to_path_buf(),
588                    });
589                }
590                // Remove the duplicate first.
591                if dup.exists() || dup.symlink_metadata().is_ok() {
592                    std::fs::remove_file(dup)?;
593                }
594                create_symlink(primary, dup)?;
595                Ok(AppliedAction::Symlinked {
596                    target: primary.to_path_buf(),
597                })
598            }
599
600            LinkMode::Hardlink => {
601                if dry {
602                    return Ok(AppliedAction::Hardlinked {
603                        target: primary.to_path_buf(),
604                    });
605                }
606                if !primary.exists() {
607                    return Ok(AppliedAction::Skipped(format!(
608                        "primary does not exist: {}",
609                        primary.display()
610                    )));
611                }
612                // Remove the duplicate first.
613                if dup.exists() || dup.symlink_metadata().is_ok() {
614                    std::fs::remove_file(dup)?;
615                }
616                // hard_link fails with EXDEV on cross-device links — treat as
617                // a graceful skip rather than a hard error.
618                match std::fs::hard_link(primary, dup) {
619                    Ok(()) => Ok(AppliedAction::Hardlinked {
620                        target: primary.to_path_buf(),
621                    }),
622                    Err(e) => Ok(AppliedAction::Skipped(format!(
623                        "hardlink failed (cross-device or permission?): {e}"
624                    ))),
625                }
626            }
627
628            LinkMode::Delete => {
629                if dry {
630                    return Ok(AppliedAction::Deleted);
631                }
632                if !primary.exists() {
633                    return Ok(AppliedAction::Skipped(
634                        "primary does not exist — refusing to delete duplicate".into(),
635                    ));
636                }
637                std::fs::remove_file(dup)?;
638                Ok(AppliedAction::Deleted)
639            }
640        }
641    }
642
643    /// Compare two paths for identity.  Tries canonicalization first (handles
644    /// symlinks and `..` components); falls back to byte comparison.
645    fn same_path(a: &Path, b: &Path) -> bool {
646        if a == b {
647            return true;
648        }
649        let ca = std::fs::canonicalize(a);
650        let cb = std::fs::canonicalize(b);
651        match (ca, cb) {
652            (Ok(ca), Ok(cb)) => ca == cb,
653            _ => false,
654        }
655    }
656}
657
/// Create a symbolic link at `link` that points to `target`, using the
/// platform's native call.
///
/// On Unix this is `std::os::unix::fs::symlink`, on Windows
/// `std::os::windows::fs::symlink_file`. On any other platform the call
/// fails with [`std::io::ErrorKind::Unsupported`].
fn create_symlink(target: &Path, link: &Path) -> std::io::Result<()> {
    #[cfg(unix)]
    fn platform_symlink(target: &Path, link: &Path) -> std::io::Result<()> {
        std::os::unix::fs::symlink(target, link)
    }

    #[cfg(windows)]
    fn platform_symlink(target: &Path, link: &Path) -> std::io::Result<()> {
        std::os::windows::fs::symlink_file(target, link)
    }

    #[cfg(not(any(unix, windows)))]
    fn platform_symlink(target: &Path, link: &Path) -> std::io::Result<()> {
        let message = format!(
            "symlinks not supported on this platform (target={}, link={})",
            target.display(),
            link.display()
        );
        Err(std::io::Error::new(
            std::io::ErrorKind::Unsupported,
            message,
        ))
    }

    platform_symlink(target, link)
}
680
681// ---------------------------------------------------------------------------
682// MergeExecutor tests
683// ---------------------------------------------------------------------------
684
#[cfg(test)]
mod executor_tests {
    use super::*;
    use std::env::temp_dir;
    use std::fs;

    /// Create `n` temporary files under a unique subdirectory and return their
    /// paths.  Each file contains distinct content to avoid accidental sharing.
    ///
    /// The directory name combines the process id, a clock reading and a
    /// per-process serial number. The serial is what guarantees uniqueness:
    /// tests run in parallel, several of them ask for the same `n`, and a
    /// coarse clock can hand two of them the same nanosecond reading.
    fn make_temp_files(n: usize) -> Vec<PathBuf> {
        use std::sync::atomic::{AtomicUsize, Ordering};
        static NEXT_DIR: AtomicUsize = AtomicUsize::new(0);

        let unique = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .subsec_nanos();
        let serial = NEXT_DIR.fetch_add(1, Ordering::Relaxed);
        let base = temp_dir().join(format!(
            "oxidedup_exec_{}_{unique}_{serial}_{n}",
            std::process::id()
        ));
        fs::create_dir_all(&base).expect("create test dir");
        (0..n)
            .map(|i| {
                let p = base.join(format!("file_{i}.bin"));
                // Content must be non-trivial so files are not accidentally
                // sharing blocks at the filesystem level.
                let content = format!("oximedia-dedup-test-content-{i}-{unique}").repeat(100);
                fs::write(&p, content.as_bytes()).expect("write test file");
                p
            })
            .collect()
    }

    #[test]
    fn test_symlink_strategy_creates_symlinks() {
        let files = make_temp_files(3);
        let executor = MergeExecutor::new(LinkMode::Symlink);
        let report = executor
            .apply(&files[0], &files[1..])
            .expect("apply symlink");

        assert_eq!(report.primary_path, files[0]);
        assert_eq!(report.actions.len(), 2);

        for (path, action) in &report.actions {
            assert!(
                path.symlink_metadata()
                    .map(|m| m.file_type().is_symlink())
                    .unwrap_or(false),
                "expected symlink at {path:?}"
            );
            match action {
                AppliedAction::Symlinked { target } => {
                    assert_eq!(target, &files[0]);
                }
                other => panic!("unexpected action: {other:?}"),
            }
        }
    }

    #[test]
    fn test_hardlink_strategy_creates_hardlinks() {
        let files = make_temp_files(2);
        let executor = MergeExecutor::new(LinkMode::Hardlink);
        let report = executor
            .apply(&files[0], &files[1..])
            .expect("apply hardlink");

        assert_eq!(report.actions.len(), 1);

        let (path, action) = &report.actions[0];
        match action {
            AppliedAction::Hardlinked { target } => {
                assert_eq!(target, &files[0]);
                // On a single filesystem, nlink must be 2 after hard_link.
                #[cfg(unix)]
                {
                    use std::os::unix::fs::MetadataExt;
                    let meta = fs::metadata(path).expect("metadata after hardlink");
                    assert_eq!(meta.nlink(), 2, "hardlink count should be 2");
                }
            }
            // Cross-device: graceful skip is acceptable.
            AppliedAction::Skipped(_) => {}
            other => panic!("unexpected action: {other:?}"),
        }
    }

    #[test]
    fn test_delete_strategy_removes_duplicates() {
        let files = make_temp_files(3);
        let executor = MergeExecutor::new(LinkMode::Delete);
        executor
            .apply(&files[0], &files[1..])
            .expect("apply delete");

        assert!(files[0].exists(), "primary should still exist");
        assert!(!files[1].exists(), "dup 1 should be deleted");
        assert!(!files[2].exists(), "dup 2 should be deleted");
    }

    #[test]
    fn test_dry_run_changes_nothing() {
        let files = make_temp_files(2);
        let original_content = fs::read(&files[1]).expect("read original");

        let executor = MergeExecutor::new(LinkMode::Delete);
        let report = executor.dry_run(&files[0], &files[1..]).expect("dry_run");

        // File must still exist and be unchanged.
        assert!(files[1].exists(), "dry run must not delete");
        assert_eq!(
            fs::read(&files[1]).expect("read after dry_run"),
            original_content,
            "content must not change after dry_run"
        );

        // Report must still describe the intended action.
        assert_eq!(report.actions.len(), 1);
        assert!(matches!(report.actions[0].1, AppliedAction::Deleted));
    }

    #[test]
    fn test_cross_fs_hardlink_skipped_gracefully() {
        // In most CI environments tempdir is on the same filesystem, so the
        // hard link will succeed and return Hardlinked.  Either outcome must
        // not panic or return Err — only Ok(Hardlinked|Skipped) is valid.
        let files = make_temp_files(2);
        let executor = MergeExecutor::new(LinkMode::Hardlink);
        let result = executor.apply(&files[0], &files[1..]);
        assert!(
            result.is_ok(),
            "hardlink executor must not fail: {result:?}"
        );
        let report = result.expect("hardlink result");
        for (_, action) in &report.actions {
            assert!(
                matches!(
                    action,
                    AppliedAction::Hardlinked { .. } | AppliedAction::Skipped(_)
                ),
                "unexpected action: {action:?}"
            );
        }
    }

    #[test]
    fn test_primary_skipped_when_in_duplicates_list() {
        let files = make_temp_files(1);
        // Pass primary as its own duplicate — must produce Kept, not delete itself.
        let executor = MergeExecutor::new(LinkMode::Delete);
        let report = executor
            .apply(&files[0], &[files[0].clone()])
            .expect("apply self-dup");
        assert!(files[0].exists(), "primary must not be deleted");
        assert_eq!(report.actions.len(), 1);
        assert!(matches!(report.actions[0].1, AppliedAction::Kept));
    }

    #[test]
    fn test_dry_run_symlink_returns_intended_action() {
        let files = make_temp_files(2);
        let executor = MergeExecutor::new(LinkMode::Symlink);
        let report = executor
            .dry_run(&files[0], &files[1..])
            .expect("dry_run symlink");

        // File must NOT have been replaced with a symlink.
        assert!(
            !files[1]
                .symlink_metadata()
                .map(|m| m.file_type().is_symlink())
                .unwrap_or(false),
            "dry_run must not create a symlink"
        );
        assert!(matches!(
            report.actions[0].1,
            AppliedAction::Symlinked { .. }
        ));
    }

    #[test]
    fn test_merge_report_modified_count() {
        let files = make_temp_files(3);
        let executor = MergeExecutor::new(LinkMode::Delete);
        let report = executor.apply(&files[0], &files[1..]).expect("apply");
        assert_eq!(report.modified_count(), 2);
    }

    #[test]
    fn test_apply_resolution_integrates_with_resolver() {
        let files = make_temp_files(3);
        // Build candidates: file 0 is smallest, file 1 is largest, file 2 is middle.
        let candidates = vec![
            FileCandidate::new(files[0].clone(), 100, 100, 100),
            FileCandidate::new(files[1].clone(), 9000, 200, 200),
            FileCandidate::new(files[2].clone(), 500, 50, 50),
        ];
        let resolver = MergeResolver::new(MergeStrategy::KeepLargest, LinkMode::Delete);
        let resolution = resolve(&candidates, resolver.strategy(), resolver.link_mode());

        let executor = MergeExecutor::new(LinkMode::Delete);
        let report = executor
            .apply_resolution(&resolution)
            .expect("apply_resolution");

        // Primary should be files[1] (largest); files[0] and [2] deleted.
        assert_eq!(report.primary_path, files[1]);
        assert!(!files[0].exists(), "dup 0 should be deleted");
        assert!(files[1].exists(), "primary should survive");
        assert!(!files[2].exists(), "dup 2 should be deleted");
        assert_eq!(report.modified_count(), 2);
    }
}
893
894// ---------------------------------------------------------------------------
895// DuplicateGroup
896// ---------------------------------------------------------------------------
897
/// A set of paths judged to be copies of one another, together with the
/// single path chosen as the canonical one to retain.
#[derive(Debug, Clone)]
pub struct DuplicateGroup {
    /// Every path in the group; normally this includes the representative.
    pub files: Vec<PathBuf>,
    /// The canonical path that should be retained.
    pub representative: PathBuf,
}

impl DuplicateGroup {
    /// Build a group from its member paths and the chosen representative.
    ///
    /// No validation is performed: `representative` is stored as given, even
    /// when it does not appear in `files`. Keeping the two consistent is the
    /// caller's job.
    #[must_use]
    pub fn new(files: Vec<PathBuf>, representative: PathBuf) -> Self {
        Self {
            representative,
            files,
        }
    }

    /// How many paths the group holds.
    #[must_use]
    pub fn len(&self) -> usize {
        let Self { files, .. } = self;
        files.len()
    }

    /// `true` when the group holds no paths at all.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Borrow every member path that differs from the representative, in
    /// their original order.
    ///
    /// These are the entries that may be removed, linked, or otherwise
    /// disposed of according to a [`MergeStrategy`].
    #[must_use]
    pub fn duplicates(&self) -> Vec<&PathBuf> {
        let mut others = Vec::new();
        for member in &self.files {
            if !self.is_representative(member) {
                others.push(member);
            }
        }
        others
    }

    /// `true` when `path` equals this group's representative.
    #[must_use]
    pub fn is_representative(&self, path: &Path) -> bool {
        path == self.representative.as_path()
    }
}
951
952// ---------------------------------------------------------------------------
953// MergeResolver
954// ---------------------------------------------------------------------------
955
956/// Resolves a [`DuplicateGroup`] to a single canonical file using a
957/// configured [`MergeStrategy`] and [`LinkMode`].
958///
959/// Two resolution paths are provided:
960/// 1. [`MergeResolver::resolve`] — reads filesystem metadata for scoring.
961/// 2. [`MergeResolver::resolve_from_candidates`] — accepts pre-built
962///    [`FileCandidate`] data (no filesystem access required).
963#[derive(Debug, Clone)]
964pub struct MergeResolver {
965    strategy: MergeStrategy,
966    link_mode: LinkMode,
967}
968
969impl MergeResolver {
970    /// Create a new resolver.
971    #[must_use]
972    pub fn new(strategy: MergeStrategy, link_mode: LinkMode) -> Self {
973        Self {
974            strategy,
975            link_mode,
976        }
977    }
978
979    /// Create a resolver that deletes duplicates and keeps with `KeepLargest`.
980    #[must_use]
981    pub fn default_delete() -> Self {
982        Self::new(MergeStrategy::KeepLargest, LinkMode::Delete)
983    }
984
985    /// Returns the configured strategy.
986    #[must_use]
987    pub fn strategy(&self) -> MergeStrategy {
988        self.strategy
989    }
990
991    /// Returns the configured link mode.
992    #[must_use]
993    pub fn link_mode(&self) -> LinkMode {
994        self.link_mode
995    }
996
997    /// Resolve which file to keep by reading filesystem metadata.
998    ///
999    /// For each file in `group.files`, attempts to read its metadata (size,
1000    /// mtime). Files whose metadata cannot be read are assigned zero values and
1001    /// will lose to files with valid metadata under most strategies.
1002    ///
1003    /// Returns the path of the file to keep. Falls back to the first file when
1004    /// `group.files` is empty.
1005    #[must_use]
1006    pub fn resolve(&self, group: &DuplicateGroup) -> PathBuf {
1007        if group.files.is_empty() {
1008            return group.representative.clone();
1009        }
1010
1011        let candidates: Vec<FileCandidate> = group
1012            .files
1013            .iter()
1014            .map(|path| {
1015                let meta = std::fs::metadata(path);
1016                let (size, modified, created) = meta
1017                    .as_ref()
1018                    .map(|m| {
1019                        let size = m.len();
1020                        let modified = m
1021                            .modified()
1022                            .ok()
1023                            .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
1024                            .map(|d| d.as_secs())
1025                            .unwrap_or(0);
1026                        let created = m
1027                            .created()
1028                            .ok()
1029                            .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
1030                            .map(|d| d.as_secs())
1031                            .unwrap_or(0);
1032                        (size, modified, created)
1033                    })
1034                    .unwrap_or((0, 0, 0));
1035                FileCandidate::new(path.clone(), size, modified, created)
1036            })
1037            .collect();
1038
1039        self.resolve_from_candidates(&candidates)
1040            .unwrap_or_else(|| group.files[0].clone())
1041    }
1042
1043    /// Resolve from pre-built candidate metadata (no filesystem access).
1044    ///
1045    /// Returns the path of the winning candidate, or `None` if `candidates`
1046    /// is empty.
1047    #[must_use]
1048    pub fn resolve_from_candidates(&self, candidates: &[FileCandidate]) -> Option<PathBuf> {
1049        if candidates.is_empty() {
1050            return None;
1051        }
1052        let resolution = resolve(candidates, self.strategy, self.link_mode);
1053        resolution
1054            .files
1055            .iter()
1056            .find(|f| f.action.is_keep())
1057            .map(|f| f.candidate.path.clone())
1058    }
1059}
1060
1061// ---------------------------------------------------------------------------
1062// DuplicateGroup + MergeResolver tests
1063// ---------------------------------------------------------------------------
1064
#[cfg(test)]
mod group_resolver_tests {
    use super::*;

    /// Convert a slice of string literals into owned paths, preserving order.
    fn paths(names: &[&str]) -> Vec<PathBuf> {
        let mut out = Vec::with_capacity(names.len());
        for name in names {
            out.push(PathBuf::from(name));
        }
        out
    }

    // ── DuplicateGroup ─────────────────────────────────────────────────────

    /// A freshly built group reports its member count and is not empty.
    #[test]
    fn test_group_new_and_len() {
        let members = paths(&["/a.mp4", "/b.mp4", "/c.mp4"]);
        let group = DuplicateGroup::new(members, PathBuf::from("/a.mp4"));
        assert_eq!(group.len(), 3);
        assert!(!group.is_empty());
    }

    /// A group without members is empty and has length zero.
    #[test]
    fn test_group_empty() {
        let group = DuplicateGroup::new(Vec::new(), PathBuf::from("/none.mp4"));
        assert!(group.is_empty());
        assert_eq!(group.len(), 0);
    }

    /// `duplicates()` lists every member except the representative.
    #[test]
    fn test_group_duplicates_excludes_representative() {
        let members = paths(&["/a.mp4", "/b.mp4", "/c.mp4"]);
        let group = DuplicateGroup::new(members, PathBuf::from("/a.mp4"));

        let dups = group.duplicates();
        let listed = |name: &str| dups.iter().any(|p| p.as_path() == Path::new(name));

        assert_eq!(dups.len(), 2);
        assert!(!listed("/a.mp4"));
        assert!(listed("/b.mp4"));
        assert!(listed("/c.mp4"));
    }

    /// Only the representative path is recognised as representative.
    #[test]
    fn test_group_is_representative() {
        let members = paths(&["/rep.mp4", "/dup.mp4"]);
        let group = DuplicateGroup::new(members, PathBuf::from("/rep.mp4"));
        assert!(group.is_representative(Path::new("/rep.mp4")));
        assert!(!group.is_representative(Path::new("/dup.mp4")));
    }

    /// Representative not in files list — all files are returned as duplicates.
    #[test]
    fn test_group_duplicates_all_are_duplicates_when_representative_absent() {
        let members = paths(&["/b.mp4", "/c.mp4"]);
        let group = DuplicateGroup::new(members, PathBuf::from("/a.mp4"));
        assert_eq!(group.duplicates().len(), 2);
    }

    // ── MergeResolver ──────────────────────────────────────────────────────

    /// Shared fixture: three candidates given as (path, size, modified, created).
    fn make_candidates() -> Vec<FileCandidate> {
        let rows: [(&str, u64, u64, u64); 3] = [
            ("/small.mp4", 500, 100, 90),
            ("/large.mp4", 2000, 200, 80),
            ("/oldest.mp4", 1000, 50, 100),
        ];
        rows.iter()
            .map(|&(name, size, modified, created)| {
                FileCandidate::new(PathBuf::from(name), size, modified, created)
            })
            .collect()
    }

    /// Run `strategy` (in delete mode) over the shared fixture and return
    /// the winning path.
    fn winner_for(strategy: MergeStrategy) -> Option<PathBuf> {
        let resolver = MergeResolver::new(strategy, LinkMode::Delete);
        resolver.resolve_from_candidates(&make_candidates())
    }

    #[test]
    fn test_resolver_keep_largest_from_candidates() {
        // size=2000
        let expected = Some(PathBuf::from("/large.mp4"));
        assert_eq!(winner_for(MergeStrategy::KeepLargest), expected);
    }

    #[test]
    fn test_resolver_keep_newest_from_candidates() {
        // modified=200
        let expected = Some(PathBuf::from("/large.mp4"));
        assert_eq!(winner_for(MergeStrategy::KeepNewest), expected);
    }

    #[test]
    fn test_resolver_keep_oldest_from_candidates() {
        // modified=50
        let expected = Some(PathBuf::from("/oldest.mp4"));
        assert_eq!(winner_for(MergeStrategy::KeepOldest), expected);
    }

    #[test]
    fn test_resolver_keep_smallest_from_candidates() {
        // size=500
        let expected = Some(PathBuf::from("/small.mp4"));
        assert_eq!(winner_for(MergeStrategy::KeepSmallest), expected);
    }

    /// With identical size and timestamps, the higher quality score wins.
    #[test]
    fn test_resolver_keep_highest_quality_from_candidates() {
        let low = FileCandidate::new(PathBuf::from("/low.mp4"), 100, 10, 10).with_quality(0.3);
        let high = FileCandidate::new(PathBuf::from("/high.mp4"), 100, 10, 10).with_quality(0.95);

        let resolver = MergeResolver::new(MergeStrategy::KeepHighestQuality, LinkMode::Delete);
        let winner = resolver.resolve_from_candidates(&[low, high]);

        assert_eq!(winner, Some(PathBuf::from("/high.mp4")));
    }

    /// No candidates means no winner.
    #[test]
    fn test_resolver_empty_candidates_returns_none() {
        let resolver = MergeResolver::new(MergeStrategy::KeepLargest, LinkMode::Delete);
        assert_eq!(resolver.resolve_from_candidates(&[]), None);
    }

    /// The accessors hand back exactly what the constructor was given.
    #[test]
    fn test_resolver_strategy_and_link_mode_accessors() {
        let resolver = MergeResolver::new(MergeStrategy::KeepSmallest, LinkMode::Symlink);
        assert_eq!(resolver.strategy(), MergeStrategy::KeepSmallest);
        assert_eq!(resolver.link_mode(), LinkMode::Symlink);
    }

    /// `default_delete` is `KeepLargest` combined with `Delete`.
    #[test]
    fn test_resolver_default_delete() {
        let resolver = MergeResolver::default_delete();
        assert_eq!(resolver.strategy(), MergeStrategy::KeepLargest);
        assert_eq!(resolver.link_mode(), LinkMode::Delete);
    }

    /// Resolving a group whose files are expected not to exist still yields
    /// a usable path.
    #[test]
    fn test_resolver_resolve_filesystem_fallback() {
        // These paths are expected to be absent, so the metadata reads fail
        // and every candidate is scored from zeroed metadata.
        let members = paths(&["/nonexistent_a.mp4", "/nonexistent_b.mp4"]);
        let representative = members[0].clone();
        let group = DuplicateGroup::new(members, representative);

        let resolver = MergeResolver::new(MergeStrategy::KeepLargest, LinkMode::Delete);
        let chosen = resolver.resolve(&group);

        // Only requires that some non-empty path comes back.
        assert!(!chosen.as_os_str().is_empty());
    }

    /// An empty group resolves to its representative.
    #[test]
    fn test_resolver_resolve_empty_group() {
        let group = DuplicateGroup::new(Vec::new(), PathBuf::from("/rep.mp4"));
        let resolver = MergeResolver::new(MergeStrategy::KeepLargest, LinkMode::Delete);
        assert_eq!(resolver.resolve(&group), PathBuf::from("/rep.mp4"));
    }
}