// modde_core/scanner.rs

1use std::collections::{HashMap, HashSet};
2
3use crate::manifest::wabbajack::{
4    ArchiveEntry, ArchiveState, InstallDirective, WabbajackManifest, compute_manifest_hash,
5};
6use crate::nexus_id::{NexusFileId, NexusModId};
7use crate::profile::{EnabledMod, LoadOrderLock, LockReason, Profile};
8
9/// Canonical `mod_id` derivation for a Wabbajack archive entry.
10///
11/// Used by **both** the scanner and the Wabbajack installer so that a
12/// profile installed via `modde install wabbajack` and the same modlist
13/// re-scanned via `modde scan --manifest` produce identical `mod_id`
14/// strings — otherwise retroactive-lock flows would create duplicates
15/// rather than matching existing mods.
16///
17/// - Nexus-sourced archives: `nexus_{game_domain}_{mod_id}_{file_id}`
18/// - Everything else:        `wj_{archive_hash}`
19#[must_use]
20pub fn archive_mod_id(archive: &ArchiveEntry) -> String {
21    if let Some(ArchiveState::NexusDownloader {
22        game_name,
23        mod_id,
24        file_id,
25    }) = archive.state.as_ref()
26    {
27        format!("nexus_{game_name}_{mod_id}_{file_id}")
28    } else {
29        format!("wj_{}", archive.hash)
30    }
31}
32
33/// A mod discovered by matching a Wabbajack manifest against files on disk.
34pub struct ManifestMatch {
35    /// Stable unique ID based on Nexus identity or archive hash.
36    pub mod_id: String,
37    /// Human-readable name (from archive filename, cleaned).
38    pub display_name: String,
39    /// Original archive filename.
40    pub archive_name: String,
41    pub archive_hash: u64,
42    pub total_files: usize,
43    pub present_files: usize,
44    pub confidence: f32,
45    pub nexus_mod_id: Option<NexusModId>,
46    pub nexus_file_id: Option<NexusFileId>,
47    pub nexus_game_domain: Option<String>,
48    /// Game-relative file paths that this archive covers on disk (lowercased).
49    /// Used for correlation with filesystem-discovered mods.
50    pub covered_paths: Vec<String>,
51}
52
53/// Match files on disk against a Wabbajack manifest.
54///
55/// Groups directives by their source `archive_hash`, then checks what
56/// fraction of each archive's `to` paths exist in `on_disk_files`.
57/// Archives where the fraction meets or exceeds `threshold` are returned.
58///
59/// `on_disk_files` should contain lowercased, forward-slash relative paths
60/// from the game install root.
61#[must_use]
62pub fn match_wabbajack_manifest(
63    manifest: &WabbajackManifest,
64    on_disk_files: &HashSet<String>,
65    threshold: f32,
66) -> Vec<ManifestMatch> {
67    let directives = manifest.install_directives();
68
69    // Group directives by archive_hash → list of game-relative paths.
70    // Also extract the MO2 mod name from the `mods/<Name>/...` prefix.
71    let mut archive_files: HashMap<u64, Vec<String>> = HashMap::new();
72    let mut archive_mod_names: HashMap<u64, String> = HashMap::new();
73
74    for d in &directives {
75        match d {
76            InstallDirective::FromArchive {
77                archive_hash, to, ..
78            }
79            | InstallDirective::PatchedFromArchive {
80                archive_hash, to, ..
81            } => {
82                let normalized = to.replace('\\', "/");
83
84                // Extract the MO2 mod name before lowercasing (preserves casing).
85                if archive_mod_names.get(archive_hash).is_none()
86                    && let Some(name) = extract_mo2_mod_name(&normalized)
87                {
88                    archive_mod_names.insert(*archive_hash, name);
89                }
90
91                // Strip prefix and lowercase for matching.
92                let game_relative = strip_mo2_prefix(&normalized.to_lowercase());
93                archive_files
94                    .entry(*archive_hash)
95                    .or_default()
96                    .push(game_relative);
97            }
98            _ => {}
99        }
100    }
101
102    // Build archive hash → ArchiveEntry lookup for metadata.
103    let archive_map: HashMap<u64, &crate::manifest::wabbajack::ArchiveEntry> =
104        manifest.archives.iter().map(|a| (a.hash, a)).collect();
105
106    let mut results = Vec::new();
107
108    for (hash, files) in &archive_files {
109        let total = files.len();
110        if total == 0 {
111            continue;
112        }
113
114        let present_paths: Vec<String> = files
115            .iter()
116            .filter(|path| on_disk_files.contains(path.as_str()))
117            .cloned()
118            .collect();
119        let present = present_paths.len();
120
121        let fraction = present as f32 / total as f32;
122        if fraction < threshold {
123            continue;
124        }
125
126        let archive = archive_map.get(hash);
127        let archive_name = archive.map_or_else(|| format!("unknown_{hash}"), |a| a.name.clone());
128
129        // Display name: prefer cleaned archive filename (unique per archive).
130        let display_name = clean_archive_name(&archive_name);
131
132        let (nexus_mod_id, nexus_file_id, nexus_game_domain) = archive
133            .and_then(|a| a.state.as_ref())
134            .map_or((None, None, None), |state| match state {
135                ArchiveState::NexusDownloader {
136                    game_name,
137                    mod_id,
138                    file_id,
139                } => (Some(*mod_id), Some(*file_id), Some(game_name.clone())),
140                _ => (None, None, None),
141            });
142
143        // Canonical mod_id — must match `archive_mod_id` exactly so Wabbajack
144        // installs + retroactive scans dedup correctly.
145        let mod_id = match archive {
146            Some(a) => archive_mod_id(a),
147            None => format!("wj_{hash}"),
148        };
149
150        results.push(ManifestMatch {
151            mod_id,
152            display_name,
153            archive_name,
154            archive_hash: *hash,
155            total_files: total,
156            present_files: present,
157            confidence: fraction,
158            nexus_mod_id,
159            nexus_file_id,
160            nexus_game_domain,
161            covered_paths: present_paths,
162        });
163    }
164
165    // Sort by display_name for readability.
166    results.sort_by(|a, b| {
167        a.display_name
168            .to_lowercase()
169            .cmp(&b.display_name.to_lowercase())
170    });
171    results
172}
173
174/// Convert a `ManifestMatch` into an `EnabledMod` for database storage.
175#[must_use]
176pub fn manifest_match_to_enabled(m: &ManifestMatch) -> EnabledMod {
177    EnabledMod {
178        mod_id: m.mod_id.clone(),
179        display_name: Some(m.display_name.clone()),
180        enabled: true,
181        version: None,
182        fomod_config: None,
183        nexus_mod_id: m.nexus_mod_id,
184        nexus_file_id: m.nexus_file_id,
185        nexus_game_domain: m.nexus_game_domain.clone(),
186        installed_timestamp: Some(
187            std::time::SystemTime::now()
188                .duration_since(std::time::UNIX_EPOCH)
189                .unwrap_or_default()
190                .as_secs() as i64,
191        ),
192        ..Default::default()
193    }
194}
195
/// Pulls the MO2 mod name out of a staged directive path.
///
/// For `mods/Immersive Healing/archive/pc/mod/ImmersiveHealing.archive`
/// the result is `Some("Immersive Healing")`. Returns `None` when the path
/// does not start with `mods/`, has nothing after the mod-name segment's
/// slash to delimit it, or has an empty mod-name segment.
fn extract_mo2_mod_name(path: &str) -> Option<String> {
    path.strip_prefix("mods/")
        .and_then(|rest| rest.split_once('/'))
        .map(|(mod_name, _game_relative)| mod_name)
        .filter(|mod_name| !mod_name.is_empty())
        .map(str::to_owned)
}
209
/// Removes the MO2 staging prefix from a path.
///
/// `mods/<mod_name>/<game_relative_path>` becomes `<game_relative_path>`.
/// Paths that do not have that shape (e.g. MO2 executables, or a bare
/// `mods/<name>` with no further segment) are returned unchanged.
fn strip_mo2_prefix(path: &str) -> String {
    let game_relative = path
        .strip_prefix("mods/")
        .and_then(|rest| rest.split_once('/'))
        .map_or(path, |(_mod_name, tail)| tail);
    game_relative.to_string()
}
222
/// Clean an archive filename into a display name.
///
/// `ImmersiveHealing-26281-3-1-3-1772288704.zip` → `ImmersiveHealing`.
///
/// Steps:
///
/// 1. Drop the extension (everything from the last `.`).
/// 2. Drop the Nexus suffix (`-modid-version-timestamp`) by cutting at the
///    first `-` that is immediately followed by an ASCII digit. Dashes that
///    belong to the mod name itself (`Cyber-Engine-Tweaks-107-…`) are
///    skipped rather than ending the search.
/// 3. Replace underscores with spaces.
///
/// Names without such a suffix only get steps 1 and 3.
fn clean_archive_name(name: &str) -> String {
    // Strip extension.
    let stem = name.rsplit_once('.').map_or(name, |(s, _)| s);

    // Locate the first `-{digit}`. `-` is a single byte, so `i + 1` is
    // always a valid char boundary (possibly the end of the string).
    let suffix_start = stem
        .match_indices('-')
        .map(|(i, _)| i)
        .find(|&i| stem[i + 1..].starts_with(|c: char| c.is_ascii_digit()));

    let base = suffix_start.map_or(stem, |i| &stem[..i]);
    base.replace('_', " ")
}
240
241/// Compute the canonical mod order from a Wabbajack manifest's install
242/// directives.
243///
244/// `WabbajackManifest.archives` is an unordered JSON array — not a load
245/// order. The *directive* list, however, is the sequence Wabbajack applies
246/// on install, so the first-appearance order of each archive in the
247/// directives is the closest reproducible approximation of "load order".
248///
249/// Returns a `Vec<String>` of canonical `mod_id`s (as produced by
250/// [`archive_mod_id`]) in the order the corresponding archives first
251/// appear in the install directives. Archives that never appear in a
252/// [`InstallDirective::FromArchive`] / [`InstallDirective::PatchedFromArchive`]
253/// are omitted.
254#[must_use]
255pub fn manifest_directive_order(manifest: &WabbajackManifest) -> Vec<String> {
256    let archive_by_hash: HashMap<u64, &ArchiveEntry> =
257        manifest.archives.iter().map(|a| (a.hash, a)).collect();
258
259    let mut seen: HashSet<u64> = HashSet::new();
260    let mut order: Vec<String> = Vec::new();
261    for d in manifest.install_directives() {
262        let hash = match d {
263            InstallDirective::FromArchive { archive_hash, .. }
264            | InstallDirective::PatchedFromArchive { archive_hash, .. } => archive_hash,
265            _ => continue,
266        };
267        if !seen.insert(hash) {
268            continue;
269        }
270        if let Some(archive) = archive_by_hash.get(&hash) {
271            order.push(archive_mod_id(archive));
272        }
273    }
274    order
275}
276
/// Summary returned by [`apply_wabbajack_lock`], describing the outcome of
/// the in-place reorder.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WabbajackLockApplied {
    /// The `manifest_hash` stored on the newly written lock. Matches
    /// `ProfileSource::Wabbajack { manifest_hash }` on installs.
    pub manifest_hash: String,
    /// How many profile mods have a `mod_id` that occurs in the manifest
    /// order; these are placed at the front of the mod list.
    pub matched: usize,
    /// How many pre-existing profile mods the manifest does not mention;
    /// these follow the matched ones, keeping their relative order.
    pub unmatched: usize,
    /// `true` when the profile already had a lock that was overwritten.
    pub replaced_existing_lock: bool,
}
292
293/// Reorder `profile.mods` to follow the manifest's install-directive
294/// order and stamp a `LockReason::Wabbajack` lock onto the profile.
295///
296/// This is the pure helper that powers `modde scan --manifest` and is
297/// the recommended way to retroactively lock an existing profile to a
298/// Wabbajack modlist. Extracted from `scan.rs` so it can be unit-tested
299/// without touching the filesystem scanner.
300///
301/// Invariants:
302///
303/// 1. **Mod count is preserved** — no mod is ever dropped. Matched mods
304///    move to the front in manifest order; unmatched mods retain their
305///    original relative order and are appended after.
306/// 2. **Matched mods are sorted by first-appearance in install
307///    directives** — see [`manifest_directive_order`] for the semantic.
308/// 3. **`profile.load_order_lock` is overwritten** — any prior lock
309///    (including a stale Wabbajack or Manual lock) is replaced. The
310///    return value's `replaced_existing_lock` field lets callers surface
311///    this to the user.
312pub fn apply_wabbajack_lock(
313    profile: &mut Profile,
314    manifest: &WabbajackManifest,
315) -> WabbajackLockApplied {
316    let manifest_order = manifest_directive_order(manifest);
317    let manifest_rank: HashMap<String, usize> = manifest_order
318        .iter()
319        .enumerate()
320        .map(|(i, mid)| (mid.clone(), i))
321        .collect();
322
323    // Stable partition: matched first (in manifest order), unmatched
324    // after (original relative order preserved).
325    let (mut matched, unmatched): (Vec<EnabledMod>, Vec<EnabledMod>) =
326        std::mem::take(&mut profile.mods)
327            .into_iter()
328            .partition(|m| manifest_rank.contains_key(&m.mod_id));
329
330    matched.sort_by_key(|m| manifest_rank.get(&m.mod_id).copied().unwrap_or(usize::MAX));
331
332    let matched_count = matched.len();
333    let unmatched_count = unmatched.len();
334    profile.mods = matched;
335    profile.mods.extend(unmatched);
336
337    let manifest_hash = compute_manifest_hash(manifest);
338    let replaced_existing_lock = profile.load_order_lock.is_some();
339    profile.load_order_lock = Some(LoadOrderLock::now(LockReason::Wabbajack {
340        manifest_hash: manifest_hash.clone(),
341    }));
342
343    WabbajackLockApplied {
344        manifest_hash,
345        matched: matched_count,
346        unmatched: unmatched_count,
347        replaced_existing_lock,
348    }
349}
350
/// The part of the game install that a filesystem-discovered mod owns.
///
/// Game-specific scanners emit `mod_ids` in schemes such as `cet/<name>`
/// or `archive/<stem>`. Correlating those rows with a Wabbajack manifest's
/// install directives requires knowing which portion of the game directory
/// each mod occupies; this enum carries that information.
///
/// - [`ModFootprint::Directory`]: the mod owns a whole subtree of the game
///   install (e.g. `bin/x64/plugins/cyber_engine_tweaks/mods/<name>/`).
/// - [`ModFootprint::File`]: the mod *is* one file (e.g. a loose
///   `.archive` under `archive/pc/mod/`).
///
/// Path convention: lowercased, forward slashes, and a trailing `/` on
/// `Directory` values. This is the same convention used by
/// [`dir_prefixes`](crate::scanner) and by the manifest-covered-dirs set
/// built in `modde-cli::commands::scan`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ModFootprint {
    /// A directory subtree owned by the mod; checked against the set of
    /// directories the manifest writes into.
    Directory(String),
    /// A single file owned by the mod; checked against the set of `To`
    /// paths in the manifest's install directives.
    File(String),
}
377
/// Outcome of [`detect_stale_duplicates`]: the profile's filesystem-scanner
/// rows, split into those the manifest already covers (leaked duplicates)
/// and those it does not (genuine additions).
///
/// A `mod_id` for which the supplied `mod_id_to_footprint` closure yields
/// no footprint (typically `nexus_*`, `wj_*`, or any other
/// non-filesystem-scheme row) appears in **neither** list. Such rows are
/// skipped silently because they are not candidates for this kind of dedup.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct DuplicateReport {
    /// Filesystem-scanner `mod_ids` whose footprint the manifest covers.
    /// Safe to delete from the profile: a manifest-authored row (usually
    /// `nexus_*`) already deploys the same files under a different ID.
    pub leaked: Vec<String>,
    /// Filesystem-scanner `mod_ids` whose footprint the manifest does
    /// **not** cover. These are mods the user added on top of the
    /// Wabbajack modlist and must be preserved.
    pub genuine: Vec<String>,
}
399
400/// Classify a profile's filesystem-scanner rows against a Wabbajack
401/// manifest into "leaked duplicates" and "genuine additions".
402///
403/// This is the pure helper that powers `modde profile dedup` and the
404/// `--prune-duplicates` flag on `modde scan`. See
405/// `/home/can/.claude/plans/greedy-shimmying-pine.md` and the companion
406/// discussion in `docs/` (if present) for the design rationale.
407///
408/// The `mod_id_to_footprint` closure is the game-specific bridge: it
409/// maps a filesystem-scanner `mod_id` (e.g. `cet/ImmersiveHealing`) back
410/// to the directory or file the mod owns in the game install. For
411/// Cyberpunk 2077 this is `modde_games::cyberpunk::scanner::mod_id_footprint`.
412/// Profiles
413/// spanning multiple games aren't supported — each profile is tied to
414/// a single game via `profile.game_id`, so callers wire up a
415/// per-game closure.
416///
417/// Classification rules:
418///
419/// 1. If the closure returns `None` for a `mod_id`, the row is **not a
420///    candidate** — it's skipped silently. `nexus_*` and `wj_*` rows
421///    are manifest-authored and shouldn't be classified as duplicates
422///    of themselves.
423/// 2. If the footprint is [`ModFootprint::Directory`] and the manifest
424///    writes any file under that directory → **LEAKED** (the nexus
425///    archive that deployed those files is already tracked under its
426///    `nexus_*` ID).
427/// 3. If the footprint is [`ModFootprint::File`] and the exact file
428///    path appears in the manifest's install directives → **LEAKED**.
429/// 4. Otherwise → **GENUINE**: the user added this mod on top of the
430///    Wabbajack and it must not be deleted.
431///
432/// Case and slash-normalization: paths are lowercased and
433/// forward-slashed internally, so callers don't need to pre-normalize.
434///
435/// Complexity: O(D × A + M) where D is manifest directive count,
436/// A is average path depth, and M is profile mod count. For a typical
437/// CP2077 modlist (≈7k directives, ≈700 mods) this runs in well under
438/// a millisecond.
439pub fn detect_stale_duplicates<F>(
440    profile: &Profile,
441    manifest: &WabbajackManifest,
442    mod_id_to_footprint: F,
443) -> DuplicateReport
444where
445    F: Fn(&str) -> Option<ModFootprint>,
446{
447    // Build the manifest's covered file set + covered directory set
448    // from its install directives. Only `FromArchive` /
449    // `PatchedFromArchive` directives are "physical" file placements
450    // we can compare against — `CreateBSA` and `InlineFile` don't map
451    // cleanly to a single on-disk file at scan time.
452    //
453    // Wabbajack `To` paths are MO2-staged: they look like
454    // `mods\<MO2 Mod Name>\<game-relative-path>`. We must strip the
455    // `mods/<name>/` prefix before comparing against game-relative
456    // footprints — this mirrors what `match_wabbajack_manifest` does
457    // via `strip_mo2_prefix`. Without this step, every directive path
458    // in a CP2077 modlist begins with `mods/<big mod name>/`, which
459    // never overlaps with a `bin/x64/...` or `archive/pc/mod/...`
460    // footprint, and `detect_stale_duplicates` silently classifies
461    // every row as GENUINE. See profile 3077 for the failure mode.
462    let mut covered_files: HashSet<String> = HashSet::new();
463    for d in manifest.install_directives() {
464        let to = match d {
465            InstallDirective::FromArchive { to, .. }
466            | InstallDirective::PatchedFromArchive { to, .. } => to,
467            _ => continue,
468        };
469        let normalized = to.replace('\\', "/").to_lowercase();
470        covered_files.insert(strip_mo2_prefix(&normalized));
471    }
472
473    // Expand each covered file into its ancestor-directory prefixes so
474    // the Directory footprint check becomes a single O(1) HashSet lookup.
475    let mut covered_dirs: HashSet<String> = HashSet::new();
476    for f in &covered_files {
477        let mut cur = f.as_str();
478        while let Some(idx) = cur.rfind('/') {
479            cur = &cur[..idx];
480            covered_dirs.insert(format!("{cur}/"));
481        }
482    }
483
484    let mut report = DuplicateReport::default();
485    for m in &profile.mods {
486        let footprint = match mod_id_to_footprint(&m.mod_id) {
487            Some(fp) => fp,
488            None => continue, // Not a filesystem-scanner row; skip.
489        };
490        let covered = match &footprint {
491            ModFootprint::Directory(d) => covered_dirs.contains(d),
492            ModFootprint::File(f) => covered_files.contains(f),
493        };
494        if covered {
495            report.leaked.push(m.mod_id.clone());
496        } else {
497            report.genuine.push(m.mod_id.clone());
498        }
499    }
500    report
501}
502
503/// Convert a filesystem-discovered mod into an `EnabledMod`.
504pub fn discovered_to_enabled(
505    mod_id: &str,
506    display_name: &str,
507    version: Option<&str>,
508    _confidence: f32,
509) -> EnabledMod {
510    EnabledMod {
511        mod_id: mod_id.to_string(),
512        display_name: Some(display_name.to_string()),
513        enabled: true,
514        version: version.map(String::from),
515        ..Default::default()
516    }
517}