Skip to main content

fallow_core/
changed_files.rs

1//! Git-aware "changed files" filtering shared between fallow-cli and fallow-lsp.
2//!
3//! Provides:
4//! - [`validate_git_ref`] for input validation at trust boundaries.
5//! - [`ChangedFilesError`] / [`try_get_changed_files`] / [`get_changed_files`]
6//!   for resolving a git ref into the set of changed files.
7//! - [`filter_results_by_changed_files`] for narrowing an [`AnalysisResults`]
8//!   to issues in those files.
9//! - [`filter_duplication_by_changed_files`] for narrowing a
10//!   [`DuplicationReport`] to clone groups touching at least one changed file.
11//!
//! Dependency-level issues (unused deps, type-only deps, test-only deps) are
//! intentionally left untouched by the filtering: "unused dependency" is a
//! function of the entire import graph and can't be attributed to individual
//! changed files, so those findings are always kept.
15
16use std::path::{Path, PathBuf};
17use std::process::Output;
18use std::sync::OnceLock;
19
20use rustc_hash::{FxHashMap, FxHashSet};
21
22use crate::duplicates::{DuplicationReport, DuplicationStats, families};
23use crate::results::AnalysisResults;
24
/// Function pointer signature used by `set_spawn_hook` to intercept the
/// short-running `git rev-parse` / `git diff` / `git ls-files` subprocesses
/// this module spawns. Lets the CLI route those git children through its
/// `ScopedChild` registry so a SIGINT delivered to the parent during
/// watch mode (or any analysis) reaps them instead of letting them run
/// to completion. See `crates/cli/src/signal/` and issue #477.
///
/// The hook receives the fully configured command and must return the same
/// `io::Result<Output>` shape that `Command::output` would.
pub type ChangedFilesSpawnHook = fn(&mut std::process::Command) -> std::io::Result<Output>;

/// Process-wide slot for the installed spawn hook. Written by
/// `set_spawn_hook` (first write wins) and consulted by `spawn_output`
/// before each git invocation; empty until a hook is installed.
static SPAWN_HOOK: OnceLock<ChangedFilesSpawnHook> = OnceLock::new();
34
35/// Install a spawn-hook for this module's git subprocesses. Idempotent;
36/// subsequent calls are no-ops. Called once from the CLI's `main()` so
37/// long-running watch sessions reap pending git children on Ctrl+C.
38/// Defaults to `Command::output` when not set; the function-pointer
39/// indirection costs nothing for embedders and tests that don't install
40/// a hook.
41pub fn set_spawn_hook(hook: ChangedFilesSpawnHook) {
42    let _ = SPAWN_HOOK.set(hook);
43}
44
45fn spawn_output(command: &mut std::process::Command) -> std::io::Result<Output> {
46    if let Some(hook) = SPAWN_HOOK.get() {
47        hook(command)
48    } else {
49        command.output()
50    }
51}
52
/// Check a user-supplied git ref before it is handed to `git diff`.
///
/// A ref is rejected when it:
/// - is empty,
/// - starts with `-` (git would parse it as an option), or
/// - contains a character outside the allowlist: ASCII alphanumerics plus
///   `.`, `_`, `-`, `/`, `~`, `^`, `@`, `{` and `}`.
///
/// Between `{` and `}` a colon or space is additionally accepted, so reflog
/// expressions such as `HEAD@{2025-01-01}` and `HEAD@{1 week ago}` pass. A
/// ref that ends while a `{` is still open is rejected.
///
/// Shared by the CLI (clap value parser) and the LSP (initializationOptions
/// trust boundary) so malformed refs fail fast with a readable message.
///
/// # Errors
///
/// Returns a human-readable description of the first problem found.
pub fn validate_git_ref(s: &str) -> Result<&str, String> {
    if s.is_empty() {
        return Err("git ref cannot be empty".to_string());
    }
    if s.starts_with('-') {
        return Err("git ref cannot start with '-'".to_string());
    }

    // Tracks whether the most recent brace seen was an opening one.
    let mut brace_open = false;
    for ch in s.chars() {
        if ch == '{' {
            brace_open = true;
            continue;
        }
        if ch == '}' {
            brace_open = false;
            continue;
        }

        let always_allowed =
            ch.is_ascii_alphanumeric() || matches!(ch, '.' | '_' | '-' | '/' | '~' | '^' | '@');
        let allowed_in_braces = brace_open && matches!(ch, ':' | ' ');
        if !(always_allowed || allowed_in_braces) {
            return Err(format!("git ref contains disallowed character: '{ch}'"));
        }
    }

    if brace_open {
        return Err("git ref has unclosed '{'".to_string());
    }
    Ok(s)
}
88
/// Classification of a `git diff` failure, so callers can pick their own
/// wording (soft warning vs hard error) without re-parsing stderr.
///
/// Use [`ChangedFilesError::describe`] to obtain a human-readable clause for
/// any variant.
#[derive(Debug)]
pub enum ChangedFilesError {
    /// Git ref failed validation before invoking `git`. Carries the message
    /// produced by `validate_git_ref`.
    InvalidRef(String),
    /// `git` binary not found / not executable. Carries the stringified I/O
    /// error from the failed spawn.
    GitMissing(String),
    /// Command ran but the directory isn't a git repository.
    NotARepository,
    /// Command ran but the ref is invalid / another git error. Carries git's
    /// trimmed stderr.
    GitFailed(String),
}
102
103impl ChangedFilesError {
104    /// Human-readable clause suitable for embedding in an error message.
105    /// Does not include the flag name (e.g. "--changed-since") so callers can
106    /// prepend their own context.
107    pub fn describe(&self) -> String {
108        match self {
109            Self::InvalidRef(e) => format!("invalid git ref: {e}"),
110            Self::GitMissing(e) => format!("failed to run git: {e}"),
111            Self::NotARepository => "not a git repository".to_owned(),
112            Self::GitFailed(stderr) => augment_git_failed(stderr),
113        }
114    }
115}
116
/// Append an actionable hint to raw `git diff` stderr when the failure looks
/// like a missing baseline ref.
///
/// The only recognised case today is git reporting that the ref does not
/// resolve ("not a valid object name", "unknown revision", "ambiguous
/// argument", matched case-insensitively). That is what a shallow CI clone
/// produces when the baseline predates the fetch boundary
/// (`actions/checkout@v4` defaults to `fetch-depth: 1`, GitLab CI to
/// `GIT_DEPTH: 50`). Any other stderr is returned unchanged.
fn augment_git_failed(stderr: &str) -> String {
    const MISSING_REF_MARKERS: [&str; 3] = [
        "not a valid object name",
        "unknown revision",
        "ambiguous argument",
    ];

    let haystack = stderr.to_ascii_lowercase();
    let looks_like_missing_ref = MISSING_REF_MARKERS
        .iter()
        .any(|marker| haystack.contains(*marker));

    if !looks_like_missing_ref {
        return stderr.to_owned();
    }

    format!(
        "{stderr} (shallow clone? try `git fetch --unshallow`, or set `fetch-depth: 0` on actions/checkout / `GIT_DEPTH: 0` in GitLab CI)"
    )
}
135
136/// Resolve the canonical git toplevel for `cwd`.
137///
138/// Runs `git rev-parse --show-toplevel`, which is git's own answer to "where
139/// does this repository live?". The returned path is canonicalized so it
140/// agrees with paths produced by `fs::canonicalize` elsewhere on macOS
141/// (`/tmp` -> `/private/tmp`) and Windows (8.3 short paths).
142///
143/// Used by `try_get_changed_files` to produce changed-file paths whose
144/// absolute form matches what the analysis pipeline emits, regardless of
145/// whether the caller's `cwd` is the repo root or a subdirectory of it.
146pub fn resolve_git_toplevel(cwd: &Path) -> Result<PathBuf, ChangedFilesError> {
147    let output = spawn_output(&mut git_command(cwd, &["rev-parse", "--show-toplevel"]))
148        .map_err(|e| ChangedFilesError::GitMissing(e.to_string()))?;
149
150    if !output.status.success() {
151        let stderr = String::from_utf8_lossy(&output.stderr);
152        return Err(if stderr.contains("not a git repository") {
153            ChangedFilesError::NotARepository
154        } else {
155            ChangedFilesError::GitFailed(stderr.trim().to_owned())
156        });
157    }
158
159    let raw = String::from_utf8_lossy(&output.stdout);
160    let trimmed = raw.trim();
161    if trimmed.is_empty() {
162        return Err(ChangedFilesError::GitFailed(
163            "git rev-parse --show-toplevel returned empty output".to_owned(),
164        ));
165    }
166
167    let path = PathBuf::from(trimmed);
168    Ok(dunce::canonicalize(&path).unwrap_or(path))
169}
170
171fn collect_git_paths(
172    cwd: &Path,
173    toplevel: &Path,
174    args: &[&str],
175) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
176    let output = spawn_output(&mut git_command(cwd, args))
177        .map_err(|e| ChangedFilesError::GitMissing(e.to_string()))?;
178
179    if !output.status.success() {
180        let stderr = String::from_utf8_lossy(&output.stderr);
181        return Err(if stderr.contains("not a git repository") {
182            ChangedFilesError::NotARepository
183        } else {
184            ChangedFilesError::GitFailed(stderr.trim().to_owned())
185        });
186    }
187
188    #[cfg(windows)]
189    let normalise_segment = |line: &str| line.replace('/', "\\");
190    #[cfg(not(windows))]
191    let normalise_segment = |line: &str| line.to_owned();
192
193    let files: FxHashSet<PathBuf> = String::from_utf8_lossy(&output.stdout)
194        .lines()
195        .filter(|line| !line.is_empty())
196        .map(|line| toplevel.join(normalise_segment(line)))
197        .collect();
198
199    Ok(files)
200}
201
202fn git_command(cwd: &Path, args: &[&str]) -> std::process::Command {
203    let mut command = crate::spawn::git();
204    command.args(args).current_dir(cwd);
205    command
206}
207
208/// Get files changed since a git ref. Returns `Err` (with details) when the
209/// git invocation itself failed, so callers can choose between warn-and-ignore
210/// and hard-error behavior.
211///
212/// Includes both:
213/// - committed changes from the merge-base range `git_ref...HEAD`
214/// - tracked staged/unstaged changes from `HEAD` to the current worktree
215/// - untracked files not ignored by Git
216///
217/// This keeps `--changed-since` useful for local validation instead of only
218/// reflecting the last committed `HEAD`.
219///
220/// All paths in the returned set are absolute and rooted at the canonical
221/// git toplevel, not at `root`. This matters when the LSP / CLI is invoked
222/// from a subdirectory of the repository (e.g., a Turborepo workspace at
223/// `apps/web`): `git diff` emits root-relative paths, and we need to join
224/// them against the actual repo root rather than the caller's cwd.
225pub fn try_get_changed_files(
226    root: &Path,
227    git_ref: &str,
228) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
229    validate_git_ref(git_ref).map_err(ChangedFilesError::InvalidRef)?;
230    let toplevel = resolve_git_toplevel(root)?;
231    try_get_changed_files_with_toplevel(root, &toplevel, git_ref)
232}
233
234/// Like [`try_get_changed_files`], but takes a pre-resolved canonical
235/// `toplevel` so callers (the LSP) can cache it across runs and avoid the
236/// extra `git rev-parse --show-toplevel` subprocess on every save.
237///
238/// `toplevel` MUST be the canonical git toplevel for `cwd`; passing anything
239/// else produces incorrect changed-file paths. The CLI does not call this
240/// directly: it uses [`try_get_changed_files`] which resolves on each call.
241pub fn try_get_changed_files_with_toplevel(
242    cwd: &Path,
243    toplevel: &Path,
244    git_ref: &str,
245) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
246    validate_git_ref(git_ref).map_err(ChangedFilesError::InvalidRef)?;
247
248    let mut files = collect_git_paths(
249        cwd,
250        toplevel,
251        &[
252            "diff",
253            "--name-only",
254            "--end-of-options",
255            &format!("{git_ref}...HEAD"),
256        ],
257    )?;
258    files.extend(collect_git_paths(
259        cwd,
260        toplevel,
261        &["diff", "--name-only", "HEAD"],
262    )?);
263    files.extend(collect_git_paths(
264        cwd,
265        toplevel,
266        &["ls-files", "--full-name", "--others", "--exclude-standard"],
267    )?);
268    Ok(files)
269}
270
271/// Get the zero-context unified diff of the merge-base range `git_ref...HEAD`,
272/// with paths relative to `root`, for the line-level security gate (issue #886).
273///
274/// Unlike [`get_changed_files`] (which falls back to full scope on failure), this
275/// returns `Err` when the git invocation itself fails (missing/unfetched ref,
276/// shallow clone, not a repo). The security gate hard-errors on `Err` rather than
277/// emitting a green gate: a diff it could not compute must NEVER read as "no new
278/// sinks". `--relative` emits paths relative to `root` (rewriting the prefix to
279/// match the keys `DiffIndex` is queried with, `relative_to_diff_path(finding,
280/// root)`) and, when fallow runs in a monorepo subpackage, omits changes outside
281/// `root` from the output entirely; a sibling-package edit `git diff --relative`
282/// did emit would carry a `../...` path that `relative_to_diff_path` cannot strip
283/// (returns `None`), which is harmless because no findings exist for files
284/// outside the analyzed `root`. An empty diff (no changes / docs-only) is
285/// `Ok("")`, a clean pass, not an error.
286pub fn try_get_changed_diff(root: &Path, git_ref: &str) -> Result<String, ChangedFilesError> {
287    validate_git_ref(git_ref).map_err(ChangedFilesError::InvalidRef)?;
288    let output = spawn_output(&mut git_command(
289        root,
290        &[
291            "diff",
292            "--relative",
293            "--unified=0",
294            "--end-of-options",
295            &format!("{git_ref}...HEAD"),
296        ],
297    ))
298    .map_err(|e| ChangedFilesError::GitMissing(e.to_string()))?;
299
300    if !output.status.success() {
301        let stderr = String::from_utf8_lossy(&output.stderr);
302        return Err(if stderr.contains("not a git repository") {
303            ChangedFilesError::NotARepository
304        } else {
305            ChangedFilesError::GitFailed(stderr.trim().to_owned())
306        });
307    }
308
309    Ok(String::from_utf8_lossy(&output.stdout).into_owned())
310}
311
312/// Get files changed since a git ref. Returns `None` on git failure after
313/// printing a warning to stderr. Used by `--changed-since` and `--file`, where
314/// a failure falls back to full-scope analysis.
315#[expect(
316    clippy::print_stderr,
317    reason = "intentional user-facing warning for the CLI's --changed-since fallback path; LSP callers use try_get_changed_files instead"
318)]
319pub fn get_changed_files(root: &Path, git_ref: &str) -> Option<FxHashSet<PathBuf>> {
320    match try_get_changed_files(root, git_ref) {
321        Ok(files) => Some(files),
322        Err(ChangedFilesError::InvalidRef(e)) => {
323            eprintln!("Warning: --changed-since ignored: invalid git ref: {e}");
324            None
325        }
326        Err(ChangedFilesError::GitMissing(e)) => {
327            eprintln!("Warning: --changed-since ignored: failed to run git: {e}");
328            None
329        }
330        Err(ChangedFilesError::NotARepository) => {
331            eprintln!("Warning: --changed-since ignored: not a git repository");
332            None
333        }
334        Err(ChangedFilesError::GitFailed(stderr)) => {
335            eprintln!("Warning: --changed-since failed for ref '{git_ref}': {stderr}");
336            None
337        }
338    }
339}
340
341/// Filter `results` to only include issues whose source file is in
342/// `changed_files`.
343///
344/// Dependency-level issues (unused deps, dev deps, optional deps, type-only
345/// deps, test-only deps) are intentionally NOT filtered here. Unlike
346/// file-level issues, a dependency being "unused" is a function of the entire
347/// import graph and can't be attributed to individual changed source files.
348///
349/// This destructure is deliberately exhaustive: adding a field to
350/// `AnalysisResults` must fail compilation here so the author decides
351/// explicitly whether the new finding type is file-attributable (add a retain)
352/// or graph-global (bind with underscore and document why).
353#[expect(
354    clippy::implicit_hasher,
355    reason = "fallow standardizes on FxHashSet across the workspace"
356)]
357pub fn filter_results_by_changed_files(
358    results: &mut AnalysisResults,
359    changed_files: &FxHashSet<PathBuf>,
360) {
361    let AnalysisResults {
362        unused_files,
363        unused_exports,
364        unused_types,
365        private_type_leaks,
366        // Dependency-level issues are graph-global: "unused" is a function of
367        // the whole import graph and cannot be attributed to a changed file.
368        unused_dependencies: _unused_dependencies,
369        unused_dev_dependencies: _unused_dev_dependencies,
370        unused_optional_dependencies: _unused_optional_dependencies,
371        unused_enum_members,
372        unused_class_members,
373        unresolved_imports,
374        unlisted_dependencies,
375        duplicate_exports,
376        // Type-only and test-only dependency issues are graph-global for the
377        // same reason as the other dependency kinds above.
378        type_only_dependencies: _type_only_dependencies,
379        test_only_dependencies: _test_only_dependencies,
380        circular_dependencies,
381        re_export_cycles,
382        boundary_violations,
383        boundary_coverage_violations,
384        boundary_call_violations,
385        policy_violations,
386        stale_suppressions,
387        // Catalog entries are workspace-global: whether a catalog entry is
388        // unused depends on all workspace packages, not a single changed file.
389        unused_catalog_entries: _unused_catalog_entries,
390        empty_catalog_groups,
391        unresolved_catalog_references,
392        unused_dependency_overrides,
393        misconfigured_dependency_overrides,
394        // Non-finding fields: counts and metadata, not issue collections.
395        suppression_count: _suppression_count,
396        active_suppressions: _active_suppressions,
397        feature_flags: _feature_flags,
398        security_findings,
399        security_unresolved_edge_files: _security_unresolved_edge_files,
400        security_unresolved_callee_sites: _security_unresolved_callee_sites,
401        security_unresolved_callee_diagnostics,
402        // Export usages and entry-point summary are metadata, not issue
403        // collections; they are not changed-files filtered.
404        export_usages: _export_usages,
405        entry_point_summary: _entry_point_summary,
406    } = &mut *results;
407
408    let cf = normalize_changed_files_set(changed_files);
409    unused_files.retain(|f| contains_normalized(&cf, &f.file.path));
410    unused_exports.retain(|e| contains_normalized(&cf, &e.export.path));
411    unused_types.retain(|e| contains_normalized(&cf, &e.export.path));
412    private_type_leaks.retain(|e| contains_normalized(&cf, &e.leak.path));
413    unused_enum_members.retain(|m| contains_normalized(&cf, &m.member.path));
414    unused_class_members.retain(|m| contains_normalized(&cf, &m.member.path));
415    unresolved_imports.retain(|i| contains_normalized(&cf, &i.import.path));
416
417    unlisted_dependencies.retain(|d| {
418        d.dep
419            .imported_from
420            .iter()
421            .any(|s| contains_normalized(&cf, &s.path))
422    });
423
424    for dup in &mut *duplicate_exports {
425        dup.export
426            .locations
427            .retain(|loc| contains_normalized(&cf, &loc.path));
428    }
429    duplicate_exports.retain(|d| d.export.locations.len() >= 2);
430
431    circular_dependencies.retain(|c| c.cycle.files.iter().any(|f| contains_normalized(&cf, f)));
432
433    re_export_cycles.retain(|c| c.cycle.files.iter().any(|f| contains_normalized(&cf, f)));
434
435    boundary_violations.retain(|v| contains_normalized(&cf, &v.violation.from_path));
436    boundary_coverage_violations.retain(|v| contains_normalized(&cf, &v.violation.path));
437    boundary_call_violations.retain(|v| contains_normalized(&cf, &v.violation.path));
438    policy_violations.retain(|v| contains_normalized(&cf, &v.violation.path));
439
440    stale_suppressions.retain(|s| contains_normalized(&cf, &s.path));
441
442    security_findings.retain(|f| {
443        contains_normalized(&cf, &f.path)
444            || f.trace
445                .iter()
446                .any(|hop| contains_normalized(&cf, &hop.path))
447            || f.reachability.as_ref().is_some_and(|reachability| {
448                reachability
449                    .untrusted_source_trace
450                    .iter()
451                    .any(|hop| contains_normalized(&cf, &hop.path))
452            })
453    });
454    security_unresolved_callee_diagnostics.retain(|d| contains_normalized(&cf, &d.path));
455
456    unresolved_catalog_references.retain(|r| contains_normalized(&cf, &r.reference.path));
457    empty_catalog_groups.retain(|g| normalized_set_contains_path(&cf, &g.group.path));
458
459    unused_dependency_overrides.retain(|o| contains_normalized(&cf, &o.entry.path));
460    misconfigured_dependency_overrides.retain(|o| contains_normalized(&cf, &o.entry.path));
461}
462
463/// Pre-normalise a `changed_files` set through `dunce::simplified` so each
464/// per-entry comparison can normalise its lookup side and avoid the Windows
465/// `\\?\` verbatim-vs-non-verbatim mismatch. On POSIX `dunce::simplified` is
466/// a no-op, so this is identical to cloning the set.
467///
468/// Background: `try_get_changed_files` joins git-emitted segments onto the
469/// `dunce::canonicalize`d toplevel, so entries land in non-verbatim shape.
470/// Analysis-pipeline paths (clone instances, finding paths) inherit the
471/// shape of `opts.root`, which `validate_root` / discovery / cache lookups
472/// pre-canonicalise with `std::fs::canonicalize` in test fixtures and tools
473/// (which yields verbatim paths on Windows). Comparing the two sides byte
474/// for byte silently dropped every finding before this normalisation.
475fn normalize_changed_files_set(changed_files: &FxHashSet<PathBuf>) -> FxHashSet<PathBuf> {
476    changed_files
477        .iter()
478        .map(|p| dunce::simplified(p).to_path_buf())
479        .collect()
480}
481
482fn contains_normalized(normalized: &FxHashSet<PathBuf>, path: &Path) -> bool {
483    normalized.contains(dunce::simplified(path))
484}
485
486fn normalized_set_contains_path(normalized: &FxHashSet<PathBuf>, path: &Path) -> bool {
487    contains_normalized(normalized, path)
488        || (path.is_relative() && normalized.iter().any(|changed| changed.ends_with(path)))
489}
490
491/// Recompute duplication statistics after filtering.
492///
493/// Uses per-file line deduplication (matching `compute_stats` in
494/// `duplicates/detect.rs`) so overlapping clone instances don't inflate the
495/// duplicated line count.
496fn recompute_duplication_stats(report: &DuplicationReport) -> DuplicationStats {
497    let mut files_with_clones: FxHashSet<&Path> = FxHashSet::default();
498    let mut file_dup_lines: FxHashMap<&Path, FxHashSet<usize>> = FxHashMap::default();
499    let mut duplicated_tokens = 0_usize;
500    let mut clone_instances = 0_usize;
501
502    for group in &report.clone_groups {
503        for instance in &group.instances {
504            files_with_clones.insert(&instance.file);
505            clone_instances += 1;
506            let lines = file_dup_lines.entry(&instance.file).or_default();
507            for line in instance.start_line..=instance.end_line {
508                lines.insert(line);
509            }
510        }
511        duplicated_tokens += group.token_count * group.instances.len();
512    }
513
514    let duplicated_lines: usize = file_dup_lines.values().map(FxHashSet::len).sum();
515
516    DuplicationStats {
517        total_files: report.stats.total_files,
518        files_with_clones: files_with_clones.len(),
519        total_lines: report.stats.total_lines,
520        duplicated_lines,
521        total_tokens: report.stats.total_tokens,
522        duplicated_tokens,
523        clone_groups: report.clone_groups.len(),
524        clone_instances,
525        #[expect(
526            clippy::cast_precision_loss,
527            reason = "stat percentages are display-only; precision loss at usize::MAX line counts is acceptable"
528        )]
529        duplication_percentage: if report.stats.total_lines > 0 {
530            (duplicated_lines as f64 / report.stats.total_lines as f64) * 100.0
531        } else {
532            0.0
533        },
534        clone_groups_below_min_occurrences: report.stats.clone_groups_below_min_occurrences,
535    }
536}
537
538/// Filter a duplication report to only retain clone groups where at least one
539/// instance belongs to a changed file. Families, mirrored directories, and
540/// stats are rebuilt from the surviving groups so consumers see consistent,
541/// correctly-scoped numbers.
542#[expect(
543    clippy::implicit_hasher,
544    reason = "fallow standardizes on FxHashSet across the workspace"
545)]
546pub fn filter_duplication_by_changed_files(
547    report: &mut DuplicationReport,
548    changed_files: &FxHashSet<PathBuf>,
549    root: &Path,
550) {
551    let cf = normalize_changed_files_set(changed_files);
552    report.clone_groups.retain(|g| {
553        g.instances
554            .iter()
555            .any(|i| contains_normalized(&cf, &i.file))
556    });
557    report.clone_families = families::group_into_families(&report.clone_groups, root);
558    report.mirrored_directories =
559        families::detect_mirrored_directories(&report.clone_families, root);
560    report.stats = recompute_duplication_stats(report);
561}
562
563#[cfg(test)]
564mod tests {
565    use super::*;
566    use crate::duplicates::{CloneGroup, CloneInstance};
567    use crate::results::{
568        BoundaryViolation, CircularDependency, EmptyCatalogGroup, SecurityFinding,
569        SecurityFindingKind, SecurityUnresolvedCalleeDiagnostic, TraceHop, TraceHopRole,
570        UnusedExport, UnusedFile,
571    };
572    use fallow_types::extract::{SkippedSecurityCalleeExpressionKind, SkippedSecurityCalleeReason};
573    use fallow_types::output_dead_code::{
574        BoundaryViolationFinding, CircularDependencyFinding, EmptyCatalogGroupFinding,
575        UnusedExportFinding, UnusedFileFinding,
576    };
577    use fallow_types::results::{SecurityReachability, SecuritySeverity};
578
579    #[test]
580    fn changed_files_error_describe_variants() {
581        assert!(
582            ChangedFilesError::InvalidRef("bad".to_owned())
583                .describe()
584                .contains("invalid git ref")
585        );
586        assert!(
587            ChangedFilesError::GitMissing("oops".to_owned())
588                .describe()
589                .contains("oops")
590        );
591        assert_eq!(
592            ChangedFilesError::NotARepository.describe(),
593            "not a git repository"
594        );
595        assert!(
596            ChangedFilesError::GitFailed("bad ref".to_owned())
597                .describe()
598                .contains("bad ref")
599        );
600    }
601
602    #[test]
603    fn augment_git_failed_appends_shallow_clone_hint_for_unknown_revision() {
604        let stderr = "fatal: ambiguous argument 'fallow-baseline...HEAD': unknown revision or path not in the working tree.";
605        let described = ChangedFilesError::GitFailed(stderr.to_owned()).describe();
606        assert!(described.contains(stderr), "original stderr preserved");
607        assert!(
608            described.contains("shallow clone"),
609            "hint surfaced: {described}"
610        );
611        assert!(
612            described.contains("fetch-depth: 0") || described.contains("git fetch --unshallow"),
613            "hint actionable: {described}"
614        );
615    }
616
617    #[test]
618    fn augment_git_failed_passthrough_for_other_errors() {
619        let stderr = "fatal: refusing to merge unrelated histories";
620        let described = ChangedFilesError::GitFailed(stderr.to_owned()).describe();
621        assert_eq!(described, stderr);
622    }
623
624    #[test]
625    fn validate_git_ref_rejects_leading_dash() {
626        assert!(validate_git_ref("--upload-pack=evil").is_err());
627        assert!(validate_git_ref("-flag").is_err());
628    }
629
630    #[test]
631    fn validate_git_ref_accepts_baseline_tag() {
632        assert_eq!(
633            validate_git_ref("fallow-baseline").unwrap(),
634            "fallow-baseline"
635        );
636    }
637
638    #[test]
639    fn changed_files_filter_scopes_unresolved_callee_diagnostics() {
640        let mut results = AnalysisResults::default();
641        results
642            .security_unresolved_callee_diagnostics
643            .push(SecurityUnresolvedCalleeDiagnostic {
644                path: PathBuf::from("/repo/src/changed.ts"),
645                line: 4,
646                col: 0,
647                reason: SkippedSecurityCalleeReason::DynamicDispatch,
648                expression_kind: SkippedSecurityCalleeExpressionKind::Other,
649            });
650        results
651            .security_unresolved_callee_diagnostics
652            .push(SecurityUnresolvedCalleeDiagnostic {
653                path: PathBuf::from("/repo/src/unchanged.ts"),
654                line: 4,
655                col: 0,
656                reason: SkippedSecurityCalleeReason::ComputedMember,
657                expression_kind: SkippedSecurityCalleeExpressionKind::ComputedMemberExpression,
658            });
659
660        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
661        changed.insert(PathBuf::from("/repo/src/changed.ts"));
662
663        filter_results_by_changed_files(&mut results, &changed);
664
665        assert_eq!(results.security_unresolved_callee_diagnostics.len(), 1);
666        assert_eq!(
667            results.security_unresolved_callee_diagnostics[0].path,
668            PathBuf::from("/repo/src/changed.ts")
669        );
670    }
671
672    #[test]
673    fn try_get_changed_files_rejects_invalid_ref() {
674        let err = try_get_changed_files(Path::new("/"), "--evil")
675            .expect_err("leading-dash ref must be rejected");
676        assert!(matches!(err, ChangedFilesError::InvalidRef(_)));
677        assert!(err.describe().contains("cannot start with"));
678    }
679
680    #[test]
681    fn validate_git_ref_rejects_option_like_ref() {
682        assert!(validate_git_ref("--output=/tmp/fallow-proof").is_err());
683    }
684
685    #[test]
686    fn validate_git_ref_allows_reflog_relative_date() {
687        assert!(validate_git_ref("HEAD@{1 week ago}").is_ok());
688    }
689
690    #[test]
691    fn try_get_changed_files_rejects_option_like_ref_before_git() {
692        let root = tempfile::tempdir().expect("create temp dir");
693        let proof_path = root.path().join("proof");
694
695        let result = try_get_changed_files(
696            root.path(),
697            &format!("--output={}", proof_path.to_string_lossy()),
698        );
699
700        assert!(matches!(result, Err(ChangedFilesError::InvalidRef(_))));
701        assert!(
702            !proof_path.exists(),
703            "invalid changedSince ref must not be passed through to git as an option"
704        );
705    }
706
707    #[test]
708    fn git_command_clears_parent_git_environment() {
709        let command = git_command(Path::new("."), &["status", "--short"]);
710        let overrides: Vec<_> = command.get_envs().collect();
711
712        for var in crate::git_env::AMBIENT_GIT_ENV_VARS {
713            assert!(
714                overrides
715                    .iter()
716                    .any(|(key, value)| key.to_str() == Some(*var) && value.is_none()),
717                "git helper must clear inherited {var}",
718            );
719        }
720    }
721
722    #[test]
723    fn filter_results_keeps_only_changed_files() {
724        let mut results = AnalysisResults::default();
725        results
726            .unused_files
727            .push(UnusedFileFinding::with_actions(UnusedFile {
728                path: "/a.ts".into(),
729            }));
730        results
731            .unused_files
732            .push(UnusedFileFinding::with_actions(UnusedFile {
733                path: "/b.ts".into(),
734            }));
735        results
736            .unused_exports
737            .push(UnusedExportFinding::with_actions(UnusedExport {
738                path: "/a.ts".into(),
739                export_name: "foo".into(),
740                is_type_only: false,
741                line: 1,
742                col: 0,
743                span_start: 0,
744                is_re_export: false,
745            }));
746
747        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
748        changed.insert("/a.ts".into());
749
750        filter_results_by_changed_files(&mut results, &changed);
751
752        assert_eq!(results.unused_files.len(), 1);
753        assert_eq!(results.unused_files[0].file.path, PathBuf::from("/a.ts"));
754        assert_eq!(results.unused_exports.len(), 1);
755    }
756
757    #[test]
758    fn filter_results_preserves_dependency_level_issues() {
759        let mut results = AnalysisResults::default();
760        results.unused_dependencies.push(
761            fallow_types::output_dead_code::UnusedDependencyFinding::with_actions(
762                crate::results::UnusedDependency {
763                    package_name: "lodash".into(),
764                    location: crate::results::DependencyLocation::Dependencies,
765                    path: "/pkg.json".into(),
766                    line: 3,
767                    used_in_workspaces: Vec::new(),
768                },
769            ),
770        );
771
772        let changed: FxHashSet<PathBuf> = FxHashSet::default();
773        filter_results_by_changed_files(&mut results, &changed);
774
775        assert_eq!(results.unused_dependencies.len(), 1);
776    }
777
778    #[test]
779    fn filter_results_keeps_circular_dep_when_any_file_changed() {
780        let mut results = AnalysisResults::default();
781        results
782            .circular_dependencies
783            .push(CircularDependencyFinding::with_actions(
784                CircularDependency {
785                    files: vec!["/a.ts".into(), "/b.ts".into()],
786                    length: 2,
787                    line: 1,
788                    col: 0,
789                    edges: Vec::new(),
790                    is_cross_package: false,
791                },
792            ));
793
794        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
795        changed.insert("/b.ts".into());
796
797        filter_results_by_changed_files(&mut results, &changed);
798        assert_eq!(results.circular_dependencies.len(), 1);
799    }
800
801    #[test]
802    fn filter_results_drops_circular_dep_when_no_file_changed() {
803        let mut results = AnalysisResults::default();
804        results
805            .circular_dependencies
806            .push(CircularDependencyFinding::with_actions(
807                CircularDependency {
808                    files: vec!["/a.ts".into(), "/b.ts".into()],
809                    length: 2,
810                    line: 1,
811                    col: 0,
812                    edges: Vec::new(),
813                    is_cross_package: false,
814                },
815            ));
816
817        let changed: FxHashSet<PathBuf> = FxHashSet::default();
818        filter_results_by_changed_files(&mut results, &changed);
819        assert!(results.circular_dependencies.is_empty());
820    }
821
822    #[test]
823    fn filter_results_drops_boundary_violation_when_importer_unchanged() {
824        let mut results = AnalysisResults::default();
825        results
826            .boundary_violations
827            .push(BoundaryViolationFinding::with_actions(BoundaryViolation {
828                from_path: "/a.ts".into(),
829                to_path: "/b.ts".into(),
830                from_zone: "ui".into(),
831                to_zone: "data".into(),
832                import_specifier: "../data/db".into(),
833                line: 1,
834                col: 0,
835            }));
836
837        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
838        changed.insert("/b.ts".into());
839
840        filter_results_by_changed_files(&mut results, &changed);
841        assert!(results.boundary_violations.is_empty());
842    }
843
844    #[test]
845    fn filter_results_keeps_security_finding_when_trace_file_changed() {
846        let mut results = AnalysisResults::default();
847        results.security_findings.push(SecurityFinding {
848            finding_id: String::new(),
849            candidate: fallow_types::results::SecurityCandidate::default(),
850            taint_flow: None,
851            attack_surface: None,
852            kind: SecurityFindingKind::ClientServerLeak,
853            category: None,
854            cwe: None,
855            path: "/project/src/client.tsx".into(),
856            line: 2,
857            col: 0,
858            evidence: "candidate".into(),
859            source_backed: false,
860            source_read: None,
861            severity: SecuritySeverity::Low,
862            trace: vec![
863                TraceHop {
864                    path: "/project/src/client.tsx".into(),
865                    line: 2,
866                    col: 0,
867                    role: TraceHopRole::ClientBoundary,
868                },
869                TraceHop {
870                    path: "/project/src/server.ts".into(),
871                    line: 1,
872                    col: 0,
873                    role: TraceHopRole::SecretSource,
874                },
875            ],
876            actions: Vec::new(),
877            dead_code: None,
878            reachability: None,
879            runtime: None,
880        });
881
882        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
883        changed.insert("/project/src/server.ts".into());
884
885        filter_results_by_changed_files(&mut results, &changed);
886
887        assert_eq!(results.security_findings.len(), 1);
888    }
889
890    #[test]
891    fn filter_results_keeps_security_finding_when_untrusted_source_trace_file_changed() {
892        let mut results = AnalysisResults::default();
893        results.security_findings.push(SecurityFinding {
894            finding_id: String::new(),
895            candidate: fallow_types::results::SecurityCandidate::default(),
896            taint_flow: None,
897            attack_surface: None,
898            kind: SecurityFindingKind::TaintedSink,
899            category: Some("command-injection".into()),
900            cwe: Some(78),
901            path: "/project/src/runner.ts".into(),
902            line: 4,
903            col: 2,
904            evidence: "candidate".into(),
905            source_backed: false,
906            source_read: None,
907            severity: SecuritySeverity::Low,
908            trace: Vec::new(),
909            actions: Vec::new(),
910            dead_code: None,
911            reachability: Some(SecurityReachability {
912                reachable_from_entry: false,
913                reachable_from_untrusted_source: true,
914                taint_confidence: Some(fallow_types::results::TaintConfidence::ModuleLevel),
915                untrusted_source_hop_count: Some(1),
916                untrusted_source_trace: vec![
917                    TraceHop {
918                        path: "/project/src/route.ts".into(),
919                        line: 1,
920                        col: 0,
921                        role: TraceHopRole::UntrustedSource,
922                    },
923                    TraceHop {
924                        path: "/project/src/runner.ts".into(),
925                        line: 4,
926                        col: 2,
927                        role: TraceHopRole::Sink,
928                    },
929                ],
930                blast_radius: 0,
931                crosses_boundary: false,
932            }),
933            runtime: None,
934        });
935
936        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
937        changed.insert("/project/src/route.ts".into());
938
939        filter_results_by_changed_files(&mut results, &changed);
940
941        assert_eq!(results.security_findings.len(), 1);
942    }
943
944    #[test]
945    fn filter_results_keeps_relative_empty_catalog_group_when_manifest_changed() {
946        let mut results = AnalysisResults::default();
947        results
948            .empty_catalog_groups
949            .push(EmptyCatalogGroupFinding::with_actions(EmptyCatalogGroup {
950                catalog_name: "legacy".into(),
951                path: PathBuf::from("pnpm-workspace.yaml"),
952                line: 4,
953            }));
954
955        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
956        changed.insert(PathBuf::from("/repo/pnpm-workspace.yaml"));
957
958        filter_results_by_changed_files(&mut results, &changed);
959
960        assert_eq!(results.empty_catalog_groups.len(), 1);
961        assert_eq!(results.empty_catalog_groups[0].group.catalog_name, "legacy");
962    }
963
964    #[test]
965    fn filter_duplication_keeps_groups_with_at_least_one_changed_instance() {
966        let mut report = DuplicationReport {
967            clone_groups: vec![CloneGroup {
968                instances: vec![
969                    CloneInstance {
970                        file: "/a.ts".into(),
971                        start_line: 1,
972                        end_line: 5,
973                        start_col: 0,
974                        end_col: 10,
975                        fragment: "code".into(),
976                    },
977                    CloneInstance {
978                        file: "/b.ts".into(),
979                        start_line: 1,
980                        end_line: 5,
981                        start_col: 0,
982                        end_col: 10,
983                        fragment: "code".into(),
984                    },
985                ],
986                token_count: 20,
987                line_count: 5,
988            }],
989            clone_families: vec![],
990            mirrored_directories: vec![],
991            stats: DuplicationStats {
992                total_files: 2,
993                files_with_clones: 2,
994                total_lines: 100,
995                duplicated_lines: 10,
996                total_tokens: 200,
997                duplicated_tokens: 40,
998                clone_groups: 1,
999                clone_instances: 2,
1000                duplication_percentage: 10.0,
1001                clone_groups_below_min_occurrences: 0,
1002            },
1003        };
1004
1005        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
1006        changed.insert("/a.ts".into());
1007
1008        filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
1009        assert_eq!(report.clone_groups.len(), 1);
1010        assert_eq!(report.stats.clone_groups, 1);
1011        assert_eq!(report.stats.clone_instances, 2);
1012    }
1013
1014    /// Regression for issue #561: on Windows, `try_get_changed_files` joins
1015    /// segments onto the `dunce::canonicalize`d toplevel (non-verbatim),
1016    /// while analysis-pipeline paths inherit the shape of `opts.root` which
1017    /// tools / test fixtures often pre-canonicalise with `std::fs::canonicalize`
1018    /// (verbatim). The byte-level lookup against `FxHashSet<PathBuf>` then
1019    /// silently dropped every clone group. Pin both sides through a synthetic
1020    /// verbatim path on one side and a plain path on the other.
1021    #[cfg(windows)]
1022    #[test]
1023    fn filter_duplication_normalises_verbatim_prefix_mismatch() {
1024        let mut report = DuplicationReport {
1025            clone_groups: vec![CloneGroup {
1026                instances: vec![
1027                    CloneInstance {
1028                        file: PathBuf::from(r"\\?\C:\repo\src\changed.ts"),
1029                        start_line: 1,
1030                        end_line: 5,
1031                        start_col: 0,
1032                        end_col: 10,
1033                        fragment: "code".into(),
1034                    },
1035                    CloneInstance {
1036                        file: PathBuf::from(r"\\?\C:\repo\src\focused-copy.ts"),
1037                        start_line: 1,
1038                        end_line: 5,
1039                        start_col: 0,
1040                        end_col: 10,
1041                        fragment: "code".into(),
1042                    },
1043                ],
1044                token_count: 20,
1045                line_count: 5,
1046            }],
1047            clone_families: vec![],
1048            mirrored_directories: vec![],
1049            stats: DuplicationStats {
1050                total_files: 2,
1051                files_with_clones: 2,
1052                total_lines: 100,
1053                duplicated_lines: 10,
1054                total_tokens: 200,
1055                duplicated_tokens: 40,
1056                clone_groups: 1,
1057                clone_instances: 2,
1058                duplication_percentage: 10.0,
1059                clone_groups_below_min_occurrences: 0,
1060            },
1061        };
1062
1063        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
1064        changed.insert(PathBuf::from(r"C:\repo\src\changed.ts"));
1065
1066        filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
1067        assert_eq!(
1068            report.clone_groups.len(),
1069            1,
1070            "verbatim instance path must match non-verbatim changed-file entry"
1071        );
1072    }
1073
1074    #[cfg(windows)]
1075    #[test]
1076    fn filter_results_normalises_verbatim_prefix_mismatch() {
1077        let mut results = AnalysisResults::default();
1078        results
1079            .unused_exports
1080            .push(UnusedExportFinding::with_actions(UnusedExport {
1081                path: PathBuf::from(r"\\?\C:\repo\src\a.ts"),
1082                export_name: "foo".into(),
1083                is_type_only: false,
1084                line: 1,
1085                col: 0,
1086                span_start: 0,
1087                is_re_export: false,
1088            }));
1089
1090        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
1091        changed.insert(PathBuf::from(r"C:\repo\src\a.ts"));
1092
1093        filter_results_by_changed_files(&mut results, &changed);
1094        assert_eq!(
1095            results.unused_exports.len(),
1096            1,
1097            "verbatim finding path must match non-verbatim changed-file entry"
1098        );
1099    }
1100
1101    /// Initialize a temp git repo with a single committed file plus a tag
1102    /// at HEAD. Returns the canonical repo root.
1103    ///
1104    /// Uses `dunce::canonicalize` rather than `std::fs::canonicalize` so the
1105    /// returned path agrees with what `resolve_git_toplevel` produces in
1106    /// production (PR #566 swapped that helper to `dunce::canonicalize` to
1107    /// strip the Windows `\\?\` verbatim prefix). `std::fs::canonicalize`
1108    /// still produces verbatim on Windows, so the prior shape diverged from
1109    /// the production helper and downstream `changed.contains(&expected)`
1110    /// assertions silently failed because one side was verbatim and the
1111    /// other was not. POSIX behaviour is identical to `std::fs::canonicalize`.
1112    fn init_repo(repo: &Path) -> PathBuf {
1113        run_git(repo, &["init", "--quiet", "--initial-branch=main"]);
1114        run_git(repo, &["config", "user.email", "test@example.com"]);
1115        run_git(repo, &["config", "user.name", "test"]);
1116        run_git(repo, &["config", "commit.gpgsign", "false"]);
1117        std::fs::write(repo.join("seed.txt"), "seed\n").unwrap();
1118        run_git(repo, &["add", "seed.txt"]);
1119        run_git(repo, &["commit", "--quiet", "-m", "initial"]);
1120        run_git(repo, &["tag", "fallow-baseline"]);
1121        dunce::canonicalize(repo).unwrap()
1122    }
1123
1124    fn run_git(cwd: &Path, args: &[&str]) {
1125        let output = std::process::Command::new("git")
1126            .args(args)
1127            .current_dir(cwd)
1128            .output()
1129            .expect("git available");
1130        assert!(
1131            output.status.success(),
1132            "git {args:?} failed: {}",
1133            String::from_utf8_lossy(&output.stderr)
1134        );
1135    }
1136
1137    /// Workspace at git root, an untracked file is included in the
1138    /// changed-files set with an absolute path joined from the repo root.
1139    #[test]
1140    fn try_get_changed_files_workspace_at_repo_root() {
1141        let tmp = tempfile::tempdir().unwrap();
1142        let repo = init_repo(tmp.path());
1143        std::fs::create_dir_all(repo.join("src")).unwrap();
1144        std::fs::write(repo.join("src/new.ts"), "export const x = 1;\n").unwrap();
1145
1146        let changed = try_get_changed_files(&repo, "fallow-baseline").unwrap();
1147
1148        let expected = repo.join("src/new.ts");
1149        assert!(
1150            changed.contains(&expected),
1151            "changed set should contain {expected:?}; actual: {changed:?}"
1152        );
1153    }
1154
1155    /// Regression test for #190. When the workspace is a subdirectory of
1156    /// the git repository, `git diff --name-only` emits paths relative to
1157    /// the repo root (e.g., `frontend/src/new.ts`). Without the
1158    /// rev-parse-based toplevel resolution the function joined those
1159    /// against the workspace root, producing bogus paths like
1160    /// `<repo>/frontend/frontend/src/new.ts` that never matched
1161    /// `analyze_project` output and silently dropped the filter.
1162    #[test]
1163    fn try_get_changed_files_workspace_in_subdirectory() {
1164        let tmp = tempfile::tempdir().unwrap();
1165        let repo = init_repo(tmp.path());
1166        let frontend = repo.join("frontend");
1167        std::fs::create_dir_all(frontend.join("src")).unwrap();
1168        std::fs::write(frontend.join("src/new.ts"), "export const x = 1;\n").unwrap();
1169
1170        let changed = try_get_changed_files(&frontend, "fallow-baseline").unwrap();
1171
1172        let expected = repo.join("frontend/src/new.ts");
1173        assert!(
1174            changed.contains(&expected),
1175            "changed set should contain canonical {expected:?}; actual: {changed:?}"
1176        );
1177        let bogus = frontend.join("frontend/src/new.ts");
1178        assert!(
1179            !changed.contains(&bogus),
1180            "changed set must not contain double-frontend path {bogus:?}"
1181        );
1182    }
1183
1184    /// A *committed* change in a sibling subdirectory (outside the
1185    /// workspace) appears in the changed-files set because `git diff`
1186    /// is repo-wide regardless of cwd. The downstream
1187    /// `filter_results_by_changed_files` retains it only if
1188    /// `analyze_project` saw it; for a workspace scoped to one subdir,
1189    /// the sibling file is not in the analysis paths and falls away at
1190    /// the result-merge boundary, not here. This test pins the contract:
1191    /// for committed changes, the set is repo-wide.
1192    ///
1193    /// Note: `git ls-files --others --exclude-standard` only lists
1194    /// untracked files in cwd's subtree, so untracked siblings are NOT
1195    /// in the set when invoked from a subdirectory. That's harmless for
1196    /// the LSP because `analyze_project` only walks files under the
1197    /// workspace root either way.
1198    #[test]
1199    fn try_get_changed_files_includes_committed_sibling_changes() {
1200        let tmp = tempfile::tempdir().unwrap();
1201        let repo = init_repo(tmp.path());
1202        let backend = repo.join("backend");
1203        std::fs::create_dir_all(&backend).unwrap();
1204        std::fs::write(backend.join("server.py"), "print('hi')\n").unwrap();
1205        run_git(&repo, &["add", "."]);
1206        run_git(&repo, &["commit", "--quiet", "-m", "add backend"]);
1207
1208        let frontend = repo.join("frontend");
1209        std::fs::create_dir_all(&frontend).unwrap();
1210
1211        let changed = try_get_changed_files(&frontend, "fallow-baseline").unwrap();
1212
1213        let expected = repo.join("backend/server.py");
1214        assert!(
1215            changed.contains(&expected),
1216            "committed sibling backend/server.py should be in the set: {changed:?}"
1217        );
1218    }
1219
1220    /// Modifying a tracked file shows up via `git diff --name-only HEAD`,
1221    /// not just via `ls-files --others`. Confirm the path-join fix
1222    /// applies to that codepath too.
1223    #[test]
1224    fn try_get_changed_files_includes_modified_tracked_file() {
1225        let tmp = tempfile::tempdir().unwrap();
1226        let repo = init_repo(tmp.path());
1227        let frontend = repo.join("frontend");
1228        std::fs::create_dir_all(frontend.join("src")).unwrap();
1229        std::fs::write(frontend.join("src/old.ts"), "export const x = 1;\n").unwrap();
1230        run_git(&repo, &["add", "."]);
1231        run_git(&repo, &["commit", "--quiet", "-m", "add old"]);
1232        run_git(&repo, &["tag", "fallow-baseline-v2"]);
1233        std::fs::write(frontend.join("src/old.ts"), "export const x = 2;\n").unwrap();
1234
1235        let changed = try_get_changed_files(&frontend, "fallow-baseline-v2").unwrap();
1236
1237        let expected = repo.join("frontend/src/old.ts");
1238        assert!(
1239            changed.contains(&expected),
1240            "modified tracked file {expected:?} missing from set: {changed:?}"
1241        );
1242    }
1243
1244    /// `resolve_git_toplevel` returns the canonical repo path even when
1245    /// invoked from inside a subdirectory and via a symlinked input path.
1246    /// On macOS this guards against the `/tmp` -> `/private/tmp`
1247    /// canonicalization gap that would otherwise make the LSP filter set
1248    /// disagree with `analyze_project` paths.
1249    #[test]
1250    fn resolve_git_toplevel_returns_canonical_path() {
1251        let tmp = tempfile::tempdir().unwrap();
1252        let repo = init_repo(tmp.path());
1253        let frontend = repo.join("frontend");
1254        std::fs::create_dir_all(&frontend).unwrap();
1255
1256        let toplevel = resolve_git_toplevel(&frontend).unwrap();
1257        assert_eq!(toplevel, repo, "toplevel should equal canonical repo root");
1258        assert_eq!(
1259            toplevel,
1260            dunce::canonicalize(&toplevel).unwrap(),
1261            "resolved toplevel should already be canonical"
1262        );
1263    }
1264
1265    /// Outside any git repo, `resolve_git_toplevel` returns
1266    /// `NotARepository` rather than panicking or returning a wrong path.
1267    /// The LSP relies on this to fall back to the workspace root cleanly.
1268    #[test]
1269    fn resolve_git_toplevel_not_a_repository() {
1270        let tmp = tempfile::tempdir().unwrap();
1271        let result = resolve_git_toplevel(tmp.path());
1272        assert!(
1273            matches!(result, Err(ChangedFilesError::NotARepository)),
1274            "expected NotARepository, got {result:?}"
1275        );
1276    }
1277
1278    /// `try_get_changed_files` propagates the not-a-repo error so the
1279    /// LSP can warn and fall back to full-scope results.
1280    #[test]
1281    fn try_get_changed_files_not_a_repository() {
1282        let tmp = tempfile::tempdir().unwrap();
1283        let result = try_get_changed_files(tmp.path(), "main");
1284        assert!(matches!(result, Err(ChangedFilesError::NotARepository)));
1285    }
1286
1287    #[test]
1288    fn filter_duplication_drops_groups_with_no_changed_instance() {
1289        let mut report = DuplicationReport {
1290            clone_groups: vec![CloneGroup {
1291                instances: vec![CloneInstance {
1292                    file: "/a.ts".into(),
1293                    start_line: 1,
1294                    end_line: 5,
1295                    start_col: 0,
1296                    end_col: 10,
1297                    fragment: "code".into(),
1298                }],
1299                token_count: 20,
1300                line_count: 5,
1301            }],
1302            clone_families: vec![],
1303            mirrored_directories: vec![],
1304            stats: DuplicationStats {
1305                total_files: 1,
1306                files_with_clones: 1,
1307                total_lines: 100,
1308                duplicated_lines: 5,
1309                total_tokens: 100,
1310                duplicated_tokens: 20,
1311                clone_groups: 1,
1312                clone_instances: 1,
1313                duplication_percentage: 5.0,
1314                clone_groups_below_min_occurrences: 0,
1315            },
1316        };
1317
1318        let changed: FxHashSet<PathBuf> = FxHashSet::default();
1319        filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
1320        assert!(report.clone_groups.is_empty());
1321        assert_eq!(report.stats.clone_groups, 0);
1322        assert_eq!(report.stats.clone_instances, 0);
1323        assert!((report.stats.duplication_percentage - 0.0).abs() < f64::EPSILON);
1324    }
1325}