Skip to main content

fallow_core/
changed_files.rs

1//! Git-aware "changed files" filtering shared between fallow-cli and fallow-lsp.
2//!
3//! Provides:
4//! - [`validate_git_ref`] for input validation at trust boundaries.
5//! - [`ChangedFilesError`] / [`try_get_changed_files`] / [`get_changed_files`]
6//!   for resolving a git ref into the set of changed files.
7//! - [`filter_results_by_changed_files`] for narrowing an [`AnalysisResults`]
8//!   to issues in those files.
9//! - [`filter_duplication_by_changed_files`] for narrowing a
10//!   [`DuplicationReport`] to clone groups touching at least one changed file.
11//!
12//! Both filters intentionally exclude dependency-level issues (unused deps,
13//! type-only deps, test-only deps) since "unused dependency" is a function of
14//! the entire import graph and can't be attributed to individual changed files.
15
16use std::path::{Path, PathBuf};
17use std::process::Output;
18use std::sync::OnceLock;
19
20use rustc_hash::{FxHashMap, FxHashSet};
21
22use crate::duplicates::{DuplicationReport, DuplicationStats, families};
23use crate::results::{
24    AnalysisResults, CircularDependencyFinding, DuplicateExportFinding, DuplicatePropShapeFinding,
25    ReExportCycleFinding, SecurityFinding, UnlistedDependencyFinding,
26};
27use fallow_types::output_dead_code::PropDrillingChainFinding;
28
/// Signature of the hook installed through `set_spawn_hook` to take over
/// execution of the short-running git subprocesses this module spawns
/// (`git rev-parse`, `git diff`, `git ls-files`).
///
/// The CLI uses it to route those children through its `ScopedChild`
/// registry, so a SIGINT delivered to the parent during watch mode (or any
/// analysis) reaps them instead of letting them run to completion. See
/// `crates/cli/src/signal/` and issue #477.
pub type ChangedFilesSpawnHook = fn(&mut std::process::Command) -> Result<Output, std::io::Error>;

/// Process-wide slot holding the installed hook; empty until
/// `set_spawn_hook` has been called.
static SPAWN_HOOK: OnceLock<ChangedFilesSpawnHook> = OnceLock::new();
38
39/// Install a spawn-hook for this module's git subprocesses. Idempotent;
40/// subsequent calls are no-ops. Called once from the CLI's `main()` so
41/// long-running watch sessions reap pending git children on Ctrl+C.
42/// Defaults to `Command::output` when not set; the function-pointer
43/// indirection costs nothing for embedders and tests that don't install
44/// a hook.
45pub fn set_spawn_hook(hook: ChangedFilesSpawnHook) {
46    let _ = SPAWN_HOOK.set(hook);
47}
48
49fn spawn_output(command: &mut std::process::Command) -> std::io::Result<Output> {
50    if let Some(hook) = SPAWN_HOOK.get() {
51        hook(command)
52    } else {
53        command.output()
54    }
55}
56
/// Check a user-supplied git ref before it is handed to `git diff`.
///
/// A ref is rejected when it is empty, when it starts with `-` (git would
/// read it as an option flag), or when it contains a character outside the
/// allowlist used for branch names, tags, SHAs, and reflog expressions
/// (`HEAD~N`, `HEAD@{...}`): ASCII letters and digits plus
/// `.`, `_`, `-`, `/`, `~`, `^`, `@`, `{`, `}`.
///
/// After a `{` and until the next `}`, colons and spaces are accepted too, so
/// reflog timestamps such as `HEAD@{2025-01-01}` and `HEAD@{1 week ago}`
/// round-trip. A ref that ends while a `{` is still open is rejected.
///
/// Both the CLI (clap value parser) and the LSP (initializationOptions trust
/// boundary) use this to fail fast with a readable error instead of passing a
/// malformed ref to git.
///
/// # Errors
///
/// Returns a human-readable description of the first violation found.
pub fn validate_git_ref(s: &str) -> Result<&str, String> {
    // Punctuation accepted anywhere in a ref; braces are handled separately
    // because they also toggle the "inside braces" state.
    const PUNCTUATION: [char; 7] = ['.', '_', '-', '/', '~', '^', '@'];

    if s.is_empty() {
        return Err("git ref cannot be empty".to_string());
    }
    if s.starts_with('-') {
        return Err("git ref cannot start with '-'".to_string());
    }

    let mut brace_open = false;
    for c in s.chars() {
        if c == '{' || c == '}' {
            // `{` opens the relaxed context, `}` closes it.
            brace_open = c == '{';
            continue;
        }
        let relaxed = brace_open && (c == ':' || c == ' ');
        let allowed = relaxed || c.is_ascii_alphanumeric() || PUNCTUATION.contains(&c);
        if !allowed {
            return Err(format!("git ref contains disallowed character: '{c}'"));
        }
    }

    if brace_open {
        Err("git ref has unclosed '{'".to_string())
    } else {
        Ok(s)
    }
}
92
/// Classification of a failure to resolve changed files through git, so
/// callers can pick their own wording (soft warning vs hard error) without
/// re-parsing stderr.
///
/// The type is `Clone` and comparable so callers can cache a failure or
/// assert on a specific variant.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ChangedFilesError {
    /// Git ref failed validation before invoking `git`; carries the
    /// validation message.
    InvalidRef(String),
    /// `git` binary not found / not executable; carries the spawn error text.
    GitMissing(String),
    /// Command ran but the directory isn't a git repository.
    NotARepository,
    /// Command ran but the ref is invalid / another git error; carries git's
    /// trimmed stderr.
    GitFailed(String),
}
106
107impl ChangedFilesError {
108    /// Human-readable clause suitable for embedding in an error message.
109    /// Does not include the flag name (e.g. "--changed-since") so callers can
110    /// prepend their own context.
111    pub fn describe(&self) -> String {
112        match self {
113            Self::InvalidRef(e) => format!("invalid git ref: {e}"),
114            Self::GitMissing(e) => format!("failed to run git: {e}"),
115            Self::NotARepository => "not a git repository".to_owned(),
116            Self::GitFailed(stderr) => augment_git_failed(stderr),
117        }
118    }
119}
120
/// Append an actionable hint to raw `git diff` stderr when the failure mode
/// is recognizable; otherwise return the text unchanged.
///
/// The one case handled today is a shallow-clone miss, where the baseline ref
/// predates the fetch boundary (`actions/checkout@v4` defaults to
/// `fetch-depth: 1`, GitLab CI to `GIT_DEPTH: 50`). Bare git stderr is
/// cryptic for this case, and an inline hint is more useful than a docs link
/// the reader has to chase. Matching is ASCII-case-insensitive.
fn augment_git_failed(stderr: &str) -> String {
    // Phrases git prints when it cannot resolve the requested revision.
    const MISSING_REF_MARKERS: [&str; 3] = [
        "not a valid object name",
        "unknown revision",
        "ambiguous argument",
    ];

    let lowered = stderr.to_ascii_lowercase();
    let ref_is_missing = MISSING_REF_MARKERS
        .iter()
        .any(|marker| lowered.contains(*marker));
    if !ref_is_missing {
        return stderr.to_owned();
    }

    format!(
        "{stderr} (shallow clone? try `git fetch --unshallow`, or set `fetch-depth: 0` on actions/checkout / `GIT_DEPTH: 0` in GitLab CI)"
    )
}
139
140/// Resolve the canonical git toplevel for `cwd`.
141///
142/// Runs `git rev-parse --show-toplevel`, which is git's own answer to "where
143/// does this repository live?". The returned path is canonicalized so it
144/// agrees with paths produced by `fs::canonicalize` elsewhere on macOS
145/// (`/tmp` -> `/private/tmp`) and Windows (8.3 short paths).
146///
147/// Used by `try_get_changed_files` to produce changed-file paths whose
148/// absolute form matches what the analysis pipeline emits, regardless of
149/// whether the caller's `cwd` is the repo root or a subdirectory of it.
150pub fn resolve_git_toplevel(cwd: &Path) -> Result<PathBuf, ChangedFilesError> {
151    let output = spawn_output(&mut git_command(cwd, &["rev-parse", "--show-toplevel"]))
152        .map_err(|e| ChangedFilesError::GitMissing(e.to_string()))?;
153
154    if !output.status.success() {
155        let stderr = String::from_utf8_lossy(&output.stderr);
156        return Err(if stderr.contains("not a git repository") {
157            ChangedFilesError::NotARepository
158        } else {
159            ChangedFilesError::GitFailed(stderr.trim().to_owned())
160        });
161    }
162
163    let raw = String::from_utf8_lossy(&output.stdout);
164    let trimmed = raw.trim();
165    if trimmed.is_empty() {
166        return Err(ChangedFilesError::GitFailed(
167            "git rev-parse --show-toplevel returned empty output".to_owned(),
168        ));
169    }
170
171    let path = PathBuf::from(trimmed);
172    Ok(dunce::canonicalize(&path).unwrap_or(path))
173}
174
175/// Resolve the canonical git *common* directory for `cwd`.
176///
177/// Runs `git rev-parse --path-format=absolute --git-common-dir`. Unlike
178/// `--show-toplevel` (which returns each worktree's own working directory),
179/// `--git-common-dir` returns the SHARED `.git` directory of the repository,
180/// so every linked worktree of the same repo resolves to the SAME path. This
181/// is what lets the Impact store collapse all worktrees of a repo onto a
182/// single identity (one history per repo, not per checkout).
183///
184/// `--path-format=absolute` (git 2.31+) forces an absolute result, so the
185/// bare-`.git` relative form `--git-common-dir` would otherwise emit at the
186/// repo root is avoided. The path is canonicalized to agree with paths from
187/// `fs::canonicalize` elsewhere (macOS `/tmp` -> `/private/tmp`, Windows 8.3).
188pub fn resolve_git_common_dir(cwd: &Path) -> Result<PathBuf, ChangedFilesError> {
189    let output = spawn_output(&mut git_command(
190        cwd,
191        &["rev-parse", "--path-format=absolute", "--git-common-dir"],
192    ))
193    .map_err(|e| ChangedFilesError::GitMissing(e.to_string()))?;
194
195    if !output.status.success() {
196        let stderr = String::from_utf8_lossy(&output.stderr);
197        return Err(if stderr.contains("not a git repository") {
198            ChangedFilesError::NotARepository
199        } else {
200            ChangedFilesError::GitFailed(stderr.trim().to_owned())
201        });
202    }
203
204    let raw = String::from_utf8_lossy(&output.stdout);
205    let trimmed = raw.trim();
206    if trimmed.is_empty() {
207        return Err(ChangedFilesError::GitFailed(
208            "git rev-parse --git-common-dir returned empty output".to_owned(),
209        ));
210    }
211
212    let path = PathBuf::from(trimmed);
213    Ok(dunce::canonicalize(&path).unwrap_or(path))
214}
215
216fn collect_git_paths(
217    cwd: &Path,
218    toplevel: &Path,
219    args: &[&str],
220) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
221    let output = spawn_output(&mut git_command(cwd, args))
222        .map_err(|e| ChangedFilesError::GitMissing(e.to_string()))?;
223
224    if !output.status.success() {
225        let stderr = String::from_utf8_lossy(&output.stderr);
226        return Err(if stderr.contains("not a git repository") {
227            ChangedFilesError::NotARepository
228        } else {
229            ChangedFilesError::GitFailed(stderr.trim().to_owned())
230        });
231    }
232
233    #[cfg(windows)]
234    let normalise_segment = |line: &str| line.replace('/', "\\");
235    #[cfg(not(windows))]
236    let normalise_segment = |line: &str| line.to_owned();
237
238    let files: FxHashSet<PathBuf> = String::from_utf8_lossy(&output.stdout)
239        .lines()
240        .filter(|line| !line.is_empty())
241        .map(|line| toplevel.join(normalise_segment(line)))
242        .collect();
243
244    Ok(files)
245}
246
247fn git_command(cwd: &Path, args: &[&str]) -> std::process::Command {
248    let mut command = crate::spawn::git();
249    command.args(args).current_dir(cwd);
250    command
251}
252
253/// Get files changed since a git ref. Returns `Err` (with details) when the
254/// git invocation itself failed, so callers can choose between warn-and-ignore
255/// and hard-error behavior.
256///
257/// Includes both:
258/// - committed changes from the merge-base range `git_ref...HEAD`
259/// - tracked staged/unstaged changes from `HEAD` to the current worktree
260/// - untracked files not ignored by Git
261///
262/// This keeps `--changed-since` useful for local validation instead of only
263/// reflecting the last committed `HEAD`.
264///
265/// All paths in the returned set are absolute and rooted at the canonical
266/// git toplevel, not at `root`. This matters when the LSP / CLI is invoked
267/// from a subdirectory of the repository (e.g., a Turborepo workspace at
268/// `apps/web`): `git diff` emits root-relative paths, and we need to join
269/// them against the actual repo root rather than the caller's cwd.
270pub fn try_get_changed_files(
271    root: &Path,
272    git_ref: &str,
273) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
274    validate_git_ref(git_ref).map_err(ChangedFilesError::InvalidRef)?;
275    let toplevel = resolve_git_toplevel(root)?;
276    try_get_changed_files_with_toplevel(root, &toplevel, git_ref)
277}
278
279/// Like [`try_get_changed_files`], but takes a pre-resolved canonical
280/// `toplevel` so callers (the LSP) can cache it across runs and avoid the
281/// extra `git rev-parse --show-toplevel` subprocess on every save.
282///
283/// `toplevel` MUST be the canonical git toplevel for `cwd`; passing anything
284/// else produces incorrect changed-file paths. The CLI does not call this
285/// directly: it uses [`try_get_changed_files`] which resolves on each call.
286pub fn try_get_changed_files_with_toplevel(
287    cwd: &Path,
288    toplevel: &Path,
289    git_ref: &str,
290) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
291    validate_git_ref(git_ref).map_err(ChangedFilesError::InvalidRef)?;
292
293    let mut files = collect_git_paths(
294        cwd,
295        toplevel,
296        &[
297            "diff",
298            "--name-only",
299            "--end-of-options",
300            &format!("{git_ref}...HEAD"),
301        ],
302    )?;
303    files.extend(collect_git_paths(
304        cwd,
305        toplevel,
306        &["diff", "--name-only", "HEAD"],
307    )?);
308    files.extend(collect_git_paths(
309        cwd,
310        toplevel,
311        &["ls-files", "--full-name", "--others", "--exclude-standard"],
312    )?);
313    Ok(files)
314}
315
316/// Get the zero-context unified diff of the merge-base range `git_ref...HEAD`,
317/// with paths relative to `root`, for the line-level security gate (issue #886).
318///
319/// Unlike [`get_changed_files`] (which falls back to full scope on failure), this
320/// returns `Err` when the git invocation itself fails (missing/unfetched ref,
321/// shallow clone, not a repo). The security gate hard-errors on `Err` rather than
322/// emitting a green gate: a diff it could not compute must NEVER read as "no new
323/// sinks". `--relative` emits paths relative to `root` (rewriting the prefix to
324/// match the keys `DiffIndex` is queried with, `relative_to_diff_path(finding,
325/// root)`) and, when fallow runs in a monorepo subpackage, omits changes outside
326/// `root` from the output entirely; a sibling-package edit `git diff --relative`
327/// did emit would carry a `../...` path that `relative_to_diff_path` cannot strip
328/// (returns `None`), which is harmless because no findings exist for files
329/// outside the analyzed `root`. An empty diff (no changes / docs-only) is
330/// `Ok("")`, a clean pass, not an error.
331pub fn try_get_changed_diff(root: &Path, git_ref: &str) -> Result<String, ChangedFilesError> {
332    validate_git_ref(git_ref).map_err(ChangedFilesError::InvalidRef)?;
333    let output = spawn_output(&mut git_command(
334        root,
335        &[
336            "diff",
337            "--relative",
338            "--unified=0",
339            "--end-of-options",
340            &format!("{git_ref}...HEAD"),
341        ],
342    ))
343    .map_err(|e| ChangedFilesError::GitMissing(e.to_string()))?;
344
345    if !output.status.success() {
346        let stderr = String::from_utf8_lossy(&output.stderr);
347        return Err(if stderr.contains("not a git repository") {
348            ChangedFilesError::NotARepository
349        } else {
350            ChangedFilesError::GitFailed(stderr.trim().to_owned())
351        });
352    }
353
354    Ok(String::from_utf8_lossy(&output.stdout).into_owned())
355}
356
357/// Get files changed since a git ref. Returns `None` on git failure after
358/// printing a warning to stderr. Used by `--changed-since` and `--file`, where
359/// a failure falls back to full-scope analysis.
360#[expect(
361    clippy::print_stderr,
362    reason = "intentional user-facing warning for the CLI's --changed-since fallback path; LSP callers use try_get_changed_files instead"
363)]
364pub fn get_changed_files(root: &Path, git_ref: &str) -> Option<FxHashSet<PathBuf>> {
365    match try_get_changed_files(root, git_ref) {
366        Ok(files) => Some(files),
367        Err(ChangedFilesError::InvalidRef(e)) => {
368            eprintln!("Warning: --changed-since ignored: invalid git ref: {e}");
369            None
370        }
371        Err(ChangedFilesError::GitMissing(e)) => {
372            eprintln!("Warning: --changed-since ignored: failed to run git: {e}");
373            None
374        }
375        Err(ChangedFilesError::NotARepository) => {
376            eprintln!("Warning: --changed-since ignored: not a git repository");
377            None
378        }
379        Err(ChangedFilesError::GitFailed(stderr)) => {
380            eprintln!("Warning: --changed-since failed for ref '{git_ref}': {stderr}");
381            None
382        }
383    }
384}
385
386/// Filter `results` to only include issues whose source file is in
387/// `changed_files`.
388///
389/// Dependency-level issues (unused deps, dev deps, optional deps, type-only
390/// deps, test-only deps) are intentionally NOT filtered here. Unlike
391/// file-level issues, a dependency being "unused" is a function of the entire
392/// import graph and can't be attributed to individual changed source files.
393///
394/// This destructure is deliberately exhaustive: adding a field to
395/// `AnalysisResults` must fail compilation here so the author decides
396/// explicitly whether the new finding type is file-attributable (add a retain)
397/// or graph-global (bind with underscore and document why).
398#[expect(
399    clippy::implicit_hasher,
400    reason = "fallow standardizes on FxHashSet across the workspace"
401)]
402pub fn filter_results_by_changed_files(
403    results: &mut AnalysisResults,
404    changed_files: &FxHashSet<PathBuf>,
405) {
406    let cf = normalize_changed_files_set(changed_files);
407    classify_changed_file_filter_fields(results);
408    retain_basic_issue_findings_by_changed_path(results, &cf);
409    retain_graph_findings_by_changed_files(results, &cf);
410    retain_boundary_policy_and_suppression_findings(results, &cf);
411    retain_security_and_workspace_findings(results, &cf);
412    retain_framework_findings_by_changed_files(results, &cf);
413}
414
415fn classify_changed_file_filter_fields(results: &AnalysisResults) {
416    let AnalysisResults {
417        unused_files: _unused_files,
418        unused_exports: _unused_exports,
419        unused_types: _unused_types,
420        private_type_leaks: _private_type_leaks,
421        // Dependency-level issues are graph-global: "unused" is a function
422        // of the whole import graph and cannot be attributed to a changed
423        // file.
424        unused_dependencies: _unused_dependencies,
425        unused_dev_dependencies: _unused_dev_dependencies,
426        unused_optional_dependencies: _unused_optional_dependencies,
427        unused_enum_members: _unused_enum_members,
428        unused_class_members: _unused_class_members,
429        unused_store_members: _unused_store_members,
430        unresolved_imports: _unresolved_imports,
431        unlisted_dependencies: _unlisted_dependencies,
432        duplicate_exports: _duplicate_exports,
433        // Type-only and test-only dependency issues are graph-global for
434        // the same reason as the other dependency kinds above.
435        type_only_dependencies: _type_only_dependencies,
436        test_only_dependencies: _test_only_dependencies,
437        circular_dependencies: _circular_dependencies,
438        re_export_cycles: _re_export_cycles,
439        boundary_violations: _boundary_violations,
440        boundary_coverage_violations: _boundary_coverage_violations,
441        boundary_call_violations: _boundary_call_violations,
442        policy_violations: _policy_violations,
443        stale_suppressions: _stale_suppressions,
444        // Catalog entries are workspace-global: whether a catalog entry is
445        // unused depends on all workspace packages, not a single changed
446        // file.
447        unused_catalog_entries: _unused_catalog_entries,
448        empty_catalog_groups: _empty_catalog_groups,
449        unresolved_catalog_references: _unresolved_catalog_references,
450        unused_dependency_overrides: _unused_dependency_overrides,
451        misconfigured_dependency_overrides: _misconfigured_dependency_overrides,
452        invalid_client_exports: _invalid_client_exports,
453        mixed_client_server_barrels: _mixed_client_server_barrels,
454        misplaced_directives: _misplaced_directives,
455        unprovided_injects: _unprovided_injects,
456        unrendered_components: _unrendered_components,
457        route_collisions: _route_collisions,
458        dynamic_segment_name_conflicts: _dynamic_segment_name_conflicts,
459        unused_component_props: _unused_component_props,
460        unused_component_emits: _unused_component_emits,
461        unused_component_inputs: _unused_component_inputs,
462        unused_component_outputs: _unused_component_outputs,
463        unused_svelte_events: _unused_svelte_events,
464        unused_server_actions: _unused_server_actions,
465        unused_load_data_keys: _unused_load_data_keys,
466        // Observability flag, not an issue collection.
467        unused_load_data_keys_global_abstain: _unused_load_data_keys_global_abstain,
468        prop_drilling_chains: _prop_drilling_chains,
469        thin_wrappers: _thin_wrappers,
470        duplicate_prop_shapes: _duplicate_prop_shapes,
471        // Non-finding fields: counts and metadata, not issue collections.
472        suppression_count: _suppression_count,
473        unused_component_props_exempted: _unused_component_props_exempted,
474        active_suppressions: _active_suppressions,
475        feature_flags: _feature_flags,
476        security_findings: _security_findings,
477        security_unresolved_edge_files: _security_unresolved_edge_files,
478        security_unresolved_callee_sites: _security_unresolved_callee_sites,
479        security_unresolved_callee_diagnostics: _security_unresolved_callee_diagnostics,
480        // Export usages and entry-point summary are metadata, not issue
481        // collections; they are not changed-files filtered.
482        export_usages: _export_usages,
483        entry_point_summary: _entry_point_summary,
484        // Render fan-in is a whole-project descriptive metric (the
485        // component-graph analogue of module fan-in), not an issue collection;
486        // it is not changed-files filtered.
487        render_fan_in: _render_fan_in,
488        // Per-component React intel is a descriptive ambient-editor carrier, not
489        // an issue collection; it is not changed-files filtered.
490        react_component_intel: _react_component_intel,
491    } = results;
492}
493
494fn retain_basic_issue_findings_by_changed_path(
495    results: &mut AnalysisResults,
496    changed_files: &FxHashSet<PathBuf>,
497) {
498    retain_by_changed_path(&mut results.unused_files, changed_files, |f| &f.file.path);
499    retain_by_changed_path(&mut results.unused_exports, changed_files, |e| {
500        &e.export.path
501    });
502    retain_by_changed_path(&mut results.unused_types, changed_files, |e| &e.export.path);
503    retain_by_changed_path(&mut results.private_type_leaks, changed_files, |e| {
504        &e.leak.path
505    });
506    retain_by_changed_path(&mut results.unused_enum_members, changed_files, |m| {
507        &m.member.path
508    });
509    retain_by_changed_path(&mut results.unused_class_members, changed_files, |m| {
510        &m.member.path
511    });
512    retain_by_changed_path(&mut results.unused_store_members, changed_files, |m| {
513        &m.member.path
514    });
515    retain_by_changed_path(&mut results.unresolved_imports, changed_files, |i| {
516        &i.import.path
517    });
518}
519
520fn retain_graph_findings_by_changed_files(
521    results: &mut AnalysisResults,
522    changed_files: &FxHashSet<PathBuf>,
523) {
524    retain_unlisted_dependencies_by_import_site(&mut results.unlisted_dependencies, changed_files);
525    retain_duplicate_exports_by_changed_locations(&mut results.duplicate_exports, changed_files);
526    retain_circular_dependencies_by_changed_file(&mut results.circular_dependencies, changed_files);
527    retain_re_export_cycles_by_changed_file(&mut results.re_export_cycles, changed_files);
528}
529
530fn retain_boundary_policy_and_suppression_findings(
531    results: &mut AnalysisResults,
532    changed_files: &FxHashSet<PathBuf>,
533) {
534    retain_by_changed_path(&mut results.boundary_violations, changed_files, |v| {
535        &v.violation.from_path
536    });
537    retain_by_changed_path(
538        &mut results.boundary_coverage_violations,
539        changed_files,
540        |v| &v.violation.path,
541    );
542    retain_by_changed_path(&mut results.boundary_call_violations, changed_files, |v| {
543        &v.violation.path
544    });
545    retain_by_changed_path(&mut results.policy_violations, changed_files, |v| {
546        &v.violation.path
547    });
548    retain_by_changed_path(&mut results.stale_suppressions, changed_files, |s| &s.path);
549}
550
551fn retain_security_and_workspace_findings(
552    results: &mut AnalysisResults,
553    changed_files: &FxHashSet<PathBuf>,
554) {
555    retain_security_findings_by_changed_path(&mut results.security_findings, changed_files);
556    retain_by_changed_path(
557        &mut results.security_unresolved_callee_diagnostics,
558        changed_files,
559        |d| &d.path,
560    );
561    retain_by_changed_path(
562        &mut results.unresolved_catalog_references,
563        changed_files,
564        |r| &r.reference.path,
565    );
566    results
567        .empty_catalog_groups
568        .retain(|g| normalized_set_contains_path(changed_files, &g.group.path));
569    retain_by_changed_path(
570        &mut results.unused_dependency_overrides,
571        changed_files,
572        |o| &o.entry.path,
573    );
574    retain_by_changed_path(
575        &mut results.misconfigured_dependency_overrides,
576        changed_files,
577        |o| &o.entry.path,
578    );
579}
580
581fn retain_framework_findings_by_changed_files(
582    results: &mut AnalysisResults,
583    changed_files: &FxHashSet<PathBuf>,
584) {
585    retain_client_boundary_findings_by_changed_files(results, changed_files);
586    retain_component_contract_findings_by_changed_files(results, changed_files);
587    retain_react_health_findings_by_changed_files(results, changed_files);
588    retain_nextjs_findings_by_changed_files(results, changed_files);
589}
590
591fn retain_client_boundary_findings_by_changed_files(
592    results: &mut AnalysisResults,
593    changed_files: &FxHashSet<PathBuf>,
594) {
595    let AnalysisResults {
596        invalid_client_exports,
597        mixed_client_server_barrels,
598        misplaced_directives,
599        ..
600    } = results;
601
602    retain_by_changed_path(invalid_client_exports, changed_files, |e| &e.export.path);
603    retain_by_changed_path(mixed_client_server_barrels, changed_files, |b| {
604        &b.barrel.path
605    });
606    retain_by_changed_path(misplaced_directives, changed_files, |d| {
607        &d.directive_site.path
608    });
609}
610
611fn retain_component_contract_findings_by_changed_files(
612    results: &mut AnalysisResults,
613    changed_files: &FxHashSet<PathBuf>,
614) {
615    let AnalysisResults {
616        unprovided_injects,
617        unrendered_components,
618        unused_component_props,
619        unused_component_emits,
620        unused_component_inputs,
621        unused_component_outputs,
622        unused_svelte_events,
623        unused_server_actions,
624        unused_load_data_keys,
625        ..
626    } = results;
627
628    retain_by_changed_path(unprovided_injects, changed_files, |i| &i.inject.path);
629    retain_by_changed_path(unrendered_components, changed_files, |c| &c.component.path);
630    retain_by_changed_path(unused_component_props, changed_files, |p| &p.prop.path);
631    retain_by_changed_path(unused_component_emits, changed_files, |e| &e.emit.path);
632    retain_by_changed_path(unused_component_inputs, changed_files, |i| &i.input.path);
633    retain_by_changed_path(unused_component_outputs, changed_files, |o| &o.output.path);
634    retain_by_changed_path(unused_svelte_events, changed_files, |e| &e.event.path);
635    retain_by_changed_path(unused_server_actions, changed_files, |a| &a.action.path);
636    retain_by_changed_path(unused_load_data_keys, changed_files, |k| &k.key.path);
637}
638
639fn retain_react_health_findings_by_changed_files(
640    results: &mut AnalysisResults,
641    changed_files: &FxHashSet<PathBuf>,
642) {
643    let AnalysisResults {
644        prop_drilling_chains,
645        thin_wrappers,
646        duplicate_prop_shapes,
647        ..
648    } = results;
649
650    retain_prop_drilling_chains_by_anchor(prop_drilling_chains, changed_files);
651    retain_by_changed_path(thin_wrappers, changed_files, |w| &w.wrapper.file);
652    retain_duplicate_prop_shapes_by_anchor(duplicate_prop_shapes, changed_files);
653}
654
655fn retain_nextjs_findings_by_changed_files(
656    results: &mut AnalysisResults,
657    changed_files: &FxHashSet<PathBuf>,
658) {
659    let AnalysisResults {
660        route_collisions,
661        dynamic_segment_name_conflicts,
662        ..
663    } = results;
664
665    retain_by_changed_path(route_collisions, changed_files, |c| &c.collision.path);
666    retain_by_changed_path(dynamic_segment_name_conflicts, changed_files, |c| {
667        &c.conflict.path
668    });
669}
670
671fn retain_unlisted_dependencies_by_import_site(
672    dependencies: &mut Vec<UnlistedDependencyFinding>,
673    changed_files: &FxHashSet<PathBuf>,
674) {
675    dependencies.retain(|dependency| {
676        dependency
677            .dep
678            .imported_from
679            .iter()
680            .any(|site| contains_normalized(changed_files, &site.path))
681    });
682}
683
684fn retain_duplicate_exports_by_changed_locations(
685    duplicate_exports: &mut Vec<DuplicateExportFinding>,
686    changed_files: &FxHashSet<PathBuf>,
687) {
688    for duplicate in &mut *duplicate_exports {
689        duplicate
690            .export
691            .locations
692            .retain(|location| contains_normalized(changed_files, &location.path));
693    }
694    duplicate_exports.retain(|duplicate| duplicate.export.locations.len() >= 2);
695}
696
697fn retain_circular_dependencies_by_changed_file(
698    cycles: &mut Vec<CircularDependencyFinding>,
699    changed_files: &FxHashSet<PathBuf>,
700) {
701    cycles.retain(|cycle| {
702        cycle
703            .cycle
704            .files
705            .iter()
706            .any(|file| contains_normalized(changed_files, file))
707    });
708}
709
710fn retain_re_export_cycles_by_changed_file(
711    cycles: &mut Vec<ReExportCycleFinding>,
712    changed_files: &FxHashSet<PathBuf>,
713) {
714    cycles.retain(|cycle| {
715        cycle
716            .cycle
717            .files
718            .iter()
719            .any(|file| contains_normalized(changed_files, file))
720    });
721}
722
723fn retain_security_findings_by_changed_path(
724    findings: &mut Vec<SecurityFinding>,
725    changed_files: &FxHashSet<PathBuf>,
726) {
727    findings.retain(|finding| security_finding_touches_changed_path(finding, changed_files));
728}
729
730fn retain_prop_drilling_chains_by_anchor(
731    chains: &mut Vec<PropDrillingChainFinding>,
732    changed_files: &FxHashSet<PathBuf>,
733) {
734    // Anchor a chain on its source hop's file (the finding anchor).
735    chains.retain(|chain| {
736        chain
737            .chain
738            .hops
739            .first()
740            .is_some_and(|hop| contains_normalized(changed_files, &hop.file))
741    });
742}
743
744fn retain_duplicate_prop_shapes_by_anchor(
745    shapes: &mut Vec<DuplicatePropShapeFinding>,
746    changed_files: &FxHashSet<PathBuf>,
747) {
748    // Anchor a duplicate-prop-shape member on its component definition file.
749    retain_by_changed_path(shapes, changed_files, |shape| &shape.shape.file);
750}
751
752fn retain_by_changed_path<T>(
753    items: &mut Vec<T>,
754    changed_files: &FxHashSet<PathBuf>,
755    path: impl Fn(&T) -> &Path,
756) {
757    items.retain(|item| contains_normalized(changed_files, path(item)));
758}
759
760fn security_finding_touches_changed_path(
761    finding: &SecurityFinding,
762    changed_files: &FxHashSet<PathBuf>,
763) -> bool {
764    contains_normalized(changed_files, &finding.path)
765        || finding
766            .trace
767            .iter()
768            .any(|hop| contains_normalized(changed_files, &hop.path))
769        || finding.reachability.as_ref().is_some_and(|reachability| {
770            reachability
771                .untrusted_source_trace
772                .iter()
773                .any(|hop| contains_normalized(changed_files, &hop.path))
774        })
775}
776
777/// Pre-normalise a `changed_files` set through `dunce::simplified` so each
778/// per-entry comparison can normalise its lookup side and avoid the Windows
779/// `\\?\` verbatim-vs-non-verbatim mismatch. On POSIX `dunce::simplified` is
780/// a no-op, so this is identical to cloning the set.
781///
782/// Background: `try_get_changed_files` joins git-emitted segments onto the
783/// `dunce::canonicalize`d toplevel, so entries land in non-verbatim shape.
784/// Analysis-pipeline paths (clone instances, finding paths) inherit the
785/// shape of `opts.root`, which `validate_root` / discovery / cache lookups
786/// pre-canonicalise with `std::fs::canonicalize` in test fixtures and tools
787/// (which yields verbatim paths on Windows). Comparing the two sides byte
788/// for byte silently dropped every finding before this normalisation.
789fn normalize_changed_files_set(changed_files: &FxHashSet<PathBuf>) -> FxHashSet<PathBuf> {
790    changed_files
791        .iter()
792        .map(|p| dunce::simplified(p).to_path_buf())
793        .collect()
794}
795
796fn contains_normalized(normalized: &FxHashSet<PathBuf>, path: &Path) -> bool {
797    normalized.contains(dunce::simplified(path))
798}
799
800fn normalized_set_contains_path(normalized: &FxHashSet<PathBuf>, path: &Path) -> bool {
801    contains_normalized(normalized, path)
802        || (path.is_relative() && normalized.iter().any(|changed| changed.ends_with(path)))
803}
804
805/// Recompute duplication statistics after filtering.
806///
807/// Uses per-file line deduplication (matching `compute_stats` in
808/// `duplicates/detect.rs`) so overlapping clone instances don't inflate the
809/// duplicated line count.
810fn recompute_duplication_stats(report: &DuplicationReport) -> DuplicationStats {
811    let mut files_with_clones: FxHashSet<&Path> = FxHashSet::default();
812    let mut file_dup_lines: FxHashMap<&Path, FxHashSet<usize>> = FxHashMap::default();
813    let mut duplicated_tokens = 0_usize;
814    let mut clone_instances = 0_usize;
815
816    for group in &report.clone_groups {
817        for instance in &group.instances {
818            files_with_clones.insert(&instance.file);
819            clone_instances += 1;
820            let lines = file_dup_lines.entry(&instance.file).or_default();
821            for line in instance.start_line..=instance.end_line {
822                lines.insert(line);
823            }
824        }
825        duplicated_tokens += group.token_count * group.instances.len();
826    }
827
828    let duplicated_lines: usize = file_dup_lines.values().map(FxHashSet::len).sum();
829
830    DuplicationStats {
831        total_files: report.stats.total_files,
832        files_with_clones: files_with_clones.len(),
833        total_lines: report.stats.total_lines,
834        duplicated_lines,
835        total_tokens: report.stats.total_tokens,
836        duplicated_tokens,
837        clone_groups: report.clone_groups.len(),
838        clone_instances,
839        #[expect(
840            clippy::cast_precision_loss,
841            reason = "stat percentages are display-only; precision loss at usize::MAX line counts is acceptable"
842        )]
843        duplication_percentage: if report.stats.total_lines > 0 {
844            (duplicated_lines as f64 / report.stats.total_lines as f64) * 100.0
845        } else {
846            0.0
847        },
848        clone_groups_below_min_occurrences: report.stats.clone_groups_below_min_occurrences,
849    }
850}
851
852/// Filter a duplication report to only retain clone groups where at least one
853/// instance belongs to a changed file. Families, mirrored directories, and
854/// stats are rebuilt from the surviving groups so consumers see consistent,
855/// correctly-scoped numbers.
856#[expect(
857    clippy::implicit_hasher,
858    reason = "fallow standardizes on FxHashSet across the workspace"
859)]
860pub fn filter_duplication_by_changed_files(
861    report: &mut DuplicationReport,
862    changed_files: &FxHashSet<PathBuf>,
863    root: &Path,
864) {
865    let cf = normalize_changed_files_set(changed_files);
866    report.clone_groups.retain(|g| {
867        g.instances
868            .iter()
869            .any(|i| contains_normalized(&cf, &i.file))
870    });
871    report.clone_families = families::group_into_families(&report.clone_groups, root);
872    report.mirrored_directories =
873        families::detect_mirrored_directories(&report.clone_families, root);
874    report.stats = recompute_duplication_stats(report);
875}
876
877#[cfg(test)]
878mod tests {
879    use super::*;
880    use crate::duplicates::{CloneGroup, CloneInstance};
881    use crate::results::{
882        BoundaryViolation, CircularDependency, EmptyCatalogGroup, SecurityFinding,
883        SecurityFindingKind, SecurityUnresolvedCalleeDiagnostic, TraceHop, TraceHopRole,
884        UnusedExport, UnusedFile,
885    };
886    use fallow_types::extract::{SkippedSecurityCalleeExpressionKind, SkippedSecurityCalleeReason};
887    use fallow_types::output_dead_code::{
888        BoundaryViolationFinding, CircularDependencyFinding, EmptyCatalogGroupFinding,
889        UnusedExportFinding, UnusedFileFinding,
890    };
891    use fallow_types::results::{SecurityReachability, SecuritySeverity};
892
893    #[test]
894    fn changed_files_error_describe_variants() {
895        assert!(
896            ChangedFilesError::InvalidRef("bad".to_owned())
897                .describe()
898                .contains("invalid git ref")
899        );
900        assert!(
901            ChangedFilesError::GitMissing("oops".to_owned())
902                .describe()
903                .contains("oops")
904        );
905        assert_eq!(
906            ChangedFilesError::NotARepository.describe(),
907            "not a git repository"
908        );
909        assert!(
910            ChangedFilesError::GitFailed("bad ref".to_owned())
911                .describe()
912                .contains("bad ref")
913        );
914    }
915
916    #[test]
917    fn augment_git_failed_appends_shallow_clone_hint_for_unknown_revision() {
918        let stderr = "fatal: ambiguous argument 'fallow-baseline...HEAD': unknown revision or path not in the working tree.";
919        let described = ChangedFilesError::GitFailed(stderr.to_owned()).describe();
920        assert!(described.contains(stderr), "original stderr preserved");
921        assert!(
922            described.contains("shallow clone"),
923            "hint surfaced: {described}"
924        );
925        assert!(
926            described.contains("fetch-depth: 0") || described.contains("git fetch --unshallow"),
927            "hint actionable: {described}"
928        );
929    }
930
931    #[test]
932    fn augment_git_failed_passthrough_for_other_errors() {
933        let stderr = "fatal: refusing to merge unrelated histories";
934        let described = ChangedFilesError::GitFailed(stderr.to_owned()).describe();
935        assert_eq!(described, stderr);
936    }
937
938    #[test]
939    fn validate_git_ref_rejects_leading_dash() {
940        assert!(validate_git_ref("--upload-pack=evil").is_err());
941        assert!(validate_git_ref("-flag").is_err());
942    }
943
944    #[test]
945    fn validate_git_ref_accepts_baseline_tag() {
946        assert_eq!(
947            validate_git_ref("fallow-baseline").unwrap(),
948            "fallow-baseline"
949        );
950    }
951
952    #[test]
953    fn changed_files_filter_scopes_unresolved_callee_diagnostics() {
954        let mut results = AnalysisResults::default();
955        results
956            .security_unresolved_callee_diagnostics
957            .push(SecurityUnresolvedCalleeDiagnostic {
958                path: PathBuf::from("/repo/src/changed.ts"),
959                line: 4,
960                col: 0,
961                reason: SkippedSecurityCalleeReason::DynamicDispatch,
962                expression_kind: SkippedSecurityCalleeExpressionKind::Other,
963            });
964        results
965            .security_unresolved_callee_diagnostics
966            .push(SecurityUnresolvedCalleeDiagnostic {
967                path: PathBuf::from("/repo/src/unchanged.ts"),
968                line: 4,
969                col: 0,
970                reason: SkippedSecurityCalleeReason::ComputedMember,
971                expression_kind: SkippedSecurityCalleeExpressionKind::ComputedMemberExpression,
972            });
973
974        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
975        changed.insert(PathBuf::from("/repo/src/changed.ts"));
976
977        filter_results_by_changed_files(&mut results, &changed);
978
979        assert_eq!(results.security_unresolved_callee_diagnostics.len(), 1);
980        assert_eq!(
981            results.security_unresolved_callee_diagnostics[0].path,
982            PathBuf::from("/repo/src/changed.ts")
983        );
984    }
985
986    #[test]
987    fn try_get_changed_files_rejects_invalid_ref() {
988        let err = try_get_changed_files(Path::new("/"), "--evil")
989            .expect_err("leading-dash ref must be rejected");
990        assert!(matches!(err, ChangedFilesError::InvalidRef(_)));
991        assert!(err.describe().contains("cannot start with"));
992    }
993
994    #[test]
995    fn validate_git_ref_rejects_option_like_ref() {
996        assert!(validate_git_ref("--output=/tmp/fallow-proof").is_err());
997    }
998
999    #[test]
1000    fn validate_git_ref_allows_reflog_relative_date() {
1001        assert!(validate_git_ref("HEAD@{1 week ago}").is_ok());
1002    }
1003
1004    #[test]
1005    fn try_get_changed_files_rejects_option_like_ref_before_git() {
1006        let root = tempfile::tempdir().expect("create temp dir");
1007        let proof_path = root.path().join("proof");
1008
1009        let result = try_get_changed_files(
1010            root.path(),
1011            &format!("--output={}", proof_path.to_string_lossy()),
1012        );
1013
1014        assert!(matches!(result, Err(ChangedFilesError::InvalidRef(_))));
1015        assert!(
1016            !proof_path.exists(),
1017            "invalid changedSince ref must not be passed through to git as an option"
1018        );
1019    }
1020
1021    #[test]
1022    fn git_command_clears_parent_git_environment() {
1023        let command = git_command(Path::new("."), &["status", "--short"]);
1024        let overrides: Vec<_> = command.get_envs().collect();
1025
1026        for var in crate::git_env::AMBIENT_GIT_ENV_VARS {
1027            assert!(
1028                overrides
1029                    .iter()
1030                    .any(|(key, value)| key.to_str() == Some(*var) && value.is_none()),
1031                "git helper must clear inherited {var}",
1032            );
1033        }
1034    }
1035
1036    #[test]
1037    fn filter_results_keeps_only_changed_files() {
1038        let mut results = AnalysisResults::default();
1039        results
1040            .unused_files
1041            .push(UnusedFileFinding::with_actions(UnusedFile {
1042                path: "/a.ts".into(),
1043            }));
1044        results
1045            .unused_files
1046            .push(UnusedFileFinding::with_actions(UnusedFile {
1047                path: "/b.ts".into(),
1048            }));
1049        results
1050            .unused_exports
1051            .push(UnusedExportFinding::with_actions(UnusedExport {
1052                path: "/a.ts".into(),
1053                export_name: "foo".into(),
1054                is_type_only: false,
1055                line: 1,
1056                col: 0,
1057                span_start: 0,
1058                is_re_export: false,
1059            }));
1060
1061        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
1062        changed.insert("/a.ts".into());
1063
1064        filter_results_by_changed_files(&mut results, &changed);
1065
1066        assert_eq!(results.unused_files.len(), 1);
1067        assert_eq!(results.unused_files[0].file.path, PathBuf::from("/a.ts"));
1068        assert_eq!(results.unused_exports.len(), 1);
1069    }
1070
1071    #[test]
1072    fn filter_results_preserves_dependency_level_issues() {
1073        let mut results = AnalysisResults::default();
1074        results.unused_dependencies.push(
1075            fallow_types::output_dead_code::UnusedDependencyFinding::with_actions(
1076                crate::results::UnusedDependency {
1077                    package_name: "lodash".into(),
1078                    location: crate::results::DependencyLocation::Dependencies,
1079                    path: "/pkg.json".into(),
1080                    line: 3,
1081                    used_in_workspaces: Vec::new(),
1082                },
1083            ),
1084        );
1085
1086        let changed: FxHashSet<PathBuf> = FxHashSet::default();
1087        filter_results_by_changed_files(&mut results, &changed);
1088
1089        assert_eq!(results.unused_dependencies.len(), 1);
1090    }
1091
1092    #[test]
1093    fn filter_results_keeps_circular_dep_when_any_file_changed() {
1094        let mut results = AnalysisResults::default();
1095        results
1096            .circular_dependencies
1097            .push(CircularDependencyFinding::with_actions(
1098                CircularDependency {
1099                    files: vec!["/a.ts".into(), "/b.ts".into()],
1100                    length: 2,
1101                    line: 1,
1102                    col: 0,
1103                    edges: Vec::new(),
1104                    is_cross_package: false,
1105                },
1106            ));
1107
1108        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
1109        changed.insert("/b.ts".into());
1110
1111        filter_results_by_changed_files(&mut results, &changed);
1112        assert_eq!(results.circular_dependencies.len(), 1);
1113    }
1114
1115    #[test]
1116    fn filter_results_drops_circular_dep_when_no_file_changed() {
1117        let mut results = AnalysisResults::default();
1118        results
1119            .circular_dependencies
1120            .push(CircularDependencyFinding::with_actions(
1121                CircularDependency {
1122                    files: vec!["/a.ts".into(), "/b.ts".into()],
1123                    length: 2,
1124                    line: 1,
1125                    col: 0,
1126                    edges: Vec::new(),
1127                    is_cross_package: false,
1128                },
1129            ));
1130
1131        let changed: FxHashSet<PathBuf> = FxHashSet::default();
1132        filter_results_by_changed_files(&mut results, &changed);
1133        assert!(results.circular_dependencies.is_empty());
1134    }
1135
1136    #[test]
1137    fn filter_results_drops_boundary_violation_when_importer_unchanged() {
1138        let mut results = AnalysisResults::default();
1139        results
1140            .boundary_violations
1141            .push(BoundaryViolationFinding::with_actions(BoundaryViolation {
1142                from_path: "/a.ts".into(),
1143                to_path: "/b.ts".into(),
1144                from_zone: "ui".into(),
1145                to_zone: "data".into(),
1146                import_specifier: "../data/db".into(),
1147                line: 1,
1148                col: 0,
1149            }));
1150
1151        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
1152        changed.insert("/b.ts".into());
1153
1154        filter_results_by_changed_files(&mut results, &changed);
1155        assert!(results.boundary_violations.is_empty());
1156    }
1157
1158    #[test]
1159    fn filter_results_keeps_security_finding_when_trace_file_changed() {
1160        let mut results = AnalysisResults::default();
1161        results.security_findings.push(SecurityFinding {
1162            finding_id: String::new(),
1163            candidate: fallow_types::results::SecurityCandidate::default(),
1164            taint_flow: None,
1165            attack_surface: None,
1166            kind: SecurityFindingKind::ClientServerLeak,
1167            category: None,
1168            cwe: None,
1169            path: "/project/src/client.tsx".into(),
1170            line: 2,
1171            col: 0,
1172            evidence: "candidate".into(),
1173            source_backed: false,
1174            source_read: None,
1175            severity: SecuritySeverity::Low,
1176            trace: vec![
1177                TraceHop {
1178                    path: "/project/src/client.tsx".into(),
1179                    line: 2,
1180                    col: 0,
1181                    role: TraceHopRole::ClientBoundary,
1182                },
1183                TraceHop {
1184                    path: "/project/src/server.ts".into(),
1185                    line: 1,
1186                    col: 0,
1187                    role: TraceHopRole::SecretSource,
1188                },
1189            ],
1190            actions: Vec::new(),
1191            dead_code: None,
1192            reachability: None,
1193            runtime: None,
1194        });
1195
1196        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
1197        changed.insert("/project/src/server.ts".into());
1198
1199        filter_results_by_changed_files(&mut results, &changed);
1200
1201        assert_eq!(results.security_findings.len(), 1);
1202    }
1203
1204    #[test]
1205    fn filter_results_keeps_security_finding_when_untrusted_source_trace_file_changed() {
1206        let mut results = AnalysisResults::default();
1207        results.security_findings.push(SecurityFinding {
1208            finding_id: String::new(),
1209            candidate: fallow_types::results::SecurityCandidate::default(),
1210            taint_flow: None,
1211            attack_surface: None,
1212            kind: SecurityFindingKind::TaintedSink,
1213            category: Some("command-injection".into()),
1214            cwe: Some(78),
1215            path: "/project/src/runner.ts".into(),
1216            line: 4,
1217            col: 2,
1218            evidence: "candidate".into(),
1219            source_backed: false,
1220            source_read: None,
1221            severity: SecuritySeverity::Low,
1222            trace: Vec::new(),
1223            actions: Vec::new(),
1224            dead_code: None,
1225            reachability: Some(SecurityReachability {
1226                reachable_from_entry: false,
1227                reachable_from_untrusted_source: true,
1228                taint_confidence: Some(fallow_types::results::TaintConfidence::ModuleLevel),
1229                untrusted_source_hop_count: Some(1),
1230                untrusted_source_trace: vec![
1231                    TraceHop {
1232                        path: "/project/src/route.ts".into(),
1233                        line: 1,
1234                        col: 0,
1235                        role: TraceHopRole::UntrustedSource,
1236                    },
1237                    TraceHop {
1238                        path: "/project/src/runner.ts".into(),
1239                        line: 4,
1240                        col: 2,
1241                        role: TraceHopRole::Sink,
1242                    },
1243                ],
1244                blast_radius: 0,
1245                crosses_boundary: false,
1246            }),
1247            runtime: None,
1248        });
1249
1250        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
1251        changed.insert("/project/src/route.ts".into());
1252
1253        filter_results_by_changed_files(&mut results, &changed);
1254
1255        assert_eq!(results.security_findings.len(), 1);
1256    }
1257
1258    #[test]
1259    fn filter_results_keeps_relative_empty_catalog_group_when_manifest_changed() {
1260        let mut results = AnalysisResults::default();
1261        results
1262            .empty_catalog_groups
1263            .push(EmptyCatalogGroupFinding::with_actions(EmptyCatalogGroup {
1264                catalog_name: "legacy".into(),
1265                path: PathBuf::from("pnpm-workspace.yaml"),
1266                line: 4,
1267            }));
1268
1269        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
1270        changed.insert(PathBuf::from("/repo/pnpm-workspace.yaml"));
1271
1272        filter_results_by_changed_files(&mut results, &changed);
1273
1274        assert_eq!(results.empty_catalog_groups.len(), 1);
1275        assert_eq!(results.empty_catalog_groups[0].group.catalog_name, "legacy");
1276    }
1277
1278    #[test]
1279    fn filter_duplication_keeps_groups_with_at_least_one_changed_instance() {
1280        let mut report = DuplicationReport {
1281            clone_groups: vec![CloneGroup {
1282                instances: vec![
1283                    CloneInstance {
1284                        file: "/a.ts".into(),
1285                        start_line: 1,
1286                        end_line: 5,
1287                        start_col: 0,
1288                        end_col: 10,
1289                        fragment: "code".into(),
1290                    },
1291                    CloneInstance {
1292                        file: "/b.ts".into(),
1293                        start_line: 1,
1294                        end_line: 5,
1295                        start_col: 0,
1296                        end_col: 10,
1297                        fragment: "code".into(),
1298                    },
1299                ],
1300                token_count: 20,
1301                line_count: 5,
1302            }],
1303            clone_families: vec![],
1304            mirrored_directories: vec![],
1305            stats: DuplicationStats {
1306                total_files: 2,
1307                files_with_clones: 2,
1308                total_lines: 100,
1309                duplicated_lines: 10,
1310                total_tokens: 200,
1311                duplicated_tokens: 40,
1312                clone_groups: 1,
1313                clone_instances: 2,
1314                duplication_percentage: 10.0,
1315                clone_groups_below_min_occurrences: 0,
1316            },
1317        };
1318
1319        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
1320        changed.insert("/a.ts".into());
1321
1322        filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
1323        assert_eq!(report.clone_groups.len(), 1);
1324        assert_eq!(report.stats.clone_groups, 1);
1325        assert_eq!(report.stats.clone_instances, 2);
1326    }
1327
1328    /// Regression for issue #561: on Windows, `try_get_changed_files` joins
1329    /// segments onto the `dunce::canonicalize`d toplevel (non-verbatim),
1330    /// while analysis-pipeline paths inherit the shape of `opts.root` which
1331    /// tools / test fixtures often pre-canonicalise with `std::fs::canonicalize`
1332    /// (verbatim). The byte-level lookup against `FxHashSet<PathBuf>` then
1333    /// silently dropped every clone group. Pin both sides through a synthetic
1334    /// verbatim path on one side and a plain path on the other.
1335    #[cfg(windows)]
1336    #[test]
1337    fn filter_duplication_normalises_verbatim_prefix_mismatch() {
1338        let mut report = DuplicationReport {
1339            clone_groups: vec![CloneGroup {
1340                instances: vec![
1341                    CloneInstance {
1342                        file: PathBuf::from(r"\\?\C:\repo\src\changed.ts"),
1343                        start_line: 1,
1344                        end_line: 5,
1345                        start_col: 0,
1346                        end_col: 10,
1347                        fragment: "code".into(),
1348                    },
1349                    CloneInstance {
1350                        file: PathBuf::from(r"\\?\C:\repo\src\focused-copy.ts"),
1351                        start_line: 1,
1352                        end_line: 5,
1353                        start_col: 0,
1354                        end_col: 10,
1355                        fragment: "code".into(),
1356                    },
1357                ],
1358                token_count: 20,
1359                line_count: 5,
1360            }],
1361            clone_families: vec![],
1362            mirrored_directories: vec![],
1363            stats: DuplicationStats {
1364                total_files: 2,
1365                files_with_clones: 2,
1366                total_lines: 100,
1367                duplicated_lines: 10,
1368                total_tokens: 200,
1369                duplicated_tokens: 40,
1370                clone_groups: 1,
1371                clone_instances: 2,
1372                duplication_percentage: 10.0,
1373                clone_groups_below_min_occurrences: 0,
1374            },
1375        };
1376
1377        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
1378        changed.insert(PathBuf::from(r"C:\repo\src\changed.ts"));
1379
1380        filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
1381        assert_eq!(
1382            report.clone_groups.len(),
1383            1,
1384            "verbatim instance path must match non-verbatim changed-file entry"
1385        );
1386    }
1387
1388    #[cfg(windows)]
1389    #[test]
1390    fn filter_results_normalises_verbatim_prefix_mismatch() {
1391        let mut results = AnalysisResults::default();
1392        results
1393            .unused_exports
1394            .push(UnusedExportFinding::with_actions(UnusedExport {
1395                path: PathBuf::from(r"\\?\C:\repo\src\a.ts"),
1396                export_name: "foo".into(),
1397                is_type_only: false,
1398                line: 1,
1399                col: 0,
1400                span_start: 0,
1401                is_re_export: false,
1402            }));
1403
1404        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
1405        changed.insert(PathBuf::from(r"C:\repo\src\a.ts"));
1406
1407        filter_results_by_changed_files(&mut results, &changed);
1408        assert_eq!(
1409            results.unused_exports.len(),
1410            1,
1411            "verbatim finding path must match non-verbatim changed-file entry"
1412        );
1413    }
1414
1415    /// Initialize a temp git repo with a single committed file plus a tag
1416    /// at HEAD. Returns the canonical repo root.
1417    ///
1418    /// Uses `dunce::canonicalize` rather than `std::fs::canonicalize` so the
1419    /// returned path agrees with what `resolve_git_toplevel` produces in
1420    /// production (PR #566 swapped that helper to `dunce::canonicalize` to
1421    /// strip the Windows `\\?\` verbatim prefix). `std::fs::canonicalize`
1422    /// still produces verbatim on Windows, so the prior shape diverged from
1423    /// the production helper and downstream `changed.contains(&expected)`
1424    /// assertions silently failed because one side was verbatim and the
1425    /// other was not. POSIX behaviour is identical to `std::fs::canonicalize`.
1426    fn init_repo(repo: &Path) -> PathBuf {
1427        run_git(repo, &["init", "--quiet", "--initial-branch=main"]);
1428        run_git(repo, &["config", "user.email", "test@example.com"]);
1429        run_git(repo, &["config", "user.name", "test"]);
1430        run_git(repo, &["config", "commit.gpgsign", "false"]);
1431        std::fs::write(repo.join("seed.txt"), "seed\n").unwrap();
1432        run_git(repo, &["add", "seed.txt"]);
1433        run_git(repo, &["commit", "--quiet", "-m", "initial"]);
1434        run_git(repo, &["tag", "fallow-baseline"]);
1435        dunce::canonicalize(repo).unwrap()
1436    }
1437
1438    fn run_git(cwd: &Path, args: &[&str]) {
1439        let output = std::process::Command::new("git")
1440            .args(args)
1441            .current_dir(cwd)
1442            .output()
1443            .expect("git available");
1444        assert!(
1445            output.status.success(),
1446            "git {args:?} failed: {}",
1447            String::from_utf8_lossy(&output.stderr)
1448        );
1449    }
1450
1451    /// Workspace at git root, an untracked file is included in the
1452    /// changed-files set with an absolute path joined from the repo root.
1453    #[test]
1454    fn try_get_changed_files_workspace_at_repo_root() {
1455        let tmp = tempfile::tempdir().unwrap();
1456        let repo = init_repo(tmp.path());
1457        std::fs::create_dir_all(repo.join("src")).unwrap();
1458        std::fs::write(repo.join("src/new.ts"), "export const x = 1;\n").unwrap();
1459
1460        let changed = try_get_changed_files(&repo, "fallow-baseline").unwrap();
1461
1462        let expected = repo.join("src/new.ts");
1463        assert!(
1464            changed.contains(&expected),
1465            "changed set should contain {expected:?}; actual: {changed:?}"
1466        );
1467    }
1468
1469    /// Regression test for #190. When the workspace is a subdirectory of
1470    /// the git repository, `git diff --name-only` emits paths relative to
1471    /// the repo root (e.g., `frontend/src/new.ts`). Without the
1472    /// rev-parse-based toplevel resolution the function joined those
1473    /// against the workspace root, producing bogus paths like
1474    /// `<repo>/frontend/frontend/src/new.ts` that never matched
1475    /// `analyze_project` output and silently dropped the filter.
1476    #[test]
1477    fn try_get_changed_files_workspace_in_subdirectory() {
1478        let tmp = tempfile::tempdir().unwrap();
1479        let repo = init_repo(tmp.path());
1480        let frontend = repo.join("frontend");
1481        std::fs::create_dir_all(frontend.join("src")).unwrap();
1482        std::fs::write(frontend.join("src/new.ts"), "export const x = 1;\n").unwrap();
1483
1484        let changed = try_get_changed_files(&frontend, "fallow-baseline").unwrap();
1485
1486        let expected = repo.join("frontend/src/new.ts");
1487        assert!(
1488            changed.contains(&expected),
1489            "changed set should contain canonical {expected:?}; actual: {changed:?}"
1490        );
1491        let bogus = frontend.join("frontend/src/new.ts");
1492        assert!(
1493            !changed.contains(&bogus),
1494            "changed set must not contain double-frontend path {bogus:?}"
1495        );
1496    }
1497
1498    /// A *committed* change in a sibling subdirectory (outside the
1499    /// workspace) appears in the changed-files set because `git diff`
1500    /// is repo-wide regardless of cwd. The downstream
1501    /// `filter_results_by_changed_files` retains it only if
1502    /// `analyze_project` saw it; for a workspace scoped to one subdir,
1503    /// the sibling file is not in the analysis paths and falls away at
1504    /// the result-merge boundary, not here. This test pins the contract:
1505    /// for committed changes, the set is repo-wide.
1506    ///
1507    /// Note: `git ls-files --others --exclude-standard` only lists
1508    /// untracked files in cwd's subtree, so untracked siblings are NOT
1509    /// in the set when invoked from a subdirectory. That's harmless for
1510    /// the LSP because `analyze_project` only walks files under the
1511    /// workspace root either way.
1512    #[test]
1513    fn try_get_changed_files_includes_committed_sibling_changes() {
1514        let tmp = tempfile::tempdir().unwrap();
1515        let repo = init_repo(tmp.path());
1516        let backend = repo.join("backend");
1517        std::fs::create_dir_all(&backend).unwrap();
1518        std::fs::write(backend.join("server.py"), "print('hi')\n").unwrap();
1519        run_git(&repo, &["add", "."]);
1520        run_git(&repo, &["commit", "--quiet", "-m", "add backend"]);
1521
1522        let frontend = repo.join("frontend");
1523        std::fs::create_dir_all(&frontend).unwrap();
1524
1525        let changed = try_get_changed_files(&frontend, "fallow-baseline").unwrap();
1526
1527        let expected = repo.join("backend/server.py");
1528        assert!(
1529            changed.contains(&expected),
1530            "committed sibling backend/server.py should be in the set: {changed:?}"
1531        );
1532    }
1533
1534    /// Modifying a tracked file shows up via `git diff --name-only HEAD`,
1535    /// not just via `ls-files --others`. Confirm the path-join fix
1536    /// applies to that codepath too.
1537    #[test]
1538    fn try_get_changed_files_includes_modified_tracked_file() {
1539        let tmp = tempfile::tempdir().unwrap();
1540        let repo = init_repo(tmp.path());
1541        let frontend = repo.join("frontend");
1542        std::fs::create_dir_all(frontend.join("src")).unwrap();
1543        std::fs::write(frontend.join("src/old.ts"), "export const x = 1;\n").unwrap();
1544        run_git(&repo, &["add", "."]);
1545        run_git(&repo, &["commit", "--quiet", "-m", "add old"]);
1546        run_git(&repo, &["tag", "fallow-baseline-v2"]);
1547        std::fs::write(frontend.join("src/old.ts"), "export const x = 2;\n").unwrap();
1548
1549        let changed = try_get_changed_files(&frontend, "fallow-baseline-v2").unwrap();
1550
1551        let expected = repo.join("frontend/src/old.ts");
1552        assert!(
1553            changed.contains(&expected),
1554            "modified tracked file {expected:?} missing from set: {changed:?}"
1555        );
1556    }
1557
1558    /// `resolve_git_toplevel` returns the canonical repo path even when
1559    /// invoked from inside a subdirectory and via a symlinked input path.
1560    /// On macOS this guards against the `/tmp` -> `/private/tmp`
1561    /// canonicalization gap that would otherwise make the LSP filter set
1562    /// disagree with `analyze_project` paths.
1563    #[test]
1564    fn resolve_git_toplevel_returns_canonical_path() {
1565        let tmp = tempfile::tempdir().unwrap();
1566        let repo = init_repo(tmp.path());
1567        let frontend = repo.join("frontend");
1568        std::fs::create_dir_all(&frontend).unwrap();
1569
1570        let toplevel = resolve_git_toplevel(&frontend).unwrap();
1571        assert_eq!(toplevel, repo, "toplevel should equal canonical repo root");
1572        assert_eq!(
1573            toplevel,
1574            dunce::canonicalize(&toplevel).unwrap(),
1575            "resolved toplevel should already be canonical"
1576        );
1577    }
1578
1579    /// Outside any git repo, `resolve_git_toplevel` returns
1580    /// `NotARepository` rather than panicking or returning a wrong path.
1581    /// The LSP relies on this to fall back to the workspace root cleanly.
1582    #[test]
1583    fn resolve_git_toplevel_not_a_repository() {
1584        let tmp = tempfile::tempdir().unwrap();
1585        let result = resolve_git_toplevel(tmp.path());
1586        assert!(
1587            matches!(result, Err(ChangedFilesError::NotARepository)),
1588            "expected NotARepository, got {result:?}"
1589        );
1590    }
1591
1592    /// Two linked worktrees of the same repo resolve to the SAME common dir
1593    /// (the shared `.git`), even though their `--show-toplevel` working
1594    /// directories differ. This is the invariant the Impact store relies on to
1595    /// collapse all worktrees of a repo onto one history.
1596    #[test]
1597    fn resolve_git_common_dir_collapses_worktrees() {
1598        let tmp = tempfile::tempdir().unwrap();
1599        let repo = init_repo(tmp.path());
1600        let linked = tmp.path().join("linked-worktree");
1601        run_git(
1602            &repo,
1603            &[
1604                "worktree",
1605                "add",
1606                "--quiet",
1607                linked.to_str().unwrap(),
1608                "-b",
1609                "feat",
1610            ],
1611        );
1612
1613        let main_common = resolve_git_common_dir(&repo).unwrap();
1614        let linked_common = resolve_git_common_dir(&linked).unwrap();
1615        assert_eq!(
1616            main_common, linked_common,
1617            "worktrees of one repo must share a common dir"
1618        );
1619
1620        // The per-worktree toplevels DO differ, proving the collapse is real.
1621        let main_top = resolve_git_toplevel(&repo).unwrap();
1622        let linked_top = resolve_git_toplevel(&linked).unwrap();
1623        assert_ne!(
1624            main_top, linked_top,
1625            "the two worktrees should have distinct toplevels"
1626        );
1627    }
1628
1629    /// Outside any git repo, `resolve_git_common_dir` returns `NotARepository`
1630    /// so the Impact key can fall back to the canonical root.
1631    #[test]
1632    fn resolve_git_common_dir_not_a_repository() {
1633        let tmp = tempfile::tempdir().unwrap();
1634        let result = resolve_git_common_dir(tmp.path());
1635        assert!(
1636            matches!(result, Err(ChangedFilesError::NotARepository)),
1637            "expected NotARepository, got {result:?}"
1638        );
1639    }
1640
1641    /// `try_get_changed_files` propagates the not-a-repo error so the
1642    /// LSP can warn and fall back to full-scope results.
1643    #[test]
1644    fn try_get_changed_files_not_a_repository() {
1645        let tmp = tempfile::tempdir().unwrap();
1646        let result = try_get_changed_files(tmp.path(), "main");
1647        assert!(matches!(result, Err(ChangedFilesError::NotARepository)));
1648    }
1649
1650    #[test]
1651    fn filter_duplication_drops_groups_with_no_changed_instance() {
1652        let mut report = DuplicationReport {
1653            clone_groups: vec![CloneGroup {
1654                instances: vec![CloneInstance {
1655                    file: "/a.ts".into(),
1656                    start_line: 1,
1657                    end_line: 5,
1658                    start_col: 0,
1659                    end_col: 10,
1660                    fragment: "code".into(),
1661                }],
1662                token_count: 20,
1663                line_count: 5,
1664            }],
1665            clone_families: vec![],
1666            mirrored_directories: vec![],
1667            stats: DuplicationStats {
1668                total_files: 1,
1669                files_with_clones: 1,
1670                total_lines: 100,
1671                duplicated_lines: 5,
1672                total_tokens: 100,
1673                duplicated_tokens: 20,
1674                clone_groups: 1,
1675                clone_instances: 1,
1676                duplication_percentage: 5.0,
1677                clone_groups_below_min_occurrences: 0,
1678            },
1679        };
1680
1681        let changed: FxHashSet<PathBuf> = FxHashSet::default();
1682        filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
1683        assert!(report.clone_groups.is_empty());
1684        assert_eq!(report.stats.clone_groups, 0);
1685        assert_eq!(report.stats.clone_instances, 0);
1686        assert!((report.stats.duplication_percentage - 0.0).abs() < f64::EPSILON);
1687    }
1688}