Skip to main content

fallow_core/
changed_files.rs

//! Git-aware "changed files" filtering shared between fallow-cli and fallow-lsp.
//!
//! Provides:
//! - [`validate_git_ref`] for input validation at trust boundaries.
//! - [`ChangedFilesError`] / [`try_get_changed_files`] / [`get_changed_files`]
//!   for resolving a git ref into the set of changed files.
//! - [`filter_results_by_changed_files`] for narrowing an [`AnalysisResults`]
//!   to issues in those files.
//! - [`filter_duplication_by_changed_files`] for narrowing a
//!   [`DuplicationReport`] to clone groups touching at least one changed file.
//!
//! Filtering intentionally leaves dependency-level issues (unused deps,
//! type-only deps, test-only deps) untouched: "unused dependency" is a function
//! of the entire import graph and can't be attributed to individual changed files.
15
16use std::path::{Path, PathBuf};
17use std::process::Output;
18use std::sync::OnceLock;
19
20use rustc_hash::{FxHashMap, FxHashSet};
21
22use crate::duplicates::{DuplicationReport, DuplicationStats, families};
23use crate::results::AnalysisResults;
24
/// Function pointer signature used by `set_spawn_hook` to intercept the
/// short-running `git rev-parse` / `git diff` / `git ls-files` subprocesses
/// this module spawns. Lets the CLI route those git children through its
/// `ScopedChild` registry so a SIGINT delivered to the parent during
/// watch mode (or any analysis) reaps them instead of letting them run
/// to completion. See `crates/cli/src/signal/` and issue #477.
///
/// The hook receives the fully configured command and must return the same
/// `io::Result<Output>` shape that `Command::output` produces.
pub type ChangedFilesSpawnHook = fn(&mut std::process::Command) -> std::io::Result<Output>;

/// Process-wide slot for the installed spawn hook. Written by
/// `set_spawn_hook` (a `OnceLock`, so only the first write sticks) and read by
/// `spawn_output` each time this module runs a git subprocess.
static SPAWN_HOOK: OnceLock<ChangedFilesSpawnHook> = OnceLock::new();
34
35/// Install a spawn-hook for this module's git subprocesses. Idempotent;
36/// subsequent calls are no-ops. Called once from the CLI's `main()` so
37/// long-running watch sessions reap pending git children on Ctrl+C.
38/// Defaults to `Command::output` when not set; the function-pointer
39/// indirection costs nothing for embedders and tests that don't install
40/// a hook.
41pub fn set_spawn_hook(hook: ChangedFilesSpawnHook) {
42    let _ = SPAWN_HOOK.set(hook);
43}
44
45fn spawn_output(command: &mut std::process::Command) -> std::io::Result<Output> {
46    if let Some(hook) = SPAWN_HOOK.get() {
47        hook(command)
48    } else {
49        command.output()
50    }
51}
52
/// Validate a user-supplied git ref before passing it to `git diff`.
///
/// Rejects empty strings, refs starting with `-` (which `git` would interpret
/// as an option flag), and characters outside the safe allowlist for branch
/// names, tags, SHAs, and revision expressions (`HEAD~N`, `HEAD@{...}`).
///
/// Colons and spaces are additionally allowed, but only inside a brace
/// expression introduced by `@` or `^`, so reflog selectors such as
/// `HEAD@{2025-01-01}` / `HEAD@{1 week ago}` and commit-message searches such
/// as `HEAD^{/fix bug}` round-trip. Braces that merely appear inside a ref
/// name (`feat{x}`) stay legal but do not relax the allowlist.
///
/// Used by both the CLI (clap value parser) and the LSP (initializationOptions
/// trust boundary) to fail fast with a readable error rather than handing a
/// malformed ref to git.
///
/// # Errors
///
/// Returns a human-readable message when the ref is empty, starts with `-`,
/// contains a disallowed character, or leaves a `{` unclosed.
pub fn validate_git_ref(s: &str) -> Result<&str, String> {
    if s.is_empty() {
        return Err("git ref cannot be empty".to_string());
    }
    if s.starts_with('-') {
        return Err("git ref cannot start with '-'".to_string());
    }

    // `open` tracks any `{...}` pair and feeds the unclosed-brace check.
    // `relaxed` is set only for pairs introduced by `@{` / `^{`, the sole
    // places where git's revision syntax allows free-form text.
    let mut open = false;
    let mut relaxed = false;
    let mut previous: Option<char> = None;
    for c in s.chars() {
        match c {
            '{' => {
                // A brace nested inside a relaxed expression stays relaxed.
                relaxed = relaxed || matches!(previous, Some('@' | '^'));
                open = true;
            }
            '}' => {
                open = false;
                relaxed = false;
            }
            ':' | ' ' if relaxed => {}
            c if c.is_ascii_alphanumeric()
                || matches!(c, '.' | '_' | '-' | '/' | '~' | '^' | '@') => {}
            _ => return Err(format!("git ref contains disallowed character: '{c}'")),
        }
        previous = Some(c);
    }
    if open {
        return Err("git ref has unclosed '{'".to_string());
    }
    Ok(s)
}
88
/// Classification of a `git diff` failure, so callers can pick their own
/// wording (soft warning vs hard error) without re-parsing stderr.
///
/// Implements [`std::fmt::Display`] (same text as [`ChangedFilesError::describe`])
/// and [`std::error::Error`], so it can be propagated with `?` into
/// `Box<dyn Error>` or any error type that wraps standard errors.
#[derive(Debug)]
pub enum ChangedFilesError {
    /// Git ref failed validation before invoking `git`.
    InvalidRef(String),
    /// `git` binary not found / not executable.
    GitMissing(String),
    /// Command ran but the directory isn't a git repository.
    NotARepository,
    /// Command ran but the ref is invalid / another git error.
    GitFailed(String),
}

impl ChangedFilesError {
    /// Human-readable clause suitable for embedding in an error message.
    /// Does not include the flag name (e.g. "--changed-since") so callers can
    /// prepend their own context.
    pub fn describe(&self) -> String {
        match self {
            Self::InvalidRef(e) => format!("invalid git ref: {e}"),
            Self::GitMissing(e) => format!("failed to run git: {e}"),
            Self::NotARepository => "not a git repository".to_owned(),
            Self::GitFailed(stderr) => augment_git_failed(stderr),
        }
    }
}

impl std::fmt::Display for ChangedFilesError {
    /// Writes exactly the clause returned by [`ChangedFilesError::describe`].
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.describe())
    }
}

impl std::error::Error for ChangedFilesError {}

/// Enrich a raw `git diff` stderr with actionable hints when the failure mode
/// is recognizable. Today: shallow-clone misses (`actions/checkout@v4` defaults
/// to `fetch-depth: 1`, GitLab CI to `GIT_DEPTH: 50`), where the baseline ref
/// predates the fetch boundary. Bare git stderr is famously cryptic; a hint
/// here is much more useful than a docs link the reader has to chase.
///
/// Matching is ASCII-case-insensitive; unrecognized stderr is returned as-is.
fn augment_git_failed(stderr: &str) -> String {
    // Phrases git prints when a revision cannot be resolved.
    const UNRESOLVED_REF_MARKERS: [&str; 3] = [
        "not a valid object name",
        "unknown revision",
        "ambiguous argument",
    ];

    let lower = stderr.to_ascii_lowercase();
    if UNRESOLVED_REF_MARKERS
        .iter()
        .any(|marker| lower.contains(*marker))
    {
        format!(
            "{stderr} (shallow clone? try `git fetch --unshallow`, or set `fetch-depth: 0` on actions/checkout / `GIT_DEPTH: 0` in GitLab CI)"
        )
    } else {
        stderr.to_owned()
    }
}
135
136/// Resolve the canonical git toplevel for `cwd`.
137///
138/// Runs `git rev-parse --show-toplevel`, which is git's own answer to "where
139/// does this repository live?". The returned path is canonicalized so it
140/// agrees with paths produced by `fs::canonicalize` elsewhere on macOS
141/// (`/tmp` -> `/private/tmp`) and Windows (8.3 short paths).
142///
143/// Used by `try_get_changed_files` to produce changed-file paths whose
144/// absolute form matches what the analysis pipeline emits, regardless of
145/// whether the caller's `cwd` is the repo root or a subdirectory of it.
146pub fn resolve_git_toplevel(cwd: &Path) -> Result<PathBuf, ChangedFilesError> {
147    let output = spawn_output(&mut git_command(cwd, &["rev-parse", "--show-toplevel"]))
148        .map_err(|e| ChangedFilesError::GitMissing(e.to_string()))?;
149
150    if !output.status.success() {
151        let stderr = String::from_utf8_lossy(&output.stderr);
152        return Err(if stderr.contains("not a git repository") {
153            ChangedFilesError::NotARepository
154        } else {
155            ChangedFilesError::GitFailed(stderr.trim().to_owned())
156        });
157    }
158
159    let raw = String::from_utf8_lossy(&output.stdout);
160    let trimmed = raw.trim();
161    if trimmed.is_empty() {
162        return Err(ChangedFilesError::GitFailed(
163            "git rev-parse --show-toplevel returned empty output".to_owned(),
164        ));
165    }
166
167    let path = PathBuf::from(trimmed);
168    Ok(dunce::canonicalize(&path).unwrap_or(path))
169}
170
171fn collect_git_paths(
172    cwd: &Path,
173    toplevel: &Path,
174    args: &[&str],
175) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
176    let output = spawn_output(&mut git_command(cwd, args))
177        .map_err(|e| ChangedFilesError::GitMissing(e.to_string()))?;
178
179    if !output.status.success() {
180        let stderr = String::from_utf8_lossy(&output.stderr);
181        return Err(if stderr.contains("not a git repository") {
182            ChangedFilesError::NotARepository
183        } else {
184            ChangedFilesError::GitFailed(stderr.trim().to_owned())
185        });
186    }
187
188    #[cfg(windows)]
189    let normalise_segment = |line: &str| line.replace('/', "\\");
190    #[cfg(not(windows))]
191    let normalise_segment = |line: &str| line.to_owned();
192
193    let files: FxHashSet<PathBuf> = String::from_utf8_lossy(&output.stdout)
194        .lines()
195        .filter(|line| !line.is_empty())
196        .map(|line| toplevel.join(normalise_segment(line)))
197        .collect();
198
199    Ok(files)
200}
201
202fn git_command(cwd: &Path, args: &[&str]) -> std::process::Command {
203    let mut command = crate::spawn::git();
204    command.args(args).current_dir(cwd);
205    command
206}
207
208/// Get files changed since a git ref. Returns `Err` (with details) when the
209/// git invocation itself failed, so callers can choose between warn-and-ignore
210/// and hard-error behavior.
211///
212/// Includes both:
213/// - committed changes from the merge-base range `git_ref...HEAD`
214/// - tracked staged/unstaged changes from `HEAD` to the current worktree
215/// - untracked files not ignored by Git
216///
217/// This keeps `--changed-since` useful for local validation instead of only
218/// reflecting the last committed `HEAD`.
219///
220/// All paths in the returned set are absolute and rooted at the canonical
221/// git toplevel, not at `root`. This matters when the LSP / CLI is invoked
222/// from a subdirectory of the repository (e.g., a Turborepo workspace at
223/// `apps/web`): `git diff` emits root-relative paths, and we need to join
224/// them against the actual repo root rather than the caller's cwd.
225pub fn try_get_changed_files(
226    root: &Path,
227    git_ref: &str,
228) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
229    validate_git_ref(git_ref).map_err(ChangedFilesError::InvalidRef)?;
230    let toplevel = resolve_git_toplevel(root)?;
231    try_get_changed_files_with_toplevel(root, &toplevel, git_ref)
232}
233
234/// Like [`try_get_changed_files`], but takes a pre-resolved canonical
235/// `toplevel` so callers (the LSP) can cache it across runs and avoid the
236/// extra `git rev-parse --show-toplevel` subprocess on every save.
237///
238/// `toplevel` MUST be the canonical git toplevel for `cwd`; passing anything
239/// else produces incorrect changed-file paths. The CLI does not call this
240/// directly: it uses [`try_get_changed_files`] which resolves on each call.
241pub fn try_get_changed_files_with_toplevel(
242    cwd: &Path,
243    toplevel: &Path,
244    git_ref: &str,
245) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
246    validate_git_ref(git_ref).map_err(ChangedFilesError::InvalidRef)?;
247
248    let mut files = collect_git_paths(
249        cwd,
250        toplevel,
251        &[
252            "diff",
253            "--name-only",
254            "--end-of-options",
255            &format!("{git_ref}...HEAD"),
256        ],
257    )?;
258    files.extend(collect_git_paths(
259        cwd,
260        toplevel,
261        &["diff", "--name-only", "HEAD"],
262    )?);
263    files.extend(collect_git_paths(
264        cwd,
265        toplevel,
266        &["ls-files", "--full-name", "--others", "--exclude-standard"],
267    )?);
268    Ok(files)
269}
270
271/// Get files changed since a git ref. Returns `None` on git failure after
272/// printing a warning to stderr. Used by `--changed-since` and `--file`, where
273/// a failure falls back to full-scope analysis.
274#[expect(
275    clippy::print_stderr,
276    reason = "intentional user-facing warning for the CLI's --changed-since fallback path; LSP callers use try_get_changed_files instead"
277)]
278pub fn get_changed_files(root: &Path, git_ref: &str) -> Option<FxHashSet<PathBuf>> {
279    match try_get_changed_files(root, git_ref) {
280        Ok(files) => Some(files),
281        Err(ChangedFilesError::InvalidRef(e)) => {
282            eprintln!("Warning: --changed-since ignored: invalid git ref: {e}");
283            None
284        }
285        Err(ChangedFilesError::GitMissing(e)) => {
286            eprintln!("Warning: --changed-since ignored: failed to run git: {e}");
287            None
288        }
289        Err(ChangedFilesError::NotARepository) => {
290            eprintln!("Warning: --changed-since ignored: not a git repository");
291            None
292        }
293        Err(ChangedFilesError::GitFailed(stderr)) => {
294            eprintln!("Warning: --changed-since failed for ref '{git_ref}': {stderr}");
295            None
296        }
297    }
298}
299
300/// Filter `results` to only include issues whose source file is in
301/// `changed_files`.
302///
303/// Dependency-level issues (unused deps, dev deps, optional deps, type-only
304/// deps, test-only deps) are intentionally NOT filtered here. Unlike
305/// file-level issues, a dependency being "unused" is a function of the entire
306/// import graph and can't be attributed to individual changed source files.
307#[expect(
308    clippy::implicit_hasher,
309    reason = "fallow standardizes on FxHashSet across the workspace"
310)]
311pub fn filter_results_by_changed_files(
312    results: &mut AnalysisResults,
313    changed_files: &FxHashSet<PathBuf>,
314) {
315    let cf = normalize_changed_files_set(changed_files);
316    results
317        .unused_files
318        .retain(|f| contains_normalized(&cf, &f.file.path));
319    results
320        .unused_exports
321        .retain(|e| contains_normalized(&cf, &e.export.path));
322    results
323        .unused_types
324        .retain(|e| contains_normalized(&cf, &e.export.path));
325    results
326        .private_type_leaks
327        .retain(|e| contains_normalized(&cf, &e.leak.path));
328    results
329        .unused_enum_members
330        .retain(|m| contains_normalized(&cf, &m.member.path));
331    results
332        .unused_class_members
333        .retain(|m| contains_normalized(&cf, &m.member.path));
334    results
335        .unresolved_imports
336        .retain(|i| contains_normalized(&cf, &i.import.path));
337
338    results.unlisted_dependencies.retain(|d| {
339        d.dep
340            .imported_from
341            .iter()
342            .any(|s| contains_normalized(&cf, &s.path))
343    });
344
345    for dup in &mut results.duplicate_exports {
346        dup.export
347            .locations
348            .retain(|loc| contains_normalized(&cf, &loc.path));
349    }
350    results
351        .duplicate_exports
352        .retain(|d| d.export.locations.len() >= 2);
353
354    results
355        .circular_dependencies
356        .retain(|c| c.cycle.files.iter().any(|f| contains_normalized(&cf, f)));
357
358    results
359        .re_export_cycles
360        .retain(|c| c.cycle.files.iter().any(|f| contains_normalized(&cf, f)));
361
362    results
363        .boundary_violations
364        .retain(|v| contains_normalized(&cf, &v.violation.from_path));
365
366    results
367        .stale_suppressions
368        .retain(|s| contains_normalized(&cf, &s.path));
369
370    results.security_findings.retain(|f| {
371        contains_normalized(&cf, &f.path)
372            || f.trace
373                .iter()
374                .any(|hop| contains_normalized(&cf, &hop.path))
375    });
376
377    results
378        .unresolved_catalog_references
379        .retain(|r| contains_normalized(&cf, &r.reference.path));
380    results
381        .empty_catalog_groups
382        .retain(|g| normalized_set_contains_path(&cf, &g.group.path));
383
384    results
385        .unused_dependency_overrides
386        .retain(|o| contains_normalized(&cf, &o.entry.path));
387    results
388        .misconfigured_dependency_overrides
389        .retain(|o| contains_normalized(&cf, &o.entry.path));
390}
391
392/// Pre-normalise a `changed_files` set through `dunce::simplified` so each
393/// per-entry comparison can normalise its lookup side and avoid the Windows
394/// `\\?\` verbatim-vs-non-verbatim mismatch. On POSIX `dunce::simplified` is
395/// a no-op, so this is identical to cloning the set.
396///
397/// Background: `try_get_changed_files` joins git-emitted segments onto the
398/// `dunce::canonicalize`d toplevel, so entries land in non-verbatim shape.
399/// Analysis-pipeline paths (clone instances, finding paths) inherit the
400/// shape of `opts.root`, which `validate_root` / discovery / cache lookups
401/// pre-canonicalise with `std::fs::canonicalize` in test fixtures and tools
402/// (which yields verbatim paths on Windows). Comparing the two sides byte
403/// for byte silently dropped every finding before this normalisation.
404fn normalize_changed_files_set(changed_files: &FxHashSet<PathBuf>) -> FxHashSet<PathBuf> {
405    changed_files
406        .iter()
407        .map(|p| dunce::simplified(p).to_path_buf())
408        .collect()
409}
410
411fn contains_normalized(normalized: &FxHashSet<PathBuf>, path: &Path) -> bool {
412    normalized.contains(dunce::simplified(path))
413}
414
415fn normalized_set_contains_path(normalized: &FxHashSet<PathBuf>, path: &Path) -> bool {
416    contains_normalized(normalized, path)
417        || (path.is_relative() && normalized.iter().any(|changed| changed.ends_with(path)))
418}
419
420/// Recompute duplication statistics after filtering.
421///
422/// Uses per-file line deduplication (matching `compute_stats` in
423/// `duplicates/detect.rs`) so overlapping clone instances don't inflate the
424/// duplicated line count.
425fn recompute_duplication_stats(report: &DuplicationReport) -> DuplicationStats {
426    let mut files_with_clones: FxHashSet<&Path> = FxHashSet::default();
427    let mut file_dup_lines: FxHashMap<&Path, FxHashSet<usize>> = FxHashMap::default();
428    let mut duplicated_tokens = 0_usize;
429    let mut clone_instances = 0_usize;
430
431    for group in &report.clone_groups {
432        for instance in &group.instances {
433            files_with_clones.insert(&instance.file);
434            clone_instances += 1;
435            let lines = file_dup_lines.entry(&instance.file).or_default();
436            for line in instance.start_line..=instance.end_line {
437                lines.insert(line);
438            }
439        }
440        duplicated_tokens += group.token_count * group.instances.len();
441    }
442
443    let duplicated_lines: usize = file_dup_lines.values().map(FxHashSet::len).sum();
444
445    DuplicationStats {
446        total_files: report.stats.total_files,
447        files_with_clones: files_with_clones.len(),
448        total_lines: report.stats.total_lines,
449        duplicated_lines,
450        total_tokens: report.stats.total_tokens,
451        duplicated_tokens,
452        clone_groups: report.clone_groups.len(),
453        clone_instances,
454        #[expect(
455            clippy::cast_precision_loss,
456            reason = "stat percentages are display-only; precision loss at usize::MAX line counts is acceptable"
457        )]
458        duplication_percentage: if report.stats.total_lines > 0 {
459            (duplicated_lines as f64 / report.stats.total_lines as f64) * 100.0
460        } else {
461            0.0
462        },
463        clone_groups_below_min_occurrences: report.stats.clone_groups_below_min_occurrences,
464    }
465}
466
467/// Filter a duplication report to only retain clone groups where at least one
468/// instance belongs to a changed file. Families, mirrored directories, and
469/// stats are rebuilt from the surviving groups so consumers see consistent,
470/// correctly-scoped numbers.
471#[expect(
472    clippy::implicit_hasher,
473    reason = "fallow standardizes on FxHashSet across the workspace"
474)]
475pub fn filter_duplication_by_changed_files(
476    report: &mut DuplicationReport,
477    changed_files: &FxHashSet<PathBuf>,
478    root: &Path,
479) {
480    let cf = normalize_changed_files_set(changed_files);
481    report.clone_groups.retain(|g| {
482        g.instances
483            .iter()
484            .any(|i| contains_normalized(&cf, &i.file))
485    });
486    report.clone_families = families::group_into_families(&report.clone_groups, root);
487    report.mirrored_directories =
488        families::detect_mirrored_directories(&report.clone_families, root);
489    report.stats = recompute_duplication_stats(report);
490}
491
492#[cfg(test)]
493mod tests {
494    use super::*;
495    use crate::duplicates::{CloneGroup, CloneInstance};
496    use crate::results::{
497        BoundaryViolation, CircularDependency, EmptyCatalogGroup, SecurityFinding,
498        SecurityFindingKind, TraceHop, TraceHopRole, UnusedExport, UnusedFile,
499    };
500    use fallow_types::output_dead_code::{
501        BoundaryViolationFinding, CircularDependencyFinding, EmptyCatalogGroupFinding,
502        UnusedExportFinding, UnusedFileFinding,
503    };
504
505    #[test]
506    fn changed_files_error_describe_variants() {
507        assert!(
508            ChangedFilesError::InvalidRef("bad".to_owned())
509                .describe()
510                .contains("invalid git ref")
511        );
512        assert!(
513            ChangedFilesError::GitMissing("oops".to_owned())
514                .describe()
515                .contains("oops")
516        );
517        assert_eq!(
518            ChangedFilesError::NotARepository.describe(),
519            "not a git repository"
520        );
521        assert!(
522            ChangedFilesError::GitFailed("bad ref".to_owned())
523                .describe()
524                .contains("bad ref")
525        );
526    }
527
528    #[test]
529    fn augment_git_failed_appends_shallow_clone_hint_for_unknown_revision() {
530        let stderr = "fatal: ambiguous argument 'fallow-baseline...HEAD': unknown revision or path not in the working tree.";
531        let described = ChangedFilesError::GitFailed(stderr.to_owned()).describe();
532        assert!(described.contains(stderr), "original stderr preserved");
533        assert!(
534            described.contains("shallow clone"),
535            "hint surfaced: {described}"
536        );
537        assert!(
538            described.contains("fetch-depth: 0") || described.contains("git fetch --unshallow"),
539            "hint actionable: {described}"
540        );
541    }
542
543    #[test]
544    fn augment_git_failed_passthrough_for_other_errors() {
545        let stderr = "fatal: refusing to merge unrelated histories";
546        let described = ChangedFilesError::GitFailed(stderr.to_owned()).describe();
547        assert_eq!(described, stderr);
548    }
549
550    #[test]
551    fn validate_git_ref_rejects_leading_dash() {
552        assert!(validate_git_ref("--upload-pack=evil").is_err());
553        assert!(validate_git_ref("-flag").is_err());
554    }
555
556    #[test]
557    fn validate_git_ref_accepts_baseline_tag() {
558        assert_eq!(
559            validate_git_ref("fallow-baseline").unwrap(),
560            "fallow-baseline"
561        );
562    }
563
564    #[test]
565    fn try_get_changed_files_rejects_invalid_ref() {
566        let err = try_get_changed_files(Path::new("/"), "--evil")
567            .expect_err("leading-dash ref must be rejected");
568        assert!(matches!(err, ChangedFilesError::InvalidRef(_)));
569        assert!(err.describe().contains("cannot start with"));
570    }
571
572    #[test]
573    fn validate_git_ref_rejects_option_like_ref() {
574        assert!(validate_git_ref("--output=/tmp/fallow-proof").is_err());
575    }
576
577    #[test]
578    fn validate_git_ref_allows_reflog_relative_date() {
579        assert!(validate_git_ref("HEAD@{1 week ago}").is_ok());
580    }
581
582    #[test]
583    fn try_get_changed_files_rejects_option_like_ref_before_git() {
584        let root = tempfile::tempdir().expect("create temp dir");
585        let proof_path = root.path().join("proof");
586
587        let result = try_get_changed_files(
588            root.path(),
589            &format!("--output={}", proof_path.to_string_lossy()),
590        );
591
592        assert!(matches!(result, Err(ChangedFilesError::InvalidRef(_))));
593        assert!(
594            !proof_path.exists(),
595            "invalid changedSince ref must not be passed through to git as an option"
596        );
597    }
598
599    #[test]
600    fn git_command_clears_parent_git_environment() {
601        let command = git_command(Path::new("."), &["status", "--short"]);
602        let overrides: Vec<_> = command.get_envs().collect();
603
604        for var in crate::git_env::AMBIENT_GIT_ENV_VARS {
605            assert!(
606                overrides
607                    .iter()
608                    .any(|(key, value)| key.to_str() == Some(*var) && value.is_none()),
609                "git helper must clear inherited {var}",
610            );
611        }
612    }
613
614    #[test]
615    fn filter_results_keeps_only_changed_files() {
616        let mut results = AnalysisResults::default();
617        results
618            .unused_files
619            .push(UnusedFileFinding::with_actions(UnusedFile {
620                path: "/a.ts".into(),
621            }));
622        results
623            .unused_files
624            .push(UnusedFileFinding::with_actions(UnusedFile {
625                path: "/b.ts".into(),
626            }));
627        results
628            .unused_exports
629            .push(UnusedExportFinding::with_actions(UnusedExport {
630                path: "/a.ts".into(),
631                export_name: "foo".into(),
632                is_type_only: false,
633                line: 1,
634                col: 0,
635                span_start: 0,
636                is_re_export: false,
637            }));
638
639        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
640        changed.insert("/a.ts".into());
641
642        filter_results_by_changed_files(&mut results, &changed);
643
644        assert_eq!(results.unused_files.len(), 1);
645        assert_eq!(results.unused_files[0].file.path, PathBuf::from("/a.ts"));
646        assert_eq!(results.unused_exports.len(), 1);
647    }
648
649    #[test]
650    fn filter_results_preserves_dependency_level_issues() {
651        let mut results = AnalysisResults::default();
652        results.unused_dependencies.push(
653            fallow_types::output_dead_code::UnusedDependencyFinding::with_actions(
654                crate::results::UnusedDependency {
655                    package_name: "lodash".into(),
656                    location: crate::results::DependencyLocation::Dependencies,
657                    path: "/pkg.json".into(),
658                    line: 3,
659                    used_in_workspaces: Vec::new(),
660                },
661            ),
662        );
663
664        let changed: FxHashSet<PathBuf> = FxHashSet::default();
665        filter_results_by_changed_files(&mut results, &changed);
666
667        assert_eq!(results.unused_dependencies.len(), 1);
668    }
669
670    #[test]
671    fn filter_results_keeps_circular_dep_when_any_file_changed() {
672        let mut results = AnalysisResults::default();
673        results
674            .circular_dependencies
675            .push(CircularDependencyFinding::with_actions(
676                CircularDependency {
677                    files: vec!["/a.ts".into(), "/b.ts".into()],
678                    length: 2,
679                    line: 1,
680                    col: 0,
681                    edges: Vec::new(),
682                    is_cross_package: false,
683                },
684            ));
685
686        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
687        changed.insert("/b.ts".into());
688
689        filter_results_by_changed_files(&mut results, &changed);
690        assert_eq!(results.circular_dependencies.len(), 1);
691    }
692
693    #[test]
694    fn filter_results_drops_circular_dep_when_no_file_changed() {
695        let mut results = AnalysisResults::default();
696        results
697            .circular_dependencies
698            .push(CircularDependencyFinding::with_actions(
699                CircularDependency {
700                    files: vec!["/a.ts".into(), "/b.ts".into()],
701                    length: 2,
702                    line: 1,
703                    col: 0,
704                    edges: Vec::new(),
705                    is_cross_package: false,
706                },
707            ));
708
709        let changed: FxHashSet<PathBuf> = FxHashSet::default();
710        filter_results_by_changed_files(&mut results, &changed);
711        assert!(results.circular_dependencies.is_empty());
712    }
713
714    #[test]
715    fn filter_results_drops_boundary_violation_when_importer_unchanged() {
716        let mut results = AnalysisResults::default();
717        results
718            .boundary_violations
719            .push(BoundaryViolationFinding::with_actions(BoundaryViolation {
720                from_path: "/a.ts".into(),
721                to_path: "/b.ts".into(),
722                from_zone: "ui".into(),
723                to_zone: "data".into(),
724                import_specifier: "../data/db".into(),
725                line: 1,
726                col: 0,
727            }));
728
729        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
730        changed.insert("/b.ts".into());
731
732        filter_results_by_changed_files(&mut results, &changed);
733        assert!(results.boundary_violations.is_empty());
734    }
735
736    #[test]
737    fn filter_results_keeps_security_finding_when_trace_file_changed() {
738        let mut results = AnalysisResults::default();
739        results.security_findings.push(SecurityFinding {
740            kind: SecurityFindingKind::ClientServerLeak,
741            category: None,
742            cwe: None,
743            path: "/project/src/client.tsx".into(),
744            line: 2,
745            col: 0,
746            evidence: "candidate".into(),
747            source_backed: false,
748            trace: vec![
749                TraceHop {
750                    path: "/project/src/client.tsx".into(),
751                    line: 2,
752                    col: 0,
753                    role: TraceHopRole::ClientBoundary,
754                },
755                TraceHop {
756                    path: "/project/src/server.ts".into(),
757                    line: 1,
758                    col: 0,
759                    role: TraceHopRole::SecretSource,
760                },
761            ],
762            actions: Vec::new(),
763            reachability: None,
764        });
765
766        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
767        changed.insert("/project/src/server.ts".into());
768
769        filter_results_by_changed_files(&mut results, &changed);
770
771        assert_eq!(results.security_findings.len(), 1);
772    }
773
774    #[test]
775    fn filter_results_keeps_relative_empty_catalog_group_when_manifest_changed() {
776        let mut results = AnalysisResults::default();
777        results
778            .empty_catalog_groups
779            .push(EmptyCatalogGroupFinding::with_actions(EmptyCatalogGroup {
780                catalog_name: "legacy".into(),
781                path: PathBuf::from("pnpm-workspace.yaml"),
782                line: 4,
783            }));
784
785        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
786        changed.insert(PathBuf::from("/repo/pnpm-workspace.yaml"));
787
788        filter_results_by_changed_files(&mut results, &changed);
789
790        assert_eq!(results.empty_catalog_groups.len(), 1);
791        assert_eq!(results.empty_catalog_groups[0].group.catalog_name, "legacy");
792    }
793
794    #[test]
795    fn filter_duplication_keeps_groups_with_at_least_one_changed_instance() {
796        let mut report = DuplicationReport {
797            clone_groups: vec![CloneGroup {
798                instances: vec![
799                    CloneInstance {
800                        file: "/a.ts".into(),
801                        start_line: 1,
802                        end_line: 5,
803                        start_col: 0,
804                        end_col: 10,
805                        fragment: "code".into(),
806                    },
807                    CloneInstance {
808                        file: "/b.ts".into(),
809                        start_line: 1,
810                        end_line: 5,
811                        start_col: 0,
812                        end_col: 10,
813                        fragment: "code".into(),
814                    },
815                ],
816                token_count: 20,
817                line_count: 5,
818            }],
819            clone_families: vec![],
820            mirrored_directories: vec![],
821            stats: DuplicationStats {
822                total_files: 2,
823                files_with_clones: 2,
824                total_lines: 100,
825                duplicated_lines: 10,
826                total_tokens: 200,
827                duplicated_tokens: 40,
828                clone_groups: 1,
829                clone_instances: 2,
830                duplication_percentage: 10.0,
831                clone_groups_below_min_occurrences: 0,
832            },
833        };
834
835        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
836        changed.insert("/a.ts".into());
837
838        filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
839        assert_eq!(report.clone_groups.len(), 1);
840        assert_eq!(report.stats.clone_groups, 1);
841        assert_eq!(report.stats.clone_instances, 2);
842    }
843
844    /// Regression for issue #561: on Windows, `try_get_changed_files` joins
845    /// segments onto the `dunce::canonicalize`d toplevel (non-verbatim),
846    /// while analysis-pipeline paths inherit the shape of `opts.root` which
847    /// tools / test fixtures often pre-canonicalise with `std::fs::canonicalize`
848    /// (verbatim). The byte-level lookup against `FxHashSet<PathBuf>` then
849    /// silently dropped every clone group. Pin both sides through a synthetic
850    /// verbatim path on one side and a plain path on the other.
851    #[cfg(windows)]
852    #[test]
853    fn filter_duplication_normalises_verbatim_prefix_mismatch() {
854        let mut report = DuplicationReport {
855            clone_groups: vec![CloneGroup {
856                instances: vec![
857                    CloneInstance {
858                        file: PathBuf::from(r"\\?\C:\repo\src\changed.ts"),
859                        start_line: 1,
860                        end_line: 5,
861                        start_col: 0,
862                        end_col: 10,
863                        fragment: "code".into(),
864                    },
865                    CloneInstance {
866                        file: PathBuf::from(r"\\?\C:\repo\src\focused-copy.ts"),
867                        start_line: 1,
868                        end_line: 5,
869                        start_col: 0,
870                        end_col: 10,
871                        fragment: "code".into(),
872                    },
873                ],
874                token_count: 20,
875                line_count: 5,
876            }],
877            clone_families: vec![],
878            mirrored_directories: vec![],
879            stats: DuplicationStats {
880                total_files: 2,
881                files_with_clones: 2,
882                total_lines: 100,
883                duplicated_lines: 10,
884                total_tokens: 200,
885                duplicated_tokens: 40,
886                clone_groups: 1,
887                clone_instances: 2,
888                duplication_percentage: 10.0,
889                clone_groups_below_min_occurrences: 0,
890            },
891        };
892
893        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
894        changed.insert(PathBuf::from(r"C:\repo\src\changed.ts"));
895
896        filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
897        assert_eq!(
898            report.clone_groups.len(),
899            1,
900            "verbatim instance path must match non-verbatim changed-file entry"
901        );
902    }
903
904    #[cfg(windows)]
905    #[test]
906    fn filter_results_normalises_verbatim_prefix_mismatch() {
907        let mut results = AnalysisResults::default();
908        results
909            .unused_exports
910            .push(UnusedExportFinding::with_actions(UnusedExport {
911                path: PathBuf::from(r"\\?\C:\repo\src\a.ts"),
912                export_name: "foo".into(),
913                is_type_only: false,
914                line: 1,
915                col: 0,
916                span_start: 0,
917                is_re_export: false,
918            }));
919
920        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
921        changed.insert(PathBuf::from(r"C:\repo\src\a.ts"));
922
923        filter_results_by_changed_files(&mut results, &changed);
924        assert_eq!(
925            results.unused_exports.len(),
926            1,
927            "verbatim finding path must match non-verbatim changed-file entry"
928        );
929    }
930
931    /// Initialize a temp git repo with a single committed file plus a tag
932    /// at HEAD. Returns the canonical repo root.
933    ///
934    /// Uses `dunce::canonicalize` rather than `std::fs::canonicalize` so the
935    /// returned path agrees with what `resolve_git_toplevel` produces in
936    /// production (PR #566 swapped that helper to `dunce::canonicalize` to
937    /// strip the Windows `\\?\` verbatim prefix). `std::fs::canonicalize`
938    /// still produces verbatim on Windows, so the prior shape diverged from
939    /// the production helper and downstream `changed.contains(&expected)`
940    /// assertions silently failed because one side was verbatim and the
941    /// other was not. POSIX behaviour is identical to `std::fs::canonicalize`.
942    fn init_repo(repo: &Path) -> PathBuf {
943        run_git(repo, &["init", "--quiet", "--initial-branch=main"]);
944        run_git(repo, &["config", "user.email", "test@example.com"]);
945        run_git(repo, &["config", "user.name", "test"]);
946        run_git(repo, &["config", "commit.gpgsign", "false"]);
947        std::fs::write(repo.join("seed.txt"), "seed\n").unwrap();
948        run_git(repo, &["add", "seed.txt"]);
949        run_git(repo, &["commit", "--quiet", "-m", "initial"]);
950        run_git(repo, &["tag", "fallow-baseline"]);
951        dunce::canonicalize(repo).unwrap()
952    }
953
954    fn run_git(cwd: &Path, args: &[&str]) {
955        let output = std::process::Command::new("git")
956            .args(args)
957            .current_dir(cwd)
958            .output()
959            .expect("git available");
960        assert!(
961            output.status.success(),
962            "git {args:?} failed: {}",
963            String::from_utf8_lossy(&output.stderr)
964        );
965    }
966
967    /// Workspace at git root, an untracked file is included in the
968    /// changed-files set with an absolute path joined from the repo root.
969    #[test]
970    fn try_get_changed_files_workspace_at_repo_root() {
971        let tmp = tempfile::tempdir().unwrap();
972        let repo = init_repo(tmp.path());
973        std::fs::create_dir_all(repo.join("src")).unwrap();
974        std::fs::write(repo.join("src/new.ts"), "export const x = 1;\n").unwrap();
975
976        let changed = try_get_changed_files(&repo, "fallow-baseline").unwrap();
977
978        let expected = repo.join("src/new.ts");
979        assert!(
980            changed.contains(&expected),
981            "changed set should contain {expected:?}; actual: {changed:?}"
982        );
983    }
984
985    /// Regression test for #190. When the workspace is a subdirectory of
986    /// the git repository, `git diff --name-only` emits paths relative to
987    /// the repo root (e.g., `frontend/src/new.ts`). Without the
988    /// rev-parse-based toplevel resolution the function joined those
989    /// against the workspace root, producing bogus paths like
990    /// `<repo>/frontend/frontend/src/new.ts` that never matched
991    /// `analyze_project` output and silently dropped the filter.
992    #[test]
993    fn try_get_changed_files_workspace_in_subdirectory() {
994        let tmp = tempfile::tempdir().unwrap();
995        let repo = init_repo(tmp.path());
996        let frontend = repo.join("frontend");
997        std::fs::create_dir_all(frontend.join("src")).unwrap();
998        std::fs::write(frontend.join("src/new.ts"), "export const x = 1;\n").unwrap();
999
1000        let changed = try_get_changed_files(&frontend, "fallow-baseline").unwrap();
1001
1002        let expected = repo.join("frontend/src/new.ts");
1003        assert!(
1004            changed.contains(&expected),
1005            "changed set should contain canonical {expected:?}; actual: {changed:?}"
1006        );
1007        let bogus = frontend.join("frontend/src/new.ts");
1008        assert!(
1009            !changed.contains(&bogus),
1010            "changed set must not contain double-frontend path {bogus:?}"
1011        );
1012    }
1013
1014    /// A *committed* change in a sibling subdirectory (outside the
1015    /// workspace) appears in the changed-files set because `git diff`
1016    /// is repo-wide regardless of cwd. The downstream
1017    /// `filter_results_by_changed_files` retains it only if
1018    /// `analyze_project` saw it; for a workspace scoped to one subdir,
1019    /// the sibling file is not in the analysis paths and falls away at
1020    /// the result-merge boundary, not here. This test pins the contract:
1021    /// for committed changes, the set is repo-wide.
1022    ///
1023    /// Note: `git ls-files --others --exclude-standard` only lists
1024    /// untracked files in cwd's subtree, so untracked siblings are NOT
1025    /// in the set when invoked from a subdirectory. That's harmless for
1026    /// the LSP because `analyze_project` only walks files under the
1027    /// workspace root either way.
1028    #[test]
1029    fn try_get_changed_files_includes_committed_sibling_changes() {
1030        let tmp = tempfile::tempdir().unwrap();
1031        let repo = init_repo(tmp.path());
1032        let backend = repo.join("backend");
1033        std::fs::create_dir_all(&backend).unwrap();
1034        std::fs::write(backend.join("server.py"), "print('hi')\n").unwrap();
1035        run_git(&repo, &["add", "."]);
1036        run_git(&repo, &["commit", "--quiet", "-m", "add backend"]);
1037
1038        let frontend = repo.join("frontend");
1039        std::fs::create_dir_all(&frontend).unwrap();
1040
1041        let changed = try_get_changed_files(&frontend, "fallow-baseline").unwrap();
1042
1043        let expected = repo.join("backend/server.py");
1044        assert!(
1045            changed.contains(&expected),
1046            "committed sibling backend/server.py should be in the set: {changed:?}"
1047        );
1048    }
1049
1050    /// Modifying a tracked file shows up via `git diff --name-only HEAD`,
1051    /// not just via `ls-files --others`. Confirm the path-join fix
1052    /// applies to that codepath too.
1053    #[test]
1054    fn try_get_changed_files_includes_modified_tracked_file() {
1055        let tmp = tempfile::tempdir().unwrap();
1056        let repo = init_repo(tmp.path());
1057        let frontend = repo.join("frontend");
1058        std::fs::create_dir_all(frontend.join("src")).unwrap();
1059        std::fs::write(frontend.join("src/old.ts"), "export const x = 1;\n").unwrap();
1060        run_git(&repo, &["add", "."]);
1061        run_git(&repo, &["commit", "--quiet", "-m", "add old"]);
1062        run_git(&repo, &["tag", "fallow-baseline-v2"]);
1063        std::fs::write(frontend.join("src/old.ts"), "export const x = 2;\n").unwrap();
1064
1065        let changed = try_get_changed_files(&frontend, "fallow-baseline-v2").unwrap();
1066
1067        let expected = repo.join("frontend/src/old.ts");
1068        assert!(
1069            changed.contains(&expected),
1070            "modified tracked file {expected:?} missing from set: {changed:?}"
1071        );
1072    }
1073
1074    /// `resolve_git_toplevel` returns the canonical repo path even when
1075    /// invoked from inside a subdirectory and via a symlinked input path.
1076    /// On macOS this guards against the `/tmp` -> `/private/tmp`
1077    /// canonicalization gap that would otherwise make the LSP filter set
1078    /// disagree with `analyze_project` paths.
1079    #[test]
1080    fn resolve_git_toplevel_returns_canonical_path() {
1081        let tmp = tempfile::tempdir().unwrap();
1082        let repo = init_repo(tmp.path());
1083        let frontend = repo.join("frontend");
1084        std::fs::create_dir_all(&frontend).unwrap();
1085
1086        let toplevel = resolve_git_toplevel(&frontend).unwrap();
1087        assert_eq!(toplevel, repo, "toplevel should equal canonical repo root");
1088        assert_eq!(
1089            toplevel,
1090            dunce::canonicalize(&toplevel).unwrap(),
1091            "resolved toplevel should already be canonical"
1092        );
1093    }
1094
1095    /// Outside any git repo, `resolve_git_toplevel` returns
1096    /// `NotARepository` rather than panicking or returning a wrong path.
1097    /// The LSP relies on this to fall back to the workspace root cleanly.
1098    #[test]
1099    fn resolve_git_toplevel_not_a_repository() {
1100        let tmp = tempfile::tempdir().unwrap();
1101        let result = resolve_git_toplevel(tmp.path());
1102        assert!(
1103            matches!(result, Err(ChangedFilesError::NotARepository)),
1104            "expected NotARepository, got {result:?}"
1105        );
1106    }
1107
1108    /// `try_get_changed_files` propagates the not-a-repo error so the
1109    /// LSP can warn and fall back to full-scope results.
1110    #[test]
1111    fn try_get_changed_files_not_a_repository() {
1112        let tmp = tempfile::tempdir().unwrap();
1113        let result = try_get_changed_files(tmp.path(), "main");
1114        assert!(matches!(result, Err(ChangedFilesError::NotARepository)));
1115    }
1116
1117    #[test]
1118    fn filter_duplication_drops_groups_with_no_changed_instance() {
1119        let mut report = DuplicationReport {
1120            clone_groups: vec![CloneGroup {
1121                instances: vec![CloneInstance {
1122                    file: "/a.ts".into(),
1123                    start_line: 1,
1124                    end_line: 5,
1125                    start_col: 0,
1126                    end_col: 10,
1127                    fragment: "code".into(),
1128                }],
1129                token_count: 20,
1130                line_count: 5,
1131            }],
1132            clone_families: vec![],
1133            mirrored_directories: vec![],
1134            stats: DuplicationStats {
1135                total_files: 1,
1136                files_with_clones: 1,
1137                total_lines: 100,
1138                duplicated_lines: 5,
1139                total_tokens: 100,
1140                duplicated_tokens: 20,
1141                clone_groups: 1,
1142                clone_instances: 1,
1143                duplication_percentage: 5.0,
1144                clone_groups_below_min_occurrences: 0,
1145            },
1146        };
1147
1148        let changed: FxHashSet<PathBuf> = FxHashSet::default();
1149        filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
1150        assert!(report.clone_groups.is_empty());
1151        assert_eq!(report.stats.clone_groups, 0);
1152        assert_eq!(report.stats.clone_instances, 0);
1153        assert!((report.stats.duplication_percentage - 0.0).abs() < f64::EPSILON);
1154    }
1155}