Skip to main content

fallow_core/
changed_files.rs

1//! Git-aware "changed files" filtering shared between fallow-cli and fallow-lsp.
2//!
3//! Provides:
4//! - [`validate_git_ref`] for input validation at trust boundaries.
5//! - [`ChangedFilesError`] / [`try_get_changed_files`] / [`get_changed_files`]
6//!   for resolving a git ref into the set of changed files.
7//! - [`filter_results_by_changed_files`] for narrowing an [`AnalysisResults`]
8//!   to issues in those files.
9//! - [`filter_duplication_by_changed_files`] for narrowing a
10//!   [`DuplicationReport`] to clone groups touching at least one changed file.
11//!
//! [`filter_results_by_changed_files`] intentionally leaves dependency-level
//! issues (unused deps, type-only deps, test-only deps) unfiltered, since
//! "unused dependency" is a function of the entire import graph and can't be
//! attributed to individual changed files.
15
16use std::path::{Path, PathBuf};
17
18use rustc_hash::{FxHashMap, FxHashSet};
19
20use crate::duplicates::{DuplicationReport, DuplicationStats, families};
21use crate::results::AnalysisResults;
22
/// Check a user-supplied git ref before it is handed to `git diff`.
///
/// The ref is rejected when it is empty, when it begins with `-` (git would
/// parse it as an option), or when it contains a character outside the
/// allowlist: ASCII alphanumerics plus `.`, `_`, `-`, `/`, `~`, `^`, `@`,
/// `{` and `}`. That covers branch names, tags, SHAs, and reflog expressions
/// such as `HEAD~N` and `HEAD@{...}`.
///
/// While a `{` is open, `:` and space are additionally accepted so reflog
/// selectors like `HEAD@{2025-01-01}` and `HEAD@{1 week ago}` pass. Brace
/// tracking is a single flag: `{` sets it, `}` clears it, and a ref that ends
/// with the flag still set is rejected as unclosed.
///
/// Shared by the CLI (clap value parser) and the LSP (initializationOptions
/// trust boundary) so a malformed ref fails fast with a readable message.
///
/// # Errors
///
/// Returns a human-readable description of the first rule the ref violates.
pub fn validate_git_ref(s: &str) -> Result<&str, String> {
    match s.chars().next() {
        None => return Err("git ref cannot be empty".to_string()),
        Some('-') => return Err("git ref cannot start with '-'".to_string()),
        Some(_) => {}
    }

    let mut brace_open = false;
    for ch in s.chars() {
        if ch == '{' {
            brace_open = true;
            continue;
        }
        if ch == '}' {
            brace_open = false;
            continue;
        }

        let always_allowed =
            ch.is_ascii_alphanumeric() || matches!(ch, '.' | '_' | '-' | '/' | '~' | '^' | '@');
        // Colons and spaces only make sense inside a reflog selector.
        let allowed_in_selector = brace_open && matches!(ch, ':' | ' ');
        if !(always_allowed || allowed_in_selector) {
            return Err(format!("git ref contains disallowed character: '{ch}'"));
        }
    }

    if brace_open {
        Err("git ref has unclosed '{'".to_string())
    } else {
        Ok(s)
    }
}
58
/// Classification of a changed-files lookup failure, so callers can pick
/// their own wording (soft warning vs hard error) without re-parsing stderr.
///
/// Use [`ChangedFilesError::describe`] to turn a value into a message clause.
#[derive(Debug)]
pub enum ChangedFilesError {
    /// Git ref failed validation before invoking `git`. Carries the message
    /// produced by [`validate_git_ref`].
    InvalidRef(String),
    /// `git` binary not found / not executable. Carries the text of the
    /// error returned when spawning the process.
    GitMissing(String),
    /// Command ran but the directory isn't a git repository (git's stderr
    /// contained "not a git repository").
    NotARepository,
    /// Command ran but the ref is invalid / another git error. Carries git's
    /// trimmed stderr, or a synthetic message when git printed nothing usable.
    GitFailed(String),
}
72
73impl ChangedFilesError {
74    /// Human-readable clause suitable for embedding in an error message.
75    /// Does not include the flag name (e.g. "--changed-since") so callers can
76    /// prepend their own context.
77    pub fn describe(&self) -> String {
78        match self {
79            Self::InvalidRef(e) => format!("invalid git ref: {e}"),
80            Self::GitMissing(e) => format!("failed to run git: {e}"),
81            Self::NotARepository => "not a git repository".to_owned(),
82            Self::GitFailed(stderr) => augment_git_failed(stderr),
83        }
84    }
85}
86
/// Append an actionable hint to raw `git diff` stderr when the failure looks
/// like a missing revision.
///
/// The match is case-insensitive (ASCII) against a few known git phrases.
/// Such failures commonly come from shallow clones (`actions/checkout@v4`
/// defaults to `fetch-depth: 1`, GitLab CI to `GIT_DEPTH: 50`) where the
/// baseline ref predates the fetch boundary, so the hint explains how to
/// deepen the clone. Any other stderr is returned unchanged.
fn augment_git_failed(stderr: &str) -> String {
    /// Lower-cased fragments git prints when a revision cannot be resolved.
    const MISSING_REVISION_MARKERS: [&str; 3] = [
        "not a valid object name",
        "unknown revision",
        "ambiguous argument",
    ];

    let folded = stderr.to_ascii_lowercase();
    let revision_missing = MISSING_REVISION_MARKERS
        .iter()
        .any(|marker| folded.contains(*marker));

    if !revision_missing {
        return stderr.to_owned();
    }

    format!(
        "{stderr} (shallow clone? try `git fetch --unshallow`, or set `fetch-depth: 0` on actions/checkout / `GIT_DEPTH: 0` in GitLab CI)"
    )
}
105
106/// Resolve the canonical git toplevel for `cwd`.
107///
108/// Runs `git rev-parse --show-toplevel`, which is git's own answer to "where
109/// does this repository live?". The returned path is canonicalized so it
110/// agrees with paths produced by `fs::canonicalize` elsewhere on macOS
111/// (`/tmp` -> `/private/tmp`) and Windows (8.3 short paths).
112///
113/// Used by `try_get_changed_files` to produce changed-file paths whose
114/// absolute form matches what the analysis pipeline emits, regardless of
115/// whether the caller's `cwd` is the repo root or a subdirectory of it.
116pub fn resolve_git_toplevel(cwd: &Path) -> Result<PathBuf, ChangedFilesError> {
117    let output = git_command(cwd, &["rev-parse", "--show-toplevel"])
118        .output()
119        .map_err(|e| ChangedFilesError::GitMissing(e.to_string()))?;
120
121    if !output.status.success() {
122        let stderr = String::from_utf8_lossy(&output.stderr);
123        return Err(if stderr.contains("not a git repository") {
124            ChangedFilesError::NotARepository
125        } else {
126            ChangedFilesError::GitFailed(stderr.trim().to_owned())
127        });
128    }
129
130    let raw = String::from_utf8_lossy(&output.stdout);
131    let trimmed = raw.trim();
132    if trimmed.is_empty() {
133        return Err(ChangedFilesError::GitFailed(
134            "git rev-parse --show-toplevel returned empty output".to_owned(),
135        ));
136    }
137
138    let path = PathBuf::from(trimmed);
139    Ok(path.canonicalize().unwrap_or(path))
140}
141
142fn collect_git_paths(
143    cwd: &Path,
144    toplevel: &Path,
145    args: &[&str],
146) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
147    let output = git_command(cwd, args)
148        .output()
149        .map_err(|e| ChangedFilesError::GitMissing(e.to_string()))?;
150
151    if !output.status.success() {
152        let stderr = String::from_utf8_lossy(&output.stderr);
153        return Err(if stderr.contains("not a git repository") {
154            ChangedFilesError::NotARepository
155        } else {
156            ChangedFilesError::GitFailed(stderr.trim().to_owned())
157        });
158    }
159
160    // All callers use modes whose output is repository-root-relative
161    // (`git diff --name-only`, `git ls-files --full-name --others`). Joining
162    // against `toplevel` yields absolute paths that line up with what
163    // `analyze_project` emits when given a canonical workspace root, even if
164    // the LSP / CLI was invoked from a subdirectory.
165    let files: FxHashSet<PathBuf> = String::from_utf8_lossy(&output.stdout)
166        .lines()
167        .filter(|line| !line.is_empty())
168        .map(|line| toplevel.join(line))
169        .collect();
170
171    Ok(files)
172}
173
174fn git_command(cwd: &Path, args: &[&str]) -> std::process::Command {
175    let mut command = std::process::Command::new("git");
176    command.args(args).current_dir(cwd);
177    crate::git_env::clear_ambient_git_env(&mut command);
178    command
179}
180
181/// Get files changed since a git ref. Returns `Err` (with details) when the
182/// git invocation itself failed, so callers can choose between warn-and-ignore
183/// and hard-error behavior.
184///
185/// Includes both:
186/// - committed changes from the merge-base range `git_ref...HEAD`
187/// - tracked staged/unstaged changes from `HEAD` to the current worktree
188/// - untracked files not ignored by Git
189///
190/// This keeps `--changed-since` useful for local validation instead of only
191/// reflecting the last committed `HEAD`.
192///
193/// All paths in the returned set are absolute and rooted at the canonical
194/// git toplevel, not at `root`. This matters when the LSP / CLI is invoked
195/// from a subdirectory of the repository (e.g., a Turborepo workspace at
196/// `apps/web`): `git diff` emits root-relative paths, and we need to join
197/// them against the actual repo root rather than the caller's cwd.
198pub fn try_get_changed_files(
199    root: &Path,
200    git_ref: &str,
201) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
202    // Validate the ref BEFORE resolving the toplevel so the security-relevant
203    // boundary check (rejects refs starting with `-`, etc.) runs even when
204    // `cwd` happens to not be a git repo. Otherwise an attacker-controlled
205    // `--changed-since=--upload-pack=evil` would leak through to
206    // `git rev-parse` instead of being rejected at validation.
207    validate_git_ref(git_ref).map_err(ChangedFilesError::InvalidRef)?;
208    let toplevel = resolve_git_toplevel(root)?;
209    try_get_changed_files_with_toplevel(root, &toplevel, git_ref)
210}
211
212/// Like [`try_get_changed_files`], but takes a pre-resolved canonical
213/// `toplevel` so callers (the LSP) can cache it across runs and avoid the
214/// extra `git rev-parse --show-toplevel` subprocess on every save.
215///
216/// `toplevel` MUST be the canonical git toplevel for `cwd`; passing anything
217/// else produces incorrect changed-file paths. The CLI does not call this
218/// directly: it uses [`try_get_changed_files`] which resolves on each call.
219pub fn try_get_changed_files_with_toplevel(
220    cwd: &Path,
221    toplevel: &Path,
222    git_ref: &str,
223) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
224    validate_git_ref(git_ref).map_err(ChangedFilesError::InvalidRef)?;
225
226    let mut files = collect_git_paths(
227        cwd,
228        toplevel,
229        &[
230            "diff",
231            "--name-only",
232            "--end-of-options",
233            &format!("{git_ref}...HEAD"),
234        ],
235    )?;
236    files.extend(collect_git_paths(
237        cwd,
238        toplevel,
239        &["diff", "--name-only", "HEAD"],
240    )?);
241    // `--full-name` forces `ls-files` to emit repository-root-relative paths,
242    // matching `git diff`'s default. Without it, `ls-files` emits paths
243    // relative to cwd, which silently produces wrong joins when the caller
244    // invokes from a subdirectory.
245    files.extend(collect_git_paths(
246        cwd,
247        toplevel,
248        &["ls-files", "--full-name", "--others", "--exclude-standard"],
249    )?);
250    Ok(files)
251}
252
253/// Get files changed since a git ref. Returns `None` on git failure after
254/// printing a warning to stderr. Used by `--changed-since` and `--file`, where
255/// a failure falls back to full-scope analysis.
256#[expect(
257    clippy::print_stderr,
258    reason = "intentional user-facing warning for the CLI's --changed-since fallback path; LSP callers use try_get_changed_files instead"
259)]
260pub fn get_changed_files(root: &Path, git_ref: &str) -> Option<FxHashSet<PathBuf>> {
261    match try_get_changed_files(root, git_ref) {
262        Ok(files) => Some(files),
263        Err(ChangedFilesError::InvalidRef(e)) => {
264            eprintln!("Warning: --changed-since ignored: invalid git ref: {e}");
265            None
266        }
267        Err(ChangedFilesError::GitMissing(e)) => {
268            eprintln!("Warning: --changed-since ignored: failed to run git: {e}");
269            None
270        }
271        Err(ChangedFilesError::NotARepository) => {
272            eprintln!("Warning: --changed-since ignored: not a git repository");
273            None
274        }
275        Err(ChangedFilesError::GitFailed(stderr)) => {
276            eprintln!("Warning: --changed-since failed for ref '{git_ref}': {stderr}");
277            None
278        }
279    }
280}
281
282/// Filter `results` to only include issues whose source file is in
283/// `changed_files`.
284///
285/// Dependency-level issues (unused deps, dev deps, optional deps, type-only
286/// deps, test-only deps) are intentionally NOT filtered here. Unlike
287/// file-level issues, a dependency being "unused" is a function of the entire
288/// import graph and can't be attributed to individual changed source files.
289#[expect(
290    clippy::implicit_hasher,
291    reason = "fallow standardizes on FxHashSet across the workspace"
292)]
293pub fn filter_results_by_changed_files(
294    results: &mut AnalysisResults,
295    changed_files: &FxHashSet<PathBuf>,
296) {
297    results
298        .unused_files
299        .retain(|f| changed_files.contains(&f.file.path));
300    results
301        .unused_exports
302        .retain(|e| changed_files.contains(&e.export.path));
303    results
304        .unused_types
305        .retain(|e| changed_files.contains(&e.export.path));
306    results
307        .private_type_leaks
308        .retain(|e| changed_files.contains(&e.leak.path));
309    results
310        .unused_enum_members
311        .retain(|m| changed_files.contains(&m.member.path));
312    results
313        .unused_class_members
314        .retain(|m| changed_files.contains(&m.member.path));
315    results
316        .unresolved_imports
317        .retain(|i| changed_files.contains(&i.import.path));
318
319    // Unlisted deps: keep only if any importing file is changed
320    results.unlisted_dependencies.retain(|d| {
321        d.dep
322            .imported_from
323            .iter()
324            .any(|s| changed_files.contains(&s.path))
325    });
326
327    // Duplicate exports: filter locations to changed files, drop groups with < 2
328    for dup in &mut results.duplicate_exports {
329        dup.export
330            .locations
331            .retain(|loc| changed_files.contains(&loc.path));
332    }
333    results
334        .duplicate_exports
335        .retain(|d| d.export.locations.len() >= 2);
336
337    // Circular deps: keep cycles where at least one file is changed
338    results
339        .circular_dependencies
340        .retain(|c| c.cycle.files.iter().any(|f| changed_files.contains(f)));
341
342    // Boundary violations: keep if the importing file changed
343    results
344        .boundary_violations
345        .retain(|v| changed_files.contains(&v.violation.from_path));
346
347    // Stale suppressions: keep if the file changed
348    results
349        .stale_suppressions
350        .retain(|s| changed_files.contains(&s.path));
351
352    // Unresolved catalog references: anchored at the consumer package.json,
353    // so keep only findings whose path is in the changed set.
354    results
355        .unresolved_catalog_references
356        .retain(|r| changed_files.contains(&r.reference.path));
357    results
358        .empty_catalog_groups
359        .retain(|g| changed_files_contains_path(changed_files, &g.group.path));
360
361    // Unused / misconfigured dependency overrides: anchored at the declaring
362    // source file (pnpm-workspace.yaml or root package.json). Keep only
363    // findings whose source file is in the changed set.
364    results
365        .unused_dependency_overrides
366        .retain(|o| changed_files.contains(&o.entry.path));
367    results
368        .misconfigured_dependency_overrides
369        .retain(|o| changed_files.contains(&o.entry.path));
370}
371
372fn changed_files_contains_path(changed_files: &FxHashSet<PathBuf>, path: &Path) -> bool {
373    changed_files.contains(path)
374        || (path.is_relative() && changed_files.iter().any(|changed| changed.ends_with(path)))
375}
376
377/// Recompute duplication statistics after filtering.
378///
379/// Uses per-file line deduplication (matching `compute_stats` in
380/// `duplicates/detect.rs`) so overlapping clone instances don't inflate the
381/// duplicated line count.
382fn recompute_duplication_stats(report: &DuplicationReport) -> DuplicationStats {
383    let mut files_with_clones: FxHashSet<&Path> = FxHashSet::default();
384    let mut file_dup_lines: FxHashMap<&Path, FxHashSet<usize>> = FxHashMap::default();
385    let mut duplicated_tokens = 0_usize;
386    let mut clone_instances = 0_usize;
387
388    for group in &report.clone_groups {
389        for instance in &group.instances {
390            files_with_clones.insert(&instance.file);
391            clone_instances += 1;
392            let lines = file_dup_lines.entry(&instance.file).or_default();
393            for line in instance.start_line..=instance.end_line {
394                lines.insert(line);
395            }
396        }
397        duplicated_tokens += group.token_count * group.instances.len();
398    }
399
400    let duplicated_lines: usize = file_dup_lines.values().map(FxHashSet::len).sum();
401
402    DuplicationStats {
403        total_files: report.stats.total_files,
404        files_with_clones: files_with_clones.len(),
405        total_lines: report.stats.total_lines,
406        duplicated_lines,
407        total_tokens: report.stats.total_tokens,
408        duplicated_tokens,
409        clone_groups: report.clone_groups.len(),
410        clone_instances,
411        #[expect(
412            clippy::cast_precision_loss,
413            reason = "stat percentages are display-only; precision loss at usize::MAX line counts is acceptable"
414        )]
415        duplication_percentage: if report.stats.total_lines > 0 {
416            (duplicated_lines as f64 / report.stats.total_lines as f64) * 100.0
417        } else {
418            0.0
419        },
420        clone_groups_below_min_occurrences: report.stats.clone_groups_below_min_occurrences,
421    }
422}
423
424/// Filter a duplication report to only retain clone groups where at least one
425/// instance belongs to a changed file. Families, mirrored directories, and
426/// stats are rebuilt from the surviving groups so consumers see consistent,
427/// correctly-scoped numbers.
428#[expect(
429    clippy::implicit_hasher,
430    reason = "fallow standardizes on FxHashSet across the workspace"
431)]
432pub fn filter_duplication_by_changed_files(
433    report: &mut DuplicationReport,
434    changed_files: &FxHashSet<PathBuf>,
435    root: &Path,
436) {
437    report
438        .clone_groups
439        .retain(|g| g.instances.iter().any(|i| changed_files.contains(&i.file)));
440    report.clone_families = families::group_into_families(&report.clone_groups, root);
441    report.mirrored_directories =
442        families::detect_mirrored_directories(&report.clone_families, root);
443    report.stats = recompute_duplication_stats(report);
444}
445
446#[cfg(test)]
447mod tests {
448    use super::*;
449    use crate::duplicates::{CloneGroup, CloneInstance};
450    use crate::results::{
451        BoundaryViolation, CircularDependency, EmptyCatalogGroup, UnusedExport, UnusedFile,
452    };
453    use fallow_types::output_dead_code::{
454        BoundaryViolationFinding, CircularDependencyFinding, EmptyCatalogGroupFinding,
455        UnusedExportFinding, UnusedFileFinding,
456    };
457
458    #[test]
459    fn changed_files_error_describe_variants() {
460        assert!(
461            ChangedFilesError::InvalidRef("bad".to_owned())
462                .describe()
463                .contains("invalid git ref")
464        );
465        assert!(
466            ChangedFilesError::GitMissing("oops".to_owned())
467                .describe()
468                .contains("oops")
469        );
470        assert_eq!(
471            ChangedFilesError::NotARepository.describe(),
472            "not a git repository"
473        );
474        assert!(
475            ChangedFilesError::GitFailed("bad ref".to_owned())
476                .describe()
477                .contains("bad ref")
478        );
479    }
480
481    #[test]
482    fn augment_git_failed_appends_shallow_clone_hint_for_unknown_revision() {
483        let stderr = "fatal: ambiguous argument 'fallow-baseline...HEAD': unknown revision or path not in the working tree.";
484        let described = ChangedFilesError::GitFailed(stderr.to_owned()).describe();
485        assert!(described.contains(stderr), "original stderr preserved");
486        assert!(
487            described.contains("shallow clone"),
488            "hint surfaced: {described}"
489        );
490        assert!(
491            described.contains("fetch-depth: 0") || described.contains("git fetch --unshallow"),
492            "hint actionable: {described}"
493        );
494    }
495
496    #[test]
497    fn augment_git_failed_passthrough_for_other_errors() {
498        // Errors that aren't shallow-clone-related stay verbatim
499        let stderr = "fatal: refusing to merge unrelated histories";
500        let described = ChangedFilesError::GitFailed(stderr.to_owned()).describe();
501        assert_eq!(described, stderr);
502    }
503
504    #[test]
505    fn validate_git_ref_rejects_leading_dash() {
506        assert!(validate_git_ref("--upload-pack=evil").is_err());
507        assert!(validate_git_ref("-flag").is_err());
508    }
509
510    #[test]
511    fn validate_git_ref_accepts_baseline_tag() {
512        assert_eq!(
513            validate_git_ref("fallow-baseline").unwrap(),
514            "fallow-baseline"
515        );
516    }
517
518    #[test]
519    fn try_get_changed_files_rejects_invalid_ref() {
520        // Validation runs before git invocation, so any path will do
521        let err = try_get_changed_files(Path::new("/"), "--evil")
522            .expect_err("leading-dash ref must be rejected");
523        assert!(matches!(err, ChangedFilesError::InvalidRef(_)));
524        assert!(err.describe().contains("cannot start with"));
525    }
526
527    #[test]
528    fn validate_git_ref_rejects_option_like_ref() {
529        assert!(validate_git_ref("--output=/tmp/fallow-proof").is_err());
530    }
531
532    #[test]
533    fn validate_git_ref_allows_reflog_relative_date() {
534        assert!(validate_git_ref("HEAD@{1 week ago}").is_ok());
535    }
536
537    #[test]
538    fn try_get_changed_files_rejects_option_like_ref_before_git() {
539        let root = tempfile::tempdir().expect("create temp dir");
540        let proof_path = root.path().join("proof");
541
542        let result = try_get_changed_files(
543            root.path(),
544            &format!("--output={}", proof_path.to_string_lossy()),
545        );
546
547        assert!(matches!(result, Err(ChangedFilesError::InvalidRef(_))));
548        assert!(
549            !proof_path.exists(),
550            "invalid changedSince ref must not be passed through to git as an option"
551        );
552    }
553
554    #[test]
555    fn git_command_clears_parent_git_environment() {
556        let command = git_command(Path::new("."), &["status", "--short"]);
557        let overrides: Vec<_> = command.get_envs().collect();
558
559        for var in crate::git_env::AMBIENT_GIT_ENV_VARS {
560            assert!(
561                overrides
562                    .iter()
563                    .any(|(key, value)| key.to_str() == Some(*var) && value.is_none()),
564                "git helper must clear inherited {var}",
565            );
566        }
567    }
568
569    #[test]
570    fn filter_results_keeps_only_changed_files() {
571        let mut results = AnalysisResults::default();
572        results
573            .unused_files
574            .push(UnusedFileFinding::with_actions(UnusedFile {
575                path: "/a.ts".into(),
576            }));
577        results
578            .unused_files
579            .push(UnusedFileFinding::with_actions(UnusedFile {
580                path: "/b.ts".into(),
581            }));
582        results
583            .unused_exports
584            .push(UnusedExportFinding::with_actions(UnusedExport {
585                path: "/a.ts".into(),
586                export_name: "foo".into(),
587                is_type_only: false,
588                line: 1,
589                col: 0,
590                span_start: 0,
591                is_re_export: false,
592            }));
593
594        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
595        changed.insert("/a.ts".into());
596
597        filter_results_by_changed_files(&mut results, &changed);
598
599        assert_eq!(results.unused_files.len(), 1);
600        assert_eq!(results.unused_files[0].file.path, PathBuf::from("/a.ts"));
601        assert_eq!(results.unused_exports.len(), 1);
602    }
603
604    #[test]
605    fn filter_results_preserves_dependency_level_issues() {
606        let mut results = AnalysisResults::default();
607        results.unused_dependencies.push(
608            fallow_types::output_dead_code::UnusedDependencyFinding::with_actions(
609                crate::results::UnusedDependency {
610                    package_name: "lodash".into(),
611                    location: crate::results::DependencyLocation::Dependencies,
612                    path: "/pkg.json".into(),
613                    line: 3,
614                    used_in_workspaces: Vec::new(),
615                },
616            ),
617        );
618
619        let changed: FxHashSet<PathBuf> = FxHashSet::default();
620        filter_results_by_changed_files(&mut results, &changed);
621
622        // Dependency-level issues survive even when no source files changed
623        assert_eq!(results.unused_dependencies.len(), 1);
624    }
625
626    #[test]
627    fn filter_results_keeps_circular_dep_when_any_file_changed() {
628        let mut results = AnalysisResults::default();
629        results
630            .circular_dependencies
631            .push(CircularDependencyFinding::with_actions(
632                CircularDependency {
633                    files: vec!["/a.ts".into(), "/b.ts".into()],
634                    length: 2,
635                    line: 1,
636                    col: 0,
637                    is_cross_package: false,
638                },
639            ));
640
641        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
642        changed.insert("/b.ts".into());
643
644        filter_results_by_changed_files(&mut results, &changed);
645        assert_eq!(results.circular_dependencies.len(), 1);
646    }
647
648    #[test]
649    fn filter_results_drops_circular_dep_when_no_file_changed() {
650        let mut results = AnalysisResults::default();
651        results
652            .circular_dependencies
653            .push(CircularDependencyFinding::with_actions(
654                CircularDependency {
655                    files: vec!["/a.ts".into(), "/b.ts".into()],
656                    length: 2,
657                    line: 1,
658                    col: 0,
659                    is_cross_package: false,
660                },
661            ));
662
663        let changed: FxHashSet<PathBuf> = FxHashSet::default();
664        filter_results_by_changed_files(&mut results, &changed);
665        assert!(results.circular_dependencies.is_empty());
666    }
667
668    #[test]
669    fn filter_results_drops_boundary_violation_when_importer_unchanged() {
670        let mut results = AnalysisResults::default();
671        results
672            .boundary_violations
673            .push(BoundaryViolationFinding::with_actions(BoundaryViolation {
674                from_path: "/a.ts".into(),
675                to_path: "/b.ts".into(),
676                from_zone: "ui".into(),
677                to_zone: "data".into(),
678                import_specifier: "../data/db".into(),
679                line: 1,
680                col: 0,
681            }));
682
683        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
684        // only the imported file changed, not the importer
685        changed.insert("/b.ts".into());
686
687        filter_results_by_changed_files(&mut results, &changed);
688        assert!(results.boundary_violations.is_empty());
689    }
690
691    #[test]
692    fn filter_results_keeps_relative_empty_catalog_group_when_manifest_changed() {
693        let mut results = AnalysisResults::default();
694        results
695            .empty_catalog_groups
696            .push(EmptyCatalogGroupFinding::with_actions(EmptyCatalogGroup {
697                catalog_name: "legacy".into(),
698                path: PathBuf::from("pnpm-workspace.yaml"),
699                line: 4,
700            }));
701
702        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
703        changed.insert(PathBuf::from("/repo/pnpm-workspace.yaml"));
704
705        filter_results_by_changed_files(&mut results, &changed);
706
707        assert_eq!(results.empty_catalog_groups.len(), 1);
708        assert_eq!(results.empty_catalog_groups[0].group.catalog_name, "legacy");
709    }
710
711    #[test]
712    fn filter_duplication_keeps_groups_with_at_least_one_changed_instance() {
713        let mut report = DuplicationReport {
714            clone_groups: vec![CloneGroup {
715                instances: vec![
716                    CloneInstance {
717                        file: "/a.ts".into(),
718                        start_line: 1,
719                        end_line: 5,
720                        start_col: 0,
721                        end_col: 10,
722                        fragment: "code".into(),
723                    },
724                    CloneInstance {
725                        file: "/b.ts".into(),
726                        start_line: 1,
727                        end_line: 5,
728                        start_col: 0,
729                        end_col: 10,
730                        fragment: "code".into(),
731                    },
732                ],
733                token_count: 20,
734                line_count: 5,
735            }],
736            clone_families: vec![],
737            mirrored_directories: vec![],
738            stats: DuplicationStats {
739                total_files: 2,
740                files_with_clones: 2,
741                total_lines: 100,
742                duplicated_lines: 10,
743                total_tokens: 200,
744                duplicated_tokens: 40,
745                clone_groups: 1,
746                clone_instances: 2,
747                duplication_percentage: 10.0,
748                clone_groups_below_min_occurrences: 0,
749            },
750        };
751
752        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
753        changed.insert("/a.ts".into());
754
755        filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
756        assert_eq!(report.clone_groups.len(), 1);
757        // stats recomputed from surviving groups
758        assert_eq!(report.stats.clone_groups, 1);
759        assert_eq!(report.stats.clone_instances, 2);
760    }
761
762    // -----------------------------------------------------------------------
763    // Real git interactions (tempdir + git init). These exercise the
764    // path-resolution boundary between `git rev-parse --show-toplevel`,
765    // `git diff --name-only`, and `git ls-files --full-name --others` to
766    // catch regressions like issue #190 where the LSP workspace was a
767    // subdirectory of the git repo and changed-file paths were joined
768    // against the wrong base.
769    // -----------------------------------------------------------------------
770
771    /// Initialize a temp git repo with a single committed file plus a tag
772    /// at HEAD. Returns the canonical repo root.
773    fn init_repo(repo: &Path) -> PathBuf {
774        run_git(repo, &["init", "--quiet", "--initial-branch=main"]);
775        run_git(repo, &["config", "user.email", "test@example.com"]);
776        run_git(repo, &["config", "user.name", "test"]);
777        run_git(repo, &["config", "commit.gpgsign", "false"]);
778        std::fs::write(repo.join("seed.txt"), "seed\n").unwrap();
779        run_git(repo, &["add", "seed.txt"]);
780        run_git(repo, &["commit", "--quiet", "-m", "initial"]);
781        run_git(repo, &["tag", "fallow-baseline"]);
782        repo.canonicalize().unwrap()
783    }
784
785    fn run_git(cwd: &Path, args: &[&str]) {
786        let output = std::process::Command::new("git")
787            .args(args)
788            .current_dir(cwd)
789            .output()
790            .expect("git available");
791        assert!(
792            output.status.success(),
793            "git {args:?} failed: {}",
794            String::from_utf8_lossy(&output.stderr)
795        );
796    }
797
798    /// Workspace at git root, an untracked file is included in the
799    /// changed-files set with an absolute path joined from the repo root.
800    #[test]
801    fn try_get_changed_files_workspace_at_repo_root() {
802        let tmp = tempfile::tempdir().unwrap();
803        let repo = init_repo(tmp.path());
804        std::fs::create_dir_all(repo.join("src")).unwrap();
805        std::fs::write(repo.join("src/new.ts"), "export const x = 1;\n").unwrap();
806
807        let changed = try_get_changed_files(&repo, "fallow-baseline").unwrap();
808
809        let expected = repo.join("src/new.ts");
810        assert!(
811            changed.contains(&expected),
812            "changed set should contain {expected:?}; actual: {changed:?}"
813        );
814    }
815
816    /// Regression test for #190. When the workspace is a subdirectory of
817    /// the git repository, `git diff --name-only` emits paths relative to
818    /// the repo root (e.g., `frontend/src/new.ts`). Without the
819    /// rev-parse-based toplevel resolution the function joined those
820    /// against the workspace root, producing bogus paths like
821    /// `<repo>/frontend/frontend/src/new.ts` that never matched
822    /// `analyze_project` output and silently dropped the filter.
823    #[test]
824    fn try_get_changed_files_workspace_in_subdirectory() {
825        let tmp = tempfile::tempdir().unwrap();
826        let repo = init_repo(tmp.path());
827        let frontend = repo.join("frontend");
828        std::fs::create_dir_all(frontend.join("src")).unwrap();
829        std::fs::write(frontend.join("src/new.ts"), "export const x = 1;\n").unwrap();
830
831        let changed = try_get_changed_files(&frontend, "fallow-baseline").unwrap();
832
833        let expected = repo.join("frontend/src/new.ts");
834        assert!(
835            changed.contains(&expected),
836            "changed set should contain canonical {expected:?}; actual: {changed:?}"
837        );
838        // Verify the bogus double-frontend path is NOT in the set
839        let bogus = frontend.join("frontend/src/new.ts");
840        assert!(
841            !changed.contains(&bogus),
842            "changed set must not contain double-frontend path {bogus:?}"
843        );
844    }
845
846    /// A *committed* change in a sibling subdirectory (outside the
847    /// workspace) appears in the changed-files set because `git diff`
848    /// is repo-wide regardless of cwd. The downstream
849    /// `filter_results_by_changed_files` retains it only if
850    /// `analyze_project` saw it; for a workspace scoped to one subdir,
851    /// the sibling file is not in the analysis paths and falls away at
852    /// the result-merge boundary, not here. This test pins the contract:
853    /// for committed changes, the set is repo-wide.
854    ///
855    /// Note: `git ls-files --others --exclude-standard` only lists
856    /// untracked files in cwd's subtree, so untracked siblings are NOT
857    /// in the set when invoked from a subdirectory. That's harmless for
858    /// the LSP because `analyze_project` only walks files under the
859    /// workspace root either way.
860    #[test]
861    fn try_get_changed_files_includes_committed_sibling_changes() {
862        let tmp = tempfile::tempdir().unwrap();
863        let repo = init_repo(tmp.path());
864        let backend = repo.join("backend");
865        std::fs::create_dir_all(&backend).unwrap();
866        std::fs::write(backend.join("server.py"), "print('hi')\n").unwrap();
867        run_git(&repo, &["add", "."]);
868        run_git(&repo, &["commit", "--quiet", "-m", "add backend"]);
869
870        let frontend = repo.join("frontend");
871        std::fs::create_dir_all(&frontend).unwrap();
872
873        let changed = try_get_changed_files(&frontend, "fallow-baseline").unwrap();
874
875        let expected = repo.join("backend/server.py");
876        assert!(
877            changed.contains(&expected),
878            "committed sibling backend/server.py should be in the set: {changed:?}"
879        );
880    }
881
882    /// Modifying a tracked file shows up via `git diff --name-only HEAD`,
883    /// not just via `ls-files --others`. Confirm the path-join fix
884    /// applies to that codepath too.
885    #[test]
886    fn try_get_changed_files_includes_modified_tracked_file() {
887        let tmp = tempfile::tempdir().unwrap();
888        let repo = init_repo(tmp.path());
889        let frontend = repo.join("frontend");
890        std::fs::create_dir_all(frontend.join("src")).unwrap();
891        std::fs::write(frontend.join("src/old.ts"), "export const x = 1;\n").unwrap();
892        run_git(&repo, &["add", "."]);
893        run_git(&repo, &["commit", "--quiet", "-m", "add old"]);
894        run_git(&repo, &["tag", "fallow-baseline-v2"]);
895        // Modify the tracked file (no commit, so diff-HEAD picks it up)
896        std::fs::write(frontend.join("src/old.ts"), "export const x = 2;\n").unwrap();
897
898        let changed = try_get_changed_files(&frontend, "fallow-baseline-v2").unwrap();
899
900        let expected = repo.join("frontend/src/old.ts");
901        assert!(
902            changed.contains(&expected),
903            "modified tracked file {expected:?} missing from set: {changed:?}"
904        );
905    }
906
907    /// `resolve_git_toplevel` returns the canonical repo path even when
908    /// invoked from inside a subdirectory and via a symlinked input path.
909    /// On macOS this guards against the `/tmp` -> `/private/tmp`
910    /// canonicalization gap that would otherwise make the LSP filter set
911    /// disagree with `analyze_project` paths.
912    #[test]
913    fn resolve_git_toplevel_returns_canonical_path() {
914        let tmp = tempfile::tempdir().unwrap();
915        let repo = init_repo(tmp.path());
916        let frontend = repo.join("frontend");
917        std::fs::create_dir_all(&frontend).unwrap();
918
919        let toplevel = resolve_git_toplevel(&frontend).unwrap();
920        assert_eq!(toplevel, repo, "toplevel should equal canonical repo root");
921        assert_eq!(
922            toplevel,
923            toplevel.canonicalize().unwrap(),
924            "resolved toplevel should already be canonical"
925        );
926    }
927
928    /// Outside any git repo, `resolve_git_toplevel` returns
929    /// `NotARepository` rather than panicking or returning a wrong path.
930    /// The LSP relies on this to fall back to the workspace root cleanly.
931    #[test]
932    fn resolve_git_toplevel_not_a_repository() {
933        let tmp = tempfile::tempdir().unwrap();
934        let result = resolve_git_toplevel(tmp.path());
935        assert!(
936            matches!(result, Err(ChangedFilesError::NotARepository)),
937            "expected NotARepository, got {result:?}"
938        );
939    }
940
941    /// `try_get_changed_files` propagates the not-a-repo error so the
942    /// LSP can warn and fall back to full-scope results.
943    #[test]
944    fn try_get_changed_files_not_a_repository() {
945        let tmp = tempfile::tempdir().unwrap();
946        let result = try_get_changed_files(tmp.path(), "main");
947        assert!(matches!(result, Err(ChangedFilesError::NotARepository)));
948    }
949
950    #[test]
951    fn filter_duplication_drops_groups_with_no_changed_instance() {
952        let mut report = DuplicationReport {
953            clone_groups: vec![CloneGroup {
954                instances: vec![CloneInstance {
955                    file: "/a.ts".into(),
956                    start_line: 1,
957                    end_line: 5,
958                    start_col: 0,
959                    end_col: 10,
960                    fragment: "code".into(),
961                }],
962                token_count: 20,
963                line_count: 5,
964            }],
965            clone_families: vec![],
966            mirrored_directories: vec![],
967            stats: DuplicationStats {
968                total_files: 1,
969                files_with_clones: 1,
970                total_lines: 100,
971                duplicated_lines: 5,
972                total_tokens: 100,
973                duplicated_tokens: 20,
974                clone_groups: 1,
975                clone_instances: 1,
976                duplication_percentage: 5.0,
977                clone_groups_below_min_occurrences: 0,
978            },
979        };
980
981        let changed: FxHashSet<PathBuf> = FxHashSet::default();
982        filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
983        assert!(report.clone_groups.is_empty());
984        assert_eq!(report.stats.clone_groups, 0);
985        assert_eq!(report.stats.clone_instances, 0);
986        assert!((report.stats.duplication_percentage - 0.0).abs() < f64::EPSILON);
987    }
988}