Skip to main content

fallow_core/
changed_files.rs

1//! Git-aware "changed files" filtering shared between fallow-cli and fallow-lsp.
2//!
3//! Provides:
4//! - [`validate_git_ref`] for input validation at trust boundaries.
5//! - [`ChangedFilesError`] / [`try_get_changed_files`] / [`get_changed_files`]
6//!   for resolving a git ref into the set of changed files.
7//! - [`filter_results_by_changed_files`] for narrowing an [`AnalysisResults`]
8//!   to issues in those files.
9//! - [`filter_duplication_by_changed_files`] for narrowing a
10//!   [`DuplicationReport`] to clone groups touching at least one changed file.
11//!
//! Dependency-level issues (unused deps, type-only deps, test-only deps) are
//! intentionally left unfiltered by [`filter_results_by_changed_files`]: whether
//! a dependency is "unused" is a function of the entire import graph and can't
//! be attributed to individual changed files.
15
16use std::path::{Path, PathBuf};
17use std::process::Output;
18use std::sync::OnceLock;
19
20use rustc_hash::{FxHashMap, FxHashSet};
21
22use crate::duplicates::{DuplicationReport, DuplicationStats, families};
23use crate::results::AnalysisResults;
24
/// Signature of the interceptor accepted by [`set_spawn_hook`].
///
/// The hook receives the fully-configured git `Command` (`git rev-parse`,
/// `git diff`, `git ls-files`) and is responsible for running it to
/// completion and returning its captured output. The CLI uses this to route
/// the git children through its `ScopedChild` registry so that a SIGINT
/// delivered during watch mode (or any analysis) reaps them instead of
/// letting them run to completion. See `crates/cli/src/signal/` and
/// issue #477.
pub type ChangedFilesSpawnHook = fn(&mut std::process::Command) -> std::io::Result<Output>;

/// Process-wide slot for the installed hook. Empty until [`set_spawn_hook`]
/// is called; written at most once.
static SPAWN_HOOK: OnceLock<ChangedFilesSpawnHook> = OnceLock::new();

/// Install the spawn hook used for this module's git subprocesses.
///
/// The first call wins; every later call is silently ignored. Intended to be
/// called once from the CLI's `main()`. Embedders and tests that never call
/// this get plain `Command::output` behavior.
pub fn set_spawn_hook(hook: ChangedFilesSpawnHook) {
    // `OnceLock::set` reports `Err` when a hook is already installed. That is
    // the documented "subsequent calls are no-ops" case, so the result is
    // deliberately discarded.
    let _ = SPAWN_HOOK.set(hook);
}

/// Run `command` to completion, going through the installed hook when there
/// is one and falling back to `Command::output` otherwise.
fn spawn_output(command: &mut std::process::Command) -> std::io::Result<Output> {
    match SPAWN_HOOK.get() {
        Some(hook) => hook(command),
        None => command.output(),
    }
}
52
/// Check a user-supplied git ref before it is handed to `git diff`.
///
/// A ref is rejected when it:
/// - is empty,
/// - starts with `-` (git would parse it as an option flag),
/// - contains a character outside the allowlist: ASCII alphanumerics plus
///   `.`, `_`, `-`, `/`, `~`, `^`, `@`, `{`, `}`,
/// - ends while a `{` is still open.
///
/// Between `{` and `}` the characters `:` and space are additionally
/// accepted, so reflog expressions such as `HEAD@{2025-01-01}` and
/// `HEAD@{1 week ago}` round-trip.
///
/// Shared by the CLI (clap value parser) and the LSP (initializationOptions
/// trust boundary) so both fail fast with a readable message.
///
/// # Errors
///
/// Returns a human-readable description of the first violation found.
pub fn validate_git_ref(s: &str) -> Result<&str, String> {
    let Some(first) = s.chars().next() else {
        return Err("git ref cannot be empty".to_string());
    };
    if first == '-' {
        return Err("git ref cannot start with '-'".to_string());
    }

    let mut brace_open = false;
    for ch in s.chars() {
        // Braces only toggle the "inside `{...}`" state; they are always
        // permitted characters themselves.
        if ch == '{' {
            brace_open = true;
            continue;
        }
        if ch == '}' {
            brace_open = false;
            continue;
        }

        let always_allowed =
            ch.is_ascii_alphanumeric() || matches!(ch, '.' | '_' | '-' | '/' | '~' | '^' | '@');
        let allowed_in_braces = brace_open && matches!(ch, ':' | ' ');
        if !(always_allowed || allowed_in_braces) {
            return Err(format!("git ref contains disallowed character: '{ch}'"));
        }
    }

    if brace_open {
        return Err("git ref has unclosed '{'".to_string());
    }
    Ok(s)
}
88
89/// Classification of a `git diff` failure, so callers can pick their own
90/// wording (soft warning vs hard error) without re-parsing stderr.
91#[derive(Debug)]
92pub enum ChangedFilesError {
93    /// Git ref failed validation before invoking `git`.
94    InvalidRef(String),
95    /// `git` binary not found / not executable.
96    GitMissing(String),
97    /// Command ran but the directory isn't a git repository.
98    NotARepository,
99    /// Command ran but the ref is invalid / another git error.
100    GitFailed(String),
101}
102
103impl ChangedFilesError {
104    /// Human-readable clause suitable for embedding in an error message.
105    /// Does not include the flag name (e.g. "--changed-since") so callers can
106    /// prepend their own context.
107    pub fn describe(&self) -> String {
108        match self {
109            Self::InvalidRef(e) => format!("invalid git ref: {e}"),
110            Self::GitMissing(e) => format!("failed to run git: {e}"),
111            Self::NotARepository => "not a git repository".to_owned(),
112            Self::GitFailed(stderr) => augment_git_failed(stderr),
113        }
114    }
115}
116
/// Append an actionable hint to raw git stderr when the failure looks like a
/// missing revision.
///
/// The match is case-insensitive on three phrases: "not a valid object
/// name", "unknown revision" and "ambiguous argument". These are what git
/// prints when the baseline ref predates the fetch boundary of a shallow
/// clone (`actions/checkout@v4` defaults to `fetch-depth: 1`, GitLab CI to
/// `GIT_DEPTH: 50`). Any other stderr is returned unchanged.
fn augment_git_failed(stderr: &str) -> String {
    const MISSING_REVISION_MARKERS: [&str; 3] = [
        "not a valid object name",
        "unknown revision",
        "ambiguous argument",
    ];

    let normalized = stderr.to_ascii_lowercase();
    let looks_like_missing_revision = MISSING_REVISION_MARKERS
        .iter()
        .any(|marker| normalized.contains(marker));

    if !looks_like_missing_revision {
        return stderr.to_owned();
    }

    // The original stderr is kept verbatim in front of the hint.
    format!(
        "{stderr} (shallow clone? try `git fetch --unshallow`, or set `fetch-depth: 0` on actions/checkout / `GIT_DEPTH: 0` in GitLab CI)"
    )
}
135
136/// Resolve the canonical git toplevel for `cwd`.
137///
138/// Runs `git rev-parse --show-toplevel`, which is git's own answer to "where
139/// does this repository live?". The returned path is canonicalized so it
140/// agrees with paths produced by `fs::canonicalize` elsewhere on macOS
141/// (`/tmp` -> `/private/tmp`) and Windows (8.3 short paths).
142///
143/// Used by `try_get_changed_files` to produce changed-file paths whose
144/// absolute form matches what the analysis pipeline emits, regardless of
145/// whether the caller's `cwd` is the repo root or a subdirectory of it.
146pub fn resolve_git_toplevel(cwd: &Path) -> Result<PathBuf, ChangedFilesError> {
147    let output = spawn_output(&mut git_command(cwd, &["rev-parse", "--show-toplevel"]))
148        .map_err(|e| ChangedFilesError::GitMissing(e.to_string()))?;
149
150    if !output.status.success() {
151        let stderr = String::from_utf8_lossy(&output.stderr);
152        return Err(if stderr.contains("not a git repository") {
153            ChangedFilesError::NotARepository
154        } else {
155            ChangedFilesError::GitFailed(stderr.trim().to_owned())
156        });
157    }
158
159    let raw = String::from_utf8_lossy(&output.stdout);
160    let trimmed = raw.trim();
161    if trimmed.is_empty() {
162        return Err(ChangedFilesError::GitFailed(
163            "git rev-parse --show-toplevel returned empty output".to_owned(),
164        ));
165    }
166
167    let path = PathBuf::from(trimmed);
168    Ok(path.canonicalize().unwrap_or(path))
169}
170
171fn collect_git_paths(
172    cwd: &Path,
173    toplevel: &Path,
174    args: &[&str],
175) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
176    let output = spawn_output(&mut git_command(cwd, args))
177        .map_err(|e| ChangedFilesError::GitMissing(e.to_string()))?;
178
179    if !output.status.success() {
180        let stderr = String::from_utf8_lossy(&output.stderr);
181        return Err(if stderr.contains("not a git repository") {
182            ChangedFilesError::NotARepository
183        } else {
184            ChangedFilesError::GitFailed(stderr.trim().to_owned())
185        });
186    }
187
188    // All callers use modes whose output is repository-root-relative
189    // (`git diff --name-only`, `git ls-files --full-name --others`). Joining
190    // against `toplevel` yields absolute paths that line up with what
191    // `analyze_project` emits when given a canonical workspace root, even if
192    // the LSP / CLI was invoked from a subdirectory.
193    let files: FxHashSet<PathBuf> = String::from_utf8_lossy(&output.stdout)
194        .lines()
195        .filter(|line| !line.is_empty())
196        .map(|line| toplevel.join(line))
197        .collect();
198
199    Ok(files)
200}
201
202fn git_command(cwd: &Path, args: &[&str]) -> std::process::Command {
203    let mut command = std::process::Command::new("git");
204    command.args(args).current_dir(cwd);
205    crate::git_env::clear_ambient_git_env(&mut command);
206    command
207}
208
209/// Get files changed since a git ref. Returns `Err` (with details) when the
210/// git invocation itself failed, so callers can choose between warn-and-ignore
211/// and hard-error behavior.
212///
213/// Includes both:
214/// - committed changes from the merge-base range `git_ref...HEAD`
215/// - tracked staged/unstaged changes from `HEAD` to the current worktree
216/// - untracked files not ignored by Git
217///
218/// This keeps `--changed-since` useful for local validation instead of only
219/// reflecting the last committed `HEAD`.
220///
221/// All paths in the returned set are absolute and rooted at the canonical
222/// git toplevel, not at `root`. This matters when the LSP / CLI is invoked
223/// from a subdirectory of the repository (e.g., a Turborepo workspace at
224/// `apps/web`): `git diff` emits root-relative paths, and we need to join
225/// them against the actual repo root rather than the caller's cwd.
226pub fn try_get_changed_files(
227    root: &Path,
228    git_ref: &str,
229) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
230    // Validate the ref BEFORE resolving the toplevel so the security-relevant
231    // boundary check (rejects refs starting with `-`, etc.) runs even when
232    // `cwd` happens to not be a git repo. Otherwise an attacker-controlled
233    // `--changed-since=--upload-pack=evil` would leak through to
234    // `git rev-parse` instead of being rejected at validation.
235    validate_git_ref(git_ref).map_err(ChangedFilesError::InvalidRef)?;
236    let toplevel = resolve_git_toplevel(root)?;
237    try_get_changed_files_with_toplevel(root, &toplevel, git_ref)
238}
239
240/// Like [`try_get_changed_files`], but takes a pre-resolved canonical
241/// `toplevel` so callers (the LSP) can cache it across runs and avoid the
242/// extra `git rev-parse --show-toplevel` subprocess on every save.
243///
244/// `toplevel` MUST be the canonical git toplevel for `cwd`; passing anything
245/// else produces incorrect changed-file paths. The CLI does not call this
246/// directly: it uses [`try_get_changed_files`] which resolves on each call.
247pub fn try_get_changed_files_with_toplevel(
248    cwd: &Path,
249    toplevel: &Path,
250    git_ref: &str,
251) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
252    validate_git_ref(git_ref).map_err(ChangedFilesError::InvalidRef)?;
253
254    let mut files = collect_git_paths(
255        cwd,
256        toplevel,
257        &[
258            "diff",
259            "--name-only",
260            "--end-of-options",
261            &format!("{git_ref}...HEAD"),
262        ],
263    )?;
264    files.extend(collect_git_paths(
265        cwd,
266        toplevel,
267        &["diff", "--name-only", "HEAD"],
268    )?);
269    // `--full-name` forces `ls-files` to emit repository-root-relative paths,
270    // matching `git diff`'s default. Without it, `ls-files` emits paths
271    // relative to cwd, which silently produces wrong joins when the caller
272    // invokes from a subdirectory.
273    files.extend(collect_git_paths(
274        cwd,
275        toplevel,
276        &["ls-files", "--full-name", "--others", "--exclude-standard"],
277    )?);
278    Ok(files)
279}
280
281/// Get files changed since a git ref. Returns `None` on git failure after
282/// printing a warning to stderr. Used by `--changed-since` and `--file`, where
283/// a failure falls back to full-scope analysis.
284#[expect(
285    clippy::print_stderr,
286    reason = "intentional user-facing warning for the CLI's --changed-since fallback path; LSP callers use try_get_changed_files instead"
287)]
288pub fn get_changed_files(root: &Path, git_ref: &str) -> Option<FxHashSet<PathBuf>> {
289    match try_get_changed_files(root, git_ref) {
290        Ok(files) => Some(files),
291        Err(ChangedFilesError::InvalidRef(e)) => {
292            eprintln!("Warning: --changed-since ignored: invalid git ref: {e}");
293            None
294        }
295        Err(ChangedFilesError::GitMissing(e)) => {
296            eprintln!("Warning: --changed-since ignored: failed to run git: {e}");
297            None
298        }
299        Err(ChangedFilesError::NotARepository) => {
300            eprintln!("Warning: --changed-since ignored: not a git repository");
301            None
302        }
303        Err(ChangedFilesError::GitFailed(stderr)) => {
304            eprintln!("Warning: --changed-since failed for ref '{git_ref}': {stderr}");
305            None
306        }
307    }
308}
309
310/// Filter `results` to only include issues whose source file is in
311/// `changed_files`.
312///
313/// Dependency-level issues (unused deps, dev deps, optional deps, type-only
314/// deps, test-only deps) are intentionally NOT filtered here. Unlike
315/// file-level issues, a dependency being "unused" is a function of the entire
316/// import graph and can't be attributed to individual changed source files.
317#[expect(
318    clippy::implicit_hasher,
319    reason = "fallow standardizes on FxHashSet across the workspace"
320)]
321pub fn filter_results_by_changed_files(
322    results: &mut AnalysisResults,
323    changed_files: &FxHashSet<PathBuf>,
324) {
325    results
326        .unused_files
327        .retain(|f| changed_files.contains(&f.file.path));
328    results
329        .unused_exports
330        .retain(|e| changed_files.contains(&e.export.path));
331    results
332        .unused_types
333        .retain(|e| changed_files.contains(&e.export.path));
334    results
335        .private_type_leaks
336        .retain(|e| changed_files.contains(&e.leak.path));
337    results
338        .unused_enum_members
339        .retain(|m| changed_files.contains(&m.member.path));
340    results
341        .unused_class_members
342        .retain(|m| changed_files.contains(&m.member.path));
343    results
344        .unresolved_imports
345        .retain(|i| changed_files.contains(&i.import.path));
346
347    // Unlisted deps: keep only if any importing file is changed
348    results.unlisted_dependencies.retain(|d| {
349        d.dep
350            .imported_from
351            .iter()
352            .any(|s| changed_files.contains(&s.path))
353    });
354
355    // Duplicate exports: filter locations to changed files, drop groups with < 2
356    for dup in &mut results.duplicate_exports {
357        dup.export
358            .locations
359            .retain(|loc| changed_files.contains(&loc.path));
360    }
361    results
362        .duplicate_exports
363        .retain(|d| d.export.locations.len() >= 2);
364
365    // Circular deps: keep cycles where at least one file is changed
366    results
367        .circular_dependencies
368        .retain(|c| c.cycle.files.iter().any(|f| changed_files.contains(f)));
369
370    // Re-export cycles: same file-level treatment as circular deps; the
371    // cycle is file-scoped so any member changing counts as touching the
372    // cycle.
373    results
374        .re_export_cycles
375        .retain(|c| c.cycle.files.iter().any(|f| changed_files.contains(f)));
376
377    // Boundary violations: keep if the importing file changed
378    results
379        .boundary_violations
380        .retain(|v| changed_files.contains(&v.violation.from_path));
381
382    // Stale suppressions: keep if the file changed
383    results
384        .stale_suppressions
385        .retain(|s| changed_files.contains(&s.path));
386
387    // Unresolved catalog references: anchored at the consumer package.json,
388    // so keep only findings whose path is in the changed set.
389    results
390        .unresolved_catalog_references
391        .retain(|r| changed_files.contains(&r.reference.path));
392    results
393        .empty_catalog_groups
394        .retain(|g| changed_files_contains_path(changed_files, &g.group.path));
395
396    // Unused / misconfigured dependency overrides: anchored at the declaring
397    // source file (pnpm-workspace.yaml or root package.json). Keep only
398    // findings whose source file is in the changed set.
399    results
400        .unused_dependency_overrides
401        .retain(|o| changed_files.contains(&o.entry.path));
402    results
403        .misconfigured_dependency_overrides
404        .retain(|o| changed_files.contains(&o.entry.path));
405}
406
407fn changed_files_contains_path(changed_files: &FxHashSet<PathBuf>, path: &Path) -> bool {
408    changed_files.contains(path)
409        || (path.is_relative() && changed_files.iter().any(|changed| changed.ends_with(path)))
410}
411
412/// Recompute duplication statistics after filtering.
413///
414/// Uses per-file line deduplication (matching `compute_stats` in
415/// `duplicates/detect.rs`) so overlapping clone instances don't inflate the
416/// duplicated line count.
417fn recompute_duplication_stats(report: &DuplicationReport) -> DuplicationStats {
418    let mut files_with_clones: FxHashSet<&Path> = FxHashSet::default();
419    let mut file_dup_lines: FxHashMap<&Path, FxHashSet<usize>> = FxHashMap::default();
420    let mut duplicated_tokens = 0_usize;
421    let mut clone_instances = 0_usize;
422
423    for group in &report.clone_groups {
424        for instance in &group.instances {
425            files_with_clones.insert(&instance.file);
426            clone_instances += 1;
427            let lines = file_dup_lines.entry(&instance.file).or_default();
428            for line in instance.start_line..=instance.end_line {
429                lines.insert(line);
430            }
431        }
432        duplicated_tokens += group.token_count * group.instances.len();
433    }
434
435    let duplicated_lines: usize = file_dup_lines.values().map(FxHashSet::len).sum();
436
437    DuplicationStats {
438        total_files: report.stats.total_files,
439        files_with_clones: files_with_clones.len(),
440        total_lines: report.stats.total_lines,
441        duplicated_lines,
442        total_tokens: report.stats.total_tokens,
443        duplicated_tokens,
444        clone_groups: report.clone_groups.len(),
445        clone_instances,
446        #[expect(
447            clippy::cast_precision_loss,
448            reason = "stat percentages are display-only; precision loss at usize::MAX line counts is acceptable"
449        )]
450        duplication_percentage: if report.stats.total_lines > 0 {
451            (duplicated_lines as f64 / report.stats.total_lines as f64) * 100.0
452        } else {
453            0.0
454        },
455        clone_groups_below_min_occurrences: report.stats.clone_groups_below_min_occurrences,
456    }
457}
458
459/// Filter a duplication report to only retain clone groups where at least one
460/// instance belongs to a changed file. Families, mirrored directories, and
461/// stats are rebuilt from the surviving groups so consumers see consistent,
462/// correctly-scoped numbers.
463#[expect(
464    clippy::implicit_hasher,
465    reason = "fallow standardizes on FxHashSet across the workspace"
466)]
467pub fn filter_duplication_by_changed_files(
468    report: &mut DuplicationReport,
469    changed_files: &FxHashSet<PathBuf>,
470    root: &Path,
471) {
472    report
473        .clone_groups
474        .retain(|g| g.instances.iter().any(|i| changed_files.contains(&i.file)));
475    report.clone_families = families::group_into_families(&report.clone_groups, root);
476    report.mirrored_directories =
477        families::detect_mirrored_directories(&report.clone_families, root);
478    report.stats = recompute_duplication_stats(report);
479}
480
481#[cfg(test)]
482mod tests {
483    use super::*;
484    use crate::duplicates::{CloneGroup, CloneInstance};
485    use crate::results::{
486        BoundaryViolation, CircularDependency, EmptyCatalogGroup, UnusedExport, UnusedFile,
487    };
488    use fallow_types::output_dead_code::{
489        BoundaryViolationFinding, CircularDependencyFinding, EmptyCatalogGroupFinding,
490        UnusedExportFinding, UnusedFileFinding,
491    };
492
493    #[test]
494    fn changed_files_error_describe_variants() {
495        assert!(
496            ChangedFilesError::InvalidRef("bad".to_owned())
497                .describe()
498                .contains("invalid git ref")
499        );
500        assert!(
501            ChangedFilesError::GitMissing("oops".to_owned())
502                .describe()
503                .contains("oops")
504        );
505        assert_eq!(
506            ChangedFilesError::NotARepository.describe(),
507            "not a git repository"
508        );
509        assert!(
510            ChangedFilesError::GitFailed("bad ref".to_owned())
511                .describe()
512                .contains("bad ref")
513        );
514    }
515
516    #[test]
517    fn augment_git_failed_appends_shallow_clone_hint_for_unknown_revision() {
518        let stderr = "fatal: ambiguous argument 'fallow-baseline...HEAD': unknown revision or path not in the working tree.";
519        let described = ChangedFilesError::GitFailed(stderr.to_owned()).describe();
520        assert!(described.contains(stderr), "original stderr preserved");
521        assert!(
522            described.contains("shallow clone"),
523            "hint surfaced: {described}"
524        );
525        assert!(
526            described.contains("fetch-depth: 0") || described.contains("git fetch --unshallow"),
527            "hint actionable: {described}"
528        );
529    }
530
531    #[test]
532    fn augment_git_failed_passthrough_for_other_errors() {
533        // Errors that aren't shallow-clone-related stay verbatim
534        let stderr = "fatal: refusing to merge unrelated histories";
535        let described = ChangedFilesError::GitFailed(stderr.to_owned()).describe();
536        assert_eq!(described, stderr);
537    }
538
539    #[test]
540    fn validate_git_ref_rejects_leading_dash() {
541        assert!(validate_git_ref("--upload-pack=evil").is_err());
542        assert!(validate_git_ref("-flag").is_err());
543    }
544
545    #[test]
546    fn validate_git_ref_accepts_baseline_tag() {
547        assert_eq!(
548            validate_git_ref("fallow-baseline").unwrap(),
549            "fallow-baseline"
550        );
551    }
552
553    #[test]
554    fn try_get_changed_files_rejects_invalid_ref() {
555        // Validation runs before git invocation, so any path will do
556        let err = try_get_changed_files(Path::new("/"), "--evil")
557            .expect_err("leading-dash ref must be rejected");
558        assert!(matches!(err, ChangedFilesError::InvalidRef(_)));
559        assert!(err.describe().contains("cannot start with"));
560    }
561
562    #[test]
563    fn validate_git_ref_rejects_option_like_ref() {
564        assert!(validate_git_ref("--output=/tmp/fallow-proof").is_err());
565    }
566
567    #[test]
568    fn validate_git_ref_allows_reflog_relative_date() {
569        assert!(validate_git_ref("HEAD@{1 week ago}").is_ok());
570    }
571
572    #[test]
573    fn try_get_changed_files_rejects_option_like_ref_before_git() {
574        let root = tempfile::tempdir().expect("create temp dir");
575        let proof_path = root.path().join("proof");
576
577        let result = try_get_changed_files(
578            root.path(),
579            &format!("--output={}", proof_path.to_string_lossy()),
580        );
581
582        assert!(matches!(result, Err(ChangedFilesError::InvalidRef(_))));
583        assert!(
584            !proof_path.exists(),
585            "invalid changedSince ref must not be passed through to git as an option"
586        );
587    }
588
589    #[test]
590    fn git_command_clears_parent_git_environment() {
591        let command = git_command(Path::new("."), &["status", "--short"]);
592        let overrides: Vec<_> = command.get_envs().collect();
593
594        for var in crate::git_env::AMBIENT_GIT_ENV_VARS {
595            assert!(
596                overrides
597                    .iter()
598                    .any(|(key, value)| key.to_str() == Some(*var) && value.is_none()),
599                "git helper must clear inherited {var}",
600            );
601        }
602    }
603
604    #[test]
605    fn filter_results_keeps_only_changed_files() {
606        let mut results = AnalysisResults::default();
607        results
608            .unused_files
609            .push(UnusedFileFinding::with_actions(UnusedFile {
610                path: "/a.ts".into(),
611            }));
612        results
613            .unused_files
614            .push(UnusedFileFinding::with_actions(UnusedFile {
615                path: "/b.ts".into(),
616            }));
617        results
618            .unused_exports
619            .push(UnusedExportFinding::with_actions(UnusedExport {
620                path: "/a.ts".into(),
621                export_name: "foo".into(),
622                is_type_only: false,
623                line: 1,
624                col: 0,
625                span_start: 0,
626                is_re_export: false,
627            }));
628
629        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
630        changed.insert("/a.ts".into());
631
632        filter_results_by_changed_files(&mut results, &changed);
633
634        assert_eq!(results.unused_files.len(), 1);
635        assert_eq!(results.unused_files[0].file.path, PathBuf::from("/a.ts"));
636        assert_eq!(results.unused_exports.len(), 1);
637    }
638
639    #[test]
640    fn filter_results_preserves_dependency_level_issues() {
641        let mut results = AnalysisResults::default();
642        results.unused_dependencies.push(
643            fallow_types::output_dead_code::UnusedDependencyFinding::with_actions(
644                crate::results::UnusedDependency {
645                    package_name: "lodash".into(),
646                    location: crate::results::DependencyLocation::Dependencies,
647                    path: "/pkg.json".into(),
648                    line: 3,
649                    used_in_workspaces: Vec::new(),
650                },
651            ),
652        );
653
654        let changed: FxHashSet<PathBuf> = FxHashSet::default();
655        filter_results_by_changed_files(&mut results, &changed);
656
657        // Dependency-level issues survive even when no source files changed
658        assert_eq!(results.unused_dependencies.len(), 1);
659    }
660
661    #[test]
662    fn filter_results_keeps_circular_dep_when_any_file_changed() {
663        let mut results = AnalysisResults::default();
664        results
665            .circular_dependencies
666            .push(CircularDependencyFinding::with_actions(
667                CircularDependency {
668                    files: vec!["/a.ts".into(), "/b.ts".into()],
669                    length: 2,
670                    line: 1,
671                    col: 0,
672                    is_cross_package: false,
673                },
674            ));
675
676        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
677        changed.insert("/b.ts".into());
678
679        filter_results_by_changed_files(&mut results, &changed);
680        assert_eq!(results.circular_dependencies.len(), 1);
681    }
682
683    #[test]
684    fn filter_results_drops_circular_dep_when_no_file_changed() {
685        let mut results = AnalysisResults::default();
686        results
687            .circular_dependencies
688            .push(CircularDependencyFinding::with_actions(
689                CircularDependency {
690                    files: vec!["/a.ts".into(), "/b.ts".into()],
691                    length: 2,
692                    line: 1,
693                    col: 0,
694                    is_cross_package: false,
695                },
696            ));
697
698        let changed: FxHashSet<PathBuf> = FxHashSet::default();
699        filter_results_by_changed_files(&mut results, &changed);
700        assert!(results.circular_dependencies.is_empty());
701    }
702
703    #[test]
704    fn filter_results_drops_boundary_violation_when_importer_unchanged() {
705        let mut results = AnalysisResults::default();
706        results
707            .boundary_violations
708            .push(BoundaryViolationFinding::with_actions(BoundaryViolation {
709                from_path: "/a.ts".into(),
710                to_path: "/b.ts".into(),
711                from_zone: "ui".into(),
712                to_zone: "data".into(),
713                import_specifier: "../data/db".into(),
714                line: 1,
715                col: 0,
716            }));
717
718        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
719        // only the imported file changed, not the importer
720        changed.insert("/b.ts".into());
721
722        filter_results_by_changed_files(&mut results, &changed);
723        assert!(results.boundary_violations.is_empty());
724    }
725
726    #[test]
727    fn filter_results_keeps_relative_empty_catalog_group_when_manifest_changed() {
728        let mut results = AnalysisResults::default();
729        results
730            .empty_catalog_groups
731            .push(EmptyCatalogGroupFinding::with_actions(EmptyCatalogGroup {
732                catalog_name: "legacy".into(),
733                path: PathBuf::from("pnpm-workspace.yaml"),
734                line: 4,
735            }));
736
737        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
738        changed.insert(PathBuf::from("/repo/pnpm-workspace.yaml"));
739
740        filter_results_by_changed_files(&mut results, &changed);
741
742        assert_eq!(results.empty_catalog_groups.len(), 1);
743        assert_eq!(results.empty_catalog_groups[0].group.catalog_name, "legacy");
744    }
745
746    #[test]
747    fn filter_duplication_keeps_groups_with_at_least_one_changed_instance() {
748        let mut report = DuplicationReport {
749            clone_groups: vec![CloneGroup {
750                instances: vec![
751                    CloneInstance {
752                        file: "/a.ts".into(),
753                        start_line: 1,
754                        end_line: 5,
755                        start_col: 0,
756                        end_col: 10,
757                        fragment: "code".into(),
758                    },
759                    CloneInstance {
760                        file: "/b.ts".into(),
761                        start_line: 1,
762                        end_line: 5,
763                        start_col: 0,
764                        end_col: 10,
765                        fragment: "code".into(),
766                    },
767                ],
768                token_count: 20,
769                line_count: 5,
770            }],
771            clone_families: vec![],
772            mirrored_directories: vec![],
773            stats: DuplicationStats {
774                total_files: 2,
775                files_with_clones: 2,
776                total_lines: 100,
777                duplicated_lines: 10,
778                total_tokens: 200,
779                duplicated_tokens: 40,
780                clone_groups: 1,
781                clone_instances: 2,
782                duplication_percentage: 10.0,
783                clone_groups_below_min_occurrences: 0,
784            },
785        };
786
787        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
788        changed.insert("/a.ts".into());
789
790        filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
791        assert_eq!(report.clone_groups.len(), 1);
792        // stats recomputed from surviving groups
793        assert_eq!(report.stats.clone_groups, 1);
794        assert_eq!(report.stats.clone_instances, 2);
795    }
796
797    // -----------------------------------------------------------------------
798    // Real git interactions (tempdir + git init). These exercise the
799    // path-resolution boundary between `git rev-parse --show-toplevel`,
800    // `git diff --name-only`, and `git ls-files --full-name --others` to
801    // catch regressions like issue #190 where the LSP workspace was a
802    // subdirectory of the git repo and changed-file paths were joined
803    // against the wrong base.
804    // -----------------------------------------------------------------------
805
806    /// Initialize a temp git repo with a single committed file plus a tag
807    /// at HEAD. Returns the canonical repo root.
808    fn init_repo(repo: &Path) -> PathBuf {
809        run_git(repo, &["init", "--quiet", "--initial-branch=main"]);
810        run_git(repo, &["config", "user.email", "test@example.com"]);
811        run_git(repo, &["config", "user.name", "test"]);
812        run_git(repo, &["config", "commit.gpgsign", "false"]);
813        std::fs::write(repo.join("seed.txt"), "seed\n").unwrap();
814        run_git(repo, &["add", "seed.txt"]);
815        run_git(repo, &["commit", "--quiet", "-m", "initial"]);
816        run_git(repo, &["tag", "fallow-baseline"]);
817        repo.canonicalize().unwrap()
818    }
819
820    fn run_git(cwd: &Path, args: &[&str]) {
821        let output = std::process::Command::new("git")
822            .args(args)
823            .current_dir(cwd)
824            .output()
825            .expect("git available");
826        assert!(
827            output.status.success(),
828            "git {args:?} failed: {}",
829            String::from_utf8_lossy(&output.stderr)
830        );
831    }
832
833    /// Workspace at git root, an untracked file is included in the
834    /// changed-files set with an absolute path joined from the repo root.
835    #[test]
836    fn try_get_changed_files_workspace_at_repo_root() {
837        let tmp = tempfile::tempdir().unwrap();
838        let repo = init_repo(tmp.path());
839        std::fs::create_dir_all(repo.join("src")).unwrap();
840        std::fs::write(repo.join("src/new.ts"), "export const x = 1;\n").unwrap();
841
842        let changed = try_get_changed_files(&repo, "fallow-baseline").unwrap();
843
844        let expected = repo.join("src/new.ts");
845        assert!(
846            changed.contains(&expected),
847            "changed set should contain {expected:?}; actual: {changed:?}"
848        );
849    }
850
851    /// Regression test for #190. When the workspace is a subdirectory of
852    /// the git repository, `git diff --name-only` emits paths relative to
853    /// the repo root (e.g., `frontend/src/new.ts`). Without the
854    /// rev-parse-based toplevel resolution the function joined those
855    /// against the workspace root, producing bogus paths like
856    /// `<repo>/frontend/frontend/src/new.ts` that never matched
857    /// `analyze_project` output and silently dropped the filter.
858    #[test]
859    fn try_get_changed_files_workspace_in_subdirectory() {
860        let tmp = tempfile::tempdir().unwrap();
861        let repo = init_repo(tmp.path());
862        let frontend = repo.join("frontend");
863        std::fs::create_dir_all(frontend.join("src")).unwrap();
864        std::fs::write(frontend.join("src/new.ts"), "export const x = 1;\n").unwrap();
865
866        let changed = try_get_changed_files(&frontend, "fallow-baseline").unwrap();
867
868        let expected = repo.join("frontend/src/new.ts");
869        assert!(
870            changed.contains(&expected),
871            "changed set should contain canonical {expected:?}; actual: {changed:?}"
872        );
873        // Verify the bogus double-frontend path is NOT in the set
874        let bogus = frontend.join("frontend/src/new.ts");
875        assert!(
876            !changed.contains(&bogus),
877            "changed set must not contain double-frontend path {bogus:?}"
878        );
879    }
880
881    /// A *committed* change in a sibling subdirectory (outside the
882    /// workspace) appears in the changed-files set because `git diff`
883    /// is repo-wide regardless of cwd. The downstream
884    /// `filter_results_by_changed_files` retains it only if
885    /// `analyze_project` saw it; for a workspace scoped to one subdir,
886    /// the sibling file is not in the analysis paths and falls away at
887    /// the result-merge boundary, not here. This test pins the contract:
888    /// for committed changes, the set is repo-wide.
889    ///
890    /// Note: `git ls-files --others --exclude-standard` only lists
891    /// untracked files in cwd's subtree, so untracked siblings are NOT
892    /// in the set when invoked from a subdirectory. That's harmless for
893    /// the LSP because `analyze_project` only walks files under the
894    /// workspace root either way.
895    #[test]
896    fn try_get_changed_files_includes_committed_sibling_changes() {
897        let tmp = tempfile::tempdir().unwrap();
898        let repo = init_repo(tmp.path());
899        let backend = repo.join("backend");
900        std::fs::create_dir_all(&backend).unwrap();
901        std::fs::write(backend.join("server.py"), "print('hi')\n").unwrap();
902        run_git(&repo, &["add", "."]);
903        run_git(&repo, &["commit", "--quiet", "-m", "add backend"]);
904
905        let frontend = repo.join("frontend");
906        std::fs::create_dir_all(&frontend).unwrap();
907
908        let changed = try_get_changed_files(&frontend, "fallow-baseline").unwrap();
909
910        let expected = repo.join("backend/server.py");
911        assert!(
912            changed.contains(&expected),
913            "committed sibling backend/server.py should be in the set: {changed:?}"
914        );
915    }
916
917    /// Modifying a tracked file shows up via `git diff --name-only HEAD`,
918    /// not just via `ls-files --others`. Confirm the path-join fix
919    /// applies to that codepath too.
920    #[test]
921    fn try_get_changed_files_includes_modified_tracked_file() {
922        let tmp = tempfile::tempdir().unwrap();
923        let repo = init_repo(tmp.path());
924        let frontend = repo.join("frontend");
925        std::fs::create_dir_all(frontend.join("src")).unwrap();
926        std::fs::write(frontend.join("src/old.ts"), "export const x = 1;\n").unwrap();
927        run_git(&repo, &["add", "."]);
928        run_git(&repo, &["commit", "--quiet", "-m", "add old"]);
929        run_git(&repo, &["tag", "fallow-baseline-v2"]);
930        // Modify the tracked file (no commit, so diff-HEAD picks it up)
931        std::fs::write(frontend.join("src/old.ts"), "export const x = 2;\n").unwrap();
932
933        let changed = try_get_changed_files(&frontend, "fallow-baseline-v2").unwrap();
934
935        let expected = repo.join("frontend/src/old.ts");
936        assert!(
937            changed.contains(&expected),
938            "modified tracked file {expected:?} missing from set: {changed:?}"
939        );
940    }
941
942    /// `resolve_git_toplevel` returns the canonical repo path even when
943    /// invoked from inside a subdirectory and via a symlinked input path.
944    /// On macOS this guards against the `/tmp` -> `/private/tmp`
945    /// canonicalization gap that would otherwise make the LSP filter set
946    /// disagree with `analyze_project` paths.
947    #[test]
948    fn resolve_git_toplevel_returns_canonical_path() {
949        let tmp = tempfile::tempdir().unwrap();
950        let repo = init_repo(tmp.path());
951        let frontend = repo.join("frontend");
952        std::fs::create_dir_all(&frontend).unwrap();
953
954        let toplevel = resolve_git_toplevel(&frontend).unwrap();
955        assert_eq!(toplevel, repo, "toplevel should equal canonical repo root");
956        assert_eq!(
957            toplevel,
958            toplevel.canonicalize().unwrap(),
959            "resolved toplevel should already be canonical"
960        );
961    }
962
963    /// Outside any git repo, `resolve_git_toplevel` returns
964    /// `NotARepository` rather than panicking or returning a wrong path.
965    /// The LSP relies on this to fall back to the workspace root cleanly.
966    #[test]
967    fn resolve_git_toplevel_not_a_repository() {
968        let tmp = tempfile::tempdir().unwrap();
969        let result = resolve_git_toplevel(tmp.path());
970        assert!(
971            matches!(result, Err(ChangedFilesError::NotARepository)),
972            "expected NotARepository, got {result:?}"
973        );
974    }
975
976    /// `try_get_changed_files` propagates the not-a-repo error so the
977    /// LSP can warn and fall back to full-scope results.
978    #[test]
979    fn try_get_changed_files_not_a_repository() {
980        let tmp = tempfile::tempdir().unwrap();
981        let result = try_get_changed_files(tmp.path(), "main");
982        assert!(matches!(result, Err(ChangedFilesError::NotARepository)));
983    }
984
985    #[test]
986    fn filter_duplication_drops_groups_with_no_changed_instance() {
987        let mut report = DuplicationReport {
988            clone_groups: vec![CloneGroup {
989                instances: vec![CloneInstance {
990                    file: "/a.ts".into(),
991                    start_line: 1,
992                    end_line: 5,
993                    start_col: 0,
994                    end_col: 10,
995                    fragment: "code".into(),
996                }],
997                token_count: 20,
998                line_count: 5,
999            }],
1000            clone_families: vec![],
1001            mirrored_directories: vec![],
1002            stats: DuplicationStats {
1003                total_files: 1,
1004                files_with_clones: 1,
1005                total_lines: 100,
1006                duplicated_lines: 5,
1007                total_tokens: 100,
1008                duplicated_tokens: 20,
1009                clone_groups: 1,
1010                clone_instances: 1,
1011                duplication_percentage: 5.0,
1012                clone_groups_below_min_occurrences: 0,
1013            },
1014        };
1015
1016        let changed: FxHashSet<PathBuf> = FxHashSet::default();
1017        filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
1018        assert!(report.clone_groups.is_empty());
1019        assert_eq!(report.stats.clone_groups, 0);
1020        assert_eq!(report.stats.clone_instances, 0);
1021        assert!((report.stats.duplication_percentage - 0.0).abs() < f64::EPSILON);
1022    }
1023}