Skip to main content

fallow_config/workspace/
diagnostics.rs

//! Workspace and source-discovery diagnostics.
//!
//! Surfaces malformed `package.json`, unreachable glob matches, missing
//! tsconfig references, undeclared workspaces, and source files skipped during
//! source discovery as typed [`WorkspaceDiagnostic`] values. Each diagnostic
//! also emits a deduplicated `tracing::warn!` so users running fallow with
//! default tracing filters see the cause of "fallow doesn't see my package" or
//! "fallow ate all my memory."
//!
//! Repeated `GlobMatchedNoPackageJson` diagnostics are aggregated by glob
//! pattern at emission time so a wide glob matching hundreds of package-less
//! directories on a large monorepo collapses to one bounded summary line per
//! pattern instead of one line per directory (issue #637). Repeated
//! `TsconfigReferenceDirMissing` diagnostics are likewise collapsed into a
//! single summary line. The structured `Vec<WorkspaceDiagnostic>` returned to
//! callers stays full; only the stderr surface is bounded.
//!
//! Mirrors the dedupe + capture pattern in
//! `crates/config/src/config/parsing.rs::warn_on_unknown_rule_keys` (issue
//! #467).
20
21use std::path::{Path, PathBuf};
22use std::sync::{Mutex, OnceLock};
23
24use rustc_hash::{FxHashMap, FxHashSet};
25use schemars::JsonSchema;
26use serde::{Deserialize, Serialize};
27
28/// Why a workspace-discovery candidate was rejected, or why a sibling
29/// directory looked workspace-like but was not declared.
30///
31/// Wire-format names are kebab-case so JSON consumers (CI integrations, MCP
32/// agents, LSP clients) get a stable, language-neutral identifier.
33#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq, Hash)]
34#[serde(tag = "kind", rename_all = "kebab-case")]
35pub enum WorkspaceDiagnosticKind {
36    /// A directory contains `package.json` but is not declared as a workspace
37    /// in `package.json` `workspaces`, `pnpm-workspace.yaml`, or
38    /// `tsconfig.json` `references`. Surfaced by
39    /// `find_undeclared_workspaces`.
40    UndeclaredWorkspace,
41    /// A declared workspace's `package.json` failed to parse. The directory is
42    /// dropped from discovery, but analysis still proceeds (degraded).
43    MalformedPackageJson {
44        /// `serde_json` parse error text.
45        error: String,
46    },
47    /// A workspace glob pattern matched a directory that contains no
48    /// `package.json`. Honors the extended skip list and `ignorePatterns`
49    /// before emitting.
50    GlobMatchedNoPackageJson {
51        /// The glob pattern that matched the directory.
52        pattern: String,
53    },
54    /// `tsconfig.json` exists at the root but failed to parse. Project
55    /// references cannot be discovered.
56    MalformedTsconfig {
57        /// JSONC parse error text.
58        error: String,
59    },
60    /// `tsconfig.json` lists a `references[].path` that does not point to an
61    /// existing directory.
62    TsconfigReferenceDirMissing,
63    /// A source file was skipped at discovery because it exceeds the configured
64    /// per-file size limit (`--max-file-size` / `FALLOW_MAX_FILE_SIZE`, default
65    /// 5 MB). The file is never read, parsed, or analyzed, guarding against the
66    /// out-of-memory blowup a single multi-MB generated/vendored/bundled file
67    /// causes (issue #1086). Surfaced by source discovery, not workspace
68    /// discovery, but shares this channel so the skip is visible in
69    /// `workspace_diagnostics[]` on `fallow dead-code / dupes / health` JSON.
70    SkippedLargeFile {
71        /// On-disk size of the skipped file in bytes.
72        size_bytes: u64,
73    },
74    /// A large JavaScript bundle was skipped at discovery because it appears to
75    /// be minified generated output. The file is never parsed or analyzed,
76    /// guarding against sub-limit bundles that can still create very large ASTs
77    /// and extraction payloads (issue #1086). Use `--max-file-size 0` when the
78    /// bundled file really should be analyzed.
79    SkippedMinifiedFile {
80        /// On-disk size of the skipped file in bytes.
81        size_bytes: u64,
82    },
83}
84
85impl WorkspaceDiagnosticKind {
86    /// Stable kebab-case identifier used in dedupe keys and tracing payloads.
87    #[must_use]
88    pub const fn id(&self) -> &'static str {
89        match self {
90            Self::UndeclaredWorkspace => "undeclared-workspace",
91            Self::MalformedPackageJson { .. } => "malformed-package-json",
92            Self::GlobMatchedNoPackageJson { .. } => "glob-matched-no-package-json",
93            Self::MalformedTsconfig { .. } => "malformed-tsconfig",
94            Self::TsconfigReferenceDirMissing => "tsconfig-reference-dir-missing",
95            Self::SkippedLargeFile { .. } => "skipped-large-file",
96            Self::SkippedMinifiedFile { .. } => "skipped-minified-file",
97        }
98    }
99
100    /// Whether this diagnostic is produced by SOURCE discovery (the file walk in
101    /// `discover_files`) rather than WORKSPACE discovery (config load). Source-
102    /// discovery diagnostics are APPENDED to the registry after config load, so
103    /// [`stash_workspace_diagnostics`] must preserve them when it replaces the
104    /// workspace-discovery set, otherwise the per-analysis config re-loads in
105    /// combined-mode (`fallow` with no subcommand re-loads config for check,
106    /// dupes, and health) wipe them before the JSON envelope is built (issue
107    /// #1086).
108    #[must_use]
109    pub const fn is_source_discovery(&self) -> bool {
110        matches!(
111            self,
112            Self::SkippedLargeFile { .. } | Self::SkippedMinifiedFile { .. }
113        )
114    }
115}
116
/// Format `bytes` as mebibyte-based megabytes with a single decimal digit,
/// e.g. `12.3 MB`. Intended for human-readable diagnostic text only.
#[must_use]
fn format_size_mb(bytes: u64) -> String {
    /// Number of bytes in one displayed "MB" (1024 * 1024).
    const BYTES_PER_MB: f64 = 1024.0 * 1024.0;

    #[expect(
        clippy::cast_precision_loss,
        reason = "display-only size figure; precision loss past 2^53 bytes is irrelevant"
    )]
    let megabytes = bytes as f64 / BYTES_PER_MB;
    format!("{megabytes:.1} MB")
}
128
129/// A diagnostic about a workspace-discovery candidate.
130///
131/// The `message` field is a human-readable rendering derived from `kind`. It
132/// always ends with a concrete next step ("fix the JSON syntax", "remove from
133/// `workspaces`", "add to `ignorePatterns`") so first-time users have a path
134/// forward.
135#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
136pub struct WorkspaceDiagnostic {
137    /// Path to the directory or file that triggered the diagnostic.
138    pub path: PathBuf,
139    /// Kind discriminator with the typed payload.
140    #[serde(flatten)]
141    pub kind: WorkspaceDiagnosticKind,
142    /// Human-readable rendering derived from `kind` + `path`. Always ends
143    /// with a next-step hint.
144    pub message: String,
145}
146
147impl WorkspaceDiagnostic {
148    /// Construct a diagnostic with the message rendered from `kind` + `path`.
149    ///
150    /// `root` is used to produce project-relative paths in the message text
151    /// AND inside the variant payload (e.g. the `error` field of
152    /// `MalformedPackageJson` / `MalformedTsconfig` which embed the absolute
153    /// file path from `PackageJson::load()`'s error text). Without the
154    /// payload-side normalisation the embedded path would survive
155    /// environment-specific differences (CI vs Docker vs local) because the
156    /// post-serialisation `strip_root_prefix` only catches whole-string
157    /// matches, not paths embedded mid-sentence.
158    ///
159    /// If `path` is not under `root` (e.g. canonicalisation crossed a
160    /// symlink), the absolute path is emitted instead.
161    #[must_use]
162    pub fn new(root: &Path, path: PathBuf, kind: WorkspaceDiagnosticKind) -> Self {
163        let kind = normalise_payload_paths(root, kind);
164        let message = render_message(root, &path, &kind);
165        Self {
166            path,
167            kind,
168            message,
169        }
170    }
171}
172
173/// Strip the project root from absolute paths embedded inside variant
174/// payloads (today: the `error` field of `MalformedPackageJson` and
175/// `MalformedTsconfig`). Mirrors the per-platform `display()` byte sequence
176/// so the substring match works on Windows too.
177fn normalise_payload_paths(root: &Path, kind: WorkspaceDiagnosticKind) -> WorkspaceDiagnosticKind {
178    let root_str = root.display().to_string();
179    let root_alt = root_str.replace('\\', "/");
180    let normalise = |text: String| -> String {
181        let stripped = text
182            .replace(&format!("{root_str}/"), "")
183            .replace(&format!("{root_alt}/"), "");
184        stripped
185            .replace(&format!("{root_str}\\"), "")
186            .replace(&format!("{root_alt}\\"), "")
187    };
188    match kind {
189        WorkspaceDiagnosticKind::MalformedPackageJson { error } => {
190            WorkspaceDiagnosticKind::MalformedPackageJson {
191                error: normalise(error),
192            }
193        }
194        WorkspaceDiagnosticKind::MalformedTsconfig { error } => {
195            WorkspaceDiagnosticKind::MalformedTsconfig {
196                error: normalise(error),
197            }
198        }
199        other => other,
200    }
201}
202
/// Render `path` relative to `root`, always using forward slashes.
///
/// Falls back to `path` unchanged (apart from the slash normalisation) when it
/// does not start with `root`. Used by both [`render_message`] and
/// [`build_glob_group_message`] so per-instance and aggregated messages format
/// paths the same way; the forward-slash normalisation is load-bearing for
/// cross-platform output stability.
fn display_relative(root: &Path, path: &Path) -> String {
    let relative = match path.strip_prefix(root) {
        Ok(inside_root) => inside_root,
        Err(_) => path,
    };
    let rendered = relative.display().to_string();
    rendered.replace('\\', "/")
}
214
215fn render_message(root: &Path, path: &Path, kind: &WorkspaceDiagnosticKind) -> String {
216    let display = display_relative(root, path);
217    match kind {
218        WorkspaceDiagnosticKind::UndeclaredWorkspace => format!(
219            "Directory '{display}' contains package.json but is not declared as a workspace. \
220             Add it to package.json workspaces or pnpm-workspace.yaml, or add it to ignorePatterns."
221        ),
222        WorkspaceDiagnosticKind::MalformedPackageJson { error } => format!(
223            "Dropped workspace '{display}': package.json is not valid JSON ({error}). \
224             Fix the JSON syntax or remove '{display}' from the workspaces pattern."
225        ),
226        WorkspaceDiagnosticKind::GlobMatchedNoPackageJson { pattern } => format!(
227            "Glob '{pattern}' matched '{display}' but no package.json is present. \
228             Add a package.json, narrow the pattern, or add '{display}' to ignorePatterns."
229        ),
230        WorkspaceDiagnosticKind::MalformedTsconfig { error } => format!(
231            "tsconfig.json at '{display}' failed to parse ({error}); \
232             project references will be ignored. Fix the JSON syntax."
233        ),
234        WorkspaceDiagnosticKind::TsconfigReferenceDirMissing => format!(
235            "tsconfig.json references '{display}' but the directory does not exist. \
236             Update or remove the reference, or restore the missing directory."
237        ),
238        WorkspaceDiagnosticKind::SkippedLargeFile { size_bytes } => format!(
239            "Skipped '{display}' ({size}): exceeds the max file size limit. \
240             Its imports and exports are not analyzed. Raise the limit with \
241             --max-file-size <MB> (or FALLOW_MAX_FILE_SIZE), or add '{display}' \
242             to ignorePatterns.",
243            size = format_size_mb(*size_bytes)
244        ),
245        WorkspaceDiagnosticKind::SkippedMinifiedFile { size_bytes } => format!(
246            "Skipped '{display}' ({size}): appears to be minified generated JavaScript. \
247             Its imports and exports are not analyzed. Add '{display}' to ignorePatterns, \
248             rename it with a .min.js suffix, or use --max-file-size 0 if this file \
249             should be analyzed.",
250            size = format_size_mb(*size_bytes)
251        ),
252    }
253}
254
/// Fatal workspace-discovery failure: analysis cannot proceed.
///
/// Only `discover_workspaces_with_diagnostics` (in the parent module) returns
/// this, and only when the root `package.json` itself is malformed. Without a
/// parseable root no workspace patterns can be collected, so any analysis
/// output would be fiction. The CLI surfaces this as exit 2.
#[derive(Clone, Debug)]
pub enum WorkspaceLoadError {
    /// The project root's `package.json` exists but failed to parse.
    MalformedRootPackageJson {
        /// Location of the root `package.json`.
        path: PathBuf,
        /// Parse error text.
        error: String,
    },
}

impl std::fmt::Display for WorkspaceLoadError {
    /// Writes a one-line description naming the file, the parse error, and
    /// the next step for the user.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Single-variant enum, so the destructuring is irrefutable.
        let Self::MalformedRootPackageJson { path, error } = self;
        write!(
            f,
            "root package.json at '{}' is not valid JSON ({error}). \
             Fix the syntax before re-running fallow.",
            path.display()
        )
    }
}

impl std::error::Error for WorkspaceLoadError {}
281
/// How many example directories an aggregated warning names before the rest
/// are summarised as "and N more".
///
/// Applied by [`summarize_examples`], and therefore by both the aggregated
/// `GlobMatchedNoPackageJson` and the aggregated `TsconfigReferenceDirMissing`
/// messages, keeping each fanned-out group to one bounded stderr line.
const GLOB_EXAMPLE_CAP: usize = 3;
286
287/// Process-wide set of already-emitted diagnostic dedupe keys. Per-instance
288/// keys (`root::kind::path`) and aggregated per-pattern keys
289/// (`root::glob-matched-no-package-json-agg::pattern`) share one set so
290/// combined-mode (check + dupes + health through one loader) and watch-mode
291/// reruns warn at most once per logical diagnostic. The two key namespaces are
292/// disjoint, so there is no cross-talk.
293fn warned_keys() -> &'static Mutex<FxHashSet<String>> {
294    static WARNED: OnceLock<Mutex<FxHashSet<String>>> = OnceLock::new();
295    WARNED.get_or_init(|| Mutex::new(FxHashSet::default()))
296}
297
298/// Insert `key` and return `true` when it was newly inserted (caller should
299/// emit). On a poisoned mutex returns `true` so over-warning beats swallowing
300/// a typo. Mirrors `parsing::warn_on_unknown_rule_keys` and
301/// `plugins::registry::should_warn`.
302fn should_emit(key: String) -> bool {
303    warned_keys().lock().map_or(true, |mut set| set.insert(key))
304}
305
/// One stderr warning that [`plan_warnings`] has decided should be emitted.
///
/// Holding the plan as plain data keeps the partition/aggregation logic
/// unit-testable without a tracing subscriber or the process-wide dedupe set.
#[derive(Debug, Eq, PartialEq)]
struct PlannedWarning {
    /// Key checked against the process-wide dedupe set before emission.
    dedupe_key: String,
    /// Fully rendered warning text.
    message: String,
}
315
316/// Turn a batch of workspace diagnostics into the bounded set of stderr
317/// warnings to emit, collapsing the two kinds that fan out on large monorepos
318/// (issue #637):
319/// - `GlobMatchedNoPackageJson`: aggregated by glob pattern, one summary line
320///   per pattern instead of one line per package-less directory.
321/// - `TsconfigReferenceDirMissing`: aggregated together, one summary line
322///   instead of one per missing `references[]` entry in the root tsconfig.
323///
324/// Pure: no tracing, no dedupe-set mutation. A group of exactly one keeps
325/// today's per-instance message byte-for-byte (no regression for the common
326/// single-match case); every other kind plans one per-instance warning. The
327/// returned plan lists non-aggregated diagnostics first (in first-seen order),
328/// then the glob-pattern summaries, then the tsconfig summary; ordering does
329/// not affect correctness since these are independent stderr lines.
330fn plan_warnings(root: &Path, diagnostics: &[WorkspaceDiagnostic]) -> Vec<PlannedWarning> {
331    let canonical = dunce::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
332    let per_instance = |diag: &WorkspaceDiagnostic| PlannedWarning {
333        dedupe_key: format!(
334            "{}::{}::{}",
335            canonical.display(),
336            diag.kind.id(),
337            diag.path.display()
338        ),
339        message: diag.message.clone(),
340    };
341
342    let mut plans: Vec<PlannedWarning> = Vec::new();
343    let mut glob_groups: Vec<(&str, Vec<&WorkspaceDiagnostic>)> = Vec::new();
344    let mut tsconfig_ref_misses: Vec<&WorkspaceDiagnostic> = Vec::new();
345    for diag in diagnostics {
346        match &diag.kind {
347            WorkspaceDiagnosticKind::GlobMatchedNoPackageJson { pattern } => {
348                match glob_groups.iter_mut().find(|(p, _)| *p == pattern.as_str()) {
349                    Some((_, group)) => group.push(diag),
350                    None => glob_groups.push((pattern.as_str(), vec![diag])),
351                }
352            }
353            WorkspaceDiagnosticKind::TsconfigReferenceDirMissing => tsconfig_ref_misses.push(diag),
354            _ => plans.push(per_instance(diag)),
355        }
356    }
357
358    for (pattern, group) in glob_groups {
359        if let [only] = group.as_slice() {
360            plans.push(per_instance(only));
361            continue;
362        }
363        let paths: Vec<&Path> = group.iter().map(|d| d.path.as_path()).collect();
364        plans.push(PlannedWarning {
365            dedupe_key: format!(
366                "{}::glob-matched-no-package-json-agg::{pattern}",
367                canonical.display()
368            ),
369            message: build_glob_group_message(root, pattern, &paths),
370        });
371    }
372
373    if let [only] = tsconfig_ref_misses.as_slice() {
374        plans.push(per_instance(only));
375    } else if !tsconfig_ref_misses.is_empty() {
376        let paths: Vec<&Path> = tsconfig_ref_misses
377            .iter()
378            .map(|d| d.path.as_path())
379            .collect();
380        plans.push(PlannedWarning {
381            dedupe_key: format!(
382                "{}::tsconfig-reference-dir-missing-agg",
383                canonical.display()
384            ),
385            message: build_tsconfig_refs_message(root, &paths),
386        });
387    }
388
389    plans
390}
391
392/// Emit `tracing::warn!` lines for a batch of workspace diagnostics.
393///
394/// Delegates the partition/aggregation decisions to the pure [`plan_warnings`]
395/// and applies the process-wide dedupe so combined-mode (check + dupes + health
396/// through one loader) and watch-mode reruns warn at most once per logical
397/// diagnostic. The returned/stashed `Vec<WorkspaceDiagnostic>` is unaffected;
398/// only the stderr surface is bounded, so structured JSON consumers still see
399/// every diagnostic.
400pub(super) fn emit_diagnostics(root: &Path, diagnostics: &[WorkspaceDiagnostic]) {
401    #[cfg(test)]
402    for diag in diagnostics {
403        capture_diag(diag);
404    }
405
406    for plan in plan_warnings(root, diagnostics) {
407        if should_emit(plan.dedupe_key) {
408            tracing::warn!("fallow: {}", plan.message);
409        }
410    }
411}
412
413/// Render up to [`GLOB_EXAMPLE_CAP`] project-relative example paths (sorted for
414/// deterministic output) with an "and N more" tail when the count exceeds the
415/// cap. Returns the joined example string and the total path count. Shared by
416/// the aggregated-message builders.
417fn summarize_examples(root: &Path, paths: &[&Path]) -> (String, usize) {
418    let mut examples: Vec<String> = paths.iter().map(|p| display_relative(root, p)).collect();
419    examples.sort();
420    let count = examples.len();
421    let shown = examples
422        .iter()
423        .take(GLOB_EXAMPLE_CAP)
424        .cloned()
425        .collect::<Vec<_>>()
426        .join(", ");
427    let remaining = count.saturating_sub(GLOB_EXAMPLE_CAP);
428    let listed = if remaining > 0 {
429        format!("{shown}, and {remaining} more")
430    } else {
431        shown
432    };
433    (listed, count)
434}
435
436/// Build the aggregated message for a glob pattern that matched `paths`
437/// package-less directories (always called with `paths.len() >= 2`).
438fn build_glob_group_message(root: &Path, pattern: &str, paths: &[&Path]) -> String {
439    let (listed, count) = summarize_examples(root, paths);
440    format!(
441        "Glob '{pattern}' matched {count} directories with no package.json \
442         (e.g. {listed}). Add a package.json, narrow the pattern, or add \
443         them to ignorePatterns."
444    )
445}
446
447/// Build the aggregated message for `paths` `tsconfig.json` `references[]`
448/// entries that point at missing directories (always called with
449/// `paths.len() >= 2`).
450fn build_tsconfig_refs_message(root: &Path, paths: &[&Path]) -> String {
451    let (listed, count) = summarize_examples(root, paths);
452    format!(
453        "tsconfig.json references {count} directories that do not exist \
454         (e.g. {listed}). Update or remove the references, or restore the \
455         missing directories."
456    )
457}
458
thread_local! {
    /// Test-only, per-thread buffer that records diagnostics passed to
    /// `emit_diagnostics`, so tests can assert on emission without inspecting
    /// tracing output.
    ///
    /// `None` means no capture is installed on this thread; `Some(buf)` means
    /// diagnostics are being collected into `buf`. Because the slot is
    /// thread-local, tests running in parallel do not see each other's
    /// diagnostics. The static is compiled only under `cfg(test)`.
    ///
    /// Mirrors `parsing::UNKNOWN_RULE_CAPTURE` (issue #467).
    #[cfg(test)]
    static WORKSPACE_DIAGNOSTIC_CAPTURE: std::cell::RefCell<Option<Vec<WorkspaceDiagnostic>>> =
        const { std::cell::RefCell::new(None) };
}
470
/// Record a clone of `diag` in the current thread's capture buffer, if one is
/// installed.
///
/// Does nothing when no [`capture_workspace_warnings`] call is active on this
/// thread. [`emit_diagnostics`] calls this once per diagnostic before the
/// dedupe gate, so every diagnostic is observed whether it ends up emitted
/// per-instance, aggregated, or suppressed as a duplicate.
#[cfg(test)]
fn capture_diag(diag: &WorkspaceDiagnostic) {
    WORKSPACE_DIAGNOSTIC_CAPTURE.with(|slot| {
        let mut installed = slot.borrow_mut();
        if let Some(buffer) = installed.as_mut() {
            buffer.push(diag.clone());
        }
    });
}
484
/// Run `body` with a fresh thread-local capture buffer installed.
///
/// Returns `body`'s result together with every diagnostic that passed through
/// [`emit_diagnostics`] on the current thread while `body` ran, in order. The
/// buffer is removed again before returning.
///
/// Test-only. Capture happens ahead of the process-wide dedupe, so two
/// captures on the same root + kind + path inside one test both observe the
/// emission.
#[cfg(test)]
#[must_use]
pub fn capture_workspace_warnings<F: FnOnce() -> R, R>(body: F) -> (R, Vec<WorkspaceDiagnostic>) {
    // Install an empty buffer, discarding whatever was there before.
    WORKSPACE_DIAGNOSTIC_CAPTURE.with(|slot| drop(slot.replace(Some(Vec::new()))));

    let outcome = body();

    // Uninstall the buffer and hand its contents to the caller.
    let captured = WORKSPACE_DIAGNOSTIC_CAPTURE.with(|slot| slot.take().unwrap_or_default());
    (outcome, captured)
}
503
504/// Process-wide registry of workspace-discovery diagnostics, keyed by
505/// canonical root. Populated by callers that run
506/// [`super::discover_workspaces_with_diagnostics`] and (after config load
507/// completes) by the analysis pipeline's `find_undeclared_workspaces_*`
508/// pass. Consumers (`fallow list --workspaces`, the JSON envelope on
509/// `fallow dead-code / dupes / health`) read via [`workspace_diagnostics_for`].
510///
511/// Canonicalisation matches the dedupe-key canonicalisation in
512/// [`plan_warnings`]: two callers on the same physical root coalesce, and
513/// nested-monorepo callers on different roots stay independent.
514static WORKSPACE_DIAGNOSTICS: OnceLock<Mutex<FxHashMap<PathBuf, Vec<WorkspaceDiagnostic>>>> =
515    OnceLock::new();
516
517/// Replace the workspace-discovery diagnostics for `root` with `diagnostics`,
518/// PRESERVING any source-discovery diagnostics (see
519/// [`WorkspaceDiagnosticKind::is_source_discovery`]) already appended for the
520/// root.
521///
522/// Called at config-load time after [`super::discover_workspaces_with_diagnostics`]
523/// completes; the analyze pipeline then APPENDS undeclared-workspace and
524/// source-discovery (`skipped-large-file`) diagnostics via
525/// [`append_workspace_diagnostics`]. The workspace-discovery set is authoritative
526/// and replaced wholesale (so a fixed `package.json` clears its stale diagnostic
527/// across watch-mode reruns), but source-discovery diagnostics are appended
528/// AFTER this stash, so combined-mode's per-analysis config re-loads would
529/// otherwise wipe a `skipped-large-file` entry that the first analysis's
530/// discovery already recorded (issue #1086).
531pub fn stash_workspace_diagnostics(root: &Path, diagnostics: Vec<WorkspaceDiagnostic>) {
532    let canonical = dunce::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
533    let registry = WORKSPACE_DIAGNOSTICS.get_or_init(|| Mutex::new(FxHashMap::default()));
534    if let Ok(mut map) = registry.lock() {
535        let mut combined = diagnostics;
536        if let Some(existing) = map.get(&canonical) {
537            combined.extend(
538                existing
539                    .iter()
540                    .filter(|d| d.kind.is_source_discovery())
541                    .cloned(),
542            );
543        }
544        map.insert(canonical, combined);
545    }
546}
547
548/// Append `additions` to the workspace-discovery diagnostics for `root`,
549/// skipping any entry whose `(kind id, canonical path)` is already present.
550///
551/// Used by the analyze pipeline's undeclared-workspace pass to fold its
552/// findings into the registry without re-emitting diagnostics that the
553/// config-load pass already surfaced (e.g. a directory whose `package.json`
554/// is malformed should NOT also produce a separate "undeclared" diagnostic
555/// alongside the malformed-package-json one).
556pub fn append_workspace_diagnostics(root: &Path, additions: Vec<WorkspaceDiagnostic>) {
557    if additions.is_empty() {
558        return;
559    }
560    let canonical = dunce::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
561    let registry = WORKSPACE_DIAGNOSTICS.get_or_init(|| Mutex::new(FxHashMap::default()));
562    if let Ok(mut map) = registry.lock() {
563        let existing = map.entry(canonical).or_default();
564        let mut seen: FxHashSet<(String, String)> = existing
565            .iter()
566            .map(|d| {
567                (
568                    d.kind.id().to_owned(),
569                    dunce::canonicalize(&d.path)
570                        .unwrap_or_else(|_| d.path.clone())
571                        .display()
572                        .to_string(),
573                )
574            })
575            .collect();
576        for addition in additions {
577            let key = (
578                addition.kind.id().to_owned(),
579                dunce::canonicalize(&addition.path)
580                    .unwrap_or_else(|_| addition.path.clone())
581                    .display()
582                    .to_string(),
583            );
584            if seen.insert(key) {
585                existing.push(addition);
586            }
587        }
588    }
589}
590
591/// Remove all source-discovery diagnostics (see
592/// [`WorkspaceDiagnosticKind::is_source_discovery`]) for `root` from the
593/// registry, keeping the workspace-discovery set intact.
594///
595/// Called at the START of each source walk (`discover_files`) so a stale
596/// `skipped-large-file` entry from a previous analysis pass (e.g. a watch-mode
597/// rerun after the user raised `--max-file-size` or added the file to
598/// `ignorePatterns`) is dropped before the current walk re-appends only the
599/// files it actually skips. Pairs with the preserve in
600/// [`stash_workspace_diagnostics`]: clear keeps the set CURRENT across reruns,
601/// preserve keeps it ALIVE across combined-mode's per-analysis config re-loads
602/// (issue #1086).
603pub fn clear_source_discovery_diagnostics(root: &Path) {
604    let canonical = dunce::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
605    let Some(registry) = WORKSPACE_DIAGNOSTICS.get() else {
606        return;
607    };
608    if let Ok(mut map) = registry.lock()
609        && let Some(existing) = map.get_mut(&canonical)
610    {
611        existing.retain(|d| !d.kind.is_source_discovery());
612    }
613}
614
615/// Read the workspace-discovery diagnostics produced by the most recent
616/// `stash_workspace_diagnostics` + any subsequent
617/// `append_workspace_diagnostics` calls for `root`. Returns an empty vector
618/// when nothing has been stashed for this root yet (e.g. programmatic
619/// callers bypassing the standard loader).
620#[must_use]
621pub fn workspace_diagnostics_for(root: &Path) -> Vec<WorkspaceDiagnostic> {
622    let canonical = dunce::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
623    let Some(registry) = WORKSPACE_DIAGNOSTICS.get() else {
624        return Vec::new();
625    };
626    registry
627        .lock()
628        .ok()
629        .and_then(|map| map.get(&canonical).cloned())
630        .unwrap_or_default()
631}
632
633/// Directories that are conventionally NOT workspace packages even when a
634/// glob like `packages/*` matches them. Mirrors pnpm/npm/yarn behavior of
635/// silently filtering these out, and extends fallow's existing
636/// `should_skip_workspace_scan_dir` list with build artifacts and tooling
637/// caches.
638#[must_use]
639pub(super) fn is_skip_listed_dir(name: &str) -> bool {
640    name.starts_with('.') || matches!(name, "node_modules" | "build" | "dist" | "coverage")
641}
642
643/// Test if a project-root-relative directory path is excluded by user
644/// `ignorePatterns`. The directory itself and its `package.json` are both
645/// checked because users variably write `packages/legacy/**` or
646/// `packages/legacy/package.json` in their ignore globs.
647#[must_use]
648pub(super) fn is_ignored_workspace_dir(
649    relative_dir: &Path,
650    ignore_patterns: &globset::GlobSet,
651) -> bool {
652    if ignore_patterns.is_empty() {
653        return false;
654    }
655    let relative_str = relative_dir.to_string_lossy().replace('\\', "/");
656    ignore_patterns.is_match(relative_str.as_str())
657        || ignore_patterns.is_match(format!("{relative_str}/package.json").as_str())
658}
659
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a diagnostic of `kind` for `rel_path` resolved under `root`.
    fn diag_at(
        root: &Path,
        rel_path: &str,
        kind: WorkspaceDiagnosticKind,
    ) -> WorkspaceDiagnostic {
        WorkspaceDiagnostic::new(root, root.join(rel_path), kind)
    }

    /// Build a `GlobMatchedNoPackageJson` diagnostic for `rel_path` under
    /// `root`, attributed to the glob `pattern`.
    fn glob_diag(root: &Path, pattern: &str, rel_path: &str) -> WorkspaceDiagnostic {
        let kind = WorkspaceDiagnosticKind::GlobMatchedNoPackageJson {
            pattern: pattern.to_owned(),
        };
        diag_at(root, rel_path, kind)
    }

    /// Build a `TsconfigReferenceDirMissing` diagnostic for `rel_path` under
    /// `root`.
    fn tsconfig_ref_diag(root: &Path, rel_path: &str) -> WorkspaceDiagnostic {
        diag_at(
            root,
            rel_path,
            WorkspaceDiagnosticKind::TsconfigReferenceDirMissing,
        )
    }

    #[test]
    fn skipped_large_file_diagnostic_id_and_message() {
        let diag = diag_at(
            Path::new("/project"),
            "src/vendor/app.bundle.js",
            WorkspaceDiagnosticKind::SkippedLargeFile {
                size_bytes: 6 * 1024 * 1024,
            },
        );

        assert_eq!(diag.kind.id(), "skipped-large-file");
        // Each fragment the message must carry, paired with the reason.
        for (needle, why) in [
            (
                "src/vendor/app.bundle.js",
                "message names the project-relative path",
            ),
            ("6.0 MB", "message reports the size"),
            ("--max-file-size", "message names the override flag"),
        ] {
            assert!(diag.message.contains(needle), "{why}: {}", diag.message);
        }
    }

    #[test]
    fn skipped_minified_file_diagnostic_id_and_message() {
        let diag = diag_at(
            Path::new("/project"),
            "src/assets/index-abc123.js",
            WorkspaceDiagnosticKind::SkippedMinifiedFile {
                size_bytes: 2 * 1024 * 1024,
            },
        );

        assert_eq!(diag.kind.id(), "skipped-minified-file");
        // Each fragment the message must carry, paired with the reason.
        for (needle, why) in [
            (
                "src/assets/index-abc123.js",
                "message names the project-relative path",
            ),
            ("2.0 MB", "message reports the size"),
            ("--max-file-size 0", "message names the opt-out"),
        ] {
            assert!(diag.message.contains(needle), "{why}: {}", diag.message);
        }
    }

    #[test]
    fn format_size_mb_one_decimal() {
        for (bytes, expected) in [
            (0, "0.0 MB"),
            (5 * 1024 * 1024, "5.0 MB"),
            (1024 * 1024 + 512 * 1024, "1.5 MB"),
        ] {
            assert_eq!(format_size_mb(bytes), expected);
        }
    }

    #[test]
    fn stash_preserves_appended_skipped_large_file_across_restash() {
        // The registry is process-global, so use a synthetic root that no
        // sibling test shares.
        let root = Path::new("/fallow-test-1086-stash-preserve");
        let workspace_set = || {
            vec![diag_at(
                root,
                "pkg",
                WorkspaceDiagnosticKind::UndeclaredWorkspace,
            )]
        };

        // Step 1: the first analysis loads config and stashes the
        // workspace-discovery set.
        stash_workspace_diagnostics(root, workspace_set());

        // Step 2: its source discovery appends a skipped-large-file entry.
        let skipped = diag_at(
            root,
            "vendor/big.js",
            WorkspaceDiagnosticKind::SkippedLargeFile {
                size_bytes: 9_999_999,
            },
        );
        append_workspace_diagnostics(root, vec![skipped]);

        // Step 3: a sibling analysis (combined-mode dupes/health) re-loads
        // config and stashes the same workspace-discovery set again.
        stash_workspace_diagnostics(root, workspace_set());

        let after = workspace_diagnostics_for(root);

        let source_discovery_count = after
            .iter()
            .filter(|d| d.kind.is_source_discovery())
            .count();
        assert_eq!(
            source_discovery_count, 1,
            "skipped-large-file survives the combined-mode re-stash exactly once (#1086): {after:?}"
        );

        let undeclared_count = after
            .iter()
            .filter(|d| matches!(d.kind, WorkspaceDiagnosticKind::UndeclaredWorkspace))
            .count();
        assert_eq!(
            undeclared_count, 1,
            "the workspace-discovery diagnostic is replaced, not duplicated"
        );
    }

    #[test]
    fn clear_source_discovery_drops_stale_skip_keeps_workspace_diag() {
        let root = Path::new("/fallow-test-1086-clear-stale");

        let undeclared = diag_at(root, "pkg", WorkspaceDiagnosticKind::UndeclaredWorkspace);
        stash_workspace_diagnostics(root, vec![undeclared]);

        let skipped = diag_at(
            root,
            "vendor/big.js",
            WorkspaceDiagnosticKind::SkippedLargeFile {
                size_bytes: 9_999_999,
            },
        );
        append_workspace_diagnostics(root, vec![skipped]);

        // Simulates a later walk in which the file is no longer skipped: the
        // stale source-discovery entry must go away.
        clear_source_discovery_diagnostics(root);

        let after = workspace_diagnostics_for(root);

        let has_source_discovery = after.iter().any(|d| d.kind.is_source_discovery());
        assert!(
            !has_source_discovery,
            "stale skipped-large-file is dropped on the next walk (#1086 watch-mode): {after:?}"
        );

        let has_undeclared = after
            .iter()
            .any(|d| matches!(d.kind, WorkspaceDiagnosticKind::UndeclaredWorkspace));
        assert!(
            has_undeclared,
            "the workspace-discovery diagnostic survives the source-discovery clear"
        );
    }

    #[test]
    fn build_glob_group_message_caps_examples_and_summarises_tail() {
        let root = Path::new("/project");
        let paths = [
            "playground/cli",
            "playground/lib-types",
            "playground/minify",
            "playground/ssr",
            "playground/worker",
        ]
        .map(|rel| root.join(rel));
        let refs: Vec<&Path> = paths.iter().map(PathBuf::as_path).collect();

        let message = build_glob_group_message(root, "playground/**", &refs);

        let leads_with_count =
            message.starts_with("Glob 'playground/**' matched 5 directories with no package.json");
        assert!(
            leads_with_count,
            "count and pattern lead the message: {message}"
        );

        let names_three_then_tail = message
            .contains("(e.g. playground/cli, playground/lib-types, playground/minify, and 2 more)");
        assert!(
            names_three_then_tail,
            "three sorted examples + tail count: {message}"
        );

        let keeps_hint = message
            .ends_with("Add a package.json, narrow the pattern, or add them to ignorePatterns.");
        assert!(keeps_hint, "next-step hint preserved: {message}");

        let names_tail_entry = message.contains("playground/ssr");
        assert!(!names_tail_entry, "tail example not named: {message}");
    }

    #[test]
    fn build_glob_group_message_no_tail_when_at_or_below_cap() {
        let root = Path::new("/project");
        let paths = ["packages/a", "packages/b"].map(|rel| root.join(rel));
        let refs: Vec<&Path> = paths.iter().map(PathBuf::as_path).collect();

        let message = build_glob_group_message(root, "packages/*", &refs);

        assert!(message.contains("matched 2 directories"), "{message}");
        let names_both = message.contains("(e.g. packages/a, packages/b)");
        assert!(
            names_both,
            "both examples named, no `and N more`: {message}"
        );
        let has_tail = message.contains("more)");
        assert!(!has_tail, "no tail clause: {message}");
    }

    #[test]
    fn plan_warnings_aggregates_repeated_glob_diagnostics_to_one_line() {
        let root = Path::new("/project");
        let diagnostics: Vec<WorkspaceDiagnostic> = (0..50)
            .map(|i| {
                let rel = format!("playground/p{i}");
                glob_diag(root, "playground/**", &rel)
            })
            .collect();

        let plans = plan_warnings(root, &diagnostics);

        assert_eq!(
            plans.len(),
            1,
            "50 same-pattern diagnostics collapse to one plan"
        );
        let plan = &plans[0];
        assert!(
            plan.dedupe_key
                .ends_with("::glob-matched-no-package-json-agg::playground/**")
        );
        assert!(plan.message.contains("matched 50 directories"));
    }

    #[test]
    fn plan_warnings_keeps_distinct_patterns_separate() {
        let root = Path::new("/project");
        let diagnostics: Vec<WorkspaceDiagnostic> = [
            ("apps/*", "apps/a"),
            ("apps/*", "apps/b"),
            ("packages/*", "packages/x"),
            ("packages/*", "packages/y"),
        ]
        .into_iter()
        .map(|(pattern, rel)| glob_diag(root, pattern, rel))
        .collect();

        let plans = plan_warnings(root, &diagnostics);

        assert_eq!(plans.len(), 2, "one aggregated plan per distinct pattern");
        let messages: Vec<&str> = plans.iter().map(|p| p.message.as_str()).collect();
        for expected in ["Glob 'apps/*' matched 2", "Glob 'packages/*' matched 2"] {
            assert!(
                messages.iter().any(|m| m.contains(expected)),
                "{messages:?}"
            );
        }
    }

    #[test]
    fn plan_warnings_single_match_keeps_per_instance_message_and_key() {
        let root = Path::new("/project");
        let diag = glob_diag(root, "packages/*", "packages/scratch");

        let plans = plan_warnings(root, std::slice::from_ref(&diag));

        assert_eq!(plans.len(), 1);
        let plan = &plans[0];
        assert_eq!(plan.message, diag.message);

        let key_names_kind = plan.dedupe_key.contains("::glob-matched-no-package-json::");
        assert!(
            key_names_kind && plan.dedupe_key.ends_with("packages/scratch"),
            "per-instance key is `root::kind::path`, not the `-agg::pattern` form: {}",
            plan.dedupe_key
        );
        assert!(
            !plan.message.contains("directories"),
            "single match is not aggregated"
        );
    }

    #[test]
    fn plan_warnings_non_glob_kinds_stay_per_instance() {
        let root = Path::new("/project");
        let undeclared = diag_at(
            root,
            "packages/a",
            WorkspaceDiagnosticKind::UndeclaredWorkspace,
        );
        let malformed = diag_at(
            root,
            "packages/b",
            WorkspaceDiagnosticKind::MalformedPackageJson {
                error: "trailing comma".to_owned(),
            },
        );
        let diagnostics = vec![undeclared, malformed];

        let plans = plan_warnings(root, &diagnostics);

        assert_eq!(
            plans.len(),
            2,
            "each non-glob diagnostic plans its own warning"
        );
        let any_aggregated = plans
            .iter()
            .any(|p| p.message.contains("directories with no package.json"));
        assert!(!any_aggregated);
    }

    #[test]
    fn plan_warnings_aggregates_repeated_tsconfig_ref_misses_to_one_line() {
        let root = Path::new("/project");
        let diagnostics: Vec<WorkspaceDiagnostic> = (0..30)
            .map(|i| {
                let rel = format!("packages/p{i:02}/tsconfig.json");
                tsconfig_ref_diag(root, &rel)
            })
            .collect();

        let plans = plan_warnings(root, &diagnostics);

        assert_eq!(plans.len(), 1, "30 missing references collapse to one plan");
        let plan = &plans[0];
        assert!(
            plan.dedupe_key
                .ends_with("::tsconfig-reference-dir-missing-agg")
        );
        assert!(
            plan.message
                .starts_with("tsconfig.json references 30 directories that do not exist"),
            "{}",
            plan.message
        );
        assert!(
            plan.message.contains(
                "(e.g. packages/p00/tsconfig.json, packages/p01/tsconfig.json, \
                 packages/p02/tsconfig.json, and 27 more)"
            ),
            "three sorted examples + tail: {}",
            plan.message
        );
        assert!(
            plan.message
                .ends_with("Update or remove the references, or restore the missing directories."),
            "{}",
            plan.message
        );
    }

    #[test]
    fn plan_warnings_single_tsconfig_ref_miss_keeps_per_instance_message() {
        let root = Path::new("/project");
        let diag = tsconfig_ref_diag(root, "packages/only/tsconfig.json");

        let plans = plan_warnings(root, std::slice::from_ref(&diag));

        assert_eq!(plans.len(), 1);
        let plan = &plans[0];
        assert_eq!(
            plan.message, diag.message,
            "single miss is not aggregated"
        );
        assert!(!plan.message.contains("directories that do not exist"));
    }

    #[test]
    fn plan_warnings_mixed_aggregatable_kinds_each_collapse_independently() {
        let root = Path::new("/project");
        // Glob diagnostics first, then tsconfig-reference misses.
        let glob_misses =
            (0..5).map(|i| glob_diag(root, "packages/*", &format!("packages/g{i}")));
        let tsconfig_misses =
            (0..4).map(|i| tsconfig_ref_diag(root, &format!("packages/t{i}/tsconfig.json")));
        let diagnostics: Vec<WorkspaceDiagnostic> = glob_misses.chain(tsconfig_misses).collect();

        let plans = plan_warnings(root, &diagnostics);

        assert_eq!(plans.len(), 2, "one glob summary + one tsconfig summary");
        for expected in ["matched 5 directories", "references 4 directories"] {
            assert!(plans.iter().any(|p| p.message.contains(expected)));
        }
    }
}