Skip to main content

fallow_config/workspace/
diagnostics.rs

1//! Workspace and source-discovery diagnostics.
2//!
3//! Surfaces malformed `package.json`, unreachable glob matches, missing
4//! tsconfig references, undeclared workspaces, and source files skipped for
5//! exceeding the per-file size limit ([`WorkspaceDiagnosticKind::SkippedLargeFile`],
6//! issue #1086) as typed [`WorkspaceDiagnostic`] values. Each diagnostic also
7//! emits a deduplicated `tracing::warn!` so users running fallow with default
8//! tracing filters see the cause of "fallow doesn't see my package" or "fallow
9//! ate all my memory."
10//!
11//! Repeated `GlobMatchedNoPackageJson` diagnostics are aggregated by glob
12//! pattern at emission time so a wide glob matching hundreds of package-less
13//! directories on a large monorepo collapses to one bounded summary line per
14//! pattern instead of one line per directory (issue #637). The structured
15//! `Vec<WorkspaceDiagnostic>` returned to callers stays full; only the stderr
16//! surface is bounded.
17//!
18//! Mirrors the dedupe + capture pattern in
19//! `crates/config/src/config/parsing.rs::warn_on_unknown_rule_keys` (issue
20//! #467).
21
22use std::path::{Path, PathBuf};
23use std::sync::{Mutex, OnceLock};
24
25use rustc_hash::{FxHashMap, FxHashSet};
26use schemars::JsonSchema;
27use serde::{Deserialize, Serialize};
28
/// Why a workspace-discovery candidate was rejected, why a sibling directory
/// looked workspace-like but was not declared, or why source discovery
/// skipped a file (`SkippedLargeFile`).
///
/// Wire-format names are kebab-case so JSON consumers (CI integrations, MCP
/// agents, LSP clients) get a stable, language-neutral identifier. The
/// variant name is carried in a `kind` tag (`#[serde(tag = "kind")]`).
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq, Hash)]
#[serde(tag = "kind", rename_all = "kebab-case")]
pub enum WorkspaceDiagnosticKind {
    /// A directory contains `package.json` but is not declared as a workspace
    /// in `package.json` `workspaces`, `pnpm-workspace.yaml`, or
    /// `tsconfig.json` `references`. Surfaced by
    /// `find_undeclared_workspaces`.
    UndeclaredWorkspace,
    /// A declared workspace's `package.json` failed to parse. The directory is
    /// dropped from discovery, but analysis still proceeds (degraded).
    MalformedPackageJson {
        /// `serde_json` parse error text.
        error: String,
    },
    /// A workspace glob pattern matched a directory that contains no
    /// `package.json`. Honors the extended skip list and `ignorePatterns`
    /// before emitting.
    GlobMatchedNoPackageJson {
        /// The glob pattern that matched the directory.
        pattern: String,
    },
    /// `tsconfig.json` exists at the root but failed to parse. Project
    /// references cannot be discovered.
    MalformedTsconfig {
        /// JSONC parse error text.
        error: String,
    },
    /// `tsconfig.json` lists a `references[].path` that does not point to an
    /// existing directory.
    TsconfigReferenceDirMissing,
    /// A source file was skipped at discovery because it exceeds the configured
    /// per-file size limit (`--max-file-size` / `FALLOW_MAX_FILE_SIZE`, default
    /// 5 MB). The file is never read, parsed, or analyzed, guarding against the
    /// out-of-memory blowup a single multi-MB generated/vendored/bundled file
    /// causes (issue #1086). Surfaced by source discovery, not workspace
    /// discovery, but shares this channel so the skip is visible in
    /// `workspace_diagnostics[]` on `fallow dead-code / dupes / health` JSON.
    SkippedLargeFile {
        /// On-disk size of the skipped file in bytes.
        size_bytes: u64,
    },
}
76
77impl WorkspaceDiagnosticKind {
78    /// Stable kebab-case identifier used in dedupe keys and tracing payloads.
79    #[must_use]
80    pub const fn id(&self) -> &'static str {
81        match self {
82            Self::UndeclaredWorkspace => "undeclared-workspace",
83            Self::MalformedPackageJson { .. } => "malformed-package-json",
84            Self::GlobMatchedNoPackageJson { .. } => "glob-matched-no-package-json",
85            Self::MalformedTsconfig { .. } => "malformed-tsconfig",
86            Self::TsconfigReferenceDirMissing => "tsconfig-reference-dir-missing",
87            Self::SkippedLargeFile { .. } => "skipped-large-file",
88        }
89    }
90
91    /// Whether this diagnostic is produced by SOURCE discovery (the file walk in
92    /// `discover_files`) rather than WORKSPACE discovery (config load). Source-
93    /// discovery diagnostics are APPENDED to the registry after config load, so
94    /// [`stash_workspace_diagnostics`] must preserve them when it replaces the
95    /// workspace-discovery set, otherwise the per-analysis config re-loads in
96    /// combined-mode (`fallow` with no subcommand re-loads config for check,
97    /// dupes, and health) wipe them before the JSON envelope is built (issue
98    /// #1086).
99    #[must_use]
100    pub const fn is_source_discovery(&self) -> bool {
101        matches!(self, Self::SkippedLargeFile { .. })
102    }
103}
104
/// Format `bytes` for human-readable diagnostic messages: the count divided
/// by 1024 * 1024, printed with one decimal place and an ` MB` suffix
/// (e.g. `12.3 MB`).
#[must_use]
fn format_size_mb(bytes: u64) -> String {
    /// Number of bytes counted as one "MB" by this formatter.
    const BYTES_PER_MB: f64 = 1024.0 * 1024.0;

    #[expect(
        clippy::cast_precision_loss,
        reason = "display-only size figure; precision loss past 2^53 bytes is irrelevant"
    )]
    let mb = bytes as f64 / BYTES_PER_MB;
    format!("{mb:.1} MB")
}
116
/// A diagnostic about a workspace-discovery candidate or a source file
/// skipped during discovery.
///
/// The `message` field is a human-readable rendering derived from `kind`. It
/// always ends with a concrete next step ("fix the JSON syntax", "remove from
/// `workspaces`", "add to `ignorePatterns`") so first-time users have a path
/// forward.
///
/// `kind` is flattened on the wire, so its `kind` tag and payload fields
/// appear next to `path` and `message` rather than in a nested object.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct WorkspaceDiagnostic {
    /// Path to the directory or file that triggered the diagnostic.
    pub path: PathBuf,
    /// Kind discriminator with the typed payload.
    #[serde(flatten)]
    pub kind: WorkspaceDiagnosticKind,
    /// Human-readable rendering derived from `kind` + `path`. Always ends
    /// with a next-step hint.
    pub message: String,
}
134
135impl WorkspaceDiagnostic {
136    /// Construct a diagnostic with the message rendered from `kind` + `path`.
137    ///
138    /// `root` is used to produce project-relative paths in the message text
139    /// AND inside the variant payload (e.g. the `error` field of
140    /// `MalformedPackageJson` / `MalformedTsconfig` which embed the absolute
141    /// file path from `PackageJson::load()`'s error text). Without the
142    /// payload-side normalisation the embedded path would survive
143    /// environment-specific differences (CI vs Docker vs local) because the
144    /// post-serialisation `strip_root_prefix` only catches whole-string
145    /// matches, not paths embedded mid-sentence.
146    ///
147    /// If `path` is not under `root` (e.g. canonicalisation crossed a
148    /// symlink), the absolute path is emitted instead.
149    #[must_use]
150    pub fn new(root: &Path, path: PathBuf, kind: WorkspaceDiagnosticKind) -> Self {
151        let kind = normalise_payload_paths(root, kind);
152        let message = render_message(root, &path, &kind);
153        Self {
154            path,
155            kind,
156            message,
157        }
158    }
159}
160
161/// Strip the project root from absolute paths embedded inside variant
162/// payloads (today: the `error` field of `MalformedPackageJson` and
163/// `MalformedTsconfig`). Mirrors the per-platform `display()` byte sequence
164/// so the substring match works on Windows too.
165fn normalise_payload_paths(root: &Path, kind: WorkspaceDiagnosticKind) -> WorkspaceDiagnosticKind {
166    let root_str = root.display().to_string();
167    let root_alt = root_str.replace('\\', "/");
168    let normalise = |text: String| -> String {
169        let stripped = text
170            .replace(&format!("{root_str}/"), "")
171            .replace(&format!("{root_alt}/"), "");
172        stripped
173            .replace(&format!("{root_str}\\"), "")
174            .replace(&format!("{root_alt}\\"), "")
175    };
176    match kind {
177        WorkspaceDiagnosticKind::MalformedPackageJson { error } => {
178            WorkspaceDiagnosticKind::MalformedPackageJson {
179                error: normalise(error),
180            }
181        }
182        WorkspaceDiagnosticKind::MalformedTsconfig { error } => {
183            WorkspaceDiagnosticKind::MalformedTsconfig {
184                error: normalise(error),
185            }
186        }
187        other => other,
188    }
189}
190
/// Render `path` relative to `root`, using forward slashes only.
///
/// When `path` does not start with `root` it is rendered as given. Used by
/// [`render_message`] and [`summarize_examples`] so the per-instance and
/// aggregated message surfaces format paths identically (the forward-slash
/// normalisation is load-bearing for cross-platform output stability).
fn display_relative(root: &Path, path: &Path) -> String {
    let shown = match path.strip_prefix(root) {
        Ok(relative) => relative,
        Err(_) => path,
    };
    shown.display().to_string().replace('\\', "/")
}
202
203fn render_message(root: &Path, path: &Path, kind: &WorkspaceDiagnosticKind) -> String {
204    let display = display_relative(root, path);
205    match kind {
206        WorkspaceDiagnosticKind::UndeclaredWorkspace => format!(
207            "Directory '{display}' contains package.json but is not declared as a workspace. \
208             Add it to package.json workspaces or pnpm-workspace.yaml, or add it to ignorePatterns."
209        ),
210        WorkspaceDiagnosticKind::MalformedPackageJson { error } => format!(
211            "Dropped workspace '{display}': package.json is not valid JSON ({error}). \
212             Fix the JSON syntax or remove '{display}' from the workspaces pattern."
213        ),
214        WorkspaceDiagnosticKind::GlobMatchedNoPackageJson { pattern } => format!(
215            "Glob '{pattern}' matched '{display}' but no package.json is present. \
216             Add a package.json, narrow the pattern, or add '{display}' to ignorePatterns."
217        ),
218        WorkspaceDiagnosticKind::MalformedTsconfig { error } => format!(
219            "tsconfig.json at '{display}' failed to parse ({error}); \
220             project references will be ignored. Fix the JSON syntax."
221        ),
222        WorkspaceDiagnosticKind::TsconfigReferenceDirMissing => format!(
223            "tsconfig.json references '{display}' but the directory does not exist. \
224             Update or remove the reference, or restore the missing directory."
225        ),
226        WorkspaceDiagnosticKind::SkippedLargeFile { size_bytes } => format!(
227            "Skipped '{display}' ({size}): exceeds the max file size limit. \
228             Its imports and exports are not analyzed. Raise the limit with \
229             --max-file-size <MB> (or FALLOW_MAX_FILE_SIZE), or add '{display}' \
230             to ignorePatterns.",
231            size = format_size_mb(*size_bytes)
232        ),
233    }
234}
235
/// Workspace-discovery failures that prevent analysis from proceeding.
///
/// Returned only by `discover_workspaces_with_diagnostics` (in the parent
/// module) when the root `package.json` itself is malformed: without a
/// parseable root, no workspace patterns can be collected, and analysis
/// output would be fiction. The CLI surfaces this as exit 2.
#[derive(Debug, Clone)]
pub enum WorkspaceLoadError {
    /// The project root's `package.json` exists but failed to parse.
    MalformedRootPackageJson { path: PathBuf, error: String },
}

impl std::fmt::Display for WorkspaceLoadError {
    /// Write the user-facing message: the offending file path, the parse
    /// error text, and the next step.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Irrefutable while the enum has a single variant; adding a variant
        // turns this into a compile error, just as a `match` would.
        let Self::MalformedRootPackageJson { path, error } = self;
        write!(
            f,
            "root package.json at '{}' is not valid JSON ({error}). \
             Fix the syntax before re-running fallow.",
            path.display()
        )
    }
}

impl std::error::Error for WorkspaceLoadError {}
262
/// Maximum number of example paths named in an aggregated warning before the
/// tail is summarised as "and N more". Applied by `summarize_examples`, so it
/// bounds both the per-pattern `GlobMatchedNoPackageJson` summary and the
/// `TsconfigReferenceDirMissing` summary. Keeps a fanned-out diagnostic to
/// one bounded stderr line.
const GLOB_EXAMPLE_CAP: usize = 3;
267
268/// Process-wide set of already-emitted diagnostic dedupe keys. Per-instance
269/// keys (`root::kind::path`) and aggregated per-pattern keys
270/// (`root::glob-matched-no-package-json-agg::pattern`) share one set so
271/// combined-mode (check + dupes + health through one loader) and watch-mode
272/// reruns warn at most once per logical diagnostic. The two key namespaces are
273/// disjoint, so there is no cross-talk.
274fn warned_keys() -> &'static Mutex<FxHashSet<String>> {
275    static WARNED: OnceLock<Mutex<FxHashSet<String>>> = OnceLock::new();
276    WARNED.get_or_init(|| Mutex::new(FxHashSet::default()))
277}
278
279/// Insert `key` and return `true` when it was newly inserted (caller should
280/// emit). On a poisoned mutex returns `true` so over-warning beats swallowing
281/// a typo. Mirrors `parsing::warn_on_unknown_rule_keys` and
282/// `plugins::registry::should_warn`.
283fn should_emit(key: String) -> bool {
284    warned_keys().lock().map_or(true, |mut set| set.insert(key))
285}
286
/// A single planned stderr warning: its process-dedupe key and the rendered
/// message. The pure output of [`plan_warnings`] so the partition/aggregation
/// logic is unit-testable without a tracing subscriber or the process-wide
/// dedupe set.
#[derive(Debug, PartialEq, Eq)]
struct PlannedWarning {
    /// Key checked against the process-wide emitted set before the warning
    /// is logged.
    dedupe_key: String,
    /// Message text to log; [`emit_diagnostics`] prefixes it with `fallow: `.
    message: String,
}
296
297/// Turn a batch of workspace diagnostics into the bounded set of stderr
298/// warnings to emit, collapsing the two kinds that fan out on large monorepos
299/// (issue #637):
300/// - `GlobMatchedNoPackageJson`: aggregated by glob pattern, one summary line
301///   per pattern instead of one line per package-less directory.
302/// - `TsconfigReferenceDirMissing`: aggregated together, one summary line
303///   instead of one per missing `references[]` entry in the root tsconfig.
304///
305/// Pure: no tracing, no dedupe-set mutation. A group of exactly one keeps
306/// today's per-instance message byte-for-byte (no regression for the common
307/// single-match case); every other kind plans one per-instance warning. The
308/// returned plan lists non-aggregated diagnostics first (in first-seen order),
309/// then the glob-pattern summaries, then the tsconfig summary; ordering does
310/// not affect correctness since these are independent stderr lines.
311fn plan_warnings(root: &Path, diagnostics: &[WorkspaceDiagnostic]) -> Vec<PlannedWarning> {
312    let canonical = dunce::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
313    let per_instance = |diag: &WorkspaceDiagnostic| PlannedWarning {
314        dedupe_key: format!(
315            "{}::{}::{}",
316            canonical.display(),
317            diag.kind.id(),
318            diag.path.display()
319        ),
320        message: diag.message.clone(),
321    };
322
323    let mut plans: Vec<PlannedWarning> = Vec::new();
324    let mut glob_groups: Vec<(&str, Vec<&WorkspaceDiagnostic>)> = Vec::new();
325    let mut tsconfig_ref_misses: Vec<&WorkspaceDiagnostic> = Vec::new();
326    for diag in diagnostics {
327        match &diag.kind {
328            WorkspaceDiagnosticKind::GlobMatchedNoPackageJson { pattern } => {
329                match glob_groups.iter_mut().find(|(p, _)| *p == pattern.as_str()) {
330                    Some((_, group)) => group.push(diag),
331                    None => glob_groups.push((pattern.as_str(), vec![diag])),
332                }
333            }
334            WorkspaceDiagnosticKind::TsconfigReferenceDirMissing => tsconfig_ref_misses.push(diag),
335            _ => plans.push(per_instance(diag)),
336        }
337    }
338
339    for (pattern, group) in glob_groups {
340        if let [only] = group.as_slice() {
341            plans.push(per_instance(only));
342            continue;
343        }
344        let paths: Vec<&Path> = group.iter().map(|d| d.path.as_path()).collect();
345        plans.push(PlannedWarning {
346            dedupe_key: format!(
347                "{}::glob-matched-no-package-json-agg::{pattern}",
348                canonical.display()
349            ),
350            message: build_glob_group_message(root, pattern, &paths),
351        });
352    }
353
354    if let [only] = tsconfig_ref_misses.as_slice() {
355        plans.push(per_instance(only));
356    } else if !tsconfig_ref_misses.is_empty() {
357        let paths: Vec<&Path> = tsconfig_ref_misses
358            .iter()
359            .map(|d| d.path.as_path())
360            .collect();
361        plans.push(PlannedWarning {
362            dedupe_key: format!(
363                "{}::tsconfig-reference-dir-missing-agg",
364                canonical.display()
365            ),
366            message: build_tsconfig_refs_message(root, &paths),
367        });
368    }
369
370    plans
371}
372
373/// Emit `tracing::warn!` lines for a batch of workspace diagnostics.
374///
375/// Delegates the partition/aggregation decisions to the pure [`plan_warnings`]
376/// and applies the process-wide dedupe so combined-mode (check + dupes + health
377/// through one loader) and watch-mode reruns warn at most once per logical
378/// diagnostic. The returned/stashed `Vec<WorkspaceDiagnostic>` is unaffected;
379/// only the stderr surface is bounded, so structured JSON consumers still see
380/// every diagnostic.
381pub(super) fn emit_diagnostics(root: &Path, diagnostics: &[WorkspaceDiagnostic]) {
382    #[cfg(test)]
383    for diag in diagnostics {
384        capture_diag(diag);
385    }
386
387    for plan in plan_warnings(root, diagnostics) {
388        if should_emit(plan.dedupe_key) {
389            tracing::warn!("fallow: {}", plan.message);
390        }
391    }
392}
393
394/// Render up to [`GLOB_EXAMPLE_CAP`] project-relative example paths (sorted for
395/// deterministic output) with an "and N more" tail when the count exceeds the
396/// cap. Returns the joined example string and the total path count. Shared by
397/// the aggregated-message builders.
398fn summarize_examples(root: &Path, paths: &[&Path]) -> (String, usize) {
399    let mut examples: Vec<String> = paths.iter().map(|p| display_relative(root, p)).collect();
400    examples.sort();
401    let count = examples.len();
402    let shown = examples
403        .iter()
404        .take(GLOB_EXAMPLE_CAP)
405        .cloned()
406        .collect::<Vec<_>>()
407        .join(", ");
408    let remaining = count.saturating_sub(GLOB_EXAMPLE_CAP);
409    let listed = if remaining > 0 {
410        format!("{shown}, and {remaining} more")
411    } else {
412        shown
413    };
414    (listed, count)
415}
416
417/// Build the aggregated message for a glob pattern that matched `paths`
418/// package-less directories (always called with `paths.len() >= 2`).
419fn build_glob_group_message(root: &Path, pattern: &str, paths: &[&Path]) -> String {
420    let (listed, count) = summarize_examples(root, paths);
421    format!(
422        "Glob '{pattern}' matched {count} directories with no package.json \
423         (e.g. {listed}). Add a package.json, narrow the pattern, or add \
424         them to ignorePatterns."
425    )
426}
427
428/// Build the aggregated message for `paths` `tsconfig.json` `references[]`
429/// entries that point at missing directories (always called with
430/// `paths.len() >= 2`).
431fn build_tsconfig_refs_message(root: &Path, paths: &[&Path]) -> String {
432    let (listed, count) = summarize_examples(root, paths);
433    format!(
434        "tsconfig.json references {count} directories that do not exist \
435         (e.g. {listed}). Update or remove the references, or restore the \
436         missing directories."
437    )
438}
439
thread_local! {
    /// Per-thread capture of workspace diagnostics, for tests that assert
    /// emission without inspecting tracing output. Parallel test execution
    /// stays race-free because the buffer is thread-local. The cell exists
    /// only in test builds (`#[cfg(test)]`) and starts as `None`, so nothing
    /// is captured until a test installs a buffer.
    ///
    /// Mirrors `parsing::UNKNOWN_RULE_CAPTURE` (issue #467).
    #[cfg(test)]
    static WORKSPACE_DIAGNOSTIC_CAPTURE: std::cell::RefCell<Option<Vec<WorkspaceDiagnostic>>> =
        const { std::cell::RefCell::new(None) };
}
451
/// Record `diag` in the current thread's capture buffer, if one is installed.
///
/// Does nothing when no [`capture_workspace_warnings`] call is active on this
/// thread. [`emit_diagnostics`] calls this once per diagnostic before the
/// dedupe gate, so every diagnostic is observed regardless of whether it was
/// emitted per-instance, aggregated, or suppressed.
#[cfg(test)]
fn capture_diag(diag: &WorkspaceDiagnostic) {
    WORKSPACE_DIAGNOSTIC_CAPTURE.with(|slot| {
        let mut installed = slot.borrow_mut();
        if let Some(captured) = installed.as_mut() {
            captured.push(diag.clone());
        }
    });
}
465
/// Run `body` with a fresh thread-local capture buffer installed.
///
/// Returns the body's result together with every diagnostic that passed
/// through [`emit_diagnostics`] on the current thread while `body` ran, in
/// order. The buffer is removed again afterwards.
///
/// Test-only. Capture happens before the process-wide dedupe, so two
/// emissions for the same root + kind + path inside one test are both
/// observed.
#[cfg(test)]
#[must_use]
pub fn capture_workspace_warnings<F: FnOnce() -> R, R>(body: F) -> (R, Vec<WorkspaceDiagnostic>) {
    WORKSPACE_DIAGNOSTIC_CAPTURE.with(|slot| {
        // Any buffer left over from an earlier capture is discarded.
        let _previous = slot.replace(Some(Vec::new()));
    });
    let result = body();
    let captured = WORKSPACE_DIAGNOSTIC_CAPTURE
        .with(|slot| slot.take())
        .unwrap_or_default();
    (result, captured)
}
484
/// Process-wide registry of workspace-discovery diagnostics, keyed by
/// canonical root. Populated by callers that run
/// [`super::discover_workspaces_with_diagnostics`] and (after config load
/// completes) by the analysis pipeline's `find_undeclared_workspaces_*`
/// pass. Consumers (`fallow list --workspaces`, the JSON envelope on
/// `fallow dead-code / dupes / health`) read via [`workspace_diagnostics_for`].
///
/// Canonicalisation matches the dedupe-key canonicalisation in
/// [`plan_warnings`]: two callers on the same physical root coalesce, and
/// nested-monorepo callers on different roots stay independent.
///
/// Initialised lazily by the writers ([`stash_workspace_diagnostics`],
/// [`append_workspace_diagnostics`]); the reader and the clear helper treat
/// an uninitialised registry as empty.
static WORKSPACE_DIAGNOSTICS: OnceLock<Mutex<FxHashMap<PathBuf, Vec<WorkspaceDiagnostic>>>> =
    OnceLock::new();
497
498/// Replace the workspace-discovery diagnostics for `root` with `diagnostics`,
499/// PRESERVING any source-discovery diagnostics (see
500/// [`WorkspaceDiagnosticKind::is_source_discovery`]) already appended for the
501/// root.
502///
503/// Called at config-load time after [`super::discover_workspaces_with_diagnostics`]
504/// completes; the analyze pipeline then APPENDS undeclared-workspace and
505/// source-discovery (`skipped-large-file`) diagnostics via
506/// [`append_workspace_diagnostics`]. The workspace-discovery set is authoritative
507/// and replaced wholesale (so a fixed `package.json` clears its stale diagnostic
508/// across watch-mode reruns), but source-discovery diagnostics are appended
509/// AFTER this stash, so combined-mode's per-analysis config re-loads would
510/// otherwise wipe a `skipped-large-file` entry that the first analysis's
511/// discovery already recorded (issue #1086).
512pub fn stash_workspace_diagnostics(root: &Path, diagnostics: Vec<WorkspaceDiagnostic>) {
513    let canonical = dunce::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
514    let registry = WORKSPACE_DIAGNOSTICS.get_or_init(|| Mutex::new(FxHashMap::default()));
515    if let Ok(mut map) = registry.lock() {
516        let mut combined = diagnostics;
517        if let Some(existing) = map.get(&canonical) {
518            combined.extend(
519                existing
520                    .iter()
521                    .filter(|d| d.kind.is_source_discovery())
522                    .cloned(),
523            );
524        }
525        map.insert(canonical, combined);
526    }
527}
528
529/// Append `additions` to the workspace-discovery diagnostics for `root`,
530/// skipping any entry whose `(kind id, canonical path)` is already present.
531///
532/// Used by the analyze pipeline's undeclared-workspace pass to fold its
533/// findings into the registry without re-emitting diagnostics that the
534/// config-load pass already surfaced (e.g. a directory whose `package.json`
535/// is malformed should NOT also produce a separate "undeclared" diagnostic
536/// alongside the malformed-package-json one).
537pub fn append_workspace_diagnostics(root: &Path, additions: Vec<WorkspaceDiagnostic>) {
538    if additions.is_empty() {
539        return;
540    }
541    let canonical = dunce::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
542    let registry = WORKSPACE_DIAGNOSTICS.get_or_init(|| Mutex::new(FxHashMap::default()));
543    if let Ok(mut map) = registry.lock() {
544        let existing = map.entry(canonical).or_default();
545        let mut seen: FxHashSet<(String, String)> = existing
546            .iter()
547            .map(|d| {
548                (
549                    d.kind.id().to_owned(),
550                    dunce::canonicalize(&d.path)
551                        .unwrap_or_else(|_| d.path.clone())
552                        .display()
553                        .to_string(),
554                )
555            })
556            .collect();
557        for addition in additions {
558            let key = (
559                addition.kind.id().to_owned(),
560                dunce::canonicalize(&addition.path)
561                    .unwrap_or_else(|_| addition.path.clone())
562                    .display()
563                    .to_string(),
564            );
565            if seen.insert(key) {
566                existing.push(addition);
567            }
568        }
569    }
570}
571
572/// Remove all source-discovery diagnostics (see
573/// [`WorkspaceDiagnosticKind::is_source_discovery`]) for `root` from the
574/// registry, keeping the workspace-discovery set intact.
575///
576/// Called at the START of each source walk (`discover_files`) so a stale
577/// `skipped-large-file` entry from a previous analysis pass (e.g. a watch-mode
578/// rerun after the user raised `--max-file-size` or added the file to
579/// `ignorePatterns`) is dropped before the current walk re-appends only the
580/// files it actually skips. Pairs with the preserve in
581/// [`stash_workspace_diagnostics`]: clear keeps the set CURRENT across reruns,
582/// preserve keeps it ALIVE across combined-mode's per-analysis config re-loads
583/// (issue #1086).
584pub fn clear_source_discovery_diagnostics(root: &Path) {
585    let canonical = dunce::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
586    let Some(registry) = WORKSPACE_DIAGNOSTICS.get() else {
587        return;
588    };
589    if let Ok(mut map) = registry.lock()
590        && let Some(existing) = map.get_mut(&canonical)
591    {
592        existing.retain(|d| !d.kind.is_source_discovery());
593    }
594}
595
596/// Read the workspace-discovery diagnostics produced by the most recent
597/// `stash_workspace_diagnostics` + any subsequent
598/// `append_workspace_diagnostics` calls for `root`. Returns an empty vector
599/// when nothing has been stashed for this root yet (e.g. programmatic
600/// callers bypassing the standard loader).
601#[must_use]
602pub fn workspace_diagnostics_for(root: &Path) -> Vec<WorkspaceDiagnostic> {
603    let canonical = dunce::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
604    let Some(registry) = WORKSPACE_DIAGNOSTICS.get() else {
605        return Vec::new();
606    };
607    registry
608        .lock()
609        .ok()
610        .and_then(|map| map.get(&canonical).cloned())
611        .unwrap_or_default()
612}
613
614/// Directories that are conventionally NOT workspace packages even when a
615/// glob like `packages/*` matches them. Mirrors pnpm/npm/yarn behavior of
616/// silently filtering these out, and extends fallow's existing
617/// `should_skip_workspace_scan_dir` list with build artifacts and tooling
618/// caches.
619#[must_use]
620pub(super) fn is_skip_listed_dir(name: &str) -> bool {
621    name.starts_with('.') || matches!(name, "node_modules" | "build" | "dist" | "coverage")
622}
623
624/// Test if a project-root-relative directory path is excluded by user
625/// `ignorePatterns`. The directory itself and its `package.json` are both
626/// checked because users variably write `packages/legacy/**` or
627/// `packages/legacy/package.json` in their ignore globs.
628#[must_use]
629pub(super) fn is_ignored_workspace_dir(
630    relative_dir: &Path,
631    ignore_patterns: &globset::GlobSet,
632) -> bool {
633    if ignore_patterns.is_empty() {
634        return false;
635    }
636    let relative_str = relative_dir.to_string_lossy().replace('\\', "/");
637    ignore_patterns.is_match(relative_str.as_str())
638        || ignore_patterns.is_match(format!("{relative_str}/package.json").as_str())
639}
640
641#[cfg(test)]
642mod tests {
643    use super::*;
644
645    fn glob_diag(root: &Path, pattern: &str, rel_path: &str) -> WorkspaceDiagnostic {
646        WorkspaceDiagnostic::new(
647            root,
648            root.join(rel_path),
649            WorkspaceDiagnosticKind::GlobMatchedNoPackageJson {
650                pattern: pattern.to_owned(),
651            },
652        )
653    }
654
655    #[test]
656    fn skipped_large_file_diagnostic_id_and_message() {
657        let root = Path::new("/project");
658        let diag = WorkspaceDiagnostic::new(
659            root,
660            root.join("src/vendor/app.bundle.js"),
661            WorkspaceDiagnosticKind::SkippedLargeFile {
662                size_bytes: 6 * 1024 * 1024,
663            },
664        );
665        assert_eq!(diag.kind.id(), "skipped-large-file");
666        assert!(
667            diag.message.contains("src/vendor/app.bundle.js"),
668            "message names the project-relative path: {}",
669            diag.message
670        );
671        assert!(
672            diag.message.contains("6.0 MB"),
673            "message reports the size: {}",
674            diag.message
675        );
676        assert!(
677            diag.message.contains("--max-file-size"),
678            "message names the override flag: {}",
679            diag.message
680        );
681    }
682
683    #[test]
684    fn format_size_mb_one_decimal() {
685        assert_eq!(format_size_mb(0), "0.0 MB");
686        assert_eq!(format_size_mb(5 * 1024 * 1024), "5.0 MB");
687        assert_eq!(format_size_mb(1024 * 1024 + 512 * 1024), "1.5 MB");
688    }
689
690    #[test]
691    fn stash_preserves_appended_skipped_large_file_across_restash() {
692        // Unique synthetic root so the process-global registry does not collide
693        // with sibling tests.
694        let root = Path::new("/fallow-test-1086-stash-preserve");
695        let undeclared = || {
696            WorkspaceDiagnostic::new(
697                root,
698                root.join("pkg"),
699                WorkspaceDiagnosticKind::UndeclaredWorkspace,
700            )
701        };
702        // First analysis loads config and stashes the workspace-discovery set.
703        stash_workspace_diagnostics(root, vec![undeclared()]);
704        // Its source discovery appends a skipped-large-file diagnostic.
705        append_workspace_diagnostics(
706            root,
707            vec![WorkspaceDiagnostic::new(
708                root,
709                root.join("vendor/big.js"),
710                WorkspaceDiagnosticKind::SkippedLargeFile {
711                    size_bytes: 9_999_999,
712                },
713            )],
714        );
715        // A sibling analysis (combined-mode dupes/health) re-loads config and
716        // re-stashes the same workspace-discovery set.
717        stash_workspace_diagnostics(root, vec![undeclared()]);
718
719        let after = workspace_diagnostics_for(root);
720        assert_eq!(
721            after
722                .iter()
723                .filter(|d| d.kind.is_source_discovery())
724                .count(),
725            1,
726            "skipped-large-file survives the combined-mode re-stash exactly once (#1086): {after:?}"
727        );
728        assert_eq!(
729            after
730                .iter()
731                .filter(|d| matches!(d.kind, WorkspaceDiagnosticKind::UndeclaredWorkspace))
732                .count(),
733            1,
734            "the workspace-discovery diagnostic is replaced, not duplicated"
735        );
736    }
737
738    #[test]
739    fn clear_source_discovery_drops_stale_skip_keeps_workspace_diag() {
740        let root = Path::new("/fallow-test-1086-clear-stale");
741        stash_workspace_diagnostics(
742            root,
743            vec![WorkspaceDiagnostic::new(
744                root,
745                root.join("pkg"),
746                WorkspaceDiagnosticKind::UndeclaredWorkspace,
747            )],
748        );
749        append_workspace_diagnostics(
750            root,
751            vec![WorkspaceDiagnostic::new(
752                root,
753                root.join("vendor/big.js"),
754                WorkspaceDiagnosticKind::SkippedLargeFile {
755                    size_bytes: 9_999_999,
756                },
757            )],
758        );
759        // A later walk (the file is no longer skipped) clears the stale entry.
760        clear_source_discovery_diagnostics(root);
761
762        let after = workspace_diagnostics_for(root);
763        assert!(
764            !after.iter().any(|d| d.kind.is_source_discovery()),
765            "stale skipped-large-file is dropped on the next walk (#1086 watch-mode): {after:?}"
766        );
767        assert!(
768            after
769                .iter()
770                .any(|d| matches!(d.kind, WorkspaceDiagnosticKind::UndeclaredWorkspace)),
771            "the workspace-discovery diagnostic survives the source-discovery clear"
772        );
773    }
774
775    #[test]
776    fn build_glob_group_message_caps_examples_and_summarises_tail() {
777        let root = Path::new("/project");
778        let paths = [
779            root.join("playground/cli"),
780            root.join("playground/lib-types"),
781            root.join("playground/minify"),
782            root.join("playground/ssr"),
783            root.join("playground/worker"),
784        ];
785        let refs: Vec<&Path> = paths.iter().map(PathBuf::as_path).collect();
786        let message = build_glob_group_message(root, "playground/**", &refs);
787
788        assert!(
789            message.starts_with("Glob 'playground/**' matched 5 directories with no package.json"),
790            "count and pattern lead the message: {message}"
791        );
792        assert!(
793            message.contains(
794                "(e.g. playground/cli, playground/lib-types, playground/minify, and 2 more)"
795            ),
796            "three sorted examples + tail count: {message}"
797        );
798        assert!(
799            message.ends_with(
800                "Add a package.json, narrow the pattern, or add them to ignorePatterns."
801            ),
802            "next-step hint preserved: {message}"
803        );
804        assert!(
805            !message.contains("playground/ssr"),
806            "tail example not named: {message}"
807        );
808    }
809
810    #[test]
811    fn build_glob_group_message_no_tail_when_at_or_below_cap() {
812        let root = Path::new("/project");
813        let paths = [root.join("packages/a"), root.join("packages/b")];
814        let refs: Vec<&Path> = paths.iter().map(PathBuf::as_path).collect();
815        let message = build_glob_group_message(root, "packages/*", &refs);
816
817        assert!(message.contains("matched 2 directories"), "{message}");
818        assert!(
819            message.contains("(e.g. packages/a, packages/b)"),
820            "both examples named, no `and N more`: {message}"
821        );
822        assert!(!message.contains("more)"), "no tail clause: {message}");
823    }
824
825    #[test]
826    fn plan_warnings_aggregates_repeated_glob_diagnostics_to_one_line() {
827        let root = Path::new("/project");
828        let diagnostics: Vec<WorkspaceDiagnostic> = (0..50)
829            .map(|i| glob_diag(root, "playground/**", &format!("playground/p{i}")))
830            .collect();
831
832        let plans = plan_warnings(root, &diagnostics);
833
834        assert_eq!(
835            plans.len(),
836            1,
837            "50 same-pattern diagnostics collapse to one plan"
838        );
839        assert!(
840            plans[0]
841                .dedupe_key
842                .ends_with("::glob-matched-no-package-json-agg::playground/**")
843        );
844        assert!(plans[0].message.contains("matched 50 directories"));
845    }
846
847    #[test]
848    fn plan_warnings_keeps_distinct_patterns_separate() {
849        let root = Path::new("/project");
850        let diagnostics = vec![
851            glob_diag(root, "apps/*", "apps/a"),
852            glob_diag(root, "apps/*", "apps/b"),
853            glob_diag(root, "packages/*", "packages/x"),
854            glob_diag(root, "packages/*", "packages/y"),
855        ];
856
857        let plans = plan_warnings(root, &diagnostics);
858
859        assert_eq!(plans.len(), 2, "one aggregated plan per distinct pattern");
860        let messages: Vec<&str> = plans.iter().map(|p| p.message.as_str()).collect();
861        assert!(
862            messages
863                .iter()
864                .any(|m| m.contains("Glob 'apps/*' matched 2")),
865            "{messages:?}"
866        );
867        assert!(
868            messages
869                .iter()
870                .any(|m| m.contains("Glob 'packages/*' matched 2")),
871            "{messages:?}"
872        );
873    }
874
875    #[test]
876    fn plan_warnings_single_match_keeps_per_instance_message_and_key() {
877        let root = Path::new("/project");
878        let diag = glob_diag(root, "packages/*", "packages/scratch");
879
880        let plans = plan_warnings(root, std::slice::from_ref(&diag));
881
882        assert_eq!(plans.len(), 1);
883        assert_eq!(plans[0].message, diag.message);
884        assert!(
885            plans[0]
886                .dedupe_key
887                .contains("::glob-matched-no-package-json::")
888                && plans[0].dedupe_key.ends_with("packages/scratch"),
889            "per-instance key is `root::kind::path`, not the `-agg::pattern` form: {}",
890            plans[0].dedupe_key
891        );
892        assert!(
893            !plans[0].message.contains("directories"),
894            "single match is not aggregated"
895        );
896    }
897
898    #[test]
899    fn plan_warnings_non_glob_kinds_stay_per_instance() {
900        let root = Path::new("/project");
901        let diagnostics = vec![
902            WorkspaceDiagnostic::new(
903                root,
904                root.join("packages/a"),
905                WorkspaceDiagnosticKind::UndeclaredWorkspace,
906            ),
907            WorkspaceDiagnostic::new(
908                root,
909                root.join("packages/b"),
910                WorkspaceDiagnosticKind::MalformedPackageJson {
911                    error: "trailing comma".to_owned(),
912                },
913            ),
914        ];
915
916        let plans = plan_warnings(root, &diagnostics);
917
918        assert_eq!(
919            plans.len(),
920            2,
921            "each non-glob diagnostic plans its own warning"
922        );
923        assert!(
924            plans
925                .iter()
926                .all(|p| !p.message.contains("directories with no package.json"))
927        );
928    }
929
930    fn tsconfig_ref_diag(root: &Path, rel_path: &str) -> WorkspaceDiagnostic {
931        WorkspaceDiagnostic::new(
932            root,
933            root.join(rel_path),
934            WorkspaceDiagnosticKind::TsconfigReferenceDirMissing,
935        )
936    }
937
938    #[test]
939    fn plan_warnings_aggregates_repeated_tsconfig_ref_misses_to_one_line() {
940        let root = Path::new("/project");
941        let diagnostics: Vec<WorkspaceDiagnostic> = (0..30)
942            .map(|i| tsconfig_ref_diag(root, &format!("packages/p{i:02}/tsconfig.json")))
943            .collect();
944
945        let plans = plan_warnings(root, &diagnostics);
946
947        assert_eq!(plans.len(), 1, "30 missing references collapse to one plan");
948        assert!(
949            plans[0]
950                .dedupe_key
951                .ends_with("::tsconfig-reference-dir-missing-agg")
952        );
953        assert!(
954            plans[0]
955                .message
956                .starts_with("tsconfig.json references 30 directories that do not exist"),
957            "{}",
958            plans[0].message
959        );
960        assert!(
961            plans[0].message.contains(
962                "(e.g. packages/p00/tsconfig.json, packages/p01/tsconfig.json, \
963                 packages/p02/tsconfig.json, and 27 more)"
964            ),
965            "three sorted examples + tail: {}",
966            plans[0].message
967        );
968        assert!(
969            plans[0]
970                .message
971                .ends_with("Update or remove the references, or restore the missing directories."),
972            "{}",
973            plans[0].message
974        );
975    }
976
977    #[test]
978    fn plan_warnings_single_tsconfig_ref_miss_keeps_per_instance_message() {
979        let root = Path::new("/project");
980        let diag = tsconfig_ref_diag(root, "packages/only/tsconfig.json");
981
982        let plans = plan_warnings(root, std::slice::from_ref(&diag));
983
984        assert_eq!(plans.len(), 1);
985        assert_eq!(
986            plans[0].message, diag.message,
987            "single miss is not aggregated"
988        );
989        assert!(!plans[0].message.contains("directories that do not exist"));
990    }
991
992    #[test]
993    fn plan_warnings_mixed_aggregatable_kinds_each_collapse_independently() {
994        let root = Path::new("/project");
995        let mut diagnostics: Vec<WorkspaceDiagnostic> = (0..5)
996            .map(|i| glob_diag(root, "packages/*", &format!("packages/g{i}")))
997            .collect();
998        diagnostics.extend(
999            (0..4).map(|i| tsconfig_ref_diag(root, &format!("packages/t{i}/tsconfig.json"))),
1000        );
1001
1002        let plans = plan_warnings(root, &diagnostics);
1003
1004        assert_eq!(plans.len(), 2, "one glob summary + one tsconfig summary");
1005        assert!(
1006            plans
1007                .iter()
1008                .any(|p| p.message.contains("matched 5 directories"))
1009        );
1010        assert!(
1011            plans
1012                .iter()
1013                .any(|p| p.message.contains("references 4 directories"))
1014        );
1015    }
1016}