Skip to main content

fallow_engine/
discover.rs

1//! Discovery helpers and types exposed through the engine boundary.
2
3use std::ffi::OsStr;
4use std::path::{Path, PathBuf};
5use std::time::Instant;
6
7use fallow_config::{
8    PackageJson, ResolvedConfig, WorkspaceDiagnostic, WorkspaceInfo, discover_workspaces,
9    find_undeclared_workspaces_with_ignores,
10};
11pub use fallow_types::discover::{DiscoveredFile, EntryPoint, EntryPointSource, FileId};
12use rustc_hash::FxHashSet;
13
14use crate::{EngineError, EngineResult, core_backend};
15
/// Upper bound on how many undeclared workspace paths are spelled out in one
/// warning; any further paths are only counted.
const UNDECLARED_WORKSPACE_WARNING_PREVIEW: usize = 5;
17
/// File extensions (written without the leading dot) that the engine lists as
/// source extensions.
pub const SOURCE_EXTENSIONS: &[&str] = &[
    // TypeScript flavours.
    "ts", "tsx", "mts", "cts", "gts",
    // JavaScript flavours.
    "js", "jsx", "mjs", "cjs", "gjs",
    // Component and content formats.
    "vue", "svelte", "astro", "mdx",
    // Stylesheets.
    "css", "scss", "sass", "less",
    // Markup and GraphQL documents.
    "html", "graphql", "gql",
];
22
/// Glob patterns naming the test, dev, and story files that production mode
/// leaves out.
pub const PRODUCTION_EXCLUDE_PATTERNS: &[&str] = &[
    // Files identified by an infix in their name, at any depth.
    "**/*.test.*",
    "**/*.spec.*",
    "**/*.e2e.*",
    "**/*.e2e-spec.*",
    "**/*.bench.*",
    "**/*.fixture.*",
    "**/*.stories.*",
    "**/*.story.*",
    // Double-underscore convention directories.
    "**/__tests__/**",
    "**/__mocks__/**",
    "**/__snapshots__/**",
    "**/__fixtures__/**",
    // Plain test directories.
    "**/test/**",
    "**/tests/**",
    // Config files. Unlike the other entries this pattern has no `**/` prefix.
    "*.config.*",
    // Dot-prefixed JavaScript/TypeScript files.
    "**/.*.js",
    "**/.*.ts",
    "**/.*.mjs",
    "**/.*.cjs",
];
45
46/// Discover workspace packages through the engine boundary.
47///
48/// Use this for callers that only need workspace metadata and do not yet own an
49/// `AnalysisSession`. Session-backed flows should prefer
50/// [`AnalysisSession::workspaces`](crate::session::AnalysisSession::workspaces)
51/// so discovery is reused with the rest of the analysis context.
52#[must_use]
53pub fn discover_workspace_packages(root: &Path) -> Vec<WorkspaceInfo> {
54    discover_workspaces(root)
55}
56
57/// Discover workspace packages and diagnostics through the engine boundary.
58///
59/// This is for CLI/API surfaces that need to render workspace diagnostics but
60/// do not otherwise need a full [`AnalysisSession`](crate::session::AnalysisSession).
61///
62/// # Errors
63///
64/// Returns an engine error when workspace manifest loading fails.
65pub fn discover_workspace_packages_with_diagnostics(
66    root: &Path,
67    ignore_patterns: &globset::GlobSet,
68) -> EngineResult<(Vec<WorkspaceInfo>, Vec<WorkspaceDiagnostic>)> {
69    fallow_config::discover_workspaces_with_diagnostics(root, ignore_patterns)
70        .map_err(|err| EngineError::new(err.to_string()))
71}
72
73/// Entry points grouped by reachability role.
74#[derive(Debug, Clone, Default)]
75pub struct CategorizedEntryPoints {
76    pub all: Vec<EntryPoint>,
77    pub runtime: Vec<EntryPoint>,
78    pub test: Vec<EntryPoint>,
79}
80
81impl CategorizedEntryPoints {
82    #[must_use]
83    pub fn dedup(mut self) -> Self {
84        dedup_entry_paths(&mut self.all);
85        dedup_entry_paths(&mut self.runtime);
86        dedup_entry_paths(&mut self.test);
87        self
88    }
89}
90
91fn dedup_entry_paths(entries: &mut Vec<EntryPoint>) {
92    entries.sort_by(|a, b| a.path.cmp(&b.path));
93    entries.dedup_by(|a, b| a.path == b.path);
94}
95
/// A set of hidden directories, scoped to one package root, that source
/// discovery should traverse.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HiddenDirScope {
    // Root path the scope is attached to.
    root: PathBuf,
    // Hidden directory names associated with `root` (for example `.next`).
    dirs: Vec<String>,
}

impl HiddenDirScope {
    /// Build a scope from a root path and its hidden directory names.
    #[must_use]
    pub const fn new(root: PathBuf, dirs: Vec<String>) -> Self {
        Self { root, dirs }
    }

    /// Root path this scope applies to.
    #[must_use]
    pub fn root(&self) -> &Path {
        self.root.as_path()
    }

    /// Hidden directory names carried by this scope, in construction order.
    #[must_use]
    pub fn dirs(&self) -> &[String] {
        self.dirs.as_slice()
    }
}
119
120/// Reusable engine discovery prelude for one resolved project.
121#[derive(Debug, Clone)]
122pub struct AnalysisDiscovery {
123    inner: core_backend::BackendAnalysisDiscovery,
124}
125
126impl AnalysisDiscovery {
127    pub(crate) const fn as_backend(&self) -> &core_backend::BackendAnalysisDiscovery {
128        &self.inner
129    }
130
131    fn from_parts(
132        files: Vec<DiscoveredFile>,
133        workspaces: Vec<WorkspaceInfo>,
134        root_pkg: Option<PackageJson>,
135        config_candidates: Vec<PathBuf>,
136        discover_ms: f64,
137        workspaces_ms: f64,
138    ) -> Self {
139        Self {
140            inner: core_backend::BackendAnalysisDiscovery::from_parts(
141                files,
142                workspaces,
143                root_pkg,
144                config_candidates,
145                discover_ms,
146                workspaces_ms,
147            ),
148        }
149    }
150
151    /// Discovered source files, indexed by stable `FileId` for this session.
152    #[must_use]
153    pub fn files(&self) -> &[DiscoveredFile] {
154        self.inner.files()
155    }
156
157    /// Discovered workspace packages for this session.
158    #[must_use]
159    pub fn workspaces(&self) -> &[WorkspaceInfo] {
160        self.inner.workspaces()
161    }
162
163    /// Consume this discovery prelude and return its source file registry.
164    #[must_use]
165    pub fn into_files(self) -> Vec<DiscoveredFile> {
166        self.inner.into_files()
167    }
168}
169
170/// Run engine-owned workspace and source discovery for a resolved project.
171#[must_use]
172pub fn prepare_analysis_discovery(config: &ResolvedConfig) -> AnalysisDiscovery {
173    warn_missing_node_modules(config);
174
175    let workspaces_start = Instant::now();
176    let workspaces = discover_workspaces(&config.root);
177    let workspaces_ms = workspaces_start.elapsed().as_secs_f64() * 1000.0;
178    if !workspaces.is_empty() {
179        tracing::info!(count = workspaces.len(), "workspaces discovered");
180    }
181    warn_undeclared_workspaces(
182        &config.root,
183        &workspaces,
184        &config.ignore_patterns,
185        config.quiet,
186    );
187
188    let root_pkg = PackageJson::load(&config.root.join("package.json")).ok();
189    let hidden_dir_scopes = collect_hidden_dir_scopes(config, root_pkg.as_ref(), &workspaces);
190
191    let discover_start = Instant::now();
192    let (files, config_candidates) =
193        discover_files_and_config_candidates(config, &hidden_dir_scopes);
194    let discover_ms = discover_start.elapsed().as_secs_f64() * 1000.0;
195
196    AnalysisDiscovery::from_parts(
197        files,
198        workspaces,
199        root_pkg,
200        config_candidates,
201        discover_ms,
202        workspaces_ms,
203    )
204}
205
206/// Run source discovery with workspace metadata already resolved by config load.
207///
208/// This is the normal [`AnalysisSession`](crate::session::AnalysisSession) path:
209/// config loading already expanded workspace globs and collected diagnostics, so
210/// source discovery can reuse that set instead of walking workspace manifests a
211/// second time.
212#[must_use]
213pub fn prepare_analysis_discovery_with_workspaces(
214    config: &ResolvedConfig,
215    workspaces: &[WorkspaceInfo],
216    workspaces_ms: f64,
217) -> AnalysisDiscovery {
218    warn_missing_node_modules(config);
219
220    if !workspaces.is_empty() {
221        tracing::info!(count = workspaces.len(), "workspaces discovered");
222    }
223
224    let root_pkg = PackageJson::load(&config.root.join("package.json")).ok();
225    let hidden_dir_scopes = collect_hidden_dir_scopes(config, root_pkg.as_ref(), workspaces);
226
227    let discover_start = Instant::now();
228    let (files, config_candidates) =
229        discover_files_and_config_candidates(config, &hidden_dir_scopes);
230    let discover_ms = discover_start.elapsed().as_secs_f64() * 1000.0;
231
232    AnalysisDiscovery::from_parts(
233        files,
234        workspaces.to_vec(),
235        root_pkg,
236        config_candidates,
237        discover_ms,
238        workspaces_ms,
239    )
240}
241
242fn warn_missing_node_modules(config: &ResolvedConfig) {
243    if config.root.join("node_modules").is_dir() {
244        return;
245    }
246
247    tracing::warn!(
248        "node_modules directory not found. Run `npm install` / `pnpm install` first for accurate results."
249    );
250}
251
252fn format_undeclared_workspace_warning(
253    root: &Path,
254    undeclared: &[WorkspaceDiagnostic],
255) -> Option<String> {
256    if undeclared.is_empty() {
257        return None;
258    }
259
260    let preview = undeclared
261        .iter()
262        .take(UNDECLARED_WORKSPACE_WARNING_PREVIEW)
263        .map(|diagnostic| {
264            diagnostic
265                .path
266                .strip_prefix(root)
267                .unwrap_or(&diagnostic.path)
268                .display()
269                .to_string()
270                .replace('\\', "/")
271        })
272        .collect::<Vec<_>>();
273    let remaining = undeclared
274        .len()
275        .saturating_sub(UNDECLARED_WORKSPACE_WARNING_PREVIEW);
276    let tail = if remaining > 0 {
277        format!(" (and {remaining} more)")
278    } else {
279        String::new()
280    };
281    let noun = if undeclared.len() == 1 {
282        "directory with package.json is"
283    } else {
284        "directories with package.json are"
285    };
286    let guidance = if undeclared.len() == 1 {
287        "Add that path to package.json workspaces or pnpm-workspace.yaml if it should be analyzed as a workspace."
288    } else {
289        "Add those paths to package.json workspaces or pnpm-workspace.yaml if they should be analyzed as workspaces."
290    };
291
292    Some(format!(
293        "{} {} not declared as {}: {}{}. {}",
294        undeclared.len(),
295        noun,
296        if undeclared.len() == 1 {
297            "a workspace"
298        } else {
299            "workspaces"
300        },
301        preview.join(", "),
302        tail,
303        guidance
304    ))
305}
306
307fn warn_undeclared_workspaces(
308    root: &Path,
309    workspaces: &[WorkspaceInfo],
310    ignore_patterns: &globset::GlobSet,
311    quiet: bool,
312) {
313    let undeclared = find_undeclared_workspaces_with_ignores(root, workspaces, ignore_patterns);
314    if undeclared.is_empty() {
315        return;
316    }
317
318    let existing = fallow_config::workspace_diagnostics_for(root);
319    let already_flagged: FxHashSet<PathBuf> = existing
320        .iter()
321        .map(|diagnostic| {
322            dunce::canonicalize(&diagnostic.path).unwrap_or_else(|_| diagnostic.path.clone())
323        })
324        .collect();
325    let undeclared: Vec<_> = undeclared
326        .into_iter()
327        .filter(|diagnostic| {
328            let canonical =
329                dunce::canonicalize(&diagnostic.path).unwrap_or_else(|_| diagnostic.path.clone());
330            !already_flagged.contains(&canonical)
331        })
332        .collect();
333    if undeclared.is_empty() {
334        return;
335    }
336
337    fallow_config::append_workspace_diagnostics(root, undeclared.clone());
338
339    if !quiet && let Some(message) = format_undeclared_workspace_warning(root, &undeclared) {
340        tracing::warn!("{message}");
341    }
342}
343
344/// Check if a hidden directory name is on the discovery allowlist.
345#[must_use]
346pub fn is_allowed_hidden_dir(name: &OsStr) -> bool {
347    core_backend::is_allowed_hidden_dir(name)
348}
349
350/// Collect plugin-derived hidden directory scopes.
351#[must_use]
352pub fn collect_plugin_hidden_dir_scopes(
353    config: &ResolvedConfig,
354    root_pkg: Option<&PackageJson>,
355    workspaces: &[WorkspaceInfo],
356) -> Vec<HiddenDirScope> {
357    core_backend::collect_plugin_hidden_dir_scopes(config, root_pkg, workspaces)
358}
359
360/// Collect plugin and script-derived hidden directory scopes.
361#[must_use]
362pub fn collect_hidden_dir_scopes(
363    config: &ResolvedConfig,
364    root_pkg: Option<&PackageJson>,
365    workspaces: &[WorkspaceInfo],
366) -> Vec<HiddenDirScope> {
367    core_backend::collect_hidden_dir_scopes(config, root_pkg, workspaces)
368}
369
370/// Discover source files and non-source config candidates in one traversal.
371#[must_use]
372pub fn discover_files_and_config_candidates(
373    config: &ResolvedConfig,
374    additional_hidden_dir_scopes: &[HiddenDirScope],
375) -> (Vec<DiscoveredFile>, Vec<PathBuf>) {
376    core_backend::discover_files_and_config_candidates(config, additional_hidden_dir_scopes)
377}
378
379/// Discover configured and inferred entry points.
380#[must_use]
381pub fn discover_entry_points(config: &ResolvedConfig, files: &[DiscoveredFile]) -> Vec<EntryPoint> {
382    core_backend::discover_entry_points(config, files)
383}
384
385/// Discover entry points for a workspace package.
386#[must_use]
387pub fn discover_workspace_entry_points(
388    ws_root: &Path,
389    config: &ResolvedConfig,
390    all_files: &[DiscoveredFile],
391) -> Vec<EntryPoint> {
392    core_backend::discover_workspace_entry_points(ws_root, config, all_files)
393}
394
395/// Discover entry points from plugin results.
396#[must_use]
397pub fn discover_plugin_entry_points(
398    plugin_result: &crate::plugins::AggregatedPluginResult,
399    config: &ResolvedConfig,
400    files: &[DiscoveredFile],
401) -> Vec<EntryPoint> {
402    core_backend::discover_plugin_entry_points(plugin_result.as_backend(), config, files)
403}
404
#[cfg(test)]
mod tests {
    use std::path::{Path, PathBuf};

    use super::{CategorizedEntryPoints, EntryPoint, EntryPointSource, HiddenDirScope};

    /// The accessors hand back exactly what the constructor was given.
    #[test]
    fn hidden_dir_scope_exposes_root_and_dirs() {
        let package_root = "/repo/packages/app";
        let scope = HiddenDirScope::new(PathBuf::from(package_root), vec![".next".into()]);

        assert_eq!(scope.root(), Path::new(package_root));
        assert_eq!(scope.dirs(), [".next"]);
    }

    /// Duplicate paths collapse to one entry in each bucket independently.
    #[test]
    fn categorized_entry_points_dedups_each_bucket() {
        let index = EntryPoint {
            path: PathBuf::from("/repo/src/index.ts"),
            source: EntryPointSource::DefaultIndex,
        };
        let deduped = CategorizedEntryPoints {
            all: vec![index.clone(), index.clone()],
            runtime: vec![index.clone(), index.clone()],
            test: Vec::new(),
        }
        .dedup();

        assert_eq!(deduped.all.len(), 1);
        assert_eq!(deduped.runtime.len(), 1);
        assert!(deduped.test.is_empty());
        assert_eq!(deduped.all[0].path, index.path);
    }
}