Skip to main content

fallow_engine/
discover.rs

1//! Discovery helpers and types exposed through the engine boundary.
2
3use std::ffi::OsStr;
4use std::path::{Path, PathBuf};
5use std::time::Instant;
6
7use fallow_config::{
8    PackageJson, ResolvedConfig, WorkspaceDiagnostic, WorkspaceInfo, discover_workspaces,
9    find_undeclared_workspaces_with_ignores,
10};
11pub use fallow_types::discover::{DiscoveredFile, EntryPoint, EntryPointSource, FileId};
12use rustc_hash::FxHashSet;
13
14use crate::core_backend;
15
/// Maximum number of undeclared workspace paths listed by name in the
/// undeclared-workspace warning; any further paths are only counted in the
/// message's "(and N more)" suffix.
const UNDECLARED_WORKSPACE_WARNING_PREVIEW: usize = 5;
17
/// File extensions, written without the leading dot, exported as the engine's
/// source-extension list.
///
/// NOTE(review): the consumers of this list are not visible in this file;
/// presumably it decides which files count as source during discovery —
/// confirm against the callers.
pub const SOURCE_EXTENSIONS: &[&str] = &[
    "ts", "tsx", "mts", "cts", "gts", "js", "jsx", "mjs", "cjs", "gjs", "vue", "svelte", "astro",
    "mdx", "css", "scss", "sass", "less", "html", "graphql", "gql",
];
22
/// Glob patterns for test/dev/story files excluded in production mode.
///
/// NOTE(review): `*.config.*` is the only pattern without a leading `**/`.
/// Whether it still matches config files in nested directories depends on how
/// the consuming glob matcher treats path separators — confirm this is
/// intended.
pub const PRODUCTION_EXCLUDE_PATTERNS: &[&str] = &[
    // Files marked by an infix in their name (test, spec, e2e, bench, ...).
    "**/*.test.*",
    "**/*.spec.*",
    "**/*.e2e.*",
    "**/*.e2e-spec.*",
    "**/*.bench.*",
    "**/*.fixture.*",
    "**/*.stories.*",
    "**/*.story.*",
    // Everything below a conventionally named directory.
    "**/__tests__/**",
    "**/__mocks__/**",
    "**/__snapshots__/**",
    "**/__fixtures__/**",
    "**/test/**",
    "**/tests/**",
    // Config files (see the review note above about the missing `**/`).
    "*.config.*",
    // Dot-prefixed JavaScript/TypeScript files.
    "**/.*.js",
    "**/.*.ts",
    "**/.*.mjs",
    "**/.*.cjs",
];
45
46/// Entry points grouped by reachability role.
47#[derive(Debug, Clone, Default)]
48pub struct CategorizedEntryPoints {
49    pub all: Vec<EntryPoint>,
50    pub runtime: Vec<EntryPoint>,
51    pub test: Vec<EntryPoint>,
52}
53
54impl CategorizedEntryPoints {
55    #[must_use]
56    pub fn dedup(mut self) -> Self {
57        dedup_entry_paths(&mut self.all);
58        dedup_entry_paths(&mut self.runtime);
59        dedup_entry_paths(&mut self.test);
60        self
61    }
62}
63
64fn dedup_entry_paths(entries: &mut Vec<EntryPoint>) {
65    entries.sort_by(|a, b| a.path.cmp(&b.path));
66    entries.dedup_by(|a, b| a.path == b.path);
67}
68
/// Package-scoped hidden directories that source discovery should traverse.
///
/// Pairs a root path with a list of directory names. Both fields are private
/// and are read through [`HiddenDirScope::root`] and [`HiddenDirScope::dirs`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HiddenDirScope {
    /// Root path this scope applies to.
    root: PathBuf,
    /// Directory names attached to `root`, in the order they were supplied.
    dirs: Vec<String>,
}

impl HiddenDirScope {
    /// Build a scope from a root path and its directory names; both values are
    /// stored exactly as given.
    #[must_use]
    pub const fn new(root: PathBuf, dirs: Vec<String>) -> Self {
        Self { root, dirs }
    }

    /// Root path passed to [`HiddenDirScope::new`].
    #[must_use]
    pub fn root(&self) -> &Path {
        self.root.as_path()
    }

    /// Directory names passed to [`HiddenDirScope::new`], in their original order.
    #[must_use]
    pub fn dirs(&self) -> &[String] {
        self.dirs.as_slice()
    }
}
92
93/// Reusable engine discovery prelude for one resolved project.
94#[derive(Debug, Clone)]
95pub struct AnalysisDiscovery {
96    inner: core_backend::BackendAnalysisDiscovery,
97}
98
99impl AnalysisDiscovery {
100    pub(crate) const fn as_backend(&self) -> &core_backend::BackendAnalysisDiscovery {
101        &self.inner
102    }
103
104    fn from_parts(
105        files: Vec<DiscoveredFile>,
106        workspaces: Vec<WorkspaceInfo>,
107        root_pkg: Option<PackageJson>,
108        config_candidates: Vec<PathBuf>,
109        discover_ms: f64,
110        workspaces_ms: f64,
111    ) -> Self {
112        Self {
113            inner: core_backend::BackendAnalysisDiscovery::from_parts(
114                files,
115                workspaces,
116                root_pkg,
117                config_candidates,
118                discover_ms,
119                workspaces_ms,
120            ),
121        }
122    }
123
124    /// Discovered source files, indexed by stable `FileId` for this session.
125    #[must_use]
126    pub fn files(&self) -> &[DiscoveredFile] {
127        self.inner.files()
128    }
129
130    /// Discovered workspace packages for this session.
131    #[must_use]
132    pub fn workspaces(&self) -> &[WorkspaceInfo] {
133        self.inner.workspaces()
134    }
135
136    /// Consume this discovery prelude and return its source file registry.
137    #[must_use]
138    pub fn into_files(self) -> Vec<DiscoveredFile> {
139        self.inner.into_files()
140    }
141}
142
143/// Run engine-owned workspace and source discovery for a resolved project.
144#[must_use]
145pub fn prepare_analysis_discovery(config: &ResolvedConfig) -> AnalysisDiscovery {
146    warn_missing_node_modules(config);
147
148    let workspaces_start = Instant::now();
149    let workspaces = discover_workspaces(&config.root);
150    let workspaces_ms = workspaces_start.elapsed().as_secs_f64() * 1000.0;
151    if !workspaces.is_empty() {
152        tracing::info!(count = workspaces.len(), "workspaces discovered");
153    }
154    warn_undeclared_workspaces(
155        &config.root,
156        &workspaces,
157        &config.ignore_patterns,
158        config.quiet,
159    );
160
161    let root_pkg = PackageJson::load(&config.root.join("package.json")).ok();
162    let hidden_dir_scopes = collect_hidden_dir_scopes(config, root_pkg.as_ref(), &workspaces);
163
164    let discover_start = Instant::now();
165    let (files, config_candidates) =
166        discover_files_and_config_candidates(config, &hidden_dir_scopes);
167    let discover_ms = discover_start.elapsed().as_secs_f64() * 1000.0;
168
169    AnalysisDiscovery::from_parts(
170        files,
171        workspaces,
172        root_pkg,
173        config_candidates,
174        discover_ms,
175        workspaces_ms,
176    )
177}
178
179/// Run source discovery with workspace metadata already resolved by config load.
180///
181/// This is the normal [`AnalysisSession`](crate::session::AnalysisSession) path:
182/// config loading already expanded workspace globs and collected diagnostics, so
183/// source discovery can reuse that set instead of walking workspace manifests a
184/// second time.
185#[must_use]
186pub fn prepare_analysis_discovery_with_workspaces(
187    config: &ResolvedConfig,
188    workspaces: &[WorkspaceInfo],
189    workspaces_ms: f64,
190) -> AnalysisDiscovery {
191    warn_missing_node_modules(config);
192
193    if !workspaces.is_empty() {
194        tracing::info!(count = workspaces.len(), "workspaces discovered");
195    }
196
197    let root_pkg = PackageJson::load(&config.root.join("package.json")).ok();
198    let hidden_dir_scopes = collect_hidden_dir_scopes(config, root_pkg.as_ref(), workspaces);
199
200    let discover_start = Instant::now();
201    let (files, config_candidates) =
202        discover_files_and_config_candidates(config, &hidden_dir_scopes);
203    let discover_ms = discover_start.elapsed().as_secs_f64() * 1000.0;
204
205    AnalysisDiscovery::from_parts(
206        files,
207        workspaces.to_vec(),
208        root_pkg,
209        config_candidates,
210        discover_ms,
211        workspaces_ms,
212    )
213}
214
215fn warn_missing_node_modules(config: &ResolvedConfig) {
216    if config.root.join("node_modules").is_dir() {
217        return;
218    }
219
220    tracing::warn!(
221        "node_modules directory not found. Run `npm install` / `pnpm install` first for accurate results."
222    );
223}
224
225fn format_undeclared_workspace_warning(
226    root: &Path,
227    undeclared: &[WorkspaceDiagnostic],
228) -> Option<String> {
229    if undeclared.is_empty() {
230        return None;
231    }
232
233    let preview = undeclared
234        .iter()
235        .take(UNDECLARED_WORKSPACE_WARNING_PREVIEW)
236        .map(|diagnostic| {
237            diagnostic
238                .path
239                .strip_prefix(root)
240                .unwrap_or(&diagnostic.path)
241                .display()
242                .to_string()
243                .replace('\\', "/")
244        })
245        .collect::<Vec<_>>();
246    let remaining = undeclared
247        .len()
248        .saturating_sub(UNDECLARED_WORKSPACE_WARNING_PREVIEW);
249    let tail = if remaining > 0 {
250        format!(" (and {remaining} more)")
251    } else {
252        String::new()
253    };
254    let noun = if undeclared.len() == 1 {
255        "directory with package.json is"
256    } else {
257        "directories with package.json are"
258    };
259    let guidance = if undeclared.len() == 1 {
260        "Add that path to package.json workspaces or pnpm-workspace.yaml if it should be analyzed as a workspace."
261    } else {
262        "Add those paths to package.json workspaces or pnpm-workspace.yaml if they should be analyzed as workspaces."
263    };
264
265    Some(format!(
266        "{} {} not declared as {}: {}{}. {}",
267        undeclared.len(),
268        noun,
269        if undeclared.len() == 1 {
270            "a workspace"
271        } else {
272            "workspaces"
273        },
274        preview.join(", "),
275        tail,
276        guidance
277    ))
278}
279
280fn warn_undeclared_workspaces(
281    root: &Path,
282    workspaces: &[WorkspaceInfo],
283    ignore_patterns: &globset::GlobSet,
284    quiet: bool,
285) {
286    let undeclared = find_undeclared_workspaces_with_ignores(root, workspaces, ignore_patterns);
287    if undeclared.is_empty() {
288        return;
289    }
290
291    let existing = fallow_config::workspace_diagnostics_for(root);
292    let already_flagged: FxHashSet<PathBuf> = existing
293        .iter()
294        .map(|diagnostic| {
295            dunce::canonicalize(&diagnostic.path).unwrap_or_else(|_| diagnostic.path.clone())
296        })
297        .collect();
298    let undeclared: Vec<_> = undeclared
299        .into_iter()
300        .filter(|diagnostic| {
301            let canonical =
302                dunce::canonicalize(&diagnostic.path).unwrap_or_else(|_| diagnostic.path.clone());
303            !already_flagged.contains(&canonical)
304        })
305        .collect();
306    if undeclared.is_empty() {
307        return;
308    }
309
310    fallow_config::append_workspace_diagnostics(root, undeclared.clone());
311
312    if !quiet && let Some(message) = format_undeclared_workspace_warning(root, &undeclared) {
313        tracing::warn!("{message}");
314    }
315}
316
/// Check if a hidden directory name is on the discovery allowlist.
///
/// Forwards `name` unchanged to `core_backend::is_allowed_hidden_dir` and
/// returns its answer; the allowlist itself lives in the backend and is not
/// visible in this file.
#[must_use]
pub fn is_allowed_hidden_dir(name: &OsStr) -> bool {
    core_backend::is_allowed_hidden_dir(name)
}
322
/// Collect plugin-derived hidden directory scopes.
///
/// Engine-boundary wrapper: all three arguments are passed straight through to
/// `core_backend::collect_plugin_hidden_dir_scopes` and its result is returned
/// as-is. `root_pkg` may be `None`.
#[must_use]
pub fn collect_plugin_hidden_dir_scopes(
    config: &ResolvedConfig,
    root_pkg: Option<&PackageJson>,
    workspaces: &[WorkspaceInfo],
) -> Vec<HiddenDirScope> {
    core_backend::collect_plugin_hidden_dir_scopes(config, root_pkg, workspaces)
}
332
/// Collect plugin and script-derived hidden directory scopes.
///
/// Engine-boundary wrapper: all three arguments are passed straight through to
/// `core_backend::collect_hidden_dir_scopes` and its result is returned as-is.
/// `root_pkg` may be `None`.
#[must_use]
pub fn collect_hidden_dir_scopes(
    config: &ResolvedConfig,
    root_pkg: Option<&PackageJson>,
    workspaces: &[WorkspaceInfo],
) -> Vec<HiddenDirScope> {
    core_backend::collect_hidden_dir_scopes(config, root_pkg, workspaces)
}
342
/// Discover source files and non-source config candidates in one traversal.
///
/// Returns `(source_files, config_candidate_paths)` exactly as produced by
/// `core_backend::discover_files_and_config_candidates`, to which both
/// arguments are forwarded unchanged.
#[must_use]
pub fn discover_files_and_config_candidates(
    config: &ResolvedConfig,
    additional_hidden_dir_scopes: &[HiddenDirScope],
) -> (Vec<DiscoveredFile>, Vec<PathBuf>) {
    core_backend::discover_files_and_config_candidates(config, additional_hidden_dir_scopes)
}
351
/// Discover configured and inferred entry points.
///
/// Engine-boundary wrapper around `core_backend::discover_entry_points`;
/// `config` and `files` are forwarded unchanged and the backend's result is
/// returned as-is.
#[must_use]
pub fn discover_entry_points(config: &ResolvedConfig, files: &[DiscoveredFile]) -> Vec<EntryPoint> {
    core_backend::discover_entry_points(config, files)
}
357
/// Discover entry points for a workspace package.
///
/// Engine-boundary wrapper around
/// `core_backend::discover_workspace_entry_points`; `ws_root`, `config` and
/// `all_files` are forwarded unchanged and the backend's result is returned
/// as-is.
#[must_use]
pub fn discover_workspace_entry_points(
    ws_root: &Path,
    config: &ResolvedConfig,
    all_files: &[DiscoveredFile],
) -> Vec<EntryPoint> {
    core_backend::discover_workspace_entry_points(ws_root, config, all_files)
}
367
/// Discover entry points from plugin results.
///
/// Engine-boundary wrapper around `core_backend::discover_plugin_entry_points`.
/// The engine-level `plugin_result` is converted to its backend form with
/// `as_backend()` before the call; `config` and `files` are forwarded
/// unchanged.
#[must_use]
pub fn discover_plugin_entry_points(
    plugin_result: &crate::plugins::AggregatedPluginResult,
    config: &ResolvedConfig,
    files: &[DiscoveredFile],
) -> Vec<EntryPoint> {
    core_backend::discover_plugin_entry_points(plugin_result.as_backend(), config, files)
}
377
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use super::{CategorizedEntryPoints, EntryPoint, EntryPointSource, HiddenDirScope};

    /// The accessors hand back exactly what was passed to `new`.
    #[test]
    fn hidden_dir_scope_exposes_root_and_dirs() {
        let root = PathBuf::from("/repo/packages/app");
        let scope = HiddenDirScope::new(root.clone(), vec![".next".into()]);

        assert_eq!(scope.root(), root);
        assert_eq!(scope.dirs(), [".next"]);
    }

    /// Duplicate paths collapse to one entry per bucket; an empty bucket
    /// stays empty.
    #[test]
    fn categorized_entry_points_dedups_each_bucket() {
        let entry = EntryPoint {
            path: PathBuf::from("/repo/src/index.ts"),
            source: EntryPointSource::DefaultIndex,
        };
        let duplicated = vec![entry.clone(), entry.clone()];

        let deduped = CategorizedEntryPoints {
            all: duplicated.clone(),
            runtime: duplicated,
            test: Vec::new(),
        }
        .dedup();

        assert_eq!(deduped.all.len(), 1);
        assert_eq!(deduped.runtime.len(), 1);
        assert!(deduped.test.is_empty());
        assert_eq!(deduped.all[0].path, entry.path);
    }
}
410}