Skip to main content

fallow_core/discover/
entry_points.rs

1use std::path::{Path, PathBuf};
2
3use fallow_config::{PackageJson, ResolvedConfig};
4use fallow_types::discover::{DiscoveredFile, EntryPoint, EntryPointSource};
5
6use super::parse_scripts::extract_script_file_refs;
7use super::walk::SOURCE_EXTENSIONS;
8
/// Directory names that conventionally hold build output in `exports` maps.
///
/// An entry point located under one of these directories is also looked up
/// under the sibling `src/` directory, so the tracked source file is used
/// rather than the build artifact.
const OUTPUT_DIRS: &[&str] = &[
    "dist",
    "build",
    "out",
    "esm",
    "cjs",
];
13
14/// Resolve a path relative to a base directory, with security check and extension fallback.
15///
16/// Returns `Some(EntryPoint)` if the path resolves to an existing file within `canonical_root`,
17/// trying source extensions as fallback when the exact path doesn't exist.
18/// Also handles exports map targets in output directories (e.g., `./dist/utils.js`)
19/// by trying to map back to the source file (e.g., `./src/utils.ts`).
20pub fn resolve_entry_path(
21    base: &Path,
22    entry: &str,
23    canonical_root: &Path,
24    source: EntryPointSource,
25) -> Option<EntryPoint> {
26    let resolved = base.join(entry);
27    // Security: ensure resolved path stays within the allowed root
28    let canonical_resolved = resolved.canonicalize().unwrap_or_else(|_| resolved.clone());
29    if !canonical_resolved.starts_with(canonical_root) {
30        tracing::warn!(path = %entry, "Skipping entry point outside project root");
31        return None;
32    }
33
34    // If the path is in an output directory (dist/, build/, etc.), try mapping to src/ first.
35    // This handles exports map targets like `./dist/utils.js` → `./src/utils.ts`.
36    // We check this BEFORE the exists() check because even if the dist file exists,
37    // fallow ignores dist/ by default, so we need the source file instead.
38    if let Some(source_path) = try_output_to_source_path(base, entry) {
39        // Security: ensure the mapped source path stays within the project root
40        if let Ok(canonical_source) = source_path.canonicalize()
41            && canonical_source.starts_with(canonical_root)
42        {
43            return Some(EntryPoint {
44                path: source_path,
45                source,
46            });
47        }
48    }
49
50    if resolved.exists() {
51        return Some(EntryPoint {
52            path: resolved,
53            source,
54        });
55    }
56    // Try with source extensions
57    for ext in SOURCE_EXTENSIONS {
58        let with_ext = resolved.with_extension(ext);
59        if with_ext.exists() {
60            return Some(EntryPoint {
61                path: with_ext,
62                source,
63            });
64        }
65    }
66    None
67}
68
69/// Try to map an entry path from an output directory to its source equivalent.
70///
71/// Given `base=/project/packages/ui` and `entry=./dist/utils.js`, this tries:
72/// - `/project/packages/ui/src/utils.ts`
73/// - `/project/packages/ui/src/utils.tsx`
74/// - etc. for all source extensions
75///
76/// Preserves any path prefix between the package root and the output dir,
77/// e.g. `./modules/dist/utils.js` → `base/modules/src/utils.ts`.
78///
79/// Returns `Some(path)` if a source file is found.
80fn try_output_to_source_path(base: &Path, entry: &str) -> Option<PathBuf> {
81    let entry_path = Path::new(entry);
82    let components: Vec<_> = entry_path.components().collect();
83
84    // Find the last output directory component in the entry path
85    let output_pos = components.iter().rposition(|c| {
86        if let std::path::Component::Normal(s) = c
87            && let Some(name) = s.to_str()
88        {
89            return OUTPUT_DIRS.contains(&name);
90        }
91        false
92    })?;
93
94    // Build the relative prefix before the output dir, filtering out CurDir (".")
95    let prefix: PathBuf = components[..output_pos]
96        .iter()
97        .filter(|c| !matches!(c, std::path::Component::CurDir))
98        .collect();
99
100    // Build the relative path after the output dir (e.g., "utils.js")
101    let suffix: PathBuf = components[output_pos + 1..].iter().collect();
102
103    // Try base + prefix + "src" + suffix-with-source-extension
104    for ext in SOURCE_EXTENSIONS {
105        let source_candidate = base
106            .join(&prefix)
107            .join("src")
108            .join(suffix.with_extension(ext));
109        if source_candidate.exists() {
110            return Some(source_candidate);
111        }
112    }
113
114    None
115}
116
/// Glob patterns (relative to a project or workspace root) for conventional
/// index/main files. They are consulted only as a last resort, when no other
/// entry point has been discovered.
const DEFAULT_INDEX_PATTERNS: &[&str] = &[
    "src/index.{ts,tsx,js,jsx}",
    "src/main.{ts,tsx,js,jsx}",
    "index.{ts,tsx,js,jsx}",
    "main.{ts,tsx,js,jsx}",
];
124
125/// Fall back to default index patterns if no entries were found.
126///
127/// When `ws_filter` is `Some`, only files whose path starts with the given
128/// workspace root are considered (used for workspace-scoped discovery).
129fn apply_default_fallback(
130    files: &[DiscoveredFile],
131    root: &Path,
132    ws_filter: Option<&Path>,
133) -> Vec<EntryPoint> {
134    let default_matchers: Vec<globset::GlobMatcher> = DEFAULT_INDEX_PATTERNS
135        .iter()
136        .filter_map(|p| globset::Glob::new(p).ok().map(|g| g.compile_matcher()))
137        .collect();
138
139    let mut entries = Vec::new();
140    for file in files {
141        // Use strip_prefix instead of canonicalize for workspace filtering
142        if let Some(ws_root) = ws_filter
143            && file.path.strip_prefix(ws_root).is_err()
144        {
145            continue;
146        }
147        let relative = file.path.strip_prefix(root).unwrap_or(&file.path);
148        let relative_str = relative.to_string_lossy();
149        if default_matchers
150            .iter()
151            .any(|m| m.is_match(relative_str.as_ref()))
152        {
153            entries.push(EntryPoint {
154                path: file.path.clone(),
155                source: EntryPointSource::DefaultIndex,
156            });
157        }
158    }
159    entries
160}
161
162/// Discover entry points from package.json, framework rules, and defaults.
163pub fn discover_entry_points(config: &ResolvedConfig, files: &[DiscoveredFile]) -> Vec<EntryPoint> {
164    let _span = tracing::info_span!("discover_entry_points").entered();
165    let mut entries = Vec::new();
166
167    // Pre-compute relative paths for all files (once, not per pattern)
168    let relative_paths: Vec<String> = files
169        .iter()
170        .map(|f| {
171            f.path
172                .strip_prefix(&config.root)
173                .unwrap_or(&f.path)
174                .to_string_lossy()
175                .into_owned()
176        })
177        .collect();
178
179    // 1. Manual entries from config — batch all patterns into a single GlobSet
180    // for O(files) matching instead of O(patterns × files).
181    {
182        let mut builder = globset::GlobSetBuilder::new();
183        for pattern in &config.entry_patterns {
184            if let Ok(glob) = globset::Glob::new(pattern) {
185                builder.add(glob);
186            }
187        }
188        if let Ok(glob_set) = builder.build()
189            && !glob_set.is_empty()
190        {
191            for (idx, rel) in relative_paths.iter().enumerate() {
192                if glob_set.is_match(rel) {
193                    entries.push(EntryPoint {
194                        path: files[idx].path.clone(),
195                        source: EntryPointSource::ManualEntry,
196                    });
197                }
198            }
199        }
200    }
201
202    // 2. Package.json entries
203    // Pre-compute canonical root once for all resolve_entry_path calls
204    let canonical_root = config
205        .root
206        .canonicalize()
207        .unwrap_or_else(|_| config.root.clone());
208    let pkg_path = config.root.join("package.json");
209    if let Ok(pkg) = PackageJson::load(&pkg_path) {
210        for entry_path in pkg.entry_points() {
211            if let Some(ep) = resolve_entry_path(
212                &config.root,
213                &entry_path,
214                &canonical_root,
215                EntryPointSource::PackageJsonMain,
216            ) {
217                entries.push(ep);
218            }
219        }
220
221        // 2b. Package.json scripts — extract file references as entry points
222        if let Some(scripts) = &pkg.scripts {
223            for script_value in scripts.values() {
224                for file_ref in extract_script_file_refs(script_value) {
225                    if let Some(ep) = resolve_entry_path(
226                        &config.root,
227                        &file_ref,
228                        &canonical_root,
229                        EntryPointSource::PackageJsonScript,
230                    ) {
231                        entries.push(ep);
232                    }
233                }
234            }
235        }
236
237        // Framework rules now flow through PluginRegistry via external_plugins.
238    }
239
240    // 4. Auto-discover nested package.json entry points
241    // For monorepo-like structures without explicit workspace config, scan for
242    // package.json files in subdirectories and use their main/exports as entries.
243    discover_nested_package_entries(&config.root, files, &mut entries, &canonical_root);
244
245    // 5. Default index files (if no other entries found)
246    if entries.is_empty() {
247        entries = apply_default_fallback(files, &config.root, None);
248    }
249
250    // Deduplicate by path
251    entries.sort_by(|a, b| a.path.cmp(&b.path));
252    entries.dedup_by(|a, b| a.path == b.path);
253
254    entries
255}
256
257/// Discover entry points from nested package.json files in subdirectories.
258///
259/// When a project has subdirectories with their own package.json (e.g., `packages/foo/package.json`),
260/// the `main`, `module`, `exports`, and `bin` fields of those package.json files should be treated
261/// as entry points. This handles monorepos without explicit workspace configuration.
262fn discover_nested_package_entries(
263    root: &Path,
264    _files: &[DiscoveredFile],
265    entries: &mut Vec<EntryPoint>,
266    canonical_root: &Path,
267) {
268    // Walk common monorepo patterns to find nested package.json files
269    let search_dirs = [
270        "packages", "apps", "libs", "modules", "plugins", "services", "tools", "utils",
271    ];
272    for dir_name in &search_dirs {
273        let search_dir = root.join(dir_name);
274        if !search_dir.is_dir() {
275            continue;
276        }
277        let Ok(read_dir) = std::fs::read_dir(&search_dir) else {
278            continue;
279        };
280        for entry in read_dir.flatten() {
281            let pkg_path = entry.path().join("package.json");
282            if !pkg_path.exists() {
283                continue;
284            }
285            let Ok(pkg) = PackageJson::load(&pkg_path) else {
286                continue;
287            };
288            let pkg_dir = entry.path();
289            for entry_path in pkg.entry_points() {
290                if let Some(ep) = resolve_entry_path(
291                    &pkg_dir,
292                    &entry_path,
293                    canonical_root,
294                    EntryPointSource::PackageJsonExports,
295                ) {
296                    entries.push(ep);
297                }
298            }
299            // Also check scripts in nested package.json
300            if let Some(scripts) = &pkg.scripts {
301                for script_value in scripts.values() {
302                    for file_ref in extract_script_file_refs(script_value) {
303                        if let Some(ep) = resolve_entry_path(
304                            &pkg_dir,
305                            &file_ref,
306                            canonical_root,
307                            EntryPointSource::PackageJsonScript,
308                        ) {
309                            entries.push(ep);
310                        }
311                    }
312                }
313            }
314        }
315    }
316}
317
318/// Discover entry points for a workspace package.
319pub fn discover_workspace_entry_points(
320    ws_root: &Path,
321    _config: &ResolvedConfig,
322    all_files: &[DiscoveredFile],
323) -> Vec<EntryPoint> {
324    let mut entries = Vec::new();
325
326    let pkg_path = ws_root.join("package.json");
327    if let Ok(pkg) = PackageJson::load(&pkg_path) {
328        let canonical_ws_root = ws_root
329            .canonicalize()
330            .unwrap_or_else(|_| ws_root.to_path_buf());
331        for entry_path in pkg.entry_points() {
332            if let Some(ep) = resolve_entry_path(
333                ws_root,
334                &entry_path,
335                &canonical_ws_root,
336                EntryPointSource::PackageJsonMain,
337            ) {
338                entries.push(ep);
339            }
340        }
341
342        // Scripts field — extract file references as entry points
343        if let Some(scripts) = &pkg.scripts {
344            for script_value in scripts.values() {
345                for file_ref in extract_script_file_refs(script_value) {
346                    if let Some(ep) = resolve_entry_path(
347                        ws_root,
348                        &file_ref,
349                        &canonical_ws_root,
350                        EntryPointSource::PackageJsonScript,
351                    ) {
352                        entries.push(ep);
353                    }
354                }
355            }
356        }
357
358        // Framework rules now flow through PluginRegistry via external_plugins.
359    }
360
361    // Fall back to default index files if no entry points found for this workspace
362    if entries.is_empty() {
363        entries = apply_default_fallback(all_files, ws_root, None);
364    }
365
366    entries.sort_by(|a, b| a.path.cmp(&b.path));
367    entries.dedup_by(|a, b| a.path == b.path);
368    entries
369}
370
371/// Discover entry points from plugin results (dynamic config parsing).
372///
373/// Converts plugin-discovered patterns and setup files into concrete entry points
374/// by matching them against the discovered file list.
375pub fn discover_plugin_entry_points(
376    plugin_result: &crate::plugins::AggregatedPluginResult,
377    config: &ResolvedConfig,
378    files: &[DiscoveredFile],
379) -> Vec<EntryPoint> {
380    let mut entries = Vec::new();
381
382    // Pre-compute relative paths
383    let relative_paths: Vec<String> = files
384        .iter()
385        .map(|f| {
386            f.path
387                .strip_prefix(&config.root)
388                .unwrap_or(&f.path)
389                .to_string_lossy()
390                .into_owned()
391        })
392        .collect();
393
394    // Match plugin entry patterns against files using a single GlobSet
395    // for O(files) matching instead of O(patterns × files).
396    // Track which plugin name corresponds to each glob index.
397    let mut builder = globset::GlobSetBuilder::new();
398    let mut glob_plugin_names: Vec<&str> = Vec::new();
399    for (pattern, pname) in plugin_result
400        .entry_patterns
401        .iter()
402        .chain(plugin_result.discovered_always_used.iter())
403        .chain(plugin_result.always_used.iter())
404    {
405        if let Ok(glob) = globset::Glob::new(pattern) {
406            builder.add(glob);
407            glob_plugin_names.push(pname);
408        }
409    }
410    if let Ok(glob_set) = builder.build()
411        && !glob_set.is_empty()
412    {
413        for (idx, rel) in relative_paths.iter().enumerate() {
414            let matches = glob_set.matches(rel);
415            if !matches.is_empty() {
416                // Use the plugin name from the first matching pattern
417                let name = glob_plugin_names[matches[0]].to_string();
418                entries.push(EntryPoint {
419                    path: files[idx].path.clone(),
420                    source: EntryPointSource::Plugin { name },
421                });
422            }
423        }
424    }
425
426    // Add setup files (absolute paths from plugin config parsing)
427    for (setup_file, pname) in &plugin_result.setup_files {
428        let resolved = if setup_file.is_absolute() {
429            setup_file.clone()
430        } else {
431            config.root.join(setup_file)
432        };
433        if resolved.exists() {
434            entries.push(EntryPoint {
435                path: resolved,
436                source: EntryPointSource::Plugin {
437                    name: pname.clone(),
438                },
439            });
440        } else {
441            // Try with extensions
442            for ext in SOURCE_EXTENSIONS {
443                let with_ext = resolved.with_extension(ext);
444                if with_ext.exists() {
445                    entries.push(EntryPoint {
446                        path: with_ext,
447                        source: EntryPointSource::Plugin {
448                            name: pname.clone(),
449                        },
450                    });
451                    break;
452                }
453            }
454        }
455    }
456
457    // Deduplicate
458    entries.sort_by(|a, b| a.path.cmp(&b.path));
459    entries.dedup_by(|a, b| a.path == b.path);
460    entries
461}
462
463/// Pre-compile a set of glob patterns for efficient matching against many paths.
464pub fn compile_glob_set(patterns: &[String]) -> Option<globset::GlobSet> {
465    if patterns.is_empty() {
466        return None;
467    }
468    let mut builder = globset::GlobSetBuilder::new();
469    for pattern in patterns {
470        if let Ok(glob) = globset::Glob::new(pattern) {
471            builder.add(glob);
472        }
473    }
474    builder.build().ok()
475}
476
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        /// A `**/<prefix>*.<ext>` pattern built from an alphanumeric prefix and a
        /// known source extension must always be accepted by globset.
        #[test]
        fn glob_patterns_never_panic_on_compile(
            prefix in "[a-zA-Z0-9_]{1,20}",
            ext in prop::sample::select(vec!["ts", "tsx", "js", "jsx", "vue", "svelte", "astro", "mdx"]),
        ) {
            // Compilation must return a value (never panic) and that value must be Ok.
            let compiled = globset::Glob::new(&format!("**/{prefix}*.{ext}"));
            prop_assert!(compiled.is_ok(), "Glob::new should not fail for well-formed patterns");
        }

        /// Extensions of non-JS/TS file types must be absent from SOURCE_EXTENSIONS.
        #[test]
        fn non_source_extensions_not_in_list(
            ext in prop::sample::select(vec!["py", "rb", "rs", "go", "java", "html", "xml", "yaml", "toml", "md", "txt", "png", "jpg", "wasm", "lock"]),
        ) {
            let listed = SOURCE_EXTENSIONS.contains(&ext);
            prop_assert!(
                !listed,
                "Extension '{ext}' should NOT be in SOURCE_EXTENSIONS"
            );
        }

        /// compile_glob_set must return without panicking for arbitrary pattern-like
        /// strings, including an empty list.
        #[test]
        fn compile_glob_set_no_panic(
            patterns in prop::collection::vec("[a-zA-Z0-9_*/.]{1,30}", 0..10),
        ) {
            // Only the absence of a panic matters; the result itself is discarded.
            drop(compile_glob_set(&patterns));
        }
    }
}