Skip to main content

fallow_core/
discover.rs

1use std::path::{Path, PathBuf};
2
3use fallow_config::{FrameworkDetection, PackageJson, ResolvedConfig};
4use ignore::WalkBuilder;
5
/// A discovered source file on disk, as produced by `discover_files`.
#[derive(Debug, Clone)]
pub struct DiscoveredFile {
    /// Unique file index. `discover_files` assigns it after sorting, so it equals the
    /// file's position in the returned list.
    pub id: FileId,
    /// Path as yielded by the directory walk rooted at `config.root`
    /// (absolute when the configured root is absolute).
    pub path: PathBuf,
    /// File size in bytes (for sorting largest-first); 0 when metadata could not be read.
    pub size_bytes: u64,
}
16
/// Compact file identifier: a `u32` newtype that is cheap to copy, compare, and hash.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct FileId(pub u32);
20
/// An entry point into the module graph.
#[derive(Debug, Clone)]
pub struct EntryPoint {
    /// Path of the entry file. Entry lists built in this file are sorted and
    /// de-duplicated by this field.
    pub path: PathBuf,
    /// Which discovery mechanism produced this entry.
    pub source: EntryPointSource,
}
27
/// Where an entry point was discovered from.
#[derive(Debug, Clone)]
pub enum EntryPointSource {
    /// Tag used for every path returned by `PackageJson::entry_points()` of the root
    /// or a workspace package (the visible code does not distinguish individual fields).
    PackageJsonMain,
    /// NOTE(review): presumably the package.json `module` field; not constructed in the
    /// code visible here.
    PackageJsonModule,
    /// Tag used for paths returned by `PackageJson::entry_points()` of nested packages
    /// found under common monorepo directories.
    PackageJsonExports,
    /// NOTE(review): presumably the package.json `bin` field; not constructed in the
    /// code visible here.
    PackageJsonBin,
    /// A file referenced from a package.json `scripts` command.
    PackageJsonScript,
    /// A file matched by a framework rule's patterns (or by plugin-provided patterns);
    /// `name` identifies the rule or plugin stage.
    FrameworkRule { name: String },
    /// NOTE(review): presumably a test file; not constructed in the code visible here.
    TestFile,
    /// A default index/main file, used only when no other entry point was found.
    DefaultIndex,
    /// A file matched by one of the configured `entry_patterns`.
    ManualEntry,
}
41
/// File extensions (without the leading dot) that count as source files.
///
/// Used to build the walker's file-type filter in `discover_files` and as the
/// extensions tried when an entry path does not exist exactly as written.
const SOURCE_EXTENSIONS: &[&str] = &[
    "ts", "tsx", "mts", "cts", "js", "jsx", "mjs", "cjs", "vue", "svelte",
];
45
/// Glob patterns for test/dev/story files excluded in production mode.
///
/// `discover_files` compiles these into a glob set when `config.production` is set and
/// matches them against paths relative to the project root.
const PRODUCTION_EXCLUDE_PATTERNS: &[&str] = &[
    // Test files
    "**/*.test.*",
    "**/*.spec.*",
    "**/*.e2e.*",
    "**/*.e2e-spec.*",
    "**/*.bench.*",
    "**/*.fixture.*",
    // Story files
    "**/*.stories.*",
    "**/*.story.*",
    // Test directories
    "**/__tests__/**",
    "**/__mocks__/**",
    "**/__snapshots__/**",
    "**/__fixtures__/**",
    "**/test/**",
    "**/tests/**",
    // Dev/config files at project level
    "**/*.config.*",
    // Dot-prefixed JS/TS files (e.g. tool rc files written as scripts)
    "**/.*.js",
    "**/.*.ts",
    "**/.*.mjs",
    "**/.*.cjs",
];
72
73/// Discover all source files in the project.
74pub fn discover_files(config: &ResolvedConfig) -> Vec<DiscoveredFile> {
75    let _span = tracing::info_span!("discover_files").entered();
76
77    let mut types_builder = ignore::types::TypesBuilder::new();
78    for ext in SOURCE_EXTENSIONS {
79        types_builder
80            .add("source", &format!("*.{ext}"))
81            .expect("valid glob");
82    }
83    types_builder.select("source");
84    let types = types_builder.build().expect("valid types");
85
86    let walker = WalkBuilder::new(&config.root)
87        .hidden(true)
88        .git_ignore(true)
89        .git_global(true)
90        .git_exclude(true)
91        .types(types)
92        .threads(config.threads)
93        .build();
94
95    // Build production exclude matcher if needed
96    let production_excludes = if config.production {
97        let mut builder = globset::GlobSetBuilder::new();
98        for pattern in PRODUCTION_EXCLUDE_PATTERNS {
99            if let Ok(glob) = globset::Glob::new(pattern) {
100                builder.add(glob);
101            }
102        }
103        builder.build().ok()
104    } else {
105        None
106    };
107
108    let mut files: Vec<DiscoveredFile> = walker
109        .filter_map(|entry| entry.ok())
110        .filter(|entry| entry.file_type().is_some_and(|ft| ft.is_file()))
111        .filter(|entry| !config.ignore_patterns.is_match(entry.path()))
112        .filter(|entry| {
113            // In production mode, exclude test/story/dev files
114            if let Some(ref excludes) = production_excludes {
115                let relative = entry
116                    .path()
117                    .strip_prefix(&config.root)
118                    .unwrap_or(entry.path());
119                !excludes.is_match(relative)
120            } else {
121                true
122            }
123        })
124        .enumerate()
125        .map(|(idx, entry)| {
126            let size_bytes = entry.metadata().map(|m| m.len()).unwrap_or(0);
127            DiscoveredFile {
128                id: FileId(idx as u32),
129                path: entry.into_path(),
130                size_bytes,
131            }
132        })
133        .collect();
134
135    // Sort largest files first for better rayon work-stealing, with path as tiebreaker for determinism
136    files.sort_unstable_by(|a, b| {
137        b.size_bytes
138            .cmp(&a.size_bytes)
139            .then_with(|| a.path.cmp(&b.path))
140    });
141
142    // Re-assign IDs after sorting
143    for (idx, file) in files.iter_mut().enumerate() {
144        file.id = FileId(idx as u32);
145    }
146
147    files
148}
149
150/// Resolve a path relative to a base directory, with security check and extension fallback.
151///
152/// Returns `Some(EntryPoint)` if the path resolves to an existing file within `canonical_root`,
153/// trying source extensions as fallback when the exact path doesn't exist.
154fn resolve_entry_path(
155    base: &Path,
156    entry: &str,
157    canonical_root: &Path,
158    source: EntryPointSource,
159) -> Option<EntryPoint> {
160    let resolved = base.join(entry);
161    // Security: ensure resolved path stays within the allowed root
162    let canonical_resolved = resolved.canonicalize().unwrap_or(resolved.clone());
163    if !canonical_resolved.starts_with(canonical_root) {
164        tracing::warn!(path = %entry, "Skipping entry point outside project root");
165        return None;
166    }
167    if resolved.exists() {
168        return Some(EntryPoint {
169            path: resolved,
170            source,
171        });
172    }
173    // Try with source extensions
174    for ext in SOURCE_EXTENSIONS {
175        let with_ext = resolved.with_extension(ext);
176        if with_ext.exists() {
177            return Some(EntryPoint {
178                path: with_ext,
179                source,
180            });
181        }
182    }
183    None
184}
185
186/// Pre-compile entry point and always_used glob matchers from a framework rule.
187fn compile_rule_matchers(
188    rule: &fallow_config::FrameworkRule,
189) -> (Vec<globset::GlobMatcher>, Vec<globset::GlobMatcher>) {
190    let entry_matchers: Vec<globset::GlobMatcher> = rule
191        .entry_points
192        .iter()
193        .filter_map(|ep| {
194            globset::Glob::new(&ep.pattern)
195                .ok()
196                .map(|g| g.compile_matcher())
197        })
198        .collect();
199
200    let always_matchers: Vec<globset::GlobMatcher> = rule
201        .always_used
202        .iter()
203        .filter_map(|p| globset::Glob::new(p).ok().map(|g| g.compile_matcher()))
204        .collect();
205
206    (entry_matchers, always_matchers)
207}
208
/// Default index patterns used when no other entry points are found.
///
/// `apply_default_fallback` matches these against paths relative to the project (or
/// workspace) root.
const DEFAULT_INDEX_PATTERNS: &[&str] = &[
    "src/index.{ts,tsx,js,jsx}",
    "src/main.{ts,tsx,js,jsx}",
    "index.{ts,tsx,js,jsx}",
    "main.{ts,tsx,js,jsx}",
];
216
217/// Fall back to default index patterns if no entries were found.
218///
219/// When `ws_filter` is `Some`, only files whose path starts with the given
220/// workspace root are considered (used for workspace-scoped discovery).
221fn apply_default_fallback(
222    files: &[DiscoveredFile],
223    root: &Path,
224    ws_filter: Option<&Path>,
225) -> Vec<EntryPoint> {
226    let default_matchers: Vec<globset::GlobMatcher> = DEFAULT_INDEX_PATTERNS
227        .iter()
228        .filter_map(|p| globset::Glob::new(p).ok().map(|g| g.compile_matcher()))
229        .collect();
230
231    let mut entries = Vec::new();
232    for file in files {
233        // Use strip_prefix instead of canonicalize for workspace filtering
234        if let Some(ws_root) = ws_filter
235            && file.path.strip_prefix(ws_root).is_err()
236        {
237            continue;
238        }
239        let relative = file.path.strip_prefix(root).unwrap_or(&file.path);
240        let relative_str = relative.to_string_lossy();
241        if default_matchers
242            .iter()
243            .any(|m| m.is_match(relative_str.as_ref()))
244        {
245            entries.push(EntryPoint {
246                path: file.path.clone(),
247                source: EntryPointSource::DefaultIndex,
248            });
249        }
250    }
251    entries
252}
253
254/// Discover entry points from package.json, framework rules, and defaults.
255pub fn discover_entry_points(config: &ResolvedConfig, files: &[DiscoveredFile]) -> Vec<EntryPoint> {
256    let _span = tracing::info_span!("discover_entry_points").entered();
257    let mut entries = Vec::new();
258
259    // Pre-compute relative paths for all files (once, not per pattern)
260    let relative_paths: Vec<String> = files
261        .iter()
262        .map(|f| {
263            f.path
264                .strip_prefix(&config.root)
265                .unwrap_or(&f.path)
266                .to_string_lossy()
267                .into_owned()
268        })
269        .collect();
270
271    // 1. Manual entries from config — pre-compile all patterns
272    for pattern in &config.entry_patterns {
273        if let Ok(glob) = globset::Glob::new(pattern) {
274            let matcher = glob.compile_matcher();
275            for (idx, rel) in relative_paths.iter().enumerate() {
276                if matcher.is_match(rel) {
277                    entries.push(EntryPoint {
278                        path: files[idx].path.clone(),
279                        source: EntryPointSource::ManualEntry,
280                    });
281                }
282            }
283        }
284    }
285
286    // 2. Package.json entries
287    let pkg_path = config.root.join("package.json");
288    if let Ok(pkg) = PackageJson::load(&pkg_path) {
289        let canonical_root = config.root.canonicalize().unwrap_or(config.root.clone());
290        for entry_path in pkg.entry_points() {
291            if let Some(ep) = resolve_entry_path(
292                &config.root,
293                &entry_path,
294                &canonical_root,
295                EntryPointSource::PackageJsonMain,
296            ) {
297                entries.push(ep);
298            }
299        }
300
301        // 2b. Package.json scripts — extract file references as entry points
302        if let Some(scripts) = &pkg.scripts {
303            for script_value in scripts.values() {
304                for file_ref in extract_script_file_refs(script_value) {
305                    if let Some(ep) = resolve_entry_path(
306                        &config.root,
307                        &file_ref,
308                        &canonical_root,
309                        EntryPointSource::PackageJsonScript,
310                    ) {
311                        entries.push(ep);
312                    }
313                }
314            }
315        }
316
317        // 3. Framework rules — cache active status + pre-compile pattern matchers
318        let active_rules: Vec<&fallow_config::FrameworkRule> = config
319            .framework_rules
320            .iter()
321            .filter(|rule| is_framework_active(rule, &pkg, &config.root))
322            .collect();
323
324        for rule in &active_rules {
325            let (entry_matchers, always_matchers) = compile_rule_matchers(rule);
326
327            // Single pass over files for all matchers of this rule
328            for (idx, rel) in relative_paths.iter().enumerate() {
329                let matched = entry_matchers.iter().any(|m| m.is_match(rel))
330                    || always_matchers.iter().any(|m| m.is_match(rel));
331                if matched {
332                    entries.push(EntryPoint {
333                        path: files[idx].path.clone(),
334                        source: EntryPointSource::FrameworkRule {
335                            name: rule.name.clone(),
336                        },
337                    });
338                }
339            }
340        }
341    }
342
343    // 4. Auto-discover nested package.json entry points
344    // For monorepo-like structures without explicit workspace config, scan for
345    // package.json files in subdirectories and use their main/exports as entries.
346    discover_nested_package_entries(&config.root, files, &mut entries);
347
348    // 5. Default index files (if no other entries found)
349    if entries.is_empty() {
350        entries = apply_default_fallback(files, &config.root, None);
351    }
352
353    // Deduplicate by path
354    entries.sort_by(|a, b| a.path.cmp(&b.path));
355    entries.dedup_by(|a, b| a.path == b.path);
356
357    entries
358}
359
360/// Discover entry points from nested package.json files in subdirectories.
361///
362/// When a project has subdirectories with their own package.json (e.g., `packages/foo/package.json`),
363/// the `main`, `module`, `exports`, and `bin` fields of those package.json files should be treated
364/// as entry points. This handles monorepos without explicit workspace configuration.
365fn discover_nested_package_entries(
366    root: &Path,
367    _files: &[DiscoveredFile],
368    entries: &mut Vec<EntryPoint>,
369) {
370    let canonical_root = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
371
372    // Walk common monorepo patterns to find nested package.json files
373    let search_dirs = ["packages", "apps", "libs", "modules", "plugins"];
374    for dir_name in &search_dirs {
375        let search_dir = root.join(dir_name);
376        if !search_dir.is_dir() {
377            continue;
378        }
379        let read_dir = match std::fs::read_dir(&search_dir) {
380            Ok(rd) => rd,
381            Err(_) => continue,
382        };
383        for entry in read_dir.flatten() {
384            let pkg_path = entry.path().join("package.json");
385            if !pkg_path.exists() {
386                continue;
387            }
388            let Ok(pkg) = PackageJson::load(&pkg_path) else {
389                continue;
390            };
391            let pkg_dir = entry.path();
392            for entry_path in pkg.entry_points() {
393                if let Some(ep) = resolve_entry_path(
394                    &pkg_dir,
395                    &entry_path,
396                    &canonical_root,
397                    EntryPointSource::PackageJsonExports,
398                ) {
399                    entries.push(ep);
400                }
401            }
402            // Also check scripts in nested package.json
403            if let Some(scripts) = &pkg.scripts {
404                for script_value in scripts.values() {
405                    for file_ref in extract_script_file_refs(script_value) {
406                        if let Some(ep) = resolve_entry_path(
407                            &pkg_dir,
408                            &file_ref,
409                            &canonical_root,
410                            EntryPointSource::PackageJsonScript,
411                        ) {
412                            entries.push(ep);
413                        }
414                    }
415                }
416            }
417        }
418    }
419}
420
421/// Check if a framework rule is active based on its detection config.
422fn is_framework_active(
423    rule: &fallow_config::FrameworkRule,
424    pkg: &PackageJson,
425    root: &Path,
426) -> bool {
427    match &rule.detection {
428        None => true, // No detection = always active
429        Some(detection) => check_detection(detection, pkg, root),
430    }
431}
432
433fn check_detection(detection: &FrameworkDetection, pkg: &PackageJson, root: &Path) -> bool {
434    match detection {
435        FrameworkDetection::Dependency { package } => {
436            pkg.all_dependency_names().iter().any(|d| d == package)
437        }
438        FrameworkDetection::FileExists { pattern } => file_exists_glob(pattern, root),
439        FrameworkDetection::All { conditions } => {
440            conditions.iter().all(|c| check_detection(c, pkg, root))
441        }
442        FrameworkDetection::Any { conditions } => {
443            conditions.iter().any(|c| check_detection(c, pkg, root))
444        }
445    }
446}
447
448/// Discover entry points for a workspace package.
449pub fn discover_workspace_entry_points(
450    ws_root: &Path,
451    config: &ResolvedConfig,
452    all_files: &[DiscoveredFile],
453) -> Vec<EntryPoint> {
454    let mut entries = Vec::new();
455
456    // Also load root package.json for framework detection (monorepo deps are often at root)
457    let root_pkg = PackageJson::load(&config.root.join("package.json")).ok();
458
459    let pkg_path = ws_root.join("package.json");
460    if let Ok(pkg) = PackageJson::load(&pkg_path) {
461        let canonical_ws_root = ws_root.canonicalize().unwrap_or(ws_root.to_path_buf());
462        for entry_path in pkg.entry_points() {
463            if let Some(ep) = resolve_entry_path(
464                ws_root,
465                &entry_path,
466                &canonical_ws_root,
467                EntryPointSource::PackageJsonMain,
468            ) {
469                entries.push(ep);
470            }
471        }
472
473        // Scripts field — extract file references as entry points
474        if let Some(scripts) = &pkg.scripts {
475            for script_value in scripts.values() {
476                for file_ref in extract_script_file_refs(script_value) {
477                    if let Some(ep) = resolve_entry_path(
478                        ws_root,
479                        &file_ref,
480                        &canonical_ws_root,
481                        EntryPointSource::PackageJsonScript,
482                    ) {
483                        entries.push(ep);
484                    }
485                }
486            }
487        }
488
489        // Apply framework rules to workspace.
490        // Check activation against BOTH workspace and root package deps (monorepo hoisting).
491        // Use path prefix matching instead of per-file canonicalize (avoids O(files×workspaces) syscalls)
492        for rule in &config.framework_rules {
493            let ws_active = is_framework_active(rule, &pkg, ws_root);
494            let root_active = root_pkg
495                .as_ref()
496                .map(|rpkg| is_framework_active(rule, rpkg, &config.root))
497                .unwrap_or(false);
498
499            if !ws_active && !root_active {
500                continue;
501            }
502
503            let (entry_matchers, always_matchers) = compile_rule_matchers(rule);
504
505            // Only consider files within this workspace — use strip_prefix instead of canonicalize
506            for file in all_files {
507                let relative = match file.path.strip_prefix(ws_root) {
508                    Ok(rel) => rel,
509                    Err(_) => continue,
510                };
511                let relative_str = relative.to_string_lossy();
512                let matched = entry_matchers
513                    .iter()
514                    .any(|m| m.is_match(relative_str.as_ref()))
515                    || always_matchers
516                        .iter()
517                        .any(|m| m.is_match(relative_str.as_ref()));
518                if matched {
519                    entries.push(EntryPoint {
520                        path: file.path.clone(),
521                        source: EntryPointSource::FrameworkRule {
522                            name: rule.name.clone(),
523                        },
524                    });
525                }
526            }
527        }
528    }
529
530    // Fall back to default index files if no entry points found for this workspace
531    if entries.is_empty() {
532        entries = apply_default_fallback(all_files, ws_root, None);
533    }
534
535    entries.sort_by(|a, b| a.path.cmp(&b.path));
536    entries.dedup_by(|a, b| a.path == b.path);
537    entries
538}
539
540/// Extract file path references from a package.json script value.
541///
542/// Recognises patterns like:
543/// - `node path/to/script.js`
544/// - `ts-node path/to/script.ts`
545/// - `tsx path/to/script.ts`
546/// - `npx ts-node path/to/script.ts`
547/// - Bare file paths ending in `.js`, `.ts`, `.mjs`, `.cjs`, `.mts`, `.cts`
548///
549/// Script values are split by `&&`, `||`, and `;` to handle chained commands.
550fn extract_script_file_refs(script: &str) -> Vec<String> {
551    let mut refs = Vec::new();
552
553    // Runners whose next argument is a file path
554    const RUNNERS: &[&str] = &["node", "ts-node", "tsx", "babel-node"];
555
556    // Split on shell operators to handle chained commands
557    for segment in script.split(&['&', '|', ';'][..]) {
558        let segment = segment.trim();
559        if segment.is_empty() {
560            continue;
561        }
562
563        let tokens: Vec<&str> = segment.split_whitespace().collect();
564        if tokens.is_empty() {
565            continue;
566        }
567
568        // Skip leading `npx`/`pnpx`/`yarn`/`pnpm exec` to find the actual command
569        let mut start = 0;
570        if matches!(tokens.first(), Some(&"npx" | &"pnpx")) {
571            start = 1;
572        } else if tokens.len() >= 2 && matches!(tokens[0], "yarn" | "pnpm") && tokens[1] == "exec" {
573            start = 2;
574        }
575
576        if start >= tokens.len() {
577            continue;
578        }
579
580        let cmd = tokens[start];
581
582        // Check if the command is a known runner
583        if RUNNERS.contains(&cmd) {
584            // Collect ALL file path arguments after the runner (handles
585            // `node --test file1.mjs file2.mjs ...` and similar multi-file patterns)
586            for &token in &tokens[start + 1..] {
587                if token.starts_with('-') {
588                    continue;
589                }
590                // Must look like a file path (contains '/' or '.' extension)
591                if looks_like_file_path(token) {
592                    refs.push(token.to_string());
593                }
594            }
595        } else {
596            // Scan all tokens for bare file paths (e.g. `./scripts/build.js`)
597            for &token in &tokens[start..] {
598                if token.starts_with('-') {
599                    continue;
600                }
601                if looks_like_script_file(token) {
602                    refs.push(token.to_string());
603                }
604            }
605        }
606    }
607
608    refs
609}
610
/// Decide whether a runner argument looks like a file path.
///
/// A token qualifies when it ends in a JS/TS extension, starts with `./` or `../`, or
/// contains a `/` without looking like a scoped package name (`@scope/pkg`) or a URL
/// (contains `://`).
fn looks_like_file_path(token: &str) -> bool {
    const SCRIPT_EXTENSIONS: [&str; 8] =
        [".js", ".ts", ".mjs", ".cjs", ".mts", ".cts", ".jsx", ".tsx"];

    let has_script_extension = SCRIPT_EXTENSIONS.iter().any(|&ext| token.ends_with(ext));
    let is_explicitly_relative = token.starts_with("./") || token.starts_with("../");
    // Only treat tokens with `/` as paths if they are not URLs or scoped package names.
    let is_plain_path =
        token.contains('/') && !(token.starts_with('@') || token.contains("://"));

    has_script_extension || is_explicitly_relative || is_plain_path
}
624
/// Decide whether a token is a standalone script file reference.
///
/// The token must end in a JS/TS extension and contain a path separator, which
/// distinguishes `scripts/build.js` or `./build.js` from bare names like `webpack.js`.
fn looks_like_script_file(token: &str) -> bool {
    const SCRIPT_EXTENSIONS: [&str; 8] =
        [".js", ".ts", ".mjs", ".cjs", ".mts", ".cts", ".jsx", ".tsx"];

    // A `./` or `../` prefix always contains `/`, so one separator check covers
    // relative prefixes as well.
    SCRIPT_EXTENSIONS.iter().any(|&ext| token.ends_with(ext)) && token.contains('/')
}
636
637/// Check whether any file matching a glob pattern exists under root.
638///
639/// Uses `globset::Glob` for pattern compilation (supports brace expansion like
640/// `{ts,js}`) and walks the static prefix directory to find matches.
641fn file_exists_glob(pattern: &str, root: &Path) -> bool {
642    let matcher = match globset::Glob::new(pattern) {
643        Ok(g) => g.compile_matcher(),
644        Err(_) => return false,
645    };
646
647    // Extract the static directory prefix from the pattern to narrow the walk.
648    // E.g. for ".storybook/main.{ts,js}" the prefix is ".storybook".
649    let prefix: PathBuf = Path::new(pattern)
650        .components()
651        .take_while(|c| {
652            let s = c.as_os_str().to_string_lossy();
653            !s.contains('*') && !s.contains('?') && !s.contains('{') && !s.contains('[')
654        })
655        .collect();
656
657    let search_dir = if prefix.as_os_str().is_empty() {
658        root.to_path_buf()
659    } else {
660        // prefix may be an exact directory or include the filename portion.
661        let joined = root.join(&prefix);
662        if joined.is_dir() {
663            joined
664        } else if let Some(parent) = joined.parent() {
665            // Only use parent if it's NOT the root itself (avoid walking entire project)
666            if parent != root && parent.is_dir() {
667                parent.to_path_buf()
668            } else {
669                // The prefix directory doesn't exist — no match possible
670                return false;
671            }
672        } else {
673            return false;
674        }
675    };
676
677    if !search_dir.is_dir() {
678        return false;
679    }
680
681    walk_dir_recursive(&search_dir, root, &matcher)
682}
683
/// Maximum recursion depth for directory walking to prevent infinite loops on symlink cycles.
///
/// `walk_dir_recursive_depth` logs a warning and stops descending once this depth is reached.
const MAX_WALK_DEPTH: usize = 20;
686
/// Recursively walk a directory and check if any file matches the glob.
///
/// Candidate paths are matched relative to `root`. This is a thin wrapper that starts
/// `walk_dir_recursive_depth` at depth 0.
fn walk_dir_recursive(dir: &Path, root: &Path, matcher: &globset::GlobMatcher) -> bool {
    walk_dir_recursive_depth(dir, root, matcher, 0)
}
691
692/// Inner recursive walker with depth tracking.
693fn walk_dir_recursive_depth(
694    dir: &Path,
695    root: &Path,
696    matcher: &globset::GlobMatcher,
697    depth: usize,
698) -> bool {
699    if depth >= MAX_WALK_DEPTH {
700        tracing::warn!(
701            dir = %dir.display(),
702            "Maximum directory walk depth reached, possible symlink cycle"
703        );
704        return false;
705    }
706
707    let entries = match std::fs::read_dir(dir) {
708        Ok(rd) => rd,
709        Err(_) => return false,
710    };
711
712    for entry in entries.flatten() {
713        // Use symlink_metadata to avoid following symlinks (prevents cycles)
714        let is_real_dir = entry.file_type().map(|ft| ft.is_dir()).unwrap_or(false);
715        if is_real_dir {
716            if walk_dir_recursive_depth(&entry.path(), root, matcher, depth + 1) {
717                return true;
718            }
719        } else {
720            let path = entry.path();
721            let relative = path.strip_prefix(root).unwrap_or(&path);
722            if matcher.is_match(relative) {
723                return true;
724            }
725        }
726    }
727
728    false
729}
730
731/// Discover entry points from plugin results (dynamic config parsing).
732///
733/// Converts plugin-discovered patterns and setup files into concrete entry points
734/// by matching them against the discovered file list.
735pub fn discover_plugin_entry_points(
736    plugin_result: &crate::plugins::AggregatedPluginResult,
737    config: &ResolvedConfig,
738    files: &[DiscoveredFile],
739) -> Vec<EntryPoint> {
740    let mut entries = Vec::new();
741
742    // Pre-compute relative paths
743    let relative_paths: Vec<String> = files
744        .iter()
745        .map(|f| {
746            f.path
747                .strip_prefix(&config.root)
748                .unwrap_or(&f.path)
749                .to_string_lossy()
750                .into_owned()
751        })
752        .collect();
753
754    // Match plugin entry patterns against files
755    let all_patterns: Vec<&str> = plugin_result
756        .entry_patterns
757        .iter()
758        .chain(plugin_result.discovered_always_used.iter())
759        .chain(plugin_result.always_used.iter())
760        .map(|s| s.as_str())
761        .collect();
762
763    let matchers: Vec<globset::GlobMatcher> = all_patterns
764        .iter()
765        .filter_map(|p| globset::Glob::new(p).ok().map(|g| g.compile_matcher()))
766        .collect();
767
768    for (idx, rel) in relative_paths.iter().enumerate() {
769        if matchers.iter().any(|m| m.is_match(rel)) {
770            entries.push(EntryPoint {
771                path: files[idx].path.clone(),
772                source: EntryPointSource::FrameworkRule {
773                    name: "plugin".to_string(),
774                },
775            });
776        }
777    }
778
779    // Add setup files (absolute paths from plugin config parsing)
780    for setup_file in &plugin_result.setup_files {
781        let resolved = if setup_file.is_absolute() {
782            setup_file.clone()
783        } else {
784            config.root.join(setup_file)
785        };
786        if resolved.exists() {
787            entries.push(EntryPoint {
788                path: resolved,
789                source: EntryPointSource::FrameworkRule {
790                    name: "plugin-setup".to_string(),
791                },
792            });
793        } else {
794            // Try with extensions
795            for ext in SOURCE_EXTENSIONS {
796                let with_ext = resolved.with_extension(ext);
797                if with_ext.exists() {
798                    entries.push(EntryPoint {
799                        path: with_ext,
800                        source: EntryPointSource::FrameworkRule {
801                            name: "plugin-setup".to_string(),
802                        },
803                    });
804                    break;
805                }
806            }
807        }
808    }
809
810    // Deduplicate
811    entries.sort_by(|a, b| a.path.cmp(&b.path));
812    entries.dedup_by(|a, b| a.path == b.path);
813    entries
814}
815
816/// Pre-compile a set of glob patterns for efficient matching against many paths.
817pub fn compile_glob_set(patterns: &[String]) -> Option<globset::GlobSet> {
818    if patterns.is_empty() {
819        return None;
820    }
821    let mut builder = globset::GlobSetBuilder::new();
822    for pattern in patterns {
823        if let Ok(glob) = globset::Glob::new(pattern) {
824            builder.add(glob);
825        }
826    }
827    builder.build().ok()
828}
829
#[cfg(test)]
mod tests {
    use super::*;

    // --- extract_script_file_refs (Issue 3) ---

    #[test]
    fn script_node_runner() {
        assert_eq!(
            extract_script_file_refs("node utilities/generate-coverage-badge.js"),
            ["utilities/generate-coverage-badge.js"]
        );
    }

    #[test]
    fn script_ts_node_runner() {
        assert_eq!(
            extract_script_file_refs("ts-node scripts/seed.ts"),
            ["scripts/seed.ts"]
        );
    }

    #[test]
    fn script_tsx_runner() {
        assert_eq!(
            extract_script_file_refs("tsx scripts/migrate.ts"),
            ["scripts/migrate.ts"]
        );
    }

    #[test]
    fn script_npx_prefix() {
        assert_eq!(
            extract_script_file_refs("npx ts-node scripts/generate.ts"),
            ["scripts/generate.ts"]
        );
    }

    #[test]
    fn script_chained_commands() {
        assert_eq!(
            extract_script_file_refs("node scripts/build.js && node scripts/post-build.js"),
            ["scripts/build.js", "scripts/post-build.js"]
        );
    }

    #[test]
    fn script_with_flags() {
        // Flags (tokens starting with `-`) are skipped, not treated as paths.
        assert_eq!(
            extract_script_file_refs(
                "node --experimental-specifier-resolution=node scripts/run.mjs"
            ),
            ["scripts/run.mjs"]
        );
    }

    #[test]
    fn script_no_file_ref() {
        assert!(extract_script_file_refs("next build").is_empty());
    }

    #[test]
    fn script_bare_file_path() {
        assert_eq!(
            extract_script_file_refs("echo done && node ./scripts/check.js"),
            ["./scripts/check.js"]
        );
    }

    #[test]
    fn script_semicolon_separator() {
        assert_eq!(
            extract_script_file_refs("node scripts/a.js; node scripts/b.ts"),
            ["scripts/a.js", "scripts/b.ts"]
        );
    }

    // --- looks_like_file_path ---

    #[test]
    fn file_path_with_extension() {
        for token in ["scripts/build.js", "scripts/build.ts", "scripts/build.mjs"] {
            assert!(looks_like_file_path(token));
        }
    }

    #[test]
    fn file_path_with_slash() {
        assert!(looks_like_file_path("scripts/build"));
    }

    #[test]
    fn not_file_path() {
        for token in ["--watch", "build"] {
            assert!(!looks_like_file_path(token));
        }
    }

    // --- looks_like_script_file ---

    #[test]
    fn script_file_with_path() {
        for token in [
            "scripts/build.js",
            "./scripts/build.ts",
            "../scripts/build.mjs",
        ] {
            assert!(looks_like_script_file(token));
        }
    }

    #[test]
    fn not_script_file_bare_name() {
        // Bare names without path separator should not match
        for token in ["webpack.js", "build"] {
            assert!(!looks_like_script_file(token));
        }
    }
}