1use std::ffi::OsStr;
2use std::path::{Path, PathBuf};
3
4use fallow_config::{PackageJson, ResolvedConfig};
5use ignore::WalkBuilder;
6
7pub use fallow_types::discover::{DiscoveredFile, EntryPoint, EntryPointSource, FileId};
9
/// File extensions (without the leading dot) that are treated as source files.
///
/// The order matters: code in this file that probes for a file by trying each
/// extension in turn returns the first candidate that exists.
pub const SOURCE_EXTENSIONS: &[&str] = &[
    // TypeScript
    "ts",
    "tsx",
    "mts",
    "cts",
    // JavaScript
    "js",
    "jsx",
    "mjs",
    "cjs",
    // Component and content formats
    "vue",
    "svelte",
    "astro",
    "mdx",
    // Stylesheets
    "css",
    "scss",
];
14
/// Names of hidden (dot-prefixed) directories that the walker may still descend into.
const ALLOWED_HIDDEN_DIRS: &[&str] = &[".storybook", ".well-known", ".changeset", ".github"];

/// Returns `true` when `name` is exactly one of the entries in `ALLOWED_HIDDEN_DIRS`.
///
/// A name that is not valid UTF-8 can never equal one of the (ASCII) allow-listed
/// names, so it is reported as not allowed.
fn is_allowed_hidden_dir(name: &OsStr) -> bool {
    name.to_str()
        .is_some_and(|text| ALLOWED_HIDDEN_DIRS.contains(&text))
}
29
30fn is_allowed_hidden(entry: &ignore::DirEntry) -> bool {
36 let name = entry.file_name();
37 let name_str = name.to_string_lossy();
38
39 if !name_str.starts_with('.') {
41 return true;
42 }
43
44 if entry.file_type().is_some_and(|ft| ft.is_file()) {
46 return true;
47 }
48
49 is_allowed_hidden_dir(name)
51}
52
/// Glob patterns for files that `discover_files` drops when `config.production`
/// is set. They are matched against the path relative to the project root.
const PRODUCTION_EXCLUDE_PATTERNS: &[&str] = &[
    // Files with a test-like infix before the extension
    "**/*.test.*",
    "**/*.spec.*",
    "**/*.e2e.*",
    "**/*.e2e-spec.*",
    "**/*.bench.*",
    "**/*.fixture.*",
    // Story files
    "**/*.stories.*",
    "**/*.story.*",
    // Test-support directories
    "**/__tests__/**",
    "**/__mocks__/**",
    "**/__snapshots__/**",
    "**/__fixtures__/**",
    "**/test/**",
    "**/tests/**",
    // `*.config.*` files
    "**/*.config.*",
    // Dot-prefixed JS/TS files
    "**/.*.js",
    "**/.*.ts",
    "**/.*.mjs",
    "**/.*.cjs",
];
79
80pub fn discover_files(config: &ResolvedConfig) -> Vec<DiscoveredFile> {
82 let _span = tracing::info_span!("discover_files").entered();
83
84 let mut types_builder = ignore::types::TypesBuilder::new();
85 for ext in SOURCE_EXTENSIONS {
86 types_builder
87 .add("source", &format!("*.{ext}"))
88 .expect("valid glob");
89 }
90 types_builder.select("source");
91 let types = types_builder.build().expect("valid types");
92
93 let mut walk_builder = WalkBuilder::new(&config.root);
94 walk_builder
95 .hidden(false)
96 .git_ignore(true)
97 .git_global(true)
98 .git_exclude(true)
99 .types(types)
100 .threads(config.threads)
101 .filter_entry(is_allowed_hidden);
102 let walker = walk_builder.build();
103
104 let production_excludes = if config.production {
106 let mut builder = globset::GlobSetBuilder::new();
107 for pattern in PRODUCTION_EXCLUDE_PATTERNS {
108 if let Ok(glob) = globset::Glob::new(pattern) {
109 builder.add(glob);
110 }
111 }
112 builder.build().ok()
113 } else {
114 None
115 };
116
117 let mut files: Vec<DiscoveredFile> = walker
118 .filter_map(|entry| entry.ok())
119 .filter(|entry| entry.file_type().is_some_and(|ft| ft.is_file()))
120 .filter(|entry| !config.ignore_patterns.is_match(entry.path()))
121 .filter(|entry| {
122 production_excludes.as_ref().is_none_or(|excludes| {
124 let relative = entry
125 .path()
126 .strip_prefix(&config.root)
127 .unwrap_or_else(|_| entry.path());
128 !excludes.is_match(relative)
129 })
130 })
131 .enumerate()
132 .map(|(idx, entry)| {
133 let size_bytes = entry.metadata().map(|m| m.len()).unwrap_or(0);
134 DiscoveredFile {
135 id: FileId(idx as u32),
136 path: entry.into_path(),
137 size_bytes,
138 }
139 })
140 .collect();
141
142 files.sort_unstable_by(|a, b| a.path.cmp(&b.path));
147
148 for (idx, file) in files.iter_mut().enumerate() {
150 file.id = FileId(idx as u32);
151 }
152
153 files
154}
155
/// Directory names treated as build-output folders when an entry path is
/// mapped back to a source file under `src/`.
const OUTPUT_DIRS: &[&str] = &[
    "dist",
    "build",
    "out",
    "esm",
    "cjs",
];
160
161fn resolve_entry_path(
168 base: &Path,
169 entry: &str,
170 canonical_root: &Path,
171 source: EntryPointSource,
172) -> Option<EntryPoint> {
173 let resolved = base.join(entry);
174 let canonical_resolved = resolved.canonicalize().unwrap_or_else(|_| resolved.clone());
176 if !canonical_resolved.starts_with(canonical_root) {
177 tracing::warn!(path = %entry, "Skipping entry point outside project root");
178 return None;
179 }
180
181 if let Some(source_path) = try_output_to_source_path(base, entry) {
186 if let Ok(canonical_source) = source_path.canonicalize()
188 && canonical_source.starts_with(canonical_root)
189 {
190 return Some(EntryPoint {
191 path: source_path,
192 source,
193 });
194 }
195 }
196
197 if resolved.exists() {
198 return Some(EntryPoint {
199 path: resolved,
200 source,
201 });
202 }
203 for ext in SOURCE_EXTENSIONS {
205 let with_ext = resolved.with_extension(ext);
206 if with_ext.exists() {
207 return Some(EntryPoint {
208 path: with_ext,
209 source,
210 });
211 }
212 }
213 None
214}
215
216fn try_output_to_source_path(base: &Path, entry: &str) -> Option<PathBuf> {
228 let entry_path = Path::new(entry);
229 let components: Vec<_> = entry_path.components().collect();
230
231 let output_pos = components.iter().rposition(|c| {
233 if let std::path::Component::Normal(s) = c
234 && let Some(name) = s.to_str()
235 {
236 return OUTPUT_DIRS.contains(&name);
237 }
238 false
239 })?;
240
241 let prefix: PathBuf = components[..output_pos]
243 .iter()
244 .filter(|c| !matches!(c, std::path::Component::CurDir))
245 .collect();
246
247 let suffix: PathBuf = components[output_pos + 1..].iter().collect();
249
250 for ext in SOURCE_EXTENSIONS {
252 let source_candidate = base
253 .join(&prefix)
254 .join("src")
255 .join(suffix.with_extension(ext));
256 if source_candidate.exists() {
257 return Some(source_candidate);
258 }
259 }
260
261 None
262}
263
/// Glob patterns for conventional entry files, matched against paths relative
/// to a package root. They are only consulted as a fallback when no other
/// entry point was found.
const DEFAULT_INDEX_PATTERNS: &[&str] = &[
    // Conventional entries under `src/`
    "src/index.{ts,tsx,js,jsx}",
    "src/main.{ts,tsx,js,jsx}",
    // Conventional entries directly in the package root
    "index.{ts,tsx,js,jsx}",
    "main.{ts,tsx,js,jsx}",
];
271
272fn apply_default_fallback(
277 files: &[DiscoveredFile],
278 root: &Path,
279 ws_filter: Option<&Path>,
280) -> Vec<EntryPoint> {
281 let default_matchers: Vec<globset::GlobMatcher> = DEFAULT_INDEX_PATTERNS
282 .iter()
283 .filter_map(|p| globset::Glob::new(p).ok().map(|g| g.compile_matcher()))
284 .collect();
285
286 let mut entries = Vec::new();
287 for file in files {
288 if let Some(ws_root) = ws_filter
290 && file.path.strip_prefix(ws_root).is_err()
291 {
292 continue;
293 }
294 let relative = file.path.strip_prefix(root).unwrap_or(&file.path);
295 let relative_str = relative.to_string_lossy();
296 if default_matchers
297 .iter()
298 .any(|m| m.is_match(relative_str.as_ref()))
299 {
300 entries.push(EntryPoint {
301 path: file.path.clone(),
302 source: EntryPointSource::DefaultIndex,
303 });
304 }
305 }
306 entries
307}
308
309pub fn discover_entry_points(config: &ResolvedConfig, files: &[DiscoveredFile]) -> Vec<EntryPoint> {
311 let _span = tracing::info_span!("discover_entry_points").entered();
312 let mut entries = Vec::new();
313
314 let relative_paths: Vec<String> = files
316 .iter()
317 .map(|f| {
318 f.path
319 .strip_prefix(&config.root)
320 .unwrap_or(&f.path)
321 .to_string_lossy()
322 .into_owned()
323 })
324 .collect();
325
326 {
329 let mut builder = globset::GlobSetBuilder::new();
330 for pattern in &config.entry_patterns {
331 if let Ok(glob) = globset::Glob::new(pattern) {
332 builder.add(glob);
333 }
334 }
335 if let Ok(glob_set) = builder.build()
336 && !glob_set.is_empty()
337 {
338 for (idx, rel) in relative_paths.iter().enumerate() {
339 if glob_set.is_match(rel) {
340 entries.push(EntryPoint {
341 path: files[idx].path.clone(),
342 source: EntryPointSource::ManualEntry,
343 });
344 }
345 }
346 }
347 }
348
349 let canonical_root = config
352 .root
353 .canonicalize()
354 .unwrap_or_else(|_| config.root.clone());
355 let pkg_path = config.root.join("package.json");
356 if let Ok(pkg) = PackageJson::load(&pkg_path) {
357 for entry_path in pkg.entry_points() {
358 if let Some(ep) = resolve_entry_path(
359 &config.root,
360 &entry_path,
361 &canonical_root,
362 EntryPointSource::PackageJsonMain,
363 ) {
364 entries.push(ep);
365 }
366 }
367
368 if let Some(scripts) = &pkg.scripts {
370 for script_value in scripts.values() {
371 for file_ref in extract_script_file_refs(script_value) {
372 if let Some(ep) = resolve_entry_path(
373 &config.root,
374 &file_ref,
375 &canonical_root,
376 EntryPointSource::PackageJsonScript,
377 ) {
378 entries.push(ep);
379 }
380 }
381 }
382 }
383
384 }
386
387 discover_nested_package_entries(&config.root, files, &mut entries, &canonical_root);
391
392 if entries.is_empty() {
394 entries = apply_default_fallback(files, &config.root, None);
395 }
396
397 entries.sort_by(|a, b| a.path.cmp(&b.path));
399 entries.dedup_by(|a, b| a.path == b.path);
400
401 entries
402}
403
404fn discover_nested_package_entries(
410 root: &Path,
411 _files: &[DiscoveredFile],
412 entries: &mut Vec<EntryPoint>,
413 canonical_root: &Path,
414) {
415 let search_dirs = ["packages", "apps", "libs", "modules", "plugins"];
417 for dir_name in &search_dirs {
418 let search_dir = root.join(dir_name);
419 if !search_dir.is_dir() {
420 continue;
421 }
422 let Ok(read_dir) = std::fs::read_dir(&search_dir) else {
423 continue;
424 };
425 for entry in read_dir.flatten() {
426 let pkg_path = entry.path().join("package.json");
427 if !pkg_path.exists() {
428 continue;
429 }
430 let Ok(pkg) = PackageJson::load(&pkg_path) else {
431 continue;
432 };
433 let pkg_dir = entry.path();
434 for entry_path in pkg.entry_points() {
435 if let Some(ep) = resolve_entry_path(
436 &pkg_dir,
437 &entry_path,
438 canonical_root,
439 EntryPointSource::PackageJsonExports,
440 ) {
441 entries.push(ep);
442 }
443 }
444 if let Some(scripts) = &pkg.scripts {
446 for script_value in scripts.values() {
447 for file_ref in extract_script_file_refs(script_value) {
448 if let Some(ep) = resolve_entry_path(
449 &pkg_dir,
450 &file_ref,
451 canonical_root,
452 EntryPointSource::PackageJsonScript,
453 ) {
454 entries.push(ep);
455 }
456 }
457 }
458 }
459 }
460 }
461}
462
463pub fn discover_workspace_entry_points(
465 ws_root: &Path,
466 _config: &ResolvedConfig,
467 all_files: &[DiscoveredFile],
468) -> Vec<EntryPoint> {
469 let mut entries = Vec::new();
470
471 let pkg_path = ws_root.join("package.json");
472 if let Ok(pkg) = PackageJson::load(&pkg_path) {
473 let canonical_ws_root = ws_root
474 .canonicalize()
475 .unwrap_or_else(|_| ws_root.to_path_buf());
476 for entry_path in pkg.entry_points() {
477 if let Some(ep) = resolve_entry_path(
478 ws_root,
479 &entry_path,
480 &canonical_ws_root,
481 EntryPointSource::PackageJsonMain,
482 ) {
483 entries.push(ep);
484 }
485 }
486
487 if let Some(scripts) = &pkg.scripts {
489 for script_value in scripts.values() {
490 for file_ref in extract_script_file_refs(script_value) {
491 if let Some(ep) = resolve_entry_path(
492 ws_root,
493 &file_ref,
494 &canonical_ws_root,
495 EntryPointSource::PackageJsonScript,
496 ) {
497 entries.push(ep);
498 }
499 }
500 }
501 }
502
503 }
505
506 if entries.is_empty() {
508 entries = apply_default_fallback(all_files, ws_root, None);
509 }
510
511 entries.sort_by(|a, b| a.path.cmp(&b.path));
512 entries.dedup_by(|a, b| a.path == b.path);
513 entries
514}
515
516fn extract_script_file_refs(script: &str) -> Vec<String> {
527 let mut refs = Vec::new();
528
529 const RUNNERS: &[&str] = &["node", "ts-node", "tsx", "babel-node"];
531
532 for segment in script.split(&['&', '|', ';'][..]) {
534 let segment = segment.trim();
535 if segment.is_empty() {
536 continue;
537 }
538
539 let tokens: Vec<&str> = segment.split_whitespace().collect();
540 if tokens.is_empty() {
541 continue;
542 }
543
544 let mut start = 0;
546 if matches!(tokens.first(), Some(&"npx" | &"pnpx")) {
547 start = 1;
548 } else if tokens.len() >= 2 && matches!(tokens[0], "yarn" | "pnpm") && tokens[1] == "exec" {
549 start = 2;
550 }
551
552 if start >= tokens.len() {
553 continue;
554 }
555
556 let cmd = tokens[start];
557
558 if RUNNERS.contains(&cmd) {
560 for &token in &tokens[start + 1..] {
563 if token.starts_with('-') {
564 continue;
565 }
566 if looks_like_file_path(token) {
568 refs.push(token.to_string());
569 }
570 }
571 } else {
572 for &token in &tokens[start..] {
574 if token.starts_with('-') {
575 continue;
576 }
577 if looks_like_script_file(token) {
578 refs.push(token.to_string());
579 }
580 }
581 }
582 }
583
584 refs
585}
586
/// Lenient check used for arguments of a known runner: does `token` look like
/// a path to a file?
///
/// A token qualifies when it ends with a JS/TS extension, starts with `./` or
/// `../`, or contains a `/` without starting with `@` or containing `://`.
fn looks_like_file_path(token: &str) -> bool {
    const SCRIPT_EXTENSIONS: [&str; 8] =
        [".js", ".ts", ".mjs", ".cjs", ".mts", ".cts", ".jsx", ".tsx"];

    let has_script_extension = SCRIPT_EXTENSIONS.iter().any(|&ext| token.ends_with(ext));
    let is_explicitly_relative = token.starts_with("./") || token.starts_with("../");
    let is_plain_path =
        token.contains('/') && !(token.starts_with('@') || token.contains("://"));

    has_script_extension || is_explicitly_relative || is_plain_path
}
600
/// Strict check used for tokens of an unknown command: `token` must end with a
/// JS/TS extension and contain a directory separator, so bare names such as
/// `webpack.js` are not mistaken for project files.
fn looks_like_script_file(token: &str) -> bool {
    const SCRIPT_EXTENSIONS: [&str; 8] =
        [".js", ".ts", ".mjs", ".cjs", ".mts", ".cts", ".jsx", ".tsx"];

    // A `./` or `../` prefix always contains `/`, so one check covers all three.
    token.contains('/') && SCRIPT_EXTENSIONS.iter().any(|&ext| token.ends_with(ext))
}
612
613pub fn discover_plugin_entry_points(
618 plugin_result: &crate::plugins::AggregatedPluginResult,
619 config: &ResolvedConfig,
620 files: &[DiscoveredFile],
621) -> Vec<EntryPoint> {
622 let mut entries = Vec::new();
623
624 let relative_paths: Vec<String> = files
626 .iter()
627 .map(|f| {
628 f.path
629 .strip_prefix(&config.root)
630 .unwrap_or(&f.path)
631 .to_string_lossy()
632 .into_owned()
633 })
634 .collect();
635
636 let mut builder = globset::GlobSetBuilder::new();
639 for pattern in plugin_result
640 .entry_patterns
641 .iter()
642 .chain(plugin_result.discovered_always_used.iter())
643 .chain(plugin_result.always_used.iter())
644 {
645 if let Ok(glob) = globset::Glob::new(pattern) {
646 builder.add(glob);
647 }
648 }
649 if let Ok(glob_set) = builder.build()
650 && !glob_set.is_empty()
651 {
652 for (idx, rel) in relative_paths.iter().enumerate() {
653 if glob_set.is_match(rel) {
654 entries.push(EntryPoint {
655 path: files[idx].path.clone(),
656 source: EntryPointSource::Plugin {
657 name: "plugin".to_string(),
658 },
659 });
660 }
661 }
662 }
663
664 for setup_file in &plugin_result.setup_files {
666 let resolved = if setup_file.is_absolute() {
667 setup_file.clone()
668 } else {
669 config.root.join(setup_file)
670 };
671 if resolved.exists() {
672 entries.push(EntryPoint {
673 path: resolved,
674 source: EntryPointSource::Plugin {
675 name: "plugin-setup".to_string(),
676 },
677 });
678 } else {
679 for ext in SOURCE_EXTENSIONS {
681 let with_ext = resolved.with_extension(ext);
682 if with_ext.exists() {
683 entries.push(EntryPoint {
684 path: with_ext,
685 source: EntryPointSource::Plugin {
686 name: "plugin-setup".to_string(),
687 },
688 });
689 break;
690 }
691 }
692 }
693 }
694
695 entries.sort_by(|a, b| a.path.cmp(&b.path));
697 entries.dedup_by(|a, b| a.path == b.path);
698 entries
699}
700
701pub fn compile_glob_set(patterns: &[String]) -> Option<globset::GlobSet> {
703 if patterns.is_empty() {
704 return None;
705 }
706 let mut builder = globset::GlobSetBuilder::new();
707 for pattern in patterns {
708 if let Ok(glob) = globset::Glob::new(pattern) {
709 builder.add(glob);
710 }
711 }
712 builder.build().ok()
713}
714
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `script` yields exactly the `expected` file references, in order.
    #[track_caller]
    fn assert_refs(script: &str, expected: &[&str]) {
        assert_eq!(extract_script_file_refs(script), expected);
    }

    // --- extract_script_file_refs ---

    #[test]
    fn script_node_runner() {
        assert_refs(
            "node utilities/generate-coverage-badge.js",
            &["utilities/generate-coverage-badge.js"],
        );
    }

    #[test]
    fn script_ts_node_runner() {
        assert_refs("ts-node scripts/seed.ts", &["scripts/seed.ts"]);
    }

    #[test]
    fn script_tsx_runner() {
        assert_refs("tsx scripts/migrate.ts", &["scripts/migrate.ts"]);
    }

    #[test]
    fn script_npx_prefix() {
        assert_refs("npx ts-node scripts/generate.ts", &["scripts/generate.ts"]);
    }

    #[test]
    fn script_chained_commands() {
        assert_refs(
            "node scripts/build.js && node scripts/post-build.js",
            &["scripts/build.js", "scripts/post-build.js"],
        );
    }

    #[test]
    fn script_with_flags() {
        assert_refs(
            "node --experimental-specifier-resolution=node scripts/run.mjs",
            &["scripts/run.mjs"],
        );
    }

    #[test]
    fn script_no_file_ref() {
        assert_refs("next build", &[]);
    }

    #[test]
    fn script_bare_file_path() {
        assert_refs("echo done && node ./scripts/check.js", &["./scripts/check.js"]);
    }

    #[test]
    fn script_semicolon_separator() {
        assert_refs(
            "node scripts/a.js; node scripts/b.ts",
            &["scripts/a.js", "scripts/b.ts"],
        );
    }

    // --- looks_like_file_path ---

    #[test]
    fn file_path_with_extension() {
        for token in ["scripts/build.js", "scripts/build.ts", "scripts/build.mjs"] {
            assert!(looks_like_file_path(token), "{token} should be a file path");
        }
    }

    #[test]
    fn file_path_with_slash() {
        assert!(looks_like_file_path("scripts/build"));
    }

    #[test]
    fn not_file_path() {
        for token in ["--watch", "build"] {
            assert!(!looks_like_file_path(token), "{token} should not be a file path");
        }
    }

    // --- looks_like_script_file ---

    #[test]
    fn script_file_with_path() {
        for token in [
            "scripts/build.js",
            "./scripts/build.ts",
            "../scripts/build.mjs",
        ] {
            assert!(looks_like_script_file(token), "{token} should be a script file");
        }
    }

    #[test]
    fn not_script_file_bare_name() {
        for token in ["webpack.js", "build"] {
            assert!(
                !looks_like_script_file(token),
                "{token} should not be a script file"
            );
        }
    }

    // --- is_allowed_hidden_dir ---

    #[test]
    fn allowed_hidden_dirs() {
        for name in [".storybook", ".well-known", ".changeset", ".github"] {
            assert!(is_allowed_hidden_dir(OsStr::new(name)), "{name} should be allowed");
        }
    }

    #[test]
    fn disallowed_hidden_dirs() {
        for name in [".git", ".cache", ".vscode", ".fallow", ".next"] {
            assert!(
                !is_allowed_hidden_dir(OsStr::new(name)),
                "{name} should not be allowed"
            );
        }
    }

    #[test]
    fn non_hidden_dirs_not_in_allowlist() {
        for name in ["src", "node_modules"] {
            assert!(
                !is_allowed_hidden_dir(OsStr::new(name)),
                "{name} should not be in the allow-list"
            );
        }
    }

    mod proptests {
        use super::*;
        use proptest::prelude::*;

        proptest! {
            /// Well-formed `**/<prefix>*.<ext>` patterns always compile.
            #[test]
            fn glob_patterns_never_panic_on_compile(
                prefix in "[a-zA-Z0-9_]{1,20}",
                ext in prop::sample::select(vec!["ts", "tsx", "js", "jsx", "vue", "svelte", "astro", "mdx"]),
            ) {
                let compiled = globset::Glob::new(&format!("**/{prefix}*.{ext}"));
                prop_assert!(compiled.is_ok(), "Glob::new should not fail for well-formed patterns");
            }

            /// Extensions of non-source files are never listed in `SOURCE_EXTENSIONS`.
            #[test]
            fn non_source_extensions_not_in_list(
                ext in prop::sample::select(vec!["py", "rb", "rs", "go", "java", "html", "xml", "yaml", "toml", "md", "txt", "png", "jpg", "wasm", "lock"]),
            ) {
                prop_assert!(
                    SOURCE_EXTENSIONS.iter().all(|known| *known != ext),
                    "Extension '{ext}' should NOT be in SOURCE_EXTENSIONS"
                );
            }

            /// `compile_glob_set` must not panic, whatever the patterns look like.
            #[test]
            fn compile_glob_set_no_panic(
                patterns in prop::collection::vec("[a-zA-Z0-9_*/.]{1,30}", 0..10),
            ) {
                let _ = compile_glob_set(&patterns);
            }

            /// `looks_like_file_path` must not panic on arbitrary tokens.
            #[test]
            fn looks_like_file_path_no_panic(s in "[a-zA-Z0-9_./@-]{1,80}") {
                let _ = looks_like_file_path(&s);
            }

            /// `looks_like_script_file` must not panic on arbitrary tokens.
            #[test]
            fn looks_like_script_file_no_panic(s in "[a-zA-Z0-9_./@-]{1,80}") {
                let _ = looks_like_script_file(&s);
            }

            /// `extract_script_file_refs` must not panic on arbitrary scripts.
            #[test]
            fn extract_script_file_refs_no_panic(s in "[a-zA-Z0-9 _./@&|;-]{1,200}") {
                let _ = extract_script_file_refs(&s);
            }
        }
    }
}