1use std::ffi::OsStr;
2use std::path::{Path, PathBuf};
3use std::sync::{Mutex, OnceLock};
4
5use fallow_config::{ResolvedConfig, WorkspaceDiagnostic, WorkspaceDiagnosticKind};
6use fallow_types::discover::{DiscoveredFile, FileId};
7use ignore::WalkBuilder;
8use rustc_hash::FxHashSet;
9
10use super::ALLOWED_HIDDEN_DIRS;
11
12fn should_emit_note_once(key: String) -> bool {
18 static EMITTED: OnceLock<Mutex<FxHashSet<String>>> = OnceLock::new();
19 EMITTED
20 .get_or_init(|| Mutex::new(FxHashSet::default()))
21 .lock()
22 .map_or(true, |mut set| set.insert(key))
23}
24
/// A discovered file paired with its size in bytes, as collected by the walk visitor.
type SizedFile = (PathBuf, u64);
28
/// Maximum number of files `summarize_examples` lists individually; any further
/// entries are folded into a trailing "and N more".
const NOTE_EXAMPLE_CAP: usize = 5;
33
/// File count above which `build_largest_files_note` produces a note even when no
/// single file reaches `LARGE_FILE_NOTE_BYTES`.
const LARGE_SET_THRESHOLD: usize = 20_000;
38
/// Size (4 MiB) at or above which a single file makes `build_largest_files_note`
/// produce a note, regardless of how many files were discovered.
const LARGE_FILE_NOTE_BYTES: u64 = 4 * 1024 * 1024;
44
/// Minimum size (256 KiB) a file needs to appear in the "largest" list built by
/// `build_largest_files_note`; smaller files are left out of that list.
const NOTE_FILE_FLOOR_BYTES: u64 = 256 * 1024;
49
/// Reports whether `path` names a TypeScript declaration file.
///
/// Only the final path component is inspected: it must end in `.d.ts`, `.d.mts`
/// or `.d.cts`. A path with no file name, or whose file name is not valid UTF-8,
/// is never treated as a declaration file.
fn is_declaration_file(path: &Path) -> bool {
    const DECLARATION_SUFFIXES: [&str; 3] = [".d.ts", ".d.mts", ".d.cts"];
    path.file_name()
        .and_then(|file_name| file_name.to_str())
        .is_some_and(|file_name| {
            DECLARATION_SUFFIXES
                .iter()
                .any(|&suffix| file_name.ends_with(suffix))
        })
}
58
/// Formats a byte count as a size in units of 1024 * 1024 bytes with one decimal
/// place, e.g. `"1.5 MB"`.
fn format_size_mb(bytes: u64) -> String {
    const BYTES_PER_MB: f64 = 1024.0 * 1024.0;
    #[expect(
        clippy::cast_precision_loss,
        reason = "display-only size figure; precision loss past 2^53 bytes is irrelevant"
    )]
    let megabytes = bytes as f64 / BYTES_PER_MB;
    format!("{megabytes:.1} MB")
}
68
69fn summarize_examples(root: &Path, examples: &[SizedFile]) -> String {
72 let shown: Vec<String> = examples
73 .iter()
74 .take(NOTE_EXAMPLE_CAP)
75 .map(|(path, size)| {
76 let display = path
77 .strip_prefix(root)
78 .unwrap_or(path)
79 .display()
80 .to_string()
81 .replace('\\', "/");
82 format!("{display} ({})", format_size_mb(*size))
83 })
84 .collect();
85 let remaining = examples.len().saturating_sub(NOTE_EXAMPLE_CAP);
86 if remaining > 0 {
87 format!("{}, and {remaining} more", shown.join(", "))
88 } else {
89 shown.join(", ")
90 }
91}
92
93fn partition_by_size(
96 raw: Vec<SizedFile>,
97 max_file_size_bytes: Option<u64>,
98) -> (Vec<SizedFile>, Vec<SizedFile>) {
99 let Some(limit) = max_file_size_bytes else {
100 return (raw, Vec::new());
101 };
102 raw.into_iter()
103 .partition(|(path, size)| *size <= limit || is_declaration_file(path))
104}
105
106fn report_skipped_large_files(config: &ResolvedConfig, skipped: &[SizedFile]) {
111 if skipped.is_empty() {
112 return;
113 }
114 let diagnostics: Vec<WorkspaceDiagnostic> = skipped
115 .iter()
116 .map(|(path, size_bytes)| {
117 WorkspaceDiagnostic::new(
118 &config.root,
119 path.clone(),
120 WorkspaceDiagnosticKind::SkippedLargeFile {
121 size_bytes: *size_bytes,
122 },
123 )
124 })
125 .collect();
126 fallow_config::append_workspace_diagnostics(&config.root, diagnostics);
127
128 let mut sorted: Vec<SizedFile> = skipped.to_vec();
129 sorted.sort_unstable_by_key(|f| std::cmp::Reverse(f.1));
130 let count = skipped.len();
131 if !config.quiet
132 && should_emit_note_once(format!(
133 "skip::{}::{count}::{}",
134 config.root.display(),
135 sorted.first().map_or(0, |f| f.1)
136 ))
137 {
138 let examples = summarize_examples(&config.root, &sorted);
139 let noun = if count == 1 { "file" } else { "files" };
140 tracing::warn!(
141 "fallow: skipped {count} {noun} over the max file size limit ({examples}). \
142 Raise the limit with --max-file-size <MB> (or FALLOW_MAX_FILE_SIZE), or add them to ignorePatterns."
143 );
144 }
145}
146
147fn build_largest_files_note(root: &Path, files: &[DiscoveredFile]) -> Option<String> {
152 if files.is_empty() {
153 return None;
154 }
155 let largest = files.iter().map(|f| f.size_bytes).max().unwrap_or(0);
156 if files.len() <= LARGE_SET_THRESHOLD && largest < LARGE_FILE_NOTE_BYTES {
157 return None;
158 }
159 let count = files.len();
160 let noun = if count == 1 { "file" } else { "files" };
161 let mut by_size: Vec<SizedFile> = files
162 .iter()
163 .filter(|f| f.size_bytes >= NOTE_FILE_FLOOR_BYTES)
164 .map(|f| (f.path.clone(), f.size_bytes))
165 .collect();
166 by_size.sort_unstable_by_key(|f| std::cmp::Reverse(f.1));
167 if by_size.is_empty() {
168 return Some(format!(
171 "fallow: discovered {count} {noun}. If analysis stalls or runs out of memory, \
172 exclude large generated files via ignorePatterns or --max-file-size."
173 ));
174 }
175 let examples = summarize_examples(root, &by_size);
176 Some(format!(
177 "fallow: discovered {count} {noun}; largest: {examples}. If analysis stalls or runs out of memory, \
178 exclude large generated files via ignorePatterns or --max-file-size."
179 ))
180}
181
182fn note_largest_files(config: &ResolvedConfig, files: &[DiscoveredFile]) {
187 if config.quiet {
188 return;
189 }
190 if let Some(message) = build_largest_files_note(&config.root, files)
191 && should_emit_note_once(format!("note::{}::{}", config.root.display(), files.len()))
192 {
193 tracing::warn!("{message}");
194 }
195}
196
/// A set of hidden directory names that are permitted only beneath one root path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HiddenDirScope {
    /// Paths must start with this root for the scope to apply.
    root: PathBuf,
    /// Directory names (compared exactly) that the scope permits.
    dirs: Vec<String>,
}

impl HiddenDirScope {
    /// Creates a scope permitting the directory names in `dirs` under `root`.
    pub fn new(root: PathBuf, dirs: Vec<String>) -> Self {
        Self { root, dirs }
    }

    /// Reports whether a directory called `name`, located at `path`, is covered
    /// by this scope: `path` must lie under the scope root and `name` must equal
    /// one of the permitted directory names.
    fn allows(&self, path: &Path, name: &OsStr) -> bool {
        if !path.starts_with(&self.root) {
            return false;
        }
        self.dirs
            .iter()
            .any(|permitted| name == OsStr::new(permitted))
    }
}
213
214struct FileVisitor<'a> {
216 root: &'a Path,
217 ignore_patterns: &'a globset::GlobSet,
218 production_excludes: &'a Option<globset::GlobSet>,
219 shared: &'a Mutex<Vec<(std::path::PathBuf, u64)>>,
220 local: Vec<(std::path::PathBuf, u64)>,
221}
222
223impl ignore::ParallelVisitor for FileVisitor<'_> {
224 fn visit(&mut self, result: Result<ignore::DirEntry, ignore::Error>) -> ignore::WalkState {
225 let Ok(entry) = result else {
226 return ignore::WalkState::Continue;
227 };
228 if entry.file_type().is_some_and(|ft| ft.is_dir()) {
229 return ignore::WalkState::Continue;
230 }
231 let relative = entry
232 .path()
233 .strip_prefix(self.root)
234 .unwrap_or_else(|_| entry.path());
235 if self.ignore_patterns.is_match(relative) {
236 return ignore::WalkState::Continue;
237 }
238 if self
239 .production_excludes
240 .as_ref()
241 .is_some_and(|excludes| excludes.is_match(relative))
242 {
243 return ignore::WalkState::Continue;
244 }
245 let size_bytes = entry.metadata().map_or(0, |m| m.len());
246 self.local.push((entry.into_path(), size_bytes));
247 ignore::WalkState::Continue
248 }
249}
250
251impl Drop for FileVisitor<'_> {
252 #[expect(
253 clippy::expect_used,
254 reason = "poisoned walk collector lock means worker state is unrecoverable"
255 )]
256 fn drop(&mut self) {
257 if !self.local.is_empty() {
258 self.shared
259 .lock()
260 .expect("walk collector lock poisoned")
261 .append(&mut self.local);
262 }
263 }
264}
265
266struct FileVisitorBuilder<'a> {
268 root: &'a Path,
269 ignore_patterns: &'a globset::GlobSet,
270 production_excludes: &'a Option<globset::GlobSet>,
271 shared: &'a Mutex<Vec<(std::path::PathBuf, u64)>>,
272}
273
274impl<'s> ignore::ParallelVisitorBuilder<'s> for FileVisitorBuilder<'s> {
275 fn build(&mut self) -> Box<dyn ignore::ParallelVisitor + 's> {
276 Box::new(FileVisitor {
277 root: self.root,
278 ignore_patterns: self.ignore_patterns,
279 production_excludes: self.production_excludes,
280 shared: self.shared,
281 local: Vec::new(),
282 })
283 }
284}
285
/// File extensions (without the leading dot) that discovery treats as source files.
///
/// `discover_files_with_additional_hidden_dirs` registers each one as a `*.{ext}`
/// glob on the walker's type filter.
pub const SOURCE_EXTENSIONS: &[&str] = &[
    "ts", "tsx", "mts", "cts", "gts", "js", "jsx", "mjs", "cjs", "gjs", "vue", "svelte", "astro",
    "mdx", "css", "scss", "html", "graphql", "gql",
];
290
/// Glob patterns for files that discovery leaves out when `config.production` is set.
///
/// The patterns are compiled with `literal_separator(true)` and matched against
/// paths relative to the project root, so a pattern without a `**/` prefix (such
/// as `*.config.*`) only matches files directly in the root.
pub const PRODUCTION_EXCLUDE_PATTERNS: &[&str] = &[
    // Test, spec, e2e, benchmark, fixture and story files, by naming convention.
    "**/*.test.*",
    "**/*.spec.*",
    "**/*.e2e.*",
    "**/*.e2e-spec.*",
    "**/*.bench.*",
    "**/*.fixture.*",
    "**/*.stories.*",
    "**/*.story.*",
    // Conventional test, mock, snapshot and fixture directories at any depth.
    "**/__tests__/**",
    "**/__mocks__/**",
    "**/__snapshots__/**",
    "**/__fixtures__/**",
    "**/test/**",
    "**/tests/**",
    // Config files directly in the project root only.
    "*.config.*",
    // Dot-prefixed JS/TS files at any depth.
    "**/.*.js",
    "**/.*.ts",
    "**/.*.mjs",
    "**/.*.cjs",
];
313
314pub fn is_allowed_hidden_dir(name: &OsStr) -> bool {
316 ALLOWED_HIDDEN_DIRS.iter().any(|&d| OsStr::new(d) == name)
317}
318
319fn is_allowed_scoped_hidden_dir(
320 name: &OsStr,
321 path: &Path,
322 additional_hidden_dir_scopes: &[HiddenDirScope],
323) -> bool {
324 additional_hidden_dir_scopes
325 .iter()
326 .any(|scope| scope.allows(path, name))
327}
328
329fn is_allowed_hidden(entry: &ignore::DirEntry) -> bool {
335 is_allowed_hidden_with_scopes(entry, &[])
336}
337
338fn is_allowed_hidden_with_scopes(
339 entry: &ignore::DirEntry,
340 additional_hidden_dir_scopes: &[HiddenDirScope],
341) -> bool {
342 let name = entry.file_name();
343 let name_str = name.to_string_lossy();
344
345 if !name_str.starts_with('.') {
346 return true;
347 }
348
349 if entry.file_type().is_some_and(|ft| !ft.is_dir()) {
350 return true;
351 }
352
353 is_allowed_hidden_dir(name)
354 || is_allowed_scoped_hidden_dir(name, entry.path(), additional_hidden_dir_scopes)
355}
356
357pub fn discover_files(config: &ResolvedConfig) -> Vec<DiscoveredFile> {
363 discover_files_with_additional_hidden_dirs(config, &[])
364}
365
366#[expect(
372 clippy::cast_possible_truncation,
373 reason = "file count is bounded by project size, well under u32::MAX"
374)]
375#[expect(
376 clippy::expect_used,
377 reason = "source file globs are hard-coded and the collector lock must remain usable"
378)]
379pub fn discover_files_with_additional_hidden_dirs(
380 config: &ResolvedConfig,
381 additional_hidden_dir_scopes: &[HiddenDirScope],
382) -> Vec<DiscoveredFile> {
383 let _span = tracing::info_span!("discover_files").entered();
384
385 let mut types_builder = ignore::types::TypesBuilder::new();
386 for ext in SOURCE_EXTENSIONS {
387 types_builder
388 .add("source", &format!("*.{ext}"))
389 .expect("valid glob");
390 }
391 types_builder.select("source");
392 let types = types_builder.build().expect("valid types");
393
394 let mut walk_builder = WalkBuilder::new(&config.root);
395 walk_builder
396 .hidden(false)
397 .git_ignore(true)
398 .git_global(true)
399 .git_exclude(true)
400 .types(types)
401 .threads(config.threads);
402 if additional_hidden_dir_scopes.is_empty() {
403 walk_builder.filter_entry(is_allowed_hidden);
404 } else {
405 let scopes = additional_hidden_dir_scopes.to_vec();
406 walk_builder.filter_entry(move |entry| is_allowed_hidden_with_scopes(entry, &scopes));
407 }
408
409 let production_excludes = if config.production {
410 let mut builder = globset::GlobSetBuilder::new();
411 for pattern in PRODUCTION_EXCLUDE_PATTERNS {
412 if let Ok(glob) = globset::GlobBuilder::new(pattern)
413 .literal_separator(true)
414 .build()
415 {
416 builder.add(glob);
417 }
418 }
419 builder.build().ok()
420 } else {
421 None
422 };
423
424 let collected: Mutex<Vec<(std::path::PathBuf, u64)>> = Mutex::new(Vec::new());
425 let mut visitor_builder = FileVisitorBuilder {
426 root: &config.root,
427 ignore_patterns: &config.ignore_patterns,
428 production_excludes: &production_excludes,
429 shared: &collected,
430 };
431 walk_builder.build_parallel().visit(&mut visitor_builder);
432
433 let mut raw = collected
434 .into_inner()
435 .expect("walk collector lock poisoned");
436 raw.sort_unstable_by(|a, b| a.0.cmp(&b.0));
437
438 fallow_config::clear_source_discovery_diagnostics(&config.root);
442 let (kept, skipped) = partition_by_size(raw, config.max_file_size_bytes);
443 report_skipped_large_files(config, &skipped);
444
445 let files: Vec<DiscoveredFile> = kept
446 .into_iter()
447 .enumerate()
448 .map(|(idx, (path, size_bytes))| DiscoveredFile {
449 id: FileId(idx as u32),
450 path,
451 size_bytes,
452 })
453 .collect();
454
455 note_largest_files(config, &files);
456
457 files
458}
459
460#[cfg(test)]
461mod tests {
462 use std::ffi::OsStr;
463
464 use super::*;
465
/// These hidden directory names are accepted by `is_allowed_hidden_dir`.
#[test]
fn allowed_hidden_dirs() {
    let allowed = |name: &str| is_allowed_hidden_dir(OsStr::new(name));
    assert!(allowed(".storybook"));
    assert!(allowed(".vitepress"));
    assert!(allowed(".well-known"));
    assert!(allowed(".changeset"));
    assert!(allowed(".github"));
}
474
/// These hidden directory names are rejected by `is_allowed_hidden_dir`.
#[test]
fn disallowed_hidden_dirs() {
    let allowed = |name: &str| is_allowed_hidden_dir(OsStr::new(name));
    assert!(!allowed(".git"));
    assert!(!allowed(".cache"));
    assert!(!allowed(".vscode"));
    assert!(!allowed(".fallow"));
    assert!(!allowed(".next"));
}
483
/// Ordinary (non-dot) directory names are not on the hidden-directory allowlist.
#[test]
fn non_hidden_dirs_not_in_allowlist() {
    let allowed = |name: &str| is_allowed_hidden_dir(OsStr::new(name));
    assert!(!allowed("src"));
    assert!(!allowed("node_modules"));
}
489
/// The TypeScript extensions are part of `SOURCE_EXTENSIONS`.
#[test]
fn source_extensions_include_typescript() {
    let is_source = |ext: &str| SOURCE_EXTENSIONS.contains(&ext);
    assert!(is_source("ts"));
    assert!(is_source("tsx"));
    assert!(is_source("mts"));
    assert!(is_source("cts"));
    assert!(is_source("gts"));
}
498
/// The JavaScript extensions are part of `SOURCE_EXTENSIONS`.
#[test]
fn source_extensions_include_javascript() {
    let is_source = |ext: &str| SOURCE_EXTENSIONS.contains(&ext);
    assert!(is_source("js"));
    assert!(is_source("jsx"));
    assert!(is_source("mjs"));
    assert!(is_source("cjs"));
    assert!(is_source("gjs"));
}
507
/// The `vue`, `svelte` and `astro` extensions are part of `SOURCE_EXTENSIONS`.
#[test]
fn source_extensions_include_sfc_formats() {
    let is_source = |ext: &str| SOURCE_EXTENSIONS.contains(&ext);
    assert!(is_source("vue"));
    assert!(is_source("svelte"));
    assert!(is_source("astro"));
}
514
/// The stylesheet extensions are part of `SOURCE_EXTENSIONS`.
#[test]
fn source_extensions_include_styles() {
    let is_source = |ext: &str| SOURCE_EXTENSIONS.contains(&ext);
    assert!(is_source("css"));
    assert!(is_source("scss"));
}
520
/// Data, documentation and image extensions (and `htm`) are not source extensions.
#[test]
fn source_extensions_exclude_non_source() {
    let is_source = |ext: &str| SOURCE_EXTENSIONS.contains(&ext);
    assert!(!is_source("json"));
    assert!(!is_source("yaml"));
    assert!(!is_source("md"));
    assert!(!is_source("png"));
    assert!(!is_source("htm"));
}
529
/// `html` is part of `SOURCE_EXTENSIONS`.
#[test]
fn source_extensions_include_html() {
    let is_source = |ext: &str| SOURCE_EXTENSIONS.contains(&ext);
    assert!(is_source("html"));
}
534
/// Both GraphQL document extensions are part of `SOURCE_EXTENSIONS`.
#[test]
fn source_extensions_include_graphql_documents() {
    let is_source = |ext: &str| SOURCE_EXTENSIONS.contains(&ext);
    assert!(is_source("graphql"));
    assert!(is_source("gql"));
}
540
541 fn build_production_glob_set() -> globset::GlobSet {
542 let mut builder = globset::GlobSetBuilder::new();
543 for pattern in PRODUCTION_EXCLUDE_PATTERNS {
544 builder.add(
545 globset::GlobBuilder::new(pattern)
546 .literal_separator(true)
547 .build()
548 .expect("valid glob pattern"),
549 );
550 }
551 builder.build().expect("valid glob set")
552 }
553
/// Test and spec files and `__tests__` directories match the production
/// excludes; plain source files do not.
#[test]
fn production_excludes_test_files() {
    let set = build_production_glob_set();
    let excluded = |path: &str| set.is_match(path);
    assert!(excluded("src/Button.test.ts"));
    assert!(excluded("src/utils.spec.tsx"));
    assert!(excluded("src/__tests__/helper.ts"));
    assert!(!excluded("src/Button.ts"));
    assert!(!excluded("src/utils.tsx"));
}
563
/// Story files match the production excludes; the component itself does not.
#[test]
fn production_excludes_story_files() {
    let set = build_production_glob_set();
    let excluded = |path: &str| set.is_match(path);
    assert!(excluded("src/Button.stories.tsx"));
    assert!(excluded("src/Card.story.ts"));
    assert!(!excluded("src/Button.tsx"));
}
571
/// `*.config.*` files are excluded only when they sit directly in the root.
#[test]
fn production_excludes_config_files_at_root_only() {
    let set = build_production_glob_set();
    let excluded = |path: &str| set.is_match(path);
    assert!(excluded("vitest.config.ts"));
    assert!(excluded("jest.config.js"));
    assert!(!excluded("src/app/app.config.ts"));
    assert!(!excluded("src/app/app.config.server.ts"));
    assert!(!excluded("packages/foo/vitest.config.ts"));
    assert!(!excluded("src/config.ts"));
}
582
/// Every production exclude pattern compiles; the helper panics otherwise.
#[test]
fn production_patterns_are_valid_globs() {
    drop(build_production_glob_set());
}
587
/// `.idea` is not on the hidden-directory allowlist.
#[test]
fn disallowed_hidden_dirs_idea() {
    let idea = OsStr::new(".idea");
    assert!(!is_allowed_hidden_dir(idea));
}
592
/// `mdx` is part of `SOURCE_EXTENSIONS`.
#[test]
fn source_extensions_include_mdx() {
    let is_source = |ext: &str| SOURCE_EXTENSIONS.contains(&ext);
    assert!(is_source("mdx"));
}
597
/// Image, text, data and binary extensions are not source extensions.
#[test]
fn source_extensions_exclude_image_and_data_formats() {
    let is_source = |ext: &str| SOURCE_EXTENSIONS.contains(&ext);
    assert!(!is_source("png"));
    assert!(!is_source("jpg"));
    assert!(!is_source("svg"));
    assert!(!is_source("txt"));
    assert!(!is_source("csv"));
    assert!(!is_source("wasm"));
}
607
/// `.d.ts`, `.d.mts` and `.d.cts` files are declaration files; other names are not.
#[test]
fn is_declaration_file_matches_dts_variants() {
    let is_declaration = |path: &str| is_declaration_file(Path::new(path));
    assert!(is_declaration("env.d.ts"));
    assert!(is_declaration("src/auto-imports.d.ts"));
    assert!(is_declaration("mod.d.mts"));
    assert!(is_declaration("compat.d.cts"));
    assert!(!is_declaration("index.ts"));
    assert!(!is_declaration("component.tsx"));
    assert!(!is_declaration("notes.d.txt"));
}
618
/// Sizes are rendered with exactly one decimal place.
#[test]
fn format_size_mb_renders_one_decimal() {
    const MB: u64 = 1024 * 1024;
    assert_eq!(format_size_mb(5 * MB), "5.0 MB");
    assert_eq!(format_size_mb(MB + MB / 2), "1.5 MB");
    assert_eq!(format_size_mb(0), "0.0 MB");
}
625
/// Without a size limit every file is kept and nothing is skipped.
#[test]
fn partition_by_size_no_limit_keeps_all() {
    let raw: Vec<SizedFile> = [("a.ts", 10), ("b.ts", 10_000)]
        .iter()
        .map(|&(name, size)| (PathBuf::from(name), size))
        .collect();
    let (kept, skipped) = partition_by_size(raw, None);
    assert_eq!(kept.len(), 2);
    assert!(skipped.is_empty());
}
633
/// Only files strictly larger than the limit are skipped.
#[test]
fn partition_by_size_skips_strictly_over_limit() {
    let raw: Vec<SizedFile> = [("under.ts", 99), ("exact.ts", 100), ("over.ts", 101)]
        .iter()
        .map(|&(name, size)| (PathBuf::from(name), size))
        .collect();
    let (kept, skipped) = partition_by_size(raw, Some(100));
    let kept_paths: Vec<&Path> = kept.iter().map(|(path, _)| path.as_path()).collect();
    assert!(kept_paths.contains(&Path::new("under.ts")));
    assert!(
        kept_paths.contains(&Path::new("exact.ts")),
        "a file exactly at the limit is kept (skip is strictly-greater)"
    );
    assert_eq!(skipped.len(), 1);
    assert_eq!(skipped[0].0, PathBuf::from("over.ts"));
}
651
/// An oversized declaration file is kept while an oversized regular file is skipped.
#[test]
fn partition_by_size_exempts_declaration_files() {
    let raw: Vec<SizedFile> = [("huge.ts", 10_000), ("auto-imports.d.ts", 10_000)]
        .iter()
        .map(|&(name, size)| (PathBuf::from(name), size))
        .collect();
    let (kept, skipped) = partition_by_size(raw, Some(100));
    let kept_paths: Vec<&Path> = kept.iter().map(|(path, _)| path.as_path()).collect();
    assert!(
        kept_paths.contains(&Path::new("auto-imports.d.ts")),
        "declaration files are exempt from the size skip regardless of size"
    );
    assert_eq!(skipped.len(), 1);
    assert_eq!(skipped[0].0, PathBuf::from("huge.ts"));
}
667
668 fn disco(path: &str, size_bytes: u64) -> DiscoveredFile {
669 DiscoveredFile {
670 id: FileId(0),
671 path: PathBuf::from(path),
672 size_bytes,
673 }
674 }
675
/// A small set of small files produces no note.
#[test]
fn largest_files_note_below_threshold_is_none() {
    let files = [disco("a.ts", 100), disco("b.ts", 200)];
    let note = build_largest_files_note(Path::new("/p"), &files);
    assert!(note.is_none());
}
681
/// A single large file yields a note that uses the singular noun.
#[test]
fn largest_files_note_single_file_uses_singular() {
    const FIVE_MB: u64 = 5 * 1024 * 1024;
    let files = [disco("big.ts", FIVE_MB)];
    let note = build_largest_files_note(Path::new("/p"), &files).expect("note fires");
    assert!(
        note.contains("discovered 1 file;"),
        "singular noun on the single-big-file path (issue #1086 regression): {note}"
    );
    assert!(!note.contains("discovered 1 files"));
    assert!(note.contains("big.ts (5.0 MB)"));
}
693
/// Files below the listing floor are counted but not named in the note.
#[test]
fn largest_files_note_filters_sub_floor_files() {
    const FIVE_MB: u64 = 5 * 1024 * 1024;
    let files = [disco("big.ts", FIVE_MB), disco("tiny.ts", 10)];
    let note = build_largest_files_note(Path::new("/p"), &files).expect("note fires");
    assert!(note.contains("discovered 2 files;"));
    assert!(note.contains("big.ts (5.0 MB)"));
    assert!(
        !note.contains("tiny.ts"),
        "sub-floor files are not listed as `0.0 MB` chaff: {note}"
    );
}
705
/// A file set just over the count threshold, made only of tiny files, yields a
/// note without a `largest:` list.
#[test]
fn largest_files_note_large_set_no_big_file_omits_list() {
    let total = LARGE_SET_THRESHOLD + 1;
    let files: Vec<DiscoveredFile> = (0..total)
        .map(|i| disco(&format!("f{i}.ts"), 100))
        .collect();
    let note = build_largest_files_note(Path::new("/p"), &files).expect("large set fires");
    let expected_count = format!("discovered {total} files");
    assert!(note.contains(&expected_count));
    assert!(
        !note.contains("largest:"),
        "no sub-floor `largest:` list when no file clears the floor: {note}"
    );
}
718
719 mod discover_files_integration {
720 use std::path::PathBuf;
721
722 use fallow_config::{
723 DuplicatesConfig, FallowConfig, FlagsConfig, HealthConfig, OutputFormat, ResolveConfig,
724 RulesConfig,
725 };
726
727 use super::*;
728
729 fn make_config(root: PathBuf, production: bool) -> ResolvedConfig {
731 FallowConfig {
732 production: production.into(),
733 ..Default::default()
734 }
735 .resolve(root, OutputFormat::Human, 1, true, true, None)
736 }
737
738 fn file_names(files: &[DiscoveredFile], root: &std::path::Path) -> Vec<String> {
741 files
742 .iter()
743 .map(|f| {
744 f.path
745 .strip_prefix(root)
746 .unwrap_or(&f.path)
747 .to_string_lossy()
748 .replace('\\', "/")
749 })
750 .collect()
751 }
752
/// Files with each of the TS/JS extensions written here are discovered.
#[test]
fn discovers_source_files_with_valid_extensions() {
    let dir = tempfile::tempdir().expect("create temp dir");
    let src = dir.path().join("src");
    std::fs::create_dir_all(&src).unwrap();

    let fixtures = [
        ("app.ts", "export const a = 1;"),
        ("component.tsx", "export default () => {};"),
        ("utils.js", "module.exports = {};"),
        ("helper.jsx", "export const h = 1;"),
        ("config.mjs", "export default {};"),
        ("legacy.cjs", "module.exports = {};"),
        ("types.mts", "export type T = string;"),
        ("compat.cts", "module.exports = {};"),
    ];
    for &(name, contents) in &fixtures {
        std::fs::write(src.join(name), contents).unwrap();
    }

    let config = make_config(dir.path().to_path_buf(), false);
    let files = discover_files(&config);
    let names = file_names(&files, dir.path());
    let found = |relative: &str| names.contains(&relative.to_string());

    assert!(found("src/app.ts"));
    assert!(found("src/component.tsx"));
    assert!(found("src/utils.js"));
    assert!(found("src/helper.jsx"));
    assert!(found("src/config.mjs"));
    assert!(found("src/legacy.cjs"));
    assert!(found("src/types.mts"));
    assert!(found("src/compat.cts"));
}
781
/// Only the `.ts` file is discovered; JSON, Markdown, text and PNG files are not.
#[test]
fn excludes_non_source_extensions() {
    let dir = tempfile::tempdir().expect("create temp dir");
    let src = dir.path().join("src");
    std::fs::create_dir_all(&src).unwrap();

    std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();

    let non_source = [
        ("data.json", "{}"),
        ("readme.md", "# Hello"),
        ("notes.txt", "notes"),
    ];
    for &(name, contents) in &non_source {
        std::fs::write(src.join(name), contents).unwrap();
    }
    std::fs::write(src.join("logo.png"), [0u8; 8]).unwrap();

    let config = make_config(dir.path().to_path_buf(), false);
    let files = discover_files(&config);
    let names = file_names(&files, dir.path());

    assert_eq!(names.len(), 1, "only the .ts file should be discovered");
    assert!(names.contains(&"src/app.ts".to_string()));
}
802
/// Source files inside `.git`, `.idea` and `.cache` are not discovered.
#[test]
fn excludes_disallowed_hidden_directories() {
    let dir = tempfile::tempdir().expect("create temp dir");

    let fixtures = [
        (".git", "hooks.ts", "// git hook"),
        (".idea", "workspace.ts", "// idea"),
        (".cache", "cached.js", "// cached"),
        ("src", "app.ts", "export const a = 1;"),
    ];
    for &(folder, file, contents) in &fixtures {
        let target = dir.path().join(folder);
        std::fs::create_dir_all(&target).unwrap();
        std::fs::write(target.join(file), contents).unwrap();
    }

    let config = make_config(dir.path().to_path_buf(), false);
    let files = discover_files(&config);
    let names = file_names(&files, dir.path());

    assert_eq!(names.len(), 1, "only src/app.ts should be discovered");
    assert!(names.contains(&"src/app.ts".to_string()));
}
830
/// Source files inside `.storybook`, `.github` and `.changeset` are discovered.
#[test]
fn includes_allowed_hidden_directories() {
    let dir = tempfile::tempdir().expect("create temp dir");

    let fixtures = [
        (".storybook", "main.ts", "export default {};"),
        (".github", "actions.js", "module.exports = {};"),
        (".changeset", "config.js", "module.exports = {};"),
    ];
    for &(folder, file, contents) in &fixtures {
        let target = dir.path().join(folder);
        std::fs::create_dir_all(&target).unwrap();
        std::fs::write(target.join(file), contents).unwrap();
    }

    let config = make_config(dir.path().to_path_buf(), false);
    let files = discover_files(&config);
    let names = file_names(&files, dir.path());
    let found = |relative: &str| names.contains(&relative.to_string());

    assert!(
        found(".storybook/main.ts"),
        "files in .storybook should be discovered"
    );
    assert!(
        found(".github/actions.js"),
        "files in .github should be discovered"
    );
    assert!(
        found(".changeset/config.js"),
        "files in .changeset should be discovered"
    );
}
864
/// Without extra scopes, `.client` and `.server` directories are not entered.
#[test]
fn default_discovery_excludes_client_and_server_hidden_directories() {
    let dir = tempfile::tempdir().expect("create temp dir");
    let app = dir.path().join("app");
    for hidden in [".client", ".server"].iter() {
        std::fs::create_dir_all(app.join(hidden)).unwrap();
    }
    let fixtures = [
        (".client/analytics.ts", "export const a = 1;"),
        (".server/db.ts", "export const db = {};"),
        ("root.tsx", "export default function Root() {}"),
    ];
    for &(relative, contents) in &fixtures {
        std::fs::write(app.join(relative), contents).unwrap();
    }

    let config = make_config(dir.path().to_path_buf(), false);
    let files = discover_files(&config);
    let names = file_names(&files, dir.path());
    let found = |relative: &str| names.contains(&relative.to_string());

    assert!(found("app/root.tsx"));
    assert!(!found("app/.client/analytics.ts"));
    assert!(!found("app/.server/db.ts"));
}
883
/// A scope rooted at the package lets discovery enter its `.client` and
/// `.server` directories.
#[test]
fn scoped_hidden_dirs_include_client_and_server_under_package_root() {
    let dir = tempfile::tempdir().expect("create temp dir");
    let package = dir.path().join("packages/app");
    for hidden in ["app/.client", "app/.server"].iter() {
        std::fs::create_dir_all(package.join(hidden)).unwrap();
    }
    let fixtures = [
        ("app/.client/analytics.ts", "export const track = () => {};"),
        ("app/.server/db.ts", "export const db = {};"),
    ];
    for &(relative, contents) in &fixtures {
        std::fs::write(package.join(relative), contents).unwrap();
    }

    let config = make_config(dir.path().to_path_buf(), false);
    let permitted = vec![".client".to_string(), ".server".to_string()];
    let scopes = [HiddenDirScope::new(package, permitted)];
    let files = discover_files_with_additional_hidden_dirs(&config, &scopes);
    let names = file_names(&files, dir.path());
    let found = |relative: &str| names.contains(&relative.to_string());

    assert!(found("packages/app/app/.client/analytics.ts"));
    assert!(found("packages/app/app/.server/db.ts"));
}
908
/// A scope for one package does not open the same hidden directory in another.
#[test]
fn scoped_hidden_dirs_do_not_include_unscoped_packages() {
    let dir = tempfile::tempdir().expect("create temp dir");
    let active = dir.path().join("packages/active");
    let inactive = dir.path().join("packages/inactive");
    for package in [&active, &inactive].iter() {
        std::fs::create_dir_all(package.join("app/.server")).unwrap();
        std::fs::write(package.join("app/.server/db.ts"), "export const db = {};").unwrap();
    }

    let config = make_config(dir.path().to_path_buf(), false);
    let scopes = [HiddenDirScope::new(active, vec![".server".to_string()])];
    let files = discover_files_with_additional_hidden_dirs(&config, &scopes);
    let names = file_names(&files, dir.path());
    let found = |relative: &str| names.contains(&relative.to_string());

    assert!(found("packages/active/app/.server/db.ts"));
    assert!(!found("packages/inactive/app/.server/db.ts"));
}
927
/// A root-anchored `/build/` rule in `.ignore` keeps the root `build` directory
/// out of discovery.
#[test]
fn excludes_root_build_directory() {
    let dir = tempfile::tempdir().expect("create temp dir");
    let root = dir.path();

    std::fs::write(root.join(".ignore"), "/build/\n").unwrap();

    let fixtures = [
        ("build", "output.js", "// build output"),
        ("src", "app.ts", "export const a = 1;"),
    ];
    for &(folder, file, contents) in &fixtures {
        let target = root.join(folder);
        std::fs::create_dir_all(&target).unwrap();
        std::fs::write(target.join(file), contents).unwrap();
    }

    let config = make_config(root.to_path_buf(), false);
    let files = discover_files(&config);
    let names = file_names(&files, root);

    assert_eq!(names.len(), 1, "root build/ should be excluded via .ignore");
    assert!(names.contains(&"src/app.ts".to_string()));
}
949
/// A `build` directory nested under `src` is discovered like any other directory.
#[test]
fn includes_nested_build_directory() {
    let dir = tempfile::tempdir().expect("create temp dir");
    let root = dir.path();

    let nested_build = root.join("src").join("build");
    std::fs::create_dir_all(&nested_build).unwrap();
    std::fs::write(nested_build.join("helper.ts"), "export const h = 1;").unwrap();

    let config = make_config(root.to_path_buf(), false);
    let files = discover_files(&config);
    let names = file_names(&files, root);

    assert!(
        names.contains(&"src/build/helper.ts".to_string()),
        "nested build/ directories should be included"
    );
}
967
/// Discovered files come back sorted by path with ids `0, 1, 2, ...` in that order.
#[test]
#[expect(
    clippy::cast_possible_truncation,
    reason = "test file counts are trivially small"
)]
fn file_ids_are_sequential_after_sorting() {
    let dir = tempfile::tempdir().expect("create temp dir");
    let src = dir.path().join("src");
    std::fs::create_dir_all(&src).unwrap();

    // Written out of order so the check does not rely on creation order.
    let fixtures = [
        ("z_last.ts", "export const z = 1;"),
        ("a_first.ts", "export const a = 1;"),
        ("m_middle.ts", "export const m = 1;"),
    ];
    for &(name, contents) in &fixtures {
        std::fs::write(src.join(name), contents).unwrap();
    }

    let config = make_config(dir.path().to_path_buf(), false);
    let files = discover_files(&config);

    for (idx, file) in files.iter().enumerate() {
        assert_eq!(file.id, FileId(idx as u32), "FileId should be sequential");
    }

    for (earlier, later) in files.iter().zip(files.iter().skip(1)) {
        assert!(
            earlier.path < later.path,
            "files should be sorted by path"
        );
    }
}
996
/// In production mode test, spec and story files are dropped; plain sources stay.
#[test]
fn production_mode_excludes_test_files() {
    let dir = tempfile::tempdir().expect("create temp dir");
    let src = dir.path().join("src");
    std::fs::create_dir_all(&src).unwrap();

    let fixtures = [
        ("app.ts", "export const a = 1;"),
        ("app.test.ts", "test('a', () => {});"),
        ("app.spec.ts", "describe('a', () => {});"),
        ("app.stories.tsx", "export default {};"),
    ];
    for &(name, contents) in &fixtures {
        std::fs::write(src.join(name), contents).unwrap();
    }

    let config = make_config(dir.path().to_path_buf(), true);
    let files = discover_files(&config);
    let names = file_names(&files, dir.path());
    let found = |relative: &str| names.contains(&relative.to_string());

    assert!(
        found("src/app.ts"),
        "source files should be included in production mode"
    );
    assert!(
        !found("src/app.test.ts"),
        "test files should be excluded in production mode"
    );
    assert!(
        !found("src/app.spec.ts"),
        "spec files should be excluded in production mode"
    );
    assert!(
        !found("src/app.stories.tsx"),
        "story files should be excluded in production mode"
    );
}
1029
1030 #[test]
1031 fn non_production_mode_includes_test_files() {
1032 let dir = tempfile::tempdir().expect("create temp dir");
1033 let src = dir.path().join("src");
1034 std::fs::create_dir_all(&src).unwrap();
1035
1036 std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();
1037 std::fs::write(src.join("app.test.ts"), "test('a', () => {});").unwrap();
1038
1039 let config = make_config(dir.path().to_path_buf(), false);
1040 let files = discover_files(&config);
1041 let names = file_names(&files, dir.path());
1042
1043 assert!(names.contains(&"src/app.ts".to_string()));
1044 assert!(
1045 names.contains(&"src/app.test.ts".to_string()),
1046 "test files should be included in non-production mode"
1047 );
1048 }
1049
1050 #[test]
1051 fn empty_directory_returns_no_files() {
1052 let dir = tempfile::tempdir().expect("create temp dir");
1053 let config = make_config(dir.path().to_path_buf(), false);
1054 let files = discover_files(&config);
1055 assert!(files.is_empty(), "empty project should discover no files");
1056 }
1057
1058 #[test]
1059 fn hidden_files_not_discovered_as_source() {
1060 let dir = tempfile::tempdir().expect("create temp dir");
1061
1062 std::fs::write(dir.path().join(".env"), "SECRET=abc").unwrap();
1063 std::fs::write(dir.path().join(".gitignore"), "node_modules").unwrap();
1064 std::fs::write(dir.path().join(".eslintrc.js"), "module.exports = {};").unwrap();
1065
1066 let src = dir.path().join("src");
1067 std::fs::create_dir_all(&src).unwrap();
1068 std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();
1069
1070 let config = make_config(dir.path().to_path_buf(), false);
1071 let files = discover_files(&config);
1072 let names = file_names(&files, dir.path());
1073
1074 assert!(
1075 !names.contains(&".env".to_string()),
1076 ".env should not be discovered"
1077 );
1078 assert!(
1079 !names.contains(&".gitignore".to_string()),
1080 ".gitignore should not be discovered"
1081 );
1082 }
1083
1084 fn make_config_with_ignores(root: PathBuf, ignores: Vec<String>) -> ResolvedConfig {
1086 FallowConfig {
1087 schema: None,
1088 extends: vec![],
1089 entry: vec![],
1090 ignore_patterns: ignores,
1091 framework: vec![],
1092 workspaces: None,
1093 ignore_dependencies: vec![],
1094 ignore_unresolved_imports: vec![],
1095 ignore_exports: vec![],
1096 ignore_catalog_references: vec![],
1097 ignore_dependency_overrides: vec![],
1098 ignore_exports_used_in_file: fallow_config::IgnoreExportsUsedInFileConfig::default(
1099 ),
1100 used_class_members: vec![],
1101 ignore_decorators: vec![],
1102 duplicates: DuplicatesConfig::default(),
1103 health: HealthConfig::default(),
1104 rules: RulesConfig::default(),
1105 boundaries: fallow_config::BoundaryConfig::default(),
1106 production: false.into(),
1107 plugins: vec![],
1108 dynamically_loaded: vec![],
1109 overrides: vec![],
1110 regression: None,
1111 audit: fallow_config::AuditConfig::default(),
1112 codeowners: None,
1113 public_packages: vec![],
1114 flags: FlagsConfig::default(),
1115 security: fallow_config::SecurityConfig::default(),
1116 fix: fallow_config::FixConfig::default(),
1117 resolve: ResolveConfig::default(),
1118 sealed: false,
1119 include_entry_exports: false,
1120 auto_imports: false,
1121 cache: fallow_config::CacheConfig::default(),
1122 }
1123 .resolve(root, OutputFormat::Human, 1, true, true, None)
1124 }
1125
1126 #[test]
1127 fn custom_ignore_patterns_exclude_matching_files() {
1128 let dir = tempfile::tempdir().expect("create temp dir");
1129
1130 let generated = dir.path().join("src").join("api").join("generated");
1131 std::fs::create_dir_all(&generated).unwrap();
1132 std::fs::write(generated.join("client.ts"), "export const api = {};").unwrap();
1133
1134 let client = dir.path().join("src").join("api").join("client");
1135 std::fs::create_dir_all(&client).unwrap();
1136 std::fs::write(client.join("fetch.ts"), "export const fetch = {};").unwrap();
1137
1138 let src = dir.path().join("src");
1139 std::fs::write(src.join("index.ts"), "export const x = 1;").unwrap();
1140
1141 let config = make_config_with_ignores(
1142 dir.path().to_path_buf(),
1143 vec![
1144 "src/api/generated/**".to_string(),
1145 "src/api/client/**".to_string(),
1146 ],
1147 );
1148 let files = discover_files(&config);
1149 let names = file_names(&files, dir.path());
1150
1151 assert_eq!(names.len(), 1, "only non-ignored files: {names:?}");
1152 assert!(names.contains(&"src/index.ts".to_string()));
1153 }
1154
1155 #[test]
1156 fn default_ignore_patterns_exclude_node_modules_and_dist() {
1157 let dir = tempfile::tempdir().expect("create temp dir");
1158
1159 let nm = dir.path().join("node_modules").join("lodash");
1160 std::fs::create_dir_all(&nm).unwrap();
1161 std::fs::write(nm.join("lodash.js"), "module.exports = {};").unwrap();
1162
1163 let dist = dir.path().join("dist");
1164 std::fs::create_dir_all(&dist).unwrap();
1165 std::fs::write(dist.join("bundle.js"), "// bundled").unwrap();
1166
1167 let src = dir.path().join("src");
1168 std::fs::create_dir_all(&src).unwrap();
1169 std::fs::write(src.join("index.ts"), "export const x = 1;").unwrap();
1170
1171 let config = make_config(dir.path().to_path_buf(), false);
1172 let files = discover_files(&config);
1173 let names = file_names(&files, dir.path());
1174
1175 assert_eq!(names.len(), 1);
1176 assert!(names.contains(&"src/index.ts".to_string()));
1177 }
1178
1179 #[test]
1180 fn default_ignore_patterns_exclude_root_build() {
1181 let dir = tempfile::tempdir().expect("create temp dir");
1182
1183 let build = dir.path().join("build");
1184 std::fs::create_dir_all(&build).unwrap();
1185 std::fs::write(build.join("output.js"), "// built").unwrap();
1186
1187 let nested_build = dir.path().join("src").join("build");
1188 std::fs::create_dir_all(&nested_build).unwrap();
1189 std::fs::write(nested_build.join("helper.ts"), "export const h = 1;").unwrap();
1190
1191 let src = dir.path().join("src");
1192 std::fs::write(src.join("index.ts"), "export const x = 1;").unwrap();
1193
1194 let config = make_config(dir.path().to_path_buf(), false);
1195 let files = discover_files(&config);
1196 let names = file_names(&files, dir.path());
1197
1198 assert_eq!(
1199 names.len(),
1200 2,
1201 "root build/ excluded, nested kept: {names:?}"
1202 );
1203 assert!(names.contains(&"src/index.ts".to_string()));
1204 assert!(names.contains(&"src/build/helper.ts".to_string()));
1205 }
1206
1207 fn make_config_with_max_file_size(
1209 root: PathBuf,
1210 max_file_size_bytes: Option<u64>,
1211 ) -> ResolvedConfig {
1212 let mut config = make_config(root, false);
1213 config.max_file_size_bytes = max_file_size_bytes;
1214 config
1215 }
1216
1217 #[test]
1218 fn skips_files_over_max_file_size() {
1219 let dir = tempfile::tempdir().expect("create temp dir");
1220 let src = dir.path().join("src");
1221 std::fs::create_dir_all(&src).unwrap();
1222 std::fs::write(src.join("small.ts"), "export const a = 1;").unwrap();
1223 std::fs::write(src.join("huge.ts"), "x".repeat(5_000)).unwrap();
1224
1225 let config = make_config_with_max_file_size(dir.path().to_path_buf(), Some(1_000));
1226 let files = discover_files(&config);
1227 let names = file_names(&files, dir.path());
1228
1229 assert!(names.contains(&"src/small.ts".to_string()));
1230 assert!(
1231 !names.contains(&"src/huge.ts".to_string()),
1232 "a file over the size limit must not be discovered"
1233 );
1234 }
1235
1236 #[test]
1237 fn declaration_files_exempt_from_size_skip() {
1238 let dir = tempfile::tempdir().expect("create temp dir");
1239 let src = dir.path().join("src");
1240 std::fs::create_dir_all(&src).unwrap();
1241 std::fs::write(src.join("auto-imports.d.ts"), "x".repeat(5_000)).unwrap();
1242 std::fs::write(src.join("huge.ts"), "x".repeat(5_000)).unwrap();
1243
1244 let config = make_config_with_max_file_size(dir.path().to_path_buf(), Some(1_000));
1245 let files = discover_files(&config);
1246 let names = file_names(&files, dir.path());
1247
1248 assert!(
1249 names.contains(&"src/auto-imports.d.ts".to_string()),
1250 "a large .d.ts is exempt from the skip (reachability root for global types)"
1251 );
1252 assert!(!names.contains(&"src/huge.ts".to_string()));
1253 }
1254
1255 #[test]
1256 fn unlimited_size_keeps_large_files() {
1257 let dir = tempfile::tempdir().expect("create temp dir");
1258 let src = dir.path().join("src");
1259 std::fs::create_dir_all(&src).unwrap();
1260 std::fs::write(src.join("huge.ts"), "x".repeat(5_000)).unwrap();
1261
1262 let config = make_config_with_max_file_size(dir.path().to_path_buf(), None);
1263 let files = discover_files(&config);
1264 let names = file_names(&files, dir.path());
1265
1266 assert!(
1267 names.contains(&"src/huge.ts".to_string()),
1268 "no limit keeps every file"
1269 );
1270 }
1271
1272 #[test]
1273 fn skipped_file_recorded_in_workspace_diagnostics() {
1274 let dir = tempfile::tempdir().expect("create temp dir");
1275 let src = dir.path().join("src");
1276 std::fs::create_dir_all(&src).unwrap();
1277 std::fs::write(src.join("huge.ts"), "x".repeat(5_000)).unwrap();
1278
1279 let config = make_config_with_max_file_size(dir.path().to_path_buf(), Some(1_000));
1280 let _ = discover_files(&config);
1281
1282 let diagnostics = fallow_config::workspace_diagnostics_for(dir.path());
1283 let skipped: Vec<_> = diagnostics
1284 .iter()
1285 .filter(|d| {
1286 matches!(
1287 d.kind,
1288 fallow_config::WorkspaceDiagnosticKind::SkippedLargeFile { .. }
1289 )
1290 })
1291 .collect();
1292 assert_eq!(
1293 skipped.len(),
1294 1,
1295 "the skipped file is recorded in workspace diagnostics for JSON output"
1296 );
1297 assert!(skipped[0].path.ends_with("src/huge.ts"));
1298 assert!(
1299 matches!(
1300 skipped[0].kind,
1301 fallow_config::WorkspaceDiagnosticKind::SkippedLargeFile { size_bytes }
1302 if size_bytes == 5_000
1303 ),
1304 "the recorded diagnostic carries the on-disk byte size"
1305 );
1306 }
1307 }
1308}