Skip to main content

fallow_core/discover/
walk.rs

1use std::ffi::OsStr;
2use std::path::{Path, PathBuf};
3use std::sync::{Mutex, OnceLock};
4
5use fallow_config::{ResolvedConfig, WorkspaceDiagnostic, WorkspaceDiagnosticKind};
6use fallow_types::discover::{DiscoveredFile, FileId};
7use ignore::WalkBuilder;
8use rustc_hash::FxHashSet;
9
10use super::ALLOWED_HIDDEN_DIRS;
11
12/// Process-wide dedupe of the size-skip / largest-files stderr notes, keyed by a
13/// content-derived string, so combined-mode (`fallow` runs check + dupes +
14/// health, each of which can trigger a source walk) emits each note at most once
15/// per distinct content. Mirrors the workspace-diagnostics `should_emit`
16/// pattern (issue #1086).
17fn should_emit_note_once(key: String) -> bool {
18    static EMITTED: OnceLock<Mutex<FxHashSet<String>>> = OnceLock::new();
19    EMITTED
20        .get_or_init(|| Mutex::new(FxHashSet::default()))
21        .lock()
22        .map_or(true, |mut set| set.insert(key))
23}
24
/// One walker hit: a file path together with its size on disk in bytes. This is
/// the intermediate shape the parallel walk produces, before [`DiscoveredFile`]
/// ids are handed out.
type SizedFile = (PathBuf, u64);

/// Upper bound on the `path (size)` examples spelled out in the aggregated
/// skipped-file and largest-files stderr notes; anything beyond it is folded
/// into an "and N more" tail so a monorepo with many hits still gets one
/// bounded line.
const NOTE_EXAMPLE_CAP: usize = 5;

/// The pre-parse largest-files note fires once more than this many files were
/// discovered, giving an out-of-memory hang at the parse stage a visible
/// suspect list (issue #1086).
const LARGE_SET_THRESHOLD: usize = 20_000;

/// The pre-parse largest-files note also fires, regardless of project size,
/// once any kept file reaches this many bytes. Deliberately just below the
/// default 5 MB skip so it flags kept files that approach the skip limit (the
/// real out-of-memory suspects) instead of ordinary large-but-benign files.
const LARGE_FILE_NOTE_BYTES: u64 = 4 * 1024 * 1024;

/// Smallest file size that is listed in the largest-files note. Without this
/// floor the list would be padded with `0.0 MB` entries once the note fires;
/// with it, only plausible memory contributors are named.
const NOTE_FILE_FLOOR_BYTES: u64 = 256 * 1024;

/// Smallest file size considered for content-shape based minified-bundle
/// skipping. Below it a one-line file may be a hand-written utility, whereas
/// multi-MB one-line JS is generated output in practice.
const MINIFIED_FILE_SKIP_BYTES: u64 = 1024 * 1024;

/// How many leading bytes of a large JS file are sampled when deciding whether
/// it is minified.
const MINIFIED_SAMPLE_BYTES: usize = 256 * 1024;

/// Line length, in bytes, at which a line inside a multi-MB JS file marks the
/// file as generated minified output. Skipping such files avoids parsing
/// assets that can expand to huge ASTs.
const MINIFIED_LONG_LINE_BYTES: usize = 128 * 1024;
61
/// Returns `true` when the file name of `path` ends in `.d.ts`, `.d.mts` or
/// `.d.cts`, i.e. the path names a TypeScript declaration file.
///
/// Declaration files are exempt from the per-file size skip because they act
/// as reachability roots for global types: dropping a large
/// `auto-imports.d.ts` would false-flag the files whose types it provides.
/// Paths without a UTF-8 file name are never treated as declaration files.
fn is_declaration_file(path: &Path) -> bool {
    const DECLARATION_SUFFIXES: [&str; 3] = [".d.ts", ".d.mts", ".d.cts"];
    path.file_name()
        .and_then(OsStr::to_str)
        .is_some_and(|name| {
            DECLARATION_SUFFIXES
                .iter()
                .any(|suffix| name.ends_with(suffix))
        })
}
70
/// Returns `true` when the extension of `path` is exactly `js`, `mjs` or `cjs`.
/// The comparison is case-sensitive; a missing or non-UTF-8 extension yields
/// `false`.
fn is_plain_js_file(path: &Path) -> bool {
    path.extension()
        .and_then(OsStr::to_str)
        .is_some_and(|ext| ["js", "mjs", "cjs"].contains(&ext))
}
77
78fn has_minified_line_shape(path: &Path) -> bool {
79    use std::io::Read;
80
81    let Ok(mut file) = std::fs::File::open(path) else {
82        return false;
83    };
84    let mut sample = vec![0; MINIFIED_SAMPLE_BYTES];
85    let Ok(len) = file.read(&mut sample) else {
86        return false;
87    };
88    sample.truncate(len);
89    if sample.is_empty() {
90        return false;
91    }
92
93    let mut current_line = 0usize;
94    for byte in sample {
95        if byte == b'\n' || byte == b'\r' {
96            current_line = 0;
97            continue;
98        }
99        current_line += 1;
100        if current_line >= MINIFIED_LONG_LINE_BYTES {
101            return true;
102        }
103    }
104    false
105}
106
107fn is_probably_minified_generated_js(path: &Path, size_bytes: u64) -> bool {
108    size_bytes >= MINIFIED_FILE_SKIP_BYTES
109        && is_plain_js_file(path)
110        && !is_declaration_file(path)
111        && has_minified_line_shape(path)
112}
113
/// Format `bytes` as megabytes (1 MB = 1024 * 1024 bytes) with exactly one
/// decimal place, e.g. `1.5 MB`.
fn format_size_mb(bytes: u64) -> String {
    const BYTES_PER_MB: f64 = 1024.0 * 1024.0;
    #[expect(
        clippy::cast_precision_loss,
        reason = "display-only size figure; precision loss past 2^53 bytes is irrelevant"
    )]
    let megabytes = bytes as f64 / BYTES_PER_MB;
    format!("{megabytes:.1} MB")
}
123
124/// Join up to [`NOTE_EXAMPLE_CAP`] `path (size)` examples (already ordered) into
125/// one comma-separated string, collapsing the tail to "and N more".
126fn summarize_examples(root: &Path, examples: &[SizedFile]) -> String {
127    let shown: Vec<String> = examples
128        .iter()
129        .take(NOTE_EXAMPLE_CAP)
130        .map(|(path, size)| {
131            let display = path
132                .strip_prefix(root)
133                .unwrap_or(path)
134                .display()
135                .to_string()
136                .replace('\\', "/");
137            format!("{display} ({})", format_size_mb(*size))
138        })
139        .collect();
140    let remaining = examples.len().saturating_sub(NOTE_EXAMPLE_CAP);
141    if remaining > 0 {
142        format!("{}, and {remaining} more", shown.join(", "))
143    } else {
144        shown.join(", ")
145    }
146}
147
148/// Split discovered `(path, size)` pairs into the kept set and the set skipped
149/// for exceeding `max_file_size_bytes`. Declaration files are never skipped.
150fn partition_by_size(
151    raw: Vec<SizedFile>,
152    max_file_size_bytes: Option<u64>,
153) -> (Vec<SizedFile>, Vec<SizedFile>) {
154    let Some(limit) = max_file_size_bytes else {
155        return (raw, Vec::new());
156    };
157    raw.into_iter()
158        .partition(|(path, size)| *size <= limit || is_declaration_file(path))
159}
160
161/// Split discovered `(path, size)` pairs into files kept for parsing and files
162/// skipped because they look like generated minified JavaScript.
163fn partition_minified_generated_js(
164    raw: Vec<SizedFile>,
165    max_file_size_bytes: Option<u64>,
166) -> (Vec<SizedFile>, Vec<SizedFile>) {
167    if max_file_size_bytes.is_none() {
168        return (raw, Vec::new());
169    }
170    raw.into_iter()
171        .partition(|(path, size)| !is_probably_minified_generated_js(path, *size))
172}
173
174/// Record the skipped files in the workspace-diagnostics registry (so they
175/// surface in `workspace_diagnostics[]` JSON) and emit one aggregated
176/// `tracing::warn!` so a human running `fallow` sees what was dropped. Mirrors
177/// the JSON-plus-gated-warn pattern used for undeclared workspaces.
178fn report_skipped_large_files(config: &ResolvedConfig, skipped: &[SizedFile]) {
179    if skipped.is_empty() {
180        return;
181    }
182    let diagnostics: Vec<WorkspaceDiagnostic> = skipped
183        .iter()
184        .map(|(path, size_bytes)| {
185            WorkspaceDiagnostic::new(
186                &config.root,
187                path.clone(),
188                WorkspaceDiagnosticKind::SkippedLargeFile {
189                    size_bytes: *size_bytes,
190                },
191            )
192        })
193        .collect();
194    fallow_config::append_workspace_diagnostics(&config.root, diagnostics);
195
196    let mut sorted: Vec<SizedFile> = skipped.to_vec();
197    sorted.sort_unstable_by_key(|f| std::cmp::Reverse(f.1));
198    let count = skipped.len();
199    if !config.quiet
200        && should_emit_note_once(format!(
201            "skip::{}::{count}::{}",
202            config.root.display(),
203            sorted.first().map_or(0, |f| f.1)
204        ))
205    {
206        let examples = summarize_examples(&config.root, &sorted);
207        let noun = if count == 1 { "file" } else { "files" };
208        tracing::warn!(
209            "fallow: skipped {count} {noun} over the max file size limit ({examples}). \
210             Raise the limit with --max-file-size <MB> (or FALLOW_MAX_FILE_SIZE), or add them to ignorePatterns."
211        );
212    }
213}
214
215/// Record generated minified JS files skipped before parsing.
216fn report_skipped_minified_files(config: &ResolvedConfig, skipped: &[SizedFile]) {
217    if skipped.is_empty() {
218        return;
219    }
220    let diagnostics: Vec<WorkspaceDiagnostic> = skipped
221        .iter()
222        .map(|(path, size_bytes)| {
223            WorkspaceDiagnostic::new(
224                &config.root,
225                path.clone(),
226                WorkspaceDiagnosticKind::SkippedMinifiedFile {
227                    size_bytes: *size_bytes,
228                },
229            )
230        })
231        .collect();
232    fallow_config::append_workspace_diagnostics(&config.root, diagnostics);
233
234    let mut sorted: Vec<SizedFile> = skipped.to_vec();
235    sorted.sort_unstable_by_key(|f| std::cmp::Reverse(f.1));
236    let count = skipped.len();
237    if !config.quiet
238        && should_emit_note_once(format!(
239            "minified::{}::{count}::{}",
240            config.root.display(),
241            sorted.first().map_or(0, |f| f.1)
242        ))
243    {
244        let examples = summarize_examples(&config.root, &sorted);
245        let noun = if count == 1 { "file" } else { "files" };
246        let pronoun = if count == 1 { "it" } else { "them" };
247        tracing::warn!(
248            "fallow: skipped {count} minified generated JS {noun} ({examples}). \
249             Add {pronoun} to ignorePatterns, rename {pronoun} with a .min.js suffix, or use --max-file-size 0 to analyze {pronoun}."
250        );
251    }
252}
253
254/// Build the pre-parse largest-files note, or `None` when the discovered set is
255/// neither unusually large nor contains an unusually large file. Pure so the
256/// pluralization, floor filtering, and count-only fallback are unit-testable
257/// without a tracing subscriber. See issue #1086.
258fn build_largest_files_note(root: &Path, files: &[DiscoveredFile]) -> Option<String> {
259    if files.is_empty() {
260        return None;
261    }
262    let largest = files.iter().map(|f| f.size_bytes).max().unwrap_or(0);
263    if files.len() <= LARGE_SET_THRESHOLD && largest < LARGE_FILE_NOTE_BYTES {
264        return None;
265    }
266    let count = files.len();
267    let noun = if count == 1 { "file" } else { "files" };
268    let mut by_size: Vec<SizedFile> = files
269        .iter()
270        .filter(|f| f.size_bytes >= NOTE_FILE_FLOOR_BYTES)
271        .map(|f| (f.path.clone(), f.size_bytes))
272        .collect();
273    by_size.sort_unstable_by_key(|f| std::cmp::Reverse(f.1));
274    if by_size.is_empty() {
275        // Large file SET with no individually large file: report the count only,
276        // omitting a "largest:" list that would otherwise be all sub-floor noise.
277        return Some(format!(
278            "fallow: discovered {count} {noun}. If analysis stalls or runs out of memory, \
279             exclude large generated files via ignorePatterns or --max-file-size."
280        ));
281    }
282    let examples = summarize_examples(root, &by_size);
283    Some(format!(
284        "fallow: discovered {count} {noun}; largest: {examples}. If analysis stalls or runs out of memory, \
285         exclude large generated files via ignorePatterns or --max-file-size."
286    ))
287}
288
289/// Emit a pre-parse note listing the largest kept files when the discovered set
290/// is unusually large or contains an unusually large file, so an out-of-memory
291/// hang at the parse stage is diagnosable (issue #1086). Visible before the
292/// expensive parse begins, so it survives a subsequent crash.
293fn note_largest_files(config: &ResolvedConfig, files: &[DiscoveredFile]) {
294    if config.quiet {
295        return;
296    }
297    if let Some(message) = build_largest_files_note(&config.root, files)
298        && should_emit_note_once(format!("note::{}::{}", config.root.display(), files.len()))
299    {
300        tracing::warn!("{message}");
301    }
302}
303
/// Package-scoped hidden directories that source discovery should traverse.
///
/// A scope pairs a `root` directory with the hidden directory names that are
/// additionally allowed for entries located beneath that root.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HiddenDirScope {
    root: PathBuf,
    dirs: Vec<String>,
}

impl HiddenDirScope {
    /// Create a scope that allows the hidden directory names in `dirs` for
    /// paths under `root`.
    #[must_use]
    pub fn new(root: PathBuf, dirs: Vec<String>) -> Self {
        Self { root, dirs }
    }

    /// Directory beneath which this scope applies.
    #[must_use]
    pub fn root(&self) -> &Path {
        &self.root
    }

    /// Hidden directory names this scope allows.
    #[must_use]
    pub fn dirs(&self) -> &[String] {
        &self.dirs
    }

    /// Whether the entry at `path`, whose file name is `name`, is admitted by
    /// this scope: `path` must start with the scope root and `name` must equal
    /// one of the scope's directory names.
    fn allows(&self, path: &Path, name: &OsStr) -> bool {
        path.starts_with(&self.root) && self.dirs.iter().any(|dir| OsStr::new(dir) == name)
    }
}
330
331/// Per-thread file collector for the parallel walker.
332///
333/// Source files (by extension) flow to `shared`; when `config_shared` is set,
334/// non-source files admitted by the config-candidate type group flow to it
335/// instead. The two channels are disjoint and the source channel is byte-for-byte
336/// identical to the config-capture-disabled walk.
337struct FileVisitor<'a> {
338    root: &'a Path,
339    ignore_patterns: &'a globset::GlobSet,
340    production_excludes: &'a Option<globset::GlobSet>,
341    shared: &'a Mutex<Vec<(std::path::PathBuf, u64)>>,
342    config_shared: Option<&'a Mutex<Vec<std::path::PathBuf>>>,
343    local: Vec<(std::path::PathBuf, u64)>,
344    config_local: Vec<std::path::PathBuf>,
345}
346
347impl ignore::ParallelVisitor for FileVisitor<'_> {
348    fn visit(&mut self, result: Result<ignore::DirEntry, ignore::Error>) -> ignore::WalkState {
349        let Ok(entry) = result else {
350            return ignore::WalkState::Continue;
351        };
352        if entry.file_type().is_some_and(|ft| ft.is_dir()) {
353            return ignore::WalkState::Continue;
354        }
355        let relative = entry
356            .path()
357            .strip_prefix(self.root)
358            .unwrap_or_else(|_| entry.path());
359        if self.ignore_patterns.is_match(relative) {
360            return ignore::WalkState::Continue;
361        }
362        if self
363            .production_excludes
364            .as_ref()
365            .is_some_and(|excludes| excludes.is_match(relative))
366        {
367            return ignore::WalkState::Continue;
368        }
369        if has_source_extension(entry.path()) {
370            let size_bytes = entry.metadata().map_or(0, |m| m.len());
371            self.local.push((entry.into_path(), size_bytes));
372        } else if self.config_shared.is_some() {
373            // A non-source file admitted by the config-candidate type group. No
374            // size metadata is needed; these are pattern-matched, never parsed.
375            self.config_local.push(entry.into_path());
376        }
377        ignore::WalkState::Continue
378    }
379}
380
381impl Drop for FileVisitor<'_> {
382    #[expect(
383        clippy::expect_used,
384        reason = "poisoned walk collector lock means worker state is unrecoverable"
385    )]
386    fn drop(&mut self) {
387        if !self.local.is_empty() {
388            self.shared
389                .lock()
390                .expect("walk collector lock poisoned")
391                .append(&mut self.local);
392        }
393        if let Some(config_shared) = self.config_shared
394            && !self.config_local.is_empty()
395        {
396            config_shared
397                .lock()
398                .expect("walk config collector lock poisoned")
399                .append(&mut self.config_local);
400        }
401    }
402}
403
404/// Builder that creates per-thread `FileVisitor` instances for the parallel walker.
405struct FileVisitorBuilder<'a> {
406    root: &'a Path,
407    ignore_patterns: &'a globset::GlobSet,
408    production_excludes: &'a Option<globset::GlobSet>,
409    shared: &'a Mutex<Vec<(std::path::PathBuf, u64)>>,
410    config_shared: Option<&'a Mutex<Vec<std::path::PathBuf>>>,
411}
412
413impl<'s> ignore::ParallelVisitorBuilder<'s> for FileVisitorBuilder<'s> {
414    fn build(&mut self) -> Box<dyn ignore::ParallelVisitor + 's> {
415        Box::new(FileVisitor {
416            root: self.root,
417            ignore_patterns: self.ignore_patterns,
418            production_excludes: self.production_excludes,
419            shared: self.shared,
420            config_shared: self.config_shared,
421            local: Vec::new(),
422            config_local: Vec::new(),
423        })
424    }
425}
426
/// File extensions (without the leading dot) that the walk treats as source
/// files. Matching against this list is case-sensitive.
pub const SOURCE_EXTENSIONS: &[&str] = &[
    // TypeScript and JavaScript families
    "ts", "tsx", "mts", "cts", "gts", "js", "jsx", "mjs", "cjs", "gjs",
    // Component and content formats
    "vue", "svelte", "astro", "mdx",
    // Stylesheets
    "css", "scss", "sass", "less",
    // Markup and GraphQL
    "html", "graphql", "gql",
];
431
/// Globs for test, dev, and story files that are left out of the walk in
/// production mode.
pub const PRODUCTION_EXCLUDE_PATTERNS: &[&str] = &[
    // Files marked by an infix in their name.
    "**/*.test.*",
    "**/*.spec.*",
    "**/*.e2e.*",
    "**/*.e2e-spec.*",
    "**/*.bench.*",
    "**/*.fixture.*",
    "**/*.stories.*",
    "**/*.story.*",
    // Conventional test-support directories.
    "**/__tests__/**",
    "**/__mocks__/**",
    "**/__snapshots__/**",
    "**/__fixtures__/**",
    "**/test/**",
    "**/tests/**",
    // Config files. NOTE(review): unlike its neighbours this pattern has no
    // `**/` prefix — presumably intentional; confirm it is meant to be
    // narrower than the rest.
    "*.config.*",
    // Dot-prefixed script files at any depth.
    "**/.*.js",
    "**/.*.ts",
    "**/.*.mjs",
    "**/.*.cjs",
];
454
455/// Check if a hidden directory name is on the allowlist.
456pub fn is_allowed_hidden_dir(name: &OsStr) -> bool {
457    ALLOWED_HIDDEN_DIRS.iter().any(|&d| OsStr::new(d) == name)
458}
459
460fn is_allowed_scoped_hidden_dir(
461    name: &OsStr,
462    path: &Path,
463    additional_hidden_dir_scopes: &[HiddenDirScope],
464) -> bool {
465    additional_hidden_dir_scopes
466        .iter()
467        .any(|scope| scope.allows(path, name))
468}
469
470/// Check if a hidden directory entry should be allowed through the filter.
471///
472/// Returns `true` if the entry is not hidden or is on the allowlist.
473/// Hidden files (not directories) are always allowed through since the type
474/// filter handles them.
475fn is_allowed_hidden(entry: &ignore::DirEntry) -> bool {
476    is_allowed_hidden_with_scopes(entry, &[])
477}
478
479fn is_allowed_hidden_with_scopes(
480    entry: &ignore::DirEntry,
481    additional_hidden_dir_scopes: &[HiddenDirScope],
482) -> bool {
483    let name = entry.file_name();
484    let name_str = name.to_string_lossy();
485
486    if !name_str.starts_with('.') {
487        return true;
488    }
489
490    if entry.file_type().is_some_and(|ft| !ft.is_dir()) {
491        return true;
492    }
493
494    is_allowed_hidden_dir(name)
495        || is_allowed_scoped_hidden_dir(name, entry.path(), additional_hidden_dir_scopes)
496}
497
498/// Discover all source files in the project.
499///
500/// # Panics
501///
502/// Panics if the file type glob or progress template is invalid (compile-time constants).
503pub fn discover_files(config: &ResolvedConfig) -> Vec<DiscoveredFile> {
504    discover_files_with_additional_hidden_dirs(config, &[])
505}
506
507/// The set of config-file basenames (last path component of every built-in
508/// plugin `config_patterns()` entry, brace forms preserved) that the walk should
509/// additionally admit so non-source configs (`tsconfig.json`, `bunfig.toml`,
510/// `.eslintrc.json`, ...) can be captured in one traversal instead of being
511/// re-discovered by a filesystem re-walk in `discover_config_files`.
512///
513/// Derived live from the built-in plugin list, so it can never drift behind a
514/// new plugin's config patterns. Source-extension config basenames
515/// (`vite.config.{ts,js}`) are admitted too, but the walk visitor routes them
516/// back to the source channel by extension, so the config channel only ever
517/// collects genuinely non-source files.
518fn config_candidate_basename_globs() -> &'static [String] {
519    static GLOBS: OnceLock<Vec<String>> = OnceLock::new();
520    GLOBS.get_or_init(|| {
521        let mut set: FxHashSet<String> = FxHashSet::default();
522        for plugin in crate::plugins::registry::builtin::create_builtin_plugins() {
523            for pattern in plugin.config_patterns() {
524                let basename = pattern.rsplit('/').next().unwrap_or(pattern);
525                set.insert(basename.to_string());
526            }
527        }
528        let mut globs: Vec<String> = set.into_iter().collect();
529        globs.sort_unstable();
530        globs
531    })
532}
533
534/// True when `path`'s extension is one of the known source extensions, i.e. the
535/// file belongs in the source channel rather than the config-candidate channel.
536fn has_source_extension(path: &Path) -> bool {
537    path.extension()
538        .and_then(OsStr::to_str)
539        .is_some_and(|ext| SOURCE_EXTENSIONS.contains(&ext))
540}
541
542/// Build the file-type filter. Always selects known source extensions; when
543/// `capture_config` is set, also selects config-candidate basenames so the
544/// walker yields them for the second collection channel.
545#[expect(
546    clippy::expect_used,
547    reason = "source file globs are hard-coded compile-time constants"
548)]
549fn build_walk_types(capture_config: bool) -> ignore::types::Types {
550    let mut types_builder = ignore::types::TypesBuilder::new();
551    for ext in SOURCE_EXTENSIONS {
552        types_builder
553            .add("source", &format!("*.{ext}"))
554            .expect("valid glob");
555    }
556    types_builder.select("source");
557    if capture_config {
558        for glob in config_candidate_basename_globs() {
559            // Ignore individually-invalid plugin patterns rather than panicking;
560            // a malformed pattern simply fails to admit its config file (the
561            // pre-existing filesystem fallback still covers production mode).
562            let _ = types_builder.add("config", glob);
563        }
564        types_builder.select("config");
565    }
566    types_builder.build().expect("valid types")
567}
568
569/// Construct the parallel walker, applying the appropriate hidden-dir filter.
570/// When `capture_config` is set the walk also yields config-candidate files for
571/// the secondary collection channel.
572fn build_source_walk_builder(
573    config: &ResolvedConfig,
574    additional_hidden_dir_scopes: &[HiddenDirScope],
575    capture_config: bool,
576) -> WalkBuilder {
577    let mut walk_builder = WalkBuilder::new(&config.root);
578    walk_builder
579        .hidden(false)
580        .git_ignore(true)
581        .git_global(true)
582        .git_exclude(true)
583        .types(build_walk_types(capture_config))
584        .threads(config.threads);
585    if additional_hidden_dir_scopes.is_empty() {
586        walk_builder.filter_entry(is_allowed_hidden);
587    } else {
588        let scopes = additional_hidden_dir_scopes.to_vec();
589        walk_builder.filter_entry(move |entry| is_allowed_hidden_with_scopes(entry, &scopes));
590    }
591    walk_builder
592}
593
594/// Compile the production-mode exclude glob set, or `None` outside production mode.
595fn build_production_excludes(config: &ResolvedConfig) -> Option<globset::GlobSet> {
596    if !config.production {
597        return None;
598    }
599    let mut builder = globset::GlobSetBuilder::new();
600    for pattern in PRODUCTION_EXCLUDE_PATTERNS {
601        if let Ok(glob) = globset::GlobBuilder::new(pattern)
602            .literal_separator(true)
603            .build()
604        {
605            builder.add(glob);
606        }
607    }
608    builder.build().ok()
609}
610
611/// Discover all source files in the project, with package-scoped hidden dirs.
612///
613/// # Panics
614///
615/// Panics if the file type glob or progress template is invalid (compile-time constants).
616pub fn discover_files_with_additional_hidden_dirs(
617    config: &ResolvedConfig,
618    additional_hidden_dir_scopes: &[HiddenDirScope],
619) -> Vec<DiscoveredFile> {
620    discover_files_and_config_candidates(config, additional_hidden_dir_scopes).0
621}
622
623/// Discover source files AND, in one traversal, the non-source config-candidate
624/// files (`tsconfig.json`, `bunfig.toml`, `.eslintrc.json`, ...) used by
625/// `discover_config_files` to resolve plugin config patterns in-memory instead of
626/// re-walking the filesystem.
627///
628/// The returned `Vec<DiscoveredFile>` is byte-for-byte identical to the
629/// config-capture-disabled walk: config candidates are routed to the second
630/// return value by extension and never enter the source channel. Config capture
631/// is skipped in production mode (where the walk applies `PRODUCTION_EXCLUDE_PATTERNS`
632/// and `discover_config_files` keeps its filesystem path), so the second vector is
633/// empty there.
634///
635/// # Panics
636///
637/// Panics if the file type glob or progress template is invalid (compile-time constants).
638#[expect(
639    clippy::cast_possible_truncation,
640    reason = "file count is bounded by project size, well under u32::MAX"
641)]
642#[expect(clippy::expect_used, reason = "the collector lock must remain usable")]
643pub fn discover_files_and_config_candidates(
644    config: &ResolvedConfig,
645    additional_hidden_dir_scopes: &[HiddenDirScope],
646) -> (Vec<DiscoveredFile>, Vec<PathBuf>) {
647    let _span = tracing::info_span!("discover_files").entered();
648
649    let capture_config = !config.production;
650    let walk_builder =
651        build_source_walk_builder(config, additional_hidden_dir_scopes, capture_config);
652    let production_excludes = build_production_excludes(config);
653
654    let collected: Mutex<Vec<(std::path::PathBuf, u64)>> = Mutex::new(Vec::new());
655    let config_collected: Mutex<Vec<std::path::PathBuf>> = Mutex::new(Vec::new());
656    let mut visitor_builder = FileVisitorBuilder {
657        root: &config.root,
658        ignore_patterns: &config.ignore_patterns,
659        production_excludes: &production_excludes,
660        shared: &collected,
661        config_shared: capture_config.then_some(&config_collected),
662    };
663    walk_builder.build_parallel().visit(&mut visitor_builder);
664
665    let mut raw = collected
666        .into_inner()
667        .expect("walk collector lock poisoned");
668    // ADR-004 (path-sorted FileIds): the parallel walk visits files in
669    // nondeterministic order, so we sort by absolute path BEFORE the
670    // `.enumerate()` FileId assignment below. This is the stable-cross-run
671    // identity invariant the persisted graph cache depends on: an identical
672    // file set yields identical FileIds, so a cache hit (same paths +
673    // fingerprints) can trust graph data persisted by FileId. Do not replace
674    // this with insertion-order assignment.
675    raw.sort_unstable_by(|a, b| a.0.cmp(&b.0));
676
677    let mut config_candidates = config_collected
678        .into_inner()
679        .expect("walk config collector lock poisoned");
680    config_candidates.sort_unstable();
681
682    // Drop any source-discovery diagnostics from a previous pass (watch-mode
683    // rerun, combined-mode re-walk) BEFORE re-recording this walk's skips, so a
684    // file that is no longer skipped does not leave a stale entry (issue #1086).
685    fallow_config::clear_source_discovery_diagnostics(&config.root);
686    let (kept, skipped) = partition_by_size(raw, config.max_file_size_bytes);
687    report_skipped_large_files(config, &skipped);
688    let (kept, skipped_minified) =
689        partition_minified_generated_js(kept, config.max_file_size_bytes);
690    report_skipped_minified_files(config, &skipped_minified);
691
692    let files: Vec<DiscoveredFile> = kept
693        .into_iter()
694        .enumerate()
695        .map(|(idx, (path, size_bytes))| DiscoveredFile {
696            id: FileId(idx as u32),
697            path,
698            size_bytes,
699        })
700        .collect();
701
702    note_largest_files(config, &files);
703
704    (files, config_candidates)
705}
706
707#[cfg(test)]
708mod tests {
709    use std::ffi::OsStr;
710
711    use super::*;
712
713    /// Reproduce the FileId-assignment rule used by `walk_source_files`: sort by
714    /// absolute path, then assign `FileId(idx)` in that order.
715    fn assign_file_ids(mut raw: Vec<(std::path::PathBuf, u64)>) -> Vec<DiscoveredFile> {
716        raw.sort_unstable_by(|a, b| a.0.cmp(&b.0));
717        raw.into_iter()
718            .enumerate()
719            .map(|(idx, (path, size_bytes))| DiscoveredFile {
720                id: FileId(idx as u32),
721                path,
722                size_bytes,
723            })
724            .collect()
725    }
726
727    /// ADR-004: an identical file set must yield identical FileIds regardless of
728    /// the (nondeterministic, parallel) discovery order. The persisted graph
729    /// cache keys persisted graph data by FileId, so a cache HIT (same paths +
730    /// fingerprints) must reproduce the exact same FileId-to-path mapping the
731    /// graph was built against. This guards the cache's soundness prerequisite.
732    #[test]
733    fn file_id_assignment_is_deterministic_for_identical_file_set() {
734        let paths = [
735            "/project/src/z.ts",
736            "/project/src/a.ts",
737            "/project/src/components/Button.tsx",
738            "/project/src/components/Button.module.css",
739            "/project/index.ts",
740        ];
741
742        // Two independent walks that observe the same paths in DIFFERENT orders.
743        let walk_one: Vec<(std::path::PathBuf, u64)> = paths
744            .iter()
745            .map(|p| (std::path::PathBuf::from(p), 10))
746            .collect();
747        let mut walk_two = walk_one.clone();
748        walk_two.reverse();
749
750        let files_one = assign_file_ids(walk_one);
751        let files_two = assign_file_ids(walk_two);
752
753        // Identical (FileId -> path) mapping despite the different walk orders.
754        assert_eq!(files_one.len(), files_two.len());
755        for (a, b) in files_one.iter().zip(files_two.iter()) {
756            assert_eq!(a.id, b.id);
757            assert_eq!(a.path, b.path);
758        }
759
760        // The mapping is the path-sorted order, and each FileId equals its index
761        // (the density invariant `project.rs` asserts and the graph relies on).
762        for (idx, file) in files_one.iter().enumerate() {
763            assert_eq!(file.id, FileId(idx as u32));
764        }
765        assert_eq!(
766            files_one[0].path,
767            std::path::PathBuf::from("/project/index.ts")
768        );
769    }
770
771    #[test]
772    fn file_id_assignment_recomputes_after_rename_or_delete() {
773        let before = assign_file_ids(vec![
774            (std::path::PathBuf::from("/project/src/a.ts"), 10),
775            (std::path::PathBuf::from("/project/src/b.ts"), 10),
776            (std::path::PathBuf::from("/project/src/c.ts"), 10),
777        ]);
778        let after_delete = assign_file_ids(vec![
779            (std::path::PathBuf::from("/project/src/a.ts"), 10),
780            (std::path::PathBuf::from("/project/src/c.ts"), 10),
781        ]);
782        let after_rename = assign_file_ids(vec![
783            (std::path::PathBuf::from("/project/src/a.ts"), 10),
784            (std::path::PathBuf::from("/project/src/c.ts"), 10),
785            (std::path::PathBuf::from("/project/src/d.ts"), 10),
786        ]);
787
788        assert_eq!(before[0].id, FileId(0));
789        assert_eq!(before[1].id, FileId(1));
790        assert_eq!(before[2].id, FileId(2));
791        assert_eq!(after_delete[0].id, FileId(0));
792        assert_eq!(after_delete[1].id, FileId(1));
793        assert_eq!(
794            after_delete[1].path,
795            std::path::PathBuf::from("/project/src/c.ts")
796        );
797        assert_eq!(after_rename[0].id, FileId(0));
798        assert_eq!(after_rename[1].id, FileId(1));
799        assert_eq!(
800            after_rename[1].path,
801            std::path::PathBuf::from("/project/src/c.ts")
802        );
803        assert_eq!(after_rename[2].id, FileId(2));
804        assert_eq!(
805            after_rename[2].path,
806            std::path::PathBuf::from("/project/src/d.ts")
807        );
808    }
809
810    #[test]
811    fn allowed_hidden_dirs() {
812        assert!(is_allowed_hidden_dir(OsStr::new(".storybook")));
813        assert!(is_allowed_hidden_dir(OsStr::new(".vitepress")));
814        assert!(is_allowed_hidden_dir(OsStr::new(".well-known")));
815        assert!(is_allowed_hidden_dir(OsStr::new(".changeset")));
816        assert!(is_allowed_hidden_dir(OsStr::new(".github")));
817    }
818
819    #[test]
820    fn disallowed_hidden_dirs() {
821        assert!(!is_allowed_hidden_dir(OsStr::new(".git")));
822        assert!(!is_allowed_hidden_dir(OsStr::new(".cache")));
823        assert!(!is_allowed_hidden_dir(OsStr::new(".vscode")));
824        assert!(!is_allowed_hidden_dir(OsStr::new(".fallow")));
825        assert!(!is_allowed_hidden_dir(OsStr::new(".next")));
826    }
827
828    #[test]
829    fn non_hidden_dirs_not_in_allowlist() {
830        assert!(!is_allowed_hidden_dir(OsStr::new("src")));
831        assert!(!is_allowed_hidden_dir(OsStr::new("node_modules")));
832    }
833
834    #[test]
835    fn source_extensions_include_typescript() {
836        assert!(SOURCE_EXTENSIONS.contains(&"ts"));
837        assert!(SOURCE_EXTENSIONS.contains(&"tsx"));
838        assert!(SOURCE_EXTENSIONS.contains(&"mts"));
839        assert!(SOURCE_EXTENSIONS.contains(&"cts"));
840        assert!(SOURCE_EXTENSIONS.contains(&"gts"));
841    }
842
843    #[test]
844    fn source_extensions_include_javascript() {
845        assert!(SOURCE_EXTENSIONS.contains(&"js"));
846        assert!(SOURCE_EXTENSIONS.contains(&"jsx"));
847        assert!(SOURCE_EXTENSIONS.contains(&"mjs"));
848        assert!(SOURCE_EXTENSIONS.contains(&"cjs"));
849        assert!(SOURCE_EXTENSIONS.contains(&"gjs"));
850    }
851
852    #[test]
853    fn source_extensions_include_sfc_formats() {
854        assert!(SOURCE_EXTENSIONS.contains(&"vue"));
855        assert!(SOURCE_EXTENSIONS.contains(&"svelte"));
856        assert!(SOURCE_EXTENSIONS.contains(&"astro"));
857    }
858
859    #[test]
860    fn source_extensions_include_styles() {
861        assert!(SOURCE_EXTENSIONS.contains(&"css"));
862        assert!(SOURCE_EXTENSIONS.contains(&"scss"));
863        assert!(SOURCE_EXTENSIONS.contains(&"sass"));
864        assert!(SOURCE_EXTENSIONS.contains(&"less"));
865    }
866
867    #[test]
868    fn source_extensions_exclude_non_source() {
869        assert!(!SOURCE_EXTENSIONS.contains(&"json"));
870        assert!(!SOURCE_EXTENSIONS.contains(&"yaml"));
871        assert!(!SOURCE_EXTENSIONS.contains(&"md"));
872        assert!(!SOURCE_EXTENSIONS.contains(&"png"));
873        assert!(!SOURCE_EXTENSIONS.contains(&"htm"));
874    }
875
876    #[test]
877    fn source_extensions_include_html() {
878        assert!(SOURCE_EXTENSIONS.contains(&"html"));
879    }
880
881    #[test]
882    fn source_extensions_include_graphql_documents() {
883        assert!(SOURCE_EXTENSIONS.contains(&"graphql"));
884        assert!(SOURCE_EXTENSIONS.contains(&"gql"));
885    }
886
887    fn build_production_glob_set() -> globset::GlobSet {
888        let mut builder = globset::GlobSetBuilder::new();
889        for pattern in PRODUCTION_EXCLUDE_PATTERNS {
890            builder.add(
891                globset::GlobBuilder::new(pattern)
892                    .literal_separator(true)
893                    .build()
894                    .expect("valid glob pattern"),
895            );
896        }
897        builder.build().expect("valid glob set")
898    }
899
900    #[test]
901    fn production_excludes_test_files() {
902        let set = build_production_glob_set();
903        assert!(set.is_match("src/Button.test.ts"));
904        assert!(set.is_match("src/utils.spec.tsx"));
905        assert!(set.is_match("src/__tests__/helper.ts"));
906        assert!(!set.is_match("src/Button.ts"));
907        assert!(!set.is_match("src/utils.tsx"));
908    }
909
910    #[test]
911    fn production_excludes_story_files() {
912        let set = build_production_glob_set();
913        assert!(set.is_match("src/Button.stories.tsx"));
914        assert!(set.is_match("src/Card.story.ts"));
915        assert!(!set.is_match("src/Button.tsx"));
916    }
917
918    #[test]
919    fn production_excludes_config_files_at_root_only() {
920        let set = build_production_glob_set();
921        assert!(set.is_match("vitest.config.ts"));
922        assert!(set.is_match("jest.config.js"));
923        assert!(!set.is_match("src/app/app.config.ts"));
924        assert!(!set.is_match("src/app/app.config.server.ts"));
925        assert!(!set.is_match("packages/foo/vitest.config.ts"));
926        assert!(!set.is_match("src/config.ts"));
927    }
928
929    #[test]
930    fn production_patterns_are_valid_globs() {
931        let _ = build_production_glob_set();
932    }
933
934    #[test]
935    fn disallowed_hidden_dirs_idea() {
936        assert!(!is_allowed_hidden_dir(OsStr::new(".idea")));
937    }
938
939    #[test]
940    fn source_extensions_include_mdx() {
941        assert!(SOURCE_EXTENSIONS.contains(&"mdx"));
942    }
943
944    #[test]
945    fn source_extensions_exclude_image_and_data_formats() {
946        assert!(!SOURCE_EXTENSIONS.contains(&"png"));
947        assert!(!SOURCE_EXTENSIONS.contains(&"jpg"));
948        assert!(!SOURCE_EXTENSIONS.contains(&"svg"));
949        assert!(!SOURCE_EXTENSIONS.contains(&"txt"));
950        assert!(!SOURCE_EXTENSIONS.contains(&"csv"));
951        assert!(!SOURCE_EXTENSIONS.contains(&"wasm"));
952    }
953
954    #[test]
955    fn is_declaration_file_matches_dts_variants() {
956        assert!(is_declaration_file(Path::new("env.d.ts")));
957        assert!(is_declaration_file(Path::new("src/auto-imports.d.ts")));
958        assert!(is_declaration_file(Path::new("mod.d.mts")));
959        assert!(is_declaration_file(Path::new("compat.d.cts")));
960        assert!(!is_declaration_file(Path::new("index.ts")));
961        assert!(!is_declaration_file(Path::new("component.tsx")));
962        assert!(!is_declaration_file(Path::new("notes.d.txt")));
963    }
964
965    #[test]
966    fn format_size_mb_renders_one_decimal() {
967        assert_eq!(format_size_mb(5 * 1024 * 1024), "5.0 MB");
968        assert_eq!(format_size_mb(1024 * 1024 + 512 * 1024), "1.5 MB");
969        assert_eq!(format_size_mb(0), "0.0 MB");
970    }
971
972    #[test]
973    fn partition_by_size_no_limit_keeps_all() {
974        let raw = vec![(PathBuf::from("a.ts"), 10), (PathBuf::from("b.ts"), 10_000)];
975        let (kept, skipped) = partition_by_size(raw, None);
976        assert_eq!(kept.len(), 2);
977        assert!(skipped.is_empty());
978    }
979
980    #[test]
981    fn partition_by_size_skips_strictly_over_limit() {
982        let raw = vec![
983            (PathBuf::from("under.ts"), 99),
984            (PathBuf::from("exact.ts"), 100),
985            (PathBuf::from("over.ts"), 101),
986        ];
987        let (kept, skipped) = partition_by_size(raw, Some(100));
988        let kept_has = |name: &str| kept.iter().any(|(p, _)| p.as_path() == Path::new(name));
989        assert!(kept_has("under.ts"));
990        assert!(
991            kept_has("exact.ts"),
992            "a file exactly at the limit is kept (skip is strictly-greater)"
993        );
994        assert_eq!(skipped.len(), 1);
995        assert_eq!(skipped[0].0, PathBuf::from("over.ts"));
996    }
997
998    #[test]
999    fn partition_by_size_exempts_declaration_files() {
1000        let raw = vec![
1001            (PathBuf::from("huge.ts"), 10_000),
1002            (PathBuf::from("auto-imports.d.ts"), 10_000),
1003        ];
1004        let (kept, skipped) = partition_by_size(raw, Some(100));
1005        assert!(
1006            kept.iter()
1007                .any(|(p, _)| p.as_path() == Path::new("auto-imports.d.ts")),
1008            "declaration files are exempt from the size skip regardless of size"
1009        );
1010        assert_eq!(skipped.len(), 1);
1011        assert_eq!(skipped[0].0, PathBuf::from("huge.ts"));
1012    }
1013
1014    fn disco(path: &str, size_bytes: u64) -> DiscoveredFile {
1015        DiscoveredFile {
1016            id: FileId(0),
1017            path: PathBuf::from(path),
1018            size_bytes,
1019        }
1020    }
1021
1022    #[test]
1023    fn largest_files_note_below_threshold_is_none() {
1024        let files = [disco("a.ts", 100), disco("b.ts", 200)];
1025        assert!(build_largest_files_note(Path::new("/p"), &files).is_none());
1026    }
1027
1028    #[test]
1029    fn largest_files_note_single_file_uses_singular() {
1030        let files = [disco("big.ts", 5 * 1024 * 1024)];
1031        let note = build_largest_files_note(Path::new("/p"), &files).expect("note fires");
1032        assert!(
1033            note.contains("discovered 1 file;"),
1034            "singular noun on the single-big-file path (issue #1086 regression): {note}"
1035        );
1036        assert!(!note.contains("discovered 1 files"));
1037        assert!(note.contains("big.ts (5.0 MB)"));
1038    }
1039
1040    #[test]
1041    fn largest_files_note_filters_sub_floor_files() {
1042        let files = [disco("big.ts", 5 * 1024 * 1024), disco("tiny.ts", 10)];
1043        let note = build_largest_files_note(Path::new("/p"), &files).expect("note fires");
1044        assert!(note.contains("discovered 2 files;"));
1045        assert!(note.contains("big.ts (5.0 MB)"));
1046        assert!(
1047            !note.contains("tiny.ts"),
1048            "sub-floor files are not listed as `0.0 MB` chaff: {note}"
1049        );
1050    }
1051
1052    #[test]
1053    fn largest_files_note_large_set_no_big_file_omits_list() {
1054        let files: Vec<DiscoveredFile> = (0..=LARGE_SET_THRESHOLD)
1055            .map(|i| disco(&format!("f{i}.ts"), 100))
1056            .collect();
1057        let note = build_largest_files_note(Path::new("/p"), &files).expect("large set fires");
1058        assert!(note.contains(&format!("discovered {} files", LARGE_SET_THRESHOLD + 1)));
1059        assert!(
1060            !note.contains("largest:"),
1061            "no sub-floor `largest:` list when no file clears the floor: {note}"
1062        );
1063    }
1064
1065    mod discover_files_integration {
1066        use std::path::PathBuf;
1067
1068        use fallow_config::{
1069            DuplicatesConfig, FallowConfig, FlagsConfig, HealthConfig, OutputFormat, ResolveConfig,
1070            RulesConfig,
1071        };
1072
1073        use super::*;
1074
1075        /// Create a minimal ResolvedConfig pointing at the given root directory.
1076        fn make_config(root: PathBuf, production: bool) -> ResolvedConfig {
1077            FallowConfig {
1078                production: production.into(),
1079                ..Default::default()
1080            }
1081            .resolve(root, OutputFormat::Human, 1, true, true, None)
1082        }
1083
1084        /// Helper to collect discovered file names (relative to root) for assertions.
1085        /// Normalizes path separators to `/` for cross-platform test consistency.
1086        fn file_names(files: &[DiscoveredFile], root: &std::path::Path) -> Vec<String> {
1087            files
1088                .iter()
1089                .map(|f| {
1090                    f.path
1091                        .strip_prefix(root)
1092                        .unwrap_or(&f.path)
1093                        .to_string_lossy()
1094                        .replace('\\', "/")
1095                })
1096                .collect()
1097        }
1098
1099        #[test]
1100        fn discovers_source_files_with_valid_extensions() {
1101            let dir = tempfile::tempdir().expect("create temp dir");
1102            let src = dir.path().join("src");
1103            std::fs::create_dir_all(&src).unwrap();
1104
1105            std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();
1106            std::fs::write(src.join("component.tsx"), "export default () => {};").unwrap();
1107            std::fs::write(src.join("utils.js"), "module.exports = {};").unwrap();
1108            std::fs::write(src.join("helper.jsx"), "export const h = 1;").unwrap();
1109            std::fs::write(src.join("config.mjs"), "export default {};").unwrap();
1110            std::fs::write(src.join("legacy.cjs"), "module.exports = {};").unwrap();
1111            std::fs::write(src.join("types.mts"), "export type T = string;").unwrap();
1112            std::fs::write(src.join("compat.cts"), "module.exports = {};").unwrap();
1113
1114            let config = make_config(dir.path().to_path_buf(), false);
1115            let files = discover_files(&config);
1116            let names = file_names(&files, dir.path());
1117
1118            assert!(names.contains(&"src/app.ts".to_string()));
1119            assert!(names.contains(&"src/component.tsx".to_string()));
1120            assert!(names.contains(&"src/utils.js".to_string()));
1121            assert!(names.contains(&"src/helper.jsx".to_string()));
1122            assert!(names.contains(&"src/config.mjs".to_string()));
1123            assert!(names.contains(&"src/legacy.cjs".to_string()));
1124            assert!(names.contains(&"src/types.mts".to_string()));
1125            assert!(names.contains(&"src/compat.cts".to_string()));
1126        }
1127
1128        #[test]
1129        fn excludes_non_source_extensions() {
1130            let dir = tempfile::tempdir().expect("create temp dir");
1131            let src = dir.path().join("src");
1132            std::fs::create_dir_all(&src).unwrap();
1133
1134            std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();
1135
1136            std::fs::write(src.join("data.json"), "{}").unwrap();
1137            std::fs::write(src.join("readme.md"), "# Hello").unwrap();
1138            std::fs::write(src.join("notes.txt"), "notes").unwrap();
1139            std::fs::write(src.join("logo.png"), [0u8; 8]).unwrap();
1140
1141            let config = make_config(dir.path().to_path_buf(), false);
1142            let files = discover_files(&config);
1143            let names = file_names(&files, dir.path());
1144
1145            assert_eq!(names.len(), 1, "only the .ts file should be discovered");
1146            assert!(names.contains(&"src/app.ts".to_string()));
1147        }
1148
1149        #[test]
1150        fn excludes_disallowed_hidden_directories() {
1151            let dir = tempfile::tempdir().expect("create temp dir");
1152
1153            let git_dir = dir.path().join(".git");
1154            std::fs::create_dir_all(&git_dir).unwrap();
1155            std::fs::write(git_dir.join("hooks.ts"), "// git hook").unwrap();
1156
1157            let idea_dir = dir.path().join(".idea");
1158            std::fs::create_dir_all(&idea_dir).unwrap();
1159            std::fs::write(idea_dir.join("workspace.ts"), "// idea").unwrap();
1160
1161            let cache_dir = dir.path().join(".cache");
1162            std::fs::create_dir_all(&cache_dir).unwrap();
1163            std::fs::write(cache_dir.join("cached.js"), "// cached").unwrap();
1164
1165            let src = dir.path().join("src");
1166            std::fs::create_dir_all(&src).unwrap();
1167            std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();
1168
1169            let config = make_config(dir.path().to_path_buf(), false);
1170            let files = discover_files(&config);
1171            let names = file_names(&files, dir.path());
1172
1173            assert_eq!(names.len(), 1, "only src/app.ts should be discovered");
1174            assert!(names.contains(&"src/app.ts".to_string()));
1175        }
1176
1177        #[test]
1178        fn includes_allowed_hidden_directories() {
1179            let dir = tempfile::tempdir().expect("create temp dir");
1180
1181            let storybook = dir.path().join(".storybook");
1182            std::fs::create_dir_all(&storybook).unwrap();
1183            std::fs::write(storybook.join("main.ts"), "export default {};").unwrap();
1184
1185            let github = dir.path().join(".github");
1186            std::fs::create_dir_all(&github).unwrap();
1187            std::fs::write(github.join("actions.js"), "module.exports = {};").unwrap();
1188
1189            let changeset = dir.path().join(".changeset");
1190            std::fs::create_dir_all(&changeset).unwrap();
1191            std::fs::write(changeset.join("config.js"), "module.exports = {};").unwrap();
1192
1193            let config = make_config(dir.path().to_path_buf(), false);
1194            let files = discover_files(&config);
1195            let names = file_names(&files, dir.path());
1196
1197            assert!(
1198                names.contains(&".storybook/main.ts".to_string()),
1199                "files in .storybook should be discovered"
1200            );
1201            assert!(
1202                names.contains(&".github/actions.js".to_string()),
1203                "files in .github should be discovered"
1204            );
1205            assert!(
1206                names.contains(&".changeset/config.js".to_string()),
1207                "files in .changeset should be discovered"
1208            );
1209        }
1210
1211        #[test]
1212        fn default_discovery_excludes_client_and_server_hidden_directories() {
1213            let dir = tempfile::tempdir().expect("create temp dir");
1214            let app = dir.path().join("app");
1215            std::fs::create_dir_all(app.join(".client")).unwrap();
1216            std::fs::create_dir_all(app.join(".server")).unwrap();
1217            std::fs::write(app.join(".client/analytics.ts"), "export const a = 1;").unwrap();
1218            std::fs::write(app.join(".server/db.ts"), "export const db = {};").unwrap();
1219            std::fs::write(app.join("root.tsx"), "export default function Root() {}").unwrap();
1220
1221            let config = make_config(dir.path().to_path_buf(), false);
1222            let files = discover_files(&config);
1223            let names = file_names(&files, dir.path());
1224
1225            assert!(names.contains(&"app/root.tsx".to_string()));
1226            assert!(!names.contains(&"app/.client/analytics.ts".to_string()));
1227            assert!(!names.contains(&"app/.server/db.ts".to_string()));
1228        }
1229
1230        #[test]
1231        fn scoped_hidden_dirs_include_client_and_server_under_package_root() {
1232            let dir = tempfile::tempdir().expect("create temp dir");
1233            let package = dir.path().join("packages/app");
1234            std::fs::create_dir_all(package.join("app/.client")).unwrap();
1235            std::fs::create_dir_all(package.join("app/.server")).unwrap();
1236            std::fs::write(
1237                package.join("app/.client/analytics.ts"),
1238                "export const track = () => {};",
1239            )
1240            .unwrap();
1241            std::fs::write(package.join("app/.server/db.ts"), "export const db = {};").unwrap();
1242
1243            let config = make_config(dir.path().to_path_buf(), false);
1244            let scopes = [HiddenDirScope::new(
1245                package,
1246                vec![".client".to_string(), ".server".to_string()],
1247            )];
1248            let files = discover_files_with_additional_hidden_dirs(&config, &scopes);
1249            let names = file_names(&files, dir.path());
1250
1251            assert!(names.contains(&"packages/app/app/.client/analytics.ts".to_string()));
1252            assert!(names.contains(&"packages/app/app/.server/db.ts".to_string()));
1253        }
1254
1255        #[test]
1256        fn scoped_hidden_dirs_do_not_include_unscoped_packages() {
1257            let dir = tempfile::tempdir().expect("create temp dir");
1258            let active = dir.path().join("packages/active");
1259            let inactive = dir.path().join("packages/inactive");
1260            std::fs::create_dir_all(active.join("app/.server")).unwrap();
1261            std::fs::create_dir_all(inactive.join("app/.server")).unwrap();
1262            std::fs::write(active.join("app/.server/db.ts"), "export const db = {};").unwrap();
1263            std::fs::write(inactive.join("app/.server/db.ts"), "export const db = {};").unwrap();
1264
1265            let config = make_config(dir.path().to_path_buf(), false);
1266            let scopes = [HiddenDirScope::new(active, vec![".server".to_string()])];
1267            let files = discover_files_with_additional_hidden_dirs(&config, &scopes);
1268            let names = file_names(&files, dir.path());
1269
1270            assert!(names.contains(&"packages/active/app/.server/db.ts".to_string()));
1271            assert!(!names.contains(&"packages/inactive/app/.server/db.ts".to_string()));
1272        }
1273
1274        #[test]
1275        fn excludes_root_build_directory() {
1276            let dir = tempfile::tempdir().expect("create temp dir");
1277
1278            std::fs::write(dir.path().join(".ignore"), "/build/\n").unwrap();
1279
1280            let build_dir = dir.path().join("build");
1281            std::fs::create_dir_all(&build_dir).unwrap();
1282            std::fs::write(build_dir.join("output.js"), "// build output").unwrap();
1283
1284            let src = dir.path().join("src");
1285            std::fs::create_dir_all(&src).unwrap();
1286            std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();
1287
1288            let config = make_config(dir.path().to_path_buf(), false);
1289            let files = discover_files(&config);
1290            let names = file_names(&files, dir.path());
1291
1292            assert_eq!(names.len(), 1, "root build/ should be excluded via .ignore");
1293            assert!(names.contains(&"src/app.ts".to_string()));
1294        }
1295
1296        #[test]
1297        fn includes_nested_build_directory() {
1298            let dir = tempfile::tempdir().expect("create temp dir");
1299
1300            let nested_build = dir.path().join("src").join("build");
1301            std::fs::create_dir_all(&nested_build).unwrap();
1302            std::fs::write(nested_build.join("helper.ts"), "export const h = 1;").unwrap();
1303
1304            let config = make_config(dir.path().to_path_buf(), false);
1305            let files = discover_files(&config);
1306            let names = file_names(&files, dir.path());
1307
1308            assert!(
1309                names.contains(&"src/build/helper.ts".to_string()),
1310                "nested build/ directories should be included"
1311            );
1312        }
1313
1314        #[test]
1315        #[expect(
1316            clippy::cast_possible_truncation,
1317            reason = "test file counts are trivially small"
1318        )]
1319        fn file_ids_are_sequential_after_sorting() {
1320            let dir = tempfile::tempdir().expect("create temp dir");
1321            let src = dir.path().join("src");
1322            std::fs::create_dir_all(&src).unwrap();
1323
1324            std::fs::write(src.join("z_last.ts"), "export const z = 1;").unwrap();
1325            std::fs::write(src.join("a_first.ts"), "export const a = 1;").unwrap();
1326            std::fs::write(src.join("m_middle.ts"), "export const m = 1;").unwrap();
1327
1328            let config = make_config(dir.path().to_path_buf(), false);
1329            let files = discover_files(&config);
1330
1331            for (idx, file) in files.iter().enumerate() {
1332                assert_eq!(file.id, FileId(idx as u32), "FileId should be sequential");
1333            }
1334
1335            for pair in files.windows(2) {
1336                assert!(
1337                    pair[0].path < pair[1].path,
1338                    "files should be sorted by path"
1339                );
1340            }
1341        }
1342
1343        #[test]
1344        fn production_mode_excludes_test_files() {
1345            let dir = tempfile::tempdir().expect("create temp dir");
1346            let src = dir.path().join("src");
1347            std::fs::create_dir_all(&src).unwrap();
1348
1349            std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();
1350            std::fs::write(src.join("app.test.ts"), "test('a', () => {});").unwrap();
1351            std::fs::write(src.join("app.spec.ts"), "describe('a', () => {});").unwrap();
1352            std::fs::write(src.join("app.stories.tsx"), "export default {};").unwrap();
1353
1354            let config = make_config(dir.path().to_path_buf(), true);
1355            let files = discover_files(&config);
1356            let names = file_names(&files, dir.path());
1357
1358            assert!(
1359                names.contains(&"src/app.ts".to_string()),
1360                "source files should be included in production mode"
1361            );
1362            assert!(
1363                !names.contains(&"src/app.test.ts".to_string()),
1364                "test files should be excluded in production mode"
1365            );
1366            assert!(
1367                !names.contains(&"src/app.spec.ts".to_string()),
1368                "spec files should be excluded in production mode"
1369            );
1370            assert!(
1371                !names.contains(&"src/app.stories.tsx".to_string()),
1372                "story files should be excluded in production mode"
1373            );
1374        }
1375
1376        #[test]
1377        fn non_production_mode_includes_test_files() {
1378            let dir = tempfile::tempdir().expect("create temp dir");
1379            let src = dir.path().join("src");
1380            std::fs::create_dir_all(&src).unwrap();
1381
1382            std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();
1383            std::fs::write(src.join("app.test.ts"), "test('a', () => {});").unwrap();
1384
1385            let config = make_config(dir.path().to_path_buf(), false);
1386            let files = discover_files(&config);
1387            let names = file_names(&files, dir.path());
1388
1389            assert!(names.contains(&"src/app.ts".to_string()));
1390            assert!(
1391                names.contains(&"src/app.test.ts".to_string()),
1392                "test files should be included in non-production mode"
1393            );
1394        }
1395
1396        #[test]
1397        fn empty_directory_returns_no_files() {
1398            let dir = tempfile::tempdir().expect("create temp dir");
1399            let config = make_config(dir.path().to_path_buf(), false);
1400            let files = discover_files(&config);
1401            assert!(files.is_empty(), "empty project should discover no files");
1402        }
1403
1404        #[test]
1405        fn hidden_files_not_discovered_as_source() {
1406            let dir = tempfile::tempdir().expect("create temp dir");
1407
1408            std::fs::write(dir.path().join(".env"), "SECRET=abc").unwrap();
1409            std::fs::write(dir.path().join(".gitignore"), "node_modules").unwrap();
1410            std::fs::write(dir.path().join(".eslintrc.js"), "module.exports = {};").unwrap();
1411
1412            let src = dir.path().join("src");
1413            std::fs::create_dir_all(&src).unwrap();
1414            std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();
1415
1416            let config = make_config(dir.path().to_path_buf(), false);
1417            let files = discover_files(&config);
1418            let names = file_names(&files, dir.path());
1419
1420            assert!(
1421                !names.contains(&".env".to_string()),
1422                ".env should not be discovered"
1423            );
1424            assert!(
1425                !names.contains(&".gitignore".to_string()),
1426                ".gitignore should not be discovered"
1427            );
1428        }
1429
1430        /// Create a config with custom ignore patterns.
1431        fn make_config_with_ignores(root: PathBuf, ignores: Vec<String>) -> ResolvedConfig {
1432            FallowConfig {
1433                schema: None,
1434                extends: vec![],
1435                entry: vec![],
1436                ignore_patterns: ignores,
1437                framework: vec![],
1438                workspaces: None,
1439                ignore_dependencies: vec![],
1440                ignore_unresolved_imports: vec![],
1441                ignore_exports: vec![],
1442                ignore_catalog_references: vec![],
1443                ignore_dependency_overrides: vec![],
1444                ignore_exports_used_in_file: fallow_config::IgnoreExportsUsedInFileConfig::default(
1445                ),
1446                used_class_members: vec![],
1447                ignore_decorators: vec![],
1448                unused_component_props: fallow_config::UnusedComponentPropsConfig::default(),
1449                duplicates: DuplicatesConfig::default(),
1450                health: HealthConfig::default(),
1451                rules: RulesConfig::default(),
1452                boundaries: fallow_config::BoundaryConfig::default(),
1453                production: false.into(),
1454                plugins: vec![],
1455                rule_packs: vec![],
1456                dynamically_loaded: vec![],
1457                overrides: vec![],
1458                regression: None,
1459                audit: fallow_config::AuditConfig::default(),
1460                codeowners: None,
1461                public_packages: vec![],
1462                flags: FlagsConfig::default(),
1463                security: fallow_config::SecurityConfig::default(),
1464                fix: fallow_config::FixConfig::default(),
1465                resolve: ResolveConfig::default(),
1466                sealed: false,
1467                include_entry_exports: false,
1468                auto_imports: false,
1469                cache: fallow_config::CacheConfig::default(),
1470            }
1471            .resolve(root, OutputFormat::Human, 1, true, true, None)
1472        }
1473
1474        #[test]
1475        fn custom_ignore_patterns_exclude_matching_files() {
1476            let dir = tempfile::tempdir().expect("create temp dir");
1477
1478            let generated = dir.path().join("src").join("api").join("generated");
1479            std::fs::create_dir_all(&generated).unwrap();
1480            std::fs::write(generated.join("client.ts"), "export const api = {};").unwrap();
1481
1482            let client = dir.path().join("src").join("api").join("client");
1483            std::fs::create_dir_all(&client).unwrap();
1484            std::fs::write(client.join("fetch.ts"), "export const fetch = {};").unwrap();
1485
1486            let src = dir.path().join("src");
1487            std::fs::write(src.join("index.ts"), "export const x = 1;").unwrap();
1488
1489            let config = make_config_with_ignores(
1490                dir.path().to_path_buf(),
1491                vec![
1492                    "src/api/generated/**".to_string(),
1493                    "src/api/client/**".to_string(),
1494                ],
1495            );
1496            let files = discover_files(&config);
1497            let names = file_names(&files, dir.path());
1498
1499            assert_eq!(names.len(), 1, "only non-ignored files: {names:?}");
1500            assert!(names.contains(&"src/index.ts".to_string()));
1501        }
1502
1503        #[test]
1504        fn leading_dot_ignore_patterns_exclude_matching_files() {
1505            let dir = tempfile::tempdir().expect("create temp dir");
1506
1507            let generated = dir.path().join("src").join("generated");
1508            std::fs::create_dir_all(&generated).unwrap();
1509            std::fs::write(generated.join("client.ts"), "export const api = {};").unwrap();
1510
1511            let src = dir.path().join("src");
1512            std::fs::write(src.join("index.ts"), "export const x = 1;").unwrap();
1513
1514            let config = make_config_with_ignores(
1515                dir.path().to_path_buf(),
1516                vec!["./src/generated/**".to_string()],
1517            );
1518            let files = discover_files(&config);
1519            let names = file_names(&files, dir.path());
1520
1521            assert_eq!(names, vec!["src/index.ts"]);
1522        }
1523
1524        #[test]
1525        fn default_ignore_patterns_exclude_node_modules_and_dist() {
1526            let dir = tempfile::tempdir().expect("create temp dir");
1527
1528            let nm = dir.path().join("node_modules").join("lodash");
1529            std::fs::create_dir_all(&nm).unwrap();
1530            std::fs::write(nm.join("lodash.js"), "module.exports = {};").unwrap();
1531
1532            let dist = dir.path().join("dist");
1533            std::fs::create_dir_all(&dist).unwrap();
1534            std::fs::write(dist.join("bundle.js"), "// bundled").unwrap();
1535
1536            let src = dir.path().join("src");
1537            std::fs::create_dir_all(&src).unwrap();
1538            std::fs::write(src.join("index.ts"), "export const x = 1;").unwrap();
1539
1540            let config = make_config(dir.path().to_path_buf(), false);
1541            let files = discover_files(&config);
1542            let names = file_names(&files, dir.path());
1543
1544            assert_eq!(names.len(), 1);
1545            assert!(names.contains(&"src/index.ts".to_string()));
1546        }
1547
1548        #[test]
1549        fn default_ignore_patterns_exclude_root_build() {
1550            let dir = tempfile::tempdir().expect("create temp dir");
1551
1552            let build = dir.path().join("build");
1553            std::fs::create_dir_all(&build).unwrap();
1554            std::fs::write(build.join("output.js"), "// built").unwrap();
1555
1556            let nested_build = dir.path().join("src").join("build");
1557            std::fs::create_dir_all(&nested_build).unwrap();
1558            std::fs::write(nested_build.join("helper.ts"), "export const h = 1;").unwrap();
1559
1560            let src = dir.path().join("src");
1561            std::fs::write(src.join("index.ts"), "export const x = 1;").unwrap();
1562
1563            let config = make_config(dir.path().to_path_buf(), false);
1564            let files = discover_files(&config);
1565            let names = file_names(&files, dir.path());
1566
1567            assert_eq!(
1568                names.len(),
1569                2,
1570                "root build/ excluded, nested kept: {names:?}"
1571            );
1572            assert!(names.contains(&"src/index.ts".to_string()));
1573            assert!(names.contains(&"src/build/helper.ts".to_string()));
1574        }
1575
1576        /// Resolve a config then override the per-file size limit in bytes.
1577        fn make_config_with_max_file_size(
1578            root: PathBuf,
1579            max_file_size_bytes: Option<u64>,
1580        ) -> ResolvedConfig {
1581            let mut config = make_config(root, false);
1582            config.max_file_size_bytes = max_file_size_bytes;
1583            config
1584        }
1585
1586        #[test]
1587        fn skips_files_over_max_file_size() {
1588            let dir = tempfile::tempdir().expect("create temp dir");
1589            let src = dir.path().join("src");
1590            std::fs::create_dir_all(&src).unwrap();
1591            std::fs::write(src.join("small.ts"), "export const a = 1;").unwrap();
1592            std::fs::write(src.join("huge.ts"), "x".repeat(5_000)).unwrap();
1593
1594            let config = make_config_with_max_file_size(dir.path().to_path_buf(), Some(1_000));
1595            let files = discover_files(&config);
1596            let names = file_names(&files, dir.path());
1597
1598            assert!(names.contains(&"src/small.ts".to_string()));
1599            assert!(
1600                !names.contains(&"src/huge.ts".to_string()),
1601                "a file over the size limit must not be discovered"
1602            );
1603        }
1604
1605        #[test]
1606        fn declaration_files_exempt_from_size_skip() {
1607            let dir = tempfile::tempdir().expect("create temp dir");
1608            let src = dir.path().join("src");
1609            std::fs::create_dir_all(&src).unwrap();
1610            std::fs::write(src.join("auto-imports.d.ts"), "x".repeat(5_000)).unwrap();
1611            std::fs::write(src.join("huge.ts"), "x".repeat(5_000)).unwrap();
1612
1613            let config = make_config_with_max_file_size(dir.path().to_path_buf(), Some(1_000));
1614            let files = discover_files(&config);
1615            let names = file_names(&files, dir.path());
1616
1617            assert!(
1618                names.contains(&"src/auto-imports.d.ts".to_string()),
1619                "a large .d.ts is exempt from the skip (reachability root for global types)"
1620            );
1621            assert!(!names.contains(&"src/huge.ts".to_string()));
1622        }
1623
1624        #[test]
1625        fn unlimited_size_keeps_large_files() {
1626            let dir = tempfile::tempdir().expect("create temp dir");
1627            let src = dir.path().join("src");
1628            std::fs::create_dir_all(&src).unwrap();
1629            std::fs::write(src.join("huge.ts"), "x".repeat(5_000)).unwrap();
1630
1631            let config = make_config_with_max_file_size(dir.path().to_path_buf(), None);
1632            let files = discover_files(&config);
1633            let names = file_names(&files, dir.path());
1634
1635            assert!(
1636                names.contains(&"src/huge.ts".to_string()),
1637                "no limit keeps every file"
1638            );
1639        }
1640
1641        #[test]
1642        fn skipped_file_recorded_in_workspace_diagnostics() {
1643            let dir = tempfile::tempdir().expect("create temp dir");
1644            let src = dir.path().join("src");
1645            std::fs::create_dir_all(&src).unwrap();
1646            std::fs::write(src.join("huge.ts"), "x".repeat(5_000)).unwrap();
1647
1648            let config = make_config_with_max_file_size(dir.path().to_path_buf(), Some(1_000));
1649            let _ = discover_files(&config);
1650
1651            let diagnostics = fallow_config::workspace_diagnostics_for(dir.path());
1652            let skipped: Vec<_> = diagnostics
1653                .iter()
1654                .filter(|d| {
1655                    matches!(
1656                        d.kind,
1657                        fallow_config::WorkspaceDiagnosticKind::SkippedLargeFile { .. }
1658                    )
1659                })
1660                .collect();
1661            assert_eq!(
1662                skipped.len(),
1663                1,
1664                "the skipped file is recorded in workspace diagnostics for JSON output"
1665            );
1666            assert!(skipped[0].path.ends_with("src/huge.ts"));
1667            assert!(
1668                matches!(
1669                    skipped[0].kind,
1670                    fallow_config::WorkspaceDiagnosticKind::SkippedLargeFile { size_bytes }
1671                        if size_bytes == 5_000
1672                ),
1673                "the recorded diagnostic carries the on-disk byte size"
1674            );
1675        }
1676
1677        #[test]
1678        fn skips_large_one_line_js_as_minified_generated_output() {
1679            let dir = tempfile::tempdir().expect("create temp dir");
1680            let src = dir.path().join("src");
1681            std::fs::create_dir_all(&src).unwrap();
1682            let asset = src.join("index-abc123.js");
1683            std::fs::write(&asset, "x".repeat(MINIFIED_FILE_SKIP_BYTES as usize + 1)).unwrap();
1684
1685            let config = make_config(dir.path().to_path_buf(), false);
1686            let files = discover_files(&config);
1687            let names = file_names(&files, dir.path());
1688
1689            assert!(
1690                !names.contains(&"src/index-abc123.js".to_string()),
1691                "large one-line JS assets should be skipped before parsing"
1692            );
1693
1694            let diagnostics = fallow_config::workspace_diagnostics_for(dir.path());
1695            assert!(
1696                diagnostics.iter().any(|diag| {
1697                    diag.path.ends_with("src/index-abc123.js")
1698                        && matches!(
1699                            diag.kind,
1700                            fallow_config::WorkspaceDiagnosticKind::SkippedMinifiedFile { .. }
1701                        )
1702                }),
1703                "the skipped minified asset is recorded for JSON output: {diagnostics:?}"
1704            );
1705        }
1706
1707        #[test]
1708        fn unlimited_size_keeps_large_one_line_js() {
1709            let dir = tempfile::tempdir().expect("create temp dir");
1710            let src = dir.path().join("src");
1711            std::fs::create_dir_all(&src).unwrap();
1712            let asset = src.join("index-abc123.js");
1713            std::fs::write(&asset, "x".repeat(MINIFIED_FILE_SKIP_BYTES as usize + 1)).unwrap();
1714
1715            let config = make_config_with_max_file_size(dir.path().to_path_buf(), None);
1716            let files = discover_files(&config);
1717            let names = file_names(&files, dir.path());
1718
1719            assert!(
1720                names.contains(&"src/index-abc123.js".to_string()),
1721                "--max-file-size 0 should opt out of generated JS skipping"
1722            );
1723        }
1724
1725        #[test]
1726        fn keeps_large_multiline_js() {
1727            let dir = tempfile::tempdir().expect("create temp dir");
1728            let src = dir.path().join("src");
1729            std::fs::create_dir_all(&src).unwrap();
1730            let asset = src.join("handwritten.js");
1731            let mut content = String::new();
1732            while content.len() <= MINIFIED_FILE_SKIP_BYTES as usize + 1 {
1733                content.push_str("export const value = 1;\n");
1734            }
1735            std::fs::write(&asset, content).unwrap();
1736
1737            let config = make_config(dir.path().to_path_buf(), false);
1738            let files = discover_files(&config);
1739            let names = file_names(&files, dir.path());
1740
1741            assert!(
1742                names.contains(&"src/handwritten.js".to_string()),
1743                "large multiline JS should not be treated as a generated minified asset"
1744            );
1745        }
1746    }
1747}