Skip to main content

fallow_core/discover/
walk.rs

1use std::ffi::OsStr;
2use std::path::{Path, PathBuf};
3use std::sync::{Mutex, OnceLock};
4
5use fallow_config::{ResolvedConfig, WorkspaceDiagnostic, WorkspaceDiagnosticKind};
6use fallow_types::discover::{DiscoveredFile, FileId};
7use ignore::WalkBuilder;
8use rustc_hash::FxHashSet;
9
10use super::ALLOWED_HIDDEN_DIRS;
11
12/// Process-wide dedupe of the size-skip / largest-files stderr notes, keyed by a
13/// content-derived string, so combined-mode (`fallow` runs check + dupes +
14/// health, each of which can trigger a source walk) emits each note at most once
15/// per distinct content. Mirrors the workspace-diagnostics `should_emit`
16/// pattern (issue #1086).
17fn should_emit_note_once(key: String) -> bool {
18    static EMITTED: OnceLock<Mutex<FxHashSet<String>>> = OnceLock::new();
19    EMITTED
20        .get_or_init(|| Mutex::new(FxHashSet::default()))
21        .lock()
22        .map_or(true, |mut set| set.insert(key))
23}
24
/// One raw walker result: a discovered file path together with its on-disk
/// size in bytes, before [`DiscoveredFile`] ids are assigned.
type SizedFile = (PathBuf, u64);

/// Maximum number of example file paths named in the aggregated
/// skipped-large-file and largest-files stderr notes. Anything beyond this
/// collapses to "and N more", keeping each note to one bounded line on a
/// monorepo that skips many files.
const NOTE_EXAMPLE_CAP: usize = 5;

/// Discovered-file count above which the pre-parse largest-files note fires,
/// so an out-of-memory hang at the parse stage has a visible suspect list
/// (issue #1086).
const LARGE_SET_THRESHOLD: usize = 20_000;

/// Single-file size (4 MiB) at or above which the pre-parse largest-files note
/// fires even on a small project. Set just under the default 5 MB skip so the
/// note fires for kept files that are approaching the skip limit (the genuine
/// out-of-memory suspects), not for ordinary large-but-benign files.
const LARGE_FILE_NOTE_BYTES: u64 = 4 << 20;

/// Minimum size (256 KiB) for a file to be listed in the largest-files note.
/// Filters out the `0.0 MB` entries that would otherwise pad the list once it
/// fires, keeping the named files to plausible memory contributors.
const NOTE_FILE_FLOOR_BYTES: u64 = 256 << 10;

/// Minimum size (1 MiB) for content-shape based minified-bundle skipping.
/// Smaller one-line files can be hand-written utilities, while multi-MB
/// one-line JS is generated output in practice.
const MINIFIED_FILE_SKIP_BYTES: u64 = 1 << 20;

/// Number of leading bytes (256 KiB) inspected when deciding whether a large
/// JS file is minified.
const MINIFIED_SAMPLE_BYTES: usize = 256 << 10;

/// Line length (128 KiB) at which a line inside a multi-MB JS file is treated
/// as generated minified output. This avoids parsing assets that can expand
/// to huge ASTs.
const MINIFIED_LONG_LINE_BYTES: usize = 128 << 10;
61
/// Returns `true` when `path` names a TypeScript declaration file, i.e. its
/// file name ends in `.d.ts`, `.d.mts`, or `.d.cts`.
///
/// Declaration files are exempt from the per-file size skip because they are
/// reachability roots for global types: skipping a large `auto-imports.d.ts`
/// would false-flag the files whose types it provides.
///
/// Paths without a file name, or whose file name is not valid UTF-8, are
/// reported as non-declaration files.
fn is_declaration_file(path: &Path) -> bool {
    const DECLARATION_SUFFIXES: [&str; 3] = [".d.ts", ".d.mts", ".d.cts"];
    path.file_name()
        .and_then(|file_name| file_name.to_str())
        .is_some_and(|file_name| {
            DECLARATION_SUFFIXES
                .iter()
                .any(|suffix| file_name.ends_with(*suffix))
        })
}
70
/// Returns `true` when the extension of `path` is exactly `js`, `mjs`, or
/// `cjs`. Paths with no extension or a non-UTF-8 extension yield `false`.
fn is_plain_js_file(path: &Path) -> bool {
    let Some(extension) = path.extension().and_then(|raw| raw.to_str()) else {
        return false;
    };
    extension == "js" || extension == "mjs" || extension == "cjs"
}
77
78fn has_minified_line_shape(path: &Path) -> bool {
79    use std::io::Read;
80
81    let Ok(mut file) = std::fs::File::open(path) else {
82        return false;
83    };
84    let mut sample = vec![0; MINIFIED_SAMPLE_BYTES];
85    let Ok(len) = file.read(&mut sample) else {
86        return false;
87    };
88    sample.truncate(len);
89    if sample.is_empty() {
90        return false;
91    }
92
93    let mut current_line = 0usize;
94    for byte in sample {
95        if byte == b'\n' || byte == b'\r' {
96            current_line = 0;
97            continue;
98        }
99        current_line += 1;
100        if current_line >= MINIFIED_LONG_LINE_BYTES {
101            return true;
102        }
103    }
104    false
105}
106
107fn is_probably_minified_generated_js(path: &Path, size_bytes: u64) -> bool {
108    size_bytes >= MINIFIED_FILE_SKIP_BYTES
109        && is_plain_js_file(path)
110        && !is_declaration_file(path)
111        && has_minified_line_shape(path)
112}
113
/// Formats `bytes` as a megabyte figure (1 MB = 1024 * 1024 bytes) with one
/// decimal place, e.g. `"1.5 MB"`.
fn format_size_mb(bytes: u64) -> String {
    const BYTES_PER_MB: f64 = 1024.0 * 1024.0;
    #[expect(
        clippy::cast_precision_loss,
        reason = "display-only size figure; precision loss past 2^53 bytes is irrelevant"
    )]
    let megabytes = bytes as f64 / BYTES_PER_MB;
    format!("{megabytes:.1} MB")
}
123
124/// Join up to [`NOTE_EXAMPLE_CAP`] `path (size)` examples (already ordered) into
125/// one comma-separated string, collapsing the tail to "and N more".
126fn summarize_examples(root: &Path, examples: &[SizedFile]) -> String {
127    let shown: Vec<String> = examples
128        .iter()
129        .take(NOTE_EXAMPLE_CAP)
130        .map(|(path, size)| {
131            let display = path
132                .strip_prefix(root)
133                .unwrap_or(path)
134                .display()
135                .to_string()
136                .replace('\\', "/");
137            format!("{display} ({})", format_size_mb(*size))
138        })
139        .collect();
140    let remaining = examples.len().saturating_sub(NOTE_EXAMPLE_CAP);
141    if remaining > 0 {
142        format!("{}, and {remaining} more", shown.join(", "))
143    } else {
144        shown.join(", ")
145    }
146}
147
148/// Split discovered `(path, size)` pairs into the kept set and the set skipped
149/// for exceeding `max_file_size_bytes`. Declaration files are never skipped.
150fn partition_by_size(
151    raw: Vec<SizedFile>,
152    max_file_size_bytes: Option<u64>,
153) -> (Vec<SizedFile>, Vec<SizedFile>) {
154    let Some(limit) = max_file_size_bytes else {
155        return (raw, Vec::new());
156    };
157    raw.into_iter()
158        .partition(|(path, size)| *size <= limit || is_declaration_file(path))
159}
160
161/// Split discovered `(path, size)` pairs into files kept for parsing and files
162/// skipped because they look like generated minified JavaScript.
163fn partition_minified_generated_js(
164    raw: Vec<SizedFile>,
165    max_file_size_bytes: Option<u64>,
166) -> (Vec<SizedFile>, Vec<SizedFile>) {
167    if max_file_size_bytes.is_none() {
168        return (raw, Vec::new());
169    }
170    raw.into_iter()
171        .partition(|(path, size)| !is_probably_minified_generated_js(path, *size))
172}
173
174/// Record the skipped files in the workspace-diagnostics registry (so they
175/// surface in `workspace_diagnostics[]` JSON) and emit one aggregated
176/// `tracing::warn!` so a human running `fallow` sees what was dropped. Mirrors
177/// the JSON-plus-gated-warn pattern used for undeclared workspaces.
178fn report_skipped_large_files(config: &ResolvedConfig, skipped: &[SizedFile]) {
179    if skipped.is_empty() {
180        return;
181    }
182    let diagnostics: Vec<WorkspaceDiagnostic> = skipped
183        .iter()
184        .map(|(path, size_bytes)| {
185            WorkspaceDiagnostic::new(
186                &config.root,
187                path.clone(),
188                WorkspaceDiagnosticKind::SkippedLargeFile {
189                    size_bytes: *size_bytes,
190                },
191            )
192        })
193        .collect();
194    fallow_config::append_workspace_diagnostics(&config.root, diagnostics);
195
196    let mut sorted: Vec<SizedFile> = skipped.to_vec();
197    sorted.sort_unstable_by_key(|f| std::cmp::Reverse(f.1));
198    let count = skipped.len();
199    if !config.quiet
200        && should_emit_note_once(format!(
201            "skip::{}::{count}::{}",
202            config.root.display(),
203            sorted.first().map_or(0, |f| f.1)
204        ))
205    {
206        let examples = summarize_examples(&config.root, &sorted);
207        let noun = if count == 1 { "file" } else { "files" };
208        tracing::warn!(
209            "fallow: skipped {count} {noun} over the max file size limit ({examples}). \
210             Raise the limit with --max-file-size <MB> (or FALLOW_MAX_FILE_SIZE), or add them to ignorePatterns."
211        );
212    }
213}
214
215/// Record generated minified JS files skipped before parsing.
216fn report_skipped_minified_files(config: &ResolvedConfig, skipped: &[SizedFile]) {
217    if skipped.is_empty() {
218        return;
219    }
220    let diagnostics: Vec<WorkspaceDiagnostic> = skipped
221        .iter()
222        .map(|(path, size_bytes)| {
223            WorkspaceDiagnostic::new(
224                &config.root,
225                path.clone(),
226                WorkspaceDiagnosticKind::SkippedMinifiedFile {
227                    size_bytes: *size_bytes,
228                },
229            )
230        })
231        .collect();
232    fallow_config::append_workspace_diagnostics(&config.root, diagnostics);
233
234    let mut sorted: Vec<SizedFile> = skipped.to_vec();
235    sorted.sort_unstable_by_key(|f| std::cmp::Reverse(f.1));
236    let count = skipped.len();
237    if !config.quiet
238        && should_emit_note_once(format!(
239            "minified::{}::{count}::{}",
240            config.root.display(),
241            sorted.first().map_or(0, |f| f.1)
242        ))
243    {
244        let examples = summarize_examples(&config.root, &sorted);
245        let noun = if count == 1 { "file" } else { "files" };
246        let pronoun = if count == 1 { "it" } else { "them" };
247        tracing::warn!(
248            "fallow: skipped {count} minified generated JS {noun} ({examples}). \
249             Add {pronoun} to ignorePatterns, rename {pronoun} with a .min.js suffix, or use --max-file-size 0 to analyze {pronoun}."
250        );
251    }
252}
253
254/// Build the pre-parse largest-files note, or `None` when the discovered set is
255/// neither unusually large nor contains an unusually large file. Pure so the
256/// pluralization, floor filtering, and count-only fallback are unit-testable
257/// without a tracing subscriber. See issue #1086.
258fn build_largest_files_note(root: &Path, files: &[DiscoveredFile]) -> Option<String> {
259    if files.is_empty() {
260        return None;
261    }
262    let largest = files.iter().map(|f| f.size_bytes).max().unwrap_or(0);
263    if files.len() <= LARGE_SET_THRESHOLD && largest < LARGE_FILE_NOTE_BYTES {
264        return None;
265    }
266    let count = files.len();
267    let noun = if count == 1 { "file" } else { "files" };
268    let mut by_size: Vec<SizedFile> = files
269        .iter()
270        .filter(|f| f.size_bytes >= NOTE_FILE_FLOOR_BYTES)
271        .map(|f| (f.path.clone(), f.size_bytes))
272        .collect();
273    by_size.sort_unstable_by_key(|f| std::cmp::Reverse(f.1));
274    if by_size.is_empty() {
275        // Large file SET with no individually large file: report the count only,
276        // omitting a "largest:" list that would otherwise be all sub-floor noise.
277        return Some(format!(
278            "fallow: discovered {count} {noun}. If analysis stalls or runs out of memory, \
279             exclude large generated files via ignorePatterns or --max-file-size."
280        ));
281    }
282    let examples = summarize_examples(root, &by_size);
283    Some(format!(
284        "fallow: discovered {count} {noun}; largest: {examples}. If analysis stalls or runs out of memory, \
285         exclude large generated files via ignorePatterns or --max-file-size."
286    ))
287}
288
289/// Emit a pre-parse note listing the largest kept files when the discovered set
290/// is unusually large or contains an unusually large file, so an out-of-memory
291/// hang at the parse stage is diagnosable (issue #1086). Visible before the
292/// expensive parse begins, so it survives a subsequent crash.
293fn note_largest_files(config: &ResolvedConfig, files: &[DiscoveredFile]) {
294    if config.quiet {
295        return;
296    }
297    if let Some(message) = build_largest_files_note(&config.root, files)
298        && should_emit_note_once(format!("note::{}::{}", config.root.display(), files.len()))
299    {
300        tracing::warn!("{message}");
301    }
302}
303
/// A set of hidden directory names that source discovery should traverse, but
/// only for paths located under one particular root (e.g. a package).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HiddenDirScope {
    /// Paths must start with this root for the scope to apply.
    root: PathBuf,
    /// Hidden directory names (compared against the entry's file name).
    dirs: Vec<String>,
}

impl HiddenDirScope {
    /// Creates a scope that allows the hidden directory names in `dirs` for
    /// paths under `root`.
    pub fn new(root: PathBuf, dirs: Vec<String>) -> Self {
        Self { root, dirs }
    }

    /// Returns `true` when `path` lies under this scope's root and `name`
    /// equals one of the scope's directory names.
    fn allows(&self, path: &Path, name: &OsStr) -> bool {
        if !path.starts_with(&self.root) {
            return false;
        }
        self.dirs
            .iter()
            .any(|allowed| name == OsStr::new(allowed))
    }
}
320
321/// Per-thread file collector for the parallel walker.
322struct FileVisitor<'a> {
323    root: &'a Path,
324    ignore_patterns: &'a globset::GlobSet,
325    production_excludes: &'a Option<globset::GlobSet>,
326    shared: &'a Mutex<Vec<(std::path::PathBuf, u64)>>,
327    local: Vec<(std::path::PathBuf, u64)>,
328}
329
330impl ignore::ParallelVisitor for FileVisitor<'_> {
331    fn visit(&mut self, result: Result<ignore::DirEntry, ignore::Error>) -> ignore::WalkState {
332        let Ok(entry) = result else {
333            return ignore::WalkState::Continue;
334        };
335        if entry.file_type().is_some_and(|ft| ft.is_dir()) {
336            return ignore::WalkState::Continue;
337        }
338        let relative = entry
339            .path()
340            .strip_prefix(self.root)
341            .unwrap_or_else(|_| entry.path());
342        if self.ignore_patterns.is_match(relative) {
343            return ignore::WalkState::Continue;
344        }
345        if self
346            .production_excludes
347            .as_ref()
348            .is_some_and(|excludes| excludes.is_match(relative))
349        {
350            return ignore::WalkState::Continue;
351        }
352        let size_bytes = entry.metadata().map_or(0, |m| m.len());
353        self.local.push((entry.into_path(), size_bytes));
354        ignore::WalkState::Continue
355    }
356}
357
358impl Drop for FileVisitor<'_> {
359    #[expect(
360        clippy::expect_used,
361        reason = "poisoned walk collector lock means worker state is unrecoverable"
362    )]
363    fn drop(&mut self) {
364        if !self.local.is_empty() {
365            self.shared
366                .lock()
367                .expect("walk collector lock poisoned")
368                .append(&mut self.local);
369        }
370    }
371}
372
373/// Builder that creates per-thread `FileVisitor` instances for the parallel walker.
374struct FileVisitorBuilder<'a> {
375    root: &'a Path,
376    ignore_patterns: &'a globset::GlobSet,
377    production_excludes: &'a Option<globset::GlobSet>,
378    shared: &'a Mutex<Vec<(std::path::PathBuf, u64)>>,
379}
380
381impl<'s> ignore::ParallelVisitorBuilder<'s> for FileVisitorBuilder<'s> {
382    fn build(&mut self) -> Box<dyn ignore::ParallelVisitor + 's> {
383        Box::new(FileVisitor {
384            root: self.root,
385            ignore_patterns: self.ignore_patterns,
386            production_excludes: self.production_excludes,
387            shared: self.shared,
388            local: Vec::new(),
389        })
390    }
391}
392
/// File extensions (without the leading dot) that source discovery walks.
pub const SOURCE_EXTENSIONS: &[&str] = &[
    // TypeScript
    "ts", "tsx", "mts", "cts", "gts",
    // JavaScript
    "js", "jsx", "mjs", "cjs", "gjs",
    // Single-file components and MDX
    "vue", "svelte", "astro", "mdx",
    // Stylesheets
    "css", "scss", "sass", "less",
    // Markup and GraphQL documents
    "html", "graphql", "gql",
];
397
/// Glob patterns for test/dev/story files excluded in production mode.
pub const PRODUCTION_EXCLUDE_PATTERNS: &[&str] = &[
    // Test, benchmark, fixture, and story files identified by an infix.
    "**/*.test.*",
    "**/*.spec.*",
    "**/*.e2e.*",
    "**/*.e2e-spec.*",
    "**/*.bench.*",
    "**/*.fixture.*",
    "**/*.stories.*",
    "**/*.story.*",
    // Conventional test-support directories, at any depth.
    "**/__tests__/**",
    "**/__mocks__/**",
    "**/__snapshots__/**",
    "**/__fixtures__/**",
    "**/test/**",
    "**/tests/**",
    // Config files; no `**/` prefix on this pattern.
    "*.config.*",
    // Dot-prefixed JS/TS files, at any depth.
    "**/.*.js",
    "**/.*.ts",
    "**/.*.mjs",
    "**/.*.cjs",
];
420
421/// Check if a hidden directory name is on the allowlist.
422pub fn is_allowed_hidden_dir(name: &OsStr) -> bool {
423    ALLOWED_HIDDEN_DIRS.iter().any(|&d| OsStr::new(d) == name)
424}
425
426fn is_allowed_scoped_hidden_dir(
427    name: &OsStr,
428    path: &Path,
429    additional_hidden_dir_scopes: &[HiddenDirScope],
430) -> bool {
431    additional_hidden_dir_scopes
432        .iter()
433        .any(|scope| scope.allows(path, name))
434}
435
436/// Check if a hidden directory entry should be allowed through the filter.
437///
438/// Returns `true` if the entry is not hidden or is on the allowlist.
439/// Hidden files (not directories) are always allowed through since the type
440/// filter handles them.
441fn is_allowed_hidden(entry: &ignore::DirEntry) -> bool {
442    is_allowed_hidden_with_scopes(entry, &[])
443}
444
445fn is_allowed_hidden_with_scopes(
446    entry: &ignore::DirEntry,
447    additional_hidden_dir_scopes: &[HiddenDirScope],
448) -> bool {
449    let name = entry.file_name();
450    let name_str = name.to_string_lossy();
451
452    if !name_str.starts_with('.') {
453        return true;
454    }
455
456    if entry.file_type().is_some_and(|ft| !ft.is_dir()) {
457        return true;
458    }
459
460    is_allowed_hidden_dir(name)
461        || is_allowed_scoped_hidden_dir(name, entry.path(), additional_hidden_dir_scopes)
462}
463
464/// Discover all source files in the project.
465///
466/// # Panics
467///
468/// Panics if the file type glob or progress template is invalid (compile-time constants).
469pub fn discover_files(config: &ResolvedConfig) -> Vec<DiscoveredFile> {
470    discover_files_with_additional_hidden_dirs(config, &[])
471}
472
473/// Discover all source files in the project, with package-scoped hidden dirs.
474///
475/// # Panics
476///
477/// Panics if the file type glob or progress template is invalid (compile-time constants).
478#[expect(
479    clippy::cast_possible_truncation,
480    reason = "file count is bounded by project size, well under u32::MAX"
481)]
482#[expect(
483    clippy::expect_used,
484    reason = "source file globs are hard-coded and the collector lock must remain usable"
485)]
486pub fn discover_files_with_additional_hidden_dirs(
487    config: &ResolvedConfig,
488    additional_hidden_dir_scopes: &[HiddenDirScope],
489) -> Vec<DiscoveredFile> {
490    let _span = tracing::info_span!("discover_files").entered();
491
492    let mut types_builder = ignore::types::TypesBuilder::new();
493    for ext in SOURCE_EXTENSIONS {
494        types_builder
495            .add("source", &format!("*.{ext}"))
496            .expect("valid glob");
497    }
498    types_builder.select("source");
499    let types = types_builder.build().expect("valid types");
500
501    let mut walk_builder = WalkBuilder::new(&config.root);
502    walk_builder
503        .hidden(false)
504        .git_ignore(true)
505        .git_global(true)
506        .git_exclude(true)
507        .types(types)
508        .threads(config.threads);
509    if additional_hidden_dir_scopes.is_empty() {
510        walk_builder.filter_entry(is_allowed_hidden);
511    } else {
512        let scopes = additional_hidden_dir_scopes.to_vec();
513        walk_builder.filter_entry(move |entry| is_allowed_hidden_with_scopes(entry, &scopes));
514    }
515
516    let production_excludes = if config.production {
517        let mut builder = globset::GlobSetBuilder::new();
518        for pattern in PRODUCTION_EXCLUDE_PATTERNS {
519            if let Ok(glob) = globset::GlobBuilder::new(pattern)
520                .literal_separator(true)
521                .build()
522            {
523                builder.add(glob);
524            }
525        }
526        builder.build().ok()
527    } else {
528        None
529    };
530
531    let collected: Mutex<Vec<(std::path::PathBuf, u64)>> = Mutex::new(Vec::new());
532    let mut visitor_builder = FileVisitorBuilder {
533        root: &config.root,
534        ignore_patterns: &config.ignore_patterns,
535        production_excludes: &production_excludes,
536        shared: &collected,
537    };
538    walk_builder.build_parallel().visit(&mut visitor_builder);
539
540    let mut raw = collected
541        .into_inner()
542        .expect("walk collector lock poisoned");
543    raw.sort_unstable_by(|a, b| a.0.cmp(&b.0));
544
545    // Drop any source-discovery diagnostics from a previous pass (watch-mode
546    // rerun, combined-mode re-walk) BEFORE re-recording this walk's skips, so a
547    // file that is no longer skipped does not leave a stale entry (issue #1086).
548    fallow_config::clear_source_discovery_diagnostics(&config.root);
549    let (kept, skipped) = partition_by_size(raw, config.max_file_size_bytes);
550    report_skipped_large_files(config, &skipped);
551    let (kept, skipped_minified) =
552        partition_minified_generated_js(kept, config.max_file_size_bytes);
553    report_skipped_minified_files(config, &skipped_minified);
554
555    let files: Vec<DiscoveredFile> = kept
556        .into_iter()
557        .enumerate()
558        .map(|(idx, (path, size_bytes))| DiscoveredFile {
559            id: FileId(idx as u32),
560            path,
561            size_bytes,
562        })
563        .collect();
564
565    note_largest_files(config, &files);
566
567    files
568}
569
570#[cfg(test)]
571mod tests {
572    use std::ffi::OsStr;
573
574    use super::*;
575
576    #[test]
577    fn allowed_hidden_dirs() {
578        assert!(is_allowed_hidden_dir(OsStr::new(".storybook")));
579        assert!(is_allowed_hidden_dir(OsStr::new(".vitepress")));
580        assert!(is_allowed_hidden_dir(OsStr::new(".well-known")));
581        assert!(is_allowed_hidden_dir(OsStr::new(".changeset")));
582        assert!(is_allowed_hidden_dir(OsStr::new(".github")));
583    }
584
585    #[test]
586    fn disallowed_hidden_dirs() {
587        assert!(!is_allowed_hidden_dir(OsStr::new(".git")));
588        assert!(!is_allowed_hidden_dir(OsStr::new(".cache")));
589        assert!(!is_allowed_hidden_dir(OsStr::new(".vscode")));
590        assert!(!is_allowed_hidden_dir(OsStr::new(".fallow")));
591        assert!(!is_allowed_hidden_dir(OsStr::new(".next")));
592    }
593
594    #[test]
595    fn non_hidden_dirs_not_in_allowlist() {
596        assert!(!is_allowed_hidden_dir(OsStr::new("src")));
597        assert!(!is_allowed_hidden_dir(OsStr::new("node_modules")));
598    }
599
600    #[test]
601    fn source_extensions_include_typescript() {
602        assert!(SOURCE_EXTENSIONS.contains(&"ts"));
603        assert!(SOURCE_EXTENSIONS.contains(&"tsx"));
604        assert!(SOURCE_EXTENSIONS.contains(&"mts"));
605        assert!(SOURCE_EXTENSIONS.contains(&"cts"));
606        assert!(SOURCE_EXTENSIONS.contains(&"gts"));
607    }
608
609    #[test]
610    fn source_extensions_include_javascript() {
611        assert!(SOURCE_EXTENSIONS.contains(&"js"));
612        assert!(SOURCE_EXTENSIONS.contains(&"jsx"));
613        assert!(SOURCE_EXTENSIONS.contains(&"mjs"));
614        assert!(SOURCE_EXTENSIONS.contains(&"cjs"));
615        assert!(SOURCE_EXTENSIONS.contains(&"gjs"));
616    }
617
618    #[test]
619    fn source_extensions_include_sfc_formats() {
620        assert!(SOURCE_EXTENSIONS.contains(&"vue"));
621        assert!(SOURCE_EXTENSIONS.contains(&"svelte"));
622        assert!(SOURCE_EXTENSIONS.contains(&"astro"));
623    }
624
625    #[test]
626    fn source_extensions_include_styles() {
627        assert!(SOURCE_EXTENSIONS.contains(&"css"));
628        assert!(SOURCE_EXTENSIONS.contains(&"scss"));
629        assert!(SOURCE_EXTENSIONS.contains(&"sass"));
630        assert!(SOURCE_EXTENSIONS.contains(&"less"));
631    }
632
633    #[test]
634    fn source_extensions_exclude_non_source() {
635        assert!(!SOURCE_EXTENSIONS.contains(&"json"));
636        assert!(!SOURCE_EXTENSIONS.contains(&"yaml"));
637        assert!(!SOURCE_EXTENSIONS.contains(&"md"));
638        assert!(!SOURCE_EXTENSIONS.contains(&"png"));
639        assert!(!SOURCE_EXTENSIONS.contains(&"htm"));
640    }
641
642    #[test]
643    fn source_extensions_include_html() {
644        assert!(SOURCE_EXTENSIONS.contains(&"html"));
645    }
646
647    #[test]
648    fn source_extensions_include_graphql_documents() {
649        assert!(SOURCE_EXTENSIONS.contains(&"graphql"));
650        assert!(SOURCE_EXTENSIONS.contains(&"gql"));
651    }
652
653    fn build_production_glob_set() -> globset::GlobSet {
654        let mut builder = globset::GlobSetBuilder::new();
655        for pattern in PRODUCTION_EXCLUDE_PATTERNS {
656            builder.add(
657                globset::GlobBuilder::new(pattern)
658                    .literal_separator(true)
659                    .build()
660                    .expect("valid glob pattern"),
661            );
662        }
663        builder.build().expect("valid glob set")
664    }
665
666    #[test]
667    fn production_excludes_test_files() {
668        let set = build_production_glob_set();
669        assert!(set.is_match("src/Button.test.ts"));
670        assert!(set.is_match("src/utils.spec.tsx"));
671        assert!(set.is_match("src/__tests__/helper.ts"));
672        assert!(!set.is_match("src/Button.ts"));
673        assert!(!set.is_match("src/utils.tsx"));
674    }
675
676    #[test]
677    fn production_excludes_story_files() {
678        let set = build_production_glob_set();
679        assert!(set.is_match("src/Button.stories.tsx"));
680        assert!(set.is_match("src/Card.story.ts"));
681        assert!(!set.is_match("src/Button.tsx"));
682    }
683
684    #[test]
685    fn production_excludes_config_files_at_root_only() {
686        let set = build_production_glob_set();
687        assert!(set.is_match("vitest.config.ts"));
688        assert!(set.is_match("jest.config.js"));
689        assert!(!set.is_match("src/app/app.config.ts"));
690        assert!(!set.is_match("src/app/app.config.server.ts"));
691        assert!(!set.is_match("packages/foo/vitest.config.ts"));
692        assert!(!set.is_match("src/config.ts"));
693    }
694
695    #[test]
696    fn production_patterns_are_valid_globs() {
697        let _ = build_production_glob_set();
698    }
699
700    #[test]
701    fn disallowed_hidden_dirs_idea() {
702        assert!(!is_allowed_hidden_dir(OsStr::new(".idea")));
703    }
704
705    #[test]
706    fn source_extensions_include_mdx() {
707        assert!(SOURCE_EXTENSIONS.contains(&"mdx"));
708    }
709
710    #[test]
711    fn source_extensions_exclude_image_and_data_formats() {
712        assert!(!SOURCE_EXTENSIONS.contains(&"png"));
713        assert!(!SOURCE_EXTENSIONS.contains(&"jpg"));
714        assert!(!SOURCE_EXTENSIONS.contains(&"svg"));
715        assert!(!SOURCE_EXTENSIONS.contains(&"txt"));
716        assert!(!SOURCE_EXTENSIONS.contains(&"csv"));
717        assert!(!SOURCE_EXTENSIONS.contains(&"wasm"));
718    }
719
720    #[test]
721    fn is_declaration_file_matches_dts_variants() {
722        assert!(is_declaration_file(Path::new("env.d.ts")));
723        assert!(is_declaration_file(Path::new("src/auto-imports.d.ts")));
724        assert!(is_declaration_file(Path::new("mod.d.mts")));
725        assert!(is_declaration_file(Path::new("compat.d.cts")));
726        assert!(!is_declaration_file(Path::new("index.ts")));
727        assert!(!is_declaration_file(Path::new("component.tsx")));
728        assert!(!is_declaration_file(Path::new("notes.d.txt")));
729    }
730
731    #[test]
732    fn format_size_mb_renders_one_decimal() {
733        assert_eq!(format_size_mb(5 * 1024 * 1024), "5.0 MB");
734        assert_eq!(format_size_mb(1024 * 1024 + 512 * 1024), "1.5 MB");
735        assert_eq!(format_size_mb(0), "0.0 MB");
736    }
737
738    #[test]
739    fn partition_by_size_no_limit_keeps_all() {
740        let raw = vec![(PathBuf::from("a.ts"), 10), (PathBuf::from("b.ts"), 10_000)];
741        let (kept, skipped) = partition_by_size(raw, None);
742        assert_eq!(kept.len(), 2);
743        assert!(skipped.is_empty());
744    }
745
746    #[test]
747    fn partition_by_size_skips_strictly_over_limit() {
748        let raw = vec![
749            (PathBuf::from("under.ts"), 99),
750            (PathBuf::from("exact.ts"), 100),
751            (PathBuf::from("over.ts"), 101),
752        ];
753        let (kept, skipped) = partition_by_size(raw, Some(100));
754        let kept_has = |name: &str| kept.iter().any(|(p, _)| p.as_path() == Path::new(name));
755        assert!(kept_has("under.ts"));
756        assert!(
757            kept_has("exact.ts"),
758            "a file exactly at the limit is kept (skip is strictly-greater)"
759        );
760        assert_eq!(skipped.len(), 1);
761        assert_eq!(skipped[0].0, PathBuf::from("over.ts"));
762    }
763
764    #[test]
765    fn partition_by_size_exempts_declaration_files() {
766        let raw = vec![
767            (PathBuf::from("huge.ts"), 10_000),
768            (PathBuf::from("auto-imports.d.ts"), 10_000),
769        ];
770        let (kept, skipped) = partition_by_size(raw, Some(100));
771        assert!(
772            kept.iter()
773                .any(|(p, _)| p.as_path() == Path::new("auto-imports.d.ts")),
774            "declaration files are exempt from the size skip regardless of size"
775        );
776        assert_eq!(skipped.len(), 1);
777        assert_eq!(skipped[0].0, PathBuf::from("huge.ts"));
778    }
779
780    fn disco(path: &str, size_bytes: u64) -> DiscoveredFile {
781        DiscoveredFile {
782            id: FileId(0),
783            path: PathBuf::from(path),
784            size_bytes,
785        }
786    }
787
788    #[test]
789    fn largest_files_note_below_threshold_is_none() {
790        let files = [disco("a.ts", 100), disco("b.ts", 200)];
791        assert!(build_largest_files_note(Path::new("/p"), &files).is_none());
792    }
793
794    #[test]
795    fn largest_files_note_single_file_uses_singular() {
796        let files = [disco("big.ts", 5 * 1024 * 1024)];
797        let note = build_largest_files_note(Path::new("/p"), &files).expect("note fires");
798        assert!(
799            note.contains("discovered 1 file;"),
800            "singular noun on the single-big-file path (issue #1086 regression): {note}"
801        );
802        assert!(!note.contains("discovered 1 files"));
803        assert!(note.contains("big.ts (5.0 MB)"));
804    }
805
806    #[test]
807    fn largest_files_note_filters_sub_floor_files() {
808        let files = [disco("big.ts", 5 * 1024 * 1024), disco("tiny.ts", 10)];
809        let note = build_largest_files_note(Path::new("/p"), &files).expect("note fires");
810        assert!(note.contains("discovered 2 files;"));
811        assert!(note.contains("big.ts (5.0 MB)"));
812        assert!(
813            !note.contains("tiny.ts"),
814            "sub-floor files are not listed as `0.0 MB` chaff: {note}"
815        );
816    }
817
818    #[test]
819    fn largest_files_note_large_set_no_big_file_omits_list() {
820        let files: Vec<DiscoveredFile> = (0..=LARGE_SET_THRESHOLD)
821            .map(|i| disco(&format!("f{i}.ts"), 100))
822            .collect();
823        let note = build_largest_files_note(Path::new("/p"), &files).expect("large set fires");
824        assert!(note.contains(&format!("discovered {} files", LARGE_SET_THRESHOLD + 1)));
825        assert!(
826            !note.contains("largest:"),
827            "no sub-floor `largest:` list when no file clears the floor: {note}"
828        );
829    }
830
831    mod discover_files_integration {
832        use std::path::PathBuf;
833
834        use fallow_config::{
835            DuplicatesConfig, FallowConfig, FlagsConfig, HealthConfig, OutputFormat, ResolveConfig,
836            RulesConfig,
837        };
838
839        use super::*;
840
841        /// Create a minimal ResolvedConfig pointing at the given root directory.
842        fn make_config(root: PathBuf, production: bool) -> ResolvedConfig {
843            FallowConfig {
844                production: production.into(),
845                ..Default::default()
846            }
847            .resolve(root, OutputFormat::Human, 1, true, true, None)
848        }
849
850        /// Helper to collect discovered file names (relative to root) for assertions.
851        /// Normalizes path separators to `/` for cross-platform test consistency.
852        fn file_names(files: &[DiscoveredFile], root: &std::path::Path) -> Vec<String> {
853            files
854                .iter()
855                .map(|f| {
856                    f.path
857                        .strip_prefix(root)
858                        .unwrap_or(&f.path)
859                        .to_string_lossy()
860                        .replace('\\', "/")
861                })
862                .collect()
863        }
864
865        #[test]
866        fn discovers_source_files_with_valid_extensions() {
867            let dir = tempfile::tempdir().expect("create temp dir");
868            let src = dir.path().join("src");
869            std::fs::create_dir_all(&src).unwrap();
870
871            std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();
872            std::fs::write(src.join("component.tsx"), "export default () => {};").unwrap();
873            std::fs::write(src.join("utils.js"), "module.exports = {};").unwrap();
874            std::fs::write(src.join("helper.jsx"), "export const h = 1;").unwrap();
875            std::fs::write(src.join("config.mjs"), "export default {};").unwrap();
876            std::fs::write(src.join("legacy.cjs"), "module.exports = {};").unwrap();
877            std::fs::write(src.join("types.mts"), "export type T = string;").unwrap();
878            std::fs::write(src.join("compat.cts"), "module.exports = {};").unwrap();
879
880            let config = make_config(dir.path().to_path_buf(), false);
881            let files = discover_files(&config);
882            let names = file_names(&files, dir.path());
883
884            assert!(names.contains(&"src/app.ts".to_string()));
885            assert!(names.contains(&"src/component.tsx".to_string()));
886            assert!(names.contains(&"src/utils.js".to_string()));
887            assert!(names.contains(&"src/helper.jsx".to_string()));
888            assert!(names.contains(&"src/config.mjs".to_string()));
889            assert!(names.contains(&"src/legacy.cjs".to_string()));
890            assert!(names.contains(&"src/types.mts".to_string()));
891            assert!(names.contains(&"src/compat.cts".to_string()));
892        }
893
894        #[test]
895        fn excludes_non_source_extensions() {
896            let dir = tempfile::tempdir().expect("create temp dir");
897            let src = dir.path().join("src");
898            std::fs::create_dir_all(&src).unwrap();
899
900            std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();
901
902            std::fs::write(src.join("data.json"), "{}").unwrap();
903            std::fs::write(src.join("readme.md"), "# Hello").unwrap();
904            std::fs::write(src.join("notes.txt"), "notes").unwrap();
905            std::fs::write(src.join("logo.png"), [0u8; 8]).unwrap();
906
907            let config = make_config(dir.path().to_path_buf(), false);
908            let files = discover_files(&config);
909            let names = file_names(&files, dir.path());
910
911            assert_eq!(names.len(), 1, "only the .ts file should be discovered");
912            assert!(names.contains(&"src/app.ts".to_string()));
913        }
914
915        #[test]
916        fn excludes_disallowed_hidden_directories() {
917            let dir = tempfile::tempdir().expect("create temp dir");
918
919            let git_dir = dir.path().join(".git");
920            std::fs::create_dir_all(&git_dir).unwrap();
921            std::fs::write(git_dir.join("hooks.ts"), "// git hook").unwrap();
922
923            let idea_dir = dir.path().join(".idea");
924            std::fs::create_dir_all(&idea_dir).unwrap();
925            std::fs::write(idea_dir.join("workspace.ts"), "// idea").unwrap();
926
927            let cache_dir = dir.path().join(".cache");
928            std::fs::create_dir_all(&cache_dir).unwrap();
929            std::fs::write(cache_dir.join("cached.js"), "// cached").unwrap();
930
931            let src = dir.path().join("src");
932            std::fs::create_dir_all(&src).unwrap();
933            std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();
934
935            let config = make_config(dir.path().to_path_buf(), false);
936            let files = discover_files(&config);
937            let names = file_names(&files, dir.path());
938
939            assert_eq!(names.len(), 1, "only src/app.ts should be discovered");
940            assert!(names.contains(&"src/app.ts".to_string()));
941        }
942
943        #[test]
944        fn includes_allowed_hidden_directories() {
945            let dir = tempfile::tempdir().expect("create temp dir");
946
947            let storybook = dir.path().join(".storybook");
948            std::fs::create_dir_all(&storybook).unwrap();
949            std::fs::write(storybook.join("main.ts"), "export default {};").unwrap();
950
951            let github = dir.path().join(".github");
952            std::fs::create_dir_all(&github).unwrap();
953            std::fs::write(github.join("actions.js"), "module.exports = {};").unwrap();
954
955            let changeset = dir.path().join(".changeset");
956            std::fs::create_dir_all(&changeset).unwrap();
957            std::fs::write(changeset.join("config.js"), "module.exports = {};").unwrap();
958
959            let config = make_config(dir.path().to_path_buf(), false);
960            let files = discover_files(&config);
961            let names = file_names(&files, dir.path());
962
963            assert!(
964                names.contains(&".storybook/main.ts".to_string()),
965                "files in .storybook should be discovered"
966            );
967            assert!(
968                names.contains(&".github/actions.js".to_string()),
969                "files in .github should be discovered"
970            );
971            assert!(
972                names.contains(&".changeset/config.js".to_string()),
973                "files in .changeset should be discovered"
974            );
975        }
976
977        #[test]
978        fn default_discovery_excludes_client_and_server_hidden_directories() {
979            let dir = tempfile::tempdir().expect("create temp dir");
980            let app = dir.path().join("app");
981            std::fs::create_dir_all(app.join(".client")).unwrap();
982            std::fs::create_dir_all(app.join(".server")).unwrap();
983            std::fs::write(app.join(".client/analytics.ts"), "export const a = 1;").unwrap();
984            std::fs::write(app.join(".server/db.ts"), "export const db = {};").unwrap();
985            std::fs::write(app.join("root.tsx"), "export default function Root() {}").unwrap();
986
987            let config = make_config(dir.path().to_path_buf(), false);
988            let files = discover_files(&config);
989            let names = file_names(&files, dir.path());
990
991            assert!(names.contains(&"app/root.tsx".to_string()));
992            assert!(!names.contains(&"app/.client/analytics.ts".to_string()));
993            assert!(!names.contains(&"app/.server/db.ts".to_string()));
994        }
995
996        #[test]
997        fn scoped_hidden_dirs_include_client_and_server_under_package_root() {
998            let dir = tempfile::tempdir().expect("create temp dir");
999            let package = dir.path().join("packages/app");
1000            std::fs::create_dir_all(package.join("app/.client")).unwrap();
1001            std::fs::create_dir_all(package.join("app/.server")).unwrap();
1002            std::fs::write(
1003                package.join("app/.client/analytics.ts"),
1004                "export const track = () => {};",
1005            )
1006            .unwrap();
1007            std::fs::write(package.join("app/.server/db.ts"), "export const db = {};").unwrap();
1008
1009            let config = make_config(dir.path().to_path_buf(), false);
1010            let scopes = [HiddenDirScope::new(
1011                package,
1012                vec![".client".to_string(), ".server".to_string()],
1013            )];
1014            let files = discover_files_with_additional_hidden_dirs(&config, &scopes);
1015            let names = file_names(&files, dir.path());
1016
1017            assert!(names.contains(&"packages/app/app/.client/analytics.ts".to_string()));
1018            assert!(names.contains(&"packages/app/app/.server/db.ts".to_string()));
1019        }
1020
1021        #[test]
1022        fn scoped_hidden_dirs_do_not_include_unscoped_packages() {
1023            let dir = tempfile::tempdir().expect("create temp dir");
1024            let active = dir.path().join("packages/active");
1025            let inactive = dir.path().join("packages/inactive");
1026            std::fs::create_dir_all(active.join("app/.server")).unwrap();
1027            std::fs::create_dir_all(inactive.join("app/.server")).unwrap();
1028            std::fs::write(active.join("app/.server/db.ts"), "export const db = {};").unwrap();
1029            std::fs::write(inactive.join("app/.server/db.ts"), "export const db = {};").unwrap();
1030
1031            let config = make_config(dir.path().to_path_buf(), false);
1032            let scopes = [HiddenDirScope::new(active, vec![".server".to_string()])];
1033            let files = discover_files_with_additional_hidden_dirs(&config, &scopes);
1034            let names = file_names(&files, dir.path());
1035
1036            assert!(names.contains(&"packages/active/app/.server/db.ts".to_string()));
1037            assert!(!names.contains(&"packages/inactive/app/.server/db.ts".to_string()));
1038        }
1039
1040        #[test]
1041        fn excludes_root_build_directory() {
1042            let dir = tempfile::tempdir().expect("create temp dir");
1043
1044            std::fs::write(dir.path().join(".ignore"), "/build/\n").unwrap();
1045
1046            let build_dir = dir.path().join("build");
1047            std::fs::create_dir_all(&build_dir).unwrap();
1048            std::fs::write(build_dir.join("output.js"), "// build output").unwrap();
1049
1050            let src = dir.path().join("src");
1051            std::fs::create_dir_all(&src).unwrap();
1052            std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();
1053
1054            let config = make_config(dir.path().to_path_buf(), false);
1055            let files = discover_files(&config);
1056            let names = file_names(&files, dir.path());
1057
1058            assert_eq!(names.len(), 1, "root build/ should be excluded via .ignore");
1059            assert!(names.contains(&"src/app.ts".to_string()));
1060        }
1061
1062        #[test]
1063        fn includes_nested_build_directory() {
1064            let dir = tempfile::tempdir().expect("create temp dir");
1065
1066            let nested_build = dir.path().join("src").join("build");
1067            std::fs::create_dir_all(&nested_build).unwrap();
1068            std::fs::write(nested_build.join("helper.ts"), "export const h = 1;").unwrap();
1069
1070            let config = make_config(dir.path().to_path_buf(), false);
1071            let files = discover_files(&config);
1072            let names = file_names(&files, dir.path());
1073
1074            assert!(
1075                names.contains(&"src/build/helper.ts".to_string()),
1076                "nested build/ directories should be included"
1077            );
1078        }
1079
1080        #[test]
1081        #[expect(
1082            clippy::cast_possible_truncation,
1083            reason = "test file counts are trivially small"
1084        )]
1085        fn file_ids_are_sequential_after_sorting() {
1086            let dir = tempfile::tempdir().expect("create temp dir");
1087            let src = dir.path().join("src");
1088            std::fs::create_dir_all(&src).unwrap();
1089
1090            std::fs::write(src.join("z_last.ts"), "export const z = 1;").unwrap();
1091            std::fs::write(src.join("a_first.ts"), "export const a = 1;").unwrap();
1092            std::fs::write(src.join("m_middle.ts"), "export const m = 1;").unwrap();
1093
1094            let config = make_config(dir.path().to_path_buf(), false);
1095            let files = discover_files(&config);
1096
1097            for (idx, file) in files.iter().enumerate() {
1098                assert_eq!(file.id, FileId(idx as u32), "FileId should be sequential");
1099            }
1100
1101            for pair in files.windows(2) {
1102                assert!(
1103                    pair[0].path < pair[1].path,
1104                    "files should be sorted by path"
1105                );
1106            }
1107        }
1108
1109        #[test]
1110        fn production_mode_excludes_test_files() {
1111            let dir = tempfile::tempdir().expect("create temp dir");
1112            let src = dir.path().join("src");
1113            std::fs::create_dir_all(&src).unwrap();
1114
1115            std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();
1116            std::fs::write(src.join("app.test.ts"), "test('a', () => {});").unwrap();
1117            std::fs::write(src.join("app.spec.ts"), "describe('a', () => {});").unwrap();
1118            std::fs::write(src.join("app.stories.tsx"), "export default {};").unwrap();
1119
1120            let config = make_config(dir.path().to_path_buf(), true);
1121            let files = discover_files(&config);
1122            let names = file_names(&files, dir.path());
1123
1124            assert!(
1125                names.contains(&"src/app.ts".to_string()),
1126                "source files should be included in production mode"
1127            );
1128            assert!(
1129                !names.contains(&"src/app.test.ts".to_string()),
1130                "test files should be excluded in production mode"
1131            );
1132            assert!(
1133                !names.contains(&"src/app.spec.ts".to_string()),
1134                "spec files should be excluded in production mode"
1135            );
1136            assert!(
1137                !names.contains(&"src/app.stories.tsx".to_string()),
1138                "story files should be excluded in production mode"
1139            );
1140        }
1141
1142        #[test]
1143        fn non_production_mode_includes_test_files() {
1144            let dir = tempfile::tempdir().expect("create temp dir");
1145            let src = dir.path().join("src");
1146            std::fs::create_dir_all(&src).unwrap();
1147
1148            std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();
1149            std::fs::write(src.join("app.test.ts"), "test('a', () => {});").unwrap();
1150
1151            let config = make_config(dir.path().to_path_buf(), false);
1152            let files = discover_files(&config);
1153            let names = file_names(&files, dir.path());
1154
1155            assert!(names.contains(&"src/app.ts".to_string()));
1156            assert!(
1157                names.contains(&"src/app.test.ts".to_string()),
1158                "test files should be included in non-production mode"
1159            );
1160        }
1161
1162        #[test]
1163        fn empty_directory_returns_no_files() {
1164            let dir = tempfile::tempdir().expect("create temp dir");
1165            let config = make_config(dir.path().to_path_buf(), false);
1166            let files = discover_files(&config);
1167            assert!(files.is_empty(), "empty project should discover no files");
1168        }
1169
1170        #[test]
1171        fn hidden_files_not_discovered_as_source() {
1172            let dir = tempfile::tempdir().expect("create temp dir");
1173
1174            std::fs::write(dir.path().join(".env"), "SECRET=abc").unwrap();
1175            std::fs::write(dir.path().join(".gitignore"), "node_modules").unwrap();
1176            std::fs::write(dir.path().join(".eslintrc.js"), "module.exports = {};").unwrap();
1177
1178            let src = dir.path().join("src");
1179            std::fs::create_dir_all(&src).unwrap();
1180            std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();
1181
1182            let config = make_config(dir.path().to_path_buf(), false);
1183            let files = discover_files(&config);
1184            let names = file_names(&files, dir.path());
1185
1186            assert!(
1187                !names.contains(&".env".to_string()),
1188                ".env should not be discovered"
1189            );
1190            assert!(
1191                !names.contains(&".gitignore".to_string()),
1192                ".gitignore should not be discovered"
1193            );
1194        }
1195
1196        /// Create a config with custom ignore patterns.
1197        fn make_config_with_ignores(root: PathBuf, ignores: Vec<String>) -> ResolvedConfig {
1198            FallowConfig {
1199                schema: None,
1200                extends: vec![],
1201                entry: vec![],
1202                ignore_patterns: ignores,
1203                framework: vec![],
1204                workspaces: None,
1205                ignore_dependencies: vec![],
1206                ignore_unresolved_imports: vec![],
1207                ignore_exports: vec![],
1208                ignore_catalog_references: vec![],
1209                ignore_dependency_overrides: vec![],
1210                ignore_exports_used_in_file: fallow_config::IgnoreExportsUsedInFileConfig::default(
1211                ),
1212                used_class_members: vec![],
1213                ignore_decorators: vec![],
1214                duplicates: DuplicatesConfig::default(),
1215                health: HealthConfig::default(),
1216                rules: RulesConfig::default(),
1217                boundaries: fallow_config::BoundaryConfig::default(),
1218                production: false.into(),
1219                plugins: vec![],
1220                rule_packs: vec![],
1221                dynamically_loaded: vec![],
1222                overrides: vec![],
1223                regression: None,
1224                audit: fallow_config::AuditConfig::default(),
1225                codeowners: None,
1226                public_packages: vec![],
1227                flags: FlagsConfig::default(),
1228                security: fallow_config::SecurityConfig::default(),
1229                fix: fallow_config::FixConfig::default(),
1230                resolve: ResolveConfig::default(),
1231                sealed: false,
1232                include_entry_exports: false,
1233                auto_imports: false,
1234                cache: fallow_config::CacheConfig::default(),
1235            }
1236            .resolve(root, OutputFormat::Human, 1, true, true, None)
1237        }
1238
1239        #[test]
1240        fn custom_ignore_patterns_exclude_matching_files() {
1241            let dir = tempfile::tempdir().expect("create temp dir");
1242
1243            let generated = dir.path().join("src").join("api").join("generated");
1244            std::fs::create_dir_all(&generated).unwrap();
1245            std::fs::write(generated.join("client.ts"), "export const api = {};").unwrap();
1246
1247            let client = dir.path().join("src").join("api").join("client");
1248            std::fs::create_dir_all(&client).unwrap();
1249            std::fs::write(client.join("fetch.ts"), "export const fetch = {};").unwrap();
1250
1251            let src = dir.path().join("src");
1252            std::fs::write(src.join("index.ts"), "export const x = 1;").unwrap();
1253
1254            let config = make_config_with_ignores(
1255                dir.path().to_path_buf(),
1256                vec![
1257                    "src/api/generated/**".to_string(),
1258                    "src/api/client/**".to_string(),
1259                ],
1260            );
1261            let files = discover_files(&config);
1262            let names = file_names(&files, dir.path());
1263
1264            assert_eq!(names.len(), 1, "only non-ignored files: {names:?}");
1265            assert!(names.contains(&"src/index.ts".to_string()));
1266        }
1267
1268        #[test]
1269        fn default_ignore_patterns_exclude_node_modules_and_dist() {
1270            let dir = tempfile::tempdir().expect("create temp dir");
1271
1272            let nm = dir.path().join("node_modules").join("lodash");
1273            std::fs::create_dir_all(&nm).unwrap();
1274            std::fs::write(nm.join("lodash.js"), "module.exports = {};").unwrap();
1275
1276            let dist = dir.path().join("dist");
1277            std::fs::create_dir_all(&dist).unwrap();
1278            std::fs::write(dist.join("bundle.js"), "// bundled").unwrap();
1279
1280            let src = dir.path().join("src");
1281            std::fs::create_dir_all(&src).unwrap();
1282            std::fs::write(src.join("index.ts"), "export const x = 1;").unwrap();
1283
1284            let config = make_config(dir.path().to_path_buf(), false);
1285            let files = discover_files(&config);
1286            let names = file_names(&files, dir.path());
1287
1288            assert_eq!(names.len(), 1);
1289            assert!(names.contains(&"src/index.ts".to_string()));
1290        }
1291
1292        #[test]
1293        fn default_ignore_patterns_exclude_root_build() {
1294            let dir = tempfile::tempdir().expect("create temp dir");
1295
1296            let build = dir.path().join("build");
1297            std::fs::create_dir_all(&build).unwrap();
1298            std::fs::write(build.join("output.js"), "// built").unwrap();
1299
1300            let nested_build = dir.path().join("src").join("build");
1301            std::fs::create_dir_all(&nested_build).unwrap();
1302            std::fs::write(nested_build.join("helper.ts"), "export const h = 1;").unwrap();
1303
1304            let src = dir.path().join("src");
1305            std::fs::write(src.join("index.ts"), "export const x = 1;").unwrap();
1306
1307            let config = make_config(dir.path().to_path_buf(), false);
1308            let files = discover_files(&config);
1309            let names = file_names(&files, dir.path());
1310
1311            assert_eq!(
1312                names.len(),
1313                2,
1314                "root build/ excluded, nested kept: {names:?}"
1315            );
1316            assert!(names.contains(&"src/index.ts".to_string()));
1317            assert!(names.contains(&"src/build/helper.ts".to_string()));
1318        }
1319
1320        /// Resolve a config then override the per-file size limit in bytes.
1321        fn make_config_with_max_file_size(
1322            root: PathBuf,
1323            max_file_size_bytes: Option<u64>,
1324        ) -> ResolvedConfig {
1325            let mut config = make_config(root, false);
1326            config.max_file_size_bytes = max_file_size_bytes;
1327            config
1328        }
1329
1330        #[test]
1331        fn skips_files_over_max_file_size() {
1332            let dir = tempfile::tempdir().expect("create temp dir");
1333            let src = dir.path().join("src");
1334            std::fs::create_dir_all(&src).unwrap();
1335            std::fs::write(src.join("small.ts"), "export const a = 1;").unwrap();
1336            std::fs::write(src.join("huge.ts"), "x".repeat(5_000)).unwrap();
1337
1338            let config = make_config_with_max_file_size(dir.path().to_path_buf(), Some(1_000));
1339            let files = discover_files(&config);
1340            let names = file_names(&files, dir.path());
1341
1342            assert!(names.contains(&"src/small.ts".to_string()));
1343            assert!(
1344                !names.contains(&"src/huge.ts".to_string()),
1345                "a file over the size limit must not be discovered"
1346            );
1347        }
1348
1349        #[test]
1350        fn declaration_files_exempt_from_size_skip() {
1351            let dir = tempfile::tempdir().expect("create temp dir");
1352            let src = dir.path().join("src");
1353            std::fs::create_dir_all(&src).unwrap();
1354            std::fs::write(src.join("auto-imports.d.ts"), "x".repeat(5_000)).unwrap();
1355            std::fs::write(src.join("huge.ts"), "x".repeat(5_000)).unwrap();
1356
1357            let config = make_config_with_max_file_size(dir.path().to_path_buf(), Some(1_000));
1358            let files = discover_files(&config);
1359            let names = file_names(&files, dir.path());
1360
1361            assert!(
1362                names.contains(&"src/auto-imports.d.ts".to_string()),
1363                "a large .d.ts is exempt from the skip (reachability root for global types)"
1364            );
1365            assert!(!names.contains(&"src/huge.ts".to_string()));
1366        }
1367
1368        #[test]
1369        fn unlimited_size_keeps_large_files() {
1370            let dir = tempfile::tempdir().expect("create temp dir");
1371            let src = dir.path().join("src");
1372            std::fs::create_dir_all(&src).unwrap();
1373            std::fs::write(src.join("huge.ts"), "x".repeat(5_000)).unwrap();
1374
1375            let config = make_config_with_max_file_size(dir.path().to_path_buf(), None);
1376            let files = discover_files(&config);
1377            let names = file_names(&files, dir.path());
1378
1379            assert!(
1380                names.contains(&"src/huge.ts".to_string()),
1381                "no limit keeps every file"
1382            );
1383        }
1384
1385        #[test]
1386        fn skipped_file_recorded_in_workspace_diagnostics() {
1387            let dir = tempfile::tempdir().expect("create temp dir");
1388            let src = dir.path().join("src");
1389            std::fs::create_dir_all(&src).unwrap();
1390            std::fs::write(src.join("huge.ts"), "x".repeat(5_000)).unwrap();
1391
1392            let config = make_config_with_max_file_size(dir.path().to_path_buf(), Some(1_000));
1393            let _ = discover_files(&config);
1394
1395            let diagnostics = fallow_config::workspace_diagnostics_for(dir.path());
1396            let skipped: Vec<_> = diagnostics
1397                .iter()
1398                .filter(|d| {
1399                    matches!(
1400                        d.kind,
1401                        fallow_config::WorkspaceDiagnosticKind::SkippedLargeFile { .. }
1402                    )
1403                })
1404                .collect();
1405            assert_eq!(
1406                skipped.len(),
1407                1,
1408                "the skipped file is recorded in workspace diagnostics for JSON output"
1409            );
1410            assert!(skipped[0].path.ends_with("src/huge.ts"));
1411            assert!(
1412                matches!(
1413                    skipped[0].kind,
1414                    fallow_config::WorkspaceDiagnosticKind::SkippedLargeFile { size_bytes }
1415                        if size_bytes == 5_000
1416                ),
1417                "the recorded diagnostic carries the on-disk byte size"
1418            );
1419        }
1420
1421        #[test]
1422        fn skips_large_one_line_js_as_minified_generated_output() {
1423            let dir = tempfile::tempdir().expect("create temp dir");
1424            let src = dir.path().join("src");
1425            std::fs::create_dir_all(&src).unwrap();
1426            let asset = src.join("index-abc123.js");
1427            std::fs::write(&asset, "x".repeat(MINIFIED_FILE_SKIP_BYTES as usize + 1)).unwrap();
1428
1429            let config = make_config(dir.path().to_path_buf(), false);
1430            let files = discover_files(&config);
1431            let names = file_names(&files, dir.path());
1432
1433            assert!(
1434                !names.contains(&"src/index-abc123.js".to_string()),
1435                "large one-line JS assets should be skipped before parsing"
1436            );
1437
1438            let diagnostics = fallow_config::workspace_diagnostics_for(dir.path());
1439            assert!(
1440                diagnostics.iter().any(|diag| {
1441                    diag.path.ends_with("src/index-abc123.js")
1442                        && matches!(
1443                            diag.kind,
1444                            fallow_config::WorkspaceDiagnosticKind::SkippedMinifiedFile { .. }
1445                        )
1446                }),
1447                "the skipped minified asset is recorded for JSON output: {diagnostics:?}"
1448            );
1449        }
1450
1451        #[test]
1452        fn unlimited_size_keeps_large_one_line_js() {
1453            let dir = tempfile::tempdir().expect("create temp dir");
1454            let src = dir.path().join("src");
1455            std::fs::create_dir_all(&src).unwrap();
1456            let asset = src.join("index-abc123.js");
1457            std::fs::write(&asset, "x".repeat(MINIFIED_FILE_SKIP_BYTES as usize + 1)).unwrap();
1458
1459            let config = make_config_with_max_file_size(dir.path().to_path_buf(), None);
1460            let files = discover_files(&config);
1461            let names = file_names(&files, dir.path());
1462
1463            assert!(
1464                names.contains(&"src/index-abc123.js".to_string()),
1465                "--max-file-size 0 should opt out of generated JS skipping"
1466            );
1467        }
1468
1469        #[test]
1470        fn keeps_large_multiline_js() {
1471            let dir = tempfile::tempdir().expect("create temp dir");
1472            let src = dir.path().join("src");
1473            std::fs::create_dir_all(&src).unwrap();
1474            let asset = src.join("handwritten.js");
1475            let mut content = String::new();
1476            while content.len() <= MINIFIED_FILE_SKIP_BYTES as usize + 1 {
1477                content.push_str("export const value = 1;\n");
1478            }
1479            std::fs::write(&asset, content).unwrap();
1480
1481            let config = make_config(dir.path().to_path_buf(), false);
1482            let files = discover_files(&config);
1483            let names = file_names(&files, dir.path());
1484
1485            assert!(
1486                names.contains(&"src/handwritten.js".to_string()),
1487                "large multiline JS should not be treated as a generated minified asset"
1488            );
1489        }
1490    }
1491}