Skip to main content

fallow_core/discover/
walk.rs

1use std::ffi::OsStr;
2use std::path::{Path, PathBuf};
3use std::sync::{Mutex, OnceLock};
4
5use fallow_config::{ResolvedConfig, WorkspaceDiagnostic, WorkspaceDiagnosticKind};
6use fallow_types::discover::{DiscoveredFile, FileId};
7use ignore::WalkBuilder;
8use rustc_hash::FxHashSet;
9
10use super::ALLOWED_HIDDEN_DIRS;
11
12/// Process-wide dedupe of the size-skip / largest-files stderr notes, keyed by a
13/// content-derived string, so combined-mode (`fallow` runs check + dupes +
14/// health, each of which can trigger a source walk) emits each note at most once
15/// per distinct content. Mirrors the workspace-diagnostics `should_emit`
16/// pattern (issue #1086).
17fn should_emit_note_once(key: String) -> bool {
18    static EMITTED: OnceLock<Mutex<FxHashSet<String>>> = OnceLock::new();
19    EMITTED
20        .get_or_init(|| Mutex::new(FxHashSet::default()))
21        .lock()
22        .map_or(true, |mut set| set.insert(key))
23}
24
/// A discovered file path paired with its on-disk size in bytes, as collected
/// by the parallel walker before [`DiscoveredFile`] ids are assigned.
///
/// Field `.0` is the path and `.1` is the size. The walker records a size of
/// `0` when it cannot read an entry's metadata.
type SizedFile = (PathBuf, u64);
28
/// Number of example file paths named in the aggregated skipped-large-file and
/// largest-files stderr notes before the tail collapses to "and N more". Keeps
/// the notes to one bounded line on a monorepo that skips many files.
const NOTE_EXAMPLE_CAP: usize = 5;

/// Discovered-file-count threshold above which the pre-parse largest-files note
/// fires, so an out-of-memory hang at the parse stage has a visible suspect
/// list (issue #1086). The comparison is strict: a set of exactly this many
/// files does not trigger the note on count alone.
const LARGE_SET_THRESHOLD: usize = 20_000;

/// Single-file byte threshold at or above which the pre-parse largest-files
/// note fires even on a small project. Set just under the default 5 MB skip so
/// the note fires for kept files that are approaching the skip limit (the
/// genuine out-of-memory suspects), not for ordinary large-but-benign files.
const LARGE_FILE_NOTE_BYTES: u64 = 4 * 1024 * 1024;

/// Minimum size (inclusive) for a file to appear in the largest-files note.
/// Filters out the `0.0 MB` entries that would otherwise pad the list once it
/// fires, keeping the named files to plausible memory contributors.
const NOTE_FILE_FLOOR_BYTES: u64 = 256 * 1024;
49
/// Whether `path` names a TypeScript declaration file, i.e. its file name ends
/// in `.d.ts`, `.d.mts`, or `.d.cts`.
///
/// Declaration files are exempt from the per-file size skip because they are
/// reachability roots for global types: skipping a large `auto-imports.d.ts`
/// would false-flag the files whose types it provides.
///
/// Returns `false` when the path has no file name or the name is not valid
/// UTF-8. The suffix match is case-sensitive.
fn is_declaration_file(path: &Path) -> bool {
    const DECLARATION_SUFFIXES: [&str; 3] = [".d.ts", ".d.mts", ".d.cts"];
    path.file_name()
        .and_then(|os_name| os_name.to_str())
        .is_some_and(|file_name| {
            DECLARATION_SUFFIXES
                .iter()
                .any(|suffix| file_name.ends_with(suffix))
        })
}
58
/// Format `bytes` as megabytes (1 MB = 1024 * 1024 bytes) with exactly one
/// decimal place, e.g. `"1.5 MB"`.
fn format_size_mb(bytes: u64) -> String {
    const BYTES_PER_MB: f64 = 1024.0 * 1024.0;
    #[expect(
        clippy::cast_precision_loss,
        reason = "display-only size figure; precision loss past 2^53 bytes is irrelevant"
    )]
    let megabytes = bytes as f64 / BYTES_PER_MB;
    format!("{megabytes:.1} MB")
}
68
69/// Join up to [`NOTE_EXAMPLE_CAP`] `path (size)` examples (already ordered) into
70/// one comma-separated string, collapsing the tail to "and N more".
71fn summarize_examples(root: &Path, examples: &[SizedFile]) -> String {
72    let shown: Vec<String> = examples
73        .iter()
74        .take(NOTE_EXAMPLE_CAP)
75        .map(|(path, size)| {
76            let display = path
77                .strip_prefix(root)
78                .unwrap_or(path)
79                .display()
80                .to_string()
81                .replace('\\', "/");
82            format!("{display} ({})", format_size_mb(*size))
83        })
84        .collect();
85    let remaining = examples.len().saturating_sub(NOTE_EXAMPLE_CAP);
86    if remaining > 0 {
87        format!("{}, and {remaining} more", shown.join(", "))
88    } else {
89        shown.join(", ")
90    }
91}
92
93/// Split discovered `(path, size)` pairs into the kept set and the set skipped
94/// for exceeding `max_file_size_bytes`. Declaration files are never skipped.
95fn partition_by_size(
96    raw: Vec<SizedFile>,
97    max_file_size_bytes: Option<u64>,
98) -> (Vec<SizedFile>, Vec<SizedFile>) {
99    let Some(limit) = max_file_size_bytes else {
100        return (raw, Vec::new());
101    };
102    raw.into_iter()
103        .partition(|(path, size)| *size <= limit || is_declaration_file(path))
104}
105
106/// Record the skipped files in the workspace-diagnostics registry (so they
107/// surface in `workspace_diagnostics[]` JSON) and emit one aggregated
108/// `tracing::warn!` so a human running `fallow` sees what was dropped. Mirrors
109/// the JSON-plus-gated-warn pattern used for undeclared workspaces.
110fn report_skipped_large_files(config: &ResolvedConfig, skipped: &[SizedFile]) {
111    if skipped.is_empty() {
112        return;
113    }
114    let diagnostics: Vec<WorkspaceDiagnostic> = skipped
115        .iter()
116        .map(|(path, size_bytes)| {
117            WorkspaceDiagnostic::new(
118                &config.root,
119                path.clone(),
120                WorkspaceDiagnosticKind::SkippedLargeFile {
121                    size_bytes: *size_bytes,
122                },
123            )
124        })
125        .collect();
126    fallow_config::append_workspace_diagnostics(&config.root, diagnostics);
127
128    let mut sorted: Vec<SizedFile> = skipped.to_vec();
129    sorted.sort_unstable_by_key(|f| std::cmp::Reverse(f.1));
130    let count = skipped.len();
131    if !config.quiet
132        && should_emit_note_once(format!(
133            "skip::{}::{count}::{}",
134            config.root.display(),
135            sorted.first().map_or(0, |f| f.1)
136        ))
137    {
138        let examples = summarize_examples(&config.root, &sorted);
139        let noun = if count == 1 { "file" } else { "files" };
140        tracing::warn!(
141            "fallow: skipped {count} {noun} over the max file size limit ({examples}). \
142             Raise the limit with --max-file-size <MB> (or FALLOW_MAX_FILE_SIZE), or add them to ignorePatterns."
143        );
144    }
145}
146
147/// Build the pre-parse largest-files note, or `None` when the discovered set is
148/// neither unusually large nor contains an unusually large file. Pure so the
149/// pluralization, floor filtering, and count-only fallback are unit-testable
150/// without a tracing subscriber. See issue #1086.
151fn build_largest_files_note(root: &Path, files: &[DiscoveredFile]) -> Option<String> {
152    if files.is_empty() {
153        return None;
154    }
155    let largest = files.iter().map(|f| f.size_bytes).max().unwrap_or(0);
156    if files.len() <= LARGE_SET_THRESHOLD && largest < LARGE_FILE_NOTE_BYTES {
157        return None;
158    }
159    let count = files.len();
160    let noun = if count == 1 { "file" } else { "files" };
161    let mut by_size: Vec<SizedFile> = files
162        .iter()
163        .filter(|f| f.size_bytes >= NOTE_FILE_FLOOR_BYTES)
164        .map(|f| (f.path.clone(), f.size_bytes))
165        .collect();
166    by_size.sort_unstable_by_key(|f| std::cmp::Reverse(f.1));
167    if by_size.is_empty() {
168        // Large file SET with no individually large file: report the count only,
169        // omitting a "largest:" list that would otherwise be all sub-floor noise.
170        return Some(format!(
171            "fallow: discovered {count} {noun}. If analysis stalls or runs out of memory, \
172             exclude large generated files via ignorePatterns or --max-file-size."
173        ));
174    }
175    let examples = summarize_examples(root, &by_size);
176    Some(format!(
177        "fallow: discovered {count} {noun}; largest: {examples}. If analysis stalls or runs out of memory, \
178         exclude large generated files via ignorePatterns or --max-file-size."
179    ))
180}
181
182/// Emit a pre-parse note listing the largest kept files when the discovered set
183/// is unusually large or contains an unusually large file, so an out-of-memory
184/// hang at the parse stage is diagnosable (issue #1086). Visible before the
185/// expensive parse begins, so it survives a subsequent crash.
186fn note_largest_files(config: &ResolvedConfig, files: &[DiscoveredFile]) {
187    if config.quiet {
188        return;
189    }
190    if let Some(message) = build_largest_files_note(&config.root, files)
191        && should_emit_note_once(format!("note::{}::{}", config.root.display(), files.len()))
192    {
193        tracing::warn!("{message}");
194    }
195}
196
/// Package-scoped hidden directories that source discovery should traverse.
///
/// A scope permits the directory names in `dirs`, but only for paths located
/// under `root`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HiddenDirScope {
    /// Path prefix under which the extra hidden directories apply.
    root: PathBuf,
    /// Directory names permitted beneath `root`.
    dirs: Vec<String>,
}

impl HiddenDirScope {
    /// Create a scope allowing the directory names `dirs` beneath `root`.
    pub fn new(root: PathBuf, dirs: Vec<String>) -> Self {
        Self { root, dirs }
    }

    /// Whether the directory called `name`, located at `path`, is permitted by
    /// this scope: `path` must start with the scope root and `name` must equal
    /// one of the scope's directory names.
    fn allows(&self, path: &Path, name: &OsStr) -> bool {
        if !path.starts_with(&self.root) {
            return false;
        }
        self.dirs
            .iter()
            .any(|allowed| name == OsStr::new(allowed))
    }
}
213
214/// Per-thread file collector for the parallel walker.
215struct FileVisitor<'a> {
216    root: &'a Path,
217    ignore_patterns: &'a globset::GlobSet,
218    production_excludes: &'a Option<globset::GlobSet>,
219    shared: &'a Mutex<Vec<(std::path::PathBuf, u64)>>,
220    local: Vec<(std::path::PathBuf, u64)>,
221}
222
223impl ignore::ParallelVisitor for FileVisitor<'_> {
224    fn visit(&mut self, result: Result<ignore::DirEntry, ignore::Error>) -> ignore::WalkState {
225        let Ok(entry) = result else {
226            return ignore::WalkState::Continue;
227        };
228        if entry.file_type().is_some_and(|ft| ft.is_dir()) {
229            return ignore::WalkState::Continue;
230        }
231        let relative = entry
232            .path()
233            .strip_prefix(self.root)
234            .unwrap_or_else(|_| entry.path());
235        if self.ignore_patterns.is_match(relative) {
236            return ignore::WalkState::Continue;
237        }
238        if self
239            .production_excludes
240            .as_ref()
241            .is_some_and(|excludes| excludes.is_match(relative))
242        {
243            return ignore::WalkState::Continue;
244        }
245        let size_bytes = entry.metadata().map_or(0, |m| m.len());
246        self.local.push((entry.into_path(), size_bytes));
247        ignore::WalkState::Continue
248    }
249}
250
251impl Drop for FileVisitor<'_> {
252    #[expect(
253        clippy::expect_used,
254        reason = "poisoned walk collector lock means worker state is unrecoverable"
255    )]
256    fn drop(&mut self) {
257        if !self.local.is_empty() {
258            self.shared
259                .lock()
260                .expect("walk collector lock poisoned")
261                .append(&mut self.local);
262        }
263    }
264}
265
266/// Builder that creates per-thread `FileVisitor` instances for the parallel walker.
267struct FileVisitorBuilder<'a> {
268    root: &'a Path,
269    ignore_patterns: &'a globset::GlobSet,
270    production_excludes: &'a Option<globset::GlobSet>,
271    shared: &'a Mutex<Vec<(std::path::PathBuf, u64)>>,
272}
273
274impl<'s> ignore::ParallelVisitorBuilder<'s> for FileVisitorBuilder<'s> {
275    fn build(&mut self) -> Box<dyn ignore::ParallelVisitor + 's> {
276        Box::new(FileVisitor {
277            root: self.root,
278            ignore_patterns: self.ignore_patterns,
279            production_excludes: self.production_excludes,
280            shared: self.shared,
281            local: Vec::new(),
282        })
283    }
284}
285
/// File extensions (without the leading dot) that source discovery collects.
/// Each entry is registered with the walker's type filter as a `*.{ext}` glob.
pub const SOURCE_EXTENSIONS: &[&str] = &[
    "ts", "tsx", "mts", "cts", "gts", "js", "jsx", "mjs", "cjs", "gjs", "vue", "svelte", "astro",
    "mdx", "css", "scss", "html", "graphql", "gql",
];
290
/// Glob patterns for test/dev/story files excluded in production mode.
///
/// Discovery compiles these with `literal_separator(true)`, so `*` does not
/// cross a `/` and only `**` spans directories.
pub const PRODUCTION_EXCLUDE_PATTERNS: &[&str] = &[
    // File-name markers, at any depth.
    "**/*.test.*",
    "**/*.spec.*",
    "**/*.e2e.*",
    "**/*.e2e-spec.*",
    "**/*.bench.*",
    "**/*.fixture.*",
    "**/*.stories.*",
    "**/*.story.*",
    // Whole directories, at any depth.
    "**/__tests__/**",
    "**/__mocks__/**",
    "**/__snapshots__/**",
    "**/__fixtures__/**",
    "**/test/**",
    "**/tests/**",
    // No `**/` prefix: matches config files at the project root only.
    "*.config.*",
    // Dot-prefixed JS/TS files, at any depth.
    "**/.*.js",
    "**/.*.ts",
    "**/.*.mjs",
    "**/.*.cjs",
];
313
314/// Check if a hidden directory name is on the allowlist.
315pub fn is_allowed_hidden_dir(name: &OsStr) -> bool {
316    ALLOWED_HIDDEN_DIRS.iter().any(|&d| OsStr::new(d) == name)
317}
318
319fn is_allowed_scoped_hidden_dir(
320    name: &OsStr,
321    path: &Path,
322    additional_hidden_dir_scopes: &[HiddenDirScope],
323) -> bool {
324    additional_hidden_dir_scopes
325        .iter()
326        .any(|scope| scope.allows(path, name))
327}
328
329/// Check if a hidden directory entry should be allowed through the filter.
330///
331/// Returns `true` if the entry is not hidden or is on the allowlist.
332/// Hidden files (not directories) are always allowed through since the type
333/// filter handles them.
334fn is_allowed_hidden(entry: &ignore::DirEntry) -> bool {
335    is_allowed_hidden_with_scopes(entry, &[])
336}
337
338fn is_allowed_hidden_with_scopes(
339    entry: &ignore::DirEntry,
340    additional_hidden_dir_scopes: &[HiddenDirScope],
341) -> bool {
342    let name = entry.file_name();
343    let name_str = name.to_string_lossy();
344
345    if !name_str.starts_with('.') {
346        return true;
347    }
348
349    if entry.file_type().is_some_and(|ft| !ft.is_dir()) {
350        return true;
351    }
352
353    is_allowed_hidden_dir(name)
354        || is_allowed_scoped_hidden_dir(name, entry.path(), additional_hidden_dir_scopes)
355}
356
357/// Discover all source files in the project.
358///
359/// # Panics
360///
361/// Panics if the file type glob or progress template is invalid (compile-time constants).
362pub fn discover_files(config: &ResolvedConfig) -> Vec<DiscoveredFile> {
363    discover_files_with_additional_hidden_dirs(config, &[])
364}
365
366/// Discover all source files in the project, with package-scoped hidden dirs.
367///
368/// # Panics
369///
370/// Panics if the file type glob or progress template is invalid (compile-time constants).
371#[expect(
372    clippy::cast_possible_truncation,
373    reason = "file count is bounded by project size, well under u32::MAX"
374)]
375#[expect(
376    clippy::expect_used,
377    reason = "source file globs are hard-coded and the collector lock must remain usable"
378)]
379pub fn discover_files_with_additional_hidden_dirs(
380    config: &ResolvedConfig,
381    additional_hidden_dir_scopes: &[HiddenDirScope],
382) -> Vec<DiscoveredFile> {
383    let _span = tracing::info_span!("discover_files").entered();
384
385    let mut types_builder = ignore::types::TypesBuilder::new();
386    for ext in SOURCE_EXTENSIONS {
387        types_builder
388            .add("source", &format!("*.{ext}"))
389            .expect("valid glob");
390    }
391    types_builder.select("source");
392    let types = types_builder.build().expect("valid types");
393
394    let mut walk_builder = WalkBuilder::new(&config.root);
395    walk_builder
396        .hidden(false)
397        .git_ignore(true)
398        .git_global(true)
399        .git_exclude(true)
400        .types(types)
401        .threads(config.threads);
402    if additional_hidden_dir_scopes.is_empty() {
403        walk_builder.filter_entry(is_allowed_hidden);
404    } else {
405        let scopes = additional_hidden_dir_scopes.to_vec();
406        walk_builder.filter_entry(move |entry| is_allowed_hidden_with_scopes(entry, &scopes));
407    }
408
409    let production_excludes = if config.production {
410        let mut builder = globset::GlobSetBuilder::new();
411        for pattern in PRODUCTION_EXCLUDE_PATTERNS {
412            if let Ok(glob) = globset::GlobBuilder::new(pattern)
413                .literal_separator(true)
414                .build()
415            {
416                builder.add(glob);
417            }
418        }
419        builder.build().ok()
420    } else {
421        None
422    };
423
424    let collected: Mutex<Vec<(std::path::PathBuf, u64)>> = Mutex::new(Vec::new());
425    let mut visitor_builder = FileVisitorBuilder {
426        root: &config.root,
427        ignore_patterns: &config.ignore_patterns,
428        production_excludes: &production_excludes,
429        shared: &collected,
430    };
431    walk_builder.build_parallel().visit(&mut visitor_builder);
432
433    let mut raw = collected
434        .into_inner()
435        .expect("walk collector lock poisoned");
436    raw.sort_unstable_by(|a, b| a.0.cmp(&b.0));
437
438    // Drop any source-discovery diagnostics from a previous pass (watch-mode
439    // rerun, combined-mode re-walk) BEFORE re-recording this walk's skips, so a
440    // file that is no longer skipped does not leave a stale entry (issue #1086).
441    fallow_config::clear_source_discovery_diagnostics(&config.root);
442    let (kept, skipped) = partition_by_size(raw, config.max_file_size_bytes);
443    report_skipped_large_files(config, &skipped);
444
445    let files: Vec<DiscoveredFile> = kept
446        .into_iter()
447        .enumerate()
448        .map(|(idx, (path, size_bytes))| DiscoveredFile {
449            id: FileId(idx as u32),
450            path,
451            size_bytes,
452        })
453        .collect();
454
455    note_largest_files(config, &files);
456
457    files
458}
459
460#[cfg(test)]
461mod tests {
462    use std::ffi::OsStr;
463
464    use super::*;
465
466    #[test]
467    fn allowed_hidden_dirs() {
468        assert!(is_allowed_hidden_dir(OsStr::new(".storybook")));
469        assert!(is_allowed_hidden_dir(OsStr::new(".vitepress")));
470        assert!(is_allowed_hidden_dir(OsStr::new(".well-known")));
471        assert!(is_allowed_hidden_dir(OsStr::new(".changeset")));
472        assert!(is_allowed_hidden_dir(OsStr::new(".github")));
473    }
474
475    #[test]
476    fn disallowed_hidden_dirs() {
477        assert!(!is_allowed_hidden_dir(OsStr::new(".git")));
478        assert!(!is_allowed_hidden_dir(OsStr::new(".cache")));
479        assert!(!is_allowed_hidden_dir(OsStr::new(".vscode")));
480        assert!(!is_allowed_hidden_dir(OsStr::new(".fallow")));
481        assert!(!is_allowed_hidden_dir(OsStr::new(".next")));
482    }
483
484    #[test]
485    fn non_hidden_dirs_not_in_allowlist() {
486        assert!(!is_allowed_hidden_dir(OsStr::new("src")));
487        assert!(!is_allowed_hidden_dir(OsStr::new("node_modules")));
488    }
489
490    #[test]
491    fn source_extensions_include_typescript() {
492        assert!(SOURCE_EXTENSIONS.contains(&"ts"));
493        assert!(SOURCE_EXTENSIONS.contains(&"tsx"));
494        assert!(SOURCE_EXTENSIONS.contains(&"mts"));
495        assert!(SOURCE_EXTENSIONS.contains(&"cts"));
496        assert!(SOURCE_EXTENSIONS.contains(&"gts"));
497    }
498
499    #[test]
500    fn source_extensions_include_javascript() {
501        assert!(SOURCE_EXTENSIONS.contains(&"js"));
502        assert!(SOURCE_EXTENSIONS.contains(&"jsx"));
503        assert!(SOURCE_EXTENSIONS.contains(&"mjs"));
504        assert!(SOURCE_EXTENSIONS.contains(&"cjs"));
505        assert!(SOURCE_EXTENSIONS.contains(&"gjs"));
506    }
507
508    #[test]
509    fn source_extensions_include_sfc_formats() {
510        assert!(SOURCE_EXTENSIONS.contains(&"vue"));
511        assert!(SOURCE_EXTENSIONS.contains(&"svelte"));
512        assert!(SOURCE_EXTENSIONS.contains(&"astro"));
513    }
514
515    #[test]
516    fn source_extensions_include_styles() {
517        assert!(SOURCE_EXTENSIONS.contains(&"css"));
518        assert!(SOURCE_EXTENSIONS.contains(&"scss"));
519    }
520
521    #[test]
522    fn source_extensions_exclude_non_source() {
523        assert!(!SOURCE_EXTENSIONS.contains(&"json"));
524        assert!(!SOURCE_EXTENSIONS.contains(&"yaml"));
525        assert!(!SOURCE_EXTENSIONS.contains(&"md"));
526        assert!(!SOURCE_EXTENSIONS.contains(&"png"));
527        assert!(!SOURCE_EXTENSIONS.contains(&"htm"));
528    }
529
530    #[test]
531    fn source_extensions_include_html() {
532        assert!(SOURCE_EXTENSIONS.contains(&"html"));
533    }
534
535    #[test]
536    fn source_extensions_include_graphql_documents() {
537        assert!(SOURCE_EXTENSIONS.contains(&"graphql"));
538        assert!(SOURCE_EXTENSIONS.contains(&"gql"));
539    }
540
541    fn build_production_glob_set() -> globset::GlobSet {
542        let mut builder = globset::GlobSetBuilder::new();
543        for pattern in PRODUCTION_EXCLUDE_PATTERNS {
544            builder.add(
545                globset::GlobBuilder::new(pattern)
546                    .literal_separator(true)
547                    .build()
548                    .expect("valid glob pattern"),
549            );
550        }
551        builder.build().expect("valid glob set")
552    }
553
554    #[test]
555    fn production_excludes_test_files() {
556        let set = build_production_glob_set();
557        assert!(set.is_match("src/Button.test.ts"));
558        assert!(set.is_match("src/utils.spec.tsx"));
559        assert!(set.is_match("src/__tests__/helper.ts"));
560        assert!(!set.is_match("src/Button.ts"));
561        assert!(!set.is_match("src/utils.tsx"));
562    }
563
564    #[test]
565    fn production_excludes_story_files() {
566        let set = build_production_glob_set();
567        assert!(set.is_match("src/Button.stories.tsx"));
568        assert!(set.is_match("src/Card.story.ts"));
569        assert!(!set.is_match("src/Button.tsx"));
570    }
571
572    #[test]
573    fn production_excludes_config_files_at_root_only() {
574        let set = build_production_glob_set();
575        assert!(set.is_match("vitest.config.ts"));
576        assert!(set.is_match("jest.config.js"));
577        assert!(!set.is_match("src/app/app.config.ts"));
578        assert!(!set.is_match("src/app/app.config.server.ts"));
579        assert!(!set.is_match("packages/foo/vitest.config.ts"));
580        assert!(!set.is_match("src/config.ts"));
581    }
582
583    #[test]
584    fn production_patterns_are_valid_globs() {
585        let _ = build_production_glob_set();
586    }
587
588    #[test]
589    fn disallowed_hidden_dirs_idea() {
590        assert!(!is_allowed_hidden_dir(OsStr::new(".idea")));
591    }
592
593    #[test]
594    fn source_extensions_include_mdx() {
595        assert!(SOURCE_EXTENSIONS.contains(&"mdx"));
596    }
597
598    #[test]
599    fn source_extensions_exclude_image_and_data_formats() {
600        assert!(!SOURCE_EXTENSIONS.contains(&"png"));
601        assert!(!SOURCE_EXTENSIONS.contains(&"jpg"));
602        assert!(!SOURCE_EXTENSIONS.contains(&"svg"));
603        assert!(!SOURCE_EXTENSIONS.contains(&"txt"));
604        assert!(!SOURCE_EXTENSIONS.contains(&"csv"));
605        assert!(!SOURCE_EXTENSIONS.contains(&"wasm"));
606    }
607
608    #[test]
609    fn is_declaration_file_matches_dts_variants() {
610        assert!(is_declaration_file(Path::new("env.d.ts")));
611        assert!(is_declaration_file(Path::new("src/auto-imports.d.ts")));
612        assert!(is_declaration_file(Path::new("mod.d.mts")));
613        assert!(is_declaration_file(Path::new("compat.d.cts")));
614        assert!(!is_declaration_file(Path::new("index.ts")));
615        assert!(!is_declaration_file(Path::new("component.tsx")));
616        assert!(!is_declaration_file(Path::new("notes.d.txt")));
617    }
618
619    #[test]
620    fn format_size_mb_renders_one_decimal() {
621        assert_eq!(format_size_mb(5 * 1024 * 1024), "5.0 MB");
622        assert_eq!(format_size_mb(1024 * 1024 + 512 * 1024), "1.5 MB");
623        assert_eq!(format_size_mb(0), "0.0 MB");
624    }
625
626    #[test]
627    fn partition_by_size_no_limit_keeps_all() {
628        let raw = vec![(PathBuf::from("a.ts"), 10), (PathBuf::from("b.ts"), 10_000)];
629        let (kept, skipped) = partition_by_size(raw, None);
630        assert_eq!(kept.len(), 2);
631        assert!(skipped.is_empty());
632    }
633
634    #[test]
635    fn partition_by_size_skips_strictly_over_limit() {
636        let raw = vec![
637            (PathBuf::from("under.ts"), 99),
638            (PathBuf::from("exact.ts"), 100),
639            (PathBuf::from("over.ts"), 101),
640        ];
641        let (kept, skipped) = partition_by_size(raw, Some(100));
642        let kept_has = |name: &str| kept.iter().any(|(p, _)| p.as_path() == Path::new(name));
643        assert!(kept_has("under.ts"));
644        assert!(
645            kept_has("exact.ts"),
646            "a file exactly at the limit is kept (skip is strictly-greater)"
647        );
648        assert_eq!(skipped.len(), 1);
649        assert_eq!(skipped[0].0, PathBuf::from("over.ts"));
650    }
651
652    #[test]
653    fn partition_by_size_exempts_declaration_files() {
654        let raw = vec![
655            (PathBuf::from("huge.ts"), 10_000),
656            (PathBuf::from("auto-imports.d.ts"), 10_000),
657        ];
658        let (kept, skipped) = partition_by_size(raw, Some(100));
659        assert!(
660            kept.iter()
661                .any(|(p, _)| p.as_path() == Path::new("auto-imports.d.ts")),
662            "declaration files are exempt from the size skip regardless of size"
663        );
664        assert_eq!(skipped.len(), 1);
665        assert_eq!(skipped[0].0, PathBuf::from("huge.ts"));
666    }
667
668    fn disco(path: &str, size_bytes: u64) -> DiscoveredFile {
669        DiscoveredFile {
670            id: FileId(0),
671            path: PathBuf::from(path),
672            size_bytes,
673        }
674    }
675
676    #[test]
677    fn largest_files_note_below_threshold_is_none() {
678        let files = [disco("a.ts", 100), disco("b.ts", 200)];
679        assert!(build_largest_files_note(Path::new("/p"), &files).is_none());
680    }
681
682    #[test]
683    fn largest_files_note_single_file_uses_singular() {
684        let files = [disco("big.ts", 5 * 1024 * 1024)];
685        let note = build_largest_files_note(Path::new("/p"), &files).expect("note fires");
686        assert!(
687            note.contains("discovered 1 file;"),
688            "singular noun on the single-big-file path (issue #1086 regression): {note}"
689        );
690        assert!(!note.contains("discovered 1 files"));
691        assert!(note.contains("big.ts (5.0 MB)"));
692    }
693
694    #[test]
695    fn largest_files_note_filters_sub_floor_files() {
696        let files = [disco("big.ts", 5 * 1024 * 1024), disco("tiny.ts", 10)];
697        let note = build_largest_files_note(Path::new("/p"), &files).expect("note fires");
698        assert!(note.contains("discovered 2 files;"));
699        assert!(note.contains("big.ts (5.0 MB)"));
700        assert!(
701            !note.contains("tiny.ts"),
702            "sub-floor files are not listed as `0.0 MB` chaff: {note}"
703        );
704    }
705
706    #[test]
707    fn largest_files_note_large_set_no_big_file_omits_list() {
708        let files: Vec<DiscoveredFile> = (0..=LARGE_SET_THRESHOLD)
709            .map(|i| disco(&format!("f{i}.ts"), 100))
710            .collect();
711        let note = build_largest_files_note(Path::new("/p"), &files).expect("large set fires");
712        assert!(note.contains(&format!("discovered {} files", LARGE_SET_THRESHOLD + 1)));
713        assert!(
714            !note.contains("largest:"),
715            "no sub-floor `largest:` list when no file clears the floor: {note}"
716        );
717    }
718
719    mod discover_files_integration {
720        use std::path::PathBuf;
721
722        use fallow_config::{
723            DuplicatesConfig, FallowConfig, FlagsConfig, HealthConfig, OutputFormat, ResolveConfig,
724            RulesConfig,
725        };
726
727        use super::*;
728
729        /// Create a minimal ResolvedConfig pointing at the given root directory.
730        fn make_config(root: PathBuf, production: bool) -> ResolvedConfig {
731            FallowConfig {
732                production: production.into(),
733                ..Default::default()
734            }
735            .resolve(root, OutputFormat::Human, 1, true, true, None)
736        }
737
738        /// Helper to collect discovered file names (relative to root) for assertions.
739        /// Normalizes path separators to `/` for cross-platform test consistency.
740        fn file_names(files: &[DiscoveredFile], root: &std::path::Path) -> Vec<String> {
741            files
742                .iter()
743                .map(|f| {
744                    f.path
745                        .strip_prefix(root)
746                        .unwrap_or(&f.path)
747                        .to_string_lossy()
748                        .replace('\\', "/")
749                })
750                .collect()
751        }
752
753        #[test]
754        fn discovers_source_files_with_valid_extensions() {
755            let dir = tempfile::tempdir().expect("create temp dir");
756            let src = dir.path().join("src");
757            std::fs::create_dir_all(&src).unwrap();
758
759            std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();
760            std::fs::write(src.join("component.tsx"), "export default () => {};").unwrap();
761            std::fs::write(src.join("utils.js"), "module.exports = {};").unwrap();
762            std::fs::write(src.join("helper.jsx"), "export const h = 1;").unwrap();
763            std::fs::write(src.join("config.mjs"), "export default {};").unwrap();
764            std::fs::write(src.join("legacy.cjs"), "module.exports = {};").unwrap();
765            std::fs::write(src.join("types.mts"), "export type T = string;").unwrap();
766            std::fs::write(src.join("compat.cts"), "module.exports = {};").unwrap();
767
768            let config = make_config(dir.path().to_path_buf(), false);
769            let files = discover_files(&config);
770            let names = file_names(&files, dir.path());
771
772            assert!(names.contains(&"src/app.ts".to_string()));
773            assert!(names.contains(&"src/component.tsx".to_string()));
774            assert!(names.contains(&"src/utils.js".to_string()));
775            assert!(names.contains(&"src/helper.jsx".to_string()));
776            assert!(names.contains(&"src/config.mjs".to_string()));
777            assert!(names.contains(&"src/legacy.cjs".to_string()));
778            assert!(names.contains(&"src/types.mts".to_string()));
779            assert!(names.contains(&"src/compat.cts".to_string()));
780        }
781
782        #[test]
783        fn excludes_non_source_extensions() {
784            let dir = tempfile::tempdir().expect("create temp dir");
785            let src = dir.path().join("src");
786            std::fs::create_dir_all(&src).unwrap();
787
788            std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();
789
790            std::fs::write(src.join("data.json"), "{}").unwrap();
791            std::fs::write(src.join("readme.md"), "# Hello").unwrap();
792            std::fs::write(src.join("notes.txt"), "notes").unwrap();
793            std::fs::write(src.join("logo.png"), [0u8; 8]).unwrap();
794
795            let config = make_config(dir.path().to_path_buf(), false);
796            let files = discover_files(&config);
797            let names = file_names(&files, dir.path());
798
799            assert_eq!(names.len(), 1, "only the .ts file should be discovered");
800            assert!(names.contains(&"src/app.ts".to_string()));
801        }
802
803        #[test]
804        fn excludes_disallowed_hidden_directories() {
805            let dir = tempfile::tempdir().expect("create temp dir");
806
807            let git_dir = dir.path().join(".git");
808            std::fs::create_dir_all(&git_dir).unwrap();
809            std::fs::write(git_dir.join("hooks.ts"), "// git hook").unwrap();
810
811            let idea_dir = dir.path().join(".idea");
812            std::fs::create_dir_all(&idea_dir).unwrap();
813            std::fs::write(idea_dir.join("workspace.ts"), "// idea").unwrap();
814
815            let cache_dir = dir.path().join(".cache");
816            std::fs::create_dir_all(&cache_dir).unwrap();
817            std::fs::write(cache_dir.join("cached.js"), "// cached").unwrap();
818
819            let src = dir.path().join("src");
820            std::fs::create_dir_all(&src).unwrap();
821            std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();
822
823            let config = make_config(dir.path().to_path_buf(), false);
824            let files = discover_files(&config);
825            let names = file_names(&files, dir.path());
826
827            assert_eq!(names.len(), 1, "only src/app.ts should be discovered");
828            assert!(names.contains(&"src/app.ts".to_string()));
829        }
830
831        #[test]
832        fn includes_allowed_hidden_directories() {
833            let dir = tempfile::tempdir().expect("create temp dir");
834
835            let storybook = dir.path().join(".storybook");
836            std::fs::create_dir_all(&storybook).unwrap();
837            std::fs::write(storybook.join("main.ts"), "export default {};").unwrap();
838
839            let github = dir.path().join(".github");
840            std::fs::create_dir_all(&github).unwrap();
841            std::fs::write(github.join("actions.js"), "module.exports = {};").unwrap();
842
843            let changeset = dir.path().join(".changeset");
844            std::fs::create_dir_all(&changeset).unwrap();
845            std::fs::write(changeset.join("config.js"), "module.exports = {};").unwrap();
846
847            let config = make_config(dir.path().to_path_buf(), false);
848            let files = discover_files(&config);
849            let names = file_names(&files, dir.path());
850
851            assert!(
852                names.contains(&".storybook/main.ts".to_string()),
853                "files in .storybook should be discovered"
854            );
855            assert!(
856                names.contains(&".github/actions.js".to_string()),
857                "files in .github should be discovered"
858            );
859            assert!(
860                names.contains(&".changeset/config.js".to_string()),
861                "files in .changeset should be discovered"
862            );
863        }
864
865        #[test]
866        fn default_discovery_excludes_client_and_server_hidden_directories() {
867            let dir = tempfile::tempdir().expect("create temp dir");
868            let app = dir.path().join("app");
869            std::fs::create_dir_all(app.join(".client")).unwrap();
870            std::fs::create_dir_all(app.join(".server")).unwrap();
871            std::fs::write(app.join(".client/analytics.ts"), "export const a = 1;").unwrap();
872            std::fs::write(app.join(".server/db.ts"), "export const db = {};").unwrap();
873            std::fs::write(app.join("root.tsx"), "export default function Root() {}").unwrap();
874
875            let config = make_config(dir.path().to_path_buf(), false);
876            let files = discover_files(&config);
877            let names = file_names(&files, dir.path());
878
879            assert!(names.contains(&"app/root.tsx".to_string()));
880            assert!(!names.contains(&"app/.client/analytics.ts".to_string()));
881            assert!(!names.contains(&"app/.server/db.ts".to_string()));
882        }
883
884        #[test]
885        fn scoped_hidden_dirs_include_client_and_server_under_package_root() {
886            let dir = tempfile::tempdir().expect("create temp dir");
887            let package = dir.path().join("packages/app");
888            std::fs::create_dir_all(package.join("app/.client")).unwrap();
889            std::fs::create_dir_all(package.join("app/.server")).unwrap();
890            std::fs::write(
891                package.join("app/.client/analytics.ts"),
892                "export const track = () => {};",
893            )
894            .unwrap();
895            std::fs::write(package.join("app/.server/db.ts"), "export const db = {};").unwrap();
896
897            let config = make_config(dir.path().to_path_buf(), false);
898            let scopes = [HiddenDirScope::new(
899                package,
900                vec![".client".to_string(), ".server".to_string()],
901            )];
902            let files = discover_files_with_additional_hidden_dirs(&config, &scopes);
903            let names = file_names(&files, dir.path());
904
905            assert!(names.contains(&"packages/app/app/.client/analytics.ts".to_string()));
906            assert!(names.contains(&"packages/app/app/.server/db.ts".to_string()));
907        }
908
909        #[test]
910        fn scoped_hidden_dirs_do_not_include_unscoped_packages() {
911            let dir = tempfile::tempdir().expect("create temp dir");
912            let active = dir.path().join("packages/active");
913            let inactive = dir.path().join("packages/inactive");
914            std::fs::create_dir_all(active.join("app/.server")).unwrap();
915            std::fs::create_dir_all(inactive.join("app/.server")).unwrap();
916            std::fs::write(active.join("app/.server/db.ts"), "export const db = {};").unwrap();
917            std::fs::write(inactive.join("app/.server/db.ts"), "export const db = {};").unwrap();
918
919            let config = make_config(dir.path().to_path_buf(), false);
920            let scopes = [HiddenDirScope::new(active, vec![".server".to_string()])];
921            let files = discover_files_with_additional_hidden_dirs(&config, &scopes);
922            let names = file_names(&files, dir.path());
923
924            assert!(names.contains(&"packages/active/app/.server/db.ts".to_string()));
925            assert!(!names.contains(&"packages/inactive/app/.server/db.ts".to_string()));
926        }
927
928        #[test]
929        fn excludes_root_build_directory() {
930            let dir = tempfile::tempdir().expect("create temp dir");
931
932            std::fs::write(dir.path().join(".ignore"), "/build/\n").unwrap();
933
934            let build_dir = dir.path().join("build");
935            std::fs::create_dir_all(&build_dir).unwrap();
936            std::fs::write(build_dir.join("output.js"), "// build output").unwrap();
937
938            let src = dir.path().join("src");
939            std::fs::create_dir_all(&src).unwrap();
940            std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();
941
942            let config = make_config(dir.path().to_path_buf(), false);
943            let files = discover_files(&config);
944            let names = file_names(&files, dir.path());
945
946            assert_eq!(names.len(), 1, "root build/ should be excluded via .ignore");
947            assert!(names.contains(&"src/app.ts".to_string()));
948        }
949
950        #[test]
951        fn includes_nested_build_directory() {
952            let dir = tempfile::tempdir().expect("create temp dir");
953
954            let nested_build = dir.path().join("src").join("build");
955            std::fs::create_dir_all(&nested_build).unwrap();
956            std::fs::write(nested_build.join("helper.ts"), "export const h = 1;").unwrap();
957
958            let config = make_config(dir.path().to_path_buf(), false);
959            let files = discover_files(&config);
960            let names = file_names(&files, dir.path());
961
962            assert!(
963                names.contains(&"src/build/helper.ts".to_string()),
964                "nested build/ directories should be included"
965            );
966        }
967
968        #[test]
969        #[expect(
970            clippy::cast_possible_truncation,
971            reason = "test file counts are trivially small"
972        )]
973        fn file_ids_are_sequential_after_sorting() {
974            let dir = tempfile::tempdir().expect("create temp dir");
975            let src = dir.path().join("src");
976            std::fs::create_dir_all(&src).unwrap();
977
978            std::fs::write(src.join("z_last.ts"), "export const z = 1;").unwrap();
979            std::fs::write(src.join("a_first.ts"), "export const a = 1;").unwrap();
980            std::fs::write(src.join("m_middle.ts"), "export const m = 1;").unwrap();
981
982            let config = make_config(dir.path().to_path_buf(), false);
983            let files = discover_files(&config);
984
985            for (idx, file) in files.iter().enumerate() {
986                assert_eq!(file.id, FileId(idx as u32), "FileId should be sequential");
987            }
988
989            for pair in files.windows(2) {
990                assert!(
991                    pair[0].path < pair[1].path,
992                    "files should be sorted by path"
993                );
994            }
995        }
996
997        #[test]
998        fn production_mode_excludes_test_files() {
999            let dir = tempfile::tempdir().expect("create temp dir");
1000            let src = dir.path().join("src");
1001            std::fs::create_dir_all(&src).unwrap();
1002
1003            std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();
1004            std::fs::write(src.join("app.test.ts"), "test('a', () => {});").unwrap();
1005            std::fs::write(src.join("app.spec.ts"), "describe('a', () => {});").unwrap();
1006            std::fs::write(src.join("app.stories.tsx"), "export default {};").unwrap();
1007
1008            let config = make_config(dir.path().to_path_buf(), true);
1009            let files = discover_files(&config);
1010            let names = file_names(&files, dir.path());
1011
1012            assert!(
1013                names.contains(&"src/app.ts".to_string()),
1014                "source files should be included in production mode"
1015            );
1016            assert!(
1017                !names.contains(&"src/app.test.ts".to_string()),
1018                "test files should be excluded in production mode"
1019            );
1020            assert!(
1021                !names.contains(&"src/app.spec.ts".to_string()),
1022                "spec files should be excluded in production mode"
1023            );
1024            assert!(
1025                !names.contains(&"src/app.stories.tsx".to_string()),
1026                "story files should be excluded in production mode"
1027            );
1028        }
1029
1030        #[test]
1031        fn non_production_mode_includes_test_files() {
1032            let dir = tempfile::tempdir().expect("create temp dir");
1033            let src = dir.path().join("src");
1034            std::fs::create_dir_all(&src).unwrap();
1035
1036            std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();
1037            std::fs::write(src.join("app.test.ts"), "test('a', () => {});").unwrap();
1038
1039            let config = make_config(dir.path().to_path_buf(), false);
1040            let files = discover_files(&config);
1041            let names = file_names(&files, dir.path());
1042
1043            assert!(names.contains(&"src/app.ts".to_string()));
1044            assert!(
1045                names.contains(&"src/app.test.ts".to_string()),
1046                "test files should be included in non-production mode"
1047            );
1048        }
1049
1050        #[test]
1051        fn empty_directory_returns_no_files() {
1052            let dir = tempfile::tempdir().expect("create temp dir");
1053            let config = make_config(dir.path().to_path_buf(), false);
1054            let files = discover_files(&config);
1055            assert!(files.is_empty(), "empty project should discover no files");
1056        }
1057
1058        #[test]
1059        fn hidden_files_not_discovered_as_source() {
1060            let dir = tempfile::tempdir().expect("create temp dir");
1061
1062            std::fs::write(dir.path().join(".env"), "SECRET=abc").unwrap();
1063            std::fs::write(dir.path().join(".gitignore"), "node_modules").unwrap();
1064            std::fs::write(dir.path().join(".eslintrc.js"), "module.exports = {};").unwrap();
1065
1066            let src = dir.path().join("src");
1067            std::fs::create_dir_all(&src).unwrap();
1068            std::fs::write(src.join("app.ts"), "export const a = 1;").unwrap();
1069
1070            let config = make_config(dir.path().to_path_buf(), false);
1071            let files = discover_files(&config);
1072            let names = file_names(&files, dir.path());
1073
1074            assert!(
1075                !names.contains(&".env".to_string()),
1076                ".env should not be discovered"
1077            );
1078            assert!(
1079                !names.contains(&".gitignore".to_string()),
1080                ".gitignore should not be discovered"
1081            );
1082        }
1083
1084        /// Create a config with custom ignore patterns.
1085        fn make_config_with_ignores(root: PathBuf, ignores: Vec<String>) -> ResolvedConfig {
1086            FallowConfig {
1087                schema: None,
1088                extends: vec![],
1089                entry: vec![],
1090                ignore_patterns: ignores,
1091                framework: vec![],
1092                workspaces: None,
1093                ignore_dependencies: vec![],
1094                ignore_unresolved_imports: vec![],
1095                ignore_exports: vec![],
1096                ignore_catalog_references: vec![],
1097                ignore_dependency_overrides: vec![],
1098                ignore_exports_used_in_file: fallow_config::IgnoreExportsUsedInFileConfig::default(
1099                ),
1100                used_class_members: vec![],
1101                ignore_decorators: vec![],
1102                duplicates: DuplicatesConfig::default(),
1103                health: HealthConfig::default(),
1104                rules: RulesConfig::default(),
1105                boundaries: fallow_config::BoundaryConfig::default(),
1106                production: false.into(),
1107                plugins: vec![],
1108                dynamically_loaded: vec![],
1109                overrides: vec![],
1110                regression: None,
1111                audit: fallow_config::AuditConfig::default(),
1112                codeowners: None,
1113                public_packages: vec![],
1114                flags: FlagsConfig::default(),
1115                security: fallow_config::SecurityConfig::default(),
1116                fix: fallow_config::FixConfig::default(),
1117                resolve: ResolveConfig::default(),
1118                sealed: false,
1119                include_entry_exports: false,
1120                auto_imports: false,
1121                cache: fallow_config::CacheConfig::default(),
1122            }
1123            .resolve(root, OutputFormat::Human, 1, true, true, None)
1124        }
1125
1126        #[test]
1127        fn custom_ignore_patterns_exclude_matching_files() {
1128            let dir = tempfile::tempdir().expect("create temp dir");
1129
1130            let generated = dir.path().join("src").join("api").join("generated");
1131            std::fs::create_dir_all(&generated).unwrap();
1132            std::fs::write(generated.join("client.ts"), "export const api = {};").unwrap();
1133
1134            let client = dir.path().join("src").join("api").join("client");
1135            std::fs::create_dir_all(&client).unwrap();
1136            std::fs::write(client.join("fetch.ts"), "export const fetch = {};").unwrap();
1137
1138            let src = dir.path().join("src");
1139            std::fs::write(src.join("index.ts"), "export const x = 1;").unwrap();
1140
1141            let config = make_config_with_ignores(
1142                dir.path().to_path_buf(),
1143                vec![
1144                    "src/api/generated/**".to_string(),
1145                    "src/api/client/**".to_string(),
1146                ],
1147            );
1148            let files = discover_files(&config);
1149            let names = file_names(&files, dir.path());
1150
1151            assert_eq!(names.len(), 1, "only non-ignored files: {names:?}");
1152            assert!(names.contains(&"src/index.ts".to_string()));
1153        }
1154
1155        #[test]
1156        fn default_ignore_patterns_exclude_node_modules_and_dist() {
1157            let dir = tempfile::tempdir().expect("create temp dir");
1158
1159            let nm = dir.path().join("node_modules").join("lodash");
1160            std::fs::create_dir_all(&nm).unwrap();
1161            std::fs::write(nm.join("lodash.js"), "module.exports = {};").unwrap();
1162
1163            let dist = dir.path().join("dist");
1164            std::fs::create_dir_all(&dist).unwrap();
1165            std::fs::write(dist.join("bundle.js"), "// bundled").unwrap();
1166
1167            let src = dir.path().join("src");
1168            std::fs::create_dir_all(&src).unwrap();
1169            std::fs::write(src.join("index.ts"), "export const x = 1;").unwrap();
1170
1171            let config = make_config(dir.path().to_path_buf(), false);
1172            let files = discover_files(&config);
1173            let names = file_names(&files, dir.path());
1174
1175            assert_eq!(names.len(), 1);
1176            assert!(names.contains(&"src/index.ts".to_string()));
1177        }
1178
1179        #[test]
1180        fn default_ignore_patterns_exclude_root_build() {
1181            let dir = tempfile::tempdir().expect("create temp dir");
1182
1183            let build = dir.path().join("build");
1184            std::fs::create_dir_all(&build).unwrap();
1185            std::fs::write(build.join("output.js"), "// built").unwrap();
1186
1187            let nested_build = dir.path().join("src").join("build");
1188            std::fs::create_dir_all(&nested_build).unwrap();
1189            std::fs::write(nested_build.join("helper.ts"), "export const h = 1;").unwrap();
1190
1191            let src = dir.path().join("src");
1192            std::fs::write(src.join("index.ts"), "export const x = 1;").unwrap();
1193
1194            let config = make_config(dir.path().to_path_buf(), false);
1195            let files = discover_files(&config);
1196            let names = file_names(&files, dir.path());
1197
1198            assert_eq!(
1199                names.len(),
1200                2,
1201                "root build/ excluded, nested kept: {names:?}"
1202            );
1203            assert!(names.contains(&"src/index.ts".to_string()));
1204            assert!(names.contains(&"src/build/helper.ts".to_string()));
1205        }
1206
1207        /// Resolve a config then override the per-file size limit in bytes.
1208        fn make_config_with_max_file_size(
1209            root: PathBuf,
1210            max_file_size_bytes: Option<u64>,
1211        ) -> ResolvedConfig {
1212            let mut config = make_config(root, false);
1213            config.max_file_size_bytes = max_file_size_bytes;
1214            config
1215        }
1216
1217        #[test]
1218        fn skips_files_over_max_file_size() {
1219            let dir = tempfile::tempdir().expect("create temp dir");
1220            let src = dir.path().join("src");
1221            std::fs::create_dir_all(&src).unwrap();
1222            std::fs::write(src.join("small.ts"), "export const a = 1;").unwrap();
1223            std::fs::write(src.join("huge.ts"), "x".repeat(5_000)).unwrap();
1224
1225            let config = make_config_with_max_file_size(dir.path().to_path_buf(), Some(1_000));
1226            let files = discover_files(&config);
1227            let names = file_names(&files, dir.path());
1228
1229            assert!(names.contains(&"src/small.ts".to_string()));
1230            assert!(
1231                !names.contains(&"src/huge.ts".to_string()),
1232                "a file over the size limit must not be discovered"
1233            );
1234        }
1235
1236        #[test]
1237        fn declaration_files_exempt_from_size_skip() {
1238            let dir = tempfile::tempdir().expect("create temp dir");
1239            let src = dir.path().join("src");
1240            std::fs::create_dir_all(&src).unwrap();
1241            std::fs::write(src.join("auto-imports.d.ts"), "x".repeat(5_000)).unwrap();
1242            std::fs::write(src.join("huge.ts"), "x".repeat(5_000)).unwrap();
1243
1244            let config = make_config_with_max_file_size(dir.path().to_path_buf(), Some(1_000));
1245            let files = discover_files(&config);
1246            let names = file_names(&files, dir.path());
1247
1248            assert!(
1249                names.contains(&"src/auto-imports.d.ts".to_string()),
1250                "a large .d.ts is exempt from the skip (reachability root for global types)"
1251            );
1252            assert!(!names.contains(&"src/huge.ts".to_string()));
1253        }
1254
1255        #[test]
1256        fn unlimited_size_keeps_large_files() {
1257            let dir = tempfile::tempdir().expect("create temp dir");
1258            let src = dir.path().join("src");
1259            std::fs::create_dir_all(&src).unwrap();
1260            std::fs::write(src.join("huge.ts"), "x".repeat(5_000)).unwrap();
1261
1262            let config = make_config_with_max_file_size(dir.path().to_path_buf(), None);
1263            let files = discover_files(&config);
1264            let names = file_names(&files, dir.path());
1265
1266            assert!(
1267                names.contains(&"src/huge.ts".to_string()),
1268                "no limit keeps every file"
1269            );
1270        }
1271
1272        #[test]
1273        fn skipped_file_recorded_in_workspace_diagnostics() {
1274            let dir = tempfile::tempdir().expect("create temp dir");
1275            let src = dir.path().join("src");
1276            std::fs::create_dir_all(&src).unwrap();
1277            std::fs::write(src.join("huge.ts"), "x".repeat(5_000)).unwrap();
1278
1279            let config = make_config_with_max_file_size(dir.path().to_path_buf(), Some(1_000));
1280            let _ = discover_files(&config);
1281
1282            let diagnostics = fallow_config::workspace_diagnostics_for(dir.path());
1283            let skipped: Vec<_> = diagnostics
1284                .iter()
1285                .filter(|d| {
1286                    matches!(
1287                        d.kind,
1288                        fallow_config::WorkspaceDiagnosticKind::SkippedLargeFile { .. }
1289                    )
1290                })
1291                .collect();
1292            assert_eq!(
1293                skipped.len(),
1294                1,
1295                "the skipped file is recorded in workspace diagnostics for JSON output"
1296            );
1297            assert!(skipped[0].path.ends_with("src/huge.ts"));
1298            assert!(
1299                matches!(
1300                    skipped[0].kind,
1301                    fallow_config::WorkspaceDiagnosticKind::SkippedLargeFile { size_bytes }
1302                        if size_bytes == 5_000
1303                ),
1304                "the recorded diagnostic carries the on-disk byte size"
1305            );
1306        }
1307    }
1308}