Skip to main content

fallow_extract/
lib.rs

1//! Parsing and extraction engine for fallow codebase intelligence.
2//!
3//! This crate handles all file parsing: JS/TS via Oxc, Vue/Svelte SFC extraction,
4//! Astro frontmatter, MDX import/export extraction, CSS Module class name extraction,
5//! HTML asset reference extraction, and incremental caching of parse results.
6
7#![warn(missing_docs)]
8
9mod asset_url;
10pub mod astro;
11pub mod cache;
12pub(crate) mod complexity;
13pub mod css;
14pub mod flags;
15pub mod graphql;
16pub mod html;
17pub mod inventory;
18pub mod mdx;
19mod parse;
20pub mod sfc;
21mod sfc_template;
22pub mod suppress;
23pub(crate) mod template_complexity;
24mod template_usage;
25pub mod visitor;
26
27use std::path::Path;
28
29use rayon::prelude::*;
30
31use cache::CacheStore;
32use fallow_types::discover::{DiscoveredFile, FileId};
33
34// Re-export all extract types from fallow-types
35pub use fallow_types::extract::{
36    ClassHeritageInfo, DynamicImportInfo, DynamicImportPattern, ExportInfo, ExportName, ImportInfo,
37    ImportedName, LocalTypeDeclaration, MemberAccess, MemberInfo, MemberKind, ModuleInfo,
38    ParseResult, PublicSignatureTypeReference, ReExportInfo, RequireCallInfo, VisibilityTag,
39    compute_line_offsets,
40};
41
42// Re-export extraction functions for internal use and fuzzing
43pub use astro::extract_astro_frontmatter;
44pub use css::extract_css_module_exports;
45pub use mdx::extract_mdx_statements;
46pub use sfc::{extract_sfc_scripts, is_sfc_file};
47pub use sfc_template::angular::ANGULAR_TPL_SENTINEL;
48
49/// Synthetic member-access object used to carry exported-instance bindings.
50///
51/// `MemberAccess { object: format!("{INSTANCE_EXPORT_SENTINEL}{export_name}"), member: target }`
52/// means the exported value named `export_name` is an instance of the local
53/// class/interface symbol named `target`.
///
/// The sentinel value already ends with `:`, so the export name is appended
/// directly after it without any additional separator.
54pub const INSTANCE_EXPORT_SENTINEL: &str = "__fallow_instance_export__:";
55
56/// Synthetic member-access object prefix for typed Playwright fixtures.
57///
58/// `MemberAccess { object: format!("{PLAYWRIGHT_FIXTURE_DEF_SENTINEL}{test}:{fixture}"), member: type_name }`
59/// means the exported Playwright test object named `test` provides a fixture
60/// named `fixture` whose declared type is `type_name`.
///
/// The prefix itself ends with `:`; the test-object name and the fixture name
/// that follow it are separated by a single `:`.
61pub const PLAYWRIGHT_FIXTURE_DEF_SENTINEL: &str = "__fallow_playwright_fixture_def__:";
62
63/// Synthetic member-access object prefix for Playwright fixture member uses.
64///
65/// `MemberAccess { object: format!("{PLAYWRIGHT_FIXTURE_USE_SENTINEL}{test}:{fixture}"), member }`
66/// means a callback passed to the Playwright test object named `test`
67/// destructures `fixture` and accesses `fixture.member`.
///
/// The prefix itself ends with `:`; the test-object name and the fixture name
/// that follow it are separated by a single `:`.
68pub const PLAYWRIGHT_FIXTURE_USE_SENTINEL: &str = "__fallow_playwright_fixture_use__:";
69
70use parse::parse_source_to_module;
71
72/// Parse all files in parallel, extracting imports and exports.
73/// Uses the cache to skip reparsing files whose content hasn't changed.
74///
75/// When `need_complexity` is true, per-function cyclomatic/cognitive complexity
76/// metrics are computed during parsing (needed by the `health` command).
77/// Pass `false` for dead-code analysis where complexity data is unused.
78pub fn parse_all_files(
79    files: &[DiscoveredFile],
80    cache: Option<&CacheStore>,
81    need_complexity: bool,
82) -> ParseResult {
83    use std::sync::atomic::{AtomicUsize, Ordering};
84    let cache_hits = AtomicUsize::new(0);
85    let cache_misses = AtomicUsize::new(0);
86
87    let modules: Vec<ModuleInfo> = files
88        .par_iter()
89        .filter_map(|file| {
90            parse_single_file_cached(file, cache, &cache_hits, &cache_misses, need_complexity)
91        })
92        .collect();
93
94    let hits = cache_hits.load(Ordering::Relaxed);
95    let misses = cache_misses.load(Ordering::Relaxed);
96    if hits > 0 || misses > 0 {
97        tracing::info!(
98            cache_hits = hits,
99            cache_misses = misses,
100            "incremental cache stats"
101        );
102    }
103
104    ParseResult {
105        modules,
106        cache_hits: hits,
107        cache_misses: misses,
108    }
109}
110
/// Return the file's modification time as whole seconds since the Unix epoch.
///
/// Falls back to `0` when the modification time is unavailable from the
/// metadata or lies before the epoch.
fn mtime_secs(metadata: &std::fs::Metadata) -> u64 {
    let Ok(modified_at) = metadata.modified() else {
        return 0;
    };
    match modified_at.duration_since(std::time::SystemTime::UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        // Timestamp is earlier than the epoch.
        Err(_) => 0,
    }
}
120
121/// Parse a single file, consulting the cache first.
122///
123/// Cache validation strategy (fast path -> slow path):
124/// 1. `stat()` the file to get mtime + size (single syscall, no file read)
125/// 2. If mtime+size match the cached entry -> cache hit, return immediately
126/// 3. If mtime+size differ -> read file, compute content hash
127/// 4. If content hash matches cached entry -> cache hit (file was `touch`ed but unchanged)
128/// 5. Otherwise -> cache miss, full parse
129fn parse_single_file_cached(
130    file: &DiscoveredFile,
131    cache: Option<&CacheStore>,
132    cache_hits: &std::sync::atomic::AtomicUsize,
133    cache_misses: &std::sync::atomic::AtomicUsize,
134    need_complexity: bool,
135) -> Option<ModuleInfo> {
136    use std::sync::atomic::Ordering;
137
138    // Fast path: check mtime+size before reading file content.
139    // A single stat() syscall is ~100x cheaper than read()+hash().
140    if let Some(store) = cache
141        && let Ok(metadata) = std::fs::metadata(&file.path)
142    {
143        let mt = mtime_secs(&metadata);
144        let sz = metadata.len();
145        if let Some(cached) = store.get_by_metadata(&file.path, mt, sz) {
146            // When complexity is requested but the cached entry lacks it
147            // (populated by a prior `check` run), skip the cache and re-parse.
148            if !need_complexity || !cached.complexity.is_empty() {
149                cache_hits.fetch_add(1, Ordering::Relaxed);
150                return Some(cache::cached_to_module(cached, file.id));
151            }
152        }
153    }
154
155    // Slow path: read file content and compute content hash.
156    let source = std::fs::read_to_string(&file.path).ok()?;
157    let content_hash = xxhash_rust::xxh3::xxh3_64(source.as_bytes());
158
159    // Check cache by content hash (handles touch/save-without-change)
160    if let Some(store) = cache
161        && let Some(cached) = store.get(&file.path, content_hash)
162        && (!need_complexity || !cached.complexity.is_empty())
163    {
164        cache_hits.fetch_add(1, Ordering::Relaxed);
165        return Some(cache::cached_to_module(cached, file.id));
166    }
167    cache_misses.fetch_add(1, Ordering::Relaxed);
168
169    // Cache miss, do a full parse
170    Some(parse_source_to_module(
171        file.id,
172        &file.path,
173        &source,
174        content_hash,
175        need_complexity,
176    ))
177}
178
179/// Parse a single file and extract module information (without complexity).
180#[must_use]
181pub fn parse_single_file(file: &DiscoveredFile) -> Option<ModuleInfo> {
182    let source = std::fs::read_to_string(&file.path).ok()?;
183    let content_hash = xxhash_rust::xxh3::xxh3_64(source.as_bytes());
184    Some(parse_source_to_module(
185        file.id,
186        &file.path,
187        &source,
188        content_hash,
189        false,
190    ))
191}
192
193/// Parse from in-memory content (for LSP, includes complexity).
194#[must_use]
195pub fn parse_from_content(file_id: FileId, path: &Path, content: &str) -> ModuleInfo {
196    let content_hash = xxhash_rust::xxh3::xxh3_64(content.as_bytes());
197    parse_source_to_module(file_id, path, content, content_hash, true)
198}
199
200// Parser integration tests invoke Oxc, which runs ~1000x slower under Miri, so they are skipped there.
201// Unit tests in individual modules (visitor, suppress, sfc, css, etc.) still run.
202#[cfg(all(test, not(miri)))]
203mod tests;