Skip to main content

fallow_extract/
lib.rs

1//! Parsing and extraction engine for fallow codebase intelligence.
2//!
3//! This crate handles all file parsing: JS/TS via Oxc, Vue/Svelte SFC extraction,
4//! Astro frontmatter, MDX import/export extraction, CSS Module class name extraction,
5//! HTML asset reference extraction, and incremental caching of parse results.
6
7#![warn(missing_docs)]
8
9mod asset_url;
10pub mod astro;
11pub mod cache;
12pub(crate) mod complexity;
13pub mod css;
14pub mod flags;
15pub mod glimmer;
16pub mod graphql;
17pub mod html;
18pub mod inventory;
19pub mod mdx;
20mod parse;
21pub mod sfc;
22mod sfc_template;
23pub mod suppress;
24pub(crate) mod template_complexity;
25mod template_usage;
26pub mod visitor;
27
28use std::path::Path;
29
30use rayon::prelude::*;
31
32use cache::CacheStore;
33use fallow_types::discover::{DiscoveredFile, FileId};
34
35// Re-export all extract types from fallow-types
36pub use fallow_types::extract::{
37    ClassHeritageInfo, DynamicImportInfo, DynamicImportPattern, ExportInfo, ExportName, ImportInfo,
38    ImportedName, LocalTypeDeclaration, MemberAccess, MemberInfo, MemberKind, ModuleInfo,
39    ParseResult, PublicSignatureTypeReference, ReExportInfo, RequireCallInfo, VisibilityTag,
40    compute_line_offsets,
41};
42
43// Re-export extraction functions for internal use and fuzzing
44pub use astro::extract_astro_frontmatter;
45pub use css::extract_css_module_exports;
46pub use glimmer::{is_glimmer_file, strip_glimmer_templates};
47pub use mdx::extract_mdx_statements;
48pub use sfc::{extract_sfc_scripts, is_sfc_file};
49pub use sfc_template::angular::ANGULAR_TPL_SENTINEL;
50
51/// Synthetic member-access object prefix used to carry exported-instance bindings.
52///
53/// `MemberAccess { object: format!("{INSTANCE_EXPORT_SENTINEL}{export_name}"), member: target }`
54/// means the exported value named `export_name` is an instance of the local
55/// class/interface symbol named `target`.
56pub const INSTANCE_EXPORT_SENTINEL: &str = "__fallow_instance_export__:";
57
58/// Synthetic member-access object prefix for typed Playwright fixture definitions.
59///
60/// `MemberAccess { object: format!("{PLAYWRIGHT_FIXTURE_DEF_SENTINEL}{test}:{fixture}"), member: type_name }`
61/// means the exported Playwright test object named `test` provides a fixture
62/// named `fixture` whose declared type is `type_name`.
63pub const PLAYWRIGHT_FIXTURE_DEF_SENTINEL: &str = "__fallow_playwright_fixture_def__:";
64
65/// Synthetic member-access object prefix for Playwright fixture member uses.
66///
67/// `MemberAccess { object: format!("{PLAYWRIGHT_FIXTURE_USE_SENTINEL}{test}:{fixture}"), member }`
68/// means a callback passed to the Playwright test object named `test`
69/// destructures `fixture` and accesses `fixture.member`.
70pub const PLAYWRIGHT_FIXTURE_USE_SENTINEL: &str = "__fallow_playwright_fixture_use__:";
71
72use parse::parse_source_to_module;
73
74/// Parse all files in parallel, extracting imports and exports.
75/// Uses the cache to skip reparsing files whose content hasn't changed.
76///
77/// When `need_complexity` is true, per-function cyclomatic/cognitive complexity
78/// metrics are computed during parsing (needed by the `health` command).
79/// Pass `false` for dead-code analysis where complexity data is unused.
80pub fn parse_all_files(
81    files: &[DiscoveredFile],
82    cache: Option<&CacheStore>,
83    need_complexity: bool,
84) -> ParseResult {
85    use std::sync::atomic::{AtomicUsize, Ordering};
86    let cache_hits = AtomicUsize::new(0);
87    let cache_misses = AtomicUsize::new(0);
88
89    let modules: Vec<ModuleInfo> = files
90        .par_iter()
91        .filter_map(|file| {
92            parse_single_file_cached(file, cache, &cache_hits, &cache_misses, need_complexity)
93        })
94        .collect();
95
96    let hits = cache_hits.load(Ordering::Relaxed);
97    let misses = cache_misses.load(Ordering::Relaxed);
98    if hits > 0 || misses > 0 {
99        tracing::info!(
100            cache_hits = hits,
101            cache_misses = misses,
102            "incremental cache stats"
103        );
104    }
105
106    ParseResult {
107        modules,
108        cache_hits: hits,
109        cache_misses: misses,
110    }
111}
112
113/// Parse a single file, consulting the cache first.
114///
115/// Cache validation strategy (fast path -> slow path):
116/// 1. `stat()` the file to get mtime + size (single syscall, no file read)
117/// 2. If mtime+size match the cached entry -> cache hit, return immediately
118/// 3. If mtime+size differ -> read file, compute content hash
119/// 4. If content hash matches cached entry -> cache hit (file was `touch`ed but unchanged)
120/// 5. Otherwise -> cache miss, full parse
121fn parse_single_file_cached(
122    file: &DiscoveredFile,
123    cache: Option<&CacheStore>,
124    cache_hits: &std::sync::atomic::AtomicUsize,
125    cache_misses: &std::sync::atomic::AtomicUsize,
126    need_complexity: bool,
127) -> Option<ModuleInfo> {
128    use std::sync::atomic::Ordering;
129
130    // Fast path: check mtime+size before reading file content.
131    // A single stat() syscall is ~100x cheaper than read()+hash().
132    if let Some(store) = cache
133        && let Ok(metadata) = std::fs::metadata(&file.path)
134    {
135        let mt = mtime_secs(&metadata);
136        let sz = metadata.len();
137        if let Some(cached) = store.get_by_metadata(&file.path, mt, sz) {
138            // When complexity is requested but the cached entry lacks it
139            // (populated by a prior `check` run), skip the cache and re-parse.
140            if !need_complexity || !cached.complexity.is_empty() {
141                cache_hits.fetch_add(1, Ordering::Relaxed);
142                return Some(cache::cached_to_module_opts(
143                    cached,
144                    file.id,
145                    need_complexity,
146                ));
147            }
148        }
149    }
150
151    // Slow path: read file content and compute content hash.
152    let source = std::fs::read_to_string(&file.path).ok()?;
153    let content_hash = xxhash_rust::xxh3::xxh3_64(source.as_bytes());
154
155    // Check cache by content hash (handles touch/save-without-change)
156    if let Some(store) = cache
157        && let Some(cached) = store.get(&file.path, content_hash)
158        && (!need_complexity || !cached.complexity.is_empty())
159    {
160        cache_hits.fetch_add(1, Ordering::Relaxed);
161        return Some(cache::cached_to_module_opts(
162            cached,
163            file.id,
164            need_complexity,
165        ));
166    }
167    cache_misses.fetch_add(1, Ordering::Relaxed);
168
169    // Cache miss, do a full parse
170    Some(parse_source_to_module(
171        file.id,
172        &file.path,
173        &source,
174        content_hash,
175        need_complexity,
176    ))
177}
178
/// Modification time of `metadata` as whole seconds since the Unix epoch.
///
/// Falls back to `0` when the platform cannot report a modification time or
/// when the reported time lies before the epoch.
fn mtime_secs(metadata: &std::fs::Metadata) -> u64 {
    let Ok(modified) = metadata.modified() else {
        return 0;
    };
    match modified.duration_since(std::time::SystemTime::UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        Err(_) => 0,
    }
}
188
189/// Parse a single file and extract module information (without complexity).
190#[must_use]
191pub fn parse_single_file(file: &DiscoveredFile) -> Option<ModuleInfo> {
192    let source = std::fs::read_to_string(&file.path).ok()?;
193    let content_hash = xxhash_rust::xxh3::xxh3_64(source.as_bytes());
194    Some(parse_source_to_module(
195        file.id,
196        &file.path,
197        &source,
198        content_hash,
199        false,
200    ))
201}
202
203/// Parse from in-memory content (for LSP, includes complexity).
204#[must_use]
205pub fn parse_from_content(file_id: FileId, path: &Path, content: &str) -> ModuleInfo {
206    let content_hash = xxhash_rust::xxh3::xxh3_64(content.as_bytes());
207    parse_source_to_module(file_id, path, content, content_hash, true)
208}
209
210// Parser integration tests invoke Oxc under Miri which is ~1000x slower.
211// Unit tests in individual modules (visitor, suppress, sfc, css, etc.) still run.
212#[cfg(all(test, not(miri)))]
213mod tests;