Skip to main content

fallow_extract/
lib.rs

1//! Parsing and extraction engine for fallow codebase intelligence.
2//!
3//! This crate handles all file parsing: JS/TS via Oxc, Vue/Svelte SFC extraction,
4//! Astro frontmatter, MDX import/export extraction, CSS Module class name extraction,
5//! HTML asset reference extraction, and incremental caching of parse results.
6
7#![warn(missing_docs)]
8
9mod asset_url;
10pub mod astro;
11pub mod cache;
12pub(crate) mod complexity;
13pub mod css;
14pub mod flags;
15pub mod html;
16pub mod inventory;
17pub mod mdx;
18mod parse;
19pub mod sfc;
20mod sfc_template;
21pub mod suppress;
22pub(crate) mod template_complexity;
23mod template_usage;
24pub mod visitor;
25
26use std::path::Path;
27
28use rayon::prelude::*;
29
30use cache::CacheStore;
31use fallow_types::discover::{DiscoveredFile, FileId};
32
33// Re-export all extract types from fallow-types
34pub use fallow_types::extract::{
35    ClassHeritageInfo, DynamicImportInfo, DynamicImportPattern, ExportInfo, ExportName, ImportInfo,
36    ImportedName, LocalTypeDeclaration, MemberAccess, MemberInfo, MemberKind, ModuleInfo,
37    ParseResult, PublicSignatureTypeReference, ReExportInfo, RequireCallInfo, VisibilityTag,
38    compute_line_offsets,
39};
40
41// Re-export extraction functions for internal use and fuzzing
42pub use astro::extract_astro_frontmatter;
43pub use css::extract_css_module_exports;
44pub use mdx::extract_mdx_statements;
45pub use sfc::{extract_sfc_scripts, is_sfc_file};
46pub use sfc_template::angular::ANGULAR_TPL_SENTINEL;
47
/// Prefix that marks a synthetic [`MemberAccess`] recording an exported instance.
///
/// An access built as
/// `MemberAccess { object: format!("{INSTANCE_EXPORT_SENTINEL}{export_name}"), member: target }`
/// states that the exported value called `export_name` is an instance of the
/// local class/interface symbol called `target`.
pub const INSTANCE_EXPORT_SENTINEL: &str = "__fallow_instance_export__:";
54
55use parse::parse_source_to_module;
56
57/// Parse all files in parallel, extracting imports and exports.
58/// Uses the cache to skip reparsing files whose content hasn't changed.
59///
60/// When `need_complexity` is true, per-function cyclomatic/cognitive complexity
61/// metrics are computed during parsing (needed by the `health` command).
62/// Pass `false` for dead-code analysis where complexity data is unused.
63pub fn parse_all_files(
64    files: &[DiscoveredFile],
65    cache: Option<&CacheStore>,
66    need_complexity: bool,
67) -> ParseResult {
68    use std::sync::atomic::{AtomicUsize, Ordering};
69    let cache_hits = AtomicUsize::new(0);
70    let cache_misses = AtomicUsize::new(0);
71
72    let modules: Vec<ModuleInfo> = files
73        .par_iter()
74        .filter_map(|file| {
75            parse_single_file_cached(file, cache, &cache_hits, &cache_misses, need_complexity)
76        })
77        .collect();
78
79    let hits = cache_hits.load(Ordering::Relaxed);
80    let misses = cache_misses.load(Ordering::Relaxed);
81    if hits > 0 || misses > 0 {
82        tracing::info!(
83            cache_hits = hits,
84            cache_misses = misses,
85            "incremental cache stats"
86        );
87    }
88
89    ParseResult {
90        modules,
91        cache_hits: hits,
92        cache_misses: misses,
93    }
94}
95
/// Returns the modification time recorded in `metadata` as whole seconds since
/// the Unix epoch.
///
/// Yields `0` when the modification time is unavailable or lies before the
/// epoch, so the caller never has to deal with an error.
fn mtime_secs(metadata: &std::fs::Metadata) -> u64 {
    let Ok(modified) = metadata.modified() else {
        return 0;
    };
    match modified.duration_since(std::time::SystemTime::UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        Err(_) => 0,
    }
}
105
106/// Parse a single file, consulting the cache first.
107///
108/// Cache validation strategy (fast path -> slow path):
109/// 1. `stat()` the file to get mtime + size (single syscall, no file read)
110/// 2. If mtime+size match the cached entry -> cache hit, return immediately
111/// 3. If mtime+size differ -> read file, compute content hash
112/// 4. If content hash matches cached entry -> cache hit (file was `touch`ed but unchanged)
113/// 5. Otherwise -> cache miss, full parse
114fn parse_single_file_cached(
115    file: &DiscoveredFile,
116    cache: Option<&CacheStore>,
117    cache_hits: &std::sync::atomic::AtomicUsize,
118    cache_misses: &std::sync::atomic::AtomicUsize,
119    need_complexity: bool,
120) -> Option<ModuleInfo> {
121    use std::sync::atomic::Ordering;
122
123    // Fast path: check mtime+size before reading file content.
124    // A single stat() syscall is ~100x cheaper than read()+hash().
125    if let Some(store) = cache
126        && let Ok(metadata) = std::fs::metadata(&file.path)
127    {
128        let mt = mtime_secs(&metadata);
129        let sz = metadata.len();
130        if let Some(cached) = store.get_by_metadata(&file.path, mt, sz) {
131            // When complexity is requested but the cached entry lacks it
132            // (populated by a prior `check` run), skip the cache and re-parse.
133            if !need_complexity || !cached.complexity.is_empty() {
134                cache_hits.fetch_add(1, Ordering::Relaxed);
135                return Some(cache::cached_to_module(cached, file.id));
136            }
137        }
138    }
139
140    // Slow path: read file content and compute content hash.
141    let source = std::fs::read_to_string(&file.path).ok()?;
142    let content_hash = xxhash_rust::xxh3::xxh3_64(source.as_bytes());
143
144    // Check cache by content hash (handles touch/save-without-change)
145    if let Some(store) = cache
146        && let Some(cached) = store.get(&file.path, content_hash)
147        && (!need_complexity || !cached.complexity.is_empty())
148    {
149        cache_hits.fetch_add(1, Ordering::Relaxed);
150        return Some(cache::cached_to_module(cached, file.id));
151    }
152    cache_misses.fetch_add(1, Ordering::Relaxed);
153
154    // Cache miss, do a full parse
155    Some(parse_source_to_module(
156        file.id,
157        &file.path,
158        &source,
159        content_hash,
160        need_complexity,
161    ))
162}
163
164/// Parse a single file and extract module information (without complexity).
165#[must_use]
166pub fn parse_single_file(file: &DiscoveredFile) -> Option<ModuleInfo> {
167    let source = std::fs::read_to_string(&file.path).ok()?;
168    let content_hash = xxhash_rust::xxh3::xxh3_64(source.as_bytes());
169    Some(parse_source_to_module(
170        file.id,
171        &file.path,
172        &source,
173        content_hash,
174        false,
175    ))
176}
177
178/// Parse from in-memory content (for LSP, includes complexity).
179#[must_use]
180pub fn parse_from_content(file_id: FileId, path: &Path, content: &str) -> ModuleInfo {
181    let content_hash = xxhash_rust::xxh3::xxh3_64(content.as_bytes());
182    parse_source_to_module(file_id, path, content, content_hash, true)
183}
184
// Parser integration tests are skipped under Miri: they invoke Oxc, which runs
// ~1000x slower there. Unit tests in the individual modules (visitor, suppress,
// sfc, css, etc.) still run.
187#[cfg(all(test, not(miri)))]
188mod tests;