Skip to main content

fallow_extract/
lib.rs

1//! Parsing and extraction engine for the fallow codebase analyzer.
2//!
3//! This crate handles all file parsing: JS/TS via Oxc, Vue/Svelte SFC extraction,
4//! Astro frontmatter, MDX import/export extraction, CSS Module class name extraction,
5//! HTML asset reference extraction, and incremental caching of parse results.
6
7#![warn(missing_docs)]
8
9mod asset_url;
10pub mod astro;
11pub mod cache;
12pub(crate) mod complexity;
13pub mod css;
14pub mod flags;
15pub mod html;
16pub mod mdx;
17mod parse;
18pub mod sfc;
19mod sfc_template;
20pub mod suppress;
21mod template_usage;
22pub mod visitor;
23
24use std::path::Path;
25
26use rayon::prelude::*;
27
28use cache::CacheStore;
29use fallow_types::discover::{DiscoveredFile, FileId};
30
31// Re-export all extract types from fallow-types
32pub use fallow_types::extract::{
33    DynamicImportInfo, DynamicImportPattern, ExportInfo, ExportName, ImportInfo, ImportedName,
34    MemberAccess, MemberInfo, MemberKind, ModuleInfo, ParseResult, ReExportInfo, RequireCallInfo,
35    compute_line_offsets,
36};
37
38// Re-export extraction functions for internal use and fuzzing
39pub use astro::extract_astro_frontmatter;
40pub use css::extract_css_module_exports;
41pub use mdx::extract_mdx_statements;
42pub use sfc::{extract_sfc_scripts, is_sfc_file};
43pub use sfc_template::angular::ANGULAR_TPL_SENTINEL;
44
45use parse::parse_source_to_module;
46
47/// Parse all files in parallel, extracting imports and exports.
48/// Uses the cache to skip reparsing files whose content hasn't changed.
49///
50/// When `need_complexity` is true, per-function cyclomatic/cognitive complexity
51/// metrics are computed during parsing (needed by the `health` command).
52/// Pass `false` for dead-code analysis where complexity data is unused.
53pub fn parse_all_files(
54    files: &[DiscoveredFile],
55    cache: Option<&CacheStore>,
56    need_complexity: bool,
57) -> ParseResult {
58    use std::sync::atomic::{AtomicUsize, Ordering};
59    let cache_hits = AtomicUsize::new(0);
60    let cache_misses = AtomicUsize::new(0);
61
62    let modules: Vec<ModuleInfo> = files
63        .par_iter()
64        .filter_map(|file| {
65            parse_single_file_cached(file, cache, &cache_hits, &cache_misses, need_complexity)
66        })
67        .collect();
68
69    let hits = cache_hits.load(Ordering::Relaxed);
70    let misses = cache_misses.load(Ordering::Relaxed);
71    if hits > 0 || misses > 0 {
72        tracing::info!(
73            cache_hits = hits,
74            cache_misses = misses,
75            "incremental cache stats"
76        );
77    }
78
79    ParseResult {
80        modules,
81        cache_hits: hits,
82        cache_misses: misses,
83    }
84}
85
/// Return the file's modification time as whole seconds since the Unix epoch.
///
/// Yields `0` whenever the timestamp is unavailable: either the platform
/// cannot report a modification time, or the reported time lies before the
/// epoch.
fn mtime_secs(metadata: &std::fs::Metadata) -> u64 {
    let Ok(modified_at) = metadata.modified() else {
        return 0;
    };

    match modified_at.duration_since(std::time::SystemTime::UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        // Pre-epoch timestamps collapse to the same fallback value.
        Err(_) => 0,
    }
}
95
96/// Parse a single file, consulting the cache first.
97///
98/// Cache validation strategy (fast path -> slow path):
99/// 1. `stat()` the file to get mtime + size (single syscall, no file read)
100/// 2. If mtime+size match the cached entry -> cache hit, return immediately
101/// 3. If mtime+size differ -> read file, compute content hash
102/// 4. If content hash matches cached entry -> cache hit (file was `touch`ed but unchanged)
103/// 5. Otherwise -> cache miss, full parse
104fn parse_single_file_cached(
105    file: &DiscoveredFile,
106    cache: Option<&CacheStore>,
107    cache_hits: &std::sync::atomic::AtomicUsize,
108    cache_misses: &std::sync::atomic::AtomicUsize,
109    need_complexity: bool,
110) -> Option<ModuleInfo> {
111    use std::sync::atomic::Ordering;
112
113    // Fast path: check mtime+size before reading file content.
114    // A single stat() syscall is ~100x cheaper than read()+hash().
115    if let Some(store) = cache
116        && let Ok(metadata) = std::fs::metadata(&file.path)
117    {
118        let mt = mtime_secs(&metadata);
119        let sz = metadata.len();
120        if let Some(cached) = store.get_by_metadata(&file.path, mt, sz) {
121            // When complexity is requested but the cached entry lacks it
122            // (populated by a prior `check` run), skip the cache and re-parse.
123            if !need_complexity || !cached.complexity.is_empty() {
124                cache_hits.fetch_add(1, Ordering::Relaxed);
125                return Some(cache::cached_to_module(cached, file.id));
126            }
127        }
128    }
129
130    // Slow path: read file content and compute content hash.
131    let source = std::fs::read_to_string(&file.path).ok()?;
132    let content_hash = xxhash_rust::xxh3::xxh3_64(source.as_bytes());
133
134    // Check cache by content hash (handles touch/save-without-change)
135    if let Some(store) = cache
136        && let Some(cached) = store.get(&file.path, content_hash)
137        && (!need_complexity || !cached.complexity.is_empty())
138    {
139        cache_hits.fetch_add(1, Ordering::Relaxed);
140        return Some(cache::cached_to_module(cached, file.id));
141    }
142    cache_misses.fetch_add(1, Ordering::Relaxed);
143
144    // Cache miss — do a full parse
145    Some(parse_source_to_module(
146        file.id,
147        &file.path,
148        &source,
149        content_hash,
150        need_complexity,
151    ))
152}
153
154/// Parse a single file and extract module information (without complexity).
155#[must_use]
156pub fn parse_single_file(file: &DiscoveredFile) -> Option<ModuleInfo> {
157    let source = std::fs::read_to_string(&file.path).ok()?;
158    let content_hash = xxhash_rust::xxh3::xxh3_64(source.as_bytes());
159    Some(parse_source_to_module(
160        file.id,
161        &file.path,
162        &source,
163        content_hash,
164        false,
165    ))
166}
167
168/// Parse from in-memory content (for LSP, includes complexity).
169#[must_use]
170pub fn parse_from_content(file_id: FileId, path: &Path, content: &str) -> ModuleInfo {
171    let content_hash = xxhash_rust::xxh3::xxh3_64(content.as_bytes());
172    parse_source_to_module(file_id, path, content, content_hash, true)
173}
174
175// Parser integration tests invoke Oxc under Miri which is ~1000x slower.
176// Unit tests in individual modules (visitor, suppress, sfc, css, etc.) still run.
177#[cfg(all(test, not(miri)))]
178mod tests;