Skip to main content

fallow_extract/
lib.rs

1//! Parsing and extraction engine for the fallow codebase analyzer.
2//!
3//! This crate handles all file parsing: JS/TS via Oxc, Vue/Svelte SFC extraction,
4//! Astro frontmatter, MDX import/export extraction, CSS Module class name extraction,
5//! HTML asset reference extraction, and incremental caching of parse results.
6
7#![warn(missing_docs)]
8
9pub mod astro;
10pub mod cache;
11pub(crate) mod complexity;
12pub mod css;
13pub mod html;
14pub mod mdx;
15mod parse;
16pub mod sfc;
17mod sfc_template;
18pub mod suppress;
19mod template_usage;
20pub mod visitor;
21
22use std::path::Path;
23
24use rayon::prelude::*;
25
26use cache::CacheStore;
27use fallow_types::discover::{DiscoveredFile, FileId};
28
29// Re-export all extract types from fallow-types
30pub use fallow_types::extract::{
31    DynamicImportInfo, DynamicImportPattern, ExportInfo, ExportName, ImportInfo, ImportedName,
32    MemberAccess, MemberInfo, MemberKind, ModuleInfo, ParseResult, ReExportInfo, RequireCallInfo,
33    compute_line_offsets,
34};
35
36// Re-export extraction functions for internal use and fuzzing
37pub use astro::extract_astro_frontmatter;
38pub use css::extract_css_module_exports;
39pub use mdx::extract_mdx_statements;
40pub use sfc::{extract_sfc_scripts, is_sfc_file};
41pub use sfc_template::angular::ANGULAR_TPL_SENTINEL;
42
43use parse::parse_source_to_module;
44
45/// Parse all files in parallel, extracting imports and exports.
46/// Uses the cache to skip reparsing files whose content hasn't changed.
47///
48/// When `need_complexity` is true, per-function cyclomatic/cognitive complexity
49/// metrics are computed during parsing (needed by the `health` command).
50/// Pass `false` for dead-code analysis where complexity data is unused.
51pub fn parse_all_files(
52    files: &[DiscoveredFile],
53    cache: Option<&CacheStore>,
54    need_complexity: bool,
55) -> ParseResult {
56    use std::sync::atomic::{AtomicUsize, Ordering};
57    let cache_hits = AtomicUsize::new(0);
58    let cache_misses = AtomicUsize::new(0);
59
60    let modules: Vec<ModuleInfo> = files
61        .par_iter()
62        .filter_map(|file| {
63            parse_single_file_cached(file, cache, &cache_hits, &cache_misses, need_complexity)
64        })
65        .collect();
66
67    let hits = cache_hits.load(Ordering::Relaxed);
68    let misses = cache_misses.load(Ordering::Relaxed);
69    if hits > 0 || misses > 0 {
70        tracing::info!(
71            cache_hits = hits,
72            cache_misses = misses,
73            "incremental cache stats"
74        );
75    }
76
77    ParseResult {
78        modules,
79        cache_hits: hits,
80        cache_misses: misses,
81    }
82}
83
/// Return the file's modification time as whole seconds since the Unix epoch.
///
/// Falls back to `0` when the modification time is unavailable or lies before
/// the epoch.
fn mtime_secs(metadata: &std::fs::Metadata) -> u64 {
    let Ok(modified) = metadata.modified() else {
        return 0;
    };
    match modified.duration_since(std::time::UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        // A timestamp earlier than the epoch has no unsigned representation.
        Err(_) => 0,
    }
}
93
94/// Parse a single file, consulting the cache first.
95///
96/// Cache validation strategy (fast path -> slow path):
97/// 1. `stat()` the file to get mtime + size (single syscall, no file read)
98/// 2. If mtime+size match the cached entry -> cache hit, return immediately
99/// 3. If mtime+size differ -> read file, compute content hash
100/// 4. If content hash matches cached entry -> cache hit (file was `touch`ed but unchanged)
101/// 5. Otherwise -> cache miss, full parse
102fn parse_single_file_cached(
103    file: &DiscoveredFile,
104    cache: Option<&CacheStore>,
105    cache_hits: &std::sync::atomic::AtomicUsize,
106    cache_misses: &std::sync::atomic::AtomicUsize,
107    need_complexity: bool,
108) -> Option<ModuleInfo> {
109    use std::sync::atomic::Ordering;
110
111    // Fast path: check mtime+size before reading file content.
112    // A single stat() syscall is ~100x cheaper than read()+hash().
113    if let Some(store) = cache
114        && let Ok(metadata) = std::fs::metadata(&file.path)
115    {
116        let mt = mtime_secs(&metadata);
117        let sz = metadata.len();
118        if let Some(cached) = store.get_by_metadata(&file.path, mt, sz) {
119            // When complexity is requested but the cached entry lacks it
120            // (populated by a prior `check` run), skip the cache and re-parse.
121            if !need_complexity || !cached.complexity.is_empty() {
122                cache_hits.fetch_add(1, Ordering::Relaxed);
123                return Some(cache::cached_to_module(cached, file.id));
124            }
125        }
126    }
127
128    // Slow path: read file content and compute content hash.
129    let source = std::fs::read_to_string(&file.path).ok()?;
130    let content_hash = xxhash_rust::xxh3::xxh3_64(source.as_bytes());
131
132    // Check cache by content hash (handles touch/save-without-change)
133    if let Some(store) = cache
134        && let Some(cached) = store.get(&file.path, content_hash)
135        && (!need_complexity || !cached.complexity.is_empty())
136    {
137        cache_hits.fetch_add(1, Ordering::Relaxed);
138        return Some(cache::cached_to_module(cached, file.id));
139    }
140    cache_misses.fetch_add(1, Ordering::Relaxed);
141
142    // Cache miss — do a full parse
143    Some(parse_source_to_module(
144        file.id,
145        &file.path,
146        &source,
147        content_hash,
148        need_complexity,
149    ))
150}
151
152/// Parse a single file and extract module information (without complexity).
153#[must_use]
154pub fn parse_single_file(file: &DiscoveredFile) -> Option<ModuleInfo> {
155    let source = std::fs::read_to_string(&file.path).ok()?;
156    let content_hash = xxhash_rust::xxh3::xxh3_64(source.as_bytes());
157    Some(parse_source_to_module(
158        file.id,
159        &file.path,
160        &source,
161        content_hash,
162        false,
163    ))
164}
165
166/// Parse from in-memory content (for LSP, includes complexity).
167#[must_use]
168pub fn parse_from_content(file_id: FileId, path: &Path, content: &str) -> ModuleInfo {
169    let content_hash = xxhash_rust::xxh3::xxh3_64(content.as_bytes());
170    parse_source_to_module(file_id, path, content, content_hash, true)
171}
172
// Parser integration tests invoke Oxc, which is ~1000x slower under Miri, so they are
// excluded from Miri runs. Unit tests in individual modules (visitor, suppress, sfc,
// css, etc.) still run.
175#[cfg(all(test, not(miri)))]
176mod tests;