Skip to main content

fallow_extract/
lib.rs

1//! Parsing and extraction engine for the fallow codebase analyzer.
2//!
3//! This crate handles all file parsing: JS/TS via Oxc, Vue/Svelte SFC extraction,
4//! Astro frontmatter, MDX import/export extraction, CSS Module class name extraction,
5//! and incremental caching of parse results.
6
7#![warn(missing_docs)]
8
9pub mod astro;
10pub mod cache;
11pub(crate) mod complexity;
12pub mod css;
13pub mod mdx;
14mod parse;
15pub mod sfc;
16pub mod suppress;
17pub mod visitor;
18
19use std::path::Path;
20
21use rayon::prelude::*;
22
23use cache::CacheStore;
24use fallow_types::discover::{DiscoveredFile, FileId};
25
26// Re-export all extract types from fallow-types
27pub use fallow_types::extract::{
28    DynamicImportInfo, DynamicImportPattern, ExportInfo, ExportName, ImportInfo, ImportedName,
29    MemberAccess, MemberInfo, MemberKind, ModuleInfo, ParseResult, ReExportInfo, RequireCallInfo,
30    compute_line_offsets,
31};
32
33// Re-export extraction functions for internal use and fuzzing
34pub use astro::extract_astro_frontmatter;
35pub use css::extract_css_module_exports;
36pub use mdx::extract_mdx_statements;
37pub use sfc::{extract_sfc_scripts, is_sfc_file};
38
39use parse::parse_source_to_module;
40
41/// Parse all files in parallel, extracting imports and exports.
42/// Uses the cache to skip reparsing files whose content hasn't changed.
43pub fn parse_all_files(files: &[DiscoveredFile], cache: Option<&CacheStore>) -> ParseResult {
44    use std::sync::atomic::{AtomicUsize, Ordering};
45    let cache_hits = AtomicUsize::new(0);
46    let cache_misses = AtomicUsize::new(0);
47
48    let modules: Vec<ModuleInfo> = files
49        .par_iter()
50        .filter_map(|file| parse_single_file_cached(file, cache, &cache_hits, &cache_misses))
51        .collect();
52
53    let hits = cache_hits.load(Ordering::Relaxed);
54    let misses = cache_misses.load(Ordering::Relaxed);
55    if hits > 0 || misses > 0 {
56        tracing::info!(
57            cache_hits = hits,
58            cache_misses = misses,
59            "incremental cache stats"
60        );
61    }
62
63    ParseResult {
64        modules,
65        cache_hits: hits,
66        cache_misses: misses,
67    }
68}
69
/// Return the file's modification time as whole seconds since the Unix epoch.
///
/// Falls back to `0` when the modification time is unavailable from `metadata` or when it
/// lies before the epoch.
fn mtime_secs(metadata: &std::fs::Metadata) -> u64 {
    let Ok(modified_at) = metadata.modified() else {
        // Modification time could not be obtained from this metadata.
        return 0;
    };

    match modified_at.duration_since(std::time::SystemTime::UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        // Timestamp is earlier than the epoch.
        Err(_) => 0,
    }
}
79
80/// Parse a single file, consulting the cache first.
81///
82/// Cache validation strategy (fast path -> slow path):
83/// 1. `stat()` the file to get mtime + size (single syscall, no file read)
84/// 2. If mtime+size match the cached entry -> cache hit, return immediately
85/// 3. If mtime+size differ -> read file, compute content hash
86/// 4. If content hash matches cached entry -> cache hit (file was `touch`ed but unchanged)
87/// 5. Otherwise -> cache miss, full parse
88fn parse_single_file_cached(
89    file: &DiscoveredFile,
90    cache: Option<&CacheStore>,
91    cache_hits: &std::sync::atomic::AtomicUsize,
92    cache_misses: &std::sync::atomic::AtomicUsize,
93) -> Option<ModuleInfo> {
94    use std::sync::atomic::Ordering;
95
96    // Fast path: check mtime+size before reading file content.
97    // A single stat() syscall is ~100x cheaper than read()+hash().
98    if let Some(store) = cache
99        && let Ok(metadata) = std::fs::metadata(&file.path)
100    {
101        let mt = mtime_secs(&metadata);
102        let sz = metadata.len();
103        if let Some(cached) = store.get_by_metadata(&file.path, mt, sz) {
104            cache_hits.fetch_add(1, Ordering::Relaxed);
105            return Some(cache::cached_to_module(cached, file.id));
106        }
107    }
108
109    // Slow path: read file content and compute content hash.
110    let source = std::fs::read_to_string(&file.path).ok()?;
111    let content_hash = xxhash_rust::xxh3::xxh3_64(source.as_bytes());
112
113    // Check cache by content hash (handles touch/save-without-change)
114    if let Some(store) = cache
115        && let Some(cached) = store.get(&file.path, content_hash)
116    {
117        cache_hits.fetch_add(1, Ordering::Relaxed);
118        return Some(cache::cached_to_module(cached, file.id));
119    }
120    cache_misses.fetch_add(1, Ordering::Relaxed);
121
122    // Cache miss — do a full parse
123    Some(parse_source_to_module(
124        file.id,
125        &file.path,
126        &source,
127        content_hash,
128    ))
129}
130
131/// Parse a single file and extract module information.
132pub fn parse_single_file(file: &DiscoveredFile) -> Option<ModuleInfo> {
133    let source = std::fs::read_to_string(&file.path).ok()?;
134    let content_hash = xxhash_rust::xxh3::xxh3_64(source.as_bytes());
135    Some(parse_source_to_module(
136        file.id,
137        &file.path,
138        &source,
139        content_hash,
140    ))
141}
142
143/// Parse from in-memory content (for LSP).
144pub fn parse_from_content(file_id: FileId, path: &Path, content: &str) -> ModuleInfo {
145    let content_hash = xxhash_rust::xxh3::xxh3_64(content.as_bytes());
146    parse_source_to_module(file_id, path, content, content_hash)
147}
148
// Parser integration tests are excluded under Miri: they invoke Oxc, which runs ~1000x
// slower there. Unit tests in individual modules (visitor, suppress, sfc, css, etc.) still run.
151#[cfg(all(test, not(miri)))]
152mod tests;