Skip to main content

aft/inspect/oxc_engine/
mod.rs

1//! Pure oxc-backed TS/JS module graph facts and export-liveness verdicts.
2//!
3//! H1-1 intentionally stops at an engine/API boundary: scanners wire this into
4//! inspect contributions in H1-2. The engine is pure (files in, verdicts out)
5//! and keeps only an in-memory facts cache keyed by file content hash + parser
6//! source type + facts format. That is enough for the required warm-cache perf gate while
7//! avoiding premature persistence coupling to InspectCache/AppContext.
8
9mod facts;
10mod graph;
11mod resolver;
12pub mod types;
13
14use std::collections::{BTreeMap, BTreeSet};
15use std::fs;
16use std::path::{Path, PathBuf};
17use std::time::SystemTime;
18
19use oxc_span::SourceType;
20
21use facts::parse_file_facts;
22use graph::compute_verdicts;
23use resolver::{normalize_path, ModuleResolver};
24pub use types::{
25    DecoratorFact, DynamicImportFact, ExportFact, ExportName, FileFacts, FileId, ImportFact,
26    ImportKind, LivenessVerdict, OxcEngineError, OxcEngineResult, OxcEngineStats, OxcExportVerdict,
27    OxcFileVerdicts, OxcReExportContext, OxcResolvedEdge, ReExportFact, ReExportKind,
28    ResolverConfigInput, OXC_PROVENANCE,
29};
30
/// Version tag embedded in every facts cache key (`v{FACTS_FORMAT_VERSION}:…`),
/// so entries produced under a different facts format never share a key.
// NOTE(review): presumably bumped whenever the `FileFacts` shape or extraction
// rules change — confirm against the `facts` module.
pub(crate) const FACTS_FORMAT_VERSION: u32 = 4;
32
/// Caller-supplied knobs for a single engine run.
///
/// All path-valued fields are normalized by the engine before they are
/// compared against module paths, so callers may pass them as discovered.
#[derive(Debug, Clone, Default)]
pub struct AnalyzeOptions {
    /// Paths handed to the verdict computation as entry points.
    pub entry_points: Vec<PathBuf>,
    /// Paths handed to the verdict computation as public API files.
    pub public_api_files: Vec<PathBuf>,
    /// Per-file sets of export names handed to the verdict computation as
    /// executable roots.
    pub executable_root_exports: BTreeMap<PathBuf, BTreeSet<String>>,
    /// Paths the inspect freshness layer has already proven stale. For these
    /// the path-metadata shortcut is skipped, so an edit that keeps the same
    /// size and mtime is still re-read and content-hashed before any cached
    /// facts are reused.
    pub force_reparse_files: Vec<PathBuf>,
    /// When set, an import or re-export only makes its target live once
    /// execution is reachable from entry/public files. dead_code enables this;
    /// unused_exports leaves it off and keeps plain import-usage semantics.
    pub entry_reachability: bool,
}
47
48#[derive(Debug, Clone, Default)]
49pub struct OxcFactsCache {
50    entries_by_hash: BTreeMap<String, FileFacts>,
51    entries_by_path: BTreeMap<PathBuf, OxcFactsPathEntry>,
52}
53
/// What the cache remembers about a path after reading it: the file metadata
/// seen at that time and the key of the facts entry its content mapped to.
#[derive(Debug, Clone)]
struct OxcFactsPathEntry {
    /// Modification time observed when the file was last read.
    mtime: SystemTime,
    /// File length in bytes observed when the file was last read.
    size: u64,
    /// Key into the by-hash facts table for that content.
    cache_key: String,
}
60
/// Hit/miss counters accumulated while serving facts during one run.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct OxcFactsCacheStats {
    /// Requests answered from already-cached facts.
    pub hits: usize,
    /// Requests that required parsing the source.
    pub misses: usize,
}
66
67impl OxcFactsCache {
68    pub fn new() -> Self {
69        Self::default()
70    }
71
72    pub fn len(&self) -> usize {
73        self.entries_by_hash.len()
74    }
75
76    pub fn is_empty(&self) -> bool {
77        self.entries_by_hash.is_empty()
78    }
79
80    fn facts_for_file(
81        &mut self,
82        file_id: FileId,
83        path: &Path,
84        force_reparse: bool,
85        stats: &mut OxcFactsCacheStats,
86    ) -> std::io::Result<FileFacts> {
87        let source_type = SourceType::from_path(path).unwrap_or_default();
88        let source_type_key = source_type_cache_key(source_type);
89        let metadata = fs::metadata(path)?;
90        let mtime = metadata.modified().unwrap_or(std::time::UNIX_EPOCH);
91        let size = metadata.len();
92        let path_key = path.to_path_buf();
93
94        if !force_reparse {
95            if let Some(entry) = self.entries_by_path.get(&path_key) {
96                if entry.mtime == mtime && entry.size == size {
97                    if let Some(cached) = self.entries_by_hash.get(&entry.cache_key) {
98                        stats.hits += 1;
99                        return Ok(rebind_facts(cached, file_id, path, &cached.content_hash));
100                    }
101                }
102            }
103        }
104
105        let source = fs::read_to_string(path)?;
106        Ok(self.facts_for_source_with_metadata(
107            file_id,
108            path,
109            &source,
110            source_type,
111            source_type_key,
112            Some((mtime, size)),
113            stats,
114        ))
115    }
116
117    fn facts_for_source_with_metadata(
118        &mut self,
119        file_id: FileId,
120        path: &Path,
121        source: &str,
122        source_type: SourceType,
123        source_type_key: String,
124        metadata: Option<(SystemTime, u64)>,
125        stats: &mut OxcFactsCacheStats,
126    ) -> FileFacts {
127        let content_hash = crate::cache_freshness::hash_bytes(source.as_bytes())
128            .to_hex()
129            .to_string();
130        let cache_key = format!("v{FACTS_FORMAT_VERSION}:{source_type_key}:{content_hash}");
131        if let Some(cached) = self.entries_by_hash.get(&cache_key) {
132            stats.hits += 1;
133            if let Some((mtime, size)) = metadata {
134                self.entries_by_path.insert(
135                    path.to_path_buf(),
136                    OxcFactsPathEntry {
137                        mtime,
138                        size,
139                        cache_key,
140                    },
141                );
142            }
143            return rebind_facts(cached, file_id, path, &content_hash);
144        }
145
146        stats.misses += 1;
147        let facts = parse_file_facts(file_id, path, source, content_hash, source_type);
148        self.entries_by_hash
149            .insert(cache_key.clone(), facts.clone());
150        if let Some((mtime, size)) = metadata {
151            self.entries_by_path.insert(
152                path.to_path_buf(),
153                OxcFactsPathEntry {
154                    mtime,
155                    size,
156                    cache_key,
157                },
158            );
159        }
160        facts
161    }
162}
163
164fn rebind_facts(cached: &FileFacts, file_id: FileId, path: &Path, content_hash: &str) -> FileFacts {
165    let mut facts = cached.clone();
166    facts.file_id = file_id;
167    facts.path = path.to_path_buf();
168    facts.content_hash = content_hash.to_string();
169    facts
170}
171
172fn source_type_cache_key(source_type: SourceType) -> String {
173    let language = if source_type.is_typescript_definition() {
174        "dts"
175    } else if source_type.is_typescript() {
176        "ts"
177    } else {
178        "js"
179    };
180    let module_kind = if source_type.is_commonjs() {
181        "commonjs"
182    } else if source_type.is_module() {
183        "module"
184    } else if source_type.is_script() {
185        "script"
186    } else {
187        "unambiguous"
188    };
189    let variant = if source_type.is_jsx() {
190        "jsx"
191    } else {
192        "standard"
193    };
194
195    format!("{language}:{module_kind}:{variant}")
196}
197
198pub fn analyze_files(
199    project_root: &Path,
200    files: &[PathBuf],
201    options: AnalyzeOptions,
202) -> Result<OxcEngineResult, String> {
203    let mut cache = OxcFactsCache::new();
204    analyze_files_with_cache(project_root, files, options, &mut cache)
205}
206
207pub fn analyze_files_with_cache(
208    project_root: &Path,
209    files: &[PathBuf],
210    options: AnalyzeOptions,
211    cache: &mut OxcFactsCache,
212) -> Result<OxcEngineResult, String> {
213    // De-verbatim the canonical root (Windows \\?\ form) so strip_prefix
214    // against normalize_path-built module paths keeps working.
215    let project_root = fs::canonicalize(project_root)
216        .map(|canonical| normalize_path(&canonical))
217        .unwrap_or_else(|_| normalize_path(project_root));
218    let force_reparse_files = normalize_option_paths(&options.force_reparse_files);
219    let normalized_files = normalize_file_set(&project_root, files);
220    let files = normalized_files.files;
221    let skipped_outside_root = normalized_files.skipped_outside_root;
222    let mut cache_stats = OxcFactsCacheStats::default();
223    let mut errors = Vec::new();
224    let mut facts = Vec::with_capacity(files.len());
225
226    for (idx, path) in files.iter().enumerate() {
227        match cache.facts_for_file(
228            FileId(idx),
229            path,
230            force_reparse_files.contains(path),
231            &mut cache_stats,
232        ) {
233            Ok(file_facts) => facts.push(file_facts),
234            Err(error) => errors.push(OxcEngineError {
235                file: path.clone(),
236                message: format!("read: {error}"),
237            }),
238        }
239    }
240
241    Ok(analyze_preparsed_facts(
242        project_root,
243        facts,
244        options,
245        cache_stats,
246        errors,
247        skipped_outside_root,
248    ))
249}
250
251pub(crate) fn analyze_file_facts(
252    project_root: &Path,
253    facts: Vec<FileFacts>,
254    options: AnalyzeOptions,
255    skipped_outside_root: Vec<PathBuf>,
256) -> OxcEngineResult {
257    // Same de-verbatim rule as analyze_files_with_cache: FileFacts paths are
258    // normalize_path-built, so the root they are relativized against must be too.
259    let project_root = fs::canonicalize(project_root)
260        .map(|canonical| normalize_path(&canonical))
261        .unwrap_or_else(|_| normalize_path(project_root));
262    analyze_preparsed_facts(
263        project_root,
264        facts,
265        options,
266        OxcFactsCacheStats::default(),
267        Vec::new(),
268        skipped_outside_root,
269    )
270}
271
272fn analyze_preparsed_facts(
273    project_root: PathBuf,
274    mut facts: Vec<FileFacts>,
275    options: AnalyzeOptions,
276    cache_stats: OxcFactsCacheStats,
277    mut errors: Vec<OxcEngineError>,
278    skipped_outside_root: Vec<PathBuf>,
279) -> OxcEngineResult {
280    // Preserve dense FileId indexing when unreadable files were skipped or facts
281    // were reconstructed from contribution records.
282    for (idx, fact) in facts.iter_mut().enumerate() {
283        fact.file_id = FileId(idx);
284        if let Some(parse_error) = &fact.parse_error {
285            errors.push(OxcEngineError {
286                file: fact.path.clone(),
287                message: format!("parse: {parse_error}"),
288            });
289        }
290    }
291    let resolved_files = facts
292        .iter()
293        .map(|fact| fact.path.clone())
294        .collect::<Vec<_>>();
295    let resolver = ModuleResolver::new(&project_root, &resolved_files);
296    let (resolved_modules, tracker, edges) = resolver.resolve_modules(&facts);
297    let entry_points = normalize_option_paths(&options.entry_points);
298    let public_api_files = normalize_option_paths(&options.public_api_files);
299    let executable_root_exports =
300        normalize_executable_root_exports(&options.executable_root_exports);
301    let file_verdicts = compute_verdicts(
302        &project_root,
303        &resolved_modules,
304        &entry_points,
305        &public_api_files,
306        &executable_root_exports,
307        options.entry_reachability,
308    );
309    let resolved_edges = edges
310        .iter()
311        .filter(|edge| edge.resolved_file.is_some())
312        .count();
313    let unresolved_edges = edges.len().saturating_sub(resolved_edges);
314    let resolver_config_inputs = tracker.inputs();
315    let resolver_config_fingerprint = tracker.fingerprint();
316
317    OxcEngineResult {
318        files: file_verdicts,
319        facts,
320        resolver_config_inputs,
321        resolver_config_fingerprint,
322        edges,
323        stats: OxcEngineStats {
324            files: resolved_files.len(),
325            cache_hits: cache_stats.hits,
326            cache_misses: cache_stats.misses,
327            resolved_edges,
328            unresolved_edges,
329        },
330        errors,
331        skipped_outside_root,
332    }
333}
334
/// Outcome of normalizing the caller's file list against the project root.
#[derive(Debug, Default)]
struct NormalizedFileSet {
    /// Normalized paths located under the project root.
    files: Vec<PathBuf>,
    /// Normalized paths that fell outside the project root.
    skipped_outside_root: Vec<PathBuf>,
}
340
341fn normalize_file_set(project_root: &Path, files: &[PathBuf]) -> NormalizedFileSet {
342    let mut normalized = NormalizedFileSet::default();
343    for path in files.iter().filter(|path| is_ts_js_file(path)) {
344        let path = normalize_input_path(project_root, path);
345        if path.strip_prefix(project_root).is_ok() {
346            normalized.files.push(path);
347        } else {
348            normalized.skipped_outside_root.push(path);
349        }
350    }
351
352    normalized.files.sort();
353    normalized.files.dedup();
354    normalized.skipped_outside_root.sort();
355    normalized.skipped_outside_root.dedup();
356    normalized
357}
358
359pub(crate) fn normalize_input_path(project_root: &Path, path: &Path) -> PathBuf {
360    // Route the canonicalized form through normalize_path too: on Windows,
361    // fs::canonicalize returns verbatim (\\?\C:\) paths, while every set we
362    // compare module paths against (entry_points, public_api_files,
363    // executable_root_exports) is built via normalize_path, which strips the
364    // verbatim prefix. Returning the raw canonical form makes those membership
365    // checks silently miss on Windows only.
366    fs::canonicalize(path)
367        .map(|canonical| normalize_path(&canonical))
368        .unwrap_or_else(|_| {
369            if path.is_absolute() {
370                normalize_path(path)
371            } else {
372                normalize_path(&project_root.join(path))
373            }
374        })
375}
376
377fn normalize_executable_root_exports(
378    roots: &BTreeMap<PathBuf, BTreeSet<String>>,
379) -> BTreeMap<PathBuf, BTreeSet<String>> {
380    let mut normalized = BTreeMap::<PathBuf, BTreeSet<String>>::new();
381    for (path, exports) in roots {
382        normalized
383            .entry(normalize_path(path))
384            .or_default()
385            .extend(exports.iter().cloned());
386        if let Ok(canonical) = fs::canonicalize(path) {
387            normalized
388                .entry(normalize_path(&canonical))
389                .or_default()
390                .extend(exports.iter().cloned());
391        }
392    }
393    normalized
394}
395
396fn normalize_option_paths(paths: &[PathBuf]) -> BTreeSet<PathBuf> {
397    // Both insertions go through normalize_path (which de-verbatims Windows
398    // \\?\ canonical forms) so membership checks against module paths built by
399    // normalize_input_path always compare like with like.
400    let mut normalized = BTreeSet::new();
401    for path in paths {
402        normalized.insert(normalize_path(path));
403        if let Ok(canonical) = fs::canonicalize(path) {
404            normalized.insert(normalize_path(&canonical));
405        }
406    }
407    normalized
408}
409
/// Reports whether `path` ends in one of the TS/JS extensions the engine
/// handles. The comparison is exact (case-sensitive); a path with no
/// extension, or one that is not valid UTF-8, is rejected.
fn is_ts_js_file(path: &Path) -> bool {
    const TS_JS_EXTENSIONS: [&str; 8] = ["ts", "tsx", "js", "jsx", "mts", "cts", "mjs", "cjs"];

    let Some(extension) = path.extension().and_then(|raw| raw.to_str()) else {
        return false;
    };
    TS_JS_EXTENSIONS.contains(&extension)
}