Skip to main content

fallow_extract/
inventory.rs

1//! Function inventory walker for `fallow coverage upload-inventory`.
2//!
3//! Emits one [`InventoryEntry`] per function (declaration, expression, arrow,
4//! method) whose name matches what `oxc-coverage-instrument` produces at
5//! instrument time. This is the **static side** of the three-state production
6//! coverage story: uploaded inventory minus runtime-seen functions equals
7//! `untracked`.
8//!
9//! # Naming contract
10//!
11//! The cloud stores function identity as
12//! `(filePath, functionName, lineNumber)`. This walker is responsible for the
13//! `functionName` and `lineNumber` parts of that contract. Anonymous functions
14//! are named `(anonymous_N)` where `N` is a file-scoped monotonic counter that
15//! starts at 0 and increments in pre-order AST traversal each time a function
16//! is entered without a resolvable explicit name. Name resolution precedence:
17//!
18//! 1. Parent-provided `pending_name` (from `MethodDefinition`,
19//!    `VariableDeclarator`), same pattern as the internal complexity visitor.
20//! 2. The function's own `id` (named `function foo() {}`, named function
21//!    expression `const x = function named() {}`).
22//! 3. `(anonymous_N)` with the current counter value; counter then increments.
23//!
24//! Counter scope is per-file. Reference implementation:
25//! `oxc-coverage-instrument/src/transform.rs` (`fn_counter` field; lines 201
26//! and 612 at the time of writing).
27
28use std::path::Path;
29
30use oxc_allocator::Allocator;
31#[allow(clippy::wildcard_imports, reason = "many AST types used")]
32use oxc_ast::ast::*;
33use oxc_ast_visit::{Visit, walk};
34use oxc_parser::Parser;
35use oxc_semantic::ScopeFlags;
36use oxc_span::{SourceType, Span};
37use rustc_hash::FxHashMap;
38
39/// A single static-inventory entry for one function.
40///
41/// `name` is beacon-compatible (see the module docs for the naming rule).
42/// `line` is 1-based, matching the AST span start. The `start_column` /
43/// `end_line` / `end_column` fields carry the function-node span in the
44/// 1-indexed UTF-16 convention the cross-surface `FunctionIdentity` join key
45/// expects (see `fallow_cov_protocol::FunctionIdentity::start_column`). They
46/// are descriptive metadata: the join hash is `(file, name, line)` only, so
47/// column fidelity never affects the join, only display / same-line
48/// disambiguation.
49#[derive(Debug, Clone, PartialEq, Eq)]
50pub struct InventoryEntry {
51    /// Beacon-compatible function name.
52    pub name: String,
53    /// 1-based source line of the function declaration (node `span.start`).
54    pub line: u32,
55    /// 1-indexed UTF-16 column of the function node start.
56    pub start_column: u32,
57    /// 1-based source line where the function node ends.
58    pub end_line: u32,
59    /// 1-indexed UTF-16 column of the function node end.
60    pub end_column: u32,
61    /// Content digest of the function's full-span source slice
62    /// (`&source[span.start..span.end]`): first 8 bytes of SHA-256 as 16
63    /// lowercase hex characters, via `fallow_cov_protocol::source_hash_for`.
64    /// The slice is the canonical body bytes (signature line + body + closing
65    /// brace, no whitespace normalization), identical for `Function` and
66    /// `ArrowFunctionExpression`. Stable across line moves, so a
67    /// moved-but-unedited function keeps the same hash.
68    pub source_hash: String,
69}
70
71/// Visitor that collects [`InventoryEntry`] values in file traversal order.
72struct InventoryVisitor<'a> {
73    source: &'a str,
74    line_offsets: &'a [u32],
75    entries: Vec<InventoryEntry>,
76    /// Parent-provided name override (method key, variable binding, etc.).
77    pending_name: Option<String>,
78    /// File-scoped monotonic counter for unnamed functions.
79    anonymous_counter: u32,
80}
81
82impl<'a> InventoryVisitor<'a> {
83    const fn new(source: &'a str, line_offsets: &'a [u32]) -> Self {
84        Self {
85            source,
86            line_offsets,
87            entries: Vec::new(),
88            pending_name: None,
89            anonymous_counter: 0,
90        }
91    }
92
93    /// Resolve a function's name and advance the counter.
94    ///
95    /// Mirrors `oxc-coverage-instrument`'s two-step flow: `resolve_function_name`
96    /// reads the current counter value for the anonymous-case name, and
97    /// `add_function` advances the counter unconditionally on every
98    /// instrumented function (named or not). We collapse both into one call.
99    ///
100    /// Name precedence: parent `pending_name` (method key / variable binding)
101    /// → function's own `id` → counter.
102    fn resolve_name(&mut self, explicit: Option<&str>) -> String {
103        let n = self.anonymous_counter;
104        self.anonymous_counter += 1;
105        if let Some(pending) = self.pending_name.take() {
106            return pending;
107        }
108        if let Some(name) = explicit {
109            return name.to_owned();
110        }
111        format!("(anonymous_{n})")
112    }
113
114    fn record(&mut self, name: String, span: Span) {
115        let (line, start_column) = self.line_col_utf16(span.start);
116        let (end_line, end_column) = self.line_col_utf16(span.end);
117        let source_hash = self
118            .source
119            .get(span.start as usize..span.end as usize)
120            .map_or_else(
121                || fallow_cov_protocol::source_hash_for(b""),
122                |slice| fallow_cov_protocol::source_hash_for(slice.as_bytes()),
123            );
124        self.entries.push(InventoryEntry {
125            name,
126            line,
127            start_column,
128            end_line,
129            end_column,
130            source_hash,
131        });
132    }
133
134    /// Map a UTF-8 byte offset to `(1-based line, 1-indexed UTF-16 column)`.
135    ///
136    /// The line comes from the precomputed offset table; the column counts
137    /// UTF-16 code units from the line start to `byte_offset`, matching the
138    /// `FunctionIdentity` column convention (Istanbul / V8 / oxc all normalize
139    /// to 1-indexed UTF-16). A byte offset that does not fall on a char
140    /// boundary (it always should for an AST span) clamps to the nearest
141    /// boundary at or before it rather than panicking.
142    fn line_col_utf16(&self, byte_offset: u32) -> (u32, u32) {
143        let line_idx = match self.line_offsets.binary_search(&byte_offset) {
144            Ok(idx) => idx,
145            Err(idx) => idx.saturating_sub(1),
146        };
147        let line = line_idx as u32 + 1;
148        let line_start = self.line_offsets[line_idx] as usize;
149        let mut end = byte_offset as usize;
150        while end > line_start && !self.source.is_char_boundary(end) {
151            end -= 1;
152        }
153        let col_utf16 = self
154            .source
155            .get(line_start..end)
156            .map_or(0, |slice| slice.encode_utf16().count());
157        (line, col_utf16 as u32 + 1)
158    }
159}
160
161impl<'ast> Visit<'ast> for InventoryVisitor<'_> {
162    fn visit_function(&mut self, func: &Function<'ast>, flags: ScopeFlags) {
163        if func.body.is_none() {
164            walk::walk_function(self, func, flags);
165            return;
166        }
167        let name = self.resolve_name(func.id.as_ref().map(|id| id.name.as_str()));
168        self.record(name, func.span);
169        walk::walk_function(self, func, flags);
170    }
171
172    fn visit_arrow_function_expression(&mut self, arrow: &ArrowFunctionExpression<'ast>) {
173        let name = self.resolve_name(None);
174        self.record(name, arrow.span);
175        walk::walk_arrow_function_expression(self, arrow);
176    }
177
178    fn visit_method_definition(&mut self, method: &MethodDefinition<'ast>) {
179        if let Some(name) = method.key.static_name() {
180            self.pending_name = Some(name.to_string());
181        }
182        walk::walk_method_definition(self, method);
183        self.pending_name = None;
184    }
185
186    fn visit_variable_declarator(&mut self, decl: &VariableDeclarator<'ast>) {
187        if let Some(id) = decl.id.get_binding_identifier()
188            && decl.init.as_ref().is_some_and(|init| {
189                matches!(
190                    init,
191                    Expression::ArrowFunctionExpression(_) | Expression::FunctionExpression(_)
192                )
193            })
194        {
195            self.pending_name = Some(id.name.to_string());
196        }
197        walk::walk_variable_declarator(self, decl);
198        self.pending_name = None;
199    }
200
201    fn visit_object_property(&mut self, prop: &ObjectProperty<'ast>) {
202        self.pending_name = None;
203        walk::walk_object_property(self, prop);
204        self.pending_name = None;
205    }
206}
207
208/// Per-function static complexity collected alongside the inventory walk.
209///
210/// Keyed to an [`InventoryEntry`] by its `source_hash`, which both this and the
211/// inventory walk derive from the identical full-span byte slice over the same
212/// parsed program (see [`InventoryEntry::source_hash`]). The hash is stable
213/// across line moves, so the pairing survives reformatting that shifts line
214/// numbers. `cyclomatic` and `cognitive` are descriptive context for downstream
215/// importance weighting, never thresholds.
216#[derive(Debug, Clone, Copy, PartialEq, Eq)]
217pub struct InventoryComplexity {
218    /// `McCabe` cyclomatic complexity (1 + decision points).
219    pub cyclomatic: u16,
220    /// `SonarSource` cognitive complexity (structural + nesting penalty).
221    pub cognitive: u16,
222}
223
224/// Parse `source` at `path` and return every function as an [`InventoryEntry`].
225///
226/// Only plain JS/TS/JSX/TSX sources are supported. Callers should skip SFC,
227/// Astro, MDX, CSS, HTML, and other non-JS inputs; those use different
228/// instrumentation paths and are out of scope for the first inventory release.
229///
230/// Errors are swallowed: the returned vector covers whatever could be parsed.
231/// This mirrors how the rest of the extract pipeline handles partial parse
232/// results.
233#[must_use]
234pub fn walk_source(path: &Path, source: &str) -> Vec<InventoryEntry> {
235    walk_source_with_complexity(path, source).0
236}
237
238/// Parse `source` at `path` once and return every function as an
239/// [`InventoryEntry`] together with a `source_hash -> InventoryComplexity` map.
240///
241/// Both the inventory entries and the complexity map come from the SAME parse
242/// (including the JSX fallback retry), so the per-function `source_hash` values
243/// line up exactly and a caller can enrich each entry's metrics by a hash
244/// lookup. Functions whose span slice could not be sliced share the empty-input
245/// hash and simply don't pair; that degrades to "no metrics", never a panic.
246///
247/// Errors are swallowed, matching [`walk_source`]: the returned data covers
248/// whatever could be parsed.
249#[must_use]
250pub fn walk_source_with_complexity(
251    path: &Path,
252    source: &str,
253) -> (Vec<InventoryEntry>, FxHashMap<String, InventoryComplexity>) {
254    let source_type = SourceType::from_path(path).unwrap_or_default();
255    let line_offsets = fallow_types::extract::compute_line_offsets(source);
256
257    let primary = walk_one_parse(source, source_type, &line_offsets);
258    if primary.0.is_empty() && !source_type.is_jsx() {
259        let jsx_type = if source_type.is_typescript() {
260            SourceType::tsx()
261        } else {
262            SourceType::jsx()
263        };
264        let retry = walk_one_parse(source, jsx_type, &line_offsets);
265        if !retry.0.is_empty() {
266            return retry;
267        }
268    }
269
270    primary
271}
272
273/// Run both the inventory and complexity visitors over a single parse of
274/// `source` under `source_type`, pairing them by `source_hash`.
275fn walk_one_parse(
276    source: &str,
277    source_type: SourceType,
278    line_offsets: &[u32],
279) -> (Vec<InventoryEntry>, FxHashMap<String, InventoryComplexity>) {
280    let allocator = Allocator::default();
281    let parser_return = Parser::new(&allocator, source, source_type).parse();
282
283    let mut visitor = InventoryVisitor::new(source, line_offsets);
284    visitor.visit_program(&parser_return.program);
285
286    let complexity =
287        crate::complexity::compute_complexity(&parser_return.program, source, line_offsets);
288    let metrics: FxHashMap<String, InventoryComplexity> = complexity
289        .into_iter()
290        .filter_map(|fc| {
291            fc.source_hash.map(|hash| {
292                (
293                    hash,
294                    InventoryComplexity {
295                        cyclomatic: fc.cyclomatic,
296                        cognitive: fc.cognitive,
297                    },
298                )
299            })
300        })
301        .collect();
302
303    (visitor.entries, metrics)
304}
305
306#[cfg(all(test, not(miri)))]
307mod tests {
308    use super::*;
309    use std::path::PathBuf;
310
311    fn walk(source: &str) -> Vec<InventoryEntry> {
312        walk_source(&PathBuf::from("test.ts"), source)
313    }
314
315    #[test]
316    fn named_function_declaration_uses_its_own_name() {
317        let entries = walk("function foo() { return 1; }");
318        assert_eq!(entries.len(), 1);
319        assert_eq!(entries[0].name, "foo");
320        assert_eq!(entries[0].line, 1);
321    }
322
323    #[test]
324    fn const_arrow_captures_binding_name() {
325        let entries = walk("const bar = () => 42;");
326        assert_eq!(entries.len(), 1);
327        assert_eq!(entries[0].name, "bar");
328    }
329
330    #[test]
331    fn const_function_expression_captures_binding_name_not_fn_id() {
332        let entries = walk("const outer = function inner() { return 1; };");
333        assert_eq!(entries.len(), 1);
334        assert_eq!(entries[0].name, "outer");
335    }
336
337    #[test]
338    fn class_methods_use_method_names() {
339        let entries = walk(
340            r"
341            class Foo {
342              bar() { return 1; }
343              baz() { return 2; }
344            }",
345        );
346        let names: Vec<_> = entries.iter().map(|e| e.name.as_str()).collect();
347        assert_eq!(names, vec!["bar", "baz"]);
348    }
349
350    #[test]
351    fn anonymous_arrow_passed_as_argument_uses_counter() {
352        let entries = walk("setTimeout(() => { console.log('hi'); }, 10);");
353        assert_eq!(entries.len(), 1);
354        assert_eq!(entries[0].name, "(anonymous_0)");
355    }
356
357    #[test]
358    fn multiple_anonymous_functions_increment_counter_in_source_order() {
359        let entries = walk(
360            r"
361            [1, 2, 3].map(() => 1);
362            [4, 5, 6].filter(() => true);
363            ",
364        );
365        let names: Vec<_> = entries.iter().map(|e| e.name.as_str()).collect();
366        assert_eq!(names, vec!["(anonymous_0)", "(anonymous_1)"]);
367    }
368
369    #[test]
370    fn named_function_still_advances_counter_matching_instrumenter() {
371        let entries = walk(
372            r"
373            function named() { return 1; }
374            [1].map(() => 2);
375            ",
376        );
377        let names: Vec<_> = entries.iter().map(|e| e.name.as_str()).collect();
378        assert_eq!(names, vec!["named", "(anonymous_1)"]);
379    }
380
381    #[test]
382    fn anonymous_after_named_chain_uses_next_counter_value() {
383        let entries = walk(
384            r"
385            function a() {}
386            function b() {}
387            function c() {}
388            const d = () => 4;
389            ",
390        );
391        let names: Vec<_> = entries.iter().map(|e| e.name.as_str()).collect();
392        assert_eq!(names, vec!["a", "b", "c", "d"]);
393    }
394
395    #[test]
396    fn typescript_overload_signatures_dont_emit_or_advance_counter() {
397        let entries = walk(
398            r"
399            function foo(): number;
400            function foo(s: string): string;
401            function foo(s?: string): number | string { return s ? s : 1; }
402            [1].map(() => 2);
403            ",
404        );
405        let names: Vec<_> = entries.iter().map(|e| e.name.as_str()).collect();
406        assert_eq!(names, vec!["foo", "(anonymous_1)"]);
407    }
408
409    #[test]
410    fn export_default_named_function_keeps_explicit_name() {
411        let entries = walk("export default function foo() { return 1; }");
412        assert_eq!(entries.len(), 1);
413        assert_eq!(entries[0].name, "foo");
414    }
415
416    #[test]
417    fn export_default_anonymous_function_uses_counter() {
418        let entries = walk("export default function() { return 1; }");
419        assert_eq!(entries.len(), 1);
420        assert_eq!(entries[0].name, "(anonymous_0)");
421    }
422
423    #[test]
424    fn nested_function_numbered_after_parent_in_traversal_order() {
425        let entries = walk(
426            r"
427            function outer() {
428              return function() { return 1; };
429            }",
430        );
431        let names: Vec<_> = entries.iter().map(|e| e.name.as_str()).collect();
432        assert_eq!(names, vec!["outer", "(anonymous_1)"]);
433    }
434
435    #[test]
436    fn line_number_is_one_based_from_source_start() {
437        let entries = walk("\n\nfunction atLineThree() {}");
438        assert_eq!(entries.len(), 1);
439        assert_eq!(entries[0].line, 3);
440    }
441
442    #[test]
443    fn short_jsx_in_js_file_retries_with_jsx_parser() {
444        let entries = walk_source(&PathBuf::from("component.js"), "const A = () => <div />;");
445        assert_eq!(entries.len(), 1);
446        assert_eq!(entries[0].name, "A");
447        assert_eq!(entries[0].line, 1);
448    }
449
450    #[test]
451    fn object_method_shorthand_uses_anonymous_counter() {
452        let entries = walk("const obj = { run() { return 1; } };");
453        let names: Vec<_> = entries.iter().map(|e| e.name.as_str()).collect();
454        assert_eq!(names, vec!["(anonymous_0)"]);
455    }
456
457    #[test]
458    fn class_property_arrow_uses_anonymous_counter() {
459        let entries = walk(
460            r"
461            class Foo {
462              bar = () => 1;
463            }",
464        );
465        let names: Vec<_> = entries.iter().map(|e| e.name.as_str()).collect();
466        assert_eq!(names, vec!["(anonymous_0)"]);
467    }
468
469    #[test]
470    fn records_one_indexed_utf16_columns() {
471        let entries = walk("function foo() { return 1; }");
472        assert_eq!(entries.len(), 1);
473        assert_eq!(entries[0].start_column, 1);
474        assert_eq!(entries[0].end_line, 1);
475        assert!(entries[0].end_column > entries[0].start_column);
476    }
477
478    #[test]
479    fn utf16_column_counts_code_units_not_bytes() {
480        let entries = walk("const e = \"\u{1F600}\"; const f = () => 1;");
481        let f = entries.iter().find(|e| e.name == "f").expect("f present");
482        let byte_prefix_len = "const e = \"\u{1F600}\"; const f = ".len() as u32;
483        assert!(f.start_column < byte_prefix_len + 1);
484    }
485
486    #[test]
487    fn same_line_distinct_named_functions_have_distinct_positions() {
488        let entries = walk("function a() {} function b() {}");
489        let a = entries.iter().find(|e| e.name == "a").expect("a present");
490        let b = entries.iter().find(|e| e.name == "b").expect("b present");
491        assert_eq!(a.line, b.line, "both on line 1");
492        assert_ne!(
493            a.start_column, b.start_column,
494            "same-line functions are column-disambiguated"
495        );
496    }
497
498    #[test]
499    fn same_line_anonymous_functions_stay_distinct_via_counter() {
500        let entries = walk("const xs = [() => 1, () => 2];");
501        let names: Vec<_> = entries.iter().map(|e| e.name.as_str()).collect();
502        assert_eq!(names, vec!["(anonymous_0)", "(anonymous_1)"]);
503        assert_eq!(entries[0].line, entries[1].line, "both on line 1");
504        assert_ne!(
505            entries[0].name, entries[1].name,
506            "counter keeps them distinct"
507        );
508    }
509
510    #[test]
511    fn source_hash_is_the_content_digest_of_the_function_span() {
512        let src = "function foo() { return 1; }";
513        let entries = walk(src);
514        assert_eq!(entries.len(), 1);
515        assert_eq!(
516            entries[0].source_hash,
517            fallow_cov_protocol::source_hash_for(src.as_bytes())
518        );
519        assert_eq!(entries[0].source_hash.len(), 16);
520        assert!(
521            entries[0]
522                .source_hash
523                .chars()
524                .all(|c| c.is_ascii_hexdigit())
525        );
526    }
527
528    #[test]
529    fn source_hash_survives_line_moves_and_tracks_body_edits() {
530        let original = walk("function foo() { return 1; }");
531        let moved = walk("\n\nfunction foo() { return 1; }");
532        assert_eq!(
533            original[0].source_hash, moved[0].source_hash,
534            "a moved-but-unedited function must keep its source_hash"
535        );
536        let edited = walk("function foo() { return 2; }");
537        assert_ne!(
538            original[0].source_hash, edited[0].source_hash,
539            "an edited body must change the source_hash"
540        );
541    }
542}