Skip to main content

fallow_extract/
inventory.rs

1//! Function inventory walker for `fallow coverage upload-inventory`.
2//!
3//! Emits one [`InventoryEntry`] per function (declaration, expression, arrow,
4//! method) whose name matches what `oxc-coverage-instrument` produces at
5//! instrument time. This is the **static side** of the three-state production
6//! coverage story: uploaded inventory minus runtime-seen functions equals
7//! `untracked`.
8//!
9//! # Naming contract
10//!
//! The cloud stores function identity as
//! `(filePath, functionName, lineNumber)`. This walker is responsible for the
//! `functionName` and `lineNumber` parts of that contract. Anonymous functions
//! are named `(anonymous_N)` where `N` is the value of a file-scoped monotonic
//! counter that starts at 0 and advances in pre-order AST traversal each time
//! a function with a body is entered, whether or not that function resolves
//! to an explicit name. `N` is therefore the function's zero-based position
//! among all recorded functions in the file. Name resolution precedence:
//!
//! 1. Parent-provided `pending_name` (from `MethodDefinition`,
//!    `VariableDeclarator`), same pattern as the internal complexity visitor.
//! 2. The function's own `id` (named `function foo() {}`, or a named function
//!    expression that is not directly bound by a declarator, such as
//!    `run(function named() {})`; `const x = function named() {}` resolves
//!    to `x` under rule 1).
//! 3. `(anonymous_N)` with the counter value read on entry to the function.
23//!
24//! Counter scope is per-file. Reference implementation:
25//! `oxc-coverage-instrument/src/transform.rs` (`fn_counter` field; lines 201
26//! and 612 at the time of writing).
27
28use std::path::Path;
29
30use oxc_allocator::Allocator;
31#[allow(clippy::wildcard_imports, reason = "many AST types used")]
32use oxc_ast::ast::*;
33use oxc_ast_visit::{Visit, walk};
34use oxc_parser::Parser;
35use oxc_semantic::ScopeFlags;
36use oxc_span::{SourceType, Span};
37
/// One function discovered by the static inventory walk.
///
/// The cross-surface join hash is `(file, name, line)`: `name` follows the
/// beacon-compatible naming rule described in the module docs, and `line` is
/// the 1-based line of the function node's span start. The remaining position
/// fields (`start_column`, `end_line`, `end_column`) describe the function
/// node's span in the 1-indexed UTF-16 convention expected by
/// `fallow_cov_protocol::FunctionIdentity::start_column`. They are not part of
/// the join hash, so their precision only matters for display and for telling
/// same-line functions apart.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InventoryEntry {
    /// Function name in beacon-compatible form.
    pub name: String,
    /// Line (1-based) on which the function node starts (`span.start`).
    pub line: u32,
    /// Column of the function node start, as a 1-indexed UTF-16 offset.
    pub start_column: u32,
    /// Line (1-based) on which the function node ends.
    pub end_line: u32,
    /// Column of the function node end, as a 1-indexed UTF-16 offset.
    pub end_column: u32,
    /// Digest of the function's source text, taken over the full node span
    /// (`&source[span.start..span.end]`: signature line, body, and closing
    /// brace, with no whitespace normalization) and produced by
    /// `fallow_cov_protocol::source_hash_for` as the first 8 bytes of SHA-256
    /// rendered as 16 lowercase hex characters. The same slice rule applies
    /// to `Function` and `ArrowFunctionExpression`. Only the span's bytes are
    /// hashed, so a function that moves to another line without being edited
    /// keeps its hash.
    pub source_hash: String,
}
69
70/// Visitor that collects [`InventoryEntry`] values in file traversal order.
71struct InventoryVisitor<'a> {
72    source: &'a str,
73    line_offsets: &'a [u32],
74    entries: Vec<InventoryEntry>,
75    /// Parent-provided name override (method key, variable binding, etc.).
76    pending_name: Option<String>,
77    /// File-scoped monotonic counter for unnamed functions.
78    anonymous_counter: u32,
79}
80
81impl<'a> InventoryVisitor<'a> {
82    const fn new(source: &'a str, line_offsets: &'a [u32]) -> Self {
83        Self {
84            source,
85            line_offsets,
86            entries: Vec::new(),
87            pending_name: None,
88            anonymous_counter: 0,
89        }
90    }
91
92    /// Resolve a function's name and advance the counter.
93    ///
94    /// Mirrors `oxc-coverage-instrument`'s two-step flow: `resolve_function_name`
95    /// reads the current counter value for the anonymous-case name, and
96    /// `add_function` advances the counter unconditionally on every
97    /// instrumented function (named or not). We collapse both into one call.
98    ///
99    /// Name precedence: parent `pending_name` (method key / variable binding)
100    /// → function's own `id` → counter.
101    fn resolve_name(&mut self, explicit: Option<&str>) -> String {
102        let n = self.anonymous_counter;
103        self.anonymous_counter += 1;
104        if let Some(pending) = self.pending_name.take() {
105            return pending;
106        }
107        if let Some(name) = explicit {
108            return name.to_owned();
109        }
110        format!("(anonymous_{n})")
111    }
112
113    fn record(&mut self, name: String, span: Span) {
114        let (line, start_column) = self.line_col_utf16(span.start);
115        let (end_line, end_column) = self.line_col_utf16(span.end);
116        // Canonical body bytes: the function node's full-span slice. Valid AST
117        // spans always fall on char boundaries within range; fall back to an
118        // empty-input hash defensively rather than panicking.
119        let source_hash = self
120            .source
121            .get(span.start as usize..span.end as usize)
122            .map_or_else(
123                || fallow_cov_protocol::source_hash_for(b""),
124                |slice| fallow_cov_protocol::source_hash_for(slice.as_bytes()),
125            );
126        self.entries.push(InventoryEntry {
127            name,
128            line,
129            start_column,
130            end_line,
131            end_column,
132            source_hash,
133        });
134    }
135
136    /// Map a UTF-8 byte offset to `(1-based line, 1-indexed UTF-16 column)`.
137    ///
138    /// The line comes from the precomputed offset table; the column counts
139    /// UTF-16 code units from the line start to `byte_offset`, matching the
140    /// `FunctionIdentity` column convention (Istanbul / V8 / oxc all normalize
141    /// to 1-indexed UTF-16). A byte offset that does not fall on a char
142    /// boundary (it always should for an AST span) clamps to the nearest
143    /// boundary at or before it rather than panicking.
144    fn line_col_utf16(&self, byte_offset: u32) -> (u32, u32) {
145        let line_idx = match self.line_offsets.binary_search(&byte_offset) {
146            Ok(idx) => idx,
147            Err(idx) => idx.saturating_sub(1),
148        };
149        let line = line_idx as u32 + 1;
150        let line_start = self.line_offsets[line_idx] as usize;
151        let mut end = byte_offset as usize;
152        while end > line_start && !self.source.is_char_boundary(end) {
153            end -= 1;
154        }
155        let col_utf16 = self
156            .source
157            .get(line_start..end)
158            .map_or(0, |slice| slice.encode_utf16().count());
159        (line, col_utf16 as u32 + 1)
160    }
161}
162
163impl<'ast> Visit<'ast> for InventoryVisitor<'_> {
164    fn visit_function(&mut self, func: &Function<'ast>, flags: ScopeFlags) {
165        // Bodyless functions (TypeScript overload signatures, `abstract`
166        // class methods, `declare function ...`) are not instrumented at
167        // runtime. The instrumenter only calls `add_function` when a body
168        // exists, so neither recording an entry nor advancing the counter
169        // for these signatures keeps our naming in lockstep.
170        if func.body.is_none() {
171            walk::walk_function(self, func, flags);
172            return;
173        }
174        let name = self.resolve_name(func.id.as_ref().map(|id| id.name.as_str()));
175        self.record(name, func.span);
176        walk::walk_function(self, func, flags);
177    }
178
179    fn visit_arrow_function_expression(&mut self, arrow: &ArrowFunctionExpression<'ast>) {
180        let name = self.resolve_name(None);
181        self.record(name, arrow.span);
182        walk::walk_arrow_function_expression(self, arrow);
183    }
184
185    fn visit_method_definition(&mut self, method: &MethodDefinition<'ast>) {
186        if let Some(name) = method.key.static_name() {
187            self.pending_name = Some(name.to_string());
188        }
189        walk::walk_method_definition(self, method);
190        self.pending_name = None;
191    }
192
193    fn visit_variable_declarator(&mut self, decl: &VariableDeclarator<'ast>) {
194        if let Some(id) = decl.id.get_binding_identifier()
195            && decl.init.as_ref().is_some_and(|init| {
196                matches!(
197                    init,
198                    Expression::ArrowFunctionExpression(_) | Expression::FunctionExpression(_)
199                )
200            })
201        {
202            self.pending_name = Some(id.name.to_string());
203        }
204        walk::walk_variable_declarator(self, decl);
205        self.pending_name = None;
206    }
207
208    fn visit_object_property(&mut self, prop: &ObjectProperty<'ast>) {
209        // Object-literal methods (`{ run() {} }`) and arrow properties
210        // (`{ run: () => 1 }`) intentionally do NOT inherit the outer
211        // variable binding's name. Clear any pending_name leaked from an
212        // ancestor (e.g., `const obj = { run() {} }`) so the inner function
213        // falls through to the anonymous counter, matching the e2e
214        // verification against `oxc-coverage-instrument`.
215        self.pending_name = None;
216        walk::walk_object_property(self, prop);
217        self.pending_name = None;
218    }
219}
220
221/// Parse `source` at `path` and return every function as an [`InventoryEntry`].
222///
223/// Only plain JS/TS/JSX/TSX sources are supported. Callers should skip SFC,
224/// Astro, MDX, CSS, HTML, and other non-JS inputs; those use different
225/// instrumentation paths and are out of scope for the first inventory release.
226///
227/// Errors are swallowed: the returned vector covers whatever could be parsed.
228/// This mirrors how the rest of the extract pipeline handles partial parse
229/// results.
230#[must_use]
231pub fn walk_source(path: &Path, source: &str) -> Vec<InventoryEntry> {
232    let source_type = SourceType::from_path(path).unwrap_or_default();
233    let allocator = Allocator::default();
234    let parser_return = Parser::new(&allocator, source, source_type).parse();
235
236    let line_offsets = fallow_types::extract::compute_line_offsets(source);
237    let mut visitor = InventoryVisitor::new(source, &line_offsets);
238    visitor.visit_program(&parser_return.program);
239
240    // If the initial parse found nothing, retry with JSX/TSX source type
241    // (matches parse.rs fallback for `.js` files that actually contain JSX).
242    // Keep this independent of file length: tiny components such as
243    // `const A = () => <div />;` are common and still need inventory entries.
244    if visitor.entries.is_empty() && !source_type.is_jsx() {
245        let jsx_type = if source_type.is_typescript() {
246            SourceType::tsx()
247        } else {
248            SourceType::jsx()
249        };
250        let allocator2 = Allocator::default();
251        let retry_return = Parser::new(&allocator2, source, jsx_type).parse();
252        let mut retry_visitor = InventoryVisitor::new(source, &line_offsets);
253        retry_visitor.visit_program(&retry_return.program);
254        if !retry_visitor.entries.is_empty() {
255            return retry_visitor.entries;
256        }
257    }
258
259    visitor.entries
260}
261
#[cfg(all(test, not(miri)))]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Walk `source` as a TypeScript file named `test.ts`.
    fn walk(source: &str) -> Vec<InventoryEntry> {
        walk_source(&PathBuf::from("test.ts"), source)
    }

    /// Recorded function names, in traversal order.
    fn names(entries: &[InventoryEntry]) -> Vec<&str> {
        entries.iter().map(|entry| entry.name.as_str()).collect()
    }

    /// The entry called `name`; panics if the walk did not record one.
    fn entry_named<'e>(entries: &'e [InventoryEntry], name: &str) -> &'e InventoryEntry {
        entries
            .iter()
            .find(|entry| entry.name == name)
            .unwrap_or_else(|| panic!("{name} present"))
    }

    #[test]
    fn named_function_declaration_uses_its_own_name() {
        let entries = walk("function foo() { return 1; }");
        assert_eq!(names(&entries), ["foo"]);
        assert_eq!(entries[0].line, 1);
    }

    #[test]
    fn const_arrow_captures_binding_name() {
        let entries = walk("const bar = () => 42;");
        assert_eq!(names(&entries), ["bar"]);
    }

    #[test]
    fn const_function_expression_captures_binding_name_not_fn_id() {
        // With both a binding and a function id available,
        // oxc-coverage-instrument picks the parent-provided pending_name (the
        // `const` binding); the walker follows the same precedence.
        let entries = walk("const outer = function inner() { return 1; };");
        assert_eq!(names(&entries), ["outer"]);
    }

    #[test]
    fn class_methods_use_method_names() {
        let entries = walk(
            r"
            class Foo {
              bar() { return 1; }
              baz() { return 2; }
            }",
        );
        assert_eq!(names(&entries), ["bar", "baz"]);
    }

    #[test]
    fn anonymous_arrow_passed_as_argument_uses_counter() {
        let entries = walk("setTimeout(() => { console.log('hi'); }, 10);");
        assert_eq!(names(&entries), ["(anonymous_0)"]);
    }

    #[test]
    fn multiple_anonymous_functions_increment_counter_in_source_order() {
        let entries = walk(
            r"
            [1, 2, 3].map(() => 1);
            [4, 5, 6].filter(() => true);
            ",
        );
        assert_eq!(names(&entries), ["(anonymous_0)", "(anonymous_1)"]);
    }

    #[test]
    fn named_function_still_advances_counter_matching_instrumenter() {
        // Oracle: `oxc-coverage-instrument` bumps its `fn_counter` for every
        // function with a body, named or anonymous. The arrow is the second
        // emitted function, so it lands in slot `1`.
        let entries = walk(
            r"
            function named() { return 1; }
            [1].map(() => 2);
            ",
        );
        assert_eq!(names(&entries), ["named", "(anonymous_1)"]);
    }

    #[test]
    fn anonymous_after_named_chain_uses_next_counter_value() {
        // Regression for the "counter only advances on anonymous" bug caught
        // in rust-reviewer BLOCK. `a`, `b`, `c`, and the binding `d` are all
        // resolved by name but still consume counter slots 0-3, so the
        // trailing, truly anonymous arrow must be numbered 4. Without that
        // trailing arrow the test could not observe the counter at all.
        let entries = walk(
            r"
            function a() {}
            function b() {}
            function c() {}
            const d = () => 4;
            [1].map(() => 5);
            ",
        );
        assert_eq!(names(&entries), ["a", "b", "c", "d", "(anonymous_4)"]);
    }

    #[test]
    fn typescript_overload_signatures_dont_emit_or_advance_counter() {
        // Overload signatures have no body and are not instrumented at
        // runtime, so they must not take a counter slot. Slot 0 belongs to
        // the `foo` implementation, which makes the trailing arrow (the
        // second bodyful function) `(anonymous_1)`.
        let entries = walk(
            r"
            function foo(): number;
            function foo(s: string): string;
            function foo(s?: string): number | string { return s ? s : 1; }
            [1].map(() => 2);
            ",
        );
        assert_eq!(names(&entries), ["foo", "(anonymous_1)"]);
    }

    #[test]
    fn export_default_named_function_keeps_explicit_name() {
        let entries = walk("export default function foo() { return 1; }");
        assert_eq!(names(&entries), ["foo"]);
    }

    #[test]
    fn export_default_anonymous_function_uses_counter() {
        let entries = walk("export default function() { return 1; }");
        assert_eq!(names(&entries), ["(anonymous_0)"]);
    }

    #[test]
    fn nested_function_numbered_after_parent_in_traversal_order() {
        let entries = walk(
            r"
            function outer() {
              return function() { return 1; };
            }",
        );
        // `outer` takes slot 0 under its own name. Because the counter moves
        // on every bodyful function, the nested anonymous function reads
        // counter value 1 when it is resolved.
        assert_eq!(names(&entries), ["outer", "(anonymous_1)"]);
    }

    #[test]
    fn line_number_is_one_based_from_source_start() {
        let entries = walk("\n\nfunction atLineThree() {}");
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].line, 3);
    }

    #[test]
    fn short_jsx_in_js_file_retries_with_jsx_parser() {
        let path = PathBuf::from("component.js");
        let entries = walk_source(&path, "const A = () => <div />;");
        assert_eq!(names(&entries), ["A"]);
        assert_eq!(entries[0].line, 1);
    }

    #[test]
    fn object_method_shorthand_uses_anonymous_counter() {
        let entries = walk("const obj = { run() { return 1; } };");
        assert_eq!(names(&entries), ["(anonymous_0)"]);
    }

    #[test]
    fn class_property_arrow_uses_anonymous_counter() {
        let entries = walk(
            r"
            class Foo {
              bar = () => 1;
            }",
        );
        assert_eq!(names(&entries), ["(anonymous_0)"]);
    }

    #[test]
    fn records_one_indexed_utf16_columns() {
        // `function foo` begins at byte 0, which is UTF-16 column 1.
        let entries = walk("function foo() { return 1; }");
        assert_eq!(entries.len(), 1);
        let foo = &entries[0];
        assert_eq!(foo.start_column, 1);
        assert_eq!(foo.end_line, 1);
        // On a single line the end column lies past the closing brace.
        assert!(foo.end_column > foo.start_column);
    }

    #[test]
    fn utf16_column_counts_code_units_not_bytes() {
        // The emoji ahead of the arrow function is 4 UTF-8 bytes but only 2
        // UTF-16 code units, so a byte-based column would come out 2 higher
        // than the UTF-16 column. Pin the exact UTF-16 value.
        let prefix = "const e = \"\u{1F600}\"; const f = ";
        let entries = walk(&format!("{prefix}() => 1;"));
        let f = entry_named(&entries, "f");

        let utf16_column =
            u32::try_from(prefix.encode_utf16().count()).expect("prefix fits in u32") + 1;
        let byte_column = u32::try_from(prefix.len()).expect("prefix fits in u32") + 1;
        assert_eq!(f.start_column, utf16_column);
        assert!(f.start_column < byte_column);
    }

    #[test]
    fn same_line_distinct_named_functions_have_distinct_positions() {
        // Two differently named functions share a line. The names differ, so
        // the cross-surface stable_id (file+name+line) differs, and the
        // columns differ too for display disambiguation.
        let entries = walk("function a() {} function b() {}");
        let a = entry_named(&entries, "a");
        let b = entry_named(&entries, "b");
        assert_eq!(a.line, b.line, "both on line 1");
        assert_ne!(
            a.start_column, b.start_column,
            "same-line functions are column-disambiguated"
        );
    }

    #[test]
    fn same_line_anonymous_functions_stay_distinct_via_counter() {
        // The file-scoped counter gives two anonymous arrows on one line
        // different names, so their stable_ids (file+name+line) differ even
        // with identical file and line.
        let entries = walk("const xs = [() => 1, () => 2];");
        assert_eq!(names(&entries), ["(anonymous_0)", "(anonymous_1)"]);
        let (first, second) = (&entries[0], &entries[1]);
        assert_eq!(first.line, second.line, "both on line 1");
        assert_ne!(first.name, second.name, "counter keeps them distinct");
    }

    #[test]
    fn source_hash_is_the_content_digest_of_the_function_span() {
        // The function node spans the whole declaration here, so the canonical
        // body bytes are the entire source. The recorded hash must match the
        // protocol helper applied to exactly those bytes (16 hex characters).
        let src = "function foo() { return 1; }";
        let entries = walk(src);
        assert_eq!(entries.len(), 1);
        let hash = &entries[0].source_hash;
        assert_eq!(*hash, fallow_cov_protocol::source_hash_for(src.as_bytes()));
        assert_eq!(hash.len(), 16);
        assert!(hash.chars().all(|c| c.is_ascii_hexdigit()));
    }

    #[test]
    fn source_hash_survives_line_moves_and_tracks_body_edits() {
        // The #742 property: shifting a function down leaves its source_hash
        // alone (identical body bytes), while editing the body changes it.
        // That is what lets baselines survive a pure line shift.
        let original = walk("function foo() { return 1; }");
        let moved = walk("\n\nfunction foo() { return 1; }");
        let edited = walk("function foo() { return 2; }");
        assert_eq!(
            original[0].source_hash, moved[0].source_hash,
            "a moved-but-unedited function must keep its source_hash"
        );
        assert_ne!(
            original[0].source_hash, edited[0].source_hash,
            "an edited body must change the source_hash"
        );
    }
}