Skip to main content

fallow_extract/
inventory.rs

1//! Function inventory walker for `fallow coverage upload-inventory`.
2//!
3//! Emits one [`InventoryEntry`] per function (declaration, expression, arrow,
4//! method) whose name matches what `oxc-coverage-instrument` produces at
5//! instrument time. This is the **static side** of the three-state production
6//! coverage story: uploaded inventory minus runtime-seen functions equals
7//! `untracked`.
8//!
9//! # Naming contract
10//!
11//! The cloud stores function identity as
12//! `(filePath, functionName, lineNumber)`. This walker is responsible for the
//! `functionName` and `lineNumber` parts of that contract. Anonymous functions
//! are named `(anonymous_N)` where `N` is a file-scoped monotonic counter that
//! starts at 0 and increments in pre-order AST traversal each time a function
//! with a body is entered, whether or not it has a name. Name resolution precedence:
17//!
18//! 1. Parent-provided `pending_name` (from `MethodDefinition`,
19//!    `VariableDeclarator`), same pattern as the internal complexity visitor.
20//! 2. The function's own `id` (named `function foo() {}`, named function
21//!    expression `const x = function named() {}`).
//! 3. `(anonymous_N)` with the counter value read on entry (the counter
//!    advances for every recorded function, whichever rule supplies the name).
23//!
24//! Counter scope is per-file. Reference implementation:
25//! `oxc-coverage-instrument/src/transform.rs` (`fn_counter` field; lines 201
26//! and 612 at the time of writing).
27
28use std::path::Path;
29
30use oxc_allocator::Allocator;
31#[allow(clippy::wildcard_imports, reason = "many AST types used")]
32use oxc_ast::ast::*;
33use oxc_ast_visit::{Visit, walk};
34use oxc_parser::Parser;
35use oxc_semantic::ScopeFlags;
36use oxc_span::{SourceType, Span};
37
/// One row of the static function inventory.
///
/// `name` follows the beacon-compatible naming rule described in the module
/// docs, and `line` is the 1-based line of the node's span start.
/// `start_column`, `end_line` and `end_column` describe the function-node
/// span in the 1-indexed UTF-16 convention expected by the cross-surface
/// `FunctionIdentity` join key (see
/// `fallow_cov_protocol::FunctionIdentity::start_column`). Those three fields
/// are descriptive metadata only: the join hash is `(file, name, line)`, so
/// column fidelity influences display and same-line disambiguation but never
/// the join itself.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct InventoryEntry {
    /// Function name in beacon-compatible form.
    pub name: String,
    /// Line (1-based) on which the function node starts (`span.start`).
    pub line: u32,
    /// Column (1-indexed, UTF-16 code units) at which the function node starts.
    pub start_column: u32,
    /// Line (1-based) on which the function node ends.
    pub end_line: u32,
    /// Column (1-indexed, UTF-16 code units) at which the function node ends.
    pub end_column: u32,
    /// Digest of the function's full-span source slice
    /// (`&source[span.start..span.end]`), produced by
    /// `fallow_cov_protocol::source_hash_for`: the first 8 bytes of SHA-256
    /// rendered as 16 lowercase hex characters.
    ///
    /// The hashed bytes are the canonical body bytes (signature line, body
    /// and closing brace, with no whitespace normalization) and are taken the
    /// same way for `Function` and `ArrowFunctionExpression`. Because only
    /// the slice content is hashed, a function that moves to another line
    /// without being edited keeps its hash.
    pub source_hash: String,
}
69
70/// Visitor that collects [`InventoryEntry`] values in file traversal order.
71struct InventoryVisitor<'a> {
72    source: &'a str,
73    line_offsets: &'a [u32],
74    entries: Vec<InventoryEntry>,
75    /// Parent-provided name override (method key, variable binding, etc.).
76    pending_name: Option<String>,
77    /// File-scoped monotonic counter for unnamed functions.
78    anonymous_counter: u32,
79}
80
81impl<'a> InventoryVisitor<'a> {
82    const fn new(source: &'a str, line_offsets: &'a [u32]) -> Self {
83        Self {
84            source,
85            line_offsets,
86            entries: Vec::new(),
87            pending_name: None,
88            anonymous_counter: 0,
89        }
90    }
91
92    /// Resolve a function's name and advance the counter.
93    ///
94    /// Mirrors `oxc-coverage-instrument`'s two-step flow: `resolve_function_name`
95    /// reads the current counter value for the anonymous-case name, and
96    /// `add_function` advances the counter unconditionally on every
97    /// instrumented function (named or not). We collapse both into one call.
98    ///
99    /// Name precedence: parent `pending_name` (method key / variable binding)
100    /// → function's own `id` → counter.
101    fn resolve_name(&mut self, explicit: Option<&str>) -> String {
102        let n = self.anonymous_counter;
103        self.anonymous_counter += 1;
104        if let Some(pending) = self.pending_name.take() {
105            return pending;
106        }
107        if let Some(name) = explicit {
108            return name.to_owned();
109        }
110        format!("(anonymous_{n})")
111    }
112
113    fn record(&mut self, name: String, span: Span) {
114        let (line, start_column) = self.line_col_utf16(span.start);
115        let (end_line, end_column) = self.line_col_utf16(span.end);
116        let source_hash = self
117            .source
118            .get(span.start as usize..span.end as usize)
119            .map_or_else(
120                || fallow_cov_protocol::source_hash_for(b""),
121                |slice| fallow_cov_protocol::source_hash_for(slice.as_bytes()),
122            );
123        self.entries.push(InventoryEntry {
124            name,
125            line,
126            start_column,
127            end_line,
128            end_column,
129            source_hash,
130        });
131    }
132
133    /// Map a UTF-8 byte offset to `(1-based line, 1-indexed UTF-16 column)`.
134    ///
135    /// The line comes from the precomputed offset table; the column counts
136    /// UTF-16 code units from the line start to `byte_offset`, matching the
137    /// `FunctionIdentity` column convention (Istanbul / V8 / oxc all normalize
138    /// to 1-indexed UTF-16). A byte offset that does not fall on a char
139    /// boundary (it always should for an AST span) clamps to the nearest
140    /// boundary at or before it rather than panicking.
141    fn line_col_utf16(&self, byte_offset: u32) -> (u32, u32) {
142        let line_idx = match self.line_offsets.binary_search(&byte_offset) {
143            Ok(idx) => idx,
144            Err(idx) => idx.saturating_sub(1),
145        };
146        let line = line_idx as u32 + 1;
147        let line_start = self.line_offsets[line_idx] as usize;
148        let mut end = byte_offset as usize;
149        while end > line_start && !self.source.is_char_boundary(end) {
150            end -= 1;
151        }
152        let col_utf16 = self
153            .source
154            .get(line_start..end)
155            .map_or(0, |slice| slice.encode_utf16().count());
156        (line, col_utf16 as u32 + 1)
157    }
158}
159
160impl<'ast> Visit<'ast> for InventoryVisitor<'_> {
161    fn visit_function(&mut self, func: &Function<'ast>, flags: ScopeFlags) {
162        if func.body.is_none() {
163            walk::walk_function(self, func, flags);
164            return;
165        }
166        let name = self.resolve_name(func.id.as_ref().map(|id| id.name.as_str()));
167        self.record(name, func.span);
168        walk::walk_function(self, func, flags);
169    }
170
171    fn visit_arrow_function_expression(&mut self, arrow: &ArrowFunctionExpression<'ast>) {
172        let name = self.resolve_name(None);
173        self.record(name, arrow.span);
174        walk::walk_arrow_function_expression(self, arrow);
175    }
176
177    fn visit_method_definition(&mut self, method: &MethodDefinition<'ast>) {
178        if let Some(name) = method.key.static_name() {
179            self.pending_name = Some(name.to_string());
180        }
181        walk::walk_method_definition(self, method);
182        self.pending_name = None;
183    }
184
185    fn visit_variable_declarator(&mut self, decl: &VariableDeclarator<'ast>) {
186        if let Some(id) = decl.id.get_binding_identifier()
187            && decl.init.as_ref().is_some_and(|init| {
188                matches!(
189                    init,
190                    Expression::ArrowFunctionExpression(_) | Expression::FunctionExpression(_)
191                )
192            })
193        {
194            self.pending_name = Some(id.name.to_string());
195        }
196        walk::walk_variable_declarator(self, decl);
197        self.pending_name = None;
198    }
199
200    fn visit_object_property(&mut self, prop: &ObjectProperty<'ast>) {
201        self.pending_name = None;
202        walk::walk_object_property(self, prop);
203        self.pending_name = None;
204    }
205}
206
207/// Parse `source` at `path` and return every function as an [`InventoryEntry`].
208///
209/// Only plain JS/TS/JSX/TSX sources are supported. Callers should skip SFC,
210/// Astro, MDX, CSS, HTML, and other non-JS inputs; those use different
211/// instrumentation paths and are out of scope for the first inventory release.
212///
213/// Errors are swallowed: the returned vector covers whatever could be parsed.
214/// This mirrors how the rest of the extract pipeline handles partial parse
215/// results.
216#[must_use]
217pub fn walk_source(path: &Path, source: &str) -> Vec<InventoryEntry> {
218    let source_type = SourceType::from_path(path).unwrap_or_default();
219    let allocator = Allocator::default();
220    let parser_return = Parser::new(&allocator, source, source_type).parse();
221
222    let line_offsets = fallow_types::extract::compute_line_offsets(source);
223    let mut visitor = InventoryVisitor::new(source, &line_offsets);
224    visitor.visit_program(&parser_return.program);
225
226    if visitor.entries.is_empty() && !source_type.is_jsx() {
227        let jsx_type = if source_type.is_typescript() {
228            SourceType::tsx()
229        } else {
230            SourceType::jsx()
231        };
232        let allocator2 = Allocator::default();
233        let retry_return = Parser::new(&allocator2, source, jsx_type).parse();
234        let mut retry_visitor = InventoryVisitor::new(source, &line_offsets);
235        retry_visitor.visit_program(&retry_return.program);
236        if !retry_visitor.entries.is_empty() {
237            return retry_visitor.entries;
238        }
239    }
240
241    visitor.entries
242}
243
#[cfg(all(test, not(miri)))]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Walk `source` as if it were a TypeScript file named `test.ts`.
    fn walk(source: &str) -> Vec<InventoryEntry> {
        walk_source(&PathBuf::from("test.ts"), source)
    }

    /// Entry names in traversal order.
    fn names(entries: &[InventoryEntry]) -> Vec<&str> {
        entries.iter().map(|entry| entry.name.as_str()).collect()
    }

    #[test]
    fn named_function_declaration_uses_its_own_name() {
        let entries = walk("function foo() { return 1; }");
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].name, "foo");
        assert_eq!(entries[0].line, 1);
    }

    #[test]
    fn const_arrow_captures_binding_name() {
        let entries = walk("const bar = () => 42;");
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].name, "bar");
    }

    #[test]
    fn const_function_expression_captures_binding_name_not_fn_id() {
        let entries = walk("const outer = function inner() { return 1; };");
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].name, "outer");
    }

    #[test]
    fn class_methods_use_method_names() {
        let entries = walk(
            r"
            class Foo {
              bar() { return 1; }
              baz() { return 2; }
            }",
        );
        assert_eq!(names(&entries), vec!["bar", "baz"]);
    }

    #[test]
    fn anonymous_arrow_passed_as_argument_uses_counter() {
        let entries = walk("setTimeout(() => { console.log('hi'); }, 10);");
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].name, "(anonymous_0)");
    }

    #[test]
    fn multiple_anonymous_functions_increment_counter_in_source_order() {
        let entries = walk(
            r"
            [1, 2, 3].map(() => 1);
            [4, 5, 6].filter(() => true);
            ",
        );
        assert_eq!(names(&entries), vec!["(anonymous_0)", "(anonymous_1)"]);
    }

    #[test]
    fn named_function_still_advances_counter_matching_instrumenter() {
        let entries = walk(
            r"
            function named() { return 1; }
            [1].map(() => 2);
            ",
        );
        assert_eq!(names(&entries), vec!["named", "(anonymous_1)"]);
    }

    #[test]
    fn anonymous_after_named_chain_uses_next_counter_value() {
        // Four named functions occupy counter slots 0..=3, so the trailing
        // anonymous arrow must be numbered 4.
        let entries = walk(
            r"
            function a() {}
            function b() {}
            function c() {}
            const d = () => 4;
            [1].map(() => 5);
            ",
        );
        assert_eq!(names(&entries), vec!["a", "b", "c", "d", "(anonymous_4)"]);
    }

    #[test]
    fn typescript_overload_signatures_dont_emit_or_advance_counter() {
        let entries = walk(
            r"
            function foo(): number;
            function foo(s: string): string;
            function foo(s?: string): number | string { return s ? s : 1; }
            [1].map(() => 2);
            ",
        );
        assert_eq!(names(&entries), vec!["foo", "(anonymous_1)"]);
    }

    #[test]
    fn export_default_named_function_keeps_explicit_name() {
        let entries = walk("export default function foo() { return 1; }");
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].name, "foo");
    }

    #[test]
    fn export_default_anonymous_function_uses_counter() {
        let entries = walk("export default function() { return 1; }");
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].name, "(anonymous_0)");
    }

    #[test]
    fn nested_function_numbered_after_parent_in_traversal_order() {
        let entries = walk(
            r"
            function outer() {
              return function() { return 1; };
            }",
        );
        assert_eq!(names(&entries), vec!["outer", "(anonymous_1)"]);
    }

    #[test]
    fn line_number_is_one_based_from_source_start() {
        let entries = walk("\n\nfunction atLineThree() {}");
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].line, 3);
    }

    #[test]
    fn short_jsx_in_js_file_retries_with_jsx_parser() {
        let entries = walk_source(&PathBuf::from("component.js"), "const A = () => <div />;");
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].name, "A");
        assert_eq!(entries[0].line, 1);
    }

    #[test]
    fn object_method_shorthand_uses_anonymous_counter() {
        let entries = walk("const obj = { run() { return 1; } };");
        assert_eq!(names(&entries), vec!["(anonymous_0)"]);
    }

    #[test]
    fn class_property_arrow_uses_anonymous_counter() {
        let entries = walk(
            r"
            class Foo {
              bar = () => 1;
            }",
        );
        assert_eq!(names(&entries), vec!["(anonymous_0)"]);
    }

    #[test]
    fn records_one_indexed_utf16_columns() {
        let src = "function foo() { return 1; }";
        let entries = walk(src);
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].start_column, 1);
        assert_eq!(entries[0].end_line, 1);
        // The declaration spans the whole (ASCII) source, so the end column
        // is one past its length in the 1-indexed convention.
        assert_eq!(entries[0].end_column, src.len() as u32 + 1);
    }

    #[test]
    fn utf16_column_counts_code_units_not_bytes() {
        // U+1F600 is 4 bytes in UTF-8 but 2 code units in UTF-16, so the byte
        // and UTF-16 lengths of the prefix differ.
        let prefix = "const e = \"\u{1F600}\"; const f = ";
        let entries = walk(&format!("{prefix}() => 1;"));
        let f = entries.iter().find(|e| e.name == "f").expect("f present");
        let utf16_prefix_len = prefix.encode_utf16().count() as u32;
        let byte_prefix_len = prefix.len() as u32;
        assert_eq!(f.start_column, utf16_prefix_len + 1);
        assert!(f.start_column < byte_prefix_len + 1);
    }

    #[test]
    fn same_line_distinct_named_functions_have_distinct_positions() {
        let entries = walk("function a() {} function b() {}");
        let a = entries.iter().find(|e| e.name == "a").expect("a present");
        let b = entries.iter().find(|e| e.name == "b").expect("b present");
        assert_eq!(a.line, b.line, "both on line 1");
        assert_ne!(
            a.start_column, b.start_column,
            "same-line functions are column-disambiguated"
        );
    }

    #[test]
    fn same_line_anonymous_functions_stay_distinct_via_counter() {
        let entries = walk("const xs = [() => 1, () => 2];");
        assert_eq!(
            names(&entries),
            vec!["(anonymous_0)", "(anonymous_1)"],
            "counter keeps them distinct"
        );
        assert_eq!(entries[0].line, entries[1].line, "both on line 1");
        assert_ne!(
            entries[0].start_column, entries[1].start_column,
            "same-line arrows start at different columns"
        );
    }

    #[test]
    fn source_hash_is_the_content_digest_of_the_function_span() {
        let src = "function foo() { return 1; }";
        let entries = walk(src);
        assert_eq!(entries.len(), 1);
        assert_eq!(
            entries[0].source_hash,
            fallow_cov_protocol::source_hash_for(src.as_bytes())
        );
        assert_eq!(entries[0].source_hash.len(), 16);
        assert!(
            entries[0]
                .source_hash
                .chars()
                .all(|c| c.is_ascii_hexdigit())
        );
    }

    #[test]
    fn source_hash_survives_line_moves_and_tracks_body_edits() {
        let original = walk("function foo() { return 1; }");
        let moved = walk("\n\nfunction foo() { return 1; }");
        assert_eq!(
            original[0].source_hash, moved[0].source_hash,
            "a moved-but-unedited function must keep its source_hash"
        );
        let edited = walk("function foo() { return 2; }");
        assert_ne!(
            original[0].source_hash, edited[0].source_hash,
            "an edited body must change the source_hash"
        );
    }
}