Skip to main content

fallow_v8_coverage/
lib.rs

1//! V8 `ScriptCoverage` JSON parser and Istanbul-compatible normalizer.
2//!
3//! This is the open-source layer of fallow's production-coverage pipeline.
4//! It performs the mechanical conversion from V8's byte-offset-based coverage
5//! format (as emitted by `node --experimental-test-coverage`, `c8`, the
6//! Inspector protocol, or any V8 isolate) into the line/column-based
7//! [`IstanbulFileCoverage`] shape that fallow's CRAP scoring already
8//! consumes.
9//!
10//! The closed-source three-state cross-reference, combined scoring, hot-path
11//! heuristics and verdict generation live in `fallow-cov` (private) and
12//! consume this crate's normalized output via the `fallow-cov-protocol`
13//! envelope.
14
15#![forbid(unsafe_code)]
16
17use serde::{Deserialize, Deserializer, Serialize};
18
19// -- V8 input types ---------------------------------------------------------
20
21/// Top-level shape emitted by Node's `NODE_V8_COVERAGE` directory: one file
22/// per worker / process containing a `result` array of [`ScriptCoverage`].
23#[derive(Debug, Clone, Serialize, Deserialize)]
24pub struct V8CoverageDump {
25    /// Per-script coverage entries.
26    pub result: Vec<ScriptCoverage>,
27    /// Optional source-map cache emitted by Node 13+.
28    #[serde(default, rename = "source-map-cache")]
29    pub source_map_cache: Option<serde_json::Value>,
30}
31
32/// V8's per-script coverage record. Field names mirror the V8 inspector
33/// protocol verbatim.
34#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct ScriptCoverage {
36    /// V8 script identifier.
37    #[serde(rename = "scriptId")]
38    pub script_id: String,
39    /// File URL — typically `file:///abs/path` for Node, `https://…` for
40    /// browsers. Callers normalize to absolute paths before merging.
41    pub url: String,
42    /// One entry per function (including the implicit module-level function).
43    pub functions: Vec<FunctionCoverage>,
44}
45
46/// V8 per-function coverage. Each function carries one or more
47/// [`CoverageRange`]s — block-level for instrumented coverage, function-level
48/// for `--coverage=best-effort`.
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct FunctionCoverage {
51    /// Source-as-written function name. Empty for the module-level wrapper
52    /// and anonymous functions.
53    #[serde(rename = "functionName")]
54    pub function_name: String,
55    /// Coverage ranges, byte-offsets relative to the script's source text.
56    pub ranges: Vec<CoverageRange>,
57    /// True when V8 emitted block-level data for this function (instrumented
58    /// coverage). False when only the outer function range is reliable
59    /// (best-effort / production coverage).
60    #[serde(rename = "isBlockCoverage", default)]
61    pub is_block_coverage: bool,
62}
63
64/// A single coverage range. `count == 0` means the byte range was never hit.
65#[derive(Debug, Clone, Serialize, Deserialize)]
66pub struct CoverageRange {
67    /// Inclusive byte offset into the script's source.
68    #[serde(rename = "startOffset")]
69    pub start_offset: u32,
70    /// Exclusive byte offset into the script's source.
71    #[serde(rename = "endOffset")]
72    pub end_offset: u32,
73    /// Number of times the range was executed.
74    pub count: u64,
75}
76
77// -- Istanbul output types --------------------------------------------------
78
79/// Subset of the Istanbul `FileCoverage` shape that fallow needs for CRAP
80/// scoring. We do not emit statement / branch maps because fallow only needs
81/// per-function call counts.
82#[derive(Debug, Clone, Serialize, Deserialize)]
83pub struct IstanbulFileCoverage {
84    /// Absolute path of the source file.
85    pub path: String,
86    /// Per-function records keyed by stable index (`f0`, `f1`, …).
87    #[serde(rename = "fnMap")]
88    pub fn_map: std::collections::BTreeMap<String, IstanbulFunction>,
89    /// Per-function hit counts, keyed identically to `fn_map`.
90    pub f: std::collections::BTreeMap<String, u64>,
91}
92
/// Istanbul function descriptor: one entry of a file's `fnMap`.
///
/// When produced by [`normalize_script`], `decl` is a zero-width range at the
/// function's start and `loc` spans the function's outermost V8 range.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IstanbulFunction {
    /// Source-as-written function name (matches V8's `functionName`).
    /// [`normalize_script`] substitutes `(anonymous)` for an empty name.
    pub name: String,
    /// Declaration position. Matches Istanbul's `decl`.
    pub decl: IstanbulRange,
    /// Full body position. Matches Istanbul's `loc`.
    pub loc: IstanbulRange,
    /// 1-indexed line of the function declaration's start.
    pub line: u32,
}
105
/// Start/end pair of [`IstanbulPosition`]s matching Istanbul's `Range`.
/// Lines are 1-indexed and columns are 0-indexed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IstanbulRange {
    /// Inclusive start position.
    pub start: IstanbulPosition,
    /// Exclusive end position.
    pub end: IstanbulPosition,
}
114
/// A single source position: 1-indexed line + 0-indexed column.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IstanbulPosition {
    /// 1-indexed line number.
    pub line: u32,
    /// 0-indexed column within the line.
    ///
    /// Some real Istanbul producers (including Vitest in certain transforms)
    /// emit `null` for end columns. We normalize those to `0` at parse time
    /// so downstream CRAP/prod-coverage consumers can still ingest the file.
    ///
    /// Only an explicit `null` is normalized by the custom deserializer; the
    /// field carries no `#[serde(default)]`.
    #[serde(deserialize_with = "deserialize_nullable_u32")]
    pub column: u32,
}
128
129fn deserialize_nullable_u32<'de, D>(deserializer: D) -> Result<u32, D::Error>
130where
131    D: Deserializer<'de>,
132{
133    Ok(Option::<u32>::deserialize(deserializer)?.unwrap_or(0))
134}
135
136// -- Byte-offset to line/column mapper -------------------------------------
137
138/// Pre-computed line-start byte-offset table for converting V8 byte offsets
139/// into Istanbul line/column positions in O(log n) per lookup.
140///
141/// The source is consumed once at construction; subsequent lookups are
142/// allocation-free.
143#[derive(Debug)]
144pub struct LineOffsetTable {
145    /// Byte offset of the first character of each line. `line_starts[0]` is
146    /// always `0` (the start of the file).
147    line_starts: Vec<u32>,
148}
149
150impl LineOffsetTable {
151    /// Build a table from the full source text. The source must be UTF-8 with
152    /// LF, CRLF, or CR line endings (mixed endings are tolerated).
153    #[must_use]
154    pub fn from_source(source: &str) -> Self {
155        let mut line_starts = Vec::with_capacity(source.lines().count() + 1);
156        line_starts.push(0);
157        let bytes = source.as_bytes();
158        let mut i = 0;
159        while i < bytes.len() {
160            match bytes[i] {
161                b'\n' => {
162                    line_starts.push((i + 1) as u32);
163                    i += 1;
164                }
165                b'\r' => {
166                    let next_offset = if bytes.get(i + 1) == Some(&b'\n') {
167                        i + 2
168                    } else {
169                        i + 1
170                    };
171                    line_starts.push(next_offset as u32);
172                    i = next_offset;
173                }
174                _ => i += 1,
175            }
176        }
177        Self { line_starts }
178    }
179
180    /// Convert a byte offset to a 1-indexed line + 0-indexed column.
181    ///
182    /// Offsets at or past the end of the source clamp to the last line +
183    /// remaining column.
184    #[must_use]
185    pub fn position(&self, byte_offset: u32) -> IstanbulPosition {
186        // Binary search for the last line_start <= byte_offset.
187        let line_zero_indexed = match self.line_starts.binary_search(&byte_offset) {
188            Ok(exact) => exact,
189            Err(insertion_point) => insertion_point.saturating_sub(1),
190        };
191        let line_start = self.line_starts[line_zero_indexed];
192        IstanbulPosition {
193            line: (line_zero_indexed as u32) + 1,
194            column: byte_offset.saturating_sub(line_start),
195        }
196    }
197}
198
199// -- Normalizer -------------------------------------------------------------
200
/// Input bundle to [`normalize_script`].
///
/// All three fields are borrowed for `'a`; the struct owns nothing.
pub struct ScriptInput<'a> {
    /// Absolute path to the source file (already resolved from V8's `url`).
    /// Copied verbatim into the resulting `IstanbulFileCoverage::path`.
    pub path: &'a str,
    /// Full source text used to convert byte offsets.
    pub source: &'a str,
    /// V8 coverage entry for this script.
    pub script: &'a ScriptCoverage,
}
210
211/// Convert one V8 [`ScriptCoverage`] entry into an [`IstanbulFileCoverage`].
212///
213/// Each V8 [`FunctionCoverage`] contributes one Istanbul function entry whose
214/// hit count is taken from the function's first range (the outermost
215/// `[startOffset, endOffset)`). Block-level sub-ranges are deliberately not
216/// flattened into separate functions — that's the closed-source three-state
217/// tracker's job.
218#[must_use]
219pub fn normalize_script(input: &ScriptInput<'_>) -> IstanbulFileCoverage {
220    let table = LineOffsetTable::from_source(input.source);
221    let mut fn_map = std::collections::BTreeMap::new();
222    let mut hits = std::collections::BTreeMap::new();
223    for (idx, function) in input.script.functions.iter().enumerate() {
224        let key = format!("f{idx}");
225        let outer = function.ranges.first().copied().unwrap_or(CoverageRange {
226            start_offset: 0,
227            end_offset: 0,
228            count: 0,
229        });
230        let start_pos = table.position(outer.start_offset);
231        let end_pos = table.position(outer.end_offset);
232        fn_map.insert(
233            key.clone(),
234            IstanbulFunction {
235                name: if function.function_name.is_empty() {
236                    "(anonymous)".to_owned()
237                } else {
238                    function.function_name.clone()
239                },
240                decl: IstanbulRange {
241                    start: start_pos,
242                    end: start_pos,
243                },
244                loc: IstanbulRange {
245                    start: start_pos,
246                    end: end_pos,
247                },
248                line: start_pos.line,
249            },
250        );
251        hits.insert(key, outer.count);
252    }
253    IstanbulFileCoverage {
254        path: input.path.to_owned(),
255        fn_map,
256        f: hits,
257    }
258}
259
// Manual Copy for IstanbulPosition + CoverageRange to keep normalize_script cheap.
// Both types derive `Clone` (required by `Copy`) and contain only integer
// fields, so values can be passed and reused by value without `.clone()`.
impl Copy for CoverageRange {}
impl Copy for IstanbulPosition {}
263
// Unit tests for the line-offset table, the V8 -> Istanbul normalizer and
// the serde shapes of the input/output types.
#[cfg(test)]
mod tests {
    use super::*;

    // LF-terminated lines: the byte right after each `\n` starts a new line.
    #[test]
    fn line_table_handles_lf() {
        let table = LineOffsetTable::from_source("a\nbb\nccc");
        assert_eq!(table.position(0).line, 1);
        assert_eq!(table.position(0).column, 0);
        assert_eq!(table.position(2).line, 2);
        assert_eq!(table.position(2).column, 0);
        assert_eq!(table.position(5).line, 3);
        assert_eq!(table.position(5).column, 0);
    }

    // A CRLF pair counts as one terminator: offset 3 is the first byte of line 2.
    #[test]
    fn line_table_handles_crlf() {
        let table = LineOffsetTable::from_source("a\r\nbb\r\nccc");
        assert_eq!(table.position(3).line, 2);
        assert_eq!(table.position(3).column, 0);
    }

    // A CR without a following LF still terminates the line.
    #[test]
    fn line_table_handles_lone_cr() {
        let table = LineOffsetTable::from_source("a\rbb");
        assert_eq!(table.position(2).line, 2);
        assert_eq!(table.position(2).column, 0);
    }

    // Offsets past the end stay on the last line; the column is not clamped.
    #[test]
    fn line_table_clamps_past_end() {
        let table = LineOffsetTable::from_source("abc");
        let pos = table.position(100);
        assert_eq!(pos.line, 1);
        assert_eq!(pos.column, 100);
    }

    // Hit counts come from each function's first range; keys are `f<index>`.
    #[test]
    fn normalize_round_trips_function_hits() {
        let source = "function alpha() {}\nfunction beta() {}\n";
        let script = ScriptCoverage {
            script_id: "1".into(),
            url: "file:///t/foo.js".into(),
            functions: vec![
                FunctionCoverage {
                    function_name: "alpha".into(),
                    ranges: vec![CoverageRange {
                        start_offset: 0,
                        end_offset: 19,
                        count: 7,
                    }],
                    is_block_coverage: false,
                },
                FunctionCoverage {
                    function_name: "beta".into(),
                    ranges: vec![CoverageRange {
                        start_offset: 20,
                        end_offset: 39,
                        count: 0,
                    }],
                    is_block_coverage: false,
                },
            ],
        };
        let normalized = normalize_script(&ScriptInput {
            path: "/t/foo.js",
            source,
            script: &script,
        });
        assert_eq!(normalized.f["f0"], 7);
        assert_eq!(normalized.f["f1"], 0);
        assert_eq!(normalized.fn_map["f0"].name, "alpha");
        assert_eq!(normalized.fn_map["f1"].line, 2);
    }

    // An empty V8 function name is reported as `(anonymous)`.
    #[test]
    fn anonymous_function_renamed() {
        let source = "() => {}";
        let script = ScriptCoverage {
            script_id: "1".into(),
            url: "file:///t/anon.js".into(),
            functions: vec![FunctionCoverage {
                function_name: String::new(),
                ranges: vec![CoverageRange {
                    start_offset: 0,
                    end_offset: 8,
                    count: 1,
                }],
                is_block_coverage: false,
            }],
        };
        let normalized = normalize_script(&ScriptInput {
            path: "/t/anon.js",
            source,
            script: &script,
        });
        assert_eq!(normalized.fn_map["f0"].name, "(anonymous)");
    }

    // The camelCase V8 JSON keys deserialize into the snake_case Rust fields;
    // `source-map-cache` may be absent.
    #[test]
    fn parse_node_v8_coverage_dump() {
        let raw = serde_json::json!({
            "result": [{
                "scriptId": "42",
                "url": "file:///t/x.js",
                "functions": [{
                    "functionName": "a",
                    "ranges": [{"startOffset": 0, "endOffset": 10, "count": 3}],
                    "isBlockCoverage": false
                }]
            }]
        });
        let dump: V8CoverageDump = serde_json::from_value(raw).unwrap();
        assert_eq!(dump.result.len(), 1);
        assert_eq!(dump.result[0].functions[0].function_name, "a");
    }

    // Istanbul input with `null` end columns parses, with `null` read as 0.
    #[test]
    fn parse_istanbul_coverage_with_null_columns() {
        let raw = serde_json::json!({
            "/t/linkUtils.ts": {
                "path": "/t/linkUtils.ts",
                "fnMap": {
                    "0": {
                        "name": "normalizeInternalLink",
                        "decl": {
                            "start": { "line": 66, "column": 0 },
                            "end": { "line": 66, "column": null }
                        },
                        "loc": {
                            "start": { "line": 66, "column": 0 },
                            "end": { "line": 76, "column": null }
                        },
                        "line": 66
                    }
                },
                "f": { "0": 9 }
            }
        });

        let dump: std::collections::BTreeMap<String, IstanbulFileCoverage> =
            serde_json::from_value(raw).unwrap();
        let file = &dump["/t/linkUtils.ts"];
        assert_eq!(file.fn_map["0"].decl.end.column, 0);
        assert_eq!(file.fn_map["0"].loc.end.column, 0);
        assert_eq!(file.f["0"], 9);
    }
}