oxc_coverage_instrument 0.3.15

Istanbul-compatible JavaScript/TypeScript coverage instrumentation using the Oxc AST
Documentation
//! Top-level instrumentation API.

use std::path::PathBuf;

use oxc_allocator::Allocator;
use oxc_ast::ast::Program;
use oxc_codegen::{Codegen, CodegenOptions};
use oxc_parser::{Parser, ParserReturn};
use oxc_semantic::{Scoping, SemanticBuilder};
use oxc_span::SourceType;
use oxc_traverse::traverse_mut;

use crate::pragma::PragmaMap;
use crate::transform::{
    CoverageState, CoverageTransform, PreambleInputs, TransformInit, generate_cov_fn_name,
    generate_preamble_source,
};
use crate::types::{CoverageMaps, FileCoverage, UnhandledPragma};

/// Configuration accepted by the `instrument` function.
///
/// Use [`Default::default`] for Istanbul-style defaults and override
/// individual fields with struct-update syntax.
#[derive(Debug, Clone)]
pub struct InstrumentOptions {
    /// Identifier of the global object that holds coverage data
    /// (default: `"__coverage__"`).
    pub coverage_variable: String,
    /// Emit a source map alongside the instrumented code when `true`.
    pub source_map: bool,
    /// Source map JSON produced by an earlier transformation step
    /// (e.g., TypeScript → JS). When provided, it is stored on the
    /// `FileCoverage` as `inputSourceMap` so downstream tools (nyc,
    /// istanbul-reports) can chain back to the original source.
    pub input_source_map: Option<String>,
    /// Enables truthy-value tracking (`bT`) for logical expression operands.
    /// This is nyc-style logic coverage: it records not only which branch was
    /// taken, but also whether each operand evaluated to a truthy value.
    pub report_logic: bool,
    /// Names of class methods that must not be instrumented.
    /// Mirrors Istanbul's `ignoreClassMethods` behavior for class methods and
    /// for named function expressions whose id matches.
    pub ignore_class_methods: Vec<String>,
}

impl Default for InstrumentOptions {
    /// Defaults: `"__coverage__"` as the coverage variable, no source map,
    /// no input source map, logic reporting off, nothing ignored.
    fn default() -> Self {
        let coverage_variable = String::from("__coverage__");
        Self {
            coverage_variable,
            source_map: false,
            input_source_map: None,
            report_logic: false,
            ignore_class_methods: vec![],
        }
    }
}

/// Result of instrumenting a source file.
///
/// Produced by `instrument` on success. When the file is skipped because of
/// an ignore-file pragma, `code` is the unmodified input source and
/// `source_map` is `None`.
#[derive(Debug)]
pub struct InstrumentResult {
    /// The instrumented source code with coverage counters injected
    /// (the coverage preamble followed by the generated program text).
    pub code: String,
    /// Istanbul-compatible coverage map for this file.
    pub coverage_map: FileCoverage,
    /// Pre-serialized JSON of `coverage_map`. Produced once internally for the
    /// preamble's `coverageData` literal and the hash guard, then exposed here
    /// so language bindings (napi-rs, etc.) and downstream JSON sinks can avoid
    /// a second serialization of the same `BTreeMap` tree.
    pub coverage_map_json: String,
    /// Output source map JSON string (only present if `InstrumentOptions::source_map` is true).
    pub source_map: Option<String>,
    /// Unhandled pragma comments found during instrumentation.
    /// Contains `/* istanbul ignore ... */` and `/* v8 ignore ... */` comments
    /// that were not processed. Callers should decide whether to warn or error.
    pub unhandled_pragmas: Vec<UnhandledPragma>,
}

/// Tell whether `s` is usable as a JavaScript identifier, restricted to ASCII.
///
/// The first character must be one of `[a-zA-Z_$]`; every following
/// character must be one of `[a-zA-Z0-9_$]`. The empty string is rejected.
/// Reserved words are not checked.
fn is_valid_js_identifier(s: &str) -> bool {
    fn is_start(c: char) -> bool {
        matches!(c, 'a'..='z' | 'A'..='Z' | '_' | '$')
    }
    fn is_continue(c: char) -> bool {
        is_start(c) || c.is_ascii_digit()
    }

    let mut rest = s.chars();
    match rest.next() {
        Some(first) => is_start(first) && rest.all(is_continue),
        None => false,
    }
}

/// Hash `input` into a lowercase hexadecimal string.
///
/// This is a polynomial rolling hash (multiplier 31) over the UTF-8 bytes of
/// `input`, computed with wrapping `u64` arithmetic and rendered without
/// zero padding. It depends only on the input bytes, so the same input
/// always produces the same string. It is not a cryptographic hash.
fn stable_hex_hash(input: &str) -> String {
    let digest = input
        .bytes()
        .fold(0_u64, |acc, byte| acc.wrapping_mul(31).wrapping_add(u64::from(byte)));
    format!("{digest:x}")
}

/// Instrument a JavaScript/TypeScript source file for coverage collection.
///
/// The pipeline is: validate the coverage variable name, parse `source` with
/// `oxc_parser`, collect pragmas, run the coverage traversal (which records
/// statement/function/branch locations and injects counter expressions into
/// the AST), build the coverage map, generate the preamble, and finally emit
/// the instrumented code via `oxc_codegen`.
///
/// If the pragma scan reports an ignore-file pragma, the source is returned
/// unmodified together with a coverage map built from empty
/// statement/function/branch lists, and no source map is produced.
///
/// # Errors
///
/// * [`InstrumentError::InvalidCoverageVariable`] if
///   `options.coverage_variable` is not an ASCII JavaScript identifier.
/// * [`InstrumentError::ParseError`] if the parser reports any error.
///
/// # Example
///
/// ```
/// use oxc_coverage_instrument::{instrument, InstrumentOptions};
///
/// let source = "function add(a, b) { return a + b; }";
/// let result = instrument(source, "add.js", &InstrumentOptions::default()).unwrap();
///
/// // coverage_map contains fnMap, statementMap, branchMap
/// assert_eq!(result.coverage_map.fn_map.len(), 1);
/// assert_eq!(result.coverage_map.fn_map["0"].name, "add");
/// ```
pub fn instrument(
    source: &str,
    filename: &str,
    options: &InstrumentOptions,
) -> Result<InstrumentResult, InstrumentError> {
    // Validate the coverage variable before doing any parsing work.
    let coverage_var = &options.coverage_variable;
    if !is_valid_js_identifier(coverage_var) {
        return Err(InstrumentError::InvalidCoverageVariable(coverage_var.clone()));
    }
    let input_source_map = options.input_source_map.as_deref();

    let allocator = Allocator::default();
    let mut parsed = parse_program(&allocator, source, filename)?;

    // An ignore-file pragma short-circuits the whole pipeline.
    let (pragmas, unhandled_pragmas) = PragmaMap::from_program(&parsed.program, source);
    if pragmas.ignore_file {
        let skipped = empty_coverage_result(filename, source, unhandled_pragmas);
        return Ok(skipped);
    }

    let initial_scoping = SemanticBuilder::new().build(&parsed.program).semantic.into_scoping();
    let cov_fn_name = generate_cov_fn_name(filename);

    let transform_init = TransformInit {
        allocator: &allocator,
        source,
        cov_fn_name: &cov_fn_name,
        report_logic: options.report_logic,
        ignore_class_methods: options.ignore_class_methods.clone(),
    };
    let mut transform = CoverageTransform::new(transform_init);
    // The traversal mutates the AST in place and hands back the scoping that
    // codegen needs afterwards.
    let scoping = traverse_mut(
        &mut transform,
        &allocator,
        &mut parsed.program,
        initial_scoping,
        CoverageState { pragmas },
    );

    let coverage_map = build_coverage_map(filename, transform, input_source_map);

    // The coverage map is serialized exactly once: the same JSON feeds the
    // hash guard and the preamble's coverageData literal, and is later handed
    // to the caller. Istanbul refreshes stale coverage objects when the same
    // path is reinstrumented with a different shape, so the hash is computed
    // over the very JSON that gets embedded.
    //
    // `FileCoverage` is composed of `BTreeMap`, `Vec`, `String`, and primitive
    // numbers, all with first-party serde implementations that cannot fail at
    // runtime; the `expect` documents that invariant instead of threading a
    // never-produced error variant through the call chain.
    let coverage_json =
        serde_json::to_string(&coverage_map).expect("FileCoverage serializes to JSON infallibly");
    let coverage_hash = stable_hex_hash(&coverage_json);

    let preamble = generate_preamble_source(&PreambleInputs {
        coverage: &coverage_map,
        coverage_json: &coverage_json,
        coverage_hash: &coverage_hash,
        coverage_var,
        cov_fn_name: &cov_fn_name,
        report_logic: options.report_logic,
    });

    let emit_inputs = EmitInputs {
        program: &parsed.program,
        scoping,
        source,
        filename,
        preamble: &preamble,
        options,
    };
    let (code, raw_source_map) = emit_code(emit_inputs);
    // Only present when codegen produced a map, i.e. source maps were requested.
    let source_map = raw_source_map.map(|raw| finalize_source_map(raw, &preamble, input_source_map));

    Ok(InstrumentResult {
        code,
        coverage_map,
        coverage_map_json: coverage_json,
        source_map,
        unhandled_pragmas,
    })
}

/// Parse `source` into an AST allocated in `allocator`.
///
/// The source type is derived from `filename`; when it cannot be derived the
/// default source type is used.
///
/// # Errors
///
/// Returns [`InstrumentError::ParseError`] when the parser reports at least
/// one error. The message is every diagnostic rendered with `Display` and
/// joined by `"; "`.
fn parse_program<'a>(
    allocator: &'a Allocator,
    source: &'a str,
    filename: &str,
) -> Result<ParserReturn<'a>, InstrumentError> {
    let source_type = SourceType::from_path(filename).unwrap_or_default();
    let ret = Parser::new(allocator, source, source_type).parse();

    if !ret.errors.is_empty() {
        let messages: Vec<String> = ret.errors.iter().map(|err| err.to_string()).collect();
        return Err(InstrumentError::ParseError(messages.join("; ")));
    }
    Ok(ret)
}

/// Build the result for a file that is not instrumented.
///
/// The returned `code` is a copy of `source`, the coverage map is built from
/// empty statement/function/branch lists for `filename`, and no source map is
/// attached. `unhandled_pragmas` is passed through unchanged.
fn empty_coverage_result(
    filename: &str,
    source: &str,
    unhandled_pragmas: Vec<UnhandledPragma>,
) -> InstrumentResult {
    let no_entries = CoverageMaps {
        path: filename.to_owned(),
        statement_locs: vec![],
        fn_entries: vec![],
        branch_entries: vec![],
        logical_branch_ids: vec![],
    };
    let coverage_map = FileCoverage::from_maps(no_entries);
    // Same infallibility argument as in `instrument`: the map only contains
    // types whose serde implementations cannot fail.
    let coverage_map_json =
        serde_json::to_string(&coverage_map).expect("FileCoverage serializes to JSON infallibly");

    InstrumentResult {
        code: source.to_owned(),
        coverage_map,
        coverage_map_json,
        source_map: None,
        unhandled_pragmas,
    }
}

/// Turn the data collected by the traversal into a `FileCoverage`.
///
/// Consumes `transform` and moves its statement, function, branch, and
/// logical-branch collections into the map. When `input_source_map` is
/// given, it is parsed as JSON and stored on the map; input that fails to
/// parse is stored as `None` rather than reported.
fn build_coverage_map(
    filename: &str,
    transform: CoverageTransform<'_, '_>,
    input_source_map: Option<&str>,
) -> FileCoverage {
    let collected = CoverageMaps {
        path: filename.to_owned(),
        statement_locs: transform.statement_map,
        fn_entries: transform.fn_map,
        branch_entries: transform.branch_map,
        logical_branch_ids: transform.logical_branch_ids,
    };
    let mut coverage_map = FileCoverage::from_maps(collected);

    // Without an input map the field keeps whatever `from_maps` set.
    let Some(raw_map) = input_source_map else {
        return coverage_map;
    };
    coverage_map.input_source_map = serde_json::from_str(raw_map).ok();
    coverage_map
}

/// Bundled arguments for `emit_code`.
struct EmitInputs<'a, 'arena> {
    /// Program AST to print; `instrument` passes it after the coverage traversal.
    program: &'a Program<'arena>,
    /// Scoping returned by `traverse_mut`, handed to codegen via `with_scoping`.
    scoping: Scoping,
    /// Original source text, handed to codegen via `with_source_text`.
    source: &'a str,
    /// File name; becomes the codegen `source_map_path` when source maps are requested.
    filename: &'a str,
    /// Preamble source that is prepended to the generated code.
    preamble: &'a str,
    /// Caller options; `emit_code` only reads `source_map`.
    options: &'a InstrumentOptions,
}

/// Print the program and prepend the coverage preamble.
///
/// Returns the final code (`preamble` immediately followed by the codegen
/// output) and the source map codegen produced, if any. A source map path is
/// only configured when `options.source_map` is set. The returned map
/// describes the codegen output alone; it is not adjusted for the preamble.
fn emit_code(inputs: EmitInputs<'_, '_>) -> (String, Option<oxc_sourcemap::SourceMap>) {
    let source_map_path = inputs.options.source_map.then(|| PathBuf::from(inputs.filename));
    let codegen_options = CodegenOptions { source_map_path, ..CodegenOptions::default() };

    let generated = Codegen::new()
        .with_options(codegen_options)
        .with_source_text(inputs.source)
        .with_scoping(Some(inputs.scoping))
        .build(inputs.program);

    let code = format!("{}{}", inputs.preamble, generated.code);
    (code, generated.map)
}

/// Produce the final source map JSON for the instrumented output.
///
/// `sm` describes the codegen output only, so its mappings are first shifted
/// down by the number of newlines in `preamble` (saturating at `u32::MAX`).
/// If `input_source_map` is present and parses as a source map, the shifted
/// map is then composed with it so the result points at the original source
/// (e.g., TypeScript). An input map that fails to parse is ignored.
fn finalize_source_map(
    sm: oxc_sourcemap::SourceMap,
    preamble: &str,
    input_source_map: Option<&str>,
) -> String {
    // '\n' is a single ASCII byte in UTF-8, so a byte scan counts the same
    // newlines as a char scan.
    let newline_count = preamble.bytes().filter(|&byte| byte == b'\n').count();
    let line_offset = u32::try_from(newline_count).unwrap_or(u32::MAX);

    let shifted = match line_offset {
        0 => sm,
        _ => oxc_sourcemap::ConcatSourceMapBuilder::from_sourcemaps(&[(&sm, line_offset)])
            .into_sourcemap(),
    };

    let upstream = input_source_map
        .and_then(|json| oxc_sourcemap::SourceMap::from_json_string(json).ok());
    match upstream {
        Some(input_sm) => compose_source_maps(&shifted, &input_sm).to_json_string(),
        None => shifted.to_json_string(),
    }
}

/// Compose two source maps: for each mapping in `output_sm` (instrumented → intermediate),
/// look up the corresponding position in `input_sm` (intermediate → original) to produce
/// a composed map (instrumented → original).
///
/// The composed map's source and name tables are copied from `input_sm`, so
/// the source/name ids taken from `input_sm` tokens index the copied tables.
/// Every source of `input_sm` is copied, even when the input map carries
/// fewer source contents than sources; the missing contents become empty.
fn compose_source_maps(
    output_sm: &oxc_sourcemap::SourceMap,
    input_sm: &oxc_sourcemap::SourceMap,
) -> oxc_sourcemap::SourceMap {
    let input_lookup = input_sm.generate_lookup_table();
    let mut builder = oxc_sourcemap::SourceMapBuilder::default();

    // Copy source files and contents from input (the originals).
    // Iterate over the sources and pull contents alongside instead of zipping
    // the two iterators: `zip` stops at the shorter one, which would drop
    // sources whenever the input map has fewer content entries than sources
    // (`sourcesContent` is optional in the source map format) and leave the
    // copied tokens pointing at source ids that were never added.
    let mut contents = input_sm.get_source_contents();
    for source in input_sm.get_sources() {
        let content_str = contents.next().flatten().map_or("", |c| c.as_ref());
        builder.add_source_and_content(source, content_str);
    }

    // Copy names from input map
    for name in input_sm.get_names() {
        builder.add_name(name);
    }

    // For each token in the output map, look up in the input map.
    // When the input map has no mapping for a given intermediate position, the
    // composed map cannot reference an original source either — the output map's
    // source/name ids index a different table. Emit a position-only token so the
    // generated position still decodes, but don't claim a wrong original source.
    for token in output_sm.get_tokens() {
        let src_line = token.get_src_line();
        let src_col = token.get_src_col();

        if let Some(original) = input_sm.lookup_token(&input_lookup, src_line, src_col) {
            builder.add_token(
                token.get_dst_line(),
                token.get_dst_col(),
                original.get_src_line(),
                original.get_src_col(),
                original.get_source_id(),
                original.get_name_id(),
            );
        } else {
            builder.add_token(token.get_dst_line(), token.get_dst_col(), 0, 0, None, None);
        }
    }

    builder.into_sourcemap()
}

/// Failure modes of the instrumentation API.
#[derive(Debug, Clone)]
pub enum InstrumentError {
    /// Parsing the source failed; the payload is the parser's error text.
    ParseError(String),
    /// The configured coverage variable is not a valid JavaScript identifier;
    /// the payload is the rejected name.
    InvalidCoverageVariable(String),
    /// Serializing coverage data failed. Reserved for future use: the current
    /// `FileCoverage` shape only contains types whose serde implementations are
    /// infallible, so `instrument()` does not currently construct this variant.
    SerializationError(String),
}

impl std::fmt::Display for InstrumentError {
    /// Writes a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // The name is printed with `{:?}` so it shows up quoted/escaped.
            Self::InvalidCoverageVariable(name) => write!(
                f,
                "invalid coverage variable: {name:?} is not a valid JavaScript identifier"
            ),
            Self::ParseError(detail) => write!(f, "parse error: {detail}"),
            Self::SerializationError(detail) => write!(f, "serialization error: {detail}"),
        }
    }
}

impl std::error::Error for InstrumentError {}