// srcmap-sourcemap 0.3.8
//
// High-performance source map parser and consumer.
// Documentation
use crate::{GeneratedLocation, LazySourceMap, OriginalLocation, SourceMap};

/// Line feed byte (`\n`). Ends a generated line on its own, and is also
/// consumed as the second half of a `\r\n` pair when building the line index.
const NEWLINE_LF: u8 = b'\n';
/// Carriage return byte (`\r`). Ends a generated line either on its own or
/// together with an immediately following line feed.
const NEWLINE_CR: u8 = b'\r';

/// Source map lookup shape used by [`GeneratedOffsetLookup`].
///
/// Implemented for [`SourceMap`] and [`LazySourceMap`], so runtime coverage
/// callers can normalize generated byte offsets once and use either eager or
/// lazy source-map decoding.
///
/// The trait has a single method and no generic parameters, so it can also be
/// used behind a reference to a trait object; the offset helpers on
/// [`GeneratedOffsetLookup`] accept `M: OriginalPositionLookup + ?Sized`.
pub trait OriginalPositionLookup {
    /// Look up an original source location from a generated position.
    ///
    /// Lines and columns are 0-based. Columns are UTF-16 code units, matching
    /// JavaScript source-map semantics.
    ///
    /// `line` and `column` describe a position in the *generated* file. The
    /// result is `None` when the implementation has no original location to
    /// report for that position.
    fn original_position_for(&self, line: u32, column: u32) -> Option<OriginalLocation>;
}

/// Lets an eagerly decoded [`SourceMap`] be used wherever an
/// [`OriginalPositionLookup`] is expected.
impl OriginalPositionLookup for SourceMap {
    /// Forwards the generated `line`/`column` pair to
    /// `SourceMap::original_position_for` and returns its result unchanged.
    fn original_position_for(&self, line: u32, column: u32) -> Option<OriginalLocation> {
        // NOTE(review): the type-qualified path is presumably meant to pick the
        // inherent `SourceMap::original_position_for` rather than this trait
        // method (which would recurse). The inherent method is not visible in
        // this file — confirm it exists with a matching signature.
        SourceMap::original_position_for(self, line, column)
    }
}

/// Lets a [`LazySourceMap`] be used wherever an [`OriginalPositionLookup`] is
/// expected.
impl OriginalPositionLookup for LazySourceMap {
    /// Forwards the generated `line`/`column` pair to
    /// `LazySourceMap::original_position_for` and returns its result unchanged.
    fn original_position_for(&self, line: u32, column: u32) -> Option<OriginalLocation> {
        // NOTE(review): the type-qualified path is presumably meant to pick the
        // inherent `LazySourceMap::original_position_for` rather than this
        // trait method (which would recurse). The inherent method is not
        // visible in this file — confirm it exists with a matching signature.
        LazySourceMap::original_position_for(self, line, column)
    }
}

/// Converts generated UTF-8 byte offsets into source-map generated positions.
///
/// V8 coverage ranges are often reported as byte offsets in generated code,
/// while source maps use generated line and UTF-16 column pairs. This helper
/// builds a reusable line index for one generated asset and converts offsets
/// before forwarding them to a source map lookup.
///
/// The three vectors are parallel: index `i` in each describes generated line
/// `i` (0-based), and all three always have the same length.
#[derive(Debug, Clone)]
pub struct GeneratedOffsetLookup<'a> {
    /// The generated source text the index was built from (borrowed, not copied).
    generated_source: &'a str,
    /// Byte offset at which each line begins. The first entry is always `0`;
    /// later entries point just past the preceding line terminator.
    line_start_bytes: Vec<usize>,
    /// Byte offset at which each line's content ends: the position of the
    /// `\r` or `\n` that terminates it, or the total byte length for the
    /// final line.
    line_end_bytes: Vec<usize>,
    /// `true` when the line contains no non-ASCII bytes, in which case a
    /// UTF-16 column equals the byte distance from the line start.
    line_ascii_only: Vec<bool>,
}

impl<'a> GeneratedOffsetLookup<'a> {
    /// Create a lookup for one generated source asset.
    pub fn new(generated_source: &'a str) -> Self {
        let bytes = generated_source.as_bytes();
        let mut line_start_bytes = vec![0];
        let mut line_end_bytes = Vec::new();
        let mut line_ascii_only = Vec::new();
        let mut current_line_ascii_only = true;
        let mut offset = 0;

        while offset < bytes.len() {
            match bytes[offset] {
                NEWLINE_LF => {
                    line_end_bytes.push(offset);
                    line_ascii_only.push(current_line_ascii_only);
                    offset += 1;
                    line_start_bytes.push(offset);
                    current_line_ascii_only = true;
                }
                NEWLINE_CR => {
                    line_end_bytes.push(offset);
                    line_ascii_only.push(current_line_ascii_only);
                    offset += 1;
                    if offset < bytes.len() && bytes[offset] == NEWLINE_LF {
                        offset += 1;
                    }
                    line_start_bytes.push(offset);
                    current_line_ascii_only = true;
                }
                byte if byte.is_ascii() => {
                    offset += 1;
                }
                _ => {
                    current_line_ascii_only = false;
                    let ch = generated_source[offset..]
                        .chars()
                        .next()
                        .expect("offset is always on a UTF-8 character boundary");
                    offset += ch.len_utf8();
                }
            }
        }

        line_end_bytes.push(bytes.len());
        line_ascii_only.push(current_line_ascii_only);

        Self { generated_source, line_start_bytes, line_end_bytes, line_ascii_only }
    }

    /// Number of generated lines tracked by this lookup.
    #[inline]
    pub fn line_count(&self) -> usize {
        self.line_start_bytes.len()
    }

    /// Total byte length of the generated source.
    #[inline]
    pub fn total_bytes(&self) -> usize {
        self.generated_source.len()
    }

    /// Convert a generated UTF-8 byte offset to a 0-based line and UTF-16 column.
    ///
    /// Returns `None` when the offset is out of bounds, does not fit in `u32`,
    /// or points into the middle of a UTF-8 code point. Offsets that point at a
    /// line terminator resolve to the end column of that line. Offsets after a
    /// line terminator resolve to column 0 of the next line.
    pub fn byte_offset_to_position(&self, byte_offset: u32) -> Option<GeneratedLocation> {
        let offset = byte_offset as usize;
        if offset > self.generated_source.len() || !self.generated_source.is_char_boundary(offset) {
            return None;
        }

        let line = self
            .line_start_bytes
            .partition_point(|line_start| *line_start <= offset)
            .saturating_sub(1);
        let line_start = self.line_start_bytes[line];
        let line_end = self.line_end_bytes[line];
        let column = if offset <= line_end {
            self.byte_offset_to_utf16_column(line_start, offset, line)?
        } else {
            self.byte_offset_to_utf16_column(line_start, line_end, line)?
        };

        Some(GeneratedLocation { line: u32::try_from(line).ok()?, column })
    }

    /// Look up the original source position for a generated byte offset.
    pub fn original_position_for_offset<M>(
        &self,
        source_map: &M,
        byte_offset: u32,
    ) -> Option<OriginalLocation>
    where
        M: OriginalPositionLookup + ?Sized,
    {
        let position = self.byte_offset_to_position(byte_offset)?;
        source_map.original_position_for(position.line, position.column)
    }

    /// Convert multiple generated byte offsets to generated positions.
    ///
    /// Returns `None` if any offset is invalid.
    pub fn byte_offsets_to_positions(
        &self,
        byte_offsets: &[u32],
    ) -> Option<Vec<GeneratedLocation>> {
        byte_offsets.iter().map(|byte_offset| self.byte_offset_to_position(*byte_offset)).collect()
    }

    /// Look up original source positions for multiple generated byte offsets.
    pub fn original_positions_for_offsets<M>(
        &self,
        source_map: &M,
        byte_offsets: &[u32],
    ) -> Vec<Option<OriginalLocation>>
    where
        M: OriginalPositionLookup + ?Sized,
    {
        byte_offsets
            .iter()
            .map(|byte_offset| self.original_position_for_offset(source_map, *byte_offset))
            .collect()
    }

    fn byte_offset_to_utf16_column(
        &self,
        line_start: usize,
        offset: usize,
        line: usize,
    ) -> Option<u32> {
        let byte_column = offset.checked_sub(line_start)?;
        if self.line_ascii_only[line] {
            return u32::try_from(byte_column).ok();
        }

        let line_source = self.generated_source.get(line_start..offset)?;
        line_source.chars().try_fold(0u32, |column, ch| column.checked_add(ch.len_utf16() as u32))
    }
}

// Unit tests for `GeneratedOffsetLookup`: byte-offset to line/column
// conversion, UTF-16 column handling, line-terminator edge cases, invalid
// offsets, and forwarding to eager and lazy source maps.
#[cfg(test)]
mod tests {
    use super::GeneratedOffsetLookup;
    use crate::{LazySourceMap, SourceMap};

    // Two generated lines, each with one segment at generated column 0.
    // "AAAAA" maps to input.ts line 0, column 0, name index 0 ("lineOne");
    // "AACAC" advances the original line and the name index by one each,
    // giving line 1, column 0, name index 1 ("lineTwo").
    const TWO_LINE_MAP: &str = r#"{
        "version": 3,
        "sources": ["input.ts"],
        "names": ["lineOne", "lineTwo"],
        "mappings": "AAAAA;AACAC"
    }"#;

    // Pure-ASCII input: columns equal byte distances from the line start.
    #[test]
    fn generated_offset_lookup_maps_ascii_offsets() {
        let lookup = GeneratedOffsetLookup::new("const value = 1;\nvalue;\n");

        // The trailing newline opens an empty third line.
        assert_eq!(lookup.line_count(), 3);
        assert_eq!(lookup.total_bytes(), 24);

        let start = lookup.byte_offset_to_position(0).unwrap();
        assert_eq!(start.line, 0);
        assert_eq!(start.column, 0);

        // The first byte after the newline is column 0 of the next line.
        let second_line =
            lookup.byte_offset_to_position("const value = 1;\n".len() as u32).unwrap();
        assert_eq!(second_line.line, 1);
        assert_eq!(second_line.column, 0);
    }

    // Non-ASCII input: 'é' is 2 UTF-8 bytes but 1 UTF-16 code unit, and '😀'
    // is 4 UTF-8 bytes but 2 UTF-16 code units (a surrogate pair).
    #[test]
    fn generated_offset_lookup_converts_utf8_offsets_to_utf16_columns() {
        let source = "const letter = 'é';\nconst emoji = '😀';\n";
        let lookup = GeneratedOffsetLookup::new(source);

        // Byte offsets are derived from prefix lengths so they always land on
        // character boundaries.
        let before_accent = "const letter = '".len() as u32;
        let after_accent = "const letter = 'é".len() as u32;
        let before_emoji = "const letter = 'é';\nconst emoji = '".len() as u32;
        let after_emoji = "const letter = 'é';\nconst emoji = '😀".len() as u32;

        let before_accent_pos = lookup.byte_offset_to_position(before_accent).unwrap();
        let after_accent_pos = lookup.byte_offset_to_position(after_accent).unwrap();
        let before_emoji_pos = lookup.byte_offset_to_position(before_emoji).unwrap();
        let after_emoji_pos = lookup.byte_offset_to_position(after_emoji).unwrap();

        assert_eq!(before_accent_pos.line, 0);
        assert_eq!(before_accent_pos.column, 16);
        assert_eq!(after_accent_pos.line, 0);
        // One column for 'é' despite its two bytes.
        assert_eq!(after_accent_pos.column, 17);
        assert_eq!(before_emoji_pos.line, 1);
        assert_eq!(before_emoji_pos.column, 15);
        assert_eq!(after_emoji_pos.line, 1);
        // Two columns for the emoji's surrogate pair.
        assert_eq!(after_emoji_pos.column, 17);
    }

    // CRLF counts as one terminator: offsets on either of its bytes resolve
    // to the end of the terminated line.
    #[test]
    fn generated_offset_lookup_handles_crlf_and_line_boundaries() {
        let source = "alpha();\r\nbeta();\r\ngamma();";
        let lookup = GeneratedOffsetLookup::new(source);
        let first_cr = "alpha();".len() as u32;
        let first_lf = first_cr + 1;
        let second_line = "alpha();\r\n".len() as u32;

        let at_cr = lookup.byte_offset_to_position(first_cr).unwrap();
        let at_lf = lookup.byte_offset_to_position(first_lf).unwrap();
        let after_crlf = lookup.byte_offset_to_position(second_line).unwrap();

        assert_eq!(at_cr.line, 0);
        assert_eq!(at_cr.column, 8);
        // The LF of the pair still belongs to line 0 and is clamped to its end.
        assert_eq!(at_lf.line, 0);
        assert_eq!(at_lf.column, 8);
        assert_eq!(after_crlf.line, 1);
        assert_eq!(after_crlf.column, 0);
    }

    // Offsets inside a multi-byte character or past the end yield `None`.
    #[test]
    fn generated_offset_lookup_rejects_invalid_offsets() {
        let source = "é";
        let lookup = GeneratedOffsetLookup::new(source);

        // Offset 1 splits the two-byte 'é'; offset 3 is beyond the 2-byte source.
        assert!(lookup.byte_offset_to_position(1).is_none());
        assert!(lookup.byte_offset_to_position(3).is_none());
    }

    // The same byte offset must resolve identically through the eager and
    // the lazy source map implementations.
    #[test]
    fn generated_offset_lookup_resolves_original_positions_for_eager_and_lazy_maps() {
        let generated = "alpha();\nbeta();\n";
        // First byte of the second generated line.
        let offset = "alpha();\n".len() as u32;
        let lookup = GeneratedOffsetLookup::new(generated);
        let eager = SourceMap::from_json(TWO_LINE_MAP).unwrap();
        let lazy = LazySourceMap::from_json(TWO_LINE_MAP).unwrap();

        let eager_loc = lookup.original_position_for_offset(&eager, offset).unwrap();
        let lazy_loc = lookup.original_position_for_offset(&lazy, offset).unwrap();

        assert_eq!(eager.source(eager_loc.source), "input.ts");
        assert_eq!(lazy.source(lazy_loc.source), "input.ts");
        assert_eq!(eager_loc.line, 1);
        assert_eq!(eager_loc.column, 0);
        assert_eq!(eager.name(eager_loc.name.unwrap()), "lineTwo");
        assert_eq!(lazy_loc.line, eager_loc.line);
        assert_eq!(lazy_loc.column, eager_loc.column);
        assert_eq!(lazy_loc.name, eager_loc.name);
    }
}