// oxc_sourcemap 8.0.0
//
// Basic sourcemap handling for Rust
// Documentation
use std::borrow::Cow;

use rustc_hash::FxHashMap;

use crate::{
    SourceMap,
    token::{Token, TokenChunk},
};

/// Helper to build a [`SourceMap`].
///
/// The builder **borrows** the names, sources and source contents you add for its lifetime `'a`,
/// so building allocates essentially nothing beyond the tokens vector (the dedup maps key by
/// `&'a str`, not owned copies, and the map stores `Cow::Borrowed`).
///
/// The one exception is a source name handed to `set_source_and_content`: that parameter is a
/// `Cow<'a, str>`, so the caller may pass an owned string and it is stored as given.
///
/// The ownership decision is deferred to the end:
/// * [`into_sourcemap`](Self::into_sourcemap) returns a borrowed [`SourceMap<'a>`] — zero copy.
/// * [`into_owned_sourcemap`](Self::into_owned_sourcemap) copies the strings once into a
///   `'static` [`crate::OwnedSourceMap`].
#[derive(Debug, Default)]
pub struct SourceMapBuilder<'a> {
    /// Name of the generated file; `None` until `set_file` is called.
    pub(crate) file: Option<Cow<'a, str>>,
    /// Dedup index for `names`: maps a name to its position in `names`.
    pub(crate) names_map: FxHashMap<&'a str, u32>,
    /// Names in insertion order; a name's id is its index in this vector.
    pub(crate) names: Vec<Cow<'a, str>>,
    /// Sources in insertion order; a source's id is its index in this vector.
    pub(crate) sources: Vec<Cow<'a, str>>,
    /// Dedup index for `sources`: maps a source path to its position in `sources`.
    /// Only `add_source_and_content` writes to it; `set_source_and_content` bypasses it.
    pub(crate) sources_map: FxHashMap<&'a str, u32>,
    /// Source contents, pushed in lockstep with `sources` so the two stay index-aligned.
    pub(crate) source_contents: Vec<Option<Cow<'a, str>>>,
    /// Mapping tokens in the order they were added through `add_token`.
    pub(crate) tokens: Vec<Token>,
    /// Optional token chunks; `None` unless `set_token_chunks` is called.
    pub(crate) token_chunks: Option<Vec<TokenChunk>>,
}

impl<'a> SourceMapBuilder<'a> {
    /// Add a name, deduplicating. The name is borrowed for `'a` (no allocation).
    ///
    /// Returns the id of `name`: the id assigned on its first insertion if it was already
    /// added, otherwise a new id equal to its index in the names list.
    pub fn add_name(&mut self, name: &'a str) -> u32 {
        // Id a new name would get: its position in `names`.
        let next_id = self.names.len() as u32;
        // `entry` hashes the key once, instead of a `get` followed by an `insert`.
        match self.names_map.entry(name) {
            std::collections::hash_map::Entry::Occupied(slot) => *slot.get(),
            std::collections::hash_map::Entry::Vacant(slot) => {
                slot.insert(next_id);
                self.names.push(Cow::Borrowed(name));
                next_id
            }
        }
    }

    /// Add a source and its content, deduplicating on the source path.
    /// Both are borrowed for `'a` (no allocation). Use this if `source` may be a duplicate.
    ///
    /// If `source` was already added through this method, the existing id is returned and
    /// `source_content` is ignored (the content from the first insertion wins). Sources added
    /// through [`Self::set_source_and_content`] are not recorded in the dedup map, so they are
    /// not detected as duplicates here.
    pub fn add_source_and_content(&mut self, source: &'a str, source_content: &'a str) -> u32 {
        // Id a new source would get: its position in `sources`.
        let next_id = self.sources.len() as u32;
        // `entry` hashes the key once, instead of a `get` followed by an `insert`.
        match self.sources_map.entry(source) {
            std::collections::hash_map::Entry::Occupied(slot) => *slot.get(),
            std::collections::hash_map::Entry::Vacant(slot) => {
                slot.insert(next_id);
                // `sources` and `source_contents` are pushed together so they stay index-aligned.
                self.sources.push(Cow::Borrowed(source));
                self.source_contents.push(Some(Cow::Borrowed(source_content)));
                next_id
            }
        }
    }

    /// Add a source and its content without deduplicating (skips the hash lookup when sources
    /// are unique).
    ///
    /// The `source` name is a `Cow` so callers can pass an owned, constructed path (e.g. from
    /// [`std::path::Path::to_string_lossy`]) without a separate borrow, while the (large)
    /// `source_content` is borrowed for `'a` — the content is never copied.
    ///
    /// Returns the id of the newly added source (its index in the sources list).
    pub fn set_source_and_content(&mut self, source: Cow<'a, str>, source_content: &'a str) -> u32 {
        let id = self.sources.len() as u32;
        self.sources.push(source);
        self.source_contents.push(Some(Cow::Borrowed(source_content)));
        id
    }

    /// Add an item to `SourceMap::tokens`.
    ///
    /// The arguments are forwarded unchanged to [`Token::new`]. `src_id` and `name_id` are
    /// presumably the ids returned by the `*_source_and_content` methods and
    /// [`Self::add_name`]; they are not validated here.
    pub fn add_token(
        &mut self,
        dst_line: u32,
        dst_col: u32,
        src_line: u32,
        src_col: u32,
        src_id: Option<u32>,
        name_id: Option<u32>,
    ) {
        let token = Token::new(dst_line, dst_col, src_line, src_col, src_id, name_id);
        self.tokens.push(token);
    }

    /// Set the generated file name. Borrowed for `'a` (no allocation).
    pub fn set_file(&mut self, file: &'a str) {
        self.file = Some(Cow::Borrowed(file));
    }

    /// Set the `SourceMap::token_chunks`, which let the sourcemap's VLQ mappings be generated
    /// in parallel.
    pub fn set_token_chunks(&mut self, token_chunks: Vec<TokenChunk>) {
        self.token_chunks = Some(token_chunks);
    }

    /// Finish, borrowing the names/sources/contents for `'a` (zero copy).
    ///
    /// Excess capacity of the internal vectors is released before the [`SourceMap`] is built.
    pub fn into_sourcemap(mut self) -> SourceMap<'a> {
        // Trade performance for memory.
        // The tokens array takes an enormous amount of memory,
        // which is not ideal for large applications.
        self.names.shrink_to_fit();
        self.sources.shrink_to_fit();
        // Index-aligned with `sources`, so it over-allocates the same way and is shrunk too.
        self.source_contents.shrink_to_fit();
        if let Some(chunks) = self.token_chunks.as_mut() {
            chunks.shrink_to_fit();
        }
        // `into_boxed_slice` discards excess capacity itself, so no separate
        // `shrink_to_fit` call is needed for `tokens`.
        // For checker.ts, capacity for `tokens` before and after are 262144 and 171174 respectively.
        let tokens = self.tokens.into_boxed_slice();
        SourceMap::new(
            self.file,
            self.names,
            // NOTE(review): presumably the source root, which the builder has no setter for —
            // confirm against `SourceMap::new`.
            None,
            self.sources,
            self.source_contents,
            tokens,
            self.token_chunks,
        )
    }

    /// Same as [`Self::into_sourcemap`], but copies the strings once into an owned
    /// [`crate::OwnedSourceMap`] so callers can store the result without spelling out `'static`.
    #[inline]
    pub fn into_owned_sourcemap(self) -> crate::OwnedSourceMap {
        let borrowed = self.into_sourcemap();
        crate::OwnedSourceMap::new(borrowed.into_owned())
    }
}

/// Builds a minimal map (one source, one name, a file name, no tokens) and checks both the
/// accessors and the serialized JSON.
#[test]
fn test_sourcemap_builder() {
    let mut b = SourceMapBuilder::default();
    b.set_source_and_content(Cow::Borrowed("baz.js"), "");
    b.add_name("x");
    b.set_file("file");

    let map = b.into_sourcemap();

    // Lookups by id return what was registered above.
    assert_eq!(map.get_source(0), Some("baz.js"));
    assert_eq!(map.get_name(0), Some("x"));
    assert_eq!(map.get_file(), Some("file"));

    // No tokens were added, so `mappings` is the empty string.
    let expected = r#"{"version":3,"file":"file","names":["x"],"sources":["baz.js"],"sourcesContent":[""],"mappings":""}"#;
    let actual = map.to_json_string();
    assert_eq!(expected, actual);
}

/// Re-adding a name or source must return the id from its first insertion and must not grow
/// the corresponding table.
#[test]
fn test_sourcemap_builder_dedup() {
    let mut b = SourceMapBuilder::default();

    // Names: two distinct names, each added twice.
    let foo = builder_ids(&mut b, "foo");
    let bar = builder_ids(&mut b, "bar");
    let foo_repeat = builder_ids(&mut b, "foo");
    let bar_repeat = builder_ids(&mut b, "bar");
    assert_eq!(foo, foo_repeat);
    assert_eq!(bar, bar_repeat);
    assert_ne!(foo, bar);

    // Sources: the repeated path carries different content, which must be dropped.
    let first = b.add_source_and_content("a.js", "content a");
    let second = b.add_source_and_content("b.js", "content b");
    let first_repeat = b.add_source_and_content("a.js", "different content (ignored)");
    assert_eq!(first, first_repeat);
    assert_ne!(first, second);

    let map = b.into_sourcemap();

    let names: Vec<_> = map.get_names().collect();
    assert_eq!(names, vec!["foo", "bar"]);

    let sources: Vec<_> = map.get_sources().collect();
    assert_eq!(sources, vec!["a.js", "b.js"]);

    // Source content for the first add wins; the second add returns the
    // existing id without overwriting.
    assert_eq!(map.get_source_content(first), Some("content a"));

    /// Thin local wrapper so the four name insertions above read uniformly.
    fn builder_ids<'a>(b: &mut SourceMapBuilder<'a>, name: &'a str) -> u32 {
        b.add_name(name)
    }
}