oxc_mangler 0.119.0

A collection of JavaScript tools written in Rust.
Documentation
use oxc_data_structures::inline_string::InlineString;

/// A 64-byte array wrapper whose alignment is forced to 64 bytes.
///
/// NOTE(review): presumably aligned so the whole lookup table sits in a single cache line — confirm.
#[repr(C, align(64))]
struct Aligned64([u8; 64]);

/// Lookup table of the 64 ASCII characters used to build mangled names.
///
/// The first 54 entries (letters, `_` and `$`) can start an identifier; the last 10 entries are
/// the digits, which may only appear after the first character.
///
/// The characters are in frequency order, so that the characters with higher frequency are used first.
///
/// This idea was inspired by nanoid. <https://github.com/ai/nanoid/blob/5.0.9/url-alphabet/index.js>
///
/// This list was generated by the following steps:
/// 1. Generate source code in which every manglable variable name is replaced with `$` (assuming `$` is the least used character).
///    You can do this by passing the following `blank` function to the `generate_name` parameter of [crate::Mangler::build_with_semantic_impl].
///    ```rust,ignore
///    fn blank(_: usize) -> InlineString<12> {
///        let mut str = InlineString::new();
///        unsafe { str.push_unchecked(b"$"[0]); }
///        str
///    }
///    ```
/// 2. Run the following command in `target/minifier/default` to generate the list:
///    ```shell
///    find . -type f -exec cat {} + | `# concat all files in that directory` \
///      tr -d '\n' | fold -w1 | `# put each character on its own line` \
///      grep -E '[abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$_0123456789]' | `# keep only identifier characters` \
///      sort | uniq -c | `# count each character` \
///      sort -nr | awk '{print $2}' | tr -d '\n' `# format output`
///    ```
///    The result I got is `etnriaoscludfpmhg_10vy2436b8x579SCwTEDOkAjMNPFILRzBVHUWGKqJYXZQ`.
/// 3. Add `$` at the end and then move all numbers to the end of the list.
const BASE54_CHARS: Aligned64 =
    Aligned64(*b"etnriaoscludfpmhg_vybxSCwTEDOkAjMNPFILRzBVHUWGKqJYXZQ$1024368579");

/// Get the shortest mangled name for a given `n`.
///
/// The first character is taken from the leading 54 entries of `BASE54_CHARS` (valid identifier
/// starts); every following character is taken from all 64 entries (digits included).
///
/// Code adapted from [terser](https://github.com/terser/terser/blob/8b966d687395ab493d2c6286cc9dd38650324c11/lib/scope.js#L1041-L1051)
//
// The longest possible output is 6 bytes (`xKrTKr` for `u32::MAX`). `CAPACITY` is nevertheless 7,
// which makes the whole `InlineString` 8 bytes once the `u8` length field is included.
// Initializing it is then a single instruction, and with luck the value stays in a register
// for the duration of this function.
#[expect(clippy::items_after_statements)]
pub fn base54(n: u32) -> InlineString<7, u8> {
    let mut name = InlineString::new();
    let mut remaining = n as usize;

    // Only 54 characters may begin a JavaScript identifier.
    // <https://tc39.es/ecma262/#prod-IdentifierStart>
    const FIRST_BASE: usize = 54;
    // After the first character the digits 0-9 are allowed as well, giving 64 choices.
    // <https://tc39.es/ecma262/#prod-IdentifierPart>
    const REST_BASE: usize = 64;

    let first = BASE54_CHARS.0[remaining % FIRST_BASE];
    // SAFETY: Every byte in `BASE54_CHARS` is ASCII. Nothing has been pushed yet,
    // so this push cannot exceed the capacity.
    unsafe { name.push_unchecked(first) };
    remaining /= FIRST_BASE;

    // Each round consumes one "digit" of the remaining value, offset by one so that
    // every multi-character name is reachable. The loop stops once `remaining` is 0.
    while let Some(shifted) = remaining.checked_sub(1) {
        let next = BASE54_CHARS.0[shifted % REST_BASE];
        // SAFETY: Every byte in `BASE54_CHARS` is ASCII.
        // The longest output is `xKrTKr` (6 bytes) for `u32::MAX`, which is below `CAPACITY` (7).
        unsafe { name.push_unchecked(next) };
        remaining = shifted / REST_BASE;
    }

    name
}

#[cfg(test)]
mod test {
    use super::base54;

    /// Checks the boundaries of the first-character range, the first two-character names,
    /// and the longest name (`u32::MAX`).
    #[test]
    fn test_base54() {
        let cases: [(u32, &str); 6] = [
            (0, "e"),
            (52, "Q"),
            (53, "$"),
            (54, "ee"),
            (55, "te"),
            (u32::MAX, "xKrTKr"),
        ];

        for (index, expected) in cases {
            assert_eq!(&*base54(index), expected);
        }
    }
}