#![expect(missing_docs)]
use unicode_id_start::{is_id_continue_unicode, is_id_start_unicode};
use oxc_data_structures::assert_unchecked;
/// U+0000 NULL.
/// NOTE(review): the name suggests callers use this as an end-of-input sentinel; no use is visible in this file — confirm.
pub const EOF: char = '\0';
/// U+200C ZERO WIDTH NON-JOINER (`<ZWNJ>`). Accepted by [`is_identifier_part_unicode`].
pub const ZWNJ: char = '\u{200c}';
/// U+200D ZERO WIDTH JOINER (`<ZWJ>`). Accepted by [`is_identifier_part_unicode`].
pub const ZWJ: char = '\u{200d}';
/// U+FEFF ZERO WIDTH NO-BREAK SPACE (`<ZWNBSP>`). Treated as white space by [`is_white_space`].
pub const ZWNBSP: char = '\u{feff}';
/// U+0009 CHARACTER TABULATION (`<TAB>`).
pub const TAB: char = '\u{9}';
/// U+000B LINE TABULATION (`<VT>`).
pub const VT: char = '\u{b}';
/// U+000C FORM FEED (`<FF>`).
pub const FF: char = '\u{c}';
/// U+0020 SPACE (`<SP>`).
pub const SP: char = '\u{20}';
/// U+00A0 NO-BREAK SPACE (`<NBSP>`).
pub const NBSP: char = '\u{a0}';
/// U+0085 NEXT LINE (`<NEL>`).
const NEL: char = '\u{85}';
/// U+1680 OGHAM SPACE MARK.
const OGHAM_SPACE_MARK: char = '\u{1680}';
/// U+2000 EN QUAD. Lower bound of the `EN_QUAD..=ZWSP` range in [`is_irregular_whitespace`].
const EN_QUAD: char = '\u{2000}';
/// U+200B ZERO WIDTH SPACE (`<ZWSP>`). Upper bound of the `EN_QUAD..=ZWSP` range in
/// [`is_irregular_whitespace`].
const ZWSP: char = '\u{200b}';
/// U+202F NARROW NO-BREAK SPACE (`<NNBSP>`).
const NNBSP: char = '\u{202f}';
/// U+205F MEDIUM MATHEMATICAL SPACE (`<MMSP>`).
const MMSP: char = '\u{205f}';
/// U+3000 IDEOGRAPHIC SPACE.
const IDEOGRAPHIC_SPACE: char = '\u{3000}';
fn is_unicode_space_separator(c: char) -> bool {
c.is_whitespace() && !matches!(c, TAB | LF | VT | FF | CR | NEL | LS | PS)
}
/// Returns `true` if `c` is `TAB`, `VT`, `FF`, `ZWNBSP`, or a Unicode space separator.
///
/// Line terminators (`LF`, `CR`, `LS`, `PS`) are not white space in this sense.
pub fn is_white_space(c: char) -> bool {
    match c {
        TAB | VT | FF | ZWNBSP => true,
        _ => is_unicode_space_separator(c),
    }
}
/// Returns `true` if `c` is one of the "irregular" white space characters listed below.
///
/// Plain `SP` and `TAB` are not included, and neither are the line terminators
/// `LF`, `CR`, `LS` and `PS`.
#[rustfmt::skip]
pub fn is_irregular_whitespace(c: char) -> bool {
    // Alternatives are listed in ascending code point order.
    matches!(c,
        VT                    // U+000B
        | FF                  // U+000C
        | NEL                 // U+0085
        | NBSP                // U+00A0
        | OGHAM_SPACE_MARK    // U+1680
        | EN_QUAD..=ZWSP      // U+2000 ..= U+200B
        | NNBSP               // U+202F
        | MMSP                // U+205F
        | IDEOGRAPHIC_SPACE   // U+3000
        | ZWNBSP              // U+FEFF
    )
}
pub fn is_white_space_single_line(c: char) -> bool {
matches!(c, SP | TAB) || is_irregular_whitespace(c)
}
/// U+000A LINE FEED (`<LF>`). A regular line terminator, see [`is_regular_line_terminator`].
pub const LF: char = '\u{a}';
/// U+000D CARRIAGE RETURN (`<CR>`). A regular line terminator, see [`is_regular_line_terminator`].
pub const CR: char = '\u{d}';
/// U+2028 LINE SEPARATOR (`<LS>`). An irregular line terminator, see
/// [`is_irregular_line_terminator`].
pub const LS: char = '\u{2028}';
/// U+2029 PARAGRAPH SEPARATOR (`<PS>`). An irregular line terminator, see
/// [`is_irregular_line_terminator`].
pub const PS: char = '\u{2029}';
pub fn is_regular_line_terminator(c: char) -> bool {
matches!(c, LF | CR)
}
pub fn is_irregular_line_terminator(c: char) -> bool {
matches!(c, LS | PS)
}
pub fn is_line_terminator(c: char) -> bool {
is_regular_line_terminator(c) || is_irregular_line_terminator(c)
}
// Two-character markers for table entries, so the lookup tables below read as a grid.
const XX: bool = true;
const __: bool = false;

// Wrapper that forces its contents onto a 64-byte boundary.
// NOTE(review): 64 is presumably the cache line size, so each 128-byte table would occupy
// exactly two lines — confirm.
#[repr(C, align(64))]
pub struct Align64<T>(pub(crate) T);

// Lookup table indexed by ASCII code: `true` for `A`-`Z`, `a`-`z`, `$` (0x24) and `_` (0x5F).
#[rustfmt::skip]
pub static ASCII_START: Align64<[bool; 128]> = Align64([
//  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0x00
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0x10
    __, __, __, __, XX, __, __, __, __, __, __, __, __, __, __, __, // 0x20: `$`
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0x30
    __, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0x40: `A`-`O`
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, __, __, __, __, XX, // 0x50: `P`-`Z`, `_`
    __, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0x60: `a`-`o`
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, __, __, __, __, __, // 0x70: `p`-`z`
]);

// Lookup table indexed by ASCII code: everything in `ASCII_START` plus the digits `0`-`9`.
#[rustfmt::skip]
pub static ASCII_CONTINUE: Align64<[bool; 128]> = Align64([
//  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0x00
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0x10
    __, __, __, __, XX, __, __, __, __, __, __, __, __, __, __, __, // 0x20: `$`
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, __, __, __, __, __, __, // 0x30: `0`-`9`
    __, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0x40: `A`-`O`
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, __, __, __, __, XX, // 0x50: `P`-`Z`, `_`
    __, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0x60: `a`-`o`
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, __, __, __, __, __, // 0x70: `p`-`z`
]);
/// Returns `true` if `c` may begin an identifier.
///
/// ASCII characters are answered from a lookup table via [`is_identifier_start_ascii`];
/// all other characters are delegated to [`is_identifier_start_unicode`].
#[inline]
pub fn is_identifier_start(c: char) -> bool {
    if c.is_ascii() { is_identifier_start_ascii(c) } else { is_identifier_start_unicode(c) }
}
#[inline]
pub fn is_identifier_start_ascii(c: char) -> bool {
ASCII_START.0[c as usize]
}
/// Returns `true` if `c` may begin an identifier, as decided by
/// `unicode_id_start::is_id_start_unicode`.
///
/// [`is_identifier_start`] only calls this for non-ASCII characters; ASCII is handled by
/// [`is_identifier_start_ascii`].
#[inline]
pub fn is_identifier_start_unicode(c: char) -> bool {
is_id_start_unicode(c)
}
/// Returns `true` if `c` may appear in an identifier after its first character.
///
/// ASCII characters are answered from a lookup table via [`is_identifier_part_ascii`];
/// all other characters are delegated to [`is_identifier_part_unicode`].
#[inline]
pub fn is_identifier_part(c: char) -> bool {
    if c.is_ascii() { is_identifier_part_ascii(c) } else { is_identifier_part_unicode(c) }
}
#[inline]
pub fn is_identifier_part_ascii(c: char) -> bool {
ASCII_CONTINUE.0[c as usize]
}
#[inline]
pub fn is_identifier_part_unicode(c: char) -> bool {
is_id_continue_unicode(c) || c == ZWNJ || c == ZWJ
}
/// Returns `true` if `name` is non-empty, its first char passes the identifier-start check,
/// and every following char passes the identifier-part check.
///
/// Contains a fast path for ASCII: as long as only ASCII bytes have been seen, bytes are
/// tested with the table-driven `is_identifier_start_ascii` / `is_identifier_part_ascii`,
/// in blocks of 8 bytes, then 4 bytes, then single bytes. As soon as a non-ASCII byte is
/// found, the rest of the string is checked char by char with `is_identifier_part`.
#[expect(clippy::missing_panics_doc)]
pub fn is_identifier_name(name: &str) -> bool {
let bytes = name.as_bytes();
// An empty string is not an identifier name.
let Some(&first_byte) = bytes.first() else { return false };
let mut chars = if first_byte.is_ascii() {
// First byte is ASCII, so it is a complete char and the table lookup is in range.
if !is_identifier_start_ascii(first_byte as char) {
return false;
}
// Invariant: `index <= bytes.len()` and `bytes[..index]` is all ASCII identifier chars.
let mut index = 1;
'outer: loop {
// Cannot underflow: `index` starts at 1 in a non-empty slice and is only advanced
// by 8 or 4 after checking that at least that many bytes remain.
let bytes_remaining = bytes.len() - index;
if bytes_remaining >= 8 {
// SAFETY: at least 8 bytes are readable starting at `index` (checked just above),
// and `read_unaligned` places no alignment requirement on the pointer.
#[expect(clippy::cast_ptr_alignment)]
let next8_as_u64 = unsafe {
let ptr = bytes.as_ptr().add(index).cast::<u64>();
ptr.read_unaligned()
};
// A byte is non-ASCII exactly when its top bit is set; test all 8 top bits at once.
let high_bits = next8_as_u64 & 0x8080_8080_8080_8080;
if high_bits != 0 {
// At least one byte in this block is non-ASCII. Leave `index` at the start of
// the block and fall through to the char-by-char check after the loop.
break;
}
// Native-endian conversion yields the bytes in their original memory order.
let next8 = next8_as_u64.to_ne_bytes();
for b in next8 {
// SAFETY: the mask test above proved every byte in this block is ASCII.
// NOTE(review): presumably this hint lets the compiler drop the bounds check in
// the table lookup below — confirm against the `assert_unchecked!` docs.
unsafe { assert_unchecked!(b.is_ascii()) };
if !is_identifier_part_ascii(b as char) {
return false;
}
}
index += 8;
} else if bytes_remaining >= 4 {
// Same as the 8-byte case, for a block of 4 bytes.
// SAFETY: at least 4 bytes are readable starting at `index` (checked just above),
// and `read_unaligned` places no alignment requirement on the pointer.
#[expect(clippy::cast_ptr_alignment)]
let next4_as_u32 = unsafe {
let ptr = bytes.as_ptr().add(index).cast::<u32>();
ptr.read_unaligned()
};
let high_bits = next4_as_u32 & 0x8080_8080;
if high_bits != 0 {
// Non-ASCII byte somewhere in this block; finish with the char-by-char check.
break;
}
let next4 = next4_as_u32.to_ne_bytes();
for b in next4 {
// SAFETY: the mask test above proved every byte in this block is ASCII.
unsafe { assert_unchecked!(b.is_ascii()) };
if !is_identifier_part_ascii(b as char) {
return false;
}
}
index += 4;
} else {
// Fewer than 4 bytes remain: check them one at a time.
loop {
let Some(&b) = bytes.get(index) else {
// Reached the end of the string without finding an invalid char.
return true;
};
if b.is_ascii() {
if !is_identifier_part_ascii(b as char) {
return false;
}
} else {
// Non-ASCII byte found; finish with the char-by-char check.
break 'outer;
}
index += 1;
}
}
}
// Every byte before `index` is ASCII, so `index` lies on a char boundary and this slice
// cannot panic. Any ASCII bytes of a partially-checked block are simply re-checked.
name[index..].chars()
} else {
// First char is non-ASCII: check it as a Unicode identifier start.
let mut chars = name.chars();
// Cannot fail: `name` was shown to be non-empty above.
let first_char = chars.next().unwrap();
if !is_identifier_start_unicode(first_char) {
return false;
}
chars
};
// Check whatever remains of the string char by char.
chars.all(is_identifier_part)
}
/// Checks that `is_identifier_name` accepts valid names, covering single chars, strings
/// long enough to exercise the 8-byte and 4-byte block paths, and multi-byte UTF-8 chars.
#[test]
fn is_identifier_name_true() {
    let cases = [
        // Single ASCII chars
        "a",
        "z",
        "A",
        "Z",
        "_",
        "$",
        // Single non-ASCII chars
        "µ", // 2 bytes in UTF-8
        "ख", // 3 bytes in UTF-8
        "𐀀", // 4 bytes in UTF-8
        // Two ASCII chars
        "az",
        "AZ",
        "_a",
        "$Z",
        "a0",
        "A9",
        "_0",
        "$9",
        // Long ASCII strings (64 bytes)
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$",
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$",
        "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789$",
        "$abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_",
        // Non-ASCII only, and mixed with ASCII
        "µख𐀀",
        "AµBखC𐀀D",
        "µAखB𐀀",
    ];
    for case in cases {
        // Name the failing input so a regression is immediately identifiable.
        assert!(is_identifier_name(case), "expected {case:?} to be a valid identifier name");
    }
}
/// Checks that `is_identifier_name` rejects invalid names: the empty string, an invalid
/// first char, and an invalid char at various positions (including after long ASCII runs).
#[test]
fn is_identifier_name_false() {
    let cases = [
        // Empty string
        "",
        // Single ASCII chars
        "0",
        "9",
        "-",
        "~",
        "+",
        // Single non-ASCII chars
        "£", // 2 bytes in UTF-8
        "৸", // 3 bytes in UTF-8
        "𐄬", // 4 bytes in UTF-8
        // Invalid first char
        "0a",
        "9a",
        "-a",
        "+a",
        // Invalid later char
        "a-Z",
        "A+z",
        "a-",
        "a+",
        // Non-ASCII only, and invalid non-ASCII char after a valid ASCII start
        "£৸𐄬",
        "A£",
        "A৸",
        "A𐄬",
        // Invalid non-ASCII char at the end of a long ASCII run
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$abc£",
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$abc৸",
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$abc𐄬",
        // Invalid non-ASCII char in the middle of a long ASCII run
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$abc£abcdefghijklmnopqrstuvwxyz",
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$abc৸abcdefghijklmnopqrstuvwxyz",
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$abc𐄬abcdefghijklmnopqrstuvwxyz",
        // Invalid non-ASCII first char
        "£A",
        "৸A",
        "𐄬A",
    ];
    for case in cases {
        // Name the failing input so a regression is immediately identifiable.
        assert!(!is_identifier_name(case), "expected {case:?} to be rejected as an identifier name");
    }
}