1#![expect(missing_docs)] use unicode_id_start::{is_id_continue_unicode, is_id_start_unicode};
4
5use oxc_data_structures::assert_unchecked;
6
/// `U+0000` NULL.
/// NOTE(review): going by the name, presumably used by callers as an end-of-input sentinel — confirm.
pub const EOF: char = '\0';

// Unicode format-control characters.

/// `U+200C` ZERO WIDTH NON-JOINER (`<ZWNJ>`).
/// Accepted as an identifier part by [`is_identifier_part_unicode`].
pub const ZWNJ: char = '\u{200c}';

/// `U+200D` ZERO WIDTH JOINER (`<ZWJ>`).
/// Accepted as an identifier part by [`is_identifier_part_unicode`].
pub const ZWJ: char = '\u{200d}';

/// `U+FEFF` ZERO WIDTH NO-BREAK SPACE (`<ZWNBSP>`), also known as the byte order mark.
/// Treated as white space by [`is_white_space`].
pub const ZWNBSP: char = '\u{feff}';

// White space characters.

/// `U+0009` CHARACTER TABULATION (`<TAB>`).
pub const TAB: char = '\u{9}';

/// `U+000B` LINE TABULATION, i.e. vertical tab (`<VT>`).
pub const VT: char = '\u{b}';

/// `U+000C` FORM FEED (`<FF>`).
pub const FF: char = '\u{c}';

/// `U+0020` SPACE (`<SP>`).
pub const SP: char = '\u{20}';

/// `U+00A0` NO-BREAK SPACE (`<NBSP>`).
pub const NBSP: char = '\u{a0}';

/// `U+0085` NEXT LINE (`<NEL>`).
const NEL: char = '\u{85}';

/// `U+1680` OGHAM SPACE MARK.
const OGHAM_SPACE_MARK: char = '\u{1680}';

/// `U+2000` EN QUAD. Lower bound of the `EN_QUAD..=ZWSP` range used by
/// [`is_irregular_whitespace`].
const EN_QUAD: char = '\u{2000}';

/// `U+200B` ZERO WIDTH SPACE (`<ZWSP>`). Upper bound of the `EN_QUAD..=ZWSP` range used by
/// [`is_irregular_whitespace`].
const ZWSP: char = '\u{200b}';

/// `U+202F` NARROW NO-BREAK SPACE (`<NNBSP>`).
const NNBSP: char = '\u{202f}';

/// `U+205F` MEDIUM MATHEMATICAL SPACE (`<MMSP>`).
const MMSP: char = '\u{205f}';

/// `U+3000` IDEOGRAPHIC SPACE.
const IDEOGRAPHIC_SPACE: char = '\u{3000}';
56
57fn is_unicode_space_separator(c: char) -> bool {
58 c.is_whitespace() && !matches!(c, TAB | LF | VT | FF | CR | NEL | LS | PS)
62}
63
64pub fn is_white_space(c: char) -> bool {
65 matches!(c, TAB | VT | FF | ZWNBSP) || is_unicode_space_separator(c)
66}
67
68#[rustfmt::skip]
70pub fn is_irregular_whitespace(c: char) -> bool {
71 matches!(c,
72 VT | FF | NBSP | ZWNBSP | NEL | OGHAM_SPACE_MARK
73 | EN_QUAD..=ZWSP | NNBSP | MMSP | IDEOGRAPHIC_SPACE
74 )
75}
76
77pub fn is_white_space_single_line(c: char) -> bool {
79 matches!(c, SP | TAB) || is_irregular_whitespace(c)
82}
83
// Line terminator characters.

/// `U+000A` LINE FEED (`<LF>`).
pub const LF: char = '\u{a}';

/// `U+000D` CARRIAGE RETURN (`<CR>`).
pub const CR: char = '\u{d}';

/// `U+2028` LINE SEPARATOR (`<LS>`).
pub const LS: char = '\u{2028}';

/// `U+2029` PARAGRAPH SEPARATOR (`<PS>`).
pub const PS: char = '\u{2029}';
97
98pub fn is_regular_line_terminator(c: char) -> bool {
99 matches!(c, LF | CR)
100}
101
102pub fn is_irregular_line_terminator(c: char) -> bool {
103 matches!(c, LS | PS)
104}
105
106pub fn is_line_terminator(c: char) -> bool {
107 is_regular_line_terminator(c) || is_irregular_line_terminator(c)
108}
109
// Two-character aliases for `true` / `false`, so that the ASCII lookup tables
// (`ASCII_START`, `ASCII_CONTINUE`) can be written as a compact, aligned grid.
const XX: bool = true;
const __: bool = false;
112
/// Wrapper which forces its contents to be aligned on a 64-byte boundary.
///
/// NOTE(review): 64 bytes is presumably chosen to match the CPU cache line size, so that a
/// 128-entry `bool` table occupies exactly two cache lines — confirm.
#[repr(C, align(64))]
pub struct Align64<T>(pub(crate) T);
115
/// Lookup table, indexed by ASCII code point (`0..128`), of which ASCII characters may
/// start an identifier: `$`, `_`, `A`-`Z` and `a`-`z`.
///
/// Read via [`is_identifier_start_ascii`].
#[rustfmt::skip]
pub static ASCII_START: Align64<[bool; 128]> = Align64([
//  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F   // low nibble
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0x0_
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0x1_
    __, __, __, __, XX, __, __, __, __, __, __, __, __, __, __, __, // 0x2_  `$`
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0x3_
    __, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0x4_  `A`-`O`
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, __, __, __, __, XX, // 0x5_  `P`-`Z`, `_`
    __, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0x6_  `a`-`o`
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, __, __, __, __, __, // 0x7_  `p`-`z`
]);
129
/// Lookup table, indexed by ASCII code point (`0..128`), of which ASCII characters may
/// appear after the first character of an identifier: `$`, `_`, `0`-`9`, `A`-`Z` and `a`-`z`.
///
/// Read via [`is_identifier_part_ascii`].
#[rustfmt::skip]
pub static ASCII_CONTINUE: Align64<[bool; 128]> = Align64([
//  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F   // low nibble
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0x0_
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0x1_
    __, __, __, __, XX, __, __, __, __, __, __, __, __, __, __, __, // 0x2_  `$`
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, __, __, __, __, __, __, // 0x3_  `0`-`9`
    __, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0x4_  `A`-`O`
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, __, __, __, __, XX, // 0x5_  `P`-`Z`, `_`
    __, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0x6_  `a`-`o`
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, __, __, __, __, __, // 0x7_  `p`-`z`
]);
143
144#[inline]
146pub fn is_identifier_start(c: char) -> bool {
147 if c.is_ascii() {
148 return is_identifier_start_ascii(c);
149 }
150 is_identifier_start_unicode(c)
151}
152
153#[inline]
154pub fn is_identifier_start_ascii(c: char) -> bool {
155 ASCII_START.0[c as usize]
156}
157
/// Returns `true` if `c` may be the first character of an identifier, as decided by
/// `unicode_id_start::is_id_start_unicode`.
///
/// This function performs no ASCII table lookup of its own; [`is_identifier_start`] only
/// calls it for non-ASCII characters.
/// NOTE(review): presumably the delegate tests the Unicode `ID_Start` property and is only
/// meant for non-ASCII input — confirm against the `unicode_id_start` crate docs.
#[inline]
pub fn is_identifier_start_unicode(c: char) -> bool {
    is_id_start_unicode(c)
}
162
163#[inline]
166pub fn is_identifier_part(c: char) -> bool {
167 if c.is_ascii() {
168 return is_identifier_part_ascii(c);
169 }
170 is_identifier_part_unicode(c)
171}
172
173#[inline]
174pub fn is_identifier_part_ascii(c: char) -> bool {
175 ASCII_CONTINUE.0[c as usize]
176}
177
178#[inline]
179pub fn is_identifier_part_unicode(c: char) -> bool {
180 is_id_continue_unicode(c) || c == ZWNJ || c == ZWJ
181}
182
/// Returns `true` if `name` is a valid identifier name: it is non-empty, its first character
/// is an identifier start character, and every following character is an identifier part
/// character.
///
/// No check for reserved words is performed.
///
/// The implementation is optimized for all-ASCII names. While only ASCII bytes have been
/// seen, bytes are validated in chunks of 8, then 4, then one at a time, via the ASCII
/// lookup table. On the first non-ASCII byte it falls back to decoding the rest of the
/// string as `char`s and checking each with [`is_identifier_part`].
// The only `unwrap` below cannot fail, because `name` has already been shown to be non-empty.
#[expect(clippy::missing_panics_doc)]
pub fn is_identifier_name(name: &str) -> bool {
    let bytes = name.as_bytes();
    // Empty string is not an identifier name.
    let Some(&first_byte) = bytes.first() else { return false };

    // `chars` ends up as an iterator over whatever part of `name` still needs checking
    // char-by-char.
    let mut chars = if first_byte.is_ascii() {
        // Fast path: first character is ASCII.
        if !is_identifier_start_ascii(first_byte as char) {
            return false;
        }

        // `index` is the number of leading bytes already validated. All of them are ASCII,
        // so `index` is always on a UTF-8 character boundary, and never exceeds `bytes.len()`.
        let mut index = 1;
        'outer: loop {
            let bytes_remaining = bytes.len() - index;
            if bytes_remaining >= 8 {
                // Process the next 8 bytes in one go.
                #[expect(clippy::cast_ptr_alignment)]
                // SAFETY: `bytes_remaining >= 8`, so the 8 bytes starting at `index` are all
                // within `bytes`. `read_unaligned` imposes no alignment requirement on `ptr`,
                // and every bit pattern is a valid `u64`.
                let next8_as_u64 = unsafe {
                    let ptr = bytes.as_ptr().add(index).cast::<u64>();
                    ptr.read_unaligned()
                };
                // A byte is non-ASCII exactly when its top bit is set. The mask has the same
                // value in every byte, so the test is independent of endianness.
                let high_bits = next8_as_u64 & 0x8080_8080_8080_8080;
                if high_bits != 0 {
                    // At least one of these 8 bytes is non-ASCII. Exit the `'outer` loop
                    // without advancing `index`, so the slow path re-checks this whole chunk.
                    break;
                }

                // `to_ne_bytes` yields the bytes in the same order they appear in memory.
                let next8 = next8_as_u64.to_ne_bytes();
                for b in next8 {
                    // SAFETY: `high_bits == 0` was checked above, so no byte in this chunk has
                    // its top bit set, i.e. every `b` is ASCII.
                    // NOTE(review): presumably `assert_unchecked!` is an optimizer hint which
                    // lets the table lookup below skip its bounds check — confirm against
                    // `oxc_data_structures`.
                    unsafe { assert_unchecked!(b.is_ascii()) };
                    if !is_identifier_part_ascii(b as char) {
                        return false;
                    }
                }

                index += 8;
            } else if bytes_remaining >= 4 {
                // Same as above, but for a chunk of 4 bytes.
                #[expect(clippy::cast_ptr_alignment)]
                // SAFETY: `bytes_remaining >= 4`, so the 4 bytes starting at `index` are all
                // within `bytes`. `read_unaligned` imposes no alignment requirement on `ptr`,
                // and every bit pattern is a valid `u32`.
                let next4_as_u32 = unsafe {
                    let ptr = bytes.as_ptr().add(index).cast::<u32>();
                    ptr.read_unaligned()
                };
                let high_bits = next4_as_u32 & 0x8080_8080;
                if high_bits != 0 {
                    // At least one of these 4 bytes is non-ASCII. Exit the `'outer` loop
                    // without advancing `index`, so the slow path re-checks this whole chunk.
                    break;
                }

                let next4 = next4_as_u32.to_ne_bytes();
                for b in next4 {
                    // SAFETY: `high_bits == 0` was checked above, so no byte in this chunk has
                    // its top bit set, i.e. every `b` is ASCII.
                    unsafe { assert_unchecked!(b.is_ascii()) };
                    if !is_identifier_part_ascii(b as char) {
                        return false;
                    }
                }

                index += 4;
            } else {
                // Fewer than 4 bytes left. Check them one at a time.
                loop {
                    let Some(&b) = bytes.get(index) else {
                        // Reached the end of the string, and every byte was a valid ASCII
                        // identifier character.
                        return true;
                    };

                    if b.is_ascii() {
                        if !is_identifier_part_ascii(b as char) {
                            return false;
                        }
                    } else {
                        // Non-ASCII byte found. Hand the rest of the string to the slow path.
                        break 'outer;
                    }

                    index += 1;
                }
            }
        }

        // Slow path: a non-ASCII byte lies at or after `index`. Everything before `index`
        // has already been validated, so only the remainder needs checking.
        name[index..].chars()
    } else {
        // First character is not ASCII. Decode it and check it as a Unicode start character.
        let mut chars = name.chars();
        // Cannot panic: `name` is non-empty, so there is a first `char`.
        let first_char = chars.next().unwrap();
        if !is_identifier_start_unicode(first_char) {
            return false;
        }
        chars
    };

    // Check every remaining character, ASCII or not.
    chars.all(is_identifier_part)
}
294
/// Strings which [`is_identifier_name`] must accept.
///
/// The 64-byte all-ASCII cases leave 63 bytes after the first character, so they pass
/// through the 8-byte, 4-byte and single-byte branches of the ASCII fast path.
#[test]
fn is_identifier_name_true() {
    let cases = [
        // 1 char
        "a",
        "z",
        "A",
        "Z",
        "_",
        "$",
        "µ", // 2-byte UTF-8
        "ख", // 3-byte UTF-8
        "𐀀", // 4-byte UTF-8
        // Multiple chars, all ASCII
        "az",
        "AZ",
        "_a",
        "$Z",
        "a0",
        "A9",
        "_0",
        "$9",
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$",
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$",
        "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789$",
        "$abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_",
        // Multiple chars, all non-ASCII
        "µख𐀀",
        // ASCII first char, mixed with non-ASCII
        "AµBखC𐀀D",
        // Non-ASCII first char, mixed with ASCII
        "µAखB𐀀",
    ];

    for name in cases {
        // Include the offending input in the failure message, so a failure identifies the case.
        assert!(is_identifier_name(name), "expected {name:?} to be accepted as an identifier name");
    }
}
334
/// Strings which [`is_identifier_name`] must reject.
///
/// The long cases place an invalid non-ASCII character after a run of valid ASCII, so the
/// rejection has to come from the `char`-by-`char` fallback rather than the ASCII fast path.
#[test]
fn is_identifier_name_false() {
    let cases = [
        // Empty string
        "",
        // 1 char
        "0",
        "9",
        "-",
        "~",
        "+",
        "£", // 2-byte UTF-8
        "৸", // 3-byte UTF-8
        "𐄬", // 4-byte UTF-8
        // Multiple chars, all ASCII
        "0a",
        "9a",
        "-a",
        "+a",
        "a-Z",
        "A+z",
        "a-",
        "a+",
        // Multiple chars, all non-ASCII
        "£৸𐄬",
        // Valid ASCII first char followed by an invalid non-ASCII char
        "A£",
        "A৸",
        "A𐄬",
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$abc£",
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$abc৸",
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$abc𐄬",
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$abc£abcdefghijklmnopqrstuvwxyz",
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$abc৸abcdefghijklmnopqrstuvwxyz",
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$abc𐄬abcdefghijklmnopqrstuvwxyz",
        // Invalid non-ASCII first char followed by a valid ASCII char
        "£A",
        "৸A",
        "𐄬A",
    ];

    for name in cases {
        // Include the offending input in the failure message, so a failure identifies the case.
        assert!(!is_identifier_name(name), "expected {name:?} to be rejected as an identifier name");
    }
}