/// Bit set describing which name positions an ASCII character may occupy.
type ValidityFlags = u8;
/// The character may be the first character of a `Name`.
const FLAG_NAME_START: ValidityFlags = 0b0001;
/// The character may follow the first character of a `Name`.
const FLAG_NAME_CONTINUE: ValidityFlags = 0b0010;
/// The character may be the first character of an `NCName` (a name without `:`).
const FLAG_NCNAME_START: ValidityFlags = 0b0100;
/// The character may follow the first character of an `NCName`.
const FLAG_NCNAME_CONTINUE: ValidityFlags = 0b1000;

/// Per-character validity flags, indexed by ASCII code point (`0x00..=0x7F`).
///
/// Laid out as eight rows of sixteen entries; the row comment gives the code
/// point of the first entry in that row.
const NAME_CHAR_VALIDITY: [ValidityFlags; 128] = {
    // Row aliases, scoped to this initializer so they cannot leak into the module.
    /// Not allowed anywhere in a name.
    const X: ValidityFlags = 0;
    /// Continue-only: `-`, `.` and the decimal digits.
    const C: ValidityFlags = FLAG_NAME_CONTINUE | FLAG_NCNAME_CONTINUE;
    /// The colon: fine anywhere in a `Name`, never in an `NCName`.
    const Q: ValidityFlags = FLAG_NAME_START | FLAG_NAME_CONTINUE;
    /// Letters and `_`: allowed in every position.
    const A: ValidityFlags =
        FLAG_NAME_START | FLAG_NAME_CONTINUE | FLAG_NCNAME_START | FLAG_NCNAME_CONTINUE;

    [
        X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, // 0x00: control characters
        X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, // 0x10: control characters
        X, X, X, X, X, X, X, X, X, X, X, X, X, C, C, X, // 0x20: ... '-' '.' '/'
        C, C, C, C, C, C, C, C, C, C, Q, X, X, X, X, X, // 0x30: '0'-'9' ':' ...
        X, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, // 0x40: '@' 'A'-'O'
        A, A, A, A, A, A, A, A, A, A, A, X, X, X, X, A, // 0x50: 'P'-'Z' ... '_'
        X, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, // 0x60: '`' 'a'-'o'
        A, A, A, A, A, A, A, A, A, A, A, X, X, X, X, X, // 0x70: 'p'-'z' ... DEL
    ]
};

/// Mask that folds any `usize` into a valid index of [`NAME_CHAR_VALIDITY`].
const ASCII_INDEX_MASK: usize = NAME_CHAR_VALIDITY.len() - 1;
/// Table lookup: may the ASCII character `c` begin a `Name`?
///
/// The index is masked to its low seven bits, so a non-ASCII `c` is looked up
/// as whichever ASCII character shares those bits; callers are expected to
/// check `c.is_ascii()` first.
#[must_use]
fn is_name_start_ascii(c: char) -> bool {
    let slot = (c as usize) & ASCII_INDEX_MASK;
    let flags = NAME_CHAR_VALIDITY[slot];
    (flags & FLAG_NAME_START) != 0
}
/// Table lookup: may the ASCII character `c` appear after the first character
/// of a `Name`?
///
/// The index is masked to its low seven bits, so a non-ASCII `c` is looked up
/// as whichever ASCII character shares those bits; callers are expected to
/// check `c.is_ascii()` first.
#[must_use]
fn is_name_continue_ascii(c: char) -> bool {
    let slot = (c as usize) & ASCII_INDEX_MASK;
    let flags = NAME_CHAR_VALIDITY[slot];
    (flags & FLAG_NAME_CONTINUE) != 0
}
/// Table lookup: may the ASCII character `c` begin an `NCName`?
///
/// The index is masked to its low seven bits, so a non-ASCII `c` is looked up
/// as whichever ASCII character shares those bits; callers are expected to
/// check `c.is_ascii()` first.
#[must_use]
fn is_ncname_start_ascii(c: char) -> bool {
    let slot = (c as usize) & ASCII_INDEX_MASK;
    let flags = NAME_CHAR_VALIDITY[slot];
    (flags & FLAG_NCNAME_START) != 0
}
/// Table lookup: may the ASCII character `c` appear after the first character
/// of an `NCName`?
///
/// The index is masked to its low seven bits, so a non-ASCII `c` is looked up
/// as whichever ASCII character shares those bits; callers are expected to
/// check `c.is_ascii()` first.
#[must_use]
fn is_ncname_continue_ascii(c: char) -> bool {
    let slot = (c as usize) & ASCII_INDEX_MASK;
    let flags = NAME_CHAR_VALIDITY[slot];
    (flags & FLAG_NCNAME_CONTINUE) != 0
}
/// Reports whether the non-ASCII character `c` may begin a name.
///
/// The listed ranges are the non-ASCII part of the XML `NameStartChar`
/// production. Every range starts at U+00C0 or above, so any ASCII input
/// yields `false`; ASCII characters must be classified separately.
#[must_use]
fn is_name_start_nonascii(c: char) -> bool {
    matches!(c,
        '\u{C0}'..='\u{D6}'             // skips U+00D7 MULTIPLICATION SIGN
            | '\u{D8}'..='\u{F6}'       // skips U+00F7 DIVISION SIGN
            | '\u{F8}'..='\u{2FF}'      // stops before the combining marks at U+0300
            | '\u{370}'..='\u{37D}'     // skips U+037E GREEK QUESTION MARK
            | '\u{37F}'..='\u{1FFF}'
            | '\u{200C}'..='\u{200D}'   // zero-width non-joiner / joiner
            | '\u{2070}'..='\u{218F}'
            | '\u{2C00}'..='\u{2FEF}'
            | '\u{3001}'..='\u{D7FF}'
            | '\u{F900}'..='\u{FDCF}'
            | '\u{FDF0}'..='\u{FFFD}'
            | '\u{10000}'..='\u{EFFFF}')
}
/// Reports whether the non-ASCII character `c` may appear after the first
/// character of a name.
///
/// The listed ranges are the non-ASCII part of the XML `NameChar` production:
/// everything accepted as a name start, plus U+00B7, U+0300..=U+036F and
/// U+203F..=U+2040. The lowest accepted code point is U+00B7, so any ASCII
/// input yields `false`; ASCII characters must be classified separately.
#[must_use]
fn is_name_continue_nonascii(c: char) -> bool {
    matches!(c,
        '\u{B7}'                        // MIDDLE DOT
            | '\u{C0}'..='\u{D6}'       // skips U+00D7 MULTIPLICATION SIGN
            | '\u{D8}'..='\u{F6}'       // skips U+00F7 DIVISION SIGN
            | '\u{F8}'..='\u{37D}'      // includes the combining marks U+0300..=U+036F
            | '\u{37F}'..='\u{1FFF}'    // skips U+037E GREEK QUESTION MARK
            | '\u{200C}'..='\u{200D}'   // zero-width non-joiner / joiner
            | '\u{203F}'..='\u{2040}'   // UNDERTIE, CHARACTER TIE
            | '\u{2070}'..='\u{218F}'
            | '\u{2C00}'..='\u{2FEF}'
            | '\u{3001}'..='\u{D7FF}'
            | '\u{F900}'..='\u{FDCF}'
            | '\u{FDF0}'..='\u{FFFD}'
            | '\u{10000}'..='\u{EFFFF}')
}
/// Reports whether `c` may be the first character of a `Name`.
///
/// ASCII characters are answered by `is_name_start_ascii`; all other
/// characters are answered by `is_name_start_nonascii`.
#[must_use]
pub(super) fn is_name_start(c: char) -> bool {
    if c.is_ascii() {
        is_name_start_ascii(c)
    } else {
        is_name_start_nonascii(c)
    }
}
/// Reports whether `c` may appear after the first character of a `Name`.
///
/// Non-ASCII characters are answered by `is_name_continue_nonascii`; ASCII
/// characters are answered by `is_name_continue_ascii`.
#[must_use]
pub(super) fn is_name_continue(c: char) -> bool {
    if !c.is_ascii() {
        return is_name_continue_nonascii(c);
    }
    is_name_continue_ascii(c)
}
/// Reports whether `c` may be the first character of an `NCName`.
///
/// ASCII characters are answered by `is_ncname_start_ascii`. Non-ASCII
/// characters go through `is_name_start_nonascii`, the same predicate used
/// for the first character of a `Name`.
#[must_use]
pub(super) fn is_ncname_start(c: char) -> bool {
    if !c.is_ascii() {
        return is_name_start_nonascii(c);
    }
    is_ncname_start_ascii(c)
}
/// Reports whether `c` may appear after the first character of an `NCName`.
///
/// ASCII characters are answered by `is_ncname_continue_ascii`. Non-ASCII
/// characters go through `is_name_continue_nonascii`, the same predicate used
/// for the later characters of a `Name`.
#[must_use]
pub(super) fn is_ncname_continue(c: char) -> bool {
    if !c.is_ascii() {
        return is_name_continue_nonascii(c);
    }
    is_ncname_continue_ascii(c)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Yields every ASCII character, U+0000 through U+007F inclusive.
    ///
    /// The range is inclusive so that the final table entry (DEL, U+007F) is
    /// checked as well.
    fn ascii_chars() -> impl Iterator<Item = char> {
        (0_u8..=0x7F).map(char::from)
    }

    /// Reference definition of the ASCII characters allowed to begin a `Name`.
    fn is_name_start_ascii_spec(c: char) -> bool {
        matches!(c, ':' | 'A'..='Z' | '_' | 'a'..='z')
    }

    /// Reference definition of the ASCII characters allowed after the first
    /// character of a `Name`.
    fn is_name_continue_ascii_spec(c: char) -> bool {
        matches!(c, ':' | 'A'..='Z' | '_' | 'a'..='z' | '-' | '.' | '0'..='9')
    }

    #[test]
    fn name_start_ascii() {
        for c in ascii_chars() {
            assert_eq!(
                is_name_start_ascii(c),
                is_name_start_ascii_spec(c),
                "test failed for {:?}",
                c
            );
        }
    }

    #[test]
    fn name_continue_ascii() {
        for c in ascii_chars() {
            assert_eq!(
                is_name_continue_ascii(c),
                is_name_continue_ascii_spec(c),
                "test failed for {:?}",
                c
            );
        }
    }

    #[test]
    fn ncname_start_ascii() {
        for c in ascii_chars() {
            // An NCName differs from a Name only by excluding the colon.
            assert_eq!(
                is_ncname_start_ascii(c),
                is_name_start_ascii_spec(c) && (c != ':'),
                "test failed for {:?}",
                c
            );
        }
    }

    #[test]
    fn ncname_continue_ascii() {
        for c in ascii_chars() {
            // An NCName differs from a Name only by excluding the colon.
            assert_eq!(
                is_ncname_continue_ascii(c),
                is_name_continue_ascii_spec(c) && (c != ':'),
                "test failed for {:?}",
                c
            );
        }
    }
}