use once_cell::sync::Lazy;
use std::collections::HashSet;
#[allow(dead_code)]
#[derive(Clone, Copy, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub enum EcmaVersion {
ES5,
ES2015,
ES2016,
ES2017,
ES2018,
ES2019,
ES2020,
ES2021,
}
struct PatternVersions {
es2018: HashSet<&'static str>,
es2019: HashSet<&'static str>,
es2020: HashSet<&'static str>,
}
static GC_NAME_PATTERN: Lazy<HashSet<&'static str>> =
Lazy::new(|| ["General_Category", "gc"].iter().copied().collect());
static SC_NAME_PATTERN: Lazy<HashSet<&'static str>> = Lazy::new(|| {
["Script", "Script_Extensions", "sc", "scx"]
.iter()
.copied()
.collect()
});
static GC_VALUE_PATTERNS: Lazy<PatternVersions> = Lazy::new(|| PatternVersions {
es2018: [
"C",
"Cased_Letter",
"Cc",
"Cf",
"Close_Punctuation",
"Cn",
"Co",
"Combining_Mark",
"Connector_Punctuation",
"Control",
"Cs",
"Currency_Symbol",
"Dash_Punctuation",
"Decimal_Number",
"Enclosing_Mark",
"Final_Punctuation",
"Format",
"Initial_Punctuation",
"L",
"LC",
"Letter",
"Letter_Number",
"Line_Separator",
"Ll",
"Lm",
"Lo",
"Lowercase_Letter",
"Lt",
"Lu",
"M",
"Mark",
"Math_Symbol",
"Mc",
"Me",
"Mn",
"Modifier_Letter",
"Modifier_Symbol",
"N",
"Nd",
"Nl",
"No",
"Nonspacing_Mark",
"Number",
"Open_Punctuation",
"Other",
"Other_Letter",
"Other_Number",
"Other_Punctuation",
"Other_Symbol",
"P",
"Paragraph_Separator",
"Pc",
"Pd",
"Pe",
"Pf",
"Pi",
"Po",
"Private_Use",
"Ps",
"Punctuation",
"S",
"Sc",
"Separator",
"Sk",
"Sm",
"So",
"Space_Separator",
"Spacing_Mark",
"Surrogate",
"Symbol",
"Titlecase_Letter",
"Unassigned",
"Uppercase_Letter",
"Z",
"Zl",
"Zp",
"Zs",
"cntrl",
"digit",
"punct",
]
.iter()
.copied()
.collect(),
es2019: HashSet::new(),
es2020: HashSet::new(),
});
static SC_VALUE_PATTERNS: Lazy<PatternVersions> = Lazy::new(|| PatternVersions {
es2018: [
"Adlam",
"Adlm",
"Aghb",
"Ahom",
"Anatolian_Hieroglyphs",
"Arab",
"Arabic",
"Armenian",
"Armi",
"Armn",
"Avestan",
"Avst",
"Bali",
"Balinese",
"Bamu",
"Bamum",
"Bass",
"Bassa_Vah",
"Batak",
"Batk",
"Beng",
"Bengali",
"Bhaiksuki",
"Bhks",
"Bopo",
"Bopomofo",
"Brah",
"Brahmi",
"Brai",
"Braille",
"Bugi",
"Buginese",
"Buhd",
"Buhid",
"Cakm",
"Canadian_Aboriginal",
"Cans",
"Cari",
"Carian",
"Caucasian_Albanian",
"Chakma",
"Cham",
"Cher",
"Cherokee",
"Common",
"Copt",
"Coptic",
"Cprt",
"Cuneiform",
"Cypriot",
"Cyrillic",
"Cyrl",
"Deseret",
"Deva",
"Devanagari",
"Dsrt",
"Dupl",
"Duployan",
"Egyp",
"Egyptian_Hieroglyphs",
"Elba",
"Elbasan",
"Ethi",
"Ethiopic",
"Geor",
"Georgian",
"Glag",
"Glagolitic",
"Gonm",
"Goth",
"Gothic",
"Gran",
"Grantha",
"Greek",
"Grek",
"Gujarati",
"Gujr",
"Gurmukhi",
"Guru",
"Han",
"Hang",
"Hangul",
"Hani",
"Hano",
"Hanunoo",
"Hatr",
"Hatran",
"Hebr",
"Hebrew",
"Hira",
"Hiragana",
"Hluw",
"Hmng",
"Hung",
"Imperial_Aramaic",
"Inherited",
"Inscriptional_Pahlavi",
"Inscriptional_Parthian",
"Ital",
"Java",
"Javanese",
"Kaithi",
"Kali",
"Kana",
"Kannada",
"Katakana",
"Kayah_Li",
"Khar",
"Kharoshthi",
"Khmer",
"Khmr",
"Khoj",
"Khojki",
"Khudawadi",
"Knda",
"Kthi",
"Lana",
"Lao",
"Laoo",
"Latin",
"Latn",
"Lepc",
"Lepcha",
"Limb",
"Limbu",
"Lina",
"Linb",
"Linear_A",
"Linear_B",
"Lisu",
"Lyci",
"Lycian",
"Lydi",
"Lydian",
"Mahajani",
"Mahj",
"Malayalam",
"Mand",
"Mandaic",
"Mani",
"Manichaean",
"Marc",
"Marchen",
"Masaram_Gondi",
"Meetei_Mayek",
"Mend",
"Mende_Kikakui",
"Merc",
"Mero",
"Meroitic_Cursive",
"Meroitic_Hieroglyphs",
"Miao",
"Mlym",
"Modi",
"Mong",
"Mongolian",
"Mro",
"Mroo",
"Mtei",
"Mult",
"Multani",
"Myanmar",
"Mymr",
"Nabataean",
"Narb",
"Nbat",
"New_Tai_Lue",
"Newa",
"Nko",
"Nkoo",
"Nshu",
"Nushu",
"Ogam",
"Ogham",
"Ol_Chiki",
"Olck",
"Old_Hungarian",
"Old_Italic",
"Old_North_Arabian",
"Old_Permic",
"Old_Persian",
"Old_South_Arabian",
"Old_Turkic",
"Oriya",
"Orkh",
"Orya",
"Osage",
"Osge",
"Osma",
"Osmanya",
"Pahawh_Hmong",
"Palm",
"Palmyrene",
"Pau_Cin_Hau",
"Pauc",
"Perm",
"Phag",
"Phags_Pa",
"Phli",
"Phlp",
"Phnx",
"Phoenician",
"Plrd",
"Prti",
"Psalter_Pahlavi",
"Qaac",
"Qaai",
"Rejang",
"Rjng",
"Runic",
"Runr",
"Samaritan",
"Samr",
"Sarb",
"Saur",
"Saurashtra",
"Sgnw",
"Sharada",
"Shavian",
"Shaw",
"Shrd",
"Sidd",
"Siddham",
"SignWriting",
"Sind",
"Sinh",
"Sinhala",
"Sora",
"Sora_Sompeng",
"Soyo",
"Soyombo",
"Sund",
"Sundanese",
"Sylo",
"Syloti_Nagri",
"Syrc",
"Syriac",
"Tagalog",
"Tagb",
"Tagbanwa",
"Tai_Le",
"Tai_Tham",
"Tai_Viet",
"Takr",
"Takri",
"Tale",
"Talu",
"Tamil",
"Taml",
"Tang",
"Tangut",
"Tavt",
"Telu",
"Telugu",
"Tfng",
"Tglg",
"Thaa",
"Thaana",
"Thai",
"Tibetan",
"Tibt",
"Tifinagh",
"Tirh",
"Tirhuta",
"Ugar",
"Ugaritic",
"Vai",
"Vaii",
"Wara",
"Warang_Citi",
"Xpeo",
"Xsux",
"Yi",
"Yiii",
"Zanabazar_Square",
"Zanb",
"Zinh",
"Zyyy",
]
.iter()
.copied()
.collect(),
es2019: [
"Dogr",
"Dogra",
"Gong",
"Gunjala_Gondi",
"Hanifi_Rohingya",
"Maka",
"Makasar",
"Medefaidrin",
"Medf",
"Old_Sogdian",
"Rohg",
"Sogd",
"Sogdian",
"Sogo",
]
.iter()
.copied()
.collect(),
es2020: [
"Elym",
"Elymaic",
"Hmnp",
"Nand",
"Nandinagari",
"Nyiakeng_Puachue_Hmong",
"Wancho",
"Wcho",
]
.iter()
.copied()
.collect(),
});
static BIN_PROPERTY_PATTERNS: Lazy<PatternVersions> = Lazy::new(|| PatternVersions {
es2018: [
"AHex",
"ASCII",
"ASCII_Hex_Digit",
"Alpha",
"Alphabetic",
"Any",
"Assigned",
"Bidi_C",
"Bidi_Control",
"Bidi_M",
"Bidi_Mirrored",
"CI",
"CWCF",
"CWCM",
"CWKCF",
"CWL",
"CWT",
"CWU",
"Case_Ignorable",
"Cased",
"Changes_When_Casefolded",
"Changes_When_Casemapped",
"Changes_When_Lowercased",
"Changes_When_NFKC_Casefolded",
"Changes_When_Titlecased",
"Changes_When_Uppercased",
"DI",
"Dash",
"Default_Ignorable_Code_Point",
"Dep",
"Deprecated",
"Dia",
"Diacritic",
"Emoji",
"Emoji_Component",
"Emoji_Modifier",
"Emoji_Modifier_Base",
"Emoji_Presentation",
"Ext",
"Extender",
"Gr_Base",
"Gr_Ext",
"Grapheme_Base",
"Grapheme_Extend",
"Hex",
"Hex_Digit",
"IDC",
"IDS",
"IDSB",
"IDST",
"IDS_Binary_Operator",
"IDS_Trinary_Operator",
"ID_Continue",
"ID_Start",
"Ideo",
"Ideographic",
"Join_C",
"Join_Control",
"LOE",
"Logical_Order_Exception",
"Lower",
"Lowercase",
"Math",
"NChar",
"Noncharacter_Code_Point",
"Pat_Syn",
"Pat_WS",
"Pattern_Syntax",
"Pattern_White_Space",
"QMark",
"Quotation_Mark",
"RI",
"Radical",
"Regional_Indicator",
"SD",
"STerm",
"Sentence_Terminal",
"Soft_Dotted",
"Term",
"Terminal_Punctuation",
"UIdeo",
"Unified_Ideograph",
"Upper",
"Uppercase",
"VS",
"Variation_Selector",
"White_Space",
"XIDC",
"XIDS",
"XID_Continue",
"XID_Start",
"space",
]
.iter()
.copied()
.collect(),
es2019: ["Extended_Pictographic"].iter().copied().collect(),
es2020: HashSet::new(),
});
static LARGE_ID_START_RANGES: Lazy<Vec<u32>> = Lazy::new(|| {
restore_ranges(
"4q 0 b 0 5 0 6 m 2 u 2 cp 5 b f 4 8 0 2 0 3m 4 2 1 3 3 2 0 7 0 2 2 2 0 2 j 2 2a 2 3u 9 4l 2 11 3 0 7 14 20 q 5 3 1a 16 10 1 2 2q 2 0 g 1 8 1 b 2 3 0 h 0 2 t u 2g c 0 p w a 1 5 0 6 l 5 0 a 0 4 0 o o 8 a 1i k 2 h 1p 1h 4 0 j 0 8 9 g f 5 7 3 1 3 l 2 6 2 0 4 3 4 0 h 0 e 1 2 2 f 1 b 0 9 5 5 1 3 l 2 6 2 1 2 1 2 1 w 3 2 0 k 2 h 8 2 2 2 l 2 6 2 1 2 4 4 0 j 0 g 1 o 0 c 7 3 1 3 l 2 6 2 1 2 4 4 0 v 1 2 2 g 0 i 0 2 5 4 2 2 3 4 1 2 0 2 1 4 1 4 2 4 b n 0 1h 7 2 2 2 m 2 f 4 0 r 2 6 1 v 0 5 7 2 2 2 m 2 9 2 4 4 0 x 0 2 1 g 1 i 8 2 2 2 14 3 0 h 0 6 2 9 2 p 5 6 h 4 n 2 8 2 0 3 6 1n 1b 2 1 d 6 1n 1 2 0 2 4 2 n 2 0 2 9 2 1 a 0 3 4 2 0 m 3 x 0 1s 7 2 z s 4 38 16 l 0 h 5 5 3 4 0 4 1 8 2 5 c d 0 i 11 2 0 6 0 3 16 2 98 2 3 3 6 2 0 2 3 3 14 2 3 3 w 2 3 3 6 2 0 2 3 3 e 2 1k 2 3 3 1u 12 f h 2d 3 5 4 h7 3 g 2 p 6 22 4 a 8 c 2 3 f h f h f c 2 2 g 1f 10 0 5 0 1w 2g 8 14 2 0 6 1x b u 1e t 3 4 c 17 5 p 1j m a 1g 2b 0 2m 1a i 6 1k t e 1 b 17 r z 16 2 b z 3 8 8 16 3 2 16 3 2 5 2 1 4 0 6 5b 1t 7p 3 5 3 11 3 5 3 7 2 0 2 0 2 0 2 u 3 1g 2 6 2 0 4 2 2 6 4 3 3 5 5 c 6 2 2 6 39 0 e 0 h c 2u 0 5 0 3 9 2 0 3 5 7 0 2 0 2 0 2 f 3 3 6 4 5 0 i 14 22g 1a 2 1a 2 3o 7 3 4 1 d 11 2 0 6 0 3 1j 8 0 h m a 6 2 6 2 6 2 6 2 6 2 6 2 6 2 6 fb 2 q 8 8 4 3 4 5 2d 5 4 2 2h 2 3 6 16 2 2l i v 1d f e9 533 1t g70 4 wc 1w 19 3 7g 4 f b 1 l 1a h u 3 27 14 8 3 2u 3 1g 3 8 17 c 2 2 2 3 2 m u 1f f 1d 1r 5 4 0 2 1 c r b m q s 8 1a t 0 h 4 2 9 b 4 2 14 o 2 2 7 l m 4 0 4 1d 2 0 4 1 3 4 3 0 2 0 p 2 3 a 8 2 d 5 3 5 3 5 a 6 2 6 2 16 2 d 7 36 u 8mb d m 5 1c 6it a5 3 2x 13 6 d 4 6 0 2 9 2 c 2 4 2 0 2 1 2 1 2 2z y a2 j 1r 3 1h 15 b 39 4 2 3q 11 p 7 p c 2g 4 5 3 5 3 5 3 2 10 b 2 p 2 i 2 1 2 e 3 d z 3e 1y 1g 7g s 4 1c 1c v e t 6 11 b t 3 z 5 7 2 4 17 4d j z 5 z 5 13 9 1f 4d 8m a l b 7 49 5 3 0 2 17 2 1 4 0 3 m b m a u 1u i 2 1 b l b p 1z 1j 7 1 1t 0 g 3 2 2 2 s 17 s 4 s 10 7 2 r s 1h b l b i e h 33 20 1k 1e e 1e e z 9p 15 7 1 27 s b 0 9 l 2z k s m d 1g 24 18 x o r z u 0 3 0 9 y 4 0 d 1b f 3 m 0 2 0 10 h 2 o 2d 6 2 0 2 3 2 e 2 9 8 1a 13 7 3 1 3 l 2 6 2 1 2 4 4 0 j 0 d 4 4f 1g j 3 l 2 v 1b l 1 2 0 55 1a 16 3 11 1b l 0 1o 16 e 0 20 q 6e 17 39 1r w 7 3 0 3 7 2 1 2 n g 0 2 0 2n 7 3 12 h 0 2 0 t 0 b 13 8 0 m 0 c 19 k 0 z 1k 7c 8 2 10 i 0 1e t 35 6 2 1 2 11 m 0 q 5 2 1 2 v f 0 94 i 5a 0 28 pl 2v 32 i 5f 24d tq 34i g6 6nu fs 8 u 36 t j 1b h 3 w k 6 i j5 1r 3l 22 6 0 1v c 1t 1 2 0 t 4qf 9 yd 17 8 6wo 7y 1e 2 i 3 9 az 1s5 2y 6 c 4 8 8 9 4mf 2c 2 1y 2 1 3 0 3 1 3 3 2 b 2 0 2 6 2 1s 2 3 3 7 2 6 2 r 2 3 2 4 2 0 4 6 2 9f 3 o 2 o 2 u 2 o 2 u 2 o 2 u 2 o 2 u 2 o 2 7 1th 18 b 6 h 0 aa 17 105 5g 1o 1v 8 0 xh 3 2 q 2 1 2 0 3 0 2 9 2 3 2 0 2 0 7 0 5 0 2 0 2 0 2 2 2 1 2 0 3 0 2 0 2 0 2 0 2 0 2 1 2 0 3 3 2 6 2 3 2 3 2 0 2 9 2 g 6 2 2 4 2 g 3et wyl z 378 c 65 3 4g1 f 5rk 2e8 f1 15v 3t6",
)
});
static LARGE_ID_CONTINUE_RANGES: Lazy<Vec<u32>> = Lazy::new(|| {
restore_ranges(
"53 0 g9 33 o 0 70 4 7e 18 2 0 2 1 2 1 2 0 21 a 1d u 7 0 2u 6 3 5 3 1 2 3 3 9 o 0 v q 2k a g 9 y 8 a 0 p 3 2 8 2 2 2 4 18 2 3c e 2 w 1j 2 2 h 2 6 b 1 3 9 i 2 1l 0 2 6 3 1 3 2 a 0 b 1 3 9 f 0 3 2 1l 0 2 4 5 1 3 2 4 0 l b 4 0 c 2 1l 0 2 7 2 2 2 2 l 1 3 9 b 5 2 2 1l 0 2 6 3 1 3 2 8 2 b 1 3 9 j 0 1o 4 4 2 2 3 a 0 f 9 h 4 1m 6 2 2 2 3 8 1 c 1 3 9 i 2 1l 0 2 6 2 2 2 3 8 1 c 1 3 9 h 3 1k 1 2 6 2 2 2 3 a 0 b 1 3 9 i 2 1z 0 5 5 2 0 2 7 7 9 3 1 1q 0 3 6 d 7 2 9 2g 0 3 8 c 5 3 9 1r 1 7 9 c 0 2 0 2 0 5 1 1e j 2 1 6 a 2 z a 0 2t j 2 9 d 3 5 2 2 2 3 6 4 3 e b 2 e jk 2 a 8 pt 2 u 2 u 1 v 1 1t v a 0 3 9 y 2 3 9 40 0 3b b 5 b b 9 3l a 1p 4 1m 9 2 s 3 a 7 9 n d 2 1 1s 4 1c g c 9 i 8 d 2 v c 3 9 19 d 1d j 9 9 7 9 3b 2 2 k 5 0 7 0 3 2 5j 1l 2 4 g0 1 k 0 3g c 5 0 4 b 2db 2 3y 0 2p v ff 5 2y 1 n7q 9 1y 0 5 9 x 1 29 1 7l 0 4 0 5 0 o 4 5 0 2c 1 1f h b 9 7 h e a t 7 q c 19 3 1c d g 9 c 0 b 9 1c d d 0 9 1 3 9 y 2 1f 0 2 2 3 1 6 1 2 0 16 4 6 1 6l 7 2 1 3 9 fmt 0 ki f h f 4 1 p 2 5d 9 12 0 ji 0 6b 0 46 4 86 9 120 2 2 1 6 3 15 2 5 0 4m 1 fy 3 9 9 aa 1 4a a 4w 2 1i e w 9 g 3 1a a 1i 9 7 2 11 d 2 9 6 1 19 0 d 2 1d d 9 3 2 b 2b b 7 0 4h b 6 9 7 3 1k 1 2 6 3 1 3 2 a 0 b 1 3 6 4 4 5d h a 9 5 0 2a j d 9 5y 6 3 8 s 1 2b g g 9 2a c 9 9 2c e 5 9 6r e 4m 9 1z 5 2 1 3 3 2 0 2 1 d 9 3c 6 3 6 4 0 t 9 15 6 2 3 9 0 a a 1b f ba 7 2 7 h 9 1l l 2 d 3f 5 4 0 2 1 2 6 2 0 9 9 1d 4 2 1 2 4 9 9 96 3 ewa 9 3r 4 1o 6 q 9 s6 0 2 1i 8 3 2a 0 c 1 f58 1 43r 4 4 5 9 7 3 6 v 3 45 2 13e 1d e9 1i 5 1d 9 0 f 0 n 4 2 e 11t 6 2 g 3 6 2 1 2 4 7a 6 a 9 bn d 15j 6 32 6 6 9 3o7 9 gvt3 6n",
)
});
pub fn is_valid_unicode_property(version: EcmaVersion, name: &str, value: &str) -> bool {
if GC_NAME_PATTERN.contains(name)
&& version >= EcmaVersion::ES2018
&& GC_VALUE_PATTERNS.es2018.contains(value)
{
true
} else if SC_NAME_PATTERN.contains(name) {
(version >= EcmaVersion::ES2018 && SC_VALUE_PATTERNS.es2018.contains(value))
|| (version >= EcmaVersion::ES2019 && SC_VALUE_PATTERNS.es2019.contains(value))
|| (version >= EcmaVersion::ES2020 && SC_VALUE_PATTERNS.es2020.contains(value))
} else {
false
}
}
pub fn is_valid_lone_unicode_property(version: EcmaVersion, value: &str) -> bool {
(version >= EcmaVersion::ES2018 && BIN_PROPERTY_PATTERNS.es2018.contains(value))
|| (version >= EcmaVersion::ES2019 && BIN_PROPERTY_PATTERNS.es2019.contains(value))
}
pub fn is_large_id_start(cp: char) -> bool {
is_in_range(cp as u32, &LARGE_ID_START_RANGES)
}
pub fn is_large_id_continue(cp: char) -> bool {
is_in_range(cp as u32, &LARGE_ID_CONTINUE_RANGES)
}
fn is_in_range(cp: u32, ranges: &[u32]) -> bool {
let mut l = 0;
let mut r = ranges.len() / 2;
while l < r {
let i = (l + r) / 2;
let min = ranges[2 * i];
let max = ranges[2 * i + 1];
if cp < min {
r = i;
} else if cp > max {
l = i + 1;
} else {
return true;
}
}
false
}
fn restore_ranges(data: &str) -> Vec<u32> {
let mut last = 0;
data.split(' ')
.map(|s| {
last += u32::from_str_radix(s, 36).unwrap();
last
})
.collect()
}