mxmlextrema_as3parser/parser/
character_validator.rs

1//use lazy_regex::{Lazy, Regex, lazy_regex};
2use unicode_general_category::{get_general_category, GeneralCategory};
3
4// pub(crate) static CR_OR_CRLF_REGEX: Lazy<Regex> = lazy_regex!(r"\r\n?");
5
6/// The `CharacterValidator` structure defines static methods for character
7/// validation.
8pub struct CharacterValidator;
9
10impl CharacterValidator {
11    /// Returns the count of indentation characters in a string.
12    pub fn indent_count(string: &str) -> usize {
13        let mut n: usize = 0;
14        for ch in string.chars() {
15            if !CharacterValidator::is_whitespace(ch) {
16                break;
17            }
18            n += 1;
19        }
20        n
21    }
22
23    pub fn is_whitespace(ch: char) -> bool {
24        if ch == '\x20' || ch == '\x09' || ch == '\x08'
25        || ch == '\x0C' || ch == '\u{A0}' {
26            return true;
27        }
28        let category = get_general_category(ch);
29        category == GeneralCategory::SpaceSeparator
30    }
31
32    pub fn is_line_terminator(ch: char) -> bool {
33        ch == '\x0A' || ch == '\x0D' || ch == '\u{2028}' || ch == '\u{2029}'
34    }
35
36    pub fn is_bin_digit(ch: char) -> bool {
37        ch == '\x30' || ch == '\x31'
38    }
39
40    pub fn is_dec_digit(ch: char) -> bool {
41        ch >= '\x30' && ch <= '\x39'
42    }
43
44    pub fn is_hex_digit(ch: char) -> bool {
45        CharacterValidator::is_dec_digit(ch) || (ch >= '\x41' && ch <= '\x46') || (ch >= '\x61' && ch <= '\x66')
46    }
47
48    /// Returns the mathematical value of a hexadecimal digit.
49    pub fn hex_digit_mv(ch: char) -> Option<u32> {
50        if ch >= 'A' && ch <= 'F' {
51            Some((ch as u32) - 0x41 + 10)
52        } else if ch >= 'a' && ch <= 'f' {
53            Some((ch as u32) - 0x61 + 10)
54        } else if ch >= '0' && ch <= '9' {
55            Some((ch as u32) - 0x30)
56        } else {
57            None
58        }
59    }
60
61    /// Returns the mathematical value of a binary digit.
62    pub fn bin_digit_mv(ch: char) -> Option<u32> {
63        if ch >= '0' && ch <= '1' {
64            Some((ch as u32) - 0x30)
65        } else {
66            None
67        }
68    }
69
70    pub fn is_css_identifier_start(ch: char) -> bool {
71        (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
72    }
73
74    pub fn is_css_identifier_part(ch: char) -> bool {
75        (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
76        (ch >= '0' && ch <= '9') || ch == '_' || ch == '-'
77    }
78
79    pub fn is_identifier_start(ch: char) -> bool {
80        if ch == '\x5f' || ch == '\x24' {
81            return true;
82        }
83        let category = get_general_category(ch);
84        [
85            GeneralCategory::LowercaseLetter,
86            GeneralCategory::UppercaseLetter,
87            GeneralCategory::ModifierLetter,
88            GeneralCategory::OtherLetter,
89            GeneralCategory::TitlecaseLetter,
90            GeneralCategory::LetterNumber,
91        ].contains(&category)
92    }
93
94    pub fn is_identifier_part(ch: char) -> bool {
95        if ch == '\x5f' || ch == '\x24' {
96            return true;
97        }
98        let category = get_general_category(ch);
99        [
100            GeneralCategory::LowercaseLetter,
101            GeneralCategory::UppercaseLetter,
102            GeneralCategory::ModifierLetter,
103            GeneralCategory::OtherLetter,
104            GeneralCategory::TitlecaseLetter,
105            GeneralCategory::LetterNumber,
106            GeneralCategory::NonspacingMark,
107            GeneralCategory::SpacingMark,
108            GeneralCategory::ConnectorPunctuation,
109            GeneralCategory::DecimalNumber,
110        ].contains(&category)
111    }
112
113    pub fn is_xml_name_start(ch: char) -> bool {
114        if ch == '\x5f' || ch == ':' {
115            return true;
116        }
117        let category = get_general_category(ch);
118        [
119            GeneralCategory::LowercaseLetter,
120            GeneralCategory::UppercaseLetter,
121            GeneralCategory::ModifierLetter,
122            GeneralCategory::OtherLetter,
123            GeneralCategory::TitlecaseLetter,
124            GeneralCategory::LetterNumber,
125        ].contains(&category)
126    }
127
128    pub fn is_xml_name_part(ch: char) -> bool {
129        if ch == '\x5f' || ch == ':' || ch == '.' || ch == '-' {
130            return true;
131        }
132        let category = get_general_category(ch);
133        [
134            GeneralCategory::LowercaseLetter,
135            GeneralCategory::UppercaseLetter,
136            GeneralCategory::ModifierLetter,
137            GeneralCategory::OtherLetter,
138            GeneralCategory::TitlecaseLetter,
139            GeneralCategory::LetterNumber,
140            GeneralCategory::DecimalNumber,
141        ].contains(&category)
142    }
143
144    pub fn is_xml_whitespace(ch: char) -> bool {
145        ch == '\x20' || ch == '\x09' || ch == '\x0A' || ch == '\x0D'
146    }
147}