// mxmlextrema_as3parser/parser/character_validator.rs

//use lazy_regex::{Lazy, Regex, lazy_regex};
use unicode_general_category::{get_general_category, GeneralCategory};

// pub(crate) static CR_OR_CRLF_REGEX: Lazy<Regex> = lazy_regex!(r"\r\n?");

/// The `CharacterValidator` structure defines static methods for character
/// validation.
///
/// Every method is an associated function taking a single `char` (or a
/// `&str` for [`CharacterValidator::indent_count`]); the type carries no
/// state and is never instantiated by the code in this block.
pub struct CharacterValidator;

impl CharacterValidator {
    /// Returns the count of indentation characters in a string.
    ///
    /// Counts the leading `char`s (not bytes) of `string` that satisfy
    /// [`CharacterValidator::is_whitespace`] and stops at the first character
    /// that does not. Returns `0` for an empty string.
    pub fn indent_count(string: &str) -> usize {
        string
            .chars()
            .take_while(|&ch| Self::is_whitespace(ch))
            .count()
    }

    /// Returns `true` if `ch` is a whitespace character.
    ///
    /// Whitespace is SPACE (U+0020), CHARACTER TABULATION (U+0009),
    /// LINE TABULATION (U+000B), FORM FEED (U+000C), NO-BREAK SPACE (U+00A0),
    /// or any character whose Unicode general category is `Zs`
    /// (space separator). Line terminators are not whitespace; see
    /// [`CharacterValidator::is_line_terminator`].
    pub fn is_whitespace(ch: char) -> bool {
        // U+000B (vertical tab) belongs to this set; U+0008 (backspace) is a
        // control character and deliberately does not.
        matches!(ch, '\x20' | '\x09' | '\x0B' | '\x0C' | '\u{A0}')
            || get_general_category(ch) == GeneralCategory::SpaceSeparator
    }

    /// Returns `true` if `ch` is a line terminator: LINE FEED (U+000A),
    /// CARRIAGE RETURN (U+000D), LINE SEPARATOR (U+2028) or
    /// PARAGRAPH SEPARATOR (U+2029).
    pub fn is_line_terminator(ch: char) -> bool {
        matches!(ch, '\x0A' | '\x0D' | '\u{2028}' | '\u{2029}')
    }

    /// Returns `true` if `ch` is the ASCII digit `0` or `1`.
    pub fn is_bin_digit(ch: char) -> bool {
        matches!(ch, '0' | '1')
    }

    /// Returns `true` if `ch` is an ASCII decimal digit (`0` through `9`).
    pub fn is_dec_digit(ch: char) -> bool {
        ch.is_ascii_digit()
    }

    /// Returns `true` if `ch` is an ASCII hexadecimal digit
    /// (`0`-`9`, `A`-`F` or `a`-`f`).
    pub fn is_hex_digit(ch: char) -> bool {
        ch.is_ascii_hexdigit()
    }

    /// Returns the mathematical value of a hexadecimal digit.
    ///
    /// Yields `Some(0..=15)` for `0`-`9`, `A`-`F` and `a`-`f`, and `None`
    /// for any other character.
    pub fn hex_digit_mv(ch: char) -> Option<u32> {
        ch.to_digit(16)
    }

    /// Returns the mathematical value of a binary digit.
    ///
    /// Yields `Some(0)` for `0`, `Some(1)` for `1`, and `None` for any other
    /// character.
    pub fn bin_digit_mv(ch: char) -> Option<u32> {
        ch.to_digit(2)
    }

    /// Returns `true` if `ch` may start a CSS identifier: an ASCII letter.
    pub fn is_css_identifier_start(ch: char) -> bool {
        ch.is_ascii_alphabetic()
    }

    /// Returns `true` if `ch` may continue a CSS identifier: an ASCII letter,
    /// an ASCII digit, `_` or `-`.
    pub fn is_css_identifier_part(ch: char) -> bool {
        ch.is_ascii_alphanumeric() || ch == '_' || ch == '-'
    }

    /// Returns `true` if `ch` may start an identifier.
    ///
    /// Accepts `_`, `$`, and any character in the Unicode general categories
    /// `Ll`, `Lu`, `Lm`, `Lo`, `Lt` or `Nl`.
    pub fn is_identifier_start(ch: char) -> bool {
        if ch == '_' || ch == '$' {
            return true;
        }
        matches!(
            get_general_category(ch),
            GeneralCategory::LowercaseLetter
                | GeneralCategory::UppercaseLetter
                | GeneralCategory::ModifierLetter
                | GeneralCategory::OtherLetter
                | GeneralCategory::TitlecaseLetter
                | GeneralCategory::LetterNumber
        )
    }

    /// Returns `true` if `ch` may continue an identifier.
    ///
    /// Accepts everything [`CharacterValidator::is_identifier_start`] accepts
    /// plus characters in the general categories `Mn`, `Mc`, `Pc` and `Nd`.
    pub fn is_identifier_part(ch: char) -> bool {
        if ch == '_' || ch == '$' {
            return true;
        }
        matches!(
            get_general_category(ch),
            GeneralCategory::LowercaseLetter
                | GeneralCategory::UppercaseLetter
                | GeneralCategory::ModifierLetter
                | GeneralCategory::OtherLetter
                | GeneralCategory::TitlecaseLetter
                | GeneralCategory::LetterNumber
                | GeneralCategory::NonspacingMark
                | GeneralCategory::SpacingMark
                | GeneralCategory::ConnectorPunctuation
                | GeneralCategory::DecimalNumber
        )
    }

    /// Returns `true` if `ch` may start an XML name.
    ///
    /// Accepts `_`, `:`, and any character in the Unicode general categories
    /// `Ll`, `Lu`, `Lm`, `Lo`, `Lt` or `Nl`.
    pub fn is_xml_name_start(ch: char) -> bool {
        if ch == '_' || ch == ':' {
            return true;
        }
        matches!(
            get_general_category(ch),
            GeneralCategory::LowercaseLetter
                | GeneralCategory::UppercaseLetter
                | GeneralCategory::ModifierLetter
                | GeneralCategory::OtherLetter
                | GeneralCategory::TitlecaseLetter
                | GeneralCategory::LetterNumber
        )
    }

    /// Returns `true` if `ch` may continue an XML name.
    ///
    /// Accepts `_`, `:`, `.`, `-`, and any character in the Unicode general
    /// categories `Ll`, `Lu`, `Lm`, `Lo`, `Lt`, `Nl` or `Nd`.
    pub fn is_xml_name_part(ch: char) -> bool {
        if matches!(ch, '_' | ':' | '.' | '-') {
            return true;
        }
        matches!(
            get_general_category(ch),
            GeneralCategory::LowercaseLetter
                | GeneralCategory::UppercaseLetter
                | GeneralCategory::ModifierLetter
                | GeneralCategory::OtherLetter
                | GeneralCategory::TitlecaseLetter
                | GeneralCategory::LetterNumber
                | GeneralCategory::DecimalNumber
        )
    }

    /// Returns `true` if `ch` is XML whitespace: SPACE (U+0020),
    /// CHARACTER TABULATION (U+0009), LINE FEED (U+000A) or
    /// CARRIAGE RETURN (U+000D).
    pub fn is_xml_whitespace(ch: char) -> bool {
        matches!(ch, '\x20' | '\x09' | '\x0A' | '\x0D')
    }
}