1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
/// Extension methods for XML-subset only operations.
///
/// Classifies a single character against the character productions of the
/// W3C XML specification linked from each method.
pub trait XmlCharExt {
    /// Checks if the value is within the
    /// [NameStartChar](https://www.w3.org/TR/xml/#NT-NameStartChar) range.
    ///
    /// Returns `true` when the character may begin an XML name.
    fn is_xml_name_start(&self) -> bool;

    /// Checks if the value is within the
    /// [NameChar](https://www.w3.org/TR/xml/#NT-NameChar) range.
    ///
    /// Returns `true` when the character may appear anywhere in an XML name.
    fn is_xml_name(&self) -> bool;

    /// Checks if the value is within the
    /// [Char](https://www.w3.org/TR/xml/#NT-Char) range.
    ///
    /// Returns `true` when the character is allowed in an XML document at all.
    fn is_xml_char(&self) -> bool;
}

impl XmlCharExt for char {
    #[inline]
    fn is_xml_name_start(&self) -> bool {
        // Check for ASCII first: it is by far the most common input and
        // needs only a handful of comparisons.
        if self.is_ascii() {
            return matches!(*self, 'A'..='Z' | 'a'..='z' | ':' | '_');
        }

        // Non-ASCII part of the NameStartChar production, as code points.
        matches!(u32::from(*self), 0x0000C0..=0x0000D6
            | 0x0000D8..=0x0000F6
            | 0x0000F8..=0x0002FF
            | 0x000370..=0x00037D
            | 0x00037F..=0x001FFF
            | 0x00200C..=0x00200D
            | 0x002070..=0x00218F
            | 0x002C00..=0x002FEF
            | 0x003001..=0x00D7FF
            | 0x00F900..=0x00FDCF
            | 0x00FDF0..=0x00FFFD
            | 0x010000..=0x0EFFFF)
    }

    #[inline]
    fn is_xml_name(&self) -> bool {
        // NameChar ::= NameStartChar | "-" | "." | [0-9]
        //            | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
        //
        // Building on `is_xml_name_start` keeps a single copy of the large
        // range table. The resulting ASCII set (`[A-Za-z0-9:_.-]`) is the same
        // one `XmlByteExt::is_xml_name` accepts for `u8`.
        self.is_xml_name_start()
            || matches!(*self, '-' | '.' | '0'..='9')
            || matches!(u32::from(*self), 0x0000B7
                | 0x000300..=0x00036F
                | 0x00203F..=0x002040)
    }

    #[inline]
    fn is_xml_char(&self) -> bool {
        // Tab, line feed and carriage return are the only permitted code
        // points below U+0020; U+FFFE and U+FFFF are excluded.
        matches!(u32::from(*self), 0x000009
            | 0x00000A
            | 0x00000D
            | 0x000020..=0x00D7FF
            | 0x00E000..=0x00FFFD
            | 0x010000..=0x10FFFF)
    }
}

/// Extension methods for XML-subset only operations.
///
/// Byte-level (ASCII) classification helpers.
pub trait XmlByteExt {
    /// Checks if byte is a digit.
    ///
    /// `[0-9]`
    fn is_xml_digit(&self) -> bool;

    /// Checks if byte is a hex digit.
    ///
    /// `[0-9A-Fa-f]`
    fn is_xml_hex_digit(&self) -> bool;

    /// Checks if byte is a space.
    ///
    /// `[ \r\n\t]`
    fn is_xml_space(&self) -> bool;

    /// Checks if byte is an ASCII letter.
    ///
    /// `[A-Za-z]`
    fn is_xml_letter(&self) -> bool;

    /// Checks if byte is within the ASCII subset of the
    /// [NameChar](https://www.w3.org/TR/xml/#NT-NameChar) range.
    ///
    /// `[A-Za-z0-9:_.-]`
    fn is_xml_name(&self) -> bool;
}

impl XmlByteExt for u8 {
    #[inline]
    fn is_xml_digit(&self) -> bool {
        self.is_ascii_digit()
    }

    #[inline]
    fn is_xml_hex_digit(&self) -> bool {
        self.is_ascii_hexdigit()
    }

    #[inline]
    fn is_xml_space(&self) -> bool {
        // Deliberately not `is_ascii_whitespace`: that also accepts form feed
        // (0x0C), which is outside the `[ \r\n\t]` set documented above.
        matches!(*self, b' ' | b'\t' | b'\n' | b'\r')
    }

    #[inline]
    fn is_xml_letter(&self) -> bool {
        self.is_ascii_alphabetic()
    }

    #[inline]
    fn is_xml_name(&self) -> bool {
        self.is_ascii_alphanumeric() || matches!(*self, b':' | b'_' | b'-' | b'.')
    }
}