// htmlparser/xmlchar.rs

/// Extension methods for XML-subset only operations on characters.
///
/// Each method tests a single value against one of the character-class
/// productions of the XML specification linked from its documentation.
pub trait XmlCharExt {
    /// Checks if the value is within the
    /// [NameStartChar](https://www.w3.org/TR/xml/#NT-NameStartChar) range,
    /// i.e. whether it may appear as the first character of an XML name.
    fn is_xml_name_start(&self) -> bool;

    /// Checks if the value is within the
    /// [NameChar](https://www.w3.org/TR/xml/#NT-NameChar) range,
    /// i.e. whether it may appear anywhere inside an XML name.
    fn is_xml_name(&self) -> bool;

    /// Checks if the value is within the
    /// [Char](https://www.w3.org/TR/xml/#NT-Char) range,
    /// i.e. whether it is allowed in an XML document at all.
    fn is_xml_char(&self) -> bool;
}
15
16impl XmlCharExt for char {
17    #[inline]
18    #[allow(clippy::match_like_matches_macro)]
19    fn is_xml_name_start(&self) -> bool {
20        // Check for ASCII first.
21        if *self as u32 <= 128 {
22            return matches!(*self as u8, b'A'...b'Z' | b'a'...b'z' | b':' | b'_');
23        }
24
25        match *self as u32 {
26            0x0000C0...0x0000D6
27            | 0x0000D8...0x0000F6
28            | 0x0000F8...0x0002FF
29            | 0x000370...0x00037D
30            | 0x00037F...0x001FFF
31            | 0x00200C...0x00200D
32            | 0x002070...0x00218F
33            | 0x002C00...0x002FEF
34            | 0x003001...0x00D7FF
35            | 0x00F900...0x00FDCF
36            | 0x00FDF0...0x00FFFD
37            | 0x010000...0x0EFFFF => true,
38            _ => false,
39        }
40    }
41
42    #[inline]
43    #[allow(clippy::match_like_matches_macro)]
44    fn is_xml_name(&self) -> bool {
45        // Check for ASCII first.
46        if *self as u32 <= 128 {
47            return (*self as u8).is_xml_name();
48        }
49
50        match *self as u32 {
51            0x0000B7
52            | 0x0000C0...0x0000D6
53            | 0x0000D8...0x0000F6
54            | 0x0000F8...0x0002FF
55            | 0x000300...0x00036F
56            | 0x000370...0x00037D
57            | 0x00037F...0x001FFF
58            | 0x00200C...0x00200D
59            | 0x00203F...0x002040
60            | 0x002070...0x00218F
61            | 0x002C00...0x002FEF
62            | 0x003001...0x00D7FF
63            | 0x00F900...0x00FDCF
64            | 0x00FDF0...0x00FFFD
65            | 0x010000...0x0EFFFF => true,
66            _ => false,
67        }
68    }
69
70    #[inline]
71    fn is_xml_char(&self) -> bool {
72        // Does not check for surrogate code points U+D800-U+DFFF,
73        // since that check was performed by Rust when the `&str` was constructed.
74        if (*self as u32) < 0x20 {
75            return (*self as u8).is_xml_space();
76        }
77        !matches!(*self as u32, 0xFFFF | 0xFFFE)
78    }
79}
80
/// Extension methods for XML-subset only operations on single bytes.
pub trait XmlByteExt {
    /// Checks if byte is an ASCII decimal digit.
    ///
    /// `[0-9]`
    fn is_xml_digit(&self) -> bool;

    /// Checks if byte is an ASCII hexadecimal digit.
    ///
    /// `[0-9A-Fa-f]`
    fn is_xml_hex_digit(&self) -> bool;

    /// Checks if byte is a space.
    ///
    /// `[ \r\n\t]`
    fn is_xml_space(&self) -> bool;

    /// Checks if byte is an ASCII letter.
    ///
    /// `[A-Za-z]`
    fn is_xml_letter(&self) -> bool;

    /// Checks if byte is within the ASCII part of the
    /// [NameChar](https://www.w3.org/TR/xml/#NT-NameChar) range.
    ///
    /// `[A-Za-z0-9:_.-]`
    fn is_xml_name(&self) -> bool;
}
107
108impl XmlByteExt for u8 {
109    #[inline]
110    fn is_xml_digit(&self) -> bool {
111        matches!(*self, b'0'...b'9')
112    }
113
114    #[inline]
115    fn is_xml_hex_digit(&self) -> bool {
116        matches!(*self, b'0'...b'9' | b'A'...b'F' | b'a'...b'f')
117    }
118
119    #[inline]
120    fn is_xml_space(&self) -> bool {
121        matches!(*self, b' ' | b'\t' | b'\n' | b'\r')
122    }
123
124    #[inline]
125    fn is_xml_letter(&self) -> bool {
126        matches!(*self, b'A'...b'Z' | b'a'...b'z')
127    }
128
129    #[inline]
130    fn is_xml_name(&self) -> bool {
131        matches!(*self, b'A'...b'Z' | b'a'...b'z'| b'0'...b'9'| b':' | b'_' | b'-' | b'.')
132    }
133}