1use core::fmt;
8
/// A predicate over individual characters.
///
/// An implementor describes some set of characters; [`CharSelector::select`]
/// answers whether a given `char` belongs to that set.
pub trait CharSelector {
    /// Returns `true` if `c` is selected by `self`, and `false` otherwise.
    fn select(&self, c: char) -> bool;
}
16
/// Marker type for a selector that carries no state.
///
/// Its `CharSelector` implementation in this file accepts every character.
#[derive(Debug, Clone, Copy)]
pub struct AllChars();
20
21impl CharSelector for char {
22 fn select(&self, c: char) -> bool {
23 *self == c
24 }
25}
26
27impl CharSelector for &'_ [char] {
28 fn select(&self, c: char) -> bool {
29 for r in self.iter() {
30 if *r == c {
31 return true;
32 }
33 }
34 false
35 }
36}
37
38impl CharSelector for AllChars {
39 fn select(&self, _c: char) -> bool {
40 return true;
41 }
42}
43
/// A range of characters given by its first (`.0`) and last (`.1`) member.
///
/// Both endpoints are part of the range. The common value-type traits are
/// derived so ranges can be printed, copied and compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CodepointRange(pub char, pub char);
48
49pub const VALID_XML_CDATA_RANGES: &'static [CodepointRange] = &[
51 CodepointRange('\x09', '\x0a'),
52 CodepointRange('\x0d', '\x0d'),
53 CodepointRange('\u{0020}', '\u{d7ff}'),
54 CodepointRange('\u{e000}', '\u{fffd}'),
55 CodepointRange('\u{10000}', '\u{10ffff}'),
56];
57
58pub const INVALID_XML_CDATA_RANGES: &'static [CodepointRange] = &[
63 CodepointRange('\x00', '\x08'),
64 CodepointRange('\x0b', '\x0c'),
65 CodepointRange('\x0e', '\x1f'),
66 CodepointRange('\u{fffe}', '\u{ffff}'),
67];
68
69const VALID_XML_NAME_START_RANGES: &'static [CodepointRange] = &[
70 CodepointRange(':', ':'),
71 CodepointRange('A', 'Z'),
72 CodepointRange('_', '_'),
73 CodepointRange('a', 'z'),
74 CodepointRange('\u{c0}', '\u{d6}'),
75 CodepointRange('\u{d8}', '\u{f6}'),
76 CodepointRange('\u{f8}', '\u{2ff}'),
77 CodepointRange('\u{370}', '\u{37d}'),
78 CodepointRange('\u{37f}', '\u{1fff}'),
79 CodepointRange('\u{200c}', '\u{200d}'),
80 CodepointRange('\u{2070}', '\u{218f}'),
81 CodepointRange('\u{2c00}', '\u{2fef}'),
82 CodepointRange('\u{3001}', '\u{d7ff}'),
83 CodepointRange('\u{f900}', '\u{fdcf}'),
84 CodepointRange('\u{10000}', '\u{effff}'),
85];
86
87const VALID_XML_NAME_RANGES: &'static [CodepointRange] = &[
88 CodepointRange(':', ':'),
89 CodepointRange('-', '-'),
90 CodepointRange('.', '.'),
91 CodepointRange('A', 'Z'),
92 CodepointRange('_', '_'),
93 CodepointRange('0', '9'),
94 CodepointRange('a', 'z'),
95 CodepointRange('\u{b7}', '\u{b7}'),
96 CodepointRange('\u{c0}', '\u{d6}'),
97 CodepointRange('\u{d8}', '\u{f6}'),
98 CodepointRange('\u{f8}', '\u{2ff}'),
99 CodepointRange('\u{300}', '\u{36f}'),
100 CodepointRange('\u{370}', '\u{37d}'),
101 CodepointRange('\u{37f}', '\u{1fff}'),
102 CodepointRange('\u{200c}', '\u{200d}'),
103 CodepointRange('\u{203f}', '\u{2040}'),
104 CodepointRange('\u{2070}', '\u{218f}'),
105 CodepointRange('\u{2c00}', '\u{2fef}'),
106 CodepointRange('\u{3001}', '\u{d7ff}'),
107 CodepointRange('\u{f900}', '\u{fdcf}'),
108 CodepointRange('\u{10000}', '\u{effff}'),
109];
110
111impl CodepointRange {
112 pub fn contains(&self, c: char) -> bool {
115 return (self.0 <= c) && (c <= self.1);
116 }
117}
118
119#[derive(Copy)]
121pub struct CodepointRanges(pub &'static [CodepointRange]);
122
123pub static CLASS_XML_NAME: CodepointRanges = CodepointRanges(VALID_XML_NAME_RANGES);
125
126pub static CLASS_XML_NAMESTART: CodepointRanges = CodepointRanges(VALID_XML_NAME_START_RANGES);
128
129pub static CLASS_XML_NONCHAR: CodepointRanges = CodepointRanges(INVALID_XML_CDATA_RANGES);
131
132impl CharSelector for CodepointRange {
133 fn select(&self, c: char) -> bool {
134 self.contains(c)
135 }
136}
137
138impl CharSelector for CodepointRanges {
139 fn select(&self, c: char) -> bool {
140 contained_in_ranges(c, self.0)
141 }
142}
143
144pub fn contained_in_ranges(c: char, rs: &[CodepointRange]) -> bool {
147 for r in rs.iter() {
148 if r.contains(c) {
149 return true;
150 }
151 }
152 false
153}
154
155impl fmt::Debug for CodepointRanges {
156 fn fmt<'f>(&self, f: &'f mut fmt::Formatter) -> fmt::Result {
157 write!(f, "CodepointRanges(<{} ranges>)", self.0.len())
158 }
159}
160
161impl Clone for CodepointRanges {
162 fn clone(&self) -> Self {
163 CodepointRanges(self.0)
164 }
165}
166
167impl PartialEq for CodepointRanges {
168 fn eq(&self, other: &CodepointRanges) -> bool {
169 core::ptr::eq(&self.0, &other.0)
170 }
171}
172
#[cfg(test)]
mod tests {
    use super::*;

    /// Walks every Unicode scalar value and checks that the valid and
    /// invalid CDATA tables disagree about each one, i.e. that every
    /// character is selected by exactly one of the two tables.
    #[test]
    fn cdata_inclusion_and_exclusion_are_equivalent() {
        let includer = CodepointRanges(VALID_XML_CDATA_RANGES);
        let excluder = CodepointRanges(INVALID_XML_CDATA_RANGES);
        // `from_u32` yields `None` for surrogates, which are not `char`s.
        for ch in (0x0..=0x10ffffu32).filter_map(core::char::from_u32) {
            let accepted = includer.select(ch);
            let rejected = excluder.select(ch);
            assert!(
                accepted != rejected,
                "INVALID_XML_CDATA_RANGES and VALID_XML_CDATA_RANGES have different opinions about U+{:x}",
                ch as u32
            );
        }
    }
}