/// Private scanning helper for tokens shaped like "one leading character,
/// then any number of follow-up characters".
trait StrParseExt {
    /// Measures a token at the very start of the string.
    ///
    /// `is_first` is applied to the first character and `is_rest` to the
    /// characters after it. The `&str` implementation in this file returns
    /// `None` when the string is empty or the first character is rejected,
    /// and otherwise the byte offset of the first character that `is_rest`
    /// rejects (or the string length when none is rejected).
    fn end_of_start_rest<F1: Fn(char) -> bool, F2: Fn(char) -> bool>(
        &self,
        is_first: F1,
        is_rest: F2,
    ) -> Option<usize>;
}
6
7impl<'a> StrParseExt for &'a str {
8 fn end_of_start_rest<F1, F2>(&self, is_first: F1, is_rest: F2) -> Option<usize>
9 where F1: Fn(char) -> bool,
10 F2: Fn(char) -> bool,
11 {
12 let mut positions = self.char_indices();
13
14 match positions.next() {
15 Some((_, c)) if is_first(c) => (),
16 Some((_, _)) => return None,
17 None => return None,
18 };
19
20 let mut positions = positions.skip_while(|&(_, c)| is_rest(c));
21 match positions.next() {
22 Some((offset, _)) => Some(offset),
23 None => Some(self.len()),
24 }
25 }
26}
27
/// String scanning helpers for locating where an XML construct ends.
///
/// Every method returns a byte offset into the string, or `None` when the
/// construct is not present. The per-method notes describe the `&str`
/// implementation provided in this file.
pub trait XmlStr {
    /// End of a run of attribute-value text: stops before the first `&`,
    /// `<`, or quote character. `None` if the string is empty or starts
    /// with one of those.
    fn end_of_attribute(&self, quote: &str) -> Option<usize>;

    /// End of a run of character data: stops before the first `<`, `&`, or
    /// `]]>`. `None` if one of those is at the very start.
    fn end_of_char_data(&self) -> Option<usize>;

    /// Offset of the first `]]>`, if any.
    fn end_of_cdata(&self) -> Option<usize>;

    /// End of a leading run of one or more decimal digits.
    fn end_of_decimal_chars(&self) -> Option<usize>;

    /// End of a leading run of one or more hexadecimal digits.
    fn end_of_hex_chars(&self) -> Option<usize>;

    /// Offset of the first `--`, if any.
    fn end_of_comment(&self) -> Option<usize>;

    /// Offset of the first `?>`, if any.
    fn end_of_pi_value(&self) -> Option<usize>;

    /// End of a leading name (name-start character followed by name
    /// characters).
    fn end_of_name(&self) -> Option<usize>;

    /// End of a leading non-colonized name.
    fn end_of_ncname(&self) -> Option<usize>;

    /// End of a leading run of one or more whitespace characters.
    fn end_of_space(&self) -> Option<usize>;

    /// Offset just past a leading `<` that is not followed by `?`, `!`,
    /// or `/`.
    fn end_of_start_tag(&self) -> Option<usize>;

    /// End of a leading encoding name.
    fn end_of_encoding(&self) -> Option<usize>;

    /// Offset of the first `]`, if any.
    fn end_of_int_subset(&self) -> Option<usize>;
}
55
56impl<'a> XmlStr for &'a str {
57 fn end_of_attribute(&self, quote: &str) -> Option<usize> {
58 if self.len() == 0 ||
59 self.starts_with('&') ||
60 self.starts_with('<') ||
61 self.starts_with(quote)
62 {
63 return None;
64 }
65
66 let quote_char = quote.chars().next().expect("Cant have null quote");
67
68 self.find(&['&', '<', quote_char][..]).or(Some(self.len()))
69 }
70
71 fn end_of_char_data(&self) -> Option<usize> {
72 fn find_end_of_char_data(bytes: &[u8]) -> Option<usize> {
73 for (i, &b) in bytes.iter().enumerate() {
74 if b == b'<' || b == b'&' { return Some(i) }
75
76 if b == b']' && bytes[i..].starts_with(b"]]>") {
77 return Some(i)
78 }
79 }
80 None
81 }
82
83 match find_end_of_char_data(self.as_bytes()) {
84 Some(0) => None,
85 Some(v) => Some(v),
86 None => Some(self.len()),
87 }
88 }
89
90 fn end_of_cdata(&self) -> Option<usize> {
91 self.find("]]>")
92 }
93
94 fn end_of_decimal_chars(&self) -> Option<usize> {
95 self.end_of_start_rest(|c| c.is_decimal_char(),
96 |c| c.is_decimal_char())
97 }
98
99 fn end_of_hex_chars(&self) -> Option<usize> {
100 self.end_of_start_rest(|c| c.is_hex_char(),
101 |c| c.is_hex_char())
102 }
103
104 fn end_of_comment(&self) -> Option<usize> {
105 self.find("--")
109 }
110
111 fn end_of_pi_value(&self) -> Option<usize> {
112 self.find("?>")
113 }
114
115 fn end_of_name(&self) -> Option<usize> {
116 self.end_of_start_rest(|c| c.is_name_start_char(), |c| c.is_name_char())
117 }
118
119 fn end_of_ncname(&self) -> Option<usize> {
120 self.end_of_start_rest(|c| c.is_ncname_start_char(), |c| c.is_ncname_char())
121 }
122
123 fn end_of_space(&self) -> Option<usize> {
124 self.end_of_start_rest(|c| c.is_space_char(), |c| c.is_space_char())
125 }
126
127 fn end_of_start_tag(&self) -> Option<usize> {
128 let mut positions = self.char_indices();
129
130 match positions.next() {
131 Some((_, c)) if '<' == c => (),
132 _ => return None,
133 };
134
135 match positions.next() {
136 Some((offset, c)) =>
137 match c {
138 '?' | '!' | '/' => None,
139 _ => Some(offset),
140 },
141 None => Some(self.len()),
142 }
143 }
144
145 fn end_of_encoding(&self) -> Option<usize> {
146 self.end_of_start_rest(|c| c.is_encoding_start_char(), |c| c.is_encoding_rest_char())
147 }
148
149 fn end_of_int_subset(&self) -> Option<usize> { self.find("]") }
150}
151
/// Character classification predicates used while scanning XML text.
///
/// The per-method notes describe the `char` implementation provided in this
/// file.
pub trait XmlChar {
    /// True for `:` and for any character that may start a non-colonized
    /// name.
    fn is_name_start_char(self) -> bool;

    /// True for any character that may appear inside a name.
    fn is_name_char(self) -> bool;

    /// True for any character that may start a non-colonized name.
    fn is_ncname_start_char(self) -> bool;

    /// True for any character that may appear inside a non-colonized name.
    fn is_ncname_char(self) -> bool;

    /// True for space, tab, carriage return, and line feed.
    fn is_space_char(self) -> bool;

    /// True for the ASCII digits `0` through `9`.
    fn is_decimal_char(self) -> bool;

    /// True for ASCII hexadecimal digits of either case.
    fn is_hex_char(self) -> bool;

    /// True for ASCII letters, which may start an encoding name.
    fn is_encoding_start_char(self) -> bool;

    /// True for ASCII letters, digits, `.`, `_`, and `-`.
    fn is_encoding_rest_char(self) -> bool;
}
169
170impl XmlChar for char {
171 fn is_name_start_char(self) -> bool {
172 self == ':' || self.is_ncname_start_char()
173 }
174
175 fn is_name_char(self) -> bool {
176 self.is_name_start_char() || self.is_ncname_char()
177 }
178
179 fn is_ncname_start_char(self) -> bool {
180 match self {
181 'A'...'Z' |
182 '_' |
183 'a'...'z' |
184 '\u{0000C0}'...'\u{0000D6}' |
185 '\u{0000D8}'...'\u{0000F6}' |
186 '\u{0000F8}'...'\u{0002FF}' |
187 '\u{000370}'...'\u{00037D}' |
188 '\u{00037F}'...'\u{001FFF}' |
189 '\u{00200C}'...'\u{00200D}' |
190 '\u{002070}'...'\u{00218F}' |
191 '\u{002C00}'...'\u{002FEF}' |
192 '\u{003001}'...'\u{00D7FF}' |
193 '\u{00F900}'...'\u{00FDCF}' |
194 '\u{00FDF0}'...'\u{00FFFD}' |
195 '\u{010000}'...'\u{0EFFFF}' => true,
196 _ => false,
197 }
198 }
199
200 fn is_ncname_char(self) -> bool {
201 if self.is_ncname_start_char() { return true; }
202 match self {
203 '-' |
204 '.' |
205 '0'...'9' |
206 '\u{00B7}' |
207 '\u{0300}'...'\u{036F}' |
208 '\u{203F}'...'\u{2040}' => true,
209 _ => false
210 }
211 }
212
213 fn is_space_char(self) -> bool {
214 match self {
215 '\x20' |
216 '\x09' |
217 '\x0D' |
218 '\x0A' => true,
219 _ => false,
220 }
221 }
222
223 fn is_decimal_char(self) -> bool {
224 match self {
225 '0'...'9' => true,
226 _ => false,
227 }
228 }
229
230 fn is_hex_char(self) -> bool {
231 match self {
232 '0'...'9' |
233 'a'...'f' |
234 'A'...'F' => true,
235 _ => false,
236 }
237 }
238
239 fn is_encoding_start_char(self) -> bool {
240 match self {
241 'A'...'Z' |
242 'a'...'z' => true,
243 _ => false,
244 }
245 }
246
247 fn is_encoding_rest_char(self) -> bool {
248 match self {
249 'A'...'Z' |
250 'a'...'z' |
251 '0'...'9' |
252 '.' |
253 '_' |
254 '-' => true,
255 _ => false,
256 }
257 }
258
259}
260
#[cfg(test)]
mod test {
    use super::XmlStr;

    // Character data cannot begin with a delimiter: there is nothing to
    // consume, so the scanner reports `None`.

    #[test]
    fn end_of_char_data_leading_ampersand() {
        let input = "&";
        assert_eq!(input.end_of_char_data(), None);
    }

    #[test]
    fn end_of_char_data_leading_less_than() {
        let input = "<";
        assert_eq!(input.end_of_char_data(), None);
    }

    #[test]
    fn end_of_char_data_leading_cdata_end() {
        let input = "]]>";
        assert_eq!(input.end_of_char_data(), None);
    }

    // Character data runs up to, but not including, the first delimiter.

    #[test]
    fn end_of_char_data_until_ampersand() {
        let expected_end = "hello".len();
        assert_eq!("hello&world".end_of_char_data(), Some(expected_end));
    }

    #[test]
    fn end_of_char_data_until_less_than() {
        let expected_end = "hello".len();
        assert_eq!("hello<world".end_of_char_data(), Some(expected_end));
    }

    #[test]
    fn end_of_char_data_until_cdata_end() {
        let expected_end = "hello".len();
        assert_eq!("hello]]>world".end_of_char_data(), Some(expected_end));
    }

    // Right square brackets only terminate character data as part of `]]>`.

    #[test]
    fn end_of_char_data_includes_right_square() {
        let input = "hello]world";
        assert_eq!(input.end_of_char_data(), Some(input.len()));
    }

    #[test]
    fn end_of_char_data_includes_multiple_right_squares() {
        let input = "hello]]world";
        assert_eq!(input.end_of_char_data(), Some(input.len()));
    }

    // The internal subset stops at the first right square bracket.

    #[test]
    fn end_of_int_subset_excludes_right_square() {
        let expected_end = "hello".len();
        assert_eq!("hello]>world".end_of_int_subset(), Some(expected_end));
    }
}