xml_tokens/
lib.rs

1pub mod serializer;
2
3use std::char;
4
// ---------------------------------------------------------------------
// Literal character sequences matched against the input.
// ---------------------------------------------------------------------

// Single-character delimiters.
const STAG_START: char = '<';
const STAG_END: char = '>';
const ETAG_END: char = STAG_END;
const DOCTYPE_END: char = STAG_END;
const EQUALS: char = '=';
const SINGLE_QUOTE: char = '\'';
const DOUBLE_QUOTE: char = '"';
const HYPHEN: char = '-';
const COLON: char = ':';
const ENTITY_REFERENCE_START: char = '&';
const REFERENCE_END: char = ';';

// XML declaration keywords and delimiters.
const XML_DECL_START: [char; 5] = ['<', '?', 'x', 'm', 'l'];
const XML_DECL_VERSION: [char; 7] = ['v', 'e', 'r', 's', 'i', 'o', 'n'];
const XML_DECL_VERSION_PREFIX: [char; 2] = ['1', '.'];
const XML_DECL_ENCODING: [char; 8] = ['e', 'n', 'c', 'o', 'd', 'i', 'n', 'g'];
const XML_DECL_STANDALONE: [char; 10] = ['s', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e'];
const XML_DECL_END: [char; 2] = ['?', '>'];
const YES: [char; 3] = ['y', 'e', 's'];
const NO: [char; 2] = ['n', 'o'];

// Comment, CDATA, DOCTYPE and processing-instruction delimiters.
const COMMENT_START: [char; 4] = ['<', '!', '-', '-'];
const COMMENT_END: [char; 3] = ['-', '-', '>'];
const CDATA_START: [char; 9] = ['<', '!', '[', 'C', 'D', 'A', 'T', 'A', '['];
const CDATA_END: [char; 3] = [']', ']', '>'];
const DOCTYPE_START: [char; 9] = ['<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E'];
const PI_START: [char; 2] = ['<', '?'];
// A processing instruction closes with the same `?>` as the XML declaration.
const PI_END: [char; 2] = XML_DECL_END;

// Element tag delimiters and the namespace attribute keyword.
const ETAG_START: [char; 2] = ['<', '/'];
const EMPTY_TAG_END: [char; 2] = ['/', '>'];
const XMLNS: [char; 5] = ['x', 'm', 'l', 'n', 's'];

// Character reference openers (`&#x` and `&#`).
const HEXIDECIMAL_CHAR_REFERENCE_START: [char; 3] = ['&', '#', 'x'];
const DECIMAL_CHAR_REFERENCE_START: [char; 2] = ['&', '#'];
36
37/// Token representing an "atomic unit" of an XML document or XML
38/// fragment.
39#[derive(Debug, PartialEq)]
40pub enum Token {
41    XMLDeclStart,
42    XMLVersion(XMLVersion),
43    XMLEncoding(EncName),
44    XMLStandalone(bool),
45    XMLDeclEnd,
46    DoctypeDeclStart,
47    DoctypeName(Name),
48    DoctypeDeclEnd,
49    Comment(Comment),
50    PIStart,
51    PITarget(PITarget),
52    PIData(PIData),
53    PIEnd,
54    ElementStart(QName),
55    ElementEmptyEnd,
56    ElementSTagEnd,
57    ElementEnd(QName),
58    AttributeStart,
59    AttributeName(QName),
60    AttributeValueStart, // TODO Deprecate????
61    AttributeValue(AttributeValue),
62    AttributeValueEnd, // TODO Deprecate?
63    AttributeEnd,
64    NamespaceStart,
65    NamespaceDefault,
66    NamespacePrefix(NCName),
67    NamespaceValue(NamespaceValue),
68    NamespaceEnd,
69    Text(Text),
70    CDATASection(CDATASection),
71    EntityRef(Name),
72    DecCharRef(DecCharRef),
73    HexCharRef(HexCharRef),
74}
75
/// The XML version in use; this is the payload of the
/// `Token::XMLVersion` variant.
///
/// Two versions exist, 1.0 and 1.1. They differ mainly in which
/// characters are permitted: XML 1.0 is conservative and XML 1.1 is
/// liberal. Details are in the [XML 1.0](https://www.w3.org/TR/xml)
/// and [XML 1.1](https://www.w3.org/TR/xml11) specifications, in
/// particular the ["Rationale and list of changes for XML
/// 1.1"](https://www.w3.org/TR/xml11/#sec-xml11).
#[derive(Debug, PartialEq)]
pub enum XMLVersion {
    /// XML 1.0.
    Version1_0,
    /// XML 1.1.
    Version1_1,
}
91
/// The name of the character encoding in use; this is the payload of
/// the `Token::XMLEncoding` variant.
///
/// The grammar is given by the [XML 1.1
/// specification](https://www.w3.org/TR/xml11/#NT-EncName).
#[derive(Debug, PartialEq)]
pub struct EncName {
    enc_name: String,
}
impl EncName {
    /// Wraps `enc_name` as an `EncName`. No check is made that it
    /// conforms to the [XML
    /// specification](https://www.w3.org/TR/xml11/#NT-EncName).
    fn new_unvalidated(enc_name: String) -> EncName {
        Self { enc_name }
    }

    /// Returns `true` if `c` may begin an `EncName`, i.e. it is an
    /// ASCII letter (see the [XML
    /// specification](https://www.w3.org/TR/xml11/#NT-EncName)).
    fn is_valid_start_char(c: char) -> bool {
        c.is_ascii_alphabetic()
    }

    /// Returns `true` if `c` may follow the first character of an
    /// `EncName`, i.e. it is an ASCII letter, an ASCII digit, `.`,
    /// `_` or `-` (see the [XML
    /// specification](https://www.w3.org/TR/xml11/#NT-EncName)).
    fn is_valid_char(c: char) -> bool {
        c.is_ascii_alphanumeric() || c == '.' || c == '_' || c == '-'
    }

    /// Borrows the encoding name as a `&str`.
    pub fn get_as_str(&self) -> &str {
        self.enc_name.as_str()
    }
}
138
/// An XML `Name`; this is the payload of the `Token::DoctypeName` and
/// `Token::EntityRef` variants. It differs from `QName`, which comes
/// from the XML Namespaces specification and is what elements and
/// attributes use.
///
/// The grammar is given by the [XML 1.1
/// specification](https://www.w3.org/TR/xml11/#NT-NameStartChar).
#[derive(Debug, PartialEq)]
pub struct Name {
    name: String,
}
impl Name {
    /// Wraps `name` as a `Name`. No check is made that it conforms to
    /// the [XML specification](https://www.w3.org/TR/xml11/#NT-NameStartChar).
    fn new_unvalidated(name: String) -> Name {
        Self { name }
    }

    /// Returns `true` if `c` may begin a `Name` (a `NameStartChar` in
    /// the [XML
    /// specification](https://www.w3.org/TR/xml11/#NT-NameStartChar)).
    fn is_valid_start_char(c: char) -> bool {
        // Inclusive `(first, last)` bounds of every accepted range.
        const START_RANGES: [(char, char); 16] = [
            (':', ':'),
            ('A', 'Z'),
            ('_', '_'),
            ('a', 'z'),
            ('\u{C0}', '\u{D6}'),
            ('\u{D8}', '\u{F6}'),
            ('\u{F8}', '\u{2FF}'),
            ('\u{370}', '\u{37D}'),
            ('\u{37F}', '\u{1FFF}'),
            ('\u{200C}', '\u{200D}'),
            ('\u{2070}', '\u{218F}'),
            ('\u{2C00}', '\u{2FEF}'),
            ('\u{3001}', '\u{D7FF}'),
            ('\u{F900}', '\u{FDCF}'),
            ('\u{FDF0}', '\u{FFFD}'),
            ('\u{10000}', '\u{EFFFF}'),
        ];

        START_RANGES
            .iter()
            .any(|&(first, last)| first <= c && c <= last)
    }

    /// Returns `true` if `c` may follow the first character of a
    /// `Name` (a `NameChar` in the [XML
    /// specification](https://www.w3.org/TR/xml11/#NT-NameChar)).
    fn is_valid_char(c: char) -> bool {
        // Every start character is also a legal subsequent character.
        if Name::is_valid_start_char(c) {
            return true;
        }

        // The characters that are legal only after the first position.
        match c {
            '-' | '.' | '\u{B7}' => true,
            '0'..='9' => true,
            '\u{300}'..='\u{36F}' => true,
            '\u{203F}'..='\u{2040}' => true,
            _ => false,
        }
    }

    /// Borrows the name as a `&str`.
    pub fn get_as_str(&self) -> &str {
        self.name.as_str()
    }
}
213
214/// Represents an XML comment, contained by the `Token::Comment` variant.
215///
216/// See the [XML 1.1
217/// specification](https://www.w3.org/TR/xml11/#NT-Comment) for
218/// details.
219#[derive(Debug, PartialEq)]
220pub struct Comment {
221    comment: String,
222}
223impl Comment {
224    /// Create a `Comment` without validating that `comment` conforms
225    /// to the [XML specification](https://www.w3.org/TR/xml11/#NT-Comment).
226    fn new_unvalidated(comment: String) -> Comment {
227        Comment { comment }
228    }
229
230    /// Returns `true` if `c` is a legal character for a `Comment`
231    /// excluding the hyphen `-`. This is because the double hyphen
232    /// character sequence `--` must not appear in comments. See the
233    /// [XML specification](https://www.w3.org/TR/xml11/#NT-Comment)
234    /// for details.
235    fn is_valid_char_minus_hyphen(c: char, version: &XMLVersion) -> bool {
236        if c == HYPHEN {
237            return false;
238        }
239
240        is_xml_char(c, version)
241    }
242
243    /// Returns the comment as a `&str`.
244    pub fn get_as_str(&self) -> &str {
245        &self.comment
246    }
247}
248
249/// Check is `c` is a valid `Char` in the specified XML version.
250///
251/// See the [XML 1.0](https://www.w3.org/TR/xml11/#NT-Char) and [XML
252/// 1.1](https://www.w3.org/TR/xml11/#NT-Char) specifications for more
253/// details.
254fn is_xml_char(c: char, version: &XMLVersion) -> bool {
255    match version {
256        XMLVersion::Version1_0 => match c {
257            '\u{9}' | '\u{A}' | '\u{D}' => true,
258            '\u{20}'..='\u{D7FF}' => true,
259            '\u{E000}'..='\u{FFFD}' => true,
260            '\u{10000}'..='\u{10FFFF}' => true,
261            _ => false,
262        },
263        XMLVersion::Version1_1 => match c {
264            '\u{1}'..='\u{D7FF}' => true,
265            '\u{E000}'..='\u{FFFD}' => true,
266            '\u{10000}'..='\u{10FFFF}' => true,
267            _ => false,
268        },
269    }
270}
271
272/// Represents a processing instruction target, contained by the
273/// `Token::PITarget` variant.
274///
275/// A processing instruction target can be any `Name` with the
276/// exclusion of the reserved `xml` or any case variation thereof. See
277/// the [XML 1.1
278/// specification](https://www.w3.org/TR/xml11/#NT-PITarget) for
279/// details.
280#[derive(Debug, PartialEq)]
281pub struct PITarget {
282    target: String,
283}
284impl PITarget {
285    /// Create a `PITarget` without validating that `target` conforms
286    /// to the [XML specification](https://www.w3.org/TR/xml11/#NT-PITarget).
287    fn new_unvalidated(target: String) -> PITarget {
288        PITarget { target }
289    }
290
291    /// Returns `true` if `c` is a legal start character for a
292    /// processing instruction target.
293    fn is_valid_start_char(c: char) -> bool {
294        Name::is_valid_start_char(c)
295    }
296
297    /// Returns `true` if `c` is a legal subsequent character for a
298    /// processing instruction target.
299    fn is_valid_char(c: char) -> bool {
300        Name::is_valid_char(c)
301    }
302
303    /// Returns the target as a  `&str`.
304    pub fn get_as_str(&self) -> &str {
305        &self.target
306    }
307}
308
309/// Represents processing instruction data, contained by the
310/// `Token::PIData` variant.
311///
312/// See the [XML 1.1
313/// specification](https://www.w3.org/TR/xml11/#NT-PI) for details.
314#[derive(Debug, PartialEq)]
315pub struct PIData {
316    data: String,
317}
318impl PIData {
319    /// Create a `PIData` without validating that `data` conforms
320    /// to the [XML specification](https://www.w3.org/TR/xml11/#NT-PI).
321    pub fn new_unvalidated(data: String) -> PIData {
322        PIData { data }
323    }
324
325    /// Returns `true` if `c` is a legal start character for
326    /// processing instruction data. Processing instruction data, when
327    /// it exists must start with whitespace. See the [XML 1.1
328    /// specification](https://www.w3.org/TR/xml11/#NT-PI) for details.
329    fn is_valid_start_char(c: char) -> bool {
330        is_whitespace(c)
331    }
332
333    /// Returns `true` if `c` is a legal subsequent character for
334    /// processing instruction data. Processing instruction data must
335    /// not include the closing character sequence '?>', but this is
336    /// not checked by this method.
337    fn is_valid_char(c: char, version: &XMLVersion) -> bool {
338        is_xml_char(c, version)
339    }
340
341    /// Returns the data as a  `&str`.
342    pub fn get_as_str(&self) -> &str {
343        &self.data
344    }
345}
346
/// Returns `true` if `c` is one of the four whitespace characters
/// (space, tab, line feed, carriage return) defined by the XML 1.0
/// and [XML 1.1](https://www.w3.org/TR/xml11/#NT-S) specifications.
fn is_whitespace(c: char) -> bool {
    const XML_WHITESPACE: [char; 4] = ['\u{20}', '\u{9}', '\u{A}', '\u{D}'];

    XML_WHITESPACE.contains(&c)
}
355
356/// An XML QName is contained by the `Token::ElementStart`, `Token::ElementEnd` and
357/// `Token::AttributeName` variants.
358///
359/// See the ["Namespaces in XML 1.1"
360/// specification](https://www.w3.org/TR/xml-names11/) for details.
361#[derive(Debug, PartialEq)]
362pub struct QName {
363    prefix: Option<String>,
364    local_part: String,
365}
366impl QName {
367    /// Create a `QName` without validating that `prefix` and
368    /// `local_part` conform to the ["Namespaces in XML 1.1"
369    /// specification](https://www.w3.org/TR/xml-names11/).
370    fn new_unvalidated(prefix: Option<String>, local_part: String) -> QName {
371        QName { prefix, local_part }
372    }
373
374    /// Returns `true` if `c` is a legal `NCNameStartChar` as defined
375    /// by the ["Namespaces in XML 1.1"
376    /// specification](https://www.w3.org/TR/xml-names11/#NT-NameStartChar).
377    fn is_valid_start_char(c: char) -> bool {
378        NCName::is_valid_start_char(c)
379    }
380
381    /// Returns `true` if `c` is a legal `NCNameChar` as defined by
382    /// the ["Namespaces in XML 1.1"
383    /// specification](https://www.w3.org/TR/xml11/#NT-NameChar).
384    fn is_valid_char(c: char) -> bool {
385        NCName::is_valid_char(c)
386    }
387
388    /// Returns the qname prefix as a `&str`.
389    pub fn get_prefix_as_str(&self) -> Option<&str> {
390        match &self.prefix {
391            Some(prefix) => Some(&prefix),
392            None => None,
393        }
394    }
395
396    /// Returns the qname local_part as a `&str`.
397    pub fn get_local_part_as_str(&self) -> &str {
398        &self.local_part
399    }
400}
401
402/// An `AttributeValue` is contained by the `Token::AttributeValue` variants.
403///
404/// See the [XML 1.1 specification](https://www.w3.org/TR/xml11/#NT-AttValue) for details.
405#[derive(Debug, PartialEq)]
406pub struct AttributeValue {
407    value: String,
408}
409impl AttributeValue {
410    /// Create a `AttributeValue` without validating that `value`
411    /// conforms to the [XML 1.1
412    /// specification](https://www.w3.org/TR/xml11/#NT-AttValue).
413    fn new_unvalidated(value: String) -> AttributeValue {
414        AttributeValue { value }
415    }
416
417    /// Returns `true` if `c` is a legal `AttributeValue` character
418    /// when it is inside single quotes.
419    fn is_valid_inside_single_quotes_char(c: char) -> bool {
420        match c {
421            STAG_START | ENTITY_REFERENCE_START | SINGLE_QUOTE => false,
422            _ => true,
423        }
424    }
425
426    /// Returns `true` if `c` is a legal `AttributeValue` character
427    /// when it is inside double quotes.
428    fn is_valid_inside_double_quotes_char(c: char) -> bool {
429        match c {
430            STAG_START | ENTITY_REFERENCE_START | DOUBLE_QUOTE => false,
431            _ => true,
432        }
433    }
434
435    /// Returns the attribute value as a `&str`.
436    pub fn get_as_str(&self) -> &str {
437        &self.value
438    }
439}
440
/// A non-colonized name; this is the payload of the
/// `Token::NamespacePrefix` variant.
///
/// See the ["Namespaces in XML 1.1"
/// specification](https://www.w3.org/TR/xml-names11/#NT-NCName) for
/// details.
#[derive(Debug, PartialEq)]
pub struct NCName {
    nc_name: String,
}
impl NCName {
    /// Wraps `nc_name` as an `NCName`. No check is made that it
    /// conforms to the ["Namespaces in XML 1.1"
    /// specification](https://www.w3.org/TR/xml-names11/#NT-NCName).
    fn new_unvalidated(nc_name: String) -> NCName {
        Self { nc_name }
    }

    /// Returns `true` if `c` is a legal `NCNameStartChar` as defined
    /// by the ["Namespaces in XML 1.1"
    /// specification](https://www.w3.org/TR/xml-names11/#NT-NCName).
    /// The colon is not accepted.
    fn is_valid_start_char(c: char) -> bool {
        // Inclusive `(first, last)` bounds of every accepted range.
        const START_RANGES: [(char, char); 15] = [
            ('A', 'Z'),
            ('_', '_'),
            ('a', 'z'),
            ('\u{C0}', '\u{D6}'),
            ('\u{D8}', '\u{F6}'),
            ('\u{F8}', '\u{2FF}'),
            ('\u{370}', '\u{37D}'),
            ('\u{37F}', '\u{1FFF}'),
            ('\u{200C}', '\u{200D}'),
            ('\u{2070}', '\u{218F}'),
            ('\u{2C00}', '\u{2FEF}'),
            ('\u{3001}', '\u{D7FF}'),
            ('\u{F900}', '\u{FDCF}'),
            ('\u{FDF0}', '\u{FFFD}'),
            ('\u{10000}', '\u{EFFFF}'),
        ];

        START_RANGES
            .iter()
            .any(|&(first, last)| first <= c && c <= last)
    }

    /// Returns `true` if `c` is a legal `NCNameChar` as defined by
    /// the ["Namespaces in XML 1.1"
    /// specification](https://www.w3.org/TR/xml-names11/#NT-NCName).
    /// The colon is not accepted.
    fn is_valid_char(c: char) -> bool {
        // Every start character is also a legal subsequent character.
        if NCName::is_valid_start_char(c) {
            return true;
        }

        // The characters that are legal only after the first position.
        match c {
            '-' | '.' | '\u{B7}' => true,
            '0'..='9' => true,
            '\u{300}'..='\u{36F}' => true,
            '\u{203F}'..='\u{2040}' => true,
            _ => false,
        }
    }

    /// Borrows the name as a `&str`.
    pub fn get_as_str(&self) -> &str {
        self.nc_name.as_str()
    }
}
513
514/// A `NamespaceValue` is contained by the `Token::NamespaceValue`
515/// variant. In XML 1.0 the contained value must be a URI, in XML 1.1
516/// the contained value must be an IRI.
517#[derive(Debug, PartialEq)]
518pub struct NamespaceValue {
519    value: String,
520}
521impl NamespaceValue {
522    /// Create a `NamespaceValue` without validating that `value`
523    /// conforms to a URI (for XML 1.0) or an IRI (for XML 1.1).
524    fn new_unvalidated(value: String) -> NamespaceValue {
525        NamespaceValue { value }
526    }
527
528    /// Returns `true` if `version` corresponds to XML 1.0 and `c` is
529    /// a legal `URI` character. If version corresponds to XML 1.1
530    /// then `c` is checked to see if it is a legal `IRI` character.
531    fn is_valid_char(c: char, version: &XMLVersion) -> bool {
532        match version {
533            XMLVersion::Version1_0 => match c {
534                // reserved gen-delims
535                ':' | '/' | '?' | '#' | '[' | ']' | '@' => true,
536                // reserved sub-delims
537                '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '=' => true,
538                // unreserved
539                'a'..='z' => true,
540                'A'..='Z' => true,
541                '0'..='9' => true,
542                '-' | '.' | '_' | '~' => true,
543                _ => false,
544            },
545            XMLVersion::Version1_1 => match c {
546                // reserved gen-delims
547                ':' | '/' | '?' | '#' | '[' | ']' | '@' => true,
548                // reserved sub-delims
549                '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '=' => true,
550                // unreserved
551                'a'..='z' => true,
552                'A'..='Z' => true,
553                '0'..='9' => true,
554                '-' | '.' | '_' | '~' => true,
555                // usc char
556                '\u{A0}'..='\u{D7FF}' => true,
557                '\u{F900}'..='\u{FDCF}' => true,
558                '\u{FDF0}'..='\u{FFEF}' => true,
559                '\u{10000}'..='\u{1FFFD}' => true,
560                '\u{20000}'..='\u{2FFFD}' => true,
561                '\u{30000}'..='\u{3FFFD}' => true,
562                '\u{40000}'..='\u{4FFFD}' => true,
563                '\u{50000}'..='\u{5FFFD}' => true,
564                '\u{60000}'..='\u{6FFFD}' => true,
565                '\u{70000}'..='\u{7FFFD}' => true,
566                '\u{80000}'..='\u{8FFFD}' => true,
567                '\u{90000}'..='\u{9FFFD}' => true,
568                '\u{A0000}'..='\u{AFFFD}' => true,
569                '\u{B0000}'..='\u{BFFFD}' => true,
570                '\u{C0000}'..='\u{CFFFD}' => true,
571                '\u{D0000}'..='\u{DFFFD}' => true,
572                '\u{E1000}'..='\u{EFFFD}' => true,
573                // iprivate
574                '\u{E000}'..='\u{F8FF}' => true,
575                '\u{F0000}'..='\u{FFFFD}' => true,
576                '\u{100000}'..='\u{10FFFD}' => true,
577                _ => false,
578            },
579        }
580    }
581
582    /// Returns the value as a `&str`.
583    pub fn get_as_str(&self) -> &str {
584        &self.value
585    }
586}
587
588/// A `Text` is contained by the `Token::Text` variant.
589///
590/// See the [XML 1.1
591/// specification](https://www.w3.org/TR/xml11/#dt-chardata) for
592/// details.
593#[derive(Debug, PartialEq)]
594pub struct Text {
595    text: String,
596}
597impl Text {
598    /// Create a `Text` without validating that `text`
599    /// conforms to the [XML 1.1
600    /// specification](https://www.w3.org/TR/xml11/#dt-chardata).
601    fn new_unvalidated(text: String) -> Text {
602        Text { text }
603    }
604
605    /// Returns `true` if `c` is a legal `Text` character.
606    fn is_valid_char(c: char) -> bool {
607        // TODO what about RestrictedCharacters and compatability
608        // characters see XML 1.1 spec see
609        // https://www.w3.org/TR/xml11/#NT-RestrictedChar
610        match c {
611            STAG_START | ENTITY_REFERENCE_START => false,
612            _ => true,
613        }
614    }
615
616    /// Returns the text as a `&str`.
617    pub fn get_as_str(&self) -> &str {
618        &self.text
619    }
620
621    /// Return a space normalized `String` where whitespace is trimmed
622    /// from the head and tail, and duplicate whitespace is replaced
623    /// with a single space character in the body.
624    pub fn normalize_space(&self) -> String {
625        let collection: Vec<&str> = self.text.split_whitespace().collect();
626        collection.join(" ")
627    }
628
629    /// Return a space deduplicated `String` where duplicate
630    /// whitespace is replaced with a single space character.
631    pub fn deduplicate_whitespace(&self) -> String {
632        let normalized_space = self.normalize_space();
633
634        if self.text.len() == 0 {
635            return normalized_space;
636        } else if normalized_space.len() == 0 {
637            return String::from(" ");
638        }
639
640        let whitespace_head: bool;
641        if is_whitespace(self.text.chars().next().unwrap()) {
642            whitespace_head = true;
643        } else {
644            whitespace_head = false;
645        }
646
647        let whitespace_tail: bool;
648        if is_whitespace(self.text.chars().last().unwrap()) {
649            whitespace_tail = true;
650        } else {
651            whitespace_tail = false;
652        }
653
654        if whitespace_head && whitespace_tail {
655            return format!(" {} ", normalized_space);
656        } else if whitespace_head {
657            return format!(" {}", normalized_space);
658        } else if whitespace_tail {
659            return format!("{} ", normalized_space);
660        } else {
661            return normalized_space;
662        }
663    }
664
665    /// Return a space normalized `String` but whitespace is only
666    /// deduplicated from the head, rather than trimmed entirley.
667    pub fn normalize_space_deduplicate_head(&self) -> String {
668        let normalized_space = self.normalize_space();
669
670        if self.text.len() == 0 {
671            return normalized_space;
672        } else if normalized_space.len() == 0 {
673            return String::from(" ");
674        }
675
676        if is_whitespace(self.text.chars().next().unwrap()) {
677            return format!(" {}", normalized_space);
678        } else {
679            return normalized_space;
680        }
681    }
682
683    /// Return a space normalized `String` but whitespace is only
684    /// deduplicated from the tail, rather than trimmed entirley.
685    pub fn normalize_space_deduplicate_tail(&self) -> String {
686        let normalized_space = self.normalize_space();
687
688        if self.text.len() == 0 {
689            return normalized_space;
690        } else if normalized_space.len() == 0 {
691            return String::from(" ");
692        }
693
694        if is_whitespace(self.text.chars().last().unwrap()) {
695            return format!("{} ", normalized_space);
696        } else {
697            return normalized_space;
698        }
699    }
700}
#[cfg(test)]
mod text_tests {
    use super::*;

    /// Input shared by every test: runs of spaces at the head, in the
    /// body and at the tail.
    const PADDED: &str = "  a   b    c     ";

    /// Builds the `Text` under test from `PADDED`.
    fn padded_text() -> Text {
        Text::new_unvalidated(PADDED.to_string())
    }

    #[test]
    fn normalize_space() {
        let normalized = padded_text().normalize_space();
        assert_eq!(normalized, "a b c");
    }

    #[test]
    fn deduplicate_whitespace() {
        let deduplicated = padded_text().deduplicate_whitespace();
        assert_eq!(deduplicated, " a b c ");
    }

    #[test]
    fn normalize_space_deduplicate_head() {
        let head_kept = padded_text().normalize_space_deduplicate_head();
        assert_eq!(head_kept, " a b c");
    }

    #[test]
    fn normalize_space_deduplicate_tail() {
        let tail_kept = padded_text().normalize_space_deduplicate_tail();
        assert_eq!(tail_kept, "a b c ");
    }
}
735
736/// A `CDATASection` is contained by the `Token::CDATASection` variant.
737///
738/// See the [XML 1.1
739/// specification](https://www.w3.org/TR/xml11/#NT-CDSect) for
740/// details.
741#[derive(Debug, PartialEq)]
742pub struct CDATASection {
743    data: String,
744}
745impl CDATASection {
746    /// Create a `CDATASection` without validating that `data`
747    /// conforms to the [XML 1.1
748    /// specification](https://www.w3.org/TR/xml11/#NT-CDSect).
749    fn new_unvalidated(data: String) -> CDATASection {
750        CDATASection { data }
751    }
752
753    /// Returns `true` if `c` is a legal `CDATASection` character. A
754    /// `CDATASection` must not contain the ending sequence ']]>',
755    /// this is not checked by this function.
756    fn is_valid_char(c: char, version: &XMLVersion) -> bool {
757        is_xml_char(c, version)
758    }
759
760    /// Returns the CDATA as a `&str`.
761    pub fn get_as_str(&self) -> &str {
762        &self.data
763    }
764}
765
766/// A `DecCharRef` is contained by the `Token::DecCharRef` variant.
767///
768/// See the [XML 1.1
769/// specification](https://www.w3.org/TR/xml11/#NT-CharRef) for
770/// details.
771#[derive(Debug, PartialEq)]
772pub struct DecCharRef {
773    character: char,
774}
775impl DecCharRef {
776    /// Create a `DecCharRef` without validating that `character`
777    /// conforms to the [XML 1.1
778    /// specification](https://www.w3.org/TR/xml11/#NT-CharRef).    
779    fn new_unvalidated(character: char) -> DecCharRef {
780        DecCharRef { character }
781    }
782
783    /// Create a `DecCharRef` from a `dec_code` string which
784    /// corresponds to the decimal representation of the unicode code
785    /// point ot the desired character, which must match a legal
786    /// `Char` in the specified XML version.
787    pub fn new_from_string(
788        dec_code: String,
789        version: &XMLVersion,
790    ) -> Result<DecCharRef, ParseTokenError> {
791        match dec_code.parse::<u32>() {
792            Ok(u32_value) => match char::from_u32(u32_value) {
793                Some(c) => {
794                    if is_xml_char(c, version) {
795                        return Ok(DecCharRef::new_unvalidated(c));
796                    }
797                }
798                None => {}
799            },
800            _ => {}
801        }
802
803        Err(ParseTokenError::new(ParseTokenErrorKind::DecCharRef))
804    }
805
806    /// Return the character as a `char` value.
807    pub fn get_as_char(&self) -> char {
808        self.character
809    }
810
811    /// Return the character as a `u32` value.
812    pub fn get_as_u32(&self) -> u32 {
813        self.character as u32
814    }
815}
#[cfg(test)]
mod dec_char_ref_tests {
    use super::*;

    /// "169" is the decimal code point of the copyright sign.
    #[test]
    fn new_from_string_test() {
        let parsed = DecCharRef::new_from_string(String::from("169"), &XMLVersion::Version1_0)
            .ok()
            .map(|dec_char_ref| dec_char_ref.get_as_char());

        // `None` here means the constructor returned an error.
        assert_eq!(parsed, Some('©'));
    }
}
831
832/// A `HexCharRef` is contained by the `Token::HexCharRef` variant.
833///
834/// See the [XML 1.1
835/// specification](https://www.w3.org/TR/xml11/#NT-CharRef) for
836/// details.
837#[derive(Debug, PartialEq)]
838pub struct HexCharRef {
839    character: char,
840}
841impl HexCharRef {
842    /// Create a `HexCharRef` without validating that `character`
843    /// conforms to the [XML 1.1
844    /// specification](https://www.w3.org/TR/xml11/#NT-CharRef).    
845    fn new_unvalidated(character: char) -> HexCharRef {
846        HexCharRef { character }
847    }
848
849    /// Create a `HexCharRef` from a `hex_code` string which
850    /// corresponds to the hexidecimal representation of the unicode code
851    /// point ot the desired character, which must match a legal
852    /// `Char` in the specified XML version.
853    pub fn new_from_string(
854        hex_code: String,
855        version: &XMLVersion,
856    ) -> Result<HexCharRef, ParseTokenError> {
857        match u32::from_str_radix(&hex_code, 16) {
858            Ok(u32_value) => match char::from_u32(u32_value) {
859                Some(c) => {
860                    if is_xml_char(c, version) {
861                        return Ok(HexCharRef::new_unvalidated(c));
862                    }
863                }
864                None => {}
865            },
866            _ => {}
867        }
868
869        Err(ParseTokenError::new(ParseTokenErrorKind::HexCharRef))
870    }
871
872    /// Return the character as a `char` value.
873    pub fn get_as_char(&self) -> char {
874        self.character
875    }
876
877    /// Return the character as a `u32` value.
878    pub fn get_as_u32(&self) -> u32 {
879        self.character as u32
880    }
881}
#[cfg(test)]
mod hex_char_ref_tests {
    use super::*;

    /// "1f61e" is the hexadecimal code point of the disappointed-face emoji.
    #[test]
    fn new_from_string_test() {
        let parsed = HexCharRef::new_from_string(String::from("1f61e"), &XMLVersion::Version1_0)
            .ok()
            .map(|hex_char_ref| hex_char_ref.get_as_char());

        // `None` here means the constructor returned an error.
        assert_eq!(parsed, Some('😞'));
    }
}
897
/// Tokenizer state for a single XML input.
///
/// The input is held as a vector of characters and consumed left to
/// right by the `munch_*` methods, which append to `tokens` as they go.
pub struct Tokenizer {
    /// Input characters, collected from the source string in `new`.
    c: Vec<char>,
    /// Index into `c` of the next character to examine.
    i: usize,
    /// Number of characters in `c`, cached in `new`.
    length: usize,
    /// Start index of the span currently being collected.
    /// NOTE(review): presumably written by `start_span` and read by
    /// `get_span` — confirm against those helpers.
    span_start: usize,
    /// Tokens produced so far.
    pub tokens: Vec<Token>,
    /// XML version in effect; starts as 1.0 and is updated when a version
    /// number is read from the XML declaration.
    version: XMLVersion,
    /// Warnings collected while tokenizing.
    /// NOTE(review): presumably filled by `warning` — confirm.
    warning_messages: Vec<String>,
    /// Errors collected while tokenizing.
    /// NOTE(review): presumably filled by `error` — confirm.
    pub error_messages: Vec<String>,
    /// Error flag checked by the `munch_*` methods after sub-steps that
    /// may have failed; starts out `false`.
    error: bool,
}
909impl Tokenizer {
910    pub fn new(xml: String) -> Tokenizer {
911        let c: Vec<char> = xml.chars().collect();
912        let length = c.len();
913
914        Tokenizer {
915            c: c,
916            i: 0,
917            length: length,
918            span_start: 0,
919            tokens: Vec::new(),
920            version: XMLVersion::Version1_0,
921            warning_messages: Vec::new(),
922            error_messages: Vec::new(),
923            error: false,
924        }
925    }
926
927    pub fn tokenize_document(&mut self) -> bool {
928        self.munch_document()
929    }
930
931    fn munch_document(&mut self) -> bool {
932        self.munch_prolog() && self.munch_element() && self.munch_misc_asterisk()
933    }
934
935    fn munch_prolog(&mut self) -> bool {
936        self.munch_xml_decl_eroteme()
937            && self.munch_misc_asterisk()
938            && self.munch_doctypedecl_misc_asterisk_eroteme()
939    }
940
941    fn munch_xml_decl_eroteme(&mut self) -> bool {
942        self.munch_xml_decl();
943
944        !self.error
945    }
946
947    fn munch_xml_decl(&mut self) -> bool {
948        if !self.munch_xml_decl_start() {
949            return false;
950        }
951
952        if !self.munch_version_info() {
953            self.error("An XML declaration must have a version attribute.");
954            return false;
955        }
956
957        self.munch_encoding_decl();
958        if self.error {
959            return false;
960        }
961
962        self.munch_sd_decl();
963        if self.error {
964            return false;
965        }
966
967        self.munch_s_eroteme();
968
969        if !self.munch_xml_decl_end() {
970            self.error("An XML declaration must end with '?>'.");
971            return false;
972        } else {
973            return true;
974        }
975    }
976
977    fn munch_xml_decl_start(&mut self) -> bool {
978        if self.munch_sequence(&XML_DECL_START) {
979            self.tokens.push(Token::XMLDeclStart);
980            return true;
981        }
982
983        false
984    }
985
986    fn munch_version_info(&mut self) -> bool {
987        self.munch_s();
988
989        if !self.munch_version() {
990            return false;
991        }
992
993        if !self.munch_eq() {
994            self.error("Expected an '=' after version attribute name in XML declaration.");
995            return false;
996        }
997
998        let double_quotes: bool;
999        if self.munch_double_quote() {
1000            double_quotes = true;
1001        } else if self.munch_single_quote() {
1002            double_quotes = false;
1003        } else {
1004            self.error("Expected a single or double quote.");
1005            return false;
1006        }
1007
1008        if !self.munch_version_num() {
1009            self.error("Expected legal version number in XML declaration.");
1010        }
1011
1012        if double_quotes && !self.munch_double_quote() {
1013            self.error("Expected closing double quote following version value in XML declaration.");
1014            return false;
1015        }
1016
1017        if !double_quotes && !self.munch_single_quote() {
1018            self.error("Expected closing single quote following version value in XML declaration.");
1019            return false;
1020        }
1021
1022        true
1023    }
1024
1025    fn munch_s(&mut self) -> bool {
1026        if self.i < self.length && is_whitespace(self.c[self.i]) {
1027            self.i += 1;
1028
1029            return self.munch_s_eroteme();
1030        }
1031
1032        false
1033    }
1034
1035    fn munch_s_eroteme(&mut self) -> bool {
1036        while self.i < self.length && is_whitespace(self.c[self.i]) {
1037            self.i += 1;
1038        }
1039
1040        true
1041    }
1042
1043    fn munch_version(&mut self) -> bool {
1044        self.munch_sequence(&XML_DECL_VERSION)
1045    }
1046
1047    fn munch_eq(&mut self) -> bool {
1048        self.munch_s_eroteme() && self.munch_character(EQUALS) && self.munch_s_eroteme()
1049    }
1050
1051    fn munch_single_quote(&mut self) -> bool {
1052        self.munch_character(SINGLE_QUOTE)
1053    }
1054
1055    fn munch_double_quote(&mut self) -> bool {
1056        self.munch_character(DOUBLE_QUOTE)
1057    }
1058
1059    fn munch_version_num(&mut self) -> bool {
1060        if self.munch_sequence(&XML_DECL_VERSION_PREFIX) {
1061            self.start_span();
1062
1063            if self.munch_digits() {
1064                let span = self.get_span(0);
1065
1066                match span.as_ref() {
1067                    "0" => {
1068                        self.version = XMLVersion::Version1_0;
1069                        self.tokens.push(Token::XMLVersion(XMLVersion::Version1_0));
1070                        return true;
1071                    }
1072                    "1" => {
1073                        self.version = XMLVersion::Version1_1;
1074                        self.tokens.push(Token::XMLVersion(XMLVersion::Version1_1));
1075                        return true;
1076                    }
1077                    _ => {
1078                        self.version = XMLVersion::Version1_1;
1079                        self.warning(&format!(
1080                            "Unknown XML version 1.{} tokenizing as if it were version '1.1'.",
1081                            span
1082                        ));
1083                    }
1084                }
1085            }
1086        }
1087
1088        false
1089    }
1090
1091    fn munch_digits(&mut self) -> bool {
1092        if self.i < self.length && is_digit(self.c[self.i]) {
1093            self.i += 1;
1094
1095            while self.i < self.length && is_digit(self.c[self.i]) {
1096                self.i += 1;
1097            }
1098
1099            return true;
1100        }
1101
1102        false
1103    }
1104
1105    fn munch_encoding_decl(&mut self) -> bool {
1106        self.munch_s();
1107
1108        if !self.munch_encoding() {
1109            return false;
1110        }
1111
1112        if !self.munch_eq() {
1113            self.error("Expected an '=' after encoding attribute name in XML declaration.");
1114            return false;
1115        }
1116
1117        let double_quotes: bool;
1118        if self.munch_double_quote() {
1119            double_quotes = true;
1120        } else if self.munch_single_quote() {
1121            double_quotes = false;
1122        } else {
1123            self.error("Expected a single or double quote.");
1124            return false;
1125        }
1126
1127        if !self.munch_enc_name() {
1128            self.error("Expected legal encoding value in XML declaration.");
1129            return false;
1130        }
1131
1132        if double_quotes && !self.munch_double_quote() {
1133            self.error(
1134                "Expected closing double quote following encoding value in XML declaration.",
1135            );
1136            return false;
1137        }
1138
1139        if !double_quotes && !self.munch_single_quote() {
1140            self.error(
1141                "Expected closing single quote following encoding value in XML declaration.",
1142            );
1143            return false;
1144        }
1145
1146        true
1147    }
1148
1149    fn munch_encoding(&mut self) -> bool {
1150        self.munch_sequence(&XML_DECL_ENCODING)
1151    }
1152
1153    fn munch_enc_name(&mut self) -> bool {
1154        if self.i < self.length && EncName::is_valid_start_char(self.c[self.i]) {
1155            self.start_span();
1156            self.i += 1;
1157
1158            while self.i < self.length && EncName::is_valid_char(self.c[self.i]) {
1159                self.i += 1;
1160            }
1161
1162            let span = self.get_span(0);
1163
1164            self.tokens
1165                .push(Token::XMLEncoding(EncName::new_unvalidated(span)));
1166
1167            return true;
1168        }
1169
1170        false
1171    }
1172
1173    fn munch_sd_decl(&mut self) -> bool {
1174        self.munch_s();
1175
1176        if !self.munch_standalone() {
1177            return false;
1178        }
1179
1180        if !self.munch_eq() {
1181            self.error("Expected an '=' after standalone attribute name in XML declaration.");
1182            return false;
1183        }
1184
1185        let double_quotes: bool;
1186        if self.munch_double_quote() {
1187            double_quotes = true;
1188        } else if self.munch_single_quote() {
1189            double_quotes = false;
1190        } else {
1191            self.error("Expected a single or double quote.");
1192            return false;
1193        }
1194
1195        if !self.munch_yes_no() {
1196            self.error("Expected yes or no for standalone value in XML declaration.");
1197            return false;
1198        }
1199
1200        if double_quotes && !self.munch_double_quote() {
1201            self.error(
1202                "Expected closing double quote following standalone value in XML declaration.",
1203            );
1204            return false;
1205        }
1206
1207        if !double_quotes && !self.munch_single_quote() {
1208            self.error(
1209                "Expected closing single quote following standalone value in XML declaration.",
1210            );
1211            return false;
1212        }
1213
1214        true
1215    }
1216
1217    fn munch_standalone(&mut self) -> bool {
1218        self.munch_sequence(&XML_DECL_STANDALONE)
1219    }
1220
1221    fn munch_yes_no(&mut self) -> bool {
1222        if self.munch_sequence(&YES) {
1223            self.tokens.push(Token::XMLStandalone(true));
1224        } else if self.munch_sequence(&NO) {
1225            self.tokens.push(Token::XMLStandalone(false));
1226        } else {
1227            return false;
1228        }
1229
1230        true
1231    }
1232
1233    fn munch_xml_decl_end(&mut self) -> bool {
1234        if self.munch_sequence(&XML_DECL_END) {
1235            self.tokens.push(Token::XMLDeclEnd);
1236            return true;
1237        }
1238
1239        false
1240    }
1241
1242    fn munch_misc_asterisk(&mut self) -> bool {
1243        while self.i < self.length {
1244            if !self.munch_misc() {
1245                break;
1246            }
1247        }
1248
1249        !self.error
1250    }
1251
1252    fn munch_misc(&mut self) -> bool {
1253        if self.munch_comment() {
1254            return !self.error;
1255        }
1256
1257        if self.munch_pi() {
1258            return !self.error;
1259        }
1260
1261        if self.munch_s() {
1262            return !self.error;
1263        }
1264
1265        false
1266    }
1267
1268    fn munch_comment(&mut self) -> bool {
1269        if !self.munch_sequence(&COMMENT_START) {
1270            return false;
1271        }
1272        self.start_span();
1273
1274        while self.i < self.length {
1275            if Comment::is_valid_char_minus_hyphen(self.c[self.i], &self.version) {
1276                self.i += 1;
1277                continue;
1278            } else if self.munch_sequence(&COMMENT_END) {
1279                let span = self.get_span(COMMENT_END.len());
1280                self.tokens
1281                    .push(Token::Comment(Comment::new_unvalidated(span)));
1282                return true;
1283            } else if self.i + 1 < self.length
1284                && self.c[self.i] == HYPHEN
1285                && Comment::is_valid_char_minus_hyphen(self.c[self.i + 1], &self.version)
1286            {
1287                self.i += 1;
1288                continue;
1289            } else {
1290                self.error("Illegal character in comment.");
1291                return false;
1292            }
1293        }
1294
1295        self.error("Comment must end with the character sequence '-->'.");
1296        return false;
1297    }
1298
1299    fn munch_pi(&mut self) -> bool {
1300        if self.munch_sequence(&PI_START) {
1301            self.tokens.push(Token::PIStart);
1302        } else {
1303            return false;
1304        }
1305
1306        if !self.munch_pi_target() {
1307            return false;
1308        }
1309
1310        return self.munch_pi_data();
1311    }
1312
1313    fn munch_pi_target(&mut self) -> bool {
1314        if PITarget::is_valid_start_char(self.c[self.i]) {
1315            self.start_span();
1316            self.i += 1;
1317
1318            while self.i < self.length && PITarget::is_valid_char(self.c[self.i]) {
1319                self.i += 1;
1320            }
1321
1322            let pi_target = self.get_span(0);
1323            if pi_target.to_lowercase() == "xml" {
1324                self.error("Illegal processing instruction target. The string 'xml' and all case variations are reserved.");
1325                return false;
1326            }
1327            self.tokens
1328                .push(Token::PITarget(PITarget::new_unvalidated(pi_target)));
1329            return true;
1330        }
1331
1332        self.error("Illegal start character for processing instruction target.");
1333        false
1334    }
1335
1336    fn munch_pi_data(&mut self) -> bool {
1337        if self.munch_sequence(&PI_END) {
1338            self.tokens.push(Token::PIEnd);
1339            return true;
1340        } else if PIData::is_valid_start_char(self.c[self.i]) {
1341            self.start_span();
1342            self.i += 1;
1343
1344            while self.i < self.length {
1345                if self.munch_sequence(&PI_END) {
1346                    let pi_data = self.get_span(PI_END.len());
1347                    self.tokens
1348                        .push(Token::PIData(PIData::new_unvalidated(pi_data)));
1349                    self.tokens.push(Token::PIEnd);
1350                    return true;
1351                } else if PIData::is_valid_char(self.c[self.i], &self.version) {
1352                    self.i += 1;
1353                } else {
1354                    self.error("Illegal character in processing instruction data.");
1355                    return false;
1356                }
1357            }
1358
1359            self.error("Processing instruction must end with the '?>' character sequence.");
1360            return false;
1361        } else {
1362            self.error("Illegal start character in processing instruction data.");
1363            false
1364        }
1365    }
1366
1367    fn munch_doctypedecl_misc_asterisk_eroteme(&mut self) -> bool {
1368        if !self.munch_doctypedecl() {
1369            return !self.error;
1370        }
1371
1372        return self.munch_misc_asterisk();
1373    }
1374
1375    fn munch_doctypedecl(&mut self) -> bool {
1376        if !self.munch_sequence(&DOCTYPE_START) {
1377            return false;
1378        }
1379
1380        if !self.munch_s() {
1381            self.error("Doctypedecl must have an S following Doctype start.");
1382            return false;
1383        }
1384
1385        if !self.munch_doctype_name() {
1386            self.error("Doctypedecl must have a doctype name.");
1387            return false;
1388        }
1389
1390        //TODO 	'<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
1391        // TODO ExternamID, intSubset, etc
1392
1393        if self.munch_character(DOCTYPE_END) {
1394            self.tokens.push(Token::DoctypeDeclEnd);
1395            return true;
1396        } else {
1397            self.error("Doctypedecl must end in a '>' character.");
1398            return false;
1399        }
1400    }
1401
1402    fn munch_doctype_name(&mut self) -> bool {
1403        if Name::is_valid_start_char(self.c[self.i]) {
1404            self.start_span();
1405            self.i += 1;
1406
1407            while self.i < self.length {
1408                if Name::is_valid_char(self.c[self.i]) {
1409                    self.i += 1;
1410                } else {
1411                    let span = self.get_span(0);
1412                    self.tokens.push(Token::DoctypeDeclStart);
1413                    self.tokens.push(Token::DoctypeName(Name { name: span }));
1414                    return true;
1415                }
1416            }
1417
1418            self.error("Doctype declaration must end with a '>' character.");
1419            return false;
1420        }
1421
1422        self.error("Illegal first character of doctype name.");
1423        false
1424    }
1425
1426    fn munch_element(&mut self) -> bool {
1427        if !self.munch_character(STAG_START) {
1428            return false;
1429        }
1430
1431        if !self.munch_element_name() {
1432            self.error("Expected an element name.");
1433            return false;
1434        }
1435
1436        self.munch_s_attibute_asterisk();
1437        if self.error {
1438            return false;
1439        }
1440
1441        if self.munch_empty_element_end() {
1442            return !self.error;
1443        }
1444
1445        if self.munch_character(STAG_END) {
1446            self.tokens.push(Token::ElementSTagEnd);
1447        } else {
1448            self.error("Expected end of STag.");
1449            return false;
1450        }
1451
1452        return self.munch_content();
1453    }
1454
1455    fn munch_element_name(&mut self) -> bool {
1456        if QName::is_valid_start_char(self.c[self.i]) {
1457            self.start_span();
1458            self.i += 1;
1459            let mut prefix_defined = false;
1460            let mut prefix = String::new();
1461
1462            while self.i < self.length {
1463                if QName::is_valid_char(self.c[self.i]) {
1464                    self.i += 1;
1465                } else if self.c[self.i] == COLON {
1466                    prefix = self.get_span(0);
1467                    prefix_defined = true;
1468                    self.i += 1;
1469                    self.start_span();
1470                } else {
1471                    let local_part = self.get_span(0);
1472                    if prefix_defined {
1473                        self.tokens.push(Token::ElementStart(QName::new_unvalidated(
1474                            Some(prefix),
1475                            local_part,
1476                        )));
1477                    } else {
1478                        self.tokens.push(Token::ElementStart(QName::new_unvalidated(
1479                            None, local_part,
1480                        )));
1481                    }
1482                    return true;
1483                }
1484            }
1485
1486            self.error("Premature end of input in element tag.");
1487            return false;
1488        }
1489
1490        self.error("Expected name start character.");
1491        false
1492    }
1493
1494    fn munch_s_attibute_asterisk(&mut self) -> bool {
1495        while self.i < self.length {
1496            if self.munch_s() && self.munch_attribute() {
1497                continue;
1498            } else {
1499                break;
1500            }
1501        }
1502
1503        !self.error
1504    }
1505
1506    fn munch_empty_element_end(&mut self) -> bool {
1507        if self.munch_sequence(&EMPTY_TAG_END) {
1508            self.tokens.push(Token::ElementEmptyEnd);
1509            return true;
1510        }
1511
1512        false
1513    }
1514
1515    fn munch_content(&mut self) -> bool {
1516        while self.i < self.length {
1517            if self.munch_etag() {
1518                return true;
1519            } else if self.munch_cd_sect() {
1520                continue;
1521            } else if self.munch_comment() {
1522                continue;
1523            } else if self.munch_pi() {
1524                continue;
1525            } else if self.munch_element() {
1526                continue;
1527            } else if self.munch_reference() {
1528                continue;
1529            } else if self.munch_char_data() {
1530                continue;
1531            } else {
1532                return false;
1533            }
1534        }
1535
1536        !self.error
1537    }
1538
1539    fn munch_char_data(&mut self) -> bool {
1540        if self.i < self.length && Text::is_valid_char(self.c[self.i]) {
1541            self.start_span();
1542            self.i += 1;
1543
1544            while self.i < self.length {
1545                if self.munch_sequence(&CDATA_END) {
1546                    self.error("Illegal CDATA END in character data.");
1547                    return false;
1548                } else if Text::is_valid_char(self.c[self.i]) {
1549                    self.i += 1;
1550                } else {
1551                    break;
1552                }
1553            }
1554
1555            let span = self.get_span(0);
1556            self.tokens.push(Token::Text(Text::new_unvalidated(span)));
1557            return true;
1558        }
1559
1560        false
1561    }
1562
1563    fn munch_cd_sect(&mut self) -> bool {
1564        if self.munch_sequence(&CDATA_START) {
1565            self.start_span();
1566
1567            while self.i < self.length {
1568                if self.munch_sequence(&CDATA_END) {
1569                    let span = self.get_span(CDATA_END.len());
1570                    self.tokens
1571                        .push(Token::CDATASection(CDATASection::new_unvalidated(span)));
1572                    return true;
1573                } else if CDATASection::is_valid_char(self.c[self.i], &self.version) {
1574                    self.i += 1;
1575                } else {
1576                    self.error("Illegal character in CDATA Section.");
1577                    return false;
1578                }
1579            }
1580
1581            self.error("CDATA Section must end with a ']]>' character sequence.");
1582            return false;
1583        }
1584
1585        false
1586    }
1587
1588    fn munch_reference(&mut self) -> bool {
1589        if self.munch_char_ref() {
1590            return !self.error;
1591        } else if self.munch_entity_ref() {
1592            return !self.error;
1593        } else {
1594            return false;
1595        }
1596    }
1597
1598    fn munch_char_ref(&mut self) -> bool {
1599        if self.munch_hexidecimal_char_ref() {
1600            return !self.error;
1601        } else if self.munch_decimal_char_ref() {
1602            return !self.error;
1603        } else {
1604            return false;
1605        }
1606    }
1607
1608    fn munch_hexidecimal_char_ref(&mut self) -> bool {
1609        if !self.munch_sequence(&HEXIDECIMAL_CHAR_REFERENCE_START) {
1610            return false;
1611        }
1612        self.start_span();
1613
1614        while self.i < self.length {
1615            if is_hexidecimal_digit(self.c[self.i]) {
1616                self.i += 1;
1617            } else if self.munch_character(REFERENCE_END) {
1618                match HexCharRef::new_from_string(self.get_span(1), &self.version) {
1619                    Ok(hex_char_ref) => {
1620                        self.tokens.push(Token::HexCharRef(hex_char_ref));
1621                        return true;
1622                    }
1623                    Err(parse_char_ref_err) => {
1624                        self.error(
1625                            "Failed to parse hexidecimal character reference to a character.",
1626                        );
1627                        self.error(parse_char_ref_err.message());
1628                        return false;
1629                    }
1630                }
1631            } else {
1632                self.error("Illegal character in hexidecimal character reference.");
1633                return false;
1634            }
1635        }
1636
1637        self.error("Expected a ';' character to terminate the hexidecimal character reference.");
1638        false
1639    }
1640
1641    fn munch_decimal_char_ref(&mut self) -> bool {
1642        if !self.munch_sequence(&DECIMAL_CHAR_REFERENCE_START) {
1643            return false;
1644        }
1645        self.start_span();
1646
1647        while self.i < self.length {
1648            if is_digit(self.c[self.i]) {
1649                self.i += 1;
1650            } else if self.munch_character(REFERENCE_END) {
1651                match DecCharRef::new_from_string(self.get_span(1), &self.version) {
1652                    Ok(dec_char_ref) => {
1653                        self.tokens.push(Token::DecCharRef(dec_char_ref));
1654                        return true;
1655                    }
1656                    Err(parse_char_ref_err) => {
1657                        self.error(
1658                            "Failed to parse decidecimal character reference to a character.",
1659                        );
1660                        self.error(parse_char_ref_err.message());
1661                        return false;
1662                    }
1663                }
1664            } else {
1665                self.error("Illegal character in decidecimal character reference.");
1666                return false;
1667            }
1668        }
1669
1670        self.error("Expected a ';' character to terminate the decidecimal character reference.");
1671        false
1672    }
1673
1674    fn munch_entity_ref(&mut self) -> bool {
1675        if self.munch_character(ENTITY_REFERENCE_START) {
1676            self.start_span();
1677
1678            if Name::is_valid_start_char(self.c[self.i]) {
1679                self.i += 1;
1680            } else {
1681                self.error("Expected a legal name start character in entity reference.");
1682                return false;
1683            }
1684
1685            while self.i < self.length {
1686                if Name::is_valid_char(self.c[self.i]) {
1687                    self.i += 1;
1688                } else if self.munch_character(REFERENCE_END) {
1689                    let span = self.get_span(1);
1690                    self.tokens
1691                        .push(Token::EntityRef(Name::new_unvalidated(span)));
1692                    return true;
1693                } else {
1694                    self.error("Illegal character in entity reference name.");
1695                    return false;
1696                }
1697            }
1698
1699            self.error("Entity reference must end with a ';' character.");
1700            return false;
1701        }
1702
1703        false
1704    }
1705
1706    fn munch_etag(&mut self) -> bool {
1707        if !self.munch_sequence(&ETAG_START) {
1708            return false;
1709        }
1710
1711        if !QName::is_valid_start_char(self.c[self.i]) {
1712            self.error("Expected a qname start character after ETag start.");
1713            return false;
1714        }
1715
1716        let mut prefix_defined = false;
1717        let mut prefix = String::new();
1718        self.start_span();
1719        self.i += 1;
1720
1721        while self.i < self.length {
1722            if QName::is_valid_char(self.c[self.i]) {
1723                self.i += 1;
1724            } else if self.c[self.i] == COLON {
1725                prefix = self.get_span(0);
1726                prefix_defined = true;
1727                self.i += 1;
1728                self.start_span();
1729            } else if self.munch_character(ETAG_END) {
1730                let local_part = self.get_span(1);
1731                if prefix_defined {
1732                    self.tokens.push(Token::ElementEnd(QName::new_unvalidated(
1733                        Some(prefix),
1734                        local_part,
1735                    )));
1736                } else {
1737                    self.tokens
1738                        .push(Token::ElementEnd(QName::new_unvalidated(None, local_part)));
1739                }
1740                return true;
1741            } else {
1742                self.error("Illegal character in ETag name.");
1743                return false;
1744            }
1745        }
1746
1747        self.error("ETag must finish with a closing '>' character.");
1748        false
1749    }
1750
1751    fn munch_attribute(&mut self) -> bool {
1752        if self.munch_namespace() {
1753            return true;
1754        }
1755
1756        if !self.munch_attribute_name() {
1757            return false;
1758        }
1759
1760        if !self.munch_eq() {
1761            self.error("Expected an '=' character after an attribute name.");
1762            return false;
1763        }
1764
1765        if !self.munch_attribute_value() {
1766            self.error("Expected an attribute value.");
1767            return false;
1768        }
1769
1770        true
1771    }
1772
1773    fn munch_namespace(&mut self) -> bool {
1774        if self.munch_sequence(&XMLNS) {
1775            self.tokens.push(Token::NamespaceStart);
1776        } else {
1777            return false;
1778        }
1779
1780        if self.munch_character(COLON) {
1781            if !self.munch_namespace_prefix() {
1782                self.error("Expected a namespace prefix after the character sequence 'xmlns:'.");
1783                return false;
1784            }
1785        } else {
1786            self.tokens.push(Token::NamespaceDefault);
1787        }
1788
1789        if !self.munch_eq() {
1790            self.error("Expected an '=' character after a namespace attribute name.");
1791            return false;
1792        }
1793
1794        self.munch_namespace_value()
1795    }
1796
1797    fn munch_namespace_prefix(&mut self) -> bool {
1798        if NCName::is_valid_start_char(self.c[self.i]) {
1799            self.start_span();
1800            self.i += 1;
1801
1802            while self.i < self.length {
1803                if NCName::is_valid_char(self.c[self.i]) {
1804                    self.i += 1;
1805                } else {
1806                    break;
1807                }
1808            }
1809
1810            let prefix = self.get_span(0);
1811            self.tokens
1812                .push(Token::NamespacePrefix(NCName::new_unvalidated(prefix)));
1813            return true;
1814        }
1815
1816        false
1817    }
1818
1819    fn munch_namespace_value(&mut self) -> bool {
1820        let double_quotes: bool;
1821        if self.munch_double_quote() {
1822            double_quotes = true;
1823        } else if self.munch_single_quote() {
1824            double_quotes = false;
1825        } else {
1826            self.error("Expected a single or double quote.");
1827            return false;
1828        }
1829
1830        self.start_span();
1831
1832        while self.i < self.length {
1833            if double_quotes && self.munch_character(DOUBLE_QUOTE)
1834                || !double_quotes && self.munch_character(SINGLE_QUOTE)
1835            {
1836                let span = self.get_span(1);
1837                self.tokens
1838                    .push(Token::NamespaceValue(NamespaceValue::new_unvalidated(span)));
1839                self.tokens.push(Token::NamespaceEnd);
1840                return true;
1841            } else if NamespaceValue::is_valid_char(self.c[self.i], &self.version) {
1842                self.i += 1;
1843            } else {
1844                self.error("Illegal character in namespace value.");
1845                return false;
1846            }
1847        }
1848
1849        if double_quotes {
1850            self.error("Expected closing double quote following namespace value.");
1851        } else {
1852            self.error("Expected closing single quote following namespace value.");
1853        }
1854
1855        false
1856    }
1857
1858    fn munch_attribute_name(&mut self) -> bool {
1859        if QName::is_valid_start_char(self.c[self.i]) {
1860            let mut prefix_defined = false;
1861            let mut prefix = String::new();
1862            self.start_span();
1863            self.i += 1;
1864
1865            while self.i < self.length {
1866                if QName::is_valid_char(self.c[self.i]) {
1867                    self.i += 1;
1868                } else if self.c[self.i] == COLON {
1869                    prefix = self.get_span(0);
1870                    prefix_defined = true;
1871                    self.i += 1;
1872                    self.start_span();
1873                } else {
1874                    break;
1875                }
1876            }
1877
1878            let local_part = self.get_span(0);
1879            if prefix_defined {
1880                self.tokens.push(Token::AttributeStart);
1881                self.tokens
1882                    .push(Token::AttributeName(QName::new_unvalidated(
1883                        Some(prefix),
1884                        local_part,
1885                    )));
1886            } else {
1887                self.tokens.push(Token::AttributeStart);
1888                self.tokens
1889                    .push(Token::AttributeName(QName::new_unvalidated(
1890                        None, local_part,
1891                    )));
1892            }
1893            return true;
1894        }
1895
1896        false
1897    }
1898
1899    fn munch_attribute_value(&mut self) -> bool {
1900        let double_quotes: bool;
1901        if self.munch_double_quote() {
1902            double_quotes = true;
1903        } else if self.munch_single_quote() {
1904            double_quotes = false;
1905        } else {
1906            self.error("Expected a single or double quote.");
1907            return false;
1908        }
1909
1910        self.tokens.push(Token::AttributeValueStart);
1911        self.start_span();
1912
1913        while self.i < self.length {
1914            if double_quotes && AttributeValue::is_valid_inside_double_quotes_char(self.c[self.i])
1915                || !double_quotes
1916                    && AttributeValue::is_valid_inside_single_quotes_char(self.c[self.i])
1917            {
1918                self.i += 1;
1919            } else if self.c[self.i] == ENTITY_REFERENCE_START {
1920                let span = self.get_span(0);
1921                self.tokens
1922                    .push(Token::AttributeValue(AttributeValue::new_unvalidated(span)));
1923                self.munch_reference();
1924                self.start_span();
1925            } else if (double_quotes && self.munch_double_quote())
1926                || (!double_quotes && self.munch_single_quote())
1927            {
1928                let span = self.get_span(1);
1929                self.tokens
1930                    .push(Token::AttributeValue(AttributeValue::new_unvalidated(span)));
1931                self.tokens.push(Token::AttributeEnd);
1932                return true;
1933            } else {
1934                self.error("Illegal character in attribute value.");
1935                return false;
1936            }
1937        }
1938
1939        if double_quotes {
1940            self.error("Expected closing double quote following attribute value.");
1941        } else {
1942            self.error("Expected closing single quote following attriubte value.");
1943        }
1944
1945        false
1946    }
1947
1948    fn munch_character(&mut self, character: char) -> bool {
1949        if self.i < self.length && self.c[self.i] == character {
1950            self.i += 1;
1951            return true;
1952        }
1953
1954        false
1955    }
1956
1957    fn munch_sequence(&mut self, sequence: &[char]) -> bool {
1958        let sequence_length = sequence.len();
1959        let sequence_end = self.i + sequence_length;
1960
1961        if sequence_end > self.length {
1962            return false;
1963        }
1964
1965        if &self.c[self.i..sequence_end] == sequence {
1966            self.i += sequence_length;
1967            return true;
1968        }
1969
1970        false
1971    }
1972
    /// Records the current cursor position (`self.i`) in `self.span_start`;
    /// `get_span` later uses it as the start of the extracted text.
    fn start_span(&mut self) {
        self.span_start = self.i;
    }
1976
1977    fn get_span(&mut self, span_end_offset: usize) -> String {
1978        let span_end = self.i - span_end_offset;
1979
1980        self.c[self.span_start..span_end].iter().collect()
1981    }
1982
1983    fn warning(&mut self, msg: &str) {
1984        if self.i < self.length {
1985            self.warning_messages
1986                .push(format!("c[{}]={} {}", self.i, self.c[self.i], msg));
1987        } else {
1988            self.warning_messages
1989                .push(format!("Out of bounds: {}", msg));
1990        }
1991    }
1992
1993    fn error(&mut self, msg: &str) {
1994        if self.i < self.length {
1995            self.error_messages
1996                .push(format!("c[{}]={} {}", self.i, self.c[self.i], msg));
1997        } else {
1998            self.error_messages.push(format!("Out of bounds: {}", msg));
1999        }
2000    }
2001}
2002#[cfg(test)]
2003mod tokenizer_tests {
2004    use super::*;
2005
2006    #[test]
2007    fn tokenize_document_hit() {
2008        let mut tok = Tokenizer::new(String::from("<a/>"));
2009        assert!(tok.tokenize_document());
2010        assert_eq!(
2011            tok.tokens,
2012            vec![
2013                Token::ElementStart(QName::new_unvalidated(None, String::from("a"))),
2014                Token::ElementEmptyEnd
2015            ]
2016        );
2017        assert!(!tok.error);
2018    }
2019
2020    #[test]
2021    fn munch_document_hit() {
2022        let mut tok = Tokenizer::new(String::from("<a/>"));
2023        assert!(tok.munch_document());
2024        assert_eq!(
2025            tok.tokens,
2026            vec![
2027                Token::ElementStart(QName::new_unvalidated(None, String::from("a"))),
2028                Token::ElementEmptyEnd
2029            ]
2030        );
2031        assert!(!tok.error);
2032    }
2033
2034    #[test]
2035    fn munch_prolog_hit() {
2036        let mut tok = Tokenizer::new(String::from(""));
2037        assert!(tok.munch_prolog());
2038        assert_eq!(tok.tokens, vec![]);
2039        assert!(!tok.error);
2040
2041        //TODO
2042    }
2043
2044    #[test]
2045    fn munch_xml_decl_eroteme_hit() {
2046        let mut tok = Tokenizer::new(String::from(""));
2047        assert!(tok.munch_xml_decl_eroteme());
2048        assert_eq!(tok.tokens, vec![]);
2049        assert!(!tok.error);
2050    }
2051
2052    #[test]
2053    fn munch_xml_decl_hit() {
2054        let mut tok = Tokenizer::new(String::from("<?xml version='1.0'?>"));
2055        assert!(tok.munch_xml_decl());
2056        assert_eq!(
2057            tok.tokens,
2058            vec![
2059                Token::XMLDeclStart,
2060                Token::XMLVersion(XMLVersion::Version1_0),
2061                Token::XMLDeclEnd
2062            ]
2063        );
2064        assert!(!tok.error);
2065
2066        let mut tok = Tokenizer::new(String::from("<?xml version='1.0' encoding='utf-8'?>"));
2067        assert!(tok.munch_xml_decl());
2068        assert_eq!(
2069            tok.tokens,
2070            vec![
2071                Token::XMLDeclStart,
2072                Token::XMLVersion(XMLVersion::Version1_0),
2073                Token::XMLEncoding(EncName::new_unvalidated(String::from("utf-8"))),
2074                Token::XMLDeclEnd
2075            ]
2076        );
2077        assert!(!tok.error);
2078
2079        let mut tok = Tokenizer::new(String::from("<?xml version='1.0' standalone='yes'?>"));
2080        tok.munch_xml_decl();
2081        assert_eq!(
2082            tok.tokens,
2083            vec![
2084                Token::XMLDeclStart,
2085                Token::XMLVersion(XMLVersion::Version1_0),
2086                Token::XMLStandalone(true),
2087                Token::XMLDeclEnd
2088            ]
2089        );
2090        assert!(!tok.error);
2091
2092        let mut tok = Tokenizer::new(String::from(
2093            "<?xml version='1.0' encoding='utf-8' standalone='no'?>",
2094        ));
2095        assert!(tok.munch_xml_decl());
2096        assert_eq!(
2097            tok.tokens,
2098            vec![
2099                Token::XMLDeclStart,
2100                Token::XMLVersion(XMLVersion::Version1_0),
2101                Token::XMLEncoding(EncName::new_unvalidated(String::from("utf-8"))),
2102                Token::XMLStandalone(false),
2103                Token::XMLDeclEnd
2104            ]
2105        );
2106        assert!(!tok.error);
2107
2108        let mut tok =
2109            Tokenizer::new(String::from("<?xml    version  =   \"1.0\"     encoding  = \"utf-8\"    standalone   =  \"no\"    ?>"));
2110        assert!(tok.munch_xml_decl());
2111        assert_eq!(
2112            tok.tokens,
2113            vec![
2114                Token::XMLDeclStart,
2115                Token::XMLVersion(XMLVersion::Version1_0),
2116                Token::XMLEncoding(EncName::new_unvalidated(String::from("utf-8"))),
2117                Token::XMLStandalone(false),
2118                Token::XMLDeclEnd
2119            ]
2120        );
2121        assert!(!tok.error);
2122    }
2123
2124    #[test]
2125    fn munch_xml_decl_start_hit() {
2126        let mut tok = Tokenizer::new(String::from("<?xml"));
2127        assert!(tok.munch_xml_decl_start());
2128        assert_eq!(tok.tokens, vec![Token::XMLDeclStart,]);
2129        assert!(!tok.error);
2130    }
2131
2132    #[test]
2133    fn munch_version_info_hit() {
2134        let mut tok = Tokenizer::new(String::from(" version='1.0'"));
2135        assert!(tok.munch_version_info());
2136        assert_eq!(tok.tokens, vec![Token::XMLVersion(XMLVersion::Version1_0),]);
2137        assert!(!tok.error);
2138    }
2139
2140    #[test]
2141    fn munch_s_hit() {
2142        let mut tok = Tokenizer::new(String::from(" "));
2143        assert!(tok.munch_s());
2144        assert_eq!(tok.tokens, vec![]);
2145        assert!(!tok.error);
2146    }
2147
2148    #[test]
2149    fn munch_s_eroteme_hit() {
2150        let mut tok = Tokenizer::new(String::from(""));
2151        assert!(tok.munch_s_eroteme());
2152        assert_eq!(tok.tokens, vec![]);
2153        assert!(!tok.error);
2154
2155        let mut tok = Tokenizer::new(String::from(" \n \t \r "));
2156        assert!(tok.munch_s_eroteme());
2157        assert_eq!(tok.tokens, vec![]);
2158        assert!(!tok.error);
2159    }
2160
2161    #[test]
2162    fn munch_version_hit() {
2163        let mut tok = Tokenizer::new(String::from("version"));
2164        assert!(tok.munch_version());
2165        assert_eq!(tok.tokens, vec![]);
2166        assert!(!tok.error);
2167    }
2168
2169    #[test]
2170    fn munch_eq_hit() {
2171        let mut tok = Tokenizer::new(String::from("="));
2172        assert!(tok.munch_eq());
2173        assert_eq!(tok.tokens, vec![]);
2174        assert!(!tok.error);
2175
2176        let mut tok = Tokenizer::new(String::from("  =  "));
2177        assert!(tok.munch_eq());
2178        assert_eq!(tok.tokens, vec![]);
2179        assert!(!tok.error);
2180    }
2181
2182    #[test]
2183    fn munch_single_quote_hit() {
2184        let mut tok = Tokenizer::new(String::from("'"));
2185        assert!(tok.munch_single_quote());
2186        assert_eq!(tok.tokens, vec![]);
2187        assert!(!tok.error);
2188    }
2189
2190    #[test]
2191    fn munch_double_quote_hit() {
2192        let mut tok = Tokenizer::new(String::from("\""));
2193        assert!(tok.munch_double_quote());
2194        assert_eq!(tok.tokens, vec![]);
2195        assert!(!tok.error);
2196    }
2197
2198    #[test]
2199    fn munch_version_num_hit() {
2200        let mut tok = Tokenizer::new(String::from("1.0"));
2201        assert!(tok.munch_version_num());
2202        assert_eq!(tok.tokens, vec![Token::XMLVersion(XMLVersion::Version1_0)]);
2203        assert!(!tok.error);
2204
2205        let mut tok = Tokenizer::new(String::from("1.1"));
2206        assert!(tok.munch_version_num());
2207        assert_eq!(tok.tokens, vec![Token::XMLVersion(XMLVersion::Version1_1)]);
2208        assert!(!tok.error);
2209    }
2210
2211    #[test]
2212    fn munch_digits_hit() {
2213        let mut tok = Tokenizer::new(String::from("1234567890"));
2214        assert!(tok.munch_digits());
2215        assert_eq!(tok.tokens, vec![]);
2216        assert!(!tok.error);
2217    }
2218
2219    #[test]
2220    fn munch_encoding_decl_hit() {
2221        let mut tok = Tokenizer::new(String::from(" encoding='utf-8'"));
2222        assert!(tok.munch_encoding_decl());
2223        assert_eq!(
2224            tok.tokens,
2225            vec![Token::XMLEncoding(EncName::new_unvalidated(String::from(
2226                "utf-8"
2227            ))),]
2228        );
2229        assert!(!tok.error);
2230
2231        let mut tok = Tokenizer::new(String::from("    encoding  =   \"UTF-8\"   "));
2232        assert!(tok.munch_encoding_decl());
2233        assert_eq!(
2234            tok.tokens,
2235            vec![Token::XMLEncoding(EncName::new_unvalidated(String::from(
2236                "UTF-8"
2237            ))),]
2238        );
2239        assert!(!tok.error);
2240    }
2241
2242    #[test]
2243    fn munch_encoding_hit() {
2244        let mut tok = Tokenizer::new(String::from("encoding"));
2245        assert!(tok.munch_encoding());
2246        assert_eq!(tok.tokens, vec![]);
2247        assert!(!tok.error);
2248    }
2249
2250    #[test]
2251    fn munch_enc_name_hit() {
2252        let mut tok = Tokenizer::new(String::from("iso8859-1"));
2253        assert!(tok.munch_enc_name());
2254        assert_eq!(
2255            tok.tokens,
2256            vec![Token::XMLEncoding(EncName::new_unvalidated(String::from(
2257                "iso8859-1"
2258            )))]
2259        );
2260        assert!(!tok.error);
2261    }
2262
2263    #[test]
2264    fn munch_sd_decl_hit() {
2265        let mut tok = Tokenizer::new(String::from(" standalone='yes'"));
2266        assert!(tok.munch_sd_decl());
2267        assert_eq!(tok.tokens, vec![Token::XMLStandalone(true)]);
2268        assert!(!tok.error);
2269    }
2270
2271    #[test]
2272    fn munch_standalone_hit() {
2273        let mut tok = Tokenizer::new(String::from("standalone"));
2274        assert!(tok.munch_standalone());
2275        assert_eq!(tok.tokens, vec![]);
2276        assert!(!tok.error);
2277    }
2278
2279    #[test]
2280    fn munch_yes_no_hit() {
2281        let mut tok = Tokenizer::new(String::from("yes"));
2282        assert!(tok.munch_yes_no());
2283        assert_eq!(tok.tokens, vec![Token::XMLStandalone(true)]);
2284        assert!(!tok.error);
2285
2286        let mut tok = Tokenizer::new(String::from("no"));
2287        assert!(tok.munch_yes_no());
2288        assert_eq!(tok.tokens, vec![Token::XMLStandalone(false)]);
2289        assert!(!tok.error);
2290    }
2291
2292    #[test]
2293    fn munch_xml_decl_end_hit() {
2294        let mut tok = Tokenizer::new(String::from("?>"));
2295        assert!(tok.munch_xml_decl_end());
2296        assert_eq!(tok.tokens, vec![Token::XMLDeclEnd]);
2297        assert!(!tok.error);
2298    }
2299
2300    #[test]
2301    fn munch_misc_asterisk_hit() {
2302        let mut tok = Tokenizer::new(String::from(""));
2303        assert!(tok.munch_misc_asterisk());
2304        assert_eq!(tok.tokens, vec![]);
2305        assert!(!tok.error);
2306
2307        //TODO
2308    }
2309
2310    #[test]
2311    fn munch_misc_hit() {
2312        let mut tok = Tokenizer::new(String::from(" "));
2313        assert!(tok.munch_misc());
2314        assert_eq!(tok.tokens, vec![]);
2315        assert!(!tok.error);
2316
2317        //TODO
2318    }
2319
2320    #[test]
2321    fn munch_comment_hit() {
2322        let mut tok = Tokenizer::new(String::from("<!---->"));
2323        assert!(tok.munch_comment());
2324        assert_eq!(
2325            tok.tokens,
2326            vec![Token::Comment(Comment::new_unvalidated(String::from("")))]
2327        );
2328        assert!(!tok.error);
2329
2330        let mut tok = Tokenizer::new(String::from("<!--My comment text-->"));
2331        assert!(tok.munch_comment());
2332        assert_eq!(
2333            tok.tokens,
2334            vec![Token::Comment(Comment::new_unvalidated(String::from(
2335                "My comment text"
2336            )))]
2337        );
2338        assert!(!tok.error);
2339
2340        let mut tok = Tokenizer::new(String::from("<!-- My - comment - text -->"));
2341        assert!(tok.munch_comment());
2342        assert_eq!(
2343            tok.tokens,
2344            vec![Token::Comment(Comment::new_unvalidated(String::from(
2345                " My - comment - text "
2346            )))]
2347        );
2348        assert!(!tok.error);
2349    }
2350
2351    #[test]
2352    fn munch_pi_hit() {
2353        let mut tok = Tokenizer::new(String::from("<?mypi my pi data?>"));
2354        assert!(tok.munch_pi());
2355        assert_eq!(
2356            tok.tokens,
2357            vec![
2358                Token::PIStart,
2359                Token::PITarget(PITarget {
2360                    target: String::from("mypi")
2361                }),
2362                Token::PIData(PIData::new_unvalidated(String::from(" my pi data"))),
2363                Token::PIEnd,
2364            ]
2365        );
2366        assert!(!tok.error);
2367    }
2368
2369    #[test]
2370    fn munch_pi_target() {
2371        let mut tok = Tokenizer::new(String::from("mypi"));
2372        assert!(tok.munch_pi_target());
2373        assert_eq!(
2374            tok.tokens,
2375            vec![Token::PITarget(PITarget {
2376                target: String::from("mypi")
2377            }),]
2378        );
2379        assert!(!tok.error);
2380    }
2381
2382    #[test]
2383    fn munch_pi_data() {
2384        let mut tok = Tokenizer::new(String::from("?>"));
2385        assert!(tok.munch_pi_data());
2386        assert_eq!(tok.tokens, vec![Token::PIEnd]);
2387        assert!(!tok.error);
2388
2389        let mut tok = Tokenizer::new(String::from(" Valid PI data is empty or starts with S.?>"));
2390        assert!(tok.munch_pi_data());
2391        assert_eq!(
2392            tok.tokens,
2393            vec![
2394                Token::PIData(PIData {
2395                    data: String::from(" Valid PI data is empty or starts with S.")
2396                }),
2397                Token::PIEnd
2398            ]
2399        );
2400        assert!(!tok.error);
2401    }
2402
2403    #[test]
2404    fn munch_doctypedecl_misc_asterisk_eroteme_hit() {
2405        let mut tok = Tokenizer::new(String::from(""));
2406        assert!(tok.munch_doctypedecl_misc_asterisk_eroteme());
2407        assert_eq!(tok.tokens, vec![]);
2408        assert!(!tok.error);
2409
2410        let mut tok = Tokenizer::new(String::from("<!DOCTYPE html>"));
2411        assert!(tok.munch_doctypedecl_misc_asterisk_eroteme());
2412        assert_eq!(
2413            tok.tokens,
2414            vec![
2415                Token::DoctypeDeclStart,
2416                Token::DoctypeName(Name::new_unvalidated(String::from("html"))),
2417                Token::DoctypeDeclEnd,
2418            ]
2419        );
2420        assert!(!tok.error);
2421
2422        let mut tok = Tokenizer::new(String::from("<!DOCTYPE html>"));
2423        assert!(tok.munch_doctypedecl_misc_asterisk_eroteme());
2424        assert_eq!(
2425            tok.tokens,
2426            vec![
2427                Token::DoctypeDeclStart,
2428                Token::DoctypeName(Name::new_unvalidated(String::from("html"))),
2429                Token::DoctypeDeclEnd,
2430            ]
2431        );
2432        assert!(!tok.error);
2433
2434        let mut tok = Tokenizer::new(String::from("<!DOCTYPE html> <!--Comment-->"));
2435        assert!(tok.munch_doctypedecl_misc_asterisk_eroteme());
2436        assert_eq!(
2437            tok.tokens,
2438            vec![
2439                Token::DoctypeDeclStart,
2440                Token::DoctypeName(Name::new_unvalidated(String::from("html"))),
2441                Token::DoctypeDeclEnd,
2442                Token::Comment(Comment::new_unvalidated(String::from("Comment"))),
2443            ]
2444        );
2445        assert!(!tok.error);
2446
2447        //TODO
2448    }
2449
2450    #[test]
2451    fn munch_doctypedecl_hit() {
2452        let mut tok = Tokenizer::new(String::from("<!DOCTYPE html>"));
2453        assert!(tok.munch_doctypedecl());
2454        assert_eq!(
2455            tok.tokens,
2456            vec![
2457                Token::DoctypeDeclStart,
2458                Token::DoctypeName(Name::new_unvalidated(String::from("html"))),
2459                Token::DoctypeDeclEnd,
2460            ]
2461        );
2462        assert!(!tok.error);
2463
2464        //TODO
2465    }
2466
2467    #[test]
2468    fn munch_element_hit() {
2469        let mut tok = Tokenizer::new(String::from("<a/>"));
2470        assert!(tok.munch_element());
2471        assert_eq!(
2472            tok.tokens,
2473            vec![
2474                Token::ElementStart(QName::new_unvalidated(None, String::from("a"))),
2475                Token::ElementEmptyEnd
2476            ]
2477        );
2478        assert!(!tok.error);
2479
2480        //TODO
2481    }
2482
2483    #[test]
2484    fn munch_element_name_hit() {
2485        let mut tok = Tokenizer::new(String::from("emptyelementname/>"));
2486        assert!(tok.munch_element_name());
2487        assert_eq!(
2488            tok.tokens,
2489            vec![Token::ElementStart(QName::new_unvalidated(
2490                None,
2491                String::from("emptyelementname")
2492            )),]
2493        );
2494        assert!(!tok.error);
2495
2496        let mut tok = Tokenizer::new(String::from("emptyelementname />"));
2497        assert!(tok.munch_element_name());
2498        assert_eq!(
2499            tok.tokens,
2500            vec![Token::ElementStart(QName::new_unvalidated(
2501                None,
2502                String::from("emptyelementname")
2503            )),]
2504        );
2505        assert!(!tok.error);
2506
2507        let mut tok = Tokenizer::new(String::from("validname>"));
2508        assert!(tok.munch_element_name());
2509        assert_eq!(
2510            tok.tokens,
2511            vec![Token::ElementStart(QName::new_unvalidated(
2512                None,
2513                String::from("validname")
2514            )),]
2515        );
2516        assert!(!tok.error);
2517
2518        let mut tok = Tokenizer::new(String::from("prefix:emptyelementname/>"));
2519        assert!(tok.munch_element_name());
2520        assert_eq!(
2521            tok.tokens,
2522            vec![Token::ElementStart(QName::new_unvalidated(
2523                Some(String::from("prefix")),
2524                String::from("emptyelementname")
2525            )),]
2526        );
2527        assert!(!tok.error);
2528
2529        let mut tok = Tokenizer::new(String::from("prefix:emptyelementname />"));
2530        assert!(tok.munch_element_name());
2531        assert_eq!(
2532            tok.tokens,
2533            vec![Token::ElementStart(QName::new_unvalidated(
2534                Some(String::from("prefix")),
2535                String::from("emptyelementname")
2536            )),]
2537        );
2538        assert!(!tok.error);
2539
2540        let mut tok = Tokenizer::new(String::from("prefix:validname>"));
2541        assert!(tok.munch_element_name());
2542        assert_eq!(
2543            tok.tokens,
2544            vec![Token::ElementStart(QName::new_unvalidated(
2545                Some(String::from("prefix")),
2546                String::from("validname")
2547            )),]
2548        );
2549        assert!(!tok.error);
2550    }
2551
2552    #[test]
2553    fn munch_s_attibute_asterisk_hit() {
2554        let mut tok = Tokenizer::new(String::from(""));
2555        assert!(tok.munch_s_attibute_asterisk());
2556        assert_eq!(tok.tokens, vec![]);
2557        assert!(!tok.error);
2558
2559        // TODO
2560    }
2561
2562    #[test]
2563    fn munch_empty_element_end_hit() {
2564        let mut tok = Tokenizer::new(String::from("/>"));
2565        assert!(tok.munch_empty_element_end());
2566        assert_eq!(tok.tokens, vec![Token::ElementEmptyEnd]);
2567        assert!(!tok.error);
2568    }
2569
2570    #[test]
2571    fn munch_content_hit() {
2572        let mut tok = Tokenizer::new(String::from(""));
2573        assert!(tok.munch_content());
2574        assert_eq!(tok.tokens, vec![]);
2575        assert!(!tok.error);
2576
2577        let mut tok = Tokenizer::new(String::from("character data"));
2578        assert!(tok.munch_content());
2579        assert_eq!(
2580            tok.tokens,
2581            vec![Token::Text(Text::new_unvalidated(String::from(
2582                "character data"
2583            )))]
2584        );
2585        assert!(!tok.error);
2586
2587        let mut tok = Tokenizer::new(String::from("character data"));
2588        assert!(tok.munch_content());
2589        assert_eq!(
2590            tok.tokens,
2591            vec![Token::Text(Text::new_unvalidated(String::from(
2592                "character data"
2593            )))]
2594        );
2595        assert!(!tok.error);
2596
2597        let mut tok = Tokenizer::new(String::from("<a/>"));
2598        assert!(tok.munch_content());
2599        assert_eq!(
2600            tok.tokens,
2601            vec![
2602                Token::ElementStart(QName::new_unvalidated(None, String::from("a"))),
2603                Token::ElementEmptyEnd
2604            ]
2605        );
2606        assert!(!tok.error);
2607
2608        let mut tok = Tokenizer::new(String::from("<a></a>"));
2609        assert!(tok.munch_content());
2610        assert_eq!(
2611            tok.tokens,
2612            vec![
2613                Token::ElementStart(QName::new_unvalidated(None, String::from("a"))),
2614                Token::ElementSTagEnd,
2615                Token::ElementEnd(QName::new_unvalidated(None, String::from("a"))),
2616            ]
2617        );
2618        assert!(!tok.error);
2619
2620        let mut tok = Tokenizer::new(String::from("&amp;"));
2621        assert!(tok.munch_content());
2622        assert_eq!(
2623            tok.tokens,
2624            vec![Token::EntityRef(Name::new_unvalidated(String::from("amp"))),]
2625        );
2626        assert!(!tok.error);
2627
2628        let mut tok = Tokenizer::new(String::from("<![CDATA[mycdata]]>"));
2629        assert!(tok.munch_content());
2630        assert_eq!(
2631            tok.tokens,
2632            vec![Token::CDATASection(CDATASection::new_unvalidated(
2633                String::from("mycdata")
2634            ))]
2635        );
2636        assert!(!tok.error);
2637
2638        let mut tok = Tokenizer::new(String::from("<?pi my pi data?>"));
2639        assert!(tok.munch_content());
2640        assert_eq!(
2641            tok.tokens,
2642            vec![
2643                Token::PIStart,
2644                Token::PITarget(PITarget::new_unvalidated(String::from("pi"))),
2645                Token::PIData(PIData::new_unvalidated(String::from(" my pi data"))),
2646                Token::PIEnd
2647            ]
2648        );
2649        assert!(!tok.error);
2650
2651        let mut tok = Tokenizer::new(String::from("<!--my comment-->"));
2652        assert!(tok.munch_content());
2653        assert_eq!(
2654            tok.tokens,
2655            vec![Token::Comment(Comment::new_unvalidated(String::from(
2656                "my comment"
2657            ))),]
2658        );
2659        assert!(!tok.error);
2660
2661        //TODO multiples
2662    }
2663
2664    #[test]
2665    fn munch_char_data_hit() {
2666        let mut tok = Tokenizer::new(String::from("Valid character data<"));
2667        assert!(tok.munch_char_data());
2668        assert_eq!(
2669            tok.tokens,
2670            vec![Token::Text(Text::new_unvalidated(String::from(
2671                "Valid character data"
2672            )))]
2673        );
2674        assert!(!tok.error);
2675
2676        //TODO
2677    }
2678
2679    #[test]
2680    fn munch_cd_sect_hit() {
2681        let mut tok = Tokenizer::new(String::from("<![CDATA[Valid cdata section]]>"));
2682        assert!(tok.munch_cd_sect());
2683        assert_eq!(
2684            tok.tokens,
2685            vec![Token::CDATASection(CDATASection::new_unvalidated(
2686                String::from("Valid cdata section")
2687            ))]
2688        );
2689        assert!(!tok.error);
2690
2691        //TODO
2692    }
2693
2694    #[test]
2695    fn munch_reference_hit() {
2696        let mut tok = Tokenizer::new(String::from("&amp;"));
2697        assert!(tok.munch_reference());
2698        assert_eq!(
2699            tok.tokens,
2700            vec![Token::EntityRef(Name::new_unvalidated(String::from("amp"))),]
2701        );
2702        assert!(!tok.error);
2703
2704        let mut tok = Tokenizer::new(String::from("&#65;"));
2705        assert!(tok.munch_reference());
2706        assert_eq!(
2707            tok.tokens,
2708            vec![Token::DecCharRef(DecCharRef::new_unvalidated('A')),]
2709        );
2710        assert!(!tok.error);
2711
2712        let mut tok = Tokenizer::new(String::from("&#x1f61e;"));
2713        assert!(tok.munch_reference());
2714        assert_eq!(
2715            tok.tokens,
2716            vec![Token::HexCharRef(HexCharRef::new_unvalidated('😞'))]
2717        );
2718        assert!(!tok.error);
2719    }
2720
2721    #[test]
2722    fn munch_char_ref_hit() {
2723        let mut tok = Tokenizer::new(String::from("&#65;"));
2724        assert!(tok.munch_char_ref());
2725        assert_eq!(
2726            tok.tokens,
2727            vec![Token::DecCharRef(DecCharRef::new_unvalidated('A'))]
2728        );
2729        assert!(!tok.error);
2730
2731        let mut tok = Tokenizer::new(String::from("&#x1F61E;"));
2732        assert!(tok.munch_char_ref());
2733        assert_eq!(
2734            tok.tokens,
2735            vec![Token::HexCharRef(HexCharRef::new_unvalidated('😞'))]
2736        );
2737        assert!(!tok.error);
2738    }
2739
2740    #[test]
2741    fn munch_hexidecimal_char_ref_hit() {
2742        let mut tok = Tokenizer::new(String::from("&#x1F61E;"));
2743        assert!(tok.munch_hexidecimal_char_ref());
2744        assert_eq!(
2745            tok.tokens,
2746            vec![Token::HexCharRef(HexCharRef::new_unvalidated('😞'))]
2747        );
2748        assert!(!tok.error);
2749    }
2750
2751    #[test]
2752    fn munch_decimal_char_ref_hit() {
2753        let mut tok = Tokenizer::new(String::from("&#65;"));
2754        assert!(tok.munch_decimal_char_ref());
2755        assert_eq!(
2756            tok.tokens,
2757            vec![Token::DecCharRef(DecCharRef::new_unvalidated('A'))]
2758        );
2759        assert!(!tok.error);
2760    }
2761
2762    #[test]
2763    fn munch_entity_ref_hit() {
2764        let mut tok = Tokenizer::new(String::from("&amp;"));
2765        assert!(tok.munch_reference());
2766        assert_eq!(
2767            tok.tokens,
2768            vec![Token::EntityRef(Name::new_unvalidated(String::from("amp")))]
2769        );
2770        assert!(!tok.error);
2771    }
2772
#[test]
fn munch_etag_hit() {
    // Each case pairs an end tag with the namespace prefix the test expects
    // on the resulting ElementEnd token; the local part is "etag" in both.
    let cases = [("</etag>", None), ("</prefix:etag>", Some("prefix"))];

    for &(input, prefix) in cases.iter() {
        let mut tokenizer = Tokenizer::new(input.to_string());
        assert!(tokenizer.munch_etag());

        let expected = vec![Token::ElementEnd(QName::new_unvalidated(
            prefix.map(String::from),
            "etag".to_string(),
        ))];
        assert_eq!(tokenizer.tokens, expected);
        assert!(!tokenizer.error);
    }
}
2797
#[test]
fn munch_attribute_hit() {
    let mut tokenizer = Tokenizer::new("name='value'".to_string());
    let matched = tokenizer.munch_attribute();
    assert!(matched);

    // Expected token stream for one unprefixed, single-quoted attribute.
    let attribute_name = QName::new_unvalidated(None, "name".to_string());
    let attribute_value = AttributeValue::new_unvalidated("value".to_string());
    let expected = vec![
        Token::AttributeStart,
        Token::AttributeName(attribute_name),
        Token::AttributeValueStart,
        Token::AttributeValue(attribute_value),
        Token::AttributeEnd,
    ];
    assert_eq!(tokenizer.tokens, expected);
    assert!(!tokenizer.error);
}
2814
#[test]
fn munch_namespace_hit() {
    // Case 1: a default namespace declaration (`xmlns='...'`) is expected to
    // produce NamespaceDefault between the start and value tokens.
    let mut default_ns = Tokenizer::new("xmlns='http://defaultnamespace.com'".to_string());
    assert!(default_ns.munch_namespace());
    let default_uri = NamespaceValue::new_unvalidated("http://defaultnamespace.com".to_string());
    let expected_default = vec![
        Token::NamespaceStart,
        Token::NamespaceDefault,
        Token::NamespaceValue(default_uri),
        Token::NamespaceEnd,
    ];
    assert_eq!(default_ns.tokens, expected_default);
    assert!(!default_ns.error);

    // Case 2: a prefixed declaration (`xmlns:prefix='...'`) is expected to
    // produce a NamespacePrefix token in that position instead.
    let mut prefixed_ns =
        Tokenizer::new("xmlns:prefix='http://prefixednamespace.com'".to_string());
    assert!(prefixed_ns.munch_namespace());
    let prefixed_uri = NamespaceValue::new_unvalidated("http://prefixednamespace.com".to_string());
    let expected_prefixed = vec![
        Token::NamespaceStart,
        Token::NamespacePrefix(NCName::new_unvalidated("prefix".to_string())),
        Token::NamespaceValue(prefixed_uri),
        Token::NamespaceEnd,
    ];
    assert_eq!(prefixed_ns.tokens, expected_prefixed);
    assert!(!prefixed_ns.error);
}
2847
#[test]
fn munch_namespace_prefix_hit() {
    // A bare prefix is expected to become a single NamespacePrefix token.
    let mut tokenizer = Tokenizer::new("validprefix".to_string());
    let matched = tokenizer.munch_namespace_prefix();
    assert!(matched);

    let prefix = NCName::new_unvalidated("validprefix".to_string());
    let expected = vec![Token::NamespacePrefix(prefix)];
    assert_eq!(tokenizer.tokens, expected);
    assert!(!tokenizer.error);
}
2860
#[test]
fn munch_namespace_value_hit() {
    // A single-quoted value is expected to yield the unquoted NamespaceValue
    // followed by NamespaceEnd.
    let mut tokenizer = Tokenizer::new("'namespacevalue'".to_string());
    let matched = tokenizer.munch_namespace_value();
    assert!(matched);

    let value = NamespaceValue::new_unvalidated("namespacevalue".to_string());
    let expected = vec![Token::NamespaceValue(value), Token::NamespaceEnd];
    assert_eq!(tokenizer.tokens, expected);
    assert!(!tokenizer.error);

    // TODO (carried over from the original, which did not say what is still
    // missing; only the single-quoted case above is exercised).
}
2878
#[test]
fn munch_attribute_name_hit() {
    // Case 1: an unprefixed name is expected to give AttributeStart followed
    // by an AttributeName whose prefix is None.
    let mut plain = Tokenizer::new("validname".to_string());
    assert!(plain.munch_attribute_name());
    let plain_name = QName::new_unvalidated(None, "validname".to_string());
    let expected_plain = vec![Token::AttributeStart, Token::AttributeName(plain_name)];
    assert_eq!(plain.tokens, expected_plain);
    assert!(!plain.error);

    // Case 2: "prefix:validname" is expected to be split at the colon into
    // prefix and local part.
    let mut prefixed = Tokenizer::new("prefix:validname".to_string());
    assert!(prefixed.munch_attribute_name());
    let prefixed_name =
        QName::new_unvalidated(Some("prefix".to_string()), "validname".to_string());
    let expected_prefixed = vec![Token::AttributeStart, Token::AttributeName(prefixed_name)];
    assert_eq!(prefixed.tokens, expected_prefixed);
    assert!(!prefixed.error);
}
2906
#[test]
fn munch_attribute_value_hit() {
    // Small builders for the tokens that recur in the expectations below.
    let text = |s: &str| Token::AttributeValue(AttributeValue::new_unvalidated(s.to_string()));
    let amp = || Token::EntityRef(Name::new_unvalidated("amp".to_string()));

    // Case 1: plain single-quoted value.
    let mut plain = Tokenizer::new("'value'".to_string());
    assert!(plain.munch_attribute_value());
    let expected_plain = vec![
        Token::AttributeValueStart,
        text("value"),
        Token::AttributeEnd,
    ];
    assert_eq!(plain.tokens, expected_plain);
    assert!(!plain.error);

    // Case 2: an entity reference is expected to split the surrounding text
    // into two AttributeValue tokens.
    let mut with_entity = Tokenizer::new("'this &amp; that'".to_string());
    assert!(with_entity.munch_attribute_value());
    let expected_with_entity = vec![
        Token::AttributeValueStart,
        text("this "),
        amp(),
        text(" that"),
        Token::AttributeEnd,
    ];
    assert_eq!(with_entity.tokens, expected_with_entity);
    assert!(!with_entity.error);

    // Case 3: double-quoted value mixing an entity reference, a hexadecimal
    // character reference (U+2014) and a decimal one (65, i.e. 'A').
    let mut mixed = Tokenizer::new("\"This &amp; that &#x2014; and &#65;.\"".to_string());
    assert!(mixed.munch_attribute_value());
    let expected_mixed = vec![
        Token::AttributeValueStart,
        text("This "),
        amp(),
        text(" that "),
        Token::HexCharRef(HexCharRef::new_unvalidated('\u{2014}')),
        text(" and "),
        Token::DecCharRef(DecCharRef::new_unvalidated('A')),
        text("."),
        Token::AttributeEnd,
    ];
    assert_eq!(mixed.tokens, expected_mixed);
    assert!(!mixed.error);
}
2953}
2954
/// Returns `true` when `c` is an ASCII decimal digit (`'0'` through `'9'`).
///
/// Digits outside the ASCII range (for example `'٣'`) are rejected, exactly
/// as the `'0'..='9'` range this replaces did.
fn is_digit(c: char) -> bool {
    c.is_ascii_digit()
}
2961
/// Returns `true` when `c` is an ASCII hexadecimal digit: `'0'..='9'`,
/// `'a'..='f'` or `'A'..='F'`.
///
/// Anything else, including non-ASCII digits, is rejected.
fn is_hexidecimal_digit(c: char) -> bool {
    c.is_ascii_hexdigit()
}
2970
/// Error carrying a [`ParseTokenErrorKind`] that identifies which parsing or
/// conversion step failed.
///
/// The type implements [`std::fmt::Display`] and [`std::error::Error`], so it
/// can be printed, boxed as `Box<dyn std::error::Error>` and propagated with
/// `?` like any other Rust error.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseTokenError {
    kind: ParseTokenErrorKind,
}

impl ParseTokenError {
    /// Creates an error of the given `kind`.
    pub fn new(kind: ParseTokenErrorKind) -> ParseTokenError {
        ParseTokenError { kind }
    }

    /// Returns the kind of failure this error represents.
    pub fn kind(&self) -> ParseTokenErrorKind {
        self.kind
    }

    /// Fixed, human-readable description for this error's kind.
    ///
    /// This is the text written by the `Display` implementation below.
    fn message(&self) -> &'static str {
        match self.kind {
            ParseTokenErrorKind::PITarget => "Error parsing processing instruction target.",
            ParseTokenErrorKind::DecCharRef => "Error parsing Decimal Character Reference value.",
            ParseTokenErrorKind::HexCharRef => {
                "Error parsing Hexidecimal Character Reference value."
            }
            ParseTokenErrorKind::FromU32 => "Error converting u32 to char.",
        }
    }
}

impl std::fmt::Display for ParseTokenError {
    /// Writes the fixed description associated with the error's kind.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.message())
    }
}

impl std::error::Error for ParseTokenError {}

/// The distinct failures a [`ParseTokenError`] can describe.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParseTokenErrorKind {
    /// A processing instruction target could not be parsed.
    PITarget,
    /// A decimal character reference value could not be parsed.
    DecCharRef,
    /// A hexadecimal character reference value could not be parsed.
    HexCharRef,
    /// A `u32` could not be converted to a `char`.
    FromU32,
}