trashy_xml/
lib.rs

//! A non-spec-compliant XML parser that keeps parsing when it encounters errors.
2//! # Examples
3//!
4//! ```
5//! use trashy_xml::XmlParser;
6//!
7//! // Gets each open element matching "this_element"
8//! // then prints the debug representation of its attributes.
9//! let parsed = XmlParser::str("<this_element attribute=\"value\" />").parse();
10//! for token in parsed.elements_from_name("this_element") {
11//!     dbg!(token.attributes());
12//! }
13//! ```
14
15#![warn(
16    missing_debug_implementations,
17    rust_2018_idioms,
18    missing_doc_code_examples
19)]
20
21use lazy_static::lazy_static;
22use mimalloc::MiMalloc;
23
24#[global_allocator]
25static GLOBAL: MiMalloc = MiMalloc;
26
27mod lexer;
28/// The different types returned by the parser.
29pub mod tokens;
30
31use crate::tokens::{TokenKind, XmlToken};
32use rustc_hash::FxHashMap;
33use std::ops::Range;
34use std::{
35    borrow::Cow, cell::RefCell, collections::VecDeque, iter::Peekable, path::Path, rc::Rc,
36    slice::Iter, str, string::String,
37};
38
39use tokens::{FilePosition, OpenElement, Token, XmlError};
40
/// Options that control what the parser records while parsing.
#[derive(Debug, Clone)]
pub struct Settings {
    /// Sets if the parser will make comment tokens or not.
    /// Defaults to `true`.
    pub ignore_comments: bool,
    /// Sets if the parser will put the tokens into a map, where one can get a
    /// token from a file position. Defaults to `false`.
    pub create_position_map: bool,
}

impl Default for Settings {
    /// Comments are ignored and no position map is built.
    fn default() -> Self {
        let ignore_comments = true;
        let create_position_map = false;
        Settings {
            ignore_comments,
            create_position_map,
        }
    }
}
58
59/// The main parser struct.
60#[derive(Debug)]
61pub struct XmlParser {
62    settings: Settings,
63    buffer: Vec<u8>,
64    raw_index: usize,
65    pub(crate) raw_tokens: Vec<Token>,
66}
67
68type RefXmlToken<'a> = Rc<RefCell<XmlToken<'a>>>;
69
70/// Formatted error message token.
71#[derive(Debug, Default)]
72pub struct FmtXmlError {
73    pub error: String,
74    pub position: FilePosition,
75}
76
77impl FmtXmlError {
78    pub(crate) fn new<S>(str: S, position: FilePosition) -> Self
79    where
80        S: Into<String>,
81    {
82        FmtXmlError {
83            error: str.into(),
84            position,
85        }
86    }
87}
88
89/// Struct returned after calling [XmlParser::parse()].
90#[derive(Debug, Default)]
91pub struct ParsedXml<'a> {
92    /// Vector with the tokens.
93    pub tokens: Vec<RefXmlToken<'a>>,
94    token_map: FxHashMap<usize, FxHashMap<Range<usize>, RefXmlToken<'a>>>,
95    open_elements: FxHashMap<String, Vec<RefXmlToken<'a>>>,
96    /// Vector with error tokens.
97    pub errors: Vec<FmtXmlError>,
98    create_position_map: bool,
99}
100
101impl<'a> ParsedXml<'a> {
102    /// Takes a name and returns all open elements matching that name.
103    pub fn elements_from_name(&self, name: &str) -> Vec<OpenElement<'a>> {
104        self.open_elements
105            .get(name)
106            .unwrap_or(&Vec::new())
107            .iter()
108            .map(|e| e.borrow().as_open_element().clone())
109            .collect::<Vec<_>>()
110    }
111
112    /// Returns vector with all open elements.
113    pub fn elements(&self) -> Vec<OpenElement<'a>> {
114        self.open_elements
115            .values()
116            .into_iter()
117            .flatten()
118            .map(|e| e.borrow().as_open_element().clone())
119            .collect()
120    }
121
122    fn insert_into_map(&mut self, position: FilePosition, token: RefXmlToken<'a>, length: usize) {
123        assert!(self.create_position_map);
124        if let Some(l) = self.token_map.get_mut(&position.line) {
125            l.insert(
126                Range {
127                    start: position.column as usize,
128                    end: position.column as usize + length + 1,
129                },
130                Rc::clone(&token),
131            );
132        } else {
133            self.token_map.insert(position.line, FxHashMap::default());
134            self.token_map.get_mut(&position.line).unwrap().insert(
135                Range {
136                    start: position.column,
137                    end: position.column as usize + length + 1,
138                },
139                Rc::clone(&token),
140            );
141        }
142    }
143
144    fn push_error(&mut self, error: XmlError, settings: &Settings) {
145        match error {
146            XmlError::EmptyDocument(p) => {
147                self.errors.push(FmtXmlError::new("Document is empty", p));
148            }
149            XmlError::Expected(c, p) => {
150                self.errors
151                    .push(FmtXmlError::new(format!("Expected {}", c), p));
152            }
153            XmlError::NotPermittedInComments(p) => {
154                self.errors
155                    .push(FmtXmlError::new("-- is not permitted within comments", p));
156            }
157            XmlError::OpenCloseElementsMismatch(p) => {
158                self.errors.push(FmtXmlError::new(
159                    "Mismatch between closing and opening elements",
160                    p,
161                ));
162            }
163            XmlError::OpenCloseElementMismatch(s1, s2, p) => {
164                self.errors.push(FmtXmlError::new(
165                    format!(
166                        "Mismatch between closing {} and opening {} elements",
167                        s1, s2
168                    ),
169                    p,
170                ));
171            }
172            XmlError::Unescaped(c, s, p, pa, ep) => {
173                self.errors.push(FmtXmlError::new(
174                    format!("Unescaped {} not allowed in attribute values", c),
175                    ep,
176                ));
177                let token = XmlToken::invalid_attribute(
178                    s,
179                    p,
180                    pa.map(|p| Rc::clone(self.tokens.get(p).unwrap())),
181                );
182                self.push_attribute(token, pa, settings);
183            }
184            XmlError::MissingValue(s, p, pa) => {
185                self.errors.push(FmtXmlError::new(
186                    format!("Specification mandates value for attribute {}", s),
187                    p,
188                ));
189                let token = XmlToken::invalid_attribute(
190                    s,
191                    p,
192                    pa.map(|p| Rc::clone(self.tokens.get(p).unwrap())),
193                );
194                self.push_attribute(token, pa, settings);
195            }
196            XmlError::QuoteExpected(s, p, pa) => {
197                self.errors.push(FmtXmlError::new("\" or \' expected", p));
198                let token = XmlToken::invalid_attribute(
199                    s,
200                    p,
201                    pa.map(|p| Rc::clone(self.tokens.get(p).unwrap())),
202                );
203                self.push_attribute(token, pa, settings);
204            }
205            XmlError::ElementMustBeFollowedBy(s, p) => {
206                self.errors.push(FmtXmlError::new(format!("Element \"{}\" must be followed by either attribute specifications, \">\" or \"/>\"", s), p));
207            }
208        }
209    }
210
211    fn push_open_element(
212        &mut self,
213        token: XmlToken<'a>,
214        parent: Option<usize>,
215        settings: &Settings,
216    ) {
217        let token = new_rc_refcell(token);
218        if let Some(parent) = parent {
219            if let Some(p) = self.tokens.get_mut(parent) {
220                let mut p_token = p.borrow_mut();
221                p_token
222                    .as_mut_open_element()
223                    .children
224                    .push(Rc::clone(&token));
225            }
226        }
227        {
228            let t = token.borrow();
229            let t = t.as_open_element();
230            if let Some(vec) = self.open_elements.get_mut(&t.name.to_string()) {
231                vec.push(Rc::clone(&token));
232            } else {
233                self.open_elements
234                    .insert(t.name.to_string(), vec![Rc::clone(&token)]);
235            }
236            if settings.create_position_map {
237                self.insert_into_map(t.position, Rc::clone(&token), t.name.len());
238            }
239        }
240        self.tokens.push(token);
241    }
242
243    fn push_close_element(
244        &mut self,
245        token: XmlToken<'a>,
246        parent: Option<usize>,
247        settings: &Settings,
248    ) {
249        let token = new_rc_refcell(token);
250        if let Some(parent) = parent {
251            if let Some(p) = self.tokens.get_mut(parent) {
252                let mut p_token = p.borrow_mut();
253                p_token
254                    .as_mut_open_element()
255                    .children
256                    .push(Rc::clone(&token));
257            }
258        }
259        {
260            let t = token.borrow();
261            let t = t.as_close_element();
262            if settings.create_position_map {
263                self.insert_into_map(
264                    t.position,
265                    Rc::clone(&token),
266                    t.name.as_ref().map_or(0, |t| t.len()),
267                );
268            }
269        }
270        self.tokens.push(token);
271    }
272
273    fn push_attribute(&mut self, token: XmlToken<'a>, parent: Option<usize>, settings: &Settings) {
274        let token = new_rc_refcell(token);
275        {
276            let token_borrowed = token.borrow();
277            let attribute = token_borrowed.as_attribute();
278            if let Some(parent) = parent {
279                if let Some(p) = self.tokens.get_mut(parent) {
280                    let mut p_token = p.borrow_mut();
281                    p_token
282                        .as_mut_open_element()
283                        .children
284                        .push(Rc::clone(&token));
285                }
286            }
287            if let Some(parent) = parent {
288                if let Some(p) = self.tokens.get_mut(parent) {
289                    let mut attributes = p.borrow_mut();
290                    if let Some(attrs) = attributes
291                        .as_mut_open_element()
292                        .attributes
293                        .get_mut(&attribute.key.0)
294                    {
295                        attrs.push(Rc::clone(&token));
296                    } else {
297                        attributes
298                            .as_mut_open_element()
299                            .attributes
300                            .insert(attribute.key.0.to_string(), vec![Rc::clone(&token)]);
301                    }
302                }
303            }
304            if settings.create_position_map {
305                self.insert_into_map(attribute.key.1, Rc::clone(&token), attribute.key.0.len());
306                if let Some(value) = &attribute.value {
307                    self.insert_into_map(value.1, Rc::clone(&token), value.0.len());
308                } else {
309                    self.insert_into_map(attribute.key.1, Rc::clone(&token), 1);
310                }
311            }
312        }
313        self.tokens.push(token);
314    }
315
316    fn push_comment(&mut self, token: XmlToken<'a>, parent: Option<usize>, settings: &Settings) {
317        let token = new_rc_refcell(token);
318        if !settings.ignore_comments {
319            if let Some(parent) = parent {
320                if let Some(p) = self.tokens.get_mut(parent) {
321                    let mut p_token = p.borrow_mut();
322                    p_token
323                        .as_mut_open_element()
324                        .children
325                        .push(Rc::clone(&token));
326                }
327            }
328            if settings.create_position_map {
329                let t = token.borrow();
330                let t = t.as_comment();
331                self.insert_into_map(t.position, Rc::clone(&token), t.string.len());
332            }
333        }
334        self.tokens.push(token);
335    }
336
337    fn push_inner_text(&mut self, token: XmlToken<'a>, parent: Option<usize>, settings: &Settings) {
338        let token = new_rc_refcell(token);
339        if let Some(parent) = parent {
340            if let Some(p) = self.tokens.get_mut(parent) {
341                let mut p_token = p.borrow_mut();
342                p_token
343                    .as_mut_open_element()
344                    .children
345                    .push(Rc::clone(&token));
346            }
347        }
348        if settings.create_position_map {
349            let t = token.borrow();
350            let t = t.as_inner_text();
351            self.insert_into_map(t.position, Rc::clone(&token), t.string.len());
352        }
353        self.tokens.push(token);
354    }
355
356    pub fn token_from_position(&self, position: FilePosition) -> Option<XmlToken<'a>> {
357        assert!(self.create_position_map);
358        if let Some(line) = self.token_map.get(&position.line) {
359            for (range, token) in line {
360                if range.contains(&position.column) {
361                    return Some(token.borrow().clone());
362                }
363            }
364        }
365        None
366    }
367}
368
369lazy_static! {
370    static ref KEY_CHARS: Vec<bool> = {
371        let mut m = vec![false; u8::MAX as usize];
372        m[b'<' as usize] = true;
373        m[b'>' as usize] = true;
374        m[b'/' as usize] = true;
375        m[b'=' as usize] = true;
376        m[b'"' as usize] = true;
377        m[b'\'' as usize] = true;
378        m[b'-' as usize] = true;
379        m[b'!' as usize] = true;
380        m[b'?' as usize] = true;
381        m
382    };
383}
384
/// Wraps `t` in a `RefCell` and then in an `Rc`, giving a shared handle with
/// interior mutability.
#[inline]
fn new_rc_refcell<T>(t: T) -> Rc<RefCell<T>> {
    let cell = RefCell::new(t);
    Rc::new(cell)
}
389
390#[derive(Debug)]
391struct Strings {
392    strings: Vec<String>,
393    map: FxHashMap<String, usize>,
394}
395
396impl Default for Strings {
397    fn default() -> Self {
398        Self {
399            strings: Vec::with_capacity(32),
400            map: FxHashMap::default(),
401        }
402    }
403}
404
405impl Strings {
406    fn get_index_or_insert(&mut self, string: &str) -> usize {
407        if let Some(a) = self.map.get(string) {
408            *a
409        } else {
410            let index = self.strings.len();
411            self.map.insert(string.to_string(), self.strings.len());
412            self.strings.push(string.to_string());
413            index
414        }
415    }
416
417    #[inline]
418    fn get(&self, index: usize) -> Cow<'_, str> {
419        Cow::Borrowed(&self.strings[index])
420    }
421}
422
423struct Tokenizer<'a> {
424    position: FilePosition,
425    strings: Strings,
426    buffer: Peekable<Iter<'a, u8>>,
427}
428
429impl<'a> Iterator for Tokenizer<'a> {
430    type Item = Token;
431
432    #[inline]
433    fn next(&mut self) -> Option<Self::Item> {
434        if let Some(v) = lexer::next(self) {
435            let position = self.position;
436            if KEY_CHARS[v as usize] {
437                return Some(Token {
438                    position,
439                    kind: TokenKind::KeyChar(v),
440                });
441            }
442            let mut text = String::with_capacity(10);
443            text.push(v as char);
444            if v.is_ascii_whitespace() {
445                while lexer::peek(&mut self.buffer)?.is_ascii_whitespace() {
446                    text.push(lexer::next(self)? as char);
447                }
448                let string_index = self.strings.get_index_or_insert(&text);
449                return Some(Token {
450                    position,
451                    kind: TokenKind::Whitespace(string_index),
452                });
453            }
454            while let Some(peeked_character) = lexer::peek(&mut self.buffer) {
455                if !peeked_character.is_ascii_whitespace() && !KEY_CHARS[peeked_character as usize]
456                {
457                    text.push(lexer::next(self)? as char);
458                } else {
459                    break;
460                }
461            }
462            let string_index = self.strings.get_index_or_insert(&text);
463            return Some(Token {
464                position,
465                kind: TokenKind::Text(string_index),
466            });
467        }
468        None
469    }
470}
471
472impl<'a> Tokenizer<'a> {
473    fn fill(&mut self) -> Vec<Token> {
474        self.collect()
475    }
476}
477
478impl<'a> XmlParser {
479    /// Initializes a [XmlParser] by reading the file into a buffer.
480    pub fn file<P: AsRef<Path>>(filepath: P) -> Result<Self, Box<dyn std::error::Error + 'static>> {
481        Ok(Self {
482            settings: Settings::default(),
483            buffer: std::fs::read(filepath)?,
484            raw_index: 0,
485            raw_tokens: Vec::new(),
486        })
487    }
488
489    /// Same as above, but also takes another argument with [Settings].
490    pub fn file_with_settings<P: AsRef<Path>>(
491        filepath: P,
492        settings: Settings,
493    ) -> Result<Self, Box<dyn std::error::Error + 'static>> {
494        Ok(Self {
495            settings,
496            buffer: std::fs::read(filepath)?,
497            raw_index: 0,
498            raw_tokens: Vec::new(),
499        })
500    }
501
502    /// Initializes a [XmlParser] by convering the string slice to a vector of bytes.
503    pub fn str(s: &str) -> Self {
504        Self {
505            settings: Settings::default(),
506            buffer: s.as_bytes().to_vec(),
507            raw_index: 0,
508            raw_tokens: Vec::new(),
509        }
510    }
511
512    /// Same as above, but also takes another argument with [Settings].
513    pub fn str_with_settings(s: &str, settings: Settings) -> Self {
514        Self {
515            settings,
516            buffer: s.as_bytes().to_vec(),
517            raw_index: 0,
518            raw_tokens: Vec::new(),
519        }
520    }
521
522    #[inline]
523    fn char_match(&self, t: &Token, c: u8, string_map: &[String]) -> bool {
524        match &t.kind {
525            TokenKind::KeyChar(kc) => *kc == c,
526            TokenKind::Text(s) => string_map[*s].as_bytes()[0] == c,
527            _ => false,
528        }
529    }
530
531    #[inline]
532    fn match_next_str(&self, characters: &str, string_map: &[String]) -> (bool, usize) {
533        let chars = characters.as_bytes();
534        let chars_count = chars.len();
535        if self.raw_index + chars_count < self.raw_tokens.len() {
536            if !self.raw_tokens[self.raw_index + 1..=self.raw_index + chars_count]
537                .iter()
538                .zip(chars)
539                .all(|(t, c)| self.char_match(t, *c, string_map))
540            {
541                return (false, 0);
542            }
543        } else {
544            return (false, 0);
545        }
546        (true, chars_count)
547    }
548
549    fn match_next_char(&self, character: u8, string_map: &[String]) -> bool {
550        if let Some(token) = self.raw_tokens.get(self.raw_index + 1) {
551            match &token.kind {
552                TokenKind::KeyChar(kc) => {
553                    if *kc == character {
554                        return true;
555                    }
556                }
557                TokenKind::Text(s) => {
558                    if string_map[*s].as_bytes()[0] == character {
559                        return true;
560                    }
561                }
562                TokenKind::Whitespace(s) => {
563                    if string_map[*s].as_bytes()[0] == character {
564                        return true;
565                    }
566                }
567            }
568        }
569        false
570    }
571
572    /// Takes the settings and does the actual parsing and returning a [ParsedXml] struct.
573    pub fn parse(mut self) -> ParsedXml<'a> {
574        use TokenKind::*;
575
576        let mut tokenizer = Tokenizer {
577            position: FilePosition::default(),
578            buffer: self.buffer.iter().peekable(),
579            strings: Strings::default(),
580        };
581        self.raw_tokens = tokenizer.fill();
582
583        let mut open_elements = VecDeque::<usize>::new();
584        let mut parsed_xml = ParsedXml {
585            create_position_map: self.settings.create_position_map,
586            ..Default::default()
587        };
588
589        'outer: while let Some(raw_token) = self.raw_tokens.get(self.raw_index) {
590            let parent = open_elements.front().copied();
591            match &raw_token.kind {
592                Text(text) => {
593                    let key_token = raw_token;
594                    self.raw_index += 1;
595                    if open_elements.is_empty() {
596                        parsed_xml.push_error(
597                            XmlError::EmptyDocument(key_token.position),
598                            &self.settings,
599                        );
600                        continue;
601                    }
602                    while let Some(token) = self.raw_tokens.get(self.raw_index) {
603                        match token.kind {
604                            KeyChar(kc) => {
605                                if kc == b'=' {
606                                    break;
607                                }
608                            }
609                            Text(..) => {
610                                parsed_xml.push_error(
611                                    XmlError::MissingValue(
612                                        tokenizer.strings.get(*text).to_string(),
613                                        key_token.position,
614                                        parent,
615                                    ),
616                                    &self.settings,
617                                );
618                                continue 'outer;
619                            }
620                            _ => {}
621                        }
622                        self.raw_index += 1;
623                    }
624                    while let Some(token) = self.raw_tokens.get(self.raw_index) {
625                        match token.kind {
626                            KeyChar(kc) => {
627                                if KEY_CHARS[kc as usize] {
628                                    if kc == b'"' || kc == b'\'' {
629                                        break;
630                                    } else if kc != b'=' {
631                                        parsed_xml.push_error(
632                                            XmlError::QuoteExpected(
633                                                tokenizer.strings.get(*text).to_string(),
634                                                key_token.position,
635                                                parent,
636                                            ),
637                                            &self.settings,
638                                        );
639                                        continue 'outer;
640                                    }
641                                }
642                            }
643                            Text(..) => {
644                                parsed_xml.push_error(
645                                    XmlError::QuoteExpected(
646                                        tokenizer.strings.get(*text).to_string(),
647                                        key_token.position,
648                                        parent,
649                                    ),
650                                    &self.settings,
651                                );
652                                continue 'outer;
653                            }
654                            _ => {}
655                        }
656                        self.raw_index += 1;
657                    }
658                    if let Some(token) = self.raw_tokens.get(self.raw_index) {
659                        if let KeyChar(attribute_value_start) = token.kind {
660                            let mut found_boundary = false;
661                            let attribute = token;
662                            let boundary_character = attribute_value_start;
663                            let mut value = String::with_capacity(10);
664                            while let Some(token) = self.raw_tokens.get(self.raw_index + 1) {
665                                match &token.kind {
666                                    KeyChar(key_char_index) => {
667                                        if *key_char_index == b'<' {
668                                            parsed_xml.push_error(
669                                                XmlError::Unescaped(
670                                                    '<',
671                                                    tokenizer.strings.get(*text).to_string(),
672                                                    raw_token.position,
673                                                    parent,
674                                                    token.position,
675                                                ),
676                                                &self.settings,
677                                            );
678                                            continue 'outer;
679                                        } else if *key_char_index == boundary_character {
680                                            let attribute = XmlToken::attribute(
681                                                tokenizer.strings.get(*text),
682                                                value,
683                                                raw_token.position,
684                                                attribute.position,
685                                                parent.map(|p| {
686                                                    Rc::clone(parsed_xml.tokens.get(p).unwrap())
687                                                }),
688                                            );
689                                            parsed_xml.push_attribute(
690                                                attribute,
691                                                parent,
692                                                &self.settings,
693                                            );
694                                            found_boundary = true;
695                                            let mut offset = 2;
696                                            while let Some(token) =
697                                                self.raw_tokens.get(self.raw_index + offset)
698                                            {
699                                                offset += 1;
700                                                match token.kind {
701                                                    KeyChar(kc) => {
702                                                        if kc == b'>'
703                                                            || (kc == b'?'
704                                                                && token.position.line == 1)
705                                                        {
706                                                            break;
707                                                        } else if kc == b'/' {
708                                                            if let Some(token) = self
709                                                                .raw_tokens
710                                                                .get(self.raw_index + offset)
711                                                            {
712                                                                if let KeyChar(b'>') = token.kind {
713                                                                    break;
714                                                                }
715                                                            }
716                                                        }
717                                                        parsed_xml.push_error(
718                                                            XmlError::ElementMustBeFollowedBy(
719                                                                tokenizer
720                                                                    .strings
721                                                                    .get(*text)
722                                                                    .to_string(),
723                                                                raw_token.position,
724                                                            ),
725                                                            &self.settings,
726                                                        );
727                                                        break;
728                                                    }
729                                                    Text(_) => {
730                                                        break;
731                                                    }
732                                                    Whitespace(_) => {
733                                                        continue;
734                                                    }
735                                                }
736                                            }
737                                            break;
738                                        }
739                                        value.push(*key_char_index as char);
740                                    }
741                                    Text(text) => {
742                                        value.push_str(&tokenizer.strings.get(*text));
743                                    }
744                                    Whitespace(whitespace) => {
745                                        value.push_str(&tokenizer.strings.get(*whitespace));
746                                    }
747                                }
748                                self.raw_index += 1;
749                            }
750                            if !found_boundary {
751                                parsed_xml.push_error(
752                                    XmlError::QuoteExpected(
753                                        tokenizer.strings.get(*text).to_string(),
754                                        raw_token.position,
755                                        parent,
756                                    ),
757                                    &self.settings,
758                                );
759                                continue 'outer;
760                            }
761                        }
762                    } else {
763                        parsed_xml.push_error(
764                            XmlError::QuoteExpected(
765                                tokenizer.strings.get(*text).to_string(),
766                                raw_token.position,
767                                parent,
768                            ),
769                            &self.settings,
770                        );
771                    }
772                }
773                KeyChar(kc) => match kc {
774                    b'<' => {
775                        if let (true, char_num) =
776                            self.match_next_str("!--", &tokenizer.strings.strings)
777                        {
778                            self.raw_index += char_num;
779                            let position = self.raw_tokens[self.raw_index].position;
780                            let mut comment = String::with_capacity(10);
781                            while let Some(raw_token) = self.raw_tokens.get(self.raw_index + 1) {
782                                if let (true, char_num) =
783                                    self.match_next_str("--", &tokenizer.strings.strings)
784                                {
785                                    self.raw_index += char_num;
786                                    if self.match_next_char(b'>', &tokenizer.strings.strings) {
787                                        self.raw_index += 1;
788                                        break;
789                                    }
790                                    if !self.settings.ignore_comments {
791                                        parsed_xml.push_error(
792                                            XmlError::NotPermittedInComments(position),
793                                            &self.settings,
794                                        );
795                                    }
796                                }
797                                if !self.settings.ignore_comments {
798                                    match &raw_token.kind {
799                                        KeyChar(kc) => {
800                                            comment.push(*kc as char);
801                                        }
802                                        Text(text) | Whitespace(text) => {
803                                            comment.push_str(&tokenizer.strings.get(*text));
804                                        }
805                                    }
806                                }
807                                self.raw_index += 1;
808                            }
809                            parsed_xml.push_comment(
810                                XmlToken::comment(comment, position),
811                                parent,
812                                &self.settings,
813                            );
814                        } else if let Some(raw_token) = self.raw_tokens.get(self.raw_index + 1) {
815                            let position = raw_token.position;
816                            match &raw_token.kind {
817                                Text(name) => {
818                                    let id = parsed_xml.tokens.len();
819                                    let token = XmlToken::open_element(
820                                        tokenizer.strings.get(*name),
821                                        id,
822                                        position,
823                                        parent
824                                            .map(|p| Rc::clone(parsed_xml.tokens.get(p).unwrap())),
825                                    );
826                                    parsed_xml.push_open_element(token, parent, &self.settings);
827                                    open_elements.push_front(id);
828                                    self.raw_index += 1;
829                                }
830                                KeyChar(kc) => {
831                                    if let b'/' = kc {
832                                        if let Some(raw_token) =
833                                            self.raw_tokens.get(self.raw_index + 2)
834                                        {
835                                            self.raw_index += 2;
836                                            if let Text(text) = &raw_token.kind {
837                                                if let Some(front) = open_elements.pop_front() {
838                                                    let (name, id) =
839                                                        if let XmlToken::OpenElement(e) =
840                                                            &*parsed_xml.tokens[front].borrow()
841                                                        {
842                                                            (Some(e.name.clone()), Some(e.id))
843                                                        } else {
844                                                            (None, None)
845                                                        };
846                                                    if let (Some(name), Some(id)) = (name, id) {
847                                                        if id != front
848                                                            || name != tokenizer.strings.get(*text)
849                                                        {
850                                                            parsed_xml.push_error(
851                                                                XmlError::OpenCloseElementMismatch(
852                                                                    tokenizer
853                                                                        .strings
854                                                                        .get(*text)
855                                                                        .to_string(),
856                                                                    name,
857                                                                    position,
858                                                                ),
859                                                                &self.settings,
860                                                            );
861                                                        }
862                                                    }
863                                                } else {
864                                                    parsed_xml.push_error(
865                                                        XmlError::OpenCloseElementsMismatch(
866                                                            position,
867                                                        ),
868                                                        &self.settings,
869                                                    );
870                                                }
871                                                let token = XmlToken::close_element(
872                                                    tokenizer.strings.get(*text),
873                                                    position,
874                                                    parent.map(|p| {
875                                                        Rc::clone(parsed_xml.tokens.get(p).unwrap())
876                                                    }),
877                                                );
878                                                parsed_xml.push_close_element(
879                                                    token,
880                                                    parent,
881                                                    &self.settings,
882                                                );
883                                                if (self.raw_index + 1) >= self.raw_tokens.len() {
884                                                    parsed_xml.push_error(
885                                                        XmlError::Expected(
886                                                            '>'.to_string(),
887                                                            position,
888                                                        ),
889                                                        &self.settings,
890                                                    );
891                                                    break;
892                                                }
893                                                while let Whitespace(..) =
894                                                    self.raw_tokens[self.raw_index + 1].kind
895                                                {
896                                                    self.raw_index += 1;
897                                                }
898                                                match self.raw_tokens[self.raw_index + 1].kind {
899                                                    KeyChar(index) => {
900                                                        if index != b'>' {
901                                                            parsed_xml.push_error(
902                                                                XmlError::Expected(
903                                                                    '>'.to_string(),
904                                                                    position,
905                                                                ),
906                                                                &self.settings,
907                                                            );
908                                                            self.raw_index += 1;
909                                                        }
910                                                    }
911                                                    _ => {
912                                                        parsed_xml.push_error(
913                                                            XmlError::Expected(
914                                                                '>'.to_string(),
915                                                                position,
916                                                            ),
917                                                            &self.settings,
918                                                        );
919                                                        self.raw_index += 1;
920                                                    }
921                                                }
922                                            }
923                                        }
924                                    } else if let b'?' = kc {
925                                        self.raw_index += 2;
926                                        if self.raw_index >= self.raw_tokens.len() {
927                                            break;
928                                        }
929                                        let parent = open_elements.front().copied();
930                                        if let Text(text) = &self.raw_tokens[self.raw_index].kind {
931                                            if tokenizer.strings.get(*text) == "xml" {
932                                                let id = parsed_xml.tokens.len();
933                                                let token = XmlToken::open_element(
934                                                    tokenizer.strings.get(*text),
935                                                    id,
936                                                    position,
937                                                    parent.map(|p| {
938                                                        Rc::clone(parsed_xml.tokens.get(p).unwrap())
939                                                    }),
940                                                );
941                                                open_elements.push_front(id);
942                                                parsed_xml.push_open_element(
943                                                    token,
944                                                    parent,
945                                                    &self.settings,
946                                                );
947                                            }
948                                        } else {
949                                            parsed_xml.push_error(
950                                                XmlError::Expected("xml".to_string(), position),
951                                                &self.settings,
952                                            );
953                                        }
954                                    }
955                                }
956                                _ => {}
957                            }
958                        }
959                    }
960                    b'/' | b'?' => {
961                        if self.match_next_char(b'>', &tokenizer.strings.strings) {
962                            open_elements.pop_front();
963                            let position = self.raw_tokens[self.raw_index].position;
964                            let token = XmlToken::close_element_quick(
965                                position,
966                                parent.map(|p| Rc::clone(parsed_xml.tokens.get(p).unwrap())),
967                            );
968                            parsed_xml.push_close_element(token, parent, &self.settings);
969                        }
970                    }
971                    b'>' => {
972                        let mut inner_text = String::new();
973                        while let Some(raw_token) = self.raw_tokens.get(self.raw_index + 1) {
974                            match &raw_token.kind {
975                                Text(text) | Whitespace(text) => {
976                                    inner_text.push_str(&tokenizer.strings.get(*text));
977                                }
978                                KeyChar(kc) => {
979                                    if *kc == b'<' {
980                                        break;
981                                    }
982                                    inner_text.push(*kc as char);
983                                }
984                            }
985                            self.raw_index += 1;
986                        }
987
988                        let token = XmlToken::inner_text(
989                            inner_text,
990                            raw_token.position,
991                            parent.map(|p| Rc::clone(parsed_xml.tokens.get(p).unwrap())),
992                        );
993                        parsed_xml.push_inner_text(token, parent, &self.settings);
994                    }
995                    _ => {}
996                },
997                _ => {}
998            }
999            self.raw_index += 1;
1000        }
1001        if let Some(last) = open_elements.iter().last() {
1002            let position = parsed_xml.tokens[*last].borrow().position();
1003            parsed_xml.push_error(
1004                XmlError::OpenCloseElementsMismatch(position),
1005                &self.settings,
1006            );
1007        }
1008        parsed_xml
1009    }
1010}