1#![warn(
16 missing_debug_implementations,
17 rust_2018_idioms,
18 missing_doc_code_examples
19)]
20
21use lazy_static::lazy_static;
22use mimalloc::MiMalloc;
23
24#[global_allocator]
25static GLOBAL: MiMalloc = MiMalloc;
26
27mod lexer;
28pub mod tokens;
30
31use crate::tokens::{TokenKind, XmlToken};
32use rustc_hash::FxHashMap;
33use std::ops::Range;
34use std::{
35 borrow::Cow, cell::RefCell, collections::VecDeque, iter::Peekable, path::Path, rc::Rc,
36 slice::Iter, str, string::String,
37};
38
39use tokens::{FilePosition, OpenElement, Token, XmlError};
40
/// Options that control how the parser treats its input.
#[derive(Debug, Clone)]
pub struct Settings {
    /// When `true`, comment text is not collected and comment tokens are neither attached to
    /// their parent element nor recorded in the position map.
    pub ignore_comments: bool,
    /// When `true`, every pushed token is also recorded in a line/column map so that it can be
    /// looked up by file position after parsing.
    pub create_position_map: bool,
}

impl Default for Settings {
    /// Default configuration: comments are ignored and no position map is built.
    fn default() -> Self {
        Settings {
            create_position_map: false,
            ignore_comments: true,
        }
    }
}
58
59#[derive(Debug)]
61pub struct XmlParser {
62 settings: Settings,
63 buffer: Vec<u8>,
64 raw_index: usize,
65 pub(crate) raw_tokens: Vec<Token>,
66}
67
68type RefXmlToken<'a> = Rc<RefCell<XmlToken<'a>>>;
69
70#[derive(Debug, Default)]
72pub struct FmtXmlError {
73 pub error: String,
74 pub position: FilePosition,
75}
76
77impl FmtXmlError {
78 pub(crate) fn new<S>(str: S, position: FilePosition) -> Self
79 where
80 S: Into<String>,
81 {
82 FmtXmlError {
83 error: str.into(),
84 position,
85 }
86 }
87}
88
89#[derive(Debug, Default)]
91pub struct ParsedXml<'a> {
92 pub tokens: Vec<RefXmlToken<'a>>,
94 token_map: FxHashMap<usize, FxHashMap<Range<usize>, RefXmlToken<'a>>>,
95 open_elements: FxHashMap<String, Vec<RefXmlToken<'a>>>,
96 pub errors: Vec<FmtXmlError>,
98 create_position_map: bool,
99}
100
101impl<'a> ParsedXml<'a> {
102 pub fn elements_from_name(&self, name: &str) -> Vec<OpenElement<'a>> {
104 self.open_elements
105 .get(name)
106 .unwrap_or(&Vec::new())
107 .iter()
108 .map(|e| e.borrow().as_open_element().clone())
109 .collect::<Vec<_>>()
110 }
111
112 pub fn elements(&self) -> Vec<OpenElement<'a>> {
114 self.open_elements
115 .values()
116 .into_iter()
117 .flatten()
118 .map(|e| e.borrow().as_open_element().clone())
119 .collect()
120 }
121
122 fn insert_into_map(&mut self, position: FilePosition, token: RefXmlToken<'a>, length: usize) {
123 assert!(self.create_position_map);
124 if let Some(l) = self.token_map.get_mut(&position.line) {
125 l.insert(
126 Range {
127 start: position.column as usize,
128 end: position.column as usize + length + 1,
129 },
130 Rc::clone(&token),
131 );
132 } else {
133 self.token_map.insert(position.line, FxHashMap::default());
134 self.token_map.get_mut(&position.line).unwrap().insert(
135 Range {
136 start: position.column,
137 end: position.column as usize + length + 1,
138 },
139 Rc::clone(&token),
140 );
141 }
142 }
143
144 fn push_error(&mut self, error: XmlError, settings: &Settings) {
145 match error {
146 XmlError::EmptyDocument(p) => {
147 self.errors.push(FmtXmlError::new("Document is empty", p));
148 }
149 XmlError::Expected(c, p) => {
150 self.errors
151 .push(FmtXmlError::new(format!("Expected {}", c), p));
152 }
153 XmlError::NotPermittedInComments(p) => {
154 self.errors
155 .push(FmtXmlError::new("-- is not permitted within comments", p));
156 }
157 XmlError::OpenCloseElementsMismatch(p) => {
158 self.errors.push(FmtXmlError::new(
159 "Mismatch between closing and opening elements",
160 p,
161 ));
162 }
163 XmlError::OpenCloseElementMismatch(s1, s2, p) => {
164 self.errors.push(FmtXmlError::new(
165 format!(
166 "Mismatch between closing {} and opening {} elements",
167 s1, s2
168 ),
169 p,
170 ));
171 }
172 XmlError::Unescaped(c, s, p, pa, ep) => {
173 self.errors.push(FmtXmlError::new(
174 format!("Unescaped {} not allowed in attribute values", c),
175 ep,
176 ));
177 let token = XmlToken::invalid_attribute(
178 s,
179 p,
180 pa.map(|p| Rc::clone(self.tokens.get(p).unwrap())),
181 );
182 self.push_attribute(token, pa, settings);
183 }
184 XmlError::MissingValue(s, p, pa) => {
185 self.errors.push(FmtXmlError::new(
186 format!("Specification mandates value for attribute {}", s),
187 p,
188 ));
189 let token = XmlToken::invalid_attribute(
190 s,
191 p,
192 pa.map(|p| Rc::clone(self.tokens.get(p).unwrap())),
193 );
194 self.push_attribute(token, pa, settings);
195 }
196 XmlError::QuoteExpected(s, p, pa) => {
197 self.errors.push(FmtXmlError::new("\" or \' expected", p));
198 let token = XmlToken::invalid_attribute(
199 s,
200 p,
201 pa.map(|p| Rc::clone(self.tokens.get(p).unwrap())),
202 );
203 self.push_attribute(token, pa, settings);
204 }
205 XmlError::ElementMustBeFollowedBy(s, p) => {
206 self.errors.push(FmtXmlError::new(format!("Element \"{}\" must be followed by either attribute specifications, \">\" or \"/>\"", s), p));
207 }
208 }
209 }
210
211 fn push_open_element(
212 &mut self,
213 token: XmlToken<'a>,
214 parent: Option<usize>,
215 settings: &Settings,
216 ) {
217 let token = new_rc_refcell(token);
218 if let Some(parent) = parent {
219 if let Some(p) = self.tokens.get_mut(parent) {
220 let mut p_token = p.borrow_mut();
221 p_token
222 .as_mut_open_element()
223 .children
224 .push(Rc::clone(&token));
225 }
226 }
227 {
228 let t = token.borrow();
229 let t = t.as_open_element();
230 if let Some(vec) = self.open_elements.get_mut(&t.name.to_string()) {
231 vec.push(Rc::clone(&token));
232 } else {
233 self.open_elements
234 .insert(t.name.to_string(), vec![Rc::clone(&token)]);
235 }
236 if settings.create_position_map {
237 self.insert_into_map(t.position, Rc::clone(&token), t.name.len());
238 }
239 }
240 self.tokens.push(token);
241 }
242
243 fn push_close_element(
244 &mut self,
245 token: XmlToken<'a>,
246 parent: Option<usize>,
247 settings: &Settings,
248 ) {
249 let token = new_rc_refcell(token);
250 if let Some(parent) = parent {
251 if let Some(p) = self.tokens.get_mut(parent) {
252 let mut p_token = p.borrow_mut();
253 p_token
254 .as_mut_open_element()
255 .children
256 .push(Rc::clone(&token));
257 }
258 }
259 {
260 let t = token.borrow();
261 let t = t.as_close_element();
262 if settings.create_position_map {
263 self.insert_into_map(
264 t.position,
265 Rc::clone(&token),
266 t.name.as_ref().map_or(0, |t| t.len()),
267 );
268 }
269 }
270 self.tokens.push(token);
271 }
272
273 fn push_attribute(&mut self, token: XmlToken<'a>, parent: Option<usize>, settings: &Settings) {
274 let token = new_rc_refcell(token);
275 {
276 let token_borrowed = token.borrow();
277 let attribute = token_borrowed.as_attribute();
278 if let Some(parent) = parent {
279 if let Some(p) = self.tokens.get_mut(parent) {
280 let mut p_token = p.borrow_mut();
281 p_token
282 .as_mut_open_element()
283 .children
284 .push(Rc::clone(&token));
285 }
286 }
287 if let Some(parent) = parent {
288 if let Some(p) = self.tokens.get_mut(parent) {
289 let mut attributes = p.borrow_mut();
290 if let Some(attrs) = attributes
291 .as_mut_open_element()
292 .attributes
293 .get_mut(&attribute.key.0)
294 {
295 attrs.push(Rc::clone(&token));
296 } else {
297 attributes
298 .as_mut_open_element()
299 .attributes
300 .insert(attribute.key.0.to_string(), vec![Rc::clone(&token)]);
301 }
302 }
303 }
304 if settings.create_position_map {
305 self.insert_into_map(attribute.key.1, Rc::clone(&token), attribute.key.0.len());
306 if let Some(value) = &attribute.value {
307 self.insert_into_map(value.1, Rc::clone(&token), value.0.len());
308 } else {
309 self.insert_into_map(attribute.key.1, Rc::clone(&token), 1);
310 }
311 }
312 }
313 self.tokens.push(token);
314 }
315
316 fn push_comment(&mut self, token: XmlToken<'a>, parent: Option<usize>, settings: &Settings) {
317 let token = new_rc_refcell(token);
318 if !settings.ignore_comments {
319 if let Some(parent) = parent {
320 if let Some(p) = self.tokens.get_mut(parent) {
321 let mut p_token = p.borrow_mut();
322 p_token
323 .as_mut_open_element()
324 .children
325 .push(Rc::clone(&token));
326 }
327 }
328 if settings.create_position_map {
329 let t = token.borrow();
330 let t = t.as_comment();
331 self.insert_into_map(t.position, Rc::clone(&token), t.string.len());
332 }
333 }
334 self.tokens.push(token);
335 }
336
337 fn push_inner_text(&mut self, token: XmlToken<'a>, parent: Option<usize>, settings: &Settings) {
338 let token = new_rc_refcell(token);
339 if let Some(parent) = parent {
340 if let Some(p) = self.tokens.get_mut(parent) {
341 let mut p_token = p.borrow_mut();
342 p_token
343 .as_mut_open_element()
344 .children
345 .push(Rc::clone(&token));
346 }
347 }
348 if settings.create_position_map {
349 let t = token.borrow();
350 let t = t.as_inner_text();
351 self.insert_into_map(t.position, Rc::clone(&token), t.string.len());
352 }
353 self.tokens.push(token);
354 }
355
356 pub fn token_from_position(&self, position: FilePosition) -> Option<XmlToken<'a>> {
357 assert!(self.create_position_map);
358 if let Some(line) = self.token_map.get(&position.line) {
359 for (range, token) in line {
360 if range.contains(&position.column) {
361 return Some(token.borrow().clone());
362 }
363 }
364 }
365 None
366 }
367}
368
369lazy_static! {
370 static ref KEY_CHARS: Vec<bool> = {
371 let mut m = vec![false; u8::MAX as usize];
372 m[b'<' as usize] = true;
373 m[b'>' as usize] = true;
374 m[b'/' as usize] = true;
375 m[b'=' as usize] = true;
376 m[b'"' as usize] = true;
377 m[b'\'' as usize] = true;
378 m[b'-' as usize] = true;
379 m[b'!' as usize] = true;
380 m[b'?' as usize] = true;
381 m
382 };
383}
384
/// Wraps `t` in a `RefCell` and places it behind a reference-counted pointer.
#[inline]
fn new_rc_refcell<T>(t: T) -> Rc<RefCell<T>> {
    let cell = RefCell::new(t);
    Rc::new(cell)
}
389
390#[derive(Debug)]
391struct Strings {
392 strings: Vec<String>,
393 map: FxHashMap<String, usize>,
394}
395
396impl Default for Strings {
397 fn default() -> Self {
398 Self {
399 strings: Vec::with_capacity(32),
400 map: FxHashMap::default(),
401 }
402 }
403}
404
405impl Strings {
406 fn get_index_or_insert(&mut self, string: &str) -> usize {
407 if let Some(a) = self.map.get(string) {
408 *a
409 } else {
410 let index = self.strings.len();
411 self.map.insert(string.to_string(), self.strings.len());
412 self.strings.push(string.to_string());
413 index
414 }
415 }
416
417 #[inline]
418 fn get(&self, index: usize) -> Cow<'_, str> {
419 Cow::Borrowed(&self.strings[index])
420 }
421}
422
423struct Tokenizer<'a> {
424 position: FilePosition,
425 strings: Strings,
426 buffer: Peekable<Iter<'a, u8>>,
427}
428
429impl<'a> Iterator for Tokenizer<'a> {
430 type Item = Token;
431
432 #[inline]
433 fn next(&mut self) -> Option<Self::Item> {
434 if let Some(v) = lexer::next(self) {
435 let position = self.position;
436 if KEY_CHARS[v as usize] {
437 return Some(Token {
438 position,
439 kind: TokenKind::KeyChar(v),
440 });
441 }
442 let mut text = String::with_capacity(10);
443 text.push(v as char);
444 if v.is_ascii_whitespace() {
445 while lexer::peek(&mut self.buffer)?.is_ascii_whitespace() {
446 text.push(lexer::next(self)? as char);
447 }
448 let string_index = self.strings.get_index_or_insert(&text);
449 return Some(Token {
450 position,
451 kind: TokenKind::Whitespace(string_index),
452 });
453 }
454 while let Some(peeked_character) = lexer::peek(&mut self.buffer) {
455 if !peeked_character.is_ascii_whitespace() && !KEY_CHARS[peeked_character as usize]
456 {
457 text.push(lexer::next(self)? as char);
458 } else {
459 break;
460 }
461 }
462 let string_index = self.strings.get_index_or_insert(&text);
463 return Some(Token {
464 position,
465 kind: TokenKind::Text(string_index),
466 });
467 }
468 None
469 }
470}
471
472impl<'a> Tokenizer<'a> {
473 fn fill(&mut self) -> Vec<Token> {
474 self.collect()
475 }
476}
477
478impl<'a> XmlParser {
479 pub fn file<P: AsRef<Path>>(filepath: P) -> Result<Self, Box<dyn std::error::Error + 'static>> {
481 Ok(Self {
482 settings: Settings::default(),
483 buffer: std::fs::read(filepath)?,
484 raw_index: 0,
485 raw_tokens: Vec::new(),
486 })
487 }
488
489 pub fn file_with_settings<P: AsRef<Path>>(
491 filepath: P,
492 settings: Settings,
493 ) -> Result<Self, Box<dyn std::error::Error + 'static>> {
494 Ok(Self {
495 settings,
496 buffer: std::fs::read(filepath)?,
497 raw_index: 0,
498 raw_tokens: Vec::new(),
499 })
500 }
501
502 pub fn str(s: &str) -> Self {
504 Self {
505 settings: Settings::default(),
506 buffer: s.as_bytes().to_vec(),
507 raw_index: 0,
508 raw_tokens: Vec::new(),
509 }
510 }
511
512 pub fn str_with_settings(s: &str, settings: Settings) -> Self {
514 Self {
515 settings,
516 buffer: s.as_bytes().to_vec(),
517 raw_index: 0,
518 raw_tokens: Vec::new(),
519 }
520 }
521
522 #[inline]
523 fn char_match(&self, t: &Token, c: u8, string_map: &[String]) -> bool {
524 match &t.kind {
525 TokenKind::KeyChar(kc) => *kc == c,
526 TokenKind::Text(s) => string_map[*s].as_bytes()[0] == c,
527 _ => false,
528 }
529 }
530
531 #[inline]
532 fn match_next_str(&self, characters: &str, string_map: &[String]) -> (bool, usize) {
533 let chars = characters.as_bytes();
534 let chars_count = chars.len();
535 if self.raw_index + chars_count < self.raw_tokens.len() {
536 if !self.raw_tokens[self.raw_index + 1..=self.raw_index + chars_count]
537 .iter()
538 .zip(chars)
539 .all(|(t, c)| self.char_match(t, *c, string_map))
540 {
541 return (false, 0);
542 }
543 } else {
544 return (false, 0);
545 }
546 (true, chars_count)
547 }
548
549 fn match_next_char(&self, character: u8, string_map: &[String]) -> bool {
550 if let Some(token) = self.raw_tokens.get(self.raw_index + 1) {
551 match &token.kind {
552 TokenKind::KeyChar(kc) => {
553 if *kc == character {
554 return true;
555 }
556 }
557 TokenKind::Text(s) => {
558 if string_map[*s].as_bytes()[0] == character {
559 return true;
560 }
561 }
562 TokenKind::Whitespace(s) => {
563 if string_map[*s].as_bytes()[0] == character {
564 return true;
565 }
566 }
567 }
568 }
569 false
570 }
571
572 pub fn parse(mut self) -> ParsedXml<'a> {
574 use TokenKind::*;
575
576 let mut tokenizer = Tokenizer {
577 position: FilePosition::default(),
578 buffer: self.buffer.iter().peekable(),
579 strings: Strings::default(),
580 };
581 self.raw_tokens = tokenizer.fill();
582
583 let mut open_elements = VecDeque::<usize>::new();
584 let mut parsed_xml = ParsedXml {
585 create_position_map: self.settings.create_position_map,
586 ..Default::default()
587 };
588
589 'outer: while let Some(raw_token) = self.raw_tokens.get(self.raw_index) {
590 let parent = open_elements.front().copied();
591 match &raw_token.kind {
592 Text(text) => {
593 let key_token = raw_token;
594 self.raw_index += 1;
595 if open_elements.is_empty() {
596 parsed_xml.push_error(
597 XmlError::EmptyDocument(key_token.position),
598 &self.settings,
599 );
600 continue;
601 }
602 while let Some(token) = self.raw_tokens.get(self.raw_index) {
603 match token.kind {
604 KeyChar(kc) => {
605 if kc == b'=' {
606 break;
607 }
608 }
609 Text(..) => {
610 parsed_xml.push_error(
611 XmlError::MissingValue(
612 tokenizer.strings.get(*text).to_string(),
613 key_token.position,
614 parent,
615 ),
616 &self.settings,
617 );
618 continue 'outer;
619 }
620 _ => {}
621 }
622 self.raw_index += 1;
623 }
624 while let Some(token) = self.raw_tokens.get(self.raw_index) {
625 match token.kind {
626 KeyChar(kc) => {
627 if KEY_CHARS[kc as usize] {
628 if kc == b'"' || kc == b'\'' {
629 break;
630 } else if kc != b'=' {
631 parsed_xml.push_error(
632 XmlError::QuoteExpected(
633 tokenizer.strings.get(*text).to_string(),
634 key_token.position,
635 parent,
636 ),
637 &self.settings,
638 );
639 continue 'outer;
640 }
641 }
642 }
643 Text(..) => {
644 parsed_xml.push_error(
645 XmlError::QuoteExpected(
646 tokenizer.strings.get(*text).to_string(),
647 key_token.position,
648 parent,
649 ),
650 &self.settings,
651 );
652 continue 'outer;
653 }
654 _ => {}
655 }
656 self.raw_index += 1;
657 }
658 if let Some(token) = self.raw_tokens.get(self.raw_index) {
659 if let KeyChar(attribute_value_start) = token.kind {
660 let mut found_boundary = false;
661 let attribute = token;
662 let boundary_character = attribute_value_start;
663 let mut value = String::with_capacity(10);
664 while let Some(token) = self.raw_tokens.get(self.raw_index + 1) {
665 match &token.kind {
666 KeyChar(key_char_index) => {
667 if *key_char_index == b'<' {
668 parsed_xml.push_error(
669 XmlError::Unescaped(
670 '<',
671 tokenizer.strings.get(*text).to_string(),
672 raw_token.position,
673 parent,
674 token.position,
675 ),
676 &self.settings,
677 );
678 continue 'outer;
679 } else if *key_char_index == boundary_character {
680 let attribute = XmlToken::attribute(
681 tokenizer.strings.get(*text),
682 value,
683 raw_token.position,
684 attribute.position,
685 parent.map(|p| {
686 Rc::clone(parsed_xml.tokens.get(p).unwrap())
687 }),
688 );
689 parsed_xml.push_attribute(
690 attribute,
691 parent,
692 &self.settings,
693 );
694 found_boundary = true;
695 let mut offset = 2;
696 while let Some(token) =
697 self.raw_tokens.get(self.raw_index + offset)
698 {
699 offset += 1;
700 match token.kind {
701 KeyChar(kc) => {
702 if kc == b'>'
703 || (kc == b'?'
704 && token.position.line == 1)
705 {
706 break;
707 } else if kc == b'/' {
708 if let Some(token) = self
709 .raw_tokens
710 .get(self.raw_index + offset)
711 {
712 if let KeyChar(b'>') = token.kind {
713 break;
714 }
715 }
716 }
717 parsed_xml.push_error(
718 XmlError::ElementMustBeFollowedBy(
719 tokenizer
720 .strings
721 .get(*text)
722 .to_string(),
723 raw_token.position,
724 ),
725 &self.settings,
726 );
727 break;
728 }
729 Text(_) => {
730 break;
731 }
732 Whitespace(_) => {
733 continue;
734 }
735 }
736 }
737 break;
738 }
739 value.push(*key_char_index as char);
740 }
741 Text(text) => {
742 value.push_str(&tokenizer.strings.get(*text));
743 }
744 Whitespace(whitespace) => {
745 value.push_str(&tokenizer.strings.get(*whitespace));
746 }
747 }
748 self.raw_index += 1;
749 }
750 if !found_boundary {
751 parsed_xml.push_error(
752 XmlError::QuoteExpected(
753 tokenizer.strings.get(*text).to_string(),
754 raw_token.position,
755 parent,
756 ),
757 &self.settings,
758 );
759 continue 'outer;
760 }
761 }
762 } else {
763 parsed_xml.push_error(
764 XmlError::QuoteExpected(
765 tokenizer.strings.get(*text).to_string(),
766 raw_token.position,
767 parent,
768 ),
769 &self.settings,
770 );
771 }
772 }
773 KeyChar(kc) => match kc {
774 b'<' => {
775 if let (true, char_num) =
776 self.match_next_str("!--", &tokenizer.strings.strings)
777 {
778 self.raw_index += char_num;
779 let position = self.raw_tokens[self.raw_index].position;
780 let mut comment = String::with_capacity(10);
781 while let Some(raw_token) = self.raw_tokens.get(self.raw_index + 1) {
782 if let (true, char_num) =
783 self.match_next_str("--", &tokenizer.strings.strings)
784 {
785 self.raw_index += char_num;
786 if self.match_next_char(b'>', &tokenizer.strings.strings) {
787 self.raw_index += 1;
788 break;
789 }
790 if !self.settings.ignore_comments {
791 parsed_xml.push_error(
792 XmlError::NotPermittedInComments(position),
793 &self.settings,
794 );
795 }
796 }
797 if !self.settings.ignore_comments {
798 match &raw_token.kind {
799 KeyChar(kc) => {
800 comment.push(*kc as char);
801 }
802 Text(text) | Whitespace(text) => {
803 comment.push_str(&tokenizer.strings.get(*text));
804 }
805 }
806 }
807 self.raw_index += 1;
808 }
809 parsed_xml.push_comment(
810 XmlToken::comment(comment, position),
811 parent,
812 &self.settings,
813 );
814 } else if let Some(raw_token) = self.raw_tokens.get(self.raw_index + 1) {
815 let position = raw_token.position;
816 match &raw_token.kind {
817 Text(name) => {
818 let id = parsed_xml.tokens.len();
819 let token = XmlToken::open_element(
820 tokenizer.strings.get(*name),
821 id,
822 position,
823 parent
824 .map(|p| Rc::clone(parsed_xml.tokens.get(p).unwrap())),
825 );
826 parsed_xml.push_open_element(token, parent, &self.settings);
827 open_elements.push_front(id);
828 self.raw_index += 1;
829 }
830 KeyChar(kc) => {
831 if let b'/' = kc {
832 if let Some(raw_token) =
833 self.raw_tokens.get(self.raw_index + 2)
834 {
835 self.raw_index += 2;
836 if let Text(text) = &raw_token.kind {
837 if let Some(front) = open_elements.pop_front() {
838 let (name, id) =
839 if let XmlToken::OpenElement(e) =
840 &*parsed_xml.tokens[front].borrow()
841 {
842 (Some(e.name.clone()), Some(e.id))
843 } else {
844 (None, None)
845 };
846 if let (Some(name), Some(id)) = (name, id) {
847 if id != front
848 || name != tokenizer.strings.get(*text)
849 {
850 parsed_xml.push_error(
851 XmlError::OpenCloseElementMismatch(
852 tokenizer
853 .strings
854 .get(*text)
855 .to_string(),
856 name,
857 position,
858 ),
859 &self.settings,
860 );
861 }
862 }
863 } else {
864 parsed_xml.push_error(
865 XmlError::OpenCloseElementsMismatch(
866 position,
867 ),
868 &self.settings,
869 );
870 }
871 let token = XmlToken::close_element(
872 tokenizer.strings.get(*text),
873 position,
874 parent.map(|p| {
875 Rc::clone(parsed_xml.tokens.get(p).unwrap())
876 }),
877 );
878 parsed_xml.push_close_element(
879 token,
880 parent,
881 &self.settings,
882 );
883 if (self.raw_index + 1) >= self.raw_tokens.len() {
884 parsed_xml.push_error(
885 XmlError::Expected(
886 '>'.to_string(),
887 position,
888 ),
889 &self.settings,
890 );
891 break;
892 }
893 while let Whitespace(..) =
894 self.raw_tokens[self.raw_index + 1].kind
895 {
896 self.raw_index += 1;
897 }
898 match self.raw_tokens[self.raw_index + 1].kind {
899 KeyChar(index) => {
900 if index != b'>' {
901 parsed_xml.push_error(
902 XmlError::Expected(
903 '>'.to_string(),
904 position,
905 ),
906 &self.settings,
907 );
908 self.raw_index += 1;
909 }
910 }
911 _ => {
912 parsed_xml.push_error(
913 XmlError::Expected(
914 '>'.to_string(),
915 position,
916 ),
917 &self.settings,
918 );
919 self.raw_index += 1;
920 }
921 }
922 }
923 }
924 } else if let b'?' = kc {
925 self.raw_index += 2;
926 if self.raw_index >= self.raw_tokens.len() {
927 break;
928 }
929 let parent = open_elements.front().copied();
930 if let Text(text) = &self.raw_tokens[self.raw_index].kind {
931 if tokenizer.strings.get(*text) == "xml" {
932 let id = parsed_xml.tokens.len();
933 let token = XmlToken::open_element(
934 tokenizer.strings.get(*text),
935 id,
936 position,
937 parent.map(|p| {
938 Rc::clone(parsed_xml.tokens.get(p).unwrap())
939 }),
940 );
941 open_elements.push_front(id);
942 parsed_xml.push_open_element(
943 token,
944 parent,
945 &self.settings,
946 );
947 }
948 } else {
949 parsed_xml.push_error(
950 XmlError::Expected("xml".to_string(), position),
951 &self.settings,
952 );
953 }
954 }
955 }
956 _ => {}
957 }
958 }
959 }
960 b'/' | b'?' => {
961 if self.match_next_char(b'>', &tokenizer.strings.strings) {
962 open_elements.pop_front();
963 let position = self.raw_tokens[self.raw_index].position;
964 let token = XmlToken::close_element_quick(
965 position,
966 parent.map(|p| Rc::clone(parsed_xml.tokens.get(p).unwrap())),
967 );
968 parsed_xml.push_close_element(token, parent, &self.settings);
969 }
970 }
971 b'>' => {
972 let mut inner_text = String::new();
973 while let Some(raw_token) = self.raw_tokens.get(self.raw_index + 1) {
974 match &raw_token.kind {
975 Text(text) | Whitespace(text) => {
976 inner_text.push_str(&tokenizer.strings.get(*text));
977 }
978 KeyChar(kc) => {
979 if *kc == b'<' {
980 break;
981 }
982 inner_text.push(*kc as char);
983 }
984 }
985 self.raw_index += 1;
986 }
987
988 let token = XmlToken::inner_text(
989 inner_text,
990 raw_token.position,
991 parent.map(|p| Rc::clone(parsed_xml.tokens.get(p).unwrap())),
992 );
993 parsed_xml.push_inner_text(token, parent, &self.settings);
994 }
995 _ => {}
996 },
997 _ => {}
998 }
999 self.raw_index += 1;
1000 }
1001 if let Some(last) = open_elements.iter().last() {
1002 let position = parsed_xml.tokens[*last].borrow().position();
1003 parsed_xml.push_error(
1004 XmlError::OpenCloseElementsMismatch(position),
1005 &self.settings,
1006 );
1007 }
1008 parsed_xml
1009 }
1010}