1use std::{
12 borrow::Cow,
13 fmt::{Debug, Display},
14 ops::Range,
15};
16
17use crate::{
18 escape::unescape,
19 lut::{is_invalid_attribute_name, is_invalid_name, is_whitespace},
20};
21
/// A start tag (`<name ...>`) or empty-element tag (`<name ... />`) borrowed from the input.
///
/// All offsets are relative to the beginning of `text`.
#[derive(Debug, Clone, Copy)]
pub struct StartEvent<'a> {
    /// The complete tag, from the opening `<` through the closing `>`.
    text: &'a str,
    /// Offset of the `:` that ends the namespace prefix, or `0` when the tag has no prefix.
    prefix_end: usize,
    /// Offset one past the last byte of the (possibly prefixed) element name.
    name_end: usize,
}
30
31impl<'a> StartEvent<'a> {
32 pub fn prefix(&self) -> Option<&'a str> {
34 (self.prefix_end > 0).then(|| &self.text[1..self.prefix_end])
35 }
36
37 pub fn name(&self) -> &'a str {
39 &self.text[self.prefix_end + 1..self.name_end]
40 }
41
42 pub fn is_empty(&self) -> bool {
50 self.text.as_bytes()[self.text.len() - 2] == b'/'
51 }
52
53 pub fn position_in(&self, reader: &Reader) -> Range<usize> {
59 reader.range_for_ptrs(self.text.as_bytes().as_ptr_range())
60 }
61
62 pub fn name_position_in(&self, reader: &Reader) -> Range<usize> {
68 reader.range_for_ptrs(self.name().as_bytes().as_ptr_range())
69 }
70
71 pub fn prefix_position_in(&self, reader: &Reader) -> Option<Range<usize>> {
77 (self.prefix_end > 0)
78 .then(|| reader.range_for_ptrs(self.text.as_bytes()[1..self.prefix_end].as_ptr_range()))
79 }
80
81 pub fn prefixed_name_position_in(&self, reader: &Reader) -> Range<usize> {
87 reader.range_for_ptrs(self.text.as_bytes()[1..self.name_end].as_ptr_range())
88 }
89
90 pub fn attributes(&self) -> Attributes<'a> {
92 Attributes(ParsingBuffer::new(&self.text[self.name_end..]))
93 }
94}
95
/// A single `name="value"` attribute borrowed from a start tag.
///
/// All offsets are relative to the beginning of `text`.
#[derive(Debug, Clone, Copy)]
pub struct AttributeEvent<'a> {
    /// The attribute text, from the first byte of the name through the closing quote.
    pub(crate) text: &'a str,
    /// Offset one past the last byte of the attribute name.
    name_end: usize,
    /// Offset of the first byte of the value, i.e. just after the opening quote.
    value_start: usize,
}
103
/// The quote character that delimits an attribute value.
///
/// Each variant's discriminant is the ASCII code of the quote it represents.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AttributeQuote {
    /// The value is enclosed in `'`.
    Single = b'\'',
    /// The value is enclosed in `"`.
    Double = b'\"',
}

impl AttributeQuote {
    /// Returns the quote as a `char` (`'\''` or `'"'`).
    pub fn to_char(self) -> char {
        // The discriminant is the ASCII byte, so the cast chain is lossless.
        self as u8 as char
    }
}
119
120impl<'a> AttributeEvent<'a> {
121 pub fn name(&self) -> &'a str {
123 &self.text[..self.name_end]
124 }
125
126 pub fn value(&self) -> Cow<'a, str> {
128 unescape(self.raw_value())
129 }
130
131 pub fn raw_value(&self) -> &'a str {
133 &self.text[self.value_start..self.text.len() - 1]
134 }
135
136 pub fn quote(&self) -> AttributeQuote {
138 match self.text.bytes().last().unwrap() {
139 b'\'' => AttributeQuote::Single,
140 b'\"' => AttributeQuote::Double,
141 _ => unreachable!(),
142 }
143 }
144
145 pub fn position_in(&self, reader: &Reader) -> Range<usize> {
151 reader.range_for_ptrs(self.text.as_bytes().as_ptr_range())
152 }
153
154 pub fn name_position_in(&self, reader: &Reader) -> Range<usize> {
160 reader.range_for_ptrs(self.name().as_bytes().as_ptr_range())
161 }
162
163 pub fn value_position_in(&self, reader: &Reader) -> Range<usize> {
169 reader.range_for_ptrs(self.raw_value().as_bytes().as_ptr_range())
170 }
171}
172
/// An end tag (`</name>`) borrowed from the input.
///
/// All offsets are relative to the beginning of `text`.
#[derive(Debug, Clone, Copy)]
pub struct EndEvent<'a> {
    /// The complete tag, from the opening `</` through the closing `>`.
    text: &'a str,
    /// Offset of the `:` that ends the namespace prefix, or `1` (the `/`) when there is no prefix.
    prefix_end: usize,
    /// Offset one past the last byte of the (possibly prefixed) element name.
    name_end: usize,
}
180
181impl<'a> EndEvent<'a> {
182 pub fn prefix(&self) -> Option<&'a str> {
184 (self.prefix_end != 1).then(|| &self.text[2..self.prefix_end])
185 }
186
187 pub fn name(&self) -> &'a str {
189 debug_assert_ne!(self.prefix_end, 0);
190 &self.text[self.prefix_end + 1..self.name_end]
191 }
192
193 pub fn position_in(&self, reader: &Reader) -> Range<usize> {
199 reader.range_for_ptrs(self.text.as_bytes().as_ptr_range())
200 }
201}
202
// Generates an event type that wraps a single `&str` slice of the input.
//
// Arguments, in order:
// - `$name`: name of the generated struct;
// - optional `$prefix, $suffix`: literal delimiters whose lengths are cut from both ends of
//   the stored text by the content accessor;
// - `$content_type`: name of the borrowed accessor, either `content` or `raw_content`;
// - `$emitted_by_what`, `$what_content`: fragments spliced into the generated doc strings.
macro_rules! simple_text_event {
    // Internal rule: when the borrowed accessor is `raw_content`, also generate a `content`
    // accessor that passes the raw text through `unescape`.
    (@mkunescape raw_content) => {
        /// Returns this event's content after passing it through `unescape`.
        pub fn content(&self) -> Cow<'a, str> {
            unescape(self.raw_content())
        }
    };
    // Internal rule: when the borrowed accessor is already `content`, nothing extra is needed.
    (@mkunescape content) => {};

    ($name: ident$(, $prefix: literal, $suffix: literal)?, $content_type: ident, $emitted_by_what: literal, $what_content: literal) => {
        #[derive(Debug, Clone, Copy)]
        #[doc = concat!("An event emitted by ", $emitted_by_what, ".")]
        pub struct $name<'a> {
            pub(crate) text: &'a str,
        }

        impl<'a> $name<'a> {
            simple_text_event!(@mkunescape $content_type);

            #[doc = concat!("Returns this event's ", $what_content, " content.")]
            pub fn $content_type(&self) -> &'a str {
                // Only when delimiters were supplied is the text sliced; otherwise the whole
                // stored text is returned.
                &self.text$([$prefix.len()..self.text.len() - $suffix.len()])?
            }

            /// Returns the byte range of the whole event inside `parser`'s buffer.
            pub fn position_in(&self, parser: &Reader) -> Range<usize> {
                parser.range_for_ptrs(self.text.as_bytes().as_ptr_range())
            }
        }
    };
}
238
// Character data between tags: no delimiters; exposes `raw_content()` plus an unescaping
// `content()`.
simple_text_event!(TextEvent, raw_content, "text content", "escaped");
// `<![CDATA[ ... ]]>` sections: `content()` strips the delimiters and is not unescaped.
simple_text_event!(
    CDataEvent,
    "<![CDATA[",
    "]]>",
    content,
    "cdata",
    "unescaped"
);
// `<!-- ... -->` comments: `content()` strips the delimiters and is not unescaped.
simple_text_event!(
    CommentEvent,
    "<!--",
    "-->",
    content,
    "comments",
    "unescaped"
);
// `<!DOCTYPE ... >` declarations: the prefix length also covers the single whitespace byte
// that follows the keyword.
simple_text_event!(
    DoctypeEvent,
    "<!DOCTYPE ",
    ">",
    content,
    "doctype declarations",
    "unescaped"
);
264
/// A single item produced by iterating a [`Reader`].
#[derive(Debug, Clone, Copy)]
pub enum Event<'a> {
    /// A start tag, `<name ...>`; increases the reader's depth by one.
    Start(StartEvent<'a>),
    /// An end tag, `</name>`; decreases the reader's depth by one.
    End(EndEvent<'a>),
    /// An empty-element tag, `<name ... />`; the depth is left unchanged.
    Empty(StartEvent<'a>),
    /// Character data between tags.
    Text(TextEvent<'a>),
    /// A `<![CDATA[ ... ]]>` section.
    CData(CDataEvent<'a>),
    /// A `<!-- ... -->` comment.
    Comment(CommentEvent<'a>),
    /// A `<!DOCTYPE ... >` declaration.
    Doctype(DoctypeEvent<'a>),
}
283
/// The category of a parse [`Error`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ErrorKind {
    /// Non-whitespace text appeared outside of any element.
    TopLevelText,
    /// A `<?` was never terminated by `?>`.
    UnclosedPITag,

    /// No name was found where an element name was required.
    ExpectedElementName,
    /// The element name is invalid.
    InvalidElementName,
    /// A start tag was not terminated by `>` or `/`.
    UnclosedElementTag,
    /// The `/` of an empty-element tag was not followed by `>`.
    UnclosedEmptyElementTag,
    /// An end tag's name was not followed by `>`.
    UnclosedEndTag,
    /// The input ended while an element was still open.
    UnclosedElement,

    /// An attribute name was not followed by `=`.
    ExpectedAttributeEq,
    /// The `=` of an attribute was not followed by a quoted value.
    ExpectedAttributeValue,
    /// An attribute value contains a null byte.
    InvalidAttributeValue,
    /// An attribute value's closing quote is missing.
    UnclosedAttributeValue,

    /// A `<!--` was never terminated by `-->`.
    UnclosedComment,
    /// A `<![CDATA[` was never terminated by `]]>`.
    UnclosedCData,
    /// An unrecognised `<!` construct was never terminated by `>`.
    UnclosedUnknownSpecial,
    /// The input ended inside a `<!DOCTYPE` declaration.
    DoctypeEof,
}

impl ErrorKind {
    /// Returns a short, static, human-readable description of this error kind.
    pub fn message(&self) -> &'static str {
        match *self {
            ErrorKind::TopLevelText => "top-level text is forbidden",
            ErrorKind::UnclosedPITag => "unclosed processing instruction",

            ErrorKind::ExpectedElementName => "expected element name",
            ErrorKind::InvalidElementName => "invalid element name",
            ErrorKind::UnclosedElementTag => "expected a `>` or `/`",
            // Both kinds are missing the same terminator.
            ErrorKind::UnclosedEmptyElementTag | ErrorKind::UnclosedEndTag => "expected a `>`",
            ErrorKind::UnclosedElement => "unclosed element",

            ErrorKind::ExpectedAttributeEq => "expected `=` after attribute name",
            ErrorKind::ExpectedAttributeValue => {
                "expected an attribute value enclosed in either `'` or `\"`"
            }
            ErrorKind::InvalidAttributeValue => "attribute value contains null byte",
            ErrorKind::UnclosedAttributeValue => "unclosed attribute value",

            ErrorKind::UnclosedComment => "unclosed comment",
            ErrorKind::UnclosedCData => "unclosed cdata",
            ErrorKind::UnclosedUnknownSpecial => "unclosed unknown <! tag",
            ErrorKind::DoctypeEof => "unexpected end of file in <!DOCTYPE",
        }
    }
}

impl Display for ErrorKind {
    /// Writes [`ErrorKind::message`] verbatim; width and alignment flags are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let description = self.message();
        f.write_str(description)
    }
}
358
359#[derive(Clone)]
360pub struct Error {
362 kind: ErrorKind,
363 span: Range<usize>,
364}
365
366impl Error {
367 fn new(kind: ErrorKind, span: Range<usize>) -> Self {
368 Self { kind, span }
369 }
370
371 pub fn kind(&self) -> ErrorKind {
373 self.kind
374 }
375
376 pub fn span(&self) -> Range<usize> {
378 self.span.clone()
379 }
380}
381
382impl std::error::Error for Error {}
383
384impl Debug for Error {
385 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
386 <Self as Display>::fmt(self, f)
387 }
388}
389
390impl Display for Error {
391 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
392 write!(f, "parse error at {:?}: {}", self.span, self.kind)
393 }
394}
395
396struct ParsingBuffer<'a> {
397 text: &'a str,
398 current: usize,
399}
400
401impl<'a> ParsingBuffer<'a> {
402 pub fn new(text: &'a str) -> Self {
403 Self { text, current: 0 }
404 }
405
406 #[inline]
407 fn empty_range_here(&self) -> Range<usize> {
408 self.current..self.current
409 }
410
411 #[inline]
412 fn char_range_here(&self) -> Range<usize> {
413 self.current..(self.current + 1).min(self.text.len())
414 }
415
416 #[inline]
417 fn as_bytes(&self) -> &'a [u8] {
418 self.text.as_bytes()
419 }
420
421 #[inline(always)]
422 fn byte(&self, idx: usize) -> Option<u8> {
423 self.as_bytes().get(idx).copied()
424 }
425
426 #[inline]
427 fn position_or_end(&self, start: usize, fun: impl Fn(u8) -> bool) -> usize {
428 let mut current = start;
429 loop {
431 match self.as_bytes().get(current) {
432 Some(&b) if fun(b) => return current,
433 Some(_) => current += 1,
434 None => return self.text.len(),
435 }
436 }
437 }
438
439 #[inline]
440 fn memchr(&self, start: usize, needle: u8) -> Option<usize> {
441 memchr::memchr(needle, &self.text.as_bytes()[start..]).map(|i| i + start)
442 }
443
444 #[inline]
445 fn memchr2(&self, start: usize, needle1: u8, needle2: u8) -> Option<usize> {
446 memchr::memchr2(needle1, needle2, &self.text.as_bytes()[start..]).map(|i| i + start)
447 }
448
449 #[inline]
450 fn memmem(&self, needle: &[u8]) -> Option<usize> {
451 memchr::memmem::find(&self.text.as_bytes()[self.current..], needle)
452 .map(|value| value + self.current)
453 }
454
455 #[inline]
456 fn skip_whitespace(&mut self) {
457 self.current = self.position_or_end(self.current, |b| !is_whitespace(b));
458 }
459}
460
461pub struct Attributes<'a>(ParsingBuffer<'a>);
463
464impl<'a> Iterator for Attributes<'a> {
465 type Item = AttributeEvent<'a>;
466
467 fn next(&mut self) -> Option<Self::Item> {
468 self.0.skip_whitespace();
469
470 let name_start = self.0.current;
471 let name_end = self
472 .0
473 .position_or_end(self.0.current, is_invalid_attribute_name);
474 if name_end == self.0.current {
475 return None;
476 }
477 self.0.current = name_end;
478
479 self.0.skip_whitespace();
480 self.0.current += 1;
481 self.0.skip_whitespace();
482
483 let quote = self.0.byte(self.0.current).unwrap();
484
485 self.0.current += 1;
486
487 let value_start = self.0.current;
488 let value_end = self.0.memchr(self.0.current, quote).unwrap();
489
490 self.0.current = value_end + 1;
491
492 Some(AttributeEvent {
493 text: &self.0.text[name_start..self.0.current],
494 name_end: name_end - name_start,
495 value_start: value_start - name_start,
496 })
497 }
498}
499
/// Leniency switches for a [`Reader`]; every switch defaults to `false` (strict).
#[non_exhaustive]
#[derive(Default, Debug, Clone)]
pub struct Options {
    /// Emit text found outside of any element as text events instead of rejecting it.
    allow_top_level_text: bool,
    /// Accept end tags that appear while no element is open.
    allow_unmatched_closing_tags: bool,
    /// Finish quietly, rather than with an error, when input ends with elements still open.
    allow_unclosed_tags: bool,
}

impl Options {
    /// Sets whether text outside of any element is allowed.
    pub fn allow_top_level_text(self, value: bool) -> Self {
        Self {
            allow_top_level_text: value,
            ..self
        }
    }

    /// Sets whether end tags without a matching open element are allowed.
    pub fn allow_unmatched_closing_tags(self, value: bool) -> Self {
        Self {
            allow_unmatched_closing_tags: value,
            ..self
        }
    }

    /// Sets whether the input may end while elements are still open.
    pub fn allow_unclosed_tags(self, value: bool) -> Self {
        Self {
            allow_unclosed_tags: value,
            ..self
        }
    }
}
528
/// A pull parser that yields [`Event`]s borrowed from the input string.
///
/// Iterate it to obtain `Result<Event, Error>` items.
pub struct Reader<'a> {
    /// The input text together with the current byte cursor.
    buffer: ParsingBuffer<'a>,
    /// Number of elements currently open (start tags seen minus end tags seen).
    depth: u32,
    /// Leniency switches chosen at construction time.
    options: Options,
}
535
536impl<'a> Reader<'a> {
537 pub fn new(text: &'a str) -> Self {
539 Self {
540 buffer: ParsingBuffer::new(text),
541 depth: 0,
542 options: Default::default(),
543 }
544 }
545
546 pub fn with_options(text: &'a str, options: Options) -> Self {
548 Self {
549 buffer: ParsingBuffer::new(text),
550 depth: 0,
551 options,
552 }
553 }
554
    /// Returns the complete input text this reader was created with.
    pub fn buffer(&self) -> &'a str {
        self.buffer.text
    }
559
    /// Returns how many elements are currently open.
    pub fn depth(&self) -> u32 {
        self.depth
    }
564
565 fn range_for_ptrs(&self, range: Range<*const u8>) -> Range<usize> {
566 let self_range = self.buffer.as_bytes().as_ptr_range();
567 assert!(
568 self_range.start <= range.start
569 && self_range.end >= range.end
570 && range.start <= range.end,
571 "Parser::range_for_ptrs called with invalid pointer range"
572 );
573
574 range.start.addr() - self_range.start.addr()..range.end.addr() - self_range.start.addr()
575 }
576
577 fn set_error_state(&mut self) {
578 self.buffer.current = self.buffer.text.len();
579 self.depth = 0;
580 }
581
582 #[inline]
583 fn bytes(&self) -> &'a [u8] {
584 self.buffer.as_bytes()
585 }
586
587 #[inline]
588 fn byte(&self, idx: usize) -> Option<u8> {
589 self.buffer.byte(idx)
590 }
591
592 fn skip_element_attributes(&mut self) -> Result<(), Error> {
593 loop {
594 self.buffer.skip_whitespace();
595
596 let name_start = self.buffer.current;
597 let name_end = self
598 .buffer
599 .position_or_end(self.buffer.current, is_invalid_attribute_name);
600 if name_end == self.buffer.current {
601 return Ok(());
602 }
603 self.buffer.current = name_end;
604
605 self.buffer.skip_whitespace();
606
607 if self.byte(self.buffer.current) != Some(b'=') {
608 self.set_error_state();
609 return Err(Error::new(
610 ErrorKind::ExpectedAttributeEq,
611 name_start..name_end,
612 ));
613 };
614
615 self.buffer.current += 1;
616
617 let eq_end = self.buffer.current;
618
619 self.buffer.skip_whitespace();
620
621 let Some(quote) = self
622 .byte(self.buffer.current)
623 .filter(|b| [b'\'', b'\"'].contains(b))
624 else {
625 self.set_error_state();
626 return Err(Error::new(
627 ErrorKind::ExpectedAttributeValue,
628 name_start..eq_end,
629 ));
630 };
631
632 self.buffer.current += 1;
633
634 let value_start = self.buffer.current;
635 let Some(value_end) = self.buffer.memchr2(self.buffer.current, quote, b'\0') else {
636 self.set_error_state();
637 return Err(Error::new(
638 ErrorKind::UnclosedAttributeValue,
639 self.buffer.current..(self.buffer.current + 1).min(self.buffer.text.len()),
640 ));
641 };
642
643 if self.bytes()[value_end] == b'\0' {
644 self.set_error_state();
645 return Err(Error::new(
646 ErrorKind::InvalidAttributeValue,
647 value_start..value_end + 1,
648 ));
649 }
650
651 self.buffer.current = value_end + 1;
652 }
653 }
654
655 fn skip_doctype(&mut self) -> Result<(), Error> {
656 loop {
657 match self.buffer.memchr2(self.buffer.current, b'>', b'[') {
658 Some(idx) if self.bytes()[idx] == b'[' => {
659 self.buffer.current = idx + 1;
660 let mut depth = 1;
661 while depth > 0 {
662 match self.buffer.memchr2(self.buffer.current, b'[', b']') {
663 Some(idx) => {
664 if self.bytes()[idx] == b'[' {
665 depth += 1;
666 } else {
667 depth -= 1;
668 }
669 self.buffer.current = idx + 1;
670 }
671 None => {
672 self.set_error_state();
673 return Err(Error::new(
674 ErrorKind::DoctypeEof,
675 self.buffer.empty_range_here(),
676 ));
677 }
678 }
679 }
680 }
681 Some(idx) => {
682 self.buffer.current = idx + 1;
683 return Ok(());
684 }
685 None => {
686 self.set_error_state();
687 return Err(Error::new(
688 ErrorKind::DoctypeEof,
689 self.buffer.empty_range_here(),
690 ));
691 }
692 }
693 }
694 }
695
696 fn take_prefixed_name(
697 &mut self,
698 start: usize,
699 prefix_end_default: usize,
700 ) -> Result<(usize, usize), Error> {
701 let first_end = self
702 .buffer
703 .position_or_end(self.buffer.current, is_invalid_name);
704 if first_end == self.buffer.current {
705 self.set_error_state();
706 return Err(Error::new(
707 ErrorKind::ExpectedElementName,
708 start..self.buffer.current,
709 ));
710 }
711
712 self.buffer.current = first_end;
713
714 let prefix_end;
715 let name_end;
716 if self.buffer.byte(self.buffer.current) == Some(b':') {
717 let second_end = self
718 .buffer
719 .position_or_end(self.buffer.current + 1, is_invalid_name);
720 if second_end == self.buffer.current {
721 self.set_error_state();
722 return Err(Error::new(
723 ErrorKind::ExpectedElementName,
724 start..self.buffer.current,
725 ));
726 }
727 self.buffer.current = second_end;
728 prefix_end = first_end;
729 name_end = second_end;
730 } else {
731 prefix_end = start + prefix_end_default;
732 name_end = first_end
733 }
734
735 Ok((prefix_end, name_end))
736 }
737
738 fn parse_node(&mut self) -> Result<Option<Event<'a>>, Error> {
739 let start = self.buffer.current;
740 self.buffer.current += 1;
741
742 match self.byte(self.buffer.current).ok_or_else(|| {
743 Error::new(
744 ErrorKind::InvalidElementName,
745 self.buffer.empty_range_here(),
746 )
747 })? {
748 b'?' => {
749 let Some(end) = self.buffer.memmem(b"?>") else {
757 let name_range = self.buffer.current + 1
758 ..self
759 .buffer
760 .position_or_end(self.buffer.current + 1, is_invalid_name);
761 self.set_error_state();
762 return Err(Error::new(ErrorKind::UnclosedPITag, name_range));
763 };
764 self.buffer.current = end + 2;
765
766 Ok(None)
767 }
768
769 b'!' => match self.byte(self.buffer.current + 1) {
770 Some(b'-') if self.byte(self.buffer.current + 2) == Some(b'-') => {
771 self.buffer.current += 2;
772 let Some(end) = self.buffer.memmem(b"-->") else {
773 let span = start..self.buffer.current;
774 self.set_error_state();
775 return Err(Error::new(ErrorKind::UnclosedComment, span));
776 };
777
778 self.buffer.current = end + 3;
779 Ok(Some(Event::Comment(CommentEvent {
780 text: &self.buffer.text[start..self.buffer.current],
781 })))
782 }
783 Some(b'[') if self.bytes()[self.buffer.current + 2..].starts_with(b"CDATA[") => {
784 self.buffer.current += 8;
785 let Some(end) = self.buffer.memmem(b"]]>") else {
786 let span = start..self.buffer.current;
787 self.set_error_state();
788 return Err(Error::new(ErrorKind::UnclosedCData, span));
789 };
790
791 self.buffer.current = end + 3;
792 Ok(Some(Event::CData(CDataEvent {
793 text: &self.buffer.text[start..self.buffer.current],
794 })))
795 }
796 Some(b'D')
797 if self.bytes()[self.buffer.current + 2..].starts_with(b"OCTYPE")
798 && self
799 .byte(self.buffer.current + 8)
800 .is_some_and(is_whitespace) =>
801 {
802 self.buffer.current += 9;
803 self.skip_doctype()?;
804 Ok(Some(Event::Doctype(DoctypeEvent {
805 text: &self.buffer.text[start..self.buffer.current],
806 })))
807 }
808 _ => {
809 let Some(end) = self.buffer.memchr(self.buffer.current + 1, b'>') else {
810 let span = start..self.buffer.position_or_end(start + 2, is_invalid_name);
811 self.set_error_state();
812 return Err(Error::new(ErrorKind::UnclosedUnknownSpecial, span));
813 };
814 self.buffer.current = end + 1;
815 Ok(None)
816 }
817 },
818
819 b'/' if self.depth > 0 || self.options.allow_unmatched_closing_tags => {
821 self.buffer.current += 1;
822 let (prefix_end, name_end) = self.take_prefixed_name(start, 1)?;
823
824 self.buffer.skip_whitespace();
825
826 if self.byte(self.buffer.current) != Some(b'>') {
827 let span = self.buffer.char_range_here();
828 self.set_error_state();
829 return Err(Error::new(ErrorKind::UnclosedEndTag, span));
830 }
831
832 self.depth = self.depth.saturating_sub(1);
833 self.buffer.current += 1;
834 Ok(Some(Event::End(EndEvent {
835 text: &self.buffer.text[start..self.buffer.current],
836 prefix_end: prefix_end - start,
837 name_end: name_end - start,
838 })))
839 }
840
841 _ => {
842 let (prefix_end, name_end) = self.take_prefixed_name(start, 0)?;
843
844 self.skip_element_attributes()?;
845 self.buffer.skip_whitespace();
846
847 match self.byte(self.buffer.current) {
848 Some(b'>') => {
849 self.buffer.current += 1;
850 self.depth += 1;
851 Ok(Some(Event::Start(StartEvent {
852 text: &self.buffer.text[start..self.buffer.current],
853 prefix_end: prefix_end - start,
854 name_end: name_end - start,
855 })))
856 }
857 Some(b'/') => {
858 if self.byte(self.buffer.current + 1) != Some(b'>') {
859 let span = self.buffer.char_range_here();
860 self.set_error_state();
861 return Err(Error::new(ErrorKind::UnclosedEmptyElementTag, span));
862 }
863
864 self.buffer.current += 2;
865 Ok(Some(Event::Empty(StartEvent {
866 text: &self.buffer.text[start..self.buffer.current],
867 prefix_end: prefix_end - start,
868 name_end: name_end - start,
869 })))
870 }
871 _ => {
872 let span = self.buffer.char_range_here();
873 self.set_error_state();
874 Err(Error::new(ErrorKind::UnclosedElementTag, span))
875 }
876 }
877 }
878 }
879 }
880
881 pub fn skip_to_end(&mut self) -> Result<Option<EndEvent<'a>>, Error> {
898 let end_depth = self.depth;
899
900 loop {
901 match self.next().transpose()? {
902 Some(Event::End(end)) if self.depth + 1 == end_depth => return Ok(Some(end)),
903 Some(_) => (),
904 None => return Ok(None),
905 }
906 }
907 }
908}
909
910impl<'a> Iterator for Reader<'a> {
911 type Item = Result<Event<'a>, Error>;
912
913 fn next(&mut self) -> Option<Result<Event<'a>, Error>> {
914 loop {
915 return match self.byte(self.buffer.current) {
916 Some(b'<') => match self.parse_node() {
917 Ok(Some(event)) => Some(Ok(event)),
918 Ok(None) => continue,
919 Err(err) => Some(Err(err)),
920 },
921 Some(_) => {
922 let node_start = self
923 .buffer
924 .memchr(self.buffer.current, b'<')
925 .unwrap_or(self.buffer.text.len());
926 let text_range = self.buffer.current..node_start;
927 self.buffer.current = text_range.end;
928
929 if self.depth == 0 && !self.options.allow_top_level_text {
930 if !unsafe { self.buffer.as_bytes().get_unchecked(text_range.clone()) }
933 .iter()
934 .copied()
935 .all(is_whitespace)
936 {
937 self.set_error_state();
938 return Some(Err(Error::new(ErrorKind::TopLevelText, text_range)));
939 } else {
940 self.buffer.current = text_range.end;
941 continue;
942 }
943 }
944
945 Some(Ok(Event::Text(TextEvent {
946 text: unsafe { self.buffer.text.get_unchecked(text_range) },
948 })))
949 }
950 None if self.depth > 0 => {
951 if self.options.allow_unclosed_tags {
952 return None;
953 }
954
955 self.depth = 0;
956 return Some(Err(Error::new(
957 ErrorKind::UnclosedElement,
958 self.buffer.empty_range_here(),
959 )));
960 }
961 None => None,
962 };
963 }
964 }
965}
966
#[cfg(test)]
mod test {
    use super::Reader;

    // Peels `Some(..)` / `Ok(..)` wrappers off an iterator item and then asserts that the
    // event is of the named variant, returning its payload.
    //
    // `unwrap!(reader.next(), Some(Ok(Start)))` panics at end of stream, on a parse error, or
    // on any event other than `Event::Start`.
    macro_rules! unwrap {
        ($event: expr, Some($($what: tt)*)) => {
            unwrap!($event.expect("unexpected end of event stream"), $($what)*)
        };
        ($event: expr, Ok($what: ident)) => {
            unwrap!($event.expect("parse error"), $what)
        };
        ($event: expr, $what: ident) => {{
            let e = $event;
            if let super::Event::$what(r) = e {
                r
            } else {
                panic!(
                    concat!("mismatched event, expected ", stringify!($what), " got {:?}"),
                    e
                )
            }
        }};
    }

    /// A single element with attributes, text content and an end tag.
    #[test]
    fn element() {
        // A quote of the delimiting kind cannot appear literally inside an attribute value
        // (it would end the value), so the apostrophe is written as `&apos;`.
        let code = " <hello attr = \"value\" 0ther4ttr=\t'val&apos;ue'>con ten t</hello> ";
        let mut reader = Reader::new(code);

        {
            let start = unwrap!(reader.next(), Some(Ok(Start)));
            assert_eq!(start.name(), "hello");

            let mut attributes = start.attributes();
            {
                let attr = attributes.next().unwrap();
                assert_eq!(attr.name(), "attr");
                assert_eq!(attr.value(), "value");
                assert_eq!(attr.raw_value(), "value");
            }
            {
                let attr = attributes.next().unwrap();
                assert_eq!(attr.name(), "0ther4ttr");
                // `value()` resolves the entity, `raw_value()` leaves it untouched.
                assert_eq!(attr.value(), "val'ue");
                assert_eq!(attr.raw_value(), "val&apos;ue");
            }
            assert!(attributes.next().is_none());
        }

        {
            let text = unwrap!(reader.next(), Some(Ok(Text)));
            assert_eq!(text.content(), "con ten t");
            assert_eq!(text.raw_content(), "con ten t");
        }

        {
            let end = unwrap!(reader.next(), Some(Ok(End)));
            assert_eq!(end.name(), "hello");
        }
    }

    /// Comment content is returned verbatim, including `&` and `--`.
    #[test]
    fn comments() {
        let comment_text = " this is a &comment -- text ";
        let code = format!(" <!--{comment_text}--> ");
        let mut reader = Reader::new(&code);

        let comment = unwrap!(reader.next(), Some(Ok(Comment)));
        assert_eq!(comment.content(), comment_text);
    }

    /// Nested elements with prefixes; end-tag names are not matched against start tags.
    #[test]
    fn element_tree() {
        // Written with explicit escapes so the whitespace the assertions depend on is
        // visible. A literal `<` would start a tag, so it appears in text as `&lt;`.
        let code = "\n <tree>\n <ns:stuff1>one</stuff2>\n one is &lt; two\n </not:tree>\n ";
        let mut reader = Reader::new(code);

        {
            let start = unwrap!(reader.next(), Some(Ok(Start)));
            assert_eq!(start.prefix(), None);
            assert_eq!(start.name(), "tree");
            assert!(start.attributes().next().is_none());
        }

        {
            let text = unwrap!(reader.next(), Some(Ok(Text)));
            assert_eq!(text.raw_content(), "\n ");
        }

        {
            let start = unwrap!(reader.next(), Some(Ok(Start)));
            assert_eq!(start.prefix(), Some("ns"));
            assert_eq!(start.name(), "stuff1");
            assert!(start.attributes().next().is_none());
        }

        {
            let text = unwrap!(reader.next(), Some(Ok(Text)));
            assert_eq!(text.content(), "one");
        }

        {
            let end = unwrap!(reader.next(), Some(Ok(End)));
            assert_eq!(end.name(), "stuff2");
        }

        {
            let text = unwrap!(reader.next(), Some(Ok(Text)));
            // `content()` resolves the entity, `raw_content()` leaves it untouched.
            assert_eq!(text.content(), "\n one is < two\n ");
            assert_eq!(text.raw_content(), "\n one is &lt; two\n ");
        }

        {
            let end = unwrap!(reader.next(), Some(Ok(End)));
            assert_eq!(end.prefix(), Some("not"));
            assert_eq!(end.name(), "tree");
        }
    }

    /// CDATA content is returned verbatim, markup characters included.
    #[test]
    fn cdata() {
        let content = "this is some cdata < > > & & !!";
        let code = format!("<![CDATA[{content}]]>");
        let mut reader = Reader::new(&code);

        {
            let cdata = unwrap!(reader.next(), Some(Ok(CData)));
            assert_eq!(cdata.content(), content);
        }
    }

    /// Nested bracket sections inside a DOCTYPE declaration are skipped as a unit.
    #[test]
    fn doctype() {
        let content = "\tthis is a doctype [with] [many [brackets[[[]]][][]]]\n";
        let code = format!("<!DOCTYPE {content}>");
        let mut reader = Reader::new(&code);

        {
            let doctype = unwrap!(reader.next(), Some(Ok(Doctype)));
            assert_eq!(doctype.content(), content);
        }
    }
}