1use crate::bitstream::{self, TransposeFn};
2use crate::classify::{self, CharClassMasks};
3use crate::state::{DoctypeSubState, ParserState, QuoteStyle};
4use crate::types::{is_xml_whitespace, Error, ErrorKind, ParseError, Span};
5use crate::visitor::Visitor;
6
/// Upper bound, in bytes, on the length of a name. The inline fast paths in
/// this file decline (return `None`) for names longer than this and leave
/// them to the block-based scanner.
const MAX_NAME_LENGTH: usize = 1_000;

/// Upper bound on the length of a character reference.
/// NOTE(review): not referenced in the visible part of this file — presumably
/// enforced by the character-reference scanner; confirm what exactly is counted.
const MAX_CHAR_REF_LENGTH: usize = 7;
14
/// Incremental XML reader that scans byte buffers and reports syntax events
/// to a [`Visitor`].
///
/// All `usize` positions stored here are indices into the buffer most recently
/// passed to `parse`; `parse` rebases them by the number of bytes it reports
/// as consumed before returning.
pub struct Reader {
    /// Current state of the parser state machine.
    state: ParserState,
    /// Block transpose routine chosen by `bitstream::select_transpose()`.
    transpose: TransposeFn,

    /// Buffer position where the markup token currently being scanned began
    /// (`<`, `&`, or an attribute name). While set, `parse` consumes only the
    /// bytes before it when the buffer ends.
    markup_start: Option<usize>,

    /// Buffer position where the pending (not yet reported) text run begins.
    text_start: Option<usize>,

    /// Buffer position at which scanning continues.
    resume_pos: usize,

    /// Number of consecutive `]` bytes seen at the very end of text content
    /// (stored capped at 2), carried over so a `]]>` split across buffers is
    /// still detected.
    content_bracket_count: u8,

    /// Buffer position where the pending body content (comment, CDATA, PI,
    /// DOCTYPE, or attribute value) begins.
    content_start: Option<usize>,

    /// Absolute stream offset of the start of the markup construct in
    /// progress, recorded when `markup_start` is cleared mid-construct; used
    /// as the error offset for an unexpected end of input.
    markup_stream_offset: Option<u64>,

    /// Set by `finish_markup`, cleared by `reset`.
    /// NOTE(review): no reader of this flag is visible in this part of the file.
    had_markup: bool,

    /// NOTE(review): presumably true while an XML declaration is being
    /// scanned — only initialised/reset in the visible code; confirm.
    in_xml_decl: bool,

    /// NOTE(review): presumably buffers the XML declaration's bytes; confirm.
    xml_decl_buf: [u8; 256],

    /// Length of the used part of `xml_decl_buf` (assumed; reset to 0 by `reset`).
    xml_decl_buf_len: usize,

    /// NOTE(review): presumably the stream offset where the XML declaration
    /// began; not touched by `reset`. Confirm.
    xml_decl_span_start: u64,
}
64
65impl Reader {
66 pub fn new() -> Self {
67 Self {
68 state: ParserState::Content,
69 transpose: bitstream::select_transpose(),
70 markup_start: None,
71 text_start: None,
72 resume_pos: 0,
73 content_bracket_count: 0,
74 content_start: None,
75 markup_stream_offset: None,
76 had_markup: false,
77 in_xml_decl: false,
78 xml_decl_buf: [0; 256],
79 xml_decl_buf_len: 0,
80 xml_decl_span_start: 0,
81 }
82 }
83
84 pub fn reset(&mut self) {
86 self.finish_content_body();
87 self.resume_pos = 0;
88 self.content_bracket_count = 0;
89 self.had_markup = false;
90 self.in_xml_decl = false;
91 self.xml_decl_buf_len = 0;
92 }
93
94 #[inline(always)]
96 fn finish_markup(&mut self) {
97 self.state = ParserState::Content;
98 self.markup_start = None;
99 self.text_start = None;
100 self.markup_stream_offset = None;
101 self.had_markup = true;
102 }
103
104 #[inline(always)]
107 fn finish_content_body(&mut self) {
108 self.finish_markup();
109 self.content_start = None;
110 }
111
112 #[inline(never)]
122 fn try_inline_with_peek<V: Visitor>(
123 &mut self,
124 buf: &[u8],
125 delim_pos: usize,
126 stream_offset: u64,
127 visitor: &mut V,
128 ) -> Result<Option<(usize, usize)>, ParseError<V::Error>> {
129 let b = buf[delim_pos];
130 let first_pos = if b == b'<' {
131 self.try_inline_tag(buf, delim_pos, stream_offset, visitor)?
132 } else if b == b'&' {
133 self.try_inline_ref(buf, delim_pos, stream_offset, visitor)?
134 } else {
135 None
137 };
138 let Some(mut pos) = first_pos else {
139 return Ok(None);
140 };
141 'peek: loop {
143 let limit = (pos + 16).min(buf.len());
144 let mut text_scan = pos;
145 while text_scan < limit {
146 let ch = buf[text_scan];
147 if ch == b'<' {
148 if let Some(next) = self.try_inline_tag(
149 buf, text_scan, stream_offset, visitor,
150 )? {
151 pos = next;
152 continue 'peek;
153 }
154 self.text_start = Some(pos);
155 return Ok(Some((text_scan, text_scan / 64 * 64)));
156 } else if ch == b'&' {
157 if let Some(next) = self.try_inline_ref(
158 buf, text_scan, stream_offset, visitor,
159 )? {
160 pos = next;
161 continue 'peek;
162 }
163 self.text_start = Some(pos);
164 return Ok(Some((text_scan, text_scan / 64 * 64)));
165 } else if ch == b']' {
166 self.text_start = Some(pos);
167 return Ok(Some((text_scan, text_scan / 64 * 64)));
168 }
169 text_scan += 1;
170 }
171 self.text_start = Some(pos);
173 return Ok(Some((limit, limit / 64 * 64)));
174 }
175 }
176
177 #[inline]
181 fn try_inline_tag<V: Visitor>(
182 &mut self,
183 buf: &[u8],
184 lt_pos: usize,
185 stream_offset: u64,
186 visitor: &mut V,
187 ) -> Result<Option<usize>, ParseError<V::Error>> {
188 let after = lt_pos + 1;
189 if after >= buf.len() {
190 return Ok(None);
191 }
192 let b = buf[after];
193 if b == b'/' {
194 self.try_inline_end_tag(buf, lt_pos, stream_offset, visitor)
195 } else if is_name_start_byte(b) {
196 self.try_inline_start_tag(buf, lt_pos, stream_offset, visitor)
197 } else {
198 Ok(None)
199 }
200 }
201
202 #[inline]
204 fn try_inline_end_tag<V: Visitor>(
205 &mut self,
206 buf: &[u8],
207 lt_pos: usize,
208 stream_offset: u64,
209 visitor: &mut V,
210 ) -> Result<Option<usize>, ParseError<V::Error>> {
211 let name_start = lt_pos + 2;
212 if name_start >= buf.len() {
213 return Ok(None);
214 }
215 if !is_name_start_byte(buf[name_start]) {
216 return Ok(None);
217 }
218 let mut i = name_start + 1;
219 while i < buf.len() && is_name_byte(buf[i]) {
220 i += 1;
221 }
222 if i >= buf.len() {
223 return Ok(None);
224 }
225 let name_end = i;
226 if name_end - name_start > MAX_NAME_LENGTH {
227 return Ok(None);
228 }
229 if buf[name_end] != b'>' {
230 return Ok(None);
231 }
232 self.flush_text_before(buf, lt_pos, stream_offset, visitor)?;
233 let name = &buf[name_start..name_end];
234 let span = Span::new(
235 stream_offset + name_start as u64,
236 stream_offset + name_end as u64,
237 );
238 visitor.end_tag(name, span).map_err(ParseError::Visitor)?;
239 Ok(Some(name_end + 1))
240 }
241
242 #[inline]
244 fn try_inline_start_tag<V: Visitor>(
245 &mut self,
246 buf: &[u8],
247 lt_pos: usize,
248 stream_offset: u64,
249 visitor: &mut V,
250 ) -> Result<Option<usize>, ParseError<V::Error>> {
251 let name_start = lt_pos + 1;
252 let mut i = name_start + 1;
253 while i < buf.len() && is_name_byte(buf[i]) {
254 i += 1;
255 }
256 if i >= buf.len() {
257 return Ok(None);
258 }
259 let name_end = i;
260 if name_end - name_start > MAX_NAME_LENGTH {
261 return Ok(None);
262 }
263 let byte = buf[name_end];
264 if byte == b'>' {
265 self.flush_text_before(buf, lt_pos, stream_offset, visitor)?;
266 let name = &buf[name_start..name_end];
267 let name_span = Span::new(
268 stream_offset + name_start as u64,
269 stream_offset + name_end as u64,
270 );
271 visitor.start_tag_open(name, name_span).map_err(ParseError::Visitor)?;
272 let close_span = Span::new(
273 stream_offset + name_end as u64,
274 stream_offset + name_end as u64 + 1,
275 );
276 visitor.start_tag_close(close_span).map_err(ParseError::Visitor)?;
277 Ok(Some(name_end + 1))
278 } else if byte == b'/' {
279 let gt_pos = name_end + 1;
280 if gt_pos >= buf.len() || buf[gt_pos] != b'>' {
281 return Ok(None);
282 }
283 self.flush_text_before(buf, lt_pos, stream_offset, visitor)?;
284 let name = &buf[name_start..name_end];
285 let name_span = Span::new(
286 stream_offset + name_start as u64,
287 stream_offset + name_end as u64,
288 );
289 visitor.start_tag_open(name, name_span).map_err(ParseError::Visitor)?;
290 let close_span = Span::new(
291 stream_offset + name_end as u64,
292 stream_offset + gt_pos as u64 + 1,
293 );
294 visitor.empty_element_end(close_span).map_err(ParseError::Visitor)?;
295 Ok(Some(gt_pos + 1))
296 } else {
297 Ok(None)
298 }
299 }
300
301 #[inline]
303 fn try_inline_ref<V: Visitor>(
304 &mut self,
305 buf: &[u8],
306 amp_pos: usize,
307 stream_offset: u64,
308 visitor: &mut V,
309 ) -> Result<Option<usize>, ParseError<V::Error>> {
310 let name_start = amp_pos + 1;
311 if name_start >= buf.len() {
312 return Ok(None);
313 }
314 if buf[name_start] == b'#' || !is_name_start_byte(buf[name_start]) {
315 return Ok(None);
316 }
317 let mut i = name_start + 1;
318 while i < buf.len() && is_name_byte(buf[i]) {
319 i += 1;
320 }
321 if i >= buf.len() || buf[i] != b';' {
322 return Ok(None);
323 }
324 let name_end = i;
325 if name_end - name_start > MAX_NAME_LENGTH {
326 return Ok(None);
327 }
328 self.flush_text_before(buf, amp_pos, stream_offset, visitor)?;
329 let name = &buf[name_start..name_end];
330 let span = Span::new(
331 stream_offset + name_start as u64,
332 stream_offset + name_end as u64,
333 );
334 visitor.entity_ref(name, span).map_err(ParseError::Visitor)?;
335 Ok(Some(name_end + 1))
336 }
337
338 #[inline]
340 fn flush_text_before<V: Visitor>(
341 &mut self,
342 buf: &[u8],
343 end_pos: usize,
344 stream_offset: u64,
345 visitor: &mut V,
346 ) -> Result<(), ParseError<V::Error>> {
347 if let Some(text_start) = self.text_start.take() {
348 if text_start < end_pos {
349 let span = Span::new(
350 stream_offset + text_start as u64,
351 stream_offset + end_pos as u64,
352 );
353 visitor
354 .characters(&buf[text_start..end_pos], span)
355 .map_err(ParseError::Visitor)?;
356 }
357 }
358 Ok(())
359 }
360
361 #[inline(always)]
366 fn handle_empty_element_slash<V: Visitor>(
367 &mut self,
368 buf: &[u8],
369 abs: usize,
370 block_rel_pos: usize,
371 stream_offset: u64,
372 visitor: &mut V,
373 ) -> Result<usize, ParseError<V::Error>> {
374 let gt_pos = abs + 1;
375 if gt_pos < buf.len() {
376 if buf[gt_pos] == b'>' {
377 let span = Span::new(
378 stream_offset + abs as u64,
379 stream_offset + gt_pos as u64 + 1,
380 );
381 visitor
382 .empty_element_end(span)
383 .map_err(ParseError::Visitor)?;
384 self.finish_markup();
385 Ok(block_rel_pos + 2)
386 } else {
387 Err(ParseError::Xml(Error {
388 kind: ErrorKind::UnexpectedByte(buf[gt_pos]),
389 offset: stream_offset + gt_pos as u64,
390 }))
391 }
392 } else {
393 self.state = ParserState::StartTagGotSlash;
394 Ok(block_rel_pos + 1)
395 }
396 }
397
398 pub fn parse_slice<V: Visitor>(
404 &mut self,
405 buf: &[u8],
406 visitor: &mut V,
407 ) -> Result<u64, ParseError<V::Error>> {
408 self.parse(buf, 0, true, visitor)
409 }
410
    /// Parses `buf`, reporting events to `visitor`, and returns the number of
    /// bytes of `buf` that were consumed.
    ///
    /// * `stream_offset` is added to buffer positions to form the spans and
    ///   error offsets handed out.
    /// * `is_final` states that no more input follows: markup left unfinished
    ///   at the end of `buf` is then an `UnexpectedEof` error. When it is
    ///   `false` the returned count may be smaller than `buf.len()`.
    ///
    /// Before returning, all stored positions are rebased by the consumed
    /// count, which implies the caller is expected to present the unconsumed
    /// tail at the start of the next buffer.
    ///
    /// # Errors
    /// XML syntax errors as `ParseError::Xml`, and errors returned by the
    /// visitor as `ParseError::Visitor`.
    pub fn parse<V: Visitor>(
        &mut self,
        buf: &[u8],
        stream_offset: u64,
        is_final: bool,
        visitor: &mut V,
    ) -> Result<u64, ParseError<V::Error>> {
        // An empty buffer can only signal end of input; that is an error
        // unless the parser is between constructs.
        if buf.is_empty() {
            if is_final && self.state != ParserState::Content {
                return Err(ParseError::Xml(Error {
                    kind: ErrorKind::UnexpectedEof,
                    offset: stream_offset,
                }));
            }
            return Ok(0);
        }

        // Start at the 64-byte block containing the resume position.
        let first_block = (self.resume_pos / 64) * 64;
        let mut block_offset = first_block;

        while block_offset < buf.len() {
            // Fast path: in plain content with no `]` run pending, jump
            // straight to the next delimiter instead of classifying every block.
            if matches!(self.state, ParserState::Content) && self.content_bracket_count == 0 {
                let scan_start = if block_offset <= self.resume_pos {
                    self.resume_pos
                } else {
                    block_offset
                };
                if scan_start < buf.len() {
                    if let Some(rel) = memchr::memchr3(b'<', b'&', b']', &buf[scan_start..]) {
                        let delim_pos = scan_start + rel;
                        let delim_block = delim_pos / 64 * 64;
                        // Only worthwhile when the delimiter lies in a later
                        // block; otherwise fall through to the block scanner.
                        if delim_block > block_offset {
                            if self.text_start.is_none() {
                                self.text_start = Some(scan_start);
                            }
                            // Try to handle simple markup without the block scanner.
                            if let Some((resume, block)) = self.try_inline_with_peek(
                                buf, delim_pos, stream_offset, visitor,
                            )? {
                                self.resume_pos = resume;
                                block_offset = block;
                                continue;
                            }
                            // Not inlinable: skip ahead to the delimiter's block.
                            block_offset = delim_block;
                            self.resume_pos = block_offset;
                            continue;
                        }
                    } else {
                        // No delimiter in the rest of the buffer: it is all text.
                        if self.text_start.is_none() {
                            self.text_start = Some(scan_start);
                        }
                        self.resume_pos = buf.len();
                        block_offset = buf.len();
                        continue;
                    }
                }
            }

            // Slow path: classify one block and run the state machine over it.
            let (bp, block_len) = bitstream::transpose_block(self.transpose, buf, block_offset);
            let masks = classify::classify(&bp);

            // Block-relative start; zero when resuming before this block.
            let start_pos = if block_offset <= self.resume_pos {
                self.resume_pos - block_offset
            } else {
                0
            };

            let final_buf_pos =
                self.process_block(buf, block_offset, block_len, start_pos, &masks, stream_offset, visitor)?;

            self.resume_pos = final_buf_pos;
            block_offset += block_len;
        }

        // Decide how many bytes count as consumed.
        let mut consumed = if let Some(start) = self.markup_start {
            // A markup token is incomplete: keep it for the next call, or
            // fail if there will be no next call.
            if is_final {
                return Err(ParseError::Xml(Error {
                    kind: ErrorKind::UnexpectedEof,
                    offset: stream_offset + start as u64,
                }));
            }
            start
        } else if is_final {
            // A construct whose opening was already reported is still open.
            if let Some(offset) = self.markup_stream_offset {
                return Err(ParseError::Xml(Error {
                    kind: ErrorKind::UnexpectedEof,
                    offset,
                }));
            }
            buf.len()
        } else {
            // Hold back a trailing partial UTF-8 sequence, as computed by
            // `utf8_boundary_rewind`.
            match utf8_boundary_rewind(buf) {
                Ok(rewind) => buf.len() - rewind,
                Err(offset) => {
                    return Err(ParseError::Xml(Error {
                        kind: ErrorKind::InvalidUtf8,
                        offset: stream_offset + offset as u64,
                    }));
                }
            }
        };

        if !is_final {
            // Trailing bytes that may be the start of a terminator (`--`,
            // `]]`, `?`) are not consumed; the matching counter is cleared so
            // they are scanned afresh with the next buffer.
            let exclude = match &self.state {
                ParserState::CommentContent { dash_count } => *dash_count as usize,
                ParserState::CdataContent { bracket_count } => *bracket_count as usize,
                ParserState::PIContent { saw_qmark: true } => 1,
                _ => 0,
            };
            if exclude > 0 {
                consumed = consumed.saturating_sub(exclude);
                match &mut self.state {
                    ParserState::CommentContent { dash_count } => *dash_count = 0,
                    ParserState::CdataContent { bracket_count } => *bracket_count = 0,
                    ParserState::PIContent { saw_qmark } => *saw_qmark = false,
                    _ => {}
                }
                self.resume_pos = consumed;
            }
        }

        // Report the text collected so far, then rebase its start position.
        if let Some(text_start) = self.text_start {
            if text_start < consumed {
                let text = &buf[text_start..consumed];
                if !text.is_empty() {
                    let span = Span::new(
                        stream_offset + text_start as u64,
                        stream_offset + consumed as u64,
                    );
                    visitor
                        .characters(text, span)
                        .map_err(ParseError::Visitor)?;
                }
            }
            if consumed >= buf.len() {
                self.text_start = None;
            } else {
                self.text_start = Some(text_start.saturating_sub(consumed));
            }
        }

        // Report the body content collected so far as a partial chunk through
        // the callback matching the current state, then rebase its start.
        if let Some(cs) = self.content_start {
            if cs < consumed {
                let content = &buf[cs..consumed];
                if !content.is_empty() {
                    let span = Span::new(
                        stream_offset + cs as u64,
                        stream_offset + consumed as u64,
                    );
                    match &self.state {
                        ParserState::CommentContent { .. } => {
                            visitor.comment_content(content, span).map_err(ParseError::Visitor)?;
                        }
                        ParserState::CdataContent { .. } => {
                            visitor.cdata_content(content, span).map_err(ParseError::Visitor)?;
                        }
                        ParserState::PIContent { .. } => {
                            self.emit_pi_content(content, span, visitor)?;
                        }
                        ParserState::DoctypeContent { .. } => {
                            visitor.doctype_content(content, span).map_err(ParseError::Visitor)?;
                        }
                        ParserState::AttrValue { .. } => {
                            visitor.attribute_value(content, span).map_err(ParseError::Visitor)?;
                        }
                        _ => {}
                    }
                }
            }
            self.content_start = Some(cs.saturating_sub(consumed));
        }

        // Rebase the remaining stored positions by the consumed count.
        if consumed > 0 {
            self.markup_start = self.markup_start.map(|s| s - consumed);
            self.resume_pos = self.resume_pos.saturating_sub(consumed);
            self.state.adjust_positions(consumed);
        }

        Ok(consumed as u64)
    }
630
    /// Runs the parser state machine over one classified block of `buf`.
    ///
    /// * `block_offset` / `block_len`: buffer position and length of the block.
    /// * `start_pos`: block-relative position at which to start.
    /// * `masks`: character-class bitmasks computed for this block.
    /// * `stream_offset`: added to buffer positions for spans and error offsets.
    ///
    /// Returns the buffer position (`block_offset + pos`) at which scanning
    /// should resume. Some arms look one or two bytes past the position they
    /// were given, bounded by `buf.len()` rather than the block, so the result
    /// may lie beyond the end of this block.
    fn process_block<V: Visitor>(
        &mut self,
        buf: &[u8],
        block_offset: usize,
        block_len: usize,
        start_pos: usize,
        masks: &CharClassMasks,
        stream_offset: u64,
        visitor: &mut V,
    ) -> Result<usize, ParseError<V::Error>> {
        // `pos` is block-relative throughout; `abs` values are buffer positions.
        let mut pos = start_pos; while pos < block_len {
            match self.state {
                ParserState::Content => {
                    pos = self.scan_content(
                        buf, block_offset, block_len, pos, masks, stream_offset, visitor,
                    )?;
                }

                // `<` was the last byte available earlier; classify what follows.
                ParserState::AfterLt => {
                    let abs = block_offset + pos;
                    let byte = buf[abs];
                    match byte {
                        b'/' => {
                            self.state = ParserState::EndTagName {
                                name_start: abs + 1,
                            };
                            pos += 1;
                        }
                        b'?' => {
                            self.state = ParserState::PITarget {
                                name_start: abs + 1,
                            };
                            pos += 1;
                        }
                        b'!' => {
                            self.state = ParserState::AfterLtBang;
                            pos += 1;
                        }
                        // The name starts at this byte, so it is not consumed here.
                        _ if is_name_start_byte(byte) => {
                            self.state = ParserState::StartTagName { name_start: abs };
                        }
                        _ => {
                            return Err(ParseError::Xml(Error {
                                kind: ErrorKind::UnexpectedByte(byte),
                                offset: stream_offset + abs as u64,
                            }));
                        }
                    }
                }

                ParserState::StartTagName { name_start } => {
                    // Name continues past this block: only check its length so far.
                    let Some((next, abs)) =
                        find_name_end(masks.name_end, pos, block_offset, block_len)
                    else {
                        check_name_length(
                            block_offset + block_len,
                            name_start,
                            stream_offset,
                        )?;
                        pos = block_len;
                        continue;
                    };
                    let name = validate_name(buf, name_start, abs, stream_offset)?;
                    let name_span = Span::new(
                        stream_offset + name_start as u64,
                        stream_offset + abs as u64,
                    );
                    visitor
                        .start_tag_open(name, name_span)
                        .map_err(ParseError::Visitor)?;
                    // The tag is now open for the visitor: remember where it
                    // started for EOF reporting and stop holding back its bytes.
                    self.markup_stream_offset = Some(stream_offset + self.markup_start.unwrap() as u64);
                    self.markup_start = None;

                    let byte = buf[abs];
                    match byte {
                        b'>' => {
                            let span = Span::new(
                                stream_offset + abs as u64,
                                stream_offset + abs as u64 + 1,
                            );
                            visitor.start_tag_close(span).map_err(ParseError::Visitor)?;
                            self.finish_markup();
                            pos = next + 1;
                        }
                        b'/' => {
                            pos = self.handle_empty_element_slash(
                                buf, abs, next, stream_offset, visitor,
                            )?;
                        }
                        _ => {
                            self.state = ParserState::StartTagPostName;
                            pos = next;
                        }
                    }
                }

                // Inside a start tag, between attributes: skip whitespace and
                // expect `>`, `/`, or the start of an attribute name.
                ParserState::StartTagPostName => {
                    let Some((next, abs)) =
                        find_non_whitespace(masks.whitespace, pos, block_offset, block_len)
                    else {
                        pos = block_len;
                        continue;
                    };
                    let byte = buf[abs];
                    match byte {
                        b'>' => {
                            let span = Span::new(
                                stream_offset + abs as u64,
                                stream_offset + abs as u64 + 1,
                            );
                            visitor.start_tag_close(span).map_err(ParseError::Visitor)?;
                            self.finish_markup();
                            pos = next + 1;
                        }
                        b'/' => {
                            pos = self.handle_empty_element_slash(
                                buf, abs, next, stream_offset, visitor,
                            )?;
                        }
                        _ if is_name_start_byte(byte) => {
                            self.markup_start = Some(abs);
                            self.state = ParserState::AttrName { name_start: abs };
                            pos = next;
                        }
                        _ => {
                            return Err(ParseError::Xml(Error {
                                kind: ErrorKind::UnexpectedByte(byte),
                                offset: stream_offset + abs as u64,
                            }));
                        }
                    }
                }

                // A `/` ended the previous buffer; only `>` may follow.
                ParserState::StartTagGotSlash => {
                    let abs = block_offset + pos;
                    let byte = buf[abs];
                    if byte == b'>' {
                        // The span also covers the `/` one byte earlier.
                        let close_span = Span::new(
                            stream_offset + abs as u64 - 1,
                            stream_offset + abs as u64 + 1,
                        );
                        visitor
                            .empty_element_end(close_span)
                            .map_err(ParseError::Visitor)?;
                        self.finish_markup();
                        pos += 1;
                    } else {
                        return Err(ParseError::Xml(Error {
                            kind: ErrorKind::UnexpectedByte(byte),
                            offset: stream_offset + abs as u64,
                        }));
                    }
                }

                ParserState::AttrName { name_start } => {
                    let Some((next, abs)) =
                        find_name_end(masks.name_end, pos, block_offset, block_len)
                    else {
                        check_name_length(
                            block_offset + block_len,
                            name_start,
                            stream_offset,
                        )?;
                        pos = block_len;
                        continue;
                    };
                    let name = validate_name(buf, name_start, abs, stream_offset)?;
                    let name_span = Span::new(
                        stream_offset + name_start as u64,
                        stream_offset + abs as u64,
                    );
                    visitor
                        .attribute_name(name, name_span)
                        .map_err(ParseError::Visitor)?;
                    self.markup_start = None;

                    let byte = buf[abs];
                    if byte == b'=' {
                        self.state = ParserState::BeforeAttrValue;
                        pos = next + 1;
                    } else {
                        self.state = ParserState::AfterAttrName;
                        pos = next;
                    }
                }

                // After an attribute name: skip whitespace, then require `=`.
                ParserState::AfterAttrName => {
                    let Some((next, abs)) =
                        find_non_whitespace(masks.whitespace, pos, block_offset, block_len)
                    else {
                        pos = block_len;
                        continue;
                    };
                    let byte = buf[abs];
                    if byte == b'=' {
                        self.state = ParserState::BeforeAttrValue;
                        pos = next + 1;
                    } else {
                        return Err(ParseError::Xml(Error {
                            kind: ErrorKind::UnexpectedByte(byte),
                            offset: stream_offset + abs as u64,
                        }));
                    }
                }

                // After `=`: skip whitespace one byte at a time, then require
                // an opening quote; the value starts right after it.
                ParserState::BeforeAttrValue => {
                    let abs = block_offset + pos;
                    let byte = buf[abs];
                    if byte == b'"' {
                        self.state = ParserState::AttrValue { quote: QuoteStyle::Double };
                        self.content_start = Some(abs + 1);
                        pos += 1;
                    } else if byte == b'\'' {
                        self.state = ParserState::AttrValue { quote: QuoteStyle::Single };
                        self.content_start = Some(abs + 1);
                        pos += 1;
                    } else if is_xml_whitespace(byte) {
                        pos += 1;
                    } else {
                        return Err(ParseError::Xml(Error {
                            kind: ErrorKind::UnexpectedByte(byte),
                            offset: stream_offset + abs as u64,
                        }));
                    }
                }

                ParserState::AttrValue { quote } => {
                    let content_start = self.content_start.unwrap();
                    // Pick the delimiter mask and closing byte for this quote style.
                    let delim_mask = match quote {
                        QuoteStyle::Double => masks.attr_dq_delim,
                        QuoteStyle::Single => masks.attr_sq_delim,
                    };
                    let delim_byte = match quote {
                        QuoteStyle::Double => b'"',
                        QuoteStyle::Single => b'\'',
                    };
                    // `find_name_end` is used here simply to find the next set
                    // bit of the delimiter mask.
                    let Some((next, abs)) =
                        find_name_end(delim_mask, pos, block_offset, block_len)
                    else {
                        pos = block_len;
                        continue;
                    };
                    let byte = buf[abs];
                    if byte == delim_byte {
                        // Closing quote: report any remaining value, then the end.
                        if content_start < abs {
                            let value = &buf[content_start..abs];
                            let span = Span::new(
                                stream_offset + content_start as u64,
                                stream_offset + abs as u64,
                            );
                            visitor
                                .attribute_value(value, span)
                                .map_err(ParseError::Visitor)?;
                        }
                        let quote_span = Span::new(
                            stream_offset + abs as u64,
                            stream_offset + abs as u64 + 1,
                        );
                        visitor
                            .attribute_end(quote_span)
                            .map_err(ParseError::Visitor)?;
                        self.content_start = None;
                        self.state = ParserState::StartTagPostName;
                        pos = next + 1;
                    } else if byte == b'<' {
                        // `<` is rejected inside attribute values.
                        return Err(ParseError::Xml(Error {
                            kind: ErrorKind::UnexpectedByte(byte),
                            offset: stream_offset + abs as u64,
                        }));
                    } else {
                        // Any other delimiter starts a reference inside the
                        // value (presumably `&` — the mask is built elsewhere):
                        // report the value so far and scan the reference.
                        if content_start < abs {
                            let value = &buf[content_start..abs];
                            let span = Span::new(
                                stream_offset + content_start as u64,
                                stream_offset + abs as u64,
                            );
                            visitor
                                .attribute_value(value, span)
                                .map_err(ParseError::Visitor)?;
                        }
                        self.markup_start = Some(abs);
                        self.content_start = None;
                        self.state = ParserState::AttrEntityRef {
                            name_start: abs + 1,
                            quote,
                        };
                        pos = next + 1;
                    }
                }

                ParserState::EndTagName { name_start } => {
                    let Some((next, abs)) =
                        find_name_end(masks.name_end, pos, block_offset, block_len)
                    else {
                        check_name_length(
                            block_offset + block_len,
                            name_start,
                            stream_offset,
                        )?;
                        pos = block_len;
                        continue;
                    };
                    let name = validate_name(buf, name_start, abs, stream_offset)?;
                    let name_span = Span::new(
                        stream_offset + name_start as u64,
                        stream_offset + abs as u64,
                    );
                    visitor
                        .end_tag(name, name_span)
                        .map_err(ParseError::Visitor)?;

                    let byte = buf[abs];
                    if byte == b'>' {
                        self.finish_markup();
                        pos = next + 1;
                    } else {
                        self.state = ParserState::EndTagPostName;
                        pos = next;
                    }
                }

                // After an end-tag name: skip whitespace, then require `>`.
                ParserState::EndTagPostName => {
                    let Some((next, abs)) =
                        find_non_whitespace(masks.whitespace, pos, block_offset, block_len)
                    else {
                        pos = block_len;
                        continue;
                    };
                    let byte = buf[abs];
                    if byte == b'>' {
                        self.finish_markup();
                        pos = next + 1;
                    } else {
                        return Err(ParseError::Xml(Error {
                            kind: ErrorKind::UnexpectedByte(byte),
                            offset: stream_offset + abs as u64,
                        }));
                    }
                }

                // After `<!`: a comment (`-`), CDATA (`[`), or DOCTYPE (`D`).
                ParserState::AfterLtBang => {
                    let abs = block_offset + pos;
                    let byte = buf[abs];
                    match byte {
                        b'-' => {
                            self.state = ParserState::AfterLtBangDash;
                            pos += 1;
                        }
                        b'[' => {
                            self.state = ParserState::AfterLtBangBracket { matched: 0 };
                            pos += 1;
                        }
                        b'D' => {
                            self.state = ParserState::AfterLtBangD { matched: 0 };
                            pos += 1;
                        }
                        _ => {
                            return Err(ParseError::Xml(Error {
                                kind: ErrorKind::UnexpectedByte(byte),
                                offset: stream_offset + abs as u64,
                            }));
                        }
                    }
                }

                // After `<!-`: the second `-` completes the comment opener.
                ParserState::AfterLtBangDash => {
                    let abs = block_offset + pos;
                    let byte = buf[abs];
                    if byte == b'-' {
                        // Span runs from the `<` through this second dash.
                        let start_span = Span::new(
                            stream_offset + self.markup_start.unwrap() as u64,
                            stream_offset + abs as u64 + 1,
                        );
                        visitor
                            .comment_start(start_span)
                            .map_err(ParseError::Visitor)?;
                        self.markup_stream_offset = Some(stream_offset + self.markup_start.unwrap() as u64);
                        self.markup_start = None;
                        self.content_start = Some(abs + 1);
                        self.state = ParserState::CommentContent {
                            dash_count: 0,
                        };
                        pos += 1;
                    } else {
                        return Err(ParseError::Xml(Error {
                            kind: ErrorKind::UnexpectedByte(byte),
                            offset: stream_offset + abs as u64,
                        }));
                    }
                }

                ParserState::CommentContent { dash_count } => {
                    pos = self.scan_comment_content(
                        buf, block_offset, block_len, pos, masks,
                        stream_offset, dash_count, visitor,
                    )?;
                }

                // After `<![`: match the literal `CDATA[` one byte per step;
                // `matched` counts the bytes matched so far.
                ParserState::AfterLtBangBracket { matched } => {
                    let abs = block_offset + pos;
                    let byte = buf[abs];
                    const CDATA_CHARS: &[u8] = b"CDATA[";
                    if byte == CDATA_CHARS[matched as usize] {
                        let new_matched = matched + 1;
                        if new_matched as usize == CDATA_CHARS.len() {
                            // Span runs from the `<` through the final `[`.
                            let start_span = Span::new(
                                stream_offset + self.markup_start.unwrap() as u64,
                                stream_offset + abs as u64 + 1,
                            );
                            visitor
                                .cdata_start(start_span)
                                .map_err(ParseError::Visitor)?;
                            self.markup_stream_offset = Some(stream_offset + self.markup_start.unwrap() as u64);
                            self.markup_start = None;
                            self.content_start = Some(abs + 1);
                            self.state = ParserState::CdataContent {
                                bracket_count: 0,
                            };
                        } else {
                            self.state = ParserState::AfterLtBangBracket { matched: new_matched };
                        }
                        pos += 1;
                    } else {
                        return Err(ParseError::Xml(Error {
                            kind: ErrorKind::UnexpectedByte(byte),
                            offset: stream_offset + abs as u64,
                        }));
                    }
                }

                ParserState::CdataContent { bracket_count } => {
                    pos = self.scan_cdata_content(
                        buf, block_offset, block_len, pos, masks,
                        stream_offset, bracket_count, visitor,
                    )?;
                }

                // After `<!D`: match the literal `OCTYPE` one byte per step.
                ParserState::AfterLtBangD { matched } => {
                    let abs = block_offset + pos;
                    let byte = buf[abs];
                    const DOCTYPE_CHARS: &[u8] = b"OCTYPE";
                    if byte == DOCTYPE_CHARS[matched as usize] {
                        let new_matched = matched + 1;
                        if new_matched as usize == DOCTYPE_CHARS.len() {
                            // `usize::MAX` presumably marks "name not started
                            // yet" for `scan_doctype_name` — confirm there.
                            self.state = ParserState::DoctypeName { name_start: usize::MAX };
                        } else {
                            self.state = ParserState::AfterLtBangD { matched: new_matched };
                        }
                        pos += 1;
                    } else {
                        return Err(ParseError::Xml(Error {
                            kind: ErrorKind::UnexpectedByte(byte),
                            offset: stream_offset + abs as u64,
                        }));
                    }
                }

                // The remaining states are handled entirely by dedicated scanners.
                ParserState::DoctypeName { name_start } => {
                    pos = self.scan_doctype_name(
                        buf, block_offset, block_len, pos, masks,
                        stream_offset, name_start, visitor,
                    )?;
                }

                ParserState::DoctypeContent { depth, sub } => {
                    pos = self.scan_doctype_content(
                        buf, block_offset, block_len, pos,
                        stream_offset, depth, sub, visitor,
                    )?;
                }

                ParserState::PITarget { name_start } => {
                    pos = self.scan_pi_target(
                        buf, block_offset, block_len, pos, masks,
                        stream_offset, name_start, visitor,
                    )?;
                }

                ParserState::PIContent { saw_qmark } => {
                    pos = self.scan_pi_content(
                        buf, block_offset, block_len, pos, masks,
                        stream_offset, saw_qmark, visitor,
                    )?;
                }

                ParserState::EntityRef { name_start } => {
                    pos = self.scan_entity_ref(
                        buf, block_offset, block_len, pos, masks,
                        stream_offset, name_start, visitor,
                    )?;
                }

                ParserState::CharRef { value_start } => {
                    pos = self.scan_char_ref(
                        buf, block_offset, block_len, pos, masks,
                        stream_offset, value_start, visitor,
                    )?;
                }

                ParserState::AttrEntityRef { name_start, quote } => {
                    pos = self.scan_attr_entity_ref(
                        buf, block_offset, block_len, pos, masks,
                        stream_offset, name_start, quote, visitor,
                    )?;
                }

                ParserState::AttrCharRef { value_start, quote } => {
                    pos = self.scan_attr_char_ref(
                        buf, block_offset, block_len, pos, masks,
                        stream_offset, value_start, quote, visitor,
                    )?;
                }
            }
        }

        Ok(block_offset + pos)
    }
1158
    /// Scans text content within one block, starting at block-relative `pos`.
    ///
    /// Uses `masks.content_delim` to jump to the next `<`, `&`, or `]`:
    /// * `<`: flushes pending text, records the markup start, and switches to
    ///   the state selected by the following byte (or `AfterLt` if `<` is the
    ///   last byte of `buf`).
    /// * `&`: flushes pending text and switches to `EntityRef`.
    /// * `]`: counts the run of `]` (looking ahead up to `buf.len()`, not just
    ///   this block) and fails with `CdataEndInContent` if two or more are
    ///   directly followed by `>`.
    ///
    /// Returns the block-relative position to continue from; this is
    /// `block_len` when the block holds no further delimiter, and can exceed
    /// `block_len` by one when the byte after `<` was consumed as well.
    fn scan_content<V: Visitor>(
        &mut self,
        buf: &[u8],
        block_offset: usize,
        block_len: usize,
        mut pos: usize,
        masks: &CharClassMasks,
        stream_offset: u64,
        visitor: &mut V,
    ) -> Result<usize, ParseError<V::Error>> {
        // Everything from here on is text until a delimiter says otherwise.
        if self.text_start.is_none() {
            self.text_start = Some(block_offset + pos);
        }

        // A run of `]` reached the end of the previous buffer: continue
        // counting it from the current position.
        if self.content_bracket_count > 0 {
            let abs = block_offset + pos;
            if abs < buf.len() {
                let mut scan = abs;
                let mut brackets = self.content_bracket_count;
                while scan < buf.len() {
                    let ch = buf[scan];
                    if ch == b']' {
                        brackets = brackets.saturating_add(1);
                        scan += 1;
                    } else if ch == b'>' && brackets >= 2 {
                        // Offset points at the first `]` of the `]]>`.
                        return Err(ParseError::Xml(Error {
                            kind: ErrorKind::CdataEndInContent,
                            offset: stream_offset + scan as u64 - 2,
                        }));
                    } else {
                        // The run ended harmlessly.
                        self.content_bracket_count = 0;
                        break;
                    }
                }
                // The run reaches the end of the buffer: carry the count over
                // (2 is enough to recognise a following `>`).
                if scan >= buf.len() {
                    self.content_bracket_count = brackets.min(2);
                }
                let consumed_in_block = scan - block_offset;
                if consumed_in_block >= block_len {
                    return Ok(block_len);
                }
                pos = consumed_in_block;
            }
        }

        loop {
            if pos >= block_len {
                return Ok(block_len);
            }

            // Remaining delimiter bits at or after `pos`.
            let shifted = masks.content_delim >> pos;
            if shifted == 0 {
                return Ok(block_len);
            }

            let next = shifted.trailing_zeros() as usize;
            if pos + next >= block_len {
                return Ok(block_len);
            }

            let abs = block_offset + pos + next;
            let byte = buf[abs];

            match byte {
                b'<' => {
                    self.content_bracket_count = 0;
                    // Report the text that precedes the markup.
                    if let Some(text_start) = self.text_start.take() {
                        if text_start < abs {
                            let span = Span::new(
                                stream_offset + text_start as u64,
                                stream_offset + abs as u64,
                            );
                            visitor
                                .characters(&buf[text_start..abs], span)
                                .map_err(ParseError::Visitor)?;
                        }
                    }
                    self.markup_start = Some(abs);

                    let after = abs + 1;
                    // Classify the markup right away when the next byte is
                    // available, skipping the `AfterLt` state.
                    if after < buf.len() {
                        let b = buf[after];
                        match b {
                            b'/' => {
                                self.state = ParserState::EndTagName {
                                    name_start: after + 1,
                                };
                                return Ok(pos + next + 2);
                            }
                            b'?' => {
                                self.state = ParserState::PITarget {
                                    name_start: after + 1,
                                };
                                return Ok(pos + next + 2);
                            }
                            b'!' => {
                                self.state = ParserState::AfterLtBang;
                                return Ok(pos + next + 2);
                            }
                            // The name starts at `after`, so only `<` is consumed.
                            _ if is_name_start_byte(b) => {
                                self.state =
                                    ParserState::StartTagName { name_start: after };
                                return Ok(pos + next + 1);
                            }
                            _ => {
                                return Err(ParseError::Xml(Error {
                                    kind: ErrorKind::UnexpectedByte(b),
                                    offset: stream_offset + after as u64,
                                }));
                            }
                        }
                    } else {
                        // `<` is the last byte of the buffer.
                        self.state = ParserState::AfterLt;
                        return Ok(pos + next + 1);
                    }
                }
                b'&' => {
                    self.content_bracket_count = 0;
                    // Report the text that precedes the reference.
                    if let Some(text_start) = self.text_start.take() {
                        if text_start < abs {
                            let span = Span::new(
                                stream_offset + text_start as u64,
                                stream_offset + abs as u64,
                            );
                            visitor
                                .characters(&buf[text_start..abs], span)
                                .map_err(ParseError::Visitor)?;
                        }
                    }
                    self.state = ParserState::EntityRef {
                        name_start: abs + 1,
                    };
                    self.markup_start = Some(abs);
                    return Ok(pos + next + 1);
                }
                b']' => {
                    // Count the run of `]` starting here; the brackets stay
                    // part of the pending text.
                    let mut scan = abs + 1;
                    let mut brackets: u8 = self.content_bracket_count + 1;
                    while scan < buf.len() {
                        let ch = buf[scan];
                        if ch == b']' {
                            brackets = brackets.saturating_add(1);
                            scan += 1;
                        } else if ch == b'>' && brackets >= 2 {
                            // Offset points at the first `]` of the `]]>`.
                            return Err(ParseError::Xml(Error {
                                kind: ErrorKind::CdataEndInContent,
                                offset: stream_offset + scan as u64 - 2,
                            }));
                        } else {
                            self.content_bracket_count = 0;
                            break;
                        }
                    }
                    // Run reaches the end of the buffer: carry the count over.
                    if scan >= buf.len() {
                        self.content_bracket_count = brackets.min(2);
                    }
                    let consumed_in_block = scan - block_offset;
                    if consumed_in_block >= block_len {
                        return Ok(block_len);
                    }
                    pos = consumed_in_block;
                }
                // Reached only if `content_delim` flags a byte other than the
                // three handled above.
                _ => unreachable!(),
            }
        }
    }
1334
    /// Scans comment content within one block, starting at block-relative
    /// `pos`, looking for the closing `-->`.
    ///
    /// `dash_count` is the number of consecutive `-` bytes seen immediately
    /// before `pos`. Two dashes must be followed by `>`; anything else is a
    /// `DoubleDashInComment` error. On `-->` the content before the dashes is
    /// reported (if non-empty), then `comment_end`, and the body is finished.
    ///
    /// Returns the block-relative position to continue from. When the block
    /// is exhausted without finding the end, the current dash count is stored
    /// back into `self.state` and `block_len` is returned.
    fn scan_comment_content<V: Visitor>(
        &mut self,
        buf: &[u8],
        block_offset: usize,
        block_len: usize,
        mut pos: usize,
        masks: &CharClassMasks,
        stream_offset: u64,
        mut dash_count: u8,
        visitor: &mut V,
    ) -> Result<usize, ParseError<V::Error>> {
        let content_start = self.content_start.unwrap();
        loop {
            // Byte-by-byte while a dash run is in progress.
            while dash_count > 0 && pos < block_len {
                let abs = block_offset + pos;
                let byte = buf[abs];
                if byte == b'>' && dash_count >= 2 {
                    // Content ends where the closing dashes begin.
                    let content_end = abs - dash_count as usize;
                    if content_end > content_start {
                        let span = Span::new(
                            stream_offset + content_start as u64,
                            stream_offset + content_end as u64,
                        );
                        visitor
                            .comment_content(&buf[content_start..content_end], span)
                            .map_err(ParseError::Visitor)?;
                    }
                    // The end span covers the dashes and the `>`.
                    let end_span = Span::new(
                        stream_offset + content_end as u64,
                        stream_offset + abs as u64 + 1,
                    );
                    visitor
                        .comment_end(end_span)
                        .map_err(ParseError::Visitor)?;
                    self.finish_content_body();
                    return Ok(pos + 1);
                } else if dash_count >= 2 {
                    // `--` not followed by `>`; offset points at the first dash.
                    return Err(ParseError::Xml(Error {
                        kind: ErrorKind::DoubleDashInComment,
                        offset: stream_offset + abs as u64 - 2,
                    }));
                } else if byte == b'-' {
                    dash_count += 1;
                    pos += 1;
                } else {
                    // A single dash followed by something else is ordinary content.
                    dash_count = 0;
                    pos += 1;
                    break;
                }
            }

            // Block exhausted: remember the dash run for the next block.
            if pos >= block_len {
                self.state = ParserState::CommentContent { dash_count };
                return Ok(block_len);
            }

            // NOTE(review): the loop above only exits with `pos < block_len`
            // once `dash_count` is 0, so this branch looks unreachable.
            if dash_count > 0 {
                self.state = ParserState::CommentContent { dash_count };
                return Ok(block_len);
            }

            // No dash run in progress: jump to the next `-` via the mask.
            let shifted = masks.dash >> pos;
            if shifted == 0 {
                self.state = ParserState::CommentContent { dash_count: 0 };
                return Ok(block_len);
            }

            let next = shifted.trailing_zeros() as usize;
            if pos + next >= block_len {
                self.state = ParserState::CommentContent { dash_count: 0 };
                return Ok(block_len);
            }

            // Step onto the dash, count it, and move past it.
            pos = pos + next;
            dash_count = 1;
            pos += 1;
        }
    }
1429
1430 fn scan_cdata_content<V: Visitor>(
1435 &mut self,
1436 buf: &[u8],
1437 block_offset: usize,
1438 block_len: usize,
1439 mut pos: usize,
1440 masks: &CharClassMasks,
1441 stream_offset: u64,
1442 mut bracket_count: u8,
1443 visitor: &mut V,
1444 ) -> Result<usize, ParseError<V::Error>> {
1445 let content_start = self.content_start.unwrap();
1446 loop {
1447 while bracket_count > 0 && pos < block_len {
1449 let abs = block_offset + pos;
1450 let byte = buf[abs];
1451 if byte == b']' {
1452 bracket_count = (bracket_count + 1).min(2);
1453 pos += 1;
1454 } else if byte == b'>' && bracket_count >= 2 {
1455 let content_end = abs - bracket_count as usize;
1457 if content_end > content_start {
1458 let span = Span::new(
1459 stream_offset + content_start as u64,
1460 stream_offset + content_end as u64,
1461 );
1462 visitor
1463 .cdata_content(&buf[content_start..content_end], span)
1464 .map_err(ParseError::Visitor)?;
1465 }
1466 let end_span = Span::new(
1467 stream_offset + content_end as u64,
1468 stream_offset + abs as u64 + 1,
1469 );
1470 visitor
1471 .cdata_end(end_span)
1472 .map_err(ParseError::Visitor)?;
1473 self.finish_content_body();
1474 return Ok(pos + 1);
1475 } else {
1476 bracket_count = 0;
1477 pos += 1;
1478 break;
1479 }
1480 }
1481
1482 if pos >= block_len {
1483 self.state = ParserState::CdataContent { bracket_count };
1484 return Ok(block_len);
1485 }
1486
1487 if bracket_count > 0 {
1488 self.state = ParserState::CdataContent { bracket_count };
1489 return Ok(block_len);
1490 }
1491
1492 let shifted = masks.rbracket >> pos;
1494 if shifted == 0 {
1495 self.state = ParserState::CdataContent { bracket_count: 0 };
1496 return Ok(block_len);
1497 }
1498
1499 let next = shifted.trailing_zeros() as usize;
1500 if pos + next >= block_len {
1501 self.state = ParserState::CdataContent { bracket_count: 0 };
1502 return Ok(block_len);
1503 }
1504
1505 pos = pos + next;
1507 bracket_count = 1;
1508 pos += 1;
1509 }
1510 }
1511
1512 fn scan_doctype_name<V: Visitor>(
1517 &mut self,
1518 buf: &[u8],
1519 block_offset: usize,
1520 block_len: usize,
1521 pos: usize,
1522 masks: &CharClassMasks,
1523 stream_offset: u64,
1524 name_start: usize,
1525 visitor: &mut V,
1526 ) -> Result<usize, ParseError<V::Error>> {
1527 if name_start >= usize::MAX - 1 {
1532 if name_start == usize::MAX {
1533 let abs = block_offset + pos;
1535 if abs >= buf.len() {
1536 return Ok(block_len);
1537 }
1538 let byte = buf[abs];
1539 if !is_xml_whitespace(byte) {
1540 return Err(ParseError::Xml(Error {
1541 kind: ErrorKind::DoctypeMissingWhitespace,
1542 offset: stream_offset + abs as u64,
1543 }));
1544 }
1545 self.state = ParserState::DoctypeName { name_start: usize::MAX - 1 };
1547 return Ok(pos + 1);
1548 }
1549
1550 let non_ws = !masks.whitespace >> pos;
1552 if non_ws == 0 {
1553 return Ok(block_len);
1554 }
1555 let next = non_ws.trailing_zeros() as usize;
1556 if pos + next >= block_len {
1557 return Ok(block_len);
1558 }
1559 let new_abs = block_offset + pos + next;
1560 let byte = buf[new_abs];
1561 if byte == b'>' || !is_name_start_byte(byte) {
1562 return Err(ParseError::Xml(Error {
1564 kind: ErrorKind::DoctypeMissingName,
1565 offset: stream_offset + new_abs as u64,
1566 }));
1567 }
1568 self.state = ParserState::DoctypeName { name_start: new_abs };
1570 let shifted2 = masks.name_end >> (pos + next);
1572 if shifted2 == 0 {
1573 return Ok(block_len);
1574 }
1575 let next2 = shifted2.trailing_zeros() as usize;
1576 if pos + next + next2 >= block_len {
1577 return Ok(block_len);
1578 }
1579 let end_abs = block_offset + pos + next + next2;
1580 return self.finish_doctype_name(buf, pos + next + next2, end_abs, new_abs, stream_offset, visitor);
1581 }
1582
1583 let Some((next, end_abs)) =
1585 find_name_end(masks.name_end, pos, block_offset, block_len)
1586 else {
1587 check_name_length(block_offset + block_len, name_start, stream_offset)?;
1588 return Ok(block_len);
1589 };
1590 self.finish_doctype_name(buf, next, end_abs, name_start, stream_offset, visitor)
1591 }
1592
1593 fn finish_doctype_name<V: Visitor>(
1595 &mut self,
1596 buf: &[u8],
1597 block_rel_pos: usize,
1598 end_abs: usize,
1599 name_start: usize,
1600 stream_offset: u64,
1601 visitor: &mut V,
1602 ) -> Result<usize, ParseError<V::Error>> {
1603 let name = &buf[name_start..end_abs];
1604 if name.len() > MAX_NAME_LENGTH {
1605 return Err(ParseError::Xml(Error {
1606 kind: ErrorKind::NameTooLong,
1607 offset: stream_offset + name_start as u64,
1608 }));
1609 }
1610 let name_span = Span::new(
1611 stream_offset + name_start as u64,
1612 stream_offset + end_abs as u64,
1613 );
1614 visitor
1615 .doctype_start(name, name_span)
1616 .map_err(ParseError::Visitor)?;
1617
1618 let byte = buf[end_abs];
1619 if byte == b'>' {
1620 let end_span = Span::new(
1621 stream_offset + end_abs as u64,
1622 stream_offset + end_abs as u64 + 1,
1623 );
1624 visitor
1625 .doctype_end(end_span)
1626 .map_err(ParseError::Visitor)?;
1627 self.finish_markup();
1628 Ok(block_rel_pos + 1)
1629 } else {
1630 self.markup_stream_offset = Some(stream_offset + self.markup_start.unwrap() as u64);
1632 self.markup_start = None;
1633 self.content_start = Some(end_abs + 1);
1634 self.state = ParserState::DoctypeContent {
1635 depth: 0,
1636 sub: DoctypeSubState::Normal,
1637 };
1638 Ok(block_rel_pos + 1)
1639 }
1640 }
1641
    /// Scans the remainder of a DOCTYPE declaration byte by byte until the `>`
    /// that closes it.
    ///
    /// `depth` counts unmatched `[` brackets and `sub` tracks whether the
    /// scanner is inside a quoted literal, a comment or a processing
    /// instruction; a `>` only ends the declaration in the `Normal` sub-state
    /// at depth 0. When that happens the content (if non-empty) and the end
    /// span are handed to `visitor` and the position just past `>` is returned.
    /// If the block is exhausted first, `depth` and `sub` are stored in
    /// `self.state` and `block_len` is returned.
    ///
    /// # Errors
    /// `DoctypeBracketsTooDeep` once more than 1024 brackets are open; visitor
    /// failures are forwarded as `ParseError::Visitor`.
    ///
    /// # Panics
    /// Panics if `self.content_start` is `None`.
    fn scan_doctype_content<V: Visitor>(
        &mut self,
        buf: &[u8],
        block_offset: usize,
        block_len: usize,
        mut pos: usize,
        stream_offset: u64,
        mut depth: u32,
        mut sub: DoctypeSubState,
        visitor: &mut V,
    ) -> Result<usize, ParseError<V::Error>> {
        let content_start = self.content_start.unwrap();

        while pos < block_len {
            let abs = block_offset + pos;
            let byte = buf[abs];

            match sub {
                DoctypeSubState::Normal => match byte {
                    b'[' => {
                        depth += 1;
                        if depth > 1024 {
                            return Err(ParseError::Xml(Error {
                                kind: ErrorKind::DoctypeBracketsTooDeep,
                                offset: stream_offset + abs as u64,
                            }));
                        }
                    }
                    b']' => {
                        // A stray `]` at depth 0 is tolerated rather than underflowing.
                        depth = depth.saturating_sub(1);
                    }
                    b'>' => {
                        // Only a `>` outside all brackets closes the declaration.
                        if depth == 0 {
                            if abs > content_start {
                                let span = Span::new(
                                    stream_offset + content_start as u64,
                                    stream_offset + abs as u64,
                                );
                                visitor
                                    .doctype_content(&buf[content_start..abs], span)
                                    .map_err(ParseError::Visitor)?;
                            }
                            let end_span = Span::new(
                                stream_offset + abs as u64,
                                stream_offset + abs as u64 + 1,
                            );
                            visitor
                                .doctype_end(end_span)
                                .map_err(ParseError::Visitor)?;
                            self.finish_content_body();
                            return Ok(pos + 1);
                        }
                    }
                    b'<' => sub = DoctypeSubState::AfterLt,
                    b'"' => sub = DoctypeSubState::DoubleQuoted,
                    b'\'' => sub = DoctypeSubState::SingleQuoted,
                    _ => {}
                },

                // In the three look-ahead states below, a byte that does not
                // continue the expected prefix is re-examined in `Normal`
                // (`continue` skips the `pos += 1` at the bottom of the loop).
                DoctypeSubState::AfterLt => match byte {
                    b'!' => sub = DoctypeSubState::AfterLtBang,
                    b'?' => sub = DoctypeSubState::PI { saw_qmark: false },
                    _ => { sub = DoctypeSubState::Normal; continue; }
                },

                DoctypeSubState::AfterLtBang => match byte {
                    b'-' => sub = DoctypeSubState::AfterLtBangDash,
                    _ => { sub = DoctypeSubState::Normal; continue; }
                },

                DoctypeSubState::AfterLtBangDash => match byte {
                    b'-' => sub = DoctypeSubState::Comment { dash_count: 0 },
                    _ => { sub = DoctypeSubState::Normal; continue; }
                },

                // Inside `<!-- ... -->`: only `>` preceded by at least two
                // dashes leaves the comment.
                DoctypeSubState::Comment { ref mut dash_count } => match byte {
                    b'-' => *dash_count = dash_count.saturating_add(1),
                    b'>' if *dash_count >= 2 => sub = DoctypeSubState::Normal,
                    _ => *dash_count = 0,
                },

                // Inside `<? ... ?>`: only `>` directly after `?` leaves the PI.
                DoctypeSubState::PI { ref mut saw_qmark } => match byte {
                    b'?' => *saw_qmark = true,
                    b'>' if *saw_qmark => sub = DoctypeSubState::Normal,
                    _ => *saw_qmark = false,
                },

                // Quoted literals run until the matching quote; every other
                // byte, including `>` and brackets, is ignored.
                DoctypeSubState::DoubleQuoted => {
                    if byte == b'"' { sub = DoctypeSubState::Normal; }
                }

                DoctypeSubState::SingleQuoted => {
                    if byte == b'\'' { sub = DoctypeSubState::Normal; }
                }
            }

            pos += 1;
        }

        // Block exhausted: persist the scanner state for the next call.
        self.state = ParserState::DoctypeContent { depth, sub };
        Ok(block_len)
    }
1750
    /// Maximum number of XML-declaration content bytes that `emit_pi_content`
    /// will accumulate; equal to the size of the `xml_decl_buf` array.
    const XML_DECL_BUF_LIMIT: usize = 256;
1753
1754 fn emit_pi_content<V: Visitor>(
1756 &mut self,
1757 content: &[u8],
1758 span: Span,
1759 visitor: &mut V,
1760 ) -> Result<(), ParseError<V::Error>> {
1761 if self.in_xml_decl {
1762 let new_len = self.xml_decl_buf_len + content.len();
1763 if new_len > Self::XML_DECL_BUF_LIMIT {
1764 return Err(ParseError::Xml(Error {
1765 kind: ErrorKind::MalformedXmlDeclaration,
1766 offset: span.start,
1767 }));
1768 }
1769 self.xml_decl_buf[self.xml_decl_buf_len..new_len].copy_from_slice(content);
1770 self.xml_decl_buf_len = new_len;
1771 Ok(())
1772 } else {
1773 visitor.pi_content(content, span).map_err(ParseError::Visitor)
1774 }
1775 }
1776
1777 fn emit_pi_end<V: Visitor>(
1779 &mut self,
1780 end_span: Span,
1781 visitor: &mut V,
1782 ) -> Result<(), ParseError<V::Error>> {
1783 if self.in_xml_decl {
1784 self.in_xml_decl = false;
1785 let decl_span = Span::new(self.xml_decl_span_start, end_span.end);
1786 let len = self.xml_decl_buf_len;
1787 self.xml_decl_buf_len = 0;
1788 let (version, encoding, standalone) =
1789 parse_xml_decl(&self.xml_decl_buf[..len], self.xml_decl_span_start)?;
1790 visitor
1791 .xml_declaration(version, encoding, standalone, decl_span)
1792 .map_err(ParseError::Visitor)
1793 } else {
1794 visitor.pi_end(end_span).map_err(ParseError::Visitor)
1795 }
1796 }
1797
1798 fn scan_pi_target<V: Visitor>(
1800 &mut self,
1801 buf: &[u8],
1802 block_offset: usize,
1803 block_len: usize,
1804 pos: usize,
1805 masks: &CharClassMasks,
1806 stream_offset: u64,
1807 name_start: usize,
1808 visitor: &mut V,
1809 ) -> Result<usize, ParseError<V::Error>> {
1810 let Some((next, abs)) =
1811 find_name_end(masks.name_end, pos, block_offset, block_len)
1812 else {
1813 check_name_length(block_offset + block_len, name_start, stream_offset)?;
1814 return Ok(block_len);
1815 };
1816
1817 let name = validate_name(buf, name_start, abs, stream_offset)?;
1818 let name_span = Span::new(
1819 stream_offset + name_start as u64,
1820 stream_offset + abs as u64,
1821 );
1822
1823 let is_xml_target = name.eq_ignore_ascii_case(b"xml");
1825 if is_xml_target {
1826 if self.had_markup {
1827 return Err(ParseError::Xml(Error {
1829 kind: ErrorKind::ReservedPITarget,
1830 offset: stream_offset + name_start as u64,
1831 }));
1832 }
1833 self.in_xml_decl = true;
1835 self.xml_decl_buf_len = 0;
1836 self.xml_decl_span_start = stream_offset + self.markup_start.unwrap() as u64;
1837 } else {
1838 visitor
1839 .pi_start(name, name_span)
1840 .map_err(ParseError::Visitor)?;
1841 }
1842
1843 let byte = buf[abs];
1844 if byte == b'?' {
1845 let gt_pos = abs + 1;
1847 if gt_pos < buf.len() && buf[gt_pos] == b'>' {
1848 let end_span = Span::new(
1849 stream_offset + abs as u64,
1850 stream_offset + gt_pos as u64 + 1,
1851 );
1852 self.emit_pi_end(end_span, visitor)?;
1853 self.finish_markup();
1854 Ok(next + 2)
1855 } else {
1856 self.markup_stream_offset = Some(stream_offset + self.markup_start.unwrap() as u64);
1857 self.markup_start = None;
1858 self.content_start = Some(abs + 1);
1859 self.state = ParserState::PIContent {
1860 saw_qmark: true,
1861 };
1862 Ok(next + 1)
1863 }
1864 } else {
1865 self.markup_stream_offset = Some(stream_offset + self.markup_start.unwrap() as u64);
1867 self.markup_start = None;
1868 self.content_start = Some(abs + 1);
1869 self.state = ParserState::PIContent {
1870 saw_qmark: false,
1871 };
1872 Ok(next + 1)
1873 }
1874 }
1875
1876 fn scan_pi_content<V: Visitor>(
1878 &mut self,
1879 buf: &[u8],
1880 block_offset: usize,
1881 block_len: usize,
1882 mut pos: usize,
1883 masks: &CharClassMasks,
1884 stream_offset: u64,
1885 mut saw_qmark: bool,
1886 visitor: &mut V,
1887 ) -> Result<usize, ParseError<V::Error>> {
1888 let content_start = self.content_start.unwrap();
1889 loop {
1890 if saw_qmark {
1892 if pos >= block_len {
1893 self.state = ParserState::PIContent { saw_qmark: true };
1894 return Ok(block_len);
1895 }
1896 let abs = block_offset + pos;
1897 let byte = buf[abs];
1898 if byte == b'>' {
1899 let content_end = abs - 1;
1901 if content_end > content_start {
1902 let span = Span::new(
1903 stream_offset + content_start as u64,
1904 stream_offset + content_end as u64,
1905 );
1906 self.emit_pi_content(&buf[content_start..content_end], span, visitor)?;
1907 }
1908 let end_span = Span::new(
1909 stream_offset + abs as u64 - 1,
1910 stream_offset + abs as u64 + 1,
1911 );
1912 self.emit_pi_end(end_span, visitor)?;
1913 self.finish_content_body();
1914 return Ok(pos + 1);
1915 }
1916 saw_qmark = false;
1917 if byte == b'?' {
1918 saw_qmark = true;
1919 pos += 1;
1920 continue;
1921 }
1922 pos += 1;
1923 continue;
1924 }
1925
1926 if pos >= block_len {
1927 self.state = ParserState::PIContent { saw_qmark: false };
1928 return Ok(block_len);
1929 }
1930
1931 let shifted = masks.qmark >> pos;
1932 if shifted == 0 {
1933 self.state = ParserState::PIContent {
1934 saw_qmark: false,
1935 };
1936 return Ok(block_len);
1937 }
1938
1939 let next = shifted.trailing_zeros() as usize;
1940 if pos + next >= block_len {
1941 self.state = ParserState::PIContent {
1942 saw_qmark: false,
1943 };
1944 return Ok(block_len);
1945 }
1946
1947 let qmark_abs = block_offset + pos + next;
1948 let gt_pos = qmark_abs + 1;
1949 if gt_pos < buf.len() {
1950 if buf[gt_pos] == b'>' {
1951 let content_end = qmark_abs;
1953 if content_end > content_start {
1954 let span = Span::new(
1955 stream_offset + content_start as u64,
1956 stream_offset + content_end as u64,
1957 );
1958 self.emit_pi_content(&buf[content_start..content_end], span, visitor)?;
1959 }
1960 let end_span = Span::new(
1961 stream_offset + qmark_abs as u64,
1962 stream_offset + gt_pos as u64 + 1,
1963 );
1964 self.emit_pi_end(end_span, visitor)?;
1965 self.finish_content_body();
1966 return Ok(pos + next + 2);
1967 }
1968 pos = pos + next + 1;
1970 } else {
1971 self.state = ParserState::PIContent {
1973 saw_qmark: true,
1974 };
1975 return Ok(pos + next + 1);
1976 }
1977 }
1978 }
1979
1980 fn scan_entity_ref<V: Visitor>(
1982 &mut self,
1983 buf: &[u8],
1984 block_offset: usize,
1985 block_len: usize,
1986 pos: usize,
1987 masks: &CharClassMasks,
1988 stream_offset: u64,
1989 name_start: usize,
1990 visitor: &mut V,
1991 ) -> Result<usize, ParseError<V::Error>> {
1992 let abs = block_offset + pos;
1994 if abs == name_start && abs < buf.len() && buf[abs] == b'#' {
1995 self.state = ParserState::CharRef {
1996 value_start: abs + 1,
1997 };
1998 return Ok(pos + 1);
1999 }
2000
2001 let Some((name, span, next_pos)) = find_and_validate_entity_name(
2002 buf, block_offset, block_len, pos, masks.semicolon,
2003 stream_offset, name_start,
2004 )? else {
2005 return Ok(block_len);
2006 };
2007
2008 visitor
2009 .entity_ref(name, span)
2010 .map_err(ParseError::Visitor)?;
2011
2012 self.finish_markup();
2013 Ok(next_pos)
2014 }
2015
2016 fn scan_char_ref<V: Visitor>(
2018 &mut self,
2019 buf: &[u8],
2020 block_offset: usize,
2021 block_len: usize,
2022 pos: usize,
2023 masks: &CharClassMasks,
2024 stream_offset: u64,
2025 value_start: usize,
2026 visitor: &mut V,
2027 ) -> Result<usize, ParseError<V::Error>> {
2028 let Some((value, span, next_pos)) = find_and_validate_char_ref(
2029 buf, block_offset, block_len, pos, masks.semicolon,
2030 stream_offset, value_start,
2031 )? else {
2032 return Ok(block_len);
2033 };
2034
2035 visitor
2036 .char_ref(value, span)
2037 .map_err(ParseError::Visitor)?;
2038
2039 self.finish_markup();
2040 Ok(next_pos)
2041 }
2042
2043 fn scan_attr_entity_ref<V: Visitor>(
2045 &mut self,
2046 buf: &[u8],
2047 block_offset: usize,
2048 block_len: usize,
2049 pos: usize,
2050 masks: &CharClassMasks,
2051 stream_offset: u64,
2052 name_start: usize,
2053 quote: QuoteStyle,
2054 visitor: &mut V,
2055 ) -> Result<usize, ParseError<V::Error>> {
2056 let abs = block_offset + pos;
2058 if abs == name_start && abs < buf.len() && buf[abs] == b'#' {
2059 self.state = ParserState::AttrCharRef {
2060 value_start: abs + 1,
2061 quote,
2062 };
2063 return Ok(pos + 1);
2064 }
2065
2066 let Some((name, span, next_pos)) = find_and_validate_entity_name(
2067 buf, block_offset, block_len, pos, masks.semicolon,
2068 stream_offset, name_start,
2069 )? else {
2070 return Ok(block_len);
2071 };
2072
2073 visitor
2074 .attribute_entity_ref(name, span)
2075 .map_err(ParseError::Visitor)?;
2076
2077 self.markup_start = None;
2078 self.content_start = Some(block_offset + next_pos);
2079 self.state = ParserState::AttrValue { quote };
2080 Ok(next_pos)
2081 }
2082
2083 fn scan_attr_char_ref<V: Visitor>(
2085 &mut self,
2086 buf: &[u8],
2087 block_offset: usize,
2088 block_len: usize,
2089 pos: usize,
2090 masks: &CharClassMasks,
2091 stream_offset: u64,
2092 value_start: usize,
2093 quote: QuoteStyle,
2094 visitor: &mut V,
2095 ) -> Result<usize, ParseError<V::Error>> {
2096 let Some((value, span, next_pos)) = find_and_validate_char_ref(
2097 buf, block_offset, block_len, pos, masks.semicolon,
2098 stream_offset, value_start,
2099 )? else {
2100 return Ok(block_len);
2101 };
2102
2103 visitor
2104 .attribute_char_ref(value, span)
2105 .map_err(ParseError::Visitor)?;
2106
2107 self.markup_start = None;
2108 self.content_start = Some(block_offset + next_pos);
2109 self.state = ParserState::AttrValue { quote };
2110 Ok(next_pos)
2111 }
2112}
2113
/// Finds the first set bit of `name_end_mask` at or after bit `pos`.
///
/// Bit `i` of the mask describes byte `i` of the current block. Returns
/// `Some((block_relative_index, block_offset + block_relative_index))` for the
/// first hit that lies before `block_len`, or `None` when there is no such
/// bit. A `pos` of 64 or more (at or past the end of the mask) also yields
/// `None` rather than overflowing the shift.
#[inline(always)]
fn find_name_end(
    name_end_mask: u64,
    pos: usize,
    block_offset: usize,
    block_len: usize,
) -> Option<(usize, usize)> {
    // Shifting a `u64` by its full width or more is an overflow: it panics in
    // debug builds and wraps the shift amount in release builds.
    if pos >= u64::BITS as usize {
        return None;
    }
    let remaining = name_end_mask >> pos;
    if remaining == 0 {
        return None;
    }
    let hit = pos + remaining.trailing_zeros() as usize;
    if hit >= block_len {
        // The bit belongs to padding past the valid part of the block.
        return None;
    }
    Some((hit, block_offset + hit))
}
2135
2136#[inline]
2140fn check_name_length<E>(
2141 block_end: usize,
2142 name_start: usize,
2143 stream_offset: u64,
2144) -> Result<(), ParseError<E>> {
2145 if block_end - name_start > MAX_NAME_LENGTH {
2146 return Err(ParseError::Xml(Error {
2147 kind: ErrorKind::NameTooLong,
2148 offset: stream_offset + name_start as u64,
2149 }));
2150 }
2151 Ok(())
2152}
2153
2154#[inline]
2158fn validate_name<'a, E>(
2159 buf: &'a [u8],
2160 name_start: usize,
2161 name_end: usize,
2162 stream_offset: u64,
2163) -> Result<&'a [u8], ParseError<E>> {
2164 if name_start == name_end {
2165 return Err(ParseError::Xml(Error {
2166 kind: ErrorKind::UnexpectedByte(buf[name_end]),
2167 offset: stream_offset + name_end as u64,
2168 }));
2169 }
2170 if name_end - name_start > MAX_NAME_LENGTH {
2171 return Err(ParseError::Xml(Error {
2172 kind: ErrorKind::NameTooLong,
2173 offset: stream_offset + name_start as u64,
2174 }));
2175 }
2176 Ok(&buf[name_start..name_end])
2177}
2178
2179#[inline]
2185fn find_and_validate_entity_name<'a, E>(
2186 buf: &'a [u8],
2187 block_offset: usize,
2188 block_len: usize,
2189 pos: usize,
2190 semicolon_mask: u64,
2191 stream_offset: u64,
2192 name_start: usize,
2193) -> Result<Option<(&'a [u8], Span, usize)>, ParseError<E>> {
2194 let shifted = semicolon_mask >> pos;
2195 if shifted == 0 {
2196 if block_offset + block_len - name_start > MAX_NAME_LENGTH {
2197 return Err(ParseError::Xml(Error {
2198 kind: ErrorKind::NameTooLong,
2199 offset: stream_offset + name_start as u64,
2200 }));
2201 }
2202 return Ok(None);
2203 }
2204
2205 let next = shifted.trailing_zeros() as usize;
2206 if pos + next >= block_len {
2207 if block_offset + block_len - name_start > MAX_NAME_LENGTH {
2208 return Err(ParseError::Xml(Error {
2209 kind: ErrorKind::NameTooLong,
2210 offset: stream_offset + name_start as u64,
2211 }));
2212 }
2213 return Ok(None);
2214 }
2215
2216 let semi_abs = block_offset + pos + next;
2217 let name = &buf[name_start..semi_abs];
2218
2219 if name.is_empty() {
2220 return Err(ParseError::Xml(Error {
2221 kind: ErrorKind::UnexpectedByte(b';'),
2222 offset: stream_offset + semi_abs as u64,
2223 }));
2224 }
2225 if name.len() > MAX_NAME_LENGTH {
2226 return Err(ParseError::Xml(Error {
2227 kind: ErrorKind::NameTooLong,
2228 offset: stream_offset + name_start as u64,
2229 }));
2230 }
2231 if !is_name_start_byte(name[0]) {
2232 return Err(ParseError::Xml(Error {
2233 kind: ErrorKind::UnexpectedByte(name[0]),
2234 offset: stream_offset + name_start as u64,
2235 }));
2236 }
2237 for (i, &b) in name[1..].iter().enumerate() {
2238 if !is_name_byte(b) {
2239 return Err(ParseError::Xml(Error {
2240 kind: ErrorKind::UnexpectedByte(b),
2241 offset: stream_offset + name_start as u64 + 1 + i as u64,
2242 }));
2243 }
2244 }
2245
2246 let span = Span::new(
2247 stream_offset + name_start as u64,
2248 stream_offset + semi_abs as u64,
2249 );
2250 Ok(Some((name, span, pos + next + 1)))
2251}
2252
2253#[inline]
2259fn find_and_validate_char_ref<'a, E>(
2260 buf: &'a [u8],
2261 block_offset: usize,
2262 block_len: usize,
2263 pos: usize,
2264 semicolon_mask: u64,
2265 stream_offset: u64,
2266 value_start: usize,
2267) -> Result<Option<(&'a [u8], Span, usize)>, ParseError<E>> {
2268 let shifted = semicolon_mask >> pos;
2269 if shifted == 0 {
2270 if block_offset + block_len - value_start > MAX_CHAR_REF_LENGTH {
2271 return Err(ParseError::Xml(Error {
2272 kind: ErrorKind::CharRefTooLong,
2273 offset: stream_offset + value_start as u64,
2274 }));
2275 }
2276 return Ok(None);
2277 }
2278
2279 let next = shifted.trailing_zeros() as usize;
2280 if pos + next >= block_len {
2281 if block_offset + block_len - value_start > MAX_CHAR_REF_LENGTH {
2282 return Err(ParseError::Xml(Error {
2283 kind: ErrorKind::CharRefTooLong,
2284 offset: stream_offset + value_start as u64,
2285 }));
2286 }
2287 return Ok(None);
2288 }
2289
2290 let semi_abs = block_offset + pos + next;
2291 let value = &buf[value_start..semi_abs];
2292
2293 if value.is_empty() {
2294 return Err(ParseError::Xml(Error {
2295 kind: ErrorKind::InvalidCharRef,
2296 offset: stream_offset + semi_abs as u64,
2297 }));
2298 }
2299 if value.len() > MAX_CHAR_REF_LENGTH {
2300 return Err(ParseError::Xml(Error {
2301 kind: ErrorKind::CharRefTooLong,
2302 offset: stream_offset + value_start as u64,
2303 }));
2304 }
2305 if value[0] == b'x' {
2306 let hex_digits = &value[1..];
2307 if hex_digits.is_empty() || !hex_digits.iter().all(|b| b.is_ascii_hexdigit()) {
2308 return Err(ParseError::Xml(Error {
2309 kind: ErrorKind::InvalidCharRef,
2310 offset: stream_offset + value_start as u64,
2311 }));
2312 }
2313 } else if !value.iter().all(|b| b.is_ascii_digit()) {
2314 return Err(ParseError::Xml(Error {
2315 kind: ErrorKind::InvalidCharRef,
2316 offset: stream_offset + value_start as u64,
2317 }));
2318 }
2319
2320 let span = Span::new(
2321 stream_offset + value_start as u64,
2322 stream_offset + semi_abs as u64,
2323 );
2324 Ok(Some((value, span, pos + next + 1)))
2325}
2326
2327#[inline(always)]
2332fn find_non_whitespace(
2333 whitespace_mask: u64,
2334 pos: usize,
2335 block_offset: usize,
2336 block_len: usize,
2337) -> Option<(usize, usize)> {
2338 find_name_end(!whitespace_mask, pos, block_offset, block_len)
2339}
2340
/// Returns `true` if `b` may begin a name: an ASCII letter, `_`, `:`, or any
/// byte with the high bit set (`0x80..=0xFF`).
#[inline]
fn is_name_start_byte(b: u8) -> bool {
    matches!(b, b'A'..=b'Z' | b'a'..=b'z' | b'_' | b':' | 0x80..=0xFF)
}
2346
2347#[inline]
2349fn is_name_byte(b: u8) -> bool {
2350 is_name_start_byte(b) || b.is_ascii_digit() || b == b'-' || b == b'.'
2351}
2352
2353fn parse_xml_decl<E>(
2363 buf: &[u8],
2364 offset: u64,
2365) -> Result<(&[u8], Option<&[u8]>, Option<bool>), ParseError<E>> {
2366 let err = || {
2367 ParseError::Xml(Error {
2368 kind: ErrorKind::MalformedXmlDeclaration,
2369 offset,
2370 })
2371 };
2372
2373 let mut pos = 0;
2374
2375 while pos < buf.len() && is_xml_whitespace(buf[pos]) {
2377 pos += 1;
2378 }
2379
2380 let version = parse_pseudo_attr(buf, &mut pos, b"version").ok_or_else(err)?;
2382
2383 while pos < buf.len() && is_xml_whitespace(buf[pos]) {
2385 pos += 1;
2386 }
2387
2388 if pos >= buf.len() {
2390 return Ok((version, None, None));
2391 }
2392
2393 let mut encoding = None;
2395 let mut standalone = None;
2396
2397 if buf[pos..].starts_with(b"encoding") {
2398 encoding = Some(parse_pseudo_attr(buf, &mut pos, b"encoding").ok_or_else(err)?);
2399
2400 while pos < buf.len() && is_xml_whitespace(buf[pos]) {
2402 pos += 1;
2403 }
2404
2405 if pos >= buf.len() {
2406 return Ok((version, encoding, None));
2407 }
2408 }
2409
2410 if buf[pos..].starts_with(b"standalone") {
2412 let val = parse_pseudo_attr(buf, &mut pos, b"standalone").ok_or_else(err)?;
2413 standalone = Some(match val {
2414 b"yes" => true,
2415 b"no" => false,
2416 _ => return Err(err()),
2417 });
2418
2419 while pos < buf.len() && is_xml_whitespace(buf[pos]) {
2421 pos += 1;
2422 }
2423 }
2424
2425 if pos < buf.len() {
2427 return Err(err());
2428 }
2429
2430 Ok((version, encoding, standalone))
2431}
2432
2433fn parse_pseudo_attr<'a>(buf: &'a [u8], pos: &mut usize, expected_name: &[u8]) -> Option<&'a [u8]> {
2436 let end = *pos + expected_name.len();
2438 if end > buf.len() || &buf[*pos..end] != expected_name {
2439 return None;
2440 }
2441 *pos = end;
2442
2443 while *pos < buf.len() && is_xml_whitespace(buf[*pos]) {
2445 *pos += 1;
2446 }
2447 if *pos >= buf.len() || buf[*pos] != b'=' {
2448 return None;
2449 }
2450 *pos += 1;
2451 while *pos < buf.len() && is_xml_whitespace(buf[*pos]) {
2452 *pos += 1;
2453 }
2454
2455 if *pos >= buf.len() {
2457 return None;
2458 }
2459 let quote = buf[*pos];
2460 if quote != b'"' && quote != b'\'' {
2461 return None;
2462 }
2463 *pos += 1;
2464
2465 let value_start = *pos;
2467 while *pos < buf.len() && buf[*pos] != quote {
2468 *pos += 1;
2469 }
2470 if *pos >= buf.len() {
2471 return None;
2472 }
2473 let value = &buf[value_start..*pos];
2474 *pos += 1; Some(value)
2477}
2478
/// Determines whether `buf` ends in the middle of a multi-byte UTF-8 sequence.
///
/// Only the tail of the buffer is inspected; this is not a full UTF-8
/// validator.
///
/// * `Ok(0)` — the buffer is empty or ends on a character boundary.
/// * `Ok(n)` — the last `n` bytes (1..=3) are the beginning of a sequence
///   whose remaining bytes are not in `buf` yet.
/// * `Err(i)` — the trailing bytes are continuation bytes with no lead byte
///   in reach; `i` is `buf.len().saturating_sub(3)`. Presumably callers treat
///   this as invalid UTF-8 at index `i` — confirm against the call site.
fn utf8_boundary_rewind(buf: &[u8]) -> Result<usize, usize> {
    // Walk backwards over at most three bytes; `available` is how many bytes
    // of the buffer start at the byte being examined.
    for (back, &b) in buf.iter().rev().take(3).enumerate() {
        let available = back + 1;
        if b < 0x80 {
            // ASCII: a boundary right at, or before, the end.
            return Ok(0);
        }
        if b >= 0xC0 {
            // Lead byte: how long does the sequence claim to be?
            let expected_len = if b < 0xE0 {
                2
            } else if b < 0xF0 {
                3
            } else {
                4
            };
            return Ok(if available >= expected_len { 0 } else { available });
        }
        // Continuation byte (0x80..=0xBF): keep looking for its lead byte.
    }

    if buf.is_empty() {
        return Ok(0);
    }

    // The last three bytes are all continuation bytes. That is a complete
    // character when a four-byte lead sits directly in front of them; without
    // this check every buffer ending in such a character was reported as an
    // error.
    let len = buf.len();
    if len >= 4 && buf[len - 4] >= 0xF0 {
        return Ok(0);
    }

    Err(len.saturating_sub(3))
}
2510
/// Buffer capacity, in bytes, that `parse_read` passes to
/// `parse_read_with_capacity`.
#[cfg(feature = "std")]
const DEFAULT_BUF_SIZE: usize = 8192;
2514
/// Parses XML read from `reader`, reporting events to `visitor`.
///
/// Equivalent to calling `parse_read_with_capacity` with a buffer capacity of
/// `DEFAULT_BUF_SIZE` bytes.
///
/// # Errors
/// Returns whatever `parse_read_with_capacity` returns.
#[cfg(feature = "std")]
pub fn parse_read<R: std::io::Read, V: Visitor>(
    reader: R,
    visitor: &mut V,
) -> Result<(), ReadError<V::Error>> {
    parse_read_with_capacity(reader, visitor, DEFAULT_BUF_SIZE)
}
2536
/// Parses XML read from `reader` through an internal buffer, reporting events
/// to `visitor`.
///
/// `capacity` is the initial buffer size in bytes and is raised to at least
/// 64. Data is read after any bytes the parser left unconsumed, the parser is
/// run over the valid part of the buffer, and the unconsumed tail is moved to
/// the front before the next read. A read of zero bytes is treated as end of
/// input and passed to the parser as `is_final`.
///
/// If the parser leaves the whole buffer unconsumed (a single construct is
/// longer than the buffer), the buffer is doubled so that more input can be
/// supplied. Memory use is therefore bounded by the longest construct the
/// parser refuses to consume piecemeal, not by `capacity`.
///
/// # Errors
/// * `ReadError::Io` when `reader` fails (`Interrupted` reads are retried).
/// * `ReadError::Xml` / `ReadError::Visitor` for errors returned by the parser.
#[cfg(feature = "std")]
pub fn parse_read_with_capacity<R: std::io::Read, V: Visitor>(
    mut reader: R,
    visitor: &mut V,
    capacity: usize,
) -> Result<(), ReadError<V::Error>> {
    let capacity = capacity.max(64);
    let mut buf = std::vec![0u8; capacity];
    let mut parser = Reader::new();
    let mut stream_offset: u64 = 0;
    let mut valid: usize = 0;

    loop {
        // A full buffer would hand `read` an empty slice; its `Ok(0)` would
        // then be mistaken for end of input. Make room first.
        if valid == buf.len() {
            let grown = buf.len().saturating_mul(2);
            buf.resize(grown, 0);
        }

        let n = loop {
            match reader.read(&mut buf[valid..]) {
                Ok(n) => break n,
                // Interrupted reads are non-fatal and should simply be retried.
                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => {}
                Err(e) => return Err(ReadError::Io(e)),
            }
        };
        valid += n;
        let is_final = n == 0;

        if valid == 0 {
            break;
        }

        let consumed = parser
            .parse(&buf[..valid], stream_offset, is_final, visitor)
            .map_err(ReadError::from_parse)? as usize;

        // Keep the unconsumed tail at the front of the buffer.
        let leftover = valid - consumed;
        if leftover > 0 {
            buf.copy_within(consumed..valid, 0);
        }
        valid = leftover;
        stream_offset += consumed as u64;

        // At end of input with no progress there is nothing more to do.
        if consumed == 0 && is_final {
            break;
        }
    }

    Ok(())
}
2581
/// Error returned by `parse_read` and `parse_read_with_capacity`.
///
/// `E` is the error type of the visitor in use.
#[cfg(feature = "std")]
pub enum ReadError<E> {
    /// The parser reported an XML error (converted from `ParseError::Xml`).
    Xml(Error),
    /// The visitor returned an error (converted from `ParseError::Visitor`).
    Visitor(E),
    /// Reading from the underlying reader failed.
    Io(std::io::Error),
}
2592
#[cfg(feature = "std")]
impl<E> ReadError<E> {
    /// Converts a parser error into the matching `ReadError` variant.
    fn from_parse(e: ParseError<E>) -> Self {
        match e {
            ParseError::Xml(xml_err) => Self::Xml(xml_err),
            ParseError::Visitor(visitor_err) => Self::Visitor(visitor_err),
        }
    }
}
2602
/// Formats the error as `ReadError::<Variant>(<inner Debug output>)`.
#[cfg(feature = "std")]
impl<E: core::fmt::Debug> core::fmt::Debug for ReadError<E> {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            Self::Xml(inner) => write!(f, "ReadError::Xml({:?})", inner),
            Self::Visitor(inner) => write!(f, "ReadError::Visitor({:?})", inner),
            Self::Io(inner) => write!(f, "ReadError::Io({:?})", inner),
        }
    }
}
2613
/// Formats the error as a short category prefix followed by the inner
/// error's `Display` output.
#[cfg(feature = "std")]
impl<E: core::fmt::Display> core::fmt::Display for ReadError<E> {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            Self::Xml(inner) => write!(f, "XML error: {}", inner),
            Self::Visitor(inner) => write!(f, "visitor error: {}", inner),
            Self::Io(inner) => write!(f, "I/O error: {}", inner),
        }
    }
}
2624
/// Marks `ReadError` as an error type. No methods are overridden, so
/// `source()` keeps its default; the inner error is only visible through the
/// `Display` / `Debug` output.
#[cfg(feature = "std")]
impl<E: core::error::Error> core::error::Error for ReadError<E> {}
2627
/// Wraps a parser [`Error`] as [`ReadError::Xml`], so `?` can be used on it.
#[cfg(feature = "std")]
impl<E> From<Error> for ReadError<E> {
    fn from(xml_err: Error) -> Self {
        Self::Xml(xml_err)
    }
}
2634
/// Wraps an I/O error as [`ReadError::Io`], so `?` can be used on it.
#[cfg(feature = "std")]
impl<E> From<std::io::Error> for ReadError<E> {
    fn from(io_err: std::io::Error) -> Self {
        Self::Io(io_err)
    }
}