1use nom::Offset;
2
3use crate::{
4 sax as xml_sax,
5 sax::internal::{
6 content_relaxed, insidecdata, insidecomment, misc, misc_before_doctype,
7 misc_before_xmldecl, Attribute2, AttributeRange, ContentRelaxed, InsideCdata,
8 InsideComment, Misc, MiscBeforeDoctype, MiscBeforeXmlDecl, QName, SAXAttribute2,
9 },
10};
11
12enum InternalSuccess<'a> {
13 StartDocument,
14 EndDocument,
15
16 ContentRelaxed(ContentRelaxed<'a>),
17 InsideCdata(InsideCdata<'a>),
18 InsideComment(InsideComment<'a>),
19 Misc(Misc<'a>),
20 MiscBeforeDoctype(MiscBeforeDoctype<'a>),
21 MiscBeforeXmlDecl(MiscBeforeXmlDecl<'a>),
22}
23
24use std::{
25 borrow::BorrowMut,
26 cell::RefCell,
27 io::{BufRead, BufReader, Read, Write},
28 ops::Range,
29 vec,
30};
31
32use super::{circular, Attribute};
33
/// Position of the parser inside the document; it selects which tokenizer
/// `read_event_splitted` runs next.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ParserState {
    /// Nothing has been emitted yet; the next event is `StartDocument`.
    Initial,
    /// Start of the document: an XML declaration is still allowed.
    DocStartBeforeXmlDecl,
    /// Prolog, before a document type declaration has been seen.
    DocStartBeforeDocType,
    /// Inside a comment opened while in the pre-doctype prolog.
    DocStartBeforeDocTypeInsideComment,
    /// Prolog, after the document type declaration.
    DocStart,
    /// Inside a comment opened while in `DocStart`.
    DocStartInsideComment,

    /// Element content.
    Content,
    /// Inside a CDATA section opened from `Content`.
    InsideCdata,
    /// Inside a comment opened from `Content`.
    InsideComment,
    /// After the element nesting level dropped back to zero.
    DocEnd,
    /// Inside a comment opened while in `DocEnd`.
    DocEndInsideComment,
}
52
/// One namespace declaration recorded while parsing a start tag.
///
/// `prefix` and `value` are byte ranges into the parser's namespace string
/// buffer, not into the per-event string buffer.
struct Namespace {
    /// Element nesting level at which the declaration was made.
    level: usize,
    /// Range of the declared prefix (empty for a default `xmlns` declaration).
    prefix: Range<usize>,
    /// Range of the namespace value the prefix is bound to.
    value: Range<usize>,
}
58pub struct Parser<R: Read> {
59 state: ParserState,
60 bufreader: BufReader<R>,
61 buffer3: circular::Buffer,
62
63 strbuffer: String,
64 offset: usize,
65
66 element_level: usize,
68 element_strbuffer: String,
69 element_list: Vec<Range<usize>>,
70
71 is_namespace_aware: bool,
72 namespace_strbuffer: String,
73 namespace_list: Vec<Namespace>,
74
75 attribute_list: Vec<AttributeRange>,
76}
77
78pub(crate) fn convert_attribute_range<'a>(
79 strbuffer: &'a str,
80 namespace_strbuffer: &'a str,
81 range: AttributeRange,
82) -> Attribute<'a> {
83 Attribute {
84 value: &strbuffer[range.value],
85 name: &strbuffer[range.name],
86 local_name: &strbuffer[range.local_name],
87 prefix: &strbuffer[range.prefix],
88 namespace: &namespace_strbuffer[range.namespace],
89 }
90}
91
92fn convert_start_element_name_and_add_attributes<'a>(
93 strbuffer: &'a mut String,
94 namespace_strbuffer: &'a mut String,
95
96 event1: crate::sax::internal::StartElement,
97 buffer3: &circular::Buffer,
98 attribute_list: &'a mut Vec<AttributeRange>,
99) -> SaxResult<Range<usize>> {
100 attribute_list.clear();
101
102 let start = strbuffer.len();
103 let size = event1.name.len();
104 let element_name_range = start..start + size;
105 strbuffer.push_str(event1.name);
106
107 let start = strbuffer.len();
110 let size = event1.attributes_chunk.len();
111 let attributes_chunk = unsafe { std::str::from_utf8_unchecked(event1.attributes_chunk) };
112 strbuffer.push_str(attributes_chunk);
113
114 let mut inp = strbuffer[start..start + size].as_bytes();
115 let mut offset1: usize = start;
116 loop {
118 if inp.len() == 0 {
119 break;
120 }
121
122 let res = Attribute2(inp);
123
124 match res {
125 Ok((remainder, mut attr_range)) => {
126 attr_range.name =
127 (attr_range.name.start + offset1)..(attr_range.name.end + offset1);
128 attr_range.value =
129 (attr_range.value.start + offset1)..(attr_range.value.end + offset1);
130
131 offset1 += inp.offset(remainder);
132 inp = remainder;
133
134 attribute_list.push(attr_range)
135 }
136 Err(_e) => {
137 return Err(error::Error::Parsing(format!(
138 "Error while parsing attributes.",
139 )))
140 }
141 }
142 }
143
144 Ok(element_name_range)
145}
146
/// Ranges describing the parts of a start tag's qualified name, as returned
/// by `parse_start_element`.
struct ElementRange {
    /// Range of the namespace prefix (empty when there is none).
    prefix_range: Range<usize>,
    /// Range of the local part of the name.
    local_name_range: Range<usize>,
    /// Range of the resolved namespace value inside the namespace string buffer.
    namespace_range: Range<usize>,
}
152
153fn parse_start_element(
154 start_element_name_range: Range<usize>,
155 is_namespace_aware: bool,
156 element_level: usize,
157
158 strbuffer: &mut String,
159 attribute_list: &mut Vec<AttributeRange>,
160 namespace_strbuffer: &mut String,
161 namespace_list: &mut Vec<Namespace>,
162) -> SaxResult<ElementRange> {
163 let start_element_name = &strbuffer[start_element_name_range];
164
165 let mut prefix_range = 0..0;
170 let mut local_name_range = 0..0;
171 let mut namespace_range = 0..0;
172
173 if is_namespace_aware {
175 for attr in attribute_list.iter_mut() {
177 let inp = strbuffer[attr.name.clone()].as_bytes();
178
179 match QName(inp) {
180 Ok(qres) => {
181 let qname = qres.1;
182
183 if qname.prefix == "" && qname.local_name == "xmlns" {
184 let ns = push_ns_values_get_ns(
186 namespace_strbuffer,
187 "",
188 &strbuffer[attr.value.clone()],
189 element_level,
190 );
191 namespace_list.push(ns);
192 }
193
194 if qname.prefix == "xmlns" {
195 let prefix = qname.local_name;
197 let ns = push_ns_values_get_ns(
198 namespace_strbuffer,
199 prefix,
200 &strbuffer[attr.value.clone()],
201 element_level,
202 );
203 namespace_list.push(ns);
204 }
205 attr.local_name = Range {
206 start: qname.local_name_range.start + attr.name.start.clone(),
207 end: qname.local_name_range.end + attr.name.start.clone(),
208 };
209 attr.prefix = Range {
211 start: qname.prefix_range.start + attr.name.start.clone(),
212 end: qname.prefix_range.end + attr.name.start.clone(),
213 };
214 }
220 Err(_e) => {
221 return Err(error::Error::Parsing(format!(
222 "Attribute does not conform to QName spec: {}",
223 &strbuffer[attr.name.clone()]
224 )))
225 }
226 }
227 }
228
229 for attr in attribute_list.iter_mut() {
232 if &strbuffer[attr.prefix.clone()] == "" || &strbuffer[attr.prefix.clone()] == "xmlns" {
234 continue;
235 }
236 match namespace_list.iter().rfind(|ns| {
237 &namespace_strbuffer[ns.prefix.clone()] == &strbuffer[attr.prefix.clone()]
238 }) {
239 Some(ns) => attr.namespace = ns.value.clone(),
240 None => {
241 return Err(error::Error::Parsing(format!(
242 "Namespace not found for prefix: {} , attribute: {} , element: {}",
243 &strbuffer[attr.prefix.clone()],
244 &strbuffer[attr.name.clone()],
245 start_element_name
246 )))
247 }
248 }
249 }
250
251 match QName(start_element_name.as_bytes()) {
252 Ok(qres) => {
253 let qname = qres.1;
254 local_name_range = qname.local_name_range;
257 prefix_range = qname.prefix_range;
258
259 match namespace_list.iter().rfind(|ns| {
260 &namespace_strbuffer[ns.prefix.clone()] == &strbuffer[prefix_range.clone()]
261 }) {
262 Some(ns) => namespace_range = ns.value.clone(),
263
264 None => {
265 if &strbuffer[prefix_range.clone()] == "" {
266 } else {
268 return Err(error::Error::Parsing(format!(
269 "Namespace prefix not found for element: {}",
270 start_element_name
271 )));
272 }
273 }
274 }
275 }
276 Err(_e) => {
277 return Err(error::Error::Parsing(format!(
278 "Element name does not conform to QName spec: {}",
279 start_element_name
280 )))
281 }
282 }
283 }
284
285 Ok(ElementRange {
286 prefix_range: prefix_range,
287 local_name_range: local_name_range,
288 namespace_range: namespace_range,
289 })
290}
291
/// Appends `addition` to `strbuffer` and returns the byte range it now
/// occupies inside the buffer.
fn push_str_get_range(strbuffer: &mut String, addition: &str) -> Range<usize> {
    let begin = strbuffer.len();
    strbuffer.push_str(addition);
    begin..strbuffer.len()
}
302
303fn push_ns_values_get_ns(
304 namespace_strbuffer: &mut String,
305 prefix: &str,
306 value: &str,
307 element_level: usize,
308) -> Namespace {
309 let range_prefix = push_str_get_range(namespace_strbuffer, prefix);
310 let range_value = push_str_get_range(namespace_strbuffer, value);
311 Namespace {
312 level: element_level,
313 prefix: range_prefix,
314 value: range_value,
315 }
316}
317
318pub type SaxResult<T> = Result<T, error::Error>;
319
320mod error {
321 use thiserror::Error;
322 #[derive(Debug, Error)]
323 pub enum Error {
324 #[error(transparent)]
325 Io(#[from] std::io::Error),
326
327 #[error("SAX Parsing Err: {0}")]
329 Parsing(String),
330
331 #[error("SAX Parsing Err: Unexpected EOF")]
332 UnexpectedEof,
333 }
334}
335
/// Moves everything `bufreader` currently has buffered (refilling it from the
/// underlying reader when it is empty) to the end of `buffer2`.
///
/// Nothing is appended when the reader is at end of input.
///
/// # Errors
///
/// Propagates the I/O error reported while refilling the reader.
fn read_data_splitted<R: Read>(
    bufreader: &mut BufReader<R>,
    buffer2: &mut Vec<u8>,
) -> Result<(), std::io::Error> {
    let pending = bufreader.fill_buf()?;
    let amt = pending.len();
    buffer2.extend_from_slice(pending);

    // Mark the copied bytes as used so the next call fetches fresh data.
    bufreader.consume(amt);
    Ok(())
}
/// Same as `read_data_splitted`, but the destination vector lives inside a
/// [`RefCell`] and is borrowed mutably only for the duration of the copy.
///
/// # Errors
///
/// Propagates the I/O error reported while refilling the reader.
///
/// # Panics
///
/// Panics if `buffer2` is already borrowed when the copy takes place.
fn read_data_splitted_refcell<R: Read>(
    bufreader: &mut BufReader<R>,
    buffer2: &RefCell<Vec<u8>>,
) -> Result<(), std::io::Error> {
    let pending = bufreader.fill_buf()?;
    let amt = pending.len();
    buffer2.borrow_mut().extend_from_slice(pending);

    // Mark the copied bytes as used so the next call fetches fresh data.
    bufreader.consume(amt);
    Ok(())
}
375
376fn event_converter<'a, 'b>(
379 mut state: ParserState,
380 internal_event: InternalSuccess<'b>,
381 buffer3: &'b circular::Buffer,
382
383 element_list: &mut Vec<Range<usize>>,
384 mut strbuffer: &'a mut String,
385 mut namespace_strbuffer: &'a mut String,
386 namespace_list: &mut Vec<Namespace>,
387
388 is_namespace_aware: bool,
389 mut element_level: usize,
390 mut element_strbuffer: &mut String,
391
392 attribute_list: &'a mut Vec<AttributeRange>,
393) -> SaxResult<(xml_sax::Event<'a>, ParserState, usize)> {
394 let event = match internal_event {
395 InternalSuccess::StartDocument => xml_sax::Event::StartDocument,
396 InternalSuccess::EndDocument => xml_sax::Event::EndDocument,
397 InternalSuccess::ContentRelaxed(cr) => match cr {
398 ContentRelaxed::CharData(event1) => {
399 let start = strbuffer.len();
400 let size = event1.len();
401 strbuffer.push_str(unsafe { std::str::from_utf8_unchecked(event1) });
402 xml_sax::Event::Characters(&strbuffer[start..(start + size)])
403 }
404 ContentRelaxed::StartElement(event1) => {
405 if is_namespace_aware {
408 match namespace_list
410 .iter()
411 .rposition(|ns| ns.level <= element_level)
412 {
413 Some(pos) => {
414 if let Some(starting_pos) =
415 namespace_list.get(pos + 1).map(|ns| ns.prefix.start)
416 {
417 namespace_list.truncate(pos + 1);
418 namespace_strbuffer.truncate(starting_pos);
419 }
420 }
421 None => {
422 }
424 }
425 }
426
427 let start_element_name_range = convert_start_element_name_and_add_attributes(
428 strbuffer,
429 namespace_strbuffer,
430 event1,
431 buffer3,
432 attribute_list,
433 )?;
434
435 element_level += 1;
436
437 let element_list_range = push_str_get_range(
440 &mut element_strbuffer,
441 &strbuffer[start_element_name_range.clone()],
442 );
443 element_list.push(element_list_range.clone());
444
445 let element_ranges = parse_start_element(
449 start_element_name_range.clone(),
450 is_namespace_aware,
451 element_level,
452 strbuffer,
453 attribute_list,
454 namespace_strbuffer,
455 namespace_list,
456 )?;
457
458 let start_element = xml_sax::StartElement {
459 name: &strbuffer[start_element_name_range],
460 is_empty: false,
462
463 local_name: &strbuffer[element_ranges.local_name_range],
464 namespace: &namespace_strbuffer[element_ranges.namespace_range],
465 prefix: &strbuffer[element_ranges.prefix_range],
466
467 range_list: attribute_list,
468 strbuffer: strbuffer,
469 namespace_strbuffer: namespace_strbuffer,
470 };
471
472 xml_sax::Event::StartElement(start_element)
473 }
474 ContentRelaxed::EmptyElemTag(event1) => {
475 if is_namespace_aware {
476 match namespace_list
478 .iter()
479 .rposition(|ns| ns.level <= element_level)
480 {
481 Some(pos) => {
482 if let Some(starting_pos) =
483 namespace_list.get(pos + 1).map(|ns| ns.prefix.start)
484 {
485 namespace_list.truncate(pos + 1);
486 namespace_strbuffer.truncate(starting_pos);
487 }
488 }
489 None => {
490 }
492 }
493 }
494
495 let start_element_name_range = convert_start_element_name_and_add_attributes(
496 strbuffer,
497 namespace_strbuffer,
498 event1,
499 buffer3,
500 attribute_list,
501 )?;
502
503 element_level += 1; let element_ranges = parse_start_element(
508 start_element_name_range.clone(),
509 is_namespace_aware,
510 element_level,
511 strbuffer,
512 attribute_list,
513 namespace_strbuffer,
514 namespace_list,
515 )?;
516
517 let start_element = xml_sax::StartElement {
518 name: &strbuffer[start_element_name_range],
519 is_empty: false,
521
522 local_name: &strbuffer[element_ranges.local_name_range],
523 namespace: &namespace_strbuffer[element_ranges.namespace_range],
524 prefix: &strbuffer[element_ranges.prefix_range],
525
526 range_list: attribute_list,
527 strbuffer: strbuffer,
528 namespace_strbuffer: namespace_strbuffer,
529 };
530
531 element_level -= 1;
538 if element_level == 0 {
539 state = ParserState::DocEnd;
540 }
541
542 xml_sax::Event::StartElement(start_element)
543 }
544 ContentRelaxed::EndElement(event1) => {
545 match element_list.pop() {
548 Some(r) => {
549 if &element_strbuffer[r.clone()] == event1.name {
550 element_strbuffer.truncate(r.start);
551 } else {
552 return Err(error::Error::Parsing(format!(
553 "Expected closing tag: {} ,found: {}",
554 &element_strbuffer[r.clone()],
555 event1.name
556 )));
557
558 }
560 }
561 None => {
562 return Err(error::Error::Parsing(format!(
563 "No starting tag for: {}",
564 event1.name
565 )))
566 }
567 }
568
569 if is_namespace_aware {
570 match namespace_list
572 .iter()
573 .rposition(|ns| ns.level <= element_level)
574 {
575 Some(pos) => {
576 if let Some(starting_pos) =
577 namespace_list.get(pos + 1).map(|ns| ns.prefix.start)
578 {
579 namespace_list.truncate(pos + 1);
580 namespace_strbuffer.truncate(starting_pos);
581 }
582 }
583 None => {
584 }
586 }
587 }
588
589 let start = strbuffer.len();
596 let size = event1.name.len();
597 strbuffer.push_str(event1.name);
598 let mut end_element = xml_sax::EndElement {
599 name: &strbuffer[start..(start + size)],
600 local_name: "",
601 prefix: "",
602 namespace: "",
603 };
604
605 element_level -= 1;
606 if element_level == 0 {
607 state = ParserState::DocEnd;
608 }
609
610 if is_namespace_aware {
611 match QName(end_element.name.as_bytes()) {
612 Ok(qres) => {
613 let qname = qres.1;
614 end_element.local_name = qname.local_name;
615 end_element.prefix = qname.prefix;
616
617 match namespace_list.iter().rfind(|ns| {
618 &namespace_strbuffer[ns.prefix.clone()] == end_element.prefix
619 }) {
620 Some(ns) => {
621 end_element.namespace = &namespace_strbuffer[ns.value.clone()]
622 }
623 None => {
624 if end_element.prefix == "" {
625 } else {
627 return Err(error::Error::Parsing(format!(
628 "Namespace prefix not found for element: {}",
629 end_element.name
630 )));
631 }
632 }
633 }
634 }
635 Err(_e) => {
636 return Err(error::Error::Parsing(format!(
637 "Element name does not conform to QName spec: {}",
638 end_element.name
639 )))
640 }
641 }
642 }
643 xml_sax::Event::EndElement(end_element)
644 }
645 ContentRelaxed::Reference(event1) => {
646 let range: Range<usize> = push_str_get_range(&mut strbuffer, event1.initial);
655
656 let raw = event1.initial;
658 let resolved_char: Option<char>;
659 if raw.starts_with("&#x") {
660 let hex_val = &raw[3..raw.len() - 1];
661
662 resolved_char = match u32::from_str_radix(&hex_val, 16) {
663 Ok(a) => match char::from_u32(a) {
664 Some(c) => Some(c),
665 None => None,
666 },
667 Err(_) => None,
668 }
669 } else if raw.starts_with("&#") {
670 let hex_val = &raw[2..raw.len() - 1];
671
672 resolved_char = match u32::from_str_radix(&hex_val, 10) {
673 Ok(a) => match char::from_u32(a) {
674 Some(c) => Some(c),
675 None => None,
676 },
677 Err(_) => None,
678 }
679 } else {
680 resolved_char = match event1.initial {
681 "&" => Some('&'),
683 "<" => Some('<'),
684 ">" => Some('>'),
685 """ => Some('"'),
686 "'" => Some('\''),
687 _ => None,
688 }
689 }
690
691 let range_resolved: Option<Range<usize>> = match resolved_char {
692 Some(ch) => {
693 let mut tmp = [0u8; 4];
694 let addition = ch.encode_utf8(&mut tmp);
695 Some(push_str_get_range(&mut strbuffer, addition))
696 }
697 None => None,
698 };
706
707 let reference_event = xml_sax::Reference {
710 raw: &strbuffer[range],
711 resolved: match range_resolved {
712 Some(range) => Some(&strbuffer[range]),
713 None => None,
714 },
715 };
716
717 xml_sax::Event::Reference(reference_event)
718 }
719 ContentRelaxed::CdataStart => xml_sax::Event::StartCdataSection,
720 ContentRelaxed::CommentStart => xml_sax::Event::StartComment,
721 },
722 InternalSuccess::InsideCdata(ic) => match ic {
723 InsideCdata::Characters(characters) => {
724 let start = strbuffer.len();
725 let size = characters.len();
726 strbuffer.push_str(unsafe { std::str::from_utf8_unchecked(characters) });
727 xml_sax::Event::Cdata(&strbuffer[start..(start + size)])
728 }
729 InsideCdata::CdataEnd => xml_sax::Event::EndCdataSection,
730 },
731 InternalSuccess::InsideComment(ic) => match ic {
732 InsideComment::Characters(characters) => {
733 let start = strbuffer.len();
734 let size = characters.len();
735 strbuffer.push_str(unsafe { std::str::from_utf8_unchecked(characters) });
736
737 xml_sax::Event::Comment(&strbuffer[start..(start + size)])
738 }
739 InsideComment::CommentEnd => xml_sax::Event::EndComment,
740 },
741 InternalSuccess::Misc(misc) => match misc {
742 Misc::PI(a) => {
743 let str = unsafe { std::str::from_utf8_unchecked(a) };
744 let range = push_str_get_range(&mut strbuffer, &str);
745 xml_sax::Event::ProcessingInstruction(&strbuffer[range])
746 }
747 Misc::Whitespace(a) => {
748 let str = unsafe { std::str::from_utf8_unchecked(a) };
749 let range = push_str_get_range(&mut strbuffer, &str);
750 xml_sax::Event::Whitespace(&strbuffer[range])
751 }
752 Misc::CommentStart => xml_sax::Event::StartComment,
753 },
754 InternalSuccess::MiscBeforeDoctype(misc) => match misc {
755 MiscBeforeDoctype::PI(a) => {
756 let str = unsafe { std::str::from_utf8_unchecked(a) };
757 let range = push_str_get_range(&mut strbuffer, &str);
758 xml_sax::Event::ProcessingInstruction(&strbuffer[range])
759 }
760 MiscBeforeDoctype::Whitespace(a) => {
761 let str = unsafe { std::str::from_utf8_unchecked(a) };
762 let range = push_str_get_range(&mut strbuffer, &str);
763 xml_sax::Event::Whitespace(&strbuffer[range])
764 }
765 MiscBeforeDoctype::CommentStart => xml_sax::Event::StartComment,
766 MiscBeforeDoctype::DocType(a) => {
767 let str = unsafe { std::str::from_utf8_unchecked(a) };
768 let range = push_str_get_range(&mut strbuffer, &str);
769 xml_sax::Event::DocumentTypeDeclaration(&strbuffer[range])
770 }
771 },
772 InternalSuccess::MiscBeforeXmlDecl(misc) => match misc {
773 MiscBeforeXmlDecl::XmlDecl(a) => {
774 let str = unsafe { std::str::from_utf8_unchecked(a) };
775 let range = push_str_get_range(&mut strbuffer, &str);
776 xml_sax::Event::XmlDeclaration(&strbuffer[range])
777 }
778 MiscBeforeXmlDecl::PI(a) => {
779 let str = unsafe { std::str::from_utf8_unchecked(a) };
780 let range = push_str_get_range(&mut strbuffer, &str);
781 xml_sax::Event::ProcessingInstruction(&strbuffer[range])
782 }
783 MiscBeforeXmlDecl::Whitespace(a) => {
784 let str = unsafe { std::str::from_utf8_unchecked(a) };
785 let range = push_str_get_range(&mut strbuffer, &str);
786 xml_sax::Event::Whitespace(&strbuffer[range])
787 }
788 MiscBeforeXmlDecl::CommentStart => xml_sax::Event::StartComment,
789 MiscBeforeXmlDecl::DocType(a) => {
790 let str = unsafe { std::str::from_utf8_unchecked(a) };
791 let range = push_str_get_range(&mut strbuffer, &str);
792 xml_sax::Event::DocumentTypeDeclaration(&strbuffer[range])
793 }
794 },
795 };
796 Ok((event, state, element_level))
797}
798
799fn read_event_splitted<'a, 'b, R: Read>(
800 mut state: ParserState,
801
802 bufreader: &BufReader<R>,
803
804 buffer3: &'b circular::Buffer,
805
806 mut offset: usize,
807 ) -> SaxResult<(InternalSuccess<'b>, ParserState, usize)> {
809 let event2: InternalSuccess;
810 match state {
811 ParserState::Initial => {
812 state = ParserState::DocStartBeforeXmlDecl;
813 return Ok((InternalSuccess::StartDocument, state, offset));
814 }
815 ParserState::DocStartBeforeXmlDecl => {
816 let res = misc_before_xmldecl(&buffer3.data());
817 match res {
818 Ok(parseresult) => {
819 offset = buffer3.data().offset(parseresult.0);
820 state = ParserState::DocStartBeforeDocType;
821
822 match parseresult.1 {
823 MiscBeforeXmlDecl::XmlDecl(_a) => {}
824 MiscBeforeXmlDecl::PI(_a) => {}
825 MiscBeforeXmlDecl::Whitespace(_a) => {}
826 MiscBeforeXmlDecl::CommentStart => {
827 state = ParserState::DocStartBeforeDocTypeInsideComment;
828 }
829 MiscBeforeXmlDecl::DocType(_a) => {
830 state = ParserState::DocStart;
831 }
832 }
833 event2 = InternalSuccess::MiscBeforeXmlDecl(parseresult.1);
834 }
835 Err(nom::Err::Incomplete(_e)) => {
836 return Err(error::Error::UnexpectedEof);
837 }
838 Err(_err) => {
839 state = ParserState::Content;
841 return read_event_splitted(state, bufreader, buffer3, offset);
842 }
843 }
844 }
845 ParserState::DocStartBeforeDocType => {
846 let res = misc_before_doctype(&buffer3.data());
847 match res {
848 Ok(parseresult) => {
849 offset = buffer3.data().offset(parseresult.0);
850
851 match parseresult.1 {
852 MiscBeforeDoctype::PI(_a) => {}
853 MiscBeforeDoctype::Whitespace(_a) => {}
854 MiscBeforeDoctype::CommentStart => {
855 state = ParserState::DocStartBeforeDocTypeInsideComment;
856 }
857 MiscBeforeDoctype::DocType(_a) => {
858 state = ParserState::DocStart;
859 }
860 }
861 event2 = InternalSuccess::MiscBeforeDoctype(parseresult.1);
862 }
863 Err(nom::Err::Incomplete(_e)) => {
864 return Err(error::Error::UnexpectedEof);
865 }
866 Err(_err) => {
867 state = ParserState::Content;
869 return read_event_splitted(state, bufreader, buffer3, offset);
870 }
871 }
872 }
873 ParserState::DocStartBeforeDocTypeInsideComment => {
874 let res = insidecomment(&buffer3.data());
876 match res {
877 Ok(parseresult) => {
878 offset = buffer3.data().offset(parseresult.0);
879
880 match parseresult.1 {
881 InsideComment::Characters(_characters) => {}
882 InsideComment::CommentEnd => {
883 state = ParserState::DocStartBeforeDocType;
884 }
885 }
886 event2 = InternalSuccess::InsideComment(parseresult.1);
887 }
888 Err(nom::Err::Incomplete(_e)) => {
889 return Err(error::Error::UnexpectedEof);
890 }
891 Err(_err) => {
892 return Err(error::Error::Parsing(
893 "Expected Comment content or Comment end".to_owned(),
894 ))
895 }
896 }
897 }
898 ParserState::DocStart => {
899 let res = misc(&buffer3.data());
900 match res {
901 Ok(parseresult) => {
902 offset = buffer3.data().offset(parseresult.0);
903 match parseresult.1 {
906 Misc::PI(_a) => {}
907 Misc::Whitespace(_a) => {}
908 Misc::CommentStart => {
909 state = ParserState::DocStartInsideComment;
910 }
911 }
912 event2 = InternalSuccess::Misc(parseresult.1);
913 }
914
915 Err(nom::Err::Incomplete(_e)) => {
916 return Err(error::Error::UnexpectedEof);
917 }
918 Err(_err) => {
919 state = ParserState::Content;
921 return read_event_splitted(state, bufreader, buffer3, offset);
922 }
923 }
924 }
925 ParserState::DocStartInsideComment => {
926 let res = insidecomment(&buffer3.data());
928 match res {
929 Ok(parseresult) => {
930 offset = buffer3.data().offset(parseresult.0);
931
932 match parseresult.1 {
933 InsideComment::Characters(_characters) => {}
934 InsideComment::CommentEnd => {
935 state = ParserState::DocStart;
936 }
937 }
938 event2 = InternalSuccess::InsideComment(parseresult.1);
939 }
940 Err(nom::Err::Incomplete(_e)) => {
941 return Err(error::Error::UnexpectedEof);
942 }
943 Err(_err) => {
944 return Err(error::Error::Parsing(format!(
945 "Expecting comment content or comment closing tag "
946 )))
947 }
948 }
949 }
950 ParserState::Content => {
951 let res = content_relaxed(&buffer3.data());
952 match res {
953 Ok(parseresult) => {
954 offset = buffer3.data().offset(parseresult.0);
955
956 match &parseresult.1 {
957 ContentRelaxed::CharData(_event1) => {}
958 ContentRelaxed::StartElement(_event1) => {}
959 ContentRelaxed::EmptyElemTag(_event1) => {}
960 ContentRelaxed::EndElement(_event1) => {}
961 ContentRelaxed::Reference(_event1) => {}
962 ContentRelaxed::CdataStart => {
963 state = ParserState::InsideCdata;
964 }
965 ContentRelaxed::CommentStart => {
966 state = ParserState::InsideComment;
967 }
968 }
969 event2 = InternalSuccess::ContentRelaxed(parseresult.1);
970 }
971 Err(nom::Err::Incomplete(_e)) => {
973 return Err(error::Error::UnexpectedEof);
974 }
975 Err(_e) => {
976 let ending = String::from_utf8_lossy(&buffer3.data());
977 let ending_truncated = match ending.char_indices().nth(50) {
978 None => &ending,
979 Some((idx, _)) => &ending[..idx],
980 };
981
982 return Err(error::Error::Parsing(format!(
983 "Expected one of (CharData | element | Reference | CDSect | PI | Comment), found: {}",
984 ending_truncated
985 )));
986 }
987 }
988 }
989
990 ParserState::InsideCdata => {
991 let res = insidecdata(&buffer3.data());
993 match res {
994 Ok(parseresult) => {
995 offset = buffer3.data().offset(parseresult.0);
996
997 match parseresult.1 {
998 InsideCdata::Characters(_characters) => {}
999 InsideCdata::CdataEnd => {
1000 state = ParserState::Content;
1001 }
1002 }
1003 event2 = InternalSuccess::InsideCdata(parseresult.1);
1004 }
1005 Err(nom::Err::Incomplete(_e)) => {
1006 return Err(error::Error::UnexpectedEof);
1007 }
1008 Err(_err) => {
1009 return Err(error::Error::Parsing(format!(
1010 "Expecting CDATA content or CDATA closing tag "
1011 )))
1012 }
1013 }
1014 }
1015 ParserState::InsideComment => {
1016 let res = insidecomment(&buffer3.data());
1018 match res {
1019 Ok(parseresult) => {
1020 offset = buffer3.data().offset(parseresult.0);
1021
1022 match parseresult.1 {
1023 InsideComment::Characters(_characters) => {}
1024 InsideComment::CommentEnd => {
1025 state = ParserState::Content;
1026 }
1027 }
1028 event2 = InternalSuccess::InsideComment(parseresult.1);
1029 }
1030 Err(nom::Err::Incomplete(_e)) => {
1031 return Err(error::Error::UnexpectedEof);
1032 }
1033 Err(_err) => {
1034 return Err(error::Error::Parsing(format!(
1035 "Expecting comment content or comment closing tag "
1036 )))
1037 }
1038 }
1039 }
1040 ParserState::DocEnd => {
1041 if buffer3.data().len() == 0 {
1043 return Ok((InternalSuccess::EndDocument, state, offset));
1045 }
1046
1047 let res = misc(&buffer3.data());
1048 match res {
1049 Ok(parseresult) => {
1050 offset = buffer3.data().offset(parseresult.0);
1051
1052 match parseresult.1 {
1053 Misc::PI(_a) => {}
1054 Misc::Whitespace(_a) => {}
1055 Misc::CommentStart => {
1056 state = ParserState::DocEndInsideComment;
1057 }
1058 }
1059 event2 = InternalSuccess::Misc(parseresult.1);
1060 }
1061 Err(nom::Err::Incomplete(_e)) => {
1062 return Err(error::Error::UnexpectedEof);
1063 }
1064 Err(_err) => {
1065 return Err(error::Error::Parsing(format!(
1066 "Unexpected entity/content at the end of the document."
1067 )))
1068 }
1069 }
1070 }
1071 ParserState::DocEndInsideComment => {
1072 let res = insidecomment(&buffer3.data());
1074 match res {
1075 Ok(parseresult) => {
1076 offset = buffer3.data().offset(parseresult.0);
1077
1078 match parseresult.1 {
1079 InsideComment::Characters(_characters) => {}
1080 InsideComment::CommentEnd => {
1081 state = ParserState::DocEnd;
1082 }
1083 }
1084 event2 = InternalSuccess::InsideComment(parseresult.1);
1085 }
1086 Err(nom::Err::Incomplete(_e)) => {
1087 return Err(error::Error::UnexpectedEof);
1088 }
1089 Err(_err) => {
1090 return Err(error::Error::Parsing(format!(
1091 "Expecting comment content or comment closing tag "
1092 )))
1093 }
1094 }
1095 }
1096 }
1097
1098 Ok((event2, state, offset))
1099}
1100
1101impl<R: Read> Parser<R> {
1102 pub fn from_reader(reader: R) -> Parser<R> {
1103 Parser {
1104 state: ParserState::Initial,
1105 bufreader: BufReader::with_capacity(8 * 1024, reader),
1106 offset: 0,
1107
1108 buffer3: circular::Buffer::with_capacity(16 * 1024),
1109 strbuffer: String::new(),
1110
1111 element_level: 0, element_list: Vec::with_capacity(10),
1113 element_strbuffer: String::new(),
1114
1115 is_namespace_aware: true,
1116 namespace_list: Vec::with_capacity(10),
1117 namespace_strbuffer: String::new(),
1118
1119 attribute_list: Vec::with_capacity(5),
1120 }
1121 }
1122
1123 fn read_data(&mut self) -> Result<usize, std::io::Error> {
1124 let newread: usize;
1125 match self.bufreader.fill_buf() {
1126 Ok(ok) => {
1127 newread = ok.len();
1128 }
1129 Err(err) => return Err(err),
1130 }
1131
1132 let amt: usize;
1133 {
1134 let data2 = self.bufreader.buffer();
1135 let data_len = data2.len();
1136 self.buffer3.shift();
1139 if data_len > self.buffer3.available_space() {
1140 let new_size = std::cmp::max(
1141 self.buffer3.position() + data_len,
1142 self.buffer3.capacity() * 2,
1143 );
1144
1145 self.buffer3.grow(new_size);
1146 }
1147
1148 self.buffer3.write_all(data2).unwrap();
1151 amt = data2.len();
1153 }
1154 self.bufreader.consume(amt);
1155
1156 Ok(newread)
1157 }
1158
1159 pub fn read_event<'a>(&'a mut self) -> SaxResult<xml_sax::Event<'a>> {
1162 self.buffer3.consume(self.offset);
1163 self.offset = 0;
1165 self.strbuffer.clear();
1175 let mut bytes_read: usize = 1; if self.buffer3.available_space() > self.bufreader.capacity() {
1182 bytes_read = self.read_data()?;
1183 }
1184
1185 let mut read_more_data = false;
1186 loop {
1187 if read_more_data {
1188 bytes_read = self.read_data()?;
1190 read_more_data = false;
1191 } else {
1192 let res =
1193 read_event_splitted(self.state, &self.bufreader, &self.buffer3, self.offset);
1194 match res {
1195 Ok(o) => {
1196 self.state = o.1;
1197 self.offset = o.2;
1198
1199 let event = event_converter(
1202 self.state,
1203 o.0,
1204 &self.buffer3,
1205 &mut self.element_list,
1206 &mut self.strbuffer,
1207 &mut self.namespace_strbuffer,
1208 &mut self.namespace_list,
1209 self.is_namespace_aware,
1210 self.element_level,
1211 &mut self.element_strbuffer,
1212 &mut self.attribute_list,
1213 );
1214 match event {
1215 Ok(tpl) => {
1216 self.state = tpl.1;
1217 self.element_level = tpl.2;
1218
1219 return Ok(tpl.0);
1220 }
1221 Err(err) => return Err(err),
1222 };
1223 }
1224 Err(error::Error::UnexpectedEof) => {
1225 if bytes_read == 0 {
1228 return Err(error::Error::UnexpectedEof);
1229 } else {
1230 read_more_data = true;
1231 }
1232 }
1233 Err(err) => {
1234 return Err(err);
1236 }
1237 }
1238 }
1239 }
1240 }
1241}
1242
/// Smoke test: drives the parser over a small document, printing every
/// result, until the document ends or the first error is reported.
#[test]
fn test_parser1() {
    let data = r#"<root><A a='x'>
 <B b="val" a:b12='val2' ><C/></B> </A> </root>"#
        .as_bytes();

    let mut p = Parser::from_reader(data);
    loop {
        let res = p.read_event();
        println!("{:?}", res);

        let finished = match res {
            Ok(xml_sax::Event::EndDocument) | Err(_) => true,
            Ok(_) => false,
        };
        if finished {
            break;
        }
    }
}