1use std::{
2 borrow::Cow,
3 cell::RefCell,
4 collections::HashMap,
5 ffi::c_void,
6 io::Read,
7 mem::take,
8 ptr::null_mut,
9 rc::Rc,
10 str::{from_utf8, from_utf8_unchecked},
11 sync::atomic::AtomicPtr,
12};
13
14use crate::{
15 chvalid::XmlCharValid,
16 encoding::{
17 XmlCharEncoding, XmlCharEncodingHandler, detect_encoding, find_encoding_handler,
18 get_encoding_handler,
19 },
20 error::{XmlError, XmlParserErrors, parser_validity_error, parser_validity_warning},
21 generic_error,
22 globals::{
23 GenericErrorContext, get_do_validity_checking_default_value,
24 get_get_warnings_default_value, get_keep_blanks_default_value,
25 get_line_numbers_default_value, get_load_ext_dtd_default_value, get_parser_debug_entities,
26 get_pedantic_parser_default_value, get_substitute_entities_default_value,
27 },
28 io::{XmlParserInputBuffer, xml_parser_get_directory},
29 libxml::{
30 catalog::XmlCatalogEntry,
31 sax2::{
32 xml_sax_version, xml_sax2_end_element, xml_sax2_ignorable_whitespace,
33 xml_sax2_start_element,
34 },
35 },
36 parser::{
37 __xml_err_encoding, INPUT_CHUNK, XML_COMPLETE_ATTRS, XML_DETECT_IDS, XML_MAX_LOOKUP_LIMIT,
38 XML_PARSER_MAX_DEPTH, XML_VCTXT_USE_PCTXT, XmlParserInputState, XmlSAXHandler, XmlStartTag,
39 xml_err_encoding_int, xml_err_internal, xml_fatal_err_msg_int, xml_fatal_err_msg_str,
40 xml_init_parser,
41 },
42 tree::{
43 XML_ENT_EXPANDING, XML_ENT_PARSED, XML_XML_NAMESPACE, XmlAttrPtr, XmlAttributeType,
44 XmlDocPtr, XmlEntityType, XmlNodePtr, xml_free_doc,
45 },
46 uri::{build_uri, canonic_path},
47 valid::XmlValidCtxt,
48};
49
50use super::{
51 XmlParserInput, XmlParserNodeInfo, XmlParserNodeInfoSeq, xml_err_memory, xml_fatal_err,
52 xml_load_external_entity,
53};
54
/// Marker value identifying a SAX2 handler.
///
/// Code in this file compares `XmlSAXHandler::initialized` against this
/// constant (cast to `u32`) to decide whether a handler is SAX2.
pub const XML_SAX2_MAGIC: usize = 0xDEED_BEAF;
57
#[doc(alias = "xmlParserOption")]
/// Bit flags that tune the behaviour of the parser.
///
/// Each variant is a single bit; flags are combined by OR-ing `variant as i32`
/// into the parser context's `options` field.
/// The per-variant descriptions follow libxml2's documentation of the
/// corresponding `XML_PARSE_*` flag.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum XmlParserOption {
    /// `XML_PARSE_RECOVER`: recover on errors.
    XmlParseRecover = 1 << 0,
    /// `XML_PARSE_NOENT`: substitute entities.
    XmlParseNoEnt = 1 << 1,
    /// `XML_PARSE_DTDLOAD`: load the external subset.
    XmlParseDTDLoad = 1 << 2,
    /// `XML_PARSE_DTDATTR`: default DTD attributes.
    XmlParseDTDAttr = 1 << 3,
    /// `XML_PARSE_DTDVALID`: validate with the DTD.
    XmlParseDTDValid = 1 << 4,
    /// `XML_PARSE_NOERROR`: suppress error reports.
    XmlParseNoError = 1 << 5,
    /// `XML_PARSE_NOWARNING`: suppress warning reports.
    XmlParseNoWarning = 1 << 6,
    /// `XML_PARSE_PEDANTIC`: pedantic error reporting.
    XmlParsePedantic = 1 << 7,
    /// `XML_PARSE_NOBLANKS`: remove blank nodes.
    XmlParseNoBlanks = 1 << 8,
    /// `XML_PARSE_SAX1`: use the SAX1 interface internally.
    XmlParseSAX1 = 1 << 9,
    /// `XML_PARSE_XINCLUDE`: implement XInclude substitution.
    XmlParseXInclude = 1 << 10,
    /// `XML_PARSE_NONET`: forbid network access.
    XmlParseNoNet = 1 << 11,
    /// `XML_PARSE_NODICT`: do not reuse the context dictionary.
    XmlParseNoDict = 1 << 12,
    /// `XML_PARSE_NSCLEAN`: remove redundant namespace declarations.
    XmlParseNsClean = 1 << 13,
    /// `XML_PARSE_NOCDATA`: merge CDATA as text nodes.
    XmlParseNoCDATA = 1 << 14,
    /// `XML_PARSE_NOXINCNODE`: do not generate XInclude START/END nodes.
    XmlParseNoXIncnode = 1 << 15,
    /// `XML_PARSE_COMPACT`: compact small text nodes.
    XmlParseCompact = 1 << 16,
    /// `XML_PARSE_OLD10`: parse using XML-1.0 before update 5.
    XmlParseOld10 = 1 << 17,
    /// `XML_PARSE_NOBASEFIX`: do not fix up XInclude `xml:base` URIs.
    XmlParseNoBasefix = 1 << 18,
    /// `XML_PARSE_HUGE`: relax the hard-coded parser limits.
    XmlParseHuge = 1 << 19,
    /// `XML_PARSE_OLDSAX`: parse using the SAX2 interface from before 2.7.0.
    XmlParseOldSAX = 1 << 20,
    /// `XML_PARSE_IGNORE_ENC`: ignore the internal document encoding hint.
    XmlParseIgnoreEnc = 1 << 21,
    /// `XML_PARSE_BIG_LINES`: store big line numbers in the text PSVI field.
    XmlParseBigLines = 1 << 22,
}
89
#[doc(alias = "xmlParserMode")]
/// The way a parser context is being driven.
///
/// Variant names mirror libxml2's `XML_PARSE_*` parser modes; the default is
/// [`XmlParserMode::XmlParseUnknown`].
#[repr(C)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum XmlParserMode {
    /// Mode not determined (the default).
    #[default]
    XmlParseUnknown = 0,
    /// libxml2 `XML_PARSE_DOM`.
    XmlParseDOM = 1,
    /// libxml2 `XML_PARSE_SAX`.
    XmlParseSAX = 2,
    /// libxml2 `XML_PARSE_PUSH_DOM`.
    XmlParsePushDOM = 3,
    /// libxml2 `XML_PARSE_PUSH_SAX`.
    XmlParsePushSAX = 4,
    /// libxml2 `XML_PARSE_READER`.
    XmlParseReader = 5,
}
103
/// Raw mutable pointer to an [`XmlParserCtxt`].
pub type XmlParserCtxtPtr<'a> = *mut XmlParserCtxt<'a>;
#[doc(alias = "xmlParserCtxt")]
/// State of one XML parse: the SAX handler, the stack of inputs, the element /
/// namespace stacks and assorted flags and counters.
///
/// Field notes marked "libxml2:" restate the meaning of the matching
/// `xmlParserCtxt` member and are not demonstrated by the code in this file;
/// confirm against the callers before relying on them.
pub struct XmlParserCtxt<'a> {
    /// SAX handler in use; installed by `init_sax_parser`.
    pub sax: Option<Box<XmlSAXHandler>>,
    /// User data supplied together with a caller-provided SAX handler
    /// (`None` when the default handler is installed).
    pub user_data: Option<GenericErrorContext>,
    /// Document associated with this context; freed by `reset`.
    pub my_doc: Option<XmlDocPtr>,
    /// Set to `true` on init/reset. libxml2: cleared when the document is not well formed.
    pub well_formed: bool,
    /// Initialised from the global "substitute entities" default.
    pub(crate) replace_entities: bool,
    /// libxml2: the XML version string.
    pub(crate) version: Option<String>,
    /// Encoding name; `reset_push` stores the caller-supplied encoding here.
    pub encoding: Option<String>,
    /// Initialised to `-1`. libxml2: standalone document flag.
    pub(crate) standalone: i32,
    /// Initialised to `0`. libxml2: non-zero for an HTML document.
    pub(crate) html: i32,

    /// Stack of input streams; the last element is the current input.
    pub input_tab: Vec<XmlParserInput<'a>>,

    /// Top of `node_tab`, kept in sync by `node_push` / `node_pop`.
    pub(crate) node: Option<XmlNodePtr>,
    /// Stack of nodes currently being built.
    pub(crate) node_tab: Vec<XmlNodePtr>,

    /// libxml2: whether node info should be kept.
    pub(crate) record_info: bool,
    /// libxml2: information about each node parsed.
    pub(crate) node_seq: XmlParserNodeInfoSeq,

    /// Error code, stored as `XmlParserErrors as i32`.
    pub err_no: i32,

    /// libxml2: the document references an external subset.
    pub(crate) has_external_subset: bool,
    /// libxml2: the internal subset has parameter-entity references.
    pub(crate) has_perefs: bool,
    /// Non-zero enables parameter-entity expansion in `skip_blanks`.
    /// libxml2: parsing an external entity.
    pub(crate) external: i32,

    /// Initialised to `1`. libxml2: is the document valid.
    pub valid: i32,
    /// Initialised from the global validity-checking default.
    pub(crate) validate: bool,
    /// Validation context; its callbacks are set up by `init_sax_parser`.
    pub vctxt: XmlValidCtxt,

    /// Current parser state; `halt` switches it to `XmlParserEOF`.
    pub instate: XmlParserInputState,
    /// libxml2: next char look-ahead.
    pub(crate) token: i32,

    /// Directory of the document being parsed, when one could be derived.
    pub(crate) directory: Option<String>,

    /// Top of `name_tab`, kept in sync by the name push/pop helpers.
    pub(crate) name: Option<Rc<str>>,
    /// Stack of element names currently open.
    pub(crate) name_tab: Vec<Rc<str>>,

    /// libxml2: used by progressive parsing look-up.
    pub(crate) check_index: usize,
    /// Initialised from the global "keep blanks" default.
    pub(crate) keep_blanks: bool,
    /// Set by `halt`, cleared by `reset`. libxml2: SAX callbacks are disabled.
    pub(crate) disable_sax: bool,
    /// `pop_input` reports an error when this is `0` and an input is popped
    /// outside the EOF state. libxml2: parsing is in the int(1)/ext(2) subset.
    pub in_subset: i32,
    /// libxml2: name of the subset.
    pub(crate) int_sub_name: Option<Rc<str>>,
    /// libxml2: URI of an external subset.
    pub(crate) ext_sub_uri: Option<Rc<str>>,
    /// libxml2: SYSTEM ID of an external subset.
    pub(crate) ext_sub_system: Option<Rc<str>>,

    /// Stack of space-handling values; seeded with `-1` by `init_sax_parser`.
    pub(crate) space_tab: Vec<i32>,

    /// Initialised to `0`. libxml2: depth to prevent entity loops.
    pub(crate) depth: i32,
    /// Character set of the content; `UTF8` after init/reset and switched to
    /// `ISO8859_1` by the fallback in `current_char`.
    pub charset: XmlCharEncoding,

    /// Initialised from the global pedantic default.
    pub(crate) pedantic: bool,
    /// Opaque pointer copied from the parent context by
    /// `new_entity_parser_internal`. libxml2: for user data.
    pub(crate) _private: *mut c_void,

    /// Initialised from the global "load external DTD" default.
    pub(crate) loadsubset: i32,
    /// Initialised from the global line-numbers default.
    pub(crate) linenumbers: i32,
    /// libxml2: document's own catalog.
    #[cfg(feature = "catalog")]
    pub(crate) catalogs: Option<XmlCatalogEntry>,
    /// libxml2: run in recovery mode.
    pub(crate) recovery: bool,
    /// When `true`, `force_grow`, `grow` and `shrink` do nothing.
    /// libxml2: progressive (push) parsing.
    pub(crate) progressive: bool,
    /// libxml2: array for the attribute callbacks.
    pub(crate) atts: Vec<(String, Option<String>)>,

    /// The string `"xml"`, set by `detect_sax2`.
    pub(crate) str_xml: Option<Cow<'static, str>>,
    /// The string `"xmlns"`, set by `detect_sax2`.
    pub(crate) str_xmlns: Option<Cow<'static, str>>,
    /// The XML namespace URI (`XML_XML_NAMESPACE`), set by `detect_sax2`.
    pub(crate) str_xml_ns: Option<Cow<'static, str>>,

    /// Set by `detect_sax2` when the handler is treated as SAX2.
    pub(crate) sax2: bool,
    /// Stack of in-scope namespace bindings as `(prefix, href)` pairs.
    pub(crate) ns_tab: Vec<(Option<String>, String)>,
    /// Start-tag records parallel to `name_tab`, filled by `name_ns_push`.
    pub(crate) push_tab: Vec<XmlStartTag>,
    /// libxml2: defaulted attributes, if any.
    #[allow(clippy::type_complexity)]
    pub(crate) atts_default: HashMap<
        (Cow<'static, str>, Option<Cow<'static, str>>),
        Vec<(String, Option<String>, String, Option<&'static str>)>,
    >,
    /// libxml2: non-CDATA attributes, if any.
    pub(crate) atts_special: HashMap<(Cow<'static, str>, Cow<'static, str>), XmlAttributeType>,
    /// Set to `true` on init/reset. libxml2: is the document namespace-well-formed.
    pub(crate) ns_well_formed: bool,
    /// Bit mask of [`XmlParserOption`] values.
    pub(crate) options: i32,

    /// libxml2: number of freed element nodes.
    pub(crate) free_elems_nr: i32,
    /// libxml2: list of freed element nodes.
    pub(crate) free_elems: Option<XmlNodePtr>,
    /// libxml2: number of freed attribute nodes.
    pub(crate) free_attrs_nr: i32,
    /// libxml2: list of freed attribute nodes.
    pub(crate) free_attrs: Option<XmlAttrPtr>,

    /// Last error recorded on this context; reset by `reset`.
    pub last_error: XmlError,
    /// libxml2: the parser mode.
    pub(crate) parse_mode: XmlParserMode,
    /// Accumulated size of external parameter entities (see `skip_blanks`).
    pub sizeentities: u64,

    /// libxml2: stack of node info records.
    pub(crate) node_info_tab: Vec<Rc<RefCell<XmlParserNodeInfo>>>,

    /// Initialised to `1`; inherited from the parent context for entity parsers.
    pub(crate) input_id: i32,
    /// libxml2: volume of entity copy.
    pub sizeentcopy: u64,

    /// libxml2: quote state for push parser.
    pub(crate) end_check_state: i32,
    /// Error counter; cleared by `reset`.
    pub nb_errors: u16,
    /// Warning counter; cleared by `reset`.
    pub(crate) nb_warnings: u16,
}
288
289impl<'a> XmlParserCtxt<'a> {
290 #[doc(alias = "xmlNewParserCtxt")]
294 pub fn new() -> Option<Self> {
295 Self::new_sax_parser(None, None).ok()
296 }
297
298 #[doc(alias = "xmlNewSAXParserCtxt")]
303 pub fn new_sax_parser(
304 sax: Option<Box<XmlSAXHandler>>,
305 user_data: Option<GenericErrorContext>,
306 ) -> Result<Self, Option<Box<XmlSAXHandler>>> {
307 let mut ctxt = XmlParserCtxt::default();
308 ctxt.init_sax_parser(sax, user_data).map(|_| ctxt)
309 }
310
311 #[doc(alias = "xmlCreateFileParserCtxt")]
319 pub fn from_filename(filename: Option<&str>) -> Option<XmlParserCtxt> {
320 Self::from_filename_with_options(filename, 0)
321 }
322
323 #[doc(alias = "xmlCreateURLParserCtxt")]
331 pub fn from_filename_with_options(
332 filename: Option<&str>,
333 options: i32,
334 ) -> Option<XmlParserCtxt> {
335 let Some(mut ctxt) = XmlParserCtxt::new() else {
336 xml_err_memory(None, Some("cannot allocate parser context"));
337 return None;
338 };
339
340 if options != 0 {
341 ctxt.use_options_internal(options, None);
342 }
343 ctxt.linenumbers = 1;
344
345 let input_stream = xml_load_external_entity(filename, None, &mut ctxt)?;
346
347 ctxt.input_push(input_stream);
348 if ctxt.directory.is_none() {
349 if let Some(filename) = filename {
350 if let Some(directory) = xml_parser_get_directory(filename) {
351 ctxt.directory = Some(directory.to_string_lossy().into_owned());
352 }
353 }
354 }
355
356 Some(ctxt)
357 }
358
359 #[doc(alias = "xmlCreateIOParserCtxt")]
363 pub fn from_io(
364 sax: Option<Box<XmlSAXHandler>>,
365 user_data: Option<GenericErrorContext>,
366 ioctx: impl Read + 'a,
367 enc: XmlCharEncoding,
368 ) -> Option<Self> {
369 let buf = XmlParserInputBuffer::from_reader(ioctx, enc);
370 let mut ctxt = XmlParserCtxt::new_sax_parser(sax, user_data).ok()?;
371
372 let input_stream = XmlParserInput::from_io(&mut ctxt, buf, enc)?;
373 ctxt.input_push(input_stream);
374 Some(ctxt)
375 }
376
377 #[doc(alias = "xmlCreateMemoryParserCtxt", alias = "xmlCreateDocParserCtxt")]
381 pub fn from_memory(buffer: &'a [u8]) -> Option<Self> {
382 if buffer.is_empty() {
383 return None;
384 }
385
386 let mut ctxt = XmlParserCtxt::new()?;
387
388 let buf = XmlParserInputBuffer::from_memory(buffer, XmlCharEncoding::None)?;
389 let mut input = XmlParserInput::new(Some(&mut ctxt))?;
390 input.filename = None;
391 input.buf = Some(buf);
392 input.reset_base();
393
394 ctxt.input_push(input);
395 Some(ctxt)
396 }
397
398 #[doc(alias = "xmlCreateEntityParserCtxtInternal")]
408 pub(crate) fn new_entity_parser_internal(
409 sax: Option<Box<XmlSAXHandler>>,
410 user_data: Option<GenericErrorContext>,
411 mut url: Option<&str>,
412 id: Option<&str>,
413 base: Option<&str>,
414 pctxt: Option<&XmlParserCtxt>,
415 ) -> Result<Self, Option<Box<XmlSAXHandler>>> {
416 let mut ctxt = XmlParserCtxt::new_sax_parser(sax, user_data)?;
417
418 if let Some(pctxt) = pctxt {
419 ctxt.options = pctxt.options;
420 ctxt._private = pctxt._private;
421 ctxt.input_id = pctxt.input_id;
422 }
423
424 if url == Some("-") {
426 url = Some("./-");
427 }
428
429 if let Some(uri) = url.zip(base).and_then(|(url, base)| build_uri(url, base)) {
430 let Some(input_stream) = xml_load_external_entity(Some(&uri), id, &mut ctxt) else {
431 let sax = ctxt.sax.take();
432 return Err(sax);
433 };
434 ctxt.input_push(input_stream);
435
436 if ctxt.directory.is_none() {
437 if let Some(url) = url {
438 if let Some(directory) = xml_parser_get_directory(url) {
439 ctxt.directory = Some(directory.to_string_lossy().into_owned());
440 }
441 }
442 }
443 } else {
444 let Some(input_stream) = xml_load_external_entity(url, id, &mut ctxt) else {
445 let sax = ctxt.sax.take();
446 return Err(sax);
447 };
448 ctxt.input_push(input_stream);
449
450 if ctxt.directory.is_none() {
451 if let Some(url) = url {
452 if let Some(directory) = xml_parser_get_directory(url) {
453 ctxt.directory = Some(directory.to_string_lossy().into_owned());
454 }
455 }
456 }
457 }
458 Ok(ctxt)
459 }
460
461 #[doc(alias = "xmlCreateEntityParserCtxt")]
469 pub fn new_entity_parser(
470 url: Option<&str>,
471 id: Option<&str>,
472 base: Option<&str>,
473 ) -> Result<Self, Option<Box<XmlSAXHandler>>> {
474 Self::new_entity_parser_internal(None, None, url, id, base, None)
475 }
476
477 #[doc(alias = "xmlInitSAXParserCtxt")]
481 fn init_sax_parser(
482 &mut self,
483 sax: Option<Box<XmlSAXHandler>>,
484 user_data: Option<GenericErrorContext>,
485 ) -> Result<(), Option<Box<XmlSAXHandler>>> {
486 xml_init_parser();
487
488 if let Some(mut sax) = sax {
489 if sax.initialized != XML_SAX2_MAGIC as u32 {
490 sax._private = AtomicPtr::new(null_mut());
492 sax.start_element_ns = None;
493 sax.end_element_ns = None;
494 sax.serror = None;
495 }
496 self.sax = Some(sax);
497 self.user_data = user_data;
498 } else {
499 let mut sax = XmlSAXHandler::default();
500 xml_sax_version(&mut sax, 2);
501 self.sax = Some(Box::new(sax));
502 self.user_data = None;
503 }
504
505 self.atts = vec![];
506 self.input_tab.clear();
508 self.version = None;
509 self.encoding = None;
510 self.standalone = -1;
511 self.has_external_subset = false;
512 self.has_perefs = false;
513 self.html = 0;
514 self.external = 0;
515 self.instate = XmlParserInputState::XmlParserStart;
516 self.token = 0;
517 self.directory = None;
518
519 self.node_tab.clear();
521 self.node = None;
522
523 self.name_tab.clear();
525 self.name = None;
526
527 self.space_tab.clear();
529 self.space_tab.push(-1);
530
531 self.my_doc = None;
532 self.well_formed = true;
533 self.ns_well_formed = true;
534 self.valid = 1;
535 self.loadsubset = get_load_ext_dtd_default_value();
536 if self.loadsubset != 0 {
537 self.options |= XmlParserOption::XmlParseDTDLoad as i32;
538 }
539 self.validate = get_do_validity_checking_default_value();
540 self.pedantic = get_pedantic_parser_default_value();
541 if self.pedantic {
542 self.options |= XmlParserOption::XmlParsePedantic as i32;
543 }
544 self.linenumbers = get_line_numbers_default_value();
545 self.keep_blanks = get_keep_blanks_default_value();
546 if !self.keep_blanks {
547 if let Some(sax) = self.sax.as_deref_mut() {
548 sax.ignorable_whitespace = Some(xml_sax2_ignorable_whitespace);
549 }
550 self.options |= XmlParserOption::XmlParseNoBlanks as i32;
551 }
552
553 self.vctxt.flags = XML_VCTXT_USE_PCTXT as _;
554 self.vctxt.user_data = None;
555 self.vctxt.error = Some(parser_validity_error);
556 self.vctxt.warning = Some(parser_validity_warning);
557 if self.validate {
558 if get_get_warnings_default_value() == 0 {
559 self.vctxt.warning = None;
560 } else {
561 self.vctxt.warning = Some(parser_validity_warning);
562 }
563 self.vctxt.node_tab.clear();
564 self.options |= XmlParserOption::XmlParseDTDValid as i32;
565 }
566 self.replace_entities = get_substitute_entities_default_value();
567 if self.replace_entities {
568 self.options |= XmlParserOption::XmlParseNoEnt as i32;
569 }
570 self.record_info = false;
571 self.check_index = 0;
572 self.in_subset = 0;
573 self.err_no = XmlParserErrors::XmlErrOK as i32;
574 self.depth = 0;
575 self.charset = XmlCharEncoding::UTF8;
576 #[cfg(feature = "catalog")]
577 {
578 self.catalogs = None;
579 }
580 self.sizeentities = 0;
581 self.sizeentcopy = 0;
582 self.input_id = 1;
583 self.node_seq.clear();
584 Ok(())
585 }
586
587 pub fn encoding(&self) -> Option<&str> {
588 self.encoding.as_deref()
589 }
590
591 pub(crate) fn current_byte(&self) -> u8 {
592 *self.content_bytes().first().unwrap_or(&0)
593 }
594
595 pub(crate) fn nth_byte(&self, nth: usize) -> u8 {
596 *self.content_bytes().get(nth).unwrap_or(&0)
597 }
598
599 pub fn input(&self) -> Option<&XmlParserInput> {
600 self.input_tab.last()
601 }
602
603 pub fn input_mut(&mut self) -> Option<&mut XmlParserInput<'a>> {
604 self.input_tab.last_mut()
605 }
606
607 #[doc(alias = "xmlByteConsumed")]
617 pub fn byte_consumed(&mut self) -> i64 {
618 let Some(input) = self.input() else {
619 return -1;
620 };
621 if input.buf.is_some() && input.buf.as_ref().unwrap().encoder.is_some() {
622 let mut unused = 0;
623 if input.remainder_len() > 0 {
627 let mut out = [0u8; 32000];
632 let Ok(input) = from_utf8(self.content_bytes()) else {
633 return -1;
634 };
635 let input = input.to_owned();
636 let handler = self
637 .input_mut()
638 .unwrap()
639 .buf
640 .as_mut()
641 .unwrap()
642 .encoder
643 .as_mut()
644 .unwrap();
645 let mut read = 0;
646 while read < input.len() {
647 let Ok((r, w)) = handler.encode(&input[read..], &mut out) else {
648 return -1;
649 };
650 unused += w;
651 read += r;
652 }
653 }
654 let input = self.input().unwrap();
655 if input.buf.as_ref().unwrap().rawconsumed < unused as u64 {
656 return -1;
657 }
658 return (input.buf.as_ref().unwrap().rawconsumed - unused as u64) as i64;
659 }
660 input.consumed as i64 + input.offset_from_base() as i64
661 }
662
663 #[doc(alias = "xmlParserGrow")]
664 pub(crate) fn force_grow(&mut self) -> i32 {
665 if self.progressive {
667 return 0;
668 }
669
670 let input = self.input_mut().unwrap();
671 let cur_end = input.remainder_len();
672 let cur_base = input.offset_from_base();
673
674 let Some(buf) = input.buf.as_mut() else {
675 return 0;
676 };
677 if buf.encoder.is_none() && buf.context.is_none() {
679 return 0;
680 }
681
682 if (cur_end > XML_MAX_LOOKUP_LIMIT || cur_base > XML_MAX_LOOKUP_LIMIT)
683 && self.options & XmlParserOption::XmlParseHuge as i32 == 0
684 {
685 xml_err_internal!(self, "Huge input lookup");
686 self.halt();
687 return -1;
688 }
689
690 if cur_end >= INPUT_CHUNK {
691 return 0;
692 }
693
694 let input = self.input_mut().unwrap();
695 let ret: i32 = input.buf.as_mut().unwrap().grow(INPUT_CHUNK);
696
697 if ret < 0 {
699 xml_err_internal!(self, "Growing input buffer");
700 self.halt();
701 }
702
703 ret
704 }
705
706 pub(crate) fn grow(&mut self) {
707 if !self.progressive && self.input().unwrap().remainder_len() < INPUT_CHUNK {
708 self.force_grow();
709 }
710 }
711
712 #[doc(alias = "xmlParserShrink")]
713 pub(crate) fn force_shrink(&mut self) {
714 let progressive = self.progressive;
715
716 let input = self.input_mut().unwrap();
717
718 let Some(buf) = input.buf.as_mut() else {
720 return;
721 };
722 if !progressive && buf.encoder.is_none() && buf.context.is_none() {
723 return;
724 }
725
726 input.shrink();
728 }
729
730 pub(crate) fn shrink(&mut self) {
731 if !self.progressive
732 && self.input().unwrap().offset_from_base() > 2 * INPUT_CHUNK
733 && self.input().unwrap().remainder_len() < 2 * INPUT_CHUNK
734 {
735 self.force_shrink();
736 }
737 }
738
739 #[doc(alias = "xmlHaltParser")]
741 pub(crate) fn halt(&mut self) {
742 self.instate = XmlParserInputState::XmlParserEOF;
743 self.disable_sax = true;
744 while self.input_tab.len() > 1 {
745 self.input_pop();
746 }
747 if let Some(input) = self.input_mut() {
748 if input.buf.is_some() {
750 let _ = input.buf.take();
751 }
752 input.cur = 0;
753 input.length = 0;
754 input.base = 0;
755 }
756 }
757
758 #[doc(alias = "xmlStopParser")]
760 pub fn stop(&mut self) {
761 self.halt();
762 self.err_no = XmlParserErrors::XmlErrUserStop as i32;
763 }
764
765 #[doc(alias = "xmlCtxtReset")]
767 pub fn reset(&mut self) {
768 self.input_tab.clear();
769 self.space_tab.clear();
770 self.node_tab.clear();
771 self.node = None;
772 self.name_tab.clear();
773 self.name = None;
774 self.ns_tab.clear();
775 self.version = None;
776 self.encoding = None;
777 self.directory = None;
778 self.ext_sub_uri = None;
779 self.ext_sub_system = None;
780 if let Some(doc) = self.my_doc.take() {
781 unsafe {
782 xml_free_doc(doc);
783 }
784 }
785
786 self.standalone = -1;
787 self.has_external_subset = false;
788 self.has_perefs = false;
789 self.html = 0;
790 self.external = 0;
791 self.instate = XmlParserInputState::XmlParserStart;
792 self.token = 0;
793 self.well_formed = true;
794 self.ns_well_formed = true;
795 self.disable_sax = false;
796 self.valid = 1;
797 self.record_info = false;
798 self.check_index = 0;
799 self.end_check_state = 0;
800 self.in_subset = 0;
801 self.err_no = XmlParserErrors::XmlErrOK as i32;
802 self.depth = 0;
803 self.charset = XmlCharEncoding::UTF8;
804 #[cfg(feature = "catalog")]
805 {
806 self.catalogs = None;
807 }
808 self.sizeentities = 0;
809 self.sizeentcopy = 0;
810 self.node_seq.clear();
811 self.atts_default.clear();
812 self.atts_special.clear();
813
814 #[cfg(feature = "catalog")]
815 {
816 self.catalogs = None;
817 }
818 self.nb_errors = 0;
819 self.nb_warnings = 0;
820 if self.last_error.is_err() {
821 self.last_error.reset();
822 }
823 }
824
825 #[doc(alias = "xmlCtxtResetPush")]
829 pub fn reset_push(
830 &mut self,
831 chunk: &[u8],
832 filename: Option<&str>,
833 encoding: Option<&str>,
834 ) -> i32 {
835 let enc = if encoding.is_none() && chunk.len() >= 4 {
836 detect_encoding(chunk)
837 } else {
838 XmlCharEncoding::None
839 };
840
841 let buf = XmlParserInputBuffer::new(enc);
842
843 self.reset();
844
845 if filename.is_none() {
846 self.directory = None;
847 } else if let Some(dir) = filename.and_then(xml_parser_get_directory) {
848 self.directory = Some(dir.to_string_lossy().into_owned());
849 }
850
851 let Some(mut input_stream) = XmlParserInput::new(Some(self)) else {
852 return 1;
853 };
854
855 input_stream.filename = filename
856 .map(canonic_path)
857 .map(|filanem| filanem.into_owned());
858 input_stream.buf = Some(buf);
859 input_stream.reset_base();
860
861 self.input_push(input_stream);
862
863 if !chunk.is_empty() && self.input().is_some() && self.input().unwrap().buf.is_some() {
864 self.input_mut()
865 .unwrap()
866 .buf
867 .as_mut()
868 .unwrap()
869 .push_bytes(chunk);
870 }
871
872 if let Some(encoding) = encoding {
873 self.encoding = Some(encoding.to_owned());
874 if let Some(handler) = find_encoding_handler(self.encoding().unwrap()) {
875 self.switch_to_encoding(handler);
876 } else {
877 xml_fatal_err_msg_str!(
878 self,
879 XmlParserErrors::XmlErrUnsupportedEncoding,
880 "Unsupported encoding {}\n",
881 encoding
882 );
883 };
884 } else if !matches!(enc, XmlCharEncoding::None) {
885 self.switch_encoding(enc);
886 }
887
888 0
889 }
890
891 #[doc(alias = "xmlClearParserCtxt")]
893 pub fn clear(&mut self) {
894 self.node_seq.clear();
895 self.reset();
896 }
897
898 pub(crate) fn advance(&mut self, nth: usize) {
899 if self.content_bytes().len() < nth {
900 self.force_grow();
901 }
902 let input = self.input_mut().unwrap();
903 input.cur += nth;
904 input.col += nth as i32;
905 }
906
907 pub(crate) fn advance_with_line_handling(&mut self, nth: usize) {
910 if self.content_bytes().len() < nth {
911 self.force_grow();
912 }
913 let input = self.input_mut().unwrap();
914 let content = &input.current_contents()[..nth];
915 let mut line = input.line;
916 let mut col = input.col;
917 let mut next = content.split(|b| b == &b'\n');
918 col += next.next().unwrap().len() as i32;
919 for cur in next {
920 line += 1;
921 col = cur.len() as i32 + 1;
922 }
923 input.line = line;
924 input.col = col;
925 input.cur += nth;
926 }
927
928 pub fn content_bytes(&self) -> &[u8] {
929 let input = self.input().unwrap();
930 input.current_contents()
931 }
932
933 #[doc(alias = "xmlNextChar")]
935 pub(crate) fn skip_char(&mut self) {
936 if matches!(self.instate, XmlParserInputState::XmlParserEOF) || self.input().is_none() {
937 return;
938 }
939
940 let input = self.input().unwrap();
941 if input.cur > input.base_contents().len() {
942 xml_err_internal!(self, "Parser input data memory error\n");
943
944 self.err_no = XmlParserErrors::XmlErrInternalError as i32;
945 self.stop();
946
947 return;
948 }
949
950 if input.remainder_len() < INPUT_CHUNK {
951 if self.force_grow() < 0 {
952 return;
953 }
954 if self.content_bytes().is_empty() {
955 return;
956 }
957 }
958
959 let Some(c) = self.current_char() else {
960 return;
961 };
962 let input = self.input_mut().unwrap();
967 if c == '\n' {
968 input.line += 1;
969 input.col = 1;
970 } else {
971 input.col += 1;
972 }
973 input.cur += c.len_utf8();
974 }
975
976 #[doc(alias = "xmlSkipBlankChars")]
981 pub(crate) fn skip_blanks(&mut self) -> i32 {
982 let mut res = 0i32;
983
984 if (self.input_tab.len() == 1 && !matches!(self.instate, XmlParserInputState::XmlParserDTD))
986 || matches!(self.instate, XmlParserInputState::XmlParserStart)
987 {
988 let input = self.input().unwrap();
990 let mut line = input.line;
991 let mut col = input.col;
992 self.force_grow();
993 let mut content = self.content_bytes();
994 while content.first().is_some_and(XmlCharValid::is_xml_blank_char) {
995 if content[0] == b'\n' {
996 line += 1;
997 col = 1;
998 } else {
999 col += 1;
1000 }
1001 content = &content[1..];
1002 res = res.saturating_add(1);
1003 if content.is_empty() {
1004 let len = self.content_bytes().len();
1005 let input = self.input_mut().unwrap();
1006 input.cur += len;
1007 input.line = line;
1008 input.col = col;
1009 self.force_grow();
1010 content = self.content_bytes();
1011 }
1012 }
1013
1014 let diff = self.content_bytes().len() - content.len();
1015 if diff > 0 {
1016 let input = self.input_mut().unwrap();
1017 input.cur += diff;
1018 input.line = line;
1019 input.col = col;
1020 }
1021 } else {
1022 let expand_pe = self.external != 0 || self.input_tab.len() != 1;
1023
1024 while !matches!(self.instate, XmlParserInputState::XmlParserEOF) {
1025 if self.current_byte().is_xml_blank_char() {
1026 self.skip_char();
1028 } else if self.current_byte() == b'%' {
1029 if !expand_pe || self.nth_byte(1).is_xml_blank_char() || self.nth_byte(1) == 0 {
1031 break;
1032 }
1033 self.parse_pe_reference();
1034 } else if self.current_byte() == 0 {
1035 let mut consumed: u64;
1036
1037 if self.input_tab.len() <= 1 {
1038 break;
1039 }
1040
1041 consumed = self.input().unwrap().consumed;
1042 consumed =
1043 consumed.saturating_add(self.input().unwrap().offset_from_base() as u64);
1044
1045 let mut ent = self.input().unwrap().entity.unwrap();
1048 if matches!(ent.etype, XmlEntityType::XmlExternalParameterEntity)
1049 && ent.flags & XML_ENT_PARSED as i32 == 0
1050 {
1051 ent.flags |= XML_ENT_PARSED as i32;
1052
1053 self.sizeentities = self.sizeentities.saturating_add(consumed);
1054 }
1055
1056 self.parser_entity_check(consumed);
1057
1058 self.pop_input();
1059 } else {
1060 break;
1061 }
1062
1063 res = res.saturating_add(1);
1068 }
1069 }
1070 res
1071 }
1072
#[doc(alias = "xmlCurrentChar")]
/// Returns the character at the current position without consuming it.
///
/// Returns `None` at EOF, without an input, when growing the input fails,
/// when no content is left, or when the input ends in an incomplete UTF-8
/// sequence.
///
/// A `'\r'` is reported as `'\n'`; when it is directly followed by `'\n'` the
/// cursor is moved onto that `'\n'`. This is the only case in which the
/// cursor moves.
///
/// Invalid UTF-8 in a buffer without encoder is reported as
/// `XmlErrInvalidChar`; the buffer then falls back to ISO-8859-1 and the
/// character is read again.
///
/// # Panics
///
/// May panic when the cursor does not sit on a character boundary of
/// entity content.
pub(crate) fn current_char(&mut self) -> Option<char> {
    if matches!(self.instate, XmlParserInputState::XmlParserEOF) {
        return None;
    }

    if self.input()?.remainder_len() < INPUT_CHUNK && self.force_grow() < 0 {
        return None;
    }

    // Fast path: printable ASCII needs no decoding or validation.
    let cur_byte = self.current_byte();
    if (0x20..0x80).contains(&cur_byte) {
        return Some(cur_byte as char);
    }

    // `current_byte` returns 0 for "no content", so tell that case apart.
    if self.content_bytes().is_empty() {
        return None;
    }

    let c = if cur_byte >= 0x80 {
        let input = self.input_mut().unwrap();
        if let Some(buf) = input.buf.as_ref() {
            if buf.encoder.is_some() {
                // SAFETY: NOTE(review) - assumes the encoder only ever puts
                // valid UTF-8 into `buf.buffer` and that `cur` is on a
                // character boundary; confirm against the buffer code.
                unsafe {
                    from_utf8_unchecked(&buf.buffer[input.cur..])
                        .chars()
                        .next()?
                }
            } else if input.cur < input.valid_up_to {
                // SAFETY: NOTE(review) - `valid_up_to` is only set below from
                // the result of `from_utf8`; assumes the bytes before it have
                // not changed since and that `cur` is on a character boundary.
                unsafe {
                    from_utf8_unchecked(&buf.buffer[input.cur..input.valid_up_to])
                        .chars()
                        .next()?
                }
            } else {
                // Validate the rest of the buffer and remember how far it is
                // valid, so later calls can take the branch above.
                match from_utf8(&buf.buffer[input.cur..]) {
                    Ok(s) => {
                        input.valid_up_to = input.cur + s.len();
                        s.chars().next()?
                    }
                    Err(e) if e.valid_up_to() > 0 => {
                        input.valid_up_to = input.cur + e.valid_up_to();
                        // SAFETY: `from_utf8` reported the first
                        // `e.valid_up_to()` bytes of this slice as valid UTF-8.
                        let s = unsafe {
                            from_utf8_unchecked(&buf.buffer[input.cur..][..e.valid_up_to()])
                        };
                        s.chars().next().unwrap()
                    }
                    Err(e) => {
                        // The very first byte sequence is invalid or incomplete.
                        return match e.error_len() {
                            Some(_) => {
                                // Invalid sequence: report it (with the
                                // offending bytes when at least 4 remain),
                                // then retry as ISO-8859-1.
                                if input.remainder_len() < 4 {
                                    __xml_err_encoding!(
                                        self,
                                        XmlParserErrors::XmlErrInvalidChar,
                                        "Input is not proper UTF-8, indicate encoding !\n"
                                    );
                                } else {
                                    let buffer = format!(
                                        "Bytes: 0x{:02X} 0x{:02X} 0x{:02X} 0x{:02X}\n",
                                        buf.buffer[input.cur],
                                        buf.buffer[input.cur + 1],
                                        buf.buffer[input.cur + 2],
                                        buf.buffer[input.cur + 3],
                                    );
                                    __xml_err_encoding!(
                                        self,
                                        XmlParserErrors::XmlErrInvalidChar,
                                        "Input is not proper UTF-8, indicate encoding !\n{}",
                                        buffer
                                    );
                                }
                                self.input_mut()
                                    .unwrap()
                                    .buf
                                    .as_mut()
                                    .unwrap()
                                    .fallback_to_iso_8859_1();
                                self.charset = XmlCharEncoding::ISO8859_1;
                                self.current_char()
                            }
                            // Incomplete sequence at the end of the data.
                            None => None,
                        };
                    }
                }
            }
        } else if let Some(content) = input
            .entity
            .as_deref()
            .and_then(|ent| ent.content.as_deref())
        {
            // No buffer: read from the entity content instead.
            content[input.cur..].chars().next()?
        } else {
            return None;
        }
    } else {
        // Control characters below 0x20 (the rest of ASCII returned above).
        cur_byte as char
    };

    // The character is still returned after the error has been reported.
    if (c.len_utf8() > 1 && !c.is_xml_char()) || (c.len_utf8() == 1 && c == '\0') {
        xml_err_encoding_int!(
            self,
            XmlParserErrors::XmlErrInvalidChar,
            "Char 0x{:X} out of allowed range\n",
            c as i32
        );
    }
    if c == '\r' {
        // Step over the '\r' of a "\r\n" pair so both count as one '\n'.
        if self.nth_byte(1) == b'\n' {
            let input = self.input_mut().unwrap();
            input.cur += 1;
        }
        return Some('\n');
    }
    Some(c)
}
1215
1216 pub(super) fn consume_char_if(
1217 &mut self,
1218 mut f: impl FnMut(&Self, char) -> bool,
1219 ) -> Option<char> {
1220 let c = self.current_char()?;
1221 f(self, c).then(|| {
1222 let input = self.input_mut().unwrap();
1223 if c == '\n' {
1224 input.line += 1;
1225 input.col = 1;
1226 } else {
1227 input.col += 1;
1228 }
1229 input.cur += c.len_utf8();
1230 c
1231 })
1232 }
1233
1234 #[doc(alias = "xmlGetNamespace")]
1238 pub(crate) fn get_namespace(&self, prefix: Option<&str>) -> Option<&str> {
1239 if prefix == self.str_xml.as_deref() {
1240 return self.str_xml_ns.as_deref();
1241 }
1242 for (pre, href) in self.ns_tab.iter().rev() {
1243 if pre.as_deref() == prefix {
1244 if prefix.is_none() && href.is_empty() {
1245 return None;
1246 }
1247 return Some(href.as_str());
1248 }
1249 }
1250 None
1251 }
1252
1253 #[doc(alias = "inputPush")]
1257 pub fn input_push(&mut self, value: XmlParserInput<'a>) -> usize {
1258 self.input_tab.push(value);
1259 self.input_tab.len() - 1
1260 }
1261
1262 #[doc(alias = "inputPop")]
1266 pub fn input_pop(&mut self) -> Option<XmlParserInput<'a>> {
1267 self.input_tab.pop()
1268 }
1269
1270 #[doc(alias = "nodePush")]
1274 pub(crate) fn node_push(&mut self, value: XmlNodePtr) -> i32 {
1275 if self.node_tab.len() as u32 > XML_PARSER_MAX_DEPTH
1276 && self.options & XmlParserOption::XmlParseHuge as i32 == 0
1277 {
1278 let max_depth = XML_PARSER_MAX_DEPTH as i32;
1279 xml_fatal_err_msg_int!(
1280 self,
1281 XmlParserErrors::XmlErrInternalError,
1282 format!("Excessive depth in document: {max_depth} use XML_PARSE_HUGE option\n")
1283 .as_str(),
1284 max_depth
1285 );
1286 self.halt();
1287 return -1;
1288 }
1289 self.node = Some(value);
1290 self.node_tab.push(value);
1291 self.node_tab.len() as i32 - 1
1292 }
1293
1294 #[doc(alias = "nodePop")]
1298 pub(crate) fn node_pop(&mut self) -> Option<XmlNodePtr> {
1299 let res = self.node_tab.pop();
1300 self.node = self.node_tab.last().cloned();
1301 res
1302 }
1303
1304 #[doc(alias = "namePop")]
1318 pub(crate) fn name_pop(&mut self) -> Option<Rc<str>> {
1319 let res = self.name_tab.pop();
1320 let name = self.name_tab.last().cloned();
1321 self.name = name;
1322 res
1323 }
1324
1325 #[doc(alias = "spacePush")]
1326 pub(crate) fn space_push(&mut self, val: i32) -> i32 {
1327 self.space_tab.push(val);
1328 self.space_tab.len() as i32 - 1
1329 }
1330
1331 #[doc(alias = "spacePop")]
1332 pub(crate) fn space_pop(&mut self) -> i32 {
1333 self.space_tab.pop().unwrap_or(-1)
1334 }
1335
1336 pub(crate) fn space(&self) -> i32 {
1337 *self.space_tab.last().unwrap_or(&-1)
1338 }
1339
1340 pub(crate) fn space_mut(&mut self) -> &mut i32 {
1341 self.space_tab.last_mut().expect("Internal Error")
1342 }
1343
1344 #[doc(alias = "nameNsPush")]
1348 pub(crate) fn name_ns_push(
1349 &mut self,
1350 value: &str,
1351 prefix: Option<&str>,
1352 uri: Option<&str>,
1353 line: i32,
1354 ns_nr: i32,
1355 ) -> i32 {
1356 let name: Rc<str> = value.into();
1357 self.name = Some(name.clone());
1358 self.name_tab.push(name);
1359 self.push_tab
1360 .resize(self.name_tab.len(), XmlStartTag::default());
1361 let res = self.name_tab.len() - 1;
1362 self.push_tab[res].prefix = prefix.map(|pre| pre.into());
1363 self.push_tab[res].uri = uri.map(|uri| uri.into());
1364 self.push_tab[res].line = line;
1365 self.push_tab[res].ns_nr = ns_nr;
1366 res as i32
1367 }
1368
#[doc(alias = "nameNsPop")]
/// Pops the top of the element-name stack and returns it; the new top (if
/// any) becomes the current name. `push_tab` is left untouched.
#[cfg(feature = "libxml_push")]
pub(crate) fn name_ns_pop(&mut self) -> Option<Rc<str>> {
    let popped = self.name_tab.pop();
    self.name = self.name_tab.last().map(Rc::clone);
    popped
}
1379
1380 #[doc(alias = "nsPush")]
1384 pub(crate) fn ns_push(&mut self, prefix: Option<&str>, url: &str) -> i32 {
1385 if self.options & XmlParserOption::XmlParseNsClean as i32 != 0 {
1386 for (pre, href) in self.ns_tab.iter().rev() {
1387 if pre.as_deref() == prefix {
1388 if href.as_str() == url {
1390 return -2;
1391 }
1392 break;
1394 }
1395 }
1396 }
1397 self.ns_tab
1398 .push((prefix.map(|p| p.to_owned()), url.to_owned()));
1399 self.ns_tab.len() as i32
1400 }
1401
1402 #[doc(alias = "nsPop")]
1406 pub(crate) fn ns_pop(&mut self, mut nr: usize) -> usize {
1407 if self.ns_tab.len() < nr {
1408 generic_error!("Pbm popping {} NS\n", nr);
1409 nr = self.ns_tab.len();
1410 }
1411 if self.ns_tab.is_empty() {
1412 return 0;
1413 }
1414 let rem = self.ns_tab.len() - nr;
1415 self.ns_tab.truncate(rem);
1416 nr
1417 }
1418
1419 #[doc(alias = "xmlPushInput")]
1423 pub fn push_input(&mut self, input: XmlParserInput<'a>) -> Result<usize, XmlParserErrors> {
1424 if get_parser_debug_entities() != 0 {
1425 if self.input().is_some() && self.input().unwrap().filename.is_some() {
1426 generic_error!(
1427 "{}({}): ",
1428 self.input().unwrap().filename.as_ref().unwrap(),
1429 self.input().unwrap().line
1430 );
1431 }
1432 let cur = match from_utf8(&input.base_contents()[input.cur..]) {
1433 Ok(s) => s,
1434 Err(e) if e.valid_up_to() > 0 => {
1435 unsafe {
1436 from_utf8_unchecked(
1439 &input.base_contents()[input.cur..input.cur + e.valid_up_to()],
1440 )
1441 }
1442 }
1443 _ => "(Failed to read buffer)",
1444 };
1445 generic_error!("Pushing input {} : {}\n", self.input_tab.len() + 1, cur);
1446 }
1447 if (self.input_tab.len() > 40 && self.options & XmlParserOption::XmlParseHuge as i32 == 0)
1448 || self.input_tab.len() > 100
1449 {
1450 xml_fatal_err(self, XmlParserErrors::XmlErrEntityLoop, None);
1451 while self.input_tab.len() > 1 {
1452 self.input_pop();
1453 }
1454 return Err(XmlParserErrors::XmlErrEntityLoop);
1455 }
1456 let ret = self.input_push(input);
1457 if matches!(self.instate, XmlParserInputState::XmlParserEOF) {
1458 return Err(XmlParserErrors::XmlErrInternalError);
1459 }
1460 self.grow();
1461 Ok(ret)
1462 }
1463
1464 #[doc(alias = "xmlPopInput")]
1468 pub fn pop_input(&mut self) -> u8 {
1469 if self.input_tab.len() <= 1 {
1470 return 0;
1471 }
1472 if get_parser_debug_entities() != 0 {
1473 generic_error!("Popping input {}\n", self.input_tab.len());
1474 }
1475 if self.input_tab.len() > 1
1476 && self.in_subset == 0
1477 && !matches!(self.instate, XmlParserInputState::XmlParserEOF)
1478 {
1479 xml_fatal_err(
1480 self,
1481 XmlParserErrors::XmlErrInternalError,
1482 Some("Unfinished entity outside the DTD"),
1483 );
1484 }
1485 let input = self.input_pop().unwrap();
1486 if let Some(mut entity) = input.entity {
1487 entity.flags &= !XML_ENT_EXPANDING as i32;
1488 }
1489
1490 if self.current_byte() == 0 {
1491 self.force_grow();
1492 }
1493 self.current_byte()
1494 }
1495
1496 #[doc(alias = "xmlDetectSAX2")]
1498 pub(crate) fn detect_sax2(&mut self) {
1499 let sax = self.sax.as_deref();
1500 #[cfg(feature = "sax1")]
1501 {
1502 if sax.is_some_and(|sax| {
1503 sax.initialized == XML_SAX2_MAGIC as u32
1504 && (sax.start_element_ns.is_some()
1505 || sax.end_element_ns.is_some()
1506 || (sax.start_element.is_none() && sax.end_element.is_none()))
1507 }) {
1508 self.sax2 = true;
1509 }
1510 }
1511 #[cfg(not(feature = "sax1"))]
1512 {
1513 self.sax2 = true;
1514 }
1515
1516 self.str_xml = Some(Cow::Borrowed("xml"));
1517 self.str_xmlns = Some(Cow::Borrowed("xmlns"));
1518 self.str_xml_ns = Some(Cow::Borrowed(XML_XML_NAMESPACE));
1519 }
1520
1521 #[doc(alias = "xmlCtxtUseOptionsInternal")]
1525 pub(crate) fn use_options_internal(&mut self, mut options: i32, encoding: Option<&str>) -> i32 {
1526 if let Some(encoding) = encoding {
1527 self.encoding = Some(encoding.to_owned());
1528 }
1529 if options & XmlParserOption::XmlParseRecover as i32 != 0 {
1530 self.recovery = true;
1531 options -= XmlParserOption::XmlParseRecover as i32;
1532 self.options |= XmlParserOption::XmlParseRecover as i32;
1533 } else {
1534 self.recovery = false;
1535 }
1536 if options & XmlParserOption::XmlParseDTDLoad as i32 != 0 {
1537 self.loadsubset = XML_DETECT_IDS as i32;
1538 options -= XmlParserOption::XmlParseDTDLoad as i32;
1539 self.options |= XmlParserOption::XmlParseDTDLoad as i32;
1540 } else {
1541 self.loadsubset = 0;
1542 }
1543 if options & XmlParserOption::XmlParseDTDAttr as i32 != 0 {
1544 self.loadsubset |= XML_COMPLETE_ATTRS as i32;
1545 options -= XmlParserOption::XmlParseDTDAttr as i32;
1546 self.options |= XmlParserOption::XmlParseDTDAttr as i32;
1547 }
1548 if options & XmlParserOption::XmlParseNoEnt as i32 != 0 {
1549 self.replace_entities = true;
1550 options -= XmlParserOption::XmlParseNoEnt as i32;
1552 self.options |= XmlParserOption::XmlParseNoEnt as i32;
1553 } else {
1554 self.replace_entities = false;
1555 }
1556 if options & XmlParserOption::XmlParsePedantic as i32 != 0 {
1557 self.pedantic = true;
1558 options -= XmlParserOption::XmlParsePedantic as i32;
1559 self.options |= XmlParserOption::XmlParsePedantic as i32;
1560 } else {
1561 self.pedantic = false;
1562 }
1563 if options & XmlParserOption::XmlParseNoBlanks as i32 != 0 {
1564 self.keep_blanks = false;
1565 if let Some(sax) = self.sax.as_deref_mut() {
1566 sax.ignorable_whitespace = Some(xml_sax2_ignorable_whitespace);
1567 }
1568 options -= XmlParserOption::XmlParseNoBlanks as i32;
1569 self.options |= XmlParserOption::XmlParseNoBlanks as i32;
1570 } else {
1571 self.keep_blanks = true;
1572 }
1573 if options & XmlParserOption::XmlParseDTDValid as i32 != 0 {
1574 self.validate = true;
1575 if options & XmlParserOption::XmlParseNoWarning as i32 != 0 {
1576 self.vctxt.warning = None;
1577 }
1578 if options & XmlParserOption::XmlParseNoError as i32 != 0 {
1579 self.vctxt.error = None;
1580 }
1581 options -= XmlParserOption::XmlParseDTDValid as i32;
1582 self.options |= XmlParserOption::XmlParseDTDValid as i32;
1583 } else {
1584 self.validate = false;
1585 }
1586 if options & XmlParserOption::XmlParseNoWarning as i32 != 0 {
1587 if let Some(sax) = self.sax.as_deref_mut() {
1588 sax.warning = None;
1589 }
1590 options -= XmlParserOption::XmlParseNoWarning as i32;
1591 }
1592 if options & XmlParserOption::XmlParseNoError as i32 != 0 {
1593 if let Some(sax) = self.sax.as_deref_mut() {
1594 sax.error = None;
1595 sax.fatal_error = None;
1596 }
1597 options -= XmlParserOption::XmlParseNoError as i32;
1598 }
1599 #[cfg(feature = "sax1")]
1600 if options & XmlParserOption::XmlParseSAX1 as i32 != 0 {
1601 if let Some(sax) = self.sax.as_deref_mut() {
1602 sax.start_element = Some(xml_sax2_start_element);
1603 sax.end_element = Some(xml_sax2_end_element);
1604 sax.start_element_ns = None;
1605 sax.end_element_ns = None;
1606 sax.initialized = 1;
1607 }
1608 options -= XmlParserOption::XmlParseSAX1 as i32;
1609 self.options |= XmlParserOption::XmlParseSAX1 as i32;
1610 }
1611 if options & XmlParserOption::XmlParseNoCDATA as i32 != 0 {
1612 if let Some(sax) = self.sax.as_deref_mut() {
1613 sax.cdata_block = None;
1614 }
1615 options -= XmlParserOption::XmlParseNoCDATA as i32;
1616 self.options |= XmlParserOption::XmlParseNoCDATA as i32;
1617 }
1618 if options & XmlParserOption::XmlParseNsClean as i32 != 0 {
1619 self.options |= XmlParserOption::XmlParseNsClean as i32;
1620 options -= XmlParserOption::XmlParseNsClean as i32;
1621 }
1622 if options & XmlParserOption::XmlParseNoNet as i32 != 0 {
1623 self.options |= XmlParserOption::XmlParseNoNet as i32;
1624 options -= XmlParserOption::XmlParseNoNet as i32;
1625 }
1626 if options & XmlParserOption::XmlParseCompact as i32 != 0 {
1627 self.options |= XmlParserOption::XmlParseCompact as i32;
1628 options -= XmlParserOption::XmlParseCompact as i32;
1629 }
1630 if options & XmlParserOption::XmlParseOld10 as i32 != 0 {
1631 self.options |= XmlParserOption::XmlParseOld10 as i32;
1632 options -= XmlParserOption::XmlParseOld10 as i32;
1633 }
1634 if options & XmlParserOption::XmlParseNoBasefix as i32 != 0 {
1635 self.options |= XmlParserOption::XmlParseNoBasefix as i32;
1636 options -= XmlParserOption::XmlParseNoBasefix as i32;
1637 }
1638 if options & XmlParserOption::XmlParseHuge as i32 != 0 {
1639 self.options |= XmlParserOption::XmlParseHuge as i32;
1640 options -= XmlParserOption::XmlParseHuge as i32;
1641 }
1642 if options & XmlParserOption::XmlParseOldSAX as i32 != 0 {
1643 self.options |= XmlParserOption::XmlParseOldSAX as i32;
1644 options -= XmlParserOption::XmlParseOldSAX as i32;
1645 }
1646 if options & XmlParserOption::XmlParseIgnoreEnc as i32 != 0 {
1647 self.options |= XmlParserOption::XmlParseIgnoreEnc as i32;
1648 options -= XmlParserOption::XmlParseIgnoreEnc as i32;
1649 }
1650 if options & XmlParserOption::XmlParseBigLines as i32 != 0 {
1651 self.options |= XmlParserOption::XmlParseBigLines as i32;
1652 options -= XmlParserOption::XmlParseBigLines as i32;
1653 }
1654 self.linenumbers = 1;
1655 options
1656 }
1657
1658 #[doc(alias = "xmlCtxtUseOptions")]
1663 pub fn use_options(&mut self, options: i32) -> i32 {
1664 self.use_options_internal(options, None)
1665 }
1666
1667 #[doc(alias = "xmlDoRead")]
1671 pub(crate) fn do_read(
1672 &mut self,
1673 url: Option<&str>,
1674 encoding: Option<&str>,
1675 options: i32,
1676 ) -> Option<XmlDocPtr> {
1677 self.use_options_internal(options, encoding);
1678 if let Some(encoding) = encoding {
1679 if let Some(handler) = find_encoding_handler(encoding) {
1684 self.switch_to_encoding(handler);
1685 }
1686 }
1687 if url.is_some() {
1688 if let Some(input) = self.input_mut().filter(|input| input.filename.is_none()) {
1689 input.filename = url.map(|u| u.to_owned());
1690 }
1691 }
1692 self.parse_document();
1693 if self.well_formed || self.recovery {
1694 self.my_doc.take()
1695 } else {
1696 if let Some(my_doc) = self.my_doc.take() {
1697 unsafe {
1698 xml_free_doc(my_doc);
1702 }
1703 }
1704 None
1705 }
1706 }
1707
1708 #[doc(alias = "xmlSwitchInputEncoding")]
1712 pub(crate) fn switch_input_encoding(
1713 &mut self,
1714 input: &mut XmlParserInput<'_>,
1715 handler: XmlCharEncodingHandler,
1716 ) -> i32 {
1717 if input.buf.as_mut().is_none() {
1718 xml_err_internal!(self, "static memory buffer doesn't support encoding\n");
1719 return -1;
1720 }
1721
1722 if input
1723 .buf
1724 .as_mut()
1725 .unwrap()
1726 .encoder
1727 .replace(handler)
1728 .is_some()
1729 {
1730 return 0;
1737 }
1738
1739 self.charset = XmlCharEncoding::UTF8;
1740
1741 if input.buf.as_mut().unwrap().buffer.is_empty() {
1743 return 0;
1744 }
1745 if matches!(
1749 input.buf.as_mut().unwrap().encoder.as_ref().unwrap().name(),
1750 "UTF-16LE" | "UTF-16"
1751 ) && input.base_contents()[input.cur] == 0xFF
1752 && input.base_contents()[input.cur + 1] == 0xFE
1753 {
1754 input.cur += 2;
1755 }
1756 if input.buf.as_mut().unwrap().encoder.as_ref().unwrap().name() == "UTF-16BE"
1757 && input.base_contents()[input.cur] == 0xFE
1758 && input.base_contents()[input.cur + 1] == 0xFF
1759 {
1760 input.cur += 2;
1761 }
1762 if input.buf.as_mut().unwrap().encoder.as_ref().unwrap().name() == "UTF-8"
1765 && input.base_contents()[input.cur] == 0xEF
1766 && input.base_contents()[input.cur + 1] == 0xBB
1767 && input.base_contents()[input.cur + 2] == 0xBF
1768 {
1769 input.cur += 3;
1770 }
1771
1772 let processed = input.offset_from_base();
1775 input.buf.as_mut().unwrap().trim_head(processed);
1776 input.consumed += processed as u64;
1777 let input_buf = input.buf.as_mut().unwrap();
1778 let using = input_buf.buffer.len();
1779 input_buf.raw = take(&mut input_buf.buffer);
1780 input_buf.rawconsumed = processed as u64;
1781
1782 let res = input_buf.decode(true);
1791 input.reset_base();
1792 if res.is_err() {
1793 xml_err_internal!(self, "switching encoding: encoder error\n");
1795 self.halt();
1796 return -1;
1797 }
1798 let input_buf = input.buf.as_mut().unwrap();
1799 let consumed = using - input_buf.raw.len();
1800 let rawconsumed = input_buf.rawconsumed.saturating_add(consumed as u64);
1801 input_buf.rawconsumed = rawconsumed;
1802 0
1803 }
1804
1805 #[doc(alias = "xmlSwitchToEncoding")]
1810 pub fn switch_to_encoding(&mut self, handler: XmlCharEncodingHandler) -> i32 {
1811 let mut input = self.input_pop().unwrap();
1812 let res = self.switch_input_encoding(&mut input, handler);
1813 self.input_push(input);
1814 res
1815 }
1816
1817 #[doc(alias = "xmlSwitchEncoding")]
1821 pub fn switch_encoding(&mut self, enc: XmlCharEncoding) -> i32 {
1822 if self
1828 .input()
1829 .is_some_and(|input| input.consumed == 0 && input.offset_from_base() == 0)
1830 && matches!(
1831 enc,
1832 XmlCharEncoding::UTF8 | XmlCharEncoding::UTF16LE | XmlCharEncoding::UTF16BE
1833 )
1834 {
1835 if self.content_bytes().starts_with(&[0xEF, 0xBB, 0xBF]) {
1838 self.input_mut().unwrap().cur += 3;
1839 }
1840 }
1841
1842 let Some(handler) = (match enc {
1843 XmlCharEncoding::Error => {
1844 __xml_err_encoding!(
1845 self,
1846 XmlParserErrors::XmlErrUnknownEncoding,
1847 "encoding unknown\n"
1848 );
1849 return -1;
1850 }
1851 XmlCharEncoding::None => {
1852 self.charset = XmlCharEncoding::UTF8;
1854 return 0;
1855 }
1856 XmlCharEncoding::UTF8 => {
1857 self.charset = XmlCharEncoding::UTF8;
1859 return 0;
1860 }
1861 XmlCharEncoding::EBCDIC => self.input().unwrap().detect_ebcdic(),
1862 _ => get_encoding_handler(enc),
1863 }) else {
1864 match enc {
1866 XmlCharEncoding::ASCII => {
1867 self.charset = XmlCharEncoding::UTF8;
1869 return 0;
1870 }
1871 XmlCharEncoding::ISO8859_1 => {
1872 if self.input_tab.len() == 1
1873 && self.encoding.is_none()
1874 && self.input().is_some_and(|input| input.encoding.is_some())
1875 {
1876 self.encoding = self.input().unwrap().encoding.clone();
1877 }
1878 self.charset = enc;
1879 return 0;
1880 }
1881 _ => {
1882 let name = enc.get_name().unwrap_or("");
1883 __xml_err_encoding!(
1884 self,
1885 XmlParserErrors::XmlErrUnsupportedEncoding,
1886 "encoding not supported: {}\n",
1887 name
1888 );
1889 self.stop();
1893 return -1;
1894 }
1895 }
1896 };
1897 let mut input = self.input_pop().unwrap();
1898 let ret: i32 = self.switch_input_encoding(&mut input, handler);
1899 self.input_push(input);
1900 if ret < 0 || self.err_no == XmlParserErrors::XmlI18NConvFailed as i32 {
1901 self.stop();
1903 self.err_no = XmlParserErrors::XmlI18NConvFailed as i32;
1904 }
1905 ret
1906 }
1907}
1908
1909impl Default for XmlParserCtxt<'_> {
1910 fn default() -> Self {
1911 Self {
1912 sax: None,
1913 user_data: None,
1914 my_doc: None,
1915 well_formed: true,
1916 replace_entities: get_substitute_entities_default_value(),
1917 version: None,
1918 encoding: None,
1919 standalone: 0,
1920 html: 0,
1921 input_tab: vec![],
1922 node: None,
1923 node_tab: vec![],
1924 record_info: false,
1925 node_seq: XmlParserNodeInfoSeq::default(),
1926 err_no: 0,
1927 has_external_subset: false,
1928 has_perefs: false,
1929 external: 0,
1930 valid: 0,
1931 validate: get_do_validity_checking_default_value(),
1932 vctxt: XmlValidCtxt::default(),
1933 instate: XmlParserInputState::default(),
1934 token: 0,
1935 directory: None,
1936 name: None,
1937 name_tab: vec![],
1938 check_index: 0,
1939 keep_blanks: get_keep_blanks_default_value(),
1940 disable_sax: false,
1941 in_subset: 0,
1942 int_sub_name: None,
1943 ext_sub_uri: None,
1944 ext_sub_system: None,
1945 space_tab: vec![],
1946 depth: 0,
1947 charset: XmlCharEncoding::None,
1948 pedantic: get_pedantic_parser_default_value(),
1949 _private: null_mut(),
1950 loadsubset: 0,
1951 linenumbers: 0,
1952 #[cfg(feature = "catalog")]
1953 catalogs: None,
1954 recovery: false,
1955 progressive: false,
1956 atts: vec![],
1957 str_xml: None,
1958 str_xml_ns: None,
1959 str_xmlns: None,
1960 sax2: false,
1961 ns_tab: vec![],
1962 push_tab: vec![],
1963 atts_default: HashMap::new(),
1964 atts_special: HashMap::new(),
1965 ns_well_formed: true,
1966 options: 0,
1967 free_elems_nr: 0,
1968 free_elems: None,
1969 free_attrs_nr: 0,
1970 free_attrs: None,
1971 last_error: XmlError::default(),
1972 parse_mode: XmlParserMode::default(),
1973 sizeentities: 0,
1974 node_info_tab: vec![],
1975 input_id: 0,
1976 sizeentcopy: 0,
1977 end_check_state: 0,
1978 nb_errors: 0,
1979 nb_warnings: 0,
1980 }
1981 }
1982}
1983
1984impl Drop for XmlParserCtxt<'_> {
1985 #[doc(alias = "xmlFreeParserCtxt")]
1988 fn drop(&mut self) {
1989 unsafe {
1990 let mut cur = self.free_elems;
1991 while let Some(now) = cur {
1992 let next = now.next.map(|node| XmlNodePtr::try_from(node).unwrap());
1993 now.free();
1994 cur = next;
1995 }
1996 if let Some(attrs) = self.free_attrs.take() {
1997 let mut cur = Some(attrs);
1998 while let Some(now) = cur {
1999 let next = now.next;
2000 now.free();
2001 cur = next;
2002 }
2003 }
2004 }
2005 }
2006}