exml/parser/
context.rs

1use std::{
2    borrow::Cow,
3    cell::RefCell,
4    collections::HashMap,
5    ffi::c_void,
6    io::Read,
7    mem::take,
8    ptr::null_mut,
9    rc::Rc,
10    str::{from_utf8, from_utf8_unchecked},
11    sync::atomic::AtomicPtr,
12};
13
14use crate::{
15    chvalid::XmlCharValid,
16    encoding::{
17        XmlCharEncoding, XmlCharEncodingHandler, detect_encoding, find_encoding_handler,
18        get_encoding_handler,
19    },
20    error::{XmlError, XmlParserErrors, parser_validity_error, parser_validity_warning},
21    generic_error,
22    globals::{
23        GenericErrorContext, get_do_validity_checking_default_value,
24        get_get_warnings_default_value, get_keep_blanks_default_value,
25        get_line_numbers_default_value, get_load_ext_dtd_default_value, get_parser_debug_entities,
26        get_pedantic_parser_default_value, get_substitute_entities_default_value,
27    },
28    io::{XmlParserInputBuffer, xml_parser_get_directory},
29    libxml::{
30        catalog::XmlCatalogEntry,
31        sax2::{
32            xml_sax_version, xml_sax2_end_element, xml_sax2_ignorable_whitespace,
33            xml_sax2_start_element,
34        },
35    },
36    parser::{
37        __xml_err_encoding, INPUT_CHUNK, XML_COMPLETE_ATTRS, XML_DETECT_IDS, XML_MAX_LOOKUP_LIMIT,
38        XML_PARSER_MAX_DEPTH, XML_VCTXT_USE_PCTXT, XmlParserInputState, XmlSAXHandler, XmlStartTag,
39        xml_err_encoding_int, xml_err_internal, xml_fatal_err_msg_int, xml_fatal_err_msg_str,
40        xml_init_parser,
41    },
42    tree::{
43        XML_ENT_EXPANDING, XML_ENT_PARSED, XML_XML_NAMESPACE, XmlAttrPtr, XmlAttributeType,
44        XmlDocPtr, XmlEntityType, XmlNodePtr, xml_free_doc,
45    },
46    uri::{build_uri, canonic_path},
47    valid::XmlValidCtxt,
48};
49
50use super::{
51    XmlParserInput, XmlParserNodeInfo, XmlParserNodeInfoSeq, xml_err_memory, xml_fatal_err,
52    xml_load_external_entity,
53};
54
/// Special constant found in the `initialized` field of SAX2 handler blocks.
///
/// A handler whose `initialized` field differs from this value is treated as
/// a SAX1 handler when a parser context is set up.
pub const XML_SAX2_MAGIC: usize = 0xDEEDBEAF;
57
/// This is the set of XML parser options that can be passed down
/// to the xmlReadDoc() and similar calls.
///
/// Every variant is a distinct bit flag. Combine several options by casting
/// each one to `i32` and OR-ing the results, for example
/// `XmlParserOption::XmlParseNoEnt as i32 | XmlParserOption::XmlParseHuge as i32`.
#[doc(alias = "xmlParserOption")]
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum XmlParserOption {
    /// Recover on errors.
    XmlParseRecover = 1 << 0,
    /// Substitute entities.
    XmlParseNoEnt = 1 << 1,
    /// Load the external subset.
    XmlParseDTDLoad = 1 << 2,
    /// Default DTD attributes.
    XmlParseDTDAttr = 1 << 3,
    /// Validate with the DTD.
    XmlParseDTDValid = 1 << 4,
    /// Suppress error reports.
    XmlParseNoError = 1 << 5,
    /// Suppress warning reports.
    XmlParseNoWarning = 1 << 6,
    /// Pedantic error reporting.
    XmlParsePedantic = 1 << 7,
    /// Remove blank nodes.
    XmlParseNoBlanks = 1 << 8,
    /// Use the SAX1 interface internally.
    XmlParseSAX1 = 1 << 9,
    /// Implement XInclude substitution.
    XmlParseXInclude = 1 << 10,
    /// Forbid network access.
    XmlParseNoNet = 1 << 11,
    /// Do not reuse the context dictionary.
    XmlParseNoDict = 1 << 12,
    /// Remove redundant namespaces declarations.
    XmlParseNsClean = 1 << 13,
    /// Merge CDATA as text nodes.
    XmlParseNoCDATA = 1 << 14,
    /// Do not generate XINCLUDE START/END nodes.
    XmlParseNoXIncnode = 1 << 15,
    /// Compact small text nodes; no modification of the tree is allowed
    /// afterwards (will possibly crash if you try to modify the tree).
    XmlParseCompact = 1 << 16,
    /// Parse using XML-1.0 before update 5.
    XmlParseOld10 = 1 << 17,
    /// Do not fixup XINCLUDE xml:base uris.
    XmlParseNoBasefix = 1 << 18,
    /// Relax any hardcoded limit from the parser.
    XmlParseHuge = 1 << 19,
    /// Parse using SAX2 interface before 2.7.0.
    XmlParseOldSAX = 1 << 20,
    /// Ignore internal document encoding hint.
    XmlParseIgnoreEnc = 1 << 21,
    /// Store big lines numbers in text PSVI field.
    XmlParseBigLines = 1 << 22,
}
89
/// A parser can operate in various modes.
///
/// The discriminants are fixed explicitly and the default mode is
/// [`XmlParserMode::XmlParseUnknown`].
#[doc(alias = "xmlParserMode")]
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum XmlParserMode {
    /// No mode has been selected; this is the `Default` value.
    #[default]
    XmlParseUnknown = 0,
    // NOTE(review): the variants below presumably map one-to-one onto the
    // libxml2 `xmlParserMode` values of the same name -- confirm against callers.
    XmlParseDOM = 1,
    XmlParseSAX = 2,
    XmlParsePushDOM = 3,
    XmlParsePushSAX = 4,
    XmlParseReader = 5,
}
103
/// Raw mutable pointer to an [`XmlParserCtxt`].
pub type XmlParserCtxtPtr<'a> = *mut XmlParserCtxt<'a>;
/// The parser context.
///
/// # Note
/// This doesn't completely define the parser state: the (current?)
/// design of the parser uses recursive function calls, since this allows
/// an easy mapping from the production rules of the specification
/// to the actual code. The drawback is that the actual function call
/// stack also reflects the parser state. However, most of the parsing routines
/// take the parser context pointer as their only argument, so migrating
/// to a state based parser for progressive parsing shouldn't be too hard.
#[doc(alias = "xmlParserCtxt")]
pub struct XmlParserCtxt<'a> {
    /// The SAX handler
    pub sax: Option<Box<XmlSAXHandler>>,
    /// For SAX interface only, used by DOM build
    pub user_data: Option<GenericErrorContext>,
    /// The document being built
    pub my_doc: Option<XmlDocPtr>,
    /// Is the document well formed
    pub well_formed: bool,
    /// Shall we replace entities?
    pub(crate) replace_entities: bool,
    /// The XML version string
    pub(crate) version: Option<String>,
    /// The declared encoding, if any
    pub encoding: Option<String>,
    /// Standalone document
    pub(crate) standalone: i32,
    /// An HTML(1) document
    ///  3 is HTML after `<head>`
    ///  10 is HTML after `<body>`
    pub(crate) html: i32,

    // Input stream stack
    /// Stack of inputs
    pub input_tab: Vec<XmlParserInput<'a>>,

    // Node analysis stack only used for DOM building
    /// Current parsed node
    pub(crate) node: Option<XmlNodePtr>,
    /// Array of nodes
    pub(crate) node_tab: Vec<XmlNodePtr>,

    /// Whether node info should be kept
    pub(crate) record_info: bool,
    /// Info about each node parsed
    pub(crate) node_seq: XmlParserNodeInfoSeq,

    /// Error code
    pub err_no: i32,

    /// Reference and external subset
    pub(crate) has_external_subset: bool,
    /// The internal subset has PE refs
    pub(crate) has_perefs: bool,
    /// Are we parsing an external entity
    pub(crate) external: i32,

    /// Is the document valid
    pub valid: i32,
    /// Shall we try to validate?
    pub(crate) validate: bool,
    /// The validity context
    pub vctxt: XmlValidCtxt,

    /// Current type of input
    pub instate: XmlParserInputState,
    /// Next char look-ahead
    pub(crate) token: i32,

    /// The data directory
    pub(crate) directory: Option<String>,

    // Node name stack
    /// Name of the current parsed node
    pub(crate) name: Option<Rc<str>>,
    /// Array of node names
    pub(crate) name_tab: Vec<Rc<str>>,

    /// Used by progressive parsing lookup
    pub(crate) check_index: usize,
    /// Ugly but ...
    pub(crate) keep_blanks: bool,
    /// SAX callbacks are disabled
    pub(crate) disable_sax: bool,
    /// Parsing is in int 1/ext 2 subset
    pub in_subset: i32,
    /// Name of subset
    pub(crate) int_sub_name: Option<Rc<str>>,
    /// URI of external subset
    pub(crate) ext_sub_uri: Option<Rc<str>>,
    /// SYSTEM ID of external subset
    pub(crate) ext_sub_system: Option<Rc<str>>,

    // xml:space values
    /// Array of space infos
    pub(crate) space_tab: Vec<i32>,

    /// To prevent entity substitution loops
    pub(crate) depth: i32,
    /// Encoding of the in-memory content
    pub charset: XmlCharEncoding,

    /// Signal pedantic warnings
    pub(crate) pedantic: bool,
    /// For user data, libxml won't touch it
    pub(crate) _private: *mut c_void,

    /// Should the external subset be loaded
    pub(crate) loadsubset: i32,
    /// Set line number in element content
    pub(crate) linenumbers: i32,
    /// Document's own catalog
    #[cfg(feature = "catalog")]
    pub(crate) catalogs: Option<XmlCatalogEntry>,
    /// Run in recovery mode
    pub(crate) recovery: bool,
    /// Is this a progressive parsing
    pub(crate) progressive: bool,
    /// Array for the attributes callbacks
    pub(crate) atts: Vec<(String, Option<String>)>,

    // Pre-interned strings
    pub(crate) str_xml: Option<Cow<'static, str>>,
    pub(crate) str_xmlns: Option<Cow<'static, str>>,
    pub(crate) str_xml_ns: Option<Cow<'static, str>>,

    // Everything below is used only by the new SAX mode
    /// Operating in the new SAX mode
    pub(crate) sax2: bool,
    /// The array of prefix/namespace name
    pub(crate) ns_tab: Vec<(Option<String>, String)>,
    /// Array of data for push
    pub(crate) push_tab: Vec<XmlStartTag>,
    /// Defaulted attributes if any
    ///
    /// - Key:   (name, prefix)
    /// - Value: (name, prefix, value, is_external)
    #[allow(clippy::type_complexity)]
    pub(crate) atts_default: HashMap<
        (Cow<'static, str>, Option<Cow<'static, str>>),
        Vec<(String, Option<String>, String, Option<&'static str>)>,
    >,
    /// Non-CDATA attributes if any
    pub(crate) atts_special: HashMap<(Cow<'static, str>, Cow<'static, str>), XmlAttributeType>,
    /// Is the document XML Namespace okay
    pub(crate) ns_well_formed: bool,
    /// Extra options
    pub(crate) options: i32,

    // Those fields are needed only for streaming parsing so far
    /// Number of freed element nodes
    pub(crate) free_elems_nr: i32,
    /// List of freed element nodes
    pub(crate) free_elems: Option<XmlNodePtr>,
    /// Number of freed attributes nodes
    pub(crate) free_attrs_nr: i32,
    /// List of freed attributes nodes
    pub(crate) free_attrs: Option<XmlAttrPtr>,

    /// The complete error information for the last error.
    pub last_error: XmlError,
    /// The parser mode
    pub(crate) parse_mode: XmlParserMode,
    /// Size of parsed entities
    pub sizeentities: u64,

    // For use by HTML non-recursive parser
    /// Array of nodeInfos
    pub(crate) node_info_tab: Vec<Rc<RefCell<XmlParserNodeInfo>>>,

    /// We need to label inputs
    pub(crate) input_id: i32,
    /// Volume of entity copy
    pub sizeentcopy: u64,

    /// Quote state for push parser
    pub(crate) end_check_state: i32,
    /// Number of errors
    pub nb_errors: u16,
    /// Number of warnings
    pub(crate) nb_warnings: u16,
}
288
289impl<'a> XmlParserCtxt<'a> {
290    /// Allocate and initialize a new parser context.
291    ///
292    /// Returns the xmlParserCtxtPtr or NULL
293    #[doc(alias = "xmlNewParserCtxt")]
294    pub fn new() -> Option<Self> {
295        Self::new_sax_parser(None, None).ok()
296    }
297
298    /// Allocate and initialize a new SAX parser context.   
299    /// If userData is NULL, the parser context will be passed as user data.
300    ///
301    /// Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
302    #[doc(alias = "xmlNewSAXParserCtxt")]
303    pub fn new_sax_parser(
304        sax: Option<Box<XmlSAXHandler>>,
305        user_data: Option<GenericErrorContext>,
306    ) -> Result<Self, Option<Box<XmlSAXHandler>>> {
307        let mut ctxt = XmlParserCtxt::default();
308        ctxt.init_sax_parser(sax, user_data).map(|_| ctxt)
309    }
310
311    /// Create a parser context for a file content.
312    ///
313    /// In original libxml2, automatic support for ZLIB/Compress compressed document is provided
314    /// by default if found at compile-time.  
315    /// However, this crate does not support currently.
316    ///
317    /// Returns the new parser context or NULL
318    #[doc(alias = "xmlCreateFileParserCtxt")]
319    pub fn from_filename(filename: Option<&str>) -> Option<XmlParserCtxt> {
320        Self::from_filename_with_options(filename, 0)
321    }
322
323    /// Create a parser context for a file or URL content.
324    ///
325    /// In original libxml2, automatic support for ZLIB/Compress compressed document is provided
326    /// by default if found at compile-time.  
327    /// However, this crate does not support currently.
328    ///
329    /// Returns the new parser context or NULL
330    #[doc(alias = "xmlCreateURLParserCtxt")]
331    pub fn from_filename_with_options(
332        filename: Option<&str>,
333        options: i32,
334    ) -> Option<XmlParserCtxt> {
335        let Some(mut ctxt) = XmlParserCtxt::new() else {
336            xml_err_memory(None, Some("cannot allocate parser context"));
337            return None;
338        };
339
340        if options != 0 {
341            ctxt.use_options_internal(options, None);
342        }
343        ctxt.linenumbers = 1;
344
345        let input_stream = xml_load_external_entity(filename, None, &mut ctxt)?;
346
347        ctxt.input_push(input_stream);
348        if ctxt.directory.is_none() {
349            if let Some(filename) = filename {
350                if let Some(directory) = xml_parser_get_directory(filename) {
351                    ctxt.directory = Some(directory.to_string_lossy().into_owned());
352                }
353            }
354        }
355
356        Some(ctxt)
357    }
358
359    /// Create a parser context for using the XML parser with an existing I/O stream
360    ///
361    /// Returns the new parser context or NULL
362    #[doc(alias = "xmlCreateIOParserCtxt")]
363    pub fn from_io(
364        sax: Option<Box<XmlSAXHandler>>,
365        user_data: Option<GenericErrorContext>,
366        ioctx: impl Read + 'a,
367        enc: XmlCharEncoding,
368    ) -> Option<Self> {
369        let buf = XmlParserInputBuffer::from_reader(ioctx, enc);
370        let mut ctxt = XmlParserCtxt::new_sax_parser(sax, user_data).ok()?;
371
372        let input_stream = XmlParserInput::from_io(&mut ctxt, buf, enc)?;
373        ctxt.input_push(input_stream);
374        Some(ctxt)
375    }
376
377    /// Create a parser context for an XML in-memory document.
378    ///
379    /// Returns the new parser context or NULL
380    #[doc(alias = "xmlCreateMemoryParserCtxt", alias = "xmlCreateDocParserCtxt")]
381    pub fn from_memory(buffer: &'a [u8]) -> Option<Self> {
382        if buffer.is_empty() {
383            return None;
384        }
385
386        let mut ctxt = XmlParserCtxt::new()?;
387
388        let buf = XmlParserInputBuffer::from_memory(buffer, XmlCharEncoding::None)?;
389        let mut input = XmlParserInput::new(Some(&mut ctxt))?;
390        input.filename = None;
391        input.buf = Some(buf);
392        input.reset_base();
393
394        ctxt.input_push(input);
395        Some(ctxt)
396    }
397
398    /// Create a parser context for an external entity
399    ///
400    /// In original libxml2, automatic support for ZLIB/Compress compressed document is provided
401    /// by default if found at compile-time.  
402    /// However, this crate does not support currently.
403    ///
404    /// If create new context successfully, return new context wrapped `Ok`.  
405    /// Otherwise, return received SAX handler wrapped `Err`.
406    /// Returns the new parser context or NULL
407    #[doc(alias = "xmlCreateEntityParserCtxtInternal")]
408    pub(crate) fn new_entity_parser_internal(
409        sax: Option<Box<XmlSAXHandler>>,
410        user_data: Option<GenericErrorContext>,
411        mut url: Option<&str>,
412        id: Option<&str>,
413        base: Option<&str>,
414        pctxt: Option<&XmlParserCtxt>,
415    ) -> Result<Self, Option<Box<XmlSAXHandler>>> {
416        let mut ctxt = XmlParserCtxt::new_sax_parser(sax, user_data)?;
417
418        if let Some(pctxt) = pctxt {
419            ctxt.options = pctxt.options;
420            ctxt._private = pctxt._private;
421            ctxt.input_id = pctxt.input_id;
422        }
423
424        // Don't read from stdin.
425        if url == Some("-") {
426            url = Some("./-");
427        }
428
429        if let Some(uri) = url.zip(base).and_then(|(url, base)| build_uri(url, base)) {
430            let Some(input_stream) = xml_load_external_entity(Some(&uri), id, &mut ctxt) else {
431                let sax = ctxt.sax.take();
432                return Err(sax);
433            };
434            ctxt.input_push(input_stream);
435
436            if ctxt.directory.is_none() {
437                if let Some(url) = url {
438                    if let Some(directory) = xml_parser_get_directory(url) {
439                        ctxt.directory = Some(directory.to_string_lossy().into_owned());
440                    }
441                }
442            }
443        } else {
444            let Some(input_stream) = xml_load_external_entity(url, id, &mut ctxt) else {
445                let sax = ctxt.sax.take();
446                return Err(sax);
447            };
448            ctxt.input_push(input_stream);
449
450            if ctxt.directory.is_none() {
451                if let Some(url) = url {
452                    if let Some(directory) = xml_parser_get_directory(url) {
453                        ctxt.directory = Some(directory.to_string_lossy().into_owned());
454                    }
455                }
456            }
457        }
458        Ok(ctxt)
459    }
460
461    /// Create a parser context for an external entity
462    ///
463    /// In original libxml2, automatic support for ZLIB/Compress compressed document is provided
464    /// by default if found at compile-time.  
465    /// However, this crate does not support currently.
466    ///
467    /// Returns the new parser context or NULL
468    #[doc(alias = "xmlCreateEntityParserCtxt")]
469    pub fn new_entity_parser(
470        url: Option<&str>,
471        id: Option<&str>,
472        base: Option<&str>,
473    ) -> Result<Self, Option<Box<XmlSAXHandler>>> {
474        Self::new_entity_parser_internal(None, None, url, id, base, None)
475    }
476
477    /// Initialize a SAX parser context
478    ///
479    /// Returns 0 in case of success and -1 in case of error
480    #[doc(alias = "xmlInitSAXParserCtxt")]
481    fn init_sax_parser(
482        &mut self,
483        sax: Option<Box<XmlSAXHandler>>,
484        user_data: Option<GenericErrorContext>,
485    ) -> Result<(), Option<Box<XmlSAXHandler>>> {
486        xml_init_parser();
487
488        if let Some(mut sax) = sax {
489            if sax.initialized != XML_SAX2_MAGIC as u32 {
490                // These fields won't used in SAX1 handling.
491                sax._private = AtomicPtr::new(null_mut());
492                sax.start_element_ns = None;
493                sax.end_element_ns = None;
494                sax.serror = None;
495            }
496            self.sax = Some(sax);
497            self.user_data = user_data;
498        } else {
499            let mut sax = XmlSAXHandler::default();
500            xml_sax_version(&mut sax, 2);
501            self.sax = Some(Box::new(sax));
502            self.user_data = None;
503        }
504
505        self.atts = vec![];
506        // Allocate the Input stack
507        self.input_tab.clear();
508        self.version = None;
509        self.encoding = None;
510        self.standalone = -1;
511        self.has_external_subset = false;
512        self.has_perefs = false;
513        self.html = 0;
514        self.external = 0;
515        self.instate = XmlParserInputState::XmlParserStart;
516        self.token = 0;
517        self.directory = None;
518
519        // Allocate the Node stack
520        self.node_tab.clear();
521        self.node = None;
522
523        // Allocate the Name stack
524        self.name_tab.clear();
525        self.name = None;
526
527        // Allocate the space stack
528        self.space_tab.clear();
529        self.space_tab.push(-1);
530
531        self.my_doc = None;
532        self.well_formed = true;
533        self.ns_well_formed = true;
534        self.valid = 1;
535        self.loadsubset = get_load_ext_dtd_default_value();
536        if self.loadsubset != 0 {
537            self.options |= XmlParserOption::XmlParseDTDLoad as i32;
538        }
539        self.validate = get_do_validity_checking_default_value();
540        self.pedantic = get_pedantic_parser_default_value();
541        if self.pedantic {
542            self.options |= XmlParserOption::XmlParsePedantic as i32;
543        }
544        self.linenumbers = get_line_numbers_default_value();
545        self.keep_blanks = get_keep_blanks_default_value();
546        if !self.keep_blanks {
547            if let Some(sax) = self.sax.as_deref_mut() {
548                sax.ignorable_whitespace = Some(xml_sax2_ignorable_whitespace);
549            }
550            self.options |= XmlParserOption::XmlParseNoBlanks as i32;
551        }
552
553        self.vctxt.flags = XML_VCTXT_USE_PCTXT as _;
554        self.vctxt.user_data = None;
555        self.vctxt.error = Some(parser_validity_error);
556        self.vctxt.warning = Some(parser_validity_warning);
557        if self.validate {
558            if get_get_warnings_default_value() == 0 {
559                self.vctxt.warning = None;
560            } else {
561                self.vctxt.warning = Some(parser_validity_warning);
562            }
563            self.vctxt.node_tab.clear();
564            self.options |= XmlParserOption::XmlParseDTDValid as i32;
565        }
566        self.replace_entities = get_substitute_entities_default_value();
567        if self.replace_entities {
568            self.options |= XmlParserOption::XmlParseNoEnt as i32;
569        }
570        self.record_info = false;
571        self.check_index = 0;
572        self.in_subset = 0;
573        self.err_no = XmlParserErrors::XmlErrOK as i32;
574        self.depth = 0;
575        self.charset = XmlCharEncoding::UTF8;
576        #[cfg(feature = "catalog")]
577        {
578            self.catalogs = None;
579        }
580        self.sizeentities = 0;
581        self.sizeentcopy = 0;
582        self.input_id = 1;
583        self.node_seq.clear();
584        Ok(())
585    }
586
587    pub fn encoding(&self) -> Option<&str> {
588        self.encoding.as_deref()
589    }
590
591    pub(crate) fn current_byte(&self) -> u8 {
592        *self.content_bytes().first().unwrap_or(&0)
593    }
594
595    pub(crate) fn nth_byte(&self, nth: usize) -> u8 {
596        *self.content_bytes().get(nth).unwrap_or(&0)
597    }
598
599    pub fn input(&self) -> Option<&XmlParserInput> {
600        self.input_tab.last()
601    }
602
603    pub fn input_mut(&mut self) -> Option<&mut XmlParserInput<'a>> {
604        self.input_tab.last_mut()
605    }
606
    /// This function provides the current index of the parser relative
    /// to the start of the current entity. This function is computed in
    /// bytes from the beginning starting at zero and finishing at the
    /// size in byte of the file if parsing a file. The function is
    /// of constant cost if the input is UTF-8 but can be costly if run
    /// on non-UTF-8 input.
    ///
    /// Returns the index in bytes from the beginning of the entity or -1
    /// in case the index could not be computed. That happens when there is
    /// no current input, or, for an input with an encoder, when the
    /// remaining content is not valid UTF-8, when re-encoding it fails, or
    /// when the re-encoded size exceeds the raw consumed byte count.
    #[doc(alias = "xmlByteConsumed")]
    pub fn byte_consumed(&mut self) -> i64 {
        let Some(input) = self.input() else {
            return -1;
        };
        // Only inputs that go through an encoder need the expensive path.
        if input.buf.is_some() && input.buf.as_ref().unwrap().encoder.is_some() {
            // Number of bytes the remaining (unparsed) content occupies once
            // encoded back with the input's encoder.
            let mut unused = 0;
            // Encoding conversion, compute the number of unused original
            // bytes from the input not consumed and subtract that from
            // the raw consumed value, this is not a cheap operation
            if input.remainder_len() > 0 {
                // The original code seems to continue processing as long as the write succeeds,
                // even if encoding errors occur.
                // However, the new API stops processing when an error occurs,
                // so it is not possible to reproduce such a process ...
                // Scratch output; only the written lengths are used.
                let mut out = [0u8; 32000];
                let Ok(input) = from_utf8(self.content_bytes()) else {
                    return -1;
                };
                // Own the text so the shared borrow of `self` ends before
                // the encoder is borrowed mutably below.
                let input = input.to_owned();
                let handler = self
                    .input_mut()
                    .unwrap()
                    .buf
                    .as_mut()
                    .unwrap()
                    .encoder
                    .as_mut()
                    .unwrap();
                let mut read = 0;
                // Encode chunk by chunk, reusing `out`, until all of the
                // remaining text has been consumed.
                while read < input.len() {
                    let Ok((r, w)) = handler.encode(&input[read..], &mut out) else {
                        return -1;
                    };
                    unused += w;
                    read += r;
                }
            }
            let input = self.input().unwrap();
            // Guard against underflow in the subtraction below.
            if input.buf.as_ref().unwrap().rawconsumed < unused as u64 {
                return -1;
            }
            return (input.buf.as_ref().unwrap().rawconsumed - unused as u64) as i64;
        }
        // No encoder: bytes already consumed plus the offset inside the
        // current buffer.
        input.consumed as i64 + input.offset_from_base() as i64
    }
662
663    #[doc(alias = "xmlParserGrow")]
664    pub(crate) fn force_grow(&mut self) -> i32 {
665        // Don't grow push parser buffer.
666        if self.progressive {
667            return 0;
668        }
669
670        let input = self.input_mut().unwrap();
671        let cur_end = input.remainder_len();
672        let cur_base = input.offset_from_base();
673
674        let Some(buf) = input.buf.as_mut() else {
675            return 0;
676        };
677        // Don't grow memory buffers.
678        if buf.encoder.is_none() && buf.context.is_none() {
679            return 0;
680        }
681
682        if (cur_end > XML_MAX_LOOKUP_LIMIT || cur_base > XML_MAX_LOOKUP_LIMIT)
683            && self.options & XmlParserOption::XmlParseHuge as i32 == 0
684        {
685            xml_err_internal!(self, "Huge input lookup");
686            self.halt();
687            return -1;
688        }
689
690        if cur_end >= INPUT_CHUNK {
691            return 0;
692        }
693
694        let input = self.input_mut().unwrap();
695        let ret: i32 = input.buf.as_mut().unwrap().grow(INPUT_CHUNK);
696
697        // TODO: Get error code from xmlParserInputBufferGrow
698        if ret < 0 {
699            xml_err_internal!(self, "Growing input buffer");
700            self.halt();
701        }
702
703        ret
704    }
705
706    pub(crate) fn grow(&mut self) {
707        if !self.progressive && self.input().unwrap().remainder_len() < INPUT_CHUNK {
708            self.force_grow();
709        }
710    }
711
712    #[doc(alias = "xmlParserShrink")]
713    pub(crate) fn force_shrink(&mut self) {
714        let progressive = self.progressive;
715
716        let input = self.input_mut().unwrap();
717
718        // Don't shrink pull parser memory buffers.
719        let Some(buf) = input.buf.as_mut() else {
720            return;
721        };
722        if !progressive && buf.encoder.is_none() && buf.context.is_none() {
723            return;
724        }
725
726        // Do not shrink on large buffers whose only a tiny fraction was consumed
727        input.shrink();
728    }
729
730    pub(crate) fn shrink(&mut self) {
731        if !self.progressive
732            && self.input().unwrap().offset_from_base() > 2 * INPUT_CHUNK
733            && self.input().unwrap().remainder_len() < 2 * INPUT_CHUNK
734        {
735            self.force_shrink();
736        }
737    }
738
739    /// Blocks further parser processing don't override error.
740    #[doc(alias = "xmlHaltParser")]
741    pub(crate) fn halt(&mut self) {
742        self.instate = XmlParserInputState::XmlParserEOF;
743        self.disable_sax = true;
744        while self.input_tab.len() > 1 {
745            self.input_pop();
746        }
747        if let Some(input) = self.input_mut() {
748            // in case there was a specific allocation deallocate before overriding base
749            if input.buf.is_some() {
750                let _ = input.buf.take();
751            }
752            input.cur = 0;
753            input.length = 0;
754            input.base = 0;
755        }
756    }
757
758    /// Blocks further parser processing
759    #[doc(alias = "xmlStopParser")]
760    pub fn stop(&mut self) {
761        self.halt();
762        self.err_no = XmlParserErrors::XmlErrUserStop as i32;
763    }
764
765    /// Reset a parser context
766    #[doc(alias = "xmlCtxtReset")]
767    pub fn reset(&mut self) {
768        self.input_tab.clear();
769        self.space_tab.clear();
770        self.node_tab.clear();
771        self.node = None;
772        self.name_tab.clear();
773        self.name = None;
774        self.ns_tab.clear();
775        self.version = None;
776        self.encoding = None;
777        self.directory = None;
778        self.ext_sub_uri = None;
779        self.ext_sub_system = None;
780        if let Some(doc) = self.my_doc.take() {
781            unsafe {
782                xml_free_doc(doc);
783            }
784        }
785
786        self.standalone = -1;
787        self.has_external_subset = false;
788        self.has_perefs = false;
789        self.html = 0;
790        self.external = 0;
791        self.instate = XmlParserInputState::XmlParserStart;
792        self.token = 0;
793        self.well_formed = true;
794        self.ns_well_formed = true;
795        self.disable_sax = false;
796        self.valid = 1;
797        self.record_info = false;
798        self.check_index = 0;
799        self.end_check_state = 0;
800        self.in_subset = 0;
801        self.err_no = XmlParserErrors::XmlErrOK as i32;
802        self.depth = 0;
803        self.charset = XmlCharEncoding::UTF8;
804        #[cfg(feature = "catalog")]
805        {
806            self.catalogs = None;
807        }
808        self.sizeentities = 0;
809        self.sizeentcopy = 0;
810        self.node_seq.clear();
811        self.atts_default.clear();
812        self.atts_special.clear();
813
814        #[cfg(feature = "catalog")]
815        {
816            self.catalogs = None;
817        }
818        self.nb_errors = 0;
819        self.nb_warnings = 0;
820        if self.last_error.is_err() {
821            self.last_error.reset();
822        }
823    }
824
825    /// Reset a push parser context
826    ///
827    /// Returns 0 in case of success and 1 in case of error
828    #[doc(alias = "xmlCtxtResetPush")]
829    pub fn reset_push(
830        &mut self,
831        chunk: &[u8],
832        filename: Option<&str>,
833        encoding: Option<&str>,
834    ) -> i32 {
835        let enc = if encoding.is_none() && chunk.len() >= 4 {
836            detect_encoding(chunk)
837        } else {
838            XmlCharEncoding::None
839        };
840
841        let buf = XmlParserInputBuffer::new(enc);
842
843        self.reset();
844
845        if filename.is_none() {
846            self.directory = None;
847        } else if let Some(dir) = filename.and_then(xml_parser_get_directory) {
848            self.directory = Some(dir.to_string_lossy().into_owned());
849        }
850
851        let Some(mut input_stream) = XmlParserInput::new(Some(self)) else {
852            return 1;
853        };
854
855        input_stream.filename = filename
856            .map(canonic_path)
857            .map(|filanem| filanem.into_owned());
858        input_stream.buf = Some(buf);
859        input_stream.reset_base();
860
861        self.input_push(input_stream);
862
863        if !chunk.is_empty() && self.input().is_some() && self.input().unwrap().buf.is_some() {
864            self.input_mut()
865                .unwrap()
866                .buf
867                .as_mut()
868                .unwrap()
869                .push_bytes(chunk);
870        }
871
872        if let Some(encoding) = encoding {
873            self.encoding = Some(encoding.to_owned());
874            if let Some(handler) = find_encoding_handler(self.encoding().unwrap()) {
875                self.switch_to_encoding(handler);
876            } else {
877                xml_fatal_err_msg_str!(
878                    self,
879                    XmlParserErrors::XmlErrUnsupportedEncoding,
880                    "Unsupported encoding {}\n",
881                    encoding
882                );
883            };
884        } else if !matches!(enc, XmlCharEncoding::None) {
885            self.switch_encoding(enc);
886        }
887
888        0
889    }
890
891    /// Clear (release owned resources) and reinitialize a parser context
892    #[doc(alias = "xmlClearParserCtxt")]
893    pub fn clear(&mut self) {
894        self.node_seq.clear();
895        self.reset();
896    }
897
898    pub(crate) fn advance(&mut self, nth: usize) {
899        if self.content_bytes().len() < nth {
900            self.force_grow();
901        }
902        let input = self.input_mut().unwrap();
903        input.cur += nth;
904        input.col += nth as i32;
905    }
906
907    /// Advance the current pointer.  
908    /// If `'\n'` is found, line number is also increased.
909    pub(crate) fn advance_with_line_handling(&mut self, nth: usize) {
910        if self.content_bytes().len() < nth {
911            self.force_grow();
912        }
913        let input = self.input_mut().unwrap();
914        let content = &input.current_contents()[..nth];
915        let mut line = input.line;
916        let mut col = input.col;
917        let mut next = content.split(|b| b == &b'\n');
918        col += next.next().unwrap().len() as i32;
919        for cur in next {
920            line += 1;
921            col = cur.len() as i32 + 1;
922        }
923        input.line = line;
924        input.col = col;
925        input.cur += nth;
926    }
927
928    pub fn content_bytes(&self) -> &[u8] {
929        let input = self.input().unwrap();
930        input.current_contents()
931    }
932
933    /// Skip to the next character.
934    #[doc(alias = "xmlNextChar")]
935    pub(crate) fn skip_char(&mut self) {
936        if matches!(self.instate, XmlParserInputState::XmlParserEOF) || self.input().is_none() {
937            return;
938        }
939
940        let input = self.input().unwrap();
941        if input.cur > input.base_contents().len() {
942            xml_err_internal!(self, "Parser input data memory error\n");
943
944            self.err_no = XmlParserErrors::XmlErrInternalError as i32;
945            self.stop();
946
947            return;
948        }
949
950        if input.remainder_len() < INPUT_CHUNK {
951            if self.force_grow() < 0 {
952                return;
953            }
954            if self.content_bytes().is_empty() {
955                return;
956            }
957        }
958
959        let Some(c) = self.current_char() else {
960            return;
961        };
962        // 2.11 End-of-Line Handling
963        //   the literal two-character sequence "#xD#xA" or a standalone
964        //   literal #xD, an XML processor must pass to the application
965        //   the single character #xA.
966        let input = self.input_mut().unwrap();
967        if c == '\n' {
968            input.line += 1;
969            input.col = 1;
970        } else {
971            input.col += 1;
972        }
973        input.cur += c.len_utf8();
974    }
975
976    /// skip all blanks character found at that point in the input streams.  
977    /// It pops up finished entities in the process if allowable at that point.
978    ///
979    /// Returns the number of space chars skipped
980    #[doc(alias = "xmlSkipBlankChars")]
981    pub(crate) fn skip_blanks(&mut self) -> i32 {
982        let mut res = 0i32;
983
984        // It's Okay to use CUR/NEXT here since all the blanks are on the ASCII range.
985        if (self.input_tab.len() == 1 && !matches!(self.instate, XmlParserInputState::XmlParserDTD))
986            || matches!(self.instate, XmlParserInputState::XmlParserStart)
987        {
988            // if we are in the document content, go really fast
989            let input = self.input().unwrap();
990            let mut line = input.line;
991            let mut col = input.col;
992            self.force_grow();
993            let mut content = self.content_bytes();
994            while content.first().is_some_and(XmlCharValid::is_xml_blank_char) {
995                if content[0] == b'\n' {
996                    line += 1;
997                    col = 1;
998                } else {
999                    col += 1;
1000                }
1001                content = &content[1..];
1002                res = res.saturating_add(1);
1003                if content.is_empty() {
1004                    let len = self.content_bytes().len();
1005                    let input = self.input_mut().unwrap();
1006                    input.cur += len;
1007                    input.line = line;
1008                    input.col = col;
1009                    self.force_grow();
1010                    content = self.content_bytes();
1011                }
1012            }
1013
1014            let diff = self.content_bytes().len() - content.len();
1015            if diff > 0 {
1016                let input = self.input_mut().unwrap();
1017                input.cur += diff;
1018                input.line = line;
1019                input.col = col;
1020            }
1021        } else {
1022            let expand_pe = self.external != 0 || self.input_tab.len() != 1;
1023
1024            while !matches!(self.instate, XmlParserInputState::XmlParserEOF) {
1025                if self.current_byte().is_xml_blank_char() {
1026                    // CHECKED tstblanks.xml
1027                    self.skip_char();
1028                } else if self.current_byte() == b'%' {
1029                    // Need to handle support of entities branching here
1030                    if !expand_pe || self.nth_byte(1).is_xml_blank_char() || self.nth_byte(1) == 0 {
1031                        break;
1032                    }
1033                    self.parse_pe_reference();
1034                } else if self.current_byte() == 0 {
1035                    let mut consumed: u64;
1036
1037                    if self.input_tab.len() <= 1 {
1038                        break;
1039                    }
1040
1041                    consumed = self.input().unwrap().consumed;
1042                    consumed =
1043                        consumed.saturating_add(self.input().unwrap().offset_from_base() as u64);
1044
1045                    // Add to sizeentities when parsing an external entity for the first time.
1046                    // Is this `unwrap` OK ????
1047                    let mut ent = self.input().unwrap().entity.unwrap();
1048                    if matches!(ent.etype, XmlEntityType::XmlExternalParameterEntity)
1049                        && ent.flags & XML_ENT_PARSED as i32 == 0
1050                    {
1051                        ent.flags |= XML_ENT_PARSED as i32;
1052
1053                        self.sizeentities = self.sizeentities.saturating_add(consumed);
1054                    }
1055
1056                    self.parser_entity_check(consumed);
1057
1058                    self.pop_input();
1059                } else {
1060                    break;
1061                }
1062
1063                // Also increase the counter when entering or exiting a PERef.
1064                // The spec says: "When a parameter-entity reference is recognized
1065                // in the DTD and included, its replacement text MUST be enlarged
1066                // by the attachment of one leading and one following space (#x20) character."
1067                res = res.saturating_add(1);
1068            }
1069        }
1070        res
1071    }
1072
1073    /// The current c_char value, if using UTF-8 this may actually span multiple bytes
1074    /// in the input buffer.  
1075    ///
1076    /// Implement the end of line normalization:  
1077    ///
1078    /// 2.11 End-of-Line Handling  
1079    /// Wherever an external parsed entity or the literal entity value
1080    /// of an internal parsed entity contains either the literal two-character
1081    /// sequence "#xD#xA" or a standalone literal #xD, an XML processor
1082    /// must pass to the application the single character #xA.  
1083    /// This behavior can conveniently be produced by normalizing all
1084    /// line breaks to #xA on input, before parsing.
1085    ///
1086    /// Returns the current char value and its length
1087    #[doc(alias = "xmlCurrentChar")]
1088    pub(crate) fn current_char(&mut self) -> Option<char> {
1089        if matches!(self.instate, XmlParserInputState::XmlParserEOF) {
1090            return None;
1091        }
1092
1093        if self.input()?.remainder_len() < INPUT_CHUNK && self.force_grow() < 0 {
1094            return None;
1095        }
1096
1097        let cur_byte = self.current_byte();
1098        if (0x20..0x80).contains(&cur_byte) {
1099            return Some(cur_byte as char);
1100        }
1101
1102        if self.content_bytes().is_empty() {
1103            return None;
1104        }
1105
1106        let c = if cur_byte >= 0x80 {
1107            let input = self.input_mut().unwrap();
1108            if let Some(buf) = input.buf.as_ref() {
1109                if buf.encoder.is_some() {
1110                    unsafe {
1111                        // # Safety
1112                        // If `buf.encoder` is `Some`, `buf.buffer` is decoded by `buf.encoder`.
1113                        // Decoded buffer is already validated as UTF-8 byte sequence,
1114                        // so this function works well.
1115                        from_utf8_unchecked(&buf.buffer[input.cur..])
1116                            .chars()
1117                            .next()?
1118                    }
1119                } else if input.cur < input.valid_up_to {
1120                    unsafe {
1121                        // # Safety
1122                        // `buf.buffer[input.cur..input.valid_up_to]` is kept valid
1123                        // as a UTF-8 byte sequence.
1124                        from_utf8_unchecked(&buf.buffer[input.cur..input.valid_up_to])
1125                            .chars()
1126                            .next()?
1127                    }
1128                } else {
1129                    match from_utf8(&buf.buffer[input.cur..]) {
1130                        Ok(s) => {
1131                            input.valid_up_to = input.cur + s.len();
1132                            s.chars().next()?
1133                        }
1134                        Err(e) if e.valid_up_to() > 0 => {
1135                            input.valid_up_to = input.cur + e.valid_up_to();
1136                            let s = unsafe {
1137                                // # Safety
1138                                // Refer to the documents for `from_utf8_unchecked` and `Utf8Error`.
1139                                from_utf8_unchecked(&buf.buffer[input.cur..][..e.valid_up_to()])
1140                            };
1141                            s.chars().next().unwrap()
1142                        }
1143                        Err(e) => {
1144                            return match e.error_len() {
1145                                Some(_) => {
1146                                    // If we detect an UTF8 error that probably mean that the
1147                                    // input encoding didn't get properly advertised in the
1148                                    // declaration header. Report the error and switch the encoding
1149                                    // to ISO-Latin-1 (if you don't like this policy, just declare the encoding !)
1150                                    if input.remainder_len() < 4 {
1151                                        __xml_err_encoding!(
1152                                            self,
1153                                            XmlParserErrors::XmlErrInvalidChar,
1154                                            "Input is not proper UTF-8, indicate encoding !\n"
1155                                        );
1156                                    } else {
1157                                        let buffer = format!(
1158                                            "Bytes: 0x{:02X} 0x{:02X} 0x{:02X} 0x{:02X}\n",
1159                                            buf.buffer[input.cur],
1160                                            buf.buffer[input.cur + 1],
1161                                            buf.buffer[input.cur + 2],
1162                                            buf.buffer[input.cur + 3],
1163                                        );
1164                                        __xml_err_encoding!(
1165                                            self,
1166                                            XmlParserErrors::XmlErrInvalidChar,
1167                                            "Input is not proper UTF-8, indicate encoding !\n{}",
1168                                            buffer
1169                                        );
1170                                    }
1171                                    self.input_mut()
1172                                        .unwrap()
1173                                        .buf
1174                                        .as_mut()
1175                                        .unwrap()
1176                                        .fallback_to_iso_8859_1();
1177                                    self.charset = XmlCharEncoding::ISO8859_1;
1178                                    self.current_char()
1179                                }
1180                                None => None,
1181                            };
1182                        }
1183                    }
1184                }
1185            } else if let Some(content) = input
1186                .entity
1187                .as_deref()
1188                .and_then(|ent| ent.content.as_deref())
1189            {
1190                content[input.cur..].chars().next()?
1191            } else {
1192                return None;
1193            }
1194        } else {
1195            cur_byte as char
1196        };
1197
1198        if (c.len_utf8() > 1 && !c.is_xml_char()) || (c.len_utf8() == 1 && c == '\0') {
1199            xml_err_encoding_int!(
1200                self,
1201                XmlParserErrors::XmlErrInvalidChar,
1202                "Char 0x{:X} out of allowed range\n",
1203                c as i32
1204            );
1205        }
1206        if c == '\r' {
1207            if self.nth_byte(1) == b'\n' {
1208                let input = self.input_mut().unwrap();
1209                input.cur += 1;
1210            }
1211            return Some('\n');
1212        }
1213        Some(c)
1214    }
1215
1216    pub(super) fn consume_char_if(
1217        &mut self,
1218        mut f: impl FnMut(&Self, char) -> bool,
1219    ) -> Option<char> {
1220        let c = self.current_char()?;
1221        f(self, c).then(|| {
1222            let input = self.input_mut().unwrap();
1223            if c == '\n' {
1224                input.line += 1;
1225                input.col = 1;
1226            } else {
1227                input.col += 1;
1228            }
1229            input.cur += c.len_utf8();
1230            c
1231        })
1232    }
1233
1234    // Lookup the namespace name for the @prefix (which ca be NULL)
1235    //
1236    // Returns the namespace name or NULL if not bound
1237    #[doc(alias = "xmlGetNamespace")]
1238    pub(crate) fn get_namespace(&self, prefix: Option<&str>) -> Option<&str> {
1239        if prefix == self.str_xml.as_deref() {
1240            return self.str_xml_ns.as_deref();
1241        }
1242        for (pre, href) in self.ns_tab.iter().rev() {
1243            if pre.as_deref() == prefix {
1244                if prefix.is_none() && href.is_empty() {
1245                    return None;
1246                }
1247                return Some(href.as_str());
1248            }
1249        }
1250        None
1251    }
1252
1253    /// Pushes a new parser input on top of the input stack
1254    ///
1255    /// Returns -1 in case of error, the index in the stack otherwise
1256    #[doc(alias = "inputPush")]
1257    pub fn input_push(&mut self, value: XmlParserInput<'a>) -> usize {
1258        self.input_tab.push(value);
1259        self.input_tab.len() - 1
1260    }
1261
1262    /// Pops the top parser input from the input stack
1263    ///
1264    /// Returns the input just removed
1265    #[doc(alias = "inputPop")]
1266    pub fn input_pop(&mut self) -> Option<XmlParserInput<'a>> {
1267        self.input_tab.pop()
1268    }
1269
1270    /// Pushes a new element node on top of the node stack
1271    ///
1272    /// Returns -1 in case of error, the index in the stack otherwise
1273    #[doc(alias = "nodePush")]
1274    pub(crate) fn node_push(&mut self, value: XmlNodePtr) -> i32 {
1275        if self.node_tab.len() as u32 > XML_PARSER_MAX_DEPTH
1276            && self.options & XmlParserOption::XmlParseHuge as i32 == 0
1277        {
1278            let max_depth = XML_PARSER_MAX_DEPTH as i32;
1279            xml_fatal_err_msg_int!(
1280                self,
1281                XmlParserErrors::XmlErrInternalError,
1282                format!("Excessive depth in document: {max_depth} use XML_PARSE_HUGE option\n")
1283                    .as_str(),
1284                max_depth
1285            );
1286            self.halt();
1287            return -1;
1288        }
1289        self.node = Some(value);
1290        self.node_tab.push(value);
1291        self.node_tab.len() as i32 - 1
1292    }
1293
1294    /// Pops the top element node from the node stack
1295    ///
1296    /// Returns the node just removed
1297    #[doc(alias = "nodePop")]
1298    pub(crate) fn node_pop(&mut self) -> Option<XmlNodePtr> {
1299        let res = self.node_tab.pop();
1300        self.node = self.node_tab.last().cloned();
1301        res
1302    }
1303
1304    // /// Pushes a new element name on top of the name stack
1305    // ///
1306    // /// Returns -1 in case of error, the index in the stack otherwise
1307    // #[doc(alias = "namePush")]
1308    // pub(crate) fn name_push(&mut self, value: &str) -> i32 {
1309    //     self.name = Some(value.to_owned());
1310    //     self.name_tab.push(value.to_owned());
1311    //     self.name_tab.len() as i32 - 1
1312    // }
1313
1314    /// Pops the top element name from the name stack
1315    ///
1316    /// Returns the name just removed
1317    #[doc(alias = "namePop")]
1318    pub(crate) fn name_pop(&mut self) -> Option<Rc<str>> {
1319        let res = self.name_tab.pop();
1320        let name = self.name_tab.last().cloned();
1321        self.name = name;
1322        res
1323    }
1324
1325    #[doc(alias = "spacePush")]
1326    pub(crate) fn space_push(&mut self, val: i32) -> i32 {
1327        self.space_tab.push(val);
1328        self.space_tab.len() as i32 - 1
1329    }
1330
1331    #[doc(alias = "spacePop")]
1332    pub(crate) fn space_pop(&mut self) -> i32 {
1333        self.space_tab.pop().unwrap_or(-1)
1334    }
1335
1336    pub(crate) fn space(&self) -> i32 {
1337        *self.space_tab.last().unwrap_or(&-1)
1338    }
1339
1340    pub(crate) fn space_mut(&mut self) -> &mut i32 {
1341        self.space_tab.last_mut().expect("Internal Error")
1342    }
1343
1344    /// Pushes a new element name/prefix/URL on top of the name stack
1345    ///
1346    /// Returns -1 in case of error, the index in the stack otherwise
1347    #[doc(alias = "nameNsPush")]
1348    pub(crate) fn name_ns_push(
1349        &mut self,
1350        value: &str,
1351        prefix: Option<&str>,
1352        uri: Option<&str>,
1353        line: i32,
1354        ns_nr: i32,
1355    ) -> i32 {
1356        let name: Rc<str> = value.into();
1357        self.name = Some(name.clone());
1358        self.name_tab.push(name);
1359        self.push_tab
1360            .resize(self.name_tab.len(), XmlStartTag::default());
1361        let res = self.name_tab.len() - 1;
1362        self.push_tab[res].prefix = prefix.map(|pre| pre.into());
1363        self.push_tab[res].uri = uri.map(|uri| uri.into());
1364        self.push_tab[res].line = line;
1365        self.push_tab[res].ns_nr = ns_nr;
1366        res as i32
1367    }
1368
/// Pops the top element/prefix/URI name from the name stack.
/// The new top of the stack (if any) becomes the current name.
///
/// Returns the name just removed, or `None` when the stack was empty.
#[doc(alias = "nameNsPop")]
#[cfg(feature = "libxml_push")]
pub(crate) fn name_ns_pop(&mut self) -> Option<Rc<str>> {
    let popped = self.name_tab.pop();
    self.name = self.name_tab.last().map(Rc::clone);
    popped
}
1379
1380    /// Pushes a new parser namespace on top of the ns stack
1381    ///
1382    /// Returns -1 in case of error, -2 if the namespace should be discarded and the index in the stack otherwise.
1383    #[doc(alias = "nsPush")]
1384    pub(crate) fn ns_push(&mut self, prefix: Option<&str>, url: &str) -> i32 {
1385        if self.options & XmlParserOption::XmlParseNsClean as i32 != 0 {
1386            for (pre, href) in self.ns_tab.iter().rev() {
1387                if pre.as_deref() == prefix {
1388                    // in scope
1389                    if href.as_str() == url {
1390                        return -2;
1391                    }
1392                    // out of scope keep it
1393                    break;
1394                }
1395            }
1396        }
1397        self.ns_tab
1398            .push((prefix.map(|p| p.to_owned()), url.to_owned()));
1399        self.ns_tab.len() as i32
1400    }
1401
1402    /// Pops the top @nr parser prefix/namespace from the ns stack
1403    ///
1404    /// Returns the number of namespaces removed
1405    #[doc(alias = "nsPop")]
1406    pub(crate) fn ns_pop(&mut self, mut nr: usize) -> usize {
1407        if self.ns_tab.len() < nr {
1408            generic_error!("Pbm popping {} NS\n", nr);
1409            nr = self.ns_tab.len();
1410        }
1411        if self.ns_tab.is_empty() {
1412            return 0;
1413        }
1414        let rem = self.ns_tab.len() - nr;
1415        self.ns_tab.truncate(rem);
1416        nr
1417    }
1418
1419    /// Match to a new input stream which is stacked on top of the previous one(s).
1420    ///
1421    /// Returns -1 in case of error or the index in the input stack
1422    #[doc(alias = "xmlPushInput")]
1423    pub fn push_input(&mut self, input: XmlParserInput<'a>) -> Result<usize, XmlParserErrors> {
1424        if get_parser_debug_entities() != 0 {
1425            if self.input().is_some() && self.input().unwrap().filename.is_some() {
1426                generic_error!(
1427                    "{}({}): ",
1428                    self.input().unwrap().filename.as_ref().unwrap(),
1429                    self.input().unwrap().line
1430                );
1431            }
1432            let cur = match from_utf8(&input.base_contents()[input.cur..]) {
1433                Ok(s) => s,
1434                Err(e) if e.valid_up_to() > 0 => {
1435                    unsafe {
1436                        // # Safety
1437                        // Refer to the documents for `from_utf8_unchecked` and `Utf8Error`.
1438                        from_utf8_unchecked(
1439                            &input.base_contents()[input.cur..input.cur + e.valid_up_to()],
1440                        )
1441                    }
1442                }
1443                _ => "(Failed to read buffer)",
1444            };
1445            generic_error!("Pushing input {} : {}\n", self.input_tab.len() + 1, cur);
1446        }
1447        if (self.input_tab.len() > 40 && self.options & XmlParserOption::XmlParseHuge as i32 == 0)
1448            || self.input_tab.len() > 100
1449        {
1450            xml_fatal_err(self, XmlParserErrors::XmlErrEntityLoop, None);
1451            while self.input_tab.len() > 1 {
1452                self.input_pop();
1453            }
1454            return Err(XmlParserErrors::XmlErrEntityLoop);
1455        }
1456        let ret = self.input_push(input);
1457        if matches!(self.instate, XmlParserInputState::XmlParserEOF) {
1458            return Err(XmlParserErrors::XmlErrInternalError);
1459        }
1460        self.grow();
1461        Ok(ret)
1462    }
1463
1464    /// The current input pointed by self.input came to an end pop it and return the next c_char.
1465    ///
1466    /// Returns the current XmlChar in the parser context
1467    #[doc(alias = "xmlPopInput")]
1468    pub fn pop_input(&mut self) -> u8 {
1469        if self.input_tab.len() <= 1 {
1470            return 0;
1471        }
1472        if get_parser_debug_entities() != 0 {
1473            generic_error!("Popping input {}\n", self.input_tab.len());
1474        }
1475        if self.input_tab.len() > 1
1476            && self.in_subset == 0
1477            && !matches!(self.instate, XmlParserInputState::XmlParserEOF)
1478        {
1479            xml_fatal_err(
1480                self,
1481                XmlParserErrors::XmlErrInternalError,
1482                Some("Unfinished entity outside the DTD"),
1483            );
1484        }
1485        let input = self.input_pop().unwrap();
1486        if let Some(mut entity) = input.entity {
1487            entity.flags &= !XML_ENT_EXPANDING as i32;
1488        }
1489
1490        if self.current_byte() == 0 {
1491            self.force_grow();
1492        }
1493        self.current_byte()
1494    }
1495
1496    /// Do the SAX2 detection and specific initialization
1497    #[doc(alias = "xmlDetectSAX2")]
1498    pub(crate) fn detect_sax2(&mut self) {
1499        let sax = self.sax.as_deref();
1500        #[cfg(feature = "sax1")]
1501        {
1502            if sax.is_some_and(|sax| {
1503                sax.initialized == XML_SAX2_MAGIC as u32
1504                    && (sax.start_element_ns.is_some()
1505                        || sax.end_element_ns.is_some()
1506                        || (sax.start_element.is_none() && sax.end_element.is_none()))
1507            }) {
1508                self.sax2 = true;
1509            }
1510        }
1511        #[cfg(not(feature = "sax1"))]
1512        {
1513            self.sax2 = true;
1514        }
1515
1516        self.str_xml = Some(Cow::Borrowed("xml"));
1517        self.str_xmlns = Some(Cow::Borrowed("xmlns"));
1518        self.str_xml_ns = Some(Cow::Borrowed(XML_XML_NAMESPACE));
1519    }
1520
1521    /// Applies the options to the parser context
1522    ///
1523    /// Returns 0 in case of success, the set of unknown or unimplemented options in case of error.
1524    #[doc(alias = "xmlCtxtUseOptionsInternal")]
1525    pub(crate) fn use_options_internal(&mut self, mut options: i32, encoding: Option<&str>) -> i32 {
1526        if let Some(encoding) = encoding {
1527            self.encoding = Some(encoding.to_owned());
1528        }
1529        if options & XmlParserOption::XmlParseRecover as i32 != 0 {
1530            self.recovery = true;
1531            options -= XmlParserOption::XmlParseRecover as i32;
1532            self.options |= XmlParserOption::XmlParseRecover as i32;
1533        } else {
1534            self.recovery = false;
1535        }
1536        if options & XmlParserOption::XmlParseDTDLoad as i32 != 0 {
1537            self.loadsubset = XML_DETECT_IDS as i32;
1538            options -= XmlParserOption::XmlParseDTDLoad as i32;
1539            self.options |= XmlParserOption::XmlParseDTDLoad as i32;
1540        } else {
1541            self.loadsubset = 0;
1542        }
1543        if options & XmlParserOption::XmlParseDTDAttr as i32 != 0 {
1544            self.loadsubset |= XML_COMPLETE_ATTRS as i32;
1545            options -= XmlParserOption::XmlParseDTDAttr as i32;
1546            self.options |= XmlParserOption::XmlParseDTDAttr as i32;
1547        }
1548        if options & XmlParserOption::XmlParseNoEnt as i32 != 0 {
1549            self.replace_entities = true;
1550            // self.loadsubset |= XML_DETECT_IDS;
1551            options -= XmlParserOption::XmlParseNoEnt as i32;
1552            self.options |= XmlParserOption::XmlParseNoEnt as i32;
1553        } else {
1554            self.replace_entities = false;
1555        }
1556        if options & XmlParserOption::XmlParsePedantic as i32 != 0 {
1557            self.pedantic = true;
1558            options -= XmlParserOption::XmlParsePedantic as i32;
1559            self.options |= XmlParserOption::XmlParsePedantic as i32;
1560        } else {
1561            self.pedantic = false;
1562        }
1563        if options & XmlParserOption::XmlParseNoBlanks as i32 != 0 {
1564            self.keep_blanks = false;
1565            if let Some(sax) = self.sax.as_deref_mut() {
1566                sax.ignorable_whitespace = Some(xml_sax2_ignorable_whitespace);
1567            }
1568            options -= XmlParserOption::XmlParseNoBlanks as i32;
1569            self.options |= XmlParserOption::XmlParseNoBlanks as i32;
1570        } else {
1571            self.keep_blanks = true;
1572        }
1573        if options & XmlParserOption::XmlParseDTDValid as i32 != 0 {
1574            self.validate = true;
1575            if options & XmlParserOption::XmlParseNoWarning as i32 != 0 {
1576                self.vctxt.warning = None;
1577            }
1578            if options & XmlParserOption::XmlParseNoError as i32 != 0 {
1579                self.vctxt.error = None;
1580            }
1581            options -= XmlParserOption::XmlParseDTDValid as i32;
1582            self.options |= XmlParserOption::XmlParseDTDValid as i32;
1583        } else {
1584            self.validate = false;
1585        }
1586        if options & XmlParserOption::XmlParseNoWarning as i32 != 0 {
1587            if let Some(sax) = self.sax.as_deref_mut() {
1588                sax.warning = None;
1589            }
1590            options -= XmlParserOption::XmlParseNoWarning as i32;
1591        }
1592        if options & XmlParserOption::XmlParseNoError as i32 != 0 {
1593            if let Some(sax) = self.sax.as_deref_mut() {
1594                sax.error = None;
1595                sax.fatal_error = None;
1596            }
1597            options -= XmlParserOption::XmlParseNoError as i32;
1598        }
1599        #[cfg(feature = "sax1")]
1600        if options & XmlParserOption::XmlParseSAX1 as i32 != 0 {
1601            if let Some(sax) = self.sax.as_deref_mut() {
1602                sax.start_element = Some(xml_sax2_start_element);
1603                sax.end_element = Some(xml_sax2_end_element);
1604                sax.start_element_ns = None;
1605                sax.end_element_ns = None;
1606                sax.initialized = 1;
1607            }
1608            options -= XmlParserOption::XmlParseSAX1 as i32;
1609            self.options |= XmlParserOption::XmlParseSAX1 as i32;
1610        }
1611        if options & XmlParserOption::XmlParseNoCDATA as i32 != 0 {
1612            if let Some(sax) = self.sax.as_deref_mut() {
1613                sax.cdata_block = None;
1614            }
1615            options -= XmlParserOption::XmlParseNoCDATA as i32;
1616            self.options |= XmlParserOption::XmlParseNoCDATA as i32;
1617        }
1618        if options & XmlParserOption::XmlParseNsClean as i32 != 0 {
1619            self.options |= XmlParserOption::XmlParseNsClean as i32;
1620            options -= XmlParserOption::XmlParseNsClean as i32;
1621        }
1622        if options & XmlParserOption::XmlParseNoNet as i32 != 0 {
1623            self.options |= XmlParserOption::XmlParseNoNet as i32;
1624            options -= XmlParserOption::XmlParseNoNet as i32;
1625        }
1626        if options & XmlParserOption::XmlParseCompact as i32 != 0 {
1627            self.options |= XmlParserOption::XmlParseCompact as i32;
1628            options -= XmlParserOption::XmlParseCompact as i32;
1629        }
1630        if options & XmlParserOption::XmlParseOld10 as i32 != 0 {
1631            self.options |= XmlParserOption::XmlParseOld10 as i32;
1632            options -= XmlParserOption::XmlParseOld10 as i32;
1633        }
1634        if options & XmlParserOption::XmlParseNoBasefix as i32 != 0 {
1635            self.options |= XmlParserOption::XmlParseNoBasefix as i32;
1636            options -= XmlParserOption::XmlParseNoBasefix as i32;
1637        }
1638        if options & XmlParserOption::XmlParseHuge as i32 != 0 {
1639            self.options |= XmlParserOption::XmlParseHuge as i32;
1640            options -= XmlParserOption::XmlParseHuge as i32;
1641        }
1642        if options & XmlParserOption::XmlParseOldSAX as i32 != 0 {
1643            self.options |= XmlParserOption::XmlParseOldSAX as i32;
1644            options -= XmlParserOption::XmlParseOldSAX as i32;
1645        }
1646        if options & XmlParserOption::XmlParseIgnoreEnc as i32 != 0 {
1647            self.options |= XmlParserOption::XmlParseIgnoreEnc as i32;
1648            options -= XmlParserOption::XmlParseIgnoreEnc as i32;
1649        }
1650        if options & XmlParserOption::XmlParseBigLines as i32 != 0 {
1651            self.options |= XmlParserOption::XmlParseBigLines as i32;
1652            options -= XmlParserOption::XmlParseBigLines as i32;
1653        }
1654        self.linenumbers = 1;
1655        options
1656    }
1657
1658    /// Applies the options to the parser context
1659    ///
1660    /// Returns 0 in case of success, the set of unknown or unimplemented options
1661    /// in case of error.
1662    #[doc(alias = "xmlCtxtUseOptions")]
1663    pub fn use_options(&mut self, options: i32) -> i32 {
1664        self.use_options_internal(options, None)
1665    }
1666
1667    /// Common front-end for the xmlRead functions
1668    ///
1669    /// Returns the resulting document tree or NULL
1670    #[doc(alias = "xmlDoRead")]
1671    pub(crate) fn do_read(
1672        &mut self,
1673        url: Option<&str>,
1674        encoding: Option<&str>,
1675        options: i32,
1676    ) -> Option<XmlDocPtr> {
1677        self.use_options_internal(options, encoding);
1678        if let Some(encoding) = encoding {
1679            // TODO: We should consider to set XML_PARSE_IGNORE_ENC if the
1680            // caller provided an encoding. Otherwise, we might match to
1681            // the encoding from the XML declaration which is likely to
1682            // break things. Also see xmlSwitchInputEncoding.
1683            if let Some(handler) = find_encoding_handler(encoding) {
1684                self.switch_to_encoding(handler);
1685            }
1686        }
1687        if url.is_some() {
1688            if let Some(input) = self.input_mut().filter(|input| input.filename.is_none()) {
1689                input.filename = url.map(|u| u.to_owned());
1690            }
1691        }
1692        self.parse_document();
1693        if self.well_formed || self.recovery {
1694            self.my_doc.take()
1695        } else {
1696            if let Some(my_doc) = self.my_doc.take() {
1697                unsafe {
1698                    // # Safety
1699                    // `my_doc` is no longer used and not leaked to the out of this function.
1700                    // Therefore, this operation is safe.
1701                    xml_free_doc(my_doc);
1702                }
1703            }
1704            None
1705        }
1706    }
1707
1708    /// change the input functions when discovering the character encoding of a given entity.
1709    ///
1710    /// Returns 0 in case of success, -1 otherwise
1711    #[doc(alias = "xmlSwitchInputEncoding")]
1712    pub(crate) fn switch_input_encoding(
1713        &mut self,
1714        input: &mut XmlParserInput<'_>,
1715        handler: XmlCharEncodingHandler,
1716    ) -> i32 {
1717        if input.buf.as_mut().is_none() {
1718            xml_err_internal!(self, "static memory buffer doesn't support encoding\n");
1719            return -1;
1720        }
1721
1722        if input
1723            .buf
1724            .as_mut()
1725            .unwrap()
1726            .encoder
1727            .replace(handler)
1728            .is_some()
1729        {
1730            // Switching encodings during parsing is a really bad idea,
1731            // but Chromium can match between ISO-8859-1 and UTF-16 before
1732            // separate calls to xmlParseChunk.
1733            //
1734            // TODO: We should check whether the "raw" input buffer is empty and
1735            // convert the old content using the old encoder.
1736            return 0;
1737        }
1738
1739        self.charset = XmlCharEncoding::UTF8;
1740
1741        // Is there already some content down the pipe to convert ?
1742        if input.buf.as_mut().unwrap().buffer.is_empty() {
1743            return 0;
1744        }
1745        // FIXME: The BOM shouldn't be skipped here, but in the parsing code.
1746
1747        // Specific handling of the Byte Order Mark for UTF-16
1748        if matches!(
1749            input.buf.as_mut().unwrap().encoder.as_ref().unwrap().name(),
1750            "UTF-16LE" | "UTF-16"
1751        ) && input.base_contents()[input.cur] == 0xFF
1752            && input.base_contents()[input.cur + 1] == 0xFE
1753        {
1754            input.cur += 2;
1755        }
1756        if input.buf.as_mut().unwrap().encoder.as_ref().unwrap().name() == "UTF-16BE"
1757            && input.base_contents()[input.cur] == 0xFE
1758            && input.base_contents()[input.cur + 1] == 0xFF
1759        {
1760            input.cur += 2;
1761        }
1762        // Errata on XML-1.0 June 20 2001
1763        // Specific handling of the Byte Order Mark for UTF-8
1764        if input.buf.as_mut().unwrap().encoder.as_ref().unwrap().name() == "UTF-8"
1765            && input.base_contents()[input.cur] == 0xEF
1766            && input.base_contents()[input.cur + 1] == 0xBB
1767            && input.base_contents()[input.cur + 2] == 0xBF
1768        {
1769            input.cur += 3;
1770        }
1771
1772        // Shrink the current input buffer.
1773        // Move it as the raw buffer and create a new input buffer
1774        let processed = input.offset_from_base();
1775        input.buf.as_mut().unwrap().trim_head(processed);
1776        input.consumed += processed as u64;
1777        let input_buf = input.buf.as_mut().unwrap();
1778        let using = input_buf.buffer.len();
1779        input_buf.raw = take(&mut input_buf.buffer);
1780        input_buf.rawconsumed = processed as u64;
1781
1782        // TODO: We must flush and decode the whole buffer to make functions
1783        // like xmlReadMemory work with a user-provided encoding. If the
1784        // encoding is specified directly, we should probably set
1785        // XML_PARSE_IGNORE_ENC in xmlDoRead to avoid switching encodings
1786        // twice. Then we could set "flush" to false which should save
1787        // a considerable amount of memory when parsing from memory.
1788        // It's probably even possible to remove this whole if-block
1789        // completely.
1790        let res = input_buf.decode(true);
1791        input.reset_base();
1792        if res.is_err() {
1793            // TODO: This could be an out of memory or an encoding error.
1794            xml_err_internal!(self, "switching encoding: encoder error\n");
1795            self.halt();
1796            return -1;
1797        }
1798        let input_buf = input.buf.as_mut().unwrap();
1799        let consumed = using - input_buf.raw.len();
1800        let rawconsumed = input_buf.rawconsumed.saturating_add(consumed as u64);
1801        input_buf.rawconsumed = rawconsumed;
1802        0
1803    }
1804
1805    /// change the input functions when discovering the character encoding
1806    /// of a given entity.
1807    ///
1808    /// Returns 0 in case of success, -1 otherwise
1809    #[doc(alias = "xmlSwitchToEncoding")]
1810    pub fn switch_to_encoding(&mut self, handler: XmlCharEncodingHandler) -> i32 {
1811        let mut input = self.input_pop().unwrap();
1812        let res = self.switch_input_encoding(&mut input, handler);
1813        self.input_push(input);
1814        res
1815    }
1816
1817    /// Change the input functions when discovering the character encoding of a given entity.
1818    ///
1819    /// Returns 0 in case of success, -1 otherwise
1820    #[doc(alias = "xmlSwitchEncoding")]
1821    pub fn switch_encoding(&mut self, enc: XmlCharEncoding) -> i32 {
1822        // FIXME: The BOM shouldn't be skipped here, but in the parsing code.
1823        //
1824        // Note that we look for a decoded UTF-8 BOM when switching to UTF-16.
1825        // This is mostly useless but Webkit/Chromium relies on this behavior.
1826        // See https://bugs.chromium.org/p/chromium/issues/detail?id=1451026
1827        if self
1828            .input()
1829            .is_some_and(|input| input.consumed == 0 && input.offset_from_base() == 0)
1830            && matches!(
1831                enc,
1832                XmlCharEncoding::UTF8 | XmlCharEncoding::UTF16LE | XmlCharEncoding::UTF16BE
1833            )
1834        {
1835            // Errata on XML-1.0 June 20 2001
1836            // Specific handling of the Byte Order Mark for UTF-8
1837            if self.content_bytes().starts_with(&[0xEF, 0xBB, 0xBF]) {
1838                self.input_mut().unwrap().cur += 3;
1839            }
1840        }
1841
1842        let Some(handler) = (match enc {
1843            XmlCharEncoding::Error => {
1844                __xml_err_encoding!(
1845                    self,
1846                    XmlParserErrors::XmlErrUnknownEncoding,
1847                    "encoding unknown\n"
1848                );
1849                return -1;
1850            }
1851            XmlCharEncoding::None => {
1852                // let's assume it's UTF-8 without the XML decl
1853                self.charset = XmlCharEncoding::UTF8;
1854                return 0;
1855            }
1856            XmlCharEncoding::UTF8 => {
1857                // default encoding, no conversion should be needed
1858                self.charset = XmlCharEncoding::UTF8;
1859                return 0;
1860            }
1861            XmlCharEncoding::EBCDIC => self.input().unwrap().detect_ebcdic(),
1862            _ => get_encoding_handler(enc),
1863        }) else {
1864            // Default handlers.
1865            match enc {
1866                XmlCharEncoding::ASCII => {
1867                    // default encoding, no conversion should be needed
1868                    self.charset = XmlCharEncoding::UTF8;
1869                    return 0;
1870                }
1871                XmlCharEncoding::ISO8859_1 => {
1872                    if self.input_tab.len() == 1
1873                        && self.encoding.is_none()
1874                        && self.input().is_some_and(|input| input.encoding.is_some())
1875                    {
1876                        self.encoding = self.input().unwrap().encoding.clone();
1877                    }
1878                    self.charset = enc;
1879                    return 0;
1880                }
1881                _ => {
1882                    let name = enc.get_name().unwrap_or("");
1883                    __xml_err_encoding!(
1884                        self,
1885                        XmlParserErrors::XmlErrUnsupportedEncoding,
1886                        "encoding not supported: {}\n",
1887                        name
1888                    );
1889                    // TODO: We could recover from errors in external entities
1890                    // if we didn't stop the parser. But most callers of this
1891                    // function don't check the return value.
1892                    self.stop();
1893                    return -1;
1894                }
1895            }
1896        };
1897        let mut input = self.input_pop().unwrap();
1898        let ret: i32 = self.switch_input_encoding(&mut input, handler);
1899        self.input_push(input);
1900        if ret < 0 || self.err_no == XmlParserErrors::XmlI18NConvFailed as i32 {
1901            // on encoding conversion errors, stop the parser
1902            self.stop();
1903            self.err_no = XmlParserErrors::XmlI18NConvFailed as i32;
1904        }
1905        ret
1906    }
1907}
1908
1909impl Default for XmlParserCtxt<'_> {
1910    fn default() -> Self {
1911        Self {
1912            sax: None,
1913            user_data: None,
1914            my_doc: None,
1915            well_formed: true,
1916            replace_entities: get_substitute_entities_default_value(),
1917            version: None,
1918            encoding: None,
1919            standalone: 0,
1920            html: 0,
1921            input_tab: vec![],
1922            node: None,
1923            node_tab: vec![],
1924            record_info: false,
1925            node_seq: XmlParserNodeInfoSeq::default(),
1926            err_no: 0,
1927            has_external_subset: false,
1928            has_perefs: false,
1929            external: 0,
1930            valid: 0,
1931            validate: get_do_validity_checking_default_value(),
1932            vctxt: XmlValidCtxt::default(),
1933            instate: XmlParserInputState::default(),
1934            token: 0,
1935            directory: None,
1936            name: None,
1937            name_tab: vec![],
1938            check_index: 0,
1939            keep_blanks: get_keep_blanks_default_value(),
1940            disable_sax: false,
1941            in_subset: 0,
1942            int_sub_name: None,
1943            ext_sub_uri: None,
1944            ext_sub_system: None,
1945            space_tab: vec![],
1946            depth: 0,
1947            charset: XmlCharEncoding::None,
1948            pedantic: get_pedantic_parser_default_value(),
1949            _private: null_mut(),
1950            loadsubset: 0,
1951            linenumbers: 0,
1952            #[cfg(feature = "catalog")]
1953            catalogs: None,
1954            recovery: false,
1955            progressive: false,
1956            atts: vec![],
1957            str_xml: None,
1958            str_xml_ns: None,
1959            str_xmlns: None,
1960            sax2: false,
1961            ns_tab: vec![],
1962            push_tab: vec![],
1963            atts_default: HashMap::new(),
1964            atts_special: HashMap::new(),
1965            ns_well_formed: true,
1966            options: 0,
1967            free_elems_nr: 0,
1968            free_elems: None,
1969            free_attrs_nr: 0,
1970            free_attrs: None,
1971            last_error: XmlError::default(),
1972            parse_mode: XmlParserMode::default(),
1973            sizeentities: 0,
1974            node_info_tab: vec![],
1975            input_id: 0,
1976            sizeentcopy: 0,
1977            end_check_state: 0,
1978            nb_errors: 0,
1979            nb_warnings: 0,
1980        }
1981    }
1982}
1983
1984impl Drop for XmlParserCtxt<'_> {
1985    /// Free all the memory used by a parser context. However the parsed
1986    /// document in self.myDoc is not freed.
1987    #[doc(alias = "xmlFreeParserCtxt")]
1988    fn drop(&mut self) {
1989        unsafe {
1990            let mut cur = self.free_elems;
1991            while let Some(now) = cur {
1992                let next = now.next.map(|node| XmlNodePtr::try_from(node).unwrap());
1993                now.free();
1994                cur = next;
1995            }
1996            if let Some(attrs) = self.free_attrs.take() {
1997                let mut cur = Some(attrs);
1998                while let Some(now) = cur {
1999                    let next = now.next;
2000                    now.free();
2001                    cur = next;
2002                }
2003            }
2004        }
2005    }
2006}