mod context;
mod error;
mod input;
mod legacy;
mod loader;
mod node_info;
mod parse;
#[cfg(feature = "libxml_push")]
mod push;
mod qname;
mod sax;
mod valid;
use std::{
io::Read,
sync::atomic::{AtomicBool, Ordering},
};
use crate::{
chvalid::XmlCharValid,
encoding::XmlCharEncoding,
io::{
XmlParserInputBuffer, cleanup_input_callbacks, cleanup_output_callbacks,
register_default_input_callbacks, register_default_output_callbacks,
},
libxml::{
catalog::xml_catalog_cleanup,
threads::{
__xml_global_init_mutex_lock, __xml_global_init_mutex_unlock,
xml_cleanup_threads_internal, xml_init_threads_internal,
},
xmlmemory::{xml_cleanup_memory_internal, xml_init_memory_internal},
xmlschemastypes::xml_schema_cleanup_types,
},
relaxng::xml_relaxng_cleanup_types,
tree::XmlDocPtr,
};
pub use context::*;
pub(crate) use error::*;
pub use input::*;
pub use legacy::*;
pub use loader::*;
pub use node_info::*;
pub use parse::*;
pub use qname::*;
pub use sax::*;
pub(crate) const XML_SUBSTITUTE_REF: usize = 1;
pub(crate) const XML_SUBSTITUTE_PEREF: usize = 2;
pub const XML_DETECT_IDS: usize = 2;
pub const XML_COMPLETE_ATTRS: usize = 4;
pub const XML_SKIP_IDS: usize = 8;
#[doc(alias = "xmlParserMaxDepth")]
pub const XML_PARSER_MAX_DEPTH: u32 = 256;
pub const XML_MAX_TEXT_LENGTH: usize = 10000000;
pub const XML_MAX_HUGE_LENGTH: usize = 1000000000;
pub const XML_MAX_NAME_LENGTH: usize = 50000;
pub const XML_MAX_DICTIONARY_LIMIT: usize = 10000000;
pub const XML_MAX_LOOKUP_LIMIT: usize = 10000000;
pub const XML_MAX_NAMELEN: usize = 100;
pub const INPUT_CHUNK: usize = 250;
pub(crate) const LINE_LEN: usize = 80;
pub static XML_STRING_TEXT: &str = "text";
pub static XML_STRING_TEXT_NOENC: &str = "textnoenc";
pub static XML_STRING_COMMENT: &str = "comment";
pub(crate) const XML_VCTXT_DTD_VALIDATED: usize = 1usize << 0;
pub(crate) const XML_VCTXT_USE_PCTXT: usize = 1usize << 1;
pub(crate) const XML_PARSER_BIG_BUFFER_SIZE: usize = 300;
pub(crate) const XML_PARSER_NON_LINEAR: usize = 5;
pub(crate) const XML_PARSER_ALLOWED_EXPANSION: usize = 1000000;
pub(crate) const XML_ENT_FIXED_COST: usize = 20;
pub(crate) trait XmlParserCharValid {
fn is_name_char(&self, ctxt: &XmlParserCtxt) -> bool;
fn is_name_start_char(&self, ctxt: &XmlParserCtxt) -> bool;
}
impl XmlParserCharValid for u8 {
fn is_name_char(&self, ctxt: &XmlParserCtxt) -> bool {
let c = *self;
if ctxt.options & XmlParserOption::XmlParseOld10 as i32 == 0 {
c != b' '
&& c != b'>'
&& c != b'/'
&& (c.is_ascii_lowercase()
|| c.is_ascii_uppercase()
|| c.is_ascii_digit()
|| c == b'_'
|| c == b':'
|| c == b'-'
|| c == b'.'
|| c == 0xB7
|| (0xC0..=0xD6).contains(&c)
|| (0xD8..=0xF6).contains(&c)
|| (0xF8..=0xFF).contains(&c))
} else {
xml_is_letter(c as u32)
|| c.is_xml_digit()
|| c == b'.'
|| c == b'-'
|| c == b'_'
|| c == b':'
|| c.is_xml_combining()
|| c.is_xml_extender()
}
}
fn is_name_start_char(&self, ctxt: &XmlParserCtxt) -> bool {
let c = *self;
if ctxt.options & XmlParserOption::XmlParseOld10 as i32 == 0 {
c != b' '
&& c != b'>'
&& c != b'/'
&& (c.is_ascii_lowercase()
|| c.is_ascii_uppercase()
|| c == b'_'
|| c == b':'
|| (0xC0..=0xD6).contains(&c)
|| (0xD8..=0xF6).contains(&c)
|| (0xF8..=0xFF).contains(&c))
} else {
xml_is_letter(c as u32) || c == b'_' || c == b':'
}
}
}
impl XmlParserCharValid for u32 {
fn is_name_char(&self, ctxt: &XmlParserCtxt) -> bool {
let c = *self;
if ctxt.options & XmlParserOption::XmlParseOld10 as i32 == 0 {
c != b' ' as u32
&& c != b'>' as u32
&& c != b'/' as u32
&& ((c >= b'a' as u32 && c <= b'z' as u32)
|| (c >= b'A' as u32 && c <= b'Z' as u32)
|| (c >= b'0' as u32 && c <= b'9' as u32)
|| c == b'_' as u32
|| c == b':' as u32
|| c == b'-' as u32
|| c == b'.' as u32
|| c == 0xB7
|| (0xC0..=0xD6).contains(&c)
|| (0xD8..=0xF6).contains(&c)
|| (0xF8..=0x2FF).contains(&c)
|| (0x300..=0x36F).contains(&c)
|| (0x370..=0x37D).contains(&c)
|| (0x37F..=0x1FFF).contains(&c)
|| (0x200C..=0x200D).contains(&c)
|| (0x203F..=0x2040).contains(&c)
|| (0x2070..=0x218F).contains(&c)
|| (0x2C00..=0x2FEF).contains(&c)
|| (0x3001..=0xD7FF).contains(&c)
|| (0xF900..=0xFDCF).contains(&c)
|| (0xFDF0..=0xFFFD).contains(&c)
|| (0x10000..=0xEFFFF).contains(&c))
} else {
xml_is_letter(c)
|| c.is_xml_digit()
|| c == b'.' as u32
|| c == b'-' as u32
|| c == b'_' as u32
|| c == b':' as u32
|| c.is_xml_combining()
|| c.is_xml_extender()
}
}
fn is_name_start_char(&self, ctxt: &XmlParserCtxt) -> bool {
let c = *self;
if ctxt.options & XmlParserOption::XmlParseOld10 as i32 == 0 {
c != b' ' as u32
&& c != b'>' as u32
&& c != b'/' as u32
&& ((c >= b'a' as u32 && c <= b'z' as u32)
|| (c >= b'A' as u32 && c <= b'Z' as u32)
|| c == b'_' as u32
|| c == b':' as u32
|| (0xC0..=0xD6).contains(&c)
|| (0xD8..=0xF6).contains(&c)
|| (0xF8..=0x2FF).contains(&c)
|| (0x370..=0x37D).contains(&c)
|| (0x37F..=0x1FFF).contains(&c)
|| (0x200C..=0x200D).contains(&c)
|| (0x2070..=0x218F).contains(&c)
|| (0x2C00..=0x2FEF).contains(&c)
|| (0x3001..=0xD7FF).contains(&c)
|| (0xF900..=0xFDCF).contains(&c)
|| (0xFDF0..=0xFFFD).contains(&c)
|| (0x10000..=0xEFFFF).contains(&c))
} else {
xml_is_letter(c) || c == b'_' as u32 || c == b':' as u32
}
}
}
impl XmlParserCharValid for char {
fn is_name_char(&self, ctxt: &XmlParserCtxt) -> bool {
(*self as u32).is_name_char(ctxt)
}
fn is_name_start_char(&self, ctxt: &XmlParserCtxt) -> bool {
(*self as u32).is_name_start_char(ctxt)
}
}
#[doc(alias = "xmlIsLetter")]
pub fn xml_is_letter(c: u32) -> bool {
c.is_xml_base_char() || c.is_xml_ideographic()
}
#[doc(alias = "xmlCheckLanguageID")]
pub(crate) fn check_language_id(lang: &str) -> bool {
let mut cur = lang;
if cur.starts_with("i-")
|| cur.starts_with("I-")
|| cur.starts_with("x-")
|| cur.starts_with("X-")
{
cur = &cur[2..];
cur = cur.trim_start_matches(|c: char| c.is_ascii_alphabetic());
return cur.is_empty();
}
let nxt = cur.trim_start_matches(|c: char| c.is_ascii_alphabetic());
if cur.len() - nxt.len() >= 4 {
return cur.len() - nxt.len() <= 8 && nxt.is_empty();
}
if cur.len() - nxt.len() < 2 {
return false;
}
if nxt.is_empty() {
return true;
}
let Some(mut nxt) = nxt.strip_prefix('-') else {
return false;
};
cur = nxt;
'region_m49: {
if nxt.starts_with(|c: char| c.is_ascii_digit()) {
break 'region_m49;
}
nxt = nxt.trim_start_matches(|c: char| c.is_ascii_alphabetic());
'variant: {
'region: {
'script: {
match cur.len() - nxt.len() {
4 => break 'script,
2 => break 'region,
5..=8 => break 'variant,
3 => {}
_ => return false,
}
if nxt.is_empty() {
return true;
}
let Some(rem) = nxt.strip_prefix('-') else {
return false;
};
nxt = rem;
cur = nxt;
if nxt.starts_with(|c: char| c.is_ascii_digit()) {
break 'region_m49;
}
nxt = nxt.trim_start_matches(|c: char| c.is_ascii_alphabetic());
match cur.len() - nxt.len() {
2 => break 'region,
5..=8 => break 'variant,
4 => {}
_ => return false,
}
}
if nxt.is_empty() {
return true;
}
let Some(rem) = nxt.strip_prefix('-') else {
return false;
};
nxt = rem;
cur = nxt;
if nxt.starts_with(|c: char| c.is_ascii_digit()) {
break 'region_m49;
}
nxt = nxt.trim_start_matches(|c: char| c.is_ascii_alphabetic());
match cur.len() - nxt.len() {
5..=8 => break 'variant,
2 => {}
_ => return false,
}
}
if nxt.is_empty() {
return true;
}
let Some(rem) = nxt.strip_prefix('-') else {
return false;
};
nxt = rem;
cur = nxt;
nxt = nxt.trim_start_matches(|c: char| c.is_ascii_alphabetic());
match cur.len() - nxt.len() {
5..=8 => {}
_ => return false,
}
}
return nxt.is_empty() || nxt.starts_with('-');
}
if nxt.len() >= 3 && nxt.as_bytes()[1].is_ascii_digit() && nxt.as_bytes()[2].is_ascii_digit() {
nxt = &nxt[3..];
if nxt.is_empty() {
return true;
}
let Some(rem) = nxt.strip_prefix('-') else {
return false;
};
nxt = rem;
cur = nxt;
nxt = nxt.trim_start_matches(|c: char| c.is_ascii_alphabetic());
match cur.len() - nxt.len() {
5..=8 => {}
_ => return false,
}
return nxt.is_empty() || nxt.starts_with('-');
}
false
}
#[doc(alias = "xmlReadDoc")]
pub fn xml_read_doc(
cur: &[u8],
url: Option<&str>,
encoding: Option<&str>,
options: i32,
) -> Option<XmlDocPtr> {
xml_init_parser();
XmlParserCtxt::from_memory(cur)?.do_read(url, encoding, options)
}
#[doc(alias = "xmlReadFile")]
pub fn xml_read_file(filename: &str, encoding: Option<&str>, options: i32) -> Option<XmlDocPtr> {
xml_init_parser();
XmlParserCtxt::from_filename_with_options(Some(filename), options)?
.do_read(None, encoding, options)
}
#[doc(alias = "xmlReadMemory")]
pub fn xml_read_memory(
buffer: &[u8],
url: Option<&str>,
encoding: Option<&str>,
options: i32,
) -> Option<XmlDocPtr> {
xml_init_parser();
XmlParserCtxt::from_memory(buffer)?.do_read(url, encoding, options)
}
#[doc(alias = "xmlReadIO")]
pub fn xml_read_io<'a>(
ioctx: impl Read + 'a,
url: Option<&str>,
encoding: Option<&str>,
options: i32,
) -> Option<XmlDocPtr> {
xml_init_parser();
let input = XmlParserInputBuffer::from_reader(ioctx, XmlCharEncoding::None);
let mut ctxt = XmlParserCtxt::new()?;
let stream = XmlParserInput::from_io(&mut ctxt, input, XmlCharEncoding::None)?;
ctxt.input_push(stream);
ctxt.do_read(url, encoding, options)
}
#[doc(alias = "xmlCtxtReadDoc")]
pub fn xml_ctxt_read_doc<'a>(
ctxt: &mut XmlParserCtxt<'a>,
cur: &'a [u8],
url: Option<&str>,
encoding: Option<&str>,
options: i32,
) -> Option<XmlDocPtr> {
xml_ctxt_read_memory(ctxt, cur, url, encoding, options)
}
#[doc(alias = "xmlCtxtReadFile")]
pub fn xml_ctxt_read_file(
ctxt: &mut XmlParserCtxt,
filename: &str,
encoding: Option<&str>,
options: i32,
) -> Option<XmlDocPtr> {
xml_init_parser();
ctxt.reset();
let stream = xml_load_external_entity(Some(filename), None, ctxt)?;
ctxt.input_push(stream);
ctxt.do_read(None, encoding, options)
}
#[doc(alias = "xmlCtxtReadMemory")]
pub fn xml_ctxt_read_memory<'a>(
ctxt: &mut XmlParserCtxt<'a>,
buffer: &'a [u8],
url: Option<&str>,
encoding: Option<&str>,
options: i32,
) -> Option<XmlDocPtr> {
xml_init_parser();
ctxt.reset();
let input = XmlParserInputBuffer::from_memory(buffer, XmlCharEncoding::None)?;
let stream = XmlParserInput::from_io(ctxt, input, XmlCharEncoding::None)?;
ctxt.input_push(stream);
ctxt.do_read(url, encoding, options)
}
#[doc(alias = "xmlCtxtReadIO")]
pub fn xml_ctxt_read_io<'a>(
ctxt: &mut XmlParserCtxt<'a>,
ioctx: impl Read + 'a,
url: Option<&str>,
encoding: Option<&str>,
options: i32,
) -> Option<XmlDocPtr> {
xml_init_parser();
ctxt.reset();
let input = XmlParserInputBuffer::from_reader(ioctx, XmlCharEncoding::None);
let stream = XmlParserInput::from_io(ctxt, input, XmlCharEncoding::None)?;
ctxt.input_push(stream);
ctxt.do_read(url, encoding, options)
}
static XML_PARSER_INITIALIZED: AtomicBool = AtomicBool::new(false);
#[doc(alias = "xmlInitParser")]
pub fn xml_init_parser() {
unsafe {
if XML_PARSER_INITIALIZED.load(Ordering::Acquire) {
return;
}
__xml_global_init_mutex_lock();
if !XML_PARSER_INITIALIZED.load(Ordering::Acquire) {
xml_init_threads_internal();
xml_init_memory_internal();
register_default_input_callbacks();
#[cfg(feature = "libxml_output")]
{
register_default_output_callbacks();
}
XML_PARSER_INITIALIZED.store(true, Ordering::Release);
}
__xml_global_init_mutex_unlock();
}
}
#[doc(alias = "xmlCleanupParser")]
pub fn xml_cleanup_parser() {
unsafe {
if !XML_PARSER_INITIALIZED.load(Ordering::Acquire) {
return;
}
#[cfg(feature = "catalog")]
{
xml_catalog_cleanup();
}
cleanup_input_callbacks();
#[cfg(feature = "libxml_output")]
{
cleanup_output_callbacks();
}
#[cfg(feature = "schema")]
{
xml_schema_cleanup_types();
xml_relaxng_cleanup_types();
}
xml_cleanup_threads_internal();
xml_cleanup_memory_internal();
XML_PARSER_INITIALIZED.store(false, Ordering::Release);
}
}