pub mod dom;
pub mod error;
pub mod namespace;
pub mod parser;
mod simd;
pub mod writer;
pub mod xpath;
pub mod xsd;
pub mod xsd_regex;
pub use dom::{
Attribute, ChildrenIter, Document, Element, NodeId, NodeKind, ProcessingInstruction, QName,
XmlDeclaration, XmlWriteOptions,
};
pub use error::{
NamespaceError, ParseError, ValidationError, WellFormednessError, XPathError, XmlError,
XmlResult,
};
pub use namespace::NamespaceResolver;
pub use parser::Parser;
pub use writer::XmlWriter;
pub use xpath::{XPathEvaluator, XPathValue};
pub use xsd::{XsdValidator, XSI_NAMESPACE, XS_NAMESPACE};
pub use xsd_regex::XsdRegex;
/// Parses an XML document from a string slice.
///
/// A fresh [`Parser`] is constructed for the call. The returned [`Document`]
/// carries the lifetime of `input`.
///
/// # Errors
///
/// Propagates whatever error [`Parser::parse`] reports for `input`.
pub fn parse(input: &str) -> XmlResult<Document<'_>> {
    Parser::new().parse(input)
}
/// Parses an XML document from raw bytes, returning an owned document.
///
/// The bytes are first converted to text by `decode_xml_bytes`, the text is
/// parsed with a fresh [`Parser`], and the result is detached from the
/// temporary text buffer via `into_static`.
///
/// # Errors
///
/// Returns the decoding error if the bytes cannot be converted to text, or
/// the parser's error if the decoded text fails to parse.
pub fn parse_bytes(input: &[u8]) -> XmlResult<Document<'static>> {
    let decoded = decode_xml_bytes(input)?;
    // `parsed` borrows from `decoded`; `into_static` yields the `'static`
    // document that is returned after `decoded` goes out of scope.
    let parsed = Parser::new().parse(&decoded);
    parsed.map(|doc| doc.into_static())
}
/// Converts raw XML bytes to a `String`, sniffing the encoding from the
/// leading bytes.
///
/// Checks are applied in this order:
///
/// 1. `FF FE` — UTF-16 LE byte-order mark; the mark is stripped.
/// 2. `FE FF` — UTF-16 BE byte-order mark; the mark is stripped.
/// 3. `EF BB BF` — UTF-8 byte-order mark; the mark is stripped.
/// 4. `00 3C` — treated as UTF-16 BE with no mark (a leading `<`).
/// 5. `3C 00` — treated as UTF-16 LE with no mark (a leading `<`).
/// 6. Anything else, including inputs shorter than two bytes, is decoded as
///    UTF-8.
///
/// # Errors
///
/// Returns a well-formedness error positioned at line 1, column 1 when the
/// bytes are not valid in the selected encoding.
fn decode_xml_bytes(input: &[u8]) -> XmlResult<String> {
    // Strict UTF-8 conversion shared by the BOM and fallback paths.
    let utf8 = |bytes: &[u8]| -> XmlResult<String> {
        String::from_utf8(bytes.to_vec())
            .map_err(|e| XmlError::well_formedness(format!("Invalid UTF-8: {}", e), 1, 1))
    };

    match input {
        [0xFF, 0xFE, rest @ ..] => decode_utf16_le(rest),
        [0xFE, 0xFF, rest @ ..] => decode_utf16_be(rest),
        [0xEF, 0xBB, 0xBF, rest @ ..] => utf8(rest),
        // No byte-order mark: the whole input is handed to the decoder.
        [0x00, 0x3C, ..] => decode_utf16_be(input),
        [0x3C, 0x00, ..] => decode_utf16_le(input),
        _ => utf8(input),
    }
}
/// Decodes a byte slice as little-endian UTF-16.
///
/// `bytes` is interpreted as consecutive 16-bit code units, low byte first.
/// No byte-order mark is stripped here, so callers pass the payload only.
///
/// # Errors
///
/// Returns a well-formedness error positioned at line 1, column 1 when
/// `bytes` has an odd length (the final code unit is truncated) or when the
/// code units are not valid UTF-16, such as an unpaired surrogate.
fn decode_utf16_le(bytes: &[u8]) -> XmlResult<String> {
    let pairs = bytes.chunks_exact(2);
    // A dangling byte means the stream was cut in the middle of a code unit;
    // report it instead of silently discarding data.
    if !pairs.remainder().is_empty() {
        return Err(XmlError::well_formedness(
            format!(
                "Invalid UTF-16 LE: odd number of bytes ({})",
                bytes.len()
            ),
            1,
            1,
        ));
    }
    let code_units: Vec<u16> = pairs
        .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
        .collect();
    String::from_utf16(&code_units)
        .map_err(|e| XmlError::well_formedness(format!("Invalid UTF-16 LE: {}", e), 1, 1))
}
/// Decodes a byte slice as big-endian UTF-16.
///
/// `bytes` is interpreted as consecutive 16-bit code units, high byte first.
/// No byte-order mark is stripped here, so callers pass the payload only.
///
/// # Errors
///
/// Returns a well-formedness error positioned at line 1, column 1 when
/// `bytes` has an odd length (the final code unit is truncated) or when the
/// code units are not valid UTF-16, such as an unpaired surrogate.
fn decode_utf16_be(bytes: &[u8]) -> XmlResult<String> {
    let pairs = bytes.chunks_exact(2);
    // A dangling byte means the stream was cut in the middle of a code unit;
    // report it instead of silently discarding data.
    if !pairs.remainder().is_empty() {
        return Err(XmlError::well_formedness(
            format!(
                "Invalid UTF-16 BE: odd number of bytes ({})",
                bytes.len()
            ),
            1,
            1,
        ));
    }
    let code_units: Vec<u16> = pairs
        .map(|pair| u16::from_be_bytes([pair[0], pair[1]]))
        .collect();
    String::from_utf16(&code_units)
        .map_err(|e| XmlError::well_formedness(format!("Invalid UTF-16 BE: {}", e), 1, 1))
}