use crate::error::{ErrorSeverity, ParseDiagnostic, ParseError};
use crate::parser::input::{
parse_cdata_content, parse_comment_content, parse_pi_content, parse_xml_decl, split_name,
NamespaceResolver, ParserInput,
};
use crate::parser::ParseOptions;
/// Kind of node an [`XmlReader`] is currently positioned on.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum XmlNodeType {
/// Node type of a freshly constructed reader, before the first `read()`.
None,
/// Start tag (or empty-element tag) of an element.
Element,
/// End tag of an element.
EndElement,
/// Character data containing at least one non-whitespace character.
Text,
/// A CDATA section.
CData,
/// A comment.
Comment,
/// A processing instruction.
ProcessingInstruction,
/// The XML declaration at the start of the document.
XmlDeclaration,
/// A `<!DOCTYPE ...>` declaration.
DocumentType,
/// Character data consisting only of spaces, tabs, CR and LF.
Whitespace,
/// An attribute, reached through the `move_to_*_attribute` methods.
Attribute,
/// Set as the current node once the reader has finished.
EndDocument,
}
impl std::fmt::Display for XmlNodeType {
    /// Writes the variant name verbatim (e.g. `EndElement`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::None => "None",
            Self::Element => "Element",
            Self::EndElement => "EndElement",
            Self::Text => "Text",
            Self::CData => "CData",
            Self::Comment => "Comment",
            Self::ProcessingInstruction => "ProcessingInstruction",
            Self::XmlDeclaration => "XmlDeclaration",
            Self::DocumentType => "DocumentType",
            Self::Whitespace => "Whitespace",
            Self::Attribute => "Attribute",
            Self::EndDocument => "EndDocument",
        };
        // `write_str` (not `pad`) so width/alignment flags stay ignored.
        f.write_str(label)
    }
}
/// One attribute of an element, as stored on a [`ReaderNode`].
#[derive(Debug, Clone)]
struct ReaderAttribute {
/// Name without its prefix.
local_name: String,
/// Attribute value as returned by the parser's attribute-value routine.
value: String,
/// Prefix of the attribute name, if it has one.
prefix: Option<String>,
/// Namespace URI resolved from the prefix; `None` for unprefixed
/// attributes and for `xmlns` / `xmlns:*` declarations.
namespace_uri: Option<String>,
}
/// Snapshot of a single node exposed through the `XmlReader` accessors.
#[derive(Debug, Clone)]
struct ReaderNode {
/// Kind of node this snapshot describes.
node_type: XmlNodeType,
/// Local name (element/attribute name, PI target, DOCTYPE name); empty when unused.
local_name: String,
/// Name prefix, if any.
prefix: Option<String>,
/// Resolved namespace URI, if any.
namespace_uri: Option<String>,
/// Textual payload (text, comment, CDATA, attribute value, ...), if any.
value: Option<String>,
/// Nesting depth at which the node was produced.
depth: u32,
/// `true` only for elements written as `<name/>`.
is_empty_element: bool,
/// Attributes of the element; attribute nodes carry a copy of their owner's list.
attributes: Vec<ReaderAttribute>,
}
impl ReaderNode {
    /// Builds a blank node of the given type: empty name, no prefix,
    /// namespace or value, depth 0, not an empty element, no attributes.
    fn new(node_type: XmlNodeType) -> Self {
        let local_name = String::new();
        let attributes = Vec::new();
        Self {
            node_type,
            local_name,
            attributes,
            depth: 0,
            is_empty_element: false,
            prefix: None,
            namespace_uri: None,
            value: None,
        }
    }
}
/// Forward-only XML reader: each call to `read()` advances to the next node,
/// whose properties are then available through the accessor methods.
// The boolean fields below are independent parse-state flags, hence the allow.
#[allow(clippy::struct_excessive_bools)]
pub struct XmlReader<'a> {
/// Low-level input cursor; also holds limits, entity tables and diagnostics.
parser_input: ParserInput<'a>,
/// Options the reader was created with (`recover`, `no_blanks`, ...).
options: ParseOptions,
/// Namespace scopes; one scope is pushed per open element.
ns: NamespaceResolver,
/// Node the reader is currently positioned on.
current: ReaderNode,
/// Pending nodes that `read()` returns before parsing more input.
/// NOTE(review): nothing in this file pushes to it.
queue: Vec<ReaderNode>,
/// Number of currently open elements.
depth: u32,
/// Set by the first `read()` that reaches the parser.
started: bool,
/// Set once the end of the document has been reached; `read()` then returns `false`.
finished: bool,
/// `true` once the XML declaration has been parsed or found to be absent.
prolog_parsed: bool,
/// `true` once the root element's start tag has been reached.
root_parsed: bool,
/// `true` while at least one element is open.
in_element_content: bool,
/// Qualified names of the open elements, innermost last.
element_stack: Vec<String>,
/// Index of the attribute the reader is positioned on, if any.
attribute_index: Option<usize>,
/// Owning element while the reader is positioned on one of its attributes.
saved_element: Option<ReaderNode>,
}
impl<'a> XmlReader<'a> {
/// Creates a reader over `input` using `ParseOptions::default()`.
#[must_use]
pub fn new(input: &'a str) -> Self {
    let defaults = ParseOptions::default();
    Self::with_options(input, defaults)
}
/// Creates a reader over `input` configured by `options`.
///
/// The recovery flag, depth / name-length / entity-expansion limits and the
/// entity resolver are copied onto the underlying parser input; the options
/// themselves are kept for later checks (`recover`, `no_blanks`).
#[must_use]
pub fn with_options(input: &'a str, options: ParseOptions) -> Self {
    let mut parser_input = ParserInput::new(input);
    parser_input.set_recover(options.recover);
    parser_input.set_max_depth(options.max_depth);
    parser_input.set_max_name_length(options.max_name_length);
    parser_input.set_max_entity_expansions(options.max_entity_expansions);
    parser_input.set_entity_resolver(options.entity_resolver.clone());

    Self {
        // Input and configuration.
        parser_input,
        options,
        ns: NamespaceResolver::new(),
        // Positioned on nothing until the first `read()`.
        current: ReaderNode::new(XmlNodeType::None),
        queue: Vec::new(),
        element_stack: Vec::new(),
        depth: 0,
        // Parse-state flags all start cleared.
        started: false,
        finished: false,
        prolog_parsed: false,
        root_parsed: false,
        in_element_content: false,
        // No attribute cursor yet.
        attribute_index: None,
        saved_element: None,
    }
}
/// Advances the reader to the next node.
///
/// Returns `Ok(true)` when a node is available, `Ok(false)` once the
/// document is finished (and on every later call).
///
/// # Errors
///
/// Propagates any `ParseError` raised while parsing the next node.
pub fn read(&mut self) -> Result<bool, ParseError> {
    // Advancing always abandons any attribute cursor.
    self.attribute_index = None;
    self.saved_element = None;

    if self.finished {
        return Ok(false);
    }

    match self.queue.pop() {
        Some(pending) => {
            self.current = pending;
            Ok(true)
        }
        None => {
            self.started = true;
            self.read_next_node()
        }
    }
}
/// Returns the type of the node the reader is currently positioned on
/// (`XmlNodeType::None` before the first `read()`).
#[must_use]
pub fn node_type(&self) -> XmlNodeType {
self.current.node_type
}
/// Returns the name of the current node.
///
/// Only element, end-element, processing-instruction, attribute and
/// DOCTYPE nodes have a name; `None` is returned for every other node type
/// and when the stored name is empty. The returned name is the local part
/// (no prefix), exactly as `local_name()` reports it.
#[must_use]
pub fn name(&self) -> Option<&str> {
    let is_named_kind = matches!(
        self.current.node_type,
        XmlNodeType::Element
            | XmlNodeType::EndElement
            | XmlNodeType::ProcessingInstruction
            | XmlNodeType::Attribute
            | XmlNodeType::DocumentType
    );
    let local = self.current.local_name.as_str();
    (is_named_kind && !local.is_empty()).then_some(local)
}
/// Returns the local (unprefixed) name of the current node.
///
/// `None` for node types that carry no name (text, comments, CDATA, ...)
/// and when the stored name is empty.
#[must_use]
pub fn local_name(&self) -> Option<&str> {
    match self.current.node_type {
        XmlNodeType::Element
        | XmlNodeType::EndElement
        | XmlNodeType::ProcessingInstruction
        | XmlNodeType::Attribute
        | XmlNodeType::DocumentType
            if !self.current.local_name.is_empty() =>
        {
            Some(self.current.local_name.as_str())
        }
        _ => None,
    }
}
/// Returns the namespace prefix of the current node's name, if it has one.
#[must_use]
pub fn prefix(&self) -> Option<&str> {
self.current.prefix.as_deref()
}
/// Returns the namespace URI recorded on the current node, if any.
///
/// In this file only element and attribute nodes are given a namespace URI;
/// end-element nodes are created without one.
#[must_use]
pub fn namespace_uri(&self) -> Option<&str> {
self.current.namespace_uri.as_deref()
}
/// Returns the textual value of the current node (text, whitespace, CDATA,
/// comment, attribute value, XML declaration summary, PI data), or `None`
/// when the node carries no value.
#[must_use]
pub fn value(&self) -> Option<&str> {
self.current.value.as_deref()
}
/// Returns `true` when `value()` would return `Some`.
#[must_use]
pub fn has_value(&self) -> bool {
self.current.value.is_some()
}
/// Returns `true` when the current node is an element written as `<name/>`.
/// No `EndElement` node follows such an element.
#[must_use]
pub fn is_empty_element(&self) -> bool {
self.current.is_empty_element
}
/// Returns the nesting depth of the current node.
///
/// The root element is at depth 0 and its children at depth 1; an attribute
/// reports its owning element's depth plus one.
#[must_use]
pub fn depth(&self) -> u32 {
self.current.depth
}
/// Returns the number of attributes on the current node.
///
/// While positioned on an attribute this is still the owning element's
/// attribute count, because attribute nodes carry a copy of that list.
#[must_use]
pub fn attribute_count(&self) -> usize {
self.current.attributes.len()
}
/// Looks up an attribute of the current node by qualified name.
///
/// `name` is compared against `prefix:local` for prefixed attributes and
/// against the bare local name for unprefixed ones. The value of the first
/// matching attribute is returned, or `None` when nothing matches.
#[must_use]
pub fn get_attribute(&self, name: &str) -> Option<&str> {
    self.current
        .attributes
        .iter()
        .find(|attr| match attr.prefix.as_deref() {
            // Prefixed: `name` must be exactly "<prefix>:<local>".
            Some(pfx) => {
                name.strip_prefix(pfx)
                    .and_then(|rest| rest.strip_prefix(':'))
                    == Some(attr.local_name.as_str())
            }
            None => attr.local_name == name,
        })
        .map(|attr| attr.value.as_str())
}
/// Looks up an attribute of the current node by local name and namespace URI.
///
/// Returns the value of the first attribute whose local name equals
/// `local_name` and whose resolved namespace equals `namespace_uri`.
/// Attributes without a namespace never match.
#[must_use]
pub fn get_attribute_ns(&self, local_name: &str, namespace_uri: &str) -> Option<&str> {
    let wanted_ns = Some(namespace_uri);
    self.current
        .attributes
        .iter()
        .find(|attr| attr.local_name == local_name && attr.namespace_uri.as_deref() == wanted_ns)
        .map(|attr| attr.value.as_str())
}
/// Positions the reader on the first attribute of the current element.
///
/// May be called while on an element or on one of its attributes. Returns
/// `false`, leaving the reader where it is, when the current node is of
/// any other type or the element has no attributes.
///
/// On success the current node becomes an `Attribute` node whose depth is
/// the element's depth plus one; the element is remembered so that
/// `move_to_element()` can return to it.
pub fn move_to_first_attribute(&mut self) -> bool {
    let on_element = self.current.node_type == XmlNodeType::Element;
    let on_attribute = self.current.node_type == XmlNodeType::Attribute;
    if !on_element && !on_attribute {
        return false;
    }

    // While iterating attributes the owner lives in `saved_element`;
    // otherwise the current node is the owner.
    let owner = self.saved_element.as_ref().unwrap_or(&self.current);
    let Some(first) = owner.attributes.first() else {
        // Nothing to move to: do not record a saved element, otherwise a
        // later `move_to_element()` would report a move that never happened.
        return false;
    };
    let attr_node = ReaderNode {
        node_type: XmlNodeType::Attribute,
        local_name: first.local_name.clone(),
        prefix: first.prefix.clone(),
        namespace_uri: first.namespace_uri.clone(),
        value: Some(first.value.clone()),
        depth: owner.depth + 1,
        is_empty_element: false,
        attributes: owner.attributes.clone(),
    };

    if on_element && self.saved_element.is_none() {
        // Move the element into `saved_element` instead of cloning it.
        self.saved_element = Some(std::mem::replace(&mut self.current, attr_node));
    } else {
        self.current = attr_node;
    }
    self.attribute_index = Some(0);
    true
}
/// Positions the reader on the attribute following the current one.
///
/// When the reader is not yet on an attribute this behaves like
/// `move_to_first_attribute()`. Returns `false`, without moving, when
/// there is no further attribute.
pub fn move_to_next_attribute(&mut self) -> bool {
    let next_idx = match self.attribute_index {
        Some(idx) => idx + 1,
        None => return self.move_to_first_attribute(),
    };

    let owner = self.saved_element.as_ref().unwrap_or(&self.current);
    let Some(attr) = owner.attributes.get(next_idx) else {
        return false;
    };
    let attr_node = ReaderNode {
        node_type: XmlNodeType::Attribute,
        local_name: attr.local_name.clone(),
        prefix: attr.prefix.clone(),
        namespace_uri: attr.namespace_uri.clone(),
        value: Some(attr.value.clone()),
        depth: owner.depth + 1,
        is_empty_element: false,
        attributes: owner.attributes.clone(),
    };

    self.current = attr_node;
    self.attribute_index = Some(next_idx);
    true
}
/// Returns the reader to the element saved by the attribute-navigation
/// methods.
///
/// Returns `true` when a saved element was restored, `false` when there
/// was none.
pub fn move_to_element(&mut self) -> bool {
    match self.saved_element.take() {
        Some(owner) => {
            self.current = owner;
            self.attribute_index = None;
            true
        }
        None => false,
    }
}
/// Returns the diagnostics accumulated on the underlying parser input
/// (for example errors recorded, rather than raised, in recovery mode).
#[must_use]
pub fn diagnostics(&self) -> &[ParseDiagnostic] {
&self.parser_input.diagnostics
}
/// Dispatches to the routine for the current document phase: XML
/// declaration, prolog / root start, element content, or trailing misc.
fn read_next_node(&mut self) -> Result<bool, ParseError> {
    if !self.prolog_parsed {
        self.read_prolog()
    } else if !self.root_parsed {
        self.read_root_or_prolog_misc()
    } else if self.in_element_content {
        self.read_element_content()
    } else {
        self.read_trailing_misc()
    }
}
/// Reads the optional XML declaration at the start of the document.
///
/// Leading whitespace is skipped first. If the input then starts with
/// `<?xml` followed by whitespace (space, tab, CR or LF) or directly by
/// `?>`, the declaration is parsed and returned as an `XmlDeclaration`
/// node. Otherwise the prolog is marked as handled and reading continues
/// with the rest of the prolog / the root element.
fn read_prolog(&mut self) -> Result<bool, ParseError> {
    self.parser_input.skip_whitespace();
    // Any XML whitespace character may follow `<?xml`; a line feed is as
    // valid as a space. `<?xml?>` is routed here too so the declaration
    // parser handles it rather than the processing-instruction parser.
    if self.parser_input.looking_at(b"<?xml ")
        || self.parser_input.looking_at(b"<?xml\t")
        || self.parser_input.looking_at(b"<?xml\r")
        || self.parser_input.looking_at(b"<?xml\n")
        || self.parser_input.looking_at(b"<?xml?>")
    {
        let node = self.parse_xml_declaration()?;
        self.current = node;
        return Ok(true);
    }
    self.prolog_parsed = true;
    self.read_root_or_prolog_misc()
}
/// Reads the next node before the root element: a DOCTYPE, comment or
/// processing instruction, or the root element's start tag itself.
///
/// Returns `Ok(false)` (positioned on `EndDocument`) when the input ends
/// first, or, in recovery mode, when something else is found where the
/// root element was expected.
///
/// # Errors
///
/// Fails with "expected root element" outside recovery mode when the input
/// continues with anything else, and propagates errors from the node parsers.
fn read_root_or_prolog_misc(&mut self) -> Result<bool, ParseError> {
    self.parser_input.skip_whitespace();
    if self.parser_input.at_end() {
        self.finished = true;
        self.current = ReaderNode::new(XmlNodeType::EndDocument);
        return Ok(false);
    }

    let node = if self.parser_input.looking_at(b"<!DOCTYPE")
        || self.parser_input.looking_at(b"<!doctype")
    {
        self.parse_doctype()?
    } else if self.parser_input.looking_at(b"<!--") {
        self.parse_comment()?
    } else if self.parser_input.looking_at(b"<?") {
        self.parse_processing_instruction()?
    } else if self.parser_input.peek() == Some(b'<')
        && self
            .parser_input
            .peek_at(1)
            .is_some_and(|b| b != b'!' && b != b'?')
    {
        // Flag the root as reached before parsing its start tag.
        self.root_parsed = true;
        self.parse_element_start()?
    } else {
        if !self.parser_input.at_end() && !self.options.recover {
            return Err(self.parser_input.fatal("expected root element"));
        }
        self.finished = true;
        self.current = ReaderNode::new(XmlNodeType::EndDocument);
        return Ok(false);
    };

    self.current = node;
    Ok(true)
}
fn read_element_content(&mut self) -> Result<bool, ParseError> {
if self.parser_input.at_end() {
if self.options.recover {
if let Some(name) = self.element_stack.pop() {
self.depth -= 1;
self.parser_input.decrement_depth();
self.ns.pop_scope();
let mut node = ReaderNode::new(XmlNodeType::EndElement);
let (prefix, local_name) = split_name(&name);
node.local_name = local_name.to_string();
node.prefix = prefix.map(String::from);
node.depth = self.depth;
self.in_element_content = !self.element_stack.is_empty();
self.current = node;
return Ok(true);
}
self.finished = true;
self.current = ReaderNode::new(XmlNodeType::EndDocument);
return Ok(false);
}
return Err(self
.parser_input
.fatal("unexpected end of input in element content"));
}
if self.parser_input.looking_at(b"</") {
let node = self.parse_end_tag()?;
self.current = node;
return Ok(true);
}
if self.parser_input.looking_at(b"<![CDATA[") {
let node = self.parse_cdata()?;
self.current = node;
return Ok(true);
}
if self.parser_input.looking_at(b"<!--") {
let node = self.parse_comment()?;
self.current = node;
return Ok(true);
}
if self.parser_input.looking_at(b"<?") {
let node = self.parse_processing_instruction()?;
self.current = node;
return Ok(true);
}
if self.parser_input.peek() == Some(b'<')
&& self
.parser_input
.peek_at(1)
.is_some_and(|b| b != b'!' && b != b'?')
{
let node = self.parse_element_start()?;
self.current = node;
return Ok(true);
}
let node = self.parse_char_data()?;
if self.options.no_blanks && node.node_type == XmlNodeType::Whitespace {
return self.read_element_content();
}
self.current = node;
Ok(true)
}
/// Reads what may follow the root element: comments and processing
/// instructions, separated by optional whitespace.
///
/// Returns `Ok(false)` (positioned on `EndDocument`) at the end of the
/// input or, in recovery mode, when any other content is found.
///
/// # Errors
///
/// Fails with "content after document element" outside recovery mode when
/// anything other than a comment or processing instruction follows.
fn read_trailing_misc(&mut self) -> Result<bool, ParseError> {
    self.parser_input.skip_whitespace();

    if !self.parser_input.at_end() {
        if self.parser_input.looking_at(b"<!--") {
            self.current = self.parse_comment()?;
            return Ok(true);
        }
        if self.parser_input.looking_at(b"<?") {
            self.current = self.parse_processing_instruction()?;
            return Ok(true);
        }
        if !self.options.recover {
            return Err(self.parser_input.fatal("content after document element"));
        }
    }

    self.finished = true;
    self.current = ReaderNode::new(XmlNodeType::EndDocument);
    Ok(false)
}
fn parse_xml_declaration(&mut self) -> Result<ReaderNode, ParseError> {
let decl = parse_xml_decl(&mut self.parser_input)?;
self.prolog_parsed = true;
let mut value_parts = vec![format!("version={}", decl.version)];
if let Some(ref enc) = decl.encoding {
value_parts.push(format!("encoding={enc}"));
}
if let Some(sa) = decl.standalone {
let sa_str = if sa { "yes" } else { "no" };
value_parts.push(format!("standalone={sa_str}"));
}
let mut node = ReaderNode::new(XmlNodeType::XmlDeclaration);
node.local_name = "xml".to_string();
node.value = Some(value_parts.join(" "));
node.depth = 0;
Ok(node)
}
/// Parses a `<!DOCTYPE ...>` declaration and returns a `DocumentType` node
/// named after the declared root element.
///
/// An optional `SYSTEM` / `PUBLIC` external identifier is consumed and
/// discarded. An internal subset (`[ ... ]`) is scanned to its matching
/// `]`, skipping over comments and quoted literals so that brackets inside
/// them are not counted. The subset text is then handed to the DTD parser
/// and any entity declarations found are registered on the parser input
/// so later entity references can be expanded. A subset that fails to
/// parse as a DTD is ignored.
///
/// # Errors
///
/// Propagates errors from the low-level parsing helpers, including a
/// missing closing `>`.
fn parse_doctype(&mut self) -> Result<ReaderNode, ParseError> {
    self.parser_input.expect_str(b"<!DOCTYPE")?;
    self.parser_input.skip_whitespace_required()?;
    let name = self.parser_input.parse_name()?;
    self.parser_input.skip_whitespace();

    // External identifier: the literals are validated by the helper but
    // their values are not kept.
    if self.parser_input.looking_at(b"SYSTEM") {
        self.parser_input.expect_str(b"SYSTEM")?;
        self.parser_input.skip_whitespace_required()?;
        self.parser_input.parse_quoted_value()?;
        self.parser_input.skip_whitespace();
    } else if self.parser_input.looking_at(b"PUBLIC") {
        self.parser_input.expect_str(b"PUBLIC")?;
        self.parser_input.skip_whitespace_required()?;
        self.parser_input.parse_quoted_value()?;
        self.parser_input.skip_whitespace_required()?;
        self.parser_input.parse_quoted_value()?;
        self.parser_input.skip_whitespace();
    }

    if self.parser_input.peek() == Some(b'[') {
        self.parser_input.advance(1);
        let start = self.parser_input.pos();
        let mut bracket_depth: u32 = 1;
        while !self.parser_input.at_end() && bracket_depth > 0 {
            if self.parser_input.looking_at(b"<!--") {
                // Skip the whole comment; brackets inside do not count.
                self.parser_input.advance(4);
                while !self.parser_input.at_end() && !self.parser_input.looking_at(b"-->") {
                    self.parser_input.advance(1);
                }
                if !self.parser_input.at_end() {
                    self.parser_input.advance(3);
                }
            } else if let Some(quote @ (b'"' | b'\'')) = self.parser_input.peek() {
                // Skip a quoted literal up to its matching quote.
                self.parser_input.advance(1);
                while !self.parser_input.at_end() && self.parser_input.peek() != Some(quote) {
                    self.parser_input.advance(1);
                }
                if !self.parser_input.at_end() {
                    self.parser_input.advance(1);
                }
            } else if self.parser_input.peek() == Some(b'[') {
                bracket_depth += 1;
                self.parser_input.advance(1);
            } else if self.parser_input.peek() == Some(b']') {
                bracket_depth -= 1;
                self.parser_input.advance(1);
            } else {
                self.parser_input.advance(1);
            }
        }

        // Exclude the closing `]` only if it was actually consumed. When
        // the input ended first, `pos() - 1` would drop a real byte of the
        // subset, and for input ending right after `[` it would fall
        // before `start`, producing an invalid range.
        let end = if bracket_depth == 0 {
            self.parser_input.pos() - 1
        } else {
            self.parser_input.pos()
        };
        let subset_text = std::str::from_utf8(self.parser_input.slice(start, end))
            .ok()
            .map(str::to_string);
        if let Some(subset_text) = subset_text {
            if subset_text.contains('%') {
                self.parser_input.has_pe_references = true;
            }
            if let Ok(dtd) = crate::validation::dtd::parse_dtd(&subset_text) {
                for (ent_name, ent_decl) in &dtd.entities {
                    match &ent_decl.kind {
                        crate::validation::dtd::EntityKind::Internal(value) => {
                            self.parser_input
                                .entity_map
                                .insert(ent_name.clone(), value.clone());
                        }
                        crate::validation::dtd::EntityKind::External {
                            system_id,
                            public_id,
                        } => {
                            self.parser_input.entity_external.insert(
                                ent_name.clone(),
                                crate::parser::input::ExternalEntityInfo {
                                    system_id: system_id.clone(),
                                    public_id: public_id.clone(),
                                },
                            );
                        }
                    }
                }
            }
        }
        self.parser_input.skip_whitespace();
    }

    self.parser_input.expect_byte(b'>')?;
    let mut node = ReaderNode::new(XmlNodeType::DocumentType);
    node.local_name = name;
    node.depth = 0;
    Ok(node)
}
fn parse_element_start(&mut self) -> Result<ReaderNode, ParseError> {
self.parser_input.increment_depth()?;
self.parser_input.expect_byte(b'<')?;
let name = self.parser_input.parse_name()?;
let mut raw_attrs: Vec<(String, String)> = Vec::new();
loop {
let had_ws = self.parser_input.skip_whitespace();
if self.parser_input.peek() == Some(b'>') || self.parser_input.looking_at(b"/>") {
break;
}
if !had_ws {
return Err(self
.parser_input
.fatal("whitespace required between attributes"));
}
let attr_name = self.parser_input.parse_name()?;
self.parser_input.skip_whitespace();
self.parser_input.expect_byte(b'=')?;
self.parser_input.skip_whitespace();
let attr_value = self.parser_input.parse_attribute_value()?;
raw_attrs.push((attr_name, attr_value));
}
self.ns.push_scope();
for (attr_name, attr_value) in &raw_attrs {
if attr_name == "xmlns" {
self.ns.bind(None, attr_value.clone());
} else if let Some(prefix) = attr_name.strip_prefix("xmlns:") {
self.ns.bind(Some(prefix.to_string()), attr_value.clone());
}
}
let (prefix, local_name) = split_name(&name);
let elem_ns = self.ns.resolve(prefix).map(String::from);
let attributes: Vec<ReaderAttribute> = raw_attrs
.iter()
.map(|(attr_name, attr_value)| {
let (attr_prefix, attr_local) = split_name(attr_name);
let attr_ns = if attr_prefix == Some("xmlns")
|| (attr_prefix.is_none() && attr_local == "xmlns")
{
None
} else {
attr_prefix
.and_then(|p| self.ns.resolve(Some(p)))
.map(String::from)
};
ReaderAttribute {
local_name: attr_local.to_string(),
value: attr_value.clone(),
prefix: attr_prefix.map(String::from),
namespace_uri: attr_ns,
}
})
.collect();
let is_empty = self.parser_input.looking_at(b"/>");
if is_empty {
self.parser_input.advance(2);
} else {
self.parser_input.expect_byte(b'>')?;
}
let current_depth = self.depth;
let mut node = ReaderNode::new(XmlNodeType::Element);
node.local_name = local_name.to_string();
node.prefix = prefix.map(String::from);
node.namespace_uri = elem_ns;
node.depth = current_depth;
node.is_empty_element = is_empty;
node.attributes = attributes;
if is_empty {
self.ns.pop_scope();
self.parser_input.decrement_depth();
} else {
self.element_stack.push(name);
self.depth += 1;
self.in_element_content = true;
}
Ok(node)
}
fn parse_end_tag(&mut self) -> Result<ReaderNode, ParseError> {
self.parser_input.expect_str(b"</")?;
let name = self.parser_input.parse_name()?;
self.parser_input.skip_whitespace();
self.parser_input.expect_byte(b'>')?;
if let Some(expected) = self.element_stack.last() {
if *expected != name {
if self.options.recover {
self.parser_input.push_diagnostic(
ErrorSeverity::Error,
format!("mismatched end tag: expected </{expected}>, found </{name}>"),
);
} else {
return Err(self.parser_input.fatal(format!(
"mismatched end tag: expected </{expected}>, found </{name}>"
)));
}
}
}
self.element_stack.pop();
self.depth -= 1;
self.parser_input.decrement_depth();
self.ns.pop_scope();
self.in_element_content = !self.element_stack.is_empty();
let (prefix, local_name) = split_name(&name);
let mut node = ReaderNode::new(XmlNodeType::EndElement);
node.local_name = local_name.to_string();
node.prefix = prefix.map(String::from);
node.depth = self.depth;
Ok(node)
}
fn parse_char_data(&mut self) -> Result<ReaderNode, ParseError> {
let mut text = String::new();
while !self.parser_input.at_end() {
if self.parser_input.peek() == Some(b'<') {
break;
}
if self.parser_input.looking_at(b"]]>") {
if self.options.recover {
self.parser_input.push_diagnostic(
ErrorSeverity::Error,
"']]>' not allowed in character data".to_string(),
);
text.push_str("]]>");
self.parser_input.advance(3);
continue;
}
return Err(self
.parser_input
.fatal("']]>' not allowed in character data"));
}
if self.parser_input.peek() == Some(b'&') {
self.parser_input.parse_reference_into(&mut text)?;
} else {
let ch = self.parser_input.next_char()?;
text.push(ch);
}
}
let is_whitespace = text
.chars()
.all(|c| c == ' ' || c == '\t' || c == '\n' || c == '\r');
let node_type = if is_whitespace {
XmlNodeType::Whitespace
} else {
XmlNodeType::Text
};
let mut node = ReaderNode::new(node_type);
node.value = Some(text);
node.depth = self.depth;
Ok(node)
}
fn parse_comment(&mut self) -> Result<ReaderNode, ParseError> {
let content = parse_comment_content(&mut self.parser_input)?;
let mut node = ReaderNode::new(XmlNodeType::Comment);
node.value = Some(content);
node.depth = self.depth;
Ok(node)
}
fn parse_cdata(&mut self) -> Result<ReaderNode, ParseError> {
let content = parse_cdata_content(&mut self.parser_input)?;
let mut node = ReaderNode::new(XmlNodeType::CData);
node.value = Some(content);
node.depth = self.depth;
Ok(node)
}
fn parse_processing_instruction(&mut self) -> Result<ReaderNode, ParseError> {
let (target, data) = parse_pi_content(&mut self.parser_input)?;
let mut node = ReaderNode::new(XmlNodeType::ProcessingInstruction);
node.local_name = target;
node.value = data;
node.depth = self.depth;
Ok(node)
}
}
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Reads the whole document and returns `(node type, label)` pairs,
    /// where the label is the name or value most relevant to the node type.
    fn read_all_types(input: &str) -> Vec<(XmlNodeType, String)> {
        let mut reader = XmlReader::new(input);
        let mut result = Vec::new();
        while reader.read().unwrap() {
            let label = match reader.node_type() {
                XmlNodeType::Element | XmlNodeType::EndElement => {
                    reader.name().unwrap_or("").to_string()
                }
                XmlNodeType::Text
                | XmlNodeType::CData
                | XmlNodeType::Comment
                | XmlNodeType::Whitespace
                | XmlNodeType::XmlDeclaration => reader.value().unwrap_or("").to_string(),
                XmlNodeType::ProcessingInstruction => {
                    let target = reader.name().unwrap_or("").to_string();
                    match reader.value() {
                        Some(data) => format!("{target} {data}"),
                        None => target,
                    }
                }
                XmlNodeType::DocumentType => reader.name().unwrap_or("").to_string(),
                _ => String::new(),
            };
            result.push((reader.node_type(), label));
        }
        result
    }

    #[test]
    fn test_read_empty_element() {
        let mut reader = XmlReader::new("<root/>");
        assert!(reader.read().unwrap());
        assert_eq!(reader.node_type(), XmlNodeType::Element);
        assert_eq!(reader.name(), Some("root"));
        assert!(reader.is_empty_element());
        assert_eq!(reader.depth(), 0);
        assert!(!reader.read().unwrap());
    }

    #[test]
    fn test_read_element_with_content() {
        let nodes = read_all_types("<root>Hello</root>");
        assert_eq!(
            nodes,
            vec![
                (XmlNodeType::Element, "root".to_string()),
                (XmlNodeType::Text, "Hello".to_string()),
                (XmlNodeType::EndElement, "root".to_string()),
            ]
        );
    }

    #[test]
    fn test_read_nested_elements() {
        let nodes = read_all_types("<a><b>text</b></a>");
        assert_eq!(
            nodes,
            vec![
                (XmlNodeType::Element, "a".to_string()),
                (XmlNodeType::Element, "b".to_string()),
                (XmlNodeType::Text, "text".to_string()),
                (XmlNodeType::EndElement, "b".to_string()),
                (XmlNodeType::EndElement, "a".to_string()),
            ]
        );
    }

    #[test]
    fn test_read_depth_tracking() {
        let mut reader = XmlReader::new("<a><b><c/></b></a>");
        // <a>
        reader.read().unwrap();
        assert_eq!(reader.depth(), 0);
        assert_eq!(reader.name(), Some("a"));
        // <b>
        reader.read().unwrap();
        assert_eq!(reader.depth(), 1);
        assert_eq!(reader.name(), Some("b"));
        // <c/>
        reader.read().unwrap();
        assert_eq!(reader.depth(), 2);
        assert_eq!(reader.name(), Some("c"));
        assert!(reader.is_empty_element());
        // </b>
        reader.read().unwrap();
        assert_eq!(reader.depth(), 1);
        assert_eq!(reader.node_type(), XmlNodeType::EndElement);
        // </a>
        reader.read().unwrap();
        assert_eq!(reader.depth(), 0);
        assert_eq!(reader.node_type(), XmlNodeType::EndElement);
        // End of document.
        assert!(!reader.read().unwrap());
    }

    #[test]
    fn test_read_attributes() {
        let mut reader = XmlReader::new("<root id=\"1\" class=\"big\"/>");
        reader.read().unwrap();
        assert_eq!(reader.attribute_count(), 2);
        assert_eq!(reader.get_attribute("id"), Some("1"));
        assert_eq!(reader.get_attribute("class"), Some("big"));
        assert_eq!(reader.get_attribute("missing"), None);
    }

    #[test]
    fn test_attribute_navigation() {
        let mut reader = XmlReader::new("<root a=\"1\" b=\"2\" c=\"3\"/>");
        reader.read().unwrap();
        assert_eq!(reader.node_type(), XmlNodeType::Element);
        assert!(reader.move_to_first_attribute());
        assert_eq!(reader.node_type(), XmlNodeType::Attribute);
        assert_eq!(reader.name(), Some("a"));
        assert_eq!(reader.value(), Some("1"));
        assert!(reader.move_to_next_attribute());
        assert_eq!(reader.name(), Some("b"));
        assert_eq!(reader.value(), Some("2"));
        assert!(reader.move_to_next_attribute());
        assert_eq!(reader.name(), Some("c"));
        assert_eq!(reader.value(), Some("3"));
        assert!(!reader.move_to_next_attribute());
        assert!(reader.move_to_element());
        assert_eq!(reader.node_type(), XmlNodeType::Element);
        assert_eq!(reader.name(), Some("root"));
    }

    #[test]
    fn test_read_text_content() {
        // The ampersand must be escaped in the input; the reader expands it.
        let mut reader = XmlReader::new("<p>Hello &amp; world</p>");
        reader.read().unwrap(); // <p>
        reader.read().unwrap(); // text
        assert_eq!(reader.node_type(), XmlNodeType::Text);
        assert_eq!(reader.value(), Some("Hello & world"));
        assert!(reader.has_value());
    }

    #[test]
    fn test_read_whitespace_only_text() {
        let mut reader = XmlReader::new("<root> \n </root>");
        reader.read().unwrap(); // <root>
        reader.read().unwrap(); // whitespace
        assert_eq!(reader.node_type(), XmlNodeType::Whitespace);
        assert_eq!(reader.value(), Some(" \n "));
    }

    #[test]
    fn test_read_no_blanks_option() {
        let opts = ParseOptions::default().no_blanks(true);
        let mut reader = XmlReader::with_options("<root> <child/> </root>", opts);
        reader.read().unwrap();
        assert_eq!(reader.name(), Some("root"));
        // Whitespace before <child/> is skipped.
        reader.read().unwrap();
        assert_eq!(reader.node_type(), XmlNodeType::Element);
        assert_eq!(reader.name(), Some("child"));
        // Whitespace after <child/> is skipped.
        reader.read().unwrap();
        assert_eq!(reader.node_type(), XmlNodeType::EndElement);
        assert_eq!(reader.name(), Some("root"));
    }

    #[test]
    fn test_read_comment() {
        let nodes = read_all_types("<root><!-- hello --></root>");
        assert_eq!(
            nodes,
            vec![
                (XmlNodeType::Element, "root".to_string()),
                (XmlNodeType::Comment, " hello ".to_string()),
                (XmlNodeType::EndElement, "root".to_string()),
            ]
        );
    }

    #[test]
    fn test_read_cdata() {
        let nodes = read_all_types("<root><![CDATA[raw & data]]></root>");
        assert_eq!(
            nodes,
            vec![
                (XmlNodeType::Element, "root".to_string()),
                (XmlNodeType::CData, "raw & data".to_string()),
                (XmlNodeType::EndElement, "root".to_string()),
            ]
        );
    }

    #[test]
    fn test_read_processing_instruction() {
        let nodes = read_all_types("<?target data?><root/>");
        assert_eq!(
            nodes,
            vec![
                (
                    XmlNodeType::ProcessingInstruction,
                    "target data".to_string()
                ),
                (XmlNodeType::Element, "root".to_string()),
            ]
        );
    }

    #[test]
    fn test_read_xml_declaration() {
        let nodes = read_all_types("<?xml version=\"1.0\" encoding=\"UTF-8\"?><root/>");
        assert_eq!(
            nodes,
            vec![
                (
                    XmlNodeType::XmlDeclaration,
                    "version=1.0 encoding=UTF-8".to_string()
                ),
                (XmlNodeType::Element, "root".to_string()),
            ]
        );
    }

    #[test]
    fn test_read_doctype() {
        let nodes = read_all_types("<!DOCTYPE html><html/>");
        assert_eq!(
            nodes,
            vec![
                (XmlNodeType::DocumentType, "html".to_string()),
                (XmlNodeType::Element, "html".to_string()),
            ]
        );
    }

    #[test]
    fn test_read_namespace() {
        let mut reader = XmlReader::new("<root xmlns=\"http://example.com\"/>");
        reader.read().unwrap();
        assert_eq!(reader.name(), Some("root"));
        assert_eq!(reader.namespace_uri(), Some("http://example.com"));
        assert_eq!(reader.prefix(), None);
    }

    #[test]
    fn test_read_prefixed_namespace() {
        let mut reader = XmlReader::new("<ns:root xmlns:ns=\"http://example.com\"/>");
        reader.read().unwrap();
        assert_eq!(reader.name(), Some("root"));
        assert_eq!(reader.prefix(), Some("ns"));
        assert_eq!(reader.namespace_uri(), Some("http://example.com"));
    }

    #[test]
    fn test_read_attribute_ns() {
        let mut reader = XmlReader::new("<root xmlns:x=\"http://x.com\" x:attr=\"val\"/>");
        reader.read().unwrap();
        assert_eq!(reader.get_attribute("x:attr"), Some("val"));
        assert_eq!(reader.get_attribute_ns("attr", "http://x.com"), Some("val"));
        assert_eq!(reader.get_attribute_ns("attr", "http://other.com"), None);
    }

    #[test]
    fn test_read_mixed_content() {
        let nodes = read_all_types("<p>Hello <b>world</b>!</p>");
        assert_eq!(
            nodes,
            vec![
                (XmlNodeType::Element, "p".to_string()),
                (XmlNodeType::Text, "Hello ".to_string()),
                (XmlNodeType::Element, "b".to_string()),
                (XmlNodeType::Text, "world".to_string()),
                (XmlNodeType::EndElement, "b".to_string()),
                (XmlNodeType::Text, "!".to_string()),
                (XmlNodeType::EndElement, "p".to_string()),
            ]
        );
    }

    #[test]
    fn test_read_entity_references() {
        // All five predefined entities, in escaped form.
        let mut reader = XmlReader::new("<root>&amp;&lt;&gt;&apos;&quot;</root>");
        reader.read().unwrap(); // <root>
        reader.read().unwrap(); // text
        assert_eq!(reader.value(), Some("&<>'\""));
    }

    #[test]
    fn test_read_character_references() {
        // One decimal and one hexadecimal character reference.
        let mut reader = XmlReader::new("<root>&#65;&#x42;</root>");
        reader.read().unwrap(); // <root>
        reader.read().unwrap(); // text
        assert_eq!(reader.value(), Some("AB"));
    }

    #[test]
    fn test_read_error_mismatched_tags() {
        let mut reader = XmlReader::new("<a></b>");
        reader.read().unwrap(); // <a>
        let result = reader.read(); // </b>
        assert!(result.is_err());
    }

    #[test]
    fn test_read_returns_false_after_end() {
        let mut reader = XmlReader::new("<root/>");
        assert!(reader.read().unwrap());
        assert!(!reader.read().unwrap());
        // Stays finished on repeated calls.
        assert!(!reader.read().unwrap());
    }

    #[test]
    fn test_node_type_display() {
        assert_eq!(XmlNodeType::Element.to_string(), "Element");
        assert_eq!(XmlNodeType::EndElement.to_string(), "EndElement");
        assert_eq!(XmlNodeType::Text.to_string(), "Text");
        assert_eq!(XmlNodeType::None.to_string(), "None");
        assert_eq!(XmlNodeType::EndDocument.to_string(), "EndDocument");
    }

    #[test]
    fn test_has_value_element() {
        let mut reader = XmlReader::new("<root/>");
        reader.read().unwrap();
        assert_eq!(reader.node_type(), XmlNodeType::Element);
        assert!(!reader.has_value());
    }

    #[test]
    fn test_value_none_for_element() {
        let mut reader = XmlReader::new("<root/>");
        reader.read().unwrap();
        assert_eq!(reader.value(), None);
    }

    #[test]
    fn test_initial_state() {
        let reader = XmlReader::new("<root/>");
        assert_eq!(reader.node_type(), XmlNodeType::None);
        assert_eq!(reader.name(), None);
        assert_eq!(reader.value(), None);
        assert!(!reader.has_value());
        assert_eq!(reader.depth(), 0);
        assert_eq!(reader.attribute_count(), 0);
    }

    #[test]
    fn test_read_complex_document() {
        let xml = r#"<?xml version="1.0"?>
<!DOCTYPE doc>
<!-- prolog comment -->
<?style type="text/css"?>
<doc attr="val">
<child>text</child>
<![CDATA[raw]]>
<!-- inner comment -->
<empty/>
</doc>"#;
        let nodes = read_all_types(xml);
        let types: Vec<XmlNodeType> = nodes.iter().map(|(t, _)| *t).collect();
        assert!(types.contains(&XmlNodeType::XmlDeclaration));
        assert!(types.contains(&XmlNodeType::DocumentType));
        assert!(types.contains(&XmlNodeType::Comment));
        assert!(types.contains(&XmlNodeType::ProcessingInstruction));
        assert!(types.contains(&XmlNodeType::Element));
        assert!(types.contains(&XmlNodeType::Text));
        assert!(types.contains(&XmlNodeType::CData));
        assert!(types.contains(&XmlNodeType::EndElement));
    }

    #[test]
    fn test_read_prolog_comment() {
        let nodes = read_all_types("<!-- prolog --><root/>");
        assert_eq!(
            nodes,
            vec![
                (XmlNodeType::Comment, " prolog ".to_string()),
                (XmlNodeType::Element, "root".to_string()),
            ]
        );
    }

    #[test]
    fn test_read_trailing_comment() {
        let nodes = read_all_types("<root/><!-- trailing -->");
        assert_eq!(
            nodes,
            vec![
                (XmlNodeType::Element, "root".to_string()),
                (XmlNodeType::Comment, " trailing ".to_string()),
            ]
        );
    }

    #[test]
    fn test_move_to_element_when_not_on_attribute() {
        let mut reader = XmlReader::new("<root/>");
        reader.read().unwrap();
        assert!(!reader.move_to_element());
    }

    #[test]
    fn test_read_empty_input() {
        let mut reader = XmlReader::new("");
        assert!(!reader.read().unwrap());
    }

    #[test]
    fn test_read_deeply_nested() {
        let mut reader = XmlReader::new("<a><b><c><d><e>deep</e></d></c></b></a>");
        reader.read().unwrap(); // <a>
        assert_eq!(reader.depth(), 0);
        reader.read().unwrap(); // <b>
        assert_eq!(reader.depth(), 1);
        reader.read().unwrap(); // <c>
        assert_eq!(reader.depth(), 2);
        reader.read().unwrap(); // <d>
        assert_eq!(reader.depth(), 3);
        reader.read().unwrap(); // <e>
        assert_eq!(reader.depth(), 4);
        reader.read().unwrap(); // text
        assert_eq!(reader.depth(), 5);
        assert_eq!(reader.value(), Some("deep"));
    }

    #[test]
    fn test_read_single_quoted_attributes() {
        let mut reader = XmlReader::new("<root attr='value'/>");
        reader.read().unwrap();
        assert_eq!(reader.get_attribute("attr"), Some("value"));
    }
}