#![doc(html_root_url = "https://docs.rs/roxmltree/0.11.0")]
#![forbid(unsafe_code)]
#![warn(missing_docs)]
extern crate xmlparser;
use std::borrow::Cow;
use std::cmp::Ordering;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::num::NonZeroU32;
use std::ops::Deref;
pub use xmlparser::TextPos;
mod parse;
pub use crate::parse::*;
/// The namespace URI `http://www.w3.org/XML/1998/namespace`.
///
/// `Node::lookup_prefix` always resolves this URI to the `xml` prefix.
pub const NS_XML_URI: &str = "http://www.w3.org/XML/1998/namespace";
/// The namespace URI `http://www.w3.org/2000/xmlns/`.
// NOTE(review): not referenced in this part of the file; presumably the URI
// reserved for the `xmlns` prefix and consumed by the parser — confirm.
pub const NS_XMLNS_URI: &str = "http://www.w3.org/2000/xmlns/";
// Shorthand for the `usize` range type returned by the `range()` accessors.
type Range = std::ops::Range<usize>;
/// An XML document tree.
///
/// Nodes, attributes and namespaces are kept in flat vectors owned by the
/// document; `Node` values handed out by it borrow from these vectors.
pub struct Document<'input> {
// The text this document refers to; exposed via `input_text` and used by
// `text_pos_at`.
text: &'input str,
// Node storage addressed by `NodeId`; the root node is read from index 0.
nodes: Vec<NodeData<'input>>,
// Attribute storage; elements select their attributes with a range into it.
attrs: Vec<Attribute<'input>>,
// Namespace storage; elements select their namespaces with a range into it.
namespaces: Namespaces<'input>,
}
impl<'input> Document<'input> {
#[inline]
pub fn root<'a>(&'a self) -> Node<'a, 'input> {
Node { id: NodeId::new(0), d: &self.nodes[0], doc: self }
}
#[inline]
pub fn get_node<'a>(&'a self, id: NodeId) -> Option<Node<'a, 'input>> {
self.nodes.get(id.get_usize()).map(|data| Node { id, d: data, doc: self })
}
#[inline]
pub fn root_element<'a>(&'a self) -> Node<'a, 'input> {
self.root().first_element_child().expect("XML documents must contain a root element")
}
#[inline]
pub fn descendants(&self) -> Descendants {
self.root().descendants()
}
#[inline]
pub fn text_pos_at(&self, pos: usize) -> TextPos {
xmlparser::Stream::from(self.text).gen_text_pos_from(pos)
}
#[inline]
pub fn input_text(&self) -> &'input str {
self.text
}
}
impl<'input> fmt::Debug for Document<'input> {
    /// Prints the document as an indented tree of its nodes.
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        // Writes `depth` copies of the indentation unit.
        fn indent(depth: usize, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
            for _ in 0..depth {
                write!(f, " ")?;
            }
            Ok(())
        }

        // Prints a named, bracketed list with one `{:?}` entry per line.
        // An empty list prints nothing at all.
        fn print_vec<T: fmt::Debug>(
            prefix: &str,
            data: &[T],
            depth: usize,
            f: &mut fmt::Formatter,
        ) -> Result<(), fmt::Error> {
            if data.is_empty() {
                return Ok(());
            }

            indent(depth, f)?;
            writeln!(f, "{}: [", prefix)?;
            for item in data {
                indent(depth + 1, f)?;
                writeln!(f, "{:?}", item)?;
            }
            indent(depth, f)?;
            writeln!(f, "]")?;
            Ok(())
        }

        // Prints every child of `parent`, recursing into element children.
        fn print_children(
            parent: Node,
            depth: usize,
            f: &mut fmt::Formatter,
        ) -> Result<(), fmt::Error> {
            for child in parent.children() {
                // Non-element nodes are printed on a single line.
                if !child.is_element() {
                    indent(depth, f)?;
                    writeln!(f, "{:?}", child)?;
                    continue;
                }

                indent(depth, f)?;
                writeln!(f, "Element {{")?;
                indent(depth, f)?;
                writeln!(f, " tag_name: {:?}", child.tag_name())?;
                print_vec("attributes", child.attributes(), depth + 1, f)?;
                print_vec("namespaces", child.namespaces(), depth + 1, f)?;

                if child.has_children() {
                    indent(depth, f)?;
                    writeln!(f, " children: [")?;
                    print_children(child, depth + 2, f)?;
                    indent(depth, f)?;
                    writeln!(f, " ]")?;
                }

                indent(depth, f)?;
                writeln!(f, "}}")?;
            }
            Ok(())
        }

        let root = self.root();
        if root.has_children() {
            writeln!(f, "Document [")?;
            print_children(root, 1, f)?;
            writeln!(f, "]")
        } else {
            write!(f, "Document []")
        }
    }
}
/// The kind of a node, as reported by `Node::node_type`.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum NodeType {
/// The root node of a document.
Root,
/// An element node.
Element,
/// A processing instruction node.
PI,
/// A comment node.
Comment,
/// A text node.
Text,
}
/// The content of a processing instruction node, as returned by `Node::pi`.
#[derive(Clone, Copy, PartialEq, Debug)]
#[allow(missing_docs)]
pub struct PI<'input> {
// The target of the processing instruction.
pub target: &'input str,
// The optional value that follows the target.
pub value: Option<&'input str>,
}
// A range whose bounds are stored as `u32` instead of `usize`.
// Converted back to a `usize` range by `to_urange`.
#[derive(Clone, Copy, Debug)]
struct ShortRange {
// Start bound of the range.
start: u32,
// End bound of the range.
end: u32,
}
impl From<Range> for ShortRange {
    /// Narrows a `usize` range to `u32` bounds.
    ///
    /// Both bounds are expected to fit into `u32`; this is only checked in
    /// debug builds.
    #[inline]
    fn from(range: Range) -> Self {
        let (start, end) = (range.start, range.end);
        debug_assert!(start <= std::u32::MAX as usize);
        debug_assert!(end <= std::u32::MAX as usize);
        ShortRange::new(start as u32, end as u32)
    }
}
impl ShortRange {
#[inline]
fn new(start: u32, end: u32) -> Self {
ShortRange { start, end }
}
#[inline]
fn to_urange(self) -> Range {
self.start as usize .. self.end as usize
}
}
/// The identifier of a node inside a `Document`.
///
/// The value is stored shifted by one in a `NonZeroU32` (see `NodeId::new`
/// and `NodeId::get`).
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct NodeId(NonZeroU32);
impl NodeId {
    /// Creates a node id from a zero-based index.
    ///
    /// The index is stored shifted by one so that it fits a `NonZeroU32`;
    /// `id` must therefore be smaller than `u32::MAX`.
    #[inline]
    pub fn new(id: u32) -> Self {
        debug_assert!(id < std::u32::MAX);
        let shifted = NonZeroU32::new(id + 1);
        NodeId(shifted.unwrap())
    }

    /// Returns the zero-based index as `u32`.
    #[inline]
    pub fn get(self) -> u32 {
        let shifted: u32 = self.0.get();
        shifted - 1
    }

    /// Returns the zero-based index as `usize`.
    #[inline]
    pub fn get_usize(self) -> usize {
        let index = self.get();
        index as usize
    }
}
impl From<u32> for NodeId {
#[inline]
fn from(id: u32) -> Self {
NodeId::new(id)
}
}
impl From<usize> for NodeId {
    /// Builds a node id from a zero-based `usize` index.
    ///
    /// The index is expected to fit into `u32`; this is only checked in
    /// debug builds.
    #[inline]
    fn from(id: usize) -> Self {
        debug_assert!(id <= std::u32::MAX as usize);
        let narrowed = id as u32;
        NodeId::new(narrowed)
    }
}
// The per-kind payload of a node. `Node::node_type` maps each variant to the
// public `NodeType`.
enum NodeKind<'input> {
// The document root; carries no data.
Root,
// An element with its tag name and the ranges that select its attributes
// and namespaces inside the document-wide storage.
Element {
tag_name: ExpandedNameOwned<'input>,
attributes: ShortRange,
namespaces: ShortRange,
},
// A processing instruction.
PI(PI<'input>),
// The text of a comment.
Comment(&'input str),
// The text of a text node; either borrowed or owned.
Text(Cow<'input, str>),
}
// The stored form of a node: tree links, payload and range.
struct NodeData<'input> {
// Id of the parent node, if any (see `Node::parent`).
parent: Option<NodeId>,
// Id of the previous sibling, if any (see `Node::prev_sibling`).
prev_sibling: Option<NodeId>,
// Id at which `Descendants` stops when started from this node.
// `Node::next_sibling` treats it as the next sibling only when that node's
// `prev_sibling` points back to this node.
next_subtree: Option<NodeId>,
// Id of the last child, if any. When it is set, the first child is the
// node stored directly after this one (see `Node::first_child`).
last_child: Option<NodeId>,
// Kind-specific payload.
kind: NodeKind<'input>,
// Range returned by `Node::range`.
range: ShortRange,
}
/// An attribute of an element.
#[derive(Clone)]
pub struct Attribute<'input> {
// Expanded (namespace-aware) attribute name.
name: ExpandedNameOwned<'input>,
// Attribute value; either borrowed or owned.
value: Cow<'input, str>,
// Range returned by `Attribute::range`.
range: ShortRange,
// Range returned by `Attribute::value_range`.
value_range: ShortRange,
}
impl<'input> Attribute<'input> {
#[inline]
pub fn namespace(&self) -> Option<&str> {
self.name.ns.as_ref().map(Cow::as_ref)
}
#[inline]
pub fn name(&self) -> &str {
self.name.name
}
#[inline]
pub fn value(&self) -> &str {
&self.value
}
#[inline]
pub fn range(&self) -> Range {
self.range.to_urange()
}
#[inline]
pub fn value_range(&self) -> Range {
self.value_range.to_urange()
}
}
impl<'input> PartialEq for Attribute<'input> {
    /// Two attributes are equal when both name and value match; the stored
    /// ranges are not compared.
    #[inline]
    fn eq(&self, other: &Attribute<'input>) -> bool {
        if self.name != other.name {
            return false;
        }
        self.value == other.value
    }
}
impl<'input> fmt::Debug for Attribute<'input> {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
write!(f, "Attribute {{ name: {:?}, value: {:?} }}",
self.name, self.value)
}
}
/// A namespace entry: an optional name (prefix) together with its URI.
#[derive(Clone, PartialEq, Debug)]
pub struct Namespace<'input> {
// Namespace name; `None` marks the entry that `Node::default_namespace`
// looks for.
name: Option<&'input str>,
// Namespace URI; either borrowed or owned.
uri: Cow<'input, str>,
}
impl<'input> Namespace<'input> {
#[inline]
pub fn name(&self) -> Option<&str> {
self.name
}
#[inline]
pub fn uri(&self) -> &str {
self.uri.as_ref()
}
}
// Document-wide namespace storage; dereferences to the inner vector.
struct Namespaces<'input>(Vec<Namespace<'input>>);
impl<'input> Namespaces<'input> {
    /// Appends a namespace entry. An empty name is not allowed; a missing
    /// name must be passed as `None` (checked in debug builds only).
    #[inline]
    fn push_ns(&mut self, name: Option<&'input str>, uri: Cow<'input, str>) {
        debug_assert_ne!(name, Some(""));
        let entry = Namespace { name, uri };
        self.0.push(entry);
    }

    /// Returns `true` when an entry at index `start` or later has the given
    /// name. Panics when `start` is past the end of the storage.
    #[inline]
    fn exists(&self, start: usize, prefix: Option<&str>) -> bool {
        for ns in &self.0[start..] {
            if ns.name == prefix {
                return true;
            }
        }
        false
    }
}
impl<'input> Deref for Namespaces<'input> {
type Target = Vec<Namespace<'input>>;
#[inline]
fn deref(&self) -> &Self::Target {
&self.0
}
}
// An expanded name as stored inside the document: optional namespace URI,
// prefix and local name. `as_ref` converts it to the public `ExpandedName`.
#[derive(Clone, PartialEq)]
struct ExpandedNameOwned<'input> {
// Namespace URI, if any; either borrowed or owned.
ns: Option<Cow<'input, str>>,
// Prefix and local name. The prefix takes part in the derived `PartialEq`
// but is dropped by `as_ref`.
prefix: &'input str, name: &'input str,
}
impl<'input> ExpandedNameOwned<'input> {
#[inline]
fn as_ref(&self) -> ExpandedName {
ExpandedName {
uri: self.ns.as_ref().map(Cow::as_ref),
name: self.name,
}
}
}
impl<'input> fmt::Debug for ExpandedNameOwned<'input> {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
match self.ns {
Some(ref ns) => write!(f, "{{{}}}{}", ns.as_ref(), self.name),
None => write!(f, "{}", self.name),
}
}
}
/// An expanded name: an optional namespace URI plus a local name.
///
/// Can be created from a `&str` (local name only) or from a
/// `(&str, &str)` pair of namespace URI and local name.
#[derive(Clone, Copy, PartialEq)]
pub struct ExpandedName<'input> {
// Namespace URI, if any.
uri: Option<&'input str>,
// Local name.
name: &'input str,
}
impl<'input> ExpandedName<'input> {
#[inline]
pub fn namespace(&self) -> Option<&'input str> {
self.uri
}
#[inline]
pub fn name(&self) -> &'input str {
self.name
}
}
impl<'input> fmt::Debug for ExpandedName<'input> {
    /// Prints `{uri}name` when a namespace is present and `name` otherwise.
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        if let Some(ns) = self.namespace() {
            write!(f, "{{{}}}{}", ns, self.name)
        } else {
            write!(f, "{}", self.name)
        }
    }
}
impl<'input> From<&'input str> for ExpandedName<'input> {
#[inline]
fn from(v: &'input str) -> Self {
ExpandedName {
uri: None,
name: v,
}
}
}
impl<'input> From<(&'input str, &'input str)> for ExpandedName<'input> {
#[inline]
fn from(v: (&'input str, &'input str)) -> Self {
ExpandedName {
uri: Some(v.0),
name: v.1,
}
}
}
/// A lightweight, copyable handle to a node of a `Document`.
///
/// Equality, ordering and hashing are based on the node id and on the
/// identity (address) of the owning document.
#[derive(Clone, Copy)]
pub struct Node<'a, 'input: 'a> {
// Id of this node inside `doc`.
id: NodeId,
// The document this node belongs to.
doc: &'a Document<'input>,
// The stored data of this node.
d: &'a NodeData<'input>,
}
impl Eq for Node<'_, '_> {}

impl PartialEq for Node<'_, '_> {
    /// Two nodes are equal when they have the same id and refer to the very
    /// same document and node data (compared by address, not by content).
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        if self.id != other.id {
            return false;
        }
        std::ptr::eq(self.doc, other.doc) && std::ptr::eq(self.d, other.d)
    }
}
impl PartialOrd for Node<'_, '_> {
    /// Nodes are totally ordered; this simply forwards to `Ord::cmp`.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let ordering = Ord::cmp(self, other);
        Some(ordering)
    }
}
impl Ord for Node<'_, '_> {
    /// Orders nodes by id first; nodes with equal ids are ordered by the
    /// address of their owning document.
    fn cmp(&self, other: &Self) -> Ordering {
        let by_id = self.id.0.cmp(&other.id.0);
        if by_id != Ordering::Equal {
            return by_id;
        }

        let lhs_doc = self.doc as *const Document;
        let rhs_doc = other.doc as *const Document;
        lhs_doc.cmp(&rhs_doc)
    }
}
impl Hash for Node<'_, '_> {
    /// Hashes the node id followed by the addresses of the owning document
    /// and of the node data, matching the fields compared by `PartialEq`.
    fn hash<H: Hasher>(&self, state: &mut H) {
        let doc_ptr = self.doc as *const Document;
        let data_ptr = self.d as *const NodeData;

        self.id.0.hash(state);
        doc_ptr.hash(state);
        data_ptr.hash(state);
    }
}
impl<'a, 'input: 'a> Node<'a, 'input> {
    /// Returns the type of this node.
    #[inline]
    pub fn node_type(&self) -> NodeType {
        match self.d.kind {
            NodeKind::Root => NodeType::Root,
            NodeKind::Element { .. } => NodeType::Element,
            NodeKind::PI { .. } => NodeType::PI,
            NodeKind::Comment(_) => NodeType::Comment,
            NodeKind::Text(_) => NodeType::Text,
        }
    }

    /// Returns `true` when this is the root node.
    #[inline]
    pub fn is_root(&self) -> bool {
        self.node_type() == NodeType::Root
    }

    /// Returns `true` when this is an element node.
    #[inline]
    pub fn is_element(&self) -> bool {
        self.node_type() == NodeType::Element
    }

    /// Returns `true` when this is a processing instruction node.
    #[inline]
    pub fn is_pi(&self) -> bool {
        self.node_type() == NodeType::PI
    }

    /// Returns `true` when this is a comment node.
    #[inline]
    pub fn is_comment(&self) -> bool {
        self.node_type() == NodeType::Comment
    }

    /// Returns `true` when this is a text node.
    #[inline]
    pub fn is_text(&self) -> bool {
        self.node_type() == NodeType::Text
    }

    /// Returns the document this node belongs to.
    #[inline]
    pub fn document(&self) -> &'a Document<'input> {
        self.doc
    }

    /// Returns the expanded tag name of an element.
    ///
    /// For non-element nodes an empty name without a namespace is returned.
    #[inline]
    pub fn tag_name(&self) -> ExpandedName<'a> {
        match self.d.kind {
            NodeKind::Element { ref tag_name, .. } => tag_name.as_ref(),
            _ => "".into(),
        }
    }

    /// Checks whether this is an element with the given tag name.
    ///
    /// When `name` carries a namespace, both namespace and local name must
    /// match. When it does not, only the local name is compared and the
    /// element's namespace is ignored. Non-element nodes never match.
    pub fn has_tag_name<'n, N>(&self, name: N) -> bool
    where
        N: Into<ExpandedName<'n>>,
    {
        let name = name.into();
        match self.d.kind {
            NodeKind::Element { ref tag_name, .. } => match name.namespace() {
                Some(_) => tag_name.as_ref() == name,
                None => tag_name.name == name.name,
            },
            _ => false,
        }
    }

    /// Returns the URI of the first namespace of this element that has no
    /// name (prefix), if any.
    pub fn default_namespace(&self) -> Option<&'a str> {
        self.namespaces()
            .iter()
            .find(|ns| ns.name.is_none())
            .map(|ns| ns.uri.as_ref())
    }

    /// Returns the prefix bound to `uri` on this element.
    ///
    /// `NS_XML_URI` always resolves to `xml`. `None` is returned both when
    /// the URI is unknown and when the matching namespace has no prefix.
    pub fn lookup_prefix(&self, uri: &str) -> Option<&'a str> {
        if uri == NS_XML_URI {
            return Some("xml");
        }

        self.namespaces()
            .iter()
            .find(|ns| ns.uri == uri)
            .and_then(|ns| ns.name)
    }

    /// Returns the namespace URI bound to `prefix` on this element.
    ///
    /// Pass `None` to look up the namespace without a prefix.
    pub fn lookup_namespace_uri(&self, prefix: Option<&str>) -> Option<&'a str> {
        self.namespaces()
            .iter()
            .find(|ns| ns.name == prefix)
            .map(|ns| ns.uri.as_ref())
    }

    /// Returns the value of the attribute with the given expanded name.
    pub fn attribute<'n, N>(&self, name: N) -> Option<&'a str>
    where
        N: Into<ExpandedName<'n>>,
    {
        let name = name.into();
        self.attributes()
            .iter()
            .find(|a| a.name.as_ref() == name)
            .map(|a| a.value.as_ref())
    }

    /// Returns the attribute with the given expanded name.
    pub fn attribute_node<'n, N>(&self, name: N) -> Option<&'a Attribute<'input>>
    where
        N: Into<ExpandedName<'n>>,
    {
        let name = name.into();
        self.attributes().iter().find(|a| a.name.as_ref() == name)
    }

    /// Checks whether this element has an attribute with the given expanded name.
    pub fn has_attribute<'n, N>(&self, name: N) -> bool
    where
        N: Into<ExpandedName<'n>>,
    {
        let name = name.into();
        self.attributes().iter().any(|a| a.name.as_ref() == name)
    }

    /// Returns the attributes of an element; empty for any other node type.
    #[inline]
    pub fn attributes(&self) -> &'a [Attribute<'input>] {
        match self.d.kind {
            NodeKind::Element { ref attributes, .. } => &self.doc.attrs[attributes.to_urange()],
            _ => &[],
        }
    }

    /// Returns the namespaces of an element; empty for any other node type.
    #[inline]
    pub fn namespaces(&self) -> &'a [Namespace<'input>] {
        match self.d.kind {
            NodeKind::Element { ref namespaces, .. } => {
                &self.doc.namespaces[namespaces.to_urange()]
            }
            _ => &[],
        }
    }

    /// Returns the text of this node.
    ///
    /// - For an element: the text of its first child, but only when that
    ///   child is a text node.
    /// - For comment and text nodes: their own text.
    /// - For all other nodes: `None`.
    #[inline]
    pub fn text(&self) -> Option<&'a str> {
        match self.d.kind {
            NodeKind::Element { .. } => {
                // The child handle already points at its data, so there is no
                // need to look it up in the document again.
                let child = self.first_child()?;
                match child.d.kind {
                    NodeKind::Text(ref text) => Some(text),
                    _ => None,
                }
            }
            NodeKind::Comment(text) => Some(text),
            NodeKind::Text(ref text) => Some(text),
            _ => None,
        }
    }

    /// Returns the text that directly follows this element, i.e. the text of
    /// the next sibling when that sibling is a text node.
    ///
    /// Always `None` for non-element nodes.
    #[inline]
    pub fn tail(&self) -> Option<&'a str> {
        if !self.is_element() {
            return None;
        }

        let sibling = self.next_sibling()?;
        match sibling.d.kind {
            NodeKind::Text(ref text) => Some(text),
            _ => None,
        }
    }

    /// Returns the processing instruction data; `None` for other node types.
    #[inline]
    pub fn pi(&self) -> Option<PI<'input>> {
        match self.d.kind {
            NodeKind::PI(pi) => Some(pi),
            _ => None,
        }
    }

    /// Returns the parent of this node.
    #[inline]
    pub fn parent(&self) -> Option<Self> {
        self.d.parent.map(|id| self.doc.get_node(id).unwrap())
    }

    /// Returns the closest ancestor that is an element.
    pub fn parent_element(&self) -> Option<Self> {
        // `ancestors` starts with the node itself, hence the `skip(1)`.
        self.ancestors().skip(1).find(|n| n.is_element())
    }

    /// Returns the previous sibling of this node.
    #[inline]
    pub fn prev_sibling(&self) -> Option<Self> {
        self.d.prev_sibling.map(|id| self.doc.get_node(id).unwrap())
    }

    /// Returns the closest previous sibling that is an element.
    pub fn prev_sibling_element(&self) -> Option<Self> {
        self.prev_siblings().skip(1).find(|n| n.is_element())
    }

    /// Returns the next sibling of this node.
    #[inline]
    pub fn next_sibling(&self) -> Option<Self> {
        self.d
            .next_subtree
            .map(|id| self.doc.get_node(id).unwrap())
            .and_then(|node| {
                // The node that starts the next subtree is our sibling only
                // when it names us as its previous sibling.
                let possibly_self = node
                    .d
                    .prev_sibling
                    .expect("next_subtree will always have a previous sibling");
                if possibly_self == self.id {
                    Some(node)
                } else {
                    None
                }
            })
    }

    /// Returns the closest following sibling that is an element.
    pub fn next_sibling_element(&self) -> Option<Self> {
        self.next_siblings().skip(1).find(|n| n.is_element())
    }

    /// Returns the first child of this node.
    #[inline]
    pub fn first_child(&self) -> Option<Self> {
        // When a node has children, the first one is stored right after it.
        self.d
            .last_child
            .map(|_| self.doc.get_node(NodeId::new(self.id.get() + 1)).unwrap())
    }

    /// Returns the first child that is an element.
    pub fn first_element_child(&self) -> Option<Self> {
        self.children().find(|n| n.is_element())
    }

    /// Returns the last child of this node.
    #[inline]
    pub fn last_child(&self) -> Option<Self> {
        self.d.last_child.map(|id| self.doc.get_node(id).unwrap())
    }

    /// Returns the last child that is an element.
    pub fn last_element_child(&self) -> Option<Self> {
        // `Children` is double-ended, so search from the back instead of
        // visiting every child.
        self.children().rfind(|n| n.is_element())
    }

    /// Returns `true` when this node has a previous or a next sibling.
    #[inline]
    pub fn has_siblings(&self) -> bool {
        self.d.prev_sibling.is_some() || self.next_sibling().is_some()
    }

    /// Returns `true` when this node has at least one child.
    #[inline]
    pub fn has_children(&self) -> bool {
        self.d.last_child.is_some()
    }

    /// Returns an iterator over this node followed by its ancestors.
    #[inline]
    pub fn ancestors(&self) -> AxisIter<'a, 'input> {
        AxisIter { node: Some(*self), next: Node::parent }
    }

    /// Returns an iterator over this node followed by its previous siblings.
    #[inline]
    pub fn prev_siblings(&self) -> AxisIter<'a, 'input> {
        AxisIter { node: Some(*self), next: Node::prev_sibling }
    }

    /// Returns an iterator over this node followed by its next siblings.
    #[inline]
    pub fn next_siblings(&self) -> AxisIter<'a, 'input> {
        AxisIter { node: Some(*self), next: Node::next_sibling }
    }

    /// Returns an iterator over this node followed by the chain of first children.
    #[inline]
    pub fn first_children(&self) -> AxisIter<'a, 'input> {
        AxisIter { node: Some(*self), next: Node::first_child }
    }

    /// Returns an iterator over this node followed by the chain of last children.
    #[inline]
    pub fn last_children(&self) -> AxisIter<'a, 'input> {
        AxisIter { node: Some(*self), next: Node::last_child }
    }

    /// Returns a double-ended iterator over the direct children of this node.
    #[inline]
    pub fn children(&self) -> Children<'a, 'input> {
        Children { front: self.first_child(), back: self.last_child() }
    }

    /// Returns an iterator over this node and all of its descendants.
    #[inline]
    pub fn descendants(&self) -> Descendants<'a, 'input> {
        Descendants::new(*self)
    }

    /// Returns the stored range of this node.
    #[inline]
    pub fn range(&self) -> Range {
        self.d.range.to_urange()
    }

    /// Returns the id of this node.
    #[inline]
    pub fn id(&self) -> NodeId {
        self.id
    }
}
impl<'a, 'input: 'a> fmt::Debug for Node<'a, 'input> {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
match self.d.kind {
NodeKind::Root => write!(f, "Root"),
NodeKind::Element { .. } => {
write!(f, "Element {{ tag_name: {:?}, attributes: {:?}, namespaces: {:?} }}",
self.tag_name(), self.attributes(), self.namespaces())
}
NodeKind::PI(pi) => {
write!(f, "PI {{ target: {:?}, value: {:?} }}", pi.target, pi.value)
}
NodeKind::Comment(text) => write!(f, "Comment({:?})", text),
NodeKind::Text(ref text) => write!(f, "Text({:?})", text),
}
}
}
/// An iterator that walks from a node along one axis of the tree.
///
/// It yields the starting node first and then keeps applying a step
/// function until that function returns `None`.
#[derive(Clone)]
pub struct AxisIter<'a, 'input: 'a> {
// The node that the next call to `next()` will yield.
node: Option<Node<'a, 'input>>,
// Step function producing the following node (e.g. `Node::parent`).
next: fn(&Node<'a, 'input>) -> Option<Node<'a, 'input>>,
}
impl<'a, 'input: 'a> Iterator for AxisIter<'a, 'input> {
    type Item = Node<'a, 'input>;

    /// Yields the pending node and advances along the axis by applying the
    /// step function to it.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let current = self.node.take()?;
        self.node = (self.next)(&current);
        Some(current)
    }
}
/// A double-ended iterator over the direct children of a node.
#[derive(Clone)]
pub struct Children<'a, 'input: 'a> {
// Next node to yield from the front; `None` once exhausted.
front: Option<Node<'a, 'input>>,
// Next node to yield from the back; `None` once exhausted.
back: Option<Node<'a, 'input>>,
}
impl<'a, 'input: 'a> Iterator for Children<'a, 'input> {
type Item = Node<'a, 'input>;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
if self.front == self.back {
let node = self.front.take();
self.back = None;
node
} else {
let node = self.front.take();
self.front = node.as_ref().and_then(Node::next_sibling);
node
}
}
}
impl<'a, 'input: 'a> DoubleEndedIterator for Children<'a, 'input> {
#[inline]
fn next_back(&mut self) -> Option<Self::Item> {
if self.back == self.front {
let node = self.back.take();
self.front = None;
node
} else {
let node = self.back.take();
self.back = node.as_ref().and_then(Node::prev_sibling);
node
}
}
}
/// An iterator over a node and all of its descendants.
///
/// It visits consecutive node ids, starting at the start node and stopping
/// before `until`.
#[derive(Clone)]
pub struct Descendants<'a, 'input> {
// Document the visited nodes are fetched from.
doc: &'a Document<'input>,
// Id of the node that the next call to `next()` will yield.
current: NodeId,
// Exclusive end of the iteration.
until: NodeId,
}
impl<'a, 'input> Descendants<'a, 'input> {
#[inline]
fn new(start: Node<'a, 'input>) -> Self {
Self {
doc: &start.doc,
current: start.id,
until: start.d.next_subtree.unwrap_or_else(|| NodeId::from(start.doc.nodes.len()))
}
}
}
impl<'a, 'input> Iterator for Descendants<'a, 'input> {
    type Item = Node<'a, 'input>;

    /// Yields the node at the cursor and advances the cursor by one id.
    ///
    /// Once the cursor reaches `until` the iterator is exhausted and keeps
    /// returning `None`; the cursor is no longer advanced, so repeated calls
    /// can never step past the end of the subtree.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        if self.current == self.until {
            return None;
        }

        let node = self.doc.get_node(self.current)?;
        // Advance only after a node was actually produced.
        self.current = NodeId::new(self.current.get() + 1);
        Some(node)
    }
}