use std::convert::TryInto;
use std::fmt;
use std::iter;
use std::mem;
use std::num::NonZeroU32;
use std::ops::{Deref, DerefMut};
#[doc(no_inline)]
pub use html5ever::{Attribute, LocalName, Namespace, QualName};
#[doc(no_inline)]
pub use tendril::StrTendril;
mod node_ref;
mod serializer;
#[macro_use] pub mod filter;
pub mod html;
pub mod xml;
#[cfg(test)]
mod tests;
pub use node_ref::{NodeRef, Selector};
pub struct Document {
nodes: Vec<Node>,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct NodeId(NonZeroU32);
#[derive(Debug)]
pub struct Node {
parent: Option<NodeId>,
prev_sibling: Option<NodeId>,
next_sibling: Option<NodeId>,
first_child: Option<NodeId>,
last_child: Option<NodeId>,
data: NodeData,
}
#[derive(Clone, Debug)]
pub enum NodeData {
Hole,
Document,
Doctype {
name: StrTendril,
_public_id: StrTendril,
_system_id: StrTendril,
},
Text(StrTendril),
Comment(StrTendril),
Elem(Element),
ProcessingInstruction {
target: StrTendril,
data: StrTendril,
},
}
#[derive(Clone, Debug)]
pub struct Element {
pub name: QualName,
pub attrs: Vec<Attribute>,
}
impl Document {
pub const DOCUMENT_NODE_ID: NodeId = NodeId(
unsafe { NonZeroU32::new_unchecked(1) }
);
pub fn new() -> Self {
Document { nodes: vec![
Node::new(NodeData::Hole),
Node::new(NodeData::Document)
]}
}
pub fn root_element(&self) -> Option<NodeId> {
let document_node = &self[Document::DOCUMENT_NODE_ID];
debug_assert!(match document_node.data {
NodeData::Document => true,
_ => false
});
debug_assert!(document_node.parent.is_none());
debug_assert!(document_node.next_sibling.is_none());
debug_assert!(document_node.prev_sibling.is_none());
let mut root = None;
for child in self.children(Document::DOCUMENT_NODE_ID) {
match &self[child].data {
NodeData::Doctype { .. }
| NodeData::Comment(_)
| NodeData::ProcessingInstruction { .. } => {}
NodeData::Document => {
debug_assert!(false, "Document child of Document");
root = None;
break;
}
NodeData::Hole => {
debug_assert!(false, "Hole in Document");
root = None;
break;
}
NodeData::Text(_) => {
root = None;
break;
}
NodeData::Elem(_) => {
if root.is_none() {
root = Some(child);
} else {
root = None;
break;
}
}
}
}
root
}
fn push_node(&mut self, node: Node) -> NodeId {
let next_index = self.nodes.len()
.try_into()
.expect("Document (u32) node index overflow");
debug_assert!(next_index > 1);
self.nodes.push(node);
NodeId(unsafe { NonZeroU32::new_unchecked(next_index) })
}
pub fn detach(&mut self, id: NodeId) {
assert!(
id != Document::DOCUMENT_NODE_ID,
"Can't detach the synthetic document node");
let (parent, prev_sibling, next_sibling) = {
let node = &mut self[id];
(node.parent.take(),
node.prev_sibling.take(),
node.next_sibling.take())
};
if let Some(next_sibling) = next_sibling {
self[next_sibling].prev_sibling = prev_sibling
} else if let Some(parent) = parent {
self[parent].last_child = prev_sibling;
}
if let Some(prev_sibling) = prev_sibling {
self[prev_sibling].next_sibling = next_sibling;
} else if let Some(parent) = parent {
self[parent].first_child = next_sibling;
}
}
pub fn append_child(&mut self, parent: NodeId, node: Node)
-> NodeId
{
let id = self.push_node(node);
self.append(parent, id);
id
}
fn append(&mut self, parent: NodeId, new_child: NodeId) {
self.detach(new_child);
self[new_child].parent = Some(parent);
if let Some(last_child) = self[parent].last_child.take() {
self[new_child].prev_sibling = Some(last_child);
debug_assert!(self[last_child].next_sibling.is_none());
self[last_child].next_sibling = Some(new_child);
} else {
debug_assert!(self[parent].first_child.is_none());
self[parent].first_child = Some(new_child);
}
self[parent].last_child = Some(new_child);
}
pub fn insert_before_sibling(&mut self, sibling: NodeId, node: Node)
-> NodeId
{
let id = self.push_node(node);
self.insert_before(sibling, id);
id
}
fn insert_before(&mut self, sibling: NodeId, new_sibling: NodeId) {
self.detach(new_sibling);
self[new_sibling].parent = self[sibling].parent;
self[new_sibling].next_sibling = Some(sibling);
if let Some(prev_sibling) = self[sibling].prev_sibling.take() {
self[new_sibling].prev_sibling = Some(prev_sibling);
debug_assert_eq!(
self[prev_sibling].next_sibling,
Some(sibling)
);
self[prev_sibling].next_sibling = Some(new_sibling);
} else if let Some(parent) = self[sibling].parent {
debug_assert_eq!(self[parent].first_child, Some(sibling));
self[parent].first_child = Some(new_sibling);
}
self[sibling].prev_sibling = Some(new_sibling);
}
pub fn text(&self, id: NodeId) -> Option<StrTendril> {
let mut next = Vec::new();
push_if(&mut next, self[id].first_child);
let mut text = None;
while let Some(id) = next.pop() {
let node = &self[id];
if let NodeData::Text(t) = &node.data {
match &mut text {
None => text = Some(t.clone()),
Some(text) => text.push_tendril(&t),
}
push_if(&mut next, node.next_sibling);
} else {
push_if(&mut next, node.next_sibling);
push_if(&mut next, node.first_child);
}
}
text
}
pub fn children<'a>(&'a self, id: NodeId)
-> impl Iterator<Item = NodeId> + 'a
{
iter::successors(
self[id].first_child,
move |&id| self[id].next_sibling
)
}
pub fn node_and_following_siblings<'a>(&'a self, id: NodeId)
-> impl Iterator<Item = NodeId> + 'a
{
iter::successors(Some(id), move |&id| self[id].next_sibling)
}
pub fn node_and_ancestors<'a>(&'a self, id: NodeId)
-> impl Iterator<Item = NodeId> + 'a
{
iter::successors(Some(id), move |&id| self[id].parent)
}
pub fn nodes<'a>(&'a self) -> impl Iterator<Item = NodeId> + 'a {
iter::successors(
Some(Document::DOCUMENT_NODE_ID),
move |&id| self.next_in_tree_order(id)
)
}
fn next_in_tree_order(&self, id: NodeId) -> Option<NodeId> {
self[id].first_child.or_else(|| {
self.node_and_ancestors(id)
.find_map(|ancestor| self[ancestor].next_sibling)
})
}
pub fn deep_clone(&self, id: NodeId) -> Document {
let mut ndoc = Document::new();
ndoc.deep_clone_to(Document::DOCUMENT_NODE_ID, self, id);
ndoc
}
fn deep_clone_to(&mut self, id: NodeId, odoc: &Document, oid: NodeId) {
let id = self.append_child(id, odoc[oid].clone());
for child in odoc.children(oid) {
self.deep_clone_to(id, odoc, child);
}
}
pub fn fold(&mut self, id: NodeId) {
assert!(
id != Document::DOCUMENT_NODE_ID,
"Can't fold the synthetic document node");
let mut next_child = self[id].first_child;
while let Some(child) = next_child {
debug_assert_eq!(self[child].parent, Some(id));
next_child = self[child].next_sibling;
self.insert_before(id, child);
}
self.detach(id);
}
}
impl Default for Document {
fn default() -> Document {
Document::new()
}
}
impl fmt::Debug for Document {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_list().entries(&self.nodes[1..]).finish()
}
}
impl std::ops::Index<NodeId> for Document {
type Output = Node;
#[inline]
fn index(&self, id: NodeId) -> &Node {
&self.nodes[id.0.get() as usize]
}
}
impl std::ops::IndexMut<NodeId> for Document {
#[inline]
fn index_mut(&mut self, id: NodeId) -> &mut Node {
&mut self.nodes[id.0.get() as usize]
}
}
impl Element {
pub fn new<LN>(lname: LN) -> Element
where LN: Into<LocalName>
{
Element {
name: QualName::new(None, ns!(), lname.into()),
attrs: Vec::new()
}
}
pub fn is_elem<LN>(&self, lname: LN) -> bool
where LN: Into<LocalName>
{
self.name.local == lname.into()
}
pub fn html_tag_meta(&self) -> Option<&'static html::TagMeta> {
if self.name.ns == html::ns::HTML {
html::TAG_META.get(&self.name.local)
} else {
None
}
}
pub fn attr<LN>(&self, lname: LN) -> Option<&StrTendril>
where LN: Into<LocalName>
{
let lname = lname.into();
self.attrs
.iter()
.find(|attr| attr.name.local == lname)
.map(|attr| &attr.value)
}
pub fn remove_attr<LN>(&mut self, lname: LN) -> Option<StrTendril>
where LN: Into<LocalName>
{
let mut found = None;
let mut i = 0;
let lname = lname.into();
while i < self.attrs.len() {
if self.attrs[i].name.local == lname {
found = Some(self.attrs.remove(i).value);
} else {
i += 1;
}
}
found
}
pub fn set_attr<LN, V>(&mut self, lname: LN, value: V) -> Option<StrTendril>
where LN: Into<LocalName>, V: Into<StrTendril>
{
let mut found = None;
let mut i = 0;
let lname = lname.into();
let mut value = Some(value.into());
while i < self.attrs.len() {
if self.attrs[i].name.local == lname {
if found.is_none() {
found = Some(mem::replace(
&mut self.attrs[i].value,
value.take().unwrap(),
));
i += 1;
} else {
found = Some(self.attrs.remove(i).value);
};
} else {
i += 1;
}
}
if found.is_none() {
self.attrs.push(Attribute {
name: QualName::new(None, ns!(), lname),
value: value.take().unwrap()
});
}
found
}
}
impl Node {
pub fn new_elem(element: Element) -> Node {
Node::new(NodeData::Elem(element))
}
pub fn new_text<T>(text: T) -> Node
where T: Into<StrTendril>
{
Node::new(NodeData::Text(text.into()))
}
fn new(data: NodeData) -> Self {
Node {
parent: None,
prev_sibling: None,
next_sibling: None,
first_child: None,
last_child: None,
data,
}
}
}
impl Deref for Node {
type Target = NodeData;
#[inline]
fn deref(&self) -> &NodeData {
&self.data
}
}
impl DerefMut for Node {
#[inline]
fn deref_mut(&mut self) -> &mut NodeData {
&mut self.data
}
}
impl NodeData {
pub fn as_element(&self) -> Option<&Element> {
match self {
NodeData::Elem(ref data) => Some(data),
_ => None,
}
}
pub fn as_element_mut(&mut self) -> Option<&mut Element> {
match self {
NodeData::Elem(ref mut data) => Some(data),
_ => None,
}
}
pub fn as_text(&self) -> Option<&StrTendril> {
match self {
NodeData::Text(ref t) => Some(t),
_ => None,
}
}
pub fn as_text_mut(&mut self) -> Option<&mut StrTendril> {
match self {
NodeData::Text(ref mut t) => Some(t),
_ => None,
}
}
pub fn attr<LN>(&self, lname: LN) -> Option<&StrTendril>
where LN: Into<LocalName>
{
if let Some(edata) = self.as_element() {
edata.attr(lname)
} else {
None
}
}
pub fn is_elem<LN>(&self, lname: LN) -> bool
where LN: Into<LocalName>
{
if let Some(edata) = self.as_element() {
edata.is_elem(lname)
} else {
false
}
}
}
impl Clone for Node {
fn clone(&self) -> Self {
Node::new(self.data.clone())
}
}
fn push_if(stack: &mut Vec<NodeId>, id: Option<NodeId>) {
if let Some(id) = id {
stack.push(id);
}
}