use std::convert::TryInto;
use std::fmt;
use std::iter;
use std::mem;
use std::num::NonZeroU32;
use std::ops::{Deref, DerefMut};
#[doc(no_inline)]
pub use html5ever::{Attribute, LocalName, Namespace, QualName};
#[doc(no_inline)]
pub use tendril::StrTendril;
mod node_ref;
mod serializer;
#[macro_use] pub mod filter;
pub mod html;
#[cfg(feature = "xml")]
pub mod xml;
#[cfg(test)]
mod tests;
pub use node_ref::{NodeRef, Descender, Selector};
pub struct Document {
nodes: Vec<Node>,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct NodeId(NonZeroU32);
#[derive(Clone, Debug)]
pub struct Node {
data: NodeData,
parent: Option<NodeId>,
prev_sibling: Option<NodeId>,
next_sibling: Option<NodeId>,
first_child: Option<NodeId>,
last_child: Option<NodeId>,
}
#[derive(Clone, Debug)]
pub enum NodeData {
Hole,
Document,
DocType(DocumentType),
Text(StrTendril),
Comment(StrTendril),
Elem(Element),
Pi(ProcessingInstruction),
}
#[derive(Clone, Debug)]
pub struct DocumentType {
pub name: StrTendril,
_priv: ()
}
#[derive(Clone, Debug)]
pub struct ProcessingInstruction {
pub data: StrTendril,
_priv: ()
}
#[derive(Clone, Debug)]
pub struct Element {
pub name: QualName,
pub attrs: Vec<Attribute>,
_priv: ()
}
impl Document {
pub const DOCUMENT_NODE_ID: NodeId = NodeId(
unsafe { NonZeroU32::new_unchecked(1) }
);
const WASTE_ALLOWANCE: usize = 1024;
pub fn new() -> Self {
Document::with_capacity(8)
}
pub fn with_capacity(count: u32) -> Self {
let mut nodes = Vec::with_capacity(count as usize);
nodes.push(Node::new(NodeData::Hole));
nodes.push(Node::new(NodeData::Document));
Document { nodes }
}
#[inline]
pub fn len(&self) -> u32 {
let nodes: u32 = self.nodes.len()
.try_into()
.expect("Document (u32) node index overflow");
debug_assert!(nodes > 0);
nodes - 1
}
#[inline]
pub fn is_empty(&self) -> bool {
self.len() < 2
}
pub fn root_element(&self) -> Option<NodeId> {
let document_node = &self[Document::DOCUMENT_NODE_ID];
debug_assert!(
(if let NodeData::Document = document_node.data { true }
else { false }),
"not document node: {:?}", document_node);
debug_assert!(document_node.parent.is_none());
debug_assert!(document_node.next_sibling.is_none());
debug_assert!(document_node.prev_sibling.is_none());
let mut root = None;
for child in self.children(Document::DOCUMENT_NODE_ID) {
match &self[child].data {
NodeData::DocType(_) |
NodeData::Comment(_) |
NodeData::Pi(_) => {}
NodeData::Document => {
debug_assert!(false, "Document child of Document");
root = None;
break;
}
NodeData::Hole => {
debug_assert!(false, "Hole in Document");
root = None;
break;
}
NodeData::Text(_) => {
root = None;
break;
}
NodeData::Elem(_) => {
if root.is_none() {
root = Some(child);
} else {
root = None;
break;
}
}
}
}
root
}
fn push_node(&mut self, node: Node) -> NodeId {
debug_assert!(
(if let NodeData::Document | NodeData::Hole = node.data { false }
else { true }),
"Invalid push {:?}", node.data);
let next_index = self.nodes.len()
.try_into()
.expect("Document (u32) node index overflow");
debug_assert!(next_index > 1);
self.nodes.push(node);
NodeId(unsafe { NonZeroU32::new_unchecked(next_index) })
}
#[inline]
#[must_use="If the fragment isn't needed, use `unlink()` instead."]
pub fn detach(&mut self, id: NodeId) -> Document {
self.unlink_only(id);
let guess_cap = std::cmp::max(8, self.len() - id.0.get() + 2);
let mut ndoc = Document::with_capacity(guess_cap);
ndoc.append_move(Document::DOCUMENT_NODE_ID, self, id);
if (ndoc.nodes.capacity() - ndoc.nodes.len()) >
Document::WASTE_ALLOWANCE
{
ndoc.nodes.shrink_to_fit();
}
ndoc
}
pub fn attach_child(&mut self, parent: NodeId, mut other: Document) {
self.nodes.reserve(other.len() as usize - 1);
let children = other.children(Document::DOCUMENT_NODE_ID)
.collect::<Vec<_>>();
for child in children {
self.append_move(parent, &mut other, child);
}
}
pub fn attach_before_sibling(
&mut self,
sibling: NodeId,
mut other: Document)
{
self.nodes.reserve(other.len() as usize - 1);
let children = other.children(Document::DOCUMENT_NODE_ID)
.collect::<Vec<_>>();
for oid in children {
let onode = &mut other[oid];
let nid = self.insert_before_sibling(
sibling,
Node::new(onode.take_data()));
for coid in other.children(oid).collect::<Vec<_>>() {
self.append_move(nid, &mut other, coid);
}
}
}
fn append_move(&mut self, id: NodeId, odoc: &mut Document, oid: NodeId) {
let id = self.append_child(id, Node::new(odoc[oid].take_data()));
let mut ns = NodeStack2::new();
ns.push_if(odoc[oid].first_child, id);
while let Some((oid, id)) = ns.pop() {
let onode = &mut odoc[oid];
let nid = self.append_child(id, Node::new(onode.take_data()));
ns.push_if(onode.next_sibling, id);
ns.push_if(onode.first_child, nid);
}
}
#[inline]
pub fn unlink(&mut self, id: NodeId) -> NodeData {
self.unlink_only(id);
self[id].take_data()
}
fn unlink_only(&mut self, id: NodeId) {
assert!(
id != Document::DOCUMENT_NODE_ID,
"Can't detach the synthetic document node");
let (parent, prev_sibling, next_sibling) = {
let node = &mut self[id];
(node.parent.take(),
node.prev_sibling.take(),
node.next_sibling.take())
};
if let Some(next_sibling) = next_sibling {
self[next_sibling].prev_sibling = prev_sibling
} else if let Some(parent) = parent {
self[parent].last_child = prev_sibling;
}
if let Some(prev_sibling) = prev_sibling {
self[prev_sibling].next_sibling = next_sibling;
} else if let Some(parent) = parent {
self[parent].first_child = next_sibling;
}
}
pub fn append_child(&mut self, parent: NodeId, node: Node)
-> NodeId
{
let id = self.push_node(node);
self.append(parent, id);
id
}
fn append(&mut self, parent: NodeId, new_child: NodeId) {
self.unlink_only(new_child);
self[new_child].parent = Some(parent);
self[parent].assert_suitable_parent();
if let Some(last_child) = self[parent].last_child.take() {
self[new_child].prev_sibling = Some(last_child);
debug_assert!(self[last_child].next_sibling.is_none());
self[last_child].next_sibling = Some(new_child);
} else {
debug_assert!(self[parent].first_child.is_none());
self[parent].first_child = Some(new_child);
}
self[parent].last_child = Some(new_child);
}
pub fn insert_before_sibling(&mut self, sibling: NodeId, node: Node)
-> NodeId
{
let id = self.push_node(node);
self.insert_before(sibling, id);
id
}
fn insert_before(&mut self, sibling: NodeId, new_sibling: NodeId) {
self.unlink_only(new_sibling);
let parent = self[sibling].parent
.expect("insert_before sibling has no parent");
self[parent].assert_suitable_parent();
self[new_sibling].parent = Some(parent);
self[new_sibling].next_sibling = Some(sibling);
if let Some(prev_sibling) = self[sibling].prev_sibling.take() {
self[new_sibling].prev_sibling = Some(prev_sibling);
debug_assert_eq!(
self[prev_sibling].next_sibling,
Some(sibling)
);
self[prev_sibling].next_sibling = Some(new_sibling);
} else {
debug_assert_eq!(self[parent].first_child, Some(sibling));
self[parent].first_child = Some(new_sibling);
}
self[sibling].prev_sibling = Some(new_sibling);
}
pub fn text(&self, id: NodeId) -> Option<StrTendril> {
let mut ns = NodeStack1::new();
ns.push_if(self[id].first_child);
let mut text = None;
while let Some(id) = ns.pop() {
let node = &self[id];
if let NodeData::Text(t) = &node.data {
match &mut text {
None => text = Some(t.clone()),
Some(text) => text.push_tendril(&t),
}
ns.push_if(node.next_sibling);
} else {
ns.push_if(node.next_sibling);
ns.push_if(node.first_child);
}
}
text
}
pub fn children(&self, id: NodeId) -> impl Iterator<Item = NodeId> + '_ {
iter::successors(
self[id].first_child,
move |&id| self[id].next_sibling
)
}
pub fn node_and_following_siblings(&self, id: NodeId)
-> impl Iterator<Item = NodeId> + '_
{
iter::successors(Some(id), move |&id| self[id].next_sibling)
}
pub fn node_and_ancestors(&self, id: NodeId)
-> impl Iterator<Item = NodeId> + '_
{
iter::successors(Some(id), move |&id| self[id].parent)
}
pub fn nodes(&self) -> impl Iterator<Item = NodeId> + '_ {
self.descendants(Document::DOCUMENT_NODE_ID)
}
#[inline]
pub fn descendants(&self, id: NodeId) -> impl Iterator<Item = NodeId> + '_
{
NodeRef::new(self, id).descendants().map(|nr| nr.id())
}
pub fn compact(&mut self) {
let mut ndoc = Document::with_capacity(self.len() + 1);
let mut ns = NodeStack2::new();
ns.push_if(
self[Document::DOCUMENT_NODE_ID].first_child,
Document::DOCUMENT_NODE_ID);
while let Some((id, nid)) = ns.pop() {
let nnode = Node::new(self[id].take_data());
let ncid = ndoc.append_child(nid, nnode);
ns.push_if(self[id].next_sibling, nid);
ns.push_if(self[id].first_child, ncid);
}
if (ndoc.nodes.capacity() - ndoc.nodes.len()) >
Document::WASTE_ALLOWANCE
{
ndoc.nodes.shrink_to_fit();
}
self.nodes = ndoc.nodes;
}
pub fn deep_clone(&self, id: NodeId) -> Document {
let guess_cap = std::cmp::max(8, (self.len() - id.0.get() + 2) / 8);
let mut ndoc = Document::with_capacity(guess_cap);
if id == Document::DOCUMENT_NODE_ID {
for child in self.children(id) {
ndoc.append_deep_clone(Document::DOCUMENT_NODE_ID, self, child);
}
} else {
ndoc.append_deep_clone(Document::DOCUMENT_NODE_ID, self, id);
}
ndoc
}
pub fn append_deep_clone(
&mut self,
id: NodeId,
odoc: &Document,
oid: NodeId)
{
let id = self.append_child(id, Node::new(odoc[oid].data.clone()));
for child in odoc.children(oid) {
self.append_deep_clone(id, odoc, child);
}
}
pub fn bulk_clone(&self) -> Document {
Document { nodes: self.nodes.clone() }
}
#[inline]
pub fn fold(&mut self, id: NodeId) -> NodeData {
self.fold_only(id);
self[id].take_data()
}
fn fold_only(&mut self, id: NodeId) {
assert!(
id != Document::DOCUMENT_NODE_ID,
"Can't fold the synthetic document node");
let mut next_child = self[id].first_child;
while let Some(child) = next_child {
debug_assert_eq!(self[child].parent, Some(id));
next_child = self[child].next_sibling;
self.insert_before(id, child);
}
self.unlink_only(id);
}
}
impl Default for Document {
fn default() -> Document {
Document::new()
}
}
impl fmt::Debug for Document {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_list().entries(&self.nodes[1..]).finish()
}
}
impl std::ops::Index<NodeId> for Document {
type Output = Node;
#[inline]
fn index(&self, id: NodeId) -> &Node {
&self.nodes[id.0.get() as usize]
}
}
impl std::ops::IndexMut<NodeId> for Document {
#[inline]
fn index_mut(&mut self, id: NodeId) -> &mut Node {
&mut self.nodes[id.0.get() as usize]
}
}
impl Element {
pub fn new<LN>(lname: LN) -> Element
where LN: Into<LocalName>
{
Element {
name: QualName::new(None, ns!(), lname.into()),
attrs: Vec::new(),
_priv: ()
}
}
pub fn is_elem<LN>(&self, lname: LN) -> bool
where LN: Into<LocalName>
{
self.name.local == lname.into()
}
pub fn html_tag_meta(&self) -> Option<&'static html::TagMeta> {
if self.name.ns == html::ns::HTML {
html::TAG_META.get(&self.name.local)
} else {
None
}
}
pub fn attr<LN>(&self, lname: LN) -> Option<&StrTendril>
where LN: Into<LocalName>
{
let lname = lname.into();
self.attrs
.iter()
.find(|attr| attr.name.local == lname)
.map(|attr| &attr.value)
}
pub fn remove_attr<LN>(&mut self, lname: LN) -> Option<StrTendril>
where LN: Into<LocalName>
{
let mut found = None;
let mut i = 0;
let lname = lname.into();
while i < self.attrs.len() {
if self.attrs[i].name.local == lname {
found = Some(self.attrs.remove(i).value);
} else {
i += 1;
}
}
found
}
pub fn set_attr<LN, V>(&mut self, lname: LN, value: V)
-> Option<StrTendril>
where LN: Into<LocalName>, V: Into<StrTendril>
{
let mut found = None;
let mut i = 0;
let lname = lname.into();
let mut value = Some(value.into());
while i < self.attrs.len() {
if self.attrs[i].name.local == lname {
if found.is_none() {
found = Some(mem::replace(
&mut self.attrs[i].value,
value.take().unwrap(),
));
i += 1;
} else {
found = Some(self.attrs.remove(i).value);
};
} else {
i += 1;
}
}
if found.is_none() {
self.attrs.push(Attribute {
name: QualName::new(None, ns!(), lname),
value: value.take().unwrap()
});
}
found
}
}
impl Node {
pub fn new_elem(element: Element) -> Node {
Node::new(NodeData::Elem(element))
}
pub fn new_text<T>(text: T) -> Node
where T: Into<StrTendril>
{
Node::new(NodeData::Text(text.into()))
}
fn take_data(&mut self) -> NodeData {
mem::replace(&mut self.data, NodeData::Hole)
}
fn new(data: NodeData) -> Self {
Node {
parent: None,
prev_sibling: None,
next_sibling: None,
first_child: None,
last_child: None,
data,
}
}
}
impl Deref for Node {
type Target = NodeData;
#[inline]
fn deref(&self) -> &NodeData {
&self.data
}
}
impl DerefMut for Node {
#[inline]
fn deref_mut(&mut self) -> &mut NodeData {
&mut self.data
}
}
impl NodeData {
pub fn as_element(&self) -> Option<&Element> {
match self {
NodeData::Elem(ref data) => Some(data),
_ => None,
}
}
pub fn as_element_mut(&mut self) -> Option<&mut Element> {
match self {
NodeData::Elem(ref mut data) => Some(data),
_ => None,
}
}
pub fn as_text(&self) -> Option<&StrTendril> {
match self {
NodeData::Text(ref t) => Some(t),
_ => None,
}
}
pub fn as_text_mut(&mut self) -> Option<&mut StrTendril> {
match self {
NodeData::Text(ref mut t) => Some(t),
_ => None,
}
}
pub fn attr<LN>(&self, lname: LN) -> Option<&StrTendril>
where LN: Into<LocalName>
{
if let Some(edata) = self.as_element() {
edata.attr(lname)
} else {
None
}
}
pub fn is_elem<LN>(&self, lname: LN) -> bool
where LN: Into<LocalName>
{
if let Some(edata) = self.as_element() {
edata.is_elem(lname)
} else {
false
}
}
#[inline]
fn assert_suitable_parent(&self) {
debug_assert!(
(if let NodeData::Document | NodeData::Elem(_) = self { true }
else { false }),
"Not a suitable parent: {:?}", self)
}
}
struct NodeStack1(Vec<NodeId>);
impl NodeStack1 {
#[inline]
fn new() -> Self {
NodeStack1(Vec::with_capacity(16))
}
#[inline]
fn push_if(&mut self, id: Option<NodeId>) {
if let Some(id) = id {
self.0.push(id);
}
}
#[inline]
fn pop(&mut self) -> Option<NodeId> {
self.0.pop()
}
}
struct NodeStack2(Vec<(NodeId, NodeId)>);
impl NodeStack2 {
#[inline]
fn new() -> Self {
NodeStack2(Vec::with_capacity(16))
}
#[inline]
fn push_if(&mut self, id: Option<NodeId>, oid: NodeId) {
if let Some(id) = id {
self.0.push((id, oid));
}
}
#[inline]
fn pop(&mut self) -> Option<(NodeId, NodeId)> {
self.0.pop()
}
}