use super::{
element::Element,
iter::{select, Select},
node::{Node, NodeData, NodeId},
parser,
selectors::ParseError,
serializer::serialize_to,
};
use html5ever::local_name;
use std::{io, io::Write, iter::successors};
/// HTML document stored as a flat vector of nodes that reference each other by `NodeId`.
#[derive(Debug)]
pub(crate) struct Document {
/// Storage for every node; a `NodeId` resolves to an entry via `id.get()` (see the `Index` impls).
nodes: Vec<Node>,
/// Node ids registered through `add_style`; `styles()` reads the first child of each one.
styles: Vec<NodeId>,
/// Node ids registered through `add_linked_stylesheet`; `stylesheets()` reads their `href`.
linked_stylesheets: Vec<NodeId>,
}
impl Document {
/// Parse `bytes` into a `Document` by delegating to `parser::parse_with_options`.
///
/// `preallocate_node_capacity` is forwarded unchanged; presumably the parser uses it to
/// pre-size the node storage (cf. `with_capacity`) — confirm in the parser module.
pub(crate) fn parse_with_options(bytes: &[u8], preallocate_node_capacity: usize) -> Document {
parser::parse_with_options(bytes, preallocate_node_capacity)
}
pub(super) fn with_capacity(capacity: usize) -> Self {
let mut nodes = vec![Node::new(NodeData::Document), Node::new(NodeData::Document)];
nodes.reserve(capacity);
Document {
nodes,
styles: Vec::new(),
linked_stylesheets: Vec::new(),
}
}
/// View the node `node_id` as an `Element` handle.
///
/// Returns `None` when the node's data is not the `NodeData::Element` variant.
pub(super) fn as_element(&self, node_id: NodeId) -> Option<Element<'_>> {
    match &self[node_id].data {
        NodeData::Element { element, .. } => Some(Element::new(self, node_id, element)),
        _ => None,
    }
}
/// Register `node` in the list consulted by `styles()`.
///
/// The id is appended as-is; no check is made here that it refers to a `<style>` element.
pub(super) fn add_style(&mut self, node: NodeId) {
self.styles.push(node);
}
/// Iterate over the text held by the first child of every node registered via `add_style`.
///
/// A registered node is skipped when it has no first child or when that child's
/// `as_text()` yields `None`.
pub(crate) fn styles(&self) -> impl Iterator<Item = &str> + '_ {
    self.styles.iter().filter_map(move |&style_id| {
        let content_id = self[style_id].first_child?;
        self[content_id].as_text()
    })
}
/// Iterate over the `href` attribute of every node registered via `add_linked_stylesheet`.
///
/// A registered node is skipped when `as_element()` yields `None` for it or when it
/// carries no `href` attribute.
pub(crate) fn stylesheets(&self) -> impl Iterator<Item = &str> + '_ {
    self.linked_stylesheets.iter().filter_map(move |&link_id| {
        let element = self[link_id].as_element()?;
        element.attributes.get(local_name!("href"))
    })
}
/// Register `node` in the list consulted by `stylesheets()`.
///
/// The id is appended as-is; no check is made here that it refers to a `<link>` element.
pub(super) fn add_linked_stylesheet(&mut self, node: NodeId) {
self.linked_stylesheets.push(node);
}
/// Store a new, unlinked node built from `node` and return its id.
///
/// The id is the position the node occupies in the node vector, i.e. the vector length
/// before the push. The node is not attached to any parent; use `append` or
/// `insert_before` for that.
pub(super) fn push_node(&mut self, node: NodeData) -> NodeId {
    let id = NodeId::new(self.nodes.len());
    self.nodes.push(Node::new(node));
    id
}
/// Unlink `node` from its parent and siblings, leaving its own children untouched.
///
/// The node's `parent`, `previous_sibling` and `next_sibling` are cleared, and the former
/// neighbours (or the parent's `first_child` / `last_child`) are stitched together over the gap.
pub(super) fn detach(&mut self, node: NodeId) {
    let detached = &mut self[node];
    let parent = detached.parent.take();
    let before = detached.previous_sibling.take();
    let after = detached.next_sibling.take();

    // Fix the backward link: either the following sibling or, at the tail, the parent.
    match (after, parent) {
        (Some(after_id), _) => self[after_id].previous_sibling = before,
        (None, Some(parent_id)) => self[parent_id].last_child = before,
        (None, None) => {}
    }
    // Fix the forward link: either the preceding sibling or, at the head, the parent.
    match (before, parent) {
        (Some(before_id), _) => self[before_id].next_sibling = after,
        (None, Some(parent_id)) => self[parent_id].first_child = after,
        (None, None) => {}
    }
}
/// Make `node` the last child of `parent`.
///
/// The node is first detached from wherever it currently is, then linked after the
/// parent's previous last child (or installed as the first child if there was none).
pub(super) fn append(&mut self, parent: NodeId, node: NodeId) {
    self.detach(node);
    // Install the new tail and learn what the old one was in a single step.
    let former_last = self[parent].last_child.replace(node);
    match former_last {
        Some(tail) => self[tail].next_sibling = Some(node),
        None => self[parent].first_child = Some(node),
    }
    let appended = &mut self[node];
    appended.parent = Some(parent);
    // `detach` cleared this link, so writing `None` here is a no-op for an empty parent.
    appended.previous_sibling = former_last;
}
/// Insert `node` immediately before `sibling`, under `sibling`'s parent.
///
/// The node is first detached from its current position. If `sibling` was the first
/// child, the parent's `first_child` is redirected to `node`.
pub(super) fn insert_before(&mut self, sibling: NodeId, node: NodeId) {
    self.detach(node);
    let parent = self[sibling].parent;
    {
        let inserted = &mut self[node];
        inserted.parent = parent;
        inserted.next_sibling = Some(sibling);
    }
    match (self[sibling].previous_sibling.take(), parent) {
        (Some(before), _) => {
            self[node].previous_sibling = Some(before);
            self[before].next_sibling = Some(node);
        }
        (None, Some(parent_id)) => self[parent_id].first_child = Some(node),
        (None, None) => {}
    }
    self[sibling].previous_sibling = Some(node);
}
/// Iterate over the direct children of `node`, from `first_child` along `next_sibling` links.
pub(super) fn children(&self, node: NodeId) -> impl Iterator<Item = NodeId> + '_ {
    let first = self[node].first_child;
    successors(first, move |&current| self[current].next_sibling)
}
/// Iterate over `node` itself followed by each ancestor reached through `parent` links.
pub(crate) fn node_and_ancestors(&self, node: NodeId) -> impl Iterator<Item = NodeId> + '_ {
    let start = Some(node);
    successors(start, move |&current| self[current].parent)
}
/// Return the node that follows `node` in a depth-first, pre-order walk of the tree.
///
/// That is the first child if there is one; otherwise the next sibling of the nearest
/// node on the path to the root (starting with `node` itself) that has one. Returns
/// `None` when no such node exists.
pub(crate) fn next_in_tree_order(&self, node: NodeId) -> Option<NodeId> {
    if let Some(child) = self[node].first_child {
        return Some(child);
    }
    let mut current = node;
    loop {
        if let Some(sibling) = self[current].next_sibling {
            return Some(sibling);
        }
        // Climb one level; running out of parents means the walk is finished.
        current = self[current].parent?;
    }
}
/// Write the document to `writer` by delegating to `serializer::serialize_to`.
///
/// `keep_style_tags` and `keep_link_tags` are forwarded unchanged; judging by their names
/// they control whether `<style>` / `<link>` elements survive serialization — confirm in
/// the serializer module.
///
/// # Errors
///
/// Returns whatever `io::Error` `serialize_to` reports.
pub(crate) fn serialize<W: Write>(
&self,
writer: &mut W,
keep_style_tags: bool,
keep_link_tags: bool,
) -> io::Result<()> {
serialize_to(self, writer, keep_style_tags, keep_link_tags)
}
/// Run a selector query against this document by delegating to `iter::select`.
///
/// The returned `Select` borrows the document (`'a`), whereas a `ParseError` borrows the
/// selector string (`'b`), so the two lifetimes are independent.
///
/// # Errors
///
/// Returns the `ParseError` produced by `iter::select` for `selectors`.
pub(crate) fn select<'a, 'b>(
&'a self,
selectors: &'b str,
) -> Result<Select<'a>, ParseError<'b>> {
select(self, selectors)
}
}
/// Shared access to a node by id: `document[id]` resolves to `nodes[id.get()]`.
///
/// Plain vector indexing is used, so an id whose `get()` value lies outside the node
/// vector panics.
impl std::ops::Index<NodeId> for Document {
type Output = Node;
#[inline]
fn index(&self, id: NodeId) -> &Node {
&self.nodes[id.get()]
}
}
/// Mutable access to a node by id: `document[id]` resolves to `nodes[id.get()]`.
///
/// Plain vector indexing is used, so an id whose `get()` value lies outside the node
/// vector panics.
impl std::ops::IndexMut<NodeId> for Document {
#[inline]
fn index_mut(&mut self, id: NodeId) -> &mut Node {
&mut self.nodes[id.get()]
}
}
#[cfg(test)]
mod tests {
use super::{super::node::ElementData, *};
use html5ever::{local_name, namespace_url, ns, QualName};
use test_case::test_case;
// Build the data for a bare `span` element (empty namespace, no attributes) for tree tests.
fn new_element() -> NodeData {
    let name = QualName::new(None, ns!(), local_name!("span"));
    let element = ElementData::new(name, vec![]);
    NodeData::Element {
        element,
        inlining_ignored: false,
    }
}
// Parse `bytes` and serialize the result straight back, passing `false` for both keep flags.
fn roundtrip(bytes: &[u8]) -> Vec<u8> {
    let document = Document::parse_with_options(bytes, 0);
    let mut output = Vec::new();
    document
        .serialize(&mut output, false, false)
        .expect("Failed to serialize");
    output
}
// Only the two plain `<style>` blocks are expected; the one marked
// `data-css-inline='ignore'` must not be reported.
#[test]
fn test_collect_styles() {
    let html = r#"
<head
><title>Test</title>
<style>h1 { color:blue; }</style>
<style>h1 { color:red; }</style>
<style data-css-inline='ignore'>h1 { color:yellow; }</style>
</head>"#;
    let doc = Document::parse_with_options(html.as_bytes(), 0);
    let styles: Vec<&str> = doc.styles().collect();
    assert_eq!(styles, ["h1 { color:blue; }", "h1 { color:red; }"]);
}
// Only the first two links are expected; the one with an empty `href` and the one marked
// `data-css-inline='ignore'` must not be reported.
#[test]
fn test_collect_stylesheets() {
    let html = r#"
<head>
<link href='styles1.css' rel='stylesheet' type='text/css'>
<link href='styles2.css' rel='stylesheet' type='text/css'>
<link href='' rel='stylesheet' type='text/css'>
<link href='styles3.css' rel='stylesheet' type='text/css' data-css-inline='ignore'>
</head>"#;
    let doc = Document::parse_with_options(html.as_bytes(), 0);
    let links: Vec<&str> = doc.stylesheets().collect();
    assert_eq!(links, ["styles1.css", "styles2.css"]);
}
// Inserting a fresh node before the second child must link the two in both directions.
#[test]
fn test_insert_before() {
    let mut doc = Document::with_capacity(0);
    let root = NodeId::document_id();
    let first = doc.push_node(new_element());
    let second = doc.push_node(new_element());
    let inserted = doc.push_node(new_element());

    doc.append(root, first);
    doc.append(root, second);
    doc.insert_before(second, inserted);

    assert_eq!(doc[second].previous_sibling, Some(inserted));
    assert_eq!(doc[inserted].next_sibling, Some(second));
}
// Appending two nodes must leave the second as the parent's last child, with the
// sibling links between the two set in both directions.
#[test]
fn test_append() {
    let mut doc = Document::with_capacity(0);
    let root = NodeId::document_id();
    let first = doc.push_node(new_element());
    let second = doc.push_node(new_element());

    doc.append(root, first);
    doc.append(root, second);

    assert_eq!(doc[root].last_child, Some(second));
    assert_eq!(doc[first].next_sibling, Some(second));
    assert_eq!(doc[second].previous_sibling, Some(first));
}
// Each input must come back from parse + serialize byte-for-byte unchanged.
#[test_case(b"<!DOCTYPE html><html><head><title>Title of the document</title></head><body></body></html>")]
#[test_case(b"<!DOCTYPE html><html><head><title>Title of the document</title></head><body><hr></body></html>")]
fn test_roundtrip(input: &[u8]) {
    let output = roundtrip(input);
    assert_eq!(output, input);
}
// The stray `</hr>` closing tags in the input must not appear in the serialized output.
#[test]
fn test_ignore_children() {
    let input = b"<!DOCTYPE html><html><head><title>Title of the document</title></head><body><hr><hr></hr></hr></body></html>";
    let expected = b"<!DOCTYPE html><html><head><title>Title of the document</title></head><body><hr><hr></body></html>";
    assert_eq!(roundtrip(input), expected);
}
// With both keep flags off, the `<style>` block (here holding a `:hover` rule) is
// expected to be absent from the output.
#[test]
fn test_pseudo_class() {
    let input = b"<!DOCTYPE html><html><head><title>Title of the document</title><style>h1:hover { color:blue; }</style></head><body><h1>Hello world!</h1></body></html>";
    let expected = b"<!DOCTYPE html><html><head><title>Title of the document</title></head><body><h1>Hello world!</h1></body></html>";
    let output = roundtrip(input);
    assert_eq!(output, expected);
}
// An HTML comment in the body must survive the parse + serialize roundtrip unchanged.
#[test]
fn test_comment() {
    let html = b"<html><head><title>Title of the document</title></head><body><!--TTT--></body></html>";
    let output = roundtrip(html);
    assert_eq!(output, html);
}
}