use super::{
attributes::should_ignore,
document::Document,
node::{ElementData, Node, NodeData, NodeId},
};
use html5ever::{
expanded_name, local_name, ns,
tendril::{StrTendril, TendrilSink},
tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink},
Attribute, QualName,
};
use std::{
borrow::Cow,
cell::{Ref, RefCell},
};
/// Selects how the input HTML is parsed.
#[derive(Clone, Copy, Debug)]
pub(crate) enum InliningMode {
    /// The input is parsed with `html5ever::parse_document`.
    Document,
    /// The input is parsed with `html5ever::parse_fragment`.
    Fragment,
}
/// Parse `bytes` as UTF-8 HTML into a [`Document`].
///
/// `preallocate_node_capacity` and the input length are forwarded to
/// `Document::with_capacity` so storage can be reserved up front.
/// `mode` selects between whole-document and fragment parsing.
///
/// # Panics
///
/// In fragment mode, panics if computing the id of the node whose children
/// are lifted into the document overflows.
pub(crate) fn parse_with_options(
    bytes: &[u8],
    preallocate_node_capacity: usize,
    mode: InliningMode,
) -> Document {
    let storage = Document::with_capacity(preallocate_node_capacity, bytes.len());
    let sink = Sink {
        document: RefCell::new(storage),
    };
    let options = html5ever::ParseOpts::default();
    match mode {
        InliningMode::Document => {
            let parser = html5ever::parse_document(sink, options);
            parser.from_utf8().one(bytes)
        }
        InliningMode::Fragment => {
            // The fragment is parsed against an HTML-namespace context element
            // with an empty local name and no attributes.
            let context_name = QualName::new(None, ns!(html), local_name!(""));
            let parser = html5ever::parse_fragment(sink, options, context_name, vec![], false);
            let mut parsed = parser.from_utf8().one(bytes);

            let root_id = NodeId::document_id();
            // NOTE(review): the `+ 2` offset relies on the order in which nodes
            // are created during fragment parsing — confirm when upgrading
            // html5ever or changing `Document` construction.
            let raw_context_id = root_id
                .get()
                .checked_add(2)
                .expect("Document id is too small to overflow");
            let context_element_id = NodeId::new(raw_context_id);
            // Move the parsed nodes directly under the document node.
            parsed.reparent_children(context_element_id, root_id);
            parsed
        }
    }
}
/// `TreeSink` implementation that records parsed nodes into a [`Document`].
struct Sink {
// Wrapped in a `RefCell` because the sink's methods take `&self` yet must mutate the document.
document: RefCell<Document>,
}
impl Sink {
    /// Store `data` as a new node in the document and return its id.
    fn push_node(&self, data: NodeData) -> NodeId {
        let mut document = self.document.borrow_mut();
        document.push_node(data)
    }

    /// Create an element node and register it in the document's lookup indexes
    /// (element list, tag name, and — when present — `id` and `class`).
    fn push_element(
        &self,
        name: QualName,
        attributes: Vec<Attribute>,
        inlining_ignored: bool,
    ) -> NodeId {
        // Capture the values needed for indexing before `attributes` is moved
        // into the element. If a name occurs more than once, the last one wins.
        let mut id_value = None;
        let mut class_value = None;
        for attr in &attributes {
            let local = &attr.name.local;
            if *local == local_name!("id") {
                id_value = Some(attr.value.clone());
                continue;
            }
            if *local == local_name!("class") {
                class_value = Some(attr.value.clone());
            }
        }
        let tag_name = name.local.clone();

        let mut document = self.document.borrow_mut();
        let node_id = document.push_node(NodeData::Element {
            element: ElementData::new(name, attributes),
            inlining_ignored,
        });
        document.push_element_id(node_id);
        document.index_by_tag(tag_name, node_id);
        if let Some(id) = id_value.as_ref() {
            document.index_by_id(id, node_id);
        }
        if let Some(class) = class_value.as_ref() {
            document.index_by_classes(class, node_id);
        }
        node_id
    }

    /// Create a text node.
    fn push_text(&self, text: StrTendril) -> NodeId {
        self.push_node(NodeData::Text { text })
    }

    /// Create a comment node.
    fn push_comment(&self, text: StrTendril) -> NodeId {
        self.push_node(NodeData::Comment { text })
    }

    /// Create a processing-instruction node.
    fn push_processing_instruction(&self, target: StrTendril, data: StrTendril) -> NodeId {
        self.push_node(NodeData::ProcessingInstruction { target, data })
    }

    /// Create a doctype node.
    fn push_doctype(&self, name: StrTendril) -> NodeId {
        self.push_node(NodeData::Doctype { name })
    }

    /// Shared insertion logic for `append` and `append_before_sibling`.
    ///
    /// `previous` yields the node that would directly precede the inserted
    /// child; `append` performs the actual insertion. Text is merged into the
    /// preceding node when that node is itself a text node, otherwise a new
    /// text node is created and inserted.
    fn append_impl<P, A>(&self, child: NodeOrText<NodeId>, previous: P, append: A)
    where
        P: FnOnce(&mut Document) -> Option<NodeId>,
        A: FnOnce(&mut Document, NodeId),
    {
        let text = match child {
            NodeOrText::AppendNode(node) => {
                append(&mut self.document.borrow_mut(), node);
                return;
            }
            NodeOrText::AppendText(text) => text,
        };

        // The mutable borrow is released at the end of this statement.
        let preceding = previous(&mut self.document.borrow_mut());
        if let Some(id) = preceding {
            let mut document = self.document.borrow_mut();
            if let Node {
                data: NodeData::Text { text: existing },
                ..
            } = &mut document[id]
            {
                existing.push_tendril(&text);
                return;
            }
        }

        let created = self.push_text(text);
        append(&mut self.document.borrow_mut(), created);
    }
}
impl TreeSink for Sink {
    type Handle = NodeId;
    type Output = Document;
    type ElemName<'a>
        = Ref<'a, QualName>
    where
        Self: 'a;

    /// Consume the sink and return the document it built.
    fn finish(self) -> Document {
        self.document.into_inner()
    }

    /// Parse errors are deliberately ignored.
    fn parse_error(&self, _msg: Cow<'static, str>) {}

    /// Return the id of the document node.
    fn get_document(&self) -> NodeId {
        NodeId::document_id()
    }

    /// Borrow the qualified name of the element `target`.
    ///
    /// # Panics
    ///
    /// Panics if `target` does not refer to an element node.
    fn elem_name<'a>(&'a self, target: &NodeId) -> Self::ElemName<'a> {
        let document = self.document.borrow();
        let node = Ref::map(document, |document| &document[*target]);
        let element = Ref::map(node, |node| node.as_element().expect("Not an element"));
        Ref::map(element, |element| &element.name)
    }

    /// Create an element node and, unless it is marked as ignored, register it
    /// as a style source when it is a `<style>` element or a
    /// `<link rel="stylesheet">` element with a non-empty `href`.
    fn create_element(
        &self,
        name: QualName,
        attrs: Vec<Attribute>,
        _flags: ElementFlags,
    ) -> NodeId {
        let inlining_ignored = should_ignore(&attrs);
        // Classify before `name` and `attrs` are moved into the new element.
        let (is_style, is_stylesheet) = {
            if inlining_ignored {
                (false, false)
            } else if name.expanded() == expanded_name!(html "style") {
                (true, false)
            } else if name.expanded() == expanded_name!(html "link") {
                let mut rel_stylesheet = false;
                let mut href_non_empty = false;
                for attr in &attrs {
                    if attr.name.local == local_name!("rel") && &*attr.value == "stylesheet" {
                        rel_stylesheet = true;
                    }
                    if attr.name.local == local_name!("href") && !attr.value.is_empty() {
                        href_non_empty = true;
                    }
                    // Both conditions met; no need to scan further attributes.
                    if rel_stylesheet && href_non_empty {
                        break;
                    }
                }
                (false, rel_stylesheet && href_non_empty)
            } else {
                (false, false)
            }
        };
        let element = self.push_element(name, attrs, inlining_ignored);
        if is_style {
            self.document.borrow_mut().add_style(element);
        }
        if is_stylesheet {
            self.document.borrow_mut().add_linked_stylesheet(element);
        }
        element
    }

    /// Create a comment node.
    fn create_comment(&self, text: StrTendril) -> NodeId {
        self.push_comment(text)
    }

    /// Create a processing-instruction node.
    fn create_pi(&self, target: StrTendril, data: StrTendril) -> NodeId {
        self.push_processing_instruction(target, data)
    }

    /// Append `child` as the last child of `parent`, merging text into a
    /// trailing text node when there is one.
    fn append(&self, &parent: &NodeId, child: NodeOrText<NodeId>) {
        self.append_impl(
            child,
            |document| document[parent].last_child,
            |document, new_node| document.append(parent, new_node),
        );
    }

    /// Insert `child` before `element` when `element` has a parent; otherwise
    /// append it to `prev_element`.
    fn append_based_on_parent_node(
        &self,
        element: &NodeId,
        prev_element: &NodeId,
        child: NodeOrText<NodeId>,
    ) {
        if self.document.borrow()[*element].parent.is_some() {
            self.append_before_sibling(element, child);
        } else {
            self.append(prev_element, child);
        }
    }

    /// Append a doctype node to the document node. Only the name is stored;
    /// the public and system identifiers are discarded.
    fn append_doctype_to_document(
        &self,
        name: StrTendril,
        _public_id: StrTendril,
        _system_id: StrTendril,
    ) {
        let node = self.push_doctype(name);
        self.document
            .borrow_mut()
            .append(NodeId::document_id(), node);
    }

    /// Template contents are not stored separately: the template element
    /// itself is returned.
    fn get_template_contents(&self, &target: &NodeId) -> NodeId {
        target
    }

    /// Two handles refer to the same node exactly when their ids are equal.
    fn same_node(&self, x: &NodeId, y: &NodeId) -> bool {
        x == y
    }

    /// The quirks mode is not recorded.
    fn set_quirks_mode(&self, _mode: QuirksMode) {}

    /// Insert `child` immediately before `sibling`, merging text into the
    /// preceding text node when there is one.
    fn append_before_sibling(&self, &sibling: &NodeId, child: NodeOrText<NodeId>) {
        self.append_impl(
            child,
            |document| document[sibling].previous_sibling,
            |document, node| document.insert_before(sibling, node),
        );
    }

    /// Add each attribute in `attrs` to `target` unless the element already
    /// has an attribute with the same name.
    ///
    /// # Panics
    ///
    /// Panics if `target` does not refer to an element node.
    fn add_attrs_if_missing(&self, &target: &NodeId, attrs: Vec<Attribute>) {
        let mut document = self.document.borrow_mut();
        let element = document[target].as_element_mut().expect("not an element");
        let attributes = &mut element.attributes;
        for attr in attrs {
            // Only add the attribute when no existing entry shares its name;
            // existing attributes must be left untouched.
            if attributes
                .attributes
                .iter()
                .all(|entry| entry.name != attr.name)
            {
                attributes.attributes.push(attr);
            }
        }
    }

    /// Detach `target` from its parent.
    fn remove_from_parent(&self, &target: &NodeId) {
        self.document.borrow_mut().detach(target);
    }

    /// Move all children of `node` under `new_parent`.
    fn reparent_children(&self, node: &NodeId, new_parent: &NodeId) {
        self.document
            .borrow_mut()
            .reparent_children(*node, *new_parent);
    }
}