extern crate html5ever;
extern crate typed_arena;
use html5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
use html5ever::tendril::{StrTendril, TendrilSink};
use html5ever::{parse_document, Attribute, QualName};
use std::borrow::Cow;
use std::cell::{Cell, RefCell};
use std::collections::HashSet;
use std::io::{self, Read};
use std::ptr;
/// Parses `bytes` as an HTML document and returns the root document node.
///
/// Every node of the resulting tree is allocated in `arena`, so the returned
/// reference (and everything reachable from it) lives as long as the arena.
/// The input is fed to the parser through its UTF-8 decoding adapter in a
/// single chunk.
fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<'a> {
    // The document root is allocated up front; the parser appends to it.
    let document: Ref<'a> = arena.alloc(Node::new(NodeData::Document));
    let quirks_mode = Cell::new(QuirksMode::NoQuirks);

    let parser = parse_document(
        Sink {
            arena,
            document,
            quirks_mode,
        },
        Default::default(),
    );
    parser.from_utf8().one(bytes)
}
// Shared reference to the typed arena in which every `Node` is allocated.
type Arena<'arena> = &'arena typed_arena::Arena<Node<'arena>>;
// Shared reference to a node that lives for the arena lifetime.
type Ref<'arena> = &'arena Node<'arena>;
// Mutable, optional pointer to another node; `None` means "no such neighbour".
// `Cell` lets links be rewired through shared references.
type Link<'arena> = Cell<Option<Ref<'arena>>>;
// Tree-builder sink that materialises parsed nodes inside an arena.
struct Sink<'arena> {
// Arena used by `new_node` to allocate every node.
arena: Arena<'arena>,
// Root document node; handed back by `get_document` and `finish`.
document: Ref<'arena>,
// Last quirks mode reported via `set_quirks_mode`; only written, never read,
// in the code visible here.
quirks_mode: Cell<QuirksMode>,
}
/// DOM node holding links to its neighbours in the tree plus its own payload.
///
/// All links are `Cell`s, so the tree can be rewired through shared
/// references to arena-allocated nodes.
pub struct Node<'arena> {
// Parent node, or `None` when the node is not attached to a parent.
parent: Link<'arena>,
// Sibling immediately after this node, or `None` when there is none.
next_sibling: Link<'arena>,
// Sibling immediately before this node, or `None` when there is none.
previous_sibling: Link<'arena>,
// First child, or `None` when the node has no children.
first_child: Link<'arena>,
// Last child, or `None` when the node has no children.
last_child: Link<'arena>,
// Kind-specific payload (element, text, comment, ...).
data: NodeData<'arena>,
}
/// Payload of a [`Node`]: which kind of node it is and the data that kind carries.
#[derive(Clone)]
pub enum NodeData<'arena> {
/// Root of a document; also used for the contents node of a template element.
Document,
/// A doctype declaration with its name and public/system identifiers.
Doctype {
name: StrTendril,
public_id: StrTendril,
system_id: StrTendril,
},
/// A text node. The contents sit in a `RefCell` so that text appended next
/// to an existing text node can be merged into it in place.
Text {
contents: RefCell<StrTendril>,
},
/// A comment node.
Comment {
contents: StrTendril,
},
/// An element node.
Element {
name: QualName,
// In a `RefCell` so `add_attrs_if_missing` can extend the list through a
// shared reference.
attrs: RefCell<Vec<Attribute>>,
// `Some` only for elements created with the `template` flag set; points
// at a separate `Document` node.
template_contents: Option<Ref<'arena>>,
// Copied from the `ElementFlags` passed to `create_element`.
mathml_annotation_xml_integration_point: bool,
},
/// A processing instruction with its target and contents.
ProcessingInstruction {
target: StrTendril,
contents: StrTendril,
},
}
impl<'arena> Node<'arena> {
    /// Creates a node carrying `data` with every tree link empty.
    fn new(data: NodeData<'arena>) -> Self {
        // `Cell<Option<_>>::default()` is a cell holding `None`.
        Node {
            parent: Cell::default(),
            previous_sibling: Cell::default(),
            next_sibling: Cell::default(),
            first_child: Cell::default(),
            last_child: Cell::default(),
            data,
        }
    }

    /// Unlinks this node from its parent and siblings.
    ///
    /// The node's own `parent`, `previous_sibling` and `next_sibling` links
    /// are cleared, and the neighbours (or the parent's first/last child
    /// pointers) are patched to close the gap. Children are left untouched.
    fn detach(&self) {
        let old_parent = self.parent.take();
        let prev = self.previous_sibling.take();
        let next = self.next_sibling.take();

        // Fix the backward link of whatever followed this node; if nothing
        // followed, this node was the parent's last child.
        match (next, old_parent) {
            (Some(following), _) => following.previous_sibling.set(prev),
            (None, Some(parent)) => parent.last_child.set(prev),
            (None, None) => {}
        }

        // Fix the forward link of whatever preceded this node; if nothing
        // preceded, this node was the parent's first child.
        match (prev, old_parent) {
            (Some(preceding), _) => preceding.next_sibling.set(next),
            (None, Some(parent)) => parent.first_child.set(next),
            (None, None) => {}
        }
    }

    /// Makes `new_child` the last child of `self`, detaching it from any
    /// previous position first.
    fn append(&'arena self, new_child: &'arena Self) {
        new_child.detach();
        new_child.parent.set(Some(self));

        match self.last_child.take() {
            Some(tail) => {
                // Chain the new child after the current tail.
                new_child.previous_sibling.set(Some(tail));
                debug_assert!(tail.next_sibling.get().is_none());
                tail.next_sibling.set(Some(new_child));
            }
            None => {
                // No children yet: the new child is also the first one.
                debug_assert!(self.first_child.get().is_none());
                self.first_child.set(Some(new_child));
            }
        }

        self.last_child.set(Some(new_child));
    }

    /// Inserts `new_sibling` immediately before `self` under the same parent,
    /// detaching it from any previous position first.
    fn insert_before(&'arena self, new_sibling: &'arena Self) {
        new_sibling.detach();
        new_sibling.parent.set(self.parent.get());
        new_sibling.next_sibling.set(Some(self));

        match self.previous_sibling.take() {
            Some(before) => {
                // Splice between the old previous sibling and `self`.
                new_sibling.previous_sibling.set(Some(before));
                debug_assert!(ptr::eq::<Node>(
                    before.next_sibling.get().unwrap(),
                    self
                ));
                before.next_sibling.set(Some(new_sibling));
            }
            None => {
                // `self` was the first child (if it has a parent at all), so
                // the new sibling takes over that role.
                if let Some(parent) = self.parent.get() {
                    debug_assert!(ptr::eq::<Node>(parent.first_child.get().unwrap(), self));
                    parent.first_child.set(Some(new_sibling));
                }
            }
        }

        self.previous_sibling.set(Some(new_sibling));
    }
}
impl<'arena> Sink<'arena> {
    /// Allocates a fresh, unlinked node carrying `data` in the arena.
    fn new_node(&self, data: NodeData<'arena>) -> Ref<'arena> {
        let node = Node::new(data);
        self.arena.alloc(node)
    }

    /// Shared implementation of the "append" family of sink operations.
    ///
    /// * `child` - the node or text to insert.
    /// * `previous` - yields the node that would end up directly before the
    ///   insertion point; only invoked when `child` is text.
    /// * `append` - performs the actual insertion of a node.
    ///
    /// When `child` is text and the preceding node is a text node, the text is
    /// merged into that node and nothing is inserted. Otherwise a new text
    /// node is created (for text) and handed to `append`.
    fn append_common<P, A>(&self, child: NodeOrText<Ref<'arena>>, previous: P, append: A)
    where
        P: FnOnce() -> Option<Ref<'arena>>,
        A: FnOnce(Ref<'arena>),
    {
        // Existing nodes are inserted as-is.
        let text = match child {
            NodeOrText::AppendNode(node) => return append(node),
            NodeOrText::AppendText(text) => text,
        };

        // Coalesce with an adjacent text node instead of creating a new one.
        if let Some(Node {
            data: NodeData::Text { contents },
            ..
        }) = previous()
        {
            contents.borrow_mut().push_tendril(&text);
            return;
        }

        let text_node = self.new_node(NodeData::Text {
            contents: RefCell::new(text),
        });
        append(text_node)
    }
}
impl<'arena> TreeSink for Sink<'arena> {
type Handle = Ref<'arena>;
type Output = Ref<'arena>;
type ElemName<'a>
= &'a QualName
where
Self: 'a;
fn finish(self) -> Ref<'arena> {
self.document
}
fn parse_error(&self, _: Cow<'static, str>) {}
fn get_document(&self) -> Ref<'arena> {
self.document
}
fn set_quirks_mode(&self, mode: QuirksMode) {
self.quirks_mode.set(mode);
}
fn same_node(&self, x: &Ref<'arena>, y: &Ref<'arena>) -> bool {
ptr::eq::<Node>(*x, *y)
}
fn elem_name(&self, target: &Ref<'arena>) -> Self::ElemName<'_> {
match target.data {
NodeData::Element { ref name, .. } => name,
_ => panic!("not an element!"),
}
}
fn get_template_contents(&self, target: &Ref<'arena>) -> Ref<'arena> {
if let NodeData::Element {
template_contents: Some(contents),
..
} = target.data
{
contents
} else {
panic!("not a template element!")
}
}
fn is_mathml_annotation_xml_integration_point(&self, target: &Ref<'arena>) -> bool {
if let NodeData::Element {
mathml_annotation_xml_integration_point,
..
} = target.data
{
mathml_annotation_xml_integration_point
} else {
panic!("not an element!")
}
}
fn create_element(
&self,
name: QualName,
attrs: Vec<Attribute>,
flags: ElementFlags,
) -> Ref<'arena> {
self.new_node(NodeData::Element {
name,
attrs: RefCell::new(attrs),
template_contents: if flags.template {
Some(self.new_node(NodeData::Document))
} else {
None
},
mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point,
})
}
fn create_comment(&self, text: StrTendril) -> Ref<'arena> {
self.new_node(NodeData::Comment { contents: text })
}
fn create_pi(&self, target: StrTendril, data: StrTendril) -> Ref<'arena> {
self.new_node(NodeData::ProcessingInstruction {
target,
contents: data,
})
}
fn append(&self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
self.append_common(
child,
|| parent.last_child.get(),
|new_node| parent.append(new_node),
)
}
fn append_before_sibling(&self, sibling: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
self.append_common(
child,
|| sibling.previous_sibling.get(),
|new_node| sibling.insert_before(new_node),
)
}
fn append_based_on_parent_node(
&self,
element: &Ref<'arena>,
prev_element: &Ref<'arena>,
child: NodeOrText<Ref<'arena>>,
) {
if element.parent.get().is_some() {
self.append_before_sibling(element, child)
} else {
self.append(prev_element, child)
}
}
fn append_doctype_to_document(
&self,
name: StrTendril,
public_id: StrTendril,
system_id: StrTendril,
) {
self.document.append(self.new_node(NodeData::Doctype {
name,
public_id,
system_id,
}))
}
fn add_attrs_if_missing(&self, target: &Ref<'arena>, attrs: Vec<Attribute>) {
let mut existing = if let NodeData::Element { ref attrs, .. } = target.data {
attrs.borrow_mut()
} else {
panic!("not an element")
};
let existing_names = existing
.iter()
.map(|e| e.name.clone())
.collect::<HashSet<_>>();
existing.extend(
attrs
.into_iter()
.filter(|attr| !existing_names.contains(&attr.name)),
);
}
fn remove_from_parent(&self, target: &Ref<'arena>) {
target.detach()
}
fn reparent_children(&self, node: &Ref<'arena>, new_parent: &Ref<'arena>) {
let mut next_child = node.first_child.get();
while let Some(child) = next_child {
debug_assert!(ptr::eq::<Node>(child.parent.get().unwrap(), *node));
next_child = child.next_sibling.get();
new_parent.append(child)
}
}
}
/// Reads an HTML document from standard input, parses it into a fresh arena
/// and prints the resulting tree to standard output.
///
/// # Errors
/// Returns the underlying I/O error if reading standard input fails, instead
/// of panicking on it.
fn main() -> io::Result<()> {
    let mut bytes = Vec::new();
    // A failed read is an ordinary runtime condition, not a bug: propagate it.
    io::stdin().read_to_end(&mut bytes)?;

    let arena = typed_arena::Arena::new();
    let dom = html5ever_parse_slice_into_arena(&bytes, &arena);
    print_node(dom, 0);
    Ok(())
}
fn print_node<'arena>(node: &Node<'arena>, depth: usize) {
let indent = " ".repeat(depth);
match &node.data {
NodeData::Document => println!("{}Document", indent),
NodeData::Doctype { name, .. } => println!("{}<!DOCTYPE {}>", indent, name),
NodeData::Text { contents } => {
let text = contents.borrow();
if !text.trim().is_empty() {
println!("{}\"{}\"", indent, text.trim());
}
},
NodeData::Comment { contents } => println!("{}<!-- {} -->", indent, contents),
NodeData::Element { name, .. } => println!("{}<{}>", indent, name.local),
NodeData::ProcessingInstruction { target, .. } => println!("{}<?{}>", indent, target),
}
let mut child = node.first_child.get();
while let Some(current_child) = child {
print_node(current_child, depth + 1);
child = current_child.next_sibling.get();
}
}