use std::{
borrow::Cow,
collections::BTreeMap,
fmt::{self, Debug, Formatter},
};
use markup5ever::tendril::{StrTendril, TendrilSink};
use yarte_html::{
driver,
interface::{
Attribute as HtmlAttribute, ElementFlags, ExpandedName, NodeOrText as HtmlNodeOrText,
QualName, TreeSink,
},
tree_builder::{get_marquee, is_marquee},
};
/// Identifier of a parsed node; it is the key type of the `nodes` map held by `Sink`.
pub type ParseNodeId = usize;
/// Lightweight handle exchanged with the tree builder (`Sink` uses it as its
/// `TreeSink::Handle`).
#[derive(Clone)]
pub struct ParseNode {
/// Id taken from the sink's running counter.
id: ParseNodeId,
/// Qualified element name. In this file it is only filled in by
/// `create_element`; handles from `new_parse_node` alone keep `None`.
qual_name: Option<QualName>,
}
/// Compact debug output: the id plus the local part of the qualified name
/// (when present) instead of the full `QualName`.
impl Debug for ParseNode {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        // Only the local name is shown; prefix and namespace are omitted.
        let local_name: Option<String> = self
            .qual_name
            .as_ref()
            .map(|qual| String::from(&*qual.local));

        let mut out = f.debug_struct("ParseNode");
        out.field("id", &self.id);
        out.field("name", &local_name);
        out.finish()
    }
}
/// Attribute of a parsed element, with the value held as an owned `String`.
#[derive(Clone)]
pub struct ParseAttribute {
/// Qualified name of the attribute.
pub name: QualName,
/// Attribute value.
pub value: String,
}
/// Debug output labelled `Attr`, showing the attribute's local name and value.
impl Debug for ParseAttribute {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        // Render just the local part of the qualified name.
        let local_name = self.name.local.to_string();

        let mut out = f.debug_struct("Attr");
        out.field("name", &local_name);
        out.field("value", &self.value);
        out.finish()
    }
}
/// Entry stored in the `nodes` map of `Sink`.
pub enum ParseElement {
/// An element node.
Node {
/// Qualified name of the element.
name: QualName,
/// Attributes given when the element was created.
attrs: Vec<ParseAttribute>,
/// Ids of the appended children, in append order.
children: Vec<ParseNodeId>,
/// Id of the parent, when one was recorded while appending; `None` otherwise.
parent: Option<ParseNodeId>,
},
/// A text node holding its content.
Text(String),
/// The document root with the ids of its children; `Sink` inserts it under
/// id 0 when a doctype is appended.
Document(Vec<ParseNodeId>),
}
/// Debug output per variant; element names are reduced to their local part.
impl Debug for ParseElement {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        match self {
            ParseElement::Text(content) => {
                let mut out = f.debug_tuple("Text");
                out.field(content);
                out.finish()
            }
            ParseElement::Document(child_ids) => {
                let mut out = f.debug_tuple("Document");
                out.field(child_ids);
                out.finish()
            }
            ParseElement::Node {
                name,
                attrs,
                children,
                parent,
            } => {
                let local_name = name.local.to_string();

                let mut out = f.debug_struct("Node");
                out.field("name", &local_name);
                out.field("attributes", attrs);
                out.field("children", children);
                out.field("parent", parent);
                out.finish()
            }
        }
    }
}
/// Tree sink that records parsed nodes in a flat map keyed by node id.
#[derive(Debug, Default)]
pub struct Sink {
/// Next id to hand out; incremented for every allocated handle or text node.
count: usize,
/// All stored nodes, keyed by id.
pub nodes: BTreeMap<ParseNodeId, ParseElement>,
/// Set by `get_document`: `true` when the document handle received an id other than 0.
fragment: bool,
/// Parse errors reported so far; a non-empty list makes `finish` return `Err`.
err: Vec<ParseError>,
}
impl Sink {
    /// Allocates the next id and returns a handle for it without a qualified name.
    ///
    /// Nothing is inserted into `nodes` here; callers store an element when needed.
    fn new_parse_node(&mut self) -> ParseNode {
        let handle = ParseNode {
            id: self.count,
            qual_name: None,
        };
        self.count += 1;
        handle
    }

    /// Prepares `child` for being attached under node `p` and returns the child's id.
    ///
    /// * Text: a new id is allocated and the text is stored in `nodes`.
    /// * Node: the node must already be stored as `ParseElement::Node`; its
    ///   `parent` is set to `p` only when `is_marquee` accepts its name.
    ///
    /// The parent's `children` list is not touched here.
    ///
    /// # Panics
    ///
    /// Panics with `"Get parent"` when an appended node is not stored in
    /// `nodes` as a `ParseElement::Node`.
    fn append_child(
        &mut self,
        p: ParseNodeId,
        child: HtmlNodeOrText<<Self as TreeSink>::Handle>,
    ) -> ParseNodeId {
        let node = match child {
            HtmlNodeOrText::AppendText(text) => {
                let text_id = self.new_parse_node().id;
                self.nodes
                    .insert(text_id, ParseElement::Text(text.to_string()));
                return text_id;
            }
            HtmlNodeOrText::AppendNode(node) => node,
        };

        match self.nodes.get_mut(&node.id) {
            Some(ParseElement::Node { parent, name, .. }) => {
                // NOTE(review): only marquee elements record their parent; every
                // other element keeps `parent == None`. Confirm this is intended.
                if is_marquee(name) {
                    *parent = Some(p);
                }
            }
            _ => panic!("Get parent"),
        }
        node.id
    }
}
/// A single parse error, wrapping the message passed to `TreeSink::parse_error`.
#[derive(Debug)]
pub struct ParseError(Cow<'static, str>);
/// Result of a parse: the value on success, or the list of parse errors.
pub type ParseResult<T> = Result<T, Vec<ParseError>>;
impl TreeSink for Sink {
type Handle = ParseNode;
type Output = ParseResult<Self>;
fn finish(self) -> Self::Output {
if self.err.is_empty() {
Ok(self)
} else {
Err(self.err)
}
}
fn parse_error(&mut self, msg: Cow<'static, str>) {
self.err.push(ParseError(msg))
}
fn get_document(&mut self) -> Self::Handle {
let node = self.new_parse_node();
self.fragment = node.id != 0;
node
}
fn elem_name<'a>(&'a self, target: &'a Self::Handle) -> ExpandedName<'a> {
target
.qual_name
.as_ref()
.expect("Expected qual name of node!")
.expanded()
}
fn create_element(
&mut self,
name: QualName,
html_attrs: Vec<HtmlAttribute>,
_flags: ElementFlags,
) -> Self::Handle {
let mut new_node = self.new_parse_node();
new_node.qual_name = Some(name.clone());
let attrs = html_attrs
.into_iter()
.map(|attr| ParseAttribute {
name: attr.name,
value: String::from(attr.value),
})
.collect();
self.nodes.insert(
new_node.id,
ParseElement::Node {
name,
attrs,
children: vec![],
parent: None,
},
);
new_node
}
fn append(&mut self, p: &Self::Handle, child: HtmlNodeOrText<Self::Handle>) {
let id = self.append_child(p.id, child);
match self.nodes.get_mut(&p.id) {
Some(ParseElement::Document(children)) | Some(ParseElement::Node { children, .. }) => {
children.push(id);
}
_ if p.id == 0 || self.fragment => (),
_ => panic!("append without parent {:?}, {:?} {:?}", p, id, self.nodes),
};
}
fn append_doctype_to_document(&mut self, _: StrTendril, _: StrTendril, _: StrTendril) {
if self
.nodes
.insert(0, ParseElement::Document(vec![]))
.is_some()
{
panic!("Double Doctype")
}
}
fn get_template_contents(&mut self, target: &Self::Handle) -> Self::Handle {
target.clone()
}
fn same_node(&self, x: &Self::Handle, y: &Self::Handle) -> bool {
x.id == y.id
}
}
/// Parses `doc` as a complete document into a fresh [`Sink`].
///
/// # Errors
///
/// Returns the parse errors collected by the sink, if any were reported.
pub fn parse_document(doc: &str) -> ParseResult<Sink> {
    driver::parse_document(Sink::default())
        .from_utf8()
        .one(doc.as_bytes())
}
pub fn parse_fragment(doc: &str) -> ParseResult<Sink> {
let parser = driver::parse_fragment(Sink::default(), get_marquee(), vec![]).from_utf8();
parser.one(doc.as_bytes()).and_then(|mut a| {
a.nodes
.remove(&0)
.and_then(|_| {
if let Some(ParseElement::Node { name, .. }) = a.nodes.get_mut(&2) {
*name = get_marquee();
Some(a)
} else {
None
}
})
.ok_or_else(Vec::new)
})
}