pub mod data_model;
mod expressions;
mod recipes;
mod terminal_symbols;
mod types;
mod whitespace_recipes;
mod xml_names;
use std::fmt::Display;
use enum_extract_macro::EnumExtract;
pub(crate) use expressions::xpath;
pub use expressions::Xpath;
use indextree::{Arena, NodeId};
#[allow(deprecated)]
use crate::{
html::{DocumentNode, HtmlDocument, HtmlNode},
xpath::grammar::data_model::{
AttributeNode, CommentNode, DoctypeNode, ElementNode, PINode, TextNode,
XpathDocumentNode,
},
};
/// A node stored in the arena of an [`XpathItemTree`], with one variant per kind of node.
#[derive(PartialEq, Eq, Debug, Hash, EnumExtract, Clone)]
pub enum XpathItemTreeNode {
/// Document node; `node_id` reports no arena id for this variant.
DocumentNode(XpathDocumentNode),
/// Element node, built from `HtmlNode::Tag` when converting an `HtmlDocument`.
ElementNode(ElementNode),
/// Processing-instruction node, built from `HtmlNode::ProcessingInstruction`.
PINode(PINode),
/// Comment node, built from `HtmlNode::Comment`.
CommentNode(CommentNode),
/// Text node, built from `HtmlNode::Text`.
TextNode(TextNode),
/// Attribute node; the `HtmlDocument` conversion appends these beneath their element in the arena.
AttributeNode(AttributeNode),
/// Doctype node, built from `HtmlNode::Doctype`.
DoctypeNode(DoctypeNode),
}
impl XpathItemTreeNode {
pub fn children<'tree>(&self, tree: &'tree XpathItemTree) -> Vec<&'tree XpathItemTreeNode> {
match self {
XpathItemTreeNode::DocumentNode(node) => node.children(tree),
XpathItemTreeNode::ElementNode(node) => node.children(tree).collect(),
XpathItemTreeNode::PINode(_) => vec![],
XpathItemTreeNode::CommentNode(_) => vec![],
XpathItemTreeNode::TextNode(_) => vec![],
XpathItemTreeNode::AttributeNode(_) => vec![],
XpathItemTreeNode::DoctypeNode(_) => vec![],
}
}
pub fn descendants<'tree>(
&'tree self,
tree: &'tree XpathItemTree,
) -> impl Iterator<Item = &'tree XpathItemTreeNode> + 'tree {
let start_id = self.node_id().unwrap_or(tree.root_node);
start_id
.descendants(&tree.arena)
.map(|node_id| tree.get(node_id))
}
pub(crate) fn node_id(&self) -> Option<NodeId> {
match self {
XpathItemTreeNode::ElementNode(e) => e.id().ok(),
XpathItemTreeNode::TextNode(t) => t.id().ok(),
XpathItemTreeNode::AttributeNode(a) => a.id().ok(),
XpathItemTreeNode::CommentNode(c) => c.id().ok(),
XpathItemTreeNode::PINode(p) => p.id().ok(),
XpathItemTreeNode::DoctypeNode(d) => d.id().ok(),
XpathItemTreeNode::DocumentNode(_) => None,
}
}
pub fn parent<'tree>(&self, tree: &'tree XpathItemTree) -> Option<&'tree XpathItemTreeNode> {
self.node_id().and_then(|id| {
let parent_id = tree.arena.get(id).expect("xpath item node missing from tree").parent()?;
Some(tree.get(parent_id))
})
}
pub fn itertext(&self, tree: &XpathItemTree) -> TextIter {
TextIter::new(tree, self)
}
pub fn text_content(&self, tree: &XpathItemTree) -> String {
match self {
XpathItemTreeNode::DocumentNode(node) => node.text_content(tree),
XpathItemTreeNode::ElementNode(node) => node.text_content(tree),
XpathItemTreeNode::PINode(node) => node.data.clone(),
XpathItemTreeNode::CommentNode(c) => c.content.clone(),
XpathItemTreeNode::TextNode(node) => node.content.to_string(),
XpathItemTreeNode::AttributeNode(_) => String::from(""),
XpathItemTreeNode::DoctypeNode(_) => String::from(""),
}
}
pub fn text(&self, tree: &XpathItemTree) -> Option<String> {
match self {
XpathItemTreeNode::DocumentNode(node) => node.text(tree),
XpathItemTreeNode::ElementNode(node) => node.text(tree),
XpathItemTreeNode::PINode(node) => Some(node.data.clone()),
XpathItemTreeNode::CommentNode(_) => None,
XpathItemTreeNode::TextNode(node) => Some(node.content.to_string()),
XpathItemTreeNode::AttributeNode(_) => None,
XpathItemTreeNode::DoctypeNode(_) => None,
}
}
pub fn display(
&self,
tree: &XpathItemTree,
formatting: DisplayFormatting,
indent: usize,
) -> String {
match self {
XpathItemTreeNode::DocumentNode(node) => node.display(tree, formatting),
XpathItemTreeNode::ElementNode(node) => node.display(tree, formatting, indent),
XpathItemTreeNode::PINode(node) => node.to_string(),
XpathItemTreeNode::CommentNode(node) => node.to_string(),
XpathItemTreeNode::TextNode(node) => node.display(tree, formatting, indent),
XpathItemTreeNode::AttributeNode(node) => node.to_string(),
XpathItemTreeNode::DoctypeNode(node) => node.to_string(),
}
}
}
/// Rendering mode passed to `XpathItemTreeNode::display` and forwarded to the node types.
///
/// How each mode changes the output is decided by the individual node implementations in
/// the `data_model` module, not in this file.
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum DisplayFormatting {
// NOTE(review): presumably indented, human-readable output — confirm against the node `display` impls.
Pretty,
// NOTE(review): presumably renders a node without its children — confirm against the node `display` impls.
NoChildren,
/// Mode used by the `Display` implementation of `XpathItemTree`.
Raw,
}
/// Tag names of HTML void elements, i.e. elements that never have content or a closing tag.
///
/// NOTE(review): no use of this table is visible in this file; presumably the element
/// rendering code consults it — confirm against its users.
pub(crate) static VOID_ELEMENTS: [&str; 15] = [
"meta", "link", "img", "input", "br", "hr", "col", "area", "base", "embed", "keygen",
"param", "source", "track", "wbr",
];
/// Iterator over owned text fragments gathered from beneath a node.
///
/// The fragments are collected up front when the iterator is constructed, so iterating does
/// not borrow the tree. `Debug` is derived so the public type can be inspected and embedded in
/// other `Debug` types, like the other public types in this module.
#[derive(Debug)]
pub struct TextIter {
    /// Fragments still to be yielded, in the order they were collected.
    inner: std::vec::IntoIter<String>,
}
impl TextIter {
    /// Creates an iterator that yields no text fragments.
    pub(crate) fn empty() -> Self {
        Self {
            inner: Vec::<String>::new().into_iter(),
        }
    }

    /// Eagerly gathers the text fragments found beneath `node` and wraps them in an iterator.
    pub(crate) fn new(tree: &XpathItemTree, node: &XpathItemTreeNode) -> TextIter {
        let mut fragments: Vec<String> = Vec::new();
        Self::collect_texts(tree, node, &mut fragments);
        Self {
            inner: fragments.into_iter(),
        }
    }

    /// Appends to `out` the content of every text node reachable from `node`.
    ///
    /// Children are visited in the order `children` returns them. Text children contribute
    /// their content, element children are descended into recursively, and every other kind
    /// of child is skipped. `node` itself is never inspected, only its children.
    fn collect_texts(tree: &XpathItemTree, node: &XpathItemTreeNode, out: &mut Vec<String>) {
        for child in node.children(tree) {
            if let XpathItemTreeNode::TextNode(text) = child {
                out.push(text.content.clone());
            } else if let XpathItemTreeNode::ElementNode(_) = child {
                Self::collect_texts(tree, child, out);
            }
        }
    }
}
/// Yields the collected text fragments one by one, in collection order.
impl Iterator for TextIter {
    type Item = String;

    /// Returns the next collected fragment, or `None` once all have been yielded.
    fn next(&mut self) -> Option<Self::Item> {
        self.inner.next()
    }

    /// Forwards the exact remaining length known by the underlying vector iterator, so that
    /// consumers such as `collect` can pre-allocate instead of seeing the default `(0, None)`.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.inner.size_hint()
    }
}
/// Arena-backed tree of [`XpathItemTreeNode`]s.
#[derive(Debug, PartialEq, Clone)]
pub struct XpathItemTree {
/// Storage for every node of the tree.
pub(crate) arena: Arena<XpathItemTreeNode>,
/// Id of the root node; the `From<&HtmlDocument>` conversion stores the document node here.
pub(crate) root_node: NodeId,
/// Quirks mode recorded for the tree; `new` and the `From<&HtmlDocument>` conversion use `NoQuirks`.
pub(crate) quirks_mode: crate::html::grammar::QuirksMode,
}
impl XpathItemTree {
    /// Builds a tree from an arena and its root id, recording `QuirksMode::NoQuirks`.
    pub(crate) fn new(arena: Arena<XpathItemTreeNode>, root_node: NodeId) -> Self {
        let default_mode = crate::html::grammar::QuirksMode::NoQuirks;
        Self::new_with_quirks_mode(arena, root_node, default_mode)
    }

    /// Builds a tree from an arena, its root id and an explicit quirks mode.
    pub(crate) fn new_with_quirks_mode(
        arena: Arena<XpathItemTreeNode>,
        root_node: NodeId,
        quirks_mode: crate::html::grammar::QuirksMode,
    ) -> Self {
        Self {
            arena,
            root_node,
            quirks_mode,
        }
    }

    /// Returns the quirks mode recorded for this tree.
    pub fn quirks_mode(&self) -> crate::html::grammar::QuirksMode {
        self.quirks_mode
    }

    /// Returns the arena entry (node data plus tree links) stored under `id`.
    ///
    /// # Panics
    ///
    /// Panics if `id` is not present in this tree's arena.
    pub(crate) fn get_index_node(&self, id: NodeId) -> &indextree::Node<XpathItemTreeNode> {
        let slot = self.arena.get(id);
        slot.expect("xpath item node missing from tree")
    }

    /// Returns the node data stored under `id`.
    ///
    /// # Panics
    ///
    /// Panics if `id` is not present in this tree's arena.
    pub(crate) fn get(&self, id: NodeId) -> &XpathItemTreeNode {
        self.get_index_node(id).get()
    }

    /// Returns the node stored under the tree's root id.
    pub fn root(&self) -> &XpathItemTreeNode {
        let root_id = self.root_node;
        self.get(root_id)
    }

    /// Iterates over the data of every node held by the arena, in the arena's own order.
    pub fn iter(&self) -> impl Iterator<Item = &XpathItemTreeNode> {
        self.arena.iter().map(indextree::Node::get)
    }
}
/// Formats the tree by rendering its root node with `DisplayFormatting::Raw` and no indent.
impl Display for XpathItemTree {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let rendered = self.root().display(self, DisplayFormatting::Raw, 0);
        f.write_str(&rendered)
    }
}
#[allow(deprecated)]
impl From<&HtmlDocument> for XpathItemTree {
    /// Converts a parsed `HtmlDocument` into an `XpathItemTree`.
    ///
    /// A fresh document node becomes the tree root and the converted HTML root node is
    /// appended as its only child. The resulting tree records `QuirksMode::NoQuirks`.
    ///
    /// # Panics
    ///
    /// Panics if the HTML document cannot resolve one of its own nodes.
    fn from(html_document: &HtmlDocument) -> Self {
        // Converts `source` and everything beneath it, returning the arena id of the
        // converted node.
        fn convert_subtree(
            source: &DocumentNode,
            document: &HtmlDocument,
            arena: &mut Arena<XpathItemTreeNode>,
        ) -> NodeId {
            let html_node = document
                .get_html_node(source)
                .expect("html document missing expected node");

            let converted_id = match html_node {
                HtmlNode::Tag(tag) => {
                    let element = ElementNode::new(tag.name.to_string());
                    let element_id = arena.new_node(XpathItemTreeNode::ElementNode(element));
                    // The node can only learn its own id once the arena has assigned it.
                    arena
                        .get_mut(element_id)
                        .unwrap()
                        .get_mut()
                        .as_element_node_mut()
                        .unwrap()
                        .set_id(element_id);

                    // Attributes are stored in the arena beneath their element, ahead of
                    // the element's converted children.
                    for (name, value) in tag.attributes.iter() {
                        let attribute = AttributeNode::new(name.to_string(), value.to_string());
                        let attribute_id =
                            arena.new_node(XpathItemTreeNode::AttributeNode(attribute));
                        element_id.append(attribute_id, arena);
                        arena
                            .get_mut(attribute_id)
                            .unwrap()
                            .get_mut()
                            .as_attribute_node_mut()
                            .unwrap()
                            .set_id(attribute_id);
                    }

                    element_id
                }
                HtmlNode::Text(text) => {
                    let text_node = TextNode::new(text.value.to_string());
                    let text_id = arena.new_node(XpathItemTreeNode::TextNode(text_node));
                    arena
                        .get_mut(text_id)
                        .unwrap()
                        .get_mut()
                        .as_text_node_mut()
                        .unwrap()
                        .set_id(text_id);
                    text_id
                }
                HtmlNode::Comment(comment) => CommentNode::create(comment.value.clone(), arena),
                HtmlNode::ProcessingInstruction(pi) => {
                    PINode::create(pi.target.clone(), pi.data.clone(), arena)
                }
                HtmlNode::Doctype(doctype) => DoctypeNode::create(
                    doctype.name.clone(),
                    doctype.public_id.clone(),
                    doctype.system_id.clone(),
                    arena,
                ),
            };

            // Convert the HTML children recursively and attach them in order.
            for child in source.children(document) {
                let child_id = convert_subtree(&child, document, arena);
                converted_id.append(child_id, arena);
            }

            converted_id
        }

        let mut item_arena = Arena::<XpathItemTreeNode>::new();
        let document_node = XpathItemTreeNode::DocumentNode(XpathDocumentNode {});
        let root_node_id = item_arena.new_node(document_node);

        let html_root_id =
            convert_subtree(&html_document.root_node, html_document, &mut item_arena);
        root_node_id.append(html_root_id, &mut item_arena);

        Self::new(item_arena, root_node_id)
    }
}