use html_cat::{Document as HtmlDocument, Element as HtmlElement, Node as HtmlNode};
use crate::document::Document;
use crate::node::{Arena, CommentData, DocumentData, ElementData, Node, NodeId, TextData};
/// Builds an arena-backed [`Document`] from a parsed `html_cat` document.
///
/// A document node is allocated first so that its id can be handed to the
/// root element as its parent. The element tree under `source.root()` is then
/// inserted, and finally the document node is rewritten so that its child
/// list contains exactly the root element.
#[must_use]
pub fn from_html_doc(source: &HtmlDocument) -> Document {
    // The document node starts out childless; its id is needed before the
    // root element can be created.
    let empty_document = Node::Document(DocumentData::new(Vec::new()));
    let (document_id, arena) = Arena::new().alloc(empty_document);
    let (root_element_id, arena) = insert_element(arena, source.root(), Some(document_id));
    Document::new(
        attach_root(arena, document_id, root_element_id),
        document_id,
    )
}
/// Rewrites the node stored at `doc_id` so that `root_id` is its only child.
///
/// When `doc_id` resolves to a `Node::Document`, it is replaced by a fresh
/// document node whose children are `[root_id]`. Any other node kind is
/// stored back unchanged. If `doc_id` does not resolve at all, the arena is
/// returned untouched. A failed `store` is not reported: the arena carried by
/// the error is returned instead.
fn attach_root(arena: Arena, doc_id: NodeId, root_id: NodeId) -> Arena {
    // Take an owned copy first so the lookup no longer borrows the arena when
    // it is handed to `store`.
    let snapshot = arena.get(doc_id).cloned();
    match snapshot {
        None => arena,
        Some(current) => {
            let replacement = if matches!(current, Node::Document(_)) {
                Node::Document(DocumentData::with_children(vec![root_id]))
            } else {
                current
            };
            arena
                .store(doc_id, replacement)
                .unwrap_or_else(|rejected| rejected)
        }
    }
}
/// Inserts `element` and all of its descendants into `arena`.
///
/// The element is first allocated with an empty child list so that its id is
/// available as the parent for its children; once the children are built the
/// element is updated with their ids.
///
/// Returns the id assigned to the element together with the resulting arena.
fn insert_element(arena: Arena, element: &HtmlElement, parent: Option<NodeId>) -> (NodeId, Arena) {
    // Copy every attribute into an owned (name, value) pair.
    let owned_attrs: Vec<(String, String)> = element
        .attributes()
        .iter()
        .map(|attr| {
            let key = attr.name().to_owned();
            let val = attr.value().to_owned();
            (key, val)
        })
        .collect();

    // Placeholder node: children are filled in after they have been allocated.
    let placeholder = Node::Element(ElementData::new(
        element.name(),
        owned_attrs,
        Vec::new(),
        parent,
    ));
    let (element_id, arena) = arena.alloc(placeholder);
    let (child_ids, arena) = build_children(arena, element.children(), element_id);
    (
        element_id,
        set_element_children(arena, element_id, child_ids),
    )
}
/// Inserts every node in `children` into `arena` with `parent` as its parent.
///
/// The arena is threaded through each insertion in source order. Returns the
/// ids of the inserted children, in the same order as `children`, together
/// with the resulting arena.
fn build_children(arena: Arena, children: &[HtmlNode], parent: NodeId) -> (Vec<NodeId>, Arena) {
    children.iter().fold(
        // One id is produced per child, so the vector can be sized up front.
        (Vec::with_capacity(children.len()), arena),
        |(mut ids, arena), child| {
            let (child_id, arena) = insert_child(arena, child, parent);
            // Append in place; rebuilding the vector on every step would make
            // the whole pass quadratic in the number of children.
            ids.push(child_id);
            (ids, arena)
        },
    )
}
/// Inserts a single HTML child node into `arena` under `parent`.
///
/// Elements are delegated to `insert_element` (which also inserts their
/// descendants); text and comment nodes are allocated directly.
///
/// Returns the id of the inserted node together with the resulting arena.
fn insert_child(arena: Arena, child: &HtmlNode, parent: NodeId) -> (NodeId, Arena) {
    let leaf = match child {
        // Elements carry their own subtree, so they take the recursive path.
        HtmlNode::Element(nested) => return insert_element(arena, nested, Some(parent)),
        HtmlNode::Text(text) => Node::Text(TextData::new(text.content(), Some(parent))),
        HtmlNode::Comment(comment) => {
            Node::Comment(CommentData::new(comment.text(), Some(parent)))
        }
    };
    arena.alloc(leaf)
}
/// Replaces the child list of the element stored at `id` with `children`.
///
/// When `id` resolves to a `Node::Element`, the element is rebuilt via
/// `with_children`. Any other node kind is stored back unchanged. If `id`
/// does not resolve at all, the arena is returned untouched. A failed `store`
/// is not reported: the arena carried by the error is returned instead.
fn set_element_children(arena: Arena, id: NodeId, children: Vec<NodeId>) -> Arena {
    // Work on an owned copy so the lookup no longer borrows the arena when it
    // is handed to `store`.
    let replacement = arena.get(id).cloned().map(|current| match current {
        Node::Element(data) => Node::Element(data.with_children(children)),
        unchanged => unchanged,
    });
    match replacement {
        Some(node) => arena.store(id, node).unwrap_or_else(|rejected| rejected),
        None => arena,
    }
}