//! hatmel 0.2.0
//!
//! HTML model and parser (html5ever)
//! Documentation
use super::{namespace_equals_or_html, Content, Element, ElementKind, Hatmel, Node};
use html5ever::{
    interface::{NodeOrText, QuirksMode},
    tendril::TendrilSink,
    tree_builder::TreeSink,
    Attribute, ExpandedName, ParseOpts, QualName,
};
pub use html5ever::{tendril::StrTendril, LocalNameStaticSet, NamespaceStaticSet, PrefixStaticSet};
use log::warn;
use std::{borrow::Cow, fs, io, path};

impl Hatmel {
    /// Opens the file at `path` and parses its content into this model.
    ///
    /// # Errors
    ///
    /// Returns [`LoadError::Io`] when the file cannot be opened, otherwise
    /// whatever [`Hatmel::load`] returns.
    pub fn load_file(&mut self, path: impl AsRef<path::Path>) -> LoadResult<&mut Self> {
        let file = fs::File::open(path)?;
        self.load(file)
    }

    /// Parses a UTF-8 encoded HTML document from `data` into this model.
    ///
    /// On success the same `&mut Self` is handed back so calls can be chained.
    ///
    /// # Errors
    ///
    /// * [`LoadError::Io`] when reading from `data` fails.
    /// * [`LoadError::Parsing`] when the parser reported at least one parse
    ///   error. The tree sink mutates the model while parsing, so nodes
    ///   created before the error was detected are not rolled back.
    pub fn load(&mut self, mut data: impl io::Read) -> LoadResult<&mut Self> {
        let sink = HatmelBuilder::new(self);
        // The outer `?` covers the I/O failure of the reader, the inner result
        // is the sink's own verdict (see `TreeSink::finish`).
        let verdict = html5ever::driver::parse_document(sink, ParseOpts::default())
            .from_utf8()
            .read_from(&mut data)?;
        verdict?;
        Ok(self)
    }
}

/// Tree sink that lets the html5ever parser write directly into a [`Hatmel`].
#[derive(Debug)]
struct HatmelBuilder<'a> {
    /// The model that receives the nodes and parent/child links.
    pub domino: &'a mut Hatmel,
    /// Parse error messages collected via `parse_error`; a non-empty list
    /// makes `finish` return an error.
    pub errors: Vec<Cow<'static, str>>,
}

impl<'a> HatmelBuilder<'a> {
    /// Creates a builder that writes into `domino` and starts with no
    /// recorded parse errors.
    fn new(domino: &'a mut Hatmel) -> Self {
        let errors = Vec::new();
        Self { domino, errors }
    }
}

/// Result type returned by the loading functions of this module.
pub type LoadResult<T> = Result<T, LoadError>;
/// Reasons why loading a document can fail.
#[derive(thiserror::Error, Debug)]
pub enum LoadError {
    /// Reading the input failed.
    #[error("I/O error - {0}")]
    Io(#[from] io::Error),

    /// The parser reported one or more parse errors; all collected messages
    /// are carried in the order they were reported.
    #[error("Parsing errors: {0:?}")]
    Parsing(Vec<Cow<'static, str>>),
}

impl<'a> TreeSink for HatmelBuilder<'a> {
    type Handle = super::Handle;
    type Output = Result<(), LoadError>;

    fn finish(self) -> Self::Output {
        if self.errors.is_empty() {
            Ok(())
        } else {
            Err(LoadError::Parsing(self.errors))
        }
    }

    fn parse_error(&mut self, msg: std::borrow::Cow<'static, str>) {
        self.errors.push(msg);
    }

    /// Returns the handle of the document node, which is always handle `0`.
    fn get_document(&mut self) -> Self::Handle {
        // first node is the top level fragment node
        0
    }

    /// Returns the expanded (namespace + local) name of the element `target`.
    ///
    /// # Panics
    ///
    /// Panics when `target` is not an element node, or when the handle is not
    /// a valid index into the node list.
    fn elem_name<'b>(&'b self, target: &'b Self::Handle) -> ExpandedName<'b> {
        let content = &self.domino.nodes[*target].content;
        match content {
            Content::Element(element) => ExpandedName {
                ns: &element.namespace,
                local: &element.name,
            },
            // Every non-element variant is listed so that adding a variant to
            // `Content` forces a decision here.
            otherwise @ (Content::Document
            | Content::Comment { .. }
            | Content::ProcessingInstruction { .. }
            | Content::Text { .. }
            | Content::DocType { .. }) => {
                unreachable!("Only elements and templates have name. This is {otherwise:?}")
            }
        }
    }

    fn create_element(
        &mut self,
        name: html5ever::QualName,
        attrs: Vec<html5ever::Attribute>,
        flags: html5ever::tree_builder::ElementFlags,
    ) -> Self::Handle {
        let kind = match (
            flags.template,
            flags.mathml_annotation_xml_integration_point,
        ) {
            (false, false) => ElementKind::Regular,
            (true, false) => ElementKind::Template,
            (false, true) => ElementKind::MathmlPoint,
            (true, true) => {
                warn!("cannot have a template be a mathml point!");
                ElementKind::Template
            }
        };
        let attrs = attrs.into_iter().map(|a| a.into()).collect();
        let QualName { local, ns, prefix } = name;
        self.domino
            .register(Node::new_element(local, ns, prefix, attrs, kind))
    }

    fn create_comment(&mut self, text: html5ever::tendril::StrTendril) -> Self::Handle {
        self.domino.register(Node::new_comment(text))
    }

    fn create_pi(
        &mut self,
        target: html5ever::tendril::StrTendril,
        data: html5ever::tendril::StrTendril,
    ) -> Self::Handle {
        self.domino
            .register(Node::new_processing_instruction(target, data))
    }

    /// Appends `new_node` as the last child of `parent`.
    ///
    /// Text is merged into the last child when that child is already a text
    /// node; otherwise a new text node is registered and appended.
    ///
    /// # Panics
    ///
    /// Panics when `parent` is neither the document nor an element.
    fn append(&mut self, parent: &Self::Handle, new_node: NodeOrText<Self::Handle>) {
        let parent = *parent;

        match &self.domino.nodes[parent].content {
            // These are the only node kinds that may hold children.
            Content::Document | Content::Element { .. } => {}
            otherwise @ (Content::Comment { .. }
            | Content::ProcessingInstruction { .. }
            | Content::Text { .. }
            | Content::DocType { .. }) => {
                unreachable!("One can only append to a fragment or element. This is {otherwise:?}")
            }
        }

        let handle = match new_node {
            NodeOrText::AppendNode(node) => node,
            NodeOrText::AppendText(append_text) => {
                let last_child = self
                    .domino
                    .children
                    .entry(parent)
                    .or_default()
                    .last()
                    .copied();
                if let Some(last_child) = last_child {
                    // Adjacent text is kept in a single node.
                    if let Content::Text { text } = &mut self.domino.nodes[last_child].content {
                        text.push_str(&append_text);
                        return;
                    }
                }
                self.domino.register(Node::new_text(append_text))
            }
        };

        self.domino.parents.insert(handle, parent);
        let siblings = self.domino.children.entry(parent).or_default();
        siblings.push(handle);
    }

    /// Inserts `child` either before `element` or at the end of `prev_element`.
    ///
    /// The parser calls this when the insertion point depends on whether
    /// `element` currently has a parent:
    ///
    /// * `element` has a parent: `child` is inserted right before `element`.
    /// * `element` has no parent: `child` is appended to `prev_element`.
    ///
    /// Inserting before a parentless element is impossible (there is no
    /// sibling list to insert into), so the second case must not be routed to
    /// `append_before_sibling`.
    fn append_based_on_parent_node(
        &mut self,
        element: &Self::Handle,
        prev_element: &Self::Handle,
        child: NodeOrText<Self::Handle>,
    ) {
        if self.domino.parents.contains_key(element) {
            self.append_before_sibling(element, child);
        } else {
            self.append(prev_element, child);
        }
    }

    fn append_doctype_to_document(
        &mut self,
        name: html5ever::tendril::StrTendril,
        public_id: html5ever::tendril::StrTendril,
        system_id: html5ever::tendril::StrTendril,
    ) {
        let handle = self
            .domino
            .register(Node::new_doctype(name, public_id, system_id));
        self.append(&0, NodeOrText::AppendNode(handle))
    }

    /// Returns the handle under which template contents are stored.
    ///
    /// No separate contents node is created here: the template element's own
    /// handle is returned.
    fn get_template_contents(&mut self, target: &Self::Handle) -> Self::Handle {
        *target
    }

    /// Tells whether both handles refer to the same node, which is decided by
    /// plain handle equality.
    fn same_node(&self, x: &Self::Handle, y: &Self::Handle) -> bool {
        x == y
    }

    /// Stores the quirks mode reported by the parser in the model, converted
    /// from the html5ever type.
    fn set_quirks_mode(&mut self, mode: QuirksMode) {
        self.domino.mode = mode.into();
    }

    /// Inserts `new_node` into the parent of `sibling`, right before `sibling`.
    ///
    /// * Text is merged into the node preceding `sibling` when that node is a
    ///   text node. The new text is appended to the existing text, because
    ///   the insertion point lies after the preceding node.
    /// * Otherwise the node (or a newly registered text node) is detached
    ///   from its current parent, recorded as a child of the sibling's parent
    ///   and inserted at the sibling's position.
    ///
    /// # Panics
    ///
    /// Panics when `sibling` has no parent or is missing from its parent's
    /// child list.
    fn append_before_sibling(
        &mut self,
        sibling: &Self::Handle,
        new_node: NodeOrText<Self::Handle>,
    ) {
        let parent = *self
            .domino
            .parents
            .get(sibling)
            .expect("sibling must have a parent");

        let handle = match new_node {
            NodeOrText::AppendNode(node) => node,
            NodeOrText::AppendText(new_text) => {
                let siblings = self.domino.children.entry(parent).or_default();
                let position = siblings
                    .iter()
                    .position(|h| h == sibling)
                    .expect("sibling must be in parents children");
                let previous = position.checked_sub(1).map(|p| siblings[p]);
                if let Some(previous) = previous {
                    // try to merge texts: the new text follows the previous one
                    if let Content::Text { text } = &mut self.domino.nodes[previous].content {
                        text.push_str(&new_text);
                        // done
                        return;
                    }
                }
                self.domino.register(Node::new_text(new_text))
            }
        };

        // Detach first: the node may currently live in the same child list,
        // which would shift the sibling's position.
        self.remove_from_parent(&handle);

        // Keep the child -> parent link in sync with the child list.
        self.domino.parents.insert(handle, parent);

        let siblings = self.domino.children.entry(parent).or_default();
        let position = siblings
            .iter()
            .position(|h| h == sibling)
            .expect("sibling must have a position");
        siblings.insert(position, handle)
    }

    /// Adds each of `attrs` to the element `target` unless the element
    /// already has an attribute with the same local name and a namespace
    /// accepted by `namespace_equals_or_html`.
    ///
    /// Attributes added earlier in the same call count as existing, so
    /// duplicates inside `attrs` are added only once.
    ///
    /// # Panics
    ///
    /// Panics when `target` is not an element node.
    fn add_attrs_if_missing(&mut self, target: &Self::Handle, attrs: Vec<html5ever::Attribute>) {
        let content = &mut self.domino.nodes[*target].content;
        let existing_attrs = match content {
            Content::Element(Element { attrs: present, .. }) => present,
            otherwise => unreachable!("Only elements can have attributes. This is {otherwise:?}"),
        };

        for attr in attrs {
            let already_present = existing_attrs.iter().any(|existing| {
                existing.name == attr.name.local
                    && namespace_equals_or_html(&existing.namespace, &attr.name.ns)
            });
            if !already_present {
                // todo: adjust namespaces
                existing_attrs.push(attr.into());
            }
        }
    }

    /// Detaches `target` from its parent, if it has one.
    ///
    /// Both directions of the link are dropped: the child -> parent entry and
    /// the occurrence of `target` in the parent's child list. A node without
    /// a parent is left untouched.
    fn remove_from_parent(&mut self, target: &Self::Handle) {
        if let Some(old_parent) = self.domino.parents.remove(target) {
            if let Some(old_siblings) = self.domino.children.get_mut(&old_parent) {
                // `target` is a node handle, not a position in the sibling
                // list, so it has to be looked up by value.
                old_siblings.retain(|sibling| sibling != target);
            }
        }
    }

    /// Moves all children of `node` to the end of `new_parent`'s child list,
    /// keeping their order, and points their parent links to `new_parent`.
    fn reparent_children(&mut self, node: &Self::Handle, new_parent: &Self::Handle) {
        let adoptive = *new_parent;
        let moved = self.domino.children.remove(node).unwrap_or_default();

        for &child in &moved {
            self.domino.parents.insert(child, adoptive);
        }

        self.domino
            .children
            .entry(adoptive)
            .or_default()
            .extend(moved);
    }
}

/// Maps the html5ever quirks mode onto this crate's own enum, variant by
/// variant.
impl From<QuirksMode> for super::QuirksMode {
    fn from(value: QuirksMode) -> Self {
        match value {
            QuirksMode::NoQuirks => Self::NoQuirks,
            QuirksMode::LimitedQuirks => Self::LimitedQuirks,
            QuirksMode::Quirks => Self::Quirks,
        }
    }
}

impl From<Attribute> for super::Attribute {
    fn from(a: Attribute) -> Self {
        super::Attribute {
            name: a.name.local,
            namespace: a.name.ns,
            prefix: a.name.prefix,
            value: a.value,
        }
    }
}