// hatmel 0.2.0
//
// HTML model and parser (html5ever)
// Documentation
use std::{borrow::Cow, marker::PhantomData};

use crate::{Handle, HandleIter, Hatmel, ParentIter};
use fashion::{
    parse_selectors, select, FashionSubject, SelectionIter, Selector, SingleInquiryIter,
};

impl Hatmel {
    /// Parses `selectors` and runs the resulting query against this document.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `parse_selectors` when the selector
    /// text cannot be parsed.
    pub fn query(&self, selectors: &str) -> Result<Selection<'_>, fashion::LoadError> {
        Ok(self.select(parse_selectors(selectors)?))
    }

    /// Runs already parsed `selectors` against this document.
    ///
    /// The returned [`Selection`] borrows the document and yields the handle
    /// of each match as it is iterated.
    pub fn select(&self, selectors: Vec<Selector>) -> Selection<'_> {
        let iter = select(self, selectors);
        Selection { iter }
    }
}

/// Iterator over the node handles matched by a selector query on a [`Hatmel`] document.
///
/// Values of this type are created by [`Hatmel::query`] and [`Hatmel::select`].
#[derive(Debug, Clone)]
pub struct Selection<'a> {
    /// Underlying `fashion` selection iterator; it borrows the queried document.
    iter: SelectionIter<SingleInquiryIter<'a, Hatmel>>,
}
impl<'a> Iterator for Selection<'a> {
    type Item = Handle;

    /// Advances the underlying selection and returns the matched node.
    ///
    /// Every item of the inner iterator is a path made of steps, each step
    /// being a list of handles. The match is the last handle of the last
    /// step. `None` is returned when the inner iterator is exhausted, and
    /// also when the path or its last step turns out to be empty.
    fn next(&mut self) -> Option<Self::Item> {
        let mut path = self.iter.next()?;
        let mut final_step = path.pop()?;
        final_step.pop()
    }
}

/// Enables selector querying of Hatmel
///
/// Every method is a thin adapter from the `fashion` trait vocabulary
/// ("adepts") to the document API of [`Hatmel`], where an adept is a node
/// [`Handle`].
impl FashionSubject for Hatmel {
    type Adept = Handle;
    type AdeptIter<'a> = HandleIter<'a>;
    type ParentIter<'a> = ParentIter<'a>
    where
        Self: 'a;
    type OlderSiblingIter<'a> = OlderSiblingIter<'a>
    where
        Self: 'a;

    /// Returns the document's node iterator as the set of selection candidates.
    fn get_all_adepts(&self) -> Self::AdeptIter<'_> {
        // Hatmel nodes iterator is designed for concurrency:
        // cloning the iterator allows multiple threads/tasks to compete for work
        self.nodes()
    }

    /// Returns the first component of the node's element name, or `None`
    /// when `get_element_name` has nothing for this node.
    fn get_element_local_name(&self, adept: &Self::Adept) -> Option<&str> {
        let name = self.get_element_name(*adept)?;
        Some(name.0)
    }

    /// Returns the second component of the node's element name, or `None`
    /// when `get_element_name` has nothing for this node.
    fn get_element_name_space(&self, adept: &Self::Adept) -> Option<&str> {
        let name = self.get_element_name(*adept)?;
        Some(name.1)
    }

    /// Returns the document's ancestor iterator for the node.
    fn get_ancestors(&self, adept: &Self::Adept) -> Self::ParentIter<'_> {
        Hatmel::ancestors(self, *adept)
    }

    /// Returns the iterator over the siblings listed before the node under
    /// its parent.
    fn get_older_siblings(&self, adept: &Self::Adept) -> Self::OlderSiblingIter<'_> {
        OlderSiblingIter::new(self, *adept)
    }

    /// Looks up the `class` attribute in the empty namespace.
    fn get_element_class(&self, adept: &Self::Adept) -> Option<&str> {
        Hatmel::get_element_attribute(self, *adept, "class", "")
    }

    /// Looks up the `id` attribute in the empty namespace.
    fn get_element_id(&self, adept: &Self::Adept) -> Option<&str> {
        Hatmel::get_element_attribute(self, *adept, "id", "")
    }

    /// Looks up the attribute `name` in namespace `ns` on the node by
    /// delegating to the inherent `Hatmel::get_element_attribute`.
    fn get_element_attribute(&self, adept: &Self::Adept, name: &str, ns: &str) -> Option<&str> {
        Hatmel::get_element_attribute(self, *adept, name, ns)
    }

    /// Tells whether the node is the root for selector purposes.
    ///
    /// The decision is based on the element children of handle `0` (the
    /// document): with exactly one such element, that element is the root;
    /// with none or with several, the document itself is the root.
    fn is_root_element(&self, adept: &Self::Adept) -> bool {
        // Only children that have an element name count as top-level elements.
        let mut top_level = self
            .children(0)
            .filter(|child| self.get_element_name(*child).is_some());
        let first = top_level.next();
        let has_more = top_level.next().is_some();

        let root = match first {
            // single top => root is the main element
            Some(only) if !has_more => only,
            // fragment => root is the doc
            Some(_) => 0,
            None if has_more => unreachable!("cannot have second without first"),
            // empty doc
            None => 0,
        };
        adept == &root
    }
}

/// Iterator over the siblings that are listed before a node under its parent.
///
/// The sibling handles are gathered eagerly when the iterator is constructed;
/// the default value holds no siblings and therefore yields nothing.
#[derive(Debug, Default)]
pub struct OlderSiblingIter<'a> {
    /// Preceding siblings in the order the parent's children were listed;
    /// iteration takes them from the back of this vector.
    siblings: Vec<Handle>,
    // pretend we depend on the doc to prevent modification
    phantom: PhantomData<Cow<'a, ()>>,
}
impl<'a> OlderSiblingIter<'a> {
    /// Collects the siblings of `adept` that its parent lists before it.
    ///
    /// A node without a parent has no siblings, so the empty default
    /// iterator is returned for it.
    fn new(doc: &'a Hatmel, adept: Handle) -> Self {
        let Some(parent) = doc.parent(adept) else {
            return Self::default();
        };
        // Walk the parent's children and stop as soon as the node itself shows up.
        let siblings = doc
            .children(parent)
            .take_while(|sibling| *sibling != adept)
            .collect();
        Self {
            siblings,
            phantom: PhantomData,
        }
    }
}
impl<'a> Iterator for OlderSiblingIter<'a> {
    type Item = Handle;

    /// Yields the next older sibling, or `None` once all have been returned.
    ///
    /// Handles are taken from the back of the collected list, so the sibling
    /// listed immediately before the node comes first and the parent's first
    /// child comes last.
    fn next(&mut self) -> Option<Self::Item> {
        self.siblings.pop()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use fashion::Selector;

    /// Small document with nested `div`/`span` elements shared by all tests.
    const DIVS_AND_SPANS: &str = "<!doctype html>
                                <html>
                                <body>
                                    <div class='main'>
                                        <span>
                                            <div>
                                                <span id=123>a</span>
                                            </div>
                                        </span>
                                    </div>
                                </body>
                                </html>";

    #[test]
    fn test_ancestor_selection() {
        let paths = selected_paths(DIVS_AND_SPANS, "div span");
        insta::assert_debug_snapshot!(paths, @r###"
        [
            "6 div, then 8 span",
            "6 div or 10 div, then 12 span",
        ]
        "###)
    }

    #[test]
    fn test_class_selection() {
        let paths = selected_paths(DIVS_AND_SPANS, "div.main");
        insta::assert_debug_snapshot!(paths, @r###"
        [
            "6 div",
        ]
        "###)
    }

    #[test]
    fn test_id_selection() {
        let paths = selected_paths(DIVS_AND_SPANS, "span#123");
        insta::assert_debug_snapshot!(paths, @r###"
        [
            "12 span",
        ]
        "###)
    }

    #[test]
    fn test_complex() {
        let paths = selected_paths(DIVS_AND_SPANS, "body > div span#123");
        insta::assert_debug_snapshot!(paths, @r###"
        [
            "4 body, then 6 div or 10 div, then 12 span",
        ]
        "###)
    }

    /// Loads `data`, runs `query` over it and renders every matched path as text.
    fn selected_paths(data: &str, query: &str) -> Vec<String> {
        let (doc, selectors) = rig(data, query);
        let paths: Vec<_> = select(&doc, selectors).collect();
        pretty_paths(paths, &doc)
    }

    /// Builds a document from `data` and parses `query` into selectors.
    fn rig(data: &str, query: &str) -> (Hatmel, Vec<Selector>) {
        let selectors = parse_selectors(query).expect("must parse selectors");
        let mut doc = Hatmel::default();
        doc.load(data.as_bytes()).expect("loads fine");
        (doc, selectors)
    }

    /// Renders each path as its steps joined by ", then ", where a step lists
    /// its candidate nodes as "<handle> <element name>" joined by " or ".
    fn pretty_paths(paths: Vec<Vec<Vec<Handle>>>, doc: &Hatmel) -> Vec<String> {
        let describe =
            |node: Handle| format!("{node} {}", doc.get_element_name(node).expect("node").0);

        let mut rendered = Vec::with_capacity(paths.len());
        for path in paths {
            let steps: Vec<String> = path
                .into_iter()
                .map(|step| {
                    step.into_iter()
                        .map(&describe)
                        .collect::<Vec<_>>()
                        .join(" or ")
                })
                .collect();
            rendered.push(steps.join(", then "));
        }
        rendered
    }
}