html-cat 0.1.0

HTML5 parser: tokenizer + tree builder producing a Document tree of Element/Text/Comment nodes. No mut, no Rc/Arc, no interior mutability, no panics, exhaustive matches. First sub-crate of a Servo-replacement webview runtime targeting Tauri.
//! Integration tests covering full HTML documents and common patterns.

use html_cat::{Error, Node, parse};

/// Returns the first direct child of the document root that is an element
/// whose name equals `name`, or `None` when no such child exists.
///
/// Only the root's immediate children are inspected; non-element nodes are
/// skipped.
fn first_child_named<'a>(doc: &'a html_cat::Document, name: &str) -> Option<&'a html_cat::Element> {
    doc.root().children().iter().find_map(|node| match node {
        Node::Element(child) => {
            if child.name() == name {
                Some(child)
            } else {
                None
            }
        }
        _other => None,
    })
}

fn descendant_named<'a>(
    element: &'a html_cat::Element,
    name: &str,
) -> Option<&'a html_cat::Element> {
    element.children().iter().find_map(|n| match n {
        Node::Element(e) if e.name() == name => Some(e),
        Node::Element(e) => descendant_named(e, name),
        _other => None,
    })
}

/// A leading `<!DOCTYPE html>` must be reported by `doctype()` with the
/// name "html"; anything else fails with `Error::InvalidDoctype`.
#[test]
fn doctype_recognised() -> Result<(), Error> {
    let document = parse("<!DOCTYPE html><html><body></body></html>")?;
    let doctype = document.doctype();
    match doctype {
        Some(found) if found.name() == "html" => Ok(()),
        _other => Err(Error::InvalidDoctype {
            at: html_cat::span::Span::synthetic(),
        }),
    }
}

/// Parses a document with explicit `<html>` and `<body>` tags and checks
/// that the `<p>` found under the body holds the text "hi".
///
/// Any failed expectation is reported as `Error::InvalidDoctype` with a
/// synthetic span, the same stand-in error the other tests in this file use.
#[test]
fn explicit_html_body() -> Result<(), Error> {
    // Built lazily so the error value is only constructed on failure.
    let missing = || Error::InvalidDoctype {
        at: html_cat::span::Span::synthetic(),
    };
    let doc = parse("<html><body><p>hi</p></body></html>")?;
    let body = first_child_named(&doc, "body").ok_or_else(missing)?;
    let p = descendant_named(body, "p").ok_or_else(missing)?;
    // `descendant_named` only ever returns elements already named "p", so
    // re-checking the name would prove nothing. Verify the paragraph's text
    // content instead, which is what the input actually exercises.
    p.children()
        .iter()
        .find_map(|n| match n {
            Node::Text(t) if t.content() == "hi" => Some(()),
            _other => None,
        })
        .ok_or_else(missing)
}

/// A bare `<p>hi</p>` fragment must still yield a `body` child under the
/// document root, with the `<p>` element somewhere beneath it.
#[test]
fn auto_inserts_html_and_body() -> Result<(), Error> {
    let not_found = || Error::InvalidDoctype {
        at: html_cat::span::Span::synthetic(),
    };
    let document = parse("<p>hi</p>")?;
    let body = first_child_named(&document, "body").ok_or_else(not_found)?;
    // Only the presence of the paragraph matters here, not its contents.
    let _paragraph = descendant_named(body, "p").ok_or_else(not_found)?;
    Ok(())
}

/// An `<img>` followed by a `<p>` must end up with no children of its own
/// and must report `is_self_closing()`.
#[test]
fn void_element_no_children() -> Result<(), Error> {
    let not_found = || Error::InvalidDoctype {
        at: html_cat::span::Span::synthetic(),
    };
    let document = parse("<html><body><img src=\"x.png\"><p>after</p></body></html>")?;
    let body = first_child_named(&document, "body").ok_or_else(not_found)?;
    let image = descendant_named(body, "img").ok_or_else(not_found)?;
    let childless = image.children().is_empty();
    if childless && image.is_self_closing() {
        Ok(())
    } else {
        Err(not_found())
    }
}

/// Checks that both attributes on the `<a>` element are parsed with their
/// values: the double-quoted `href` and the single-quoted `class`.
///
/// Any failed expectation is reported as `Error::InvalidDoctype` with a
/// synthetic span, the same stand-in error the other tests in this file use.
#[test]
fn attributes_parsed() -> Result<(), Error> {
    // Built lazily so the error value is only constructed on failure.
    let missing = || Error::InvalidDoctype {
        at: html_cat::span::Span::synthetic(),
    };
    let doc =
        parse("<html><body><a href=\"https://example.com\" class='link'>x</a></body></html>")?;
    let body = first_child_named(&doc, "body").ok_or_else(missing)?;
    let a = descendant_named(body, "a").ok_or_else(missing)?;
    let href = a.attributes().get("href").ok_or_else(missing)?;
    // The input deliberately uses single quotes for `class`; assert it too so
    // the single-quoted attribute path is actually covered by this test.
    let class = a.attributes().get("class").ok_or_else(missing)?;
    (href.value() == "https://example.com" && class.value() == "link")
        .then_some(())
        .ok_or_else(missing)
}

/// An attribute written without a value (`<input disabled>`) must be
/// present in the attribute set and carry an empty value.
#[test]
fn valueless_attribute() -> Result<(), Error> {
    let not_found = || Error::InvalidDoctype {
        at: html_cat::span::Span::synthetic(),
    };
    let document = parse("<html><body><input disabled></body></html>")?;
    let body = first_child_named(&document, "body").ok_or_else(not_found)?;
    let input = descendant_named(body, "input").ok_or_else(not_found)?;
    let disabled = input.attributes().get("disabled").ok_or_else(not_found)?;
    if disabled.value().is_empty() {
        Ok(())
    } else {
        Err(not_found())
    }
}

/// The `&amp;` reference inside the paragraph must be decoded, so that one
/// of the paragraph's text children reads exactly "a & b".
#[test]
fn text_with_entity() -> Result<(), Error> {
    let not_found = || Error::InvalidDoctype {
        at: html_cat::span::Span::synthetic(),
    };
    let document = parse("<html><body><p>a &amp; b</p></body></html>")?;
    let body = first_child_named(&document, "body").ok_or_else(not_found)?;
    let paragraph = descendant_named(body, "p").ok_or_else(not_found)?;
    let decoded = paragraph
        .children()
        .iter()
        .any(|node| matches!(node, Node::Text(text) if text.content() == "a & b"));
    if decoded {
        Ok(())
    } else {
        Err(not_found())
    }
}

/// A comment inside the body must survive parsing as a direct `Comment`
/// child of the body whose trimmed text is "hi".
#[test]
fn comment_preserved() -> Result<(), Error> {
    let not_found = || Error::InvalidDoctype {
        at: html_cat::span::Span::synthetic(),
    };
    let document = parse("<html><body><!-- hi --><p>x</p></body></html>")?;
    let body = first_child_named(&document, "body").ok_or_else(not_found)?;
    let kept = body.children().iter().any(|node| match node {
        Node::Comment(comment) => comment.text().trim() == "hi",
        _other => false,
    });
    if kept {
        Ok(())
    } else {
        Err(not_found())
    }
}

/// The body of a `<script>` must be kept as text: the `<` in `1 < 2` must
/// not start a tag, so a text child containing "1 < 2" has to exist.
#[test]
fn script_content_raw() -> Result<(), Error> {
    let not_found = || Error::InvalidDoctype {
        at: html_cat::span::Span::synthetic(),
    };
    let document = parse("<html><body><script>let x = 1 < 2 && 3 > 2;</script></body></html>")?;
    let body = first_child_named(&document, "body").ok_or_else(not_found)?;
    let script = descendant_named(body, "script").ok_or_else(not_found)?;
    let raw_kept = script
        .children()
        .iter()
        .any(|node| matches!(node, Node::Text(text) if text.content().contains("1 < 2")));
    if raw_kept {
        Ok(())
    } else {
        Err(not_found())
    }
}

/// Walks body → div → span → b, each step searching only beneath the
/// previous match, and fails if any level of the nesting is missing.
#[test]
fn nested_elements_balance() -> Result<(), Error> {
    let not_found = || Error::InvalidDoctype {
        at: html_cat::span::Span::synthetic(),
    };
    let document = parse("<html><body><div><span>a<b>c</b>d</span></div></body></html>")?;
    let body = first_child_named(&document, "body").ok_or_else(not_found)?;
    // Every missing level produces the same error, so the lookups can be
    // chained and converted to a `Result` once at the end.
    descendant_named(body, "div")
        .and_then(|div| descendant_named(div, "span"))
        .and_then(|span| descendant_named(span, "b"))
        .map(|_bold| ())
        .ok_or_else(not_found)
}