rjango 0.1.1 - Docs.rs

//! Simple HTML document parser for test assertions.

/// A parsed HTML document.
///
/// The document is a flat list of elements: [`HtmlDocument::from_html`]
/// records one entry per opening tag, in source order.
#[derive(Debug, Clone, Default)]
pub struct HtmlDocument {
    /// Elements in the order their opening tags appear in the input.
    pub elements: Vec<HtmlElement>,
}

/// A single HTML element.
///
/// Implements `PartialEq`/`Eq` so whole elements can be compared directly
/// in test assertions (`assert_eq!(actual, expected)`).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct HtmlElement {
    /// Tag name as written in the source (case is preserved), e.g. `h1`.
    pub tag: String,
    /// Raw text that follows the opening tag, up to the next `<`, as
    /// produced by `HtmlDocument::from_html`.
    pub text: String,
    /// Attribute `(name, value)` pairs.
    pub attrs: Vec<(String, String)>,
    /// Nested child elements. `HtmlDocument::from_html` leaves this empty
    /// and reports nested tags as siblings in the document's flat list.
    pub children: Vec<HtmlElement>,
}

impl HtmlDocument {
    /// Parse a simple HTML string into a document.
    ///
    /// This is a deliberately small, forgiving scanner rather than a
    /// conforming HTML parser:
    ///
    /// * Every opening tag becomes one [`HtmlElement`] in a flat list, in
    ///   source order; `children` is always left empty.
    /// * An element's `text` is the raw text between the end of its opening
    ///   tag and the next `<` (no entity decoding, no trimming).
    /// * Closing tags, comments (`<!-- -->`), declarations (`<!DOCTYPE>`)
    ///   and processing instructions (`<? ?>`) produce no element; text
    ///   that follows them is discarded.
    /// * Self-closing tags such as `<br/>` are recorded under their bare
    ///   name (`br`).
    /// * Attributes are collected into `attrs`; valueless attributes get an
    ///   empty string as their value.
    ///
    /// Because the input is split on `<` and `>`, those characters inside
    /// attribute values or comments are not handled specially.
    #[must_use]
    pub fn from_html(html: &str) -> Self {
        let elements = html
            .split('<')
            // A chunk without `>` is plain text (or empty) and holds no tag.
            .filter_map(|chunk| chunk.split_once('>'))
            .filter_map(|(tag_part, text)| Self::parse_open_tag(tag_part, text))
            .collect();
        Self { elements }
    }

    /// Build an element from the inside of a `<...>` tag and the text that
    /// follows it, or return `None` when the tag does not open an element.
    fn parse_open_tag(tag_part: &str, text: &str) -> Option<HtmlElement> {
        let inner = tag_part.trim_start();
        // Closing tags, comments/doctype and processing instructions.
        if inner.starts_with(|c: char| matches!(c, '/' | '!' | '?')) {
            return None;
        }
        // The name ends at whitespace or at the `/` of a self-closing tag.
        let name_end = inner
            .find(|c: char| c.is_whitespace() || c == '/')
            .unwrap_or(inner.len());
        let (name, attr_src) = inner.split_at(name_end);
        if name.is_empty() {
            return None;
        }
        Some(HtmlElement {
            tag: name.to_string(),
            text: text.to_string(),
            attrs: Self::parse_attrs(attr_src),
            children: Vec::new(),
        })
    }

    /// Parse the attribute portion of an opening tag into `(name, value)`
    /// pairs. Values may be double-quoted, single-quoted or unquoted.
    fn parse_attrs(src: &str) -> Vec<(String, String)> {
        // Whitespace separates attributes; a stray `/` is the self-closing
        // marker and carries no attribute.
        fn is_gap(c: char) -> bool {
            c.is_whitespace() || c == '/'
        }

        let mut attrs = Vec::new();
        let mut rest = src.trim_start_matches(is_gap);
        while !rest.is_empty() {
            let name_end = rest
                .find(|c: char| is_gap(c) || c == '=')
                .unwrap_or(rest.len());
            let (name, after_name) = rest.split_at(name_end);
            let after_name = after_name.trim_start();

            let (value, remaining) = match after_name.strip_prefix('=') {
                // Boolean attribute such as `disabled`.
                None => ("", after_name),
                Some(after_eq) => {
                    let after_eq = after_eq.trim_start();
                    if let Some(body) = after_eq.strip_prefix('"') {
                        // An unterminated quote swallows the rest of the tag.
                        body.split_once('"').unwrap_or((body, ""))
                    } else if let Some(body) = after_eq.strip_prefix('\'') {
                        body.split_once('\'').unwrap_or((body, ""))
                    } else {
                        after_eq
                            .split_once(char::is_whitespace)
                            .unwrap_or((after_eq, ""))
                    }
                }
            };

            // An empty name means a stray `=`; it was consumed above, so the
            // loop still makes progress, but there is nothing to record.
            if !name.is_empty() {
                attrs.push((name.to_string(), value.to_string()));
            }
            rest = remaining.trim_start_matches(is_gap);
        }
        attrs
    }

    /// Select elements by tag name.
    ///
    /// The comparison is an exact, case-sensitive match against
    /// [`HtmlElement::tag`]. Matches are returned in document order.
    #[must_use]
    pub fn select(&self, tag: &str) -> Vec<&HtmlElement> {
        self.elements.iter().filter(|e| e.tag == tag).collect()
    }

    /// Get all text content concatenated.
    ///
    /// The `text` of every element is joined with a single space, in
    /// document order. Elements with empty text still contribute a
    /// separator.
    #[must_use]
    pub fn text_content(&self) -> String {
        self.elements
            .iter()
            .map(|e| e.text.as_str())
            .collect::<Vec<_>>()
            .join(" ")
    }

    /// Check if document contains specific text.
    ///
    /// The search runs over [`HtmlDocument::text_content`], so a match may
    /// span the space inserted between two adjacent elements.
    #[must_use]
    pub fn contains_text(&self, text: &str) -> bool {
        self.text_content().contains(text)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for parsing a fixture string.
    fn parse(markup: &str) -> HtmlDocument {
        HtmlDocument::from_html(markup)
    }

    /// Sibling elements are parsed in order with their tag and text.
    #[test]
    fn parse_basic_html() {
        let doc = parse("<h1>Hello</h1><p>World</p>");
        assert_eq!(doc.elements.len(), 2);

        let heading = &doc.elements[0];
        assert_eq!(heading.tag, "h1");
        assert_eq!(heading.text, "Hello");
    }

    /// `select` returns only the elements with the requested tag.
    #[test]
    fn select_by_tag() {
        let doc = parse("<p>One</p><div>Two</div><p>Three</p>");
        assert_eq!(doc.select("p").len(), 2);
    }

    /// The combined text includes the text of every element.
    #[test]
    fn text_content() {
        let doc = parse("<h1>Title</h1><p>Body</p>");
        for expected in ["Title", "Body"].iter() {
            assert!(doc.text_content().contains(expected));
        }
    }

    /// Text present in the document is reported as found.
    #[test]
    fn contains_text_positive() {
        let doc = parse("<p>Hello World</p>");
        let found = doc.contains_text("Hello");
        assert!(found);
    }

    /// Text absent from the document is reported as missing.
    #[test]
    fn contains_text_negative() {
        let doc = parse("<p>Hello</p>");
        let found = doc.contains_text("Goodbye");
        assert!(!found);
    }

    /// An empty input yields no elements and no text.
    #[test]
    fn empty_document() {
        let doc = parse("");
        assert_eq!(doc.elements.len(), 0);
        assert_eq!(doc.text_content(), "");
    }
}