Skip to main content

fhp_tokenizer/
token.rs

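//! Token types emitted by the HTML tokenizer.
//!
//! String payloads are stored as `Cow<'a, str>` tied to the lifetime of the
//! original input, so tokenization is zero-copy whenever a payload can be
//! borrowed and only allocates when a value has to be owned (for example
//! after entity decoding).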
5
6use std::borrow::Cow;
7
8use fhp_core::tag::Tag;
9
10/// A single token produced by the tokenizer.
11///
12/// All string payloads borrow from the original input (`'a` lifetime),
13/// except entity-decoded values which use `Cow::Owned`.
14#[derive(Clone, Debug, PartialEq)]
15pub enum Token<'a> {
16    /// An opening tag, e.g. `<div class="foo">`.
17    OpenTag {
18        /// Interned tag name.
19        tag: Tag,
20        /// Raw tag name slice from the input.
21        name: Cow<'a, str>,
22        /// Attribute list (may be empty).
23        attributes: Vec<Attribute<'a>>,
24        /// Whether the tag is self-closing (`<br/>`).
25        self_closing: bool,
26    },
27
28    /// A closing tag, e.g. `</div>`.
29    CloseTag {
30        /// Interned tag name.
31        tag: Tag,
32        /// Raw tag name slice from the input.
33        name: Cow<'a, str>,
34    },
35
36    /// Text content between tags.
37    Text {
38        /// The text content, entity-decoded if needed.
39        content: Cow<'a, str>,
40    },
41
42    /// An HTML comment, e.g. `<!-- comment -->`.
43    Comment {
44        /// The comment body (without `<!--` and `-->`).
45        content: Cow<'a, str>,
46    },
47
48    /// A DOCTYPE declaration, e.g. `<!DOCTYPE html>`.
49    Doctype {
50        /// The content after `DOCTYPE`.
51        content: Cow<'a, str>,
52    },
53
54    /// A CDATA section, e.g. `<![CDATA[...]]>`.
55    CData {
56        /// The CDATA content (without `<![CDATA[` and `]]>`).
57        content: Cow<'a, str>,
58    },
59}
60
/// A single HTML attribute (name-value pair).
///
/// Derives `Eq` and `Hash` in addition to `PartialEq`, so attributes can be
/// stored in hash-based collections. Comparison and hashing look at the
/// string contents, so a borrowed and an owned `Cow` holding the same text
/// are equal and hash identically.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Attribute<'a> {
    /// Attribute name, borrowed from the input where possible.
    pub name: Cow<'a, str>,
    /// Attribute value, entity-decoded if needed.
    /// `None` for boolean attributes (e.g. `disabled`).
    pub value: Option<Cow<'a, str>>,
}
70
#[cfg(test)]
mod tests {
    use super::*;

    /// A cloned token compares equal to the original and its `Debug`
    /// output includes the payload text.
    #[test]
    fn token_clone_and_debug() {
        let tok = Token::Text {
            content: Cow::Borrowed("hello"),
        };
        let tok2 = tok.clone();
        assert_eq!(tok, tok2);
        assert!(format!("{tok:?}").contains("hello"));
    }

    /// An attribute with a value exposes both its name and its value.
    #[test]
    fn attribute_with_value() {
        let attr = Attribute {
            name: Cow::Borrowed("class"),
            value: Some(Cow::Borrowed("foo")),
        };
        assert_eq!(attr.name.as_ref(), "class");
        assert_eq!(attr.value.as_deref(), Some("foo"));
    }

    /// A boolean attribute (e.g. `disabled`) has no value.
    #[test]
    fn boolean_attribute() {
        let attr = Attribute {
            name: Cow::Borrowed("disabled"),
            value: None,
        };
        assert!(attr.value.is_none());
    }
}