fhp_tokenizer/token.rs
1//! Token types emitted by the HTML tokenizer.
2//!
//! String payloads are `Cow<'a, str>` values that borrow from the original
//! input, keeping tokenization zero-copy except where entity decoding
//! requires an owned value.
5
6use std::borrow::Cow;
7
8use fhp_core::tag::Tag;
9
10/// A single token produced by the tokenizer.
11///
12/// All string payloads borrow from the original input (`'a` lifetime),
13/// except entity-decoded values which use `Cow::Owned`.
14#[derive(Clone, Debug, PartialEq)]
15pub enum Token<'a> {
16 /// An opening tag, e.g. `<div class="foo">`.
17 OpenTag {
18 /// Interned tag name.
19 tag: Tag,
20 /// Raw tag name slice from the input.
21 name: Cow<'a, str>,
22 /// Attribute list (may be empty).
23 attributes: Vec<Attribute<'a>>,
24 /// Whether the tag is self-closing (`<br/>`).
25 self_closing: bool,
26 },
27
28 /// A closing tag, e.g. `</div>`.
29 CloseTag {
30 /// Interned tag name.
31 tag: Tag,
32 /// Raw tag name slice from the input.
33 name: Cow<'a, str>,
34 },
35
36 /// Text content between tags.
37 Text {
38 /// The text content, entity-decoded if needed.
39 content: Cow<'a, str>,
40 },
41
42 /// An HTML comment, e.g. `<!-- comment -->`.
43 Comment {
44 /// The comment body (without `<!--` and `-->`).
45 content: Cow<'a, str>,
46 },
47
48 /// A DOCTYPE declaration, e.g. `<!DOCTYPE html>`.
49 Doctype {
50 /// The content after `DOCTYPE`.
51 content: Cow<'a, str>,
52 },
53
54 /// A CDATA section, e.g. `<![CDATA[...]]>`.
55 CData {
56 /// The CDATA content (without `<![CDATA[` and `]]>`).
57 content: Cow<'a, str>,
58 },
59}
60
/// One HTML attribute: a name together with an optional value.
#[derive(Debug, Clone, PartialEq)]
pub struct Attribute<'a> {
    /// The attribute's name, borrowed from the input.
    pub name: Cow<'a, str>,
    /// The attribute's value, entity-decoded when decoding was needed.
    ///
    /// Boolean attributes such as `disabled` carry no value and are
    /// represented as `None`.
    pub value: Option<Cow<'a, str>>,
}
70
#[cfg(test)]
mod tests {
    use super::*;

    /// A cloned token equals its source, and `Debug` output shows the payload.
    #[test]
    fn token_clone_and_debug() {
        let original = Token::Text {
            content: Cow::Borrowed("hello"),
        };
        let duplicate = original.clone();
        assert_eq!(original, duplicate);

        let rendered = format!("{original:?}");
        assert!(rendered.contains("hello"));
    }

    /// Name and value of a valued attribute are readable as plain `&str`.
    #[test]
    fn attribute_with_value() {
        let class_attr = Attribute {
            name: Cow::Borrowed("class"),
            value: Some(Cow::Borrowed("foo")),
        };
        assert_eq!(&*class_attr.name, "class");
        assert_eq!(class_attr.value.as_deref(), Some("foo"));
    }

    /// A boolean attribute is modelled by an absent value.
    #[test]
    fn boolean_attribute() {
        let disabled = Attribute {
            name: Cow::Borrowed("disabled"),
            value: None,
        };
        assert_eq!(disabled.value, None);
    }
}