Skip to main content

oak_html/lexer/
token_type.rs

1use oak_core::{Token, TokenType, UniversalTokenRole};
2
3/// HTML token type alias.
4pub type HtmlToken = Token<HtmlTokenType>;
5
/// Every kind of token recognised in HTML input.
///
/// Besides purely lexical kinds (brackets, names, text, ...), the set also
/// contains the node-level kinds [`Attribute`](Self::Attribute),
/// [`Document`](Self::Document) and [`Element`](Self::Element).
///
/// Serialization support is derived only when the `serde` feature is enabled.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum HtmlTokenType {
    /// `<` — the bracket that opens a tag.
    TagOpen,
    /// `>` — the bracket that closes a tag.
    TagClose,
    /// `</` — the opener of an end tag.
    TagSlashOpen,
    /// `/>` — the terminator of a self-closing tag.
    TagSelfClose,
    /// Name of a tag, such as `div` or `p`.
    TagName,
    /// Name of an attribute, such as `id` or `class`.
    AttributeName,
    /// Value assigned to an attribute.
    AttributeValue,
    /// An attribute node.
    Attribute,
    /// Textual content found between tags.
    Text,
    /// A comment of the form `<!-- ... -->`.
    Comment,
    /// `=` — separates an attribute name from its value.
    Equal,
    /// A quote character, either `"` or `'`.
    Quote,
    /// A doctype declaration of the form `<!DOCTYPE ...>`.
    Doctype,
    /// A CDATA section of the form `<![CDATA[ ... ]]>`.
    CData,
    /// A processing instruction of the form `<? ... ?>`.
    ProcessingInstruction,
    /// A named entity reference of the form `&name;`.
    EntityRef,
    /// A numeric character reference such as `&#123;` or `&#xabc;`.
    CharRef,
    /// Whitespace.
    Whitespace,
    /// A line break.
    Newline,
    /// The root node of a document.
    Document,
    /// An HTML element.
    Element,
    /// Marks the end of the input.
    Eof,
    /// Marks an error token.
    Error,
}
57
58impl TokenType for HtmlTokenType {
59    type Role = UniversalTokenRole;
60    const END_OF_STREAM: Self = Self::Eof;
61
62    fn is_ignored(&self) -> bool {
63        matches!(self, Self::Whitespace | Self::Newline | Self::Comment)
64    }
65
66    fn role(&self) -> Self::Role {
67        match self {
68            Self::TagOpen | Self::TagClose | Self::TagSlashOpen | Self::TagSelfClose => UniversalTokenRole::Operator,
69            Self::TagName => UniversalTokenRole::Name,
70            Self::AttributeName => UniversalTokenRole::Name,
71            Self::AttributeValue => UniversalTokenRole::Literal,
72            Self::Text => UniversalTokenRole::None,
73            Self::Comment => UniversalTokenRole::Comment,
74            Self::Equal => UniversalTokenRole::Operator,
75            Self::Quote => UniversalTokenRole::Operator,
76            Self::Doctype => UniversalTokenRole::Keyword,
77            Self::Whitespace | Self::Newline => UniversalTokenRole::Whitespace,
78            _ => UniversalTokenRole::None,
79        }
80    }
81}