Skip to main content

oak_html/lexer/
token_type.rs

1use oak_core::{Token, TokenType, UniversalTokenRole};
2#[cfg(feature = "serde")]
3use serde::{Deserialize, Serialize};
4
/// Convenience alias: a [`Token`] whose kind is an [`HtmlTokenType`].
pub type HtmlToken = Token<HtmlTokenType>;
7
/// Every kind of token the HTML lexer distinguishes.
///
/// Besides purely lexical kinds (brackets, names, text, trivia) the enum also
/// carries a few structural kinds — [`Attribute`](Self::Attribute),
/// [`Document`](Self::Document) and [`Element`](Self::Element) — plus the
/// [`Eof`](Self::Eof) and [`Error`](Self::Error) markers.
#[derive(Clone, Copy, Debug)]
#[derive(PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum HtmlTokenType {
    // --- Tag delimiters ---------------------------------------------------
    /// `<` — the bracket that opens a tag.
    TagOpen,
    /// `>` — the bracket that closes a tag.
    TagClose,
    /// `</` — an opening bracket followed by a slash.
    TagSlashOpen,
    /// `/>` — the slash form that self-closes a tag.
    TagSelfClose,

    // --- Tag and attribute pieces -----------------------------------------
    /// The name of a tag, such as `div` or `p`.
    TagName,
    /// The name of an attribute, such as `id` or `class`.
    AttributeName,
    /// The value of an attribute.
    AttributeValue,
    /// An attribute node.
    Attribute,

    // --- Content ----------------------------------------------------------
    /// Text content found between tags.
    Text,
    /// An HTML comment: `<!-- ... -->`.
    Comment,

    // --- Attribute punctuation --------------------------------------------
    /// The `=` sign used in attributes.
    Equal,
    /// A quote character, either `"` or `'`.
    Quote,

    // --- Special markup ---------------------------------------------------
    /// A doctype declaration: `<!DOCTYPE ...>`.
    Doctype,
    /// A CDATA section: `<![CDATA[ ... ]]>`.
    CData,
    /// A processing instruction: `<? ... ?>`.
    ProcessingInstruction,
    /// A named entity reference: `&name;`.
    EntityRef,
    /// A numeric character reference: `&#123;` or `&#xabc;`.
    CharRef,

    // --- Trivia -----------------------------------------------------------
    /// Whitespace.
    Whitespace,
    /// A newline.
    Newline,

    // --- Structural kinds -------------------------------------------------
    /// The root document node.
    Document,
    /// An HTML element.
    Element,

    // --- Markers ----------------------------------------------------------
    /// End of file.
    Eof,
    /// An error token.
    Error,
}
59
60impl TokenType for HtmlTokenType {
61    type Role = UniversalTokenRole;
62    const END_OF_STREAM: Self = Self::Eof;
63
64    fn is_ignored(&self) -> bool {
65        matches!(self, Self::Whitespace | Self::Newline | Self::Comment)
66    }
67
68    fn role(&self) -> Self::Role {
69        match self {
70            Self::TagOpen | Self::TagClose | Self::TagSlashOpen | Self::TagSelfClose => UniversalTokenRole::Operator,
71            Self::TagName => UniversalTokenRole::Name,
72            Self::AttributeName => UniversalTokenRole::Name,
73            Self::AttributeValue => UniversalTokenRole::Literal,
74            Self::Text => UniversalTokenRole::None,
75            Self::Comment => UniversalTokenRole::Comment,
76            Self::Equal => UniversalTokenRole::Operator,
77            Self::Quote => UniversalTokenRole::Operator,
78            Self::Doctype => UniversalTokenRole::Keyword,
79            Self::Whitespace | Self::Newline => UniversalTokenRole::Whitespace,
80            _ => UniversalTokenRole::None,
81        }
82    }
83}