solar_parse/lexer/cursor/
token.rs

1//! Raw, low-level tokens. Created using [`Cursor`](crate::Cursor).
2
3use solar_ast::{
4    Base, StrKind,
5    token::{BinOpToken, Delimiter},
6};
7
8/// A raw token.
9///
10/// It doesn't contain information about data that has been parsed, only the type of the token and
11/// its size.
12#[derive(Clone, Copy, Debug, PartialEq, Eq)]
13pub struct RawToken {
14    /// The kind of token.
15    pub kind: RawTokenKind,
16    /// The length of the token in bytes.
17    pub len: u32,
18}
19
20impl RawToken {
21    /// The [`EOF`](RawTokenKind::Eof) token with length 0.
22    pub const EOF: Self = Self::new(RawTokenKind::Eof, 0);
23
24    /// Creates a new token.
25    #[inline]
26    pub const fn new(kind: RawTokenKind, len: u32) -> Self {
27        Self { kind, len }
28    }
29}
30
31/// Common lexeme types.
32#[derive(Clone, Copy, Debug, PartialEq, Eq)]
33pub enum RawTokenKind {
34    // Multi-char tokens:
35    /// `// comment`
36    ///
37    /// `/// doc comment`
38    LineComment { is_doc: bool },
39
40    /// `/* block comment */`
41    ///
42    /// `/** block doc comment */`
43    BlockComment { is_doc: bool, terminated: bool },
44
45    /// Any whitespace character sequence.
46    Whitespace,
47
48    /// `ident` or `continue`
49    ///
50    /// At this step, keywords are also considered identifiers.
51    Ident,
52
53    /// Examples: `123`, `0x123`, `hex"123"`. Note that `_` is an invalid
54    /// suffix, but may be present here on string and float literals. Users of
55    /// this type will need to check for and reject that case.
56    ///
57    /// See [`RawLiteralKind`] for more details.
58    Literal { kind: RawLiteralKind },
59
60    // Expression-operator symbols.
61    /// `=`
62    Eq,
63    /// `<`
64    Lt,
65    /// `<=`
66    Le,
67    /// `==`
68    EqEq,
69    /// `!=`
70    Ne,
71    /// `>=`
72    Ge,
73    /// `>`
74    Gt,
75    /// `&&`
76    AndAnd,
77    /// `||`
78    OrOr,
79    /// `!`
80    Not,
81    /// `~`
82    Tilde,
83    /// `:=`
84    Walrus,
85    /// `++`
86    PlusPlus,
87    /// `--`
88    MinusMinus,
89    /// `**`
90    StarStar,
91    /// A binary operator token.
92    BinOp(BinOpToken),
93    /// A binary operator token, followed by an equals sign (`=`).
94    BinOpEq(BinOpToken),
95
96    // Structural symbols.
97    /// `@`
98    At,
99    /// `.`
100    Dot,
101    /// `,`
102    Comma,
103    /// `;`
104    Semi,
105    /// `:`
106    Colon,
107    /// `->`
108    Arrow,
109    /// `=>`
110    FatArrow,
111    /// `?`
112    Question,
113    /// An opening delimiter (e.g., `{`).
114    OpenDelim(Delimiter),
115    /// A closing delimiter (e.g., `}`).
116    CloseDelim(Delimiter),
117
118    /// Unknown token, not expected by the lexer, e.g. `№`
119    Unknown,
120
121    /// End of input.
122    Eof,
123}
124
125impl RawTokenKind {
126    /// Returns `true` if this token is EOF.
127    #[inline]
128    pub const fn is_eof(&self) -> bool {
129        matches!(self, Self::Eof)
130    }
131
132    /// Returns `true` if this token is a line comment or a block comment.
133    #[inline]
134    pub const fn is_comment(&self) -> bool {
135        matches!(self, Self::LineComment { .. } | Self::BlockComment { .. })
136    }
137
138    /// Returns `true` if this token is a whitespace, line comment, or block comment.
139    #[inline]
140    pub const fn is_trivial(&self) -> bool {
141        matches!(self, Self::Whitespace | Self::LineComment { .. } | Self::BlockComment { .. })
142    }
143}
144
145/// The literal types supported by the lexer.
146#[derive(Clone, Copy, Debug, PartialEq, Eq)]
147pub enum RawLiteralKind {
148    /// `123`, `0x123`; empty_int: `0x`
149    Int { base: Base, empty_int: bool },
150    /// `123.321`, `1.2e3`, `.2e3`; empty_exponent: `2e`, `2.3e`, `.3e`
151    Rational { base: Base, empty_exponent: bool },
152    /// `"abc"`, `"abc`; `unicode"abc"`, `unicode"abc`; `hex"abc"`, `hex"abc`
153    Str { kind: StrKind, terminated: bool },
154}