solar_parse/lexer/cursor/token.rs
1//! Raw, low-level tokens. Created using [`Cursor`](crate::Cursor).
2
3use solar_ast::{
4 Base, StrKind,
5 token::{BinOpToken, Delimiter},
6};
7
8/// A raw token.
9///
10/// It doesn't contain information about data that has been parsed, only the type of the token and
11/// its size.
12#[derive(Clone, Copy, Debug, PartialEq, Eq)]
13pub struct RawToken {
14 /// The kind of token.
15 pub kind: RawTokenKind,
16 /// The length of the token in bytes.
17 pub len: u32,
18}
19
20impl RawToken {
21 /// The [`EOF`](RawTokenKind::Eof) token with length 0.
22 pub const EOF: Self = Self::new(RawTokenKind::Eof, 0);
23
24 /// Creates a new token.
25 #[inline]
26 pub const fn new(kind: RawTokenKind, len: u32) -> Self {
27 Self { kind, len }
28 }
29}
30
31/// Common lexeme types.
32#[derive(Clone, Copy, Debug, PartialEq, Eq)]
33pub enum RawTokenKind {
34 // Multi-char tokens:
35 /// `// comment`
36 ///
37 /// `/// doc comment`
38 LineComment { is_doc: bool },
39
40 /// `/* block comment */`
41 ///
42 /// `/** block doc comment */`
43 BlockComment { is_doc: bool, terminated: bool },
44
45 /// Any whitespace character sequence.
46 Whitespace,
47
48 /// `ident` or `continue`
49 ///
50 /// At this step, keywords are also considered identifiers.
51 Ident,
52
53 /// Examples: `123`, `0x123`, `hex"123"`. Note that `_` is an invalid
54 /// suffix, but may be present here on string and float literals. Users of
55 /// this type will need to check for and reject that case.
56 ///
57 /// See [`RawLiteralKind`] for more details.
58 Literal { kind: RawLiteralKind },
59
60 // Expression-operator symbols.
61 /// `=`
62 Eq,
63 /// `<`
64 Lt,
65 /// `<=`
66 Le,
67 /// `==`
68 EqEq,
69 /// `!=`
70 Ne,
71 /// `>=`
72 Ge,
73 /// `>`
74 Gt,
75 /// `&&`
76 AndAnd,
77 /// `||`
78 OrOr,
79 /// `!`
80 Not,
81 /// `~`
82 Tilde,
83 /// `:=`
84 Walrus,
85 /// `++`
86 PlusPlus,
87 /// `--`
88 MinusMinus,
89 /// `**`
90 StarStar,
91 /// A binary operator token.
92 BinOp(BinOpToken),
93 /// A binary operator token, followed by an equals sign (`=`).
94 BinOpEq(BinOpToken),
95
96 // Structural symbols.
97 /// `@`
98 At,
99 /// `.`
100 Dot,
101 /// `,`
102 Comma,
103 /// `;`
104 Semi,
105 /// `:`
106 Colon,
107 /// `->`
108 Arrow,
109 /// `=>`
110 FatArrow,
111 /// `?`
112 Question,
113 /// An opening delimiter (e.g., `{`).
114 OpenDelim(Delimiter),
115 /// A closing delimiter (e.g., `}`).
116 CloseDelim(Delimiter),
117
118 /// Unknown token, not expected by the lexer, e.g. `№`
119 Unknown,
120
121 /// End of input.
122 Eof,
123}
124
125impl RawTokenKind {
126 /// Returns `true` if this token is EOF.
127 #[inline]
128 pub const fn is_eof(&self) -> bool {
129 matches!(self, Self::Eof)
130 }
131
132 /// Returns `true` if this token is a line comment or a block comment.
133 #[inline]
134 pub const fn is_comment(&self) -> bool {
135 matches!(self, Self::LineComment { .. } | Self::BlockComment { .. })
136 }
137
138 /// Returns `true` if this token is a whitespace, line comment, or block comment.
139 #[inline]
140 pub const fn is_trivial(&self) -> bool {
141 matches!(self, Self::Whitespace | Self::LineComment { .. } | Self::BlockComment { .. })
142 }
143}
144
145/// The literal types supported by the lexer.
146#[derive(Clone, Copy, Debug, PartialEq, Eq)]
147pub enum RawLiteralKind {
148 /// `123`, `0x123`; empty_int: `0x`
149 Int { base: Base, empty_int: bool },
150 /// `123.321`, `1.2e3`, `.2e3`; empty_exponent: `2e`, `2.3e`, `.3e`
151 Rational { base: Base, empty_exponent: bool },
152 /// `"abc"`, `"abc`; `unicode"abc"`, `unicode"abc`; `hex"abc"`, `hex"abc`
153 Str { kind: StrKind, terminated: bool },
154}