solar_parse/lexer/cursor/token.rs
1//! Raw, low-level tokens. Created using [`Cursor`](crate::Cursor).
2
3use solar_ast::Base;
4
5/// A raw token.
6///
7/// It doesn't contain information about data that has been parsed, only the type of the token and
8/// its size.
9#[derive(Clone, Debug, PartialEq, Eq)]
10pub struct RawToken {
11 /// The kind of token.
12 pub kind: RawTokenKind,
13 /// The length of the token in bytes.
14 pub len: u32,
15}
16
17impl RawToken {
18 /// The [`EOF`](RawTokenKind::Eof) token with length 0.
19 pub const EOF: Self = Self::new(RawTokenKind::Eof, 0);
20
21 /// Creates a new token.
22 #[inline]
23 pub const fn new(kind: RawTokenKind, len: u32) -> Self {
24 Self { kind, len }
25 }
26}
27
28/// Common lexeme types.
29#[derive(Clone, Copy, Debug, PartialEq, Eq)]
30pub enum RawTokenKind {
31 // Multi-char tokens:
32 /// `// comment`
33 ///
34 /// `/// doc comment`
35 LineComment { is_doc: bool },
36
37 /// `/* block comment */`
38 ///
39 /// `/** block doc comment */`
40 BlockComment { is_doc: bool, terminated: bool },
41
42 /// Any whitespace character sequence.
43 Whitespace,
44
45 /// `ident` or `continue`
46 ///
47 /// At this step, keywords are also considered identifiers.
48 Ident,
49
50 /// An unknown prefix, like `foo'`, `foo"`.
51 ///
52 /// Note that only the prefix (`foo`) is included in the token, not the separator (which is
53 /// lexed as its own distinct token).
54 UnknownPrefix,
55
56 /// Examples: `123`, `0x123`, `hex"123"`. Note that `_` is an invalid
57 /// suffix, but may be present here on string and float literals. Users of
58 /// this type will need to check for and reject that case.
59 ///
60 /// See [`RawLiteralKind`] for more details.
61 Literal { kind: RawLiteralKind },
62
63 // One-char tokens:
64 /// `;`
65 Semi,
66 /// `,`
67 Comma,
68 /// `.`
69 Dot,
70 /// `(`
71 OpenParen,
72 /// `)`
73 CloseParen,
74 /// `{`
75 OpenBrace,
76 /// `}`
77 CloseBrace,
78 /// `[`
79 OpenBracket,
80 /// `]`
81 CloseBracket,
82 /// `~`
83 Tilde,
84 /// `?`
85 Question,
86 /// `:`
87 Colon,
88 /// `=`
89 Eq,
90 /// `!`
91 Bang,
92 /// `<`
93 Lt,
94 /// `>`
95 Gt,
96 /// `-`
97 Minus,
98 /// `&`
99 And,
100 /// `|`
101 Or,
102 /// `+`
103 Plus,
104 /// `*`
105 Star,
106 /// `/`
107 Slash,
108 /// `^`
109 Caret,
110 /// `%`
111 Percent,
112
113 /// Unknown token, not expected by the lexer, e.g. `№`
114 Unknown,
115
116 /// End of input.
117 Eof,
118}
119
120/// The literal types supported by the lexer.
121#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
122pub enum RawLiteralKind {
123 /// `123`, `0x123`; empty_int: `0x`
124 Int { base: Base, empty_int: bool },
125 /// `123.321`, `1.2e3`, `.2e3`; empty_exponent: `2e`, `2.3e`, `.3e`
126 Rational { base: Base, empty_exponent: bool },
127 /// `"abc"`, `"abc`; `unicode"abc"`, `unicode"abc`
128 Str { terminated: bool, unicode: bool },
129 /// `hex"abc"`, `hex"abc`
130 HexStr { terminated: bool },
131}