solar_parse/lexer/cursor/
token.rs

1//! Raw, low-level tokens. Created using [`Cursor`](crate::Cursor).
2
3use solar_ast::Base;
4
5/// A raw token.
6///
7/// It doesn't contain information about data that has been parsed, only the type of the token and
8/// its size.
9#[derive(Clone, Debug, PartialEq, Eq)]
10pub struct RawToken {
11    /// The kind of token.
12    pub kind: RawTokenKind,
13    /// The length of the token in bytes.
14    pub len: u32,
15}
16
17impl RawToken {
18    /// The [`EOF`](RawTokenKind::Eof) token with length 0.
19    pub const EOF: Self = Self::new(RawTokenKind::Eof, 0);
20
21    /// Creates a new token.
22    #[inline]
23    pub const fn new(kind: RawTokenKind, len: u32) -> Self {
24        Self { kind, len }
25    }
26}
27
28/// Common lexeme types.
29#[derive(Clone, Copy, Debug, PartialEq, Eq)]
30pub enum RawTokenKind {
31    // Multi-char tokens:
32    /// `// comment`
33    ///
34    /// `/// doc comment`
35    LineComment { is_doc: bool },
36
37    /// `/* block comment */`
38    ///
39    /// `/** block doc comment */`
40    BlockComment { is_doc: bool, terminated: bool },
41
42    /// Any whitespace character sequence.
43    Whitespace,
44
45    /// `ident` or `continue`
46    ///
47    /// At this step, keywords are also considered identifiers.
48    Ident,
49
50    /// An unknown prefix, like `foo'`, `foo"`.
51    ///
52    /// Note that only the prefix (`foo`) is included in the token, not the separator (which is
53    /// lexed as its own distinct token).
54    UnknownPrefix,
55
56    /// Examples: `123`, `0x123`, `hex"123"`. Note that `_` is an invalid
57    /// suffix, but may be present here on string and float literals. Users of
58    /// this type will need to check for and reject that case.
59    ///
60    /// See [`RawLiteralKind`] for more details.
61    Literal { kind: RawLiteralKind },
62
63    // One-char tokens:
64    /// `;`
65    Semi,
66    /// `,`
67    Comma,
68    /// `.`
69    Dot,
70    /// `(`
71    OpenParen,
72    /// `)`
73    CloseParen,
74    /// `{`
75    OpenBrace,
76    /// `}`
77    CloseBrace,
78    /// `[`
79    OpenBracket,
80    /// `]`
81    CloseBracket,
82    /// `~`
83    Tilde,
84    /// `?`
85    Question,
86    /// `:`
87    Colon,
88    /// `=`
89    Eq,
90    /// `!`
91    Bang,
92    /// `<`
93    Lt,
94    /// `>`
95    Gt,
96    /// `-`
97    Minus,
98    /// `&`
99    And,
100    /// `|`
101    Or,
102    /// `+`
103    Plus,
104    /// `*`
105    Star,
106    /// `/`
107    Slash,
108    /// `^`
109    Caret,
110    /// `%`
111    Percent,
112
113    /// Unknown token, not expected by the lexer, e.g. `№`
114    Unknown,
115
116    /// End of input.
117    Eof,
118}
119
120/// The literal types supported by the lexer.
121#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
122pub enum RawLiteralKind {
123    /// `123`, `0x123`; empty_int: `0x`
124    Int { base: Base, empty_int: bool },
125    /// `123.321`, `1.2e3`, `.2e3`; empty_exponent: `2e`, `2.3e`, `.3e`
126    Rational { base: Base, empty_exponent: bool },
127    /// `"abc"`, `"abc`; `unicode"abc"`, `unicode"abc`
128    Str { terminated: bool, unicode: bool },
129    /// `hex"abc"`, `hex"abc`
130    HexStr { terminated: bool },
131}