squawk_lexer/token.rs
1// based on: https://github.com/rust-lang/rust/blob/d1b7355d3d7b4ead564dbecb1d240fcc74fff21b/compiler/rustc_lexer/src/lib.rs#L58
2#[derive(Debug, PartialEq, Clone, Copy)]
3pub enum TokenKind {
4 /// Used when there's an error of some sort while lexing.
5 Unknown,
6 /// Examples: `12u8`, `1.0e-40`, `b"123"`. Note that `_` is an invalid
7 /// suffix, but may be present here on string and float literals. Users of
8 /// this type will need to check for and reject that case.
9 ///
10 /// See [`LiteralKind`] for more details.
11 Literal { kind: LiteralKind },
12 /// Space, tab, newline, carriage return, vertical tab, form feed
13 Whitespace,
14 /// Identifier
15 ///
16 /// case-sensitive
17 Ident,
18 /// `;`
19 Semi,
20 /// End of file
21 Eof,
22 /// `/`
23 Slash,
24 /// `-- foo`
25 LineComment,
26 /// ```
27 /// /*
28 /// foo
29 /// */
30 /// ```
31 BlockComment { terminated: bool },
32 /// `-`
33 Minus,
34 /// `:`
35 Colon,
36 /// `.`
37 Dot,
38 /// `=`
39 Eq,
40 /// `>`
41 Gt,
42 /// `&`
43 And,
44 /// `<`
45 Lt,
46 /// `!`
47 Bang,
48 /// `+`
49 Plus,
50 /// `~`
51 Tilde,
52 /// `#`
53 Pound,
54 /// `?`
55 Question,
56 /// `|`
57 Or,
58 /// `%`
59 Percent,
60 /// `^`
61 Caret,
62 /// `*`
63 Star,
64 /// `` ` ``
65 Backtick,
66 /// `@`
67 At,
68 /// `]`
69 CloseBracket,
70 /// `[`
71 OpenBracket,
72 /// `}`
73 CloseCurly,
74 /// `{`
75 OpenCurly,
76 /// `)`
77 CloseParen,
78 /// `(`
79 OpenParen,
80 /// `,`
81 Comma,
82 /// Error case that we need to report later on.
83 UnknownPrefix,
84 /// Positional Parameter, e.g., `$1`
85 ///
86 /// see: <https://www.postgresql.org/docs/16/sql-expressions.html#SQL-EXPRESSIONS-PARAMETERS-POSITIONAL>
87 PositionalParam,
88 /// Quoted Identifier, e.g., `"update"` in `update "my_table" set "a" = 5;`
89 ///
90 /// These are case-sensitive, unlike [`TokenKind::Ident`]
91 ///
92 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS>
93 QuotedIdent { terminated: bool },
94}
95
96/// Parsed token.
97/// It doesn't contain information about data that has been parsed,
98/// only the type of the token and its size.
99#[derive(Debug, Clone, Copy)]
100pub struct Token {
101 pub kind: TokenKind,
102 pub len: u32,
103}
104
105impl Token {
106 pub(crate) fn new(kind: TokenKind, len: u32) -> Token {
107 Token { kind, len }
108 }
109}
110
/// Radix of a numeric literal, as indicated by the literal's prefix.
///
/// Each variant's discriminant is the numeric value of its radix.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Base {
    /// Base 2: the literal starts with "0b".
    Binary = 2,
    /// Base 8: the literal starts with "0o".
    Octal = 8,
    /// Base 10: the literal has no prefix.
    Decimal = 10,
    /// Base 16: the literal starts with "0x".
    Hexadecimal = 16,
}
123
124// Enum representing the literal types supported by the lexer.
125#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
126pub enum LiteralKind {
127 /// Integer Numeric, e.g., `42`
128 ///
129 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-CONSTANTS-NUMERIC>
130 Int { base: Base, empty_int: bool },
131 /// Float Numeric, e.g., `1.925e-3`
132 ///
133 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-CONSTANTS-NUMERIC>
134 Float { base: Base, empty_exponent: bool },
135 /// String, e.g., `'foo'`
136 ///
137 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS>
138 Str { terminated: bool },
139 /// Hexidecimal Bit String, e.g., `X'1FF'`
140 ///
141 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-BIT-STRINGS>
142 ByteStr { terminated: bool },
143 /// Bit String, e.g., `B'1001'`
144 ///
145 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-BIT-STRINGS>
146 BitStr { terminated: bool },
147 /// Dollar Quoted String, e.g., `$$Dianne's horse$$`
148 ///
149 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-DOLLAR-QUOTING>
150 DollarQuotedString { terminated: bool },
151 /// Unicode Escape String, e.g., `U&'d\0061t\+000061'`
152 ///
153 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-UESCAPE>
154 UnicodeEscStr { terminated: bool },
155 /// Escape String, e.g, `E'foo'`
156 ///
157 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html>
158 EscStr { terminated: bool },
159}