squawk_lexer/token.rs
1// based on: https://github.com/rust-lang/rust/blob/d1b7355d3d7b4ead564dbecb1d240fcc74fff21b/compiler/rustc_lexer/src/lib.rs#L58
2#[derive(Debug, PartialEq, Clone, Copy)]
3pub enum TokenKind {
4 /// Used when there's an error of some sort while lexing.
5 Unknown,
6 /// Examples: `12u8`, `1.0e-40`, `b"123"`. Note that `_` is an invalid
7 /// suffix, but may be present here on string and float literals. Users of
8 /// this type will need to check for and reject that case.
9 ///
10 /// See [`LiteralKind`] for more details.
11 Literal { kind: LiteralKind },
12 /// Space, tab, newline, carriage return, vertical tab, form feed
13 Whitespace,
14 /// Identifier
15 ///
16 /// case-sensitive
17 Ident,
18 /// `;`
19 Semi,
20 /// End of file
21 Eof,
22 /// `/`
23 Slash,
24 /// `-- foo`
25 LineComment,
26 /// ```
27 /// /*
28 /// foo
29 /// */
30 /// ```
31 BlockComment { terminated: bool },
32 /// `-`
33 Minus,
34 /// `:`
35 Colon,
36 /// `.`
37 Dot,
38 /// `=`
39 Eq,
40 /// `>`
41 Gt,
42 /// `&`
43 And,
44 /// `<`
45 Lt,
46 /// `!`
47 Bang,
48 /// `+`
49 Plus,
50 /// `~`
51 Tilde,
52 /// `#`
53 Pound,
54 /// `?`
55 Question,
56 /// `|`
57 Or,
58 /// `%`
59 Percent,
60 /// `^`
61 Caret,
62 /// `*`
63 Star,
64 /// `` ` ``
65 Backtick,
66 /// `@`
67 At,
68 /// `]`
69 CloseBracket,
70 /// `[`
71 OpenBracket,
72 /// `)`
73 CloseParen,
74 /// `(`
75 OpenParen,
76 /// `,`
77 Comma,
78 /// Error case that we need to report later on.
79 UnknownPrefix,
80 /// Positional Parameter, e.g., `$1`
81 ///
82 /// see: <https://www.postgresql.org/docs/16/sql-expressions.html#SQL-EXPRESSIONS-PARAMETERS-POSITIONAL>
83 PositionalParam,
84 /// Quoted Identifier, e.g., `"update"` in `update "my_table" set "a" = 5;`
85 ///
86 /// These are case-sensitive, unlike [`TokenKind::Ident`]
87 ///
88 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS>
89 QuotedIdent { terminated: bool },
90}
91
92/// Parsed token.
93/// It doesn't contain information about data that has been parsed,
94/// only the type of the token and its size.
95#[derive(Debug, Clone, Copy)]
96pub struct Token {
97 pub kind: TokenKind,
98 pub len: u32,
99}
100
101impl Token {
102 pub(crate) fn new(kind: TokenKind, len: u32) -> Token {
103 Token { kind, len }
104 }
105}
106
/// Radix of a numeric literal, as indicated by the literal's prefix.
///
/// Each variant's discriminant is the numeric base itself.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Base {
    /// Base 2: the literal begins with `0b`.
    Binary = 2,
    /// Base 8: the literal begins with `0o`.
    Octal = 8,
    /// Base 10: the literal carries no prefix.
    Decimal = 10,
    /// Base 16: the literal begins with `0x`.
    Hexadecimal = 16,
}
119
120// Enum representing the literal types supported by the lexer.
121#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
122pub enum LiteralKind {
123 /// Integer Numeric, e.g., `42`
124 ///
125 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-CONSTANTS-NUMERIC>
126 Int { base: Base, empty_int: bool },
127 /// Float Numeric, e.g., `1.925e-3`
128 ///
129 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-CONSTANTS-NUMERIC>
130 Float { base: Base, empty_exponent: bool },
131 /// String, e.g., `'foo'`
132 ///
133 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS>
134 Str { terminated: bool },
135 /// Hexidecimal Bit String, e.g., `X'1FF'`
136 ///
137 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-BIT-STRINGS>
138 ByteStr { terminated: bool },
139 /// Bit String, e.g., `B'1001'`
140 ///
141 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-BIT-STRINGS>
142 BitStr { terminated: bool },
143 /// Dollar Quoted String, e.g., `$$Dianne's horse$$`
144 ///
145 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-DOLLAR-QUOTING>
146 DollarQuotedString { terminated: bool },
147 /// Unicode Escape String, e.g., `U&'d\0061t\+000061'`
148 ///
149 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-UESCAPE>
150 UnicodeEscStr { terminated: bool },
151 /// Escape String, e.g, `E'foo'`
152 ///
153 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html>
154 EscStr { terminated: bool },
155}