squawk_lexer/token.rs
1// based on: https://github.com/rust-lang/rust/blob/d1b7355d3d7b4ead564dbecb1d240fcc74fff21b/compiler/rustc_lexer/src/lib.rs#L58
2#[derive(Debug, PartialEq, Clone, Copy)]
3pub enum TokenKind {
4 /// Used when there's an error of some sort while lexing.
5 Unknown,
6 /// Examples: `12u8`, `1.0e-40`, `b"123"`. Note that `_` is an invalid
7 /// suffix, but may be present here on string and numeric literals. Users of
8 /// this type will need to check for and reject that case.
9 ///
10 /// See [`LiteralKind`] for more details.
11 Literal { kind: LiteralKind },
12 /// Space, tab, newline, carriage return, vertical tab, form feed
13 Whitespace,
14 /// Identifier
15 ///
16 /// case-sensitive
17 Ident,
18 /// `;`
19 Semi,
20 /// End of file
21 Eof,
22 /// `/`
23 Slash,
24 /// `-- foo`
25 LineComment,
26 /// ```
27 /// /*
28 /// foo
29 /// */
30 /// ```
31 BlockComment { terminated: bool },
32 /// `-`
33 Minus,
34 /// `:`
35 Colon,
36 /// `.`
37 Dot,
38 /// `=`
39 Eq,
40 /// `>`
41 Gt,
42 /// `&`
43 And,
44 /// `<`
45 Lt,
46 /// `!`
47 Bang,
48 /// `+`
49 Plus,
50 /// `~`
51 Tilde,
52 /// `#`
53 Pound,
54 /// `?`
55 Question,
56 /// `|`
57 Or,
58 /// `%`
59 Percent,
60 /// `^`
61 Caret,
62 /// `*`
63 Star,
64 /// `` ` ``
65 Backtick,
66 /// `@`
67 At,
68 /// `]`
69 CloseBracket,
70 /// `[`
71 OpenBracket,
72 /// `}`
73 CloseCurly,
74 /// `{`
75 OpenCurly,
76 /// `)`
77 CloseParen,
78 /// `(`
79 OpenParen,
80 /// `,`
81 Comma,
82 /// Positional Parameter, e.g., `$1`
83 ///
84 /// see: <https://www.postgresql.org/docs/16/sql-expressions.html#SQL-EXPRESSIONS-PARAMETERS-POSITIONAL>
85 PositionalParam { trailing_junk_start: u32 },
86 /// Quoted Identifier, e.g., `"update"` in `update "my_table" set "a" = 5;`
87 ///
88 /// These are case-sensitive, unlike [`TokenKind::Ident`]
89 ///
90 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS>
91 QuotedIdent { terminated: bool, uescape: bool },
92}
93
94/// Parsed token.
95/// It doesn't contain information about data that has been parsed,
96/// only the type of the token and its size.
97#[derive(Debug, Clone, Copy)]
98pub struct Token {
99 pub kind: TokenKind,
100 pub len: u32,
101}
102
103impl Token {
104 pub(crate) fn new(kind: TokenKind, len: u32) -> Token {
105 Token { kind, len }
106 }
107}
108
/// Radix of a numeric literal, as selected by the literal's prefix.
///
/// Each variant's discriminant is the radix it stands for.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Base {
    /// Prefix is "0b".
    Binary = 2,
    /// Prefix is "0o".
    Octal = 8,
    /// No prefix present.
    Decimal = 10,
    /// Prefix is "0x".
    Hexadecimal = 16,
}
121
122// Enum representing the literal types supported by the lexer.
123#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
124pub enum LiteralKind {
125 /// Integer Numeric, e.g., `42`
126 ///
127 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-CONSTANTS-NUMERIC>
128 Int {
129 base: Base,
130 empty_int: bool,
131 trailing_junk_start: u32,
132 },
133 /// Numeric literal with a decimal point or exponent, e.g., `1.925e-3`
134 ///
135 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-CONSTANTS-NUMERIC>
136 Numeric {
137 // e.g., `1e` instead of `1e10`
138 empty_exponent_start: Option<u32>,
139 // e.g., `1foo` where `foo` is the junk
140 trailing_junk_start: u32,
141 },
142 /// String, e.g., `'foo'`
143 ///
144 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS>
145 Str { terminated: bool },
146 /// National character string, e.g., `N'foo'`
147 NationalStr { terminated: bool },
148 /// Hexidecimal Bit String, e.g., `X'1FF'`
149 ///
150 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-BIT-STRINGS>
151 ByteStr { terminated: bool },
152 /// Bit String, e.g., `B'1001'`
153 ///
154 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-BIT-STRINGS>
155 BitStr { terminated: bool },
156 /// Dollar Quoted String, e.g., `$$Dianne's horse$$`
157 ///
158 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-DOLLAR-QUOTING>
159 DollarQuotedString { terminated: bool },
160 /// Unicode Escape String, e.g., `U&'d\0061t\+000061'`
161 ///
162 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-UESCAPE>
163 UnicodeEscStr { terminated: bool },
164 /// Escape String, e.g, `E'foo'`
165 ///
166 /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html>
167 EscStr { terminated: bool },
168}