// File: squawk_lexer/token.rs

// based on: https://github.com/rust-lang/rust/blob/d1b7355d3d7b4ead564dbecb1d240fcc74fff21b/compiler/rustc_lexer/src/lib.rs#L58
2#[derive(Debug, PartialEq, Clone, Copy)]
3pub enum TokenKind {
4    /// Used when there's an error of some sort while lexing.
5    Unknown,
6    /// Examples: `12u8`, `1.0e-40`, `b"123"`. Note that `_` is an invalid
7    /// suffix, but may be present here on string and float literals. Users of
8    /// this type will need to check for and reject that case.
9    ///
10    /// See [`LiteralKind`] for more details.
11    Literal { kind: LiteralKind },
12    /// Space, tab, newline, carriage return, vertical tab, form feed
13    Whitespace,
14    /// Identifier
15    ///
16    /// case-sensitive
17    Ident,
18    /// `;`
19    Semi,
20    /// End of file
21    Eof,
22    /// `/`
23    Slash,
24    /// `-- foo`
25    LineComment,
26    /// ```
27    /// /*
28    /// foo
29    /// */
30    /// ```
31    BlockComment { terminated: bool },
32    /// `-`
33    Minus,
34    /// `:`
35    Colon,
36    /// `.`
37    Dot,
38    /// `=`
39    Eq,
40    /// `>`
41    Gt,
42    /// `&`
43    And,
44    /// `<`
45    Lt,
46    /// `!`
47    Bang,
48    /// `+`
49    Plus,
50    /// `~`
51    Tilde,
52    /// `#`
53    Pound,
54    /// `?`
55    Question,
56    /// `|`
57    Or,
58    /// `%`
59    Percent,
60    /// `^`
61    Caret,
62    /// `*`
63    Star,
64    /// `` ` ``
65    Backtick,
66    /// `@`
67    At,
68    /// `]`
69    CloseBracket,
70    /// `[`
71    OpenBracket,
72    /// `}`
73    CloseCurly,
74    /// `{`
75    OpenCurly,
76    /// `)`
77    CloseParen,
78    /// `(`
79    OpenParen,
80    /// `,`
81    Comma,
82    /// Error case that we need to report later on.
83    UnknownPrefix,
84    /// Positional Parameter, e.g., `$1`
85    ///
86    /// see: <https://www.postgresql.org/docs/16/sql-expressions.html#SQL-EXPRESSIONS-PARAMETERS-POSITIONAL>
87    PositionalParam,
88    /// Quoted Identifier, e.g., `"update"` in `update "my_table" set "a" = 5;`
89    ///
90    /// These are case-sensitive, unlike [`TokenKind::Ident`]
91    ///
92    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS>
93    QuotedIdent { terminated: bool },
94}
95
96/// Parsed token.
97/// It doesn't contain information about data that has been parsed,
98/// only the type of the token and its size.
99#[derive(Debug, Clone, Copy)]
100pub struct Token {
101    pub kind: TokenKind,
102    pub len: u32,
103}
104
105impl Token {
106    pub(crate) fn new(kind: TokenKind, len: u32) -> Token {
107        Token { kind, len }
108    }
109}
110
/// Base of numeric literal encoding according to its prefix.
///
/// Each variant's discriminant is the numeric radix it stands for, so
/// `Base::Hexadecimal as u32 == 16`. The derived ordering follows declaration
/// order, which here coincides with increasing radix.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Base {
    /// Literal starts with `0b`.
    Binary = 2,
    /// Literal starts with `0o`.
    Octal = 8,
    /// Literal doesn't contain a prefix.
    Decimal = 10,
    /// Literal starts with `0x`.
    Hexadecimal = 16,
}
123
124// Enum representing the literal types supported by the lexer.
125#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
126pub enum LiteralKind {
127    /// Integer Numeric, e.g., `42`
128    ///
129    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-CONSTANTS-NUMERIC>
130    Int { base: Base, empty_int: bool },
131    /// Float Numeric, e.g., `1.925e-3`
132    ///
133    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-CONSTANTS-NUMERIC>
134    Float { base: Base, empty_exponent: bool },
135    /// String, e.g., `'foo'`
136    ///
137    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS>
138    Str { terminated: bool },
139    /// Hexidecimal Bit String, e.g., `X'1FF'`
140    ///
141    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-BIT-STRINGS>
142    ByteStr { terminated: bool },
143    /// Bit String, e.g., `B'1001'`
144    ///
145    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-BIT-STRINGS>
146    BitStr { terminated: bool },
147    /// Dollar Quoted String, e.g., `$$Dianne's horse$$`
148    ///
149    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-DOLLAR-QUOTING>
150    DollarQuotedString { terminated: bool },
151    /// Unicode Escape String, e.g., `U&'d\0061t\+000061'`
152    ///
153    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-UESCAPE>
154    UnicodeEscStr { terminated: bool },
155    /// Escape String, e.g, `E'foo'`
156    ///
157    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html>
158    EscStr { terminated: bool },
159}