// squawk_lexer/token.rs

// based on: https://github.com/rust-lang/rust/blob/d1b7355d3d7b4ead564dbecb1d240fcc74fff21b/compiler/rustc_lexer/src/lib.rs#L58
2#[derive(Debug, PartialEq, Clone, Copy)]
3pub enum TokenKind {
4    /// Used when there's an error of some sort while lexing.
5    Unknown,
6    /// Examples: `12u8`, `1.0e-40`, `b"123"`. Note that `_` is an invalid
7    /// suffix, but may be present here on string and float literals. Users of
8    /// this type will need to check for and reject that case.
9    ///
10    /// See [`LiteralKind`] for more details.
11    Literal { kind: LiteralKind },
12    /// Space, tab, newline, carriage return, vertical tab, form feed
13    Whitespace,
14    /// Identifier
15    ///
16    /// case-sensitive
17    Ident,
18    /// `;`
19    Semi,
20    /// End of file
21    Eof,
22    /// `/`
23    Slash,
24    /// `-- foo`
25    LineComment,
26    /// ```
27    /// /*
28    /// foo
29    /// */
30    /// ```
31    BlockComment { terminated: bool },
32    /// `-`
33    Minus,
34    /// `:`
35    Colon,
36    /// `.`
37    Dot,
38    /// `=`
39    Eq,
40    /// `>`
41    Gt,
42    /// `&`
43    And,
44    /// `<`
45    Lt,
46    /// `!`
47    Bang,
48    /// `+`
49    Plus,
50    /// `~`
51    Tilde,
52    /// `#`
53    Pound,
54    /// `?`
55    Question,
56    /// `|`
57    Or,
58    /// `%`
59    Percent,
60    /// `^`
61    Caret,
62    /// `*`
63    Star,
64    /// `` ` ``
65    Backtick,
66    /// `@`
67    At,
68    /// `]`
69    CloseBracket,
70    /// `[`
71    OpenBracket,
72    /// `)`
73    CloseParen,
74    /// `(`
75    OpenParen,
76    /// `,`
77    Comma,
78    /// Error case that we need to report later on.
79    UnknownPrefix,
80    /// Positional Parameter, e.g., `$1`
81    ///
82    /// see: <https://www.postgresql.org/docs/16/sql-expressions.html#SQL-EXPRESSIONS-PARAMETERS-POSITIONAL>
83    PositionalParam,
84    /// Quoted Identifier, e.g., `"update"` in `update "my_table" set "a" = 5;`
85    ///
86    /// These are case-sensitive, unlike [`TokenKind::Ident`]
87    ///
88    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS>
89    QuotedIdent { terminated: bool },
90}
92/// Parsed token.
93/// It doesn't contain information about data that has been parsed,
94/// only the type of the token and its size.
95#[derive(Debug, Clone, Copy)]
96pub struct Token {
97    pub kind: TokenKind,
98    pub len: u32,
99}
101impl Token {
102    pub(crate) fn new(kind: TokenKind, len: u32) -> Token {
103        Token { kind, len }
104    }
105}
/// Base of numeric literal encoding according to its prefix.
///
/// Each variant's discriminant is the radix it stands for, so
/// `Base::Hexadecimal as u32` is `16`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Base {
    /// Literal starts with "0b".
    Binary = 2,
    /// Literal starts with "0o".
    Octal = 8,
    /// Literal doesn't contain a prefix.
    Decimal = 10,
    /// Literal starts with "0x".
    Hexadecimal = 16,
}
120// Enum representing the literal types supported by the lexer.
121#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
122pub enum LiteralKind {
123    /// Integer Numeric, e.g., `42`
124    ///
125    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-CONSTANTS-NUMERIC>
126    Int { base: Base, empty_int: bool },
127    /// Float Numeric, e.g., `1.925e-3`
128    ///
129    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-CONSTANTS-NUMERIC>
130    Float { base: Base, empty_exponent: bool },
131    /// String, e.g., `'foo'`
132    ///
133    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS>
134    Str { terminated: bool },
135    /// Hexidecimal Bit String, e.g., `X'1FF'`
136    ///
137    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-BIT-STRINGS>
138    ByteStr { terminated: bool },
139    /// Bit String, e.g., `B'1001'`
140    ///
141    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-BIT-STRINGS>
142    BitStr { terminated: bool },
143    /// Dollar Quoted String, e.g., `$$Dianne's horse$$`
144    ///
145    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-DOLLAR-QUOTING>
146    DollarQuotedString { terminated: bool },
147    /// Unicode Escape String, e.g., `U&'d\0061t\+000061'`
148    ///
149    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-UESCAPE>
150    UnicodeEscStr { terminated: bool },
151    /// Escape String, e.g, `E'foo'`
152    ///
153    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html>
154    EscStr { terminated: bool },
155}