// squawk_lexer/token.rs

// based on: https://github.com/rust-lang/rust/blob/d1b7355d3d7b4ead564dbecb1d240fcc74fff21b/compiler/rustc_lexer/src/lib.rs#L58
2#[derive(Debug, PartialEq, Clone, Copy)]
3pub enum TokenKind {
4    /// Used when there's an error of some sort while lexing.
5    Unknown,
6    /// Examples: `12u8`, `1.0e-40`, `b"123"`. Note that `_` is an invalid
7    /// suffix, but may be present here on string and numeric literals. Users of
8    /// this type will need to check for and reject that case.
9    ///
10    /// See [`LiteralKind`] for more details.
11    Literal { kind: LiteralKind },
12    /// Space, tab, newline, carriage return, vertical tab, form feed
13    Whitespace,
14    /// Identifier
15    ///
16    /// case-sensitive
17    Ident,
18    /// `;`
19    Semi,
20    /// End of file
21    Eof,
22    /// `/`
23    Slash,
24    /// `-- foo`
25    LineComment,
26    /// ```
27    /// /*
28    /// foo
29    /// */
30    /// ```
31    BlockComment { terminated: bool },
32    /// `-`
33    Minus,
34    /// `:`
35    Colon,
36    /// `.`
37    Dot,
38    /// `=`
39    Eq,
40    /// `>`
41    Gt,
42    /// `&`
43    And,
44    /// `<`
45    Lt,
46    /// `!`
47    Bang,
48    /// `+`
49    Plus,
50    /// `~`
51    Tilde,
52    /// `#`
53    Pound,
54    /// `?`
55    Question,
56    /// `|`
57    Or,
58    /// `%`
59    Percent,
60    /// `^`
61    Caret,
62    /// `*`
63    Star,
64    /// `` ` ``
65    Backtick,
66    /// `@`
67    At,
68    /// `]`
69    CloseBracket,
70    /// `[`
71    OpenBracket,
72    /// `}`
73    CloseCurly,
74    /// `{`
75    OpenCurly,
76    /// `)`
77    CloseParen,
78    /// `(`
79    OpenParen,
80    /// `,`
81    Comma,
82    /// Positional Parameter, e.g., `$1`
83    ///
84    /// see: <https://www.postgresql.org/docs/16/sql-expressions.html#SQL-EXPRESSIONS-PARAMETERS-POSITIONAL>
85    PositionalParam { trailing_junk_start: u32 },
86    /// Quoted Identifier, e.g., `"update"` in `update "my_table" set "a" = 5;`
87    ///
88    /// These are case-sensitive, unlike [`TokenKind::Ident`]
89    ///
90    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS>
91    QuotedIdent { terminated: bool, uescape: bool },
92}
93
94/// Parsed token.
95/// It doesn't contain information about data that has been parsed,
96/// only the type of the token and its size.
97#[derive(Debug, Clone, Copy)]
98pub struct Token {
99    pub kind: TokenKind,
100    pub len: u32,
101}
102
103impl Token {
104    pub(crate) fn new(kind: TokenKind, len: u32) -> Token {
105        Token { kind, len }
106    }
107}
108
/// Base of numeric literal encoding according to its prefix.
///
/// Each variant's discriminant is the numeric radix it stands for, so
/// `Base::Hexadecimal as u32` yields `16`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Base {
    /// Literal starts with "0b".
    Binary = 2,
    /// Literal starts with "0o".
    Octal = 8,
    /// Literal doesn't contain a prefix.
    Decimal = 10,
    /// Literal starts with "0x".
    Hexadecimal = 16,
}
121
122// Enum representing the literal types supported by the lexer.
123#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
124pub enum LiteralKind {
125    /// Integer Numeric, e.g., `42`
126    ///
127    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-CONSTANTS-NUMERIC>
128    Int {
129        base: Base,
130        empty_int: bool,
131        trailing_junk_start: u32,
132    },
133    /// Numeric literal with a decimal point or exponent, e.g., `1.925e-3`
134    ///
135    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-CONSTANTS-NUMERIC>
136    Numeric {
137        // e.g., `1e` instead of `1e10`
138        empty_exponent_start: Option<u32>,
139        // e.g., `1foo` where `foo` is the junk
140        trailing_junk_start: u32,
141    },
142    /// String, e.g., `'foo'`
143    ///
144    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS>
145    Str { terminated: bool },
146    /// National character string, e.g., `N'foo'`
147    NationalStr { terminated: bool },
148    /// Hexidecimal Bit String, e.g., `X'1FF'`
149    ///
150    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-BIT-STRINGS>
151    ByteStr { terminated: bool },
152    /// Bit String, e.g., `B'1001'`
153    ///
154    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-BIT-STRINGS>
155    BitStr { terminated: bool },
156    /// Dollar Quoted String, e.g., `$$Dianne's horse$$`
157    ///
158    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-DOLLAR-QUOTING>
159    DollarQuotedString { terminated: bool },
160    /// Unicode Escape String, e.g., `U&'d\0061t\+000061'`
161    ///
162    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-UESCAPE>
163    UnicodeEscStr { terminated: bool },
164    /// Escape String, e.g, `E'foo'`
165    ///
166    /// see: <https://www.postgresql.org/docs/16/sql-syntax-lexical.html>
167    EscStr { terminated: bool },
168}