vibesql_parser/
token.rs

1use std::fmt;
2
3use crate::keywords::Keyword;
4
/// Operators that are spelled with two or more characters.
///
/// Storing such an operator as a `String` would require a heap allocation;
/// a fieldless enum avoids that and can be matched on cheaply.
///
/// NOTE(review): pgvector's documentation lists `<->` as L2 (Euclidean)
/// distance and `<=>` as cosine distance, which is the reverse of how
/// `CosineDistance` and `L2Distance` are named here. Confirm the intended
/// mapping against the lexer and the executor before relying on these names.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MultiCharOperator {
    /// `<=`: less than or equal.
    LessEqual,
    /// `>=`: greater than or equal.
    GreaterEqual,
    /// `!=`: not equal.
    NotEqual,
    /// `<>`: not equal (SQL-standard spelling).
    NotEqualAlt,
    /// `==`: equality; synonym for `=` kept for SQLite compatibility.
    DoubleEqual,
    /// `||`: string concatenation.
    Concat,
    /// `<<`: bitwise left shift (SQLite).
    LeftShift,
    /// `>>`: bitwise right shift (SQLite).
    RightShift,
    /// `->`: JSON extract, yielding JSON.
    JsonExtract,
    /// `->>`: JSON extract, yielding text.
    JsonExtractText,
    /// `<->`: named cosine distance in this crate (pgvector-style operator).
    CosineDistance,
    /// `<#>`: negative inner product (pgvector-style operator).
    NegativeInnerProduct,
    /// `<=>`: named L2/Euclidean distance in this crate (pgvector-style operator).
    L2Distance,
}
36
37impl fmt::Display for MultiCharOperator {
38    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
39        match self {
40            MultiCharOperator::LessEqual => write!(f, "<="),
41            MultiCharOperator::GreaterEqual => write!(f, ">="),
42            MultiCharOperator::NotEqual => write!(f, "!="),
43            MultiCharOperator::NotEqualAlt => write!(f, "<>"),
44            MultiCharOperator::DoubleEqual => write!(f, "=="),
45            MultiCharOperator::Concat => write!(f, "||"),
46            MultiCharOperator::LeftShift => write!(f, "<<"),
47            MultiCharOperator::RightShift => write!(f, ">>"),
48            MultiCharOperator::JsonExtract => write!(f, "->"),
49            MultiCharOperator::JsonExtractText => write!(f, "->>"),
50            MultiCharOperator::CosineDistance => write!(f, "<->"),
51            MultiCharOperator::NegativeInnerProduct => write!(f, "<#>"),
52            MultiCharOperator::L2Distance => write!(f, "<=>"),
53        }
54    }
55}
56
57/// SQL Token produced by the lexer.
58#[derive(Debug, Clone, PartialEq)]
59pub enum Token {
60    /// SQL keyword (SELECT, FROM, etc.)
61    /// Stores both the keyword variant and original text for error messages.
62    /// The original text preserves user's input case (e.g., "SeLeCt").
63    Keyword { keyword: Keyword, original: String },
64    /// Identifier (table name, column name, etc.)
65    Identifier(String),
66    /// Delimited identifier ("columnName" - case-sensitive, can use reserved words)
67    DelimitedIdentifier(String),
68    /// Numeric literal (42, 3.14, etc.)
69    Number(String),
70    /// String literal ('hello')
71    String(String),
72    /// Blob literal (x'48454C4C4F' or X'1234')
73    BlobLiteral(Vec<u8>),
74    /// Single character symbols (+, -, *, /, =, <, >, etc.)
75    Symbol(char),
76    /// Multi-character operators (<=, >=, !=, <>, ||)
77    Operator(MultiCharOperator),
78    /// Session variable (@@variable, @@session.variable, @@global.variable)
79    SessionVariable(String),
80    /// User variable (@variable)
81    UserVariable(String),
82    /// Parameter placeholder (?) for prepared statements
83    /// The index is assigned during parsing (0-indexed, in order of appearance)
84    Placeholder,
85    /// Numbered parameter placeholder ($1, $2, etc.) for prepared statements
86    /// PostgreSQL-style: 1-indexed as written in SQL ($1 = first parameter)
87    NumberedPlaceholder(usize),
88    /// Named parameter placeholder (:name) for prepared statements
89    /// Used by many ORMs and applications for readability
90    NamedPlaceholder(String),
91    /// Semicolon (statement terminator)
92    Semicolon,
93    /// Comma (separator)
94    Comma,
95    /// Left parenthesis
96    LParen,
97    /// Right parenthesis
98    RParen,
99    /// End of input
100    Eof,
101}
102
103impl Token {
104    /// Convert token back to valid SQL string.
105    /// This is the inverse of lexing - it produces SQL that can be re-parsed.
106    pub fn to_sql(&self) -> String {
107        match self {
108            Token::Keyword { keyword, .. } => keyword.to_string(),
109            Token::Identifier(id) => id.clone(),
110            Token::DelimitedIdentifier(id) => format!("\"{}\"", id),
111            Token::Number(n) => n.clone(),
112            Token::String(s) => format!("'{}'", s.replace('\'', "''")),
113            Token::BlobLiteral(bytes) => {
114                let hex: String = bytes.iter().map(|b| format!("{:02X}", b)).collect();
115                format!("x'{}'", hex)
116            }
117            Token::Symbol(c) => c.to_string(),
118            Token::Operator(op) => op.to_string(),
119            Token::SessionVariable(v) => format!("@@{}", v),
120            Token::UserVariable(v) => format!("@{}", v),
121            Token::Placeholder => "?".to_string(),
122            Token::NumberedPlaceholder(n) => format!("${}", n),
123            Token::NamedPlaceholder(name) => format!(":{}", name),
124            Token::Semicolon => ";".to_string(),
125            Token::Comma => ",".to_string(),
126            Token::LParen => "(".to_string(),
127            Token::RParen => ")".to_string(),
128            Token::Eof => String::new(),
129        }
130    }
131
132    /// Generate a SQLite-compatible syntax error message for this token.
133    ///
134    /// SQLite uses the format: `near "TOKEN": syntax error`
135    /// where TOKEN is the actual text that caused the error.
136    ///
137    /// Special cases:
138    /// - EOF (end of input) returns "incomplete input" (SQLite convention for truncated statements)
139    /// - Keywords preserve original case from user input (e.g., "SeLeCt")
140    pub fn syntax_error(&self) -> String {
141        match self {
142            // For EOF/truncated input, SQLite returns "incomplete input"
143            Token::Eof => "incomplete input".to_string(),
144            // For keywords, use the original text to preserve user's input case
145            Token::Keyword { original, .. } => {
146                format!("near \"{}\": syntax error", original)
147            }
148            _ => format!("near \"{}\": syntax error", self.to_sql()),
149        }
150    }
151}
152
153impl fmt::Display for Token {
154    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
155        match self {
156            Token::Keyword { keyword, .. } => write!(f, "Keyword({})", keyword),
157            Token::Identifier(id) => write!(f, "Identifier({})", id),
158            Token::DelimitedIdentifier(id) => write!(f, "DelimitedIdentifier(\"{}\")", id),
159            Token::Number(n) => write!(f, "Number({})", n),
160            Token::String(s) => write!(f, "String('{}')", s),
161            Token::BlobLiteral(bytes) => {
162                let hex: String = bytes.iter().map(|b| format!("{:02X}", b)).collect();
163                write!(f, "BlobLiteral(x'{}')", hex)
164            }
165            Token::Symbol(c) => write!(f, "Symbol({})", c),
166            Token::Operator(op) => write!(f, "Operator({})", op),
167            Token::SessionVariable(v) => write!(f, "SessionVariable({})", v),
168            Token::UserVariable(v) => write!(f, "UserVariable({})", v),
169            Token::Placeholder => write!(f, "Placeholder"),
170            Token::NumberedPlaceholder(n) => write!(f, "NumberedPlaceholder(${})", n),
171            Token::NamedPlaceholder(name) => write!(f, "NamedPlaceholder(:{})", name),
172            Token::Semicolon => write!(f, "Semicolon"),
173            Token::Comma => write!(f, "Comma"),
174            Token::LParen => write!(f, "LParen"),
175            Token::RParen => write!(f, "RParen"),
176            Token::Eof => write!(f, "Eof"),
177        }
178    }
179}