1use std::fmt;
4
/// A lexical token produced by [`tokenize`].
///
/// Variants that carry a `String` hold the text exactly as it appeared in the
/// input; every other variant stands for one fixed operator, keyword or
/// punctuation mark.
///
/// `Eq` and `Hash` are derived in addition to `PartialEq` so tokens can be
/// used as keys in hash-based collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Token {
    /// A bare word that is not one of the recognised keywords.
    Identifier(String),
    /// A `#name` placeholder; the stored text includes the leading `#`.
    NameRef(String),
    /// A `:name` placeholder; the stored text includes the leading `:`.
    ValueRef(String),

    /// `=`
    Eq,
    /// `<>`
    Ne,
    /// `<`
    Lt,
    /// `<=`
    Le,
    /// `>`
    Gt,
    /// `>=`
    Ge,

    /// `+`
    Plus,
    /// `-`
    Minus,

    /// The `AND` keyword (keywords are matched case-insensitively).
    And,
    /// The `OR` keyword.
    Or,
    /// The `NOT` keyword.
    Not,
    /// The `BETWEEN` keyword.
    Between,
    /// The `IN` keyword.
    In,

    /// The `SET` keyword.
    Set,
    /// The `REMOVE` keyword.
    Remove,
    /// The `ADD` keyword.
    Add,
    /// The `DELETE` keyword.
    Delete,

    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `.`
    Dot,
    /// `,`
    Comma,

    /// A run of ASCII digits, kept as text. The tokenizer only emits this
    /// directly after an [`Token::LBracket`].
    Number(String),
}
44
45impl fmt::Display for Token {
46 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
47 match self {
48 Token::Identifier(s) => write!(f, "{s}"),
49 Token::NameRef(s) => write!(f, "{s}"),
50 Token::ValueRef(s) => write!(f, "{s}"),
51 Token::Eq => write!(f, "="),
52 Token::Ne => write!(f, "<>"),
53 Token::Lt => write!(f, "<"),
54 Token::Le => write!(f, "<="),
55 Token::Gt => write!(f, ">"),
56 Token::Ge => write!(f, ">="),
57 Token::Plus => write!(f, "+"),
58 Token::Minus => write!(f, "-"),
59 Token::And => write!(f, "AND"),
60 Token::Or => write!(f, "OR"),
61 Token::Not => write!(f, "NOT"),
62 Token::Between => write!(f, "BETWEEN"),
63 Token::In => write!(f, "IN"),
64 Token::Set => write!(f, "SET"),
65 Token::Remove => write!(f, "REMOVE"),
66 Token::Add => write!(f, "ADD"),
67 Token::Delete => write!(f, "DELETE"),
68 Token::LParen => write!(f, "("),
69 Token::RParen => write!(f, ")"),
70 Token::LBracket => write!(f, "["),
71 Token::RBracket => write!(f, "]"),
72 Token::Dot => write!(f, "."),
73 Token::Comma => write!(f, ","),
74 Token::Number(n) => write!(f, "{n}"),
75 }
76 }
77}
78
79pub fn tokenize(input: &str) -> Result<Vec<Token>, String> {
81 let mut tokens = Vec::new();
82 let chars: Vec<char> = input.chars().collect();
83 let mut i = 0;
84
85 while i < chars.len() {
86 if chars[i].is_whitespace() {
88 i += 1;
89 continue;
90 }
91
92 match chars[i] {
93 '#' => {
95 i += 1;
96 let start = i;
97 while i < chars.len() && is_name_char(chars[i]) {
98 i += 1;
99 }
100 if i == start {
101 return Err("Syntax error; token: \"#\"".to_string());
102 }
103 let name: String = chars[start..i].iter().collect();
104 tokens.push(Token::NameRef(format!("#{name}")));
105 }
106
107 ':' => {
109 i += 1;
110 let start = i;
111 while i < chars.len() && is_name_char(chars[i]) {
112 i += 1;
113 }
114 if i == start {
115 return Err("Syntax error; token: \":\"".to_string());
116 }
117 let name: String = chars[start..i].iter().collect();
118 tokens.push(Token::ValueRef(format!(":{name}")));
119 }
120
121 '<' => {
123 i += 1;
124 if i < chars.len() && chars[i] == '>' {
125 tokens.push(Token::Ne);
126 i += 1;
127 } else if i < chars.len() && chars[i] == '=' {
128 tokens.push(Token::Le);
129 i += 1;
130 } else {
131 tokens.push(Token::Lt);
132 }
133 }
134
135 '>' => {
136 i += 1;
137 if i < chars.len() && chars[i] == '=' {
138 tokens.push(Token::Ge);
139 i += 1;
140 } else {
141 tokens.push(Token::Gt);
142 }
143 }
144
145 '=' => {
146 tokens.push(Token::Eq);
147 i += 1;
148 }
149
150 '+' => {
151 tokens.push(Token::Plus);
152 i += 1;
153 }
154 '-' => {
155 tokens.push(Token::Minus);
156 i += 1;
157 }
158
159 '(' => {
161 tokens.push(Token::LParen);
162 i += 1;
163 }
164 ')' => {
165 tokens.push(Token::RParen);
166 i += 1;
167 }
168 '[' => {
169 tokens.push(Token::LBracket);
171 i += 1;
172 let start = i;
174 while i < chars.len() && chars[i].is_ascii_digit() {
175 i += 1;
176 }
177 if i > start {
178 let num: String = chars[start..i].iter().collect();
179 tokens.push(Token::Number(num));
180 }
181 }
182 ']' => {
183 tokens.push(Token::RBracket);
184 i += 1;
185 }
186 '.' => {
187 tokens.push(Token::Dot);
188 i += 1;
189 }
190 ',' => {
191 tokens.push(Token::Comma);
192 i += 1;
193 }
194
195 c if is_ident_start(c) => {
197 let start = i;
198 while i < chars.len() && is_name_char(chars[i]) {
199 i += 1;
200 }
201 let word: String = chars[start..i].iter().collect();
202 let token = match word.to_uppercase().as_str() {
203 "AND" => Token::And,
204 "OR" => Token::Or,
205 "NOT" => Token::Not,
206 "BETWEEN" => Token::Between,
207 "IN" => Token::In,
208 "SET" => Token::Set,
209 "REMOVE" => Token::Remove,
210 "ADD" => Token::Add,
211 "DELETE" => Token::Delete,
212 _ => Token::Identifier(word),
213 };
214 tokens.push(token);
215 }
216
217 c => {
218 return Err(format!("Syntax error; token: \"{c}\""));
219 }
220 }
221 }
222
223 Ok(tokens)
224}
225
/// Returns `true` when `c` may begin a bare word: an ASCII letter or `_`.
fn is_ident_start(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '_')
}
229
/// Returns `true` when `c` may appear inside a word or placeholder name:
/// an ASCII letter, an ASCII digit, or `_`.
fn is_name_char(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
}
233
234pub struct TokenStream {
236 tokens: Vec<Token>,
237 pos: usize,
238}
239
240impl TokenStream {
241 pub fn new(tokens: Vec<Token>) -> Self {
242 Self { tokens, pos: 0 }
243 }
244
245 pub fn peek(&self) -> Option<&Token> {
246 self.tokens.get(self.pos)
247 }
248
249 #[allow(clippy::should_implement_trait)]
250 pub fn next(&mut self) -> Option<&Token> {
251 let token = self.tokens.get(self.pos);
252 if token.is_some() {
253 self.pos += 1;
254 }
255 token
256 }
257
258 pub fn expect(&mut self, expected: &Token) -> Result<(), String> {
259 match self.next() {
260 Some(t) if t == expected => Ok(()),
261 Some(t) => Err(format!("Expected {expected}, got {t}")),
262 None => Err(format!("Expected {expected}, got end of expression")),
263 }
264 }
265
266 pub fn at_end(&self) -> bool {
267 self.pos >= self.tokens.len()
268 }
269
270 pub fn position(&self) -> usize {
271 self.pos
272 }
273
274 pub fn pos(&self) -> usize {
276 self.pos
277 }
278
279 pub fn set_pos(&mut self, pos: usize) {
281 self.pos = pos;
282 }
283}
284
#[cfg(test)]
mod tests {
    use super::*;

    /// Tokenizes `input`, failing the test with the lexer's message if it is rejected.
    fn lex(input: &str) -> Vec<Token> {
        tokenize(input).unwrap_or_else(|err| panic!("failed to tokenize {input:?}: {err}"))
    }

    /// Shorthand for a [`Token::Identifier`].
    fn ident(name: &str) -> Token {
        Token::Identifier(name.into())
    }

    /// Shorthand for a [`Token::NameRef`]; `text` includes the leading `#`.
    fn name_ref(text: &str) -> Token {
        Token::NameRef(text.into())
    }

    /// Shorthand for a [`Token::ValueRef`]; `text` includes the leading `:`.
    fn value_ref(text: &str) -> Token {
        Token::ValueRef(text.into())
    }

    /// The exact error the tokenizer reports for an offending character or sigil.
    fn syntax_error(offender: &str) -> Result<Vec<Token>, String> {
        Err(format!("Syntax error; token: \"{offender}\""))
    }

    #[test]
    fn test_tokenize_simple_condition() {
        assert_eq!(
            lex("#status = :val"),
            vec![name_ref("#status"), Token::Eq, value_ref(":val")]
        );
    }

    #[test]
    fn test_tokenize_comparison_operators() {
        let cases = [
            ("a < b", Token::Lt),
            ("a <= b", Token::Le),
            ("a > b", Token::Gt),
            ("a >= b", Token::Ge),
            ("a <> b", Token::Ne),
            ("a = b", Token::Eq),
        ];
        for (input, operator) in cases {
            assert_eq!(
                lex(input),
                vec![ident("a"), operator, ident("b")],
                "input: {input}"
            );
        }
    }

    #[test]
    fn test_tokenize_keywords() {
        assert_eq!(
            lex("a AND b OR NOT c BETWEEN d IN e"),
            vec![
                ident("a"),
                Token::And,
                ident("b"),
                Token::Or,
                Token::Not,
                ident("c"),
                Token::Between,
                ident("d"),
                Token::In,
                ident("e"),
            ]
        );
    }

    #[test]
    fn test_tokenize_update_keywords() {
        assert_eq!(
            lex("SET a = :v REMOVE b ADD c :d DELETE e :f"),
            vec![
                Token::Set,
                ident("a"),
                Token::Eq,
                value_ref(":v"),
                Token::Remove,
                ident("b"),
                Token::Add,
                ident("c"),
                value_ref(":d"),
                Token::Delete,
                ident("e"),
                value_ref(":f"),
            ]
        );
    }

    #[test]
    fn test_tokenize_path_expression() {
        assert_eq!(
            lex("a.b[0].c"),
            vec![
                ident("a"),
                Token::Dot,
                ident("b"),
                Token::LBracket,
                Token::Number("0".into()),
                Token::RBracket,
                Token::Dot,
                ident("c"),
            ]
        );
    }

    #[test]
    fn test_tokenize_bracket_without_index() {
        // No digits after `[` means no `Number` token is emitted.
        assert_eq!(
            lex("a[]"),
            vec![ident("a"), Token::LBracket, Token::RBracket]
        );
    }

    #[test]
    fn test_tokenize_function_call() {
        assert_eq!(
            lex("attribute_exists(#name)"),
            vec![
                ident("attribute_exists"),
                Token::LParen,
                name_ref("#name"),
                Token::RParen,
            ]
        );
    }

    #[test]
    fn test_tokenize_arithmetic() {
        assert_eq!(
            lex("Price + :inc"),
            vec![ident("Price"), Token::Plus, value_ref(":inc")]
        );
        assert_eq!(
            lex("Price - :dec"),
            vec![ident("Price"), Token::Minus, value_ref(":dec")]
        );
    }

    #[test]
    fn test_tokenize_case_insensitive_keywords() {
        assert_eq!(lex("set AND or"), vec![Token::Set, Token::And, Token::Or]);
        // Non-keywords keep the spelling used in the input.
        assert_eq!(
            lex("Between Status"),
            vec![Token::Between, ident("Status")]
        );
    }

    #[test]
    fn test_tokenize_empty_and_whitespace_only() {
        assert_eq!(lex(""), Vec::<Token>::new());
        assert_eq!(lex(" \t\n"), Vec::<Token>::new());
    }

    #[test]
    fn test_tokenize_rejects_bare_sigils() {
        assert_eq!(tokenize("#"), syntax_error("#"));
        assert_eq!(tokenize("# name"), syntax_error("#"));
        assert_eq!(tokenize("a = :"), syntax_error(":"));
    }

    #[test]
    fn test_tokenize_rejects_unexpected_characters() {
        // Digits are only valid directly after `[`.
        assert_eq!(tokenize("a = 5"), syntax_error("5"));
        assert_eq!(tokenize("a ! b"), syntax_error("!"));
    }

    #[test]
    fn test_token_display() {
        assert_eq!(Token::Ne.to_string(), "<>");
        assert_eq!(Token::Ge.to_string(), ">=");
        assert_eq!(Token::Between.to_string(), "BETWEEN");
        assert_eq!(name_ref("#n").to_string(), "#n");
        assert_eq!(value_ref(":v").to_string(), ":v");
        assert_eq!(Token::Number("7".into()).to_string(), "7");
    }

    #[test]
    fn test_token_stream_walks_and_rewinds() {
        let mut stream = TokenStream::new(lex("a = :v"));

        assert_eq!(stream.peek(), Some(&ident("a")));
        assert_eq!(stream.next(), Some(&ident("a")));
        assert_eq!(stream.position(), 1);
        assert_eq!(stream.pos(), 1);

        assert_eq!(stream.expect(&Token::Eq), Ok(()));
        assert!(!stream.at_end());

        assert_eq!(stream.next(), Some(&value_ref(":v")));
        assert!(stream.at_end());
        assert_eq!(stream.next(), None);

        stream.set_pos(0);
        assert_eq!(stream.peek(), Some(&ident("a")));
    }

    #[test]
    fn test_token_stream_expect_reports_mismatch() {
        let mut stream = TokenStream::new(lex("a"));
        assert_eq!(
            stream.expect(&Token::Comma),
            Err("Expected ,, got a".to_string())
        );

        let mut empty = TokenStream::new(Vec::new());
        assert_eq!(
            empty.expect(&Token::Comma),
            Err("Expected ,, got end of expression".to_string())
        );
    }
}