// graphos_adapters/query/gql/lexer.rs

use graphos_common::utils::error::SourceSpan;
4
/// A single lexical token produced by the GQL lexer.
#[derive(Debug, Clone, PartialEq)]
pub struct Token {
    // Category of the token (keyword, literal, operator, punctuation, ...).
    pub kind: TokenKind,
    // Raw source text the token was scanned from (empty for Eof).
    pub text: String,
    // Location of the token in the input (byte offsets plus line/column).
    pub span: SourceSpan,
}
15
/// All token categories recognized by the GQL lexer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenKind {
    // --- Keywords (matched case-insensitively in `scan_identifier`) ---
    Match,
    Return,
    Where,
    And,
    Or,
    Not,
    Insert,
    Delete,
    Set,
    Create,
    Node,
    Edge,
    Type,
    As,
    Distinct,
    Order,
    By,
    Asc,
    Desc,
    Skip,
    Limit,
    Null,
    True,
    False,
    Detach,
    Call,
    Yield,
    In,
    Like,
    Is,
    Case,
    When,
    Then,
    Else,
    End,
    Optional,
    With,
    Exists,

    // --- Literals ---
    Integer,
    Float,
    String,

    // --- Names (any non-keyword word) ---
    Identifier,

    // --- Operators ---
    Eq,      // =
    Ne,      // <>
    Lt,      // <
    Le,      // <=
    Gt,      // >
    Ge,      // >=
    Plus,    // +
    Minus,   // -
    Star,    // *
    Slash,   // /
    Percent, // %
    Concat,  // ||

    // --- Punctuation / pattern syntax ---
    LParen,
    RParen,
    LBracket,
    RBracket,
    LBrace,
    RBrace,
    Colon,
    Comma,
    Dot,
    Arrow,      // ->
    LeftArrow,  // <-
    DoubleDash, // --

    // Query parameter, e.g. `$min_age`.
    Parameter,

    // End of input.
    Eof,

    // Unrecognized or malformed input.
    Error,
}
170
/// Hand-written single-pass lexer over a GQL query string.
pub struct Lexer<'a> {
    input: &'a str,  // full source text being scanned
    position: usize, // current byte offset into `input`
    line: u32,       // 1-based line number at `position`
    column: u32,     // 1-based column number at `position`
}
178
179impl<'a> Lexer<'a> {
180 pub fn new(input: &'a str) -> Self {
182 Self {
183 input,
184 position: 0,
185 line: 1,
186 column: 1,
187 }
188 }
189
190 pub fn next_token(&mut self) -> Token {
192 self.skip_whitespace();
193
194 let start = self.position;
195 let start_line = self.line;
196 let start_column = self.column;
197
198 if self.position >= self.input.len() {
199 return Token {
200 kind: TokenKind::Eof,
201 text: String::new(),
202 span: SourceSpan::new(start, start, start_line, start_column),
203 };
204 }
205
206 let ch = self.current_char();
207
208 let kind = match ch {
209 '(' => {
210 self.advance();
211 TokenKind::LParen
212 }
213 ')' => {
214 self.advance();
215 TokenKind::RParen
216 }
217 '[' => {
218 self.advance();
219 TokenKind::LBracket
220 }
221 ']' => {
222 self.advance();
223 TokenKind::RBracket
224 }
225 '{' => {
226 self.advance();
227 TokenKind::LBrace
228 }
229 '}' => {
230 self.advance();
231 TokenKind::RBrace
232 }
233 ':' => {
234 self.advance();
235 TokenKind::Colon
236 }
237 ',' => {
238 self.advance();
239 TokenKind::Comma
240 }
241 '.' => {
242 self.advance();
243 TokenKind::Dot
244 }
245 '+' => {
246 self.advance();
247 TokenKind::Plus
248 }
249 '*' => {
250 self.advance();
251 TokenKind::Star
252 }
253 '/' => {
254 self.advance();
255 TokenKind::Slash
256 }
257 '%' => {
258 self.advance();
259 TokenKind::Percent
260 }
261 '=' => {
262 self.advance();
263 TokenKind::Eq
264 }
265 '<' => {
266 self.advance();
267 if self.current_char() == '>' {
268 self.advance();
269 TokenKind::Ne
270 } else if self.current_char() == '=' {
271 self.advance();
272 TokenKind::Le
273 } else if self.current_char() == '-' {
274 self.advance();
275 TokenKind::LeftArrow
276 } else {
277 TokenKind::Lt
278 }
279 }
280 '>' => {
281 self.advance();
282 if self.current_char() == '=' {
283 self.advance();
284 TokenKind::Ge
285 } else {
286 TokenKind::Gt
287 }
288 }
289 '-' => {
290 self.advance();
291 if self.current_char() == '>' {
292 self.advance();
293 TokenKind::Arrow
294 } else if self.current_char() == '-' {
295 self.advance();
296 TokenKind::DoubleDash
297 } else {
298 TokenKind::Minus
299 }
300 }
301 '|' => {
302 self.advance();
303 if self.current_char() == '|' {
304 self.advance();
305 TokenKind::Concat
306 } else {
307 TokenKind::Error
308 }
309 }
310 '\'' | '"' => self.scan_string(),
311 '$' => self.scan_parameter(),
312 _ if ch.is_ascii_digit() => self.scan_number(),
313 _ if ch.is_ascii_alphabetic() || ch == '_' => self.scan_identifier(),
314 _ => {
315 self.advance();
316 TokenKind::Error
317 }
318 };
319
320 let text = self.input[start..self.position].to_string();
321 Token {
322 kind,
323 text,
324 span: SourceSpan::new(start, self.position, start_line, start_column),
325 }
326 }
327
328 fn skip_whitespace(&mut self) {
329 while self.position < self.input.len() {
330 let ch = self.current_char();
331 if ch.is_whitespace() {
332 if ch == '\n' {
333 self.line += 1;
334 self.column = 1;
335 } else {
336 self.column += 1;
337 }
338 self.position += 1;
339 } else {
340 break;
341 }
342 }
343 }
344
345 fn current_char(&self) -> char {
346 self.input[self.position..].chars().next().unwrap_or('\0')
347 }
348
349 fn advance(&mut self) {
350 if self.position < self.input.len() {
351 self.position += 1;
352 self.column += 1;
353 }
354 }
355
356 fn scan_string(&mut self) -> TokenKind {
357 let quote = self.current_char();
358 self.advance();
359
360 while self.position < self.input.len() {
361 let ch = self.current_char();
362 if ch == quote {
363 self.advance();
364 return TokenKind::String;
365 }
366 if ch == '\\' {
367 self.advance();
368 }
369 self.advance();
370 }
371
372 TokenKind::Error }
374
375 fn scan_number(&mut self) -> TokenKind {
376 while self.position < self.input.len() && self.current_char().is_ascii_digit() {
377 self.advance();
378 }
379
380 if self.current_char() == '.' {
381 self.advance();
382 while self.position < self.input.len() && self.current_char().is_ascii_digit() {
383 self.advance();
384 }
385 TokenKind::Float
386 } else {
387 TokenKind::Integer
388 }
389 }
390
391 fn scan_parameter(&mut self) -> TokenKind {
392 self.advance();
394
395 if self.position >= self.input.len() {
397 return TokenKind::Error;
398 }
399
400 let ch = self.current_char();
401 if !ch.is_ascii_alphabetic() && ch != '_' {
402 return TokenKind::Error;
403 }
404
405 while self.position < self.input.len() {
407 let ch = self.current_char();
408 if ch.is_ascii_alphanumeric() || ch == '_' {
409 self.advance();
410 } else {
411 break;
412 }
413 }
414
415 TokenKind::Parameter
416 }
417
418 fn scan_identifier(&mut self) -> TokenKind {
419 let start = self.position;
420 while self.position < self.input.len() {
421 let ch = self.current_char();
422 if ch.is_ascii_alphanumeric() || ch == '_' {
423 self.advance();
424 } else {
425 break;
426 }
427 }
428
429 let text = &self.input[start..self.position];
430 match text.to_uppercase().as_str() {
431 "MATCH" => TokenKind::Match,
432 "RETURN" => TokenKind::Return,
433 "WHERE" => TokenKind::Where,
434 "AND" => TokenKind::And,
435 "OR" => TokenKind::Or,
436 "NOT" => TokenKind::Not,
437 "INSERT" => TokenKind::Insert,
438 "DELETE" => TokenKind::Delete,
439 "SET" => TokenKind::Set,
440 "CREATE" => TokenKind::Create,
441 "NODE" => TokenKind::Node,
442 "EDGE" => TokenKind::Edge,
443 "TYPE" => TokenKind::Type,
444 "AS" => TokenKind::As,
445 "DISTINCT" => TokenKind::Distinct,
446 "ORDER" => TokenKind::Order,
447 "BY" => TokenKind::By,
448 "ASC" => TokenKind::Asc,
449 "DESC" => TokenKind::Desc,
450 "SKIP" => TokenKind::Skip,
451 "LIMIT" => TokenKind::Limit,
452 "NULL" => TokenKind::Null,
453 "TRUE" => TokenKind::True,
454 "FALSE" => TokenKind::False,
455 "DETACH" => TokenKind::Detach,
456 "CALL" => TokenKind::Call,
457 "YIELD" => TokenKind::Yield,
458 "IN" => TokenKind::In,
459 "LIKE" => TokenKind::Like,
460 "IS" => TokenKind::Is,
461 "CASE" => TokenKind::Case,
462 "WHEN" => TokenKind::When,
463 "THEN" => TokenKind::Then,
464 "ELSE" => TokenKind::Else,
465 "END" => TokenKind::End,
466 "EXISTS" => TokenKind::Exists,
467 "OPTIONAL" => TokenKind::Optional,
468 "WITH" => TokenKind::With,
469 _ => TokenKind::Identifier,
470 }
471 }
472}
473
#[cfg(test)]
mod tests {
    use super::*;

    /// Drains `src` into the sequence of token kinds, up to and including Eof.
    fn kinds_of(src: &str) -> Vec<TokenKind> {
        let mut lexer = Lexer::new(src);
        let mut out = Vec::new();
        loop {
            let tok = lexer.next_token();
            let done = tok.kind == TokenKind::Eof;
            out.push(tok.kind);
            if done {
                return out;
            }
        }
    }

    #[test]
    fn test_simple_tokens() {
        use TokenKind::*;
        assert_eq!(
            kinds_of("MATCH (n) RETURN n"),
            [Match, LParen, Identifier, RParen, Return, Identifier, Eof]
        );
    }

    #[test]
    fn test_arrow_tokens() {
        use TokenKind::*;
        assert_eq!(kinds_of("-> <- --"), [Arrow, LeftArrow, DoubleDash, Eof]);
    }

    #[test]
    fn test_number_tokens() {
        let mut lexer = Lexer::new("42 3.14");

        let tok = lexer.next_token();
        assert_eq!((tok.kind, tok.text.as_str()), (TokenKind::Integer, "42"));

        let tok = lexer.next_token();
        assert_eq!((tok.kind, tok.text.as_str()), (TokenKind::Float, "3.14"));
    }

    #[test]
    fn test_string_tokens() {
        let mut lexer = Lexer::new("'hello' \"world\"");

        for expected in ["'hello'", "\"world\""] {
            let tok = lexer.next_token();
            assert_eq!(tok.kind, TokenKind::String);
            assert_eq!(tok.text, expected);
        }
    }

    #[test]
    fn test_parameter_tokens() {
        let mut lexer = Lexer::new("$param1 $another_param");

        for expected in ["$param1", "$another_param"] {
            let tok = lexer.next_token();
            assert_eq!(tok.kind, TokenKind::Parameter);
            assert_eq!(tok.text, expected);
        }
    }

    #[test]
    fn test_parameter_in_query() {
        use TokenKind::*;
        let mut lexer = Lexer::new("n.age > $min_age");

        for expected in [Identifier, Dot, Identifier, Gt] {
            assert_eq!(lexer.next_token().kind, expected);
        }

        let param = lexer.next_token();
        assert_eq!(param.kind, Parameter);
        assert_eq!(param.text, "$min_age");
    }
}