graphos_adapters/query/gql/
lexer.rs1use graphos_common::utils::error::SourceSpan;
4
5#[derive(Debug, Clone, PartialEq)]
7pub struct Token {
8 pub kind: TokenKind,
10 pub text: String,
12 pub span: SourceSpan,
14}
15
/// The syntactic category of a [`Token`].
///
/// Keywords are recognized case-insensitively by the lexer. The type is
/// `Copy` and hashable so it can be used directly as a key in sets and maps
/// (for example "expected token" sets in a parser).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenKind {
    /// `MATCH` keyword.
    Match,
    /// `RETURN` keyword.
    Return,
    /// `WHERE` keyword.
    Where,
    /// `AND` keyword.
    And,
    /// `OR` keyword.
    Or,
    /// `NOT` keyword.
    Not,
    /// `INSERT` keyword.
    Insert,
    /// `DELETE` keyword.
    Delete,
    /// `SET` keyword.
    Set,
    /// `CREATE` keyword.
    Create,
    /// `NODE` keyword.
    Node,
    /// `EDGE` keyword.
    Edge,
    /// `TYPE` keyword.
    Type,
    /// `AS` keyword.
    As,
    /// `DISTINCT` keyword.
    Distinct,
    /// `ORDER` keyword.
    Order,
    /// `BY` keyword.
    By,
    /// `ASC` keyword.
    Asc,
    /// `DESC` keyword.
    Desc,
    /// `SKIP` keyword.
    Skip,
    /// `LIMIT` keyword.
    Limit,
    /// `NULL` keyword.
    Null,
    /// `TRUE` keyword.
    True,
    /// `FALSE` keyword.
    False,
    /// `DETACH` keyword.
    Detach,
    /// `CALL` keyword.
    Call,
    /// `YIELD` keyword.
    Yield,
    /// `IN` keyword.
    In,
    /// `LIKE` keyword.
    Like,
    /// `IS` keyword.
    Is,
    /// `CASE` keyword.
    Case,
    /// `WHEN` keyword.
    When,
    /// `THEN` keyword.
    Then,
    /// `ELSE` keyword.
    Else,
    /// `END` keyword.
    End,

    /// Integer literal: a run of ASCII digits.
    Integer,
    /// Float literal: ASCII digits, a `.`, then zero or more ASCII digits.
    Float,
    /// String literal delimited by matching single or double quotes.
    String,

    /// Name made of ASCII letters, digits and `_` (not starting with a
    /// digit) that is not one of the keywords above.
    Identifier,

    /// `=`
    Eq,
    /// `<>`
    Ne,
    /// `<`
    Lt,
    /// `<=`
    Le,
    /// `>`
    Gt,
    /// `>=`
    Ge,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `%`
    Percent,
    /// `||`
    Concat,

    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `.`
    Dot,
    /// `->`
    Arrow,
    /// `<-`
    LeftArrow,
    /// `--`
    DoubleDash,

    /// End of input.
    Eof,

    /// Input the lexer could not tokenize, such as an unrecognized
    /// character, a lone `|`, or an unterminated string literal.
    Error,
}
161
/// Hand-written tokenizer over a borrowed query string.
///
/// The lexer is cheap to clone (a string slice plus three counters), which
/// lets callers snapshot it for lookahead and restore it afterwards.
#[derive(Debug, Clone)]
pub struct Lexer<'a> {
    /// Source text being tokenized.
    input: &'a str,
    /// Byte offset into `input` of the next unread character.
    position: usize,
    /// 1-based line number of the next unread character.
    line: u32,
    /// 1-based column number of the next unread character.
    column: u32,
}
169
170impl<'a> Lexer<'a> {
171 pub fn new(input: &'a str) -> Self {
173 Self {
174 input,
175 position: 0,
176 line: 1,
177 column: 1,
178 }
179 }
180
181 pub fn next_token(&mut self) -> Token {
183 self.skip_whitespace();
184
185 let start = self.position;
186 let start_line = self.line;
187 let start_column = self.column;
188
189 if self.position >= self.input.len() {
190 return Token {
191 kind: TokenKind::Eof,
192 text: String::new(),
193 span: SourceSpan::new(start, start, start_line, start_column),
194 };
195 }
196
197 let ch = self.current_char();
198
199 let kind = match ch {
200 '(' => {
201 self.advance();
202 TokenKind::LParen
203 }
204 ')' => {
205 self.advance();
206 TokenKind::RParen
207 }
208 '[' => {
209 self.advance();
210 TokenKind::LBracket
211 }
212 ']' => {
213 self.advance();
214 TokenKind::RBracket
215 }
216 '{' => {
217 self.advance();
218 TokenKind::LBrace
219 }
220 '}' => {
221 self.advance();
222 TokenKind::RBrace
223 }
224 ':' => {
225 self.advance();
226 TokenKind::Colon
227 }
228 ',' => {
229 self.advance();
230 TokenKind::Comma
231 }
232 '.' => {
233 self.advance();
234 TokenKind::Dot
235 }
236 '+' => {
237 self.advance();
238 TokenKind::Plus
239 }
240 '*' => {
241 self.advance();
242 TokenKind::Star
243 }
244 '/' => {
245 self.advance();
246 TokenKind::Slash
247 }
248 '%' => {
249 self.advance();
250 TokenKind::Percent
251 }
252 '=' => {
253 self.advance();
254 TokenKind::Eq
255 }
256 '<' => {
257 self.advance();
258 if self.current_char() == '>' {
259 self.advance();
260 TokenKind::Ne
261 } else if self.current_char() == '=' {
262 self.advance();
263 TokenKind::Le
264 } else if self.current_char() == '-' {
265 self.advance();
266 TokenKind::LeftArrow
267 } else {
268 TokenKind::Lt
269 }
270 }
271 '>' => {
272 self.advance();
273 if self.current_char() == '=' {
274 self.advance();
275 TokenKind::Ge
276 } else {
277 TokenKind::Gt
278 }
279 }
280 '-' => {
281 self.advance();
282 if self.current_char() == '>' {
283 self.advance();
284 TokenKind::Arrow
285 } else if self.current_char() == '-' {
286 self.advance();
287 TokenKind::DoubleDash
288 } else {
289 TokenKind::Minus
290 }
291 }
292 '|' => {
293 self.advance();
294 if self.current_char() == '|' {
295 self.advance();
296 TokenKind::Concat
297 } else {
298 TokenKind::Error
299 }
300 }
301 '\'' | '"' => self.scan_string(),
302 _ if ch.is_ascii_digit() => self.scan_number(),
303 _ if ch.is_ascii_alphabetic() || ch == '_' => self.scan_identifier(),
304 _ => {
305 self.advance();
306 TokenKind::Error
307 }
308 };
309
310 let text = self.input[start..self.position].to_string();
311 Token {
312 kind,
313 text,
314 span: SourceSpan::new(start, self.position, start_line, start_column),
315 }
316 }
317
318 fn skip_whitespace(&mut self) {
319 while self.position < self.input.len() {
320 let ch = self.current_char();
321 if ch.is_whitespace() {
322 if ch == '\n' {
323 self.line += 1;
324 self.column = 1;
325 } else {
326 self.column += 1;
327 }
328 self.position += 1;
329 } else {
330 break;
331 }
332 }
333 }
334
335 fn current_char(&self) -> char {
336 self.input[self.position..].chars().next().unwrap_or('\0')
337 }
338
339 fn advance(&mut self) {
340 if self.position < self.input.len() {
341 self.position += 1;
342 self.column += 1;
343 }
344 }
345
346 fn scan_string(&mut self) -> TokenKind {
347 let quote = self.current_char();
348 self.advance();
349
350 while self.position < self.input.len() {
351 let ch = self.current_char();
352 if ch == quote {
353 self.advance();
354 return TokenKind::String;
355 }
356 if ch == '\\' {
357 self.advance();
358 }
359 self.advance();
360 }
361
362 TokenKind::Error }
364
365 fn scan_number(&mut self) -> TokenKind {
366 while self.position < self.input.len() && self.current_char().is_ascii_digit() {
367 self.advance();
368 }
369
370 if self.current_char() == '.' {
371 self.advance();
372 while self.position < self.input.len() && self.current_char().is_ascii_digit() {
373 self.advance();
374 }
375 TokenKind::Float
376 } else {
377 TokenKind::Integer
378 }
379 }
380
381 fn scan_identifier(&mut self) -> TokenKind {
382 let start = self.position;
383 while self.position < self.input.len() {
384 let ch = self.current_char();
385 if ch.is_ascii_alphanumeric() || ch == '_' {
386 self.advance();
387 } else {
388 break;
389 }
390 }
391
392 let text = &self.input[start..self.position];
393 match text.to_uppercase().as_str() {
394 "MATCH" => TokenKind::Match,
395 "RETURN" => TokenKind::Return,
396 "WHERE" => TokenKind::Where,
397 "AND" => TokenKind::And,
398 "OR" => TokenKind::Or,
399 "NOT" => TokenKind::Not,
400 "INSERT" => TokenKind::Insert,
401 "DELETE" => TokenKind::Delete,
402 "SET" => TokenKind::Set,
403 "CREATE" => TokenKind::Create,
404 "NODE" => TokenKind::Node,
405 "EDGE" => TokenKind::Edge,
406 "TYPE" => TokenKind::Type,
407 "AS" => TokenKind::As,
408 "DISTINCT" => TokenKind::Distinct,
409 "ORDER" => TokenKind::Order,
410 "BY" => TokenKind::By,
411 "ASC" => TokenKind::Asc,
412 "DESC" => TokenKind::Desc,
413 "SKIP" => TokenKind::Skip,
414 "LIMIT" => TokenKind::Limit,
415 "NULL" => TokenKind::Null,
416 "TRUE" => TokenKind::True,
417 "FALSE" => TokenKind::False,
418 "DETACH" => TokenKind::Detach,
419 "CALL" => TokenKind::Call,
420 "YIELD" => TokenKind::Yield,
421 "IN" => TokenKind::In,
422 "LIKE" => TokenKind::Like,
423 "IS" => TokenKind::Is,
424 "CASE" => TokenKind::Case,
425 "WHEN" => TokenKind::When,
426 "THEN" => TokenKind::Then,
427 "ELSE" => TokenKind::Else,
428 "END" => TokenKind::End,
429 _ => TokenKind::Identifier,
430 }
431 }
432}
433
#[cfg(test)]
mod tests {
    use super::*;

    /// Pulls the next `count` token kinds from `lexer`, in order.
    fn take_kinds(lexer: &mut Lexer<'_>, count: usize) -> Vec<TokenKind> {
        (0..count).map(|_| lexer.next_token().kind).collect()
    }

    /// Keywords, punctuation and identifiers of a minimal query, followed by
    /// end of input.
    #[test]
    fn test_simple_tokens() {
        let mut lexer = Lexer::new("MATCH (n) RETURN n");
        let expected = [
            TokenKind::Match,
            TokenKind::LParen,
            TokenKind::Identifier,
            TokenKind::RParen,
            TokenKind::Return,
            TokenKind::Identifier,
            TokenKind::Eof,
        ];

        assert_eq!(take_kinds(&mut lexer, expected.len()), expected);
    }

    /// The three two-character edge markers are each lexed as one token.
    #[test]
    fn test_arrow_tokens() {
        let mut lexer = Lexer::new("-> <- --");
        let expected = [
            TokenKind::Arrow,
            TokenKind::LeftArrow,
            TokenKind::DoubleDash,
        ];

        assert_eq!(take_kinds(&mut lexer, expected.len()), expected);
    }

    /// Integer and float literals keep their exact source text.
    #[test]
    fn test_number_tokens() {
        let mut lexer = Lexer::new("42 3.14");

        for (kind, text) in [(TokenKind::Integer, "42"), (TokenKind::Float, "3.14")] {
            let token = lexer.next_token();
            assert_eq!(token.kind, kind);
            assert_eq!(token.text, text);
        }
    }

    /// Both quote styles produce string tokens whose text includes the quotes.
    #[test]
    fn test_string_tokens() {
        let mut lexer = Lexer::new("'hello' \"world\"");

        for text in ["'hello'", "\"world\""] {
            let token = lexer.next_token();
            assert_eq!(token.kind, TokenKind::String);
            assert_eq!(token.text, text);
        }
    }
}