kotoba_execution/execution/
gql_parser.rs1use kotoba_core::{types::*, ir::*};
4use kotoba_core::types::Result;
5use std::collections::HashMap;
6
/// Recursive-descent parser that turns a GQL query string into a `PlanIR`.
///
/// Usage: `parse()` tokenizes the input into `tokens`, then walks the
/// token stream with a cursor (`position`) building the logical plan.
#[derive(Debug)]
pub struct GqlParser {
    // Index of the next token to consume from `tokens`.
    position: usize,
    // Token stream produced by `tokenize`; always terminated by `GqlToken::Eof`.
    tokens: Vec<GqlToken>,
}
15
/// Lexical tokens produced by the GQL tokenizer.
#[derive(Debug, Clone, PartialEq)]
enum GqlToken {
    // Keywords (matched case-insensitively by `tokenize`).
    Match,
    Where,
    Return,
    Order,
    By,
    Limit,
    Asc,
    Desc,
    Distinct,
    Count,
    Sum,
    Avg,
    Min,
    Max,
    As,

    // Punctuation and operators.
    // NOTE(review): the tokenizer emits `Minus` for a lone '-' and `Arrow`
    // for "->"; the `Dash` variant is currently never produced.
    LParen, RParen, LBracket, RBracket, LBrace, RBrace, Colon, Comma, Dot, Arrow, Dash, Eq, Ne, Lt, Le, Gt, Ge, Plus, Minus, Star, Slash, Identifier(String),
    // String literal contents (quotes stripped, escapes kept verbatim).
    String(String),
    // Numeric literal; integers are stored as f64 and narrowed later.
    Number(f64),

    // End-of-input sentinel appended by `tokenize`.
    Eof,
}
67
/// `Default` delegates to `new()`, so `GqlParser::default()` yields a
/// parser with an empty token buffer and the cursor at zero.
impl Default for GqlParser {
    fn default() -> Self {
        Self::new()
    }
}
73
74impl GqlParser {
75 pub fn new() -> Self {
76 Self {
77 position: 0,
78 tokens: Vec::new(),
79 }
80 }
81
82 pub fn parse(&mut self, gql: &str) -> Result<PlanIR> {
84 self.tokenize(gql)?;
85 self.position = 0;
86
87 self.parse_query()
88 }
89
90 fn tokenize(&mut self, input: &str) -> Result<()> {
92 self.tokens.clear();
93 let chars: Vec<char> = input.chars().collect();
94 let mut i = 0;
95
96 while i < chars.len() {
97 match chars[i] {
98 ' ' | '\t' | '\n' | '\r' => {
100 i += 1;
101 continue;
102 }
103
104 '(' => self.tokens.push(GqlToken::LParen),
106 ')' => self.tokens.push(GqlToken::RParen),
107 '[' => self.tokens.push(GqlToken::LBracket),
108 ']' => self.tokens.push(GqlToken::RBracket),
109 '{' => self.tokens.push(GqlToken::LBrace),
110 '}' => self.tokens.push(GqlToken::RBrace),
111 ':' => self.tokens.push(GqlToken::Colon),
112 ',' => self.tokens.push(GqlToken::Comma),
113 '.' => self.tokens.push(GqlToken::Dot),
114 '+' => self.tokens.push(GqlToken::Plus),
115 '-' => {
116 if i + 1 < chars.len() && chars[i + 1] == '>' {
117 self.tokens.push(GqlToken::Arrow);
118 i += 1;
119 } else {
120 self.tokens.push(GqlToken::Minus);
121 }
122 }
123 '*' => self.tokens.push(GqlToken::Star),
124 '/' => self.tokens.push(GqlToken::Slash),
125
126 '=' => self.tokens.push(GqlToken::Eq),
128 '<' => {
129 if i + 1 < chars.len() && chars[i + 1] == '=' {
130 self.tokens.push(GqlToken::Le);
131 i += 1;
132 } else if i + 1 < chars.len() && chars[i + 1] == '>' {
133 self.tokens.push(GqlToken::Ne);
134 i += 1;
135 } else {
136 self.tokens.push(GqlToken::Lt);
137 }
138 }
139 '>' => {
140 if i + 1 < chars.len() && chars[i + 1] == '=' {
141 self.tokens.push(GqlToken::Ge);
142 i += 1;
143 } else {
144 self.tokens.push(GqlToken::Gt);
145 }
146 }
147
148 '"' | '\'' => {
150 let quote = chars[i];
151 i += 1;
152 let start = i;
153 while i < chars.len() && chars[i] != quote {
154 if chars[i] == '\\' {
155 i += 2; } else {
157 i += 1;
158 }
159 }
160 if i >= chars.len() {
161 return Err(KotobaError::Parse("Unterminated string literal".to_string()));
162 }
163 let value = chars[start..i].iter().collect();
164 self.tokens.push(GqlToken::String(value));
165 }
166
167 '0'..='9' => {
169 let start = i;
170 while i < chars.len() && (chars[i].is_ascii_digit() || chars[i] == '.') {
171 i += 1;
172 }
173 let num_str: String = chars[start..i].iter().collect();
174 match num_str.parse::<f64>() {
175 Ok(n) => self.tokens.push(GqlToken::Number(n)),
176 Err(_) => return Err(KotobaError::Parse(format!("Invalid number: {}", num_str))),
177 }
178 i -= 1; }
180
181 'a'..='z' | 'A'..='Z' | '_' => {
183 let start = i;
184 while i < chars.len() && (chars[i].is_alphanumeric() || chars[i] == '_') {
185 i += 1;
186 }
187 let ident: String = chars[start..i].iter().collect();
188 i -= 1; let token = match ident.to_uppercase().as_str() {
191 "MATCH" => GqlToken::Match,
192 "WHERE" => GqlToken::Where,
193 "RETURN" => GqlToken::Return,
194 "ORDER" => GqlToken::Order,
195 "BY" => GqlToken::By,
196 "LIMIT" => GqlToken::Limit,
197 "ASC" => GqlToken::Asc,
198 "DESC" => GqlToken::Desc,
199 "DISTINCT" => GqlToken::Distinct,
200 "COUNT" => GqlToken::Count,
201 "SUM" => GqlToken::Sum,
202 "AVG" => GqlToken::Avg,
203 "MIN" => GqlToken::Min,
204 "MAX" => GqlToken::Max,
205 "AS" => GqlToken::As,
206 _ => GqlToken::Identifier(ident),
207 };
208 self.tokens.push(token);
209 }
210
211 _ => return Err(KotobaError::Parse(format!("Unexpected character: {}", chars[i]))),
212 }
213 i += 1;
214 }
215
216 self.tokens.push(GqlToken::Eof);
217 Ok(())
218 }
219
220 fn parse_query(&mut self) -> Result<PlanIR> {
222 self.consume_token(GqlToken::Match)?;
224 let mut plan = self.parse_match_clause()?;
225
226 if self.check_token(&GqlToken::Where) {
228 self.advance();
229 let predicate = self.parse_where_clause()?;
230 plan = LogicalOp::Filter {
231 pred: predicate,
232 input: Box::new(plan),
233 };
234 }
235
236 self.consume_token(GqlToken::Return)?;
238 let (cols, aggregations) = self.parse_return_clause()?;
239
240 if !aggregations.is_empty() {
242 let group_keys: Vec<String> = cols.iter()
244 .filter(|col| !aggregations.iter().any(|agg| agg.as_ == **col))
245 .cloned()
246 .collect();
247
248 plan = LogicalOp::Group {
249 keys: group_keys,
250 aggregations,
251 input: Box::new(plan),
252 };
253 }
254
255 let mut sort_keys = Vec::new();
257 if self.check_token(&GqlToken::Order) {
258 self.advance();
259 self.consume_token(GqlToken::By)?;
260 sort_keys = self.parse_order_by_clause()?;
261 plan = LogicalOp::Sort {
262 keys: sort_keys,
263 input: Box::new(plan),
264 };
265 }
266
267 let mut limit = None;
269 if self.check_token(&GqlToken::Limit) {
270 self.advance();
271 limit = Some(self.parse_limit_clause()?);
272 plan = LogicalOp::Limit {
273 count: limit.unwrap(),
274 input: Box::new(plan),
275 };
276 }
277
278 if self.check_token(&GqlToken::Distinct) {
280 self.advance();
281 plan = LogicalOp::Distinct {
282 input: Box::new(plan),
283 };
284 }
285
286 plan = LogicalOp::Project {
287 cols,
288 input: Box::new(plan),
289 };
290
291 Ok(PlanIR {
292 plan,
293 limit,
294 })
295 }
296
297 fn parse_match_clause(&mut self) -> Result<LogicalOp> {
299 let pattern = self.parse_pattern()?;
300
301 let mut patterns = vec![pattern];
303 while self.check_token(&GqlToken::Comma) {
304 self.advance();
305 patterns.push(self.parse_pattern()?);
306 }
307
308 if patterns.len() == 1 {
310 Ok(patterns.into_iter().next().unwrap())
311 } else {
312 Ok(LogicalOp::Join {
314 left: Box::new(patterns[0].clone()),
315 right: Box::new(patterns[1].clone()),
316 on: Vec::new(), })
318 }
319 }
320
321 fn parse_pattern(&mut self) -> Result<LogicalOp> {
323 self.consume_token(GqlToken::LParen)?;
324 let node_pattern = self.parse_node_pattern()?;
325 self.consume_token(GqlToken::RParen)?;
326
327 if self.check_token(&GqlToken::Dash) {
329 self.advance();
330
331 let edge_label = if self.check_token(&GqlToken::LBracket) {
333 self.advance();
334 let label = self.parse_edge_pattern()?;
335 self.consume_token(GqlToken::RBracket)?;
336 Some(label)
337 } else {
338 None
339 };
340
341 let direction = if self.check_token(&GqlToken::Arrow) {
343 self.advance();
344 Direction::Out
345 } else if self.check_token(&GqlToken::Dash) {
346 self.advance();
347 if self.check_token(&GqlToken::Gt) {
348 self.advance();
349 Direction::In
350 } else {
351 Direction::Both
352 }
353 } else {
354 return Err(KotobaError::Parse("Expected edge direction".to_string()));
355 };
356
357 self.consume_token(GqlToken::LParen)?;
359 let target_alias = self.parse_node_alias()?;
360 self.consume_token(GqlToken::RParen)?;
361
362 let edge_pattern = EdgePattern {
364 label: edge_label.unwrap_or_else(|| "EDGE".to_string()),
365 dir: direction,
366 props: None,
367 };
368
369 Ok(LogicalOp::Expand {
370 edge: edge_pattern,
371 to_as: target_alias,
372 from: Box::new(node_pattern),
373 })
374 } else {
375 Ok(node_pattern)
376 }
377 }
378
379 fn parse_node_pattern(&mut self) -> Result<LogicalOp> {
381 let alias = self.parse_node_alias()?;
382
383 let label = if self.check_token(&GqlToken::Colon) {
385 self.advance();
386 self.parse_identifier()?
387 } else {
388 "Node".to_string()
389 };
390
391 let props = if self.check_token(&GqlToken::LBrace) {
393 Some(self.parse_properties()?)
394 } else {
395 None
396 };
397
398 Ok(LogicalOp::NodeScan {
399 label,
400 as_: alias,
401 props,
402 })
403 }
404
405 fn parse_node_alias(&mut self) -> Result<String> {
407 if let Some(token) = self.peek_token().cloned() {
408 match token {
409 GqlToken::Identifier(alias) => {
410 self.advance();
411 Ok(alias)
412 }
413 _ => Ok(format!("node_{}", self.position)),
414 }
415 } else {
416 Ok(format!("node_{}", self.position))
417 }
418 }
419
420 fn parse_edge_pattern(&mut self) -> Result<String> {
422 if self.check_token(&GqlToken::Colon) {
423 self.advance();
424 self.parse_identifier()
425 } else {
426 Ok("EDGE".to_string())
427 }
428 }
429
430 fn parse_properties(&mut self) -> Result<Properties> {
432 self.consume_token(GqlToken::LBrace)?;
433 let mut props = HashMap::new();
434
435 while !self.check_token(&GqlToken::RBrace) && !self.check_token(&GqlToken::Eof) {
436 let key = self.parse_identifier()?;
437 self.consume_token(GqlToken::Colon)?;
438 let value = self.parse_value()?;
439 props.insert(key, value);
440
441 if !self.check_token(&GqlToken::RBrace) {
442 self.consume_token(GqlToken::Comma)?;
443 }
444 }
445
446 self.consume_token(GqlToken::RBrace)?;
447 Ok(props)
448 }
449
450 fn parse_where_clause(&mut self) -> Result<Predicate> {
452 self.parse_expression()
453 }
454
455 fn parse_return_clause(&mut self) -> Result<(Vec<String>, Vec<Aggregation>)> {
457 let mut cols = Vec::new();
458 let mut aggregations = Vec::new();
459
460 loop {
461 if let Some(agg) = self.try_parse_aggregation()? {
462 aggregations.push(agg);
463 } else {
464 let expr = self.parse_expression()?;
465 cols.push(format!("{:?}", expr));
466 }
467
468 if !self.check_token(&GqlToken::Comma) {
469 break;
470 }
471 self.advance();
472 }
473
474 Ok((cols, aggregations))
475 }
476
477 fn try_parse_aggregation(&mut self) -> Result<Option<Aggregation>> {
479 let start_pos = self.position;
480
481 if let Some(func_token) = self.peek_token().cloned() {
482 let fn_name = match func_token {
483 GqlToken::Count => "count",
484 GqlToken::Sum => "sum",
485 GqlToken::Avg => "avg",
486 GqlToken::Min => "min",
487 GqlToken::Max => "max",
488 _ => return Ok(None),
489 };
490
491 self.advance();
492 self.consume_token(GqlToken::LParen)?;
493 let arg = self.parse_expression()?;
494 self.consume_token(GqlToken::RParen)?;
495
496 let as_name = if self.check_token(&GqlToken::As) {
497 self.advance();
498 self.parse_identifier()?
499 } else {
500 format!("{}_{:?}", fn_name, arg)
501 };
502
503 Ok(Some(Aggregation {
504 fn_: fn_name.to_string(),
505 args: vec![format!("{:?}", arg)],
506 as_: as_name,
507 }))
508 } else {
509 self.position = start_pos;
511 Ok(None)
512 }
513 }
514
515 fn parse_order_by_clause(&mut self) -> Result<Vec<SortKey>> {
517 let mut keys = Vec::new();
518
519 loop {
520 let expr = self.parse_term()?; let asc = if self.check_token(&GqlToken::Asc) {
522 self.advance();
523 true
524 } else if self.check_token(&GqlToken::Desc) {
525 self.advance();
526 false
527 } else {
528 true };
530
531 keys.push(SortKey { expr, asc });
532
533 if !self.check_token(&GqlToken::Comma) {
534 break;
535 }
536 self.advance();
537 }
538
539 Ok(keys)
540 }
541
542 fn parse_limit_clause(&mut self) -> Result<usize> {
544 if let Some(GqlToken::Number(n)) = self.peek_token() {
545 let count = *n as usize;
546 self.advance();
547 Ok(count)
548 } else {
549 Err(KotobaError::Parse("Expected number after LIMIT".to_string()))
550 }
551 }
552
553 fn parse_expression(&mut self) -> Result<Predicate> {
555 let left = self.parse_term()?;
557
558 if self.check_token(&GqlToken::Eq) {
559 self.advance();
560 let right = self.parse_term()?;
561 Ok(Predicate::Eq { eq: [left, right] })
562 } else {
563 Ok(Predicate::Eq { eq: [left, Expr::Const(Value::Bool(true))] })
565 }
566 }
567
568 fn parse_term(&mut self) -> Result<Expr> {
570 match self.peek_token().cloned() {
571 Some(GqlToken::Identifier(id)) => {
572 self.advance();
573 Ok(Expr::Var(id))
574 }
575 Some(GqlToken::String(s)) => {
576 self.advance();
577 Ok(Expr::Const(Value::String(s)))
578 }
579 Some(GqlToken::Number(n)) => {
580 self.advance();
581 Ok(Expr::Const(Value::Int(n as i64)))
582 }
583 _ => Err(KotobaError::Parse("Expected term".to_string())),
584 }
585 }
586
587 fn parse_value(&mut self) -> Result<Value> {
589 match self.peek_token().cloned() {
590 Some(GqlToken::String(s)) => {
591 self.advance();
592 Ok(Value::String(s))
593 }
594 Some(GqlToken::Number(n)) => {
595 self.advance();
596 Ok(Value::Int(n as i64))
597 }
598 _ => Err(KotobaError::Parse("Expected value".to_string())),
599 }
600 }
601
602 fn parse_identifier(&mut self) -> Result<String> {
604 if let Some(GqlToken::Identifier(id)) = self.peek_token().cloned() {
605 self.advance();
606 Ok(id)
607 } else {
608 Err(KotobaError::Parse("Expected identifier".to_string()))
609 }
610 }
611
612 fn check_token(&self, token: &GqlToken) -> bool {
614 matches!(self.peek_token(), Some(t) if std::mem::discriminant(t) == std::mem::discriminant(token))
615 }
616
617 fn consume_token(&mut self, expected: GqlToken) -> Result<()> {
619 if self.check_token(&expected) {
620 self.advance();
621 Ok(())
622 } else {
623 Err(KotobaError::Parse(format!("Expected {:?}, found {:?}", expected, self.peek_token())))
624 }
625 }
626
627 fn peek_token(&self) -> Option<&GqlToken> {
629 self.tokens.get(self.position)
630 }
631
632 fn advance(&mut self) {
634 if self.position < self.tokens.len() {
635 self.position += 1;
636 }
637 }
638}