1use std::collections::HashMap;
7use kotoba_core::prelude::*;
8
9type Result<T> = std::result::Result<T, Box<dyn std::error::Error + Send + Sync>>;
11
12#[derive(Debug)]
14pub struct GqlParser;
15
16#[derive(Debug, Clone)]
17struct ParsedMatch {
18 patterns: Vec<GraphPattern>,
19 where_clause: Option<Predicate>,
20}
21
22#[derive(Debug, Clone)]
23enum GraphPattern {
24 Node(NodePattern),
25 Path(PathPattern),
26}
27
28#[derive(Debug, Clone)]
29struct NodePattern {
30 variable: String,
31 labels: Vec<String>,
32 properties: Option<Properties>,
33}
34
35#[derive(Debug, Clone)]
36struct PathPattern {
37 start_node: NodePattern,
38 edges: Vec<EdgeHop>,
39 end_node: NodePattern,
40}
41
42#[derive(Debug, Clone)]
43struct EdgeHop {
44 variable: Option<String>,
45 labels: Vec<String>,
46 direction: Direction,
47 properties: Option<Properties>,
48 min_hops: Option<usize>,
49 max_hops: Option<usize>,
50}
51
52#[derive(Debug, Clone)]
53struct ReturnClause {
54 items: Vec<ReturnItem>,
55 distinct: bool,
56}
57
58#[derive(Debug, Clone)]
59struct ReturnItem {
60 expr: Expr,
61 alias: Option<String>,
62}
63
64#[derive(Debug, Clone)]
65struct AlgorithmCall {
66 algorithm: AlgorithmType,
67 parameters: Vec<Expr>,
68}
69
70#[derive(Debug, Clone)]
71enum AlgorithmType {
72 ShortestPath { algorithm: ShortestPathAlgorithm },
73 PatternMatching,
75}
76
77#[derive(Debug, Clone)]
78enum ShortestPathAlgorithm {
79 Dijkstra,
80 BellmanFord,
81 AStar,
82 FloydWarshall,
83}
84
85#[derive(Debug, Clone)]
86struct OrderByClause {
87 items: Vec<OrderByItem>,
88}
89
90#[derive(Debug, Clone)]
91struct OrderByItem {
92 expr: Expr,
93 ascending: bool,
94}
95
96impl GqlParser {
97 pub fn new() -> Self {
98 Self
99 }
100
101 pub fn parse(&self, gql: &str) -> Result<PlanIR> {
103 let gql_lower = gql.trim().to_lowercase();
104
105 if gql_lower.starts_with("match") {
106 self.parse_match_query(gql)
107 } else if gql_lower.starts_with("create") {
108 self.parse_create_query(gql)
109 } else {
110 Err(Box::new(KotobaError::Parse(format!("Unsupported GQL operation: {}", gql))))
111 }
112 }
113
114 fn parse_match_query(&self, gql: &str) -> Result<PlanIR> {
116 let mut logical_plan = None;
119 let mut return_clause = None;
120 let mut order_by = None;
121 let mut skip = None;
122 let mut limit = None;
123
124 let parts: Vec<&str> = gql.split_whitespace().collect();
125 let mut i = 0;
126
127 if parts.get(i).map(|s| s.to_lowercase()) == Some("match".to_string()) {
129 i += 1;
130 let match_part = self.extract_clause(gql, "match", &["where", "return", "order", "skip", "limit"])?;
131 let parsed_match = self.parse_match_clause(&match_part)?;
132
133 if let Some(node_pattern) = parsed_match.patterns.first() {
135 match node_pattern {
136 GraphPattern::Node(node) => {
137 logical_plan = Some(LogicalOp::NodeScan {
138 label: node.labels.first().cloned().unwrap_or_else(|| "Node".to_string()),
139 as_: node.variable.clone(),
140 props: node.properties.clone(),
141 });
142 }
143 GraphPattern::Path(path) => {
144 logical_plan = Some(self.build_path_plan(&path)?);
146 }
147 }
148 }
149
150 if let Some(where_pred) = parsed_match.where_clause {
152 if let Some(plan) = logical_plan {
153 logical_plan = Some(LogicalOp::Filter {
154 pred: where_pred,
155 input: Box::new(plan),
156 });
157 }
158 }
159 }
160
161 if let Some(return_start) = gql.to_lowercase().find("return") {
163 let return_part = self.extract_clause_from_pos(gql, return_start + 6, &["order", "skip", "limit"])?;
164 return_clause = Some(self.parse_return_clause(&return_part)?);
165 }
166
167 if let Some(order_start) = gql.to_lowercase().find("order by") {
169 let order_part = self.extract_clause_from_pos(gql, order_start + 8, &["skip", "limit"])?;
170 order_by = Some(self.parse_order_by_clause(&order_part)?);
171 }
172
173 if let Some(skip_start) = gql.to_lowercase().find("skip") {
175 let skip_part = self.extract_clause_from_pos(gql, skip_start + 4, &["limit"])?;
176 skip = skip_part.trim().parse::<usize>().ok();
177 }
178
179 if let Some(limit_start) = gql.to_lowercase().find("limit") {
181 let limit_part = &gql[limit_start + 5..];
182 limit = limit_part.trim().parse::<usize>().ok();
183 }
184
185 let mut plan = logical_plan.unwrap_or(LogicalOp::NodeScan {
187 label: "Node".to_string(),
188 as_: "n".to_string(),
189 props: None,
190 });
191
192 if let Some(ret) = return_clause {
194 let cols: Vec<String> = ret.items.iter()
195 .map(|item| item.alias.clone().unwrap_or_else(|| format!("{}", item.expr)))
196 .collect();
197
198 plan = LogicalOp::Project {
199 cols,
200 input: Box::new(plan),
201 };
202 }
203
204 if let Some(order) = order_by {
206 let sort_keys: Vec<SortKey> = order.items.iter()
207 .map(|item| SortKey {
208 expr: item.expr.clone(),
209 asc: item.ascending,
210 })
211 .collect();
212
213 plan = LogicalOp::Sort {
214 keys: sort_keys,
215 input: Box::new(plan),
216 };
217 }
218
219 if let Some(lim) = limit {
221 plan = LogicalOp::Limit {
222 count: lim,
223 input: Box::new(plan),
224 };
225 }
226
227 Ok(PlanIR {
228 plan,
229 limit: limit.or(Some(100)), })
231 }
232
233 fn parse_create_query(&self, _gql: &str) -> Result<PlanIR> {
235 let plan = LogicalOp::NodeScan {
237 label: "Node".to_string(),
238 as_: "n".to_string(),
239 props: None,
240 };
241
242 Ok(PlanIR {
243 plan,
244 limit: Some(0),
245 })
246 }
247
248 fn parse_match_clause(&self, match_clause: &str) -> Result<ParsedMatch> {
250 let mut patterns = Vec::new();
251 let mut where_clause = None;
252
253 let (pattern_part, where_part) = if let Some(where_pos) = match_clause.to_lowercase().find(" where ") {
255 let pattern_str = match_clause[..where_pos].trim();
256 let where_str = &match_clause[where_pos + 7..].trim();
257 where_clause = Some(self.parse_where_clause(where_str)?);
258 (pattern_str, Some(where_str.to_string()))
259 } else {
260 (match_clause.trim(), None)
261 };
262
263 if pattern_part.contains("->") || pattern_part.contains("<-") {
265 patterns.push(GraphPattern::Path(self.parse_path_pattern(pattern_part)?));
266 } else {
267 patterns.push(GraphPattern::Node(self.parse_node_pattern_simple(pattern_part)?));
268 }
269
270 Ok(ParsedMatch {
271 patterns,
272 where_clause,
273 })
274 }
275
276 fn parse_node_pattern_simple(&self, pattern: &str) -> Result<NodePattern> {
278 let pattern = pattern.trim();
281 if !pattern.starts_with("(") || !pattern.ends_with(")") {
282 return Err(Box::new(KotobaError::Parse("Invalid node pattern".to_string())));
283 }
284
285 let inner = &pattern[1..pattern.len()-1];
286 let parts: Vec<&str> = inner.split(':').collect();
287
288 let variable = if parts.len() >= 2 {
289 parts[0].trim().to_string()
290 } else {
291 "n".to_string()
292 };
293
294 let labels_part = if parts.len() >= 2 { parts[1] } else { inner };
295 let (labels, properties) = self.parse_labels_and_properties(labels_part)?;
296
297 Ok(NodePattern {
298 variable,
299 labels,
300 properties,
301 })
302 }
303
304 fn parse_path_pattern(&self, pattern: &str) -> Result<PathPattern> {
306 let parts: Vec<&str> = pattern.split("->").collect();
309 if parts.len() < 2 {
310 return Err(Box::new(KotobaError::Parse("Invalid path pattern".to_string())));
311 }
312
313 let start_node = self.parse_node_pattern_simple(parts[0].trim())?;
314 let end_node = self.parse_node_pattern_simple(parts[parts.len()-1].trim())?;
315
316 let mut edges = Vec::new();
318 for i in 0..parts.len()-1 {
319 let part = parts[i];
320 if let Some(edge_start) = part.rfind("-[:") {
321 let edge_part = &part[edge_start..];
322 if let Some(edge_end) = edge_part.find("]-") {
323 let edge_str = &edge_part[2..edge_end];
324 let (labels, properties) = self.parse_labels_and_properties(edge_str)?;
325
326 edges.push(EdgeHop {
327 variable: None,
328 labels,
329 direction: Direction::Out,
330 properties,
331 min_hops: None,
332 max_hops: None,
333 });
334 }
335 }
336 }
337
338 Ok(PathPattern {
339 start_node,
340 edges,
341 end_node,
342 })
343 }
344
345 fn parse_labels_and_properties(&self, input: &str) -> Result<(Vec<String>, Option<Properties>)> {
347 let input = input.trim();
348
349 let (labels_str, props_str) = if let Some(props_start) = input.find('{') {
351 if let Some(props_end) = input[props_start..].find('}') {
352 let labels_part = input[..props_start].trim();
353 let props_part = &input[props_start..props_start + props_end + 1];
354 (labels_part, Some(props_part))
355 } else {
356 (input, None)
357 }
358 } else {
359 (input, None)
360 };
361
362 let labels: Vec<String> = labels_str
364 .split('|')
365 .map(|s| s.trim().to_string())
366 .filter(|s| !s.is_empty())
367 .collect();
368
369 let properties = if let Some(props) = props_str {
371 Some(self.parse_properties(props)?)
372 } else {
373 None
374 };
375
376 Ok((labels, properties))
377 }
378
379 fn parse_properties(&self, props_str: &str) -> Result<Properties> {
381 let mut props = HashMap::new();
383
384 let inner = &props_str[1..props_str.len()-1]; for pair in inner.split(',') {
386 let pair = pair.trim();
387 if let Some(colon_pos) = pair.find(':') {
388 let key = pair[..colon_pos].trim().to_string();
389 let value_str = pair[colon_pos + 1..].trim();
390
391 let value = if value_str.starts_with('"') && value_str.ends_with('"') {
392 Value::String(value_str[1..value_str.len()-1].to_string())
393 } else if let Ok(num) = value_str.parse::<i64>() {
394 Value::Int(num)
395 } else {
396 Value::String(value_str.to_string())
397 };
398
399 props.insert(key, value);
400 }
401 }
402
403 Ok(props)
404 }
405
406 fn parse_where_clause(&self, where_str: &str) -> Result<Predicate> {
408 self.parse_predicate(where_str)
410 }
411
412 fn parse_predicate(&self, pred_str: &str) -> Result<Predicate> {
414 let pred_str = pred_str.trim();
415
416 if let Some(and_pos) = self.find_logical_op(pred_str, "and") {
418 let left = self.parse_predicate(&pred_str[..and_pos])?;
419 let right = self.parse_predicate(&pred_str[and_pos + 3..])?;
420 return Ok(Predicate::And { and: vec![left, right] });
421 }
422
423 if let Some(or_pos) = self.find_logical_op(pred_str, "or") {
424 let left = self.parse_predicate(&pred_str[..or_pos])?;
425 let right = self.parse_predicate(&pred_str[or_pos + 2..])?;
426 return Ok(Predicate::Or { or: vec![left, right] });
427 }
428
429 self.parse_comparison(pred_str)
431 }
432
433 fn parse_comparison(&self, comp_str: &str) -> Result<Predicate> {
435 let operators = ["!=", ">=", "<=", ">", "<", "="];
436
437 for op in &operators {
438 if let Some(pos) = comp_str.find(op) {
439 let left_expr = self.parse_expr(comp_str[..pos].trim())?;
440 let right_expr = self.parse_expr(comp_str[pos + op.len()..].trim())?;
441
442 return match *op {
443 "=" => Ok(Predicate::Eq { eq: [left_expr, right_expr] }),
444 "!=" => Ok(Predicate::Ne { ne: [left_expr, right_expr] }),
445 ">" => Ok(Predicate::Gt { gt: [left_expr, right_expr] }),
446 "<" => Ok(Predicate::Lt { lt: [left_expr, right_expr] }),
447 ">=" => Ok(Predicate::Ge { ge: [left_expr, right_expr] }),
448 "<=" => Ok(Predicate::Le { le: [left_expr, right_expr] }),
449 _ => Err(Box::new(KotobaError::Parse(format!("Unknown operator: {}", op)))),
450 };
451 }
452 }
453
454 Err(Box::new(KotobaError::Parse(format!("No comparison operator found in: {}", comp_str))))
455 }
456
457 fn parse_expr(&self, expr_str: &str) -> Result<Expr> {
459 let expr_str = expr_str.trim();
460
461 if let Some(paren_pos) = expr_str.find('(') {
463 if expr_str.ends_with(')') {
464 let func_name = expr_str[..paren_pos].trim();
465 let args_str = &expr_str[paren_pos + 1..expr_str.len() - 1];
466
467 if let Some(algorithm) = self.parse_algorithm_function(func_name) {
469 let args = self.parse_function_args(args_str)?;
470 return Ok(Expr::Fn {
471 fn_: format!("algorithm_{}", self.algorithm_to_string(&algorithm)),
472 args,
473 });
474 } else {
475 let args = self.parse_function_args(args_str)?;
477 return Ok(Expr::Fn {
478 fn_: func_name.to_string(),
479 args,
480 });
481 }
482 }
483 }
484
485 if expr_str.starts_with('"') && expr_str.ends_with('"') {
487 let value = expr_str[1..expr_str.len()-1].to_string();
488 return Ok(Expr::Const(Value::String(value)));
489 }
490
491 if let Ok(num) = expr_str.parse::<i64>() {
493 return Ok(Expr::Const(Value::Int(num)));
494 }
495
496 if expr_str.contains('.') {
498 let parts: Vec<&str> = expr_str.split('.').collect();
500 if parts.len() == 2 {
501 return Ok(Expr::Fn {
502 fn_: "property".to_string(),
503 args: vec![
504 Expr::Var(parts[0].to_string()),
505 Expr::Const(Value::String(parts[1].to_string())),
506 ],
507 });
508 }
509 }
510
511 Ok(Expr::Var(expr_str.to_string()))
513 }
514
515 fn parse_algorithm_function(&self, func_name: &str) -> Option<AlgorithmType> {
517 match func_name.to_lowercase().as_str() {
518 "dijkstra" | "shortest_path" => Some(AlgorithmType::ShortestPath {
519 algorithm: ShortestPathAlgorithm::Dijkstra,
520 }),
521 "bellman_ford" => Some(AlgorithmType::ShortestPath {
522 algorithm: ShortestPathAlgorithm::BellmanFord,
523 }),
524 "astar" => Some(AlgorithmType::ShortestPath {
525 algorithm: ShortestPathAlgorithm::AStar,
526 }),
527 "floyd_warshall" => Some(AlgorithmType::ShortestPath {
528 algorithm: ShortestPathAlgorithm::FloydWarshall,
529 }),
530 "pattern_match" | "subgraph_isomorphism" => Some(AlgorithmType::PatternMatching),
544 _ => None,
545 }
546 }
547
548 fn algorithm_to_string(&self, algorithm: &AlgorithmType) -> String {
550 match algorithm {
551 AlgorithmType::ShortestPath { algorithm: sp } => match sp {
552 ShortestPathAlgorithm::Dijkstra => "dijkstra",
553 ShortestPathAlgorithm::BellmanFord => "bellman_ford",
554 ShortestPathAlgorithm::AStar => "astar",
555 ShortestPathAlgorithm::FloydWarshall => "floyd_warshall",
556 }.to_string(),
557 AlgorithmType::PatternMatching => "pattern_matching".to_string(),
566 }
567 }
568
569 fn parse_function_args(&self, args_str: &str) -> Result<Vec<Expr>> {
571 let mut args = Vec::new();
572
573 if args_str.trim().is_empty() {
574 return Ok(args);
575 }
576
577 for arg in args_str.split(',') {
578 let arg = arg.trim();
579 if !arg.is_empty() {
580 args.push(self.parse_expr(arg)?);
581 }
582 }
583
584 Ok(args)
585 }
586
587 fn parse_return_clause(&self, return_str: &str) -> Result<ReturnClause> {
589 let return_str = return_str.trim();
590 let mut items = Vec::new();
591 let distinct = return_str.to_lowercase().starts_with("distinct");
592
593 let items_str = if distinct {
594 &return_str[8..] } else {
596 return_str
597 };
598
599 for item in items_str.split(',') {
600 let item = item.trim();
601 let (expr_str, alias) = if let Some(as_pos) = item.to_lowercase().find(" as ") {
602 let expr_part = item[..as_pos].trim();
603 let alias_part = item[as_pos + 4..].trim();
604 (expr_part, Some(alias_part.to_string()))
605 } else {
606 (item, None)
607 };
608
609 let expr = self.parse_expr(expr_str)?;
610 items.push(ReturnItem { expr, alias });
611 }
612
613 Ok(ReturnClause { items, distinct })
614 }
615
616 fn parse_order_by_clause(&self, order_str: &str) -> Result<OrderByClause> {
618 let mut items = Vec::new();
619
620 for item in order_str.split(',') {
621 let item = item.trim();
622 let (expr_str, ascending) = if item.to_lowercase().ends_with(" desc") {
623 (&item[..item.len() - 5], false)
624 } else if item.to_lowercase().ends_with(" asc") {
625 (&item[..item.len() - 4], true)
626 } else {
627 (item, true) };
629
630 let expr = self.parse_expr(expr_str.trim())?;
631 items.push(OrderByItem { expr, ascending });
632 }
633
634 Ok(OrderByClause { items })
635 }
636
637 fn build_path_plan(&self, path: &PathPattern) -> Result<LogicalOp> {
639 let mut plan = LogicalOp::NodeScan {
641 label: path.start_node.labels.first().cloned().unwrap_or_else(|| "Node".to_string()),
642 as_: path.start_node.variable.clone(),
643 props: path.start_node.properties.clone(),
644 };
645
646 for (i, edge) in path.edges.iter().enumerate() {
648 let target_var = if i == path.edges.len() - 1 {
649 path.end_node.variable.clone()
650 } else {
651 format!("intermediate_{}", i)
652 };
653
654 let edge_pattern = EdgePattern {
655 label: edge.labels.first().cloned().unwrap_or_else(|| "EDGE".to_string()),
656 dir: edge.direction.clone(),
657 props: edge.properties.clone(),
658 };
659
660 plan = LogicalOp::Expand {
661 edge: edge_pattern,
662 to_as: target_var,
663 from: Box::new(plan),
664 };
665 }
666
667 Ok(plan)
668 }
669
670 fn find_logical_op(&self, text: &str, op: &str) -> Option<usize> {
672 let op_lower = op.to_lowercase();
673 let text_lower = text.to_lowercase();
674
675 let mut paren_depth = 0;
676 for (i, ch) in text.char_indices() {
677 match ch {
678 '(' => paren_depth += 1,
679 ')' => paren_depth -= 1,
680 _ => {}
681 }
682
683 if paren_depth == 0 {
684 if text_lower[i..].starts_with(&op_lower) {
685 let before = i.checked_sub(1).map(|pos| text.chars().nth(pos)).flatten();
687 let after = text.chars().nth(i + op.len());
688
689 let before_ok = before.map(|c| !c.is_alphanumeric()).unwrap_or(true);
690 let after_ok = after.map(|c| !c.is_alphanumeric()).unwrap_or(true);
691
692 if before_ok && after_ok {
693 return Some(i);
694 }
695 }
696 }
697 }
698
699 None
700 }
701
702 fn extract_clause(&self, gql: &str, start_keyword: &str, end_keywords: &[&str]) -> Result<String> {
704 let gql_lower = gql.to_lowercase();
705 let start_pos = gql_lower.find(start_keyword).unwrap_or(0) + start_keyword.len();
706
707 let mut end_pos = gql.len();
708 for keyword in end_keywords {
709 if let Some(pos) = gql_lower[start_pos..].find(keyword) {
710 end_pos = end_pos.min(start_pos + pos);
711 }
712 }
713
714 Ok(gql[start_pos..end_pos].trim().to_string())
715 }
716
717 fn extract_clause_from_pos(&self, gql: &str, start_pos: usize, end_keywords: &[&str]) -> Result<String> {
719 let gql_lower = gql.to_lowercase();
720
721 let mut end_pos = gql.len();
722 for keyword in end_keywords {
723 if let Some(pos) = gql_lower[start_pos..].find(keyword) {
724 end_pos = end_pos.min(start_pos + pos);
725 }
726 }
727
728 Ok(gql[start_pos..end_pos].trim().to_string())
729 }
730
731 pub fn gql_to_plan(&self, gql: &str) -> Result<LogicalOp> {
733 let plan_ir = self.parse(gql)?;
734 Ok(plan_ir.plan)
735 }
736}