1use std::collections::HashMap;
7use kotoba_core::prelude::*;
8use kotoba_graph::prelude::*;
9use kotoba_errors::KotobaError;
10
11type Result<T> = std::result::Result<T, Box<dyn std::error::Error + Send + Sync>>;
13
14#[derive(Debug)]
16pub struct GqlParser;
17
18#[derive(Debug, Clone)]
19struct ParsedMatch {
20 patterns: Vec<GraphPattern>,
21 where_clause: Option<Predicate>,
22}
23
24#[derive(Debug, Clone)]
25enum GraphPattern {
26 Node(NodePattern),
27 Path(PathPattern),
28}
29
30#[derive(Debug, Clone)]
31struct NodePattern {
32 variable: String,
33 labels: Vec<String>,
34 properties: Option<Properties>,
35}
36
37#[derive(Debug, Clone)]
38struct PathPattern {
39 start_node: NodePattern,
40 edges: Vec<EdgeHop>,
41 end_node: NodePattern,
42}
43
44#[derive(Debug, Clone)]
45struct EdgeHop {
46 variable: Option<String>,
47 labels: Vec<String>,
48 direction: Direction,
49 properties: Option<Properties>,
50 min_hops: Option<usize>,
51 max_hops: Option<usize>,
52}
53
54#[derive(Debug, Clone)]
55struct ReturnClause {
56 items: Vec<ReturnItem>,
57 distinct: bool,
58}
59
60#[derive(Debug, Clone)]
61struct ReturnItem {
62 expr: Expr,
63 alias: Option<String>,
64}
65
66#[derive(Debug, Clone)]
67struct AlgorithmCall {
68 algorithm: AlgorithmType,
69 parameters: Vec<Expr>,
70}
71
72#[derive(Debug, Clone)]
73enum AlgorithmType {
74 ShortestPath { algorithm: ShortestPathAlgorithm },
75 Centrality { algorithm: CentralityAlgorithm },
76 PatternMatching,
77}
78
79#[derive(Debug, Clone)]
80enum ShortestPathAlgorithm {
81 Dijkstra,
82 BellmanFord,
83 AStar,
84 FloydWarshall,
85}
86
87#[derive(Debug, Clone)]
88struct OrderByClause {
89 items: Vec<OrderByItem>,
90}
91
92#[derive(Debug, Clone)]
93struct OrderByItem {
94 expr: Expr,
95 ascending: bool,
96}
97
98impl GqlParser {
99 pub fn new() -> Self {
100 Self
101 }
102
103 pub fn parse(&self, gql: &str) -> Result<PlanIR> {
105 let gql_lower = gql.trim().to_lowercase();
106
107 if gql_lower.starts_with("match") {
108 self.parse_match_query(gql)
109 } else if gql_lower.starts_with("create") {
110 self.parse_create_query(gql)
111 } else {
112 Err(Box::new(KotobaError::Parse(format!("Unsupported GQL operation: {}", gql))))
113 }
114 }
115
116 fn parse_match_query(&self, gql: &str) -> Result<PlanIR> {
118 let mut logical_plan = None;
121 let mut return_clause = None;
122 let mut order_by = None;
123 let mut skip = None;
124 let mut limit = None;
125
126 let parts: Vec<&str> = gql.split_whitespace().collect();
127 let mut i = 0;
128
129 if parts.get(i).map(|s| s.to_lowercase()) == Some("match".to_string()) {
131 i += 1;
132 let match_part = self.extract_clause(gql, "match", &["where", "return", "order", "skip", "limit"])?;
133 let parsed_match = self.parse_match_clause(&match_part)?;
134
135 if let Some(node_pattern) = parsed_match.patterns.first() {
137 match node_pattern {
138 GraphPattern::Node(node) => {
139 logical_plan = Some(LogicalOp::NodeScan {
140 label: node.labels.first().cloned().unwrap_or_else(|| "Node".to_string()),
141 as_: node.variable.clone(),
142 props: node.properties.clone(),
143 });
144 }
145 GraphPattern::Path(path) => {
146 logical_plan = Some(self.build_path_plan(&path)?);
148 }
149 }
150 }
151
152 if let Some(where_pred) = parsed_match.where_clause {
154 if let Some(plan) = logical_plan {
155 logical_plan = Some(LogicalOp::Filter {
156 pred: where_pred,
157 input: Box::new(plan),
158 });
159 }
160 }
161 }
162
163 if let Some(return_start) = gql.to_lowercase().find("return") {
165 let return_part = self.extract_clause_from_pos(gql, return_start + 6, &["order", "skip", "limit"])?;
166 return_clause = Some(self.parse_return_clause(&return_part)?);
167 }
168
169 if let Some(order_start) = gql.to_lowercase().find("order by") {
171 let order_part = self.extract_clause_from_pos(gql, order_start + 8, &["skip", "limit"])?;
172 order_by = Some(self.parse_order_by_clause(&order_part)?);
173 }
174
175 if let Some(skip_start) = gql.to_lowercase().find("skip") {
177 let skip_part = self.extract_clause_from_pos(gql, skip_start + 4, &["limit"])?;
178 skip = skip_part.trim().parse::<usize>().ok();
179 }
180
181 if let Some(limit_start) = gql.to_lowercase().find("limit") {
183 let limit_part = &gql[limit_start + 5..];
184 limit = limit_part.trim().parse::<usize>().ok();
185 }
186
187 let mut plan = logical_plan.unwrap_or(LogicalOp::NodeScan {
189 label: "Node".to_string(),
190 as_: "n".to_string(),
191 props: None,
192 });
193
194 if let Some(ret) = return_clause {
196 let cols: Vec<String> = ret.items.iter()
197 .map(|item| item.alias.clone().unwrap_or_else(|| format!("{}", item.expr)))
198 .collect();
199
200 plan = LogicalOp::Project {
201 cols,
202 input: Box::new(plan),
203 };
204 }
205
206 if let Some(order) = order_by {
208 let sort_keys: Vec<SortKey> = order.items.iter()
209 .map(|item| SortKey {
210 expr: item.expr.clone(),
211 asc: item.ascending,
212 })
213 .collect();
214
215 plan = LogicalOp::Sort {
216 keys: sort_keys,
217 input: Box::new(plan),
218 };
219 }
220
221 if let Some(lim) = limit {
223 plan = LogicalOp::Limit {
224 count: lim,
225 input: Box::new(plan),
226 };
227 }
228
229 Ok(PlanIR {
230 plan,
231 limit: limit.or(Some(100)), })
233 }
234
235 fn parse_create_query(&self, _gql: &str) -> Result<PlanIR> {
237 let plan = LogicalOp::NodeScan {
239 label: "Node".to_string(),
240 as_: "n".to_string(),
241 props: None,
242 };
243
244 Ok(PlanIR {
245 plan,
246 limit: Some(0),
247 })
248 }
249
250 fn parse_match_clause(&self, match_clause: &str) -> Result<ParsedMatch> {
252 let mut patterns = Vec::new();
253 let mut where_clause = None;
254
255 let (pattern_part, where_part) = if let Some(where_pos) = match_clause.to_lowercase().find(" where ") {
257 let pattern_str = match_clause[..where_pos].trim();
258 let where_str = &match_clause[where_pos + 7..].trim();
259 where_clause = Some(self.parse_where_clause(where_str)?);
260 (pattern_str, Some(where_str.to_string()))
261 } else {
262 (match_clause.trim(), None)
263 };
264
265 if pattern_part.contains("->") || pattern_part.contains("<-") {
267 patterns.push(GraphPattern::Path(self.parse_path_pattern(pattern_part)?));
268 } else {
269 patterns.push(GraphPattern::Node(self.parse_node_pattern_simple(pattern_part)?));
270 }
271
272 Ok(ParsedMatch {
273 patterns,
274 where_clause,
275 })
276 }
277
278 fn parse_node_pattern_simple(&self, pattern: &str) -> Result<NodePattern> {
280 let pattern = pattern.trim();
283 if !pattern.starts_with("(") || !pattern.ends_with(")") {
284 return Err(Box::new(KotobaError::Parse("Invalid node pattern".to_string())));
285 }
286
287 let inner = &pattern[1..pattern.len()-1];
288 let parts: Vec<&str> = inner.split(':').collect();
289
290 let variable = if parts.len() >= 2 {
291 parts[0].trim().to_string()
292 } else {
293 "n".to_string()
294 };
295
296 let labels_part = if parts.len() >= 2 { parts[1] } else { inner };
297 let (labels, properties) = self.parse_labels_and_properties(labels_part)?;
298
299 Ok(NodePattern {
300 variable,
301 labels,
302 properties,
303 })
304 }
305
306 fn parse_path_pattern(&self, pattern: &str) -> Result<PathPattern> {
308 let parts: Vec<&str> = pattern.split("->").collect();
311 if parts.len() < 2 {
312 return Err(Box::new(KotobaError::Parse("Invalid path pattern".to_string())));
313 }
314
315 let start_node = self.parse_node_pattern_simple(parts[0].trim())?;
316 let end_node = self.parse_node_pattern_simple(parts[parts.len()-1].trim())?;
317
318 let mut edges = Vec::new();
320 for i in 0..parts.len()-1 {
321 let part = parts[i];
322 if let Some(edge_start) = part.rfind("-[:") {
323 let edge_part = &part[edge_start..];
324 if let Some(edge_end) = edge_part.find("]-") {
325 let edge_str = &edge_part[2..edge_end];
326 let (labels, properties) = self.parse_labels_and_properties(edge_str)?;
327
328 edges.push(EdgeHop {
329 variable: None,
330 labels,
331 direction: Direction::Out,
332 properties,
333 min_hops: None,
334 max_hops: None,
335 });
336 }
337 }
338 }
339
340 Ok(PathPattern {
341 start_node,
342 edges,
343 end_node,
344 })
345 }
346
347 fn parse_labels_and_properties(&self, input: &str) -> Result<(Vec<String>, Option<Properties>)> {
349 let input = input.trim();
350
351 let (labels_str, props_str) = if let Some(props_start) = input.find('{') {
353 if let Some(props_end) = input[props_start..].find('}') {
354 let labels_part = input[..props_start].trim();
355 let props_part = &input[props_start..props_start + props_end + 1];
356 (labels_part, Some(props_part))
357 } else {
358 (input, None)
359 }
360 } else {
361 (input, None)
362 };
363
364 let labels: Vec<String> = labels_str
366 .split('|')
367 .map(|s| s.trim().to_string())
368 .filter(|s| !s.is_empty())
369 .collect();
370
371 let properties = if let Some(props) = props_str {
373 Some(self.parse_properties(props)?)
374 } else {
375 None
376 };
377
378 Ok((labels, properties))
379 }
380
381 fn parse_properties(&self, props_str: &str) -> Result<Properties> {
383 let mut props = HashMap::new();
385
386 let inner = &props_str[1..props_str.len()-1]; for pair in inner.split(',') {
388 let pair = pair.trim();
389 if let Some(colon_pos) = pair.find(':') {
390 let key = pair[..colon_pos].trim().to_string();
391 let value_str = pair[colon_pos + 1..].trim();
392
393 let value = if value_str.starts_with('"') && value_str.ends_with('"') {
394 Value::String(value_str[1..value_str.len()-1].to_string())
395 } else if let Ok(num) = value_str.parse::<i64>() {
396 Value::Int(num)
397 } else {
398 Value::String(value_str.to_string())
399 };
400
401 props.insert(key, value);
402 }
403 }
404
405 Ok(props)
406 }
407
408 fn parse_where_clause(&self, where_str: &str) -> Result<Predicate> {
410 self.parse_predicate(where_str)
412 }
413
414 fn parse_predicate(&self, pred_str: &str) -> Result<Predicate> {
416 let pred_str = pred_str.trim();
417
418 if let Some(and_pos) = self.find_logical_op(pred_str, "and") {
420 let left = self.parse_predicate(&pred_str[..and_pos])?;
421 let right = self.parse_predicate(&pred_str[and_pos + 3..])?;
422 return Ok(Predicate::And { and: vec![left, right] });
423 }
424
425 if let Some(or_pos) = self.find_logical_op(pred_str, "or") {
426 let left = self.parse_predicate(&pred_str[..or_pos])?;
427 let right = self.parse_predicate(&pred_str[or_pos + 2..])?;
428 return Ok(Predicate::Or { or: vec![left, right] });
429 }
430
431 self.parse_comparison(pred_str)
433 }
434
435 fn parse_comparison(&self, comp_str: &str) -> Result<Predicate> {
437 let operators = ["!=", ">=", "<=", ">", "<", "="];
438
439 for op in &operators {
440 if let Some(pos) = comp_str.find(op) {
441 let left_expr = self.parse_expr(comp_str[..pos].trim())?;
442 let right_expr = self.parse_expr(comp_str[pos + op.len()..].trim())?;
443
444 return match *op {
445 "=" => Ok(Predicate::Eq { eq: [left_expr, right_expr] }),
446 "!=" => Ok(Predicate::Ne { ne: [left_expr, right_expr] }),
447 ">" => Ok(Predicate::Gt { gt: [left_expr, right_expr] }),
448 "<" => Ok(Predicate::Lt { lt: [left_expr, right_expr] }),
449 ">=" => Ok(Predicate::Ge { ge: [left_expr, right_expr] }),
450 "<=" => Ok(Predicate::Le { le: [left_expr, right_expr] }),
451 _ => Err(Box::new(KotobaError::Parse(format!("Unknown operator: {}", op)))),
452 };
453 }
454 }
455
456 Err(Box::new(KotobaError::Parse(format!("No comparison operator found in: {}", comp_str))))
457 }
458
459 fn parse_expr(&self, expr_str: &str) -> Result<Expr> {
461 let expr_str = expr_str.trim();
462
463 if let Some(paren_pos) = expr_str.find('(') {
465 if expr_str.ends_with(')') {
466 let func_name = expr_str[..paren_pos].trim();
467 let args_str = &expr_str[paren_pos + 1..expr_str.len() - 1];
468
469 if let Some(algorithm) = self.parse_algorithm_function(func_name) {
471 let args = self.parse_function_args(args_str)?;
472 return Ok(Expr::Fn {
473 fn_: format!("algorithm_{}", self.algorithm_to_string(&algorithm)),
474 args,
475 });
476 } else {
477 let args = self.parse_function_args(args_str)?;
479 return Ok(Expr::Fn {
480 fn_: func_name.to_string(),
481 args,
482 });
483 }
484 }
485 }
486
487 if expr_str.starts_with('"') && expr_str.ends_with('"') {
489 let value = expr_str[1..expr_str.len()-1].to_string();
490 return Ok(Expr::Const(Value::String(value)));
491 }
492
493 if let Ok(num) = expr_str.parse::<i64>() {
495 return Ok(Expr::Const(Value::Int(num)));
496 }
497
498 if expr_str.contains('.') {
500 let parts: Vec<&str> = expr_str.split('.').collect();
502 if parts.len() == 2 {
503 return Ok(Expr::Fn {
504 fn_: "property".to_string(),
505 args: vec![
506 Expr::Var(parts[0].to_string()),
507 Expr::Const(Value::String(parts[1].to_string())),
508 ],
509 });
510 }
511 }
512
513 Ok(Expr::Var(expr_str.to_string()))
515 }
516
517 fn parse_algorithm_function(&self, func_name: &str) -> Option<AlgorithmType> {
519 match func_name.to_lowercase().as_str() {
520 "dijkstra" | "shortest_path" => Some(AlgorithmType::ShortestPath {
521 algorithm: ShortestPathAlgorithm::Dijkstra,
522 }),
523 "bellman_ford" => Some(AlgorithmType::ShortestPath {
524 algorithm: ShortestPathAlgorithm::BellmanFord,
525 }),
526 "astar" => Some(AlgorithmType::ShortestPath {
527 algorithm: ShortestPathAlgorithm::AStar,
528 }),
529 "floyd_warshall" => Some(AlgorithmType::ShortestPath {
530 algorithm: ShortestPathAlgorithm::FloydWarshall,
531 }),
532 "degree_centrality" => Some(AlgorithmType::Centrality {
533 algorithm: CentralityAlgorithm::Degree,
534 }),
535 "betweenness_centrality" => Some(AlgorithmType::Centrality {
536 algorithm: CentralityAlgorithm::Betweenness,
537 }),
538 "closeness_centrality" => Some(AlgorithmType::Centrality {
539 algorithm: CentralityAlgorithm::Closeness,
540 }),
541 "pagerank" => Some(AlgorithmType::Centrality {
542 algorithm: CentralityAlgorithm::PageRank,
543 }),
544 "pattern_match" | "subgraph_isomorphism" => Some(AlgorithmType::PatternMatching),
545 _ => None,
546 }
547 }
548
549 fn algorithm_to_string(&self, algorithm: &AlgorithmType) -> String {
551 match algorithm {
552 AlgorithmType::ShortestPath { algorithm: sp } => match sp {
553 ShortestPathAlgorithm::Dijkstra => "dijkstra",
554 ShortestPathAlgorithm::BellmanFord => "bellman_ford",
555 ShortestPathAlgorithm::AStar => "astar",
556 ShortestPathAlgorithm::FloydWarshall => "floyd_warshall",
557 }.to_string(),
558 AlgorithmType::Centrality { algorithm: c } => match c {
559 CentralityAlgorithm::Degree => "degree_centrality",
560 CentralityAlgorithm::Betweenness => "betweenness_centrality",
561 CentralityAlgorithm::Closeness => "closeness_centrality",
562 CentralityAlgorithm::Eigenvector => "eigenvector_centrality",
563 CentralityAlgorithm::PageRank => "pagerank",
564 }.to_string(),
565 AlgorithmType::PatternMatching => "pattern_matching".to_string(),
566 }
567 }
568
569 fn parse_function_args(&self, args_str: &str) -> Result<Vec<Expr>> {
571 let mut args = Vec::new();
572
573 if args_str.trim().is_empty() {
574 return Ok(args);
575 }
576
577 for arg in args_str.split(',') {
578 let arg = arg.trim();
579 if !arg.is_empty() {
580 args.push(self.parse_expr(arg)?);
581 }
582 }
583
584 Ok(args)
585 }
586
587 fn parse_return_clause(&self, return_str: &str) -> Result<ReturnClause> {
589 let return_str = return_str.trim();
590 let mut items = Vec::new();
591 let distinct = return_str.to_lowercase().starts_with("distinct");
592
593 let items_str = if distinct {
594 &return_str[8..] } else {
596 return_str
597 };
598
599 for item in items_str.split(',') {
600 let item = item.trim();
601 let (expr_str, alias) = if let Some(as_pos) = item.to_lowercase().find(" as ") {
602 let expr_part = item[..as_pos].trim();
603 let alias_part = item[as_pos + 4..].trim();
604 (expr_part, Some(alias_part.to_string()))
605 } else {
606 (item, None)
607 };
608
609 let expr = self.parse_expr(expr_str)?;
610 items.push(ReturnItem { expr, alias });
611 }
612
613 Ok(ReturnClause { items, distinct })
614 }
615
616 fn parse_order_by_clause(&self, order_str: &str) -> Result<OrderByClause> {
618 let mut items = Vec::new();
619
620 for item in order_str.split(',') {
621 let item = item.trim();
622 let (expr_str, ascending) = if item.to_lowercase().ends_with(" desc") {
623 (&item[..item.len() - 5], false)
624 } else if item.to_lowercase().ends_with(" asc") {
625 (&item[..item.len() - 4], true)
626 } else {
627 (item, true) };
629
630 let expr = self.parse_expr(expr_str.trim())?;
631 items.push(OrderByItem { expr, ascending });
632 }
633
634 Ok(OrderByClause { items })
635 }
636
637 fn build_path_plan(&self, path: &PathPattern) -> Result<LogicalOp> {
639 let mut plan = LogicalOp::NodeScan {
641 label: path.start_node.labels.first().cloned().unwrap_or_else(|| "Node".to_string()),
642 as_: path.start_node.variable.clone(),
643 props: path.start_node.properties.clone(),
644 };
645
646 for (i, edge) in path.edges.iter().enumerate() {
648 let target_var = if i == path.edges.len() - 1 {
649 path.end_node.variable.clone()
650 } else {
651 format!("intermediate_{}", i)
652 };
653
654 let edge_pattern = EdgePattern {
655 label: edge.labels.first().cloned().unwrap_or_else(|| "EDGE".to_string()),
656 dir: edge.direction.clone(),
657 props: edge.properties.clone(),
658 };
659
660 plan = LogicalOp::Expand {
661 edge: edge_pattern,
662 to_as: target_var,
663 from: Box::new(plan),
664 };
665 }
666
667 Ok(plan)
668 }
669
670 fn find_logical_op(&self, text: &str, op: &str) -> Option<usize> {
672 let op_lower = op.to_lowercase();
673 let text_lower = text.to_lowercase();
674
675 let mut paren_depth = 0;
676 for (i, ch) in text.char_indices() {
677 match ch {
678 '(' => paren_depth += 1,
679 ')' => paren_depth -= 1,
680 _ => {}
681 }
682
683 if paren_depth == 0 {
684 if text_lower[i..].starts_with(&op_lower) {
685 let before = i.checked_sub(1).map(|pos| text.chars().nth(pos)).flatten();
687 let after = text.chars().nth(i + op.len());
688
689 let before_ok = before.map(|c| !c.is_alphanumeric()).unwrap_or(true);
690 let after_ok = after.map(|c| !c.is_alphanumeric()).unwrap_or(true);
691
692 if before_ok && after_ok {
693 return Some(i);
694 }
695 }
696 }
697 }
698
699 None
700 }
701
702 fn extract_clause(&self, gql: &str, start_keyword: &str, end_keywords: &[&str]) -> Result<String> {
704 let gql_lower = gql.to_lowercase();
705 let start_pos = gql_lower.find(start_keyword).unwrap_or(0) + start_keyword.len();
706
707 let mut end_pos = gql.len();
708 for keyword in end_keywords {
709 if let Some(pos) = gql_lower[start_pos..].find(keyword) {
710 end_pos = end_pos.min(start_pos + pos);
711 }
712 }
713
714 Ok(gql[start_pos..end_pos].trim().to_string())
715 }
716
717 fn extract_clause_from_pos(&self, gql: &str, start_pos: usize, end_keywords: &[&str]) -> Result<String> {
719 let gql_lower = gql.to_lowercase();
720
721 let mut end_pos = gql.len();
722 for keyword in end_keywords {
723 if let Some(pos) = gql_lower[start_pos..].find(keyword) {
724 end_pos = end_pos.min(start_pos + pos);
725 }
726 }
727
728 Ok(gql[start_pos..end_pos].trim().to_string())
729 }
730
731 pub fn gql_to_plan(&self, gql: &str) -> Result<LogicalOp> {
733 let plan_ir = self.parse(gql)?;
734 Ok(plan_ir.plan)
735 }
736}