1use std::collections::{BTreeMap, HashMap, HashSet};
43use std::fmt;
44use std::rc::Rc;
45
46use harn_vm::VmValue;
47
48use super::symbol_graph::{EdgeKind, NodeId, NodeKind, SymbolGraph};
49
50pub const MAX_ROWS: usize = 10_000;
57
58pub const MAX_PATTERNS: usize = 3;
65
66const VAR_LENGTH_MAX_DEPTH: u32 = 4;
70
71#[derive(Debug, Clone, Eq, PartialEq)]
76pub enum CypherError {
77 LexError(String),
79 ParseError(String),
81 ExecError(String),
84}
85
86impl fmt::Display for CypherError {
87 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
88 match self {
89 CypherError::LexError(s) => write!(f, "lex error: {s}"),
90 CypherError::ParseError(s) => write!(f, "parse error: {s}"),
91 CypherError::ExecError(s) => write!(f, "exec error: {s}"),
92 }
93 }
94}
95
96impl std::error::Error for CypherError {}
97
98#[derive(Debug, Clone, Eq, PartialEq)]
100pub enum CypherValue {
101 Null,
103 String(String),
105 Int(i64),
107 Bool(bool),
109}
110
111impl CypherValue {
112 pub fn to_vm(&self) -> VmValue {
114 match self {
115 CypherValue::Null => VmValue::Nil,
116 CypherValue::String(s) => VmValue::String(Rc::from(s.as_str())),
117 CypherValue::Int(n) => VmValue::Int(*n),
118 CypherValue::Bool(b) => VmValue::Bool(*b),
119 }
120 }
121}
122
123pub type CypherRow = BTreeMap<String, CypherValue>;
125
126pub fn execute(query: &str, graph: &SymbolGraph) -> Result<Vec<CypherRow>, CypherError> {
128 let tokens = lex(query)?;
129 let ast = parse(&tokens)?;
130 exec(&ast, graph)
131}
132
133#[derive(Debug, Clone, PartialEq)]
138enum Token {
139 Keyword(String),
141 Ident(String),
142 Str(String),
143 Int(i64),
144 LParen,
145 RParen,
146 LBrace,
147 RBrace,
148 LBracket,
149 RBracket,
150 Colon,
151 Comma,
152 Dot,
153 Eq,
154 Neq,
155 Lt,
156 Le,
157 Gt,
158 Ge,
159 Star,
160 DotDot,
161 Dash,
162 Arrow, LeftArrow, }
165
166fn lex(input: &str) -> Result<Vec<Token>, CypherError> {
167 let mut out: Vec<Token> = Vec::new();
168 let bytes = input.as_bytes();
169 let mut i = 0;
170 while i < bytes.len() {
171 let b = bytes[i];
172 if b.is_ascii_whitespace() {
173 i += 1;
174 continue;
175 }
176 if b == b'(' {
177 out.push(Token::LParen);
178 i += 1;
179 continue;
180 }
181 if b == b')' {
182 out.push(Token::RParen);
183 i += 1;
184 continue;
185 }
186 if b == b'{' {
187 out.push(Token::LBrace);
188 i += 1;
189 continue;
190 }
191 if b == b'}' {
192 out.push(Token::RBrace);
193 i += 1;
194 continue;
195 }
196 if b == b'[' {
197 out.push(Token::LBracket);
198 i += 1;
199 continue;
200 }
201 if b == b']' {
202 out.push(Token::RBracket);
203 i += 1;
204 continue;
205 }
206 if b == b':' {
207 out.push(Token::Colon);
208 i += 1;
209 continue;
210 }
211 if b == b',' {
212 out.push(Token::Comma);
213 i += 1;
214 continue;
215 }
216 if b == b'*' {
217 out.push(Token::Star);
218 i += 1;
219 continue;
220 }
221 if b == b'=' {
222 out.push(Token::Eq);
223 i += 1;
224 continue;
225 }
226 if b == b'.' {
227 if i + 1 < bytes.len() && bytes[i + 1] == b'.' {
228 out.push(Token::DotDot);
229 i += 2;
230 } else {
231 out.push(Token::Dot);
232 i += 1;
233 }
234 continue;
235 }
236 if b == b'<' {
237 if i + 1 < bytes.len() && bytes[i + 1] == b'-' {
238 out.push(Token::LeftArrow);
239 i += 2;
240 } else if i + 1 < bytes.len() && bytes[i + 1] == b'=' {
241 out.push(Token::Le);
242 i += 2;
243 } else if i + 1 < bytes.len() && bytes[i + 1] == b'>' {
244 out.push(Token::Neq);
245 i += 2;
246 } else {
247 out.push(Token::Lt);
248 i += 1;
249 }
250 continue;
251 }
252 if b == b'>' {
253 if i + 1 < bytes.len() && bytes[i + 1] == b'=' {
254 out.push(Token::Ge);
255 i += 2;
256 } else {
257 out.push(Token::Gt);
258 i += 1;
259 }
260 continue;
261 }
262 if b == b'!' {
263 if i + 1 < bytes.len() && bytes[i + 1] == b'=' {
264 out.push(Token::Neq);
265 i += 2;
266 continue;
267 }
268 return Err(CypherError::LexError(format!(
269 "unexpected '!' at byte {i} (expected '!=')"
270 )));
271 }
272 if b == b'-' {
273 if i + 1 < bytes.len() && bytes[i + 1] == b'>' {
274 out.push(Token::Arrow);
275 i += 2;
276 } else {
277 out.push(Token::Dash);
278 i += 1;
279 }
280 continue;
281 }
282 if b == b'\'' || b == b'"' {
283 let quote = b;
284 let start = i + 1;
285 i += 1;
286 while i < bytes.len() && bytes[i] != quote {
287 if bytes[i] == b'\\' && i + 1 < bytes.len() {
288 i += 2;
289 } else {
290 i += 1;
291 }
292 }
293 if i >= bytes.len() {
294 return Err(CypherError::LexError("unterminated string literal".into()));
295 }
296 let raw = &input[start..i];
297 out.push(Token::Str(unescape(raw)));
298 i += 1;
299 continue;
300 }
301 if b.is_ascii_digit() {
302 let start = i;
303 while i < bytes.len() && bytes[i].is_ascii_digit() {
304 i += 1;
305 }
306 let n: i64 = input[start..i].parse().map_err(|err| {
307 CypherError::LexError(format!("bad integer at byte {start}: {err}"))
308 })?;
309 out.push(Token::Int(n));
310 continue;
311 }
312 if b.is_ascii_alphabetic() || b == b'_' {
313 let start = i;
314 while i < bytes.len() && (bytes[i].is_ascii_alphanumeric() || bytes[i] == b'_') {
315 i += 1;
316 }
317 let word = &input[start..i];
318 let upper = word.to_ascii_uppercase();
319 if is_keyword(&upper) {
320 out.push(Token::Keyword(upper));
321 } else {
322 out.push(Token::Ident(word.to_string()));
323 }
324 continue;
325 }
326 return Err(CypherError::LexError(format!(
327 "unexpected character `{}` at byte {i}",
328 char::from(b)
329 )));
330 }
331 Ok(out)
332}
333
334fn is_keyword(word: &str) -> bool {
335 matches!(
336 word,
337 "MATCH" | "WHERE" | "RETURN" | "AND" | "OR" | "AS" | "NOT" | "TRUE" | "FALSE"
338 )
339}
340
341fn unescape(raw: &str) -> String {
342 let mut out = String::with_capacity(raw.len());
343 let mut iter = raw.chars();
344 while let Some(c) = iter.next() {
345 if c == '\\' {
346 if let Some(next) = iter.next() {
347 out.push(match next {
348 'n' => '\n',
349 't' => '\t',
350 'r' => '\r',
351 '\\' => '\\',
352 '\'' => '\'',
353 '"' => '"',
354 other => other,
355 });
356 }
357 } else {
358 out.push(c);
359 }
360 }
361 out
362}
363
364#[derive(Debug, Clone)]
369struct Query {
370 matches: Vec<Pattern>,
371 where_clause: Option<Expr>,
372 projections: Vec<Projection>,
373}
374
375#[derive(Debug, Clone)]
376struct Pattern {
377 head: NodePat,
378 steps: Vec<RelStep>,
379}
380
381#[derive(Debug, Clone)]
382struct NodePat {
383 var: String,
384 label: Option<String>,
385 props: BTreeMap<String, Literal>,
386}
387
388#[derive(Debug, Clone)]
389struct RelStep {
390 forward: bool,
392 edge_label: String,
393 var_length: Option<(u32, u32)>,
394 target: NodePat,
395}
396
397#[derive(Debug, Clone)]
398enum Expr {
399 And(Box<Expr>, Box<Expr>),
400 Or(Box<Expr>, Box<Expr>),
401 Not(Box<Expr>),
402 Compare(Operand, CmpOp, Operand),
403 Bool(bool),
404}
405
406#[derive(Debug, Clone, Copy, PartialEq, Eq)]
407enum CmpOp {
408 Eq,
409 Neq,
410 Lt,
411 Le,
412 Gt,
413 Ge,
414}
415
416#[derive(Debug, Clone)]
417enum Operand {
418 Path {
420 var: String,
421 property: Option<String>,
422 },
423 Literal(Literal),
424}
425
426#[derive(Debug, Clone, PartialEq, Eq)]
427enum Literal {
428 Str(String),
429 Int(i64),
430 Bool(bool),
431}
432
433#[derive(Debug, Clone)]
434struct Projection {
435 operand: Operand,
436 alias: String,
440 explicit_alias: bool,
445}
446
447struct Parser<'a> {
452 tokens: &'a [Token],
453 pos: usize,
454}
455
456fn parse(tokens: &[Token]) -> Result<Query, CypherError> {
457 let mut p = Parser { tokens, pos: 0 };
458 let q = p.parse_query()?;
459 if p.pos != tokens.len() {
460 return Err(CypherError::ParseError(format!(
461 "trailing tokens after RETURN at position {} ({:?})",
462 p.pos, tokens[p.pos]
463 )));
464 }
465 Ok(q)
466}
467
468impl<'a> Parser<'a> {
469 fn peek(&self) -> Option<&Token> {
470 self.tokens.get(self.pos)
471 }
472
473 fn advance(&mut self) -> Option<&Token> {
474 let t = self.tokens.get(self.pos);
475 self.pos += 1;
476 t
477 }
478
479 fn expect_keyword(&mut self, word: &str) -> Result<(), CypherError> {
480 match self.advance() {
481 Some(Token::Keyword(k)) if k == word => Ok(()),
482 other => Err(CypherError::ParseError(format!(
483 "expected `{word}`, got {other:?}"
484 ))),
485 }
486 }
487
488 fn match_keyword(&mut self, word: &str) -> bool {
489 if matches!(self.peek(), Some(Token::Keyword(k)) if k == word) {
490 self.pos += 1;
491 true
492 } else {
493 false
494 }
495 }
496
497 fn expect(&mut self, want: &Token) -> Result<(), CypherError> {
498 let next = self.advance();
499 if next == Some(want) {
500 Ok(())
501 } else {
502 Err(CypherError::ParseError(format!(
503 "expected {want:?}, got {next:?}"
504 )))
505 }
506 }
507
508 fn parse_query(&mut self) -> Result<Query, CypherError> {
509 self.expect_keyword("MATCH")?;
510 let mut matches = vec![self.parse_pattern()?];
511 while matches!(self.peek(), Some(Token::Comma)) {
512 self.advance();
513 matches.push(self.parse_pattern()?);
514 if matches.len() > MAX_PATTERNS {
515 return Err(CypherError::ParseError(format!(
516 "too many disjoint MATCH patterns (got {}, cap is {}); \
517 join through edge syntax (`-[:KIND]->`) instead of a cartesian product",
518 matches.len(),
519 MAX_PATTERNS
520 )));
521 }
522 }
523 let where_clause = if self.match_keyword("WHERE") {
524 Some(self.parse_expr()?)
525 } else {
526 None
527 };
528 self.expect_keyword("RETURN")?;
529 let mut projections = vec![self.parse_projection()?];
530 while matches!(self.peek(), Some(Token::Comma)) {
531 self.advance();
532 projections.push(self.parse_projection()?);
533 }
534 resolve_projection_aliases(&mut projections)?;
535 Ok(Query {
536 matches,
537 where_clause,
538 projections,
539 })
540 }
541
542 fn parse_pattern(&mut self) -> Result<Pattern, CypherError> {
543 let head = self.parse_node_pat()?;
544 let mut steps: Vec<RelStep> = Vec::new();
545 loop {
546 let forward = match self.peek() {
548 Some(Token::Dash) => true,
549 Some(Token::LeftArrow) => false,
550 _ => break,
551 };
552 self.advance(); self.expect(&Token::LBracket)?;
554 self.expect(&Token::Colon)?;
555 let edge_label = match self.advance() {
556 Some(Token::Ident(s)) => s.clone(),
557 other => {
558 return Err(CypherError::ParseError(format!(
559 "expected edge label, got {other:?}"
560 )))
561 }
562 };
563 let var_length = if matches!(self.peek(), Some(Token::Star)) {
564 self.advance();
565 let lo = if matches!(self.peek(), Some(Token::Int(_))) {
566 if let Some(Token::Int(n)) = self.advance() {
567 Some(*n)
568 } else {
569 None
570 }
571 } else {
572 None
573 };
574 let bounds = if matches!(self.peek(), Some(Token::DotDot)) {
575 self.advance();
576 let hi = if matches!(self.peek(), Some(Token::Int(_))) {
577 if let Some(Token::Int(n)) = self.advance() {
578 Some(*n)
579 } else {
580 None
581 }
582 } else {
583 None
584 };
585 let hi_v = hi.map(|n| n as u32).unwrap_or(VAR_LENGTH_MAX_DEPTH);
589 (lo.unwrap_or(1) as u32, hi_v)
590 } else {
591 let lo_v = lo.unwrap_or(1) as u32;
593 (lo_v, lo_v.max(3))
594 };
595 Some(bounds)
596 } else {
597 None
598 };
599 self.expect(&Token::RBracket)?;
600 if forward {
601 self.expect(&Token::Arrow)?;
602 } else {
603 self.expect(&Token::Dash)?;
604 }
605 let target = self.parse_node_pat()?;
606 steps.push(RelStep {
607 forward,
608 edge_label,
609 var_length,
610 target,
611 });
612 }
613 Ok(Pattern { head, steps })
614 }
615
616 fn parse_node_pat(&mut self) -> Result<NodePat, CypherError> {
617 self.expect(&Token::LParen)?;
618 let var = match self.advance() {
619 Some(Token::Ident(s)) => s.clone(),
620 other => {
621 return Err(CypherError::ParseError(format!(
622 "expected node variable, got {other:?}"
623 )))
624 }
625 };
626 let label = if matches!(self.peek(), Some(Token::Colon)) {
627 self.advance();
628 match self.advance() {
629 Some(Token::Ident(s)) => Some(s.clone()),
630 other => {
631 return Err(CypherError::ParseError(format!(
632 "expected node label, got {other:?}"
633 )))
634 }
635 }
636 } else {
637 None
638 };
639 let props = if matches!(self.peek(), Some(Token::LBrace)) {
640 self.parse_props()?
641 } else {
642 BTreeMap::new()
643 };
644 self.expect(&Token::RParen)?;
645 Ok(NodePat { var, label, props })
646 }
647
648 fn parse_props(&mut self) -> Result<BTreeMap<String, Literal>, CypherError> {
649 self.expect(&Token::LBrace)?;
650 let mut props: BTreeMap<String, Literal> = BTreeMap::new();
651 loop {
652 let key = match self.advance() {
653 Some(Token::Ident(s)) => s.clone(),
654 other => {
655 return Err(CypherError::ParseError(format!(
656 "expected property key, got {other:?}"
657 )))
658 }
659 };
660 self.expect(&Token::Colon)?;
661 let value = self.parse_literal()?;
662 props.insert(key, value);
663 match self.peek() {
664 Some(Token::Comma) => {
665 self.advance();
666 }
667 Some(Token::RBrace) => break,
668 other => {
669 return Err(CypherError::ParseError(format!(
670 "expected ',' or '}}', got {other:?}"
671 )))
672 }
673 }
674 }
675 self.expect(&Token::RBrace)?;
676 Ok(props)
677 }
678
679 fn parse_literal(&mut self) -> Result<Literal, CypherError> {
680 match self.advance() {
681 Some(Token::Str(s)) => Ok(Literal::Str(s.clone())),
682 Some(Token::Int(n)) => Ok(Literal::Int(*n)),
683 Some(Token::Keyword(k)) if k == "TRUE" => Ok(Literal::Bool(true)),
684 Some(Token::Keyword(k)) if k == "FALSE" => Ok(Literal::Bool(false)),
685 other => Err(CypherError::ParseError(format!(
686 "expected literal, got {other:?}"
687 ))),
688 }
689 }
690
691 fn parse_expr(&mut self) -> Result<Expr, CypherError> {
692 let mut left = self.parse_and_expr()?;
693 while self.match_keyword("OR") {
694 let right = self.parse_and_expr()?;
695 left = Expr::Or(Box::new(left), Box::new(right));
696 }
697 Ok(left)
698 }
699
700 fn parse_and_expr(&mut self) -> Result<Expr, CypherError> {
701 let mut left = self.parse_unary_expr()?;
702 while self.match_keyword("AND") {
703 let right = self.parse_unary_expr()?;
704 left = Expr::And(Box::new(left), Box::new(right));
705 }
706 Ok(left)
707 }
708
709 fn parse_unary_expr(&mut self) -> Result<Expr, CypherError> {
710 if self.match_keyword("NOT") {
711 let inner = self.parse_unary_expr()?;
712 return Ok(Expr::Not(Box::new(inner)));
713 }
714 self.parse_compare()
715 }
716
717 fn parse_compare(&mut self) -> Result<Expr, CypherError> {
718 if matches!(
719 self.peek(),
720 Some(Token::Keyword(k)) if k == "TRUE" || k == "FALSE"
721 ) {
722 let lit = self.parse_literal()?;
723 return Ok(match lit {
724 Literal::Bool(b) => Expr::Bool(b),
725 _ => unreachable!(),
726 });
727 }
728 let left = self.parse_operand()?;
729 let op = match self.peek() {
730 Some(Token::Eq) => Some(CmpOp::Eq),
731 Some(Token::Neq) => Some(CmpOp::Neq),
732 Some(Token::Lt) => Some(CmpOp::Lt),
733 Some(Token::Le) => Some(CmpOp::Le),
734 Some(Token::Gt) => Some(CmpOp::Gt),
735 Some(Token::Ge) => Some(CmpOp::Ge),
736 _ => None,
737 };
738 let Some(op) = op else {
739 return Err(CypherError::ParseError(
740 "expected comparison operator in WHERE clause".into(),
741 ));
742 };
743 self.advance();
744 let right = self.parse_operand()?;
745 Ok(Expr::Compare(left, op, right))
746 }
747
748 fn parse_operand(&mut self) -> Result<Operand, CypherError> {
749 match self.peek() {
750 Some(Token::Str(_)) | Some(Token::Int(_)) => {
751 let lit = self.parse_literal()?;
752 Ok(Operand::Literal(lit))
753 }
754 Some(Token::Keyword(k)) if k == "TRUE" || k == "FALSE" => {
755 let lit = self.parse_literal()?;
756 Ok(Operand::Literal(lit))
757 }
758 Some(Token::Ident(_)) => {
759 let var = if let Some(Token::Ident(s)) = self.advance() {
760 s.clone()
761 } else {
762 unreachable!()
763 };
764 let property = if matches!(self.peek(), Some(Token::Dot)) {
765 self.advance();
766 match self.advance() {
767 Some(Token::Ident(s)) => Some(s.clone()),
768 other => {
769 return Err(CypherError::ParseError(format!(
770 "expected property name after '.', got {other:?}"
771 )))
772 }
773 }
774 } else {
775 None
776 };
777 Ok(Operand::Path { var, property })
778 }
779 other => Err(CypherError::ParseError(format!(
780 "expected operand, got {other:?}"
781 ))),
782 }
783 }
784
785 fn parse_projection(&mut self) -> Result<Projection, CypherError> {
786 let operand = self.parse_operand()?;
787 let alias = if self.match_keyword("AS") {
788 match self.advance() {
789 Some(Token::Ident(s)) => Some(s.clone()),
790 other => {
791 return Err(CypherError::ParseError(format!(
792 "expected alias after AS, got {other:?}"
793 )))
794 }
795 }
796 } else {
797 None
798 };
799 let explicit_alias = alias.is_some();
800 Ok(Projection {
801 operand,
802 alias: alias.unwrap_or_default(),
803 explicit_alias,
804 })
805 }
806}
807
808fn default_alias(operand: &Operand) -> String {
809 match operand {
810 Operand::Path {
811 var,
812 property: None,
813 } => var.clone(),
814 Operand::Path {
815 var,
816 property: Some(p),
817 } => format!("{var}.{p}"),
818 Operand::Literal(_) => "value".to_string(),
819 }
820}
821
822fn resolve_projection_aliases(projections: &mut [Projection]) -> Result<(), CypherError> {
834 use std::collections::HashSet;
835
836 let mut seen: HashSet<String> = HashSet::new();
838 for proj in projections.iter() {
839 if proj.explicit_alias && !seen.insert(proj.alias.clone()) {
840 return Err(CypherError::ParseError(format!(
841 "duplicate alias `{}` in RETURN clause",
842 proj.alias
843 )));
844 }
845 }
846
847 let mut literal_count: usize = 0;
849 for proj in projections.iter_mut() {
850 if proj.explicit_alias {
851 continue;
852 }
853 let base = default_alias(&proj.operand);
854 let is_literal_default = matches!(proj.operand, Operand::Literal(_));
855 let mut candidate = base.clone();
856 if is_literal_default {
857 literal_count += 1;
858 if literal_count >= 2 {
859 candidate = format!("{base}_{literal_count}");
860 }
861 }
862 let mut bump: usize = 2;
865 while seen.contains(&candidate) {
866 candidate = format!("{base}_{bump}");
867 bump += 1;
868 }
869 seen.insert(candidate.clone());
870 proj.alias = candidate;
871 }
872 Ok(())
873}
874
875fn exec(query: &Query, graph: &SymbolGraph) -> Result<Vec<CypherRow>, CypherError> {
880 let mut all_rows: Vec<HashMap<String, NodeId>> = vec![HashMap::new()];
881 for pattern in &query.matches {
882 let mut new_rows: Vec<HashMap<String, NodeId>> = Vec::new();
883 for binding in &all_rows {
884 let candidates = match_pattern(pattern, graph, binding)?;
885 for cand in candidates {
886 let mut merged = binding.clone();
887 for (k, v) in cand {
888 merged.insert(k, v);
889 }
890 new_rows.push(merged);
891 if new_rows.len() > MAX_ROWS {
896 return Err(CypherError::ExecError(format!(
897 "row budget exceeded ({} > {}); narrow the MATCH or add a WHERE clause",
898 new_rows.len(),
899 MAX_ROWS
900 )));
901 }
902 }
903 }
904 all_rows = new_rows;
905 }
906
907 let mut out: Vec<CypherRow> = Vec::new();
908 for binding in all_rows {
909 if let Some(expr) = &query.where_clause {
910 if !eval_bool(expr, &binding, graph)? {
911 continue;
912 }
913 }
914 let mut row: CypherRow = BTreeMap::new();
915 for proj in &query.projections {
916 let v = eval_operand(&proj.operand, &binding, graph)?;
917 row.insert(proj.alias.clone(), v);
918 }
919 out.push(row);
920 if out.len() > MAX_ROWS {
924 return Err(CypherError::ExecError(format!(
925 "row budget exceeded ({} > {}); narrow the MATCH or add a WHERE clause",
926 out.len(),
927 MAX_ROWS
928 )));
929 }
930 }
931 Ok(out)
932}
933
934fn match_pattern(
935 pattern: &Pattern,
936 graph: &SymbolGraph,
937 seed: &HashMap<String, NodeId>,
938) -> Result<Vec<HashMap<String, NodeId>>, CypherError> {
939 let head_candidates: Vec<NodeId> = if let Some(existing) = seed.get(&pattern.head.var) {
940 if node_matches(graph, *existing, &pattern.head) {
942 vec![*existing]
943 } else {
944 vec![]
945 }
946 } else {
947 candidate_nodes(graph, &pattern.head)
948 };
949
950 let mut out: Vec<HashMap<String, NodeId>> = Vec::new();
951 for head_id in head_candidates {
952 let mut binding: HashMap<String, NodeId> = HashMap::new();
953 binding.insert(pattern.head.var.clone(), head_id);
954 extend_steps(graph, &pattern.steps, 0, head_id, binding, &mut out)?;
955 }
956 Ok(out)
957}
958
959fn extend_steps(
960 graph: &SymbolGraph,
961 steps: &[RelStep],
962 idx: usize,
963 cursor: NodeId,
964 binding: HashMap<String, NodeId>,
965 out: &mut Vec<HashMap<String, NodeId>>,
966) -> Result<(), CypherError> {
967 if idx == steps.len() {
968 out.push(binding);
969 return Ok(());
970 }
971 let step = &steps[idx];
972 let (kind, label_reversed) =
973 EdgeKind::parse_with_direction(&step.edge_label).ok_or_else(|| {
974 CypherError::ExecError(format!("unknown edge label `{}`", step.edge_label))
975 })?;
976 let forward = step.forward ^ label_reversed;
978 let (lo, hi) = step.var_length.unwrap_or((1, 1));
979 if hi > VAR_LENGTH_MAX_DEPTH {
980 return Err(CypherError::ExecError(format!(
981 "variable-length traversal capped at depth {VAR_LENGTH_MAX_DEPTH} (got `*{lo}..{hi}`)"
982 )));
983 }
984 let lo = lo.max(1);
985 let hi = hi.max(lo);
986
987 let mut visited: HashSet<NodeId> = HashSet::new();
989 visited.insert(cursor);
990 let mut stack: Vec<(NodeId, u32, HashSet<NodeId>)> = vec![(cursor, 0, visited)];
991 while let Some((node, depth, visited)) = stack.pop() {
992 if depth >= hi {
993 continue;
994 }
995 let edges = if forward {
996 graph.outgoing(node)
997 } else {
998 graph.incoming(node)
999 };
1000 for edge in edges {
1001 if edge.kind != kind {
1002 continue;
1003 }
1004 let next = if forward { edge.to } else { edge.from };
1005 if visited.contains(&next) {
1006 continue;
1007 }
1008 let new_depth = depth + 1;
1009 if new_depth >= lo && node_matches(graph, next, &step.target) {
1010 let already_bound = binding.get(&step.target.var);
1011 if matches!(already_bound, Some(id) if *id != next) {
1012 } else {
1014 let mut next_binding = binding.clone();
1015 next_binding.insert(step.target.var.clone(), next);
1016 extend_steps(graph, steps, idx + 1, next, next_binding, out)?;
1017 }
1018 }
1019 if new_depth < hi {
1020 let mut next_visited = visited.clone();
1021 next_visited.insert(next);
1022 stack.push((next, new_depth, next_visited));
1023 }
1024 }
1025 }
1026 Ok(())
1027}
1028
1029fn candidate_nodes(graph: &SymbolGraph, pat: &NodePat) -> Vec<NodeId> {
1030 if let Some(Literal::Str(name)) = pat.props.get("name") {
1032 return graph
1033 .nodes_named(name)
1034 .iter()
1035 .copied()
1036 .filter(|nid| node_matches(graph, *nid, pat))
1037 .collect();
1038 }
1039 if let Some(label) = &pat.label {
1040 let kind = NodeKind::parse(label).unwrap_or(NodeKind::Module);
1041 return graph
1042 .nodes_of_kind(kind)
1043 .into_iter()
1044 .filter(|nid| node_matches(graph, *nid, pat))
1045 .collect();
1046 }
1047 graph
1048 .all_node_ids()
1049 .into_iter()
1050 .filter(|nid| node_matches(graph, *nid, pat))
1051 .collect()
1052}
1053
1054fn node_matches(graph: &SymbolGraph, id: NodeId, pat: &NodePat) -> bool {
1055 let Some(node) = graph.node(id) else {
1056 return false;
1057 };
1058 if let Some(label) = &pat.label {
1059 let Some(kind) = NodeKind::parse(label) else {
1060 return false;
1061 };
1062 if node.kind != kind {
1063 return false;
1064 }
1065 }
1066 for (key, expected) in &pat.props {
1067 let actual = property_value(node, key);
1068 if &actual != expected {
1069 return false;
1070 }
1071 }
1072 true
1073}
1074
1075fn property_value(node: &super::symbol_graph::Node, key: &str) -> Literal {
1076 match key {
1077 "name" => Literal::Str(node.name.clone()),
1078 "path" => Literal::Str(node.path.clone()),
1079 "language" => Literal::Str(node.language.clone()),
1080 "kind" => Literal::Str(node.kind.as_str().to_string()),
1081 "container" => Literal::Str(node.container.clone().unwrap_or_default()),
1082 "signature" => Literal::Str(node.signature.clone()),
1083 "line" => Literal::Int(node.line as i64),
1084 "file_id" => Literal::Int(node.file_id as i64),
1085 "id" => Literal::Int(node.id as i64),
1086 _ => Literal::Str(String::new()),
1087 }
1088}
1089
1090fn eval_bool(
1091 expr: &Expr,
1092 binding: &HashMap<String, NodeId>,
1093 graph: &SymbolGraph,
1094) -> Result<bool, CypherError> {
1095 match expr {
1096 Expr::Bool(b) => Ok(*b),
1097 Expr::And(a, b) => Ok(eval_bool(a, binding, graph)? && eval_bool(b, binding, graph)?),
1098 Expr::Or(a, b) => Ok(eval_bool(a, binding, graph)? || eval_bool(b, binding, graph)?),
1099 Expr::Not(inner) => Ok(!eval_bool(inner, binding, graph)?),
1100 Expr::Compare(l, op, r) => {
1101 let lv = lookup_operand(l, binding, graph)?;
1102 let rv = lookup_operand(r, binding, graph)?;
1103 Ok(apply_cmp(&lv, *op, &rv))
1104 }
1105 }
1106}
1107
1108fn apply_cmp(left: &Literal, op: CmpOp, right: &Literal) -> bool {
1109 match (left, right) {
1110 (Literal::Int(a), Literal::Int(b)) => match op {
1111 CmpOp::Eq => a == b,
1112 CmpOp::Neq => a != b,
1113 CmpOp::Lt => a < b,
1114 CmpOp::Le => a <= b,
1115 CmpOp::Gt => a > b,
1116 CmpOp::Ge => a >= b,
1117 },
1118 (Literal::Str(a), Literal::Str(b)) => match op {
1119 CmpOp::Eq => a == b,
1120 CmpOp::Neq => a != b,
1121 CmpOp::Lt => a < b,
1122 CmpOp::Le => a <= b,
1123 CmpOp::Gt => a > b,
1124 CmpOp::Ge => a >= b,
1125 },
1126 (Literal::Bool(a), Literal::Bool(b)) => match op {
1127 CmpOp::Eq => a == b,
1128 CmpOp::Neq => a != b,
1129 _ => false,
1130 },
1131 _ => matches!(op, CmpOp::Neq),
1132 }
1133}
1134
1135fn lookup_operand(
1136 op: &Operand,
1137 binding: &HashMap<String, NodeId>,
1138 graph: &SymbolGraph,
1139) -> Result<Literal, CypherError> {
1140 match op {
1141 Operand::Literal(lit) => Ok(lit.clone()),
1142 Operand::Path { var, property } => {
1143 let id = binding.get(var).copied().ok_or_else(|| {
1144 CypherError::ExecError(format!("unbound variable `{var}` in expression"))
1145 })?;
1146 let node = graph
1147 .node(id)
1148 .ok_or_else(|| CypherError::ExecError(format!("node id {id} not in graph")))?;
1149 match property {
1150 Some(p) => Ok(property_value(node, p)),
1151 None => Ok(Literal::Str(node.name.clone())),
1152 }
1153 }
1154 }
1155}
1156
1157fn eval_operand(
1158 op: &Operand,
1159 binding: &HashMap<String, NodeId>,
1160 graph: &SymbolGraph,
1161) -> Result<CypherValue, CypherError> {
1162 let lit = lookup_operand(op, binding, graph)?;
1163 Ok(match lit {
1164 Literal::Str(s) => {
1165 if s.is_empty() {
1166 CypherValue::Null
1167 } else {
1168 CypherValue::String(s)
1169 }
1170 }
1171 Literal::Int(n) => CypherValue::Int(n),
1172 Literal::Bool(b) => CypherValue::Bool(b),
1173 })
1174}
1175
1176#[cfg(test)]
1177mod tests {
1178 use super::*;
1179 use crate::ast::Language;
1180
1181 fn fixture() -> SymbolGraph {
1182 let mut g = SymbolGraph::new();
1183 g.rebuild_file(
1184 1,
1185 "src/a.rs",
1186 Language::Rust,
1187 "fn start() {}\nfn driver() { start(); }\n",
1188 &[],
1189 );
1190 g.rebuild_file(
1191 2,
1192 "src/b.rs",
1193 Language::Rust,
1194 "fn entry() { driver(); }\n",
1195 &[],
1196 );
1197 g
1198 }
1199
1200 #[test]
1201 fn lexer_recognises_arrows_and_keywords() {
1202 let toks = lex("MATCH (a)-[:CALLS]->(b) RETURN a.name").unwrap();
1203 assert!(toks.iter().any(|t| matches!(t, Token::Arrow)));
1204 assert!(toks
1205 .iter()
1206 .any(|t| matches!(t, Token::Keyword(k) if k == "MATCH")));
1207 }
1208
1209 #[test]
1210 fn returns_function_by_name() {
1211 let g = fixture();
1212 let rows = execute(
1213 "MATCH (f:Function {name: 'start'}) RETURN f.path AS path, f.line AS line",
1214 &g,
1215 )
1216 .unwrap();
1217 assert_eq!(rows.len(), 1);
1218 assert_eq!(
1219 rows[0].get("path"),
1220 Some(&CypherValue::String("src/a.rs".into()))
1221 );
1222 }
1223
1224 #[test]
1225 fn called_by_var_length_finds_indirect_callers() {
1226 let g = fixture();
1227 let rows = execute(
1228 "MATCH (f:Function {name: 'start'})<-[:CALLS*1..3]-(c:CallSite) RETURN c.path AS path",
1229 &g,
1230 )
1231 .unwrap();
1232 assert!(!rows.is_empty(), "expected at least one call-site caller");
1233 }
1234
1235 #[test]
1236 fn where_predicate_filters_results() {
1237 let g = fixture();
1238 let rows = execute(
1239 "MATCH (f:Function) WHERE f.name = 'driver' RETURN f.path AS path",
1240 &g,
1241 )
1242 .unwrap();
1243 assert_eq!(rows.len(), 1);
1244 assert_eq!(
1245 rows[0].get("path"),
1246 Some(&CypherValue::String("src/a.rs".into()))
1247 );
1248 }
1249
1250 #[test]
1251 fn literal_default_aliases_do_not_collide() {
1252 let g = fixture();
1257 let rows = execute("MATCH (f:Function {name: 'start'}) RETURN 1, 2, 3", &g).unwrap();
1258 assert_eq!(rows.len(), 1);
1259 let row = &rows[0];
1260 assert_eq!(row.get("value"), Some(&CypherValue::Int(1)));
1261 assert_eq!(row.get("value_2"), Some(&CypherValue::Int(2)));
1262 assert_eq!(row.get("value_3"), Some(&CypherValue::Int(3)));
1263 }
1264
1265 #[test]
1266 fn duplicate_explicit_aliases_are_rejected() {
1267 let g = fixture();
1268 let err = execute("MATCH (f:Function) RETURN f.name AS n, f.path AS n", &g).unwrap_err();
1269 assert!(
1270 matches!(err, CypherError::ParseError(_)),
1271 "expected ParseError for duplicate explicit alias, got {err:?}"
1272 );
1273 }
1274
1275 #[test]
1276 fn rejects_too_many_disjoint_patterns() {
1277 let g = fixture();
1281 let err = execute(
1282 "MATCH (a:Function),(b:Function),(c:Function),(d:Function) RETURN a.name",
1283 &g,
1284 )
1285 .unwrap_err();
1286 assert!(
1287 matches!(&err, CypherError::ParseError(msg) if msg.contains("too many disjoint MATCH patterns")),
1288 "expected ParseError for too many patterns, got {err:?}"
1289 );
1290 }
1291
1292 #[test]
1293 fn enforces_row_budget_on_cartesian_explosion() {
1294 let mut g = SymbolGraph::new();
1298 let mut source = String::new();
1299 for i in 0..25 {
1300 source.push_str(&format!("fn f{i}() {{}}\n"));
1301 }
1302 g.rebuild_file(1, "src/big.rs", Language::Rust, &source, &[]);
1303
1304 let err = execute(
1305 "MATCH (a:Function),(b:Function),(c:Function) RETURN a.name AS n",
1306 &g,
1307 )
1308 .unwrap_err();
1309 assert!(
1310 matches!(&err, CypherError::ExecError(msg) if msg.contains("row budget exceeded")),
1311 "expected ExecError for row budget, got {err:?}"
1312 );
1313 }
1314
1315 #[test]
1316 fn open_upper_bound_defaults_to_depth_cap() {
1317 let toks = lex("MATCH (a:Function)-[:CALLS*1..]->(b:Function) RETURN a.name AS n").unwrap();
1319 let q = parse(&toks).unwrap();
1320 let step = &q.matches[0].steps[0];
1321 assert_eq!(step.var_length, Some((1, VAR_LENGTH_MAX_DEPTH)));
1322
1323 let toks = lex("MATCH (a:Function)-[:CALLS*2..]->(b:Function) RETURN a.name AS n").unwrap();
1325 let q = parse(&toks).unwrap();
1326 let step = &q.matches[0].steps[0];
1327 assert_eq!(step.var_length, Some((2, VAR_LENGTH_MAX_DEPTH)));
1328 }
1329
1330 #[test]
1331 fn rejects_depth_above_four() {
1332 let g = fixture();
1333 let err = execute(
1334 "MATCH (a:Function)-[:CALLS*1..6]->(b:Function) RETURN a.name AS n",
1335 &g,
1336 )
1337 .unwrap_err();
1338 assert!(
1339 matches!(err, CypherError::ExecError(_)),
1340 "expected ExecError, got {err:?}"
1341 );
1342 }
1343}