1use std::collections::{BTreeMap, HashMap, HashSet};
43use std::fmt;
44
45use harn_vm::VmValue;
46
47use super::symbol_graph::{EdgeKind, NodeId, NodeKind, SymbolGraph};
48
49pub const MAX_ROWS: usize = 10_000;
56
57pub const MAX_PATTERNS: usize = 3;
64
65const VAR_LENGTH_MAX_DEPTH: u32 = 4;
69
70#[derive(Debug, Clone, Eq, PartialEq)]
75pub enum CypherError {
76 LexError(String),
78 ParseError(String),
80 ExecError(String),
83}
84
85impl fmt::Display for CypherError {
86 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
87 match self {
88 CypherError::LexError(s) => write!(f, "lex error: {s}"),
89 CypherError::ParseError(s) => write!(f, "parse error: {s}"),
90 CypherError::ExecError(s) => write!(f, "exec error: {s}"),
91 }
92 }
93}
94
95impl std::error::Error for CypherError {}
96
97#[derive(Debug, Clone, Eq, PartialEq)]
99pub enum CypherValue {
100 Null,
102 String(String),
104 Int(i64),
106 Bool(bool),
108}
109
110impl CypherValue {
111 pub fn to_vm(&self) -> VmValue {
113 match self {
114 CypherValue::Null => VmValue::Nil,
115 CypherValue::String(s) => VmValue::String(arcstr::ArcStr::from(s.as_str())),
116 CypherValue::Int(n) => VmValue::Int(*n),
117 CypherValue::Bool(b) => VmValue::Bool(*b),
118 }
119 }
120}
121
122pub type CypherRow = BTreeMap<String, CypherValue>;
124
125pub fn execute(query: &str, graph: &SymbolGraph) -> Result<Vec<CypherRow>, CypherError> {
127 let tokens = lex(query)?;
128 let ast = parse(&tokens)?;
129 exec(&ast, graph)
130}
131
132#[derive(Debug, Clone, PartialEq)]
137enum Token {
138 Keyword(String),
140 Ident(String),
141 Str(String),
142 Int(i64),
143 LParen,
144 RParen,
145 LBrace,
146 RBrace,
147 LBracket,
148 RBracket,
149 Colon,
150 Comma,
151 Dot,
152 Eq,
153 Neq,
154 Lt,
155 Le,
156 Gt,
157 Ge,
158 Star,
159 DotDot,
160 Dash,
161 Arrow, LeftArrow, }
164
165fn lex(input: &str) -> Result<Vec<Token>, CypherError> {
166 let mut out: Vec<Token> = Vec::new();
167 let bytes = input.as_bytes();
168 let mut i = 0;
169 while i < bytes.len() {
170 let b = bytes[i];
171 if b.is_ascii_whitespace() {
172 i += 1;
173 continue;
174 }
175 if b == b'(' {
176 out.push(Token::LParen);
177 i += 1;
178 continue;
179 }
180 if b == b')' {
181 out.push(Token::RParen);
182 i += 1;
183 continue;
184 }
185 if b == b'{' {
186 out.push(Token::LBrace);
187 i += 1;
188 continue;
189 }
190 if b == b'}' {
191 out.push(Token::RBrace);
192 i += 1;
193 continue;
194 }
195 if b == b'[' {
196 out.push(Token::LBracket);
197 i += 1;
198 continue;
199 }
200 if b == b']' {
201 out.push(Token::RBracket);
202 i += 1;
203 continue;
204 }
205 if b == b':' {
206 out.push(Token::Colon);
207 i += 1;
208 continue;
209 }
210 if b == b',' {
211 out.push(Token::Comma);
212 i += 1;
213 continue;
214 }
215 if b == b'*' {
216 out.push(Token::Star);
217 i += 1;
218 continue;
219 }
220 if b == b'=' {
221 out.push(Token::Eq);
222 i += 1;
223 continue;
224 }
225 if b == b'.' {
226 if i + 1 < bytes.len() && bytes[i + 1] == b'.' {
227 out.push(Token::DotDot);
228 i += 2;
229 } else {
230 out.push(Token::Dot);
231 i += 1;
232 }
233 continue;
234 }
235 if b == b'<' {
236 if i + 1 < bytes.len() && bytes[i + 1] == b'-' {
237 out.push(Token::LeftArrow);
238 i += 2;
239 } else if i + 1 < bytes.len() && bytes[i + 1] == b'=' {
240 out.push(Token::Le);
241 i += 2;
242 } else if i + 1 < bytes.len() && bytes[i + 1] == b'>' {
243 out.push(Token::Neq);
244 i += 2;
245 } else {
246 out.push(Token::Lt);
247 i += 1;
248 }
249 continue;
250 }
251 if b == b'>' {
252 if i + 1 < bytes.len() && bytes[i + 1] == b'=' {
253 out.push(Token::Ge);
254 i += 2;
255 } else {
256 out.push(Token::Gt);
257 i += 1;
258 }
259 continue;
260 }
261 if b == b'!' {
262 if i + 1 < bytes.len() && bytes[i + 1] == b'=' {
263 out.push(Token::Neq);
264 i += 2;
265 continue;
266 }
267 return Err(CypherError::LexError(format!(
268 "unexpected '!' at byte {i} (expected '!=')"
269 )));
270 }
271 if b == b'-' {
272 if i + 1 < bytes.len() && bytes[i + 1] == b'>' {
273 out.push(Token::Arrow);
274 i += 2;
275 } else {
276 out.push(Token::Dash);
277 i += 1;
278 }
279 continue;
280 }
281 if b == b'\'' || b == b'"' {
282 let quote = b;
283 let start = i + 1;
284 i += 1;
285 while i < bytes.len() && bytes[i] != quote {
286 if bytes[i] == b'\\' && i + 1 < bytes.len() {
287 i += 2;
288 } else {
289 i += 1;
290 }
291 }
292 if i >= bytes.len() {
293 return Err(CypherError::LexError("unterminated string literal".into()));
294 }
295 let raw = &input[start..i];
296 out.push(Token::Str(unescape(raw)));
297 i += 1;
298 continue;
299 }
300 if b.is_ascii_digit() {
301 let start = i;
302 while i < bytes.len() && bytes[i].is_ascii_digit() {
303 i += 1;
304 }
305 let n: i64 = input[start..i].parse().map_err(|err| {
306 CypherError::LexError(format!("bad integer at byte {start}: {err}"))
307 })?;
308 out.push(Token::Int(n));
309 continue;
310 }
311 if b.is_ascii_alphabetic() || b == b'_' {
312 let start = i;
313 while i < bytes.len() && (bytes[i].is_ascii_alphanumeric() || bytes[i] == b'_') {
314 i += 1;
315 }
316 let word = &input[start..i];
317 let upper = word.to_ascii_uppercase();
318 if is_keyword(&upper) {
319 out.push(Token::Keyword(upper));
320 } else {
321 out.push(Token::Ident(word.to_string()));
322 }
323 continue;
324 }
325 return Err(CypherError::LexError(format!(
326 "unexpected character `{}` at byte {i}",
327 char::from(b)
328 )));
329 }
330 Ok(out)
331}
332
333fn is_keyword(word: &str) -> bool {
334 matches!(
335 word,
336 "MATCH" | "WHERE" | "RETURN" | "AND" | "OR" | "AS" | "NOT" | "TRUE" | "FALSE"
337 )
338}
339
340fn unescape(raw: &str) -> String {
341 let mut out = String::with_capacity(raw.len());
342 let mut iter = raw.chars();
343 while let Some(c) = iter.next() {
344 if c == '\\' {
345 if let Some(next) = iter.next() {
346 out.push(match next {
347 'n' => '\n',
348 't' => '\t',
349 'r' => '\r',
350 '\\' => '\\',
351 '\'' => '\'',
352 '"' => '"',
353 other => other,
354 });
355 }
356 } else {
357 out.push(c);
358 }
359 }
360 out
361}
362
363#[derive(Debug, Clone)]
368struct Query {
369 matches: Vec<Pattern>,
370 where_clause: Option<Expr>,
371 projections: Vec<Projection>,
372}
373
374#[derive(Debug, Clone)]
375struct Pattern {
376 head: NodePat,
377 steps: Vec<RelStep>,
378}
379
380#[derive(Debug, Clone)]
381struct NodePat {
382 var: String,
383 label: Option<String>,
384 props: BTreeMap<String, Literal>,
385}
386
387#[derive(Debug, Clone)]
388struct RelStep {
389 forward: bool,
391 edge_label: String,
392 var_length: Option<(u32, u32)>,
393 target: NodePat,
394}
395
396#[derive(Debug, Clone)]
397enum Expr {
398 And(Box<Expr>, Box<Expr>),
399 Or(Box<Expr>, Box<Expr>),
400 Not(Box<Expr>),
401 Compare(Operand, CmpOp, Operand),
402 Bool(bool),
403}
404
405#[derive(Debug, Clone, Copy, PartialEq, Eq)]
406enum CmpOp {
407 Eq,
408 Neq,
409 Lt,
410 Le,
411 Gt,
412 Ge,
413}
414
415#[derive(Debug, Clone)]
416enum Operand {
417 Path {
419 var: String,
420 property: Option<String>,
421 },
422 Literal(Literal),
423}
424
425#[derive(Debug, Clone, PartialEq, Eq)]
426enum Literal {
427 Str(String),
428 Int(i64),
429 Bool(bool),
430}
431
432#[derive(Debug, Clone)]
433struct Projection {
434 operand: Operand,
435 alias: String,
439 explicit_alias: bool,
444}
445
446struct Parser<'a> {
451 tokens: &'a [Token],
452 pos: usize,
453}
454
455fn parse(tokens: &[Token]) -> Result<Query, CypherError> {
456 let mut p = Parser { tokens, pos: 0 };
457 let q = p.parse_query()?;
458 if p.pos != tokens.len() {
459 return Err(CypherError::ParseError(format!(
460 "trailing tokens after RETURN at position {} ({:?})",
461 p.pos, tokens[p.pos]
462 )));
463 }
464 Ok(q)
465}
466
467impl<'a> Parser<'a> {
468 fn peek(&self) -> Option<&Token> {
469 self.tokens.get(self.pos)
470 }
471
472 fn advance(&mut self) -> Option<&Token> {
473 let t = self.tokens.get(self.pos);
474 self.pos += 1;
475 t
476 }
477
478 fn expect_keyword(&mut self, word: &str) -> Result<(), CypherError> {
479 match self.advance() {
480 Some(Token::Keyword(k)) if k == word => Ok(()),
481 other => Err(CypherError::ParseError(format!(
482 "expected `{word}`, got {other:?}"
483 ))),
484 }
485 }
486
487 fn match_keyword(&mut self, word: &str) -> bool {
488 if matches!(self.peek(), Some(Token::Keyword(k)) if k == word) {
489 self.pos += 1;
490 true
491 } else {
492 false
493 }
494 }
495
496 fn expect(&mut self, want: &Token) -> Result<(), CypherError> {
497 let next = self.advance();
498 if next == Some(want) {
499 Ok(())
500 } else {
501 Err(CypherError::ParseError(format!(
502 "expected {want:?}, got {next:?}"
503 )))
504 }
505 }
506
507 fn parse_query(&mut self) -> Result<Query, CypherError> {
508 self.expect_keyword("MATCH")?;
509 let mut matches = vec![self.parse_pattern()?];
510 while matches!(self.peek(), Some(Token::Comma)) {
511 self.advance();
512 matches.push(self.parse_pattern()?);
513 if matches.len() > MAX_PATTERNS {
514 return Err(CypherError::ParseError(format!(
515 "too many disjoint MATCH patterns (got {}, cap is {}); \
516 join through edge syntax (`-[:KIND]->`) instead of a cartesian product",
517 matches.len(),
518 MAX_PATTERNS
519 )));
520 }
521 }
522 let where_clause = if self.match_keyword("WHERE") {
523 Some(self.parse_expr()?)
524 } else {
525 None
526 };
527 self.expect_keyword("RETURN")?;
528 let mut projections = vec![self.parse_projection()?];
529 while matches!(self.peek(), Some(Token::Comma)) {
530 self.advance();
531 projections.push(self.parse_projection()?);
532 }
533 resolve_projection_aliases(&mut projections)?;
534 Ok(Query {
535 matches,
536 where_clause,
537 projections,
538 })
539 }
540
541 fn parse_pattern(&mut self) -> Result<Pattern, CypherError> {
542 let head = self.parse_node_pat()?;
543 let mut steps: Vec<RelStep> = Vec::new();
544 loop {
545 let forward = match self.peek() {
547 Some(Token::Dash) => true,
548 Some(Token::LeftArrow) => false,
549 _ => break,
550 };
551 self.advance(); self.expect(&Token::LBracket)?;
553 self.expect(&Token::Colon)?;
554 let edge_label = match self.advance() {
555 Some(Token::Ident(s)) => s.clone(),
556 other => {
557 return Err(CypherError::ParseError(format!(
558 "expected edge label, got {other:?}"
559 )))
560 }
561 };
562 let var_length = if matches!(self.peek(), Some(Token::Star)) {
563 self.advance();
564 let lo = if matches!(self.peek(), Some(Token::Int(_))) {
565 if let Some(Token::Int(n)) = self.advance() {
566 Some(*n)
567 } else {
568 None
569 }
570 } else {
571 None
572 };
573 let bounds = if matches!(self.peek(), Some(Token::DotDot)) {
574 self.advance();
575 let hi = if matches!(self.peek(), Some(Token::Int(_))) {
576 if let Some(Token::Int(n)) = self.advance() {
577 Some(*n)
578 } else {
579 None
580 }
581 } else {
582 None
583 };
584 let hi_v = hi.map(|n| n as u32).unwrap_or(VAR_LENGTH_MAX_DEPTH);
588 (lo.unwrap_or(1) as u32, hi_v)
589 } else {
590 let lo_v = lo.unwrap_or(1) as u32;
592 (lo_v, lo_v.max(3))
593 };
594 Some(bounds)
595 } else {
596 None
597 };
598 self.expect(&Token::RBracket)?;
599 if forward {
600 self.expect(&Token::Arrow)?;
601 } else {
602 self.expect(&Token::Dash)?;
603 }
604 let target = self.parse_node_pat()?;
605 steps.push(RelStep {
606 forward,
607 edge_label,
608 var_length,
609 target,
610 });
611 }
612 Ok(Pattern { head, steps })
613 }
614
615 fn parse_node_pat(&mut self) -> Result<NodePat, CypherError> {
616 self.expect(&Token::LParen)?;
617 let var = match self.advance() {
618 Some(Token::Ident(s)) => s.clone(),
619 other => {
620 return Err(CypherError::ParseError(format!(
621 "expected node variable, got {other:?}"
622 )))
623 }
624 };
625 let label = if matches!(self.peek(), Some(Token::Colon)) {
626 self.advance();
627 match self.advance() {
628 Some(Token::Ident(s)) => Some(s.clone()),
629 other => {
630 return Err(CypherError::ParseError(format!(
631 "expected node label, got {other:?}"
632 )))
633 }
634 }
635 } else {
636 None
637 };
638 let props = if matches!(self.peek(), Some(Token::LBrace)) {
639 self.parse_props()?
640 } else {
641 BTreeMap::new()
642 };
643 self.expect(&Token::RParen)?;
644 Ok(NodePat { var, label, props })
645 }
646
647 fn parse_props(&mut self) -> Result<BTreeMap<String, Literal>, CypherError> {
648 self.expect(&Token::LBrace)?;
649 let mut props: BTreeMap<String, Literal> = BTreeMap::new();
650 loop {
651 let key = match self.advance() {
652 Some(Token::Ident(s)) => s.clone(),
653 other => {
654 return Err(CypherError::ParseError(format!(
655 "expected property key, got {other:?}"
656 )))
657 }
658 };
659 self.expect(&Token::Colon)?;
660 let value = self.parse_literal()?;
661 props.insert(key, value);
662 match self.peek() {
663 Some(Token::Comma) => {
664 self.advance();
665 }
666 Some(Token::RBrace) => break,
667 other => {
668 return Err(CypherError::ParseError(format!(
669 "expected ',' or '}}', got {other:?}"
670 )))
671 }
672 }
673 }
674 self.expect(&Token::RBrace)?;
675 Ok(props)
676 }
677
678 fn parse_literal(&mut self) -> Result<Literal, CypherError> {
679 match self.advance() {
680 Some(Token::Str(s)) => Ok(Literal::Str(s.clone())),
681 Some(Token::Int(n)) => Ok(Literal::Int(*n)),
682 Some(Token::Keyword(k)) if k == "TRUE" => Ok(Literal::Bool(true)),
683 Some(Token::Keyword(k)) if k == "FALSE" => Ok(Literal::Bool(false)),
684 other => Err(CypherError::ParseError(format!(
685 "expected literal, got {other:?}"
686 ))),
687 }
688 }
689
690 fn parse_expr(&mut self) -> Result<Expr, CypherError> {
691 let mut left = self.parse_and_expr()?;
692 while self.match_keyword("OR") {
693 let right = self.parse_and_expr()?;
694 left = Expr::Or(Box::new(left), Box::new(right));
695 }
696 Ok(left)
697 }
698
699 fn parse_and_expr(&mut self) -> Result<Expr, CypherError> {
700 let mut left = self.parse_unary_expr()?;
701 while self.match_keyword("AND") {
702 let right = self.parse_unary_expr()?;
703 left = Expr::And(Box::new(left), Box::new(right));
704 }
705 Ok(left)
706 }
707
708 fn parse_unary_expr(&mut self) -> Result<Expr, CypherError> {
709 if self.match_keyword("NOT") {
710 let inner = self.parse_unary_expr()?;
711 return Ok(Expr::Not(Box::new(inner)));
712 }
713 self.parse_compare()
714 }
715
716 fn parse_compare(&mut self) -> Result<Expr, CypherError> {
717 if matches!(
718 self.peek(),
719 Some(Token::Keyword(k)) if k == "TRUE" || k == "FALSE"
720 ) {
721 let lit = self.parse_literal()?;
722 return Ok(match lit {
723 Literal::Bool(b) => Expr::Bool(b),
724 _ => unreachable!(),
725 });
726 }
727 let left = self.parse_operand()?;
728 let op = match self.peek() {
729 Some(Token::Eq) => Some(CmpOp::Eq),
730 Some(Token::Neq) => Some(CmpOp::Neq),
731 Some(Token::Lt) => Some(CmpOp::Lt),
732 Some(Token::Le) => Some(CmpOp::Le),
733 Some(Token::Gt) => Some(CmpOp::Gt),
734 Some(Token::Ge) => Some(CmpOp::Ge),
735 _ => None,
736 };
737 let Some(op) = op else {
738 return Err(CypherError::ParseError(
739 "expected comparison operator in WHERE clause".into(),
740 ));
741 };
742 self.advance();
743 let right = self.parse_operand()?;
744 Ok(Expr::Compare(left, op, right))
745 }
746
747 fn parse_operand(&mut self) -> Result<Operand, CypherError> {
748 match self.peek() {
749 Some(Token::Str(_)) | Some(Token::Int(_)) => {
750 let lit = self.parse_literal()?;
751 Ok(Operand::Literal(lit))
752 }
753 Some(Token::Keyword(k)) if k == "TRUE" || k == "FALSE" => {
754 let lit = self.parse_literal()?;
755 Ok(Operand::Literal(lit))
756 }
757 Some(Token::Ident(_)) => {
758 let var = if let Some(Token::Ident(s)) = self.advance() {
759 s.clone()
760 } else {
761 unreachable!()
762 };
763 let property = if matches!(self.peek(), Some(Token::Dot)) {
764 self.advance();
765 match self.advance() {
766 Some(Token::Ident(s)) => Some(s.clone()),
767 other => {
768 return Err(CypherError::ParseError(format!(
769 "expected property name after '.', got {other:?}"
770 )))
771 }
772 }
773 } else {
774 None
775 };
776 Ok(Operand::Path { var, property })
777 }
778 other => Err(CypherError::ParseError(format!(
779 "expected operand, got {other:?}"
780 ))),
781 }
782 }
783
784 fn parse_projection(&mut self) -> Result<Projection, CypherError> {
785 let operand = self.parse_operand()?;
786 let alias = if self.match_keyword("AS") {
787 match self.advance() {
788 Some(Token::Ident(s)) => Some(s.clone()),
789 other => {
790 return Err(CypherError::ParseError(format!(
791 "expected alias after AS, got {other:?}"
792 )))
793 }
794 }
795 } else {
796 None
797 };
798 let explicit_alias = alias.is_some();
799 Ok(Projection {
800 operand,
801 alias: alias.unwrap_or_default(),
802 explicit_alias,
803 })
804 }
805}
806
807fn default_alias(operand: &Operand) -> String {
808 match operand {
809 Operand::Path {
810 var,
811 property: None,
812 } => var.clone(),
813 Operand::Path {
814 var,
815 property: Some(p),
816 } => format!("{var}.{p}"),
817 Operand::Literal(_) => "value".to_string(),
818 }
819}
820
821fn resolve_projection_aliases(projections: &mut [Projection]) -> Result<(), CypherError> {
833 use std::collections::HashSet;
834
835 let mut seen: HashSet<String> = HashSet::new();
837 for proj in projections.iter() {
838 if proj.explicit_alias && !seen.insert(proj.alias.clone()) {
839 return Err(CypherError::ParseError(format!(
840 "duplicate alias `{}` in RETURN clause",
841 proj.alias
842 )));
843 }
844 }
845
846 let mut literal_count: usize = 0;
848 for proj in projections.iter_mut() {
849 if proj.explicit_alias {
850 continue;
851 }
852 let base = default_alias(&proj.operand);
853 let is_literal_default = matches!(proj.operand, Operand::Literal(_));
854 let mut candidate = base.clone();
855 if is_literal_default {
856 literal_count += 1;
857 if literal_count >= 2 {
858 candidate = format!("{base}_{literal_count}");
859 }
860 }
861 let mut bump: usize = 2;
864 while seen.contains(&candidate) {
865 candidate = format!("{base}_{bump}");
866 bump += 1;
867 }
868 seen.insert(candidate.clone());
869 proj.alias = candidate;
870 }
871 Ok(())
872}
873
874fn exec(query: &Query, graph: &SymbolGraph) -> Result<Vec<CypherRow>, CypherError> {
879 let mut all_rows: Vec<HashMap<String, NodeId>> = vec![HashMap::new()];
880 for pattern in &query.matches {
881 let mut new_rows: Vec<HashMap<String, NodeId>> = Vec::new();
882 for binding in &all_rows {
883 let candidates = match_pattern(pattern, graph, binding)?;
884 for cand in candidates {
885 let mut merged = binding.clone();
886 for (k, v) in cand {
887 merged.insert(k, v);
888 }
889 new_rows.push(merged);
890 if new_rows.len() > MAX_ROWS {
895 return Err(CypherError::ExecError(format!(
896 "row budget exceeded ({} > {}); narrow the MATCH or add a WHERE clause",
897 new_rows.len(),
898 MAX_ROWS
899 )));
900 }
901 }
902 }
903 all_rows = new_rows;
904 }
905
906 let mut out: Vec<CypherRow> = Vec::new();
907 for binding in all_rows {
908 if let Some(expr) = &query.where_clause {
909 if !eval_bool(expr, &binding, graph)? {
910 continue;
911 }
912 }
913 let mut row: CypherRow = BTreeMap::new();
914 for proj in &query.projections {
915 let v = eval_operand(&proj.operand, &binding, graph)?;
916 row.insert(proj.alias.clone(), v);
917 }
918 out.push(row);
919 if out.len() > MAX_ROWS {
923 return Err(CypherError::ExecError(format!(
924 "row budget exceeded ({} > {}); narrow the MATCH or add a WHERE clause",
925 out.len(),
926 MAX_ROWS
927 )));
928 }
929 }
930 Ok(out)
931}
932
933fn match_pattern(
934 pattern: &Pattern,
935 graph: &SymbolGraph,
936 seed: &HashMap<String, NodeId>,
937) -> Result<Vec<HashMap<String, NodeId>>, CypherError> {
938 let head_candidates: Vec<NodeId> = if let Some(existing) = seed.get(&pattern.head.var) {
939 if node_matches(graph, *existing, &pattern.head) {
941 vec![*existing]
942 } else {
943 vec![]
944 }
945 } else {
946 candidate_nodes(graph, &pattern.head)
947 };
948
949 let mut out: Vec<HashMap<String, NodeId>> = Vec::new();
950 for head_id in head_candidates {
951 let mut binding: HashMap<String, NodeId> = HashMap::new();
952 binding.insert(pattern.head.var.clone(), head_id);
953 extend_steps(graph, &pattern.steps, 0, head_id, binding, &mut out)?;
954 }
955 Ok(out)
956}
957
958fn extend_steps(
959 graph: &SymbolGraph,
960 steps: &[RelStep],
961 idx: usize,
962 cursor: NodeId,
963 binding: HashMap<String, NodeId>,
964 out: &mut Vec<HashMap<String, NodeId>>,
965) -> Result<(), CypherError> {
966 if idx == steps.len() {
967 out.push(binding);
968 return Ok(());
969 }
970 let step = &steps[idx];
971 let (kind, label_reversed) =
972 EdgeKind::parse_with_direction(&step.edge_label).ok_or_else(|| {
973 CypherError::ExecError(format!("unknown edge label `{}`", step.edge_label))
974 })?;
975 let forward = step.forward ^ label_reversed;
977 let (lo, hi) = step.var_length.unwrap_or((1, 1));
978 if hi > VAR_LENGTH_MAX_DEPTH {
979 return Err(CypherError::ExecError(format!(
980 "variable-length traversal capped at depth {VAR_LENGTH_MAX_DEPTH} (got `*{lo}..{hi}`)"
981 )));
982 }
983 let lo = lo.max(1);
984 let hi = hi.max(lo);
985
986 let mut visited: HashSet<NodeId> = HashSet::new();
988 visited.insert(cursor);
989 let mut stack: Vec<(NodeId, u32, HashSet<NodeId>)> = vec![(cursor, 0, visited)];
990 while let Some((node, depth, visited)) = stack.pop() {
991 if depth >= hi {
992 continue;
993 }
994 let edges = if forward {
995 graph.outgoing(node)
996 } else {
997 graph.incoming(node)
998 };
999 for edge in edges {
1000 if edge.kind != kind {
1001 continue;
1002 }
1003 let next = if forward { edge.to } else { edge.from };
1004 if visited.contains(&next) {
1005 continue;
1006 }
1007 let new_depth = depth + 1;
1008 if new_depth >= lo && node_matches(graph, next, &step.target) {
1009 let already_bound = binding.get(&step.target.var);
1010 if matches!(already_bound, Some(id) if *id != next) {
1011 } else {
1013 let mut next_binding = binding.clone();
1014 next_binding.insert(step.target.var.clone(), next);
1015 extend_steps(graph, steps, idx + 1, next, next_binding, out)?;
1016 }
1017 }
1018 if new_depth < hi {
1019 let mut next_visited = visited.clone();
1020 next_visited.insert(next);
1021 stack.push((next, new_depth, next_visited));
1022 }
1023 }
1024 }
1025 Ok(())
1026}
1027
1028fn candidate_nodes(graph: &SymbolGraph, pat: &NodePat) -> Vec<NodeId> {
1029 if let Some(Literal::Str(name)) = pat.props.get("name") {
1031 return graph
1032 .nodes_named(name)
1033 .iter()
1034 .copied()
1035 .filter(|nid| node_matches(graph, *nid, pat))
1036 .collect();
1037 }
1038 if let Some(label) = &pat.label {
1039 let kind = NodeKind::parse(label).unwrap_or(NodeKind::Module);
1040 return graph
1041 .nodes_of_kind(kind)
1042 .into_iter()
1043 .filter(|nid| node_matches(graph, *nid, pat))
1044 .collect();
1045 }
1046 graph
1047 .all_node_ids()
1048 .into_iter()
1049 .filter(|nid| node_matches(graph, *nid, pat))
1050 .collect()
1051}
1052
1053fn node_matches(graph: &SymbolGraph, id: NodeId, pat: &NodePat) -> bool {
1054 let Some(node) = graph.node(id) else {
1055 return false;
1056 };
1057 if let Some(label) = &pat.label {
1058 let Some(kind) = NodeKind::parse(label) else {
1059 return false;
1060 };
1061 if node.kind != kind {
1062 return false;
1063 }
1064 }
1065 for (key, expected) in &pat.props {
1066 let actual = property_value(node, key);
1067 if &actual != expected {
1068 return false;
1069 }
1070 }
1071 true
1072}
1073
1074fn property_value(node: &super::symbol_graph::Node, key: &str) -> Literal {
1075 match key {
1076 "name" => Literal::Str(node.name.clone()),
1077 "path" => Literal::Str(node.path.clone()),
1078 "language" => Literal::Str(node.language.clone()),
1079 "kind" => Literal::Str(node.kind.as_str().to_string()),
1080 "container" => Literal::Str(node.container.clone().unwrap_or_default()),
1081 "signature" => Literal::Str(node.signature.clone()),
1082 "line" => Literal::Int(node.line as i64),
1083 "file_id" => Literal::Int(node.file_id as i64),
1084 "id" => Literal::Int(node.id as i64),
1085 _ => Literal::Str(String::new()),
1086 }
1087}
1088
1089fn eval_bool(
1090 expr: &Expr,
1091 binding: &HashMap<String, NodeId>,
1092 graph: &SymbolGraph,
1093) -> Result<bool, CypherError> {
1094 match expr {
1095 Expr::Bool(b) => Ok(*b),
1096 Expr::And(a, b) => Ok(eval_bool(a, binding, graph)? && eval_bool(b, binding, graph)?),
1097 Expr::Or(a, b) => Ok(eval_bool(a, binding, graph)? || eval_bool(b, binding, graph)?),
1098 Expr::Not(inner) => Ok(!eval_bool(inner, binding, graph)?),
1099 Expr::Compare(l, op, r) => {
1100 let lv = lookup_operand(l, binding, graph)?;
1101 let rv = lookup_operand(r, binding, graph)?;
1102 Ok(apply_cmp(&lv, *op, &rv))
1103 }
1104 }
1105}
1106
1107fn apply_cmp(left: &Literal, op: CmpOp, right: &Literal) -> bool {
1108 match (left, right) {
1109 (Literal::Int(a), Literal::Int(b)) => match op {
1110 CmpOp::Eq => a == b,
1111 CmpOp::Neq => a != b,
1112 CmpOp::Lt => a < b,
1113 CmpOp::Le => a <= b,
1114 CmpOp::Gt => a > b,
1115 CmpOp::Ge => a >= b,
1116 },
1117 (Literal::Str(a), Literal::Str(b)) => match op {
1118 CmpOp::Eq => a == b,
1119 CmpOp::Neq => a != b,
1120 CmpOp::Lt => a < b,
1121 CmpOp::Le => a <= b,
1122 CmpOp::Gt => a > b,
1123 CmpOp::Ge => a >= b,
1124 },
1125 (Literal::Bool(a), Literal::Bool(b)) => match op {
1126 CmpOp::Eq => a == b,
1127 CmpOp::Neq => a != b,
1128 _ => false,
1129 },
1130 _ => matches!(op, CmpOp::Neq),
1131 }
1132}
1133
1134fn lookup_operand(
1135 op: &Operand,
1136 binding: &HashMap<String, NodeId>,
1137 graph: &SymbolGraph,
1138) -> Result<Literal, CypherError> {
1139 match op {
1140 Operand::Literal(lit) => Ok(lit.clone()),
1141 Operand::Path { var, property } => {
1142 let id = binding.get(var).copied().ok_or_else(|| {
1143 CypherError::ExecError(format!("unbound variable `{var}` in expression"))
1144 })?;
1145 let node = graph
1146 .node(id)
1147 .ok_or_else(|| CypherError::ExecError(format!("node id {id} not in graph")))?;
1148 match property {
1149 Some(p) => Ok(property_value(node, p)),
1150 None => Ok(Literal::Str(node.name.clone())),
1151 }
1152 }
1153 }
1154}
1155
1156fn eval_operand(
1157 op: &Operand,
1158 binding: &HashMap<String, NodeId>,
1159 graph: &SymbolGraph,
1160) -> Result<CypherValue, CypherError> {
1161 let lit = lookup_operand(op, binding, graph)?;
1162 Ok(match lit {
1163 Literal::Str(s) => {
1164 if s.is_empty() {
1165 CypherValue::Null
1166 } else {
1167 CypherValue::String(s)
1168 }
1169 }
1170 Literal::Int(n) => CypherValue::Int(n),
1171 Literal::Bool(b) => CypherValue::Bool(b),
1172 })
1173}
1174
1175#[cfg(test)]
1176mod tests {
1177 use super::*;
1178 use crate::ast::Language;
1179
1180 fn fixture() -> SymbolGraph {
1181 let mut g = SymbolGraph::new();
1182 g.rebuild_file(
1183 1,
1184 "src/a.rs",
1185 Language::Rust,
1186 "fn start() {}\nfn driver() { start(); }\n",
1187 &[],
1188 );
1189 g.rebuild_file(
1190 2,
1191 "src/b.rs",
1192 Language::Rust,
1193 "fn entry() { driver(); }\n",
1194 &[],
1195 );
1196 g
1197 }
1198
1199 #[test]
1200 fn lexer_recognises_arrows_and_keywords() {
1201 let toks = lex("MATCH (a)-[:CALLS]->(b) RETURN a.name").unwrap();
1202 assert!(toks.iter().any(|t| matches!(t, Token::Arrow)));
1203 assert!(toks
1204 .iter()
1205 .any(|t| matches!(t, Token::Keyword(k) if k == "MATCH")));
1206 }
1207
1208 #[test]
1209 fn returns_function_by_name() {
1210 let g = fixture();
1211 let rows = execute(
1212 "MATCH (f:Function {name: 'start'}) RETURN f.path AS path, f.line AS line",
1213 &g,
1214 )
1215 .unwrap();
1216 assert_eq!(rows.len(), 1);
1217 assert_eq!(
1218 rows[0].get("path"),
1219 Some(&CypherValue::String("src/a.rs".into()))
1220 );
1221 }
1222
1223 #[test]
1224 fn called_by_var_length_finds_indirect_callers() {
1225 let g = fixture();
1226 let rows = execute(
1227 "MATCH (f:Function {name: 'start'})<-[:CALLS*1..3]-(c:CallSite) RETURN c.path AS path",
1228 &g,
1229 )
1230 .unwrap();
1231 assert!(!rows.is_empty(), "expected at least one call-site caller");
1232 }
1233
1234 #[test]
1235 fn where_predicate_filters_results() {
1236 let g = fixture();
1237 let rows = execute(
1238 "MATCH (f:Function) WHERE f.name = 'driver' RETURN f.path AS path",
1239 &g,
1240 )
1241 .unwrap();
1242 assert_eq!(rows.len(), 1);
1243 assert_eq!(
1244 rows[0].get("path"),
1245 Some(&CypherValue::String("src/a.rs".into()))
1246 );
1247 }
1248
1249 #[test]
1250 fn literal_default_aliases_do_not_collide() {
1251 let g = fixture();
1256 let rows = execute("MATCH (f:Function {name: 'start'}) RETURN 1, 2, 3", &g).unwrap();
1257 assert_eq!(rows.len(), 1);
1258 let row = &rows[0];
1259 assert_eq!(row.get("value"), Some(&CypherValue::Int(1)));
1260 assert_eq!(row.get("value_2"), Some(&CypherValue::Int(2)));
1261 assert_eq!(row.get("value_3"), Some(&CypherValue::Int(3)));
1262 }
1263
1264 #[test]
1265 fn duplicate_explicit_aliases_are_rejected() {
1266 let g = fixture();
1267 let err = execute("MATCH (f:Function) RETURN f.name AS n, f.path AS n", &g).unwrap_err();
1268 assert!(
1269 matches!(err, CypherError::ParseError(_)),
1270 "expected ParseError for duplicate explicit alias, got {err:?}"
1271 );
1272 }
1273
1274 #[test]
1275 fn rejects_too_many_disjoint_patterns() {
1276 let g = fixture();
1280 let err = execute(
1281 "MATCH (a:Function),(b:Function),(c:Function),(d:Function) RETURN a.name",
1282 &g,
1283 )
1284 .unwrap_err();
1285 assert!(
1286 matches!(&err, CypherError::ParseError(msg) if msg.contains("too many disjoint MATCH patterns")),
1287 "expected ParseError for too many patterns, got {err:?}"
1288 );
1289 }
1290
1291 #[test]
1292 fn enforces_row_budget_on_cartesian_explosion() {
1293 let mut g = SymbolGraph::new();
1297 let mut source = String::new();
1298 for i in 0..25 {
1299 source.push_str(&format!("fn f{i}() {{}}\n"));
1300 }
1301 g.rebuild_file(1, "src/big.rs", Language::Rust, &source, &[]);
1302
1303 let err = execute(
1304 "MATCH (a:Function),(b:Function),(c:Function) RETURN a.name AS n",
1305 &g,
1306 )
1307 .unwrap_err();
1308 assert!(
1309 matches!(&err, CypherError::ExecError(msg) if msg.contains("row budget exceeded")),
1310 "expected ExecError for row budget, got {err:?}"
1311 );
1312 }
1313
1314 #[test]
1315 fn open_upper_bound_defaults_to_depth_cap() {
1316 let toks = lex("MATCH (a:Function)-[:CALLS*1..]->(b:Function) RETURN a.name AS n").unwrap();
1318 let q = parse(&toks).unwrap();
1319 let step = &q.matches[0].steps[0];
1320 assert_eq!(step.var_length, Some((1, VAR_LENGTH_MAX_DEPTH)));
1321
1322 let toks = lex("MATCH (a:Function)-[:CALLS*2..]->(b:Function) RETURN a.name AS n").unwrap();
1324 let q = parse(&toks).unwrap();
1325 let step = &q.matches[0].steps[0];
1326 assert_eq!(step.var_length, Some((2, VAR_LENGTH_MAX_DEPTH)));
1327 }
1328
1329 #[test]
1330 fn rejects_depth_above_four() {
1331 let g = fixture();
1332 let err = execute(
1333 "MATCH (a:Function)-[:CALLS*1..6]->(b:Function) RETURN a.name AS n",
1334 &g,
1335 )
1336 .unwrap_err();
1337 assert!(
1338 matches!(err, CypherError::ExecError(_)),
1339 "expected ExecError, got {err:?}"
1340 );
1341 }
1342}