Skip to main content

harn_hostlib/code_index/
cypher.rs

1//! Minimal Cypher executor over [`super::SymbolGraph`].
2//!
3//! Supported subset (intentionally narrow — see issue #2434):
4//!
5//! ```text
6//! query     = MATCH pattern [, pattern]* [WHERE expr] RETURN proj [, proj]*
7//! pattern   = '(' var ':' label '{' kv (',' kv)* '}' ')'
8//!             [ ('-' | '<-') '[' ':' edge ('*' bounds)? ']' ('->' | '-') node ]*
9//! bounds    = INT? '..' INT?
10//! expr      = term [ ('AND' | 'OR') term ]*
11//! term      = path op literal | path op path | literal
12//! op        = '=' | '<>' | '!=' | '<' | '<=' | '>' | '>='
13//! proj      = path [AS alias]
14//! path      = ident '.' ident | ident
15//! literal   = STRING | INT
16//! ```
17//!
18//! Variable-length traversal up to depth 4 is supported via `*1..N` or
19//! the shorthand `*` (defaults to `1..3`). When the upper bound is
20//! omitted (e.g. `*1..` or `*2..`), it defaults to the executor depth
21//! cap (4) — matching the usual Cypher "open upper bound = capped by
22//! engine" semantics. The executor implements depth-first enumeration
23//! with a per-step visited set so cycles can't infinite-loop.
24//!
25//! Read-only; no MERGE/CREATE/SET. The result is a flat
26//! [`Vec<CypherRow>`] where each row maps the projected aliases to
27//! [`CypherValue`]s.
28//!
29//! Execution is bounded by two budgets enforced inside [`exec`] and
30//! [`parse`]:
31//! * [`MAX_ROWS`] — maximum number of intermediate bindings *and*
32//!   projected rows. The executor returns [`CypherError::ExecError`] as
33//!   soon as it tries to push past the cap, so a degenerate
34//!   `MATCH (a),(b),(c) RETURN ...` query cannot lock the symbol-graph
35//!   index for arbitrarily long.
36//! * [`MAX_PATTERNS`] — maximum number of comma-separated disjoint
37//!   patterns in one query. Beyond this, the parser raises
38//!   [`CypherError::ParseError`]; richer joins should use the edge
39//!   syntax (`-[:CALLS]->`) which is bounded by [`MAX_ROWS`] but does
40//!   not multiply pattern counts.
41
42use std::collections::{BTreeMap, HashMap, HashSet};
43use std::fmt;
44
45use harn_vm::VmValue;
46
47use super::symbol_graph::{EdgeKind, NodeId, NodeKind, SymbolGraph};
48
49/// Hard upper bound on the number of rows the executor will materialize
50/// before raising [`CypherError::ExecError`]. A query like
51/// `MATCH (a),(b),(c),(d) RETURN ...` over a workspace symbol graph can
52/// produce N⁴ rows in the worst case; without a cap a single Cypher call
53/// can lock the host's symbol-graph index for many seconds. This budget
54/// matches the schema description (`code_index/cypher.request.json`).
55pub const MAX_ROWS: usize = 10_000;
56
57/// Hard upper bound on the number of comma-separated `MATCH` patterns in
58/// one query. Each additional pattern multiplies the row count, so we
59/// refuse to even start enumerating beyond three disjoint patterns —
60/// scripts that need a richer join should chain patterns through the
61/// edge syntax (`-[:CALLS]->`) rather than relying on a cartesian
62/// product. Enforced at parse time so the executor never sees one.
63pub const MAX_PATTERNS: usize = 3;
64
65/// Maximum traversal depth for variable-length patterns (`*lo..hi`).
66/// Also used as the implicit upper bound when the high end is omitted
67/// (e.g. `*1..`), matching standard "open upper bound" Cypher semantics.
68const VAR_LENGTH_MAX_DEPTH: u32 = 4;
69
70/// Error variants the parser/executor raise. The host wraps these in
71/// [`crate::error::HostlibError`] before surfacing to scripts. Each
72/// variant carries a free-text message identifying the offending token
73/// or position.
74#[derive(Debug, Clone, Eq, PartialEq)]
75pub enum CypherError {
76    /// Tokenizer reached an unexpected character.
77    LexError(String),
78    /// Parser saw a token it didn't recognize at this position.
79    ParseError(String),
80    /// Executor saw something semantically invalid (unknown variable,
81    /// unknown edge label, var-length bound > 4, …).
82    ExecError(String),
83}
84
85impl fmt::Display for CypherError {
86    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
87        match self {
88            CypherError::LexError(s) => write!(f, "lex error: {s}"),
89            CypherError::ParseError(s) => write!(f, "parse error: {s}"),
90            CypherError::ExecError(s) => write!(f, "exec error: {s}"),
91        }
92    }
93}
94
95impl std::error::Error for CypherError {}
96
97/// One projected value in a row.
98#[derive(Debug, Clone, Eq, PartialEq)]
99pub enum CypherValue {
100    /// SQL/Cypher `NULL`. Falls back to `VmValue::Nil`.
101    Null,
102    /// UTF-8 string.
103    String(String),
104    /// 64-bit signed integer.
105    Int(i64),
106    /// Boolean.
107    Bool(bool),
108}
109
110impl CypherValue {
111    /// Render as the corresponding [`VmValue`].
112    pub fn to_vm(&self) -> VmValue {
113        match self {
114            CypherValue::Null => VmValue::Nil,
115            CypherValue::String(s) => VmValue::String(arcstr::ArcStr::from(s.as_str())),
116            CypherValue::Int(n) => VmValue::Int(*n),
117            CypherValue::Bool(b) => VmValue::Bool(*b),
118        }
119    }
120}
121
122/// One projected row.
123pub type CypherRow = BTreeMap<String, CypherValue>;
124
125/// Parse + execute `query` against `graph`. Returns one row per match.
126pub fn execute(query: &str, graph: &SymbolGraph) -> Result<Vec<CypherRow>, CypherError> {
127    let tokens = lex(query)?;
128    let ast = parse(&tokens)?;
129    exec(&ast, graph)
130}
131
132// ---------------------------------------------------------------------------
133// Lexer
134// ---------------------------------------------------------------------------
135
136#[derive(Debug, Clone, PartialEq)]
137enum Token {
138    /// Keyword (uppercase form, case-insensitive matching at lex time).
139    Keyword(String),
140    Ident(String),
141    Str(String),
142    Int(i64),
143    LParen,
144    RParen,
145    LBrace,
146    RBrace,
147    LBracket,
148    RBracket,
149    Colon,
150    Comma,
151    Dot,
152    Eq,
153    Neq,
154    Lt,
155    Le,
156    Gt,
157    Ge,
158    Star,
159    DotDot,
160    Dash,
161    Arrow,     // ->
162    LeftArrow, // <-
163}
164
165fn lex(input: &str) -> Result<Vec<Token>, CypherError> {
166    let mut out: Vec<Token> = Vec::new();
167    let bytes = input.as_bytes();
168    let mut i = 0;
169    while i < bytes.len() {
170        let b = bytes[i];
171        if b.is_ascii_whitespace() {
172            i += 1;
173            continue;
174        }
175        if b == b'(' {
176            out.push(Token::LParen);
177            i += 1;
178            continue;
179        }
180        if b == b')' {
181            out.push(Token::RParen);
182            i += 1;
183            continue;
184        }
185        if b == b'{' {
186            out.push(Token::LBrace);
187            i += 1;
188            continue;
189        }
190        if b == b'}' {
191            out.push(Token::RBrace);
192            i += 1;
193            continue;
194        }
195        if b == b'[' {
196            out.push(Token::LBracket);
197            i += 1;
198            continue;
199        }
200        if b == b']' {
201            out.push(Token::RBracket);
202            i += 1;
203            continue;
204        }
205        if b == b':' {
206            out.push(Token::Colon);
207            i += 1;
208            continue;
209        }
210        if b == b',' {
211            out.push(Token::Comma);
212            i += 1;
213            continue;
214        }
215        if b == b'*' {
216            out.push(Token::Star);
217            i += 1;
218            continue;
219        }
220        if b == b'=' {
221            out.push(Token::Eq);
222            i += 1;
223            continue;
224        }
225        if b == b'.' {
226            if i + 1 < bytes.len() && bytes[i + 1] == b'.' {
227                out.push(Token::DotDot);
228                i += 2;
229            } else {
230                out.push(Token::Dot);
231                i += 1;
232            }
233            continue;
234        }
235        if b == b'<' {
236            if i + 1 < bytes.len() && bytes[i + 1] == b'-' {
237                out.push(Token::LeftArrow);
238                i += 2;
239            } else if i + 1 < bytes.len() && bytes[i + 1] == b'=' {
240                out.push(Token::Le);
241                i += 2;
242            } else if i + 1 < bytes.len() && bytes[i + 1] == b'>' {
243                out.push(Token::Neq);
244                i += 2;
245            } else {
246                out.push(Token::Lt);
247                i += 1;
248            }
249            continue;
250        }
251        if b == b'>' {
252            if i + 1 < bytes.len() && bytes[i + 1] == b'=' {
253                out.push(Token::Ge);
254                i += 2;
255            } else {
256                out.push(Token::Gt);
257                i += 1;
258            }
259            continue;
260        }
261        if b == b'!' {
262            if i + 1 < bytes.len() && bytes[i + 1] == b'=' {
263                out.push(Token::Neq);
264                i += 2;
265                continue;
266            }
267            return Err(CypherError::LexError(format!(
268                "unexpected '!' at byte {i} (expected '!=')"
269            )));
270        }
271        if b == b'-' {
272            if i + 1 < bytes.len() && bytes[i + 1] == b'>' {
273                out.push(Token::Arrow);
274                i += 2;
275            } else {
276                out.push(Token::Dash);
277                i += 1;
278            }
279            continue;
280        }
281        if b == b'\'' || b == b'"' {
282            let quote = b;
283            let start = i + 1;
284            i += 1;
285            while i < bytes.len() && bytes[i] != quote {
286                if bytes[i] == b'\\' && i + 1 < bytes.len() {
287                    i += 2;
288                } else {
289                    i += 1;
290                }
291            }
292            if i >= bytes.len() {
293                return Err(CypherError::LexError("unterminated string literal".into()));
294            }
295            let raw = &input[start..i];
296            out.push(Token::Str(unescape(raw)));
297            i += 1;
298            continue;
299        }
300        if b.is_ascii_digit() {
301            let start = i;
302            while i < bytes.len() && bytes[i].is_ascii_digit() {
303                i += 1;
304            }
305            let n: i64 = input[start..i].parse().map_err(|err| {
306                CypherError::LexError(format!("bad integer at byte {start}: {err}"))
307            })?;
308            out.push(Token::Int(n));
309            continue;
310        }
311        if b.is_ascii_alphabetic() || b == b'_' {
312            let start = i;
313            while i < bytes.len() && (bytes[i].is_ascii_alphanumeric() || bytes[i] == b'_') {
314                i += 1;
315            }
316            let word = &input[start..i];
317            let upper = word.to_ascii_uppercase();
318            if is_keyword(&upper) {
319                out.push(Token::Keyword(upper));
320            } else {
321                out.push(Token::Ident(word.to_string()));
322            }
323            continue;
324        }
325        return Err(CypherError::LexError(format!(
326            "unexpected character `{}` at byte {i}",
327            char::from(b)
328        )));
329    }
330    Ok(out)
331}
332
333fn is_keyword(word: &str) -> bool {
334    matches!(
335        word,
336        "MATCH" | "WHERE" | "RETURN" | "AND" | "OR" | "AS" | "NOT" | "TRUE" | "FALSE"
337    )
338}
339
340fn unescape(raw: &str) -> String {
341    let mut out = String::with_capacity(raw.len());
342    let mut iter = raw.chars();
343    while let Some(c) = iter.next() {
344        if c == '\\' {
345            if let Some(next) = iter.next() {
346                out.push(match next {
347                    'n' => '\n',
348                    't' => '\t',
349                    'r' => '\r',
350                    '\\' => '\\',
351                    '\'' => '\'',
352                    '"' => '"',
353                    other => other,
354                });
355            }
356        } else {
357            out.push(c);
358        }
359    }
360    out
361}
362
363// ---------------------------------------------------------------------------
364// Parser AST
365// ---------------------------------------------------------------------------
366
367#[derive(Debug, Clone)]
368struct Query {
369    matches: Vec<Pattern>,
370    where_clause: Option<Expr>,
371    projections: Vec<Projection>,
372}
373
374#[derive(Debug, Clone)]
375struct Pattern {
376    head: NodePat,
377    steps: Vec<RelStep>,
378}
379
380#[derive(Debug, Clone)]
381struct NodePat {
382    var: String,
383    label: Option<String>,
384    props: BTreeMap<String, Literal>,
385}
386
387#[derive(Debug, Clone)]
388struct RelStep {
389    /// Direction the arrow points. true = forward (`-[]->`), false = reverse (`<-[]-`).
390    forward: bool,
391    edge_label: String,
392    var_length: Option<(u32, u32)>,
393    target: NodePat,
394}
395
396#[derive(Debug, Clone)]
397enum Expr {
398    And(Box<Expr>, Box<Expr>),
399    Or(Box<Expr>, Box<Expr>),
400    Not(Box<Expr>),
401    Compare(Operand, CmpOp, Operand),
402    Bool(bool),
403}
404
405#[derive(Debug, Clone, Copy, PartialEq, Eq)]
406enum CmpOp {
407    Eq,
408    Neq,
409    Lt,
410    Le,
411    Gt,
412    Ge,
413}
414
415#[derive(Debug, Clone)]
416enum Operand {
417    /// `var.property` or just `var` (no property).
418    Path {
419        var: String,
420        property: Option<String>,
421    },
422    Literal(Literal),
423}
424
425#[derive(Debug, Clone, PartialEq, Eq)]
426enum Literal {
427    Str(String),
428    Int(i64),
429    Bool(bool),
430}
431
432#[derive(Debug, Clone)]
433struct Projection {
434    operand: Operand,
435    /// Final alias used in the projected row. Empty during parsing
436    /// when no explicit `AS` clause was supplied; filled in after the
437    /// query is fully parsed by [`resolve_projection_aliases`].
438    alias: String,
439    /// `true` if the user supplied an `AS <name>` clause for this
440    /// projection. Used to disambiguate default-alias collisions
441    /// (e.g. `RETURN 1, 2` would otherwise project two `value` keys
442    /// into the same `BTreeMap`).
443    explicit_alias: bool,
444}
445
446// ---------------------------------------------------------------------------
447// Parser
448// ---------------------------------------------------------------------------
449
450struct Parser<'a> {
451    tokens: &'a [Token],
452    pos: usize,
453}
454
455fn parse(tokens: &[Token]) -> Result<Query, CypherError> {
456    let mut p = Parser { tokens, pos: 0 };
457    let q = p.parse_query()?;
458    if p.pos != tokens.len() {
459        return Err(CypherError::ParseError(format!(
460            "trailing tokens after RETURN at position {} ({:?})",
461            p.pos, tokens[p.pos]
462        )));
463    }
464    Ok(q)
465}
466
467impl<'a> Parser<'a> {
468    fn peek(&self) -> Option<&Token> {
469        self.tokens.get(self.pos)
470    }
471
472    fn advance(&mut self) -> Option<&Token> {
473        let t = self.tokens.get(self.pos);
474        self.pos += 1;
475        t
476    }
477
478    fn expect_keyword(&mut self, word: &str) -> Result<(), CypherError> {
479        match self.advance() {
480            Some(Token::Keyword(k)) if k == word => Ok(()),
481            other => Err(CypherError::ParseError(format!(
482                "expected `{word}`, got {other:?}"
483            ))),
484        }
485    }
486
487    fn match_keyword(&mut self, word: &str) -> bool {
488        if matches!(self.peek(), Some(Token::Keyword(k)) if k == word) {
489            self.pos += 1;
490            true
491        } else {
492            false
493        }
494    }
495
496    fn expect(&mut self, want: &Token) -> Result<(), CypherError> {
497        let next = self.advance();
498        if next == Some(want) {
499            Ok(())
500        } else {
501            Err(CypherError::ParseError(format!(
502                "expected {want:?}, got {next:?}"
503            )))
504        }
505    }
506
507    fn parse_query(&mut self) -> Result<Query, CypherError> {
508        self.expect_keyword("MATCH")?;
509        let mut matches = vec![self.parse_pattern()?];
510        while matches!(self.peek(), Some(Token::Comma)) {
511            self.advance();
512            matches.push(self.parse_pattern()?);
513            if matches.len() > MAX_PATTERNS {
514                return Err(CypherError::ParseError(format!(
515                    "too many disjoint MATCH patterns (got {}, cap is {}); \
516                     join through edge syntax (`-[:KIND]->`) instead of a cartesian product",
517                    matches.len(),
518                    MAX_PATTERNS
519                )));
520            }
521        }
522        let where_clause = if self.match_keyword("WHERE") {
523            Some(self.parse_expr()?)
524        } else {
525            None
526        };
527        self.expect_keyword("RETURN")?;
528        let mut projections = vec![self.parse_projection()?];
529        while matches!(self.peek(), Some(Token::Comma)) {
530            self.advance();
531            projections.push(self.parse_projection()?);
532        }
533        resolve_projection_aliases(&mut projections)?;
534        Ok(Query {
535            matches,
536            where_clause,
537            projections,
538        })
539    }
540
541    fn parse_pattern(&mut self) -> Result<Pattern, CypherError> {
542        let head = self.parse_node_pat()?;
543        let mut steps: Vec<RelStep> = Vec::new();
544        loop {
545            // Detect either `-[:X]->` or `<-[:X]-`.
546            let forward = match self.peek() {
547                Some(Token::Dash) => true,
548                Some(Token::LeftArrow) => false,
549                _ => break,
550            };
551            self.advance(); // consume - or <-
552            self.expect(&Token::LBracket)?;
553            self.expect(&Token::Colon)?;
554            let edge_label = match self.advance() {
555                Some(Token::Ident(s)) => s.clone(),
556                other => {
557                    return Err(CypherError::ParseError(format!(
558                        "expected edge label, got {other:?}"
559                    )))
560                }
561            };
562            let var_length = if matches!(self.peek(), Some(Token::Star)) {
563                self.advance();
564                let lo = if matches!(self.peek(), Some(Token::Int(_))) {
565                    if let Some(Token::Int(n)) = self.advance() {
566                        Some(*n)
567                    } else {
568                        None
569                    }
570                } else {
571                    None
572                };
573                let bounds = if matches!(self.peek(), Some(Token::DotDot)) {
574                    self.advance();
575                    let hi = if matches!(self.peek(), Some(Token::Int(_))) {
576                        if let Some(Token::Int(n)) = self.advance() {
577                            Some(*n)
578                        } else {
579                            None
580                        }
581                    } else {
582                        None
583                    };
584                    // `*lo..` with no upper bound: default to the executor
585                    // depth cap so users opt into the maximum traversal
586                    // length without naming it explicitly.
587                    let hi_v = hi.map(|n| n as u32).unwrap_or(VAR_LENGTH_MAX_DEPTH);
588                    (lo.unwrap_or(1) as u32, hi_v)
589                } else {
590                    // bare `*`: 1..3
591                    let lo_v = lo.unwrap_or(1) as u32;
592                    (lo_v, lo_v.max(3))
593                };
594                Some(bounds)
595            } else {
596                None
597            };
598            self.expect(&Token::RBracket)?;
599            if forward {
600                self.expect(&Token::Arrow)?;
601            } else {
602                self.expect(&Token::Dash)?;
603            }
604            let target = self.parse_node_pat()?;
605            steps.push(RelStep {
606                forward,
607                edge_label,
608                var_length,
609                target,
610            });
611        }
612        Ok(Pattern { head, steps })
613    }
614
615    fn parse_node_pat(&mut self) -> Result<NodePat, CypherError> {
616        self.expect(&Token::LParen)?;
617        let var = match self.advance() {
618            Some(Token::Ident(s)) => s.clone(),
619            other => {
620                return Err(CypherError::ParseError(format!(
621                    "expected node variable, got {other:?}"
622                )))
623            }
624        };
625        let label = if matches!(self.peek(), Some(Token::Colon)) {
626            self.advance();
627            match self.advance() {
628                Some(Token::Ident(s)) => Some(s.clone()),
629                other => {
630                    return Err(CypherError::ParseError(format!(
631                        "expected node label, got {other:?}"
632                    )))
633                }
634            }
635        } else {
636            None
637        };
638        let props = if matches!(self.peek(), Some(Token::LBrace)) {
639            self.parse_props()?
640        } else {
641            BTreeMap::new()
642        };
643        self.expect(&Token::RParen)?;
644        Ok(NodePat { var, label, props })
645    }
646
647    fn parse_props(&mut self) -> Result<BTreeMap<String, Literal>, CypherError> {
648        self.expect(&Token::LBrace)?;
649        let mut props: BTreeMap<String, Literal> = BTreeMap::new();
650        loop {
651            let key = match self.advance() {
652                Some(Token::Ident(s)) => s.clone(),
653                other => {
654                    return Err(CypherError::ParseError(format!(
655                        "expected property key, got {other:?}"
656                    )))
657                }
658            };
659            self.expect(&Token::Colon)?;
660            let value = self.parse_literal()?;
661            props.insert(key, value);
662            match self.peek() {
663                Some(Token::Comma) => {
664                    self.advance();
665                }
666                Some(Token::RBrace) => break,
667                other => {
668                    return Err(CypherError::ParseError(format!(
669                        "expected ',' or '}}', got {other:?}"
670                    )))
671                }
672            }
673        }
674        self.expect(&Token::RBrace)?;
675        Ok(props)
676    }
677
678    fn parse_literal(&mut self) -> Result<Literal, CypherError> {
679        match self.advance() {
680            Some(Token::Str(s)) => Ok(Literal::Str(s.clone())),
681            Some(Token::Int(n)) => Ok(Literal::Int(*n)),
682            Some(Token::Keyword(k)) if k == "TRUE" => Ok(Literal::Bool(true)),
683            Some(Token::Keyword(k)) if k == "FALSE" => Ok(Literal::Bool(false)),
684            other => Err(CypherError::ParseError(format!(
685                "expected literal, got {other:?}"
686            ))),
687        }
688    }
689
690    fn parse_expr(&mut self) -> Result<Expr, CypherError> {
691        let mut left = self.parse_and_expr()?;
692        while self.match_keyword("OR") {
693            let right = self.parse_and_expr()?;
694            left = Expr::Or(Box::new(left), Box::new(right));
695        }
696        Ok(left)
697    }
698
699    fn parse_and_expr(&mut self) -> Result<Expr, CypherError> {
700        let mut left = self.parse_unary_expr()?;
701        while self.match_keyword("AND") {
702            let right = self.parse_unary_expr()?;
703            left = Expr::And(Box::new(left), Box::new(right));
704        }
705        Ok(left)
706    }
707
708    fn parse_unary_expr(&mut self) -> Result<Expr, CypherError> {
709        if self.match_keyword("NOT") {
710            let inner = self.parse_unary_expr()?;
711            return Ok(Expr::Not(Box::new(inner)));
712        }
713        self.parse_compare()
714    }
715
716    fn parse_compare(&mut self) -> Result<Expr, CypherError> {
717        if matches!(
718            self.peek(),
719            Some(Token::Keyword(k)) if k == "TRUE" || k == "FALSE"
720        ) {
721            let lit = self.parse_literal()?;
722            return Ok(match lit {
723                Literal::Bool(b) => Expr::Bool(b),
724                _ => unreachable!(),
725            });
726        }
727        let left = self.parse_operand()?;
728        let op = match self.peek() {
729            Some(Token::Eq) => Some(CmpOp::Eq),
730            Some(Token::Neq) => Some(CmpOp::Neq),
731            Some(Token::Lt) => Some(CmpOp::Lt),
732            Some(Token::Le) => Some(CmpOp::Le),
733            Some(Token::Gt) => Some(CmpOp::Gt),
734            Some(Token::Ge) => Some(CmpOp::Ge),
735            _ => None,
736        };
737        let Some(op) = op else {
738            return Err(CypherError::ParseError(
739                "expected comparison operator in WHERE clause".into(),
740            ));
741        };
742        self.advance();
743        let right = self.parse_operand()?;
744        Ok(Expr::Compare(left, op, right))
745    }
746
747    fn parse_operand(&mut self) -> Result<Operand, CypherError> {
748        match self.peek() {
749            Some(Token::Str(_)) | Some(Token::Int(_)) => {
750                let lit = self.parse_literal()?;
751                Ok(Operand::Literal(lit))
752            }
753            Some(Token::Keyword(k)) if k == "TRUE" || k == "FALSE" => {
754                let lit = self.parse_literal()?;
755                Ok(Operand::Literal(lit))
756            }
757            Some(Token::Ident(_)) => {
758                let var = if let Some(Token::Ident(s)) = self.advance() {
759                    s.clone()
760                } else {
761                    unreachable!()
762                };
763                let property = if matches!(self.peek(), Some(Token::Dot)) {
764                    self.advance();
765                    match self.advance() {
766                        Some(Token::Ident(s)) => Some(s.clone()),
767                        other => {
768                            return Err(CypherError::ParseError(format!(
769                                "expected property name after '.', got {other:?}"
770                            )))
771                        }
772                    }
773                } else {
774                    None
775                };
776                Ok(Operand::Path { var, property })
777            }
778            other => Err(CypherError::ParseError(format!(
779                "expected operand, got {other:?}"
780            ))),
781        }
782    }
783
784    fn parse_projection(&mut self) -> Result<Projection, CypherError> {
785        let operand = self.parse_operand()?;
786        let alias = if self.match_keyword("AS") {
787            match self.advance() {
788                Some(Token::Ident(s)) => Some(s.clone()),
789                other => {
790                    return Err(CypherError::ParseError(format!(
791                        "expected alias after AS, got {other:?}"
792                    )))
793                }
794            }
795        } else {
796            None
797        };
798        let explicit_alias = alias.is_some();
799        Ok(Projection {
800            operand,
801            alias: alias.unwrap_or_default(),
802            explicit_alias,
803        })
804    }
805}
806
807fn default_alias(operand: &Operand) -> String {
808    match operand {
809        Operand::Path {
810            var,
811            property: None,
812        } => var.clone(),
813        Operand::Path {
814            var,
815            property: Some(p),
816        } => format!("{var}.{p}"),
817        Operand::Literal(_) => "value".to_string(),
818    }
819}
820
821/// Assign default aliases to every projection that didn't get an
822/// explicit `AS <name>`. Default aliases for literal operands all start
823/// as `"value"`, so without deduplication a query like `RETURN 1, 2`
824/// would silently overwrite the first column. We resolve the collision
825/// deterministically by suffixing the second, third, … occurrence with
826/// `_2`, `_3`, … For non-literal default aliases (`var` or
827/// `var.property`) we leave names unchanged because they are already
828/// disambiguated by the underlying path. If a default-derived alias
829/// collides with an explicit alias, the default is suffixed; an
830/// explicit alias colliding with another explicit alias raises a
831/// `ParseError::ParseError` because that is a user authoring mistake.
832fn resolve_projection_aliases(projections: &mut [Projection]) -> Result<(), CypherError> {
833    use std::collections::HashSet;
834
835    // First pass: collect explicit aliases and detect duplicates among them.
836    let mut seen: HashSet<String> = HashSet::new();
837    for proj in projections.iter() {
838        if proj.explicit_alias && !seen.insert(proj.alias.clone()) {
839            return Err(CypherError::ParseError(format!(
840                "duplicate alias `{}` in RETURN clause",
841                proj.alias
842            )));
843        }
844    }
845
846    // Second pass: fill in default aliases with collision-suffixing.
847    let mut literal_count: usize = 0;
848    for proj in projections.iter_mut() {
849        if proj.explicit_alias {
850            continue;
851        }
852        let base = default_alias(&proj.operand);
853        let is_literal_default = matches!(proj.operand, Operand::Literal(_));
854        let mut candidate = base.clone();
855        if is_literal_default {
856            literal_count += 1;
857            if literal_count >= 2 {
858                candidate = format!("{base}_{literal_count}");
859            }
860        }
861        // Guard against any other accidental collision (default path
862        // alias clashing with an explicit alias or another default).
863        let mut bump: usize = 2;
864        while seen.contains(&candidate) {
865            candidate = format!("{base}_{bump}");
866            bump += 1;
867        }
868        seen.insert(candidate.clone());
869        proj.alias = candidate;
870    }
871    Ok(())
872}
873
874// ---------------------------------------------------------------------------
875// Executor
876// ---------------------------------------------------------------------------
877
878fn exec(query: &Query, graph: &SymbolGraph) -> Result<Vec<CypherRow>, CypherError> {
879    let mut all_rows: Vec<HashMap<String, NodeId>> = vec![HashMap::new()];
880    for pattern in &query.matches {
881        let mut new_rows: Vec<HashMap<String, NodeId>> = Vec::new();
882        for binding in &all_rows {
883            let candidates = match_pattern(pattern, graph, binding)?;
884            for cand in candidates {
885                let mut merged = binding.clone();
886                for (k, v) in cand {
887                    merged.insert(k, v);
888                }
889                new_rows.push(merged);
890                // Pre-projection budget: a cartesian explosion can
891                // balloon `new_rows` long before we ever start
892                // building projected rows, so we cap intermediate
893                // bindings too.
894                if new_rows.len() > MAX_ROWS {
895                    return Err(CypherError::ExecError(format!(
896                        "row budget exceeded ({} > {}); narrow the MATCH or add a WHERE clause",
897                        new_rows.len(),
898                        MAX_ROWS
899                    )));
900                }
901            }
902        }
903        all_rows = new_rows;
904    }
905
906    let mut out: Vec<CypherRow> = Vec::new();
907    for binding in all_rows {
908        if let Some(expr) = &query.where_clause {
909            if !eval_bool(expr, &binding, graph)? {
910                continue;
911            }
912        }
913        let mut row: CypherRow = BTreeMap::new();
914        for proj in &query.projections {
915            let v = eval_operand(&proj.operand, &binding, graph)?;
916            row.insert(proj.alias.clone(), v);
917        }
918        out.push(row);
919        // Post-WHERE budget: even if intermediate bindings stayed
920        // under the cap, a permissive projection on a wide graph can
921        // still hand back too many rows.
922        if out.len() > MAX_ROWS {
923            return Err(CypherError::ExecError(format!(
924                "row budget exceeded ({} > {}); narrow the MATCH or add a WHERE clause",
925                out.len(),
926                MAX_ROWS
927            )));
928        }
929    }
930    Ok(out)
931}
932
933fn match_pattern(
934    pattern: &Pattern,
935    graph: &SymbolGraph,
936    seed: &HashMap<String, NodeId>,
937) -> Result<Vec<HashMap<String, NodeId>>, CypherError> {
938    let head_candidates: Vec<NodeId> = if let Some(existing) = seed.get(&pattern.head.var) {
939        // Already bound — verify it matches the constraints.
940        if node_matches(graph, *existing, &pattern.head) {
941            vec![*existing]
942        } else {
943            vec![]
944        }
945    } else {
946        candidate_nodes(graph, &pattern.head)
947    };
948
949    let mut out: Vec<HashMap<String, NodeId>> = Vec::new();
950    for head_id in head_candidates {
951        let mut binding: HashMap<String, NodeId> = HashMap::new();
952        binding.insert(pattern.head.var.clone(), head_id);
953        extend_steps(graph, &pattern.steps, 0, head_id, binding, &mut out)?;
954    }
955    Ok(out)
956}
957
958fn extend_steps(
959    graph: &SymbolGraph,
960    steps: &[RelStep],
961    idx: usize,
962    cursor: NodeId,
963    binding: HashMap<String, NodeId>,
964    out: &mut Vec<HashMap<String, NodeId>>,
965) -> Result<(), CypherError> {
966    if idx == steps.len() {
967        out.push(binding);
968        return Ok(());
969    }
970    let step = &steps[idx];
971    let (kind, label_reversed) =
972        EdgeKind::parse_with_direction(&step.edge_label).ok_or_else(|| {
973            CypherError::ExecError(format!("unknown edge label `{}`", step.edge_label))
974        })?;
975    // Pattern arrow direction XOR label inversion = effective direction.
976    let forward = step.forward ^ label_reversed;
977    let (lo, hi) = step.var_length.unwrap_or((1, 1));
978    if hi > VAR_LENGTH_MAX_DEPTH {
979        return Err(CypherError::ExecError(format!(
980            "variable-length traversal capped at depth {VAR_LENGTH_MAX_DEPTH} (got `*{lo}..{hi}`)"
981        )));
982    }
983    let lo = lo.max(1);
984    let hi = hi.max(lo);
985
986    // Depth-first enumerate every reachable node at depths in [lo, hi].
987    let mut visited: HashSet<NodeId> = HashSet::new();
988    visited.insert(cursor);
989    let mut stack: Vec<(NodeId, u32, HashSet<NodeId>)> = vec![(cursor, 0, visited)];
990    while let Some((node, depth, visited)) = stack.pop() {
991        if depth >= hi {
992            continue;
993        }
994        let edges = if forward {
995            graph.outgoing(node)
996        } else {
997            graph.incoming(node)
998        };
999        for edge in edges {
1000            if edge.kind != kind {
1001                continue;
1002            }
1003            let next = if forward { edge.to } else { edge.from };
1004            if visited.contains(&next) {
1005                continue;
1006            }
1007            let new_depth = depth + 1;
1008            if new_depth >= lo && node_matches(graph, next, &step.target) {
1009                let already_bound = binding.get(&step.target.var);
1010                if matches!(already_bound, Some(id) if *id != next) {
1011                    // Variable rebinds to a different id — skip.
1012                } else {
1013                    let mut next_binding = binding.clone();
1014                    next_binding.insert(step.target.var.clone(), next);
1015                    extend_steps(graph, steps, idx + 1, next, next_binding, out)?;
1016                }
1017            }
1018            if new_depth < hi {
1019                let mut next_visited = visited.clone();
1020                next_visited.insert(next);
1021                stack.push((next, new_depth, next_visited));
1022            }
1023        }
1024    }
1025    Ok(())
1026}
1027
1028fn candidate_nodes(graph: &SymbolGraph, pat: &NodePat) -> Vec<NodeId> {
1029    // Property-driven fast path: if `name` is constrained, scan by name.
1030    if let Some(Literal::Str(name)) = pat.props.get("name") {
1031        return graph
1032            .nodes_named(name)
1033            .iter()
1034            .copied()
1035            .filter(|nid| node_matches(graph, *nid, pat))
1036            .collect();
1037    }
1038    if let Some(label) = &pat.label {
1039        let kind = NodeKind::parse(label).unwrap_or(NodeKind::Module);
1040        return graph
1041            .nodes_of_kind(kind)
1042            .into_iter()
1043            .filter(|nid| node_matches(graph, *nid, pat))
1044            .collect();
1045    }
1046    graph
1047        .all_node_ids()
1048        .into_iter()
1049        .filter(|nid| node_matches(graph, *nid, pat))
1050        .collect()
1051}
1052
1053fn node_matches(graph: &SymbolGraph, id: NodeId, pat: &NodePat) -> bool {
1054    let Some(node) = graph.node(id) else {
1055        return false;
1056    };
1057    if let Some(label) = &pat.label {
1058        let Some(kind) = NodeKind::parse(label) else {
1059            return false;
1060        };
1061        if node.kind != kind {
1062            return false;
1063        }
1064    }
1065    for (key, expected) in &pat.props {
1066        let actual = property_value(node, key);
1067        if &actual != expected {
1068            return false;
1069        }
1070    }
1071    true
1072}
1073
1074fn property_value(node: &super::symbol_graph::Node, key: &str) -> Literal {
1075    match key {
1076        "name" => Literal::Str(node.name.clone()),
1077        "path" => Literal::Str(node.path.clone()),
1078        "language" => Literal::Str(node.language.clone()),
1079        "kind" => Literal::Str(node.kind.as_str().to_string()),
1080        "container" => Literal::Str(node.container.clone().unwrap_or_default()),
1081        "signature" => Literal::Str(node.signature.clone()),
1082        "line" => Literal::Int(node.line as i64),
1083        "file_id" => Literal::Int(node.file_id as i64),
1084        "id" => Literal::Int(node.id as i64),
1085        _ => Literal::Str(String::new()),
1086    }
1087}
1088
1089fn eval_bool(
1090    expr: &Expr,
1091    binding: &HashMap<String, NodeId>,
1092    graph: &SymbolGraph,
1093) -> Result<bool, CypherError> {
1094    match expr {
1095        Expr::Bool(b) => Ok(*b),
1096        Expr::And(a, b) => Ok(eval_bool(a, binding, graph)? && eval_bool(b, binding, graph)?),
1097        Expr::Or(a, b) => Ok(eval_bool(a, binding, graph)? || eval_bool(b, binding, graph)?),
1098        Expr::Not(inner) => Ok(!eval_bool(inner, binding, graph)?),
1099        Expr::Compare(l, op, r) => {
1100            let lv = lookup_operand(l, binding, graph)?;
1101            let rv = lookup_operand(r, binding, graph)?;
1102            Ok(apply_cmp(&lv, *op, &rv))
1103        }
1104    }
1105}
1106
1107fn apply_cmp(left: &Literal, op: CmpOp, right: &Literal) -> bool {
1108    match (left, right) {
1109        (Literal::Int(a), Literal::Int(b)) => match op {
1110            CmpOp::Eq => a == b,
1111            CmpOp::Neq => a != b,
1112            CmpOp::Lt => a < b,
1113            CmpOp::Le => a <= b,
1114            CmpOp::Gt => a > b,
1115            CmpOp::Ge => a >= b,
1116        },
1117        (Literal::Str(a), Literal::Str(b)) => match op {
1118            CmpOp::Eq => a == b,
1119            CmpOp::Neq => a != b,
1120            CmpOp::Lt => a < b,
1121            CmpOp::Le => a <= b,
1122            CmpOp::Gt => a > b,
1123            CmpOp::Ge => a >= b,
1124        },
1125        (Literal::Bool(a), Literal::Bool(b)) => match op {
1126            CmpOp::Eq => a == b,
1127            CmpOp::Neq => a != b,
1128            _ => false,
1129        },
1130        _ => matches!(op, CmpOp::Neq),
1131    }
1132}
1133
1134fn lookup_operand(
1135    op: &Operand,
1136    binding: &HashMap<String, NodeId>,
1137    graph: &SymbolGraph,
1138) -> Result<Literal, CypherError> {
1139    match op {
1140        Operand::Literal(lit) => Ok(lit.clone()),
1141        Operand::Path { var, property } => {
1142            let id = binding.get(var).copied().ok_or_else(|| {
1143                CypherError::ExecError(format!("unbound variable `{var}` in expression"))
1144            })?;
1145            let node = graph
1146                .node(id)
1147                .ok_or_else(|| CypherError::ExecError(format!("node id {id} not in graph")))?;
1148            match property {
1149                Some(p) => Ok(property_value(node, p)),
1150                None => Ok(Literal::Str(node.name.clone())),
1151            }
1152        }
1153    }
1154}
1155
1156fn eval_operand(
1157    op: &Operand,
1158    binding: &HashMap<String, NodeId>,
1159    graph: &SymbolGraph,
1160) -> Result<CypherValue, CypherError> {
1161    let lit = lookup_operand(op, binding, graph)?;
1162    Ok(match lit {
1163        Literal::Str(s) => {
1164            if s.is_empty() {
1165                CypherValue::Null
1166            } else {
1167                CypherValue::String(s)
1168            }
1169        }
1170        Literal::Int(n) => CypherValue::Int(n),
1171        Literal::Bool(b) => CypherValue::Bool(b),
1172    })
1173}
1174
1175#[cfg(test)]
1176mod tests {
1177    use super::*;
1178    use crate::ast::Language;
1179
1180    fn fixture() -> SymbolGraph {
1181        let mut g = SymbolGraph::new();
1182        g.rebuild_file(
1183            1,
1184            "src/a.rs",
1185            Language::Rust,
1186            "fn start() {}\nfn driver() { start(); }\n",
1187            &[],
1188        );
1189        g.rebuild_file(
1190            2,
1191            "src/b.rs",
1192            Language::Rust,
1193            "fn entry() { driver(); }\n",
1194            &[],
1195        );
1196        g
1197    }
1198
1199    #[test]
1200    fn lexer_recognises_arrows_and_keywords() {
1201        let toks = lex("MATCH (a)-[:CALLS]->(b) RETURN a.name").unwrap();
1202        assert!(toks.iter().any(|t| matches!(t, Token::Arrow)));
1203        assert!(toks
1204            .iter()
1205            .any(|t| matches!(t, Token::Keyword(k) if k == "MATCH")));
1206    }
1207
1208    #[test]
1209    fn returns_function_by_name() {
1210        let g = fixture();
1211        let rows = execute(
1212            "MATCH (f:Function {name: 'start'}) RETURN f.path AS path, f.line AS line",
1213            &g,
1214        )
1215        .unwrap();
1216        assert_eq!(rows.len(), 1);
1217        assert_eq!(
1218            rows[0].get("path"),
1219            Some(&CypherValue::String("src/a.rs".into()))
1220        );
1221    }
1222
1223    #[test]
1224    fn called_by_var_length_finds_indirect_callers() {
1225        let g = fixture();
1226        let rows = execute(
1227            "MATCH (f:Function {name: 'start'})<-[:CALLS*1..3]-(c:CallSite) RETURN c.path AS path",
1228            &g,
1229        )
1230        .unwrap();
1231        assert!(!rows.is_empty(), "expected at least one call-site caller");
1232    }
1233
1234    #[test]
1235    fn where_predicate_filters_results() {
1236        let g = fixture();
1237        let rows = execute(
1238            "MATCH (f:Function) WHERE f.name = 'driver' RETURN f.path AS path",
1239            &g,
1240        )
1241        .unwrap();
1242        assert_eq!(rows.len(), 1);
1243        assert_eq!(
1244            rows[0].get("path"),
1245            Some(&CypherValue::String("src/a.rs".into()))
1246        );
1247    }
1248
1249    #[test]
1250    fn literal_default_aliases_do_not_collide() {
1251        // `RETURN 1, 2, 3` previously projected all three literals under
1252        // the same `"value"` key, silently overwriting earlier columns.
1253        // The deduplicator suffixes the 2nd/3rd literals as `value_2`
1254        // / `value_3` so every literal makes it into the row.
1255        let g = fixture();
1256        let rows = execute("MATCH (f:Function {name: 'start'}) RETURN 1, 2, 3", &g).unwrap();
1257        assert_eq!(rows.len(), 1);
1258        let row = &rows[0];
1259        assert_eq!(row.get("value"), Some(&CypherValue::Int(1)));
1260        assert_eq!(row.get("value_2"), Some(&CypherValue::Int(2)));
1261        assert_eq!(row.get("value_3"), Some(&CypherValue::Int(3)));
1262    }
1263
1264    #[test]
1265    fn duplicate_explicit_aliases_are_rejected() {
1266        let g = fixture();
1267        let err = execute("MATCH (f:Function) RETURN f.name AS n, f.path AS n", &g).unwrap_err();
1268        assert!(
1269            matches!(err, CypherError::ParseError(_)),
1270            "expected ParseError for duplicate explicit alias, got {err:?}"
1271        );
1272    }
1273
1274    #[test]
1275    fn rejects_too_many_disjoint_patterns() {
1276        // Four disjoint patterns is one more than `MAX_PATTERNS`; the
1277        // parser refuses before the executor ever sees the cartesian
1278        // explosion.
1279        let g = fixture();
1280        let err = execute(
1281            "MATCH (a:Function),(b:Function),(c:Function),(d:Function) RETURN a.name",
1282            &g,
1283        )
1284        .unwrap_err();
1285        assert!(
1286            matches!(&err, CypherError::ParseError(msg) if msg.contains("too many disjoint MATCH patterns")),
1287            "expected ParseError for too many patterns, got {err:?}"
1288        );
1289    }
1290
1291    #[test]
1292    fn enforces_row_budget_on_cartesian_explosion() {
1293        // Build a synthetic graph with enough Function nodes that even
1294        // a three-pattern cartesian product blows the row budget.
1295        // 25 * 25 * 25 = 15,625 > MAX_ROWS (10,000).
1296        let mut g = SymbolGraph::new();
1297        let mut source = String::new();
1298        for i in 0..25 {
1299            source.push_str(&format!("fn f{i}() {{}}\n"));
1300        }
1301        g.rebuild_file(1, "src/big.rs", Language::Rust, &source, &[]);
1302
1303        let err = execute(
1304            "MATCH (a:Function),(b:Function),(c:Function) RETURN a.name AS n",
1305            &g,
1306        )
1307        .unwrap_err();
1308        assert!(
1309            matches!(&err, CypherError::ExecError(msg) if msg.contains("row budget exceeded")),
1310            "expected ExecError for row budget, got {err:?}"
1311        );
1312    }
1313
1314    #[test]
1315    fn open_upper_bound_defaults_to_depth_cap() {
1316        // `*1..` (no hi) should fall back to VAR_LENGTH_MAX_DEPTH, not 3.
1317        let toks = lex("MATCH (a:Function)-[:CALLS*1..]->(b:Function) RETURN a.name AS n").unwrap();
1318        let q = parse(&toks).unwrap();
1319        let step = &q.matches[0].steps[0];
1320        assert_eq!(step.var_length, Some((1, VAR_LENGTH_MAX_DEPTH)));
1321
1322        // `*2..` likewise.
1323        let toks = lex("MATCH (a:Function)-[:CALLS*2..]->(b:Function) RETURN a.name AS n").unwrap();
1324        let q = parse(&toks).unwrap();
1325        let step = &q.matches[0].steps[0];
1326        assert_eq!(step.var_length, Some((2, VAR_LENGTH_MAX_DEPTH)));
1327    }
1328
1329    #[test]
1330    fn rejects_depth_above_four() {
1331        let g = fixture();
1332        let err = execute(
1333            "MATCH (a:Function)-[:CALLS*1..6]->(b:Function) RETURN a.name AS n",
1334            &g,
1335        )
1336        .unwrap_err();
1337        assert!(
1338            matches!(err, CypherError::ExecError(_)),
1339            "expected ExecError, got {err:?}"
1340        );
1341    }
1342}