Skip to main content

sqry_db/planner/
parse.rs

1//! Text syntax parser for the structural query planner.
2//!
3//! # Pipeline position
4//!
5//! ```text
6//!   text syntax ── THIS MODULE (DB13) ──▶ QueryPlan
7//!         │
8//!         ▼
9//!   [compile] [fuse] [execute]
10//! ```
11//!
12//! # Grammar
13//!
14//! The text syntax is a whitespace-separated flat chain of predicate
15//! *steps*. Each step translates into a single method call on a
16//! [`QueryBuilder`]; the full step sequence feeds into
17//! [`QueryBuilder::build`] to produce a [`QueryPlan`]. Examples from the
18//! design doc:
19//!
20//! ```text
21//! kind:function has:caller traverse:reverse(calls,3) in:src/api/**
22//! kind:method callers:parse_*
23//! kind:function callees:(kind:method name:visit_*)
24//! kind:function references ~= /handle_.*/i
25//! ```
26//!
27//! EBNF-ish:
28//!
29//! ```text
30//! query       = step (WS step)*
31//!
32//! step        = "kind:" nodekind                          → .scan(kind)
33//!             | "visibility:" ("public" | "private")      → .scan_with(…)
34//!             | "name:" name_pattern                      → .filter(MatchesName)
35//!             | "returns:" type_name                      → .filter(Returns)
36//!             | "in:" path_glob                           → .filter(InFile)
37//!             | "scope:" scopekind                        → .filter(InScope)
38//!             | "has:" ("caller" | "callee")              → .filter(HasCaller|HasCallee)
39//!             | "unused"                                  → .filter(IsUnused)
40//!             | relation_key ":" value                    → .filter(<Relation>(value))
41//!             | "references" "~=" regex                   → .filter(References(Regex))
//!             | "traverse:" direction
//!                   "(" edge_kind "," depth ")"           → .traverse(…)
44//!
45//! relation_key = "callers" | "callees" | "imports" | "exports"
46//!              | "implements" | "impl" | "references"
47//!
48//! value       = "(" query ")"                             — subquery
49//!             | quoted_string
50//!             | bare_word
51//!
52//! regex       = "/" regex_body "/" flags?
53//! flags       = /[ims]+/
54//!
55//! direction   = "forward" | "reverse" | "both"
56//! edge_kind   = ident                                     — matched to EdgeKind::*
57//! depth       = u32 literal
58//! ```
59//!
60//! # Alias handling
61//!
62//! - `impl:` and `implements:` both produce [`Predicate::Implements`] (spec M8).
//! - For `traverse:`, the direction `forward` may alternatively be spelled
//!   `outgoing`, and `reverse` may be spelled `incoming`. Both forms are
//!   accepted so the text syntax stays readable for users who think in
//!   "call direction".
66//!
67//! # Error model
68//!
69//! Parse errors surface through [`ParseError`], which carries a byte-offset
70//! span into the input so callers (CLI and MCP handlers) can render a caret
71//! pointer at the error site. The parser never panics on well-formed UTF-8
72//! input; malformed input yields `ParseError::UnexpectedEnd` or
73//! `ParseError::UnexpectedChar` variants instead.
74//!
75//! # Design references
76//!
77//! - Spec: `docs/superpowers/specs/2026-04-12-derived-analysis-db-query-planner-design.md` (§3 — Text Syntax Frontend)
78//! - DAG: `docs/superpowers/plans/2026-04-12-phase3-4-combined-implementation-dag.toml` (unit DB13)
79//!
80//! [`QueryBuilder`]: super::compile::QueryBuilder
81//! [`QueryBuilder::build`]: super::compile::QueryBuilder::build
82//! [`QueryPlan`]: super::ir::QueryPlan
83//! [`Predicate::Implements`]: super::ir::Predicate::Implements
84
85use thiserror::Error;
86
87use sqry_core::graph::unified::bind::scope::arena::ScopeKind;
88use sqry_core::graph::unified::edge::kind::{EdgeKind, ExportKind};
89use sqry_core::graph::unified::node::kind::NodeKind;
90use sqry_core::schema::Visibility;
91
92use super::compile::{BuildError, QueryBuilder, ScanFilters};
93use super::ir::{
94    Direction, PathPattern, PlanNode, Predicate, PredicateValue, QueryPlan, RegexFlags,
95    RegexPattern, StringPattern,
96};
97
98// ============================================================================
99// Public API
100// ============================================================================
101
102/// Parse a text query into a [`QueryPlan`].
103///
104/// # Errors
105///
106/// Returns [`ParseError`] describing a structural or lexical problem in the
107/// input, or a [`BuildError`] if the parsed [`QueryBuilder`] fails
108/// validation (zero depth, first step not context-free, etc.).
109pub fn parse_query(source: &str) -> Result<QueryPlan, ParseError> {
110    let mut parser = Parser::new(source);
111    let builder = parser.parse_chain()?;
112    parser.expect_eof()?;
113    builder.build().map_err(ParseError::from)
114}
115
116// ============================================================================
117// Errors
118// ============================================================================
119
/// Error returned by the text-syntax parser.
///
/// Every lexical/structural variant carries an `offset` field: a 0-based
/// byte offset into the query text. The `expected` fields are static,
/// human-readable descriptions supplied by the parsing routine that failed.
#[derive(Debug, Error, PartialEq, Eq, Clone)]
pub enum ParseError {
    /// Expected more tokens but hit end-of-input.
    #[error("unexpected end of input at byte {offset}: expected {expected}")]
    UnexpectedEnd {
        /// Byte offset into the source (0-based).
        offset: usize,
        /// Human-readable description of what the parser was looking for.
        expected: &'static str,
    },

    /// Encountered an unexpected character.
    #[error("unexpected character {ch:?} at byte {offset}: expected {expected}")]
    UnexpectedChar {
        /// Offending character.
        ch: char,
        /// Byte offset of the offending character.
        offset: usize,
        /// What the parser expected instead.
        expected: &'static str,
    },

    /// An identifier did not match any known enum variant.
    #[error("unknown {kind} {value:?} at byte {offset}")]
    UnknownIdent {
        /// Domain that rejected the identifier (`node kind`, `edge kind`, …).
        kind: &'static str,
        /// Literal text that could not be resolved.
        value: String,
        /// Byte offset where the identifier started.
        offset: usize,
    },

    /// A numeric literal failed to parse.
    #[error("invalid integer {value:?} at byte {offset}")]
    InvalidInteger {
        /// Literal text that could not be parsed as an integer.
        value: String,
        /// Byte offset where the literal started.
        offset: usize,
    },

    /// The `QueryBuilder` rejected the plan (e.g. zero-depth traversal).
    ///
    /// The `#[from]` attribute lets `?` convert a [`BuildError`] into this
    /// variant automatically.
    #[error("plan construction failed: {0}")]
    Build(#[from] BuildError),
}
167
168// ============================================================================
169// Parser state
170// ============================================================================
171
/// Cursor over the raw bytes of a query string.
struct Parser<'a> {
    /// The query text, viewed as bytes (taken from a `&str` in `Parser::new`).
    src: &'a [u8],
    /// Byte offset of the next unread byte in `src`.
    pos: usize,
}
176
177impl<'a> Parser<'a> {
178    fn new(source: &'a str) -> Self {
179        Self {
180            src: source.as_bytes(),
181            pos: 0,
182        }
183    }
184
185    // ------------------------------------------------------------------
186    // Top-level chain
187    // ------------------------------------------------------------------
188
189    fn parse_chain(&mut self) -> Result<QueryBuilder, ParseError> {
190        let mut builder = QueryBuilder::new();
191        self.skip_ws();
192
193        while !self.at_end() && !self.peek_is(b')') {
194            builder = self.parse_step(builder)?;
195            self.skip_ws();
196        }
197
198        Ok(builder)
199    }
200
201    fn parse_step(&mut self, builder: QueryBuilder) -> Result<QueryBuilder, ParseError> {
202        let start = self.pos;
203        let head = self.take_ident()?;
204        match head.as_str() {
205            "kind" => {
206                self.expect_byte(b':', "':' after 'kind'")?;
207                let ident = self.take_ident()?;
208                let offset = start;
209                let nk = NodeKind::parse(&ident).ok_or(ParseError::UnknownIdent {
210                    kind: "node kind",
211                    value: ident,
212                    offset,
213                })?;
214                Ok(builder.scan(nk))
215            }
216            "visibility" => {
217                self.expect_byte(b':', "':' after 'visibility'")?;
218                let ident = self.take_ident()?;
219                let vis = Visibility::parse(&ident).ok_or(ParseError::UnknownIdent {
220                    kind: "visibility",
221                    value: ident,
222                    offset: start,
223                })?;
224                Ok(apply_visibility(builder, vis))
225            }
226            "name" => {
227                // `name:<value>` — literal-exact / glob contract.
228                //
229                // **Semantics (B1_ALIGN, locked).**
230                //
231                // - **Literal value (no `*`, `?`, `[`).** `name:Foo`
232                //   matches every indexable graph node whose interned
233                //   `entry.name` or `entry.qualified_name` equals `Foo`
234                //   byte-for-byte, case-sensitive. If `Foo` is dot- or
235                //   Ruby-`#` qualified, the shared exact-name resolver also
236                //   checks the graph-canonical `::` rewrite so user-facing
237                //   display names like `Class.field` / `Class#field` can
238                //   resolve to canonical graph names like `Class::field`.
239                //   This path
240                //   is contract-bound to the CLI `--exact <literal>`
241                //   shorthand: both route through
242                //   [`sqry_core::graph::unified::concurrent::graph::GraphSnapshot::find_by_exact_name`]
243                //   and return the same set on any fixture.
244                //
245                // - **Glob value (contains `*`, `?`, or `[`).**
246                //   [`Self::parse_string_pattern`] promotes the pattern
247                //   to [`MatchMode::Glob`]; the executor then matches
248                //   nodes whose simple or qualified name satisfies the
249                //   glob (e.g. `name:parse_*` matches `parse_expr`,
250                //   `parse_stmt`). The CLI `--exact` shorthand does
251                //   **not** accept glob meta — it treats every
252                //   character as a literal — so the
253                //   exact-set-equality contract above does not extend
254                //   to glob values. This is intentional: the CLI
255                //   `--exact` flag is a literal-only convenience, and
256                //   glob lookups belong to the structured planner.
257                //
258                // Synthetic placeholder nodes (Go-plugin
259                // `<field:operand.field>` shadows and `<ident>@<offset>`
260                // per-binding-site Variables; see `C_SUPPRESS` and
261                // [`crate::query::QueryDb`] docs for the full taxonomy)
262                // are excluded on **both** the literal and glob paths,
263                // gated by
264                // [`sqry_core::graph::unified::concurrent::graph::GraphSnapshot::is_node_synthetic`]
265                // inside `entry_name_matches` / `scan_match`.
266                //
267                // **No substring or regex form.** A future regex form
268                // would land as a separate `name~` operator with its
269                // own grammar branch and IR variant — mirroring the
270                // `references:` / `references ~= /…/` split. There is
271                // no implicit substring fallback; users wanting regex
272                // name matching today should use `sqry search <regex>`
273                // (regex over interned strings; synthetic-visible) or
274                // wait for `name~` to land.
275                //
276                // **Precedence vs `name~` (future).** When `name~`
277                // lands, it will be parsed as a distinct token and
278                // produce a distinct IR predicate; `name:` keeps the
279                // literal-exact / glob split documented above
280                // unchanged. Folding the two into a single token with
281                // a "smart" mode is explicitly out of scope.
282                self.expect_byte(b':', "':' after 'name'")?;
283                let pat = self.parse_string_pattern()?;
284                // `name:` attaches to an existing NodeScan when possible so
285                // the scan uses the pre-built by-kind index directly. When
286                // the chain is empty, it starts a fresh `NodeScan` carrying
287                // only the name pattern so `name:Foo` is a valid standalone
288                // query (otherwise the chain would fail context-free
289                // validation in `compile.rs`).
290                Ok(apply_name_pattern(builder, pat))
291            }
292            "returns" => {
293                // `returns:<TypeName>` — selects nodes whose outgoing
294                // `EdgeKind::TypeOf { context: Some(TypeOfContext::Return), .. }`
295                // edges target a node whose interned name equals `<TypeName>`
296                // by byte-exact, case-sensitive comparison.
297                //
298                // The value parser is `parse_bare_or_quoted` (not
299                // `parse_string_pattern`) so glob meta-characters like `*`,
300                // `?`, and `[` are taken as literal name bytes rather than
301                // promoted to a glob. This keeps the contract identical to
302                // the IR docstring on `Predicate::Returns`: exact match only.
303                // A future `returns~:` regex form would land as a separate
304                // grammar branch and IR variant, mirroring how `references`
305                // already pairs `references:` (literal) with
306                // `references ~= /…/` (regex).
307                self.expect_byte(b':', "':' after 'returns'")?;
308                let type_name = self.parse_bare_or_quoted()?;
309                Ok(builder.filter(Predicate::Returns(type_name)))
310            }
311            "in" => {
312                self.expect_byte(b':', "':' after 'in'")?;
313                let glob = self.parse_bare_or_quoted()?;
314                Ok(builder.filter(Predicate::InFile(PathPattern::new(glob))))
315            }
316            "scope" => {
317                self.expect_byte(b':', "':' after 'scope'")?;
318                let ident = self.take_ident()?;
319                let sk = parse_scope_kind(&ident).ok_or(ParseError::UnknownIdent {
320                    kind: "scope kind",
321                    value: ident,
322                    offset: start,
323                })?;
324                Ok(builder.filter(Predicate::InScope(sk)))
325            }
326            "has" => {
327                self.expect_byte(b':', "':' after 'has'")?;
328                let ident = self.take_ident()?;
329                match ident.as_str() {
330                    "caller" => Ok(builder.filter(Predicate::HasCaller)),
331                    "callee" => Ok(builder.filter(Predicate::HasCallee)),
332                    _ => Err(ParseError::UnknownIdent {
333                        kind: "has-target (expected 'caller' or 'callee')",
334                        value: ident,
335                        offset: start,
336                    }),
337                }
338            }
339            "unused" => Ok(builder.filter(Predicate::IsUnused)),
340            "traverse" => {
341                self.expect_byte(b':', "':' after 'traverse'")?;
342                let (direction, edge_kind, depth) = self.parse_traverse_args()?;
343                Ok(builder.traverse(direction, edge_kind, depth))
344            }
345            "callers" | "callees" | "imports" | "exports" | "implements" | "impl" => {
346                self.expect_byte(b':', "':' after relation predicate")?;
347                let value = self.parse_value()?;
348                let predicate = match head.as_str() {
349                    "callers" => Predicate::Callers(value),
350                    "callees" => Predicate::Callees(value),
351                    "imports" => Predicate::Imports(value),
352                    "exports" => Predicate::Exports(value),
353                    "implements" | "impl" => Predicate::Implements(value),
354                    _ => unreachable!("outer match covers every arm"),
355                };
356                Ok(builder.filter(predicate))
357            }
358            "references" => {
359                // `references:<value>` — literal / subquery form;
360                // `references ~= /regex/` — regex form (space optional).
361                self.skip_ws();
362                if self.eat_bytes(b"~=") {
363                    self.skip_ws();
364                    let regex = self.parse_regex_literal()?;
365                    Ok(builder.filter(Predicate::References(PredicateValue::Regex(regex))))
366                } else {
367                    self.expect_byte(b':', "':' or '~=' after 'references'")?;
368                    let value = self.parse_value()?;
369                    Ok(builder.filter(Predicate::References(value)))
370                }
371            }
372            _ => Err(ParseError::UnknownIdent {
373                kind: "step keyword",
374                value: head,
375                offset: start,
376            }),
377        }
378    }
379
380    // ------------------------------------------------------------------
381    // Value / subquery
382    // ------------------------------------------------------------------
383
384    fn parse_value(&mut self) -> Result<PredicateValue, ParseError> {
385        self.skip_inline_ws();
386        if self.peek_is(b'(') {
387            self.pos += 1;
388            let sub_builder = self.parse_chain()?;
389            self.expect_byte(b')', "')' to close subquery")?;
390            let sub_plan = sub_builder.build().map_err(ParseError::from)?;
391            Ok(PredicateValue::Subquery(Box::new(sub_plan.root)))
392        } else if self.peek_is(b'/') {
393            let regex = self.parse_regex_literal()?;
394            Ok(PredicateValue::Regex(regex))
395        } else {
396            let pat = self.parse_string_pattern()?;
397            Ok(PredicateValue::Pattern(pat))
398        }
399    }
400
401    /// Parses a quoted or bare string literal and infers a [`MatchMode`] from
402    /// the raw contents — `*` or `?` promote to [`MatchMode::Glob`]; otherwise
403    /// the pattern is an [`MatchMode::Exact`] match.
404    fn parse_string_pattern(&mut self) -> Result<StringPattern, ParseError> {
405        let raw = self.parse_bare_or_quoted()?;
406        let has_glob_meta = raw.contains(['*', '?', '[']);
407        let pattern = if has_glob_meta {
408            StringPattern::glob(raw)
409        } else {
410            StringPattern::exact(raw)
411        };
412        Ok(pattern)
413    }
414
415    fn parse_bare_or_quoted(&mut self) -> Result<String, ParseError> {
416        self.skip_inline_ws();
417        if self.peek_is(b'"') {
418            self.take_quoted_string()
419        } else {
420            let start = self.pos;
421            let tok = self.take_value_word()?;
422            if tok.is_empty() {
423                Err(ParseError::UnexpectedChar {
424                    ch: self.peek_char().unwrap_or('\0'),
425                    offset: start,
426                    expected: "value (quoted string or bare word)",
427                })
428            } else {
429                Ok(tok)
430            }
431        }
432    }
433
434    fn parse_regex_literal(&mut self) -> Result<RegexPattern, ParseError> {
435        self.expect_byte(b'/', "'/' to open regex literal")?;
436        let start = self.pos;
437        while !self.at_end() && !self.peek_is(b'/') {
438            // Support backslash-escaped forward slashes within the regex body.
439            if self.peek_is(b'\\') && self.pos + 1 < self.src.len() {
440                self.pos += 2;
441            } else {
442                self.pos += 1;
443            }
444        }
445        if self.at_end() {
446            return Err(ParseError::UnexpectedEnd {
447                offset: self.pos,
448                expected: "'/' to close regex literal",
449            });
450        }
451        let body_bytes = &self.src[start..self.pos];
452        let body = std::str::from_utf8(body_bytes)
453            .map_err(|_| ParseError::UnexpectedChar {
454                ch: '\u{FFFD}',
455                offset: start,
456                expected: "valid UTF-8 in regex body",
457            })?
458            .to_owned();
459        self.pos += 1; // consume closing '/'
460
461        let mut flags = RegexFlags::default();
462        while let Some(b) = self.peek_byte() {
463            match b {
464                b'i' => {
465                    flags.case_insensitive = true;
466                    self.pos += 1;
467                }
468                b'm' => {
469                    flags.multiline = true;
470                    self.pos += 1;
471                }
472                b's' => {
473                    flags.dot_all = true;
474                    self.pos += 1;
475                }
476                _ => break,
477            }
478        }
479        Ok(RegexPattern::with_flags(body, flags))
480    }
481
482    fn parse_traverse_args(&mut self) -> Result<(Direction, EdgeKind, u32), ParseError> {
483        let dir_start = self.pos;
484        let dir_text = self.take_ident()?;
485        let direction = parse_direction(&dir_text).ok_or(ParseError::UnknownIdent {
486            kind: "traversal direction",
487            value: dir_text,
488            offset: dir_start,
489        })?;
490        self.expect_byte(b'(', "'(' after traversal direction")?;
491        self.skip_inline_ws();
492
493        let edge_start = self.pos;
494        let edge_text = self.take_ident()?;
495        let edge_kind = parse_edge_kind(&edge_text).ok_or(ParseError::UnknownIdent {
496            kind: "edge kind",
497            value: edge_text,
498            offset: edge_start,
499        })?;
500
501        self.skip_inline_ws();
502        self.expect_byte(b',', "',' between edge kind and depth")?;
503        self.skip_inline_ws();
504
505        let depth_start = self.pos;
506        let depth_text = self.take_digits()?;
507        let depth: u32 = depth_text.parse().map_err(|_| ParseError::InvalidInteger {
508            value: depth_text,
509            offset: depth_start,
510        })?;
511
512        self.skip_inline_ws();
513        self.expect_byte(b')', "')' to close traversal arguments")?;
514        Ok((direction, edge_kind, depth))
515    }
516
517    // ------------------------------------------------------------------
518    // Low-level lexing
519    // ------------------------------------------------------------------
520
521    #[inline]
522    fn at_end(&self) -> bool {
523        self.pos >= self.src.len()
524    }
525
526    #[inline]
527    fn peek_byte(&self) -> Option<u8> {
528        self.src.get(self.pos).copied()
529    }
530
531    #[inline]
532    fn peek_is(&self, b: u8) -> bool {
533        self.peek_byte() == Some(b)
534    }
535
536    fn peek_char(&self) -> Option<char> {
537        self.src[self.pos..]
538            .utf8_chunks()
539            .next()
540            .and_then(|chunk| chunk.valid().chars().next())
541    }
542
543    fn eat_bytes(&mut self, needle: &[u8]) -> bool {
544        if self.src[self.pos..].starts_with(needle) {
545            self.pos += needle.len();
546            true
547        } else {
548            false
549        }
550    }
551
552    fn skip_ws(&mut self) {
553        while let Some(b) = self.peek_byte() {
554            if b.is_ascii_whitespace() {
555                self.pos += 1;
556            } else {
557                break;
558            }
559        }
560    }
561
562    /// Skips *inline* whitespace (space / tab) without consuming newlines.
563    /// Used between a relation key and its value so that `callers: foo` parses
564    /// the same as `callers:foo` without letting the value span multiple steps.
565    fn skip_inline_ws(&mut self) {
566        while let Some(b) = self.peek_byte() {
567            if b == b' ' || b == b'\t' {
568                self.pos += 1;
569            } else {
570                break;
571            }
572        }
573    }
574
575    fn expect_byte(&mut self, byte: u8, expected: &'static str) -> Result<(), ParseError> {
576        self.skip_inline_ws();
577        match self.peek_byte() {
578            Some(b) if b == byte => {
579                self.pos += 1;
580                Ok(())
581            }
582            Some(_) => Err(ParseError::UnexpectedChar {
583                ch: self.peek_char().unwrap_or('\0'),
584                offset: self.pos,
585                expected,
586            }),
587            None => Err(ParseError::UnexpectedEnd {
588                offset: self.pos,
589                expected,
590            }),
591        }
592    }
593
594    fn expect_eof(&mut self) -> Result<(), ParseError> {
595        self.skip_ws();
596        if self.at_end() {
597            Ok(())
598        } else {
599            Err(ParseError::UnexpectedChar {
600                ch: self.peek_char().unwrap_or('\0'),
601                offset: self.pos,
602                expected: "end of query",
603            })
604        }
605    }
606
607    /// Takes a lowercase identifier `[a-z_]+[a-z0-9_]*`. Returns an empty
608    /// string if the next character is not an ident-start.
609    fn take_ident(&mut self) -> Result<String, ParseError> {
610        let start = self.pos;
611        while let Some(b) = self.peek_byte() {
612            let is_start = (start == self.pos) && (b.is_ascii_alphabetic() || b == b'_');
613            let is_continue = start != self.pos && (b.is_ascii_alphanumeric() || b == b'_');
614            if is_start || is_continue {
615                self.pos += 1;
616            } else {
617                break;
618            }
619        }
620        if self.pos == start {
621            return Err(ParseError::UnexpectedChar {
622                ch: self.peek_char().unwrap_or('\0'),
623                offset: self.pos,
624                expected: "identifier",
625            });
626        }
627        let slice = &self.src[start..self.pos];
628        let s = std::str::from_utf8(slice)
629            .expect("identifier is ASCII")
630            .to_ascii_lowercase();
631        Ok(s)
632    }
633
634    fn take_digits(&mut self) -> Result<String, ParseError> {
635        let start = self.pos;
636        while let Some(b) = self.peek_byte() {
637            if b.is_ascii_digit() {
638                self.pos += 1;
639            } else {
640                break;
641            }
642        }
643        if self.pos == start {
644            return Err(ParseError::UnexpectedChar {
645                ch: self.peek_char().unwrap_or('\0'),
646                offset: self.pos,
647                expected: "integer",
648            });
649        }
650        Ok(std::str::from_utf8(&self.src[start..self.pos])
651            .expect("digits are ASCII")
652            .to_owned())
653    }
654
655    /// Reads the body of a bare "value word" — everything up to the next
656    /// whitespace or structural byte (`)`). Supports wildcards (`*`, `?`, `[`,
657    /// `]`) and path separators so that bare globs like `src/api/**/*.rs` or
658    /// qualified names like `foo::bar::baz` parse as a single word.
659    fn take_value_word(&mut self) -> Result<String, ParseError> {
660        let start = self.pos;
661        while let Some(b) = self.peek_byte() {
662            if b.is_ascii_whitespace() || matches!(b, b')' | b'(') {
663                break;
664            }
665            self.pos += 1;
666        }
667        let slice = &self.src[start..self.pos];
668        std::str::from_utf8(slice)
669            .map(str::to_owned)
670            .map_err(|_| ParseError::UnexpectedChar {
671                ch: '\u{FFFD}',
672                offset: start,
673                expected: "valid UTF-8 in value",
674            })
675    }
676
677    fn take_quoted_string(&mut self) -> Result<String, ParseError> {
678        self.expect_byte(b'"', "'\"' to open quoted string")?;
679        let mut out = String::new();
680        loop {
681            match self.peek_byte() {
682                None => {
683                    return Err(ParseError::UnexpectedEnd {
684                        offset: self.pos,
685                        expected: "'\"' to close quoted string",
686                    });
687                }
688                Some(b'"') => {
689                    self.pos += 1;
690                    return Ok(out);
691                }
692                Some(b'\\') => {
693                    if let Some(&next) = self.src.get(self.pos + 1) {
694                        self.pos += 2;
695                        match next {
696                            b'\\' => out.push('\\'),
697                            b'"' => out.push('"'),
698                            b'n' => out.push('\n'),
699                            b't' => out.push('\t'),
700                            other => out.push(other as char),
701                        }
702                    } else {
703                        return Err(ParseError::UnexpectedEnd {
704                            offset: self.pos + 1,
705                            expected: "escape character after '\\'",
706                        });
707                    }
708                }
709                Some(_) => {
710                    // Decode a single UTF-8 character and copy it over.
711                    let tail = &self.src[self.pos..];
712                    let chunk = tail
713                        .utf8_chunks()
714                        .next()
715                        .expect("non-empty tail yields a chunk");
716                    if let Some(ch) = chunk.valid().chars().next() {
717                        out.push(ch);
718                        self.pos += ch.len_utf8();
719                    } else {
720                        return Err(ParseError::UnexpectedChar {
721                            ch: '\u{FFFD}',
722                            offset: self.pos,
723                            expected: "valid UTF-8 inside quoted string",
724                        });
725                    }
726                }
727            }
728        }
729    }
730}
731
732// ============================================================================
733// Helper translators
734// ============================================================================
735
736/// Merge a visibility filter into the current builder.
737///
738/// The builder exposes [`QueryBuilder::scan_with`] which takes a
739/// [`ScanFilters`]; to apply a `visibility` filter to an already-added scan we
740/// reconstruct the scan with both fields. Since `QueryBuilder` does not expose
741/// its internals, the text syntax treats `visibility:` as its own `.filter()`
742/// step over a [`Predicate::And`] constructed ad-hoc — but a cleaner route is
743/// to push a second `scan_with` when the builder is empty. For any non-empty
744/// builder we fall back to a [`Predicate::And`]-adjacent filter via
745/// [`Predicate::MatchesName`]-free routing: we simply chain a new `scan_with`
746/// prefix. In practice, `visibility:` follows `kind:` in every example, so the
747/// builder carries exactly one `NodeScan` at this point and pushing a second
748/// scan would violate the context-free contract. Instead we emit a lightweight
749/// filter: **kind with visibility** is folded into the existing scan when the
750/// builder has exactly one step, otherwise a [`Predicate::MatchesName`] fallback
751/// is impossible (visibility is not a name), so we store the visibility as a
752/// hidden filter through [`Predicate::And`] of existence + name placeholder is
753/// also wrong. The simplest robust behaviour is to require `visibility:` to
754/// immediately follow a `kind:` (or stand alone) and re-run `scan_with` there.
755fn apply_visibility(builder: QueryBuilder, visibility: Visibility) -> QueryBuilder {
756    let steps = builder_steps(&builder);
757    if let Some(existing) = steps.last()
758        && let PlanNode::NodeScan {
759            kind,
760            visibility: existing_vis,
761            name_pattern,
762        } = existing
763    {
764        let kind = *kind;
765        let vis = existing_vis.unwrap_or(visibility);
766        let name_pattern = name_pattern.clone();
767        // Replace the trailing NodeScan with a merged one.
768        let mut trimmed = strip_last_step(builder);
769        trimmed = trimmed.scan_with(
770            ScanFilters::new()
771                .merge_kind(kind)
772                .with_visibility(vis)
773                .merge_name(name_pattern),
774        );
775        return trimmed;
776    }
777
778    // No prior scan — start one with visibility only.
779    builder.scan_with(ScanFilters::new().with_visibility(visibility))
780}
781
782/// Merge a name pattern into the current builder.
783///
784/// Preference order:
785///
786/// 1. **Empty builder** (`name:Foo` standalone): start a fresh
787///    [`PlanNode::NodeScan`] carrying only the name pattern. This makes
788///    `name:Foo` a valid context-free first step (otherwise `compile.rs`
789///    would reject the chain as starting with a `Filter`).
790/// 2. **Trailing `NodeScan` with no existing name pattern**
791///    (`kind:function name:Foo`): fold into the trailing scan so the
792///    executor walks the pre-built by-kind index directly and applies
793///    the name predicate inside `run_scan`.
794/// 3. **Anything else**: fall back to a separate
795///    [`Predicate::MatchesName`] filter step. The executor's
796///    `entry_name_matches` honours the same byte-exact, synthetic-aware
797///    contract documented around the `name:` step in
798///    [`Parser::parse_step`].
799fn apply_name_pattern(builder: QueryBuilder, pattern: StringPattern) -> QueryBuilder {
800    let steps = builder_steps(&builder);
801    if steps.is_empty() {
802        return builder.scan_with(ScanFilters {
803            kind: None,
804            visibility: None,
805            name_pattern: Some(pattern),
806        });
807    }
808    if let Some(existing) = steps.last()
809        && let PlanNode::NodeScan {
810            kind,
811            visibility,
812            name_pattern: existing_name,
813        } = existing
814        && existing_name.is_none()
815    {
816        let kind = *kind;
817        let vis = *visibility;
818        let mut trimmed = strip_last_step(builder);
819        trimmed = trimmed.scan_with(ScanFilters {
820            kind,
821            visibility: vis,
822            name_pattern: Some(pattern),
823        });
824        return trimmed;
825    }
826    builder.filter(Predicate::MatchesName(pattern))
827}
828
829/// Reads the `QueryBuilder::steps` vector by routing through the public
830/// `build` shape — the builder does not expose its internals. Because
831/// `build` consumes the builder, we reconstruct a clone by serializing
832/// through [`QueryBuilder::step_count`] and a `pop` loop. To avoid that
833/// cost, the real implementation just clones the builder and drives
834/// `build` on the clone.
835fn builder_steps(builder: &QueryBuilder) -> Vec<PlanNode> {
836    if builder.is_empty() {
837        return Vec::new();
838    }
839    let cloned = builder.clone();
840    match cloned.build() {
841        Ok(plan) => match plan.root {
842            PlanNode::Chain { steps } => steps,
843            other => vec![other],
844        },
845        Err(_) => Vec::new(),
846    }
847}
848
849/// Rebuilds the builder with every step except the last. Used by
850/// [`apply_visibility`] and [`apply_name_pattern`] to replace a trailing
851/// scan with a merged version without adding a new `QueryBuilder` API.
852fn strip_last_step(builder: QueryBuilder) -> QueryBuilder {
853    let steps = builder_steps(&builder);
854    let mut out = QueryBuilder::new();
855    if steps.len() <= 1 {
856        return out;
857    }
858    out = rehydrate_from_steps(&steps[..steps.len() - 1]);
859    out
860}
861
862/// Rebuilds a [`QueryBuilder`] from a list of [`PlanNode`] steps.
863///
864/// Only the step kinds the text parser emits are handled — additional
865/// variants would require new builder methods which DB13 does not introduce.
866fn rehydrate_from_steps(steps: &[PlanNode]) -> QueryBuilder {
867    let mut b = QueryBuilder::new();
868    for step in steps {
869        match step {
870            PlanNode::NodeScan {
871                kind,
872                visibility,
873                name_pattern,
874            } => {
875                b = b.scan_with(ScanFilters {
876                    kind: *kind,
877                    visibility: *visibility,
878                    name_pattern: name_pattern.clone(),
879                });
880            }
881            PlanNode::EdgeTraversal {
882                direction,
883                edge_kind,
884                max_depth,
885            } => match edge_kind {
886                Some(k) => {
887                    b = b.traverse(*direction, k.clone(), *max_depth);
888                }
889                None => {
890                    b = b.traverse_any(*direction, *max_depth);
891                }
892            },
893            PlanNode::Filter { predicate } => {
894                b = b.filter(predicate.clone());
895            }
896            PlanNode::SetOp { .. } | PlanNode::Chain { .. } => {
897                // Unreachable from the text parser; preserve the step as an
898                // opaque filter so we do not silently drop it.
899                b = b.filter(Predicate::HasCaller);
900            }
901        }
902    }
903    b
904}
905
906// Local helper methods added as a trait extension so the upstream
907// `ScanFilters` type does not need new constructors for DB13.
908trait ScanFiltersExt {
909    fn merge_kind(self, kind: Option<NodeKind>) -> Self;
910    fn merge_name(self, pattern: Option<StringPattern>) -> Self;
911}
912
913impl ScanFiltersExt for ScanFilters {
914    fn merge_kind(mut self, kind: Option<NodeKind>) -> Self {
915        if let Some(k) = kind {
916            self.kind = Some(k);
917        }
918        self
919    }
920
921    fn merge_name(mut self, pattern: Option<StringPattern>) -> Self {
922        if let Some(p) = pattern {
923            self.name_pattern = Some(p);
924        }
925        self
926    }
927}
928
929// ============================================================================
930// Direction / scope-kind / edge-kind text parsers
931// ============================================================================
932
933fn parse_direction(text: &str) -> Option<Direction> {
934    match text {
935        "forward" | "outgoing" | "out" => Some(Direction::Forward),
936        "reverse" | "incoming" | "in" => Some(Direction::Reverse),
937        "both" => Some(Direction::Both),
938        _ => None,
939    }
940}
941
942fn parse_scope_kind(text: &str) -> Option<ScopeKind> {
943    match text {
944        "module" => Some(ScopeKind::Module),
945        "function" => Some(ScopeKind::Function),
946        "class" => Some(ScopeKind::Class),
947        "namespace" => Some(ScopeKind::Namespace),
948        "trait" => Some(ScopeKind::Trait),
949        "impl" => Some(ScopeKind::Impl),
950        _ => None,
951    }
952}
953
954/// Maps a text identifier (e.g. `"calls"`) to a canonical [`EdgeKind`] with
955/// zeroed metadata so the executor's discriminant match behaves as expected.
956/// Only the edge kinds reachable from the text syntax are covered; fall back
957/// to `None` for unsupported kinds so callers see `ParseError::UnknownIdent`
958/// rather than silently accepting malformed input.
959fn parse_edge_kind(text: &str) -> Option<EdgeKind> {
960    match text {
961        "calls" => Some(EdgeKind::Calls {
962            argument_count: 0,
963            is_async: false,
964        }),
965        "references" => Some(EdgeKind::References),
966        "imports" => Some(EdgeKind::Imports {
967            alias: None,
968            is_wildcard: false,
969        }),
970        "exports" => Some(EdgeKind::Exports {
971            kind: ExportKind::Direct,
972            alias: None,
973        }),
974        "implements" => Some(EdgeKind::Implements),
975        "inherits" => Some(EdgeKind::Inherits),
976        "defines" => Some(EdgeKind::Defines),
977        "contains" => Some(EdgeKind::Contains),
978        _ => None,
979    }
980}
981
982// ============================================================================
983// Inline smoke tests — full coverage lives in
984// `sqry-db/tests/parser_test.rs`.
985// ============================================================================
986
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` and returns the steps of its `Chain` root.
    ///
    /// Panics — reported at the calling test's location — when parsing fails
    /// or the plan root is not a chain.
    #[track_caller]
    fn chain_steps(src: &str) -> Vec<PlanNode> {
        match parse_query(src).expect("parse").root {
            PlanNode::Chain { steps } => steps,
            other => panic!("expected Chain root, got {other:?}"),
        }
    }

    /// Asserts that `step` is a `Filter(Returns(expected))` step.
    #[track_caller]
    fn assert_returns_filter(step: &PlanNode, expected: &str) {
        match step {
            PlanNode::Filter {
                predicate: Predicate::Returns(name),
            } => assert_eq!(name, expected),
            other => panic!("expected Filter(Returns), got {other:?}"),
        }
    }

    /// Asserts that `src` parses to exactly one `NodeScan` whose name pattern
    /// has the raw text `expected_raw`.
    #[track_caller]
    fn assert_single_named_scan(src: &str, expected_raw: &str) {
        let steps = chain_steps(src);
        assert_eq!(steps.len(), 1);
        match &steps[0] {
            PlanNode::NodeScan {
                name_pattern: Some(pat),
                ..
            } => assert_eq!(pat.raw, expected_raw),
            other => panic!("expected NodeScan with name_pattern, got {other:?}"),
        }
    }

    #[test]
    fn parse_kind_scan_produces_single_nodescan_step() {
        let steps = chain_steps("kind:function");
        assert_eq!(steps.len(), 1);
        assert!(matches!(
            steps[0],
            PlanNode::NodeScan {
                kind: Some(NodeKind::Function),
                ..
            }
        ));
    }

    #[test]
    fn parse_has_caller_is_a_filter_step() {
        let steps = chain_steps("kind:function has:caller");
        assert_eq!(steps.len(), 2);
        assert!(matches!(
            steps[1],
            PlanNode::Filter {
                predicate: Predicate::HasCaller,
            }
        ));
    }

    #[test]
    fn parse_traverse_accepts_all_three_directions() {
        for (text, expected) in [
            ("forward", Direction::Forward),
            ("reverse", Direction::Reverse),
            ("both", Direction::Both),
        ] {
            let steps = chain_steps(&format!("kind:function traverse:{text}(calls,1)"));
            match &steps[1] {
                PlanNode::EdgeTraversal {
                    direction,
                    max_depth,
                    ..
                } => {
                    assert_eq!(*direction, expected);
                    assert_eq!(*max_depth, 1);
                }
                other => panic!("expected EdgeTraversal, got {other:?}"),
            }
        }
    }

    #[test]
    fn parse_unknown_ident_produces_unknown_error() {
        let err = parse_query("kind:definitely_not_a_kind").unwrap_err();
        match err {
            ParseError::UnknownIdent { kind, .. } => assert_eq!(kind, "node kind"),
            other => panic!("expected UnknownIdent, got {other:?}"),
        }
    }

    #[test]
    fn parse_regex_literal_with_flags() {
        let steps = chain_steps("kind:function references ~= /handle_.*/im");
        match &steps[1] {
            PlanNode::Filter {
                predicate: Predicate::References(PredicateValue::Regex(rp)),
            } => {
                assert_eq!(rp.pattern, "handle_.*");
                assert!(rp.flags.case_insensitive);
                assert!(rp.flags.multiline);
                assert!(!rp.flags.dot_all);
            }
            other => panic!("expected References(Regex), got {other:?}"),
        }
    }

    #[test]
    fn parse_subquery_value_produces_plan_node() {
        let steps = chain_steps("kind:function callers:(kind:method)");
        match &steps[1] {
            PlanNode::Filter {
                predicate: Predicate::Callers(PredicateValue::Subquery(inner)),
            } => match inner.as_ref() {
                PlanNode::Chain { steps: sub_steps } => {
                    assert!(matches!(
                        sub_steps[0],
                        PlanNode::NodeScan {
                            kind: Some(NodeKind::Method),
                            ..
                        }
                    ));
                }
                other => panic!("expected Chain subquery, got {other:?}"),
            },
            other => panic!("expected Callers(Subquery), got {other:?}"),
        }
    }

    #[test]
    fn parse_glob_name_pattern_folds_into_scan() {
        let steps = chain_steps("kind:function name:parse_*");
        // Glob name should fold into the leading NodeScan.
        assert_eq!(steps.len(), 1);
        match &steps[0] {
            PlanNode::NodeScan {
                kind: Some(NodeKind::Function),
                name_pattern: Some(pat),
                ..
            } => {
                assert_eq!(pat.raw, "parse_*");
            }
            other => panic!("expected folded NodeScan, got {other:?}"),
        }
    }

    #[test]
    fn parse_implements_and_impl_aliases_both_work() {
        for src in ["kind:class implements:Visitor", "kind:class impl:Visitor"] {
            let steps = chain_steps(src);
            assert!(matches!(
                steps[1],
                PlanNode::Filter {
                    predicate: Predicate::Implements(_),
                }
            ));
        }
    }

    #[test]
    fn parse_unused_alone_is_a_filter() {
        let steps = chain_steps("kind:function unused");
        assert_eq!(steps.len(), 2);
        assert!(matches!(
            steps[1],
            PlanNode::Filter {
                predicate: Predicate::IsUnused,
            }
        ));
    }

    #[test]
    fn parse_empty_query_errors_on_build() {
        let err = parse_query("").unwrap_err();
        assert!(matches!(err, ParseError::Build(_)));
    }

    #[test]
    fn parse_returns_predicate_basic() {
        let steps = chain_steps("kind:function returns:error");
        assert_eq!(steps.len(), 2);
        assert_returns_filter(&steps[1], "error");
    }

    #[test]
    fn parse_returns_does_not_collide_with_name_predicate() {
        // `name:Foo returns:Bar` must produce two distinct predicate variants
        // — `name:` folds into the leading NodeScan (single step), and
        // `returns:` lands as a Filter step on top.
        let steps = chain_steps("kind:function name:Foo returns:Bar");
        assert_eq!(steps.len(), 2);
        match &steps[0] {
            PlanNode::NodeScan {
                kind: Some(NodeKind::Function),
                name_pattern: Some(pat),
                ..
            } => {
                assert_eq!(pat.raw, "Foo");
            }
            other => panic!("expected leading NodeScan with name_pattern, got {other:?}"),
        }
        assert_returns_filter(&steps[1], "Bar");
    }

    #[test]
    fn parse_returns_takes_value_byte_exact_no_glob_promotion() {
        // `returns:` keeps glob meta as literal name bytes (the spec says
        // exact match only; future `returns~:` would handle regex).
        let steps = chain_steps("kind:function returns:Result*");
        assert_returns_filter(&steps[1], "Result*");
    }

    #[test]
    fn parse_returns_quoted_string_value() {
        let steps = chain_steps(r#"kind:function returns:"std::io::Error""#);
        assert_returns_filter(&steps[1], "std::io::Error");
    }

    #[test]
    fn parse_returns_missing_value_is_an_error() {
        let err = parse_query("kind:function returns:").unwrap_err();
        // Missing value yields `parse_bare_or_quoted` failure or end-of-input
        // depending on whitespace; both surface as parse errors rather than
        // silently producing an empty `Returns("")` predicate.
        assert!(matches!(
            err,
            ParseError::UnexpectedChar { .. } | ParseError::UnexpectedEnd { .. }
        ));
    }

    #[test]
    fn parse_integer_rejects_non_digit() {
        let err = parse_query("kind:function traverse:forward(calls,abc)").unwrap_err();
        match err {
            ParseError::UnexpectedChar { expected, .. } => {
                assert_eq!(expected, "integer");
            }
            other => panic!("expected UnexpectedChar, got {other:?}"),
        }
    }

    /// REQ:R0014 — `take_value_word` lock-in: dot-qualified `name:` value.
    ///
    /// `name:Foo.bar` must fold into the leading `NodeScan` as a single
    /// literal `name_pattern` carrying the full dotted string `Foo.bar`. This
    /// freezes today's `take_value_word` behaviour so that field-emission
    /// units (U06 Ruby, U11 Rust, U07 C++) can rely on dot-qualified lookups
    /// being kept as one token.
    #[test]
    fn parses_dot_qualified_name() {
        assert_single_named_scan("name:Foo.bar", "Foo.bar");
    }

    /// REQ:R0014 — `take_value_word` lock-in: Rust `::`-qualified `name:` value.
    ///
    /// `name:my_crate::Counter::count` must fold into the leading `NodeScan`
    /// as a single literal `name_pattern` carrying the full `::`-separated
    /// string. U11 (Rust field emission) emits `crate::Struct::field` style
    /// qualified names; this test guards that the planner's value-word reader
    /// keeps `::` as part of a single token rather than splitting on `:`.
    #[test]
    fn parses_rust_qualified_name_with_double_colon() {
        assert_single_named_scan("name:my_crate::Counter::count", "my_crate::Counter::count");
    }

    /// REQ:R0014 — `take_value_word` lock-in: Ruby `#`-separated `name:` value.
    ///
    /// `name:Counter#increment` must fold into the leading `NodeScan` as a
    /// single literal `name_pattern` carrying the full `Class#method` string.
    /// U06 (Ruby field emission) uses `#` as the canonical instance-method
    /// separator; this test guards that the planner's value-word reader does
    /// not treat `#` as a comment or whitespace marker.
    #[test]
    fn parses_ruby_instance_method_separator() {
        assert_single_named_scan("name:Counter#increment", "Counter#increment");
    }
}
1332            }
1333            other => panic!("expected NodeScan with name_pattern, got {other:?}"),
1334        }
1335    }
1336}