sqry_db/planner/parse.rs
1//! Text syntax parser for the structural query planner.
2//!
3//! # Pipeline position
4//!
5//! ```text
6//! text syntax ── THIS MODULE (DB13) ──▶ QueryPlan
7//! │
8//! ▼
9//! [compile] [fuse] [execute]
10//! ```
11//!
12//! # Grammar
13//!
14//! The text syntax is a whitespace-separated flat chain of predicate
15//! *steps*. Each step translates into a single method call on a
16//! [`QueryBuilder`]; the full step sequence feeds into
17//! [`QueryBuilder::build`] to produce a [`QueryPlan`]. Examples from the
18//! design doc:
19//!
20//! ```text
21//! kind:function has:caller traverse:reverse(calls,3) in:src/api/**
22//! kind:method callers:parse_*
23//! kind:function callees:(kind:method name:visit_*)
24//! kind:function references ~= /handle_.*/i
25//! ```
26//!
27//! EBNF-ish:
28//!
29//! ```text
30//! query = step (WS step)*
31//!
32//! step = "kind:" nodekind → .scan(kind)
33//! | "visibility:" ("public" | "private") → .scan_with(…)
34//! | "name:" name_pattern → .filter(MatchesName)
35//! | "returns:" type_name → .filter(Returns)
36//! | "in:" path_glob → .filter(InFile)
37//! | "scope:" scopekind → .filter(InScope)
38//! | "has:" ("caller" | "callee") → .filter(HasCaller|HasCallee)
39//! | "unused" → .filter(IsUnused)
40//! | relation_key ":" value → .filter(<Relation>(value))
41//! | "references" "~=" regex → .filter(References(Regex))
42//! | "traverse:" direction ;
43//! "(" edge_kind "," depth ")" → .traverse(…)
44//!
45//! relation_key = "callers" | "callees" | "imports" | "exports"
46//! | "implements" | "impl" | "references"
47//!
//! value        = "(" query ")"              — subquery
//!              | regex                      — `/…/flags` literal
//!              | quoted_string
//!              | bare_word
51//!
52//! regex = "/" regex_body "/" flags?
53//! flags = /[ims]+/
54//!
55//! direction = "forward" | "reverse" | "both"
56//! edge_kind = ident — matched to EdgeKind::*
57//! depth = u32 literal
58//! ```
59//!
60//! # Alias handling
61//!
62//! - `impl:` and `implements:` both produce [`Predicate::Implements`] (spec M8).
//! - `traverse:` direction `forward` may also be spelled `outgoing` or `out`,
//!   and `reverse` may also be spelled `incoming` or `in`. All spellings are
//!   accepted so the text syntax stays readable for users who think in
//!   "call direction".
66//!
67//! # Error model
68//!
69//! Parse errors surface through [`ParseError`], which carries a byte-offset
70//! span into the input so callers (CLI and MCP handlers) can render a caret
//! pointer at the error site. The parser never panics on well-formed UTF-8
//! input; malformed queries are reported through the [`ParseError`] variants
//! (`UnexpectedEnd`, `UnexpectedChar`, `UnknownIdent`, `InvalidInteger`)
//! instead.
74//!
75//! # Design references
76//!
77//! - Spec: `docs/superpowers/specs/2026-04-12-derived-analysis-db-query-planner-design.md` (§3 — Text Syntax Frontend)
78//! - DAG: `docs/superpowers/plans/2026-04-12-phase3-4-combined-implementation-dag.toml` (unit DB13)
79//!
80//! [`QueryBuilder`]: super::compile::QueryBuilder
81//! [`QueryBuilder::build`]: super::compile::QueryBuilder::build
82//! [`QueryPlan`]: super::ir::QueryPlan
83//! [`Predicate::Implements`]: super::ir::Predicate::Implements
84
85use thiserror::Error;
86
87use sqry_core::graph::unified::bind::scope::arena::ScopeKind;
88use sqry_core::graph::unified::edge::kind::{EdgeKind, ExportKind};
89use sqry_core::graph::unified::node::kind::NodeKind;
90use sqry_core::schema::Visibility;
91
92use super::compile::{BuildError, QueryBuilder, ScanFilters};
93use super::ir::{
94 Direction, PathPattern, PlanNode, Predicate, PredicateValue, QueryPlan, RegexFlags,
95 RegexPattern, StringPattern,
96};
97
98// ============================================================================
99// Public API
100// ============================================================================
101
102/// Parse a text query into a [`QueryPlan`].
103///
104/// # Errors
105///
106/// Returns [`ParseError`] describing a structural or lexical problem in the
107/// input, or a [`BuildError`] if the parsed [`QueryBuilder`] fails
108/// validation (zero depth, first step not context-free, etc.).
109pub fn parse_query(source: &str) -> Result<QueryPlan, ParseError> {
110 let mut parser = Parser::new(source);
111 let builder = parser.parse_chain()?;
112 parser.expect_eof()?;
113 builder.build().map_err(ParseError::from)
114}
115
116// ============================================================================
117// Errors
118// ============================================================================
119
/// Error returned by the text-syntax parser.
///
/// Every lexical/structural variant carries a 0-based byte offset into the
/// source text so callers can point at the failure site.
#[derive(Debug, Error, PartialEq, Eq, Clone)]
pub enum ParseError {
    /// The input ended while the parser still expected more tokens.
    #[error("unexpected end of input at byte {offset}: expected {expected}")]
    UnexpectedEnd {
        /// Byte offset into the source (0-based) at which input ran out.
        offset: usize,
        /// Human-readable description of what the parser was looking for.
        expected: &'static str,
    },

    /// A character was found where something else was required.
    #[error("unexpected character {ch:?} at byte {offset}: expected {expected}")]
    UnexpectedChar {
        /// The offending character.
        ch: char,
        /// Byte offset reported for the offending character.
        offset: usize,
        /// What the parser expected instead.
        expected: &'static str,
    },

    /// An identifier did not match any known keyword or enum variant.
    #[error("unknown {kind} {value:?} at byte {offset}")]
    UnknownIdent {
        /// Domain that rejected the identifier (`node kind`, `edge kind`, …).
        kind: &'static str,
        /// Literal text that could not be resolved.
        value: String,
        /// Byte offset reported for the identifier.
        offset: usize,
    },

    /// A numeric literal could not be parsed as an integer.
    #[error("invalid integer {value:?} at byte {offset}")]
    InvalidInteger {
        /// Literal text that failed to parse.
        value: String,
        /// Byte offset where the literal started.
        offset: usize,
    },

    /// The `QueryBuilder` rejected the plan (e.g. zero-depth traversal).
    #[error("plan construction failed: {0}")]
    Build(#[from] BuildError),
}
167
168// ============================================================================
169// Parser state
170// ============================================================================
171
/// Byte-oriented cursor over a query string.
struct Parser<'a> {
    /// Raw bytes of the query text handed to [`Parser::new`].
    src: &'a [u8],
    /// Index into `src` of the next unread byte.
    pos: usize,
}
176
177impl<'a> Parser<'a> {
178 fn new(source: &'a str) -> Self {
179 Self {
180 src: source.as_bytes(),
181 pos: 0,
182 }
183 }
184
185 // ------------------------------------------------------------------
186 // Top-level chain
187 // ------------------------------------------------------------------
188
189 fn parse_chain(&mut self) -> Result<QueryBuilder, ParseError> {
190 let mut builder = QueryBuilder::new();
191 self.skip_ws();
192
193 while !self.at_end() && !self.peek_is(b')') {
194 builder = self.parse_step(builder)?;
195 self.skip_ws();
196 }
197
198 Ok(builder)
199 }
200
201 fn parse_step(&mut self, builder: QueryBuilder) -> Result<QueryBuilder, ParseError> {
202 let start = self.pos;
203 let head = self.take_ident()?;
204 match head.as_str() {
205 "kind" => {
206 self.expect_byte(b':', "':' after 'kind'")?;
207 let ident = self.take_ident()?;
208 let offset = start;
209 let nk = NodeKind::parse(&ident).ok_or(ParseError::UnknownIdent {
210 kind: "node kind",
211 value: ident,
212 offset,
213 })?;
214 Ok(builder.scan(nk))
215 }
216 "visibility" => {
217 self.expect_byte(b':', "':' after 'visibility'")?;
218 let ident = self.take_ident()?;
219 let vis = Visibility::parse(&ident).ok_or(ParseError::UnknownIdent {
220 kind: "visibility",
221 value: ident,
222 offset: start,
223 })?;
224 Ok(apply_visibility(builder, vis))
225 }
226 "name" => {
227 // `name:<value>` — literal-exact / glob contract.
228 //
229 // **Semantics (B1_ALIGN, locked).**
230 //
231 // - **Literal value (no `*`, `?`, `[`).** `name:Foo`
232 // matches every indexable graph node whose interned
233 // `entry.name` or `entry.qualified_name` equals `Foo`
234 // byte-for-byte, case-sensitive. If `Foo` is dot- or
235 // Ruby-`#` qualified, the shared exact-name resolver also
236 // checks the graph-canonical `::` rewrite so user-facing
237 // display names like `Class.field` / `Class#field` can
238 // resolve to canonical graph names like `Class::field`.
239 // This path
240 // is contract-bound to the CLI `--exact <literal>`
241 // shorthand: both route through
242 // [`sqry_core::graph::unified::concurrent::graph::GraphSnapshot::find_by_exact_name`]
243 // and return the same set on any fixture.
244 //
245 // - **Glob value (contains `*`, `?`, or `[`).**
246 // [`Self::parse_string_pattern`] promotes the pattern
247 // to [`MatchMode::Glob`]; the executor then matches
248 // nodes whose simple or qualified name satisfies the
249 // glob (e.g. `name:parse_*` matches `parse_expr`,
250 // `parse_stmt`). The CLI `--exact` shorthand does
251 // **not** accept glob meta — it treats every
252 // character as a literal — so the
253 // exact-set-equality contract above does not extend
254 // to glob values. This is intentional: the CLI
255 // `--exact` flag is a literal-only convenience, and
256 // glob lookups belong to the structured planner.
257 //
258 // Synthetic placeholder nodes (Go-plugin
259 // `<field:operand.field>` shadows and `<ident>@<offset>`
260 // per-binding-site Variables; see `C_SUPPRESS` and
261 // [`crate::query::QueryDb`] docs for the full taxonomy)
262 // are excluded on **both** the literal and glob paths,
263 // gated by
264 // [`sqry_core::graph::unified::concurrent::graph::GraphSnapshot::is_node_synthetic`]
265 // inside `entry_name_matches` / `scan_match`.
266 //
267 // **No substring or regex form.** A future regex form
268 // would land as a separate `name~` operator with its
269 // own grammar branch and IR variant — mirroring the
270 // `references:` / `references ~= /…/` split. There is
271 // no implicit substring fallback; users wanting regex
272 // name matching today should use `sqry search <regex>`
273 // (regex over interned strings; synthetic-visible) or
274 // wait for `name~` to land.
275 //
276 // **Precedence vs `name~` (future).** When `name~`
277 // lands, it will be parsed as a distinct token and
278 // produce a distinct IR predicate; `name:` keeps the
279 // literal-exact / glob split documented above
280 // unchanged. Folding the two into a single token with
281 // a "smart" mode is explicitly out of scope.
282 self.expect_byte(b':', "':' after 'name'")?;
283 let pat = self.parse_string_pattern()?;
284 // `name:` attaches to an existing NodeScan when possible so
285 // the scan uses the pre-built by-kind index directly. When
286 // the chain is empty, it starts a fresh `NodeScan` carrying
287 // only the name pattern so `name:Foo` is a valid standalone
288 // query (otherwise the chain would fail context-free
289 // validation in `compile.rs`).
290 Ok(apply_name_pattern(builder, pat))
291 }
292 "returns" => {
293 // `returns:<TypeName>` — selects nodes whose outgoing
294 // `EdgeKind::TypeOf { context: Some(TypeOfContext::Return), .. }`
295 // edges target a node whose interned name equals `<TypeName>`
296 // by byte-exact, case-sensitive comparison.
297 //
298 // The value parser is `parse_bare_or_quoted` (not
299 // `parse_string_pattern`) so glob meta-characters like `*`,
300 // `?`, and `[` are taken as literal name bytes rather than
301 // promoted to a glob. This keeps the contract identical to
302 // the IR docstring on `Predicate::Returns`: exact match only.
303 // A future `returns~:` regex form would land as a separate
304 // grammar branch and IR variant, mirroring how `references`
305 // already pairs `references:` (literal) with
306 // `references ~= /…/` (regex).
307 self.expect_byte(b':', "':' after 'returns'")?;
308 let type_name = self.parse_bare_or_quoted()?;
309 Ok(builder.filter(Predicate::Returns(type_name)))
310 }
311 "in" => {
312 self.expect_byte(b':', "':' after 'in'")?;
313 let glob = self.parse_bare_or_quoted()?;
314 Ok(builder.filter(Predicate::InFile(PathPattern::new(glob))))
315 }
316 "scope" => {
317 self.expect_byte(b':', "':' after 'scope'")?;
318 let ident = self.take_ident()?;
319 let sk = parse_scope_kind(&ident).ok_or(ParseError::UnknownIdent {
320 kind: "scope kind",
321 value: ident,
322 offset: start,
323 })?;
324 Ok(builder.filter(Predicate::InScope(sk)))
325 }
326 "has" => {
327 self.expect_byte(b':', "':' after 'has'")?;
328 let ident = self.take_ident()?;
329 match ident.as_str() {
330 "caller" => Ok(builder.filter(Predicate::HasCaller)),
331 "callee" => Ok(builder.filter(Predicate::HasCallee)),
332 _ => Err(ParseError::UnknownIdent {
333 kind: "has-target (expected 'caller' or 'callee')",
334 value: ident,
335 offset: start,
336 }),
337 }
338 }
339 "unused" => Ok(builder.filter(Predicate::IsUnused)),
340 "traverse" => {
341 self.expect_byte(b':', "':' after 'traverse'")?;
342 let (direction, edge_kind, depth) = self.parse_traverse_args()?;
343 Ok(builder.traverse(direction, edge_kind, depth))
344 }
345 "callers" | "callees" | "imports" | "exports" | "implements" | "impl" => {
346 self.expect_byte(b':', "':' after relation predicate")?;
347 let value = self.parse_value()?;
348 let predicate = match head.as_str() {
349 "callers" => Predicate::Callers(value),
350 "callees" => Predicate::Callees(value),
351 "imports" => Predicate::Imports(value),
352 "exports" => Predicate::Exports(value),
353 "implements" | "impl" => Predicate::Implements(value),
354 _ => unreachable!("outer match covers every arm"),
355 };
356 Ok(builder.filter(predicate))
357 }
358 "references" => {
359 // `references:<value>` — literal / subquery form;
360 // `references ~= /regex/` — regex form (space optional).
361 self.skip_ws();
362 if self.eat_bytes(b"~=") {
363 self.skip_ws();
364 let regex = self.parse_regex_literal()?;
365 Ok(builder.filter(Predicate::References(PredicateValue::Regex(regex))))
366 } else {
367 self.expect_byte(b':', "':' or '~=' after 'references'")?;
368 let value = self.parse_value()?;
369 Ok(builder.filter(Predicate::References(value)))
370 }
371 }
372 _ => Err(ParseError::UnknownIdent {
373 kind: "step keyword",
374 value: head,
375 offset: start,
376 }),
377 }
378 }
379
380 // ------------------------------------------------------------------
381 // Value / subquery
382 // ------------------------------------------------------------------
383
384 fn parse_value(&mut self) -> Result<PredicateValue, ParseError> {
385 self.skip_inline_ws();
386 if self.peek_is(b'(') {
387 self.pos += 1;
388 let sub_builder = self.parse_chain()?;
389 self.expect_byte(b')', "')' to close subquery")?;
390 let sub_plan = sub_builder.build().map_err(ParseError::from)?;
391 Ok(PredicateValue::Subquery(Box::new(sub_plan.root)))
392 } else if self.peek_is(b'/') {
393 let regex = self.parse_regex_literal()?;
394 Ok(PredicateValue::Regex(regex))
395 } else {
396 let pat = self.parse_string_pattern()?;
397 Ok(PredicateValue::Pattern(pat))
398 }
399 }
400
401 /// Parses a quoted or bare string literal and infers a [`MatchMode`] from
402 /// the raw contents — `*` or `?` promote to [`MatchMode::Glob`]; otherwise
403 /// the pattern is an [`MatchMode::Exact`] match.
404 fn parse_string_pattern(&mut self) -> Result<StringPattern, ParseError> {
405 let raw = self.parse_bare_or_quoted()?;
406 let has_glob_meta = raw.contains(['*', '?', '[']);
407 let pattern = if has_glob_meta {
408 StringPattern::glob(raw)
409 } else {
410 StringPattern::exact(raw)
411 };
412 Ok(pattern)
413 }
414
415 fn parse_bare_or_quoted(&mut self) -> Result<String, ParseError> {
416 self.skip_inline_ws();
417 if self.peek_is(b'"') {
418 self.take_quoted_string()
419 } else {
420 let start = self.pos;
421 let tok = self.take_value_word()?;
422 if tok.is_empty() {
423 Err(ParseError::UnexpectedChar {
424 ch: self.peek_char().unwrap_or('\0'),
425 offset: start,
426 expected: "value (quoted string or bare word)",
427 })
428 } else {
429 Ok(tok)
430 }
431 }
432 }
433
434 fn parse_regex_literal(&mut self) -> Result<RegexPattern, ParseError> {
435 self.expect_byte(b'/', "'/' to open regex literal")?;
436 let start = self.pos;
437 while !self.at_end() && !self.peek_is(b'/') {
438 // Support backslash-escaped forward slashes within the regex body.
439 if self.peek_is(b'\\') && self.pos + 1 < self.src.len() {
440 self.pos += 2;
441 } else {
442 self.pos += 1;
443 }
444 }
445 if self.at_end() {
446 return Err(ParseError::UnexpectedEnd {
447 offset: self.pos,
448 expected: "'/' to close regex literal",
449 });
450 }
451 let body_bytes = &self.src[start..self.pos];
452 let body = std::str::from_utf8(body_bytes)
453 .map_err(|_| ParseError::UnexpectedChar {
454 ch: '\u{FFFD}',
455 offset: start,
456 expected: "valid UTF-8 in regex body",
457 })?
458 .to_owned();
459 self.pos += 1; // consume closing '/'
460
461 let mut flags = RegexFlags::default();
462 while let Some(b) = self.peek_byte() {
463 match b {
464 b'i' => {
465 flags.case_insensitive = true;
466 self.pos += 1;
467 }
468 b'm' => {
469 flags.multiline = true;
470 self.pos += 1;
471 }
472 b's' => {
473 flags.dot_all = true;
474 self.pos += 1;
475 }
476 _ => break,
477 }
478 }
479 Ok(RegexPattern::with_flags(body, flags))
480 }
481
482 fn parse_traverse_args(&mut self) -> Result<(Direction, EdgeKind, u32), ParseError> {
483 let dir_start = self.pos;
484 let dir_text = self.take_ident()?;
485 let direction = parse_direction(&dir_text).ok_or(ParseError::UnknownIdent {
486 kind: "traversal direction",
487 value: dir_text,
488 offset: dir_start,
489 })?;
490 self.expect_byte(b'(', "'(' after traversal direction")?;
491 self.skip_inline_ws();
492
493 let edge_start = self.pos;
494 let edge_text = self.take_ident()?;
495 let edge_kind = parse_edge_kind(&edge_text).ok_or(ParseError::UnknownIdent {
496 kind: "edge kind",
497 value: edge_text,
498 offset: edge_start,
499 })?;
500
501 self.skip_inline_ws();
502 self.expect_byte(b',', "',' between edge kind and depth")?;
503 self.skip_inline_ws();
504
505 let depth_start = self.pos;
506 let depth_text = self.take_digits()?;
507 let depth: u32 = depth_text.parse().map_err(|_| ParseError::InvalidInteger {
508 value: depth_text,
509 offset: depth_start,
510 })?;
511
512 self.skip_inline_ws();
513 self.expect_byte(b')', "')' to close traversal arguments")?;
514 Ok((direction, edge_kind, depth))
515 }
516
517 // ------------------------------------------------------------------
518 // Low-level lexing
519 // ------------------------------------------------------------------
520
521 #[inline]
522 fn at_end(&self) -> bool {
523 self.pos >= self.src.len()
524 }
525
526 #[inline]
527 fn peek_byte(&self) -> Option<u8> {
528 self.src.get(self.pos).copied()
529 }
530
531 #[inline]
532 fn peek_is(&self, b: u8) -> bool {
533 self.peek_byte() == Some(b)
534 }
535
536 fn peek_char(&self) -> Option<char> {
537 self.src[self.pos..]
538 .utf8_chunks()
539 .next()
540 .and_then(|chunk| chunk.valid().chars().next())
541 }
542
543 fn eat_bytes(&mut self, needle: &[u8]) -> bool {
544 if self.src[self.pos..].starts_with(needle) {
545 self.pos += needle.len();
546 true
547 } else {
548 false
549 }
550 }
551
552 fn skip_ws(&mut self) {
553 while let Some(b) = self.peek_byte() {
554 if b.is_ascii_whitespace() {
555 self.pos += 1;
556 } else {
557 break;
558 }
559 }
560 }
561
562 /// Skips *inline* whitespace (space / tab) without consuming newlines.
563 /// Used between a relation key and its value so that `callers: foo` parses
564 /// the same as `callers:foo` without letting the value span multiple steps.
565 fn skip_inline_ws(&mut self) {
566 while let Some(b) = self.peek_byte() {
567 if b == b' ' || b == b'\t' {
568 self.pos += 1;
569 } else {
570 break;
571 }
572 }
573 }
574
575 fn expect_byte(&mut self, byte: u8, expected: &'static str) -> Result<(), ParseError> {
576 self.skip_inline_ws();
577 match self.peek_byte() {
578 Some(b) if b == byte => {
579 self.pos += 1;
580 Ok(())
581 }
582 Some(_) => Err(ParseError::UnexpectedChar {
583 ch: self.peek_char().unwrap_or('\0'),
584 offset: self.pos,
585 expected,
586 }),
587 None => Err(ParseError::UnexpectedEnd {
588 offset: self.pos,
589 expected,
590 }),
591 }
592 }
593
594 fn expect_eof(&mut self) -> Result<(), ParseError> {
595 self.skip_ws();
596 if self.at_end() {
597 Ok(())
598 } else {
599 Err(ParseError::UnexpectedChar {
600 ch: self.peek_char().unwrap_or('\0'),
601 offset: self.pos,
602 expected: "end of query",
603 })
604 }
605 }
606
607 /// Takes a lowercase identifier `[a-z_]+[a-z0-9_]*`. Returns an empty
608 /// string if the next character is not an ident-start.
609 fn take_ident(&mut self) -> Result<String, ParseError> {
610 let start = self.pos;
611 while let Some(b) = self.peek_byte() {
612 let is_start = (start == self.pos) && (b.is_ascii_alphabetic() || b == b'_');
613 let is_continue = start != self.pos && (b.is_ascii_alphanumeric() || b == b'_');
614 if is_start || is_continue {
615 self.pos += 1;
616 } else {
617 break;
618 }
619 }
620 if self.pos == start {
621 return Err(ParseError::UnexpectedChar {
622 ch: self.peek_char().unwrap_or('\0'),
623 offset: self.pos,
624 expected: "identifier",
625 });
626 }
627 let slice = &self.src[start..self.pos];
628 let s = std::str::from_utf8(slice)
629 .expect("identifier is ASCII")
630 .to_ascii_lowercase();
631 Ok(s)
632 }
633
634 fn take_digits(&mut self) -> Result<String, ParseError> {
635 let start = self.pos;
636 while let Some(b) = self.peek_byte() {
637 if b.is_ascii_digit() {
638 self.pos += 1;
639 } else {
640 break;
641 }
642 }
643 if self.pos == start {
644 return Err(ParseError::UnexpectedChar {
645 ch: self.peek_char().unwrap_or('\0'),
646 offset: self.pos,
647 expected: "integer",
648 });
649 }
650 Ok(std::str::from_utf8(&self.src[start..self.pos])
651 .expect("digits are ASCII")
652 .to_owned())
653 }
654
655 /// Reads the body of a bare "value word" — everything up to the next
656 /// whitespace or structural byte (`)`). Supports wildcards (`*`, `?`, `[`,
657 /// `]`) and path separators so that bare globs like `src/api/**/*.rs` or
658 /// qualified names like `foo::bar::baz` parse as a single word.
659 fn take_value_word(&mut self) -> Result<String, ParseError> {
660 let start = self.pos;
661 while let Some(b) = self.peek_byte() {
662 if b.is_ascii_whitespace() || matches!(b, b')' | b'(') {
663 break;
664 }
665 self.pos += 1;
666 }
667 let slice = &self.src[start..self.pos];
668 std::str::from_utf8(slice)
669 .map(str::to_owned)
670 .map_err(|_| ParseError::UnexpectedChar {
671 ch: '\u{FFFD}',
672 offset: start,
673 expected: "valid UTF-8 in value",
674 })
675 }
676
677 fn take_quoted_string(&mut self) -> Result<String, ParseError> {
678 self.expect_byte(b'"', "'\"' to open quoted string")?;
679 let mut out = String::new();
680 loop {
681 match self.peek_byte() {
682 None => {
683 return Err(ParseError::UnexpectedEnd {
684 offset: self.pos,
685 expected: "'\"' to close quoted string",
686 });
687 }
688 Some(b'"') => {
689 self.pos += 1;
690 return Ok(out);
691 }
692 Some(b'\\') => {
693 if let Some(&next) = self.src.get(self.pos + 1) {
694 self.pos += 2;
695 match next {
696 b'\\' => out.push('\\'),
697 b'"' => out.push('"'),
698 b'n' => out.push('\n'),
699 b't' => out.push('\t'),
700 other => out.push(other as char),
701 }
702 } else {
703 return Err(ParseError::UnexpectedEnd {
704 offset: self.pos + 1,
705 expected: "escape character after '\\'",
706 });
707 }
708 }
709 Some(_) => {
710 // Decode a single UTF-8 character and copy it over.
711 let tail = &self.src[self.pos..];
712 let chunk = tail
713 .utf8_chunks()
714 .next()
715 .expect("non-empty tail yields a chunk");
716 if let Some(ch) = chunk.valid().chars().next() {
717 out.push(ch);
718 self.pos += ch.len_utf8();
719 } else {
720 return Err(ParseError::UnexpectedChar {
721 ch: '\u{FFFD}',
722 offset: self.pos,
723 expected: "valid UTF-8 inside quoted string",
724 });
725 }
726 }
727 }
728 }
729 }
730}
731
732// ============================================================================
733// Helper translators
734// ============================================================================
735
736/// Merge a visibility filter into the current builder.
737///
738/// The builder exposes [`QueryBuilder::scan_with`] which takes a
739/// [`ScanFilters`]; to apply a `visibility` filter to an already-added scan we
740/// reconstruct the scan with both fields. Since `QueryBuilder` does not expose
741/// its internals, the text syntax treats `visibility:` as its own `.filter()`
742/// step over a [`Predicate::And`] constructed ad-hoc — but a cleaner route is
743/// to push a second `scan_with` when the builder is empty. For any non-empty
744/// builder we fall back to a [`Predicate::And`]-adjacent filter via
745/// [`Predicate::MatchesName`]-free routing: we simply chain a new `scan_with`
746/// prefix. In practice, `visibility:` follows `kind:` in every example, so the
747/// builder carries exactly one `NodeScan` at this point and pushing a second
748/// scan would violate the context-free contract. Instead we emit a lightweight
749/// filter: **kind with visibility** is folded into the existing scan when the
750/// builder has exactly one step, otherwise a [`Predicate::MatchesName`] fallback
751/// is impossible (visibility is not a name), so we store the visibility as a
752/// hidden filter through [`Predicate::And`] of existence + name placeholder is
753/// also wrong. The simplest robust behaviour is to require `visibility:` to
754/// immediately follow a `kind:` (or stand alone) and re-run `scan_with` there.
755fn apply_visibility(builder: QueryBuilder, visibility: Visibility) -> QueryBuilder {
756 let steps = builder_steps(&builder);
757 if let Some(existing) = steps.last()
758 && let PlanNode::NodeScan {
759 kind,
760 visibility: existing_vis,
761 name_pattern,
762 } = existing
763 {
764 let kind = *kind;
765 let vis = existing_vis.unwrap_or(visibility);
766 let name_pattern = name_pattern.clone();
767 // Replace the trailing NodeScan with a merged one.
768 let mut trimmed = strip_last_step(builder);
769 trimmed = trimmed.scan_with(
770 ScanFilters::new()
771 .merge_kind(kind)
772 .with_visibility(vis)
773 .merge_name(name_pattern),
774 );
775 return trimmed;
776 }
777
778 // No prior scan — start one with visibility only.
779 builder.scan_with(ScanFilters::new().with_visibility(visibility))
780}
781
782/// Merge a name pattern into the current builder.
783///
784/// Preference order:
785///
786/// 1. **Empty builder** (`name:Foo` standalone): start a fresh
787/// [`PlanNode::NodeScan`] carrying only the name pattern. This makes
788/// `name:Foo` a valid context-free first step (otherwise `compile.rs`
789/// would reject the chain as starting with a `Filter`).
790/// 2. **Trailing `NodeScan` with no existing name pattern**
791/// (`kind:function name:Foo`): fold into the trailing scan so the
792/// executor walks the pre-built by-kind index directly and applies
793/// the name predicate inside `run_scan`.
794/// 3. **Anything else**: fall back to a separate
795/// [`Predicate::MatchesName`] filter step. The executor's
796/// `entry_name_matches` honours the same byte-exact, synthetic-aware
797/// contract documented around the `name:` step in
798/// [`Parser::parse_step`].
799fn apply_name_pattern(builder: QueryBuilder, pattern: StringPattern) -> QueryBuilder {
800 let steps = builder_steps(&builder);
801 if steps.is_empty() {
802 return builder.scan_with(ScanFilters {
803 kind: None,
804 visibility: None,
805 name_pattern: Some(pattern),
806 });
807 }
808 if let Some(existing) = steps.last()
809 && let PlanNode::NodeScan {
810 kind,
811 visibility,
812 name_pattern: existing_name,
813 } = existing
814 && existing_name.is_none()
815 {
816 let kind = *kind;
817 let vis = *visibility;
818 let mut trimmed = strip_last_step(builder);
819 trimmed = trimmed.scan_with(ScanFilters {
820 kind,
821 visibility: vis,
822 name_pattern: Some(pattern),
823 });
824 return trimmed;
825 }
826 builder.filter(Predicate::MatchesName(pattern))
827}
828
829/// Reads the `QueryBuilder::steps` vector by routing through the public
830/// `build` shape — the builder does not expose its internals. Because
831/// `build` consumes the builder, we reconstruct a clone by serializing
832/// through [`QueryBuilder::step_count`] and a `pop` loop. To avoid that
833/// cost, the real implementation just clones the builder and drives
834/// `build` on the clone.
835fn builder_steps(builder: &QueryBuilder) -> Vec<PlanNode> {
836 if builder.is_empty() {
837 return Vec::new();
838 }
839 let cloned = builder.clone();
840 match cloned.build() {
841 Ok(plan) => match plan.root {
842 PlanNode::Chain { steps } => steps,
843 other => vec![other],
844 },
845 Err(_) => Vec::new(),
846 }
847}
848
849/// Rebuilds the builder with every step except the last. Used by
850/// [`apply_visibility`] and [`apply_name_pattern`] to replace a trailing
851/// scan with a merged version without adding a new `QueryBuilder` API.
852fn strip_last_step(builder: QueryBuilder) -> QueryBuilder {
853 let steps = builder_steps(&builder);
854 let mut out = QueryBuilder::new();
855 if steps.len() <= 1 {
856 return out;
857 }
858 out = rehydrate_from_steps(&steps[..steps.len() - 1]);
859 out
860}
861
862/// Rebuilds a [`QueryBuilder`] from a list of [`PlanNode`] steps.
863///
864/// Only the step kinds the text parser emits are handled — additional
865/// variants would require new builder methods which DB13 does not introduce.
866fn rehydrate_from_steps(steps: &[PlanNode]) -> QueryBuilder {
867 let mut b = QueryBuilder::new();
868 for step in steps {
869 match step {
870 PlanNode::NodeScan {
871 kind,
872 visibility,
873 name_pattern,
874 } => {
875 b = b.scan_with(ScanFilters {
876 kind: *kind,
877 visibility: *visibility,
878 name_pattern: name_pattern.clone(),
879 });
880 }
881 PlanNode::EdgeTraversal {
882 direction,
883 edge_kind,
884 max_depth,
885 } => match edge_kind {
886 Some(k) => {
887 b = b.traverse(*direction, k.clone(), *max_depth);
888 }
889 None => {
890 b = b.traverse_any(*direction, *max_depth);
891 }
892 },
893 PlanNode::Filter { predicate } => {
894 b = b.filter(predicate.clone());
895 }
896 PlanNode::SetOp { .. } | PlanNode::Chain { .. } => {
897 // Unreachable from the text parser; preserve the step as an
898 // opaque filter so we do not silently drop it.
899 b = b.filter(Predicate::HasCaller);
900 }
901 }
902 }
903 b
904}
905
906// Local helper methods added as a trait extension so the upstream
907// `ScanFilters` type does not need new constructors for DB13.
908trait ScanFiltersExt {
909 fn merge_kind(self, kind: Option<NodeKind>) -> Self;
910 fn merge_name(self, pattern: Option<StringPattern>) -> Self;
911}
912
913impl ScanFiltersExt for ScanFilters {
914 fn merge_kind(mut self, kind: Option<NodeKind>) -> Self {
915 if let Some(k) = kind {
916 self.kind = Some(k);
917 }
918 self
919 }
920
921 fn merge_name(mut self, pattern: Option<StringPattern>) -> Self {
922 if let Some(p) = pattern {
923 self.name_pattern = Some(p);
924 }
925 self
926 }
927}
928
929// ============================================================================
930// Direction / scope-kind / edge-kind text parsers
931// ============================================================================
932
933fn parse_direction(text: &str) -> Option<Direction> {
934 match text {
935 "forward" | "outgoing" | "out" => Some(Direction::Forward),
936 "reverse" | "incoming" | "in" => Some(Direction::Reverse),
937 "both" => Some(Direction::Both),
938 _ => None,
939 }
940}
941
942fn parse_scope_kind(text: &str) -> Option<ScopeKind> {
943 match text {
944 "module" => Some(ScopeKind::Module),
945 "function" => Some(ScopeKind::Function),
946 "class" => Some(ScopeKind::Class),
947 "namespace" => Some(ScopeKind::Namespace),
948 "trait" => Some(ScopeKind::Trait),
949 "impl" => Some(ScopeKind::Impl),
950 _ => None,
951 }
952}
953
954/// Maps a text identifier (e.g. `"calls"`) to a canonical [`EdgeKind`] with
955/// zeroed metadata so the executor's discriminant match behaves as expected.
956/// Only the edge kinds reachable from the text syntax are covered; fall back
957/// to `None` for unsupported kinds so callers see `ParseError::UnknownIdent`
958/// rather than silently accepting malformed input.
959fn parse_edge_kind(text: &str) -> Option<EdgeKind> {
960 match text {
961 "calls" => Some(EdgeKind::Calls {
962 argument_count: 0,
963 is_async: false,
964 }),
965 "references" => Some(EdgeKind::References),
966 "imports" => Some(EdgeKind::Imports {
967 alias: None,
968 is_wildcard: false,
969 }),
970 "exports" => Some(EdgeKind::Exports {
971 kind: ExportKind::Direct,
972 alias: None,
973 }),
974 "implements" => Some(EdgeKind::Implements),
975 "inherits" => Some(EdgeKind::Inherits),
976 "defines" => Some(EdgeKind::Defines),
977 "contains" => Some(EdgeKind::Contains),
978 _ => None,
979 }
980}
981
982// ============================================================================
983// Inline smoke tests — full coverage lives in
984// `sqry-db/tests/parser_test.rs`.
985// ============================================================================
986
#[cfg(test)]
mod tests {
    use super::*;

    // Parses `$src` (panicking with "parse" on failure), unwraps the root
    // `PlanNode::Chain`, and evaluates to its `steps`. `$msg` is the panic
    // message used when the root is not a chain.
    macro_rules! chain_steps {
        ($src:expr, $msg:literal) => {{
            let plan = parse_query($src).expect("parse");
            let PlanNode::Chain { steps } = plan.root else {
                panic!("{}", $msg);
            };
            steps
        }};
    }

    // Shared body of the REQ:R0014 lock-in tests: `name:<value>` must fold
    // into a single leading `NodeScan` whose `name_pattern.raw` is `value`
    // verbatim.
    fn assert_name_value_folds_verbatim(value: &str) {
        let src = format!("name:{value}");
        let steps = chain_steps!(&src, "expected Chain root");
        assert_eq!(steps.len(), 1);
        let PlanNode::NodeScan {
            name_pattern: Some(pat),
            ..
        } = &steps[0]
        else {
            panic!("expected NodeScan with name_pattern, got {:?}", &steps[0]);
        };
        assert_eq!(pat.raw, value);
    }

    #[test]
    fn parse_kind_scan_produces_single_nodescan_step() {
        let steps = chain_steps!("kind:function", "expected Chain root");
        assert_eq!(steps.len(), 1);
        assert!(matches!(
            steps[0],
            PlanNode::NodeScan {
                kind: Some(NodeKind::Function),
                ..
            }
        ));
    }

    #[test]
    fn parse_has_caller_is_a_filter_step() {
        let steps = chain_steps!("kind:function has:caller", "chain");
        assert_eq!(steps.len(), 2);
        assert!(matches!(
            steps[1],
            PlanNode::Filter {
                predicate: Predicate::HasCaller,
            }
        ));
    }

    #[test]
    fn parse_traverse_accepts_all_three_directions() {
        let cases = [
            ("forward", Direction::Forward),
            ("reverse", Direction::Reverse),
            ("both", Direction::Both),
        ];
        for (text, expected) in cases {
            let src = format!("kind:function traverse:{text}(calls,1)");
            let steps = chain_steps!(&src, "chain");
            let PlanNode::EdgeTraversal {
                direction,
                max_depth,
                ..
            } = &steps[1]
            else {
                panic!("expected EdgeTraversal, got {:?}", &steps[1]);
            };
            assert_eq!(*direction, expected);
            assert_eq!(*max_depth, 1);
        }
    }

    #[test]
    fn parse_unknown_ident_produces_unknown_error() {
        let outcome = parse_query("kind:definitely_not_a_kind").unwrap_err();
        match outcome {
            ParseError::UnknownIdent { kind, .. } => assert_eq!(kind, "node kind"),
            other => panic!("expected UnknownIdent, got {other:?}"),
        }
    }

    #[test]
    fn parse_regex_literal_with_flags() {
        let steps = chain_steps!("kind:function references ~= /handle_.*/im", "chain");
        let PlanNode::Filter {
            predicate: Predicate::References(PredicateValue::Regex(rp)),
        } = &steps[1]
        else {
            panic!("expected References(Regex), got {:?}", &steps[1]);
        };
        assert_eq!(rp.pattern, "handle_.*");
        assert!(rp.flags.case_insensitive);
        assert!(rp.flags.multiline);
        assert!(!rp.flags.dot_all);
    }

    #[test]
    fn parse_subquery_value_produces_plan_node() {
        let steps = chain_steps!("kind:function callers:(kind:method)", "chain");
        let PlanNode::Filter {
            predicate: Predicate::Callers(PredicateValue::Subquery(inner)),
        } = &steps[1]
        else {
            panic!("expected Callers(Subquery), got {:?}", &steps[1]);
        };
        let inner_node: &PlanNode = inner.as_ref();
        let PlanNode::Chain { steps: sub_steps } = inner_node else {
            panic!("expected Chain subquery, got {inner_node:?}");
        };
        assert!(matches!(
            sub_steps[0],
            PlanNode::NodeScan {
                kind: Some(NodeKind::Method),
                ..
            }
        ));
    }

    #[test]
    fn parse_glob_name_pattern_folds_into_scan() {
        let steps = chain_steps!("kind:function name:parse_*", "chain");
        // The glob name is expected to be absorbed by the leading NodeScan.
        assert_eq!(steps.len(), 1);
        let PlanNode::NodeScan {
            kind: Some(NodeKind::Function),
            name_pattern: Some(pat),
            ..
        } = &steps[0]
        else {
            panic!("expected folded NodeScan, got {:?}", &steps[0]);
        };
        assert_eq!(pat.raw, "parse_*");
    }

    #[test]
    fn parse_implements_and_impl_aliases_both_work() {
        let spellings = ["kind:class implements:Visitor", "kind:class impl:Visitor"];
        for src in spellings {
            let steps = chain_steps!(src, "chain");
            assert!(matches!(
                steps[1],
                PlanNode::Filter {
                    predicate: Predicate::Implements(_),
                }
            ));
        }
    }

    #[test]
    fn parse_unused_alone_is_a_filter() {
        let steps = chain_steps!("kind:function unused", "chain");
        assert_eq!(steps.len(), 2);
        assert!(matches!(
            steps[1],
            PlanNode::Filter {
                predicate: Predicate::IsUnused,
            }
        ));
    }

    #[test]
    fn parse_empty_query_errors_on_build() {
        let outcome = parse_query("").unwrap_err();
        assert!(matches!(outcome, ParseError::Build(_)));
    }

    #[test]
    fn parse_returns_predicate_basic() {
        let steps = chain_steps!("kind:function returns:error", "chain");
        assert_eq!(steps.len(), 2);
        let PlanNode::Filter {
            predicate: Predicate::Returns(name),
        } = &steps[1]
        else {
            panic!("expected Filter(Returns), got {:?}", &steps[1]);
        };
        assert_eq!(name, "error");
    }

    #[test]
    fn parse_returns_does_not_collide_with_name_predicate() {
        // `name:Foo returns:Bar` has to yield two distinct predicate
        // variants: `name:` is folded into the leading NodeScan (one step)
        // and `returns:` is stacked on top as a Filter step.
        let steps = chain_steps!("kind:function name:Foo returns:Bar", "chain");
        assert_eq!(steps.len(), 2);

        let PlanNode::NodeScan {
            kind: Some(NodeKind::Function),
            name_pattern: Some(pat),
            ..
        } = &steps[0]
        else {
            panic!(
                "expected leading NodeScan with name_pattern, got {:?}",
                &steps[0]
            );
        };
        assert_eq!(pat.raw, "Foo");

        let PlanNode::Filter {
            predicate: Predicate::Returns(name),
        } = &steps[1]
        else {
            panic!("expected Filter(Returns), got {:?}", &steps[1]);
        };
        assert_eq!(name, "Bar");
    }

    #[test]
    fn parse_returns_takes_value_byte_exact_no_glob_promotion() {
        // Glob metacharacters after `returns:` stay literal name bytes (the
        // spec says exact match only; a future `returns~:` would handle
        // regex).
        let steps = chain_steps!("kind:function returns:Result*", "chain");
        let PlanNode::Filter {
            predicate: Predicate::Returns(name),
        } = &steps[1]
        else {
            panic!("expected Filter(Returns), got {:?}", &steps[1]);
        };
        assert_eq!(name, "Result*");
    }

    #[test]
    fn parse_returns_quoted_string_value() {
        let steps = chain_steps!(r#"kind:function returns:"std::io::Error""#, "chain");
        let PlanNode::Filter {
            predicate: Predicate::Returns(name),
        } = &steps[1]
        else {
            panic!("expected Filter(Returns), got {:?}", &steps[1]);
        };
        assert_eq!(name, "std::io::Error");
    }

    #[test]
    fn parse_returns_missing_value_is_an_error() {
        let outcome = parse_query("kind:function returns:").unwrap_err();
        // A missing value surfaces either as a `parse_bare_or_quoted`
        // failure or as end-of-input, depending on whitespace. Both are
        // parse errors; neither silently builds an empty `Returns("")`
        // predicate.
        assert!(matches!(
            outcome,
            ParseError::UnexpectedChar { .. } | ParseError::UnexpectedEnd { .. }
        ));
    }

    #[test]
    fn parse_integer_rejects_non_digit() {
        let outcome = parse_query("kind:function traverse:forward(calls,abc)").unwrap_err();
        match outcome {
            ParseError::UnexpectedChar { expected, .. } => {
                assert_eq!(expected, "integer");
            }
            other => panic!("expected UnexpectedChar, got {other:?}"),
        }
    }

    /// REQ:R0014 — `take_value_word` lock-in for a dot-qualified `name:`
    /// value.
    ///
    /// `name:Foo.bar` has to fold into the leading `NodeScan` as one literal
    /// `name_pattern` holding the whole dotted string `Foo.bar`. Freezing
    /// the current `take_value_word` behaviour lets the field-emission units
    /// (U06 Ruby, U11 Rust, U07 C++) depend on dot-qualified lookups
    /// resolving through the planner's `Predicate::MatchesName` filter.
    #[test]
    fn parses_dot_qualified_name() {
        assert_name_value_folds_verbatim("Foo.bar");
    }

    /// REQ:R0014 — `take_value_word` lock-in for a Rust `::`-qualified
    /// `name:` value.
    ///
    /// `name:my_crate::Counter::count` has to fold into the leading
    /// `NodeScan` as one literal `name_pattern` holding the whole
    /// `::`-separated string. U11 (Rust field emission) emits
    /// `crate::Struct::field` style qualified names; this guards that the
    /// value-word reader keeps `::` inside a single token instead of
    /// splitting on `:`.
    #[test]
    fn parses_rust_qualified_name_with_double_colon() {
        assert_name_value_folds_verbatim("my_crate::Counter::count");
    }

    /// REQ:R0014 — `take_value_word` lock-in for a Ruby `#`-separated
    /// `name:` value.
    ///
    /// `name:Counter#increment` has to fold into the leading `NodeScan` as
    /// one literal `name_pattern` holding the whole `Class#method` string.
    /// U06 (Ruby field emission) uses `#` as the canonical instance-method
    /// separator; this guards that the value-word reader does not treat `#`
    /// as a comment or whitespace marker.
    #[test]
    fn parses_ruby_instance_method_separator() {
        assert_name_value_folds_verbatim("Counter#increment");
    }
}