Skip to main content

sqry_db/planner/
compile.rs

1//! Fluent [`QueryBuilder`] for assembling [`QueryPlan`] trees.
2//!
3//! The builder is the user-facing entry point to the planner pipeline. It
4//! accumulates a sequence of operator steps and materialises them into a
5//! [`PlanNode::Chain`] when [`QueryBuilder::build`] is called. Validation is
6//! performed at `build` time; structural mistakes (an empty builder, a
7//! traversal with `max_depth = 0`, a chain whose first step depends on an
8//! input set) surface as [`BuildError`] variants rather than panics, so
9//! callers can recover from malformed user input gracefully.
10//!
11//! # Pipeline position
12//!
13//! ```text
14//!   text syntax / builder API
15//!         │
16//!         ▼
17//!     [ir]
18//!         │
19//!         ▼
20//!   [compile]  ← THIS MODULE — DB10
21//!         │
22//!         ▼
23//!     [fuse]   — DB11
24//!         │
25//!         ▼
26//!   [execute]  — DB12
27//! ```
28//!
29//! # Example
30//!
31//! ```rust
32//! use sqry_core::graph::unified::edge::kind::EdgeKind;
33//! use sqry_core::graph::unified::node::kind::NodeKind;
34//! use sqry_db::planner::{Direction, Predicate, QueryBuilder};
35//!
36//! let plan = QueryBuilder::new()
37//!     .scan(NodeKind::Function)
38//!     .filter(Predicate::HasCaller)
39//!     .traverse(
40//!         Direction::Reverse,
41//!         EdgeKind::Calls {
42//!             argument_count: 0,
43//!             is_async: false,
44//!         },
45//!         3,
46//!     )
47//!     .filter(Predicate::InFile("src/api/**".into()))
48//!     .build()
49//!     .expect("plan validates");
50//!
51//! assert_eq!(plan.operator_count(), 5); // chain + 4 steps
52//! ```
53//!
54//! # Design references
55//!
56//! - Spec: `docs/superpowers/specs/2026-04-12-derived-analysis-db-query-planner-design.md` (§3.2)
57//! - DAG: `docs/superpowers/plans/2026-04-12-phase3-4-combined-implementation-dag.toml` (unit DB10)
58
59use sqry_core::graph::unified::edge::kind::EdgeKind;
60use sqry_core::graph::unified::node::kind::NodeKind;
61use sqry_core::graph::unified::string::StringId;
62use sqry_core::schema::Visibility;
63use thiserror::Error;
64
65use super::ir::{
66    Direction, PathPattern, PlanNode, Predicate, PredicateValue, QueryPlan, StringPattern,
67};
68
69// ============================================================================
70// Errors
71// ============================================================================
72
73/// Errors produced when a [`QueryBuilder`] fails to materialise a valid plan.
74///
75/// Every error reflects a structural invariant the executor relies on. None of
76/// these errors abort the calling thread — they are returned via
77/// [`QueryBuilder::build`] so the caller can adjust the user query and retry.
78#[derive(Debug, Error, PartialEq, Eq, Clone)]
79pub enum BuildError {
80    /// The builder has no steps. Calling [`QueryBuilder::build`] on a freshly
81    /// constructed [`QueryBuilder::new`] without any other method calls
82    /// produces this error.
83    #[error("query builder is empty: at least one step is required before build()")]
84    EmptyBuilder,
85
86    /// The first step of the chain is not context-free.
87    ///
88    /// The chain executor walks steps from left to right, threading each step's
89    /// output into the next step's input. The first step therefore cannot
90    /// depend on an inherited input set — only [`PlanNode::NodeScan`] and
91    /// [`PlanNode::SetOp`] satisfy that contract (see
92    /// [`PlanNode::is_context_free`]).
93    #[error(
94        "first step is not context-free: chains must start with NodeScan or SetOp, not {first_kind:?}"
95    )]
96    FirstStepNotContextFree {
97        /// Name of the offending step variant for diagnostic purposes
98        /// (e.g. `"Filter"`, `"EdgeTraversal"`, `"Chain"`).
99        first_kind: PlanNodeKind,
100    },
101
102    /// An [`PlanNode::EdgeTraversal`] step was added with `max_depth = 0`.
103    ///
104    /// `max_depth = 0` would yield an empty result set unconditionally — the
105    /// executor short-circuits, so the plan is unambiguously a user error.
106    /// The builder rejects it at construction time.
107    #[error("traversal step has max_depth = 0: must be >= 1 to produce any output")]
108    ZeroDepth,
109
110    /// A nested sub-plan supplied to [`QueryBuilder::union`],
111    /// [`QueryBuilder::intersect`], or [`QueryBuilder::difference`] failed
112    /// the same context-free validation as a chain's first step.
113    #[error(
114        "set-op operand is not a valid sub-plan: {reason} (the operand must itself build cleanly)"
115    )]
116    InvalidSetOpOperand {
117        /// Human-readable explanation lifted from the inner [`BuildError`].
118        reason: String,
119    },
120}
121
122/// Compact identifier for a [`PlanNode`] variant, used in [`BuildError`]
123/// payloads so error messages stay readable without exposing the full enum.
124#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
125pub enum PlanNodeKind {
126    /// Corresponds to [`PlanNode::NodeScan`].
127    NodeScan,
128    /// Corresponds to [`PlanNode::EdgeTraversal`].
129    EdgeTraversal,
130    /// Corresponds to [`PlanNode::Filter`].
131    Filter,
132    /// Corresponds to [`PlanNode::SetOp`].
133    SetOp,
134    /// Corresponds to [`PlanNode::Chain`].
135    Chain,
136}
137
138impl PlanNodeKind {
139    /// Returns the variant kind for a given [`PlanNode`].
140    #[must_use]
141    pub const fn of(node: &PlanNode) -> Self {
142        match node {
143            PlanNode::NodeScan { .. } => Self::NodeScan,
144            PlanNode::EdgeTraversal { .. } => Self::EdgeTraversal,
145            PlanNode::Filter { .. } => Self::Filter,
146            PlanNode::SetOp { .. } => Self::SetOp,
147            PlanNode::Chain { .. } => Self::Chain,
148        }
149    }
150}
151
152// ============================================================================
153// Edge-kind normalization
154// ============================================================================
155
156/// Strip metadata from an [`EdgeKind`] so that two traversals over the same
157/// logical relation produce identical plan hashes.
158///
159/// # Why this matters for fusion
160///
161/// The DB11 fuser merges [`PlanNode`] subtrees that compare equal under
162/// [`PartialEq`]. The executor matches edges by `std::mem::discriminant` —
163/// i.e. it ignores metadata fields like
164/// `Calls { argument_count, is_async }`. If the builder admitted the user's
165/// `argument_count` value into the IR, two semantically identical traversals
166/// constructed with different concrete values (a common occurrence when the
167/// user is asking "do callers exist?" without caring about argument shape)
168/// would end up with distinct plan hashes, defeating fusion.
169///
170/// Normalisation distinguishes **semantic discriminators** (kept) from
171/// **site metadata** (zeroed):
172///
173/// ## Site metadata — always zeroed
174///
175/// These fields describe *where* or *how* the edge appeared in source code,
176/// not *what kind* of relationship it is. Two traversals that differ only in
177/// these fields ask the same semantic question.
178///
179/// - numeric per-site counters (`Calls::argument_count`, `TypeOf::index`)
180/// - per-site flags with no query-level meaning (`Calls::is_async`,
181///   `TraitMethodBinding::is_ambiguous`, `MacroExpansion::is_verified`)
182/// - every [`StringId`] field — interner handles are snapshot-dependent and
183///   therefore cannot survive a plan that may be re-evaluated against a
184///   different snapshot. Semantic filtering on the names these
185///   [`StringId`]s reference belongs in a [`Predicate`] (e.g.
186///   [`Predicate::Implements`] with a
187///   [`PredicateValue::Pattern`]), not
188///   in the edge kind itself.
189///
190/// ## Semantic discriminators — always preserved
191///
192/// These fields carve the edge space into distinct semantic categories.
193/// Collapsing them would merge edges that the caller asked about separately.
194///
195/// | Variant | Preserved field | Why |
196/// |---------|-----------------|-----|
197/// | [`EdgeKind::TypeOf`] | `context: Option<TypeOfContext>` | Parameter vs Return vs Field vs Variable vs TypeParameter vs Constraint are distinct relationships. |
198/// | [`EdgeKind::Exports`] | `kind: ExportKind` | Direct / Reexport / Default / Namespace are distinct export forms. |
199/// | [`EdgeKind::Imports`] | `is_wildcard: bool` | A `use foo::*` wildcard import is a different relationship from a named import. |
200/// | [`EdgeKind::LifetimeConstraint`] | `constraint_kind` | Outlives / TypeBound / Reference / Static / HigherRanked / TraitObject are distinct Rust lifetime relations. |
201/// | [`EdgeKind::MacroExpansion`] | `expansion_kind` | `CfgGate`, `Derive`, `Attribute`, `Declarative`, and `Function` expansions have distinct query semantics. |
202/// | [`EdgeKind::FfiCall`] | `convention` | C vs Cdecl vs Stdcall vs Fastcall vs System are distinct FFI edges. |
203/// | [`EdgeKind::HttpRequest`] | `method` | GET / POST / PUT / DELETE / PATCH / HEAD are distinct HTTP edges. |
204/// | [`EdgeKind::DbQuery`] | `query_type` | Select / Insert / Update / Delete / Execute are distinct DB edges. |
205/// | [`EdgeKind::TableWrite`] | `operation` | Insert / Update / Delete are distinct write edges. |
206/// | [`EdgeKind::MessageQueue`] | `protocol` | Kafka / SQS / RabbitMQ / NATS / Redis / Other are distinct MQ edges. |
207///
208/// ## Pass-through
209///
210/// Variants with **no metadata** or **only semantic fields** are returned
211/// unchanged. This includes the structural edges (`Defines`, `Contains`,
212/// `References`, `Inherits`, `Implements`, `WebAssemblyCall`), the JVM
213/// classpath edges (`GenericBound`, `AnnotatedWith`, `AnnotationParam`,
214/// `LambdaCaptures`, `ModuleExports`, `ModuleRequires`, `ModuleOpens`,
215/// `ModuleProvides`, `TypeArgument`, `ExtensionReceiver`, `CompanionOf`,
216/// `SealedPermit`), and variants whose sole payload is a semantic discriminator
217/// ([`EdgeKind::LifetimeConstraint`], [`EdgeKind::FfiCall`]).
218///
219/// [`Predicate`]: super::ir::Predicate
220/// [`Predicate::Implements`]: super::ir::Predicate::Implements
221#[must_use]
222pub fn normalize_edge_kind(kind: EdgeKind) -> EdgeKind {
223    match kind {
224        // ---- Metadata-free or entirely-semantic variants: pass through ----
225        EdgeKind::Defines
226        | EdgeKind::Contains
227        | EdgeKind::References
228        | EdgeKind::Inherits
229        | EdgeKind::Implements
230        | EdgeKind::WebAssemblyCall
231        | EdgeKind::GenericBound
232        | EdgeKind::AnnotatedWith
233        | EdgeKind::AnnotationParam
234        | EdgeKind::LambdaCaptures
235        | EdgeKind::ModuleExports
236        | EdgeKind::ModuleRequires
237        | EdgeKind::ModuleOpens
238        | EdgeKind::ModuleProvides
239        | EdgeKind::TypeArgument
240        | EdgeKind::ExtensionReceiver
241        | EdgeKind::CompanionOf
242        | EdgeKind::SealedPermit
243        | EdgeKind::LifetimeConstraint { .. }
244        | EdgeKind::FfiCall { .. } => kind,
245
246        // ---- Pure site metadata: zero everything ----
247        EdgeKind::Calls { .. } => EdgeKind::Calls {
248            argument_count: 0,
249            is_async: false,
250        },
251
252        // ---- Mixed: preserve semantic, drop site/StringId ----
253        EdgeKind::Imports { is_wildcard, .. } => EdgeKind::Imports {
254            alias: None,
255            is_wildcard,
256        },
257        EdgeKind::Exports { kind, .. } => EdgeKind::Exports { kind, alias: None },
258        EdgeKind::TypeOf { context, .. } => EdgeKind::TypeOf {
259            context,
260            index: None,
261            name: None,
262        },
263        EdgeKind::MacroExpansion { expansion_kind, .. } => EdgeKind::MacroExpansion {
264            expansion_kind,
265            is_verified: false,
266        },
267        EdgeKind::HttpRequest { method, .. } => EdgeKind::HttpRequest { method, url: None },
268        EdgeKind::DbQuery { query_type, .. } => EdgeKind::DbQuery {
269            query_type,
270            table: None,
271        },
272        EdgeKind::TableWrite { operation, .. } => EdgeKind::TableWrite {
273            table_name: StringId::INVALID,
274            schema: None,
275            operation,
276        },
277        EdgeKind::MessageQueue { protocol, .. } => EdgeKind::MessageQueue {
278            protocol,
279            topic: None,
280        },
281
282        // ---- Entirely StringId / site metadata: zero everything ----
283        EdgeKind::TraitMethodBinding { .. } => EdgeKind::TraitMethodBinding {
284            trait_name: StringId::INVALID,
285            impl_type: StringId::INVALID,
286            is_ambiguous: false,
287        },
288        EdgeKind::GrpcCall { .. } => EdgeKind::GrpcCall {
289            service: StringId::INVALID,
290            method: StringId::INVALID,
291        },
292        EdgeKind::TableRead { .. } => EdgeKind::TableRead {
293            table_name: StringId::INVALID,
294            schema: None,
295        },
296        EdgeKind::TriggeredBy { .. } => EdgeKind::TriggeredBy {
297            trigger_name: StringId::INVALID,
298            schema: None,
299        },
300        EdgeKind::WebSocket { .. } => EdgeKind::WebSocket { event: None },
301        EdgeKind::GraphQLOperation { .. } => EdgeKind::GraphQLOperation {
302            operation: StringId::INVALID,
303        },
304        EdgeKind::ProcessExec { .. } => EdgeKind::ProcessExec {
305            command: StringId::INVALID,
306        },
307        EdgeKind::FileIpc { .. } => EdgeKind::FileIpc { path_pattern: None },
308        EdgeKind::ProtocolCall { .. } => EdgeKind::ProtocolCall {
309            protocol: StringId::INVALID,
310            metadata: None,
311        },
312    }
313}
314
315// ============================================================================
316// QueryBuilder
317// ============================================================================
318
319/// Optional filters for [`QueryBuilder::scan_with`].
320///
321/// Mirrors the `NodeScan` payload but accepts every field as `Option`, so a
322/// caller can fix a subset of filters (e.g. visibility only) without having to
323/// supply placeholder values for the others.
324#[derive(Debug, Clone, Default, PartialEq, Eq)]
325pub struct ScanFilters {
326    /// Optional kind filter (Function, Method, Class, ...).
327    pub kind: Option<NodeKind>,
328    /// Optional visibility filter (Public, Private).
329    pub visibility: Option<Visibility>,
330    /// Optional symbol-name pattern filter.
331    pub name_pattern: Option<StringPattern>,
332}
333
334impl ScanFilters {
335    /// Constructs an empty [`ScanFilters`] (equivalent to [`Default::default`]).
336    #[must_use]
337    pub const fn new() -> Self {
338        Self {
339            kind: None,
340            visibility: None,
341            name_pattern: None,
342        }
343    }
344
345    /// Sets the kind filter.
346    #[must_use]
347    pub fn with_kind(mut self, kind: NodeKind) -> Self {
348        self.kind = Some(kind);
349        self
350    }
351
352    /// Sets the visibility filter.
353    #[must_use]
354    pub fn with_visibility(mut self, visibility: Visibility) -> Self {
355        self.visibility = Some(visibility);
356        self
357    }
358
359    /// Sets the name-pattern filter.
360    #[must_use]
361    pub fn with_name_pattern(mut self, pattern: StringPattern) -> Self {
362        self.name_pattern = Some(pattern);
363        self
364    }
365}
366
367/// Fluent builder that compiles a chain of operator method calls into a
368/// [`QueryPlan`].
369///
370/// # Composition rules
371///
372/// 1. The first step appended must be context-free. [`Self::scan`],
373///    [`Self::scan_with`], [`Self::scan_all`], [`Self::union`],
374///    [`Self::intersect`], and [`Self::difference`] all satisfy that
375///    contract; calling [`Self::filter`] or [`Self::traverse`] on an empty
376///    builder produces a [`BuildError::FirstStepNotContextFree`] at
377///    [`Self::build`] time.
378/// 2. [`Self::union`], [`Self::intersect`], and [`Self::difference`] take an
379///    arbitrary [`QueryPlan`] as the right-hand operand. The builder's
380///    current accumulated state is folded into the left-hand operand. If the
381///    builder is empty, the right-hand operand becomes the first step.
382/// 3. [`Self::traverse`] normalises its [`EdgeKind`] argument so that the
383///    plan hash is independent of metadata the executor ignores
384///    (see [`normalize_edge_kind`]).
385/// 4. [`Self::build`] always wraps the accumulated steps in a
386///    [`PlanNode::Chain`], even when only a single step is present. This
387///    keeps the plan shape uniform for the fuser and the executor; callers
388///    that want the bare step can match against the resulting `Chain.steps`.
389///
390/// # Cloning
391///
392/// [`QueryBuilder`] is `Clone` so that an in-progress chain can be branched
393/// — useful when constructing two related queries from a shared prefix:
394///
395/// ```rust
396/// use sqry_core::graph::unified::node::kind::NodeKind;
397/// use sqry_db::planner::{Predicate, QueryBuilder};
398///
399/// let prefix = QueryBuilder::new().scan(NodeKind::Function);
400/// let with_callers = prefix.clone().filter(Predicate::HasCaller).build();
401/// let with_callees = prefix.filter(Predicate::HasCallee).build();
402/// assert!(with_callers.is_ok() && with_callees.is_ok());
403/// ```
404#[derive(Debug, Clone, Default)]
405pub struct QueryBuilder {
406    /// Accumulated operator steps in insertion order.
407    steps: Vec<PlanNode>,
408}
409
410impl QueryBuilder {
411    /// Constructs an empty builder.
412    ///
413    /// At least one of [`Self::scan`], [`Self::scan_with`], [`Self::scan_all`],
414    /// [`Self::union`], [`Self::intersect`], or [`Self::difference`] must be
415    /// called before [`Self::build`] for the resulting plan to validate.
416    #[must_use]
417    pub const fn new() -> Self {
418        Self { steps: Vec::new() }
419    }
420
421    // ------------------------------------------------------------------
422    // Scan: context-free starting steps
423    // ------------------------------------------------------------------
424
425    /// Appends a [`PlanNode::NodeScan`] filtered to the given [`NodeKind`].
426    ///
427    /// Equivalent to `.scan_with(ScanFilters::new().with_kind(kind))`.
428    #[must_use]
429    pub fn scan(mut self, kind: NodeKind) -> Self {
430        self.steps.push(PlanNode::NodeScan {
431            kind: Some(kind),
432            visibility: None,
433            name_pattern: None,
434        });
435        self
436    }
437
438    /// Appends a [`PlanNode::NodeScan`] with the given filter combination.
439    #[must_use]
440    pub fn scan_with(mut self, filters: ScanFilters) -> Self {
441        let ScanFilters {
442            kind,
443            visibility,
444            name_pattern,
445        } = filters;
446        self.steps.push(PlanNode::NodeScan {
447            kind,
448            visibility,
449            name_pattern,
450        });
451        self
452    }
453
454    /// Appends an unfiltered [`PlanNode::NodeScan`] that matches every node in
455    /// the graph.
456    ///
457    /// Use sparingly — this is an expensive operation on large snapshots.
458    #[must_use]
459    pub fn scan_all(mut self) -> Self {
460        self.steps.push(PlanNode::NodeScan {
461            kind: None,
462            visibility: None,
463            name_pattern: None,
464        });
465        self
466    }
467
468    // ------------------------------------------------------------------
469    // Filter
470    // ------------------------------------------------------------------
471
472    /// Appends a [`PlanNode::Filter`] step with the given predicate.
473    ///
474    /// Every variant of [`Predicate`] is acceptable here, including the
475    /// boolean combinators ([`Predicate::And`], [`Predicate::Or`],
476    /// [`Predicate::Not`]) and value-bearing relation predicates with
477    /// [`PredicateValue::Subquery`] payloads.
478    #[must_use]
479    pub fn filter(mut self, predicate: Predicate) -> Self {
480        self.steps.push(PlanNode::Filter { predicate });
481        self
482    }
483
484    /// Convenience wrapper that builds a [`Predicate::MatchesName`] filter
485    /// from a [`StringPattern`] and appends it as a [`PlanNode::Filter`].
486    #[must_use]
487    pub fn filter_name(self, pattern: StringPattern) -> Self {
488        self.filter(Predicate::MatchesName(pattern))
489    }
490
491    /// Convenience wrapper that builds a [`Predicate::InFile`] filter from a
492    /// [`PathPattern`] (or anything convertible to one) and appends it.
493    #[must_use]
494    pub fn filter_in_file(self, path: impl Into<PathPattern>) -> Self {
495        self.filter(Predicate::InFile(path.into()))
496    }
497
498    // ------------------------------------------------------------------
499    // Traverse
500    // ------------------------------------------------------------------
501
502    /// Appends a [`PlanNode::EdgeTraversal`] step.
503    ///
504    /// The supplied [`EdgeKind`] is run through [`normalize_edge_kind`] so
505    /// that two builders that pick the same edge kind but supply different
506    /// per-edge metadata produce the same plan hash. See
507    /// [`normalize_edge_kind`] for the rationale.
508    ///
509    /// Note that `max_depth == 0` is accepted here but rejected at
510    /// [`Self::build`] time with [`BuildError::ZeroDepth`]. The builder defers
511    /// validation so that callers can compose traversals freely without
512    /// fallible intermediate steps.
513    #[must_use]
514    pub fn traverse(mut self, direction: Direction, edge_kind: EdgeKind, max_depth: u32) -> Self {
515        self.steps.push(PlanNode::EdgeTraversal {
516            direction,
517            edge_kind: Some(normalize_edge_kind(edge_kind)),
518            max_depth,
519        });
520        self
521    }
522
523    /// Appends a [`PlanNode::EdgeTraversal`] step that matches any edge kind.
524    ///
525    /// Useful for "follow any outgoing edge to depth N" queries (impact
526    /// analysis, broad reachability).
527    #[must_use]
528    pub fn traverse_any(mut self, direction: Direction, max_depth: u32) -> Self {
529        self.steps.push(PlanNode::EdgeTraversal {
530            direction,
531            edge_kind: None,
532            max_depth,
533        });
534        self
535    }
536
537    // ------------------------------------------------------------------
538    // Set operations
539    // ------------------------------------------------------------------
540
541    /// Combines the builder's current state with another plan via union.
542    ///
543    /// If the builder already holds steps, they are wrapped in a
544    /// [`PlanNode::Chain`] and used as the left-hand operand. If the builder
545    /// is empty, the right-hand operand becomes the sole context-free step.
546    #[must_use]
547    pub fn union(self, other: QueryPlan) -> Self {
548        self.combine(super::ir::SetOperation::Union, other)
549    }
550
551    /// Combines the builder's current state with another plan via intersection.
552    ///
553    /// See [`Self::union`] for the composition semantics when the builder is
554    /// non-empty vs empty.
555    #[must_use]
556    pub fn intersect(self, other: QueryPlan) -> Self {
557        self.combine(super::ir::SetOperation::Intersect, other)
558    }
559
560    /// Combines the builder's current state with another plan via difference.
561    ///
562    /// `result = self \ other`. See [`Self::union`] for composition semantics.
563    #[must_use]
564    pub fn difference(self, other: QueryPlan) -> Self {
565        self.combine(super::ir::SetOperation::Difference, other)
566    }
567
568    /// Internal helper for [`Self::union`], [`Self::intersect`],
569    /// [`Self::difference`]. Folds the builder's current state into the left
570    /// operand of a [`PlanNode::SetOp`] node and resets the builder so the
571    /// `SetOp` becomes the new (single) context-free step.
572    ///
573    /// # Empty-builder identity
574    ///
575    /// When called on an empty builder, the right operand is adopted as the
576    /// builder's contents directly, bypassing the `SetOp` wrapper —
577    /// `SetOp(∅, X) ≡ X` for all three operations modelled here, and
578    /// constructing a `SetOp` with a synthetic empty left would be
579    /// semantically wrong (the executor has no "empty set" `PlanNode`). If
580    /// the right operand's root is itself a [`PlanNode::Chain`], its steps
581    /// are flattened into the builder so the resulting plan stays within
582    /// the canonical "single Chain" shape that [`Self::build`] guarantees.
583    fn combine(mut self, op: super::ir::SetOperation, other: QueryPlan) -> Self {
584        let right = Box::new(other.root);
585        if self.steps.is_empty() {
586            adopt_into_steps(&mut self.steps, *right);
587            return self;
588        }
589
590        let left: Box<PlanNode> = if self.steps.len() == 1 {
591            Box::new(self.steps.pop().expect("len == 1"))
592        } else {
593            Box::new(PlanNode::Chain {
594                steps: std::mem::take(&mut self.steps),
595            })
596        };
597
598        self.steps.push(PlanNode::SetOp { op, left, right });
599        self
600    }
601
602    // ------------------------------------------------------------------
603    // Inspection
604    // ------------------------------------------------------------------
605
606    /// Returns the number of steps currently accumulated in the builder.
607    ///
608    /// Does not include nested operators inside [`PlanNode::SetOp`] or
609    /// [`PlanNode::Chain`] payloads — only top-level steps that
610    /// [`Self::build`] would emit.
611    #[inline]
612    #[must_use]
613    pub fn step_count(&self) -> usize {
614        self.steps.len()
615    }
616
617    /// Returns `true` if the builder has no steps yet.
618    #[inline]
619    #[must_use]
620    pub fn is_empty(&self) -> bool {
621        self.steps.is_empty()
622    }
623
624    // ------------------------------------------------------------------
625    // Build
626    // ------------------------------------------------------------------
627
628    /// Materialises the accumulated steps into a [`QueryPlan`].
629    ///
630    /// # Validation
631    ///
632    /// 1. The builder must contain at least one step
633    ///    ([`BuildError::EmptyBuilder`]).
634    /// 2. The first step must be context-free
635    ///    ([`BuildError::FirstStepNotContextFree`]).
636    /// 3. No [`PlanNode::EdgeTraversal`] step may have `max_depth = 0`
637    ///    ([`BuildError::ZeroDepth`]). This is checked recursively through
638    ///    [`PlanNode::SetOp`] and [`PlanNode::Chain`] payloads, so a malformed
639    ///    sub-plan supplied to [`Self::union`] is also caught.
640    /// 4. Any [`PlanNode::SetOp`] node added by [`Self::union`] /
641    ///    [`Self::intersect`] / [`Self::difference`] must have both operands
642    ///    rooted at a context-free node ([`BuildError::InvalidSetOpOperand`]).
643    ///
644    /// # Output shape
645    ///
646    /// The returned [`QueryPlan`]'s root is always a [`PlanNode::Chain`].
647    /// Single-step builders therefore produce a `Chain { steps: vec![one] }`
648    /// rather than the bare `one`. This keeps the plan shape uniform for
649    /// downstream fusion and execution. Callers that prefer the bare step
650    /// can match against `plan.root` and unwrap when `Chain.steps.len() == 1`.
651    pub fn build(self) -> Result<QueryPlan, BuildError> {
652        if self.steps.is_empty() {
653            return Err(BuildError::EmptyBuilder);
654        }
655
656        let first = &self.steps[0];
657        if !first.is_context_free() {
658            return Err(BuildError::FirstStepNotContextFree {
659                first_kind: PlanNodeKind::of(first),
660            });
661        }
662
663        for step in &self.steps {
664            validate_subtree(step)?;
665        }
666
667        Ok(QueryPlan::new(PlanNode::Chain { steps: self.steps }))
668    }
669}
670
671/// Inlines another plan's root into a builder's `steps` vector.
672///
673/// Used by [`QueryBuilder::combine`] when the builder is empty. If the
674/// adopted root is itself a [`PlanNode::Chain`], its steps are unwrapped so
675/// that the builder doesn't end up with a `Chain { steps: vec![Chain{...}] }`
676/// shape (which would fail context-free validation at `build` time and is
677/// also semantically redundant). Any other variant is appended as a single
678/// step.
679fn adopt_into_steps(steps: &mut Vec<PlanNode>, root: PlanNode) {
680    match root {
681        PlanNode::Chain { steps: inner } => steps.extend(inner),
682        other => steps.push(other),
683    }
684}
685
686/// Recursive structural validation shared by [`QueryBuilder::build`] and the
687/// set-op operand check.
688///
689/// Checks every traversal in the subtree for `max_depth >= 1` and verifies
690/// that every nested [`PlanNode::SetOp`] operand is itself context-free.
691/// Filter predicates carrying [`PredicateValue::Subquery`] are also
692/// validated, so a malformed subquery surfaces at top-level `build` time.
693fn validate_subtree(node: &PlanNode) -> Result<(), BuildError> {
694    match node {
695        PlanNode::NodeScan { .. } => Ok(()),
696        PlanNode::EdgeTraversal { max_depth, .. } => {
697            if *max_depth == 0 {
698                Err(BuildError::ZeroDepth)
699            } else {
700                Ok(())
701            }
702        }
703        PlanNode::Filter { predicate } => validate_predicate(predicate),
704        PlanNode::SetOp { left, right, .. } => {
705            ensure_context_free(left)?;
706            ensure_context_free(right)?;
707            validate_subtree(left)?;
708            validate_subtree(right)?;
709            Ok(())
710        }
711        PlanNode::Chain { steps } => {
712            if let Some(first) = steps.first()
713                && !first.is_context_free()
714            {
715                return Err(BuildError::FirstStepNotContextFree {
716                    first_kind: PlanNodeKind::of(first),
717                });
718            }
719            for step in steps {
720                validate_subtree(step)?;
721            }
722            Ok(())
723        }
724    }
725}
726
727/// Recursively validates a [`Predicate`] tree, walking through every boolean
728/// combinator and into nested subqueries.
729fn validate_predicate(predicate: &Predicate) -> Result<(), BuildError> {
730    match predicate {
731        Predicate::HasCaller
732        | Predicate::HasCallee
733        | Predicate::IsUnused
734        | Predicate::InFile(_)
735        | Predicate::InScope(_)
736        | Predicate::MatchesName(_)
737        | Predicate::Returns(_) => Ok(()),
738
739        Predicate::Callers(v)
740        | Predicate::Callees(v)
741        | Predicate::Imports(v)
742        | Predicate::Exports(v)
743        | Predicate::References(v)
744        | Predicate::Implements(v) => validate_predicate_value(v),
745
746        Predicate::And(list) | Predicate::Or(list) => {
747            for inner in list {
748                validate_predicate(inner)?;
749            }
750            Ok(())
751        }
752        Predicate::Not(inner) => validate_predicate(inner),
753    }
754}
755
756/// Validates the right-hand side of a relation predicate. For subqueries the
757/// nested plan must itself be a well-formed [`PlanNode`].
758fn validate_predicate_value(value: &PredicateValue) -> Result<(), BuildError> {
759    match value {
760        PredicateValue::Pattern(_) | PredicateValue::Regex(_) => Ok(()),
761        PredicateValue::Subquery(plan) => {
762            ensure_context_free(plan)?;
763            validate_subtree(plan)
764        }
765    }
766}
767
768/// Helper used by set-op operand and subquery validation. Wraps the
769/// underlying context-free check so the resulting error variant carries the
770/// `InvalidSetOpOperand` framing.
771fn ensure_context_free(node: &PlanNode) -> Result<(), BuildError> {
772    if node.is_context_free() {
773        return Ok(());
774    }
775
776    // A `Chain` whose first step is itself context-free is acceptable as a
777    // sub-plan operand — the chain's contract is the same as a top-level
778    // builder's.
779    if let PlanNode::Chain { steps } = node
780        && let Some(first) = steps.first()
781        && first.is_context_free()
782    {
783        return Ok(());
784    }
785
786    Err(BuildError::InvalidSetOpOperand {
787        reason: format!("operand root is {:?}", PlanNodeKind::of(node)),
788    })
789}
790
791// ============================================================================
792// QueryPlan ergonomics — used to compose builder outputs as subqueries
793// ============================================================================
794
795/// Extension methods on [`QueryPlan`] that simplify common composition
796/// patterns. Implemented as a trait so the [`ir`] module stays free of
797/// `compile`-flavoured concerns.
798pub trait QueryPlanExt {
799    /// Wraps the plan's root as a [`PredicateValue::Subquery`] suitable for
800    /// embedding inside a relation predicate (e.g. `Predicate::Callers`).
801    ///
802    /// Consumes the plan to avoid a clone — callers needing both the plan
803    /// and the subquery should `.clone()` first.
804    fn into_subquery(self) -> PredicateValue;
805
806    /// Borrowing variant of [`Self::into_subquery`] that clones the plan's
807    /// root rather than consuming it.
808    fn as_subquery(&self) -> PredicateValue;
809}
810
811impl QueryPlanExt for QueryPlan {
812    fn into_subquery(self) -> PredicateValue {
813        PredicateValue::Subquery(Box::new(self.root))
814    }
815
816    fn as_subquery(&self) -> PredicateValue {
817        PredicateValue::Subquery(Box::new(self.root.clone()))
818    }
819}
820
821// ============================================================================
822// Inline unit tests
823// ============================================================================
824
825#[cfg(test)]
826mod tests {
827    use super::*;
828
829    #[test]
830    fn empty_builder_reports_empty_error() {
831        let err = QueryBuilder::new().build().unwrap_err();
832        assert_eq!(err, BuildError::EmptyBuilder);
833    }
834
835    #[test]
836    fn first_step_filter_is_rejected() {
837        let mut b = QueryBuilder::new();
838        b.steps.push(PlanNode::Filter {
839            predicate: Predicate::HasCaller,
840        });
841        let err = b.build().unwrap_err();
842        assert!(matches!(
843            err,
844            BuildError::FirstStepNotContextFree {
845                first_kind: PlanNodeKind::Filter
846            }
847        ));
848    }
849
850    #[test]
851    fn first_step_traversal_is_rejected() {
852        let mut b = QueryBuilder::new();
853        b.steps.push(PlanNode::EdgeTraversal {
854            direction: Direction::Forward,
855            edge_kind: None,
856            max_depth: 1,
857        });
858        let err = b.build().unwrap_err();
859        assert!(matches!(
860            err,
861            BuildError::FirstStepNotContextFree {
862                first_kind: PlanNodeKind::EdgeTraversal
863            }
864        ));
865    }
866
867    #[test]
868    fn zero_depth_is_rejected() {
869        let err = QueryBuilder::new()
870            .scan(NodeKind::Function)
871            .traverse_any(Direction::Forward, 0)
872            .build()
873            .unwrap_err();
874        assert_eq!(err, BuildError::ZeroDepth);
875    }
876
877    #[test]
878    fn scan_then_filter_builds() {
879        let plan = QueryBuilder::new()
880            .scan(NodeKind::Function)
881            .filter(Predicate::HasCaller)
882            .build()
883            .expect("plan");
884        let PlanNode::Chain { steps } = &plan.root else {
885            panic!("expected Chain root");
886        };
887        assert_eq!(steps.len(), 2);
888        assert!(matches!(steps[0], PlanNode::NodeScan { .. }));
889        assert!(matches!(steps[1], PlanNode::Filter { .. }));
890    }
891
892    #[test]
893    fn step_count_and_is_empty_track_state() {
894        let b = QueryBuilder::new();
895        assert!(b.is_empty());
896        assert_eq!(b.step_count(), 0);
897
898        let b = b.scan(NodeKind::Function).filter(Predicate::HasCaller);
899        assert!(!b.is_empty());
900        assert_eq!(b.step_count(), 2);
901    }
902
903    #[test]
904    fn normalize_edge_kind_zeroes_calls_metadata() {
905        let a = normalize_edge_kind(EdgeKind::Calls {
906            argument_count: 7,
907            is_async: true,
908        });
909        let b = normalize_edge_kind(EdgeKind::Calls {
910            argument_count: 0,
911            is_async: false,
912        });
913        assert_eq!(a, b);
914    }
915
916    #[test]
917    fn normalize_edge_kind_passes_through_metadata_free_variants() {
918        let cases = [
919            EdgeKind::Defines,
920            EdgeKind::Contains,
921            EdgeKind::References,
922            EdgeKind::Inherits,
923            EdgeKind::Implements,
924            EdgeKind::WebAssemblyCall,
925        ];
926        for c in cases {
927            assert_eq!(normalize_edge_kind(c.clone()), c);
928        }
929    }
930
931    #[test]
932    fn plan_node_kind_of_covers_all_variants() {
933        assert_eq!(
934            PlanNodeKind::of(&PlanNode::NodeScan {
935                kind: None,
936                visibility: None,
937                name_pattern: None
938            }),
939            PlanNodeKind::NodeScan
940        );
941        assert_eq!(
942            PlanNodeKind::of(&PlanNode::EdgeTraversal {
943                direction: Direction::Forward,
944                edge_kind: None,
945                max_depth: 1
946            }),
947            PlanNodeKind::EdgeTraversal
948        );
949        assert_eq!(
950            PlanNodeKind::of(&PlanNode::Filter {
951                predicate: Predicate::HasCaller
952            }),
953            PlanNodeKind::Filter
954        );
955        let scan = PlanNode::NodeScan {
956            kind: None,
957            visibility: None,
958            name_pattern: None,
959        };
960        assert_eq!(
961            PlanNodeKind::of(&PlanNode::SetOp {
962                op: super::super::ir::SetOperation::Union,
963                left: Box::new(scan.clone()),
964                right: Box::new(scan)
965            }),
966            PlanNodeKind::SetOp
967        );
968        assert_eq!(
969            PlanNodeKind::of(&PlanNode::Chain { steps: vec![] }),
970            PlanNodeKind::Chain
971        );
972    }
973
974    #[test]
975    fn into_subquery_consumes_plan() {
976        let plan = QueryBuilder::new()
977            .scan(NodeKind::Function)
978            .build()
979            .expect("plan");
980        let value = plan.into_subquery();
981        assert!(value.is_subquery());
982    }
983}