Skip to main content

fathomdb_query/
ast.rs

1use crate::TextQuery;
2
3/// Abstract syntax tree representing a graph query.
4///
5/// `Eq` is intentionally NOT derived because [`QueryStep::RawVectorSearch`]
6/// carries a `Vec<f32>` payload; `Vec<f32>` implements `PartialEq` but not
7/// `Eq` (IEEE-754 NaN breaks reflexivity). Existing `PartialEq`-only
8/// callers are unaffected.
9#[derive(Clone, Debug, PartialEq)]
10pub struct QueryAst {
11    /// Node kind used as the root of the query.
12    pub root_kind: String,
13    /// Ordered pipeline of search, traversal, and filter steps.
14    pub steps: Vec<QueryStep>,
15    /// Named expansion slots evaluated per root result in grouped queries.
16    pub expansions: Vec<ExpansionSlot>,
17    /// Named edge-projecting expansion slots evaluated per root result in
18    /// grouped queries. Sibling to `expansions`; vec membership is the
19    /// discriminator between node- and edge-expansion slots. Slot names
20    /// must be unique across both vecs.
21    pub edge_expansions: Vec<EdgeExpansionSlot>,
22    /// Optional hard cap on the number of result rows.
23    pub final_limit: Option<usize>,
24}
25
26/// An edge-projecting expansion slot.
27///
28/// Emits `(EdgeRow, NodeRow)` tuples per root on execution. The endpoint
29/// node is the target on `Out` traversal, source on `In`. For
30/// `max_depth > 1`, each emitted tuple reflects the final-hop edge
31/// leading to the emitted endpoint node.
32#[derive(Clone, Debug, PartialEq, Eq)]
33pub struct EdgeExpansionSlot {
34    /// Slot name used to key the expansion results. Must be unique across
35    /// both node-expansion and edge-expansion slots in the same query.
36    pub slot: String,
37    /// Direction to traverse edges.
38    pub direction: TraverseDirection,
39    /// Edge kind (label) to follow.
40    pub label: String,
41    /// Maximum traversal depth.
42    pub max_depth: usize,
43    /// Optional predicate filtering the endpoint node (the target side on
44    /// `Out`, the source side on `In`). Reuses the `Predicate` enum.
45    pub endpoint_filter: Option<Predicate>,
46    /// Optional predicate filtering the traversed edges. Only
47    /// `EdgePropertyEq` and `EdgePropertyCompare` are valid here.
48    pub edge_filter: Option<Predicate>,
49}
50
51/// A named expansion slot that traverses edges per root result.
52#[derive(Clone, Debug, PartialEq, Eq)]
53pub struct ExpansionSlot {
54    /// Slot name used to key the expansion results.
55    pub slot: String,
56    /// Direction to traverse edges.
57    pub direction: TraverseDirection,
58    /// Edge kind (label) to follow.
59    pub label: String,
60    /// Maximum traversal depth.
61    pub max_depth: usize,
62    /// Optional predicate to filter target nodes in this expansion slot.
63    /// `None` is exactly equivalent to pre-Pack-2 behavior.
64    /// `Some(_)` is not yet implemented; see Pack 3.
65    pub filter: Option<Predicate>,
66    /// Optional predicate to filter the traversed edges in this expansion slot.
67    /// Only `EdgePropertyEq` and `EdgePropertyCompare` are valid here.
68    /// `None` preserves pre-Pack-D behavior (no edge filtering).
69    pub edge_filter: Option<Predicate>,
70}
71
72/// A single step in the query pipeline.
73///
74/// `Eq` is intentionally NOT derived — see [`QueryAst`] for the rationale.
75#[derive(Clone, Debug, PartialEq)]
76pub enum QueryStep {
77    /// Unified adaptive retrieval entry step consumed by the Phase 12
78    /// retrieval planner.
79    ///
80    /// Carries the caller's raw query string (not a parsed [`TextQuery`]):
81    /// the planner decides how to interpret and route it across the text
82    /// strict, text relaxed, and (future) vector branches. See
83    /// `crate::compile_retrieval_plan` for the planner entry point.
84    Search {
85        /// The raw caller-supplied query string.
86        query: String,
87        /// Maximum number of candidate rows requested by the caller.
88        limit: usize,
89    },
90    /// Nearest-neighbor search over vector embeddings.
91    VectorSearch {
92        /// The search query text (to be embedded by the caller).
93        query: String,
94        /// Maximum number of candidate rows from the vector index.
95        limit: usize,
96    },
97    /// Pack F1 semantic-search step: a natural-language query that the
98    /// engine embeds at query time using the db-wide active profile
99    /// embedder, then runs KNN against the per-kind `vec_<kind>` table.
100    ///
101    /// Unlike [`QueryStep::VectorSearch`], the `text` is NEVER a JSON
102    /// float-array literal — the caller supplies natural language and
103    /// the engine handles embedding internally.
104    SemanticSearch {
105        /// Natural-language query string to embed at query time.
106        text: String,
107        /// Maximum number of candidate rows from the vector KNN scan.
108        limit: usize,
109    },
110    /// Pack F1 raw-vector-search step: a caller-supplied dense vector
111    /// that the engine binds directly to the per-kind `vec_<kind>` KNN
112    /// scan with no embedder call. The vector's dimension must match the
113    /// active embedding profile's dimension or the coordinator returns
114    /// [`super::CompileError`]-free with a hard `DimensionMismatch`
115    /// error at plan-time.
116    RawVectorSearch {
117        /// Caller-supplied dense vector.
118        vec: Vec<f32>,
119        /// Maximum number of candidate rows from the vector KNN scan.
120        limit: usize,
121    },
122    /// Full-text search over indexed chunk content using `FathomDB`'s supported
123    /// safe text-query subset.
124    TextSearch {
125        /// Parsed text-search intent to be lowered into safe FTS5 syntax.
126        query: TextQuery,
127        /// Maximum number of candidate rows from the FTS index.
128        limit: usize,
129    },
130    /// Graph traversal following edges of the given label.
131    Traverse {
132        /// Direction to traverse.
133        direction: TraverseDirection,
134        /// Edge kind to follow.
135        label: String,
136        /// Maximum hops from each candidate.
137        max_depth: usize,
138        /// Optional predicate to filter traversal results.
139        /// `None` is exactly equivalent to the pre-Pack-2 behavior.
140        /// `Some(_)` is not yet implemented; see Pack 3.
141        filter: Option<Predicate>,
142    },
143    /// Row-level filter predicate.
144    Filter(Predicate),
145}
146
147/// A filter predicate applied to candidate nodes.
148#[derive(Clone, Debug, PartialEq, Eq)]
149pub enum Predicate {
150    /// Match nodes with the exact logical ID.
151    LogicalIdEq(String),
152    /// Match nodes with the exact kind.
153    KindEq(String),
154    /// Equality check on a JSON property at the given path.
155    JsonPathEq {
156        /// JSON path expression (e.g. `$.status`).
157        path: String,
158        /// Value to compare against.
159        value: ScalarValue,
160    },
161    /// Ordered comparison on a JSON property at the given path.
162    JsonPathCompare {
163        /// JSON path expression.
164        path: String,
165        /// Comparison operator.
166        op: ComparisonOp,
167        /// Value to compare against.
168        value: ScalarValue,
169    },
170    /// Match nodes with the exact `source_ref`.
171    SourceRefEq(String),
172    /// Match nodes where `content_ref` is not NULL (i.e. content proxy nodes).
173    ContentRefNotNull,
174    /// Match nodes with the exact `content_ref` URI.
175    ContentRefEq(String),
176    /// Fused equality check on a JSON text property at the given path.
177    ///
178    /// Unlike [`Predicate::JsonPathEq`], this variant is classified as
179    /// **fusable** by [`crate::fusion::is_fusable`] and is pushed into
180    /// the search CTE's inner `WHERE` clause so the CTE `LIMIT` applies
181    /// after the predicate runs. The caller opts into fusion by
182    /// registering an FTS property schema that covers the path; the
183    /// tethered builder enforces that gate at filter-add time.
184    JsonPathFusedEq {
185        /// JSON path expression (e.g. `$.status`).
186        path: String,
187        /// Text value to compare against.
188        value: String,
189    },
190    /// Fused ordered comparison on a JSON integer/timestamp property at
191    /// the given path. See [`Predicate::JsonPathFusedEq`] for the fusion
192    /// contract.
193    JsonPathFusedTimestampCmp {
194        /// JSON path expression.
195        path: String,
196        /// Comparison operator.
197        op: ComparisonOp,
198        /// Integer value to compare against (epoch seconds for
199        /// timestamp semantics).
200        value: i64,
201    },
202    /// Fused equality check on a JSON boolean property at the given path.
203    /// See [`Predicate::JsonPathFusedEq`] for the fusion contract.
204    /// The boolean is stored as `SQLite` integer 1/0.
205    JsonPathFusedBoolEq {
206        /// JSON path expression (e.g. `$.resolved`).
207        path: String,
208        /// Boolean value to compare against (stored as 1 or 0).
209        value: bool,
210    },
211    /// Equality check on a JSON property of the traversed edge at the given path.
212    ///
213    /// Structurally identical to [`Predicate::JsonPathEq`] but targets
214    /// `e.properties` on the edge row rather than `n.properties` on the
215    /// target node. Only valid inside an expansion slot's `edge_filter`.
216    EdgePropertyEq {
217        /// JSON path expression (e.g. `$.rel`).
218        path: String,
219        /// Value to compare against.
220        value: ScalarValue,
221    },
222    /// Ordered comparison on a JSON property of the traversed edge at the given path.
223    ///
224    /// Structurally identical to [`Predicate::JsonPathCompare`] but targets
225    /// `e.properties` on the edge row rather than `n.properties` on the
226    /// target node. Only valid inside an expansion slot's `edge_filter`.
227    EdgePropertyCompare {
228        /// JSON path expression.
229        path: String,
230        /// Comparison operator.
231        op: ComparisonOp,
232        /// Value to compare against.
233        value: ScalarValue,
234    },
235    /// Fused IN-set check on a JSON text property at the given path.
236    ///
237    /// Like [`Predicate::JsonPathFusedEq`], this variant is classified as
238    /// **fusable** and is pushed into the search CTE's inner `WHERE` clause.
239    /// The caller must have a registered FTS property schema for the path.
240    JsonPathFusedIn {
241        /// JSON path expression (e.g. `$.status`).
242        path: String,
243        /// Non-empty set of text values; the node must match at least one.
244        values: Vec<String>,
245    },
246    /// IN-set check on a JSON property at the given path.
247    ///
248    /// Unlike [`Predicate::JsonPathFusedIn`], this variant is **not** fusable
249    /// and is applied as a residual WHERE clause on the Nodes driver scan.
250    JsonPathIn {
251        /// JSON path expression (e.g. `$.category`).
252        path: String,
253        /// Non-empty set of values; the node must match at least one.
254        values: Vec<ScalarValue>,
255    },
256}
257
/// Ordered comparison operator for JSON property filters.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ComparisonOp {
    /// Greater than.
    Gt,
    /// Greater than or equal.
    Gte,
    /// Less than.
    Lt,
    /// Less than or equal.
    Lte,
}
270
/// A typed scalar value used in query predicates.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ScalarValue {
    /// A UTF-8 text value.
    Text(String),
    /// A 64-bit signed integer.
    Integer(i64),
    /// A boolean value.
    Bool(bool),
}
281
/// Direction for graph traversal steps and expansion slots.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TraverseDirection {
    /// Follow edges pointing toward the current node.
    In,
    /// Follow edges pointing away from the current node.
    Out,
}