Skip to main content

fathomdb_query/
compile.rs

1use std::fmt::Write;
2
3use crate::fusion::partition_search_filters;
4use crate::plan::{choose_driving_table, execution_hints, shape_signature};
5use crate::search::{
6    CompiledRetrievalPlan, CompiledSearch, CompiledSearchPlan, CompiledVectorSearch,
7};
8use crate::{
9    ComparisonOp, DrivingTable, ExpansionSlot, Predicate, QueryAst, QueryStep, ScalarValue,
10    TextQuery, TraverseDirection, derive_relaxed, render_text_query_fts5,
11};
12
/// A typed bind value for a compiled SQL query parameter.
///
/// Values are collected in order into [`CompiledQuery::binds`]. The compiler
/// in this module emits numbered placeholders (`?1`, `?2`, …) whose number is
/// the 1-based position of the corresponding value in that vector.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum BindValue {
    /// A UTF-8 text parameter.
    Text(String),
    /// A 64-bit signed integer parameter.
    Integer(i64),
    /// A boolean parameter.
    Bool(bool),
}
23
/// A deterministic hash of a query's structural shape, independent of bind values.
///
/// Within this module it is produced by passing `shape_signature(ast)` to
/// `hash_signature` and wrapping the resulting `u64`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct ShapeHash(pub u64);
27
/// A fully compiled query ready for execution against `SQLite`.
///
/// Produced by [`compile_query`]; `sql` and `binds` must be used together
/// because the SQL refers to the binds by position.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CompiledQuery {
    /// The generated SQL text.
    pub sql: String,
    /// Positional bind parameters for the SQL. The value at index `i`
    /// (0-based) is referenced in `sql` as `?{i + 1}`.
    pub binds: Vec<BindValue>,
    /// Structural shape hash for caching.
    pub shape_hash: ShapeHash,
    /// The driving table chosen by the query planner.
    pub driving_table: DrivingTable,
    /// Execution hints derived from the query shape.
    pub hints: crate::ExecutionHints,
}
42
/// A compiled grouped query containing a root query and expansion slots.
///
/// Produced by [`compile_grouped_query`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CompiledGroupedQuery {
    /// The root flat query, compiled from the AST with its expansions removed.
    pub root: CompiledQuery,
    /// Expansion slots to evaluate per root result, copied from the AST.
    pub expansions: Vec<ExpansionSlot>,
    /// Structural shape hash covering the root query and all expansion slots.
    pub shape_hash: ShapeHash,
    /// Execution hints derived from the grouped query shape.
    pub hints: crate::ExecutionHints,
}
55
56/// Errors that can occur during query compilation.
57#[derive(Clone, Debug, PartialEq, Eq, thiserror::Error)]
58pub enum CompileError {
59    #[error("multiple traversal steps are not supported in v1")]
60    TooManyTraversals,
61    #[error("flat query compilation does not support expansions; use compile_grouped")]
62    FlatCompileDoesNotSupportExpansions,
63    #[error("duplicate expansion slot name: {0}")]
64    DuplicateExpansionSlot(String),
65    #[error("expansion slot name must be non-empty")]
66    EmptyExpansionSlotName,
67    #[error("too many expansion slots: max {MAX_EXPANSION_SLOTS}, got {0}")]
68    TooManyExpansionSlots(usize),
69    #[error("too many bind parameters: max 15, got {0}")]
70    TooManyBindParameters(usize),
71    #[error("traversal depth {0} exceeds maximum of {MAX_TRAVERSAL_DEPTH}")]
72    TraversalTooDeep(usize),
73    #[error("invalid JSON path: must match $(.key)+ pattern, got {0:?}")]
74    InvalidJsonPath(String),
75    #[error("compile_search requires exactly one TextSearch step in the AST")]
76    MissingTextSearchStep,
77    #[error("compile_vector_search requires exactly one VectorSearch step in the AST")]
78    MissingVectorSearchStep,
79    #[error("compile_retrieval_plan requires exactly one Search step in the AST")]
80    MissingSearchStep,
81    #[error("compile_retrieval_plan requires exactly one Search step in the AST, found multiple")]
82    MultipleSearchSteps,
83}
84
85/// Security fix H-1: Validate JSON path against a strict allowlist pattern to
86/// prevent SQL injection. Retained as defense-in-depth even though the path is
87/// now parameterized (see `FIX(review)` in `compile_query`). Only paths like
88/// `$.foo`, `$.foo.bar_baz` are allowed.
89fn validate_json_path(path: &str) -> Result<(), CompileError> {
90    let valid = path.starts_with('$')
91        && path.len() > 1
92        && path[1..].split('.').all(|segment| {
93            segment.is_empty()
94                || segment
95                    .chars()
96                    .all(|c| c.is_ascii_alphanumeric() || c == '_')
97                    && !segment.is_empty()
98        })
99        && path.contains('.');
100    if !valid {
101        return Err(CompileError::InvalidJsonPath(path.to_owned()));
102    }
103    Ok(())
104}
105
106/// Append a fusable predicate as an `AND` clause referencing `alias`.
107///
108/// Only the fusable variants (those that can be evaluated against columns on
109/// the `nodes` table join inside a search CTE) are supported — callers must
110/// pre-partition predicates via
111/// [`crate::fusion::partition_search_filters`]. Residual predicates panic via
112/// `unreachable!`.
113fn append_fusable_clause(
114    sql: &mut String,
115    binds: &mut Vec<BindValue>,
116    alias: &str,
117    predicate: &Predicate,
118) -> Result<(), CompileError> {
119    match predicate {
120        Predicate::KindEq(kind) => {
121            binds.push(BindValue::Text(kind.clone()));
122            let idx = binds.len();
123            let _ = write!(sql, "\n                          AND {alias}.kind = ?{idx}");
124        }
125        Predicate::LogicalIdEq(logical_id) => {
126            binds.push(BindValue::Text(logical_id.clone()));
127            let idx = binds.len();
128            let _ = write!(
129                sql,
130                "\n                          AND {alias}.logical_id = ?{idx}"
131            );
132        }
133        Predicate::SourceRefEq(source_ref) => {
134            binds.push(BindValue::Text(source_ref.clone()));
135            let idx = binds.len();
136            let _ = write!(
137                sql,
138                "\n                          AND {alias}.source_ref = ?{idx}"
139            );
140        }
141        Predicate::ContentRefEq(uri) => {
142            binds.push(BindValue::Text(uri.clone()));
143            let idx = binds.len();
144            let _ = write!(
145                sql,
146                "\n                          AND {alias}.content_ref = ?{idx}"
147            );
148        }
149        Predicate::ContentRefNotNull => {
150            let _ = write!(
151                sql,
152                "\n                          AND {alias}.content_ref IS NOT NULL"
153            );
154        }
155        Predicate::JsonPathFusedEq { path, value } => {
156            validate_json_path(path)?;
157            binds.push(BindValue::Text(path.clone()));
158            let path_index = binds.len();
159            binds.push(BindValue::Text(value.clone()));
160            let value_index = binds.len();
161            let _ = write!(
162                sql,
163                "\n                          AND json_extract({alias}.properties, ?{path_index}) = ?{value_index}"
164            );
165        }
166        Predicate::JsonPathFusedTimestampCmp { path, op, value } => {
167            validate_json_path(path)?;
168            binds.push(BindValue::Text(path.clone()));
169            let path_index = binds.len();
170            binds.push(BindValue::Integer(*value));
171            let value_index = binds.len();
172            let operator = match op {
173                ComparisonOp::Gt => ">",
174                ComparisonOp::Gte => ">=",
175                ComparisonOp::Lt => "<",
176                ComparisonOp::Lte => "<=",
177            };
178            let _ = write!(
179                sql,
180                "\n                          AND json_extract({alias}.properties, ?{path_index}) {operator} ?{value_index}"
181            );
182        }
183        Predicate::JsonPathEq { .. } | Predicate::JsonPathCompare { .. } => {
184            unreachable!("append_fusable_clause received a residual predicate");
185        }
186    }
187    Ok(())
188}
189
/// Maximum number of bind parameters a compiled flat query may carry.
/// `compile_query` returns `CompileError::TooManyBindParameters` when the
/// generated SQL needs more.
const MAX_BIND_PARAMETERS: usize = 15;
/// Maximum number of expansion slots accepted by `compile_grouped_query`;
/// more yields `CompileError::TooManyExpansionSlots`.
const MAX_EXPANSION_SLOTS: usize = 8;

// FIX(review): max_depth was unbounded — usize::MAX produces an effectively infinite CTE.
// Options: (A) silent clamp at compile, (B) reject with CompileError, (C) validate in builder.
// Chose (B): consistent with existing TooManyTraversals/TooManyBindParameters pattern.
// The compiler is the validation boundary; silent clamping would surprise callers.
/// Maximum `max_depth` accepted for a traversal step or an expansion slot;
/// deeper requests yield `CompileError::TraversalTooDeep`.
const MAX_TRAVERSAL_DEPTH: usize = 50;
198
/// Compile a [`QueryAst`] into a [`CompiledQuery`] ready for execution.
///
/// # Compilation strategy
///
/// The compiled SQL is structured as a `WITH RECURSIVE` CTE named
/// `base_candidates` followed by a final `SELECT ... JOIN nodes` projection.
/// When the AST contains a `Traverse` step, a second recursive CTE named
/// `traversed` is emitted after `base_candidates` and the final projection
/// reads from it instead.
///
/// For the **Nodes** driving table (no FTS/vector search), every filter
/// predicate except `KindEq` (`LogicalIdEq`, `SourceRefEq`, `ContentRefEq`,
/// `ContentRefNotNull`, `JsonPathEq`, `JsonPathCompare`, `JsonPathFusedEq`,
/// `JsonPathFusedTimestampCmp`) is pushed into the `base_candidates` CTE so
/// that the CTE's `LIMIT` applies *after* filtering. Without this pushdown
/// the LIMIT would truncate the candidate set before property filters run,
/// silently excluding nodes whose properties satisfy the filter but whose
/// insertion order falls outside the limit window. `KindEq` filters are
/// emitted in the outer `WHERE` against the projected `nodes` row; the CTE
/// itself is constrained to `ast.root_kind`.
///
/// For **FTS** and **vector** driving tables, the fusable predicates returned
/// by [`partition_search_filters`] (`KindEq`, `LogicalIdEq`, `SourceRefEq`,
/// `ContentRefEq`, `ContentRefNotNull`, `JsonPathFusedEq`,
/// `JsonPathFusedTimestampCmp`) are appended to the `base_candidates` CTE.
/// On the FTS path this places them inside the CTE's `LIMIT` window; on the
/// vector path the KNN `LIMIT` still runs first (see the ASYMMETRY note in
/// the body). Residual predicates (`JsonPathEq`, `JsonPathCompare`) remain in
/// the outer `WHERE` because they require `json_extract` on the outer
/// `nodes.properties` column.
///
/// Bind placeholders are numbered: each value pushed onto `binds` is
/// referenced in the SQL as `?N`, where `N` is its 1-based position.
///
/// # Errors
///
/// - [`CompileError::FlatCompileDoesNotSupportExpansions`] if the AST carries
///   any expansions.
/// - [`CompileError::TooManyTraversals`] if more than one traversal step is
///   present.
/// - [`CompileError::TraversalTooDeep`] if a traversal's `max_depth` exceeds
///   `MAX_TRAVERSAL_DEPTH`.
/// - [`CompileError::InvalidJsonPath`] if a JSON-path predicate carries a
///   path rejected by `validate_json_path`.
/// - [`CompileError::TooManyBindParameters`] if the resulting SQL would
///   require more than `MAX_BIND_PARAMETERS` (currently 15) bind parameters.
///
/// # Panics
///
/// Panics (via `unreachable!`) if the AST is internally inconsistent — for
/// example, if `choose_driving_table` selects `VecNodes` but no
/// `VectorSearch` step is present in the AST. This cannot happen through the
/// public [`QueryBuilder`] API.
// NOTE(review): presumably kept as a single function because SQL emission and
// bind pushes are interleaved and order-sensitive — confirm before splitting.
#[allow(clippy::too_many_lines)]
pub fn compile_query(ast: &QueryAst) -> Result<CompiledQuery, CompileError> {
    if !ast.expansions.is_empty() {
        return Err(CompileError::FlatCompileDoesNotSupportExpansions);
    }

    let traversals = ast
        .steps
        .iter()
        .filter(|step| matches!(step, QueryStep::Traverse { .. }))
        .count();
    if traversals > 1 {
        return Err(CompileError::TooManyTraversals);
    }

    // Report the first traversal whose depth is over the limit, if any.
    let excessive_depth = ast.steps.iter().find_map(|step| {
        if let QueryStep::Traverse { max_depth, .. } = step
            && *max_depth > MAX_TRAVERSAL_DEPTH
        {
            return Some(*max_depth);
        }
        None
    });
    if let Some(depth) = excessive_depth {
        return Err(CompileError::TraversalTooDeep(depth));
    }

    let driving_table = choose_driving_table(ast);
    let hints = execution_hints(ast);
    let shape_hash = ShapeHash(hash_signature(&shape_signature(ast)));

    // Candidate-set limit: the first vector/text search step's limit, else
    // the AST's final limit, else 25.
    let base_limit = ast
        .steps
        .iter()
        .find_map(|step| match step {
            QueryStep::VectorSearch { limit, .. } | QueryStep::TextSearch { limit, .. } => {
                Some(*limit)
            }
            _ => None,
        })
        .or(ast.final_limit)
        .unwrap_or(25);

    // Outer limit falls back to the candidate-set limit when the AST sets none.
    let final_limit = ast.final_limit.unwrap_or(base_limit);
    // First traversal step (at most one exists after the check above).
    let traversal = ast.steps.iter().find_map(|step| {
        if let QueryStep::Traverse {
            direction,
            label,
            max_depth,
        } = step
        {
            Some((*direction, label.as_str(), *max_depth))
        } else {
            None
        }
    });

    // Partition Filter predicates for the search-driven paths into fusable
    // (injected into the search CTE's WHERE) and residual (left in the outer
    // WHERE) sets. The Nodes path pushes *every* predicate into the CTE
    // directly and ignores this partition.
    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);

    // Push order matters from here on: after each push, `binds.len()` is the
    // `?N` placeholder number written into the SQL for that value.
    let mut binds = Vec::new();
    let base_candidates = match driving_table {
        DrivingTable::VecNodes => {
            let query = ast
                .steps
                .iter()
                .find_map(|step| {
                    if let QueryStep::VectorSearch { query, .. } = step {
                        Some(query.as_str())
                    } else {
                        None
                    }
                })
                .unwrap_or_else(|| unreachable!("VecNodes chosen but no VectorSearch step in AST"));
            // ?1 = vector query text, ?2 = root kind.
            binds.push(BindValue::Text(query.to_owned()));
            binds.push(BindValue::Text(ast.root_kind.clone()));
            // sqlite-vec requires the LIMIT/k constraint to be visible directly on the
            // vec0 KNN scan. Using a sub-select isolates the vec0 LIMIT so the join
            // with chunks/nodes does not prevent the query planner from recognising it.
            //
            // ASYMMETRY (known gap, P2-3): the inner `LIMIT {base_limit}` runs
            // BEFORE the fusable-filter `WHERE` below, so fused predicates on
            // `src` (e.g. `kind_eq`) filter a candidate pool that has already
            // been narrowed to `base_limit` KNN neighbours. A
            // `vector_search("x", 5).filter_kind_eq("Goal")` can therefore
            // return fewer than 5 Goal hits even when more exist. Fixing this
            // requires overfetching from vec0 and re-ranking/re-limiting after
            // the filter — explicitly out of scope for Phase 2 filter fusion.
            // The FTS branch below does NOT share this asymmetry because its
            // outer LIMIT wraps the post-filter SELECT.
            let mut sql = format!(
                "base_candidates AS (
                    SELECT DISTINCT src.logical_id
                    FROM (
                        SELECT chunk_id FROM vec_nodes_active
                        WHERE embedding MATCH ?1
                        LIMIT {base_limit}
                    ) vc
                    JOIN chunks c ON c.id = vc.chunk_id
                    JOIN nodes src ON src.logical_id = c.node_logical_id AND src.superseded_at IS NULL
                    WHERE src.kind = ?2",
            );
            for predicate in &fusable_filters {
                append_fusable_clause(&mut sql, &mut binds, "src", predicate)?;
            }
            sql.push_str("\n                )");
            sql
        }
        DrivingTable::FtsNodes => {
            let text_query = ast
                .steps
                .iter()
                .find_map(|step| {
                    if let QueryStep::TextSearch { query, .. } = step {
                        Some(query)
                    } else {
                        None
                    }
                })
                .unwrap_or_else(|| unreachable!("FtsNodes chosen but no TextSearch step in AST"));
            // Render the typed text-query subset into safe FTS5 syntax. Only
            // supported operators are emitted as control syntax; all literal
            // terms and phrases remain quoted and escaped.
            let rendered = render_text_query_fts5(text_query);
            // Each FTS5 virtual table requires its own MATCH bind parameter;
            // reusing indices across the UNION is not supported by SQLite.
            // Hence ?1/?2 (chunk branch) and ?3/?4 (property branch) carry
            // the same rendered query and root kind.
            binds.push(BindValue::Text(rendered.clone()));
            binds.push(BindValue::Text(ast.root_kind.clone()));
            binds.push(BindValue::Text(rendered));
            binds.push(BindValue::Text(ast.root_kind.clone()));
            // Wrap the chunk/property UNION in an outer SELECT that joins
            // `nodes` once so fusable filters (kind/logical_id/source_ref/
            // content_ref) can reference node columns directly, bringing them
            // inside the CTE's LIMIT window.
            let mut sql = String::from(
                "base_candidates AS (
                    SELECT DISTINCT n.logical_id
                    FROM (
                        SELECT src.logical_id
                        FROM fts_nodes f
                        JOIN chunks c ON c.id = f.chunk_id
                        JOIN nodes src ON src.logical_id = c.node_logical_id AND src.superseded_at IS NULL
                        WHERE fts_nodes MATCH ?1
                          AND src.kind = ?2
                        UNION
                        SELECT fp.node_logical_id AS logical_id
                        FROM fts_node_properties fp
                        JOIN nodes src ON src.logical_id = fp.node_logical_id AND src.superseded_at IS NULL
                        WHERE fts_node_properties MATCH ?3
                          AND fp.kind = ?4
                    ) u
                    JOIN nodes n ON n.logical_id = u.logical_id AND n.superseded_at IS NULL
                    WHERE 1 = 1",
            );
            for predicate in &fusable_filters {
                append_fusable_clause(&mut sql, &mut binds, "n", predicate)?;
            }
            let _ = write!(
                &mut sql,
                "\n                    LIMIT {base_limit}\n                )"
            );
            sql
        }
        DrivingTable::Nodes => {
            // ?1 = root kind.
            binds.push(BindValue::Text(ast.root_kind.clone()));
            let mut sql = "base_candidates AS (
                    SELECT DISTINCT src.logical_id
                    FROM nodes src
                    WHERE src.superseded_at IS NULL
                      AND src.kind = ?1"
                .to_owned();
            // Push filter predicates into base_candidates so the LIMIT applies
            // after filtering, not before. Without this, the CTE may truncate
            // the candidate set before property/source_ref filters run, causing
            // nodes that satisfy the filter to be excluded from results.
            for step in &ast.steps {
                if let QueryStep::Filter(predicate) = step {
                    match predicate {
                        Predicate::LogicalIdEq(logical_id) => {
                            binds.push(BindValue::Text(logical_id.clone()));
                            let bind_index = binds.len();
                            let _ = write!(
                                &mut sql,
                                "\n                      AND src.logical_id = ?{bind_index}"
                            );
                        }
                        Predicate::JsonPathEq { path, value } => {
                            validate_json_path(path)?;
                            binds.push(BindValue::Text(path.clone()));
                            let path_index = binds.len();
                            binds.push(match value {
                                ScalarValue::Text(text) => BindValue::Text(text.clone()),
                                ScalarValue::Integer(integer) => BindValue::Integer(*integer),
                                ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
                            });
                            let value_index = binds.len();
                            let _ = write!(
                                &mut sql,
                                "\n                      AND json_extract(src.properties, ?{path_index}) = ?{value_index}"
                            );
                        }
                        Predicate::JsonPathCompare { path, op, value } => {
                            validate_json_path(path)?;
                            binds.push(BindValue::Text(path.clone()));
                            let path_index = binds.len();
                            binds.push(match value {
                                ScalarValue::Text(text) => BindValue::Text(text.clone()),
                                ScalarValue::Integer(integer) => BindValue::Integer(*integer),
                                ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
                            });
                            let value_index = binds.len();
                            let operator = match op {
                                ComparisonOp::Gt => ">",
                                ComparisonOp::Gte => ">=",
                                ComparisonOp::Lt => "<",
                                ComparisonOp::Lte => "<=",
                            };
                            let _ = write!(
                                &mut sql,
                                "\n                      AND json_extract(src.properties, ?{path_index}) {operator} ?{value_index}"
                            );
                        }
                        Predicate::SourceRefEq(source_ref) => {
                            binds.push(BindValue::Text(source_ref.clone()));
                            let bind_index = binds.len();
                            let _ = write!(
                                &mut sql,
                                "\n                      AND src.source_ref = ?{bind_index}"
                            );
                        }
                        Predicate::ContentRefNotNull => {
                            let _ = write!(
                                &mut sql,
                                "\n                      AND src.content_ref IS NOT NULL"
                            );
                        }
                        Predicate::ContentRefEq(uri) => {
                            binds.push(BindValue::Text(uri.clone()));
                            let bind_index = binds.len();
                            let _ = write!(
                                &mut sql,
                                "\n                      AND src.content_ref = ?{bind_index}"
                            );
                        }
                        Predicate::KindEq(_) => {
                            // Not pushed into the CTE, which is already
                            // constrained to `ast.root_kind`. `KindEq` filters
                            // are emitted against the outer `n` alias in the
                            // outer WHERE further down.
                        }
                        Predicate::JsonPathFusedEq { path, value } => {
                            validate_json_path(path)?;
                            binds.push(BindValue::Text(path.clone()));
                            let path_index = binds.len();
                            binds.push(BindValue::Text(value.clone()));
                            let value_index = binds.len();
                            let _ = write!(
                                &mut sql,
                                "\n                      AND json_extract(src.properties, ?{path_index}) = ?{value_index}"
                            );
                        }
                        Predicate::JsonPathFusedTimestampCmp { path, op, value } => {
                            validate_json_path(path)?;
                            binds.push(BindValue::Text(path.clone()));
                            let path_index = binds.len();
                            binds.push(BindValue::Integer(*value));
                            let value_index = binds.len();
                            let operator = match op {
                                ComparisonOp::Gt => ">",
                                ComparisonOp::Gte => ">=",
                                ComparisonOp::Lt => "<",
                                ComparisonOp::Lte => "<=",
                            };
                            let _ = write!(
                                &mut sql,
                                "\n                      AND json_extract(src.properties, ?{path_index}) {operator} ?{value_index}"
                            );
                        }
                    }
                }
            }
            let _ = write!(
                &mut sql,
                "\n                    LIMIT {base_limit}\n                )"
            );
            sql
        }
    };

    let mut sql = format!("WITH RECURSIVE\n{base_candidates}");
    // Alias of the row source in the final projection: `t` for the
    // `traversed` CTE, `bc` for `base_candidates`.
    let source_alias = if traversal.is_some() { "t" } else { "bc" };

    if let Some((direction, label, max_depth)) = traversal {
        binds.push(BindValue::Text(label.to_owned()));
        let label_index = binds.len();
        // Which edge endpoint matches the current node and which one is
        // followed next depends on the traversal direction.
        let (join_condition, next_logical_id) = match direction {
            TraverseDirection::Out => ("e.source_logical_id = t.logical_id", "e.target_logical_id"),
            TraverseDirection::In => ("e.target_logical_id = t.logical_id", "e.source_logical_id"),
        };

        // `visited` accumulates a comma-delimited list of the logical ids on
        // the current path; the `instr` test skips edges leading to an id
        // already on that path. Depth is bounded by `max_depth` and total
        // rows by `hints.hard_limit`.
        let _ = write!(
            &mut sql,
            ",
traversed(logical_id, depth, visited) AS (
    SELECT bc.logical_id, 0, printf(',%s,', bc.logical_id)
    FROM base_candidates bc
    UNION ALL
    SELECT {next_logical_id}, t.depth + 1, t.visited || {next_logical_id} || ','
    FROM traversed t
    JOIN edges e ON {join_condition}
        AND e.kind = ?{label_index}
        AND e.superseded_at IS NULL
    WHERE t.depth < {max_depth}
      AND instr(t.visited, printf(',%s,', {next_logical_id})) = 0
    LIMIT {}
)",
            hints.hard_limit
        );
    }

    let _ = write!(
        &mut sql,
        "
SELECT DISTINCT n.row_id, n.logical_id, n.kind, n.properties, n.content_ref
FROM {} {source_alias}
JOIN nodes n ON n.logical_id = {source_alias}.logical_id
    AND n.superseded_at IS NULL
WHERE 1 = 1",
        if traversal.is_some() {
            "traversed"
        } else {
            "base_candidates"
        }
    );

    // Outer WHERE emission. The Nodes driving table pushes every filter
    // into `base_candidates` already, so only `KindEq` (handled separately
    // via `root_kind`) needs to be re-emitted outside — we iterate
    // `ast.steps` to catch it. For the search-driven paths (FtsNodes,
    // VecNodes) we iterate the `residual_filters` partition directly
    // instead of re-classifying predicates via `is_fusable()`. This makes
    // `partition_search_filters` the single source of truth for the
    // fusable/residual split: adding a new fusable variant automatically
    // drops it from the outer WHERE without a separate audit of this loop.
    if driving_table == DrivingTable::Nodes {
        for step in &ast.steps {
            if let QueryStep::Filter(Predicate::KindEq(kind)) = step {
                binds.push(BindValue::Text(kind.clone()));
                let bind_index = binds.len();
                let _ = write!(&mut sql, "\n  AND n.kind = ?{bind_index}");
            }
        }
    } else {
        for predicate in &residual_filters {
            match predicate {
                Predicate::JsonPathEq { path, value } => {
                    validate_json_path(path)?;
                    binds.push(BindValue::Text(path.clone()));
                    let path_index = binds.len();
                    binds.push(match value {
                        ScalarValue::Text(text) => BindValue::Text(text.clone()),
                        ScalarValue::Integer(integer) => BindValue::Integer(*integer),
                        ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
                    });
                    let value_index = binds.len();
                    let _ = write!(
                        &mut sql,
                        "\n  AND json_extract(n.properties, ?{path_index}) = ?{value_index}",
                    );
                }
                Predicate::JsonPathCompare { path, op, value } => {
                    validate_json_path(path)?;
                    binds.push(BindValue::Text(path.clone()));
                    let path_index = binds.len();
                    binds.push(match value {
                        ScalarValue::Text(text) => BindValue::Text(text.clone()),
                        ScalarValue::Integer(integer) => BindValue::Integer(*integer),
                        ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
                    });
                    let value_index = binds.len();
                    let operator = match op {
                        ComparisonOp::Gt => ">",
                        ComparisonOp::Gte => ">=",
                        ComparisonOp::Lt => "<",
                        ComparisonOp::Lte => "<=",
                    };
                    let _ = write!(
                        &mut sql,
                        "\n  AND json_extract(n.properties, ?{path_index}) {operator} ?{value_index}",
                    );
                }
                Predicate::KindEq(_)
                | Predicate::LogicalIdEq(_)
                | Predicate::SourceRefEq(_)
                | Predicate::ContentRefEq(_)
                | Predicate::ContentRefNotNull
                | Predicate::JsonPathFusedEq { .. }
                | Predicate::JsonPathFusedTimestampCmp { .. } => {
                    // Fusable — already injected into base_candidates by
                    // `partition_search_filters`.
                }
            }
        }
    }

    let _ = write!(&mut sql, "\nLIMIT {final_limit}");

    // Checked last so the count covers every bind pushed above.
    if binds.len() > MAX_BIND_PARAMETERS {
        return Err(CompileError::TooManyBindParameters(binds.len()));
    }

    Ok(CompiledQuery {
        sql,
        binds,
        shape_hash,
        driving_table,
        hints,
    })
}
652
653/// Compile a [`QueryAst`] into a [`CompiledGroupedQuery`] for grouped execution.
654///
655/// # Errors
656///
657/// Returns a [`CompileError`] if the AST exceeds expansion-slot limits,
658/// contains empty slot names, or specifies a traversal depth beyond the
659/// configured maximum.
660pub fn compile_grouped_query(ast: &QueryAst) -> Result<CompiledGroupedQuery, CompileError> {
661    if ast.expansions.len() > MAX_EXPANSION_SLOTS {
662        return Err(CompileError::TooManyExpansionSlots(ast.expansions.len()));
663    }
664
665    let mut seen = std::collections::BTreeSet::new();
666    for expansion in &ast.expansions {
667        if expansion.slot.trim().is_empty() {
668            return Err(CompileError::EmptyExpansionSlotName);
669        }
670        if expansion.max_depth > MAX_TRAVERSAL_DEPTH {
671            return Err(CompileError::TraversalTooDeep(expansion.max_depth));
672        }
673        if !seen.insert(expansion.slot.clone()) {
674            return Err(CompileError::DuplicateExpansionSlot(expansion.slot.clone()));
675        }
676    }
677
678    let mut root_ast = ast.clone();
679    root_ast.expansions.clear();
680    let root = compile_query(&root_ast)?;
681    let hints = execution_hints(ast);
682    let shape_hash = ShapeHash(hash_signature(&shape_signature(ast)));
683
684    Ok(CompiledGroupedQuery {
685        root,
686        expansions: ast.expansions.clone(),
687        shape_hash,
688        hints,
689    })
690}
691
692/// Compile a [`QueryAst`] into a [`CompiledSearch`] describing an adaptive
693/// text-search execution.
694///
695/// Unlike [`compile_query`], this path does not emit SQL directly: the
696/// coordinator owns the search SELECT so it can project the richer row shape
697/// (score, source, snippet, projection id) that flat queries do not need.
698///
699/// # Errors
700///
701/// Returns [`CompileError::MissingTextSearchStep`] if the AST contains no
702/// [`QueryStep::TextSearch`] step.
703pub fn compile_search(ast: &QueryAst) -> Result<CompiledSearch, CompileError> {
704    let mut text_query = None;
705    let mut limit = None;
706    for step in &ast.steps {
707        match step {
708            QueryStep::TextSearch {
709                query,
710                limit: step_limit,
711            } => {
712                text_query = Some(query.clone());
713                limit = Some(*step_limit);
714            }
715            QueryStep::Filter(_)
716            | QueryStep::Search { .. }
717            | QueryStep::VectorSearch { .. }
718            | QueryStep::Traverse { .. } => {
719                // Filter steps are partitioned below; Search/Vector/Traverse
720                // steps are not composable with text search in the adaptive
721                // surface yet.
722            }
723        }
724    }
725    let text_query = text_query.ok_or(CompileError::MissingTextSearchStep)?;
726    let limit = limit.unwrap_or(25);
727    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
728    Ok(CompiledSearch {
729        root_kind: ast.root_kind.clone(),
730        text_query,
731        limit,
732        fusable_filters,
733        residual_filters,
734        attribution_requested: false,
735    })
736}
737
738/// Compile a [`QueryAst`] into a [`CompiledSearchPlan`] whose strict branch
739/// is the user's [`TextQuery`] and whose relaxed branch is derived via
740/// [`derive_relaxed`].
741///
742/// Reserved for Phase 7 SDK bindings that will construct plans from typed
743/// AST fragments. The coordinator currently builds its adaptive plan
744/// directly inside `execute_compiled_search` from an already-compiled
745/// [`CompiledSearch`], so this helper has no in-tree caller; it is kept
746/// as a public entry point for forthcoming surface bindings.
747///
748/// # Errors
749/// Returns [`CompileError::MissingTextSearchStep`] if the AST contains no
750/// [`QueryStep::TextSearch`] step.
751#[doc(hidden)]
752pub fn compile_search_plan(ast: &QueryAst) -> Result<CompiledSearchPlan, CompileError> {
753    let strict = compile_search(ast)?;
754    let (relaxed_query, was_degraded_at_plan_time) = derive_relaxed(&strict.text_query);
755    let relaxed = relaxed_query.map(|q| CompiledSearch {
756        root_kind: strict.root_kind.clone(),
757        text_query: q,
758        limit: strict.limit,
759        fusable_filters: strict.fusable_filters.clone(),
760        residual_filters: strict.residual_filters.clone(),
761        attribution_requested: strict.attribution_requested,
762    });
763    Ok(CompiledSearchPlan {
764        strict,
765        relaxed,
766        was_degraded_at_plan_time,
767    })
768}
769
770/// Compile a caller-provided strict/relaxed [`TextQuery`] pair into a
771/// [`CompiledSearchPlan`] against a [`QueryAst`] that supplies the kind
772/// root, filters, and limit.
773///
774/// This is the two-query entry point used by `Engine::fallback_search`. The
775/// caller's relaxed [`TextQuery`] is used verbatim — it is NOT passed through
776/// [`derive_relaxed`], and the 4-alternative
777/// [`crate::RELAXED_BRANCH_CAP`] is NOT applied. As a result
778/// [`CompiledSearchPlan::was_degraded_at_plan_time`] is always `false` on
779/// this path.
780///
781/// The AST supplies:
782///  - `root_kind` — reused for both branches
783///  - filter steps — partitioned once via [`partition_search_filters`] and
784///    shared unchanged across both branches
785///  - `limit` from the text-search step (or the default used by
786///    [`compile_search`]) when present; if the AST has no `TextSearch` step,
787///    the caller-supplied `limit` is used
788///
789/// Any `TextSearch` step already on the AST is IGNORED — `strict` and
790/// `relaxed` come from the caller. `Vector`/`Traverse` steps are also
791/// ignored for symmetry with [`compile_search`].
792///
793/// # Errors
794/// Returns [`CompileError`] if filter partitioning produces an unsupported
795/// shape (currently none; reserved for forward compatibility).
796pub fn compile_search_plan_from_queries(
797    ast: &QueryAst,
798    strict: TextQuery,
799    relaxed: Option<TextQuery>,
800    limit: usize,
801    attribution_requested: bool,
802) -> Result<CompiledSearchPlan, CompileError> {
803    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
804    let strict_compiled = CompiledSearch {
805        root_kind: ast.root_kind.clone(),
806        text_query: strict,
807        limit,
808        fusable_filters: fusable_filters.clone(),
809        residual_filters: residual_filters.clone(),
810        attribution_requested,
811    };
812    let relaxed_compiled = relaxed.map(|q| CompiledSearch {
813        root_kind: ast.root_kind.clone(),
814        text_query: q,
815        limit,
816        fusable_filters,
817        residual_filters,
818        attribution_requested,
819    });
820    Ok(CompiledSearchPlan {
821        strict: strict_compiled,
822        relaxed: relaxed_compiled,
823        was_degraded_at_plan_time: false,
824    })
825}
826
827/// Compile a [`QueryAst`] into a [`CompiledVectorSearch`] describing a
828/// vector-only retrieval execution.
829///
830/// Mirrors [`compile_search`] structurally. The AST must contain exactly one
831/// [`QueryStep::VectorSearch`] step; filters following the search step are
832/// partitioned by [`partition_search_filters`] into fusable and residual
833/// sets. Unlike [`compile_search`] this path does not produce a
834/// [`TextQuery`]; the caller's raw query string is preserved verbatim for
835/// the coordinator to bind to `embedding MATCH ?`.
836///
837/// # Errors
838///
839/// Returns [`CompileError::MissingVectorSearchStep`] if the AST contains no
840/// [`QueryStep::VectorSearch`] step.
841pub fn compile_vector_search(ast: &QueryAst) -> Result<CompiledVectorSearch, CompileError> {
842    let mut query_text = None;
843    let mut limit = None;
844    for step in &ast.steps {
845        match step {
846            QueryStep::VectorSearch {
847                query,
848                limit: step_limit,
849            } => {
850                query_text = Some(query.clone());
851                limit = Some(*step_limit);
852            }
853            QueryStep::Filter(_)
854            | QueryStep::Search { .. }
855            | QueryStep::TextSearch { .. }
856            | QueryStep::Traverse { .. } => {
857                // Filter steps are partitioned below; Search/TextSearch/
858                // Traverse steps are not composable with vector search in
859                // the standalone vector retrieval path.
860            }
861        }
862    }
863    let query_text = query_text.ok_or(CompileError::MissingVectorSearchStep)?;
864    let limit = limit.unwrap_or(25);
865    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
866    Ok(CompiledVectorSearch {
867        root_kind: ast.root_kind.clone(),
868        query_text,
869        limit,
870        fusable_filters,
871        residual_filters,
872        attribution_requested: false,
873    })
874}
875
876/// Compile a [`QueryAst`] containing a [`QueryStep::Search`] into a
877/// [`CompiledRetrievalPlan`] describing the bounded set of retrieval branches
878/// the Phase 12 planner may run.
879///
880/// The raw query string carried by the `Search` step is parsed into a
881/// strict [`TextQuery`] (via [`TextQuery::parse`]) and a relaxed sibling is
882/// derived via [`derive_relaxed`]. Both branches share the post-search
883/// fusable/residual filter partition. The resulting
884/// [`CompiledRetrievalPlan::text`] field carries them in the same Phase 6
885/// [`CompiledSearchPlan`] shape as `text_search()` / `fallback_search()`.
886///
887/// **v1 scope**: `vector` is unconditionally `None`. Read-time embedding of
888/// natural-language queries is not wired in v1; see
889/// [`CompiledRetrievalPlan`] for the rationale and the future-phase plan.
890/// Callers who need vector retrieval today must use the `vector_search()`
891/// override directly with a caller-provided vector literal.
892///
893/// # Errors
894///
895/// Returns [`CompileError::MissingSearchStep`] if the AST contains no
896/// [`QueryStep::Search`] step, or
897/// [`CompileError::MultipleSearchSteps`] if the AST contains more than one.
898pub fn compile_retrieval_plan(ast: &QueryAst) -> Result<CompiledRetrievalPlan, CompileError> {
899    let mut raw_query: Option<&str> = None;
900    let mut limit: Option<usize> = None;
901    for step in &ast.steps {
902        if let QueryStep::Search {
903            query,
904            limit: step_limit,
905        } = step
906        {
907            if raw_query.is_some() {
908                return Err(CompileError::MultipleSearchSteps);
909            }
910            raw_query = Some(query.as_str());
911            limit = Some(*step_limit);
912        }
913    }
914    let raw_query = raw_query.ok_or(CompileError::MissingSearchStep)?;
915    let limit = limit.unwrap_or(25);
916
917    let strict_text_query = TextQuery::parse(raw_query);
918    let (relaxed_text_query, was_degraded_at_plan_time) = derive_relaxed(&strict_text_query);
919
920    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
921
922    let strict = CompiledSearch {
923        root_kind: ast.root_kind.clone(),
924        text_query: strict_text_query,
925        limit,
926        fusable_filters: fusable_filters.clone(),
927        residual_filters: residual_filters.clone(),
928        attribution_requested: false,
929    };
930    let relaxed = relaxed_text_query.map(|q| CompiledSearch {
931        root_kind: ast.root_kind.clone(),
932        text_query: q,
933        limit,
934        fusable_filters,
935        residual_filters,
936        attribution_requested: false,
937    });
938    let text = CompiledSearchPlan {
939        strict,
940        relaxed,
941        was_degraded_at_plan_time,
942    };
943
944    // v1 scope (Phase 12): the planner's vector branch slot is structurally
945    // present on `CompiledRetrievalPlan` so the coordinator's three-block
946    // fusion path is fully wired, but read-time embedding of natural-language
947    // queries is deliberately deferred to a future phase. `compile_retrieval_plan`
948    // therefore always leaves `vector = None`; callers who want vector
949    // retrieval today must use `vector_search()` directly with a caller-
950    // provided vector literal.
951    Ok(CompiledRetrievalPlan {
952        text,
953        vector: None,
954        was_degraded_at_plan_time,
955    })
956}
957
/// Hash `signature` with 64-bit FNV-1a.
///
/// Each byte is XORed into the running hash, which is then multiplied by the
/// FNV prime with wrapping arithmetic. The result is deterministic across
/// Rust versions and program invocations, unlike `DefaultHasher`.
fn hash_signature(signature: &str) -> u64 {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

    signature.bytes().fold(FNV_OFFSET_BASIS, |acc, byte| {
        (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    })
}
970
971#[cfg(test)]
972#[allow(clippy::expect_used, clippy::items_after_statements)]
973mod tests {
974    use rstest::rstest;
975
976    use crate::{
977        CompileError, DrivingTable, QueryBuilder, TraverseDirection, compile_grouped_query,
978        compile_query,
979    };
980
981    #[test]
982    fn vector_query_compiles_to_chunk_resolution() {
983        let compiled = compile_query(
984            &QueryBuilder::nodes("Meeting")
985                .vector_search("budget", 5)
986                .limit(5)
987                .into_ast(),
988        )
989        .expect("compiled query");
990
991        assert_eq!(compiled.driving_table, DrivingTable::VecNodes);
992        assert!(compiled.sql.contains("JOIN chunks c ON c.id = vc.chunk_id"));
993        assert!(
994            compiled
995                .sql
996                .contains("JOIN nodes src ON src.logical_id = c.node_logical_id")
997        );
998    }
999
1000    #[rstest]
1001    #[case(5, 7)]
1002    #[case(3, 11)]
1003    fn structural_limits_change_shape_hash(#[case] left: usize, #[case] right: usize) {
1004        let left_compiled = compile_query(
1005            &QueryBuilder::nodes("Meeting")
1006                .text_search("budget", left)
1007                .limit(left)
1008                .into_ast(),
1009        )
1010        .expect("left query");
1011        let right_compiled = compile_query(
1012            &QueryBuilder::nodes("Meeting")
1013                .text_search("budget", right)
1014                .limit(right)
1015                .into_ast(),
1016        )
1017        .expect("right query");
1018
1019        assert_ne!(left_compiled.shape_hash, right_compiled.shape_hash);
1020    }
1021
1022    #[test]
1023    fn traversal_query_is_depth_bounded() {
1024        let compiled = compile_query(
1025            &QueryBuilder::nodes("Meeting")
1026                .text_search("budget", 5)
1027                .traverse(TraverseDirection::Out, "HAS_TASK", 3)
1028                .limit(10)
1029                .into_ast(),
1030        )
1031        .expect("compiled traversal");
1032
1033        assert!(compiled.sql.contains("WITH RECURSIVE"));
1034        assert!(compiled.sql.contains("WHERE t.depth < 3"));
1035    }
1036
1037    #[test]
1038    fn text_search_compiles_to_union_over_chunk_and_property_fts() {
1039        let compiled = compile_query(
1040            &QueryBuilder::nodes("Meeting")
1041                .text_search("budget", 25)
1042                .limit(25)
1043                .into_ast(),
1044        )
1045        .expect("compiled text search");
1046
1047        assert_eq!(compiled.driving_table, DrivingTable::FtsNodes);
1048        // Must contain UNION of both FTS tables.
1049        assert!(
1050            compiled.sql.contains("fts_nodes MATCH"),
1051            "must search chunk-backed FTS"
1052        );
1053        assert!(
1054            compiled.sql.contains("fts_node_properties MATCH"),
1055            "must search property-backed FTS"
1056        );
1057        assert!(compiled.sql.contains("UNION"), "must UNION both sources");
1058        // Must have 4 bind parameters: sanitized query + kind for each table.
1059        assert_eq!(compiled.binds.len(), 4);
1060    }
1061
1062    #[test]
1063    fn logical_id_filter_is_compiled() {
1064        let compiled = compile_query(
1065            &QueryBuilder::nodes("Meeting")
1066                .filter_logical_id_eq("meeting-123")
1067                .filter_json_text_eq("$.status", "active")
1068                .limit(1)
1069                .into_ast(),
1070        )
1071        .expect("compiled query");
1072
1073        // LogicalIdEq is applied in base_candidates (src alias) for the Nodes driver,
1074        // NOT duplicated in the final WHERE. The JOIN condition still contains
1075        // "n.logical_id =" which satisfies this check.
1076        assert!(compiled.sql.contains("n.logical_id ="));
1077        assert!(compiled.sql.contains("src.logical_id ="));
1078        assert!(compiled.sql.contains("json_extract"));
1079        // Only one bind for the logical_id (not two).
1080        use crate::BindValue;
1081        assert_eq!(
1082            compiled
1083                .binds
1084                .iter()
1085                .filter(|b| matches!(b, BindValue::Text(s) if s == "meeting-123"))
1086                .count(),
1087            1
1088        );
1089    }
1090
1091    #[test]
1092    fn compile_rejects_invalid_json_path() {
1093        use crate::{Predicate, QueryStep, ScalarValue};
1094        let mut ast = QueryBuilder::nodes("Meeting").into_ast();
1095        // Attempt SQL injection via JSON path.
1096        ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
1097            path: "$') OR 1=1 --".to_owned(),
1098            value: ScalarValue::Text("x".to_owned()),
1099        }));
1100        use crate::CompileError;
1101        let result = compile_query(&ast);
1102        assert!(
1103            matches!(result, Err(CompileError::InvalidJsonPath(_))),
1104            "expected InvalidJsonPath, got {result:?}"
1105        );
1106    }
1107
1108    #[test]
1109    fn compile_accepts_valid_json_paths() {
1110        use crate::{Predicate, QueryStep, ScalarValue};
1111        for valid_path in ["$.status", "$.foo.bar", "$.a_b.c2"] {
1112            let mut ast = QueryBuilder::nodes("Meeting").into_ast();
1113            ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
1114                path: valid_path.to_owned(),
1115                value: ScalarValue::Text("v".to_owned()),
1116            }));
1117            assert!(
1118                compile_query(&ast).is_ok(),
1119                "expected valid path {valid_path:?} to compile"
1120            );
1121        }
1122    }
1123
1124    #[test]
1125    fn compile_rejects_too_many_bind_parameters() {
1126        use crate::{Predicate, QueryStep, ScalarValue};
1127        let mut ast = QueryBuilder::nodes("Meeting").into_ast();
1128        // kind occupies 1 bind; each json filter now occupies 2 binds (path + value).
1129        // 7 json filters → 1 + 14 = 15 (ok), 8 → 1 + 16 = 17 (exceeds limit of 15).
1130        for i in 0..8 {
1131            ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
1132                path: format!("$.f{i}"),
1133                value: ScalarValue::Text("v".to_owned()),
1134            }));
1135        }
1136        use crate::CompileError;
1137        let result = compile_query(&ast);
1138        assert!(
1139            matches!(result, Err(CompileError::TooManyBindParameters(17))),
1140            "expected TooManyBindParameters(17), got {result:?}"
1141        );
1142    }
1143
1144    #[test]
1145    fn compile_rejects_excessive_traversal_depth() {
1146        let result = compile_query(
1147            &QueryBuilder::nodes("Meeting")
1148                .text_search("budget", 5)
1149                .traverse(TraverseDirection::Out, "HAS_TASK", 51)
1150                .limit(10)
1151                .into_ast(),
1152        );
1153        assert!(
1154            matches!(result, Err(CompileError::TraversalTooDeep(51))),
1155            "expected TraversalTooDeep(51), got {result:?}"
1156        );
1157    }
1158
1159    #[test]
1160    fn grouped_queries_with_same_structure_share_shape_hash() {
1161        let left = compile_grouped_query(
1162            &QueryBuilder::nodes("Meeting")
1163                .text_search("budget", 5)
1164                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
1165                .limit(10)
1166                .into_ast(),
1167        )
1168        .expect("left grouped query");
1169        let right = compile_grouped_query(
1170            &QueryBuilder::nodes("Meeting")
1171                .text_search("planning", 5)
1172                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
1173                .limit(10)
1174                .into_ast(),
1175        )
1176        .expect("right grouped query");
1177
1178        assert_eq!(left.shape_hash, right.shape_hash);
1179    }
1180
1181    #[test]
1182    fn compile_grouped_rejects_duplicate_expansion_slot_names() {
1183        let result = compile_grouped_query(
1184            &QueryBuilder::nodes("Meeting")
1185                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
1186                .expand("tasks", TraverseDirection::Out, "HAS_DECISION", 1)
1187                .into_ast(),
1188        );
1189
1190        assert!(
1191            matches!(result, Err(CompileError::DuplicateExpansionSlot(ref slot)) if slot == "tasks"),
1192            "expected DuplicateExpansionSlot(\"tasks\"), got {result:?}"
1193        );
1194    }
1195
1196    #[test]
1197    fn flat_compile_rejects_queries_with_expansions() {
1198        let result = compile_query(
1199            &QueryBuilder::nodes("Meeting")
1200                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
1201                .into_ast(),
1202        );
1203
1204        assert!(
1205            matches!(
1206                result,
1207                Err(CompileError::FlatCompileDoesNotSupportExpansions)
1208            ),
1209            "expected FlatCompileDoesNotSupportExpansions, got {result:?}"
1210        );
1211    }
1212
1213    #[test]
1214    fn json_path_compiled_as_bind_parameter() {
1215        let compiled = compile_query(
1216            &QueryBuilder::nodes("Meeting")
1217                .filter_json_text_eq("$.status", "active")
1218                .limit(1)
1219                .into_ast(),
1220        )
1221        .expect("compiled query");
1222
1223        // Path must be parameterized, not interpolated into the SQL string.
1224        assert!(
1225            !compiled.sql.contains("'$.status'"),
1226            "JSON path must not appear as a SQL string literal"
1227        );
1228        assert!(
1229            compiled.sql.contains("json_extract(src.properties, ?"),
1230            "JSON path must be a bind parameter (pushed into base_candidates for Nodes driver)"
1231        );
1232        // Path and value should both be in the bind list.
1233        use crate::BindValue;
1234        assert!(
1235            compiled
1236                .binds
1237                .iter()
1238                .any(|b| matches!(b, BindValue::Text(s) if s == "$.status"))
1239        );
1240        assert!(
1241            compiled
1242                .binds
1243                .iter()
1244                .any(|b| matches!(b, BindValue::Text(s) if s == "active"))
1245        );
1246    }
1247
1248    // --- Filter pushdown regression tests ---
1249    //
1250    // These tests verify that filter predicates are pushed into the
1251    // base_candidates CTE for the Nodes driving table, so the CTE LIMIT
1252    // applies after filtering rather than before.  Without pushdown, the
1253    // LIMIT may truncate the candidate set before the filter runs, causing
1254    // matching nodes to be silently excluded.
1255
1256    #[test]
1257    fn nodes_driver_pushes_json_eq_filter_into_base_candidates() {
1258        let compiled = compile_query(
1259            &QueryBuilder::nodes("Meeting")
1260                .filter_json_text_eq("$.status", "active")
1261                .limit(5)
1262                .into_ast(),
1263        )
1264        .expect("compiled query");
1265
1266        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
1267        // Filter must appear inside base_candidates (src alias), not the
1268        // outer WHERE (n alias).
1269        assert!(
1270            compiled.sql.contains("json_extract(src.properties, ?"),
1271            "json_extract must reference src (base_candidates), got:\n{}",
1272            compiled.sql,
1273        );
1274        assert!(
1275            !compiled.sql.contains("json_extract(n.properties, ?"),
1276            "json_extract must NOT appear in outer WHERE for Nodes driver, got:\n{}",
1277            compiled.sql,
1278        );
1279    }
1280
1281    #[test]
1282    fn nodes_driver_pushes_json_compare_filter_into_base_candidates() {
1283        let compiled = compile_query(
1284            &QueryBuilder::nodes("Meeting")
1285                .filter_json_integer_gte("$.priority", 5)
1286                .limit(10)
1287                .into_ast(),
1288        )
1289        .expect("compiled query");
1290
1291        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
1292        assert!(
1293            compiled.sql.contains("json_extract(src.properties, ?"),
1294            "comparison filter must be in base_candidates, got:\n{}",
1295            compiled.sql,
1296        );
1297        assert!(
1298            !compiled.sql.contains("json_extract(n.properties, ?"),
1299            "comparison filter must NOT be in outer WHERE for Nodes driver",
1300        );
1301        assert!(
1302            compiled.sql.contains(">= ?"),
1303            "expected >= operator in SQL, got:\n{}",
1304            compiled.sql,
1305        );
1306    }
1307
1308    #[test]
1309    fn nodes_driver_pushes_source_ref_filter_into_base_candidates() {
1310        let compiled = compile_query(
1311            &QueryBuilder::nodes("Meeting")
1312                .filter_source_ref_eq("ref-123")
1313                .limit(5)
1314                .into_ast(),
1315        )
1316        .expect("compiled query");
1317
1318        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
1319        assert!(
1320            compiled.sql.contains("src.source_ref = ?"),
1321            "source_ref filter must be in base_candidates, got:\n{}",
1322            compiled.sql,
1323        );
1324        assert!(
1325            !compiled.sql.contains("n.source_ref = ?"),
1326            "source_ref filter must NOT be in outer WHERE for Nodes driver",
1327        );
1328    }
1329
1330    #[test]
1331    fn nodes_driver_pushes_multiple_filters_into_base_candidates() {
1332        let compiled = compile_query(
1333            &QueryBuilder::nodes("Meeting")
1334                .filter_logical_id_eq("meeting-1")
1335                .filter_json_text_eq("$.status", "active")
1336                .filter_json_integer_gte("$.priority", 5)
1337                .filter_source_ref_eq("ref-abc")
1338                .limit(1)
1339                .into_ast(),
1340        )
1341        .expect("compiled query");
1342
1343        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
1344        // All filters should be in base_candidates, none in outer WHERE
1345        assert!(
1346            compiled.sql.contains("src.logical_id = ?"),
1347            "logical_id filter must be in base_candidates",
1348        );
1349        assert!(
1350            compiled.sql.contains("json_extract(src.properties, ?"),
1351            "JSON filters must be in base_candidates",
1352        );
1353        assert!(
1354            compiled.sql.contains("src.source_ref = ?"),
1355            "source_ref filter must be in base_candidates",
1356        );
1357        // Each bind value should appear exactly once (not duplicated in outer WHERE)
1358        use crate::BindValue;
1359        assert_eq!(
1360            compiled
1361                .binds
1362                .iter()
1363                .filter(|b| matches!(b, BindValue::Text(s) if s == "meeting-1"))
1364                .count(),
1365            1,
1366            "logical_id bind must not be duplicated"
1367        );
1368        assert_eq!(
1369            compiled
1370                .binds
1371                .iter()
1372                .filter(|b| matches!(b, BindValue::Text(s) if s == "ref-abc"))
1373                .count(),
1374            1,
1375            "source_ref bind must not be duplicated"
1376        );
1377    }
1378
1379    #[test]
1380    fn fts_driver_keeps_json_filter_residual_but_fuses_kind() {
1381        // Phase 2: JSON filters are residual (stay in outer WHERE); KindEq is
1382        // fusable (pushed into base_candidates so the CTE LIMIT applies after
1383        // filtering).
1384        let compiled = compile_query(
1385            &QueryBuilder::nodes("Meeting")
1386                .text_search("budget", 5)
1387                .filter_json_text_eq("$.status", "active")
1388                .filter_kind_eq("Meeting")
1389                .limit(5)
1390                .into_ast(),
1391        )
1392        .expect("compiled query");
1393
1394        assert_eq!(compiled.driving_table, DrivingTable::FtsNodes);
1395        // Residual: JSON predicate stays in outer WHERE on n.properties.
1396        assert!(
1397            compiled.sql.contains("json_extract(n.properties, ?"),
1398            "JSON filter must stay residual in outer WHERE, got:\n{}",
1399            compiled.sql,
1400        );
1401        // Fusable: the second n.kind bind should live inside base_candidates.
1402        // The CTE block ends before the final SELECT.
1403        let (cte, outer) = compiled
1404            .sql
1405            .split_once("SELECT DISTINCT n.row_id")
1406            .expect("query has final SELECT");
1407        assert!(
1408            cte.contains("AND n.kind = ?"),
1409            "KindEq must be fused inside base_candidates CTE, got CTE:\n{cte}"
1410        );
1411        // Outer WHERE must not contain a duplicate n.kind filter.
1412        assert!(
1413            !outer.contains("AND n.kind = ?"),
1414            "KindEq must NOT appear in outer WHERE for FTS driver, got outer:\n{outer}"
1415        );
1416    }
1417
1418    #[test]
1419    fn fts_driver_fuses_kind_filter() {
1420        let compiled = compile_query(
1421            &QueryBuilder::nodes("Goal")
1422                .text_search("budget", 5)
1423                .filter_kind_eq("Goal")
1424                .limit(5)
1425                .into_ast(),
1426        )
1427        .expect("compiled query");
1428
1429        assert_eq!(compiled.driving_table, DrivingTable::FtsNodes);
1430        let (cte, outer) = compiled
1431            .sql
1432            .split_once("SELECT DISTINCT n.row_id")
1433            .expect("query has final SELECT");
1434        assert!(
1435            cte.contains("AND n.kind = ?"),
1436            "KindEq must be fused inside base_candidates, got:\n{cte}"
1437        );
1438        assert!(
1439            !outer.contains("AND n.kind = ?"),
1440            "KindEq must NOT be in outer WHERE, got:\n{outer}"
1441        );
1442    }
1443
1444    #[test]
1445    fn vec_driver_fuses_kind_filter() {
1446        let compiled = compile_query(
1447            &QueryBuilder::nodes("Goal")
1448                .vector_search("budget", 5)
1449                .filter_kind_eq("Goal")
1450                .limit(5)
1451                .into_ast(),
1452        )
1453        .expect("compiled query");
1454
1455        assert_eq!(compiled.driving_table, DrivingTable::VecNodes);
1456        let (cte, outer) = compiled
1457            .sql
1458            .split_once("SELECT DISTINCT n.row_id")
1459            .expect("query has final SELECT");
1460        assert!(
1461            cte.contains("AND src.kind = ?"),
1462            "KindEq must be fused inside base_candidates, got:\n{cte}"
1463        );
1464        assert!(
1465            !outer.contains("AND n.kind = ?"),
1466            "KindEq must NOT be in outer WHERE, got:\n{outer}"
1467        );
1468    }
1469
1470    #[test]
1471    fn fts5_query_bind_uses_rendered_literals() {
1472        let compiled = compile_query(
1473            &QueryBuilder::nodes("Meeting")
1474                .text_search("User's name", 5)
1475                .limit(5)
1476                .into_ast(),
1477        )
1478        .expect("compiled query");
1479
1480        use crate::BindValue;
1481        assert!(
1482            compiled
1483                .binds
1484                .iter()
1485                .any(|b| matches!(b, BindValue::Text(s) if s == "\"User's\" \"name\"")),
1486            "FTS5 query bind should use rendered literal terms; got {:?}",
1487            compiled.binds
1488        );
1489    }
1490
1491    #[test]
1492    fn fts5_query_bind_supports_or_operator() {
1493        let compiled = compile_query(
1494            &QueryBuilder::nodes("Meeting")
1495                .text_search("ship OR docs", 5)
1496                .limit(5)
1497                .into_ast(),
1498        )
1499        .expect("compiled query");
1500
1501        use crate::BindValue;
1502        assert!(
1503            compiled
1504                .binds
1505                .iter()
1506                .any(|b| matches!(b, BindValue::Text(s) if s == "\"ship\" OR \"docs\"")),
1507            "FTS5 query bind should preserve supported OR; got {:?}",
1508            compiled.binds
1509        );
1510    }
1511
1512    #[test]
1513    fn fts5_query_bind_supports_not_operator() {
1514        let compiled = compile_query(
1515            &QueryBuilder::nodes("Meeting")
1516                .text_search("ship NOT blocked", 5)
1517                .limit(5)
1518                .into_ast(),
1519        )
1520        .expect("compiled query");
1521
1522        use crate::BindValue;
1523        assert!(
1524            compiled
1525                .binds
1526                .iter()
1527                .any(|b| matches!(b, BindValue::Text(s) if s == "\"ship\" NOT \"blocked\"")),
1528            "FTS5 query bind should preserve supported NOT; got {:?}",
1529            compiled.binds
1530        );
1531    }
1532
1533    #[test]
1534    fn fts5_query_bind_literalizes_clause_leading_not() {
1535        let compiled = compile_query(
1536            &QueryBuilder::nodes("Meeting")
1537                .text_search("NOT blocked", 5)
1538                .limit(5)
1539                .into_ast(),
1540        )
1541        .expect("compiled query");
1542
1543        use crate::BindValue;
1544        assert!(
1545            compiled
1546                .binds
1547                .iter()
1548                .any(|b| matches!(b, BindValue::Text(s) if s == "\"NOT\" \"blocked\"")),
1549            "Clause-leading NOT should degrade to literals; got {:?}",
1550            compiled.binds
1551        );
1552    }
1553
1554    #[test]
1555    fn fts5_query_bind_literalizes_or_not_sequence() {
1556        let compiled = compile_query(
1557            &QueryBuilder::nodes("Meeting")
1558                .text_search("ship OR NOT blocked", 5)
1559                .limit(5)
1560                .into_ast(),
1561        )
1562        .expect("compiled query");
1563
1564        use crate::BindValue;
1565        assert!(
1566            compiled.binds.iter().any(
1567                |b| matches!(b, BindValue::Text(s) if s == "\"ship\" \"OR\" \"NOT\" \"blocked\"")
1568            ),
1569            "`OR NOT` should degrade to literals rather than emit invalid FTS5; got {:?}",
1570            compiled.binds
1571        );
1572    }
1573
1574    #[test]
1575    fn compile_retrieval_plan_accepts_search_step() {
1576        use crate::{
1577            CompileError, Predicate, QueryAst, QueryStep, TextQuery, compile_retrieval_plan,
1578        };
1579        let ast = QueryAst {
1580            root_kind: "Goal".to_owned(),
1581            steps: vec![
1582                QueryStep::Search {
1583                    query: "ship quarterly docs".to_owned(),
1584                    limit: 7,
1585                },
1586                QueryStep::Filter(Predicate::KindEq("Goal".to_owned())),
1587            ],
1588            expansions: vec![],
1589            final_limit: None,
1590        };
1591        let plan = compile_retrieval_plan(&ast).expect("compiles");
1592        assert_eq!(plan.text.strict.root_kind, "Goal");
1593        assert_eq!(plan.text.strict.limit, 7);
1594        // Filter following the Search step must land in the fusable bucket.
1595        assert_eq!(plan.text.strict.fusable_filters.len(), 1);
1596        assert!(plan.text.strict.residual_filters.is_empty());
1597        // Strict text query is the parsed form of the raw string; "ship
1598        // quarterly docs" parses to an implicit AND of three terms.
1599        assert_eq!(
1600            plan.text.strict.text_query,
1601            TextQuery::And(vec![
1602                TextQuery::Term("ship".into()),
1603                TextQuery::Term("quarterly".into()),
1604                TextQuery::Term("docs".into()),
1605            ])
1606        );
1607        // Three-term implicit-AND has a useful relaxation: per-term OR.
1608        let relaxed = plan.text.relaxed.as_ref().expect("relaxed branch present");
1609        assert_eq!(
1610            relaxed.text_query,
1611            TextQuery::Or(vec![
1612                TextQuery::Term("ship".into()),
1613                TextQuery::Term("quarterly".into()),
1614                TextQuery::Term("docs".into()),
1615            ])
1616        );
1617        assert_eq!(relaxed.fusable_filters.len(), 1);
1618        assert!(!plan.was_degraded_at_plan_time);
1619        // CompileError unused in the success path.
1620        let _ = std::any::TypeId::of::<CompileError>();
1621    }
1622
1623    #[test]
1624    fn compile_retrieval_plan_rejects_ast_without_search_step() {
1625        use crate::{CompileError, QueryBuilder, compile_retrieval_plan};
1626        let ast = QueryBuilder::nodes("Goal")
1627            .filter_kind_eq("Goal")
1628            .into_ast();
1629        let result = compile_retrieval_plan(&ast);
1630        assert!(
1631            matches!(result, Err(CompileError::MissingSearchStep)),
1632            "expected MissingSearchStep, got {result:?}"
1633        );
1634    }
1635
1636    #[test]
1637    fn compile_retrieval_plan_rejects_ast_with_multiple_search_steps() {
1638        // P12-N-1: the compiler must not silently last-wins when the caller
1639        // hands it an AST with two `QueryStep::Search` entries. Instead it
1640        // must return an explicit `MultipleSearchSteps` error so the
1641        // mis-shaped AST is surfaced at plan time.
1642        use crate::{CompileError, QueryAst, QueryStep, compile_retrieval_plan};
1643        let ast = QueryAst {
1644            root_kind: "Goal".to_owned(),
1645            steps: vec![
1646                QueryStep::Search {
1647                    query: "alpha".to_owned(),
1648                    limit: 5,
1649                },
1650                QueryStep::Search {
1651                    query: "bravo".to_owned(),
1652                    limit: 10,
1653                },
1654            ],
1655            expansions: vec![],
1656            final_limit: None,
1657        };
1658        let result = compile_retrieval_plan(&ast);
1659        assert!(
1660            matches!(result, Err(CompileError::MultipleSearchSteps)),
1661            "expected MultipleSearchSteps, got {result:?}"
1662        );
1663    }
1664
1665    #[test]
1666    fn compile_retrieval_plan_v1_always_leaves_vector_empty() {
1667        // Phase 12 v1 scope: regardless of the query shape, the unified
1668        // planner never wires a vector branch into the compiled plan
1669        // because read-time embedding of natural-language queries is not
1670        // implemented in v1. Pin the constraint so a future phase that
1671        // wires the embedding generator must explicitly relax this test.
1672        use crate::{QueryAst, QueryStep, compile_retrieval_plan};
1673        for query in ["ship quarterly docs", "single", "", "   "] {
1674            let ast = QueryAst {
1675                root_kind: "Goal".to_owned(),
1676                steps: vec![QueryStep::Search {
1677                    query: query.to_owned(),
1678                    limit: 10,
1679                }],
1680                expansions: vec![],
1681                final_limit: None,
1682            };
1683            let plan = compile_retrieval_plan(&ast).expect("compiles");
1684            assert!(
1685                plan.vector.is_none(),
1686                "Phase 12 v1 must always leave the vector branch empty (query = {query:?})"
1687            );
1688        }
1689    }
1690
1691    #[test]
1692    fn fused_json_text_eq_pushes_into_search_cte_inner_where() {
1693        // Item 7 contract: a fused JSON text-eq predicate on a text search
1694        // is pushed into the `base_candidates` CTE inner WHERE clause so the
1695        // CTE LIMIT applies *after* the filter runs. Compare to
1696        // `filter_json_text_eq` which lands in the outer WHERE as residual.
1697        let mut ast = QueryBuilder::nodes("Goal")
1698            .text_search("budget", 5)
1699            .into_ast();
1700        ast.steps.push(crate::QueryStep::Filter(
1701            crate::Predicate::JsonPathFusedEq {
1702                path: "$.status".to_owned(),
1703                value: "active".to_owned(),
1704            },
1705        ));
1706        let compiled = compile_query(&ast).expect("compile");
1707
1708        // Inner CTE WHERE (under the `n` alias on the chunk/property UNION).
1709        assert!(
1710            compiled.sql.contains("AND json_extract(n.properties, ?"),
1711            "fused json text-eq must land on n.properties inside the CTE; got {}",
1712            compiled.sql
1713        );
1714        // It must NOT also appear in the outer `h.properties` / flat
1715        // projection WHERE — the fusable partition removes it.
1716        assert!(
1717            !compiled.sql.contains("h.properties"),
1718            "sql should not mention h.properties (only compiled_search uses that alias)"
1719        );
1720    }
1721
1722    #[test]
1723    fn fused_json_timestamp_cmp_emits_each_operator() {
1724        for (op, op_str) in [
1725            (crate::ComparisonOp::Gt, ">"),
1726            (crate::ComparisonOp::Gte, ">="),
1727            (crate::ComparisonOp::Lt, "<"),
1728            (crate::ComparisonOp::Lte, "<="),
1729        ] {
1730            let mut ast = QueryBuilder::nodes("Goal")
1731                .text_search("budget", 5)
1732                .into_ast();
1733            ast.steps.push(crate::QueryStep::Filter(
1734                crate::Predicate::JsonPathFusedTimestampCmp {
1735                    path: "$.written_at".to_owned(),
1736                    op,
1737                    value: 1_700_000_000,
1738                },
1739            ));
1740            let compiled = compile_query(&ast).expect("compile");
1741            let needle = "json_extract(n.properties, ?";
1742            assert!(
1743                compiled.sql.contains(needle) && compiled.sql.contains(op_str),
1744                "operator {op_str} must appear in emitted SQL for fused timestamp cmp"
1745            );
1746        }
1747    }
1748
1749    #[test]
1750    fn non_fused_json_filters_still_emit_outer_where() {
1751        // Regression guard: the existing non-fused filter_json_* family
1752        // is unchanged — its predicates continue to be classified as
1753        // residual on search-driven paths and emitted against the outer
1754        // `n.properties` WHERE clause (which is textually identical to
1755        // the inner CTE emission; the difference is *where* in the SQL
1756        // it lives).
1757        let compiled = compile_query(
1758            &QueryBuilder::nodes("Goal")
1759                .text_search("budget", 5)
1760                .filter_json_text_eq("$.status", "active")
1761                .into_ast(),
1762        )
1763        .expect("compile");
1764
1765        // The residual emission lives in the outer SELECT's WHERE and
1766        // targets `n.properties`. Fusion would instead prefix the line
1767        // with `                          AND` (26 spaces) inside the
1768        // CTE. We assert the residual form here by checking the
1769        // leading whitespace on the emitted clause matches the outer
1770        // WHERE indentation ("\n  AND ") rather than the CTE one.
1771        assert!(
1772            compiled
1773                .sql
1774                .contains("\n  AND json_extract(n.properties, ?"),
1775            "non-fused filter_json_text_eq must emit into outer WHERE, got {}",
1776            compiled.sql
1777        );
1778    }
1779
1780    #[test]
1781    fn fused_json_text_eq_pushes_into_vector_cte_inner_where() {
1782        // Mirror of the text-search case for the vector driving path:
1783        // the fused JSON text-eq predicate must land inside the
1784        // `base_candidates` CTE aliased to `src`.
1785        let mut ast = QueryBuilder::nodes("Goal")
1786            .vector_search("budget", 5)
1787            .into_ast();
1788        ast.steps.push(crate::QueryStep::Filter(
1789            crate::Predicate::JsonPathFusedEq {
1790                path: "$.status".to_owned(),
1791                value: "active".to_owned(),
1792            },
1793        ));
1794        let compiled = compile_query(&ast).expect("compile");
1795        assert_eq!(compiled.driving_table, DrivingTable::VecNodes);
1796        assert!(
1797            compiled.sql.contains("AND json_extract(src.properties, ?"),
1798            "fused json text-eq on vector path must land on src.properties, got {}",
1799            compiled.sql
1800        );
1801    }
1802
1803    #[test]
1804    fn fts5_query_bind_preserves_lowercase_not_as_literal_text() {
1805        let compiled = compile_query(
1806            &QueryBuilder::nodes("Meeting")
1807                .text_search("not a ship", 5)
1808                .limit(5)
1809                .into_ast(),
1810        )
1811        .expect("compiled query");
1812
1813        use crate::BindValue;
1814        assert!(
1815            compiled
1816                .binds
1817                .iter()
1818                .any(|b| matches!(b, BindValue::Text(s) if s == "\"not\" \"a\" \"ship\"")),
1819            "Lowercase not should remain a literal term sequence; got {:?}",
1820            compiled.binds
1821        );
1822    }
1823}