Skip to main content

fathomdb_query/
compile.rs

1use std::fmt::Write;
2
3use crate::fusion::partition_search_filters;
4use crate::plan::{choose_driving_table, execution_hints, shape_signature};
5use crate::search::{
6    CompiledRetrievalPlan, CompiledSearch, CompiledSearchPlan, CompiledVectorSearch,
7};
8use crate::{
9    ComparisonOp, DrivingTable, ExpansionSlot, Predicate, QueryAst, QueryStep, ScalarValue,
10    TextQuery, TraverseDirection, derive_relaxed, render_text_query_fts5,
11};
12
/// A typed bind value for a compiled SQL query parameter.
///
/// `compile_query` pushes one of these per `?N` placeholder it emits and
/// numbers the placeholder from the length of the bind list after the push,
/// so placeholder `?N` refers to the N-th value (1-based) in
/// [`CompiledQuery::binds`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum BindValue {
    /// A UTF-8 text parameter.
    Text(String),
    /// A 64-bit signed integer parameter.
    Integer(i64),
    /// A boolean parameter.
    Bool(bool),
}
23
/// A deterministic hash of a query's structural shape, independent of bind values.
///
/// Built in this module by hashing the output of `shape_signature` for the
/// AST being compiled.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct ShapeHash(pub u64);
27
/// A fully compiled query ready for execution against `SQLite`.
///
/// Produced by `compile_query`; `sql` and `binds` must be used together.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CompiledQuery {
    /// The generated SQL text, using numbered `?N` placeholders.
    pub sql: String,
    /// Positional bind parameters for the SQL: placeholder `?N` refers to
    /// element `N - 1` of this vector.
    pub binds: Vec<BindValue>,
    /// Structural shape hash for caching.
    pub shape_hash: ShapeHash,
    /// The driving table chosen by the query planner.
    pub driving_table: DrivingTable,
    /// Execution hints derived from the query shape.
    pub hints: crate::ExecutionHints,
}
42
/// A compiled grouped query containing a root query and expansion slots.
///
/// Produced by `compile_grouped_query`: the root is compiled from a copy of
/// the AST with its expansions cleared, while `shape_hash` and `hints` are
/// computed from the full AST (expansions included).
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CompiledGroupedQuery {
    /// The root flat query.
    pub root: CompiledQuery,
    /// Expansion slots to evaluate per root result.
    pub expansions: Vec<ExpansionSlot>,
    /// Structural shape hash covering the root query and all expansion slots.
    pub shape_hash: ShapeHash,
    /// Execution hints derived from the grouped query shape.
    pub hints: crate::ExecutionHints,
}
55
56/// Errors that can occur during query compilation.
57#[derive(Clone, Debug, PartialEq, Eq, thiserror::Error)]
58pub enum CompileError {
59    #[error("multiple traversal steps are not supported in v1")]
60    TooManyTraversals,
61    #[error("flat query compilation does not support expansions; use compile_grouped")]
62    FlatCompileDoesNotSupportExpansions,
63    #[error("duplicate expansion slot name: {0}")]
64    DuplicateExpansionSlot(String),
65    #[error("expansion slot name must be non-empty")]
66    EmptyExpansionSlotName,
67    #[error("too many expansion slots: max {MAX_EXPANSION_SLOTS}, got {0}")]
68    TooManyExpansionSlots(usize),
69    #[error("too many bind parameters: max 15, got {0}")]
70    TooManyBindParameters(usize),
71    #[error("traversal depth {0} exceeds maximum of {MAX_TRAVERSAL_DEPTH}")]
72    TraversalTooDeep(usize),
73    #[error("invalid JSON path: must match $(.key)+ pattern, got {0:?}")]
74    InvalidJsonPath(String),
75    #[error("compile_search requires exactly one TextSearch step in the AST")]
76    MissingTextSearchStep,
77    #[error("compile_vector_search requires exactly one VectorSearch step in the AST")]
78    MissingVectorSearchStep,
79    #[error("compile_retrieval_plan requires exactly one Search step in the AST")]
80    MissingSearchStep,
81    #[error("compile_retrieval_plan requires exactly one Search step in the AST, found multiple")]
82    MultipleSearchSteps,
83}
84
85/// Security fix H-1: Validate JSON path against a strict allowlist pattern to
86/// prevent SQL injection. Retained as defense-in-depth even though the path is
87/// now parameterized (see `FIX(review)` in `compile_query`). Only paths like
88/// `$.foo`, `$.foo.bar_baz` are allowed.
89fn validate_json_path(path: &str) -> Result<(), CompileError> {
90    let valid = path.starts_with('$')
91        && path.len() > 1
92        && path[1..].split('.').all(|segment| {
93            segment.is_empty()
94                || segment
95                    .chars()
96                    .all(|c| c.is_ascii_alphanumeric() || c == '_')
97                    && !segment.is_empty()
98        })
99        && path.contains('.');
100    if !valid {
101        return Err(CompileError::InvalidJsonPath(path.to_owned()));
102    }
103    Ok(())
104}
105
106/// Append a fusable predicate as an `AND` clause referencing `alias`.
107///
108/// Only the fusable variants (those that can be evaluated against columns on
109/// the `nodes` table join inside a search CTE) are supported — callers must
110/// pre-partition predicates via
111/// [`crate::fusion::partition_search_filters`]. Residual predicates panic via
112/// `unreachable!`.
113fn append_fusable_clause(
114    sql: &mut String,
115    binds: &mut Vec<BindValue>,
116    alias: &str,
117    predicate: &Predicate,
118) {
119    match predicate {
120        Predicate::KindEq(kind) => {
121            binds.push(BindValue::Text(kind.clone()));
122            let idx = binds.len();
123            let _ = write!(sql, "\n                          AND {alias}.kind = ?{idx}");
124        }
125        Predicate::LogicalIdEq(logical_id) => {
126            binds.push(BindValue::Text(logical_id.clone()));
127            let idx = binds.len();
128            let _ = write!(
129                sql,
130                "\n                          AND {alias}.logical_id = ?{idx}"
131            );
132        }
133        Predicate::SourceRefEq(source_ref) => {
134            binds.push(BindValue::Text(source_ref.clone()));
135            let idx = binds.len();
136            let _ = write!(
137                sql,
138                "\n                          AND {alias}.source_ref = ?{idx}"
139            );
140        }
141        Predicate::ContentRefEq(uri) => {
142            binds.push(BindValue::Text(uri.clone()));
143            let idx = binds.len();
144            let _ = write!(
145                sql,
146                "\n                          AND {alias}.content_ref = ?{idx}"
147            );
148        }
149        Predicate::ContentRefNotNull => {
150            let _ = write!(
151                sql,
152                "\n                          AND {alias}.content_ref IS NOT NULL"
153            );
154        }
155        Predicate::JsonPathEq { .. } | Predicate::JsonPathCompare { .. } => {
156            unreachable!("append_fusable_clause received a residual predicate");
157        }
158    }
159}
160
/// Upper bound on the number of bind parameters a flat compiled query may
/// carry; `compile_query` rejects longer bind lists with
/// `CompileError::TooManyBindParameters`.
const MAX_BIND_PARAMETERS: usize = 15;
/// Upper bound on the number of expansion slots `compile_grouped_query`
/// accepts before returning `CompileError::TooManyExpansionSlots`.
const MAX_EXPANSION_SLOTS: usize = 8;

// FIX(review): max_depth was unbounded — usize::MAX produces an effectively infinite CTE.
// Options: (A) silent clamp at compile, (B) reject with CompileError, (C) validate in builder.
// Chose (B): consistent with existing TooManyTraversals/TooManyBindParameters pattern.
// The compiler is the validation boundary; silent clamping would surprise callers.
/// Largest `max_depth` accepted on a traversal step or an expansion slot;
/// deeper requests are rejected with `CompileError::TraversalTooDeep`.
const MAX_TRAVERSAL_DEPTH: usize = 50;
169
170/// Compile a [`QueryAst`] into a [`CompiledQuery`] ready for execution.
171///
172/// # Compilation strategy
173///
174/// The compiled SQL is structured as a `WITH RECURSIVE` CTE named
175/// `base_candidates` followed by a final `SELECT ... JOIN nodes` projection.
176///
177/// For the **Nodes** driving table (no FTS/vector search), all filter
178/// predicates (`LogicalIdEq`, `JsonPathEq`, `JsonPathCompare`,
179/// `SourceRefEq`) are pushed into the `base_candidates` CTE so that the
180/// CTE's `LIMIT` applies *after* filtering. Without this pushdown the LIMIT
181/// would truncate the candidate set before property filters run, silently
182/// excluding nodes whose properties satisfy the filter but whose insertion
183/// order falls outside the limit window.
184///
185/// For **FTS** and **vector** driving tables, fusable predicates
186/// (`KindEq`, `LogicalIdEq`, `SourceRefEq`, `ContentRefEq`,
187/// `ContentRefNotNull`) are pushed into the `base_candidates` CTE so that
188/// the CTE's `LIMIT` applies *after* filtering; residual predicates
189/// (`JsonPathEq`, `JsonPathCompare`) remain in the outer `WHERE` because
190/// they require `json_extract` on the outer `nodes.properties` column.
191///
192/// # Errors
193///
194/// Returns [`CompileError::TooManyTraversals`] if more than one traversal step
195/// is present, or [`CompileError::TooManyBindParameters`] if the resulting SQL
196/// would require more than 15 bind parameters.
197///
198/// # Panics
199///
200/// Panics (via `unreachable!`) if the AST is internally inconsistent — for
201/// example, if `choose_driving_table` selects `VecNodes` but no
202/// `VectorSearch` step is present in the AST. This cannot happen through the
203/// public [`QueryBuilder`] API.
204#[allow(clippy::too_many_lines)]
205pub fn compile_query(ast: &QueryAst) -> Result<CompiledQuery, CompileError> {
206    if !ast.expansions.is_empty() {
207        return Err(CompileError::FlatCompileDoesNotSupportExpansions);
208    }
209
210    let traversals = ast
211        .steps
212        .iter()
213        .filter(|step| matches!(step, QueryStep::Traverse { .. }))
214        .count();
215    if traversals > 1 {
216        return Err(CompileError::TooManyTraversals);
217    }
218
219    let excessive_depth = ast.steps.iter().find_map(|step| {
220        if let QueryStep::Traverse { max_depth, .. } = step
221            && *max_depth > MAX_TRAVERSAL_DEPTH
222        {
223            return Some(*max_depth);
224        }
225        None
226    });
227    if let Some(depth) = excessive_depth {
228        return Err(CompileError::TraversalTooDeep(depth));
229    }
230
231    let driving_table = choose_driving_table(ast);
232    let hints = execution_hints(ast);
233    let shape_hash = ShapeHash(hash_signature(&shape_signature(ast)));
234
235    let base_limit = ast
236        .steps
237        .iter()
238        .find_map(|step| match step {
239            QueryStep::VectorSearch { limit, .. } | QueryStep::TextSearch { limit, .. } => {
240                Some(*limit)
241            }
242            _ => None,
243        })
244        .or(ast.final_limit)
245        .unwrap_or(25);
246
247    let final_limit = ast.final_limit.unwrap_or(base_limit);
248    let traversal = ast.steps.iter().find_map(|step| {
249        if let QueryStep::Traverse {
250            direction,
251            label,
252            max_depth,
253        } = step
254        {
255            Some((*direction, label.as_str(), *max_depth))
256        } else {
257            None
258        }
259    });
260
261    // Partition Filter predicates for the search-driven paths into fusable
262    // (injected into the search CTE's WHERE) and residual (left in the outer
263    // WHERE) sets. The Nodes path pushes *every* predicate into the CTE
264    // directly and ignores this partition.
265    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
266
267    let mut binds = Vec::new();
268    let base_candidates = match driving_table {
269        DrivingTable::VecNodes => {
270            let query = ast
271                .steps
272                .iter()
273                .find_map(|step| {
274                    if let QueryStep::VectorSearch { query, .. } = step {
275                        Some(query.as_str())
276                    } else {
277                        None
278                    }
279                })
280                .unwrap_or_else(|| unreachable!("VecNodes chosen but no VectorSearch step in AST"));
281            binds.push(BindValue::Text(query.to_owned()));
282            binds.push(BindValue::Text(ast.root_kind.clone()));
283            // sqlite-vec requires the LIMIT/k constraint to be visible directly on the
284            // vec0 KNN scan. Using a sub-select isolates the vec0 LIMIT so the join
285            // with chunks/nodes does not prevent the query planner from recognising it.
286            //
287            // ASYMMETRY (known gap, P2-3): the inner `LIMIT {base_limit}` runs
288            // BEFORE the fusable-filter `WHERE` below, so fused predicates on
289            // `src` (e.g. `kind_eq`) filter a candidate pool that has already
290            // been narrowed to `base_limit` KNN neighbours. A
291            // `vector_search("x", 5).filter_kind_eq("Goal")` can therefore
292            // return fewer than 5 Goal hits even when more exist. Fixing this
293            // requires overfetching from vec0 and re-ranking/re-limiting after
294            // the filter — explicitly out of scope for Phase 2 filter fusion.
295            // The FTS branch below does NOT share this asymmetry because its
296            // outer LIMIT wraps the post-filter SELECT.
297            let mut sql = format!(
298                "base_candidates AS (
299                    SELECT DISTINCT src.logical_id
300                    FROM (
301                        SELECT chunk_id FROM vec_nodes_active
302                        WHERE embedding MATCH ?1
303                        LIMIT {base_limit}
304                    ) vc
305                    JOIN chunks c ON c.id = vc.chunk_id
306                    JOIN nodes src ON src.logical_id = c.node_logical_id AND src.superseded_at IS NULL
307                    WHERE src.kind = ?2",
308            );
309            for predicate in &fusable_filters {
310                append_fusable_clause(&mut sql, &mut binds, "src", predicate);
311            }
312            sql.push_str("\n                )");
313            sql
314        }
315        DrivingTable::FtsNodes => {
316            let text_query = ast
317                .steps
318                .iter()
319                .find_map(|step| {
320                    if let QueryStep::TextSearch { query, .. } = step {
321                        Some(query)
322                    } else {
323                        None
324                    }
325                })
326                .unwrap_or_else(|| unreachable!("FtsNodes chosen but no TextSearch step in AST"));
327            // Render the typed text-query subset into safe FTS5 syntax. Only
328            // supported operators are emitted as control syntax; all literal
329            // terms and phrases remain quoted and escaped.
330            let rendered = render_text_query_fts5(text_query);
331            // Each FTS5 virtual table requires its own MATCH bind parameter;
332            // reusing indices across the UNION is not supported by SQLite.
333            binds.push(BindValue::Text(rendered.clone()));
334            binds.push(BindValue::Text(ast.root_kind.clone()));
335            binds.push(BindValue::Text(rendered));
336            binds.push(BindValue::Text(ast.root_kind.clone()));
337            // Wrap the chunk/property UNION in an outer SELECT that joins
338            // `nodes` once so fusable filters (kind/logical_id/source_ref/
339            // content_ref) can reference node columns directly, bringing them
340            // inside the CTE's LIMIT window.
341            let mut sql = String::from(
342                "base_candidates AS (
343                    SELECT DISTINCT n.logical_id
344                    FROM (
345                        SELECT src.logical_id
346                        FROM fts_nodes f
347                        JOIN chunks c ON c.id = f.chunk_id
348                        JOIN nodes src ON src.logical_id = c.node_logical_id AND src.superseded_at IS NULL
349                        WHERE fts_nodes MATCH ?1
350                          AND src.kind = ?2
351                        UNION
352                        SELECT fp.node_logical_id AS logical_id
353                        FROM fts_node_properties fp
354                        JOIN nodes src ON src.logical_id = fp.node_logical_id AND src.superseded_at IS NULL
355                        WHERE fts_node_properties MATCH ?3
356                          AND fp.kind = ?4
357                    ) u
358                    JOIN nodes n ON n.logical_id = u.logical_id AND n.superseded_at IS NULL
359                    WHERE 1 = 1",
360            );
361            for predicate in &fusable_filters {
362                append_fusable_clause(&mut sql, &mut binds, "n", predicate);
363            }
364            let _ = write!(
365                &mut sql,
366                "\n                    LIMIT {base_limit}\n                )"
367            );
368            sql
369        }
370        DrivingTable::Nodes => {
371            binds.push(BindValue::Text(ast.root_kind.clone()));
372            let mut sql = "base_candidates AS (
373                    SELECT DISTINCT src.logical_id
374                    FROM nodes src
375                    WHERE src.superseded_at IS NULL
376                      AND src.kind = ?1"
377                .to_owned();
378            // Push filter predicates into base_candidates so the LIMIT applies
379            // after filtering, not before. Without this, the CTE may truncate
380            // the candidate set before property/source_ref filters run, causing
381            // nodes that satisfy the filter to be excluded from results.
382            for step in &ast.steps {
383                if let QueryStep::Filter(predicate) = step {
384                    match predicate {
385                        Predicate::LogicalIdEq(logical_id) => {
386                            binds.push(BindValue::Text(logical_id.clone()));
387                            let bind_index = binds.len();
388                            let _ = write!(
389                                &mut sql,
390                                "\n                      AND src.logical_id = ?{bind_index}"
391                            );
392                        }
393                        Predicate::JsonPathEq { path, value } => {
394                            validate_json_path(path)?;
395                            binds.push(BindValue::Text(path.clone()));
396                            let path_index = binds.len();
397                            binds.push(match value {
398                                ScalarValue::Text(text) => BindValue::Text(text.clone()),
399                                ScalarValue::Integer(integer) => BindValue::Integer(*integer),
400                                ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
401                            });
402                            let value_index = binds.len();
403                            let _ = write!(
404                                &mut sql,
405                                "\n                      AND json_extract(src.properties, ?{path_index}) = ?{value_index}"
406                            );
407                        }
408                        Predicate::JsonPathCompare { path, op, value } => {
409                            validate_json_path(path)?;
410                            binds.push(BindValue::Text(path.clone()));
411                            let path_index = binds.len();
412                            binds.push(match value {
413                                ScalarValue::Text(text) => BindValue::Text(text.clone()),
414                                ScalarValue::Integer(integer) => BindValue::Integer(*integer),
415                                ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
416                            });
417                            let value_index = binds.len();
418                            let operator = match op {
419                                ComparisonOp::Gt => ">",
420                                ComparisonOp::Gte => ">=",
421                                ComparisonOp::Lt => "<",
422                                ComparisonOp::Lte => "<=",
423                            };
424                            let _ = write!(
425                                &mut sql,
426                                "\n                      AND json_extract(src.properties, ?{path_index}) {operator} ?{value_index}"
427                            );
428                        }
429                        Predicate::SourceRefEq(source_ref) => {
430                            binds.push(BindValue::Text(source_ref.clone()));
431                            let bind_index = binds.len();
432                            let _ = write!(
433                                &mut sql,
434                                "\n                      AND src.source_ref = ?{bind_index}"
435                            );
436                        }
437                        Predicate::ContentRefNotNull => {
438                            let _ = write!(
439                                &mut sql,
440                                "\n                      AND src.content_ref IS NOT NULL"
441                            );
442                        }
443                        Predicate::ContentRefEq(uri) => {
444                            binds.push(BindValue::Text(uri.clone()));
445                            let bind_index = binds.len();
446                            let _ = write!(
447                                &mut sql,
448                                "\n                      AND src.content_ref = ?{bind_index}"
449                            );
450                        }
451                        Predicate::KindEq(_) => {
452                            // Already filtered by ast.root_kind above.
453                        }
454                    }
455                }
456            }
457            let _ = write!(
458                &mut sql,
459                "\n                    LIMIT {base_limit}\n                )"
460            );
461            sql
462        }
463    };
464
465    let mut sql = format!("WITH RECURSIVE\n{base_candidates}");
466    let source_alias = if traversal.is_some() { "t" } else { "bc" };
467
468    if let Some((direction, label, max_depth)) = traversal {
469        binds.push(BindValue::Text(label.to_owned()));
470        let label_index = binds.len();
471        let (join_condition, next_logical_id) = match direction {
472            TraverseDirection::Out => ("e.source_logical_id = t.logical_id", "e.target_logical_id"),
473            TraverseDirection::In => ("e.target_logical_id = t.logical_id", "e.source_logical_id"),
474        };
475
476        let _ = write!(
477            &mut sql,
478            ",
479traversed(logical_id, depth, visited) AS (
480    SELECT bc.logical_id, 0, printf(',%s,', bc.logical_id)
481    FROM base_candidates bc
482    UNION ALL
483    SELECT {next_logical_id}, t.depth + 1, t.visited || {next_logical_id} || ','
484    FROM traversed t
485    JOIN edges e ON {join_condition}
486        AND e.kind = ?{label_index}
487        AND e.superseded_at IS NULL
488    WHERE t.depth < {max_depth}
489      AND instr(t.visited, printf(',%s,', {next_logical_id})) = 0
490    LIMIT {}
491)",
492            hints.hard_limit
493        );
494    }
495
496    let _ = write!(
497        &mut sql,
498        "
499SELECT DISTINCT n.row_id, n.logical_id, n.kind, n.properties, n.content_ref
500FROM {} {source_alias}
501JOIN nodes n ON n.logical_id = {source_alias}.logical_id
502    AND n.superseded_at IS NULL
503WHERE 1 = 1",
504        if traversal.is_some() {
505            "traversed"
506        } else {
507            "base_candidates"
508        }
509    );
510
511    // Outer WHERE emission. The Nodes driving table pushes every filter
512    // into `base_candidates` already, so only `KindEq` (handled separately
513    // via `root_kind`) needs to be re-emitted outside — we iterate
514    // `ast.steps` to catch it. For the search-driven paths (FtsNodes,
515    // VecNodes) we iterate the `residual_filters` partition directly
516    // instead of re-classifying predicates via `is_fusable()`. This makes
517    // `partition_search_filters` the single source of truth for the
518    // fusable/residual split: adding a new fusable variant automatically
519    // drops it from the outer WHERE without a separate audit of this loop.
520    if driving_table == DrivingTable::Nodes {
521        for step in &ast.steps {
522            if let QueryStep::Filter(Predicate::KindEq(kind)) = step {
523                binds.push(BindValue::Text(kind.clone()));
524                let bind_index = binds.len();
525                let _ = write!(&mut sql, "\n  AND n.kind = ?{bind_index}");
526            }
527        }
528    } else {
529        for predicate in &residual_filters {
530            match predicate {
531                Predicate::JsonPathEq { path, value } => {
532                    validate_json_path(path)?;
533                    binds.push(BindValue::Text(path.clone()));
534                    let path_index = binds.len();
535                    binds.push(match value {
536                        ScalarValue::Text(text) => BindValue::Text(text.clone()),
537                        ScalarValue::Integer(integer) => BindValue::Integer(*integer),
538                        ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
539                    });
540                    let value_index = binds.len();
541                    let _ = write!(
542                        &mut sql,
543                        "\n  AND json_extract(n.properties, ?{path_index}) = ?{value_index}",
544                    );
545                }
546                Predicate::JsonPathCompare { path, op, value } => {
547                    validate_json_path(path)?;
548                    binds.push(BindValue::Text(path.clone()));
549                    let path_index = binds.len();
550                    binds.push(match value {
551                        ScalarValue::Text(text) => BindValue::Text(text.clone()),
552                        ScalarValue::Integer(integer) => BindValue::Integer(*integer),
553                        ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
554                    });
555                    let value_index = binds.len();
556                    let operator = match op {
557                        ComparisonOp::Gt => ">",
558                        ComparisonOp::Gte => ">=",
559                        ComparisonOp::Lt => "<",
560                        ComparisonOp::Lte => "<=",
561                    };
562                    let _ = write!(
563                        &mut sql,
564                        "\n  AND json_extract(n.properties, ?{path_index}) {operator} ?{value_index}",
565                    );
566                }
567                Predicate::KindEq(_)
568                | Predicate::LogicalIdEq(_)
569                | Predicate::SourceRefEq(_)
570                | Predicate::ContentRefEq(_)
571                | Predicate::ContentRefNotNull => {
572                    // Fusable — already injected into base_candidates by
573                    // `partition_search_filters`.
574                }
575            }
576        }
577    }
578
579    let _ = write!(&mut sql, "\nLIMIT {final_limit}");
580
581    if binds.len() > MAX_BIND_PARAMETERS {
582        return Err(CompileError::TooManyBindParameters(binds.len()));
583    }
584
585    Ok(CompiledQuery {
586        sql,
587        binds,
588        shape_hash,
589        driving_table,
590        hints,
591    })
592}
593
594/// Compile a [`QueryAst`] into a [`CompiledGroupedQuery`] for grouped execution.
595///
596/// # Errors
597///
598/// Returns a [`CompileError`] if the AST exceeds expansion-slot limits,
599/// contains empty slot names, or specifies a traversal depth beyond the
600/// configured maximum.
601pub fn compile_grouped_query(ast: &QueryAst) -> Result<CompiledGroupedQuery, CompileError> {
602    if ast.expansions.len() > MAX_EXPANSION_SLOTS {
603        return Err(CompileError::TooManyExpansionSlots(ast.expansions.len()));
604    }
605
606    let mut seen = std::collections::BTreeSet::new();
607    for expansion in &ast.expansions {
608        if expansion.slot.trim().is_empty() {
609            return Err(CompileError::EmptyExpansionSlotName);
610        }
611        if expansion.max_depth > MAX_TRAVERSAL_DEPTH {
612            return Err(CompileError::TraversalTooDeep(expansion.max_depth));
613        }
614        if !seen.insert(expansion.slot.clone()) {
615            return Err(CompileError::DuplicateExpansionSlot(expansion.slot.clone()));
616        }
617    }
618
619    let mut root_ast = ast.clone();
620    root_ast.expansions.clear();
621    let root = compile_query(&root_ast)?;
622    let hints = execution_hints(ast);
623    let shape_hash = ShapeHash(hash_signature(&shape_signature(ast)));
624
625    Ok(CompiledGroupedQuery {
626        root,
627        expansions: ast.expansions.clone(),
628        shape_hash,
629        hints,
630    })
631}
632
633/// Compile a [`QueryAst`] into a [`CompiledSearch`] describing an adaptive
634/// text-search execution.
635///
636/// Unlike [`compile_query`], this path does not emit SQL directly: the
637/// coordinator owns the search SELECT so it can project the richer row shape
638/// (score, source, snippet, projection id) that flat queries do not need.
639///
640/// # Errors
641///
642/// Returns [`CompileError::MissingTextSearchStep`] if the AST contains no
643/// [`QueryStep::TextSearch`] step.
644pub fn compile_search(ast: &QueryAst) -> Result<CompiledSearch, CompileError> {
645    let mut text_query = None;
646    let mut limit = None;
647    for step in &ast.steps {
648        match step {
649            QueryStep::TextSearch {
650                query,
651                limit: step_limit,
652            } => {
653                text_query = Some(query.clone());
654                limit = Some(*step_limit);
655            }
656            QueryStep::Filter(_)
657            | QueryStep::Search { .. }
658            | QueryStep::VectorSearch { .. }
659            | QueryStep::Traverse { .. } => {
660                // Filter steps are partitioned below; Search/Vector/Traverse
661                // steps are not composable with text search in the adaptive
662                // surface yet.
663            }
664        }
665    }
666    let text_query = text_query.ok_or(CompileError::MissingTextSearchStep)?;
667    let limit = limit.unwrap_or(25);
668    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
669    Ok(CompiledSearch {
670        root_kind: ast.root_kind.clone(),
671        text_query,
672        limit,
673        fusable_filters,
674        residual_filters,
675        attribution_requested: false,
676    })
677}
678
679/// Compile a [`QueryAst`] into a [`CompiledSearchPlan`] whose strict branch
680/// is the user's [`TextQuery`] and whose relaxed branch is derived via
681/// [`derive_relaxed`].
682///
683/// Reserved for Phase 7 SDK bindings that will construct plans from typed
684/// AST fragments. The coordinator currently builds its adaptive plan
685/// directly inside `execute_compiled_search` from an already-compiled
686/// [`CompiledSearch`], so this helper has no in-tree caller; it is kept
687/// as a public entry point for forthcoming surface bindings.
688///
689/// # Errors
690/// Returns [`CompileError::MissingTextSearchStep`] if the AST contains no
691/// [`QueryStep::TextSearch`] step.
692#[doc(hidden)]
693pub fn compile_search_plan(ast: &QueryAst) -> Result<CompiledSearchPlan, CompileError> {
694    let strict = compile_search(ast)?;
695    let (relaxed_query, was_degraded_at_plan_time) = derive_relaxed(&strict.text_query);
696    let relaxed = relaxed_query.map(|q| CompiledSearch {
697        root_kind: strict.root_kind.clone(),
698        text_query: q,
699        limit: strict.limit,
700        fusable_filters: strict.fusable_filters.clone(),
701        residual_filters: strict.residual_filters.clone(),
702        attribution_requested: strict.attribution_requested,
703    });
704    Ok(CompiledSearchPlan {
705        strict,
706        relaxed,
707        was_degraded_at_plan_time,
708    })
709}
710
711/// Compile a caller-provided strict/relaxed [`TextQuery`] pair into a
712/// [`CompiledSearchPlan`] against a [`QueryAst`] that supplies the kind
713/// root, filters, and limit.
714///
715/// This is the two-query entry point used by `Engine::fallback_search`. The
716/// caller's relaxed [`TextQuery`] is used verbatim — it is NOT passed through
717/// [`derive_relaxed`], and the 4-alternative
718/// [`crate::RELAXED_BRANCH_CAP`] is NOT applied. As a result
719/// [`CompiledSearchPlan::was_degraded_at_plan_time`] is always `false` on
720/// this path.
721///
722/// The AST supplies:
723///  - `root_kind` — reused for both branches
724///  - filter steps — partitioned once via [`partition_search_filters`] and
725///    shared unchanged across both branches
726///  - `limit` from the text-search step (or the default used by
727///    [`compile_search`]) when present; if the AST has no `TextSearch` step,
728///    the caller-supplied `limit` is used
729///
730/// Any `TextSearch` step already on the AST is IGNORED — `strict` and
731/// `relaxed` come from the caller. `Vector`/`Traverse` steps are also
732/// ignored for symmetry with [`compile_search`].
733///
734/// # Errors
735/// Returns [`CompileError`] if filter partitioning produces an unsupported
736/// shape (currently none; reserved for forward compatibility).
737pub fn compile_search_plan_from_queries(
738    ast: &QueryAst,
739    strict: TextQuery,
740    relaxed: Option<TextQuery>,
741    limit: usize,
742    attribution_requested: bool,
743) -> Result<CompiledSearchPlan, CompileError> {
744    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
745    let strict_compiled = CompiledSearch {
746        root_kind: ast.root_kind.clone(),
747        text_query: strict,
748        limit,
749        fusable_filters: fusable_filters.clone(),
750        residual_filters: residual_filters.clone(),
751        attribution_requested,
752    };
753    let relaxed_compiled = relaxed.map(|q| CompiledSearch {
754        root_kind: ast.root_kind.clone(),
755        text_query: q,
756        limit,
757        fusable_filters,
758        residual_filters,
759        attribution_requested,
760    });
761    Ok(CompiledSearchPlan {
762        strict: strict_compiled,
763        relaxed: relaxed_compiled,
764        was_degraded_at_plan_time: false,
765    })
766}
767
768/// Compile a [`QueryAst`] into a [`CompiledVectorSearch`] describing a
769/// vector-only retrieval execution.
770///
771/// Mirrors [`compile_search`] structurally. The AST must contain exactly one
772/// [`QueryStep::VectorSearch`] step; filters following the search step are
773/// partitioned by [`partition_search_filters`] into fusable and residual
774/// sets. Unlike [`compile_search`] this path does not produce a
775/// [`TextQuery`]; the caller's raw query string is preserved verbatim for
776/// the coordinator to bind to `embedding MATCH ?`.
777///
778/// # Errors
779///
780/// Returns [`CompileError::MissingVectorSearchStep`] if the AST contains no
781/// [`QueryStep::VectorSearch`] step.
782pub fn compile_vector_search(ast: &QueryAst) -> Result<CompiledVectorSearch, CompileError> {
783    let mut query_text = None;
784    let mut limit = None;
785    for step in &ast.steps {
786        match step {
787            QueryStep::VectorSearch {
788                query,
789                limit: step_limit,
790            } => {
791                query_text = Some(query.clone());
792                limit = Some(*step_limit);
793            }
794            QueryStep::Filter(_)
795            | QueryStep::Search { .. }
796            | QueryStep::TextSearch { .. }
797            | QueryStep::Traverse { .. } => {
798                // Filter steps are partitioned below; Search/TextSearch/
799                // Traverse steps are not composable with vector search in
800                // the standalone vector retrieval path.
801            }
802        }
803    }
804    let query_text = query_text.ok_or(CompileError::MissingVectorSearchStep)?;
805    let limit = limit.unwrap_or(25);
806    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
807    Ok(CompiledVectorSearch {
808        root_kind: ast.root_kind.clone(),
809        query_text,
810        limit,
811        fusable_filters,
812        residual_filters,
813        attribution_requested: false,
814    })
815}
816
817/// Compile a [`QueryAst`] containing a [`QueryStep::Search`] into a
818/// [`CompiledRetrievalPlan`] describing the bounded set of retrieval branches
819/// the Phase 12 planner may run.
820///
821/// The raw query string carried by the `Search` step is parsed into a
822/// strict [`TextQuery`] (via [`TextQuery::parse`]) and a relaxed sibling is
823/// derived via [`derive_relaxed`]. Both branches share the post-search
824/// fusable/residual filter partition. The resulting
825/// [`CompiledRetrievalPlan::text`] field carries them in the same Phase 6
826/// [`CompiledSearchPlan`] shape as `text_search()` / `fallback_search()`.
827///
828/// **v1 scope**: `vector` is unconditionally `None`. Read-time embedding of
829/// natural-language queries is not wired in v1; see
830/// [`CompiledRetrievalPlan`] for the rationale and the future-phase plan.
831/// Callers who need vector retrieval today must use the `vector_search()`
832/// override directly with a caller-provided vector literal.
833///
834/// # Errors
835///
836/// Returns [`CompileError::MissingSearchStep`] if the AST contains no
837/// [`QueryStep::Search`] step, or
838/// [`CompileError::MultipleSearchSteps`] if the AST contains more than one.
839pub fn compile_retrieval_plan(ast: &QueryAst) -> Result<CompiledRetrievalPlan, CompileError> {
840    let mut raw_query: Option<&str> = None;
841    let mut limit: Option<usize> = None;
842    for step in &ast.steps {
843        if let QueryStep::Search {
844            query,
845            limit: step_limit,
846        } = step
847        {
848            if raw_query.is_some() {
849                return Err(CompileError::MultipleSearchSteps);
850            }
851            raw_query = Some(query.as_str());
852            limit = Some(*step_limit);
853        }
854    }
855    let raw_query = raw_query.ok_or(CompileError::MissingSearchStep)?;
856    let limit = limit.unwrap_or(25);
857
858    let strict_text_query = TextQuery::parse(raw_query);
859    let (relaxed_text_query, was_degraded_at_plan_time) = derive_relaxed(&strict_text_query);
860
861    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
862
863    let strict = CompiledSearch {
864        root_kind: ast.root_kind.clone(),
865        text_query: strict_text_query,
866        limit,
867        fusable_filters: fusable_filters.clone(),
868        residual_filters: residual_filters.clone(),
869        attribution_requested: false,
870    };
871    let relaxed = relaxed_text_query.map(|q| CompiledSearch {
872        root_kind: ast.root_kind.clone(),
873        text_query: q,
874        limit,
875        fusable_filters,
876        residual_filters,
877        attribution_requested: false,
878    });
879    let text = CompiledSearchPlan {
880        strict,
881        relaxed,
882        was_degraded_at_plan_time,
883    };
884
885    // v1 scope (Phase 12): the planner's vector branch slot is structurally
886    // present on `CompiledRetrievalPlan` so the coordinator's three-block
887    // fusion path is fully wired, but read-time embedding of natural-language
888    // queries is deliberately deferred to a future phase. `compile_retrieval_plan`
889    // therefore always leaves `vector = None`; callers who want vector
890    // retrieval today must use `vector_search()` directly with a caller-
891    // provided vector literal.
892    Ok(CompiledRetrievalPlan {
893        text,
894        vector: None,
895        was_degraded_at_plan_time,
896    })
897}
898
/// Hash `signature` with 64-bit FNV-1a.
///
/// Each byte is XORed into the running hash, which is then multiplied by the
/// FNV prime with wrapping arithmetic. The result is deterministic across
/// Rust versions and program invocations, unlike `DefaultHasher`. An empty
/// string hashes to the FNV offset basis.
fn hash_signature(signature: &str) -> u64 {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;
    signature.bytes().fold(FNV_OFFSET_BASIS, |acc, byte| {
        (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    })
}
911
912#[cfg(test)]
913#[allow(clippy::expect_used, clippy::items_after_statements)]
914mod tests {
915    use rstest::rstest;
916
917    use crate::{
918        CompileError, DrivingTable, QueryBuilder, TraverseDirection, compile_grouped_query,
919        compile_query,
920    };
921
922    #[test]
923    fn vector_query_compiles_to_chunk_resolution() {
924        let compiled = compile_query(
925            &QueryBuilder::nodes("Meeting")
926                .vector_search("budget", 5)
927                .limit(5)
928                .into_ast(),
929        )
930        .expect("compiled query");
931
932        assert_eq!(compiled.driving_table, DrivingTable::VecNodes);
933        assert!(compiled.sql.contains("JOIN chunks c ON c.id = vc.chunk_id"));
934        assert!(
935            compiled
936                .sql
937                .contains("JOIN nodes src ON src.logical_id = c.node_logical_id")
938        );
939    }
940
941    #[rstest]
942    #[case(5, 7)]
943    #[case(3, 11)]
944    fn structural_limits_change_shape_hash(#[case] left: usize, #[case] right: usize) {
945        let left_compiled = compile_query(
946            &QueryBuilder::nodes("Meeting")
947                .text_search("budget", left)
948                .limit(left)
949                .into_ast(),
950        )
951        .expect("left query");
952        let right_compiled = compile_query(
953            &QueryBuilder::nodes("Meeting")
954                .text_search("budget", right)
955                .limit(right)
956                .into_ast(),
957        )
958        .expect("right query");
959
960        assert_ne!(left_compiled.shape_hash, right_compiled.shape_hash);
961    }
962
963    #[test]
964    fn traversal_query_is_depth_bounded() {
965        let compiled = compile_query(
966            &QueryBuilder::nodes("Meeting")
967                .text_search("budget", 5)
968                .traverse(TraverseDirection::Out, "HAS_TASK", 3)
969                .limit(10)
970                .into_ast(),
971        )
972        .expect("compiled traversal");
973
974        assert!(compiled.sql.contains("WITH RECURSIVE"));
975        assert!(compiled.sql.contains("WHERE t.depth < 3"));
976    }
977
978    #[test]
979    fn text_search_compiles_to_union_over_chunk_and_property_fts() {
980        let compiled = compile_query(
981            &QueryBuilder::nodes("Meeting")
982                .text_search("budget", 25)
983                .limit(25)
984                .into_ast(),
985        )
986        .expect("compiled text search");
987
988        assert_eq!(compiled.driving_table, DrivingTable::FtsNodes);
989        // Must contain UNION of both FTS tables.
990        assert!(
991            compiled.sql.contains("fts_nodes MATCH"),
992            "must search chunk-backed FTS"
993        );
994        assert!(
995            compiled.sql.contains("fts_node_properties MATCH"),
996            "must search property-backed FTS"
997        );
998        assert!(compiled.sql.contains("UNION"), "must UNION both sources");
999        // Must have 4 bind parameters: sanitized query + kind for each table.
1000        assert_eq!(compiled.binds.len(), 4);
1001    }
1002
1003    #[test]
1004    fn logical_id_filter_is_compiled() {
1005        let compiled = compile_query(
1006            &QueryBuilder::nodes("Meeting")
1007                .filter_logical_id_eq("meeting-123")
1008                .filter_json_text_eq("$.status", "active")
1009                .limit(1)
1010                .into_ast(),
1011        )
1012        .expect("compiled query");
1013
1014        // LogicalIdEq is applied in base_candidates (src alias) for the Nodes driver,
1015        // NOT duplicated in the final WHERE. The JOIN condition still contains
1016        // "n.logical_id =" which satisfies this check.
1017        assert!(compiled.sql.contains("n.logical_id ="));
1018        assert!(compiled.sql.contains("src.logical_id ="));
1019        assert!(compiled.sql.contains("json_extract"));
1020        // Only one bind for the logical_id (not two).
1021        use crate::BindValue;
1022        assert_eq!(
1023            compiled
1024                .binds
1025                .iter()
1026                .filter(|b| matches!(b, BindValue::Text(s) if s == "meeting-123"))
1027                .count(),
1028            1
1029        );
1030    }
1031
1032    #[test]
1033    fn compile_rejects_invalid_json_path() {
1034        use crate::{Predicate, QueryStep, ScalarValue};
1035        let mut ast = QueryBuilder::nodes("Meeting").into_ast();
1036        // Attempt SQL injection via JSON path.
1037        ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
1038            path: "$') OR 1=1 --".to_owned(),
1039            value: ScalarValue::Text("x".to_owned()),
1040        }));
1041        use crate::CompileError;
1042        let result = compile_query(&ast);
1043        assert!(
1044            matches!(result, Err(CompileError::InvalidJsonPath(_))),
1045            "expected InvalidJsonPath, got {result:?}"
1046        );
1047    }
1048
1049    #[test]
1050    fn compile_accepts_valid_json_paths() {
1051        use crate::{Predicate, QueryStep, ScalarValue};
1052        for valid_path in ["$.status", "$.foo.bar", "$.a_b.c2"] {
1053            let mut ast = QueryBuilder::nodes("Meeting").into_ast();
1054            ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
1055                path: valid_path.to_owned(),
1056                value: ScalarValue::Text("v".to_owned()),
1057            }));
1058            assert!(
1059                compile_query(&ast).is_ok(),
1060                "expected valid path {valid_path:?} to compile"
1061            );
1062        }
1063    }
1064
1065    #[test]
1066    fn compile_rejects_too_many_bind_parameters() {
1067        use crate::{Predicate, QueryStep, ScalarValue};
1068        let mut ast = QueryBuilder::nodes("Meeting").into_ast();
1069        // kind occupies 1 bind; each json filter now occupies 2 binds (path + value).
1070        // 7 json filters → 1 + 14 = 15 (ok), 8 → 1 + 16 = 17 (exceeds limit of 15).
1071        for i in 0..8 {
1072            ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
1073                path: format!("$.f{i}"),
1074                value: ScalarValue::Text("v".to_owned()),
1075            }));
1076        }
1077        use crate::CompileError;
1078        let result = compile_query(&ast);
1079        assert!(
1080            matches!(result, Err(CompileError::TooManyBindParameters(17))),
1081            "expected TooManyBindParameters(17), got {result:?}"
1082        );
1083    }
1084
1085    #[test]
1086    fn compile_rejects_excessive_traversal_depth() {
1087        let result = compile_query(
1088            &QueryBuilder::nodes("Meeting")
1089                .text_search("budget", 5)
1090                .traverse(TraverseDirection::Out, "HAS_TASK", 51)
1091                .limit(10)
1092                .into_ast(),
1093        );
1094        assert!(
1095            matches!(result, Err(CompileError::TraversalTooDeep(51))),
1096            "expected TraversalTooDeep(51), got {result:?}"
1097        );
1098    }
1099
1100    #[test]
1101    fn grouped_queries_with_same_structure_share_shape_hash() {
1102        let left = compile_grouped_query(
1103            &QueryBuilder::nodes("Meeting")
1104                .text_search("budget", 5)
1105                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
1106                .limit(10)
1107                .into_ast(),
1108        )
1109        .expect("left grouped query");
1110        let right = compile_grouped_query(
1111            &QueryBuilder::nodes("Meeting")
1112                .text_search("planning", 5)
1113                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
1114                .limit(10)
1115                .into_ast(),
1116        )
1117        .expect("right grouped query");
1118
1119        assert_eq!(left.shape_hash, right.shape_hash);
1120    }
1121
1122    #[test]
1123    fn compile_grouped_rejects_duplicate_expansion_slot_names() {
1124        let result = compile_grouped_query(
1125            &QueryBuilder::nodes("Meeting")
1126                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
1127                .expand("tasks", TraverseDirection::Out, "HAS_DECISION", 1)
1128                .into_ast(),
1129        );
1130
1131        assert!(
1132            matches!(result, Err(CompileError::DuplicateExpansionSlot(ref slot)) if slot == "tasks"),
1133            "expected DuplicateExpansionSlot(\"tasks\"), got {result:?}"
1134        );
1135    }
1136
1137    #[test]
1138    fn flat_compile_rejects_queries_with_expansions() {
1139        let result = compile_query(
1140            &QueryBuilder::nodes("Meeting")
1141                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
1142                .into_ast(),
1143        );
1144
1145        assert!(
1146            matches!(
1147                result,
1148                Err(CompileError::FlatCompileDoesNotSupportExpansions)
1149            ),
1150            "expected FlatCompileDoesNotSupportExpansions, got {result:?}"
1151        );
1152    }
1153
1154    #[test]
1155    fn json_path_compiled_as_bind_parameter() {
1156        let compiled = compile_query(
1157            &QueryBuilder::nodes("Meeting")
1158                .filter_json_text_eq("$.status", "active")
1159                .limit(1)
1160                .into_ast(),
1161        )
1162        .expect("compiled query");
1163
1164        // Path must be parameterized, not interpolated into the SQL string.
1165        assert!(
1166            !compiled.sql.contains("'$.status'"),
1167            "JSON path must not appear as a SQL string literal"
1168        );
1169        assert!(
1170            compiled.sql.contains("json_extract(src.properties, ?"),
1171            "JSON path must be a bind parameter (pushed into base_candidates for Nodes driver)"
1172        );
1173        // Path and value should both be in the bind list.
1174        use crate::BindValue;
1175        assert!(
1176            compiled
1177                .binds
1178                .iter()
1179                .any(|b| matches!(b, BindValue::Text(s) if s == "$.status"))
1180        );
1181        assert!(
1182            compiled
1183                .binds
1184                .iter()
1185                .any(|b| matches!(b, BindValue::Text(s) if s == "active"))
1186        );
1187    }
1188
1189    // --- Filter pushdown regression tests ---
1190    //
1191    // These tests verify that filter predicates are pushed into the
1192    // base_candidates CTE for the Nodes driving table, so the CTE LIMIT
1193    // applies after filtering rather than before.  Without pushdown, the
1194    // LIMIT may truncate the candidate set before the filter runs, causing
1195    // matching nodes to be silently excluded.
1196
1197    #[test]
1198    fn nodes_driver_pushes_json_eq_filter_into_base_candidates() {
1199        let compiled = compile_query(
1200            &QueryBuilder::nodes("Meeting")
1201                .filter_json_text_eq("$.status", "active")
1202                .limit(5)
1203                .into_ast(),
1204        )
1205        .expect("compiled query");
1206
1207        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
1208        // Filter must appear inside base_candidates (src alias), not the
1209        // outer WHERE (n alias).
1210        assert!(
1211            compiled.sql.contains("json_extract(src.properties, ?"),
1212            "json_extract must reference src (base_candidates), got:\n{}",
1213            compiled.sql,
1214        );
1215        assert!(
1216            !compiled.sql.contains("json_extract(n.properties, ?"),
1217            "json_extract must NOT appear in outer WHERE for Nodes driver, got:\n{}",
1218            compiled.sql,
1219        );
1220    }
1221
1222    #[test]
1223    fn nodes_driver_pushes_json_compare_filter_into_base_candidates() {
1224        let compiled = compile_query(
1225            &QueryBuilder::nodes("Meeting")
1226                .filter_json_integer_gte("$.priority", 5)
1227                .limit(10)
1228                .into_ast(),
1229        )
1230        .expect("compiled query");
1231
1232        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
1233        assert!(
1234            compiled.sql.contains("json_extract(src.properties, ?"),
1235            "comparison filter must be in base_candidates, got:\n{}",
1236            compiled.sql,
1237        );
1238        assert!(
1239            !compiled.sql.contains("json_extract(n.properties, ?"),
1240            "comparison filter must NOT be in outer WHERE for Nodes driver",
1241        );
1242        assert!(
1243            compiled.sql.contains(">= ?"),
1244            "expected >= operator in SQL, got:\n{}",
1245            compiled.sql,
1246        );
1247    }
1248
1249    #[test]
1250    fn nodes_driver_pushes_source_ref_filter_into_base_candidates() {
1251        let compiled = compile_query(
1252            &QueryBuilder::nodes("Meeting")
1253                .filter_source_ref_eq("ref-123")
1254                .limit(5)
1255                .into_ast(),
1256        )
1257        .expect("compiled query");
1258
1259        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
1260        assert!(
1261            compiled.sql.contains("src.source_ref = ?"),
1262            "source_ref filter must be in base_candidates, got:\n{}",
1263            compiled.sql,
1264        );
1265        assert!(
1266            !compiled.sql.contains("n.source_ref = ?"),
1267            "source_ref filter must NOT be in outer WHERE for Nodes driver",
1268        );
1269    }
1270
1271    #[test]
1272    fn nodes_driver_pushes_multiple_filters_into_base_candidates() {
1273        let compiled = compile_query(
1274            &QueryBuilder::nodes("Meeting")
1275                .filter_logical_id_eq("meeting-1")
1276                .filter_json_text_eq("$.status", "active")
1277                .filter_json_integer_gte("$.priority", 5)
1278                .filter_source_ref_eq("ref-abc")
1279                .limit(1)
1280                .into_ast(),
1281        )
1282        .expect("compiled query");
1283
1284        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
1285        // All filters should be in base_candidates, none in outer WHERE
1286        assert!(
1287            compiled.sql.contains("src.logical_id = ?"),
1288            "logical_id filter must be in base_candidates",
1289        );
1290        assert!(
1291            compiled.sql.contains("json_extract(src.properties, ?"),
1292            "JSON filters must be in base_candidates",
1293        );
1294        assert!(
1295            compiled.sql.contains("src.source_ref = ?"),
1296            "source_ref filter must be in base_candidates",
1297        );
1298        // Each bind value should appear exactly once (not duplicated in outer WHERE)
1299        use crate::BindValue;
1300        assert_eq!(
1301            compiled
1302                .binds
1303                .iter()
1304                .filter(|b| matches!(b, BindValue::Text(s) if s == "meeting-1"))
1305                .count(),
1306            1,
1307            "logical_id bind must not be duplicated"
1308        );
1309        assert_eq!(
1310            compiled
1311                .binds
1312                .iter()
1313                .filter(|b| matches!(b, BindValue::Text(s) if s == "ref-abc"))
1314                .count(),
1315            1,
1316            "source_ref bind must not be duplicated"
1317        );
1318    }
1319
1320    #[test]
1321    fn fts_driver_keeps_json_filter_residual_but_fuses_kind() {
1322        // Phase 2: JSON filters are residual (stay in outer WHERE); KindEq is
1323        // fusable (pushed into base_candidates so the CTE LIMIT applies after
1324        // filtering).
1325        let compiled = compile_query(
1326            &QueryBuilder::nodes("Meeting")
1327                .text_search("budget", 5)
1328                .filter_json_text_eq("$.status", "active")
1329                .filter_kind_eq("Meeting")
1330                .limit(5)
1331                .into_ast(),
1332        )
1333        .expect("compiled query");
1334
1335        assert_eq!(compiled.driving_table, DrivingTable::FtsNodes);
1336        // Residual: JSON predicate stays in outer WHERE on n.properties.
1337        assert!(
1338            compiled.sql.contains("json_extract(n.properties, ?"),
1339            "JSON filter must stay residual in outer WHERE, got:\n{}",
1340            compiled.sql,
1341        );
1342        // Fusable: the second n.kind bind should live inside base_candidates.
1343        // The CTE block ends before the final SELECT.
1344        let (cte, outer) = compiled
1345            .sql
1346            .split_once("SELECT DISTINCT n.row_id")
1347            .expect("query has final SELECT");
1348        assert!(
1349            cte.contains("AND n.kind = ?"),
1350            "KindEq must be fused inside base_candidates CTE, got CTE:\n{cte}"
1351        );
1352        // Outer WHERE must not contain a duplicate n.kind filter.
1353        assert!(
1354            !outer.contains("AND n.kind = ?"),
1355            "KindEq must NOT appear in outer WHERE for FTS driver, got outer:\n{outer}"
1356        );
1357    }
1358
1359    #[test]
1360    fn fts_driver_fuses_kind_filter() {
1361        let compiled = compile_query(
1362            &QueryBuilder::nodes("Goal")
1363                .text_search("budget", 5)
1364                .filter_kind_eq("Goal")
1365                .limit(5)
1366                .into_ast(),
1367        )
1368        .expect("compiled query");
1369
1370        assert_eq!(compiled.driving_table, DrivingTable::FtsNodes);
1371        let (cte, outer) = compiled
1372            .sql
1373            .split_once("SELECT DISTINCT n.row_id")
1374            .expect("query has final SELECT");
1375        assert!(
1376            cte.contains("AND n.kind = ?"),
1377            "KindEq must be fused inside base_candidates, got:\n{cte}"
1378        );
1379        assert!(
1380            !outer.contains("AND n.kind = ?"),
1381            "KindEq must NOT be in outer WHERE, got:\n{outer}"
1382        );
1383    }
1384
1385    #[test]
1386    fn vec_driver_fuses_kind_filter() {
1387        let compiled = compile_query(
1388            &QueryBuilder::nodes("Goal")
1389                .vector_search("budget", 5)
1390                .filter_kind_eq("Goal")
1391                .limit(5)
1392                .into_ast(),
1393        )
1394        .expect("compiled query");
1395
1396        assert_eq!(compiled.driving_table, DrivingTable::VecNodes);
1397        let (cte, outer) = compiled
1398            .sql
1399            .split_once("SELECT DISTINCT n.row_id")
1400            .expect("query has final SELECT");
1401        assert!(
1402            cte.contains("AND src.kind = ?"),
1403            "KindEq must be fused inside base_candidates, got:\n{cte}"
1404        );
1405        assert!(
1406            !outer.contains("AND n.kind = ?"),
1407            "KindEq must NOT be in outer WHERE, got:\n{outer}"
1408        );
1409    }
1410
1411    #[test]
1412    fn fts5_query_bind_uses_rendered_literals() {
1413        let compiled = compile_query(
1414            &QueryBuilder::nodes("Meeting")
1415                .text_search("User's name", 5)
1416                .limit(5)
1417                .into_ast(),
1418        )
1419        .expect("compiled query");
1420
1421        use crate::BindValue;
1422        assert!(
1423            compiled
1424                .binds
1425                .iter()
1426                .any(|b| matches!(b, BindValue::Text(s) if s == "\"User's\" \"name\"")),
1427            "FTS5 query bind should use rendered literal terms; got {:?}",
1428            compiled.binds
1429        );
1430    }
1431
1432    #[test]
1433    fn fts5_query_bind_supports_or_operator() {
1434        let compiled = compile_query(
1435            &QueryBuilder::nodes("Meeting")
1436                .text_search("ship OR docs", 5)
1437                .limit(5)
1438                .into_ast(),
1439        )
1440        .expect("compiled query");
1441
1442        use crate::BindValue;
1443        assert!(
1444            compiled
1445                .binds
1446                .iter()
1447                .any(|b| matches!(b, BindValue::Text(s) if s == "\"ship\" OR \"docs\"")),
1448            "FTS5 query bind should preserve supported OR; got {:?}",
1449            compiled.binds
1450        );
1451    }
1452
1453    #[test]
1454    fn fts5_query_bind_supports_not_operator() {
1455        let compiled = compile_query(
1456            &QueryBuilder::nodes("Meeting")
1457                .text_search("ship NOT blocked", 5)
1458                .limit(5)
1459                .into_ast(),
1460        )
1461        .expect("compiled query");
1462
1463        use crate::BindValue;
1464        assert!(
1465            compiled
1466                .binds
1467                .iter()
1468                .any(|b| matches!(b, BindValue::Text(s) if s == "\"ship\" NOT \"blocked\"")),
1469            "FTS5 query bind should preserve supported NOT; got {:?}",
1470            compiled.binds
1471        );
1472    }
1473
1474    #[test]
1475    fn fts5_query_bind_literalizes_clause_leading_not() {
1476        let compiled = compile_query(
1477            &QueryBuilder::nodes("Meeting")
1478                .text_search("NOT blocked", 5)
1479                .limit(5)
1480                .into_ast(),
1481        )
1482        .expect("compiled query");
1483
1484        use crate::BindValue;
1485        assert!(
1486            compiled
1487                .binds
1488                .iter()
1489                .any(|b| matches!(b, BindValue::Text(s) if s == "\"NOT\" \"blocked\"")),
1490            "Clause-leading NOT should degrade to literals; got {:?}",
1491            compiled.binds
1492        );
1493    }
1494
1495    #[test]
1496    fn fts5_query_bind_literalizes_or_not_sequence() {
1497        let compiled = compile_query(
1498            &QueryBuilder::nodes("Meeting")
1499                .text_search("ship OR NOT blocked", 5)
1500                .limit(5)
1501                .into_ast(),
1502        )
1503        .expect("compiled query");
1504
1505        use crate::BindValue;
1506        assert!(
1507            compiled.binds.iter().any(
1508                |b| matches!(b, BindValue::Text(s) if s == "\"ship\" \"OR\" \"NOT\" \"blocked\"")
1509            ),
1510            "`OR NOT` should degrade to literals rather than emit invalid FTS5; got {:?}",
1511            compiled.binds
1512        );
1513    }
1514
1515    #[test]
1516    fn compile_retrieval_plan_accepts_search_step() {
1517        use crate::{
1518            CompileError, Predicate, QueryAst, QueryStep, TextQuery, compile_retrieval_plan,
1519        };
1520        let ast = QueryAst {
1521            root_kind: "Goal".to_owned(),
1522            steps: vec![
1523                QueryStep::Search {
1524                    query: "ship quarterly docs".to_owned(),
1525                    limit: 7,
1526                },
1527                QueryStep::Filter(Predicate::KindEq("Goal".to_owned())),
1528            ],
1529            expansions: vec![],
1530            final_limit: None,
1531        };
1532        let plan = compile_retrieval_plan(&ast).expect("compiles");
1533        assert_eq!(plan.text.strict.root_kind, "Goal");
1534        assert_eq!(plan.text.strict.limit, 7);
1535        // Filter following the Search step must land in the fusable bucket.
1536        assert_eq!(plan.text.strict.fusable_filters.len(), 1);
1537        assert!(plan.text.strict.residual_filters.is_empty());
1538        // Strict text query is the parsed form of the raw string; "ship
1539        // quarterly docs" parses to an implicit AND of three terms.
1540        assert_eq!(
1541            plan.text.strict.text_query,
1542            TextQuery::And(vec![
1543                TextQuery::Term("ship".into()),
1544                TextQuery::Term("quarterly".into()),
1545                TextQuery::Term("docs".into()),
1546            ])
1547        );
1548        // Three-term implicit-AND has a useful relaxation: per-term OR.
1549        let relaxed = plan.text.relaxed.as_ref().expect("relaxed branch present");
1550        assert_eq!(
1551            relaxed.text_query,
1552            TextQuery::Or(vec![
1553                TextQuery::Term("ship".into()),
1554                TextQuery::Term("quarterly".into()),
1555                TextQuery::Term("docs".into()),
1556            ])
1557        );
1558        assert_eq!(relaxed.fusable_filters.len(), 1);
1559        assert!(!plan.was_degraded_at_plan_time);
1560        // CompileError unused in the success path.
1561        let _ = std::any::TypeId::of::<CompileError>();
1562    }
1563
1564    #[test]
1565    fn compile_retrieval_plan_rejects_ast_without_search_step() {
1566        use crate::{CompileError, QueryBuilder, compile_retrieval_plan};
1567        let ast = QueryBuilder::nodes("Goal")
1568            .filter_kind_eq("Goal")
1569            .into_ast();
1570        let result = compile_retrieval_plan(&ast);
1571        assert!(
1572            matches!(result, Err(CompileError::MissingSearchStep)),
1573            "expected MissingSearchStep, got {result:?}"
1574        );
1575    }
1576
1577    #[test]
1578    fn compile_retrieval_plan_rejects_ast_with_multiple_search_steps() {
1579        // P12-N-1: the compiler must not silently last-wins when the caller
1580        // hands it an AST with two `QueryStep::Search` entries. Instead it
1581        // must return an explicit `MultipleSearchSteps` error so the
1582        // mis-shaped AST is surfaced at plan time.
1583        use crate::{CompileError, QueryAst, QueryStep, compile_retrieval_plan};
1584        let ast = QueryAst {
1585            root_kind: "Goal".to_owned(),
1586            steps: vec![
1587                QueryStep::Search {
1588                    query: "alpha".to_owned(),
1589                    limit: 5,
1590                },
1591                QueryStep::Search {
1592                    query: "bravo".to_owned(),
1593                    limit: 10,
1594                },
1595            ],
1596            expansions: vec![],
1597            final_limit: None,
1598        };
1599        let result = compile_retrieval_plan(&ast);
1600        assert!(
1601            matches!(result, Err(CompileError::MultipleSearchSteps)),
1602            "expected MultipleSearchSteps, got {result:?}"
1603        );
1604    }
1605
1606    #[test]
1607    fn compile_retrieval_plan_v1_always_leaves_vector_empty() {
1608        // Phase 12 v1 scope: regardless of the query shape, the unified
1609        // planner never wires a vector branch into the compiled plan
1610        // because read-time embedding of natural-language queries is not
1611        // implemented in v1. Pin the constraint so a future phase that
1612        // wires the embedding generator must explicitly relax this test.
1613        use crate::{QueryAst, QueryStep, compile_retrieval_plan};
1614        for query in ["ship quarterly docs", "single", "", "   "] {
1615            let ast = QueryAst {
1616                root_kind: "Goal".to_owned(),
1617                steps: vec![QueryStep::Search {
1618                    query: query.to_owned(),
1619                    limit: 10,
1620                }],
1621                expansions: vec![],
1622                final_limit: None,
1623            };
1624            let plan = compile_retrieval_plan(&ast).expect("compiles");
1625            assert!(
1626                plan.vector.is_none(),
1627                "Phase 12 v1 must always leave the vector branch empty (query = {query:?})"
1628            );
1629        }
1630    }
1631
1632    #[test]
1633    fn fts5_query_bind_preserves_lowercase_not_as_literal_text() {
1634        let compiled = compile_query(
1635            &QueryBuilder::nodes("Meeting")
1636                .text_search("not a ship", 5)
1637                .limit(5)
1638                .into_ast(),
1639        )
1640        .expect("compiled query");
1641
1642        use crate::BindValue;
1643        assert!(
1644            compiled
1645                .binds
1646                .iter()
1647                .any(|b| matches!(b, BindValue::Text(s) if s == "\"not\" \"a\" \"ship\"")),
1648            "Lowercase not should remain a literal term sequence; got {:?}",
1649            compiled.binds
1650        );
1651    }
1652}