// fathomdb_query/compile.rs

1use std::fmt::Write;
2
3use crate::fusion::partition_search_filters;
4use crate::plan::{choose_driving_table, execution_hints, shape_signature};
5use crate::search::{
6    CompiledRawVectorSearch, CompiledRetrievalPlan, CompiledSearch, CompiledSearchPlan,
7    CompiledSemanticSearch, CompiledVectorSearch,
8};
9use crate::{
10    ComparisonOp, DrivingTable, EdgeExpansionSlot, ExpansionSlot, Predicate, QueryAst, QueryStep,
11    ScalarValue, TextQuery, TraverseDirection, derive_relaxed, render_text_query_fts5,
12};
13
/// A typed bind value for a compiled SQL query parameter.
///
/// Bind values are stored in positional order; the generated SQL refers to
/// them with 1-based numbered placeholders (`?1`, `?2`, …).
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum BindValue {
    /// A UTF-8 text parameter.
    Text(String),
    /// A 64-bit signed integer parameter.
    Integer(i64),
    /// A boolean parameter.
    Bool(bool),
}
24
/// A deterministic hash of a query's structural shape, independent of bind values.
///
/// A `Copy` newtype over the raw `u64`; it derives `Eq` and `Hash`, so it can
/// be compared directly and used as a map or set key.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct ShapeHash(pub u64);
28
29/// A fully compiled query ready for execution against `SQLite`.
30///
31/// Pack F1.75: carries optional [`CompiledSemanticSearch`] /
32/// [`CompiledRawVectorSearch`] sidecars. When either is `Some`, the
33/// coordinator dispatches the query through the dedicated
34/// `execute_compiled_semantic_search` / `execute_compiled_raw_vector_search`
35/// executor instead of running `sql` as a plain node scan. The `sql` /
36/// `binds` fields remain populated with a no-op node-scan plan so that
37/// explain paths keep working without the sidecar.
38#[derive(Clone, Debug, PartialEq)]
39pub struct CompiledQuery {
40    /// The generated SQL text.
41    pub sql: String,
42    /// Positional bind parameters for the SQL.
43    pub binds: Vec<BindValue>,
44    /// Structural shape hash for caching.
45    pub shape_hash: ShapeHash,
46    /// The driving table chosen by the query planner.
47    pub driving_table: DrivingTable,
48    /// Execution hints derived from the query shape.
49    pub hints: crate::ExecutionHints,
50    /// Pack F1.75 sidecar: when populated, the coordinator dispatches
51    /// through `execute_compiled_semantic_search`.
52    pub semantic_search: Option<CompiledSemanticSearch>,
53    /// Pack F1.75 sidecar: when populated, the coordinator dispatches
54    /// through `execute_compiled_raw_vector_search`.
55    pub raw_vector_search: Option<CompiledRawVectorSearch>,
56}
57
58/// A compiled grouped query containing a root query and expansion slots.
59#[derive(Clone, Debug, PartialEq)]
60pub struct CompiledGroupedQuery {
61    /// The root flat query.
62    pub root: CompiledQuery,
63    /// Expansion slots to evaluate per root result.
64    pub expansions: Vec<ExpansionSlot>,
65    /// Edge-projecting expansion slots to evaluate per root result.
66    pub edge_expansions: Vec<EdgeExpansionSlot>,
67    /// Structural shape hash covering the root query and all expansion slots.
68    pub shape_hash: ShapeHash,
69    /// Execution hints derived from the grouped query shape.
70    pub hints: crate::ExecutionHints,
71}
72
73/// Errors that can occur during query compilation.
74#[derive(Clone, Debug, PartialEq, Eq, thiserror::Error)]
75pub enum CompileError {
76    #[error("multiple traversal steps are not supported in v1")]
77    TooManyTraversals,
78    #[error("flat query compilation does not support expansions; use compile_grouped")]
79    FlatCompileDoesNotSupportExpansions,
80    #[error("duplicate expansion slot name: {0}")]
81    DuplicateExpansionSlot(String),
82    #[error("expansion slot name must be non-empty")]
83    EmptyExpansionSlotName,
84    #[error("too many expansion slots: max {MAX_EXPANSION_SLOTS}, got {0}")]
85    TooManyExpansionSlots(usize),
86    #[error("too many bind parameters: max 15, got {0}")]
87    TooManyBindParameters(usize),
88    #[error("traversal depth {0} exceeds maximum of {MAX_TRAVERSAL_DEPTH}")]
89    TraversalTooDeep(usize),
90    #[error("invalid JSON path: must match $(.key)+ pattern, got {0:?}")]
91    InvalidJsonPath(String),
92    #[error("compile_search requires exactly one TextSearch step in the AST")]
93    MissingTextSearchStep,
94    #[error("compile_vector_search requires exactly one VectorSearch step in the AST")]
95    MissingVectorSearchStep,
96    #[error("compile_retrieval_plan requires exactly one Search step in the AST")]
97    MissingSearchStep,
98    #[error("compile_retrieval_plan requires exactly one Search step in the AST, found multiple")]
99    MultipleSearchSteps,
100    #[error(
101        "query AST contains both SemanticSearch and RawVectorSearch steps; these surfaces are mutually exclusive"
102    )]
103    SemanticAndRawVectorSearchBothPresent,
104}
105
106/// Security fix H-1: Validate JSON path against a strict allowlist pattern to
107/// prevent SQL injection. Retained as defense-in-depth even though the path is
108/// now parameterized (see `FIX(review)` in `compile_query`). Only paths like
109/// `$.foo`, `$.foo.bar_baz` are allowed.
110fn validate_json_path(path: &str) -> Result<(), CompileError> {
111    let valid = path.starts_with('$')
112        && path.len() > 1
113        && path[1..].split('.').all(|segment| {
114            segment.is_empty()
115                || segment
116                    .chars()
117                    .all(|c| c.is_ascii_alphanumeric() || c == '_')
118                    && !segment.is_empty()
119        })
120        && path.contains('.');
121    if !valid {
122        return Err(CompileError::InvalidJsonPath(path.to_owned()));
123    }
124    Ok(())
125}
126
127/// Append a fusable predicate as an `AND` clause referencing `alias`.
128///
129/// Only the fusable variants (those that can be evaluated against columns on
130/// the `nodes` table join inside a search CTE) are supported — callers must
131/// pre-partition predicates via
132/// [`crate::fusion::partition_search_filters`]. Residual predicates panic via
133/// `unreachable!`.
134#[allow(clippy::too_many_lines)]
135fn append_fusable_clause(
136    sql: &mut String,
137    binds: &mut Vec<BindValue>,
138    alias: &str,
139    predicate: &Predicate,
140) -> Result<(), CompileError> {
141    match predicate {
142        Predicate::KindEq(kind) => {
143            binds.push(BindValue::Text(kind.clone()));
144            let idx = binds.len();
145            let _ = write!(sql, "\n                          AND {alias}.kind = ?{idx}");
146        }
147        Predicate::LogicalIdEq(logical_id) => {
148            binds.push(BindValue::Text(logical_id.clone()));
149            let idx = binds.len();
150            let _ = write!(
151                sql,
152                "\n                          AND {alias}.logical_id = ?{idx}"
153            );
154        }
155        Predicate::SourceRefEq(source_ref) => {
156            binds.push(BindValue::Text(source_ref.clone()));
157            let idx = binds.len();
158            let _ = write!(
159                sql,
160                "\n                          AND {alias}.source_ref = ?{idx}"
161            );
162        }
163        Predicate::ContentRefEq(uri) => {
164            binds.push(BindValue::Text(uri.clone()));
165            let idx = binds.len();
166            let _ = write!(
167                sql,
168                "\n                          AND {alias}.content_ref = ?{idx}"
169            );
170        }
171        Predicate::ContentRefNotNull => {
172            let _ = write!(
173                sql,
174                "\n                          AND {alias}.content_ref IS NOT NULL"
175            );
176        }
177        Predicate::JsonPathFusedEq { path, value } => {
178            validate_json_path(path)?;
179            binds.push(BindValue::Text(path.clone()));
180            let path_index = binds.len();
181            binds.push(BindValue::Text(value.clone()));
182            let value_index = binds.len();
183            let _ = write!(
184                sql,
185                "\n                          AND json_extract({alias}.properties, ?{path_index}) = ?{value_index}"
186            );
187        }
188        Predicate::JsonPathFusedTimestampCmp { path, op, value } => {
189            validate_json_path(path)?;
190            binds.push(BindValue::Text(path.clone()));
191            let path_index = binds.len();
192            binds.push(BindValue::Integer(*value));
193            let value_index = binds.len();
194            let operator = match op {
195                ComparisonOp::Gt => ">",
196                ComparisonOp::Gte => ">=",
197                ComparisonOp::Lt => "<",
198                ComparisonOp::Lte => "<=",
199            };
200            let _ = write!(
201                sql,
202                "\n                          AND json_extract({alias}.properties, ?{path_index}) {operator} ?{value_index}"
203            );
204        }
205        Predicate::JsonPathFusedBoolEq { path, value } => {
206            validate_json_path(path)?;
207            binds.push(BindValue::Text(path.clone()));
208            let path_index = binds.len();
209            binds.push(BindValue::Integer(i64::from(*value)));
210            let value_index = binds.len();
211            let _ = write!(
212                sql,
213                "\n                          AND json_extract({alias}.properties, ?{path_index}) = ?{value_index}"
214            );
215        }
216        Predicate::JsonPathFusedIn { path, values } => {
217            validate_json_path(path)?;
218            binds.push(BindValue::Text(path.clone()));
219            let first_param = binds.len();
220            for v in values {
221                binds.push(BindValue::Text(v.clone()));
222            }
223            let placeholders = (1..=values.len())
224                .map(|i| format!("?{}", first_param + i))
225                .collect::<Vec<_>>()
226                .join(", ");
227            let _ = write!(
228                sql,
229                "\n                          AND json_extract({alias}.properties, ?{first_param}) IN ({placeholders})"
230            );
231        }
232        Predicate::JsonPathEq { .. }
233        | Predicate::JsonPathCompare { .. }
234        | Predicate::JsonPathIn { .. } => {
235            unreachable!("append_fusable_clause received a residual predicate");
236        }
237        Predicate::EdgePropertyEq { .. } | Predicate::EdgePropertyCompare { .. } => {
238            unreachable!(
239                "append_fusable_clause received an edge-property predicate; edge filters are handled in compile_edge_filter"
240            );
241        }
242    }
243    Ok(())
244}
245
/// Upper bound on bind parameters for a compiled query; exceeding it is
/// reported as `CompileError::TooManyBindParameters`.
const MAX_BIND_PARAMETERS: usize = 15;
/// Upper bound on expansion slots; exceeding it is reported as
/// `CompileError::TooManyExpansionSlots`.
const MAX_EXPANSION_SLOTS: usize = 8;

// FIX(review): max_depth was unbounded — usize::MAX produces an effectively infinite CTE.
// Options: (A) silent clamp at compile, (B) reject with CompileError, (C) validate in builder.
// Chose (B): consistent with existing TooManyTraversals/TooManyBindParameters pattern.
// The compiler is the validation boundary; silent clamping would surprise callers.
/// Largest `max_depth` a traversal step may request; deeper traversals are
/// rejected with `CompileError::TraversalTooDeep`.
const MAX_TRAVERSAL_DEPTH: usize = 50;
254
255/// Compile a [`QueryAst`] into a [`CompiledQuery`] ready for execution.
256///
257/// # Compilation strategy
258///
259/// The compiled SQL is structured as a `WITH RECURSIVE` CTE named
260/// `base_candidates` followed by a final `SELECT ... JOIN nodes` projection.
261///
262/// For the **Nodes** driving table (no FTS/vector search), all filter
263/// predicates (`LogicalIdEq`, `JsonPathEq`, `JsonPathCompare`,
264/// `SourceRefEq`) are pushed into the `base_candidates` CTE so that the
265/// CTE's `LIMIT` applies *after* filtering. Without this pushdown the LIMIT
266/// would truncate the candidate set before property filters run, silently
267/// excluding nodes whose properties satisfy the filter but whose insertion
268/// order falls outside the limit window.
269///
270/// For **FTS** and **vector** driving tables, fusable predicates
271/// (`KindEq`, `LogicalIdEq`, `SourceRefEq`, `ContentRefEq`,
272/// `ContentRefNotNull`) are pushed into the `base_candidates` CTE so that
273/// the CTE's `LIMIT` applies *after* filtering; residual predicates
274/// (`JsonPathEq`, `JsonPathCompare`) remain in the outer `WHERE` because
275/// they require `json_extract` on the outer `nodes.properties` column.
276///
277/// # Errors
278///
279/// Returns [`CompileError::TooManyTraversals`] if more than one traversal step
280/// is present, or [`CompileError::TooManyBindParameters`] if the resulting SQL
281/// would require more than 15 bind parameters.
282///
283/// # Panics
284///
285/// Panics (via `unreachable!`) if the AST is internally inconsistent — for
286/// example, if `choose_driving_table` selects `VecNodes` but no
287/// `VectorSearch` step is present in the AST. This cannot happen through the
288/// public [`QueryBuilder`] API.
289#[allow(clippy::too_many_lines)]
290pub fn compile_query(ast: &QueryAst) -> Result<CompiledQuery, CompileError> {
291    if !ast.expansions.is_empty() {
292        return Err(CompileError::FlatCompileDoesNotSupportExpansions);
293    }
294
295    // Pack G: mutual-exclusion guard. `semantic_search` and
296    // `raw_vector_search` are two alternative entry points into the vector
297    // sidecar executor — they target the same `vec_<kind>` table but differ
298    // only in whether the engine embeds at query time. Carrying both in one
299    // AST previously discarded the earlier sidecar silently (the loop below
300    // overwrites on each match). Reject at compile time instead.
301    let has_semantic = ast
302        .steps
303        .iter()
304        .any(|s| matches!(s, QueryStep::SemanticSearch { .. }));
305    let has_raw_vector = ast
306        .steps
307        .iter()
308        .any(|s| matches!(s, QueryStep::RawVectorSearch { .. }));
309    if has_semantic && has_raw_vector {
310        return Err(CompileError::SemanticAndRawVectorSearchBothPresent);
311    }
312
313    let traversals = ast
314        .steps
315        .iter()
316        .filter(|step| matches!(step, QueryStep::Traverse { .. }))
317        .count();
318    if traversals > 1 {
319        return Err(CompileError::TooManyTraversals);
320    }
321
322    let excessive_depth = ast.steps.iter().find_map(|step| {
323        if let QueryStep::Traverse { max_depth, .. } = step
324            && *max_depth > MAX_TRAVERSAL_DEPTH
325        {
326            return Some(*max_depth);
327        }
328        None
329    });
330    if let Some(depth) = excessive_depth {
331        return Err(CompileError::TraversalTooDeep(depth));
332    }
333
334    let driving_table = choose_driving_table(ast);
335    let hints = execution_hints(ast);
336    let shape_hash = ShapeHash(hash_signature(&shape_signature(ast)));
337
338    let base_limit = ast
339        .steps
340        .iter()
341        .find_map(|step| match step {
342            QueryStep::VectorSearch { limit, .. }
343            | QueryStep::TextSearch { limit, .. }
344            | QueryStep::SemanticSearch { limit, .. }
345            | QueryStep::RawVectorSearch { limit, .. } => Some(*limit),
346            _ => None,
347        })
348        .or(ast.final_limit)
349        .unwrap_or(25);
350
351    let final_limit = ast.final_limit.unwrap_or(base_limit);
352    let traversal = ast.steps.iter().find_map(|step| {
353        if let QueryStep::Traverse {
354            direction,
355            label,
356            max_depth,
357            filter: _,
358        } = step
359        {
360            Some((*direction, label.as_str(), *max_depth))
361        } else {
362            None
363        }
364    });
365
366    // Partition Filter predicates for the search-driven paths into fusable
367    // (injected into the search CTE's WHERE) and residual (left in the outer
368    // WHERE) sets. The Nodes path pushes *every* predicate into the CTE
369    // directly and ignores this partition.
370    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
371
372    let mut binds = Vec::new();
373    let base_candidates = match driving_table {
374        DrivingTable::VecNodes => {
375            let query = ast
376                .steps
377                .iter()
378                .find_map(|step| {
379                    if let QueryStep::VectorSearch { query, .. } = step {
380                        Some(query.as_str())
381                    } else {
382                        None
383                    }
384                })
385                .unwrap_or_else(|| unreachable!("VecNodes chosen but no VectorSearch step in AST"));
386            binds.push(BindValue::Text(query.to_owned()));
387            binds.push(BindValue::Text(ast.root_kind.clone()));
388            // sqlite-vec requires the LIMIT/k constraint to be visible directly on the
389            // vec0 KNN scan. Using a sub-select isolates the vec0 LIMIT so the join
390            // with chunks/nodes does not prevent the query planner from recognising it.
391            //
392            // ASYMMETRY (known gap, P2-3): the inner `LIMIT {base_limit}` runs
393            // BEFORE the fusable-filter `WHERE` below, so fused predicates on
394            // `src` (e.g. `kind_eq`) filter a candidate pool that has already
395            // been narrowed to `base_limit` KNN neighbours. A
396            // `vector_search("x", 5).filter_kind_eq("Goal")` can therefore
397            // return fewer than 5 Goal hits even when more exist. Fixing this
398            // requires overfetching from vec0 and re-ranking/re-limiting after
399            // the filter — explicitly out of scope for Phase 2 filter fusion.
400            // The FTS branch below does NOT share this asymmetry because its
401            // outer LIMIT wraps the post-filter SELECT.
402            let mut sql = format!(
403                "base_candidates AS (
404                    SELECT DISTINCT src.logical_id
405                    FROM (
406                        SELECT chunk_id FROM vec_nodes_active
407                        WHERE embedding MATCH ?1
408                        LIMIT {base_limit}
409                    ) vc
410                    JOIN chunks c ON c.id = vc.chunk_id
411                    JOIN nodes src ON src.logical_id = c.node_logical_id AND src.superseded_at IS NULL
412                    WHERE src.kind = ?2",
413            );
414            for predicate in &fusable_filters {
415                append_fusable_clause(&mut sql, &mut binds, "src", predicate)?;
416            }
417            sql.push_str("\n                )");
418            sql
419        }
420        DrivingTable::FtsNodes => {
421            let text_query = ast
422                .steps
423                .iter()
424                .find_map(|step| {
425                    if let QueryStep::TextSearch { query, .. } = step {
426                        Some(query)
427                    } else {
428                        None
429                    }
430                })
431                .unwrap_or_else(|| unreachable!("FtsNodes chosen but no TextSearch step in AST"));
432            // Render the typed text-query subset into safe FTS5 syntax. Only
433            // supported operators are emitted as control syntax; all literal
434            // terms and phrases remain quoted and escaped.
435            let rendered = render_text_query_fts5(text_query);
436            // Each FTS5 virtual table requires its own MATCH bind parameter;
437            // reusing indices across the UNION is not supported by SQLite.
438            binds.push(BindValue::Text(rendered.clone()));
439            binds.push(BindValue::Text(ast.root_kind.clone()));
440            binds.push(BindValue::Text(rendered));
441            binds.push(BindValue::Text(ast.root_kind.clone()));
442            // Wrap the chunk/property UNION in an outer SELECT that joins
443            // `nodes` once so fusable filters (kind/logical_id/source_ref/
444            // content_ref) can reference node columns directly, bringing them
445            // inside the CTE's LIMIT window.
446            let mut sql = String::from(
447                "base_candidates AS (
448                    SELECT DISTINCT n.logical_id
449                    FROM (
450                        SELECT src.logical_id
451                        FROM fts_nodes f
452                        JOIN chunks c ON c.id = f.chunk_id
453                        JOIN nodes src ON src.logical_id = c.node_logical_id AND src.superseded_at IS NULL
454                        WHERE fts_nodes MATCH ?1
455                          AND src.kind = ?2
456                        UNION
457                        SELECT fp.node_logical_id AS logical_id
458                        FROM fts_node_properties fp
459                        JOIN nodes src ON src.logical_id = fp.node_logical_id AND src.superseded_at IS NULL
460                        WHERE fts_node_properties MATCH ?3
461                          AND fp.kind = ?4
462                    ) u
463                    JOIN nodes n ON n.logical_id = u.logical_id AND n.superseded_at IS NULL
464                    WHERE 1 = 1",
465            );
466            for predicate in &fusable_filters {
467                append_fusable_clause(&mut sql, &mut binds, "n", predicate)?;
468            }
469            let _ = write!(
470                &mut sql,
471                "\n                    LIMIT {base_limit}\n                )"
472            );
473            sql
474        }
475        DrivingTable::Nodes => {
476            binds.push(BindValue::Text(ast.root_kind.clone()));
477            let mut sql = "base_candidates AS (
478                    SELECT DISTINCT src.logical_id
479                    FROM nodes src
480                    WHERE src.superseded_at IS NULL
481                      AND src.kind = ?1"
482                .to_owned();
483            // Push filter predicates into base_candidates so the LIMIT applies
484            // after filtering, not before. Without this, the CTE may truncate
485            // the candidate set before property/source_ref filters run, causing
486            // nodes that satisfy the filter to be excluded from results.
487            for step in &ast.steps {
488                if let QueryStep::Filter(predicate) = step {
489                    match predicate {
490                        Predicate::LogicalIdEq(logical_id) => {
491                            binds.push(BindValue::Text(logical_id.clone()));
492                            let bind_index = binds.len();
493                            let _ = write!(
494                                &mut sql,
495                                "\n                      AND src.logical_id = ?{bind_index}"
496                            );
497                        }
498                        Predicate::JsonPathEq { path, value } => {
499                            validate_json_path(path)?;
500                            binds.push(BindValue::Text(path.clone()));
501                            let path_index = binds.len();
502                            binds.push(match value {
503                                ScalarValue::Text(text) => BindValue::Text(text.clone()),
504                                ScalarValue::Integer(integer) => BindValue::Integer(*integer),
505                                ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
506                            });
507                            let value_index = binds.len();
508                            let _ = write!(
509                                &mut sql,
510                                "\n                      AND json_extract(src.properties, ?{path_index}) = ?{value_index}"
511                            );
512                        }
513                        Predicate::JsonPathCompare { path, op, value } => {
514                            validate_json_path(path)?;
515                            binds.push(BindValue::Text(path.clone()));
516                            let path_index = binds.len();
517                            binds.push(match value {
518                                ScalarValue::Text(text) => BindValue::Text(text.clone()),
519                                ScalarValue::Integer(integer) => BindValue::Integer(*integer),
520                                ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
521                            });
522                            let value_index = binds.len();
523                            let operator = match op {
524                                ComparisonOp::Gt => ">",
525                                ComparisonOp::Gte => ">=",
526                                ComparisonOp::Lt => "<",
527                                ComparisonOp::Lte => "<=",
528                            };
529                            let _ = write!(
530                                &mut sql,
531                                "\n                      AND json_extract(src.properties, ?{path_index}) {operator} ?{value_index}"
532                            );
533                        }
534                        Predicate::SourceRefEq(source_ref) => {
535                            binds.push(BindValue::Text(source_ref.clone()));
536                            let bind_index = binds.len();
537                            let _ = write!(
538                                &mut sql,
539                                "\n                      AND src.source_ref = ?{bind_index}"
540                            );
541                        }
542                        Predicate::ContentRefNotNull => {
543                            let _ = write!(
544                                &mut sql,
545                                "\n                      AND src.content_ref IS NOT NULL"
546                            );
547                        }
548                        Predicate::ContentRefEq(uri) => {
549                            binds.push(BindValue::Text(uri.clone()));
550                            let bind_index = binds.len();
551                            let _ = write!(
552                                &mut sql,
553                                "\n                      AND src.content_ref = ?{bind_index}"
554                            );
555                        }
556                        Predicate::KindEq(_)
557                        | Predicate::EdgePropertyEq { .. }
558                        | Predicate::EdgePropertyCompare { .. } => {
559                            // KindEq: already filtered by ast.root_kind above.
560                            // EdgeProperty*: not valid in the main query filter path.
561                        }
562                        Predicate::JsonPathFusedEq { path, value } => {
563                            validate_json_path(path)?;
564                            binds.push(BindValue::Text(path.clone()));
565                            let path_index = binds.len();
566                            binds.push(BindValue::Text(value.clone()));
567                            let value_index = binds.len();
568                            let _ = write!(
569                                &mut sql,
570                                "\n                      AND json_extract(src.properties, ?{path_index}) = ?{value_index}"
571                            );
572                        }
573                        Predicate::JsonPathFusedTimestampCmp { path, op, value } => {
574                            validate_json_path(path)?;
575                            binds.push(BindValue::Text(path.clone()));
576                            let path_index = binds.len();
577                            binds.push(BindValue::Integer(*value));
578                            let value_index = binds.len();
579                            let operator = match op {
580                                ComparisonOp::Gt => ">",
581                                ComparisonOp::Gte => ">=",
582                                ComparisonOp::Lt => "<",
583                                ComparisonOp::Lte => "<=",
584                            };
585                            let _ = write!(
586                                &mut sql,
587                                "\n                      AND json_extract(src.properties, ?{path_index}) {operator} ?{value_index}"
588                            );
589                        }
590                        Predicate::JsonPathFusedBoolEq { path, value } => {
591                            validate_json_path(path)?;
592                            binds.push(BindValue::Text(path.clone()));
593                            let path_index = binds.len();
594                            binds.push(BindValue::Integer(i64::from(*value)));
595                            let value_index = binds.len();
596                            let _ = write!(
597                                &mut sql,
598                                "\n                      AND json_extract(src.properties, ?{path_index}) = ?{value_index}"
599                            );
600                        }
601                        Predicate::JsonPathIn { path, values } => {
602                            validate_json_path(path)?;
603                            binds.push(BindValue::Text(path.clone()));
604                            let first_param = binds.len();
605                            for v in values {
606                                binds.push(match v {
607                                    ScalarValue::Text(text) => BindValue::Text(text.clone()),
608                                    ScalarValue::Integer(integer) => BindValue::Integer(*integer),
609                                    ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
610                                });
611                            }
612                            let placeholders = (1..=values.len())
613                                .map(|i| format!("?{}", first_param + i))
614                                .collect::<Vec<_>>()
615                                .join(", ");
616                            let _ = write!(
617                                &mut sql,
618                                "\n                      AND json_extract(src.properties, ?{first_param}) IN ({placeholders})"
619                            );
620                        }
621                        Predicate::JsonPathFusedIn { path, values } => {
622                            // On the Nodes driver all predicates are pushed inline;
623                            // treat like JsonPathIn but values are always text.
624                            validate_json_path(path)?;
625                            binds.push(BindValue::Text(path.clone()));
626                            let first_param = binds.len();
627                            for v in values {
628                                binds.push(BindValue::Text(v.clone()));
629                            }
630                            let placeholders = (1..=values.len())
631                                .map(|i| format!("?{}", first_param + i))
632                                .collect::<Vec<_>>()
633                                .join(", ");
634                            let _ = write!(
635                                &mut sql,
636                                "\n                      AND json_extract(src.properties, ?{first_param}) IN ({placeholders})"
637                            );
638                        }
639                    }
640                }
641            }
642            let _ = write!(
643                &mut sql,
644                "\n                    LIMIT {base_limit}\n                )"
645            );
646            sql
647        }
648    };
649
650    let mut sql = format!("WITH RECURSIVE\n{base_candidates}");
651    let source_alias = if traversal.is_some() { "t" } else { "bc" };
652
653    if let Some((direction, label, max_depth)) = traversal {
654        binds.push(BindValue::Text(label.to_owned()));
655        let label_index = binds.len();
656        let (join_condition, next_logical_id) = match direction {
657            TraverseDirection::Out => ("e.source_logical_id = t.logical_id", "e.target_logical_id"),
658            TraverseDirection::In => ("e.target_logical_id = t.logical_id", "e.source_logical_id"),
659        };
660
661        let _ = write!(
662            &mut sql,
663            ",
664traversed(logical_id, depth, visited) AS (
665    SELECT bc.logical_id, 0, printf(',%s,', bc.logical_id)
666    FROM base_candidates bc
667    UNION ALL
668    SELECT {next_logical_id}, t.depth + 1, t.visited || {next_logical_id} || ','
669    FROM traversed t
670    JOIN edges e ON {join_condition}
671        AND e.kind = ?{label_index}
672        AND e.superseded_at IS NULL
673    WHERE t.depth < {max_depth}
674      AND instr(t.visited, printf(',%s,', {next_logical_id})) = 0
675    LIMIT {}
676)",
677            hints.hard_limit
678        );
679    }
680
681    let _ = write!(
682        &mut sql,
683        "
684SELECT DISTINCT n.row_id, n.logical_id, n.kind, n.properties, n.content_ref
685FROM {} {source_alias}
686JOIN nodes n ON n.logical_id = {source_alias}.logical_id
687    AND n.superseded_at IS NULL
688WHERE 1 = 1",
689        if traversal.is_some() {
690            "traversed"
691        } else {
692            "base_candidates"
693        }
694    );
695
696    // Outer WHERE emission. The Nodes driving table pushes every filter
697    // into `base_candidates` already, so only `KindEq` (handled separately
698    // via `root_kind`) needs to be re-emitted outside — we iterate
699    // `ast.steps` to catch it. For the search-driven paths (FtsNodes,
700    // VecNodes) we iterate the `residual_filters` partition directly
701    // instead of re-classifying predicates via `is_fusable()`. This makes
702    // `partition_search_filters` the single source of truth for the
703    // fusable/residual split: adding a new fusable variant automatically
704    // drops it from the outer WHERE without a separate audit of this loop.
705    if driving_table == DrivingTable::Nodes {
706        for step in &ast.steps {
707            if let QueryStep::Filter(Predicate::KindEq(kind)) = step {
708                binds.push(BindValue::Text(kind.clone()));
709                let bind_index = binds.len();
710                let _ = write!(&mut sql, "\n  AND n.kind = ?{bind_index}");
711            }
712        }
713    } else {
714        for predicate in &residual_filters {
715            match predicate {
716                Predicate::JsonPathEq { path, value } => {
717                    validate_json_path(path)?;
718                    binds.push(BindValue::Text(path.clone()));
719                    let path_index = binds.len();
720                    binds.push(match value {
721                        ScalarValue::Text(text) => BindValue::Text(text.clone()),
722                        ScalarValue::Integer(integer) => BindValue::Integer(*integer),
723                        ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
724                    });
725                    let value_index = binds.len();
726                    let _ = write!(
727                        &mut sql,
728                        "\n  AND json_extract(n.properties, ?{path_index}) = ?{value_index}",
729                    );
730                }
731                Predicate::JsonPathCompare { path, op, value } => {
732                    validate_json_path(path)?;
733                    binds.push(BindValue::Text(path.clone()));
734                    let path_index = binds.len();
735                    binds.push(match value {
736                        ScalarValue::Text(text) => BindValue::Text(text.clone()),
737                        ScalarValue::Integer(integer) => BindValue::Integer(*integer),
738                        ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
739                    });
740                    let value_index = binds.len();
741                    let operator = match op {
742                        ComparisonOp::Gt => ">",
743                        ComparisonOp::Gte => ">=",
744                        ComparisonOp::Lt => "<",
745                        ComparisonOp::Lte => "<=",
746                    };
747                    let _ = write!(
748                        &mut sql,
749                        "\n  AND json_extract(n.properties, ?{path_index}) {operator} ?{value_index}",
750                    );
751                }
752                Predicate::JsonPathIn { path, values } => {
753                    validate_json_path(path)?;
754                    binds.push(BindValue::Text(path.clone()));
755                    let first_param = binds.len();
756                    for v in values {
757                        binds.push(match v {
758                            ScalarValue::Text(text) => BindValue::Text(text.clone()),
759                            ScalarValue::Integer(integer) => BindValue::Integer(*integer),
760                            ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
761                        });
762                    }
763                    let placeholders = (1..=values.len())
764                        .map(|i| format!("?{}", first_param + i))
765                        .collect::<Vec<_>>()
766                        .join(", ");
767                    let _ = write!(
768                        &mut sql,
769                        "\n  AND json_extract(n.properties, ?{first_param}) IN ({placeholders})",
770                    );
771                }
772                Predicate::KindEq(_)
773                | Predicate::LogicalIdEq(_)
774                | Predicate::SourceRefEq(_)
775                | Predicate::ContentRefEq(_)
776                | Predicate::ContentRefNotNull
777                | Predicate::JsonPathFusedEq { .. }
778                | Predicate::JsonPathFusedTimestampCmp { .. }
779                | Predicate::JsonPathFusedBoolEq { .. }
780                | Predicate::JsonPathFusedIn { .. }
781                | Predicate::EdgePropertyEq { .. }
782                | Predicate::EdgePropertyCompare { .. } => {
783                    // Fusable — already injected into base_candidates by
784                    // `partition_search_filters`. Edge property predicates
785                    // are not valid in the main query path.
786                }
787            }
788        }
789    }
790
791    let _ = write!(&mut sql, "\nLIMIT {final_limit}");
792
793    if binds.len() > MAX_BIND_PARAMETERS {
794        return Err(CompileError::TooManyBindParameters(binds.len()));
795    }
796
797    // Pack F1.75: extract the optional semantic-search / raw-vector-search
798    // sidecars so the coordinator's `execute_compiled_read` dispatcher can
799    // route to the dedicated executors instead of running the plain-node
800    // scan above. Per Pack F1's v1 decision, these surfaces do not compose
801    // with filter fusion — any filters on the AST are ignored here.
802    let mut semantic_search: Option<CompiledSemanticSearch> = None;
803    let mut raw_vector_search: Option<CompiledRawVectorSearch> = None;
804    for step in &ast.steps {
805        match step {
806            QueryStep::SemanticSearch { text, limit } => {
807                semantic_search = Some(CompiledSemanticSearch {
808                    root_kind: ast.root_kind.clone(),
809                    text: text.clone(),
810                    limit: *limit,
811                });
812            }
813            QueryStep::RawVectorSearch { vec, limit } => {
814                raw_vector_search = Some(CompiledRawVectorSearch {
815                    root_kind: ast.root_kind.clone(),
816                    vec: vec.clone(),
817                    limit: *limit,
818                });
819            }
820            _ => {}
821        }
822    }
823
824    Ok(CompiledQuery {
825        sql,
826        binds,
827        shape_hash,
828        driving_table,
829        hints,
830        semantic_search,
831        raw_vector_search,
832    })
833}
834
835/// Compile a [`QueryAst`] into a [`CompiledGroupedQuery`] for grouped execution.
836///
837/// # Errors
838///
839/// Returns a [`CompileError`] if the AST exceeds expansion-slot limits,
840/// contains empty slot names, or specifies a traversal depth beyond the
841/// configured maximum.
842pub fn compile_grouped_query(ast: &QueryAst) -> Result<CompiledGroupedQuery, CompileError> {
843    if ast.expansions.len() > MAX_EXPANSION_SLOTS {
844        return Err(CompileError::TooManyExpansionSlots(ast.expansions.len()));
845    }
846
847    let mut seen = std::collections::BTreeSet::new();
848    for expansion in &ast.expansions {
849        if expansion.slot.trim().is_empty() {
850            return Err(CompileError::EmptyExpansionSlotName);
851        }
852        if expansion.max_depth > MAX_TRAVERSAL_DEPTH {
853            return Err(CompileError::TraversalTooDeep(expansion.max_depth));
854        }
855        if !seen.insert(expansion.slot.clone()) {
856            return Err(CompileError::DuplicateExpansionSlot(expansion.slot.clone()));
857        }
858    }
859    for edge_expansion in &ast.edge_expansions {
860        if edge_expansion.slot.trim().is_empty() {
861            return Err(CompileError::EmptyExpansionSlotName);
862        }
863        if edge_expansion.max_depth > MAX_TRAVERSAL_DEPTH {
864            return Err(CompileError::TraversalTooDeep(edge_expansion.max_depth));
865        }
866        if !seen.insert(edge_expansion.slot.clone()) {
867            return Err(CompileError::DuplicateExpansionSlot(
868                edge_expansion.slot.clone(),
869            ));
870        }
871    }
872
873    let mut root_ast = ast.clone();
874    root_ast.expansions.clear();
875    let root = compile_query(&root_ast)?;
876    let hints = execution_hints(ast);
877    let shape_hash = ShapeHash(hash_signature(&shape_signature(ast)));
878
879    Ok(CompiledGroupedQuery {
880        root,
881        expansions: ast.expansions.clone(),
882        edge_expansions: ast.edge_expansions.clone(),
883        shape_hash,
884        hints,
885    })
886}
887
888/// Compile a [`QueryAst`] into a [`CompiledSearch`] describing an adaptive
889/// text-search execution.
890///
891/// Unlike [`compile_query`], this path does not emit SQL directly: the
892/// coordinator owns the search SELECT so it can project the richer row shape
893/// (score, source, snippet, projection id) that flat queries do not need.
894///
895/// # Errors
896///
897/// Returns [`CompileError::MissingTextSearchStep`] if the AST contains no
898/// [`QueryStep::TextSearch`] step.
899pub fn compile_search(ast: &QueryAst) -> Result<CompiledSearch, CompileError> {
900    let mut text_query = None;
901    let mut limit = None;
902    for step in &ast.steps {
903        match step {
904            QueryStep::TextSearch {
905                query,
906                limit: step_limit,
907            } => {
908                text_query = Some(query.clone());
909                limit = Some(*step_limit);
910            }
911            QueryStep::Filter(_)
912            | QueryStep::Search { .. }
913            | QueryStep::VectorSearch { .. }
914            | QueryStep::SemanticSearch { .. }
915            | QueryStep::RawVectorSearch { .. }
916            | QueryStep::Traverse { .. } => {
917                // Filter steps are partitioned below; Search/Vector/Traverse
918                // steps are not composable with text search in the adaptive
919                // surface yet.
920            }
921        }
922    }
923    let text_query = text_query.ok_or(CompileError::MissingTextSearchStep)?;
924    let limit = limit.unwrap_or(25);
925    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
926    Ok(CompiledSearch {
927        root_kind: ast.root_kind.clone(),
928        text_query,
929        limit,
930        fusable_filters,
931        residual_filters,
932        attribution_requested: false,
933    })
934}
935
936/// Compile a [`QueryAst`] into a [`CompiledSearchPlan`] whose strict branch
937/// is the user's [`TextQuery`] and whose relaxed branch is derived via
938/// [`derive_relaxed`].
939///
940/// Reserved for Phase 7 SDK bindings that will construct plans from typed
941/// AST fragments. The coordinator currently builds its adaptive plan
942/// directly inside `execute_compiled_search` from an already-compiled
943/// [`CompiledSearch`], so this helper has no in-tree caller; it is kept
944/// as a public entry point for forthcoming surface bindings.
945///
946/// # Errors
947/// Returns [`CompileError::MissingTextSearchStep`] if the AST contains no
948/// [`QueryStep::TextSearch`] step.
949#[doc(hidden)]
950pub fn compile_search_plan(ast: &QueryAst) -> Result<CompiledSearchPlan, CompileError> {
951    let strict = compile_search(ast)?;
952    let (relaxed_query, was_degraded_at_plan_time) = derive_relaxed(&strict.text_query);
953    let relaxed = relaxed_query.map(|q| CompiledSearch {
954        root_kind: strict.root_kind.clone(),
955        text_query: q,
956        limit: strict.limit,
957        fusable_filters: strict.fusable_filters.clone(),
958        residual_filters: strict.residual_filters.clone(),
959        attribution_requested: strict.attribution_requested,
960    });
961    Ok(CompiledSearchPlan {
962        strict,
963        relaxed,
964        was_degraded_at_plan_time,
965    })
966}
967
968/// Compile a caller-provided strict/relaxed [`TextQuery`] pair into a
969/// [`CompiledSearchPlan`] against a [`QueryAst`] that supplies the kind
970/// root, filters, and limit.
971///
972/// This is the two-query entry point used by `Engine::fallback_search`. The
973/// caller's relaxed [`TextQuery`] is used verbatim — it is NOT passed through
974/// [`derive_relaxed`], and the 4-alternative
975/// [`crate::RELAXED_BRANCH_CAP`] is NOT applied. As a result
976/// [`CompiledSearchPlan::was_degraded_at_plan_time`] is always `false` on
977/// this path.
978///
979/// The AST supplies:
980///  - `root_kind` — reused for both branches
981///  - filter steps — partitioned once via [`partition_search_filters`] and
982///    shared unchanged across both branches
983///  - `limit` from the text-search step (or the default used by
984///    [`compile_search`]) when present; if the AST has no `TextSearch` step,
985///    the caller-supplied `limit` is used
986///
987/// Any `TextSearch` step already on the AST is IGNORED — `strict` and
988/// `relaxed` come from the caller. `Vector`/`Traverse` steps are also
989/// ignored for symmetry with [`compile_search`].
990///
991/// # Errors
992/// Returns [`CompileError`] if filter partitioning produces an unsupported
993/// shape (currently none; reserved for forward compatibility).
994pub fn compile_search_plan_from_queries(
995    ast: &QueryAst,
996    strict: TextQuery,
997    relaxed: Option<TextQuery>,
998    limit: usize,
999    attribution_requested: bool,
1000) -> Result<CompiledSearchPlan, CompileError> {
1001    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
1002    let strict_compiled = CompiledSearch {
1003        root_kind: ast.root_kind.clone(),
1004        text_query: strict,
1005        limit,
1006        fusable_filters: fusable_filters.clone(),
1007        residual_filters: residual_filters.clone(),
1008        attribution_requested,
1009    };
1010    let relaxed_compiled = relaxed.map(|q| CompiledSearch {
1011        root_kind: ast.root_kind.clone(),
1012        text_query: q,
1013        limit,
1014        fusable_filters,
1015        residual_filters,
1016        attribution_requested,
1017    });
1018    Ok(CompiledSearchPlan {
1019        strict: strict_compiled,
1020        relaxed: relaxed_compiled,
1021        was_degraded_at_plan_time: false,
1022    })
1023}
1024
1025/// Compile a [`QueryAst`] into a [`CompiledVectorSearch`] describing a
1026/// vector-only retrieval execution.
1027///
1028/// Mirrors [`compile_search`] structurally. The AST must contain exactly one
1029/// [`QueryStep::VectorSearch`] step; filters following the search step are
1030/// partitioned by [`partition_search_filters`] into fusable and residual
1031/// sets. Unlike [`compile_search`] this path does not produce a
1032/// [`TextQuery`]; the caller's raw query string is preserved verbatim for
1033/// the coordinator to bind to `embedding MATCH ?`.
1034///
1035/// # Errors
1036///
1037/// Returns [`CompileError::MissingVectorSearchStep`] if the AST contains no
1038/// [`QueryStep::VectorSearch`] step.
1039pub fn compile_vector_search(ast: &QueryAst) -> Result<CompiledVectorSearch, CompileError> {
1040    let mut query_text = None;
1041    let mut limit = None;
1042    for step in &ast.steps {
1043        match step {
1044            QueryStep::VectorSearch {
1045                query,
1046                limit: step_limit,
1047            } => {
1048                query_text = Some(query.clone());
1049                limit = Some(*step_limit);
1050            }
1051            QueryStep::Filter(_)
1052            | QueryStep::Search { .. }
1053            | QueryStep::TextSearch { .. }
1054            | QueryStep::SemanticSearch { .. }
1055            | QueryStep::RawVectorSearch { .. }
1056            | QueryStep::Traverse { .. } => {
1057                // Filter steps are partitioned below; Search/TextSearch/
1058                // Traverse steps are not composable with vector search in
1059                // the standalone vector retrieval path.
1060            }
1061        }
1062    }
1063    let query_text = query_text.ok_or(CompileError::MissingVectorSearchStep)?;
1064    let limit = limit.unwrap_or(25);
1065    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
1066    Ok(CompiledVectorSearch {
1067        root_kind: ast.root_kind.clone(),
1068        query_text,
1069        limit,
1070        fusable_filters,
1071        residual_filters,
1072        attribution_requested: false,
1073    })
1074}
1075
1076/// Compile a [`QueryAst`] containing a [`QueryStep::Search`] into a
1077/// [`CompiledRetrievalPlan`] describing the bounded set of retrieval branches
1078/// the Phase 12 planner may run.
1079///
1080/// The raw query string carried by the `Search` step is parsed into a
1081/// strict [`TextQuery`] (via [`TextQuery::parse`]) and a relaxed sibling is
1082/// derived via [`derive_relaxed`]. Both branches share the post-search
1083/// fusable/residual filter partition. The resulting
1084/// [`CompiledRetrievalPlan::text`] field carries them in the same Phase 6
1085/// [`CompiledSearchPlan`] shape as `text_search()` / `fallback_search()`.
1086///
1087/// **v1 scope**: `vector` is unconditionally `None`. Read-time embedding of
1088/// natural-language queries is not wired in v1; see
1089/// [`CompiledRetrievalPlan`] for the rationale and the future-phase plan.
1090/// Callers who need vector retrieval today must use the `vector_search()`
1091/// override directly with a caller-provided vector literal.
1092///
1093/// # Errors
1094///
1095/// Returns [`CompileError::MissingSearchStep`] if the AST contains no
1096/// [`QueryStep::Search`] step, or
1097/// [`CompileError::MultipleSearchSteps`] if the AST contains more than one.
1098pub fn compile_retrieval_plan(ast: &QueryAst) -> Result<CompiledRetrievalPlan, CompileError> {
1099    let mut raw_query: Option<&str> = None;
1100    let mut limit: Option<usize> = None;
1101    for step in &ast.steps {
1102        if let QueryStep::Search {
1103            query,
1104            limit: step_limit,
1105        } = step
1106        {
1107            if raw_query.is_some() {
1108                return Err(CompileError::MultipleSearchSteps);
1109            }
1110            raw_query = Some(query.as_str());
1111            limit = Some(*step_limit);
1112        }
1113    }
1114    let raw_query = raw_query.ok_or(CompileError::MissingSearchStep)?;
1115    let limit = limit.unwrap_or(25);
1116
1117    let strict_text_query = TextQuery::parse(raw_query);
1118    let (relaxed_text_query, was_degraded_at_plan_time) = derive_relaxed(&strict_text_query);
1119
1120    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
1121
1122    let strict = CompiledSearch {
1123        root_kind: ast.root_kind.clone(),
1124        text_query: strict_text_query,
1125        limit,
1126        fusable_filters: fusable_filters.clone(),
1127        residual_filters: residual_filters.clone(),
1128        attribution_requested: false,
1129    };
1130    let relaxed = relaxed_text_query.map(|q| CompiledSearch {
1131        root_kind: ast.root_kind.clone(),
1132        text_query: q,
1133        limit,
1134        fusable_filters,
1135        residual_filters,
1136        attribution_requested: false,
1137    });
1138    let text = CompiledSearchPlan {
1139        strict,
1140        relaxed,
1141        was_degraded_at_plan_time,
1142    };
1143
1144    // v1 scope (Phase 12): the planner's vector branch slot is structurally
1145    // present on `CompiledRetrievalPlan` so the coordinator's three-block
1146    // fusion path is fully wired, but read-time embedding of natural-language
1147    // queries is deliberately deferred to a future phase. `compile_retrieval_plan`
1148    // therefore always leaves `vector = None`; callers who want vector
1149    // retrieval today must use `vector_search()` directly with a caller-
1150    // provided vector literal.
1151    Ok(CompiledRetrievalPlan {
1152        text,
1153        vector: None,
1154        was_degraded_at_plan_time,
1155    })
1156}
1157
/// Hash `signature` with 64-bit FNV-1a over its UTF-8 bytes.
///
/// For each byte the running state is XORed with the byte and then
/// multiplied (with wrap-around) by the FNV prime. An empty string yields
/// the offset basis unchanged. The result is deterministic across Rust
/// versions and program invocations, unlike `DefaultHasher`.
fn hash_signature(signature: &str) -> u64 {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;
    signature.bytes().fold(FNV_OFFSET_BASIS, |state, byte| {
        (state ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    })
}
1170
1171#[cfg(test)]
1172#[allow(clippy::expect_used, clippy::items_after_statements, deprecated)]
1173mod tests {
1174    use rstest::rstest;
1175
1176    use crate::{
1177        CompileError, DrivingTable, QueryBuilder, TraverseDirection, compile_grouped_query,
1178        compile_query,
1179    };
1180
1181    #[test]
1182    fn vector_query_compiles_to_chunk_resolution() {
1183        let compiled = compile_query(
1184            &QueryBuilder::nodes("Meeting")
1185                .vector_search("budget", 5)
1186                .limit(5)
1187                .into_ast(),
1188        )
1189        .expect("compiled query");
1190
1191        assert_eq!(compiled.driving_table, DrivingTable::VecNodes);
1192        assert!(compiled.sql.contains("JOIN chunks c ON c.id = vc.chunk_id"));
1193        assert!(
1194            compiled
1195                .sql
1196                .contains("JOIN nodes src ON src.logical_id = c.node_logical_id")
1197        );
1198    }
1199
1200    #[rstest]
1201    #[case(5, 7)]
1202    #[case(3, 11)]
1203    fn structural_limits_change_shape_hash(#[case] left: usize, #[case] right: usize) {
1204        let left_compiled = compile_query(
1205            &QueryBuilder::nodes("Meeting")
1206                .text_search("budget", left)
1207                .limit(left)
1208                .into_ast(),
1209        )
1210        .expect("left query");
1211        let right_compiled = compile_query(
1212            &QueryBuilder::nodes("Meeting")
1213                .text_search("budget", right)
1214                .limit(right)
1215                .into_ast(),
1216        )
1217        .expect("right query");
1218
1219        assert_ne!(left_compiled.shape_hash, right_compiled.shape_hash);
1220    }
1221
1222    #[test]
1223    fn traversal_query_is_depth_bounded() {
1224        let compiled = compile_query(
1225            &QueryBuilder::nodes("Meeting")
1226                .text_search("budget", 5)
1227                .traverse(TraverseDirection::Out, "HAS_TASK", 3)
1228                .limit(10)
1229                .into_ast(),
1230        )
1231        .expect("compiled traversal");
1232
1233        assert!(compiled.sql.contains("WITH RECURSIVE"));
1234        assert!(compiled.sql.contains("WHERE t.depth < 3"));
1235    }
1236
1237    #[test]
1238    fn text_search_compiles_to_union_over_chunk_and_property_fts() {
1239        let compiled = compile_query(
1240            &QueryBuilder::nodes("Meeting")
1241                .text_search("budget", 25)
1242                .limit(25)
1243                .into_ast(),
1244        )
1245        .expect("compiled text search");
1246
1247        assert_eq!(compiled.driving_table, DrivingTable::FtsNodes);
1248        // Must contain UNION of both FTS tables.
1249        assert!(
1250            compiled.sql.contains("fts_nodes MATCH"),
1251            "must search chunk-backed FTS"
1252        );
1253        assert!(
1254            compiled.sql.contains("fts_node_properties MATCH"),
1255            "must search property-backed FTS"
1256        );
1257        assert!(compiled.sql.contains("UNION"), "must UNION both sources");
1258        // Must have 4 bind parameters: sanitized query + kind for each table.
1259        assert_eq!(compiled.binds.len(), 4);
1260    }
1261
1262    #[test]
1263    fn logical_id_filter_is_compiled() {
1264        let compiled = compile_query(
1265            &QueryBuilder::nodes("Meeting")
1266                .filter_logical_id_eq("meeting-123")
1267                .filter_json_text_eq("$.status", "active")
1268                .limit(1)
1269                .into_ast(),
1270        )
1271        .expect("compiled query");
1272
1273        // LogicalIdEq is applied in base_candidates (src alias) for the Nodes driver,
1274        // NOT duplicated in the final WHERE. The JOIN condition still contains
1275        // "n.logical_id =" which satisfies this check.
1276        assert!(compiled.sql.contains("n.logical_id ="));
1277        assert!(compiled.sql.contains("src.logical_id ="));
1278        assert!(compiled.sql.contains("json_extract"));
1279        // Only one bind for the logical_id (not two).
1280        use crate::BindValue;
1281        assert_eq!(
1282            compiled
1283                .binds
1284                .iter()
1285                .filter(|b| matches!(b, BindValue::Text(s) if s == "meeting-123"))
1286                .count(),
1287            1
1288        );
1289    }
1290
1291    #[test]
1292    fn compile_rejects_invalid_json_path() {
1293        use crate::{Predicate, QueryStep, ScalarValue};
1294        let mut ast = QueryBuilder::nodes("Meeting").into_ast();
1295        // Attempt SQL injection via JSON path.
1296        ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
1297            path: "$') OR 1=1 --".to_owned(),
1298            value: ScalarValue::Text("x".to_owned()),
1299        }));
1300        use crate::CompileError;
1301        let result = compile_query(&ast);
1302        assert!(
1303            matches!(result, Err(CompileError::InvalidJsonPath(_))),
1304            "expected InvalidJsonPath, got {result:?}"
1305        );
1306    }
1307
1308    #[test]
1309    fn compile_accepts_valid_json_paths() {
1310        use crate::{Predicate, QueryStep, ScalarValue};
1311        for valid_path in ["$.status", "$.foo.bar", "$.a_b.c2"] {
1312            let mut ast = QueryBuilder::nodes("Meeting").into_ast();
1313            ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
1314                path: valid_path.to_owned(),
1315                value: ScalarValue::Text("v".to_owned()),
1316            }));
1317            assert!(
1318                compile_query(&ast).is_ok(),
1319                "expected valid path {valid_path:?} to compile"
1320            );
1321        }
1322    }
1323
1324    #[test]
1325    fn compile_rejects_too_many_bind_parameters() {
1326        use crate::{Predicate, QueryStep, ScalarValue};
1327        let mut ast = QueryBuilder::nodes("Meeting").into_ast();
1328        // kind occupies 1 bind; each json filter now occupies 2 binds (path + value).
1329        // 7 json filters → 1 + 14 = 15 (ok), 8 → 1 + 16 = 17 (exceeds limit of 15).
1330        for i in 0..8 {
1331            ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
1332                path: format!("$.f{i}"),
1333                value: ScalarValue::Text("v".to_owned()),
1334            }));
1335        }
1336        use crate::CompileError;
1337        let result = compile_query(&ast);
1338        assert!(
1339            matches!(result, Err(CompileError::TooManyBindParameters(17))),
1340            "expected TooManyBindParameters(17), got {result:?}"
1341        );
1342    }
1343
1344    #[test]
1345    fn compile_rejects_excessive_traversal_depth() {
1346        let result = compile_query(
1347            &QueryBuilder::nodes("Meeting")
1348                .text_search("budget", 5)
1349                .traverse(TraverseDirection::Out, "HAS_TASK", 51)
1350                .limit(10)
1351                .into_ast(),
1352        );
1353        assert!(
1354            matches!(result, Err(CompileError::TraversalTooDeep(51))),
1355            "expected TraversalTooDeep(51), got {result:?}"
1356        );
1357    }
1358
1359    #[test]
1360    fn grouped_queries_with_same_structure_share_shape_hash() {
1361        let left = compile_grouped_query(
1362            &QueryBuilder::nodes("Meeting")
1363                .text_search("budget", 5)
1364                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1, None, None)
1365                .limit(10)
1366                .into_ast(),
1367        )
1368        .expect("left grouped query");
1369        let right = compile_grouped_query(
1370            &QueryBuilder::nodes("Meeting")
1371                .text_search("planning", 5)
1372                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1, None, None)
1373                .limit(10)
1374                .into_ast(),
1375        )
1376        .expect("right grouped query");
1377
1378        assert_eq!(left.shape_hash, right.shape_hash);
1379    }
1380
1381    #[test]
1382    fn compile_grouped_rejects_duplicate_expansion_slot_names() {
1383        let result = compile_grouped_query(
1384            &QueryBuilder::nodes("Meeting")
1385                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1, None, None)
1386                .expand(
1387                    "tasks",
1388                    TraverseDirection::Out,
1389                    "HAS_DECISION",
1390                    1,
1391                    None,
1392                    None,
1393                )
1394                .into_ast(),
1395        );
1396
1397        assert!(
1398            matches!(result, Err(CompileError::DuplicateExpansionSlot(ref slot)) if slot == "tasks"),
1399            "expected DuplicateExpansionSlot(\"tasks\"), got {result:?}"
1400        );
1401    }
1402
1403    #[test]
1404    fn flat_compile_rejects_queries_with_expansions() {
1405        let result = compile_query(
1406            &QueryBuilder::nodes("Meeting")
1407                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1, None, None)
1408                .into_ast(),
1409        );
1410
1411        assert!(
1412            matches!(
1413                result,
1414                Err(CompileError::FlatCompileDoesNotSupportExpansions)
1415            ),
1416            "expected FlatCompileDoesNotSupportExpansions, got {result:?}"
1417        );
1418    }
1419
1420    #[test]
1421    fn json_path_compiled_as_bind_parameter() {
1422        let compiled = compile_query(
1423            &QueryBuilder::nodes("Meeting")
1424                .filter_json_text_eq("$.status", "active")
1425                .limit(1)
1426                .into_ast(),
1427        )
1428        .expect("compiled query");
1429
1430        // Path must be parameterized, not interpolated into the SQL string.
1431        assert!(
1432            !compiled.sql.contains("'$.status'"),
1433            "JSON path must not appear as a SQL string literal"
1434        );
1435        assert!(
1436            compiled.sql.contains("json_extract(src.properties, ?"),
1437            "JSON path must be a bind parameter (pushed into base_candidates for Nodes driver)"
1438        );
1439        // Path and value should both be in the bind list.
1440        use crate::BindValue;
1441        assert!(
1442            compiled
1443                .binds
1444                .iter()
1445                .any(|b| matches!(b, BindValue::Text(s) if s == "$.status"))
1446        );
1447        assert!(
1448            compiled
1449                .binds
1450                .iter()
1451                .any(|b| matches!(b, BindValue::Text(s) if s == "active"))
1452        );
1453    }
1454
1455    // --- Filter pushdown regression tests ---
1456    //
1457    // These tests verify that filter predicates are pushed into the
1458    // base_candidates CTE for the Nodes driving table, so the CTE LIMIT
1459    // applies after filtering rather than before.  Without pushdown, the
1460    // LIMIT may truncate the candidate set before the filter runs, causing
1461    // matching nodes to be silently excluded.
1462
1463    #[test]
1464    fn nodes_driver_pushes_json_eq_filter_into_base_candidates() {
1465        let compiled = compile_query(
1466            &QueryBuilder::nodes("Meeting")
1467                .filter_json_text_eq("$.status", "active")
1468                .limit(5)
1469                .into_ast(),
1470        )
1471        .expect("compiled query");
1472
1473        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
1474        // Filter must appear inside base_candidates (src alias), not the
1475        // outer WHERE (n alias).
1476        assert!(
1477            compiled.sql.contains("json_extract(src.properties, ?"),
1478            "json_extract must reference src (base_candidates), got:\n{}",
1479            compiled.sql,
1480        );
1481        assert!(
1482            !compiled.sql.contains("json_extract(n.properties, ?"),
1483            "json_extract must NOT appear in outer WHERE for Nodes driver, got:\n{}",
1484            compiled.sql,
1485        );
1486    }
1487
1488    #[test]
1489    fn nodes_driver_pushes_json_compare_filter_into_base_candidates() {
1490        let compiled = compile_query(
1491            &QueryBuilder::nodes("Meeting")
1492                .filter_json_integer_gte("$.priority", 5)
1493                .limit(10)
1494                .into_ast(),
1495        )
1496        .expect("compiled query");
1497
1498        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
1499        assert!(
1500            compiled.sql.contains("json_extract(src.properties, ?"),
1501            "comparison filter must be in base_candidates, got:\n{}",
1502            compiled.sql,
1503        );
1504        assert!(
1505            !compiled.sql.contains("json_extract(n.properties, ?"),
1506            "comparison filter must NOT be in outer WHERE for Nodes driver",
1507        );
1508        assert!(
1509            compiled.sql.contains(">= ?"),
1510            "expected >= operator in SQL, got:\n{}",
1511            compiled.sql,
1512        );
1513    }
1514
1515    #[test]
1516    fn nodes_driver_pushes_source_ref_filter_into_base_candidates() {
1517        let compiled = compile_query(
1518            &QueryBuilder::nodes("Meeting")
1519                .filter_source_ref_eq("ref-123")
1520                .limit(5)
1521                .into_ast(),
1522        )
1523        .expect("compiled query");
1524
1525        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
1526        assert!(
1527            compiled.sql.contains("src.source_ref = ?"),
1528            "source_ref filter must be in base_candidates, got:\n{}",
1529            compiled.sql,
1530        );
1531        assert!(
1532            !compiled.sql.contains("n.source_ref = ?"),
1533            "source_ref filter must NOT be in outer WHERE for Nodes driver",
1534        );
1535    }
1536
1537    #[test]
1538    fn nodes_driver_pushes_multiple_filters_into_base_candidates() {
1539        let compiled = compile_query(
1540            &QueryBuilder::nodes("Meeting")
1541                .filter_logical_id_eq("meeting-1")
1542                .filter_json_text_eq("$.status", "active")
1543                .filter_json_integer_gte("$.priority", 5)
1544                .filter_source_ref_eq("ref-abc")
1545                .limit(1)
1546                .into_ast(),
1547        )
1548        .expect("compiled query");
1549
1550        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
1551        // All filters should be in base_candidates, none in outer WHERE
1552        assert!(
1553            compiled.sql.contains("src.logical_id = ?"),
1554            "logical_id filter must be in base_candidates",
1555        );
1556        assert!(
1557            compiled.sql.contains("json_extract(src.properties, ?"),
1558            "JSON filters must be in base_candidates",
1559        );
1560        assert!(
1561            compiled.sql.contains("src.source_ref = ?"),
1562            "source_ref filter must be in base_candidates",
1563        );
1564        // Each bind value should appear exactly once (not duplicated in outer WHERE)
1565        use crate::BindValue;
1566        assert_eq!(
1567            compiled
1568                .binds
1569                .iter()
1570                .filter(|b| matches!(b, BindValue::Text(s) if s == "meeting-1"))
1571                .count(),
1572            1,
1573            "logical_id bind must not be duplicated"
1574        );
1575        assert_eq!(
1576            compiled
1577                .binds
1578                .iter()
1579                .filter(|b| matches!(b, BindValue::Text(s) if s == "ref-abc"))
1580                .count(),
1581            1,
1582            "source_ref bind must not be duplicated"
1583        );
1584    }
1585
1586    #[test]
1587    fn fts_driver_keeps_json_filter_residual_but_fuses_kind() {
1588        // Phase 2: JSON filters are residual (stay in outer WHERE); KindEq is
1589        // fusable (pushed into base_candidates so the CTE LIMIT applies after
1590        // filtering).
1591        let compiled = compile_query(
1592            &QueryBuilder::nodes("Meeting")
1593                .text_search("budget", 5)
1594                .filter_json_text_eq("$.status", "active")
1595                .filter_kind_eq("Meeting")
1596                .limit(5)
1597                .into_ast(),
1598        )
1599        .expect("compiled query");
1600
1601        assert_eq!(compiled.driving_table, DrivingTable::FtsNodes);
1602        // Residual: JSON predicate stays in outer WHERE on n.properties.
1603        assert!(
1604            compiled.sql.contains("json_extract(n.properties, ?"),
1605            "JSON filter must stay residual in outer WHERE, got:\n{}",
1606            compiled.sql,
1607        );
1608        // Fusable: the second n.kind bind should live inside base_candidates.
1609        // The CTE block ends before the final SELECT.
1610        let (cte, outer) = compiled
1611            .sql
1612            .split_once("SELECT DISTINCT n.row_id")
1613            .expect("query has final SELECT");
1614        assert!(
1615            cte.contains("AND n.kind = ?"),
1616            "KindEq must be fused inside base_candidates CTE, got CTE:\n{cte}"
1617        );
1618        // Outer WHERE must not contain a duplicate n.kind filter.
1619        assert!(
1620            !outer.contains("AND n.kind = ?"),
1621            "KindEq must NOT appear in outer WHERE for FTS driver, got outer:\n{outer}"
1622        );
1623    }
1624
1625    #[test]
1626    fn fts_driver_fuses_kind_filter() {
1627        let compiled = compile_query(
1628            &QueryBuilder::nodes("Goal")
1629                .text_search("budget", 5)
1630                .filter_kind_eq("Goal")
1631                .limit(5)
1632                .into_ast(),
1633        )
1634        .expect("compiled query");
1635
1636        assert_eq!(compiled.driving_table, DrivingTable::FtsNodes);
1637        let (cte, outer) = compiled
1638            .sql
1639            .split_once("SELECT DISTINCT n.row_id")
1640            .expect("query has final SELECT");
1641        assert!(
1642            cte.contains("AND n.kind = ?"),
1643            "KindEq must be fused inside base_candidates, got:\n{cte}"
1644        );
1645        assert!(
1646            !outer.contains("AND n.kind = ?"),
1647            "KindEq must NOT be in outer WHERE, got:\n{outer}"
1648        );
1649    }
1650
1651    #[test]
1652    fn vec_driver_fuses_kind_filter() {
1653        let compiled = compile_query(
1654            &QueryBuilder::nodes("Goal")
1655                .vector_search("budget", 5)
1656                .filter_kind_eq("Goal")
1657                .limit(5)
1658                .into_ast(),
1659        )
1660        .expect("compiled query");
1661
1662        assert_eq!(compiled.driving_table, DrivingTable::VecNodes);
1663        let (cte, outer) = compiled
1664            .sql
1665            .split_once("SELECT DISTINCT n.row_id")
1666            .expect("query has final SELECT");
1667        assert!(
1668            cte.contains("AND src.kind = ?"),
1669            "KindEq must be fused inside base_candidates, got:\n{cte}"
1670        );
1671        assert!(
1672            !outer.contains("AND n.kind = ?"),
1673            "KindEq must NOT be in outer WHERE, got:\n{outer}"
1674        );
1675    }
1676
1677    #[test]
1678    fn fts5_query_bind_uses_rendered_literals() {
1679        let compiled = compile_query(
1680            &QueryBuilder::nodes("Meeting")
1681                .text_search("User's name", 5)
1682                .limit(5)
1683                .into_ast(),
1684        )
1685        .expect("compiled query");
1686
1687        use crate::BindValue;
1688        assert!(
1689            compiled
1690                .binds
1691                .iter()
1692                .any(|b| matches!(b, BindValue::Text(s) if s == "\"User's\" \"name\"")),
1693            "FTS5 query bind should use rendered literal terms; got {:?}",
1694            compiled.binds
1695        );
1696    }
1697
1698    #[test]
1699    fn fts5_query_bind_supports_or_operator() {
1700        let compiled = compile_query(
1701            &QueryBuilder::nodes("Meeting")
1702                .text_search("ship OR docs", 5)
1703                .limit(5)
1704                .into_ast(),
1705        )
1706        .expect("compiled query");
1707
1708        use crate::BindValue;
1709        assert!(
1710            compiled
1711                .binds
1712                .iter()
1713                .any(|b| matches!(b, BindValue::Text(s) if s == "\"ship\" OR \"docs\"")),
1714            "FTS5 query bind should preserve supported OR; got {:?}",
1715            compiled.binds
1716        );
1717    }
1718
1719    #[test]
1720    fn fts5_query_bind_supports_not_operator() {
1721        let compiled = compile_query(
1722            &QueryBuilder::nodes("Meeting")
1723                .text_search("ship NOT blocked", 5)
1724                .limit(5)
1725                .into_ast(),
1726        )
1727        .expect("compiled query");
1728
1729        use crate::BindValue;
1730        assert!(
1731            compiled
1732                .binds
1733                .iter()
1734                .any(|b| matches!(b, BindValue::Text(s) if s == "\"ship\" NOT \"blocked\"")),
1735            "FTS5 query bind should preserve supported NOT; got {:?}",
1736            compiled.binds
1737        );
1738    }
1739
1740    #[test]
1741    fn fts5_query_bind_literalizes_clause_leading_not() {
1742        let compiled = compile_query(
1743            &QueryBuilder::nodes("Meeting")
1744                .text_search("NOT blocked", 5)
1745                .limit(5)
1746                .into_ast(),
1747        )
1748        .expect("compiled query");
1749
1750        use crate::BindValue;
1751        assert!(
1752            compiled
1753                .binds
1754                .iter()
1755                .any(|b| matches!(b, BindValue::Text(s) if s == "\"NOT\" \"blocked\"")),
1756            "Clause-leading NOT should degrade to literals; got {:?}",
1757            compiled.binds
1758        );
1759    }
1760
1761    #[test]
1762    fn fts5_query_bind_literalizes_or_not_sequence() {
1763        let compiled = compile_query(
1764            &QueryBuilder::nodes("Meeting")
1765                .text_search("ship OR NOT blocked", 5)
1766                .limit(5)
1767                .into_ast(),
1768        )
1769        .expect("compiled query");
1770
1771        use crate::BindValue;
1772        assert!(
1773            compiled.binds.iter().any(
1774                |b| matches!(b, BindValue::Text(s) if s == "\"ship\" \"OR\" \"NOT\" \"blocked\"")
1775            ),
1776            "`OR NOT` should degrade to literals rather than emit invalid FTS5; got {:?}",
1777            compiled.binds
1778        );
1779    }
1780
1781    #[test]
1782    fn compile_retrieval_plan_accepts_search_step() {
1783        use crate::{
1784            CompileError, Predicate, QueryAst, QueryStep, TextQuery, compile_retrieval_plan,
1785        };
1786        let ast = QueryAst {
1787            root_kind: "Goal".to_owned(),
1788            steps: vec![
1789                QueryStep::Search {
1790                    query: "ship quarterly docs".to_owned(),
1791                    limit: 7,
1792                },
1793                QueryStep::Filter(Predicate::KindEq("Goal".to_owned())),
1794            ],
1795            expansions: vec![],
1796            edge_expansions: vec![],
1797            final_limit: None,
1798        };
1799        let plan = compile_retrieval_plan(&ast).expect("compiles");
1800        assert_eq!(plan.text.strict.root_kind, "Goal");
1801        assert_eq!(plan.text.strict.limit, 7);
1802        // Filter following the Search step must land in the fusable bucket.
1803        assert_eq!(plan.text.strict.fusable_filters.len(), 1);
1804        assert!(plan.text.strict.residual_filters.is_empty());
1805        // Strict text query is the parsed form of the raw string; "ship
1806        // quarterly docs" parses to an implicit AND of three terms.
1807        assert_eq!(
1808            plan.text.strict.text_query,
1809            TextQuery::And(vec![
1810                TextQuery::Term("ship".into()),
1811                TextQuery::Term("quarterly".into()),
1812                TextQuery::Term("docs".into()),
1813            ])
1814        );
1815        // Three-term implicit-AND has a useful relaxation: per-term OR.
1816        let relaxed = plan.text.relaxed.as_ref().expect("relaxed branch present");
1817        assert_eq!(
1818            relaxed.text_query,
1819            TextQuery::Or(vec![
1820                TextQuery::Term("ship".into()),
1821                TextQuery::Term("quarterly".into()),
1822                TextQuery::Term("docs".into()),
1823            ])
1824        );
1825        assert_eq!(relaxed.fusable_filters.len(), 1);
1826        assert!(!plan.was_degraded_at_plan_time);
1827        // CompileError unused in the success path.
1828        let _ = std::any::TypeId::of::<CompileError>();
1829    }
1830
1831    #[test]
1832    fn compile_retrieval_plan_rejects_ast_without_search_step() {
1833        use crate::{CompileError, QueryBuilder, compile_retrieval_plan};
1834        let ast = QueryBuilder::nodes("Goal")
1835            .filter_kind_eq("Goal")
1836            .into_ast();
1837        let result = compile_retrieval_plan(&ast);
1838        assert!(
1839            matches!(result, Err(CompileError::MissingSearchStep)),
1840            "expected MissingSearchStep, got {result:?}"
1841        );
1842    }
1843
1844    #[test]
1845    fn compile_retrieval_plan_rejects_ast_with_multiple_search_steps() {
1846        // P12-N-1: the compiler must not silently last-wins when the caller
1847        // hands it an AST with two `QueryStep::Search` entries. Instead it
1848        // must return an explicit `MultipleSearchSteps` error so the
1849        // mis-shaped AST is surfaced at plan time.
1850        use crate::{CompileError, QueryAst, QueryStep, compile_retrieval_plan};
1851        let ast = QueryAst {
1852            root_kind: "Goal".to_owned(),
1853            steps: vec![
1854                QueryStep::Search {
1855                    query: "alpha".to_owned(),
1856                    limit: 5,
1857                },
1858                QueryStep::Search {
1859                    query: "bravo".to_owned(),
1860                    limit: 10,
1861                },
1862            ],
1863            expansions: vec![],
1864            edge_expansions: vec![],
1865            final_limit: None,
1866        };
1867        let result = compile_retrieval_plan(&ast);
1868        assert!(
1869            matches!(result, Err(CompileError::MultipleSearchSteps)),
1870            "expected MultipleSearchSteps, got {result:?}"
1871        );
1872    }
1873
1874    #[test]
1875    fn compile_retrieval_plan_v1_always_leaves_vector_empty() {
1876        // Phase 12 v1 scope: regardless of the query shape, the unified
1877        // planner never wires a vector branch into the compiled plan
1878        // because read-time embedding of natural-language queries is not
1879        // implemented in v1. Pin the constraint so a future phase that
1880        // wires the embedding generator must explicitly relax this test.
1881        use crate::{QueryAst, QueryStep, compile_retrieval_plan};
1882        for query in ["ship quarterly docs", "single", "", "   "] {
1883            let ast = QueryAst {
1884                root_kind: "Goal".to_owned(),
1885                steps: vec![QueryStep::Search {
1886                    query: query.to_owned(),
1887                    limit: 10,
1888                }],
1889                expansions: vec![],
1890                edge_expansions: vec![],
1891                final_limit: None,
1892            };
1893            let plan = compile_retrieval_plan(&ast).expect("compiles");
1894            assert!(
1895                plan.vector.is_none(),
1896                "Phase 12 v1 must always leave the vector branch empty (query = {query:?})"
1897            );
1898        }
1899    }
1900
1901    #[test]
1902    fn fused_json_text_eq_pushes_into_search_cte_inner_where() {
1903        // Item 7 contract: a fused JSON text-eq predicate on a text search
1904        // is pushed into the `base_candidates` CTE inner WHERE clause so the
1905        // CTE LIMIT applies *after* the filter runs. Compare to
1906        // `filter_json_text_eq` which lands in the outer WHERE as residual.
1907        let mut ast = QueryBuilder::nodes("Goal")
1908            .text_search("budget", 5)
1909            .into_ast();
1910        ast.steps.push(crate::QueryStep::Filter(
1911            crate::Predicate::JsonPathFusedEq {
1912                path: "$.status".to_owned(),
1913                value: "active".to_owned(),
1914            },
1915        ));
1916        let compiled = compile_query(&ast).expect("compile");
1917
1918        // Inner CTE WHERE (under the `n` alias on the chunk/property UNION).
1919        assert!(
1920            compiled.sql.contains("AND json_extract(n.properties, ?"),
1921            "fused json text-eq must land on n.properties inside the CTE; got {}",
1922            compiled.sql
1923        );
1924        // It must NOT also appear in the outer `h.properties` / flat
1925        // projection WHERE — the fusable partition removes it.
1926        assert!(
1927            !compiled.sql.contains("h.properties"),
1928            "sql should not mention h.properties (only compiled_search uses that alias)"
1929        );
1930    }
1931
1932    #[test]
1933    fn fused_json_timestamp_cmp_emits_each_operator() {
1934        for (op, op_str) in [
1935            (crate::ComparisonOp::Gt, ">"),
1936            (crate::ComparisonOp::Gte, ">="),
1937            (crate::ComparisonOp::Lt, "<"),
1938            (crate::ComparisonOp::Lte, "<="),
1939        ] {
1940            let mut ast = QueryBuilder::nodes("Goal")
1941                .text_search("budget", 5)
1942                .into_ast();
1943            ast.steps.push(crate::QueryStep::Filter(
1944                crate::Predicate::JsonPathFusedTimestampCmp {
1945                    path: "$.written_at".to_owned(),
1946                    op,
1947                    value: 1_700_000_000,
1948                },
1949            ));
1950            let compiled = compile_query(&ast).expect("compile");
1951            let needle = "json_extract(n.properties, ?";
1952            assert!(
1953                compiled.sql.contains(needle) && compiled.sql.contains(op_str),
1954                "operator {op_str} must appear in emitted SQL for fused timestamp cmp"
1955            );
1956        }
1957    }
1958
1959    #[test]
1960    fn non_fused_json_filters_still_emit_outer_where() {
1961        // Regression guard: the existing non-fused filter_json_* family
1962        // is unchanged — its predicates continue to be classified as
1963        // residual on search-driven paths and emitted against the outer
1964        // `n.properties` WHERE clause (which is textually identical to
1965        // the inner CTE emission; the difference is *where* in the SQL
1966        // it lives).
1967        let compiled = compile_query(
1968            &QueryBuilder::nodes("Goal")
1969                .text_search("budget", 5)
1970                .filter_json_text_eq("$.status", "active")
1971                .into_ast(),
1972        )
1973        .expect("compile");
1974
1975        // The residual emission lives in the outer SELECT's WHERE and
1976        // targets `n.properties`. Fusion would instead prefix the line
1977        // with `                          AND` (26 spaces) inside the
1978        // CTE. We assert the residual form here by checking the
1979        // leading whitespace on the emitted clause matches the outer
1980        // WHERE indentation ("\n  AND ") rather than the CTE one.
1981        assert!(
1982            compiled
1983                .sql
1984                .contains("\n  AND json_extract(n.properties, ?"),
1985            "non-fused filter_json_text_eq must emit into outer WHERE, got {}",
1986            compiled.sql
1987        );
1988    }
1989
1990    #[test]
1991    fn fused_json_text_eq_pushes_into_vector_cte_inner_where() {
1992        // Mirror of the text-search case for the vector driving path:
1993        // the fused JSON text-eq predicate must land inside the
1994        // `base_candidates` CTE aliased to `src`.
1995        let mut ast = QueryBuilder::nodes("Goal")
1996            .vector_search("budget", 5)
1997            .into_ast();
1998        ast.steps.push(crate::QueryStep::Filter(
1999            crate::Predicate::JsonPathFusedEq {
2000                path: "$.status".to_owned(),
2001                value: "active".to_owned(),
2002            },
2003        ));
2004        let compiled = compile_query(&ast).expect("compile");
2005        assert_eq!(compiled.driving_table, DrivingTable::VecNodes);
2006        assert!(
2007            compiled.sql.contains("AND json_extract(src.properties, ?"),
2008            "fused json text-eq on vector path must land on src.properties, got {}",
2009            compiled.sql
2010        );
2011    }
2012
2013    #[test]
2014    fn fts5_query_bind_preserves_lowercase_not_as_literal_text() {
2015        let compiled = compile_query(
2016            &QueryBuilder::nodes("Meeting")
2017                .text_search("not a ship", 5)
2018                .limit(5)
2019                .into_ast(),
2020        )
2021        .expect("compiled query");
2022
2023        use crate::BindValue;
2024        assert!(
2025            compiled
2026                .binds
2027                .iter()
2028                .any(|b| matches!(b, BindValue::Text(s) if s == "\"not\" \"a\" \"ship\"")),
2029            "Lowercase not should remain a literal term sequence; got {:?}",
2030            compiled.binds
2031        );
2032    }
2033
2034    #[test]
2035    fn traverse_filter_field_accepted_in_ast() {
2036        // Regression test: QueryStep::Traverse must carry an optional filter
2037        // predicate. filter: None must be exactly equivalent to the old
2038        // three-field form. This test fails to compile before Pack 2 lands.
2039        use crate::{Predicate, QueryStep};
2040        let step = QueryStep::Traverse {
2041            direction: TraverseDirection::Out,
2042            label: "HAS_TASK".to_owned(),
2043            max_depth: 1,
2044            filter: None,
2045        };
2046        assert!(matches!(step, QueryStep::Traverse { filter: None, .. }));
2047
2048        let step_with_filter = QueryStep::Traverse {
2049            direction: TraverseDirection::Out,
2050            label: "HAS_TASK".to_owned(),
2051            max_depth: 1,
2052            filter: Some(Predicate::KindEq("Task".to_owned())),
2053        };
2054        assert!(matches!(
2055            step_with_filter,
2056            QueryStep::Traverse {
2057                filter: Some(_),
2058                ..
2059            }
2060        ));
2061    }
2062
2063    #[test]
2064    fn semantic_search_and_raw_vector_search_together_rejected() {
2065        // Pack G: mutual-exclusion compile guard. An AST that carries both a
2066        // SemanticSearch and a RawVectorSearch step must be rejected at
2067        // compile time rather than silently discarding one sidecar.
2068        let ast = QueryBuilder::nodes("KnowledgeItem")
2069            .semantic_search("Acme", 5)
2070            .raw_vector_search(vec![0.1_f32, 0.0, 0.0, 0.0], 5)
2071            .into_ast();
2072        let err = compile_query(&ast).expect_err("must reject conflicting vector steps");
2073        assert!(
2074            matches!(err, CompileError::SemanticAndRawVectorSearchBothPresent),
2075            "expected SemanticAndRawVectorSearchBothPresent, got {err:?}"
2076        );
2077    }
2078}