// fathomdb_query/compile.rs

1use std::fmt::Write;
2
3use crate::fusion::partition_search_filters;
4use crate::plan::{choose_driving_table, execution_hints, shape_signature};
5use crate::search::{
6    CompiledRetrievalPlan, CompiledSearch, CompiledSearchPlan, CompiledVectorSearch,
7};
8use crate::{
9    ComparisonOp, DrivingTable, ExpansionSlot, Predicate, QueryAst, QueryStep, ScalarValue,
10    TextQuery, TraverseDirection, derive_relaxed, render_text_query_fts5,
11};
12
/// One value supplied for a numbered `?N` placeholder in compiled SQL.
///
/// The variants mirror the scalar kinds the compiler emits: text, signed
/// 64-bit integers, and booleans.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BindValue {
    /// UTF-8 text.
    Text(String),
    /// Signed 64-bit integer.
    Integer(i64),
    /// Boolean flag.
    Bool(bool),
}
23
/// Hash of a query's structural shape.
///
/// Deterministic for a given shape and independent of the bind values, so it
/// can serve as a cache key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ShapeHash(pub u64);
27
28/// A fully compiled query ready for execution against `SQLite`.
29#[derive(Clone, Debug, PartialEq, Eq)]
30pub struct CompiledQuery {
31    /// The generated SQL text.
32    pub sql: String,
33    /// Positional bind parameters for the SQL.
34    pub binds: Vec<BindValue>,
35    /// Structural shape hash for caching.
36    pub shape_hash: ShapeHash,
37    /// The driving table chosen by the query planner.
38    pub driving_table: DrivingTable,
39    /// Execution hints derived from the query shape.
40    pub hints: crate::ExecutionHints,
41}
42
43/// A compiled grouped query containing a root query and expansion slots.
44#[derive(Clone, Debug, PartialEq, Eq)]
45pub struct CompiledGroupedQuery {
46    /// The root flat query.
47    pub root: CompiledQuery,
48    /// Expansion slots to evaluate per root result.
49    pub expansions: Vec<ExpansionSlot>,
50    /// Structural shape hash covering the root query and all expansion slots.
51    pub shape_hash: ShapeHash,
52    /// Execution hints derived from the grouped query shape.
53    pub hints: crate::ExecutionHints,
54}
55
56/// Errors that can occur during query compilation.
57#[derive(Clone, Debug, PartialEq, Eq, thiserror::Error)]
58pub enum CompileError {
59    #[error("multiple traversal steps are not supported in v1")]
60    TooManyTraversals,
61    #[error("flat query compilation does not support expansions; use compile_grouped")]
62    FlatCompileDoesNotSupportExpansions,
63    #[error("duplicate expansion slot name: {0}")]
64    DuplicateExpansionSlot(String),
65    #[error("expansion slot name must be non-empty")]
66    EmptyExpansionSlotName,
67    #[error("too many expansion slots: max {MAX_EXPANSION_SLOTS}, got {0}")]
68    TooManyExpansionSlots(usize),
69    #[error("too many bind parameters: max 15, got {0}")]
70    TooManyBindParameters(usize),
71    #[error("traversal depth {0} exceeds maximum of {MAX_TRAVERSAL_DEPTH}")]
72    TraversalTooDeep(usize),
73    #[error("invalid JSON path: must match $(.key)+ pattern, got {0:?}")]
74    InvalidJsonPath(String),
75    #[error("compile_search requires exactly one TextSearch step in the AST")]
76    MissingTextSearchStep,
77    #[error("compile_vector_search requires exactly one VectorSearch step in the AST")]
78    MissingVectorSearchStep,
79    #[error("compile_retrieval_plan requires exactly one Search step in the AST")]
80    MissingSearchStep,
81    #[error("compile_retrieval_plan requires exactly one Search step in the AST, found multiple")]
82    MultipleSearchSteps,
83}
84
85/// Security fix H-1: Validate JSON path against a strict allowlist pattern to
86/// prevent SQL injection. Retained as defense-in-depth even though the path is
87/// now parameterized (see `FIX(review)` in `compile_query`). Only paths like
88/// `$.foo`, `$.foo.bar_baz` are allowed.
89fn validate_json_path(path: &str) -> Result<(), CompileError> {
90    let valid = path.starts_with('$')
91        && path.len() > 1
92        && path[1..].split('.').all(|segment| {
93            segment.is_empty()
94                || segment
95                    .chars()
96                    .all(|c| c.is_ascii_alphanumeric() || c == '_')
97                    && !segment.is_empty()
98        })
99        && path.contains('.');
100    if !valid {
101        return Err(CompileError::InvalidJsonPath(path.to_owned()));
102    }
103    Ok(())
104}
105
106/// Append a fusable predicate as an `AND` clause referencing `alias`.
107///
108/// Only the fusable variants (those that can be evaluated against columns on
109/// the `nodes` table join inside a search CTE) are supported — callers must
110/// pre-partition predicates via
111/// [`crate::fusion::partition_search_filters`]. Residual predicates panic via
112/// `unreachable!`.
113fn append_fusable_clause(
114    sql: &mut String,
115    binds: &mut Vec<BindValue>,
116    alias: &str,
117    predicate: &Predicate,
118) -> Result<(), CompileError> {
119    match predicate {
120        Predicate::KindEq(kind) => {
121            binds.push(BindValue::Text(kind.clone()));
122            let idx = binds.len();
123            let _ = write!(sql, "\n                          AND {alias}.kind = ?{idx}");
124        }
125        Predicate::LogicalIdEq(logical_id) => {
126            binds.push(BindValue::Text(logical_id.clone()));
127            let idx = binds.len();
128            let _ = write!(
129                sql,
130                "\n                          AND {alias}.logical_id = ?{idx}"
131            );
132        }
133        Predicate::SourceRefEq(source_ref) => {
134            binds.push(BindValue::Text(source_ref.clone()));
135            let idx = binds.len();
136            let _ = write!(
137                sql,
138                "\n                          AND {alias}.source_ref = ?{idx}"
139            );
140        }
141        Predicate::ContentRefEq(uri) => {
142            binds.push(BindValue::Text(uri.clone()));
143            let idx = binds.len();
144            let _ = write!(
145                sql,
146                "\n                          AND {alias}.content_ref = ?{idx}"
147            );
148        }
149        Predicate::ContentRefNotNull => {
150            let _ = write!(
151                sql,
152                "\n                          AND {alias}.content_ref IS NOT NULL"
153            );
154        }
155        Predicate::JsonPathFusedEq { path, value } => {
156            validate_json_path(path)?;
157            binds.push(BindValue::Text(path.clone()));
158            let path_index = binds.len();
159            binds.push(BindValue::Text(value.clone()));
160            let value_index = binds.len();
161            let _ = write!(
162                sql,
163                "\n                          AND json_extract({alias}.properties, ?{path_index}) = ?{value_index}"
164            );
165        }
166        Predicate::JsonPathFusedTimestampCmp { path, op, value } => {
167            validate_json_path(path)?;
168            binds.push(BindValue::Text(path.clone()));
169            let path_index = binds.len();
170            binds.push(BindValue::Integer(*value));
171            let value_index = binds.len();
172            let operator = match op {
173                ComparisonOp::Gt => ">",
174                ComparisonOp::Gte => ">=",
175                ComparisonOp::Lt => "<",
176                ComparisonOp::Lte => "<=",
177            };
178            let _ = write!(
179                sql,
180                "\n                          AND json_extract({alias}.properties, ?{path_index}) {operator} ?{value_index}"
181            );
182        }
183        Predicate::JsonPathEq { .. } | Predicate::JsonPathCompare { .. } => {
184            unreachable!("append_fusable_clause received a residual predicate");
185        }
186    }
187    Ok(())
188}
189
/// Largest number of bind parameters a compiled flat query may carry;
/// `compile_query` returns `CompileError::TooManyBindParameters` beyond it.
const MAX_BIND_PARAMETERS: usize = 15;
/// Largest number of expansion slots `compile_grouped_query` accepts before
/// returning `CompileError::TooManyExpansionSlots`.
const MAX_EXPANSION_SLOTS: usize = 8;

// FIX(review): max_depth was unbounded — usize::MAX produces an effectively infinite CTE.
// Options: (A) silent clamp at compile, (B) reject with CompileError, (C) validate in builder.
// Chose (B): consistent with existing TooManyTraversals/TooManyBindParameters pattern.
// The compiler is the validation boundary; silent clamping would surprise callers.
/// Deepest traversal (and expansion) depth accepted at compile time; larger
/// values are rejected with `CompileError::TraversalTooDeep`.
const MAX_TRAVERSAL_DEPTH: usize = 50;
198
199/// Compile a [`QueryAst`] into a [`CompiledQuery`] ready for execution.
200///
201/// # Compilation strategy
202///
203/// The compiled SQL is structured as a `WITH RECURSIVE` CTE named
204/// `base_candidates` followed by a final `SELECT ... JOIN nodes` projection.
205///
206/// For the **Nodes** driving table (no FTS/vector search), all filter
207/// predicates (`LogicalIdEq`, `JsonPathEq`, `JsonPathCompare`,
208/// `SourceRefEq`) are pushed into the `base_candidates` CTE so that the
209/// CTE's `LIMIT` applies *after* filtering. Without this pushdown the LIMIT
210/// would truncate the candidate set before property filters run, silently
211/// excluding nodes whose properties satisfy the filter but whose insertion
212/// order falls outside the limit window.
213///
214/// For **FTS** and **vector** driving tables, fusable predicates
215/// (`KindEq`, `LogicalIdEq`, `SourceRefEq`, `ContentRefEq`,
216/// `ContentRefNotNull`) are pushed into the `base_candidates` CTE so that
217/// the CTE's `LIMIT` applies *after* filtering; residual predicates
218/// (`JsonPathEq`, `JsonPathCompare`) remain in the outer `WHERE` because
219/// they require `json_extract` on the outer `nodes.properties` column.
220///
221/// # Errors
222///
223/// Returns [`CompileError::TooManyTraversals`] if more than one traversal step
224/// is present, or [`CompileError::TooManyBindParameters`] if the resulting SQL
225/// would require more than 15 bind parameters.
226///
227/// # Panics
228///
229/// Panics (via `unreachable!`) if the AST is internally inconsistent — for
230/// example, if `choose_driving_table` selects `VecNodes` but no
231/// `VectorSearch` step is present in the AST. This cannot happen through the
232/// public [`QueryBuilder`] API.
233#[allow(clippy::too_many_lines)]
234pub fn compile_query(ast: &QueryAst) -> Result<CompiledQuery, CompileError> {
235    if !ast.expansions.is_empty() {
236        return Err(CompileError::FlatCompileDoesNotSupportExpansions);
237    }
238
239    let traversals = ast
240        .steps
241        .iter()
242        .filter(|step| matches!(step, QueryStep::Traverse { .. }))
243        .count();
244    if traversals > 1 {
245        return Err(CompileError::TooManyTraversals);
246    }
247
248    let excessive_depth = ast.steps.iter().find_map(|step| {
249        if let QueryStep::Traverse { max_depth, .. } = step
250            && *max_depth > MAX_TRAVERSAL_DEPTH
251        {
252            return Some(*max_depth);
253        }
254        None
255    });
256    if let Some(depth) = excessive_depth {
257        return Err(CompileError::TraversalTooDeep(depth));
258    }
259
260    let driving_table = choose_driving_table(ast);
261    let hints = execution_hints(ast);
262    let shape_hash = ShapeHash(hash_signature(&shape_signature(ast)));
263
264    let base_limit = ast
265        .steps
266        .iter()
267        .find_map(|step| match step {
268            QueryStep::VectorSearch { limit, .. } | QueryStep::TextSearch { limit, .. } => {
269                Some(*limit)
270            }
271            _ => None,
272        })
273        .or(ast.final_limit)
274        .unwrap_or(25);
275
276    let final_limit = ast.final_limit.unwrap_or(base_limit);
277    let traversal = ast.steps.iter().find_map(|step| {
278        if let QueryStep::Traverse {
279            direction,
280            label,
281            max_depth,
282            filter: _,
283        } = step
284        {
285            Some((*direction, label.as_str(), *max_depth))
286        } else {
287            None
288        }
289    });
290
291    // Partition Filter predicates for the search-driven paths into fusable
292    // (injected into the search CTE's WHERE) and residual (left in the outer
293    // WHERE) sets. The Nodes path pushes *every* predicate into the CTE
294    // directly and ignores this partition.
295    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
296
297    let mut binds = Vec::new();
298    let base_candidates = match driving_table {
299        DrivingTable::VecNodes => {
300            let query = ast
301                .steps
302                .iter()
303                .find_map(|step| {
304                    if let QueryStep::VectorSearch { query, .. } = step {
305                        Some(query.as_str())
306                    } else {
307                        None
308                    }
309                })
310                .unwrap_or_else(|| unreachable!("VecNodes chosen but no VectorSearch step in AST"));
311            binds.push(BindValue::Text(query.to_owned()));
312            binds.push(BindValue::Text(ast.root_kind.clone()));
313            // sqlite-vec requires the LIMIT/k constraint to be visible directly on the
314            // vec0 KNN scan. Using a sub-select isolates the vec0 LIMIT so the join
315            // with chunks/nodes does not prevent the query planner from recognising it.
316            //
317            // ASYMMETRY (known gap, P2-3): the inner `LIMIT {base_limit}` runs
318            // BEFORE the fusable-filter `WHERE` below, so fused predicates on
319            // `src` (e.g. `kind_eq`) filter a candidate pool that has already
320            // been narrowed to `base_limit` KNN neighbours. A
321            // `vector_search("x", 5).filter_kind_eq("Goal")` can therefore
322            // return fewer than 5 Goal hits even when more exist. Fixing this
323            // requires overfetching from vec0 and re-ranking/re-limiting after
324            // the filter — explicitly out of scope for Phase 2 filter fusion.
325            // The FTS branch below does NOT share this asymmetry because its
326            // outer LIMIT wraps the post-filter SELECT.
327            let mut sql = format!(
328                "base_candidates AS (
329                    SELECT DISTINCT src.logical_id
330                    FROM (
331                        SELECT chunk_id FROM vec_nodes_active
332                        WHERE embedding MATCH ?1
333                        LIMIT {base_limit}
334                    ) vc
335                    JOIN chunks c ON c.id = vc.chunk_id
336                    JOIN nodes src ON src.logical_id = c.node_logical_id AND src.superseded_at IS NULL
337                    WHERE src.kind = ?2",
338            );
339            for predicate in &fusable_filters {
340                append_fusable_clause(&mut sql, &mut binds, "src", predicate)?;
341            }
342            sql.push_str("\n                )");
343            sql
344        }
345        DrivingTable::FtsNodes => {
346            let text_query = ast
347                .steps
348                .iter()
349                .find_map(|step| {
350                    if let QueryStep::TextSearch { query, .. } = step {
351                        Some(query)
352                    } else {
353                        None
354                    }
355                })
356                .unwrap_or_else(|| unreachable!("FtsNodes chosen but no TextSearch step in AST"));
357            // Render the typed text-query subset into safe FTS5 syntax. Only
358            // supported operators are emitted as control syntax; all literal
359            // terms and phrases remain quoted and escaped.
360            let rendered = render_text_query_fts5(text_query);
361            // Each FTS5 virtual table requires its own MATCH bind parameter;
362            // reusing indices across the UNION is not supported by SQLite.
363            binds.push(BindValue::Text(rendered.clone()));
364            binds.push(BindValue::Text(ast.root_kind.clone()));
365            binds.push(BindValue::Text(rendered));
366            binds.push(BindValue::Text(ast.root_kind.clone()));
367            // Wrap the chunk/property UNION in an outer SELECT that joins
368            // `nodes` once so fusable filters (kind/logical_id/source_ref/
369            // content_ref) can reference node columns directly, bringing them
370            // inside the CTE's LIMIT window.
371            let mut sql = String::from(
372                "base_candidates AS (
373                    SELECT DISTINCT n.logical_id
374                    FROM (
375                        SELECT src.logical_id
376                        FROM fts_nodes f
377                        JOIN chunks c ON c.id = f.chunk_id
378                        JOIN nodes src ON src.logical_id = c.node_logical_id AND src.superseded_at IS NULL
379                        WHERE fts_nodes MATCH ?1
380                          AND src.kind = ?2
381                        UNION
382                        SELECT fp.node_logical_id AS logical_id
383                        FROM fts_node_properties fp
384                        JOIN nodes src ON src.logical_id = fp.node_logical_id AND src.superseded_at IS NULL
385                        WHERE fts_node_properties MATCH ?3
386                          AND fp.kind = ?4
387                    ) u
388                    JOIN nodes n ON n.logical_id = u.logical_id AND n.superseded_at IS NULL
389                    WHERE 1 = 1",
390            );
391            for predicate in &fusable_filters {
392                append_fusable_clause(&mut sql, &mut binds, "n", predicate)?;
393            }
394            let _ = write!(
395                &mut sql,
396                "\n                    LIMIT {base_limit}\n                )"
397            );
398            sql
399        }
400        DrivingTable::Nodes => {
401            binds.push(BindValue::Text(ast.root_kind.clone()));
402            let mut sql = "base_candidates AS (
403                    SELECT DISTINCT src.logical_id
404                    FROM nodes src
405                    WHERE src.superseded_at IS NULL
406                      AND src.kind = ?1"
407                .to_owned();
408            // Push filter predicates into base_candidates so the LIMIT applies
409            // after filtering, not before. Without this, the CTE may truncate
410            // the candidate set before property/source_ref filters run, causing
411            // nodes that satisfy the filter to be excluded from results.
412            for step in &ast.steps {
413                if let QueryStep::Filter(predicate) = step {
414                    match predicate {
415                        Predicate::LogicalIdEq(logical_id) => {
416                            binds.push(BindValue::Text(logical_id.clone()));
417                            let bind_index = binds.len();
418                            let _ = write!(
419                                &mut sql,
420                                "\n                      AND src.logical_id = ?{bind_index}"
421                            );
422                        }
423                        Predicate::JsonPathEq { path, value } => {
424                            validate_json_path(path)?;
425                            binds.push(BindValue::Text(path.clone()));
426                            let path_index = binds.len();
427                            binds.push(match value {
428                                ScalarValue::Text(text) => BindValue::Text(text.clone()),
429                                ScalarValue::Integer(integer) => BindValue::Integer(*integer),
430                                ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
431                            });
432                            let value_index = binds.len();
433                            let _ = write!(
434                                &mut sql,
435                                "\n                      AND json_extract(src.properties, ?{path_index}) = ?{value_index}"
436                            );
437                        }
438                        Predicate::JsonPathCompare { path, op, value } => {
439                            validate_json_path(path)?;
440                            binds.push(BindValue::Text(path.clone()));
441                            let path_index = binds.len();
442                            binds.push(match value {
443                                ScalarValue::Text(text) => BindValue::Text(text.clone()),
444                                ScalarValue::Integer(integer) => BindValue::Integer(*integer),
445                                ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
446                            });
447                            let value_index = binds.len();
448                            let operator = match op {
449                                ComparisonOp::Gt => ">",
450                                ComparisonOp::Gte => ">=",
451                                ComparisonOp::Lt => "<",
452                                ComparisonOp::Lte => "<=",
453                            };
454                            let _ = write!(
455                                &mut sql,
456                                "\n                      AND json_extract(src.properties, ?{path_index}) {operator} ?{value_index}"
457                            );
458                        }
459                        Predicate::SourceRefEq(source_ref) => {
460                            binds.push(BindValue::Text(source_ref.clone()));
461                            let bind_index = binds.len();
462                            let _ = write!(
463                                &mut sql,
464                                "\n                      AND src.source_ref = ?{bind_index}"
465                            );
466                        }
467                        Predicate::ContentRefNotNull => {
468                            let _ = write!(
469                                &mut sql,
470                                "\n                      AND src.content_ref IS NOT NULL"
471                            );
472                        }
473                        Predicate::ContentRefEq(uri) => {
474                            binds.push(BindValue::Text(uri.clone()));
475                            let bind_index = binds.len();
476                            let _ = write!(
477                                &mut sql,
478                                "\n                      AND src.content_ref = ?{bind_index}"
479                            );
480                        }
481                        Predicate::KindEq(_) => {
482                            // Already filtered by ast.root_kind above.
483                        }
484                        Predicate::JsonPathFusedEq { path, value } => {
485                            validate_json_path(path)?;
486                            binds.push(BindValue::Text(path.clone()));
487                            let path_index = binds.len();
488                            binds.push(BindValue::Text(value.clone()));
489                            let value_index = binds.len();
490                            let _ = write!(
491                                &mut sql,
492                                "\n                      AND json_extract(src.properties, ?{path_index}) = ?{value_index}"
493                            );
494                        }
495                        Predicate::JsonPathFusedTimestampCmp { path, op, value } => {
496                            validate_json_path(path)?;
497                            binds.push(BindValue::Text(path.clone()));
498                            let path_index = binds.len();
499                            binds.push(BindValue::Integer(*value));
500                            let value_index = binds.len();
501                            let operator = match op {
502                                ComparisonOp::Gt => ">",
503                                ComparisonOp::Gte => ">=",
504                                ComparisonOp::Lt => "<",
505                                ComparisonOp::Lte => "<=",
506                            };
507                            let _ = write!(
508                                &mut sql,
509                                "\n                      AND json_extract(src.properties, ?{path_index}) {operator} ?{value_index}"
510                            );
511                        }
512                    }
513                }
514            }
515            let _ = write!(
516                &mut sql,
517                "\n                    LIMIT {base_limit}\n                )"
518            );
519            sql
520        }
521    };
522
523    let mut sql = format!("WITH RECURSIVE\n{base_candidates}");
524    let source_alias = if traversal.is_some() { "t" } else { "bc" };
525
526    if let Some((direction, label, max_depth)) = traversal {
527        binds.push(BindValue::Text(label.to_owned()));
528        let label_index = binds.len();
529        let (join_condition, next_logical_id) = match direction {
530            TraverseDirection::Out => ("e.source_logical_id = t.logical_id", "e.target_logical_id"),
531            TraverseDirection::In => ("e.target_logical_id = t.logical_id", "e.source_logical_id"),
532        };
533
534        let _ = write!(
535            &mut sql,
536            ",
537traversed(logical_id, depth, visited) AS (
538    SELECT bc.logical_id, 0, printf(',%s,', bc.logical_id)
539    FROM base_candidates bc
540    UNION ALL
541    SELECT {next_logical_id}, t.depth + 1, t.visited || {next_logical_id} || ','
542    FROM traversed t
543    JOIN edges e ON {join_condition}
544        AND e.kind = ?{label_index}
545        AND e.superseded_at IS NULL
546    WHERE t.depth < {max_depth}
547      AND instr(t.visited, printf(',%s,', {next_logical_id})) = 0
548    LIMIT {}
549)",
550            hints.hard_limit
551        );
552    }
553
554    let _ = write!(
555        &mut sql,
556        "
557SELECT DISTINCT n.row_id, n.logical_id, n.kind, n.properties, n.content_ref
558FROM {} {source_alias}
559JOIN nodes n ON n.logical_id = {source_alias}.logical_id
560    AND n.superseded_at IS NULL
561WHERE 1 = 1",
562        if traversal.is_some() {
563            "traversed"
564        } else {
565            "base_candidates"
566        }
567    );
568
569    // Outer WHERE emission. The Nodes driving table pushes every filter
570    // into `base_candidates` already, so only `KindEq` (handled separately
571    // via `root_kind`) needs to be re-emitted outside — we iterate
572    // `ast.steps` to catch it. For the search-driven paths (FtsNodes,
573    // VecNodes) we iterate the `residual_filters` partition directly
574    // instead of re-classifying predicates via `is_fusable()`. This makes
575    // `partition_search_filters` the single source of truth for the
576    // fusable/residual split: adding a new fusable variant automatically
577    // drops it from the outer WHERE without a separate audit of this loop.
578    if driving_table == DrivingTable::Nodes {
579        for step in &ast.steps {
580            if let QueryStep::Filter(Predicate::KindEq(kind)) = step {
581                binds.push(BindValue::Text(kind.clone()));
582                let bind_index = binds.len();
583                let _ = write!(&mut sql, "\n  AND n.kind = ?{bind_index}");
584            }
585        }
586    } else {
587        for predicate in &residual_filters {
588            match predicate {
589                Predicate::JsonPathEq { path, value } => {
590                    validate_json_path(path)?;
591                    binds.push(BindValue::Text(path.clone()));
592                    let path_index = binds.len();
593                    binds.push(match value {
594                        ScalarValue::Text(text) => BindValue::Text(text.clone()),
595                        ScalarValue::Integer(integer) => BindValue::Integer(*integer),
596                        ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
597                    });
598                    let value_index = binds.len();
599                    let _ = write!(
600                        &mut sql,
601                        "\n  AND json_extract(n.properties, ?{path_index}) = ?{value_index}",
602                    );
603                }
604                Predicate::JsonPathCompare { path, op, value } => {
605                    validate_json_path(path)?;
606                    binds.push(BindValue::Text(path.clone()));
607                    let path_index = binds.len();
608                    binds.push(match value {
609                        ScalarValue::Text(text) => BindValue::Text(text.clone()),
610                        ScalarValue::Integer(integer) => BindValue::Integer(*integer),
611                        ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
612                    });
613                    let value_index = binds.len();
614                    let operator = match op {
615                        ComparisonOp::Gt => ">",
616                        ComparisonOp::Gte => ">=",
617                        ComparisonOp::Lt => "<",
618                        ComparisonOp::Lte => "<=",
619                    };
620                    let _ = write!(
621                        &mut sql,
622                        "\n  AND json_extract(n.properties, ?{path_index}) {operator} ?{value_index}",
623                    );
624                }
625                Predicate::KindEq(_)
626                | Predicate::LogicalIdEq(_)
627                | Predicate::SourceRefEq(_)
628                | Predicate::ContentRefEq(_)
629                | Predicate::ContentRefNotNull
630                | Predicate::JsonPathFusedEq { .. }
631                | Predicate::JsonPathFusedTimestampCmp { .. } => {
632                    // Fusable — already injected into base_candidates by
633                    // `partition_search_filters`.
634                }
635            }
636        }
637    }
638
639    let _ = write!(&mut sql, "\nLIMIT {final_limit}");
640
641    if binds.len() > MAX_BIND_PARAMETERS {
642        return Err(CompileError::TooManyBindParameters(binds.len()));
643    }
644
645    Ok(CompiledQuery {
646        sql,
647        binds,
648        shape_hash,
649        driving_table,
650        hints,
651    })
652}
653
654/// Compile a [`QueryAst`] into a [`CompiledGroupedQuery`] for grouped execution.
655///
656/// # Errors
657///
658/// Returns a [`CompileError`] if the AST exceeds expansion-slot limits,
659/// contains empty slot names, or specifies a traversal depth beyond the
660/// configured maximum.
661pub fn compile_grouped_query(ast: &QueryAst) -> Result<CompiledGroupedQuery, CompileError> {
662    if ast.expansions.len() > MAX_EXPANSION_SLOTS {
663        return Err(CompileError::TooManyExpansionSlots(ast.expansions.len()));
664    }
665
666    let mut seen = std::collections::BTreeSet::new();
667    for expansion in &ast.expansions {
668        if expansion.slot.trim().is_empty() {
669            return Err(CompileError::EmptyExpansionSlotName);
670        }
671        if expansion.max_depth > MAX_TRAVERSAL_DEPTH {
672            return Err(CompileError::TraversalTooDeep(expansion.max_depth));
673        }
674        if !seen.insert(expansion.slot.clone()) {
675            return Err(CompileError::DuplicateExpansionSlot(expansion.slot.clone()));
676        }
677    }
678
679    let mut root_ast = ast.clone();
680    root_ast.expansions.clear();
681    let root = compile_query(&root_ast)?;
682    let hints = execution_hints(ast);
683    let shape_hash = ShapeHash(hash_signature(&shape_signature(ast)));
684
685    Ok(CompiledGroupedQuery {
686        root,
687        expansions: ast.expansions.clone(),
688        shape_hash,
689        hints,
690    })
691}
692
693/// Compile a [`QueryAst`] into a [`CompiledSearch`] describing an adaptive
694/// text-search execution.
695///
696/// Unlike [`compile_query`], this path does not emit SQL directly: the
697/// coordinator owns the search SELECT so it can project the richer row shape
698/// (score, source, snippet, projection id) that flat queries do not need.
699///
700/// # Errors
701///
702/// Returns [`CompileError::MissingTextSearchStep`] if the AST contains no
703/// [`QueryStep::TextSearch`] step.
704pub fn compile_search(ast: &QueryAst) -> Result<CompiledSearch, CompileError> {
705    let mut text_query = None;
706    let mut limit = None;
707    for step in &ast.steps {
708        match step {
709            QueryStep::TextSearch {
710                query,
711                limit: step_limit,
712            } => {
713                text_query = Some(query.clone());
714                limit = Some(*step_limit);
715            }
716            QueryStep::Filter(_)
717            | QueryStep::Search { .. }
718            | QueryStep::VectorSearch { .. }
719            | QueryStep::Traverse { .. } => {
720                // Filter steps are partitioned below; Search/Vector/Traverse
721                // steps are not composable with text search in the adaptive
722                // surface yet.
723            }
724        }
725    }
726    let text_query = text_query.ok_or(CompileError::MissingTextSearchStep)?;
727    let limit = limit.unwrap_or(25);
728    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
729    Ok(CompiledSearch {
730        root_kind: ast.root_kind.clone(),
731        text_query,
732        limit,
733        fusable_filters,
734        residual_filters,
735        attribution_requested: false,
736    })
737}
738
739/// Compile a [`QueryAst`] into a [`CompiledSearchPlan`] whose strict branch
740/// is the user's [`TextQuery`] and whose relaxed branch is derived via
741/// [`derive_relaxed`].
742///
743/// Reserved for Phase 7 SDK bindings that will construct plans from typed
744/// AST fragments. The coordinator currently builds its adaptive plan
745/// directly inside `execute_compiled_search` from an already-compiled
746/// [`CompiledSearch`], so this helper has no in-tree caller; it is kept
747/// as a public entry point for forthcoming surface bindings.
748///
749/// # Errors
750/// Returns [`CompileError::MissingTextSearchStep`] if the AST contains no
751/// [`QueryStep::TextSearch`] step.
752#[doc(hidden)]
753pub fn compile_search_plan(ast: &QueryAst) -> Result<CompiledSearchPlan, CompileError> {
754    let strict = compile_search(ast)?;
755    let (relaxed_query, was_degraded_at_plan_time) = derive_relaxed(&strict.text_query);
756    let relaxed = relaxed_query.map(|q| CompiledSearch {
757        root_kind: strict.root_kind.clone(),
758        text_query: q,
759        limit: strict.limit,
760        fusable_filters: strict.fusable_filters.clone(),
761        residual_filters: strict.residual_filters.clone(),
762        attribution_requested: strict.attribution_requested,
763    });
764    Ok(CompiledSearchPlan {
765        strict,
766        relaxed,
767        was_degraded_at_plan_time,
768    })
769}
770
771/// Compile a caller-provided strict/relaxed [`TextQuery`] pair into a
772/// [`CompiledSearchPlan`] against a [`QueryAst`] that supplies the kind
773/// root, filters, and limit.
774///
775/// This is the two-query entry point used by `Engine::fallback_search`. The
776/// caller's relaxed [`TextQuery`] is used verbatim — it is NOT passed through
777/// [`derive_relaxed`], and the 4-alternative
778/// [`crate::RELAXED_BRANCH_CAP`] is NOT applied. As a result
779/// [`CompiledSearchPlan::was_degraded_at_plan_time`] is always `false` on
780/// this path.
781///
782/// The AST supplies:
783///  - `root_kind` — reused for both branches
784///  - filter steps — partitioned once via [`partition_search_filters`] and
785///    shared unchanged across both branches
786///  - `limit` from the text-search step (or the default used by
787///    [`compile_search`]) when present; if the AST has no `TextSearch` step,
788///    the caller-supplied `limit` is used
789///
790/// Any `TextSearch` step already on the AST is IGNORED — `strict` and
791/// `relaxed` come from the caller. `Vector`/`Traverse` steps are also
792/// ignored for symmetry with [`compile_search`].
793///
794/// # Errors
795/// Returns [`CompileError`] if filter partitioning produces an unsupported
796/// shape (currently none; reserved for forward compatibility).
797pub fn compile_search_plan_from_queries(
798    ast: &QueryAst,
799    strict: TextQuery,
800    relaxed: Option<TextQuery>,
801    limit: usize,
802    attribution_requested: bool,
803) -> Result<CompiledSearchPlan, CompileError> {
804    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
805    let strict_compiled = CompiledSearch {
806        root_kind: ast.root_kind.clone(),
807        text_query: strict,
808        limit,
809        fusable_filters: fusable_filters.clone(),
810        residual_filters: residual_filters.clone(),
811        attribution_requested,
812    };
813    let relaxed_compiled = relaxed.map(|q| CompiledSearch {
814        root_kind: ast.root_kind.clone(),
815        text_query: q,
816        limit,
817        fusable_filters,
818        residual_filters,
819        attribution_requested,
820    });
821    Ok(CompiledSearchPlan {
822        strict: strict_compiled,
823        relaxed: relaxed_compiled,
824        was_degraded_at_plan_time: false,
825    })
826}
827
828/// Compile a [`QueryAst`] into a [`CompiledVectorSearch`] describing a
829/// vector-only retrieval execution.
830///
831/// Mirrors [`compile_search`] structurally. The AST must contain exactly one
832/// [`QueryStep::VectorSearch`] step; filters following the search step are
833/// partitioned by [`partition_search_filters`] into fusable and residual
834/// sets. Unlike [`compile_search`] this path does not produce a
835/// [`TextQuery`]; the caller's raw query string is preserved verbatim for
836/// the coordinator to bind to `embedding MATCH ?`.
837///
838/// # Errors
839///
840/// Returns [`CompileError::MissingVectorSearchStep`] if the AST contains no
841/// [`QueryStep::VectorSearch`] step.
842pub fn compile_vector_search(ast: &QueryAst) -> Result<CompiledVectorSearch, CompileError> {
843    let mut query_text = None;
844    let mut limit = None;
845    for step in &ast.steps {
846        match step {
847            QueryStep::VectorSearch {
848                query,
849                limit: step_limit,
850            } => {
851                query_text = Some(query.clone());
852                limit = Some(*step_limit);
853            }
854            QueryStep::Filter(_)
855            | QueryStep::Search { .. }
856            | QueryStep::TextSearch { .. }
857            | QueryStep::Traverse { .. } => {
858                // Filter steps are partitioned below; Search/TextSearch/
859                // Traverse steps are not composable with vector search in
860                // the standalone vector retrieval path.
861            }
862        }
863    }
864    let query_text = query_text.ok_or(CompileError::MissingVectorSearchStep)?;
865    let limit = limit.unwrap_or(25);
866    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
867    Ok(CompiledVectorSearch {
868        root_kind: ast.root_kind.clone(),
869        query_text,
870        limit,
871        fusable_filters,
872        residual_filters,
873        attribution_requested: false,
874    })
875}
876
877/// Compile a [`QueryAst`] containing a [`QueryStep::Search`] into a
878/// [`CompiledRetrievalPlan`] describing the bounded set of retrieval branches
879/// the Phase 12 planner may run.
880///
881/// The raw query string carried by the `Search` step is parsed into a
882/// strict [`TextQuery`] (via [`TextQuery::parse`]) and a relaxed sibling is
883/// derived via [`derive_relaxed`]. Both branches share the post-search
884/// fusable/residual filter partition. The resulting
885/// [`CompiledRetrievalPlan::text`] field carries them in the same Phase 6
886/// [`CompiledSearchPlan`] shape as `text_search()` / `fallback_search()`.
887///
888/// **v1 scope**: `vector` is unconditionally `None`. Read-time embedding of
889/// natural-language queries is not wired in v1; see
890/// [`CompiledRetrievalPlan`] for the rationale and the future-phase plan.
891/// Callers who need vector retrieval today must use the `vector_search()`
892/// override directly with a caller-provided vector literal.
893///
894/// # Errors
895///
896/// Returns [`CompileError::MissingSearchStep`] if the AST contains no
897/// [`QueryStep::Search`] step, or
898/// [`CompileError::MultipleSearchSteps`] if the AST contains more than one.
899pub fn compile_retrieval_plan(ast: &QueryAst) -> Result<CompiledRetrievalPlan, CompileError> {
900    let mut raw_query: Option<&str> = None;
901    let mut limit: Option<usize> = None;
902    for step in &ast.steps {
903        if let QueryStep::Search {
904            query,
905            limit: step_limit,
906        } = step
907        {
908            if raw_query.is_some() {
909                return Err(CompileError::MultipleSearchSteps);
910            }
911            raw_query = Some(query.as_str());
912            limit = Some(*step_limit);
913        }
914    }
915    let raw_query = raw_query.ok_or(CompileError::MissingSearchStep)?;
916    let limit = limit.unwrap_or(25);
917
918    let strict_text_query = TextQuery::parse(raw_query);
919    let (relaxed_text_query, was_degraded_at_plan_time) = derive_relaxed(&strict_text_query);
920
921    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
922
923    let strict = CompiledSearch {
924        root_kind: ast.root_kind.clone(),
925        text_query: strict_text_query,
926        limit,
927        fusable_filters: fusable_filters.clone(),
928        residual_filters: residual_filters.clone(),
929        attribution_requested: false,
930    };
931    let relaxed = relaxed_text_query.map(|q| CompiledSearch {
932        root_kind: ast.root_kind.clone(),
933        text_query: q,
934        limit,
935        fusable_filters,
936        residual_filters,
937        attribution_requested: false,
938    });
939    let text = CompiledSearchPlan {
940        strict,
941        relaxed,
942        was_degraded_at_plan_time,
943    };
944
945    // v1 scope (Phase 12): the planner's vector branch slot is structurally
946    // present on `CompiledRetrievalPlan` so the coordinator's three-block
947    // fusion path is fully wired, but read-time embedding of natural-language
948    // queries is deliberately deferred to a future phase. `compile_retrieval_plan`
949    // therefore always leaves `vector = None`; callers who want vector
950    // retrieval today must use `vector_search()` directly with a caller-
951    // provided vector literal.
952    Ok(CompiledRetrievalPlan {
953        text,
954        vector: None,
955        was_degraded_at_plan_time,
956    })
957}
958
/// Hash `signature` with 64-bit FNV-1a over its UTF-8 bytes.
///
/// Used instead of `DefaultHasher` because the result is deterministic
/// across Rust versions and program invocations.
fn hash_signature(signature: &str) -> u64 {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;
    // FNV-1a: XOR the byte in first, then multiply by the prime (wrapping).
    signature.bytes().fold(FNV_OFFSET_BASIS, |state, byte| {
        (state ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    })
}
971
972#[cfg(test)]
973#[allow(clippy::expect_used, clippy::items_after_statements)]
974mod tests {
975    use rstest::rstest;
976
977    use crate::{
978        CompileError, DrivingTable, QueryBuilder, TraverseDirection, compile_grouped_query,
979        compile_query,
980    };
981
982    #[test]
983    fn vector_query_compiles_to_chunk_resolution() {
984        let compiled = compile_query(
985            &QueryBuilder::nodes("Meeting")
986                .vector_search("budget", 5)
987                .limit(5)
988                .into_ast(),
989        )
990        .expect("compiled query");
991
992        assert_eq!(compiled.driving_table, DrivingTable::VecNodes);
993        assert!(compiled.sql.contains("JOIN chunks c ON c.id = vc.chunk_id"));
994        assert!(
995            compiled
996                .sql
997                .contains("JOIN nodes src ON src.logical_id = c.node_logical_id")
998        );
999    }
1000
1001    #[rstest]
1002    #[case(5, 7)]
1003    #[case(3, 11)]
1004    fn structural_limits_change_shape_hash(#[case] left: usize, #[case] right: usize) {
1005        let left_compiled = compile_query(
1006            &QueryBuilder::nodes("Meeting")
1007                .text_search("budget", left)
1008                .limit(left)
1009                .into_ast(),
1010        )
1011        .expect("left query");
1012        let right_compiled = compile_query(
1013            &QueryBuilder::nodes("Meeting")
1014                .text_search("budget", right)
1015                .limit(right)
1016                .into_ast(),
1017        )
1018        .expect("right query");
1019
1020        assert_ne!(left_compiled.shape_hash, right_compiled.shape_hash);
1021    }
1022
1023    #[test]
1024    fn traversal_query_is_depth_bounded() {
1025        let compiled = compile_query(
1026            &QueryBuilder::nodes("Meeting")
1027                .text_search("budget", 5)
1028                .traverse(TraverseDirection::Out, "HAS_TASK", 3)
1029                .limit(10)
1030                .into_ast(),
1031        )
1032        .expect("compiled traversal");
1033
1034        assert!(compiled.sql.contains("WITH RECURSIVE"));
1035        assert!(compiled.sql.contains("WHERE t.depth < 3"));
1036    }
1037
1038    #[test]
1039    fn text_search_compiles_to_union_over_chunk_and_property_fts() {
1040        let compiled = compile_query(
1041            &QueryBuilder::nodes("Meeting")
1042                .text_search("budget", 25)
1043                .limit(25)
1044                .into_ast(),
1045        )
1046        .expect("compiled text search");
1047
1048        assert_eq!(compiled.driving_table, DrivingTable::FtsNodes);
1049        // Must contain UNION of both FTS tables.
1050        assert!(
1051            compiled.sql.contains("fts_nodes MATCH"),
1052            "must search chunk-backed FTS"
1053        );
1054        assert!(
1055            compiled.sql.contains("fts_node_properties MATCH"),
1056            "must search property-backed FTS"
1057        );
1058        assert!(compiled.sql.contains("UNION"), "must UNION both sources");
1059        // Must have 4 bind parameters: sanitized query + kind for each table.
1060        assert_eq!(compiled.binds.len(), 4);
1061    }
1062
1063    #[test]
1064    fn logical_id_filter_is_compiled() {
1065        let compiled = compile_query(
1066            &QueryBuilder::nodes("Meeting")
1067                .filter_logical_id_eq("meeting-123")
1068                .filter_json_text_eq("$.status", "active")
1069                .limit(1)
1070                .into_ast(),
1071        )
1072        .expect("compiled query");
1073
1074        // LogicalIdEq is applied in base_candidates (src alias) for the Nodes driver,
1075        // NOT duplicated in the final WHERE. The JOIN condition still contains
1076        // "n.logical_id =" which satisfies this check.
1077        assert!(compiled.sql.contains("n.logical_id ="));
1078        assert!(compiled.sql.contains("src.logical_id ="));
1079        assert!(compiled.sql.contains("json_extract"));
1080        // Only one bind for the logical_id (not two).
1081        use crate::BindValue;
1082        assert_eq!(
1083            compiled
1084                .binds
1085                .iter()
1086                .filter(|b| matches!(b, BindValue::Text(s) if s == "meeting-123"))
1087                .count(),
1088            1
1089        );
1090    }
1091
1092    #[test]
1093    fn compile_rejects_invalid_json_path() {
1094        use crate::{Predicate, QueryStep, ScalarValue};
1095        let mut ast = QueryBuilder::nodes("Meeting").into_ast();
1096        // Attempt SQL injection via JSON path.
1097        ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
1098            path: "$') OR 1=1 --".to_owned(),
1099            value: ScalarValue::Text("x".to_owned()),
1100        }));
1101        use crate::CompileError;
1102        let result = compile_query(&ast);
1103        assert!(
1104            matches!(result, Err(CompileError::InvalidJsonPath(_))),
1105            "expected InvalidJsonPath, got {result:?}"
1106        );
1107    }
1108
1109    #[test]
1110    fn compile_accepts_valid_json_paths() {
1111        use crate::{Predicate, QueryStep, ScalarValue};
1112        for valid_path in ["$.status", "$.foo.bar", "$.a_b.c2"] {
1113            let mut ast = QueryBuilder::nodes("Meeting").into_ast();
1114            ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
1115                path: valid_path.to_owned(),
1116                value: ScalarValue::Text("v".to_owned()),
1117            }));
1118            assert!(
1119                compile_query(&ast).is_ok(),
1120                "expected valid path {valid_path:?} to compile"
1121            );
1122        }
1123    }
1124
1125    #[test]
1126    fn compile_rejects_too_many_bind_parameters() {
1127        use crate::{Predicate, QueryStep, ScalarValue};
1128        let mut ast = QueryBuilder::nodes("Meeting").into_ast();
1129        // kind occupies 1 bind; each json filter now occupies 2 binds (path + value).
1130        // 7 json filters → 1 + 14 = 15 (ok), 8 → 1 + 16 = 17 (exceeds limit of 15).
1131        for i in 0..8 {
1132            ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
1133                path: format!("$.f{i}"),
1134                value: ScalarValue::Text("v".to_owned()),
1135            }));
1136        }
1137        use crate::CompileError;
1138        let result = compile_query(&ast);
1139        assert!(
1140            matches!(result, Err(CompileError::TooManyBindParameters(17))),
1141            "expected TooManyBindParameters(17), got {result:?}"
1142        );
1143    }
1144
1145    #[test]
1146    fn compile_rejects_excessive_traversal_depth() {
1147        let result = compile_query(
1148            &QueryBuilder::nodes("Meeting")
1149                .text_search("budget", 5)
1150                .traverse(TraverseDirection::Out, "HAS_TASK", 51)
1151                .limit(10)
1152                .into_ast(),
1153        );
1154        assert!(
1155            matches!(result, Err(CompileError::TraversalTooDeep(51))),
1156            "expected TraversalTooDeep(51), got {result:?}"
1157        );
1158    }
1159
1160    #[test]
1161    fn grouped_queries_with_same_structure_share_shape_hash() {
1162        let left = compile_grouped_query(
1163            &QueryBuilder::nodes("Meeting")
1164                .text_search("budget", 5)
1165                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1, None)
1166                .limit(10)
1167                .into_ast(),
1168        )
1169        .expect("left grouped query");
1170        let right = compile_grouped_query(
1171            &QueryBuilder::nodes("Meeting")
1172                .text_search("planning", 5)
1173                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1, None)
1174                .limit(10)
1175                .into_ast(),
1176        )
1177        .expect("right grouped query");
1178
1179        assert_eq!(left.shape_hash, right.shape_hash);
1180    }
1181
1182    #[test]
1183    fn compile_grouped_rejects_duplicate_expansion_slot_names() {
1184        let result = compile_grouped_query(
1185            &QueryBuilder::nodes("Meeting")
1186                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1, None)
1187                .expand("tasks", TraverseDirection::Out, "HAS_DECISION", 1, None)
1188                .into_ast(),
1189        );
1190
1191        assert!(
1192            matches!(result, Err(CompileError::DuplicateExpansionSlot(ref slot)) if slot == "tasks"),
1193            "expected DuplicateExpansionSlot(\"tasks\"), got {result:?}"
1194        );
1195    }
1196
1197    #[test]
1198    fn flat_compile_rejects_queries_with_expansions() {
1199        let result = compile_query(
1200            &QueryBuilder::nodes("Meeting")
1201                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1, None)
1202                .into_ast(),
1203        );
1204
1205        assert!(
1206            matches!(
1207                result,
1208                Err(CompileError::FlatCompileDoesNotSupportExpansions)
1209            ),
1210            "expected FlatCompileDoesNotSupportExpansions, got {result:?}"
1211        );
1212    }
1213
1214    #[test]
1215    fn json_path_compiled_as_bind_parameter() {
1216        let compiled = compile_query(
1217            &QueryBuilder::nodes("Meeting")
1218                .filter_json_text_eq("$.status", "active")
1219                .limit(1)
1220                .into_ast(),
1221        )
1222        .expect("compiled query");
1223
1224        // Path must be parameterized, not interpolated into the SQL string.
1225        assert!(
1226            !compiled.sql.contains("'$.status'"),
1227            "JSON path must not appear as a SQL string literal"
1228        );
1229        assert!(
1230            compiled.sql.contains("json_extract(src.properties, ?"),
1231            "JSON path must be a bind parameter (pushed into base_candidates for Nodes driver)"
1232        );
1233        // Path and value should both be in the bind list.
1234        use crate::BindValue;
1235        assert!(
1236            compiled
1237                .binds
1238                .iter()
1239                .any(|b| matches!(b, BindValue::Text(s) if s == "$.status"))
1240        );
1241        assert!(
1242            compiled
1243                .binds
1244                .iter()
1245                .any(|b| matches!(b, BindValue::Text(s) if s == "active"))
1246        );
1247    }
1248
1249    // --- Filter pushdown regression tests ---
1250    //
1251    // These tests verify that filter predicates are pushed into the
1252    // base_candidates CTE for the Nodes driving table, so the CTE LIMIT
1253    // applies after filtering rather than before.  Without pushdown, the
1254    // LIMIT may truncate the candidate set before the filter runs, causing
1255    // matching nodes to be silently excluded.
1256
1257    #[test]
1258    fn nodes_driver_pushes_json_eq_filter_into_base_candidates() {
1259        let compiled = compile_query(
1260            &QueryBuilder::nodes("Meeting")
1261                .filter_json_text_eq("$.status", "active")
1262                .limit(5)
1263                .into_ast(),
1264        )
1265        .expect("compiled query");
1266
1267        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
1268        // Filter must appear inside base_candidates (src alias), not the
1269        // outer WHERE (n alias).
1270        assert!(
1271            compiled.sql.contains("json_extract(src.properties, ?"),
1272            "json_extract must reference src (base_candidates), got:\n{}",
1273            compiled.sql,
1274        );
1275        assert!(
1276            !compiled.sql.contains("json_extract(n.properties, ?"),
1277            "json_extract must NOT appear in outer WHERE for Nodes driver, got:\n{}",
1278            compiled.sql,
1279        );
1280    }
1281
1282    #[test]
1283    fn nodes_driver_pushes_json_compare_filter_into_base_candidates() {
1284        let compiled = compile_query(
1285            &QueryBuilder::nodes("Meeting")
1286                .filter_json_integer_gte("$.priority", 5)
1287                .limit(10)
1288                .into_ast(),
1289        )
1290        .expect("compiled query");
1291
1292        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
1293        assert!(
1294            compiled.sql.contains("json_extract(src.properties, ?"),
1295            "comparison filter must be in base_candidates, got:\n{}",
1296            compiled.sql,
1297        );
1298        assert!(
1299            !compiled.sql.contains("json_extract(n.properties, ?"),
1300            "comparison filter must NOT be in outer WHERE for Nodes driver",
1301        );
1302        assert!(
1303            compiled.sql.contains(">= ?"),
1304            "expected >= operator in SQL, got:\n{}",
1305            compiled.sql,
1306        );
1307    }
1308
1309    #[test]
1310    fn nodes_driver_pushes_source_ref_filter_into_base_candidates() {
1311        let compiled = compile_query(
1312            &QueryBuilder::nodes("Meeting")
1313                .filter_source_ref_eq("ref-123")
1314                .limit(5)
1315                .into_ast(),
1316        )
1317        .expect("compiled query");
1318
1319        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
1320        assert!(
1321            compiled.sql.contains("src.source_ref = ?"),
1322            "source_ref filter must be in base_candidates, got:\n{}",
1323            compiled.sql,
1324        );
1325        assert!(
1326            !compiled.sql.contains("n.source_ref = ?"),
1327            "source_ref filter must NOT be in outer WHERE for Nodes driver",
1328        );
1329    }
1330
1331    #[test]
1332    fn nodes_driver_pushes_multiple_filters_into_base_candidates() {
1333        let compiled = compile_query(
1334            &QueryBuilder::nodes("Meeting")
1335                .filter_logical_id_eq("meeting-1")
1336                .filter_json_text_eq("$.status", "active")
1337                .filter_json_integer_gte("$.priority", 5)
1338                .filter_source_ref_eq("ref-abc")
1339                .limit(1)
1340                .into_ast(),
1341        )
1342        .expect("compiled query");
1343
1344        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
1345        // All filters should be in base_candidates, none in outer WHERE
1346        assert!(
1347            compiled.sql.contains("src.logical_id = ?"),
1348            "logical_id filter must be in base_candidates",
1349        );
1350        assert!(
1351            compiled.sql.contains("json_extract(src.properties, ?"),
1352            "JSON filters must be in base_candidates",
1353        );
1354        assert!(
1355            compiled.sql.contains("src.source_ref = ?"),
1356            "source_ref filter must be in base_candidates",
1357        );
1358        // Each bind value should appear exactly once (not duplicated in outer WHERE)
1359        use crate::BindValue;
1360        assert_eq!(
1361            compiled
1362                .binds
1363                .iter()
1364                .filter(|b| matches!(b, BindValue::Text(s) if s == "meeting-1"))
1365                .count(),
1366            1,
1367            "logical_id bind must not be duplicated"
1368        );
1369        assert_eq!(
1370            compiled
1371                .binds
1372                .iter()
1373                .filter(|b| matches!(b, BindValue::Text(s) if s == "ref-abc"))
1374                .count(),
1375            1,
1376            "source_ref bind must not be duplicated"
1377        );
1378    }
1379
1380    #[test]
1381    fn fts_driver_keeps_json_filter_residual_but_fuses_kind() {
1382        // Phase 2: JSON filters are residual (stay in outer WHERE); KindEq is
1383        // fusable (pushed into base_candidates so the CTE LIMIT applies after
1384        // filtering).
1385        let compiled = compile_query(
1386            &QueryBuilder::nodes("Meeting")
1387                .text_search("budget", 5)
1388                .filter_json_text_eq("$.status", "active")
1389                .filter_kind_eq("Meeting")
1390                .limit(5)
1391                .into_ast(),
1392        )
1393        .expect("compiled query");
1394
1395        assert_eq!(compiled.driving_table, DrivingTable::FtsNodes);
1396        // Residual: JSON predicate stays in outer WHERE on n.properties.
1397        assert!(
1398            compiled.sql.contains("json_extract(n.properties, ?"),
1399            "JSON filter must stay residual in outer WHERE, got:\n{}",
1400            compiled.sql,
1401        );
1402        // Fusable: the second n.kind bind should live inside base_candidates.
1403        // The CTE block ends before the final SELECT.
1404        let (cte, outer) = compiled
1405            .sql
1406            .split_once("SELECT DISTINCT n.row_id")
1407            .expect("query has final SELECT");
1408        assert!(
1409            cte.contains("AND n.kind = ?"),
1410            "KindEq must be fused inside base_candidates CTE, got CTE:\n{cte}"
1411        );
1412        // Outer WHERE must not contain a duplicate n.kind filter.
1413        assert!(
1414            !outer.contains("AND n.kind = ?"),
1415            "KindEq must NOT appear in outer WHERE for FTS driver, got outer:\n{outer}"
1416        );
1417    }
1418
1419    #[test]
1420    fn fts_driver_fuses_kind_filter() {
1421        let compiled = compile_query(
1422            &QueryBuilder::nodes("Goal")
1423                .text_search("budget", 5)
1424                .filter_kind_eq("Goal")
1425                .limit(5)
1426                .into_ast(),
1427        )
1428        .expect("compiled query");
1429
1430        assert_eq!(compiled.driving_table, DrivingTable::FtsNodes);
1431        let (cte, outer) = compiled
1432            .sql
1433            .split_once("SELECT DISTINCT n.row_id")
1434            .expect("query has final SELECT");
1435        assert!(
1436            cte.contains("AND n.kind = ?"),
1437            "KindEq must be fused inside base_candidates, got:\n{cte}"
1438        );
1439        assert!(
1440            !outer.contains("AND n.kind = ?"),
1441            "KindEq must NOT be in outer WHERE, got:\n{outer}"
1442        );
1443    }
1444
1445    #[test]
1446    fn vec_driver_fuses_kind_filter() {
1447        let compiled = compile_query(
1448            &QueryBuilder::nodes("Goal")
1449                .vector_search("budget", 5)
1450                .filter_kind_eq("Goal")
1451                .limit(5)
1452                .into_ast(),
1453        )
1454        .expect("compiled query");
1455
1456        assert_eq!(compiled.driving_table, DrivingTable::VecNodes);
1457        let (cte, outer) = compiled
1458            .sql
1459            .split_once("SELECT DISTINCT n.row_id")
1460            .expect("query has final SELECT");
1461        assert!(
1462            cte.contains("AND src.kind = ?"),
1463            "KindEq must be fused inside base_candidates, got:\n{cte}"
1464        );
1465        assert!(
1466            !outer.contains("AND n.kind = ?"),
1467            "KindEq must NOT be in outer WHERE, got:\n{outer}"
1468        );
1469    }
1470
1471    #[test]
1472    fn fts5_query_bind_uses_rendered_literals() {
1473        let compiled = compile_query(
1474            &QueryBuilder::nodes("Meeting")
1475                .text_search("User's name", 5)
1476                .limit(5)
1477                .into_ast(),
1478        )
1479        .expect("compiled query");
1480
1481        use crate::BindValue;
1482        assert!(
1483            compiled
1484                .binds
1485                .iter()
1486                .any(|b| matches!(b, BindValue::Text(s) if s == "\"User's\" \"name\"")),
1487            "FTS5 query bind should use rendered literal terms; got {:?}",
1488            compiled.binds
1489        );
1490    }
1491
1492    #[test]
1493    fn fts5_query_bind_supports_or_operator() {
1494        let compiled = compile_query(
1495            &QueryBuilder::nodes("Meeting")
1496                .text_search("ship OR docs", 5)
1497                .limit(5)
1498                .into_ast(),
1499        )
1500        .expect("compiled query");
1501
1502        use crate::BindValue;
1503        assert!(
1504            compiled
1505                .binds
1506                .iter()
1507                .any(|b| matches!(b, BindValue::Text(s) if s == "\"ship\" OR \"docs\"")),
1508            "FTS5 query bind should preserve supported OR; got {:?}",
1509            compiled.binds
1510        );
1511    }
1512
1513    #[test]
1514    fn fts5_query_bind_supports_not_operator() {
1515        let compiled = compile_query(
1516            &QueryBuilder::nodes("Meeting")
1517                .text_search("ship NOT blocked", 5)
1518                .limit(5)
1519                .into_ast(),
1520        )
1521        .expect("compiled query");
1522
1523        use crate::BindValue;
1524        assert!(
1525            compiled
1526                .binds
1527                .iter()
1528                .any(|b| matches!(b, BindValue::Text(s) if s == "\"ship\" NOT \"blocked\"")),
1529            "FTS5 query bind should preserve supported NOT; got {:?}",
1530            compiled.binds
1531        );
1532    }
1533
1534    #[test]
1535    fn fts5_query_bind_literalizes_clause_leading_not() {
1536        let compiled = compile_query(
1537            &QueryBuilder::nodes("Meeting")
1538                .text_search("NOT blocked", 5)
1539                .limit(5)
1540                .into_ast(),
1541        )
1542        .expect("compiled query");
1543
1544        use crate::BindValue;
1545        assert!(
1546            compiled
1547                .binds
1548                .iter()
1549                .any(|b| matches!(b, BindValue::Text(s) if s == "\"NOT\" \"blocked\"")),
1550            "Clause-leading NOT should degrade to literals; got {:?}",
1551            compiled.binds
1552        );
1553    }
1554
1555    #[test]
1556    fn fts5_query_bind_literalizes_or_not_sequence() {
1557        let compiled = compile_query(
1558            &QueryBuilder::nodes("Meeting")
1559                .text_search("ship OR NOT blocked", 5)
1560                .limit(5)
1561                .into_ast(),
1562        )
1563        .expect("compiled query");
1564
1565        use crate::BindValue;
1566        assert!(
1567            compiled.binds.iter().any(
1568                |b| matches!(b, BindValue::Text(s) if s == "\"ship\" \"OR\" \"NOT\" \"blocked\"")
1569            ),
1570            "`OR NOT` should degrade to literals rather than emit invalid FTS5; got {:?}",
1571            compiled.binds
1572        );
1573    }
1574
1575    #[test]
1576    fn compile_retrieval_plan_accepts_search_step() {
1577        use crate::{
1578            CompileError, Predicate, QueryAst, QueryStep, TextQuery, compile_retrieval_plan,
1579        };
1580        let ast = QueryAst {
1581            root_kind: "Goal".to_owned(),
1582            steps: vec![
1583                QueryStep::Search {
1584                    query: "ship quarterly docs".to_owned(),
1585                    limit: 7,
1586                },
1587                QueryStep::Filter(Predicate::KindEq("Goal".to_owned())),
1588            ],
1589            expansions: vec![],
1590            final_limit: None,
1591        };
1592        let plan = compile_retrieval_plan(&ast).expect("compiles");
1593        assert_eq!(plan.text.strict.root_kind, "Goal");
1594        assert_eq!(plan.text.strict.limit, 7);
1595        // Filter following the Search step must land in the fusable bucket.
1596        assert_eq!(plan.text.strict.fusable_filters.len(), 1);
1597        assert!(plan.text.strict.residual_filters.is_empty());
1598        // Strict text query is the parsed form of the raw string; "ship
1599        // quarterly docs" parses to an implicit AND of three terms.
1600        assert_eq!(
1601            plan.text.strict.text_query,
1602            TextQuery::And(vec![
1603                TextQuery::Term("ship".into()),
1604                TextQuery::Term("quarterly".into()),
1605                TextQuery::Term("docs".into()),
1606            ])
1607        );
1608        // Three-term implicit-AND has a useful relaxation: per-term OR.
1609        let relaxed = plan.text.relaxed.as_ref().expect("relaxed branch present");
1610        assert_eq!(
1611            relaxed.text_query,
1612            TextQuery::Or(vec![
1613                TextQuery::Term("ship".into()),
1614                TextQuery::Term("quarterly".into()),
1615                TextQuery::Term("docs".into()),
1616            ])
1617        );
1618        assert_eq!(relaxed.fusable_filters.len(), 1);
1619        assert!(!plan.was_degraded_at_plan_time);
1620        // CompileError unused in the success path.
1621        let _ = std::any::TypeId::of::<CompileError>();
1622    }
1623
1624    #[test]
1625    fn compile_retrieval_plan_rejects_ast_without_search_step() {
1626        use crate::{CompileError, QueryBuilder, compile_retrieval_plan};
1627        let ast = QueryBuilder::nodes("Goal")
1628            .filter_kind_eq("Goal")
1629            .into_ast();
1630        let result = compile_retrieval_plan(&ast);
1631        assert!(
1632            matches!(result, Err(CompileError::MissingSearchStep)),
1633            "expected MissingSearchStep, got {result:?}"
1634        );
1635    }
1636
1637    #[test]
1638    fn compile_retrieval_plan_rejects_ast_with_multiple_search_steps() {
1639        // P12-N-1: the compiler must not silently last-wins when the caller
1640        // hands it an AST with two `QueryStep::Search` entries. Instead it
1641        // must return an explicit `MultipleSearchSteps` error so the
1642        // mis-shaped AST is surfaced at plan time.
1643        use crate::{CompileError, QueryAst, QueryStep, compile_retrieval_plan};
1644        let ast = QueryAst {
1645            root_kind: "Goal".to_owned(),
1646            steps: vec![
1647                QueryStep::Search {
1648                    query: "alpha".to_owned(),
1649                    limit: 5,
1650                },
1651                QueryStep::Search {
1652                    query: "bravo".to_owned(),
1653                    limit: 10,
1654                },
1655            ],
1656            expansions: vec![],
1657            final_limit: None,
1658        };
1659        let result = compile_retrieval_plan(&ast);
1660        assert!(
1661            matches!(result, Err(CompileError::MultipleSearchSteps)),
1662            "expected MultipleSearchSteps, got {result:?}"
1663        );
1664    }
1665
1666    #[test]
1667    fn compile_retrieval_plan_v1_always_leaves_vector_empty() {
1668        // Phase 12 v1 scope: regardless of the query shape, the unified
1669        // planner never wires a vector branch into the compiled plan
1670        // because read-time embedding of natural-language queries is not
1671        // implemented in v1. Pin the constraint so a future phase that
1672        // wires the embedding generator must explicitly relax this test.
1673        use crate::{QueryAst, QueryStep, compile_retrieval_plan};
1674        for query in ["ship quarterly docs", "single", "", "   "] {
1675            let ast = QueryAst {
1676                root_kind: "Goal".to_owned(),
1677                steps: vec![QueryStep::Search {
1678                    query: query.to_owned(),
1679                    limit: 10,
1680                }],
1681                expansions: vec![],
1682                final_limit: None,
1683            };
1684            let plan = compile_retrieval_plan(&ast).expect("compiles");
1685            assert!(
1686                plan.vector.is_none(),
1687                "Phase 12 v1 must always leave the vector branch empty (query = {query:?})"
1688            );
1689        }
1690    }
1691
1692    #[test]
1693    fn fused_json_text_eq_pushes_into_search_cte_inner_where() {
1694        // Item 7 contract: a fused JSON text-eq predicate on a text search
1695        // is pushed into the `base_candidates` CTE inner WHERE clause so the
1696        // CTE LIMIT applies *after* the filter runs. Compare to
1697        // `filter_json_text_eq` which lands in the outer WHERE as residual.
1698        let mut ast = QueryBuilder::nodes("Goal")
1699            .text_search("budget", 5)
1700            .into_ast();
1701        ast.steps.push(crate::QueryStep::Filter(
1702            crate::Predicate::JsonPathFusedEq {
1703                path: "$.status".to_owned(),
1704                value: "active".to_owned(),
1705            },
1706        ));
1707        let compiled = compile_query(&ast).expect("compile");
1708
1709        // Inner CTE WHERE (under the `n` alias on the chunk/property UNION).
1710        assert!(
1711            compiled.sql.contains("AND json_extract(n.properties, ?"),
1712            "fused json text-eq must land on n.properties inside the CTE; got {}",
1713            compiled.sql
1714        );
1715        // It must NOT also appear in the outer `h.properties` / flat
1716        // projection WHERE — the fusable partition removes it.
1717        assert!(
1718            !compiled.sql.contains("h.properties"),
1719            "sql should not mention h.properties (only compiled_search uses that alias)"
1720        );
1721    }
1722
1723    #[test]
1724    fn fused_json_timestamp_cmp_emits_each_operator() {
1725        for (op, op_str) in [
1726            (crate::ComparisonOp::Gt, ">"),
1727            (crate::ComparisonOp::Gte, ">="),
1728            (crate::ComparisonOp::Lt, "<"),
1729            (crate::ComparisonOp::Lte, "<="),
1730        ] {
1731            let mut ast = QueryBuilder::nodes("Goal")
1732                .text_search("budget", 5)
1733                .into_ast();
1734            ast.steps.push(crate::QueryStep::Filter(
1735                crate::Predicate::JsonPathFusedTimestampCmp {
1736                    path: "$.written_at".to_owned(),
1737                    op,
1738                    value: 1_700_000_000,
1739                },
1740            ));
1741            let compiled = compile_query(&ast).expect("compile");
1742            let needle = "json_extract(n.properties, ?";
1743            assert!(
1744                compiled.sql.contains(needle) && compiled.sql.contains(op_str),
1745                "operator {op_str} must appear in emitted SQL for fused timestamp cmp"
1746            );
1747        }
1748    }
1749
1750    #[test]
1751    fn non_fused_json_filters_still_emit_outer_where() {
1752        // Regression guard: the existing non-fused filter_json_* family
1753        // is unchanged — its predicates continue to be classified as
1754        // residual on search-driven paths and emitted against the outer
1755        // `n.properties` WHERE clause (which is textually identical to
1756        // the inner CTE emission; the difference is *where* in the SQL
1757        // it lives).
1758        let compiled = compile_query(
1759            &QueryBuilder::nodes("Goal")
1760                .text_search("budget", 5)
1761                .filter_json_text_eq("$.status", "active")
1762                .into_ast(),
1763        )
1764        .expect("compile");
1765
1766        // The residual emission lives in the outer SELECT's WHERE and
1767        // targets `n.properties`. Fusion would instead prefix the line
1768        // with `                          AND` (26 spaces) inside the
1769        // CTE. We assert the residual form here by checking the
1770        // leading whitespace on the emitted clause matches the outer
1771        // WHERE indentation ("\n  AND ") rather than the CTE one.
1772        assert!(
1773            compiled
1774                .sql
1775                .contains("\n  AND json_extract(n.properties, ?"),
1776            "non-fused filter_json_text_eq must emit into outer WHERE, got {}",
1777            compiled.sql
1778        );
1779    }
1780
1781    #[test]
1782    fn fused_json_text_eq_pushes_into_vector_cte_inner_where() {
1783        // Mirror of the text-search case for the vector driving path:
1784        // the fused JSON text-eq predicate must land inside the
1785        // `base_candidates` CTE aliased to `src`.
1786        let mut ast = QueryBuilder::nodes("Goal")
1787            .vector_search("budget", 5)
1788            .into_ast();
1789        ast.steps.push(crate::QueryStep::Filter(
1790            crate::Predicate::JsonPathFusedEq {
1791                path: "$.status".to_owned(),
1792                value: "active".to_owned(),
1793            },
1794        ));
1795        let compiled = compile_query(&ast).expect("compile");
1796        assert_eq!(compiled.driving_table, DrivingTable::VecNodes);
1797        assert!(
1798            compiled.sql.contains("AND json_extract(src.properties, ?"),
1799            "fused json text-eq on vector path must land on src.properties, got {}",
1800            compiled.sql
1801        );
1802    }
1803
1804    #[test]
1805    fn fts5_query_bind_preserves_lowercase_not_as_literal_text() {
1806        let compiled = compile_query(
1807            &QueryBuilder::nodes("Meeting")
1808                .text_search("not a ship", 5)
1809                .limit(5)
1810                .into_ast(),
1811        )
1812        .expect("compiled query");
1813
1814        use crate::BindValue;
1815        assert!(
1816            compiled
1817                .binds
1818                .iter()
1819                .any(|b| matches!(b, BindValue::Text(s) if s == "\"not\" \"a\" \"ship\"")),
1820            "Lowercase not should remain a literal term sequence; got {:?}",
1821            compiled.binds
1822        );
1823    }
1824
1825    #[test]
1826    fn traverse_filter_field_accepted_in_ast() {
1827        // Regression test: QueryStep::Traverse must carry an optional filter
1828        // predicate. filter: None must be exactly equivalent to the old
1829        // three-field form. This test fails to compile before Pack 2 lands.
1830        use crate::{Predicate, QueryStep};
1831        let step = QueryStep::Traverse {
1832            direction: TraverseDirection::Out,
1833            label: "HAS_TASK".to_owned(),
1834            max_depth: 1,
1835            filter: None,
1836        };
1837        assert!(matches!(step, QueryStep::Traverse { filter: None, .. }));
1838
1839        let step_with_filter = QueryStep::Traverse {
1840            direction: TraverseDirection::Out,
1841            label: "HAS_TASK".to_owned(),
1842            max_depth: 1,
1843            filter: Some(Predicate::KindEq("Task".to_owned())),
1844        };
1845        assert!(matches!(
1846            step_with_filter,
1847            QueryStep::Traverse {
1848                filter: Some(_),
1849                ..
1850            }
1851        ));
1852    }
1853}