Skip to main content

fathomdb_query/
compile.rs

1use std::fmt::Write;
2
3use crate::plan::{choose_driving_table, execution_hints, shape_signature};
4use crate::{
5    ComparisonOp, DrivingTable, ExpansionSlot, Predicate, QueryAst, QueryStep, ScalarValue,
6    TraverseDirection,
7};
8
/// One positional parameter value attached to a compiled SQL statement.
///
/// Values are stored in the order their `?N` placeholders are numbered in the
/// generated SQL.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum BindValue {
    /// Parameter bound as UTF-8 text.
    Text(String),
    /// Parameter bound as a signed 64-bit integer.
    Integer(i64),
    /// Parameter bound as a boolean.
    Bool(bool),
}
19
/// Deterministic 64-bit hash identifying the structural shape of a query.
///
/// Computed from the query's shape signature rather than from the generated
/// SQL text, so it can serve as a cache key for structurally identical queries.
#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
pub struct ShapeHash(pub u64);
23
24/// A fully compiled query ready for execution against `SQLite`.
25#[derive(Clone, Debug, PartialEq, Eq)]
26pub struct CompiledQuery {
27    /// The generated SQL text.
28    pub sql: String,
29    /// Positional bind parameters for the SQL.
30    pub binds: Vec<BindValue>,
31    /// Structural shape hash for caching.
32    pub shape_hash: ShapeHash,
33    /// The driving table chosen by the query planner.
34    pub driving_table: DrivingTable,
35    /// Execution hints derived from the query shape.
36    pub hints: crate::ExecutionHints,
37}
38
39/// A compiled grouped query containing a root query and expansion slots.
40#[derive(Clone, Debug, PartialEq, Eq)]
41pub struct CompiledGroupedQuery {
42    /// The root flat query.
43    pub root: CompiledQuery,
44    /// Expansion slots to evaluate per root result.
45    pub expansions: Vec<ExpansionSlot>,
46    /// Structural shape hash covering the root query and all expansion slots.
47    pub shape_hash: ShapeHash,
48    /// Execution hints derived from the grouped query shape.
49    pub hints: crate::ExecutionHints,
50}
51
52/// Errors that can occur during query compilation.
53#[derive(Clone, Debug, PartialEq, Eq, thiserror::Error)]
54pub enum CompileError {
55    #[error("multiple traversal steps are not supported in v1")]
56    TooManyTraversals,
57    #[error("flat query compilation does not support expansions; use compile_grouped")]
58    FlatCompileDoesNotSupportExpansions,
59    #[error("duplicate expansion slot name: {0}")]
60    DuplicateExpansionSlot(String),
61    #[error("expansion slot name must be non-empty")]
62    EmptyExpansionSlotName,
63    #[error("too many expansion slots: max {MAX_EXPANSION_SLOTS}, got {0}")]
64    TooManyExpansionSlots(usize),
65    #[error("too many bind parameters: max 15, got {0}")]
66    TooManyBindParameters(usize),
67    #[error("traversal depth {0} exceeds maximum of {MAX_TRAVERSAL_DEPTH}")]
68    TraversalTooDeep(usize),
69    #[error("invalid JSON path: must match $(.key)+ pattern, got {0:?}")]
70    InvalidJsonPath(String),
71}
72
/// Turn raw user search text into an FTS5 MATCH expression made only of
/// quoted string terms.
///
/// The input is split on whitespace; each token is wrapped in double quotes
/// with any embedded `"` doubled (the FTS5 string escape), and the quoted
/// tokens are joined with single spaces. Because every token ends up inside a
/// quoted string, FTS5 syntax such as `AND`/`OR`/`NOT`/`NEAR`, column filters,
/// parentheses and wildcards is treated as literal text.
///
/// Empty or whitespace-only input yields an empty string; handling that case
/// is left to the caller.
fn sanitize_fts5_query(raw: &str) -> String {
    let mut sanitized = String::with_capacity(raw.len() + 2);
    for word in raw.split_whitespace() {
        // Tokens are never empty, so a non-empty buffer means a previous term
        // was already written and needs a separator.
        if !sanitized.is_empty() {
            sanitized.push(' ');
        }
        sanitized.push('"');
        for ch in word.chars() {
            if ch == '"' {
                sanitized.push('"');
            }
            sanitized.push(ch);
        }
        sanitized.push('"');
    }
    sanitized
}
92
93/// Security fix H-1: Validate JSON path against a strict allowlist pattern to
94/// prevent SQL injection. Retained as defense-in-depth even though the path is
95/// now parameterized (see `FIX(review)` in `compile_query`). Only paths like
96/// `$.foo`, `$.foo.bar_baz` are allowed.
97fn validate_json_path(path: &str) -> Result<(), CompileError> {
98    let valid = path.starts_with('$')
99        && path.len() > 1
100        && path[1..].split('.').all(|segment| {
101            segment.is_empty()
102                || segment
103                    .chars()
104                    .all(|c| c.is_ascii_alphanumeric() || c == '_')
105                    && !segment.is_empty()
106        })
107        && path.contains('.');
108    if !valid {
109        return Err(CompileError::InvalidJsonPath(path.to_owned()));
110    }
111    Ok(())
112}
113
/// Largest `max_depth` accepted for a traversal step or an expansion slot.
///
/// FIX(review): `max_depth` used to be unbounded, and `usize::MAX` produces an
/// effectively infinite CTE. Options considered: (A) silently clamp at compile
/// time, (B) reject with a `CompileError`, (C) validate in the builder.
/// Chose (B): it matches the existing `TooManyTraversals` /
/// `TooManyBindParameters` pattern, the compiler is the validation boundary,
/// and silent clamping would surprise callers.
const MAX_TRAVERSAL_DEPTH: usize = 50;

/// Largest number of expansion slots a grouped query may declare.
const MAX_EXPANSION_SLOTS: usize = 8;

/// Largest number of bind parameters a compiled query may carry.
const MAX_BIND_PARAMETERS: usize = 15;
122
123/// Compile a [`QueryAst`] into a [`CompiledQuery`] ready for execution.
124///
125/// # Compilation strategy
126///
127/// The compiled SQL is structured as a `WITH RECURSIVE` CTE named
128/// `base_candidates` followed by a final `SELECT ... JOIN nodes` projection.
129///
130/// For the **Nodes** driving table (no FTS/vector search), all filter
131/// predicates (`LogicalIdEq`, `JsonPathEq`, `JsonPathCompare`,
132/// `SourceRefEq`) are pushed into the `base_candidates` CTE so that the
133/// CTE's `LIMIT` applies *after* filtering. Without this pushdown the LIMIT
134/// would truncate the candidate set before property filters run, silently
135/// excluding nodes whose properties satisfy the filter but whose insertion
136/// order falls outside the limit window.
137///
138/// For **FTS** and **vector** driving tables, filters remain in the outer
139/// `WHERE` clause because the CTE is already narrowed by the search itself.
140///
141/// # Errors
142///
143/// Returns [`CompileError::TooManyTraversals`] if more than one traversal step
144/// is present, or [`CompileError::TooManyBindParameters`] if the resulting SQL
145/// would require more than 15 bind parameters.
146///
147/// # Panics
148///
149/// Panics (via `unreachable!`) if the AST is internally inconsistent — for
150/// example, if `choose_driving_table` selects `VecNodes` but no
151/// `VectorSearch` step is present in the AST. This cannot happen through the
152/// public [`QueryBuilder`] API.
153#[allow(clippy::too_many_lines)]
154pub fn compile_query(ast: &QueryAst) -> Result<CompiledQuery, CompileError> {
155    if !ast.expansions.is_empty() {
156        return Err(CompileError::FlatCompileDoesNotSupportExpansions);
157    }
158
159    let traversals = ast
160        .steps
161        .iter()
162        .filter(|step| matches!(step, QueryStep::Traverse { .. }))
163        .count();
164    if traversals > 1 {
165        return Err(CompileError::TooManyTraversals);
166    }
167
168    let excessive_depth = ast.steps.iter().find_map(|step| {
169        if let QueryStep::Traverse { max_depth, .. } = step
170            && *max_depth > MAX_TRAVERSAL_DEPTH
171        {
172            return Some(*max_depth);
173        }
174        None
175    });
176    if let Some(depth) = excessive_depth {
177        return Err(CompileError::TraversalTooDeep(depth));
178    }
179
180    let driving_table = choose_driving_table(ast);
181    let hints = execution_hints(ast);
182    let shape_hash = ShapeHash(hash_signature(&shape_signature(ast)));
183
184    let base_limit = ast
185        .steps
186        .iter()
187        .find_map(|step| match step {
188            QueryStep::VectorSearch { limit, .. } | QueryStep::TextSearch { limit, .. } => {
189                Some(*limit)
190            }
191            _ => None,
192        })
193        .or(ast.final_limit)
194        .unwrap_or(25);
195
196    let final_limit = ast.final_limit.unwrap_or(base_limit);
197    let traversal = ast.steps.iter().find_map(|step| {
198        if let QueryStep::Traverse {
199            direction,
200            label,
201            max_depth,
202        } = step
203        {
204            Some((*direction, label.as_str(), *max_depth))
205        } else {
206            None
207        }
208    });
209
210    let mut binds = Vec::new();
211    let base_candidates = match driving_table {
212        DrivingTable::VecNodes => {
213            let query = ast
214                .steps
215                .iter()
216                .find_map(|step| {
217                    if let QueryStep::VectorSearch { query, .. } = step {
218                        Some(query.as_str())
219                    } else {
220                        None
221                    }
222                })
223                .unwrap_or_else(|| unreachable!("VecNodes chosen but no VectorSearch step in AST"));
224            binds.push(BindValue::Text(query.to_owned()));
225            binds.push(BindValue::Text(ast.root_kind.clone()));
226            // sqlite-vec requires the LIMIT/k constraint to be visible directly on the
227            // vec0 KNN scan. Using a sub-select isolates the vec0 LIMIT so the join
228            // with chunks/nodes does not prevent the query planner from recognising it.
229            format!(
230                "base_candidates AS (
231                    SELECT DISTINCT src.logical_id
232                    FROM (
233                        SELECT chunk_id FROM vec_nodes_active
234                        WHERE embedding MATCH ?1
235                        LIMIT {base_limit}
236                    ) vc
237                    JOIN chunks c ON c.id = vc.chunk_id
238                    JOIN nodes src ON src.logical_id = c.node_logical_id AND src.superseded_at IS NULL
239                    WHERE src.kind = ?2
240                )"
241            )
242        }
243        DrivingTable::FtsNodes => {
244            let raw_query = ast
245                .steps
246                .iter()
247                .find_map(|step| {
248                    if let QueryStep::TextSearch { query, .. } = step {
249                        Some(query.as_str())
250                    } else {
251                        None
252                    }
253                })
254                .unwrap_or_else(|| unreachable!("FtsNodes chosen but no TextSearch step in AST"));
255            // Sanitize FTS5 metacharacters to prevent syntax errors and query
256            // injection. Each user token is quoted so FTS5 operators (AND, OR,
257            // NOT, NEAR, column filters, wildcards) are treated as literals.
258            binds.push(BindValue::Text(sanitize_fts5_query(raw_query)));
259            binds.push(BindValue::Text(ast.root_kind.clone()));
260            format!(
261                "base_candidates AS (
262                    SELECT DISTINCT src.logical_id
263                    FROM fts_nodes f
264                    JOIN chunks c ON c.id = f.chunk_id
265                    JOIN nodes src ON src.logical_id = c.node_logical_id AND src.superseded_at IS NULL
266                    WHERE fts_nodes MATCH ?1
267                      AND src.kind = ?2
268                    LIMIT {base_limit}
269                )"
270            )
271        }
272        DrivingTable::Nodes => {
273            binds.push(BindValue::Text(ast.root_kind.clone()));
274            let mut sql = "base_candidates AS (
275                    SELECT DISTINCT src.logical_id
276                    FROM nodes src
277                    WHERE src.superseded_at IS NULL
278                      AND src.kind = ?1"
279                .to_owned();
280            // Push filter predicates into base_candidates so the LIMIT applies
281            // after filtering, not before. Without this, the CTE may truncate
282            // the candidate set before property/source_ref filters run, causing
283            // nodes that satisfy the filter to be excluded from results.
284            for step in &ast.steps {
285                if let QueryStep::Filter(predicate) = step {
286                    match predicate {
287                        Predicate::LogicalIdEq(logical_id) => {
288                            binds.push(BindValue::Text(logical_id.clone()));
289                            let bind_index = binds.len();
290                            let _ = write!(
291                                &mut sql,
292                                "\n                      AND src.logical_id = ?{bind_index}"
293                            );
294                        }
295                        Predicate::JsonPathEq { path, value } => {
296                            validate_json_path(path)?;
297                            binds.push(BindValue::Text(path.clone()));
298                            let path_index = binds.len();
299                            binds.push(match value {
300                                ScalarValue::Text(text) => BindValue::Text(text.clone()),
301                                ScalarValue::Integer(integer) => BindValue::Integer(*integer),
302                                ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
303                            });
304                            let value_index = binds.len();
305                            let _ = write!(
306                                &mut sql,
307                                "\n                      AND json_extract(src.properties, ?{path_index}) = ?{value_index}"
308                            );
309                        }
310                        Predicate::JsonPathCompare { path, op, value } => {
311                            validate_json_path(path)?;
312                            binds.push(BindValue::Text(path.clone()));
313                            let path_index = binds.len();
314                            binds.push(match value {
315                                ScalarValue::Text(text) => BindValue::Text(text.clone()),
316                                ScalarValue::Integer(integer) => BindValue::Integer(*integer),
317                                ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
318                            });
319                            let value_index = binds.len();
320                            let operator = match op {
321                                ComparisonOp::Gt => ">",
322                                ComparisonOp::Gte => ">=",
323                                ComparisonOp::Lt => "<",
324                                ComparisonOp::Lte => "<=",
325                            };
326                            let _ = write!(
327                                &mut sql,
328                                "\n                      AND json_extract(src.properties, ?{path_index}) {operator} ?{value_index}"
329                            );
330                        }
331                        Predicate::SourceRefEq(source_ref) => {
332                            binds.push(BindValue::Text(source_ref.clone()));
333                            let bind_index = binds.len();
334                            let _ = write!(
335                                &mut sql,
336                                "\n                      AND src.source_ref = ?{bind_index}"
337                            );
338                        }
339                        Predicate::KindEq(_) => {
340                            // Already filtered by ast.root_kind above.
341                        }
342                    }
343                }
344            }
345            let _ = write!(
346                &mut sql,
347                "\n                    LIMIT {base_limit}\n                )"
348            );
349            sql
350        }
351    };
352
353    let mut sql = format!("WITH RECURSIVE\n{base_candidates}");
354    let source_alias = if traversal.is_some() { "t" } else { "bc" };
355
356    if let Some((direction, label, max_depth)) = traversal {
357        binds.push(BindValue::Text(label.to_owned()));
358        let label_index = binds.len();
359        let (join_condition, next_logical_id) = match direction {
360            TraverseDirection::Out => ("e.source_logical_id = t.logical_id", "e.target_logical_id"),
361            TraverseDirection::In => ("e.target_logical_id = t.logical_id", "e.source_logical_id"),
362        };
363
364        let _ = write!(
365            &mut sql,
366            ",
367traversed(logical_id, depth, visited) AS (
368    SELECT bc.logical_id, 0, printf(',%s,', bc.logical_id)
369    FROM base_candidates bc
370    UNION ALL
371    SELECT {next_logical_id}, t.depth + 1, t.visited || {next_logical_id} || ','
372    FROM traversed t
373    JOIN edges e ON {join_condition}
374        AND e.kind = ?{label_index}
375        AND e.superseded_at IS NULL
376    WHERE t.depth < {max_depth}
377      AND instr(t.visited, printf(',%s,', {next_logical_id})) = 0
378    LIMIT {}
379)",
380            hints.hard_limit
381        );
382    }
383
384    let _ = write!(
385        &mut sql,
386        "
387SELECT DISTINCT n.row_id, n.logical_id, n.kind, n.properties
388FROM {} {source_alias}
389JOIN nodes n ON n.logical_id = {source_alias}.logical_id
390    AND n.superseded_at IS NULL
391WHERE 1 = 1",
392        if traversal.is_some() {
393            "traversed"
394        } else {
395            "base_candidates"
396        }
397    );
398
399    for step in &ast.steps {
400        if let QueryStep::Filter(predicate) = step {
401            // For the Nodes driving table, filter predicates were already pushed
402            // into base_candidates so the CTE LIMIT applies after filtering.
403            // Skip them here to avoid duplicate bind values and redundant clauses.
404            if driving_table == DrivingTable::Nodes {
405                // KindEq is the only predicate NOT pushed into base_candidates
406                // (root_kind is handled separately there).
407                if let Predicate::KindEq(kind) = predicate {
408                    binds.push(BindValue::Text(kind.clone()));
409                    let bind_index = binds.len();
410                    let _ = write!(&mut sql, "\n  AND n.kind = ?{bind_index}");
411                }
412                continue;
413            }
414            match predicate {
415                Predicate::LogicalIdEq(logical_id) => {
416                    binds.push(BindValue::Text(logical_id.clone()));
417                    let bind_index = binds.len();
418                    let _ = write!(&mut sql, "\n  AND n.logical_id = ?{bind_index}");
419                }
420                Predicate::KindEq(kind) => {
421                    binds.push(BindValue::Text(kind.clone()));
422                    let bind_index = binds.len();
423                    let _ = write!(&mut sql, "\n  AND n.kind = ?{bind_index}");
424                }
425                Predicate::JsonPathEq { path, value } => {
426                    validate_json_path(path)?;
427                    binds.push(BindValue::Text(path.clone()));
428                    let path_index = binds.len();
429                    binds.push(match value {
430                        ScalarValue::Text(text) => BindValue::Text(text.clone()),
431                        ScalarValue::Integer(integer) => BindValue::Integer(*integer),
432                        ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
433                    });
434                    let value_index = binds.len();
435                    let _ = write!(
436                        &mut sql,
437                        "\n  AND json_extract(n.properties, ?{path_index}) = ?{value_index}",
438                    );
439                }
440                Predicate::JsonPathCompare { path, op, value } => {
441                    validate_json_path(path)?;
442                    binds.push(BindValue::Text(path.clone()));
443                    let path_index = binds.len();
444                    binds.push(match value {
445                        ScalarValue::Text(text) => BindValue::Text(text.clone()),
446                        ScalarValue::Integer(integer) => BindValue::Integer(*integer),
447                        ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
448                    });
449                    let value_index = binds.len();
450                    let operator = match op {
451                        ComparisonOp::Gt => ">",
452                        ComparisonOp::Gte => ">=",
453                        ComparisonOp::Lt => "<",
454                        ComparisonOp::Lte => "<=",
455                    };
456                    let _ = write!(
457                        &mut sql,
458                        "\n  AND json_extract(n.properties, ?{path_index}) {operator} ?{value_index}",
459                    );
460                }
461                Predicate::SourceRefEq(source_ref) => {
462                    binds.push(BindValue::Text(source_ref.clone()));
463                    let bind_index = binds.len();
464                    let _ = write!(&mut sql, "\n  AND n.source_ref = ?{bind_index}");
465                }
466            }
467        }
468    }
469
470    let _ = write!(&mut sql, "\nLIMIT {final_limit}");
471
472    if binds.len() > MAX_BIND_PARAMETERS {
473        return Err(CompileError::TooManyBindParameters(binds.len()));
474    }
475
476    Ok(CompiledQuery {
477        sql,
478        binds,
479        shape_hash,
480        driving_table,
481        hints,
482    })
483}
484
485/// Compile a [`QueryAst`] into a [`CompiledGroupedQuery`] for grouped execution.
486///
487/// # Errors
488///
489/// Returns a [`CompileError`] if the AST exceeds expansion-slot limits,
490/// contains empty slot names, or specifies a traversal depth beyond the
491/// configured maximum.
492pub fn compile_grouped_query(ast: &QueryAst) -> Result<CompiledGroupedQuery, CompileError> {
493    if ast.expansions.len() > MAX_EXPANSION_SLOTS {
494        return Err(CompileError::TooManyExpansionSlots(ast.expansions.len()));
495    }
496
497    let mut seen = std::collections::BTreeSet::new();
498    for expansion in &ast.expansions {
499        if expansion.slot.trim().is_empty() {
500            return Err(CompileError::EmptyExpansionSlotName);
501        }
502        if expansion.max_depth > MAX_TRAVERSAL_DEPTH {
503            return Err(CompileError::TraversalTooDeep(expansion.max_depth));
504        }
505        if !seen.insert(expansion.slot.clone()) {
506            return Err(CompileError::DuplicateExpansionSlot(expansion.slot.clone()));
507        }
508    }
509
510    let mut root_ast = ast.clone();
511    root_ast.expansions.clear();
512    let root = compile_query(&root_ast)?;
513    let hints = execution_hints(ast);
514    let shape_hash = ShapeHash(hash_signature(&shape_signature(ast)));
515
516    Ok(CompiledGroupedQuery {
517        root,
518        expansions: ast.expansions.clone(),
519        shape_hash,
520        hints,
521    })
522}
523
/// Hash `signature` with 64-bit FNV-1a.
///
/// Unlike `DefaultHasher`, the result is fixed by the algorithm and so is
/// stable across Rust versions and program invocations.
fn hash_signature(signature: &str) -> u64 {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;
    // FNV-1a: XOR the byte in first, then multiply by the prime (wrapping).
    signature
        .as_bytes()
        .iter()
        .fold(FNV_OFFSET_BASIS, |acc, &byte| {
            (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
        })
}
536
537#[cfg(test)]
538#[allow(clippy::expect_used, clippy::items_after_statements)]
539mod tests {
540    use rstest::rstest;
541
542    use crate::{
543        CompileError, DrivingTable, QueryBuilder, TraverseDirection, compile_grouped_query,
544        compile_query,
545    };
546
547    #[test]
548    fn vector_query_compiles_to_chunk_resolution() {
549        let compiled = compile_query(
550            &QueryBuilder::nodes("Meeting")
551                .vector_search("budget", 5)
552                .limit(5)
553                .into_ast(),
554        )
555        .expect("compiled query");
556
557        assert_eq!(compiled.driving_table, DrivingTable::VecNodes);
558        assert!(compiled.sql.contains("JOIN chunks c ON c.id = vc.chunk_id"));
559        assert!(
560            compiled
561                .sql
562                .contains("JOIN nodes src ON src.logical_id = c.node_logical_id")
563        );
564    }
565
566    #[rstest]
567    #[case(5, 7)]
568    #[case(3, 11)]
569    fn structural_limits_change_shape_hash(#[case] left: usize, #[case] right: usize) {
570        let left_compiled = compile_query(
571            &QueryBuilder::nodes("Meeting")
572                .text_search("budget", left)
573                .limit(left)
574                .into_ast(),
575        )
576        .expect("left query");
577        let right_compiled = compile_query(
578            &QueryBuilder::nodes("Meeting")
579                .text_search("budget", right)
580                .limit(right)
581                .into_ast(),
582        )
583        .expect("right query");
584
585        assert_ne!(left_compiled.shape_hash, right_compiled.shape_hash);
586    }
587
588    #[test]
589    fn traversal_query_is_depth_bounded() {
590        let compiled = compile_query(
591            &QueryBuilder::nodes("Meeting")
592                .text_search("budget", 5)
593                .traverse(TraverseDirection::Out, "HAS_TASK", 3)
594                .limit(10)
595                .into_ast(),
596        )
597        .expect("compiled traversal");
598
599        assert!(compiled.sql.contains("WITH RECURSIVE"));
600        assert!(compiled.sql.contains("WHERE t.depth < 3"));
601    }
602
603    #[test]
604    fn logical_id_filter_is_compiled() {
605        let compiled = compile_query(
606            &QueryBuilder::nodes("Meeting")
607                .filter_logical_id_eq("meeting-123")
608                .filter_json_text_eq("$.status", "active")
609                .limit(1)
610                .into_ast(),
611        )
612        .expect("compiled query");
613
614        // LogicalIdEq is applied in base_candidates (src alias) for the Nodes driver,
615        // NOT duplicated in the final WHERE. The JOIN condition still contains
616        // "n.logical_id =" which satisfies this check.
617        assert!(compiled.sql.contains("n.logical_id ="));
618        assert!(compiled.sql.contains("src.logical_id ="));
619        assert!(compiled.sql.contains("json_extract"));
620        // Only one bind for the logical_id (not two).
621        use crate::BindValue;
622        assert_eq!(
623            compiled
624                .binds
625                .iter()
626                .filter(|b| matches!(b, BindValue::Text(s) if s == "meeting-123"))
627                .count(),
628            1
629        );
630    }
631
632    #[test]
633    fn compile_rejects_invalid_json_path() {
634        use crate::{Predicate, QueryStep, ScalarValue};
635        let mut ast = QueryBuilder::nodes("Meeting").into_ast();
636        // Attempt SQL injection via JSON path.
637        ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
638            path: "$') OR 1=1 --".to_owned(),
639            value: ScalarValue::Text("x".to_owned()),
640        }));
641        use crate::CompileError;
642        let result = compile_query(&ast);
643        assert!(
644            matches!(result, Err(CompileError::InvalidJsonPath(_))),
645            "expected InvalidJsonPath, got {result:?}"
646        );
647    }
648
649    #[test]
650    fn compile_accepts_valid_json_paths() {
651        use crate::{Predicate, QueryStep, ScalarValue};
652        for valid_path in ["$.status", "$.foo.bar", "$.a_b.c2"] {
653            let mut ast = QueryBuilder::nodes("Meeting").into_ast();
654            ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
655                path: valid_path.to_owned(),
656                value: ScalarValue::Text("v".to_owned()),
657            }));
658            assert!(
659                compile_query(&ast).is_ok(),
660                "expected valid path {valid_path:?} to compile"
661            );
662        }
663    }
664
665    #[test]
666    fn compile_rejects_too_many_bind_parameters() {
667        use crate::{Predicate, QueryStep, ScalarValue};
668        let mut ast = QueryBuilder::nodes("Meeting").into_ast();
669        // kind occupies 1 bind; each json filter now occupies 2 binds (path + value).
670        // 7 json filters → 1 + 14 = 15 (ok), 8 → 1 + 16 = 17 (exceeds limit of 15).
671        for i in 0..8 {
672            ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
673                path: format!("$.f{i}"),
674                value: ScalarValue::Text("v".to_owned()),
675            }));
676        }
677        use crate::CompileError;
678        let result = compile_query(&ast);
679        assert!(
680            matches!(result, Err(CompileError::TooManyBindParameters(17))),
681            "expected TooManyBindParameters(17), got {result:?}"
682        );
683    }
684
685    #[test]
686    fn compile_rejects_excessive_traversal_depth() {
687        let result = compile_query(
688            &QueryBuilder::nodes("Meeting")
689                .text_search("budget", 5)
690                .traverse(TraverseDirection::Out, "HAS_TASK", 51)
691                .limit(10)
692                .into_ast(),
693        );
694        assert!(
695            matches!(result, Err(CompileError::TraversalTooDeep(51))),
696            "expected TraversalTooDeep(51), got {result:?}"
697        );
698    }
699
700    #[test]
701    fn grouped_queries_with_same_structure_share_shape_hash() {
702        let left = compile_grouped_query(
703            &QueryBuilder::nodes("Meeting")
704                .text_search("budget", 5)
705                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
706                .limit(10)
707                .into_ast(),
708        )
709        .expect("left grouped query");
710        let right = compile_grouped_query(
711            &QueryBuilder::nodes("Meeting")
712                .text_search("planning", 5)
713                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
714                .limit(10)
715                .into_ast(),
716        )
717        .expect("right grouped query");
718
719        assert_eq!(left.shape_hash, right.shape_hash);
720    }
721
722    #[test]
723    fn compile_grouped_rejects_duplicate_expansion_slot_names() {
724        let result = compile_grouped_query(
725            &QueryBuilder::nodes("Meeting")
726                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
727                .expand("tasks", TraverseDirection::Out, "HAS_DECISION", 1)
728                .into_ast(),
729        );
730
731        assert!(
732            matches!(result, Err(CompileError::DuplicateExpansionSlot(ref slot)) if slot == "tasks"),
733            "expected DuplicateExpansionSlot(\"tasks\"), got {result:?}"
734        );
735    }
736
737    #[test]
738    fn flat_compile_rejects_queries_with_expansions() {
739        let result = compile_query(
740            &QueryBuilder::nodes("Meeting")
741                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
742                .into_ast(),
743        );
744
745        assert!(
746            matches!(
747                result,
748                Err(CompileError::FlatCompileDoesNotSupportExpansions)
749            ),
750            "expected FlatCompileDoesNotSupportExpansions, got {result:?}"
751        );
752    }
753
754    #[test]
755    fn json_path_compiled_as_bind_parameter() {
756        let compiled = compile_query(
757            &QueryBuilder::nodes("Meeting")
758                .filter_json_text_eq("$.status", "active")
759                .limit(1)
760                .into_ast(),
761        )
762        .expect("compiled query");
763
764        // Path must be parameterized, not interpolated into the SQL string.
765        assert!(
766            !compiled.sql.contains("'$.status'"),
767            "JSON path must not appear as a SQL string literal"
768        );
769        assert!(
770            compiled.sql.contains("json_extract(src.properties, ?"),
771            "JSON path must be a bind parameter (pushed into base_candidates for Nodes driver)"
772        );
773        // Path and value should both be in the bind list.
774        use crate::BindValue;
775        assert!(
776            compiled
777                .binds
778                .iter()
779                .any(|b| matches!(b, BindValue::Text(s) if s == "$.status"))
780        );
781        assert!(
782            compiled
783                .binds
784                .iter()
785                .any(|b| matches!(b, BindValue::Text(s) if s == "active"))
786        );
787    }
788
789    // --- FTS5 sanitization tests ---
790
791    #[test]
792    fn sanitize_fts5_plain_tokens() {
793        use super::sanitize_fts5_query;
794        assert_eq!(
795            sanitize_fts5_query("budget meeting"),
796            "\"budget\" \"meeting\""
797        );
798    }
799
800    #[test]
801    fn sanitize_fts5_apostrophe() {
802        use super::sanitize_fts5_query;
803        // The apostrophe that triggered issue #31
804        assert_eq!(sanitize_fts5_query("User's name"), "\"User's\" \"name\"");
805    }
806
807    #[test]
808    fn sanitize_fts5_embedded_double_quotes() {
809        use super::sanitize_fts5_query;
810        assert_eq!(
811            sanitize_fts5_query(r#"say "hello" world"#),
812            "\"say\" \"\"\"hello\"\"\" \"world\""
813        );
814    }
815
816    #[test]
817    fn sanitize_fts5_operators_neutralized() {
818        use super::sanitize_fts5_query;
819        // FTS5 operators should be quoted, not interpreted
820        assert_eq!(
821            sanitize_fts5_query("cats AND dogs OR fish"),
822            "\"cats\" \"AND\" \"dogs\" \"OR\" \"fish\""
823        );
824    }
825
826    #[test]
827    fn sanitize_fts5_special_chars() {
828        use super::sanitize_fts5_query;
829        // Wildcards, column filters, parentheses, NEAR
830        assert_eq!(sanitize_fts5_query("prefix*"), "\"prefix*\"");
831        assert_eq!(sanitize_fts5_query("col:value"), "\"col:value\"");
832        assert_eq!(sanitize_fts5_query("(a OR b)"), "\"(a\" \"OR\" \"b)\"");
833        assert_eq!(sanitize_fts5_query("a NEAR b"), "\"a\" \"NEAR\" \"b\"");
834    }
835
836    #[test]
837    fn sanitize_fts5_empty_input() {
838        use super::sanitize_fts5_query;
839        assert_eq!(sanitize_fts5_query(""), "");
840        assert_eq!(sanitize_fts5_query("   "), "");
841    }
842
843    // --- Filter pushdown regression tests ---
844    //
845    // These tests verify that filter predicates are pushed into the
846    // base_candidates CTE for the Nodes driving table, so the CTE LIMIT
847    // applies after filtering rather than before.  Without pushdown, the
848    // LIMIT may truncate the candidate set before the filter runs, causing
849    // matching nodes to be silently excluded.
850
851    #[test]
852    fn nodes_driver_pushes_json_eq_filter_into_base_candidates() {
853        let compiled = compile_query(
854            &QueryBuilder::nodes("Meeting")
855                .filter_json_text_eq("$.status", "active")
856                .limit(5)
857                .into_ast(),
858        )
859        .expect("compiled query");
860
861        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
862        // Filter must appear inside base_candidates (src alias), not the
863        // outer WHERE (n alias).
864        assert!(
865            compiled.sql.contains("json_extract(src.properties, ?"),
866            "json_extract must reference src (base_candidates), got:\n{}",
867            compiled.sql,
868        );
869        assert!(
870            !compiled.sql.contains("json_extract(n.properties, ?"),
871            "json_extract must NOT appear in outer WHERE for Nodes driver, got:\n{}",
872            compiled.sql,
873        );
874    }
875
876    #[test]
877    fn nodes_driver_pushes_json_compare_filter_into_base_candidates() {
878        let compiled = compile_query(
879            &QueryBuilder::nodes("Meeting")
880                .filter_json_integer_gte("$.priority", 5)
881                .limit(10)
882                .into_ast(),
883        )
884        .expect("compiled query");
885
886        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
887        assert!(
888            compiled.sql.contains("json_extract(src.properties, ?"),
889            "comparison filter must be in base_candidates, got:\n{}",
890            compiled.sql,
891        );
892        assert!(
893            !compiled.sql.contains("json_extract(n.properties, ?"),
894            "comparison filter must NOT be in outer WHERE for Nodes driver",
895        );
896        assert!(
897            compiled.sql.contains(">= ?"),
898            "expected >= operator in SQL, got:\n{}",
899            compiled.sql,
900        );
901    }
902
903    #[test]
904    fn nodes_driver_pushes_source_ref_filter_into_base_candidates() {
905        let compiled = compile_query(
906            &QueryBuilder::nodes("Meeting")
907                .filter_source_ref_eq("ref-123")
908                .limit(5)
909                .into_ast(),
910        )
911        .expect("compiled query");
912
913        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
914        assert!(
915            compiled.sql.contains("src.source_ref = ?"),
916            "source_ref filter must be in base_candidates, got:\n{}",
917            compiled.sql,
918        );
919        assert!(
920            !compiled.sql.contains("n.source_ref = ?"),
921            "source_ref filter must NOT be in outer WHERE for Nodes driver",
922        );
923    }
924
925    #[test]
926    fn nodes_driver_pushes_multiple_filters_into_base_candidates() {
927        let compiled = compile_query(
928            &QueryBuilder::nodes("Meeting")
929                .filter_logical_id_eq("meeting-1")
930                .filter_json_text_eq("$.status", "active")
931                .filter_json_integer_gte("$.priority", 5)
932                .filter_source_ref_eq("ref-abc")
933                .limit(1)
934                .into_ast(),
935        )
936        .expect("compiled query");
937
938        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
939        // All filters should be in base_candidates, none in outer WHERE
940        assert!(
941            compiled.sql.contains("src.logical_id = ?"),
942            "logical_id filter must be in base_candidates",
943        );
944        assert!(
945            compiled.sql.contains("json_extract(src.properties, ?"),
946            "JSON filters must be in base_candidates",
947        );
948        assert!(
949            compiled.sql.contains("src.source_ref = ?"),
950            "source_ref filter must be in base_candidates",
951        );
952        // Each bind value should appear exactly once (not duplicated in outer WHERE)
953        use crate::BindValue;
954        assert_eq!(
955            compiled
956                .binds
957                .iter()
958                .filter(|b| matches!(b, BindValue::Text(s) if s == "meeting-1"))
959                .count(),
960            1,
961            "logical_id bind must not be duplicated"
962        );
963        assert_eq!(
964            compiled
965                .binds
966                .iter()
967                .filter(|b| matches!(b, BindValue::Text(s) if s == "ref-abc"))
968                .count(),
969            1,
970            "source_ref bind must not be duplicated"
971        );
972    }
973
974    #[test]
975    fn fts_driver_keeps_json_filter_in_outer_where() {
976        // When the driving table is FTS (not Nodes), JSON filters should
977        // remain in the outer WHERE clause, not pushed into base_candidates.
978        let compiled = compile_query(
979            &QueryBuilder::nodes("Meeting")
980                .text_search("budget", 5)
981                .filter_json_text_eq("$.status", "active")
982                .limit(5)
983                .into_ast(),
984        )
985        .expect("compiled query");
986
987        assert_eq!(compiled.driving_table, DrivingTable::FtsNodes);
988        assert!(
989            compiled.sql.contains("json_extract(n.properties, ?"),
990            "JSON filter must be in outer WHERE for FTS driver, got:\n{}",
991            compiled.sql,
992        );
993        assert!(
994            !compiled.sql.contains("json_extract(src.properties, ?"),
995            "JSON filter must NOT be in base_candidates for FTS driver",
996        );
997    }
998
999    #[test]
1000    fn fts5_query_bind_is_sanitized() {
1001        // Verify the compiled query's bind value is sanitized, not the raw input
1002        let compiled = compile_query(
1003            &QueryBuilder::nodes("Meeting")
1004                .text_search("User's name", 5)
1005                .limit(5)
1006                .into_ast(),
1007        )
1008        .expect("compiled query");
1009
1010        use crate::BindValue;
1011        assert!(
1012            compiled
1013                .binds
1014                .iter()
1015                .any(|b| matches!(b, BindValue::Text(s) if s == "\"User's\" \"name\"")),
1016            "FTS5 query bind should be sanitized; got {:?}",
1017            compiled.binds
1018        );
1019    }
1020}