// fathomdb_query/compile.rs

1use std::fmt::Write;
2
3use crate::plan::{choose_driving_table, execution_hints, shape_signature};
4use crate::{
5    ComparisonOp, DrivingTable, ExpansionSlot, Predicate, QueryAst, QueryStep, ScalarValue,
6    TraverseDirection,
7};
8
/// A typed bind value for a compiled SQL query parameter.
///
/// Values are stored positionally in [`CompiledQuery::binds`]: the element at
/// zero-based index `i` is the value for the `?{i + 1}` placeholder in the
/// generated SQL.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum BindValue {
    /// A UTF-8 text parameter (node kinds, search queries, JSON paths, edge
    /// labels and text filter values are all bound as text).
    Text(String),
    /// A 64-bit signed integer parameter.
    Integer(i64),
    /// A boolean parameter.
    Bool(bool),
}
19
/// A deterministic hash of a query's structural shape, independent of bind values.
///
/// The wrapped value is the 64-bit FNV-1a hash of the string produced by
/// `shape_signature` for the query AST.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct ShapeHash(pub u64);
23
/// A fully compiled query ready for execution against `SQLite`.
///
/// Produced by [`compile_query`]. `sql` and `binds` belong together: the SQL
/// uses numbered placeholders (`?1`, `?2`, ...) that index into `binds`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CompiledQuery {
    /// The generated SQL text (a `WITH RECURSIVE` statement).
    pub sql: String,
    /// Positional bind parameters for the SQL; `binds[i]` supplies `?{i + 1}`.
    pub binds: Vec<BindValue>,
    /// Structural shape hash for caching.
    pub shape_hash: ShapeHash,
    /// The driving table chosen by the query planner.
    pub driving_table: DrivingTable,
    /// Execution hints derived from the query shape.
    pub hints: crate::ExecutionHints,
}
38
/// A compiled grouped query containing a root query and expansion slots.
///
/// Produced by [`compile_grouped_query`]. Only the root is lowered to SQL
/// here; the expansion slots are carried through unchanged from the AST.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CompiledGroupedQuery {
    /// The root flat query, compiled from the AST with its expansions removed.
    pub root: CompiledQuery,
    /// Expansion slots to evaluate per root result.
    pub expansions: Vec<ExpansionSlot>,
    /// Structural shape hash covering the root query and all expansion slots.
    pub shape_hash: ShapeHash,
    /// Execution hints derived from the grouped query shape.
    pub hints: crate::ExecutionHints,
}
51
52/// Errors that can occur during query compilation.
53#[derive(Clone, Debug, PartialEq, Eq, thiserror::Error)]
54pub enum CompileError {
55    #[error("multiple traversal steps are not supported in v1")]
56    TooManyTraversals,
57    #[error("flat query compilation does not support expansions; use compile_grouped")]
58    FlatCompileDoesNotSupportExpansions,
59    #[error("duplicate expansion slot name: {0}")]
60    DuplicateExpansionSlot(String),
61    #[error("expansion slot name must be non-empty")]
62    EmptyExpansionSlotName,
63    #[error("too many expansion slots: max {MAX_EXPANSION_SLOTS}, got {0}")]
64    TooManyExpansionSlots(usize),
65    #[error("too many bind parameters: max 15, got {0}")]
66    TooManyBindParameters(usize),
67    #[error("traversal depth {0} exceeds maximum of {MAX_TRAVERSAL_DEPTH}")]
68    TraversalTooDeep(usize),
69    #[error("invalid JSON path: must match $(.key)+ pattern, got {0:?}")]
70    InvalidJsonPath(String),
71}
72
/// Turn free-form user text into a safe FTS5 `MATCH` expression.
///
/// The input is split on whitespace and every resulting token is emitted as a
/// double-quoted FTS5 string, with any `"` inside the token doubled as FTS5
/// escaping requires. Tokens are separated by a single space, which FTS5
/// treats as an implicit AND. Because every token ends up inside quotes,
/// FTS5 syntax such as `AND`/`OR`/`NOT`/`NEAR`, column filters, parentheses
/// and `*` wildcards is matched literally instead of being interpreted.
///
/// Empty or whitespace-only input yields an empty string; the caller is
/// expected to cope with that (FTS5 MATCH with an empty string returns no
/// rows).
fn sanitize_fts5_query(raw: &str) -> String {
    let mut expression = String::with_capacity(raw.len() + 2);
    for word in raw.split_whitespace() {
        if !expression.is_empty() {
            expression.push(' ');
        }
        expression.push('"');
        for ch in word.chars() {
            // A literal quote is written as two quotes inside an FTS5 string.
            if ch == '"' {
                expression.push('"');
            }
            expression.push(ch);
        }
        expression.push('"');
    }
    expression
}
92
93/// Security fix H-1: Validate JSON path against a strict allowlist pattern to
94/// prevent SQL injection. Retained as defense-in-depth even though the path is
95/// now parameterized (see `FIX(review)` in `compile_query`). Only paths like
96/// `$.foo`, `$.foo.bar_baz` are allowed.
97fn validate_json_path(path: &str) -> Result<(), CompileError> {
98    let valid = path.starts_with('$')
99        && path.len() > 1
100        && path[1..].split('.').all(|segment| {
101            segment.is_empty()
102                || segment
103                    .chars()
104                    .all(|c| c.is_ascii_alphanumeric() || c == '_')
105                    && !segment.is_empty()
106        })
107        && path.contains('.');
108    if !valid {
109        return Err(CompileError::InvalidJsonPath(path.to_owned()));
110    }
111    Ok(())
112}
113
/// Largest number of bind parameters a compiled flat query may carry;
/// `compile_query` fails with `CompileError::TooManyBindParameters` above it.
const MAX_BIND_PARAMETERS: usize = 15;
/// Largest number of expansion slots a grouped query may declare;
/// `compile_grouped_query` fails with `CompileError::TooManyExpansionSlots`
/// above it.
const MAX_EXPANSION_SLOTS: usize = 8;

// FIX(review): max_depth was unbounded — usize::MAX produces an effectively infinite CTE.
// Options: (A) silent clamp at compile, (B) reject with CompileError, (C) validate in builder.
// Chose (B): consistent with existing TooManyTraversals/TooManyBindParameters pattern.
// The compiler is the validation boundary; silent clamping would surprise callers.
/// Largest `max_depth` accepted for a traversal step or an expansion slot;
/// larger values are rejected with `CompileError::TraversalTooDeep`.
const MAX_TRAVERSAL_DEPTH: usize = 50;
122
123/// Compile a [`QueryAst`] into a [`CompiledQuery`] ready for execution.
124///
125/// # Compilation strategy
126///
127/// The compiled SQL is structured as a `WITH RECURSIVE` CTE named
128/// `base_candidates` followed by a final `SELECT ... JOIN nodes` projection.
129///
130/// For the **Nodes** driving table (no FTS/vector search), all filter
131/// predicates (`LogicalIdEq`, `JsonPathEq`, `JsonPathCompare`,
132/// `SourceRefEq`) are pushed into the `base_candidates` CTE so that the
133/// CTE's `LIMIT` applies *after* filtering. Without this pushdown the LIMIT
134/// would truncate the candidate set before property filters run, silently
135/// excluding nodes whose properties satisfy the filter but whose insertion
136/// order falls outside the limit window.
137///
138/// For **FTS** and **vector** driving tables, filters remain in the outer
139/// `WHERE` clause because the CTE is already narrowed by the search itself.
140///
141/// # Errors
142///
143/// Returns [`CompileError::TooManyTraversals`] if more than one traversal step
144/// is present, or [`CompileError::TooManyBindParameters`] if the resulting SQL
145/// would require more than 15 bind parameters.
146///
147/// # Panics
148///
149/// Panics (via `unreachable!`) if the AST is internally inconsistent — for
150/// example, if `choose_driving_table` selects `VecNodes` but no
151/// `VectorSearch` step is present in the AST. This cannot happen through the
152/// public [`QueryBuilder`] API.
153#[allow(clippy::too_many_lines)]
154pub fn compile_query(ast: &QueryAst) -> Result<CompiledQuery, CompileError> {
155    if !ast.expansions.is_empty() {
156        return Err(CompileError::FlatCompileDoesNotSupportExpansions);
157    }
158
159    let traversals = ast
160        .steps
161        .iter()
162        .filter(|step| matches!(step, QueryStep::Traverse { .. }))
163        .count();
164    if traversals > 1 {
165        return Err(CompileError::TooManyTraversals);
166    }
167
168    let excessive_depth = ast.steps.iter().find_map(|step| {
169        if let QueryStep::Traverse { max_depth, .. } = step
170            && *max_depth > MAX_TRAVERSAL_DEPTH
171        {
172            return Some(*max_depth);
173        }
174        None
175    });
176    if let Some(depth) = excessive_depth {
177        return Err(CompileError::TraversalTooDeep(depth));
178    }
179
180    let driving_table = choose_driving_table(ast);
181    let hints = execution_hints(ast);
182    let shape_hash = ShapeHash(hash_signature(&shape_signature(ast)));
183
184    let base_limit = ast
185        .steps
186        .iter()
187        .find_map(|step| match step {
188            QueryStep::VectorSearch { limit, .. } | QueryStep::TextSearch { limit, .. } => {
189                Some(*limit)
190            }
191            _ => None,
192        })
193        .or(ast.final_limit)
194        .unwrap_or(25);
195
196    let final_limit = ast.final_limit.unwrap_or(base_limit);
197    let traversal = ast.steps.iter().find_map(|step| {
198        if let QueryStep::Traverse {
199            direction,
200            label,
201            max_depth,
202        } = step
203        {
204            Some((*direction, label.as_str(), *max_depth))
205        } else {
206            None
207        }
208    });
209
210    let mut binds = Vec::new();
211    let base_candidates = match driving_table {
212        DrivingTable::VecNodes => {
213            let query = ast
214                .steps
215                .iter()
216                .find_map(|step| {
217                    if let QueryStep::VectorSearch { query, .. } = step {
218                        Some(query.as_str())
219                    } else {
220                        None
221                    }
222                })
223                .unwrap_or_else(|| unreachable!("VecNodes chosen but no VectorSearch step in AST"));
224            binds.push(BindValue::Text(query.to_owned()));
225            binds.push(BindValue::Text(ast.root_kind.clone()));
226            // sqlite-vec requires the LIMIT/k constraint to be visible directly on the
227            // vec0 KNN scan. Using a sub-select isolates the vec0 LIMIT so the join
228            // with chunks/nodes does not prevent the query planner from recognising it.
229            format!(
230                "base_candidates AS (
231                    SELECT DISTINCT src.logical_id
232                    FROM (
233                        SELECT chunk_id FROM vec_nodes_active
234                        WHERE embedding MATCH ?1
235                        LIMIT {base_limit}
236                    ) vc
237                    JOIN chunks c ON c.id = vc.chunk_id
238                    JOIN nodes src ON src.logical_id = c.node_logical_id AND src.superseded_at IS NULL
239                    WHERE src.kind = ?2
240                )"
241            )
242        }
243        DrivingTable::FtsNodes => {
244            let raw_query = ast
245                .steps
246                .iter()
247                .find_map(|step| {
248                    if let QueryStep::TextSearch { query, .. } = step {
249                        Some(query.as_str())
250                    } else {
251                        None
252                    }
253                })
254                .unwrap_or_else(|| unreachable!("FtsNodes chosen but no TextSearch step in AST"));
255            // Sanitize FTS5 metacharacters to prevent syntax errors and query
256            // injection. Each user token is quoted so FTS5 operators (AND, OR,
257            // NOT, NEAR, column filters, wildcards) are treated as literals.
258            let sanitized = sanitize_fts5_query(raw_query);
259            // Each FTS5 virtual table requires its own MATCH bind parameter;
260            // reusing indices across the UNION is not supported by SQLite.
261            binds.push(BindValue::Text(sanitized.clone()));
262            binds.push(BindValue::Text(ast.root_kind.clone()));
263            binds.push(BindValue::Text(sanitized));
264            binds.push(BindValue::Text(ast.root_kind.clone()));
265            format!(
266                "base_candidates AS (
267                    SELECT DISTINCT logical_id FROM (
268                        SELECT src.logical_id
269                        FROM fts_nodes f
270                        JOIN chunks c ON c.id = f.chunk_id
271                        JOIN nodes src ON src.logical_id = c.node_logical_id AND src.superseded_at IS NULL
272                        WHERE fts_nodes MATCH ?1
273                          AND src.kind = ?2
274                        UNION
275                        SELECT fp.node_logical_id AS logical_id
276                        FROM fts_node_properties fp
277                        JOIN nodes src ON src.logical_id = fp.node_logical_id AND src.superseded_at IS NULL
278                        WHERE fts_node_properties MATCH ?3
279                          AND fp.kind = ?4
280                    )
281                    LIMIT {base_limit}
282                )"
283            )
284        }
285        DrivingTable::Nodes => {
286            binds.push(BindValue::Text(ast.root_kind.clone()));
287            let mut sql = "base_candidates AS (
288                    SELECT DISTINCT src.logical_id
289                    FROM nodes src
290                    WHERE src.superseded_at IS NULL
291                      AND src.kind = ?1"
292                .to_owned();
293            // Push filter predicates into base_candidates so the LIMIT applies
294            // after filtering, not before. Without this, the CTE may truncate
295            // the candidate set before property/source_ref filters run, causing
296            // nodes that satisfy the filter to be excluded from results.
297            for step in &ast.steps {
298                if let QueryStep::Filter(predicate) = step {
299                    match predicate {
300                        Predicate::LogicalIdEq(logical_id) => {
301                            binds.push(BindValue::Text(logical_id.clone()));
302                            let bind_index = binds.len();
303                            let _ = write!(
304                                &mut sql,
305                                "\n                      AND src.logical_id = ?{bind_index}"
306                            );
307                        }
308                        Predicate::JsonPathEq { path, value } => {
309                            validate_json_path(path)?;
310                            binds.push(BindValue::Text(path.clone()));
311                            let path_index = binds.len();
312                            binds.push(match value {
313                                ScalarValue::Text(text) => BindValue::Text(text.clone()),
314                                ScalarValue::Integer(integer) => BindValue::Integer(*integer),
315                                ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
316                            });
317                            let value_index = binds.len();
318                            let _ = write!(
319                                &mut sql,
320                                "\n                      AND json_extract(src.properties, ?{path_index}) = ?{value_index}"
321                            );
322                        }
323                        Predicate::JsonPathCompare { path, op, value } => {
324                            validate_json_path(path)?;
325                            binds.push(BindValue::Text(path.clone()));
326                            let path_index = binds.len();
327                            binds.push(match value {
328                                ScalarValue::Text(text) => BindValue::Text(text.clone()),
329                                ScalarValue::Integer(integer) => BindValue::Integer(*integer),
330                                ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
331                            });
332                            let value_index = binds.len();
333                            let operator = match op {
334                                ComparisonOp::Gt => ">",
335                                ComparisonOp::Gte => ">=",
336                                ComparisonOp::Lt => "<",
337                                ComparisonOp::Lte => "<=",
338                            };
339                            let _ = write!(
340                                &mut sql,
341                                "\n                      AND json_extract(src.properties, ?{path_index}) {operator} ?{value_index}"
342                            );
343                        }
344                        Predicate::SourceRefEq(source_ref) => {
345                            binds.push(BindValue::Text(source_ref.clone()));
346                            let bind_index = binds.len();
347                            let _ = write!(
348                                &mut sql,
349                                "\n                      AND src.source_ref = ?{bind_index}"
350                            );
351                        }
352                        Predicate::ContentRefNotNull => {
353                            let _ = write!(
354                                &mut sql,
355                                "\n                      AND src.content_ref IS NOT NULL"
356                            );
357                        }
358                        Predicate::ContentRefEq(uri) => {
359                            binds.push(BindValue::Text(uri.clone()));
360                            let bind_index = binds.len();
361                            let _ = write!(
362                                &mut sql,
363                                "\n                      AND src.content_ref = ?{bind_index}"
364                            );
365                        }
366                        Predicate::KindEq(_) => {
367                            // Already filtered by ast.root_kind above.
368                        }
369                    }
370                }
371            }
372            let _ = write!(
373                &mut sql,
374                "\n                    LIMIT {base_limit}\n                )"
375            );
376            sql
377        }
378    };
379
380    let mut sql = format!("WITH RECURSIVE\n{base_candidates}");
381    let source_alias = if traversal.is_some() { "t" } else { "bc" };
382
383    if let Some((direction, label, max_depth)) = traversal {
384        binds.push(BindValue::Text(label.to_owned()));
385        let label_index = binds.len();
386        let (join_condition, next_logical_id) = match direction {
387            TraverseDirection::Out => ("e.source_logical_id = t.logical_id", "e.target_logical_id"),
388            TraverseDirection::In => ("e.target_logical_id = t.logical_id", "e.source_logical_id"),
389        };
390
391        let _ = write!(
392            &mut sql,
393            ",
394traversed(logical_id, depth, visited) AS (
395    SELECT bc.logical_id, 0, printf(',%s,', bc.logical_id)
396    FROM base_candidates bc
397    UNION ALL
398    SELECT {next_logical_id}, t.depth + 1, t.visited || {next_logical_id} || ','
399    FROM traversed t
400    JOIN edges e ON {join_condition}
401        AND e.kind = ?{label_index}
402        AND e.superseded_at IS NULL
403    WHERE t.depth < {max_depth}
404      AND instr(t.visited, printf(',%s,', {next_logical_id})) = 0
405    LIMIT {}
406)",
407            hints.hard_limit
408        );
409    }
410
411    let _ = write!(
412        &mut sql,
413        "
414SELECT DISTINCT n.row_id, n.logical_id, n.kind, n.properties, n.content_ref
415FROM {} {source_alias}
416JOIN nodes n ON n.logical_id = {source_alias}.logical_id
417    AND n.superseded_at IS NULL
418WHERE 1 = 1",
419        if traversal.is_some() {
420            "traversed"
421        } else {
422            "base_candidates"
423        }
424    );
425
426    for step in &ast.steps {
427        if let QueryStep::Filter(predicate) = step {
428            // For the Nodes driving table, filter predicates were already pushed
429            // into base_candidates so the CTE LIMIT applies after filtering.
430            // Skip them here to avoid duplicate bind values and redundant clauses.
431            if driving_table == DrivingTable::Nodes {
432                // KindEq is the only predicate NOT pushed into base_candidates
433                // (root_kind is handled separately there).
434                if let Predicate::KindEq(kind) = predicate {
435                    binds.push(BindValue::Text(kind.clone()));
436                    let bind_index = binds.len();
437                    let _ = write!(&mut sql, "\n  AND n.kind = ?{bind_index}");
438                }
439                continue;
440            }
441            match predicate {
442                Predicate::LogicalIdEq(logical_id) => {
443                    binds.push(BindValue::Text(logical_id.clone()));
444                    let bind_index = binds.len();
445                    let _ = write!(&mut sql, "\n  AND n.logical_id = ?{bind_index}");
446                }
447                Predicate::KindEq(kind) => {
448                    binds.push(BindValue::Text(kind.clone()));
449                    let bind_index = binds.len();
450                    let _ = write!(&mut sql, "\n  AND n.kind = ?{bind_index}");
451                }
452                Predicate::JsonPathEq { path, value } => {
453                    validate_json_path(path)?;
454                    binds.push(BindValue::Text(path.clone()));
455                    let path_index = binds.len();
456                    binds.push(match value {
457                        ScalarValue::Text(text) => BindValue::Text(text.clone()),
458                        ScalarValue::Integer(integer) => BindValue::Integer(*integer),
459                        ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
460                    });
461                    let value_index = binds.len();
462                    let _ = write!(
463                        &mut sql,
464                        "\n  AND json_extract(n.properties, ?{path_index}) = ?{value_index}",
465                    );
466                }
467                Predicate::JsonPathCompare { path, op, value } => {
468                    validate_json_path(path)?;
469                    binds.push(BindValue::Text(path.clone()));
470                    let path_index = binds.len();
471                    binds.push(match value {
472                        ScalarValue::Text(text) => BindValue::Text(text.clone()),
473                        ScalarValue::Integer(integer) => BindValue::Integer(*integer),
474                        ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
475                    });
476                    let value_index = binds.len();
477                    let operator = match op {
478                        ComparisonOp::Gt => ">",
479                        ComparisonOp::Gte => ">=",
480                        ComparisonOp::Lt => "<",
481                        ComparisonOp::Lte => "<=",
482                    };
483                    let _ = write!(
484                        &mut sql,
485                        "\n  AND json_extract(n.properties, ?{path_index}) {operator} ?{value_index}",
486                    );
487                }
488                Predicate::SourceRefEq(source_ref) => {
489                    binds.push(BindValue::Text(source_ref.clone()));
490                    let bind_index = binds.len();
491                    let _ = write!(&mut sql, "\n  AND n.source_ref = ?{bind_index}");
492                }
493                Predicate::ContentRefNotNull => {
494                    let _ = write!(&mut sql, "\n  AND n.content_ref IS NOT NULL");
495                }
496                Predicate::ContentRefEq(uri) => {
497                    binds.push(BindValue::Text(uri.clone()));
498                    let bind_index = binds.len();
499                    let _ = write!(&mut sql, "\n  AND n.content_ref = ?{bind_index}");
500                }
501            }
502        }
503    }
504
505    let _ = write!(&mut sql, "\nLIMIT {final_limit}");
506
507    if binds.len() > MAX_BIND_PARAMETERS {
508        return Err(CompileError::TooManyBindParameters(binds.len()));
509    }
510
511    Ok(CompiledQuery {
512        sql,
513        binds,
514        shape_hash,
515        driving_table,
516        hints,
517    })
518}
519
520/// Compile a [`QueryAst`] into a [`CompiledGroupedQuery`] for grouped execution.
521///
522/// # Errors
523///
524/// Returns a [`CompileError`] if the AST exceeds expansion-slot limits,
525/// contains empty slot names, or specifies a traversal depth beyond the
526/// configured maximum.
527pub fn compile_grouped_query(ast: &QueryAst) -> Result<CompiledGroupedQuery, CompileError> {
528    if ast.expansions.len() > MAX_EXPANSION_SLOTS {
529        return Err(CompileError::TooManyExpansionSlots(ast.expansions.len()));
530    }
531
532    let mut seen = std::collections::BTreeSet::new();
533    for expansion in &ast.expansions {
534        if expansion.slot.trim().is_empty() {
535            return Err(CompileError::EmptyExpansionSlotName);
536        }
537        if expansion.max_depth > MAX_TRAVERSAL_DEPTH {
538            return Err(CompileError::TraversalTooDeep(expansion.max_depth));
539        }
540        if !seen.insert(expansion.slot.clone()) {
541            return Err(CompileError::DuplicateExpansionSlot(expansion.slot.clone()));
542        }
543    }
544
545    let mut root_ast = ast.clone();
546    root_ast.expansions.clear();
547    let root = compile_query(&root_ast)?;
548    let hints = execution_hints(ast);
549    let shape_hash = ShapeHash(hash_signature(&shape_signature(ast)));
550
551    Ok(CompiledGroupedQuery {
552        root,
553        expansions: ast.expansions.clone(),
554        shape_hash,
555        hints,
556    })
557}
558
/// 64-bit FNV-1a over the UTF-8 bytes of `signature`.
///
/// Used instead of `DefaultHasher` because the result is deterministic across
/// Rust versions and program invocations.
fn hash_signature(signature: &str) -> u64 {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;
    signature
        .as_bytes()
        .iter()
        // FNV-1a: XOR the byte in first, then multiply by the prime.
        .fold(FNV_OFFSET_BASIS, |state, &byte| {
            (state ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
        })
}
571
572#[cfg(test)]
573#[allow(clippy::expect_used, clippy::items_after_statements)]
574mod tests {
575    use rstest::rstest;
576
577    use crate::{
578        CompileError, DrivingTable, QueryBuilder, TraverseDirection, compile_grouped_query,
579        compile_query,
580    };
581
582    #[test]
583    fn vector_query_compiles_to_chunk_resolution() {
584        let compiled = compile_query(
585            &QueryBuilder::nodes("Meeting")
586                .vector_search("budget", 5)
587                .limit(5)
588                .into_ast(),
589        )
590        .expect("compiled query");
591
592        assert_eq!(compiled.driving_table, DrivingTable::VecNodes);
593        assert!(compiled.sql.contains("JOIN chunks c ON c.id = vc.chunk_id"));
594        assert!(
595            compiled
596                .sql
597                .contains("JOIN nodes src ON src.logical_id = c.node_logical_id")
598        );
599    }
600
601    #[rstest]
602    #[case(5, 7)]
603    #[case(3, 11)]
604    fn structural_limits_change_shape_hash(#[case] left: usize, #[case] right: usize) {
605        let left_compiled = compile_query(
606            &QueryBuilder::nodes("Meeting")
607                .text_search("budget", left)
608                .limit(left)
609                .into_ast(),
610        )
611        .expect("left query");
612        let right_compiled = compile_query(
613            &QueryBuilder::nodes("Meeting")
614                .text_search("budget", right)
615                .limit(right)
616                .into_ast(),
617        )
618        .expect("right query");
619
620        assert_ne!(left_compiled.shape_hash, right_compiled.shape_hash);
621    }
622
623    #[test]
624    fn traversal_query_is_depth_bounded() {
625        let compiled = compile_query(
626            &QueryBuilder::nodes("Meeting")
627                .text_search("budget", 5)
628                .traverse(TraverseDirection::Out, "HAS_TASK", 3)
629                .limit(10)
630                .into_ast(),
631        )
632        .expect("compiled traversal");
633
634        assert!(compiled.sql.contains("WITH RECURSIVE"));
635        assert!(compiled.sql.contains("WHERE t.depth < 3"));
636    }
637
638    #[test]
639    fn text_search_compiles_to_union_over_chunk_and_property_fts() {
640        let compiled = compile_query(
641            &QueryBuilder::nodes("Meeting")
642                .text_search("budget", 25)
643                .limit(25)
644                .into_ast(),
645        )
646        .expect("compiled text search");
647
648        assert_eq!(compiled.driving_table, DrivingTable::FtsNodes);
649        // Must contain UNION of both FTS tables.
650        assert!(
651            compiled.sql.contains("fts_nodes MATCH"),
652            "must search chunk-backed FTS"
653        );
654        assert!(
655            compiled.sql.contains("fts_node_properties MATCH"),
656            "must search property-backed FTS"
657        );
658        assert!(compiled.sql.contains("UNION"), "must UNION both sources");
659        // Must have 4 bind parameters: sanitized query + kind for each table.
660        assert_eq!(compiled.binds.len(), 4);
661    }
662
663    #[test]
664    fn logical_id_filter_is_compiled() {
665        let compiled = compile_query(
666            &QueryBuilder::nodes("Meeting")
667                .filter_logical_id_eq("meeting-123")
668                .filter_json_text_eq("$.status", "active")
669                .limit(1)
670                .into_ast(),
671        )
672        .expect("compiled query");
673
674        // LogicalIdEq is applied in base_candidates (src alias) for the Nodes driver,
675        // NOT duplicated in the final WHERE. The JOIN condition still contains
676        // "n.logical_id =" which satisfies this check.
677        assert!(compiled.sql.contains("n.logical_id ="));
678        assert!(compiled.sql.contains("src.logical_id ="));
679        assert!(compiled.sql.contains("json_extract"));
680        // Only one bind for the logical_id (not two).
681        use crate::BindValue;
682        assert_eq!(
683            compiled
684                .binds
685                .iter()
686                .filter(|b| matches!(b, BindValue::Text(s) if s == "meeting-123"))
687                .count(),
688            1
689        );
690    }
691
692    #[test]
693    fn compile_rejects_invalid_json_path() {
694        use crate::{Predicate, QueryStep, ScalarValue};
695        let mut ast = QueryBuilder::nodes("Meeting").into_ast();
696        // Attempt SQL injection via JSON path.
697        ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
698            path: "$') OR 1=1 --".to_owned(),
699            value: ScalarValue::Text("x".to_owned()),
700        }));
701        use crate::CompileError;
702        let result = compile_query(&ast);
703        assert!(
704            matches!(result, Err(CompileError::InvalidJsonPath(_))),
705            "expected InvalidJsonPath, got {result:?}"
706        );
707    }
708
709    #[test]
710    fn compile_accepts_valid_json_paths() {
711        use crate::{Predicate, QueryStep, ScalarValue};
712        for valid_path in ["$.status", "$.foo.bar", "$.a_b.c2"] {
713            let mut ast = QueryBuilder::nodes("Meeting").into_ast();
714            ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
715                path: valid_path.to_owned(),
716                value: ScalarValue::Text("v".to_owned()),
717            }));
718            assert!(
719                compile_query(&ast).is_ok(),
720                "expected valid path {valid_path:?} to compile"
721            );
722        }
723    }
724
725    #[test]
726    fn compile_rejects_too_many_bind_parameters() {
727        use crate::{Predicate, QueryStep, ScalarValue};
728        let mut ast = QueryBuilder::nodes("Meeting").into_ast();
729        // kind occupies 1 bind; each json filter now occupies 2 binds (path + value).
730        // 7 json filters → 1 + 14 = 15 (ok), 8 → 1 + 16 = 17 (exceeds limit of 15).
731        for i in 0..8 {
732            ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
733                path: format!("$.f{i}"),
734                value: ScalarValue::Text("v".to_owned()),
735            }));
736        }
737        use crate::CompileError;
738        let result = compile_query(&ast);
739        assert!(
740            matches!(result, Err(CompileError::TooManyBindParameters(17))),
741            "expected TooManyBindParameters(17), got {result:?}"
742        );
743    }
744
745    #[test]
746    fn compile_rejects_excessive_traversal_depth() {
747        let result = compile_query(
748            &QueryBuilder::nodes("Meeting")
749                .text_search("budget", 5)
750                .traverse(TraverseDirection::Out, "HAS_TASK", 51)
751                .limit(10)
752                .into_ast(),
753        );
754        assert!(
755            matches!(result, Err(CompileError::TraversalTooDeep(51))),
756            "expected TraversalTooDeep(51), got {result:?}"
757        );
758    }
759
760    #[test]
761    fn grouped_queries_with_same_structure_share_shape_hash() {
762        let left = compile_grouped_query(
763            &QueryBuilder::nodes("Meeting")
764                .text_search("budget", 5)
765                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
766                .limit(10)
767                .into_ast(),
768        )
769        .expect("left grouped query");
770        let right = compile_grouped_query(
771            &QueryBuilder::nodes("Meeting")
772                .text_search("planning", 5)
773                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
774                .limit(10)
775                .into_ast(),
776        )
777        .expect("right grouped query");
778
779        assert_eq!(left.shape_hash, right.shape_hash);
780    }
781
782    #[test]
783    fn compile_grouped_rejects_duplicate_expansion_slot_names() {
784        let result = compile_grouped_query(
785            &QueryBuilder::nodes("Meeting")
786                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
787                .expand("tasks", TraverseDirection::Out, "HAS_DECISION", 1)
788                .into_ast(),
789        );
790
791        assert!(
792            matches!(result, Err(CompileError::DuplicateExpansionSlot(ref slot)) if slot == "tasks"),
793            "expected DuplicateExpansionSlot(\"tasks\"), got {result:?}"
794        );
795    }
796
797    #[test]
798    fn flat_compile_rejects_queries_with_expansions() {
799        let result = compile_query(
800            &QueryBuilder::nodes("Meeting")
801                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
802                .into_ast(),
803        );
804
805        assert!(
806            matches!(
807                result,
808                Err(CompileError::FlatCompileDoesNotSupportExpansions)
809            ),
810            "expected FlatCompileDoesNotSupportExpansions, got {result:?}"
811        );
812    }
813
814    #[test]
815    fn json_path_compiled_as_bind_parameter() {
816        let compiled = compile_query(
817            &QueryBuilder::nodes("Meeting")
818                .filter_json_text_eq("$.status", "active")
819                .limit(1)
820                .into_ast(),
821        )
822        .expect("compiled query");
823
824        // Path must be parameterized, not interpolated into the SQL string.
825        assert!(
826            !compiled.sql.contains("'$.status'"),
827            "JSON path must not appear as a SQL string literal"
828        );
829        assert!(
830            compiled.sql.contains("json_extract(src.properties, ?"),
831            "JSON path must be a bind parameter (pushed into base_candidates for Nodes driver)"
832        );
833        // Path and value should both be in the bind list.
834        use crate::BindValue;
835        assert!(
836            compiled
837                .binds
838                .iter()
839                .any(|b| matches!(b, BindValue::Text(s) if s == "$.status"))
840        );
841        assert!(
842            compiled
843                .binds
844                .iter()
845                .any(|b| matches!(b, BindValue::Text(s) if s == "active"))
846        );
847    }
848
849    // --- FTS5 sanitization tests ---
850
851    #[test]
852    fn sanitize_fts5_plain_tokens() {
853        use super::sanitize_fts5_query;
854        assert_eq!(
855            sanitize_fts5_query("budget meeting"),
856            "\"budget\" \"meeting\""
857        );
858    }
859
860    #[test]
861    fn sanitize_fts5_apostrophe() {
862        use super::sanitize_fts5_query;
863        // The apostrophe that triggered issue #31
864        assert_eq!(sanitize_fts5_query("User's name"), "\"User's\" \"name\"");
865    }
866
867    #[test]
868    fn sanitize_fts5_embedded_double_quotes() {
869        use super::sanitize_fts5_query;
870        assert_eq!(
871            sanitize_fts5_query(r#"say "hello" world"#),
872            "\"say\" \"\"\"hello\"\"\" \"world\""
873        );
874    }
875
876    #[test]
877    fn sanitize_fts5_operators_neutralized() {
878        use super::sanitize_fts5_query;
879        // FTS5 operators should be quoted, not interpreted
880        assert_eq!(
881            sanitize_fts5_query("cats AND dogs OR fish"),
882            "\"cats\" \"AND\" \"dogs\" \"OR\" \"fish\""
883        );
884    }
885
886    #[test]
887    fn sanitize_fts5_special_chars() {
888        use super::sanitize_fts5_query;
889        // Wildcards, column filters, parentheses, NEAR
890        assert_eq!(sanitize_fts5_query("prefix*"), "\"prefix*\"");
891        assert_eq!(sanitize_fts5_query("col:value"), "\"col:value\"");
892        assert_eq!(sanitize_fts5_query("(a OR b)"), "\"(a\" \"OR\" \"b)\"");
893        assert_eq!(sanitize_fts5_query("a NEAR b"), "\"a\" \"NEAR\" \"b\"");
894    }
895
896    #[test]
897    fn sanitize_fts5_empty_input() {
898        use super::sanitize_fts5_query;
899        assert_eq!(sanitize_fts5_query(""), "");
900        assert_eq!(sanitize_fts5_query("   "), "");
901    }
902
903    // --- Filter pushdown regression tests ---
904    //
905    // These tests verify that filter predicates are pushed into the
906    // base_candidates CTE for the Nodes driving table, so the CTE LIMIT
907    // applies after filtering rather than before.  Without pushdown, the
908    // LIMIT may truncate the candidate set before the filter runs, causing
909    // matching nodes to be silently excluded.
910
911    #[test]
912    fn nodes_driver_pushes_json_eq_filter_into_base_candidates() {
913        let compiled = compile_query(
914            &QueryBuilder::nodes("Meeting")
915                .filter_json_text_eq("$.status", "active")
916                .limit(5)
917                .into_ast(),
918        )
919        .expect("compiled query");
920
921        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
922        // Filter must appear inside base_candidates (src alias), not the
923        // outer WHERE (n alias).
924        assert!(
925            compiled.sql.contains("json_extract(src.properties, ?"),
926            "json_extract must reference src (base_candidates), got:\n{}",
927            compiled.sql,
928        );
929        assert!(
930            !compiled.sql.contains("json_extract(n.properties, ?"),
931            "json_extract must NOT appear in outer WHERE for Nodes driver, got:\n{}",
932            compiled.sql,
933        );
934    }
935
936    #[test]
937    fn nodes_driver_pushes_json_compare_filter_into_base_candidates() {
938        let compiled = compile_query(
939            &QueryBuilder::nodes("Meeting")
940                .filter_json_integer_gte("$.priority", 5)
941                .limit(10)
942                .into_ast(),
943        )
944        .expect("compiled query");
945
946        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
947        assert!(
948            compiled.sql.contains("json_extract(src.properties, ?"),
949            "comparison filter must be in base_candidates, got:\n{}",
950            compiled.sql,
951        );
952        assert!(
953            !compiled.sql.contains("json_extract(n.properties, ?"),
954            "comparison filter must NOT be in outer WHERE for Nodes driver",
955        );
956        assert!(
957            compiled.sql.contains(">= ?"),
958            "expected >= operator in SQL, got:\n{}",
959            compiled.sql,
960        );
961    }
962
963    #[test]
964    fn nodes_driver_pushes_source_ref_filter_into_base_candidates() {
965        let compiled = compile_query(
966            &QueryBuilder::nodes("Meeting")
967                .filter_source_ref_eq("ref-123")
968                .limit(5)
969                .into_ast(),
970        )
971        .expect("compiled query");
972
973        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
974        assert!(
975            compiled.sql.contains("src.source_ref = ?"),
976            "source_ref filter must be in base_candidates, got:\n{}",
977            compiled.sql,
978        );
979        assert!(
980            !compiled.sql.contains("n.source_ref = ?"),
981            "source_ref filter must NOT be in outer WHERE for Nodes driver",
982        );
983    }
984
985    #[test]
986    fn nodes_driver_pushes_multiple_filters_into_base_candidates() {
987        let compiled = compile_query(
988            &QueryBuilder::nodes("Meeting")
989                .filter_logical_id_eq("meeting-1")
990                .filter_json_text_eq("$.status", "active")
991                .filter_json_integer_gte("$.priority", 5)
992                .filter_source_ref_eq("ref-abc")
993                .limit(1)
994                .into_ast(),
995        )
996        .expect("compiled query");
997
998        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
999        // All filters should be in base_candidates, none in outer WHERE
1000        assert!(
1001            compiled.sql.contains("src.logical_id = ?"),
1002            "logical_id filter must be in base_candidates",
1003        );
1004        assert!(
1005            compiled.sql.contains("json_extract(src.properties, ?"),
1006            "JSON filters must be in base_candidates",
1007        );
1008        assert!(
1009            compiled.sql.contains("src.source_ref = ?"),
1010            "source_ref filter must be in base_candidates",
1011        );
1012        // Each bind value should appear exactly once (not duplicated in outer WHERE)
1013        use crate::BindValue;
1014        assert_eq!(
1015            compiled
1016                .binds
1017                .iter()
1018                .filter(|b| matches!(b, BindValue::Text(s) if s == "meeting-1"))
1019                .count(),
1020            1,
1021            "logical_id bind must not be duplicated"
1022        );
1023        assert_eq!(
1024            compiled
1025                .binds
1026                .iter()
1027                .filter(|b| matches!(b, BindValue::Text(s) if s == "ref-abc"))
1028                .count(),
1029            1,
1030            "source_ref bind must not be duplicated"
1031        );
1032    }
1033
1034    #[test]
1035    fn fts_driver_keeps_json_filter_in_outer_where() {
1036        // When the driving table is FTS (not Nodes), JSON filters should
1037        // remain in the outer WHERE clause, not pushed into base_candidates.
1038        let compiled = compile_query(
1039            &QueryBuilder::nodes("Meeting")
1040                .text_search("budget", 5)
1041                .filter_json_text_eq("$.status", "active")
1042                .limit(5)
1043                .into_ast(),
1044        )
1045        .expect("compiled query");
1046
1047        assert_eq!(compiled.driving_table, DrivingTable::FtsNodes);
1048        assert!(
1049            compiled.sql.contains("json_extract(n.properties, ?"),
1050            "JSON filter must be in outer WHERE for FTS driver, got:\n{}",
1051            compiled.sql,
1052        );
1053        assert!(
1054            !compiled.sql.contains("json_extract(src.properties, ?"),
1055            "JSON filter must NOT be in base_candidates for FTS driver",
1056        );
1057    }
1058
1059    #[test]
1060    fn fts5_query_bind_is_sanitized() {
1061        // Verify the compiled query's bind value is sanitized, not the raw input
1062        let compiled = compile_query(
1063            &QueryBuilder::nodes("Meeting")
1064                .text_search("User's name", 5)
1065                .limit(5)
1066                .into_ast(),
1067        )
1068        .expect("compiled query");
1069
1070        use crate::BindValue;
1071        assert!(
1072            compiled
1073                .binds
1074                .iter()
1075                .any(|b| matches!(b, BindValue::Text(s) if s == "\"User's\" \"name\"")),
1076            "FTS5 query bind should be sanitized; got {:?}",
1077            compiled.binds
1078        );
1079    }
1080}