1use std::fmt::Write;
2
3use crate::fusion::partition_search_filters;
4use crate::plan::{choose_driving_table, execution_hints, shape_signature};
5use crate::search::{
6 CompiledRetrievalPlan, CompiledSearch, CompiledSearchPlan, CompiledVectorSearch,
7};
8use crate::{
9 ComparisonOp, DrivingTable, ExpansionSlot, Predicate, QueryAst, QueryStep, ScalarValue,
10 TextQuery, TraverseDirection, derive_relaxed, render_text_query_fts5,
11};
12
/// A single positional bind value attached to compiled SQL.
///
/// Values are stored in [`CompiledQuery::binds`] in placeholder order: the
/// entry at 1-based position `i` is the value for the `?i` placeholder.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum BindValue {
    /// An owned text parameter.
    Text(String),
    /// A signed 64-bit integer parameter.
    Integer(i64),
    /// A boolean parameter.
    Bool(bool),
}
23
/// Hash of a query's shape signature.
///
/// Produced by hashing the string returned by `shape_signature` for an AST,
/// so two ASTs with the same signature share the same `ShapeHash`.
#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
pub struct ShapeHash(pub u64);
27
28#[derive(Clone, Debug, PartialEq, Eq)]
30pub struct CompiledQuery {
31 pub sql: String,
33 pub binds: Vec<BindValue>,
35 pub shape_hash: ShapeHash,
37 pub driving_table: DrivingTable,
39 pub hints: crate::ExecutionHints,
41}
42
43#[derive(Clone, Debug, PartialEq, Eq)]
45pub struct CompiledGroupedQuery {
46 pub root: CompiledQuery,
48 pub expansions: Vec<ExpansionSlot>,
50 pub shape_hash: ShapeHash,
52 pub hints: crate::ExecutionHints,
54}
55
56#[derive(Clone, Debug, PartialEq, Eq, thiserror::Error)]
58pub enum CompileError {
59 #[error("multiple traversal steps are not supported in v1")]
60 TooManyTraversals,
61 #[error("flat query compilation does not support expansions; use compile_grouped")]
62 FlatCompileDoesNotSupportExpansions,
63 #[error("duplicate expansion slot name: {0}")]
64 DuplicateExpansionSlot(String),
65 #[error("expansion slot name must be non-empty")]
66 EmptyExpansionSlotName,
67 #[error("too many expansion slots: max {MAX_EXPANSION_SLOTS}, got {0}")]
68 TooManyExpansionSlots(usize),
69 #[error("too many bind parameters: max 15, got {0}")]
70 TooManyBindParameters(usize),
71 #[error("traversal depth {0} exceeds maximum of {MAX_TRAVERSAL_DEPTH}")]
72 TraversalTooDeep(usize),
73 #[error("invalid JSON path: must match $(.key)+ pattern, got {0:?}")]
74 InvalidJsonPath(String),
75 #[error("compile_search requires exactly one TextSearch step in the AST")]
76 MissingTextSearchStep,
77 #[error("compile_vector_search requires exactly one VectorSearch step in the AST")]
78 MissingVectorSearchStep,
79 #[error("compile_retrieval_plan requires exactly one Search step in the AST")]
80 MissingSearchStep,
81 #[error("compile_retrieval_plan requires exactly one Search step in the AST, found multiple")]
82 MultipleSearchSteps,
83}
84
85fn validate_json_path(path: &str) -> Result<(), CompileError> {
90 let valid = path.starts_with('$')
91 && path.len() > 1
92 && path[1..].split('.').all(|segment| {
93 segment.is_empty()
94 || segment
95 .chars()
96 .all(|c| c.is_ascii_alphanumeric() || c == '_')
97 && !segment.is_empty()
98 })
99 && path.contains('.');
100 if !valid {
101 return Err(CompileError::InvalidJsonPath(path.to_owned()));
102 }
103 Ok(())
104}
105
106fn append_fusable_clause(
114 sql: &mut String,
115 binds: &mut Vec<BindValue>,
116 alias: &str,
117 predicate: &Predicate,
118) {
119 match predicate {
120 Predicate::KindEq(kind) => {
121 binds.push(BindValue::Text(kind.clone()));
122 let idx = binds.len();
123 let _ = write!(sql, "\n AND {alias}.kind = ?{idx}");
124 }
125 Predicate::LogicalIdEq(logical_id) => {
126 binds.push(BindValue::Text(logical_id.clone()));
127 let idx = binds.len();
128 let _ = write!(
129 sql,
130 "\n AND {alias}.logical_id = ?{idx}"
131 );
132 }
133 Predicate::SourceRefEq(source_ref) => {
134 binds.push(BindValue::Text(source_ref.clone()));
135 let idx = binds.len();
136 let _ = write!(
137 sql,
138 "\n AND {alias}.source_ref = ?{idx}"
139 );
140 }
141 Predicate::ContentRefEq(uri) => {
142 binds.push(BindValue::Text(uri.clone()));
143 let idx = binds.len();
144 let _ = write!(
145 sql,
146 "\n AND {alias}.content_ref = ?{idx}"
147 );
148 }
149 Predicate::ContentRefNotNull => {
150 let _ = write!(
151 sql,
152 "\n AND {alias}.content_ref IS NOT NULL"
153 );
154 }
155 Predicate::JsonPathEq { .. } | Predicate::JsonPathCompare { .. } => {
156 unreachable!("append_fusable_clause received a residual predicate");
157 }
158 }
159}
160
/// Largest number of bind values a compiled flat query may carry; exceeding
/// it yields `CompileError::TooManyBindParameters`.
const MAX_BIND_PARAMETERS: usize = 15;

/// Largest number of expansion slots a grouped query may declare; exceeding
/// it yields `CompileError::TooManyExpansionSlots`.
const MAX_EXPANSION_SLOTS: usize = 8;

/// Largest `max_depth` accepted for a traversal step or an expansion slot;
/// exceeding it yields `CompileError::TraversalTooDeep`.
const MAX_TRAVERSAL_DEPTH: usize = 50;
169
170#[allow(clippy::too_many_lines)]
205pub fn compile_query(ast: &QueryAst) -> Result<CompiledQuery, CompileError> {
206 if !ast.expansions.is_empty() {
207 return Err(CompileError::FlatCompileDoesNotSupportExpansions);
208 }
209
210 let traversals = ast
211 .steps
212 .iter()
213 .filter(|step| matches!(step, QueryStep::Traverse { .. }))
214 .count();
215 if traversals > 1 {
216 return Err(CompileError::TooManyTraversals);
217 }
218
219 let excessive_depth = ast.steps.iter().find_map(|step| {
220 if let QueryStep::Traverse { max_depth, .. } = step
221 && *max_depth > MAX_TRAVERSAL_DEPTH
222 {
223 return Some(*max_depth);
224 }
225 None
226 });
227 if let Some(depth) = excessive_depth {
228 return Err(CompileError::TraversalTooDeep(depth));
229 }
230
231 let driving_table = choose_driving_table(ast);
232 let hints = execution_hints(ast);
233 let shape_hash = ShapeHash(hash_signature(&shape_signature(ast)));
234
235 let base_limit = ast
236 .steps
237 .iter()
238 .find_map(|step| match step {
239 QueryStep::VectorSearch { limit, .. } | QueryStep::TextSearch { limit, .. } => {
240 Some(*limit)
241 }
242 _ => None,
243 })
244 .or(ast.final_limit)
245 .unwrap_or(25);
246
247 let final_limit = ast.final_limit.unwrap_or(base_limit);
248 let traversal = ast.steps.iter().find_map(|step| {
249 if let QueryStep::Traverse {
250 direction,
251 label,
252 max_depth,
253 } = step
254 {
255 Some((*direction, label.as_str(), *max_depth))
256 } else {
257 None
258 }
259 });
260
261 let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
266
267 let mut binds = Vec::new();
268 let base_candidates = match driving_table {
269 DrivingTable::VecNodes => {
270 let query = ast
271 .steps
272 .iter()
273 .find_map(|step| {
274 if let QueryStep::VectorSearch { query, .. } = step {
275 Some(query.as_str())
276 } else {
277 None
278 }
279 })
280 .unwrap_or_else(|| unreachable!("VecNodes chosen but no VectorSearch step in AST"));
281 binds.push(BindValue::Text(query.to_owned()));
282 binds.push(BindValue::Text(ast.root_kind.clone()));
283 let mut sql = format!(
298 "base_candidates AS (
299 SELECT DISTINCT src.logical_id
300 FROM (
301 SELECT chunk_id FROM vec_nodes_active
302 WHERE embedding MATCH ?1
303 LIMIT {base_limit}
304 ) vc
305 JOIN chunks c ON c.id = vc.chunk_id
306 JOIN nodes src ON src.logical_id = c.node_logical_id AND src.superseded_at IS NULL
307 WHERE src.kind = ?2",
308 );
309 for predicate in &fusable_filters {
310 append_fusable_clause(&mut sql, &mut binds, "src", predicate);
311 }
312 sql.push_str("\n )");
313 sql
314 }
315 DrivingTable::FtsNodes => {
316 let text_query = ast
317 .steps
318 .iter()
319 .find_map(|step| {
320 if let QueryStep::TextSearch { query, .. } = step {
321 Some(query)
322 } else {
323 None
324 }
325 })
326 .unwrap_or_else(|| unreachable!("FtsNodes chosen but no TextSearch step in AST"));
327 let rendered = render_text_query_fts5(text_query);
331 binds.push(BindValue::Text(rendered.clone()));
334 binds.push(BindValue::Text(ast.root_kind.clone()));
335 binds.push(BindValue::Text(rendered));
336 binds.push(BindValue::Text(ast.root_kind.clone()));
337 let mut sql = String::from(
342 "base_candidates AS (
343 SELECT DISTINCT n.logical_id
344 FROM (
345 SELECT src.logical_id
346 FROM fts_nodes f
347 JOIN chunks c ON c.id = f.chunk_id
348 JOIN nodes src ON src.logical_id = c.node_logical_id AND src.superseded_at IS NULL
349 WHERE fts_nodes MATCH ?1
350 AND src.kind = ?2
351 UNION
352 SELECT fp.node_logical_id AS logical_id
353 FROM fts_node_properties fp
354 JOIN nodes src ON src.logical_id = fp.node_logical_id AND src.superseded_at IS NULL
355 WHERE fts_node_properties MATCH ?3
356 AND fp.kind = ?4
357 ) u
358 JOIN nodes n ON n.logical_id = u.logical_id AND n.superseded_at IS NULL
359 WHERE 1 = 1",
360 );
361 for predicate in &fusable_filters {
362 append_fusable_clause(&mut sql, &mut binds, "n", predicate);
363 }
364 let _ = write!(
365 &mut sql,
366 "\n LIMIT {base_limit}\n )"
367 );
368 sql
369 }
370 DrivingTable::Nodes => {
371 binds.push(BindValue::Text(ast.root_kind.clone()));
372 let mut sql = "base_candidates AS (
373 SELECT DISTINCT src.logical_id
374 FROM nodes src
375 WHERE src.superseded_at IS NULL
376 AND src.kind = ?1"
377 .to_owned();
378 for step in &ast.steps {
383 if let QueryStep::Filter(predicate) = step {
384 match predicate {
385 Predicate::LogicalIdEq(logical_id) => {
386 binds.push(BindValue::Text(logical_id.clone()));
387 let bind_index = binds.len();
388 let _ = write!(
389 &mut sql,
390 "\n AND src.logical_id = ?{bind_index}"
391 );
392 }
393 Predicate::JsonPathEq { path, value } => {
394 validate_json_path(path)?;
395 binds.push(BindValue::Text(path.clone()));
396 let path_index = binds.len();
397 binds.push(match value {
398 ScalarValue::Text(text) => BindValue::Text(text.clone()),
399 ScalarValue::Integer(integer) => BindValue::Integer(*integer),
400 ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
401 });
402 let value_index = binds.len();
403 let _ = write!(
404 &mut sql,
405 "\n AND json_extract(src.properties, ?{path_index}) = ?{value_index}"
406 );
407 }
408 Predicate::JsonPathCompare { path, op, value } => {
409 validate_json_path(path)?;
410 binds.push(BindValue::Text(path.clone()));
411 let path_index = binds.len();
412 binds.push(match value {
413 ScalarValue::Text(text) => BindValue::Text(text.clone()),
414 ScalarValue::Integer(integer) => BindValue::Integer(*integer),
415 ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
416 });
417 let value_index = binds.len();
418 let operator = match op {
419 ComparisonOp::Gt => ">",
420 ComparisonOp::Gte => ">=",
421 ComparisonOp::Lt => "<",
422 ComparisonOp::Lte => "<=",
423 };
424 let _ = write!(
425 &mut sql,
426 "\n AND json_extract(src.properties, ?{path_index}) {operator} ?{value_index}"
427 );
428 }
429 Predicate::SourceRefEq(source_ref) => {
430 binds.push(BindValue::Text(source_ref.clone()));
431 let bind_index = binds.len();
432 let _ = write!(
433 &mut sql,
434 "\n AND src.source_ref = ?{bind_index}"
435 );
436 }
437 Predicate::ContentRefNotNull => {
438 let _ = write!(
439 &mut sql,
440 "\n AND src.content_ref IS NOT NULL"
441 );
442 }
443 Predicate::ContentRefEq(uri) => {
444 binds.push(BindValue::Text(uri.clone()));
445 let bind_index = binds.len();
446 let _ = write!(
447 &mut sql,
448 "\n AND src.content_ref = ?{bind_index}"
449 );
450 }
451 Predicate::KindEq(_) => {
452 }
454 }
455 }
456 }
457 let _ = write!(
458 &mut sql,
459 "\n LIMIT {base_limit}\n )"
460 );
461 sql
462 }
463 };
464
465 let mut sql = format!("WITH RECURSIVE\n{base_candidates}");
466 let source_alias = if traversal.is_some() { "t" } else { "bc" };
467
468 if let Some((direction, label, max_depth)) = traversal {
469 binds.push(BindValue::Text(label.to_owned()));
470 let label_index = binds.len();
471 let (join_condition, next_logical_id) = match direction {
472 TraverseDirection::Out => ("e.source_logical_id = t.logical_id", "e.target_logical_id"),
473 TraverseDirection::In => ("e.target_logical_id = t.logical_id", "e.source_logical_id"),
474 };
475
476 let _ = write!(
477 &mut sql,
478 ",
479traversed(logical_id, depth, visited) AS (
480 SELECT bc.logical_id, 0, printf(',%s,', bc.logical_id)
481 FROM base_candidates bc
482 UNION ALL
483 SELECT {next_logical_id}, t.depth + 1, t.visited || {next_logical_id} || ','
484 FROM traversed t
485 JOIN edges e ON {join_condition}
486 AND e.kind = ?{label_index}
487 AND e.superseded_at IS NULL
488 WHERE t.depth < {max_depth}
489 AND instr(t.visited, printf(',%s,', {next_logical_id})) = 0
490 LIMIT {}
491)",
492 hints.hard_limit
493 );
494 }
495
496 let _ = write!(
497 &mut sql,
498 "
499SELECT DISTINCT n.row_id, n.logical_id, n.kind, n.properties, n.content_ref
500FROM {} {source_alias}
501JOIN nodes n ON n.logical_id = {source_alias}.logical_id
502 AND n.superseded_at IS NULL
503WHERE 1 = 1",
504 if traversal.is_some() {
505 "traversed"
506 } else {
507 "base_candidates"
508 }
509 );
510
511 if driving_table == DrivingTable::Nodes {
521 for step in &ast.steps {
522 if let QueryStep::Filter(Predicate::KindEq(kind)) = step {
523 binds.push(BindValue::Text(kind.clone()));
524 let bind_index = binds.len();
525 let _ = write!(&mut sql, "\n AND n.kind = ?{bind_index}");
526 }
527 }
528 } else {
529 for predicate in &residual_filters {
530 match predicate {
531 Predicate::JsonPathEq { path, value } => {
532 validate_json_path(path)?;
533 binds.push(BindValue::Text(path.clone()));
534 let path_index = binds.len();
535 binds.push(match value {
536 ScalarValue::Text(text) => BindValue::Text(text.clone()),
537 ScalarValue::Integer(integer) => BindValue::Integer(*integer),
538 ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
539 });
540 let value_index = binds.len();
541 let _ = write!(
542 &mut sql,
543 "\n AND json_extract(n.properties, ?{path_index}) = ?{value_index}",
544 );
545 }
546 Predicate::JsonPathCompare { path, op, value } => {
547 validate_json_path(path)?;
548 binds.push(BindValue::Text(path.clone()));
549 let path_index = binds.len();
550 binds.push(match value {
551 ScalarValue::Text(text) => BindValue::Text(text.clone()),
552 ScalarValue::Integer(integer) => BindValue::Integer(*integer),
553 ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
554 });
555 let value_index = binds.len();
556 let operator = match op {
557 ComparisonOp::Gt => ">",
558 ComparisonOp::Gte => ">=",
559 ComparisonOp::Lt => "<",
560 ComparisonOp::Lte => "<=",
561 };
562 let _ = write!(
563 &mut sql,
564 "\n AND json_extract(n.properties, ?{path_index}) {operator} ?{value_index}",
565 );
566 }
567 Predicate::KindEq(_)
568 | Predicate::LogicalIdEq(_)
569 | Predicate::SourceRefEq(_)
570 | Predicate::ContentRefEq(_)
571 | Predicate::ContentRefNotNull => {
572 }
575 }
576 }
577 }
578
579 let _ = write!(&mut sql, "\nLIMIT {final_limit}");
580
581 if binds.len() > MAX_BIND_PARAMETERS {
582 return Err(CompileError::TooManyBindParameters(binds.len()));
583 }
584
585 Ok(CompiledQuery {
586 sql,
587 binds,
588 shape_hash,
589 driving_table,
590 hints,
591 })
592}
593
594pub fn compile_grouped_query(ast: &QueryAst) -> Result<CompiledGroupedQuery, CompileError> {
602 if ast.expansions.len() > MAX_EXPANSION_SLOTS {
603 return Err(CompileError::TooManyExpansionSlots(ast.expansions.len()));
604 }
605
606 let mut seen = std::collections::BTreeSet::new();
607 for expansion in &ast.expansions {
608 if expansion.slot.trim().is_empty() {
609 return Err(CompileError::EmptyExpansionSlotName);
610 }
611 if expansion.max_depth > MAX_TRAVERSAL_DEPTH {
612 return Err(CompileError::TraversalTooDeep(expansion.max_depth));
613 }
614 if !seen.insert(expansion.slot.clone()) {
615 return Err(CompileError::DuplicateExpansionSlot(expansion.slot.clone()));
616 }
617 }
618
619 let mut root_ast = ast.clone();
620 root_ast.expansions.clear();
621 let root = compile_query(&root_ast)?;
622 let hints = execution_hints(ast);
623 let shape_hash = ShapeHash(hash_signature(&shape_signature(ast)));
624
625 Ok(CompiledGroupedQuery {
626 root,
627 expansions: ast.expansions.clone(),
628 shape_hash,
629 hints,
630 })
631}
632
633pub fn compile_search(ast: &QueryAst) -> Result<CompiledSearch, CompileError> {
645 let mut text_query = None;
646 let mut limit = None;
647 for step in &ast.steps {
648 match step {
649 QueryStep::TextSearch {
650 query,
651 limit: step_limit,
652 } => {
653 text_query = Some(query.clone());
654 limit = Some(*step_limit);
655 }
656 QueryStep::Filter(_)
657 | QueryStep::Search { .. }
658 | QueryStep::VectorSearch { .. }
659 | QueryStep::Traverse { .. } => {
660 }
664 }
665 }
666 let text_query = text_query.ok_or(CompileError::MissingTextSearchStep)?;
667 let limit = limit.unwrap_or(25);
668 let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
669 Ok(CompiledSearch {
670 root_kind: ast.root_kind.clone(),
671 text_query,
672 limit,
673 fusable_filters,
674 residual_filters,
675 attribution_requested: false,
676 })
677}
678
679#[doc(hidden)]
693pub fn compile_search_plan(ast: &QueryAst) -> Result<CompiledSearchPlan, CompileError> {
694 let strict = compile_search(ast)?;
695 let (relaxed_query, was_degraded_at_plan_time) = derive_relaxed(&strict.text_query);
696 let relaxed = relaxed_query.map(|q| CompiledSearch {
697 root_kind: strict.root_kind.clone(),
698 text_query: q,
699 limit: strict.limit,
700 fusable_filters: strict.fusable_filters.clone(),
701 residual_filters: strict.residual_filters.clone(),
702 attribution_requested: strict.attribution_requested,
703 });
704 Ok(CompiledSearchPlan {
705 strict,
706 relaxed,
707 was_degraded_at_plan_time,
708 })
709}
710
711pub fn compile_search_plan_from_queries(
738 ast: &QueryAst,
739 strict: TextQuery,
740 relaxed: Option<TextQuery>,
741 limit: usize,
742 attribution_requested: bool,
743) -> Result<CompiledSearchPlan, CompileError> {
744 let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
745 let strict_compiled = CompiledSearch {
746 root_kind: ast.root_kind.clone(),
747 text_query: strict,
748 limit,
749 fusable_filters: fusable_filters.clone(),
750 residual_filters: residual_filters.clone(),
751 attribution_requested,
752 };
753 let relaxed_compiled = relaxed.map(|q| CompiledSearch {
754 root_kind: ast.root_kind.clone(),
755 text_query: q,
756 limit,
757 fusable_filters,
758 residual_filters,
759 attribution_requested,
760 });
761 Ok(CompiledSearchPlan {
762 strict: strict_compiled,
763 relaxed: relaxed_compiled,
764 was_degraded_at_plan_time: false,
765 })
766}
767
768pub fn compile_vector_search(ast: &QueryAst) -> Result<CompiledVectorSearch, CompileError> {
783 let mut query_text = None;
784 let mut limit = None;
785 for step in &ast.steps {
786 match step {
787 QueryStep::VectorSearch {
788 query,
789 limit: step_limit,
790 } => {
791 query_text = Some(query.clone());
792 limit = Some(*step_limit);
793 }
794 QueryStep::Filter(_)
795 | QueryStep::Search { .. }
796 | QueryStep::TextSearch { .. }
797 | QueryStep::Traverse { .. } => {
798 }
802 }
803 }
804 let query_text = query_text.ok_or(CompileError::MissingVectorSearchStep)?;
805 let limit = limit.unwrap_or(25);
806 let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
807 Ok(CompiledVectorSearch {
808 root_kind: ast.root_kind.clone(),
809 query_text,
810 limit,
811 fusable_filters,
812 residual_filters,
813 attribution_requested: false,
814 })
815}
816
817pub fn compile_retrieval_plan(ast: &QueryAst) -> Result<CompiledRetrievalPlan, CompileError> {
840 let mut raw_query: Option<&str> = None;
841 let mut limit: Option<usize> = None;
842 for step in &ast.steps {
843 if let QueryStep::Search {
844 query,
845 limit: step_limit,
846 } = step
847 {
848 if raw_query.is_some() {
849 return Err(CompileError::MultipleSearchSteps);
850 }
851 raw_query = Some(query.as_str());
852 limit = Some(*step_limit);
853 }
854 }
855 let raw_query = raw_query.ok_or(CompileError::MissingSearchStep)?;
856 let limit = limit.unwrap_or(25);
857
858 let strict_text_query = TextQuery::parse(raw_query);
859 let (relaxed_text_query, was_degraded_at_plan_time) = derive_relaxed(&strict_text_query);
860
861 let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
862
863 let strict = CompiledSearch {
864 root_kind: ast.root_kind.clone(),
865 text_query: strict_text_query,
866 limit,
867 fusable_filters: fusable_filters.clone(),
868 residual_filters: residual_filters.clone(),
869 attribution_requested: false,
870 };
871 let relaxed = relaxed_text_query.map(|q| CompiledSearch {
872 root_kind: ast.root_kind.clone(),
873 text_query: q,
874 limit,
875 fusable_filters,
876 residual_filters,
877 attribution_requested: false,
878 });
879 let text = CompiledSearchPlan {
880 strict,
881 relaxed,
882 was_degraded_at_plan_time,
883 };
884
885 Ok(CompiledRetrievalPlan {
893 text,
894 vector: None,
895 was_degraded_at_plan_time,
896 })
897}
898
/// Hashes `signature` with 64-bit FNV-1a over its UTF-8 bytes.
///
/// Each byte is XORed into the running hash, which is then multiplied by the
/// FNV prime with wrapping arithmetic. An empty signature returns the offset
/// basis.
fn hash_signature(signature: &str) -> u64 {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

    signature.bytes().fold(FNV_OFFSET_BASIS, |acc, byte| {
        (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    })
}
911
912#[cfg(test)]
913#[allow(clippy::expect_used, clippy::items_after_statements)]
914mod tests {
915 use rstest::rstest;
916
917 use crate::{
918 CompileError, DrivingTable, QueryBuilder, TraverseDirection, compile_grouped_query,
919 compile_query,
920 };
921
922 #[test]
923 fn vector_query_compiles_to_chunk_resolution() {
924 let compiled = compile_query(
925 &QueryBuilder::nodes("Meeting")
926 .vector_search("budget", 5)
927 .limit(5)
928 .into_ast(),
929 )
930 .expect("compiled query");
931
932 assert_eq!(compiled.driving_table, DrivingTable::VecNodes);
933 assert!(compiled.sql.contains("JOIN chunks c ON c.id = vc.chunk_id"));
934 assert!(
935 compiled
936 .sql
937 .contains("JOIN nodes src ON src.logical_id = c.node_logical_id")
938 );
939 }
940
941 #[rstest]
942 #[case(5, 7)]
943 #[case(3, 11)]
944 fn structural_limits_change_shape_hash(#[case] left: usize, #[case] right: usize) {
945 let left_compiled = compile_query(
946 &QueryBuilder::nodes("Meeting")
947 .text_search("budget", left)
948 .limit(left)
949 .into_ast(),
950 )
951 .expect("left query");
952 let right_compiled = compile_query(
953 &QueryBuilder::nodes("Meeting")
954 .text_search("budget", right)
955 .limit(right)
956 .into_ast(),
957 )
958 .expect("right query");
959
960 assert_ne!(left_compiled.shape_hash, right_compiled.shape_hash);
961 }
962
963 #[test]
964 fn traversal_query_is_depth_bounded() {
965 let compiled = compile_query(
966 &QueryBuilder::nodes("Meeting")
967 .text_search("budget", 5)
968 .traverse(TraverseDirection::Out, "HAS_TASK", 3)
969 .limit(10)
970 .into_ast(),
971 )
972 .expect("compiled traversal");
973
974 assert!(compiled.sql.contains("WITH RECURSIVE"));
975 assert!(compiled.sql.contains("WHERE t.depth < 3"));
976 }
977
978 #[test]
979 fn text_search_compiles_to_union_over_chunk_and_property_fts() {
980 let compiled = compile_query(
981 &QueryBuilder::nodes("Meeting")
982 .text_search("budget", 25)
983 .limit(25)
984 .into_ast(),
985 )
986 .expect("compiled text search");
987
988 assert_eq!(compiled.driving_table, DrivingTable::FtsNodes);
989 assert!(
991 compiled.sql.contains("fts_nodes MATCH"),
992 "must search chunk-backed FTS"
993 );
994 assert!(
995 compiled.sql.contains("fts_node_properties MATCH"),
996 "must search property-backed FTS"
997 );
998 assert!(compiled.sql.contains("UNION"), "must UNION both sources");
999 assert_eq!(compiled.binds.len(), 4);
1001 }
1002
1003 #[test]
1004 fn logical_id_filter_is_compiled() {
1005 let compiled = compile_query(
1006 &QueryBuilder::nodes("Meeting")
1007 .filter_logical_id_eq("meeting-123")
1008 .filter_json_text_eq("$.status", "active")
1009 .limit(1)
1010 .into_ast(),
1011 )
1012 .expect("compiled query");
1013
1014 assert!(compiled.sql.contains("n.logical_id ="));
1018 assert!(compiled.sql.contains("src.logical_id ="));
1019 assert!(compiled.sql.contains("json_extract"));
1020 use crate::BindValue;
1022 assert_eq!(
1023 compiled
1024 .binds
1025 .iter()
1026 .filter(|b| matches!(b, BindValue::Text(s) if s == "meeting-123"))
1027 .count(),
1028 1
1029 );
1030 }
1031
1032 #[test]
1033 fn compile_rejects_invalid_json_path() {
1034 use crate::{Predicate, QueryStep, ScalarValue};
1035 let mut ast = QueryBuilder::nodes("Meeting").into_ast();
1036 ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
1038 path: "$') OR 1=1 --".to_owned(),
1039 value: ScalarValue::Text("x".to_owned()),
1040 }));
1041 use crate::CompileError;
1042 let result = compile_query(&ast);
1043 assert!(
1044 matches!(result, Err(CompileError::InvalidJsonPath(_))),
1045 "expected InvalidJsonPath, got {result:?}"
1046 );
1047 }
1048
1049 #[test]
1050 fn compile_accepts_valid_json_paths() {
1051 use crate::{Predicate, QueryStep, ScalarValue};
1052 for valid_path in ["$.status", "$.foo.bar", "$.a_b.c2"] {
1053 let mut ast = QueryBuilder::nodes("Meeting").into_ast();
1054 ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
1055 path: valid_path.to_owned(),
1056 value: ScalarValue::Text("v".to_owned()),
1057 }));
1058 assert!(
1059 compile_query(&ast).is_ok(),
1060 "expected valid path {valid_path:?} to compile"
1061 );
1062 }
1063 }
1064
1065 #[test]
1066 fn compile_rejects_too_many_bind_parameters() {
1067 use crate::{Predicate, QueryStep, ScalarValue};
1068 let mut ast = QueryBuilder::nodes("Meeting").into_ast();
1069 for i in 0..8 {
1072 ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
1073 path: format!("$.f{i}"),
1074 value: ScalarValue::Text("v".to_owned()),
1075 }));
1076 }
1077 use crate::CompileError;
1078 let result = compile_query(&ast);
1079 assert!(
1080 matches!(result, Err(CompileError::TooManyBindParameters(17))),
1081 "expected TooManyBindParameters(17), got {result:?}"
1082 );
1083 }
1084
1085 #[test]
1086 fn compile_rejects_excessive_traversal_depth() {
1087 let result = compile_query(
1088 &QueryBuilder::nodes("Meeting")
1089 .text_search("budget", 5)
1090 .traverse(TraverseDirection::Out, "HAS_TASK", 51)
1091 .limit(10)
1092 .into_ast(),
1093 );
1094 assert!(
1095 matches!(result, Err(CompileError::TraversalTooDeep(51))),
1096 "expected TraversalTooDeep(51), got {result:?}"
1097 );
1098 }
1099
1100 #[test]
1101 fn grouped_queries_with_same_structure_share_shape_hash() {
1102 let left = compile_grouped_query(
1103 &QueryBuilder::nodes("Meeting")
1104 .text_search("budget", 5)
1105 .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
1106 .limit(10)
1107 .into_ast(),
1108 )
1109 .expect("left grouped query");
1110 let right = compile_grouped_query(
1111 &QueryBuilder::nodes("Meeting")
1112 .text_search("planning", 5)
1113 .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
1114 .limit(10)
1115 .into_ast(),
1116 )
1117 .expect("right grouped query");
1118
1119 assert_eq!(left.shape_hash, right.shape_hash);
1120 }
1121
1122 #[test]
1123 fn compile_grouped_rejects_duplicate_expansion_slot_names() {
1124 let result = compile_grouped_query(
1125 &QueryBuilder::nodes("Meeting")
1126 .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
1127 .expand("tasks", TraverseDirection::Out, "HAS_DECISION", 1)
1128 .into_ast(),
1129 );
1130
1131 assert!(
1132 matches!(result, Err(CompileError::DuplicateExpansionSlot(ref slot)) if slot == "tasks"),
1133 "expected DuplicateExpansionSlot(\"tasks\"), got {result:?}"
1134 );
1135 }
1136
1137 #[test]
1138 fn flat_compile_rejects_queries_with_expansions() {
1139 let result = compile_query(
1140 &QueryBuilder::nodes("Meeting")
1141 .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
1142 .into_ast(),
1143 );
1144
1145 assert!(
1146 matches!(
1147 result,
1148 Err(CompileError::FlatCompileDoesNotSupportExpansions)
1149 ),
1150 "expected FlatCompileDoesNotSupportExpansions, got {result:?}"
1151 );
1152 }
1153
1154 #[test]
1155 fn json_path_compiled_as_bind_parameter() {
1156 let compiled = compile_query(
1157 &QueryBuilder::nodes("Meeting")
1158 .filter_json_text_eq("$.status", "active")
1159 .limit(1)
1160 .into_ast(),
1161 )
1162 .expect("compiled query");
1163
1164 assert!(
1166 !compiled.sql.contains("'$.status'"),
1167 "JSON path must not appear as a SQL string literal"
1168 );
1169 assert!(
1170 compiled.sql.contains("json_extract(src.properties, ?"),
1171 "JSON path must be a bind parameter (pushed into base_candidates for Nodes driver)"
1172 );
1173 use crate::BindValue;
1175 assert!(
1176 compiled
1177 .binds
1178 .iter()
1179 .any(|b| matches!(b, BindValue::Text(s) if s == "$.status"))
1180 );
1181 assert!(
1182 compiled
1183 .binds
1184 .iter()
1185 .any(|b| matches!(b, BindValue::Text(s) if s == "active"))
1186 );
1187 }
1188
1189 #[test]
1198 fn nodes_driver_pushes_json_eq_filter_into_base_candidates() {
1199 let compiled = compile_query(
1200 &QueryBuilder::nodes("Meeting")
1201 .filter_json_text_eq("$.status", "active")
1202 .limit(5)
1203 .into_ast(),
1204 )
1205 .expect("compiled query");
1206
1207 assert_eq!(compiled.driving_table, DrivingTable::Nodes);
1208 assert!(
1211 compiled.sql.contains("json_extract(src.properties, ?"),
1212 "json_extract must reference src (base_candidates), got:\n{}",
1213 compiled.sql,
1214 );
1215 assert!(
1216 !compiled.sql.contains("json_extract(n.properties, ?"),
1217 "json_extract must NOT appear in outer WHERE for Nodes driver, got:\n{}",
1218 compiled.sql,
1219 );
1220 }
1221
1222 #[test]
1223 fn nodes_driver_pushes_json_compare_filter_into_base_candidates() {
1224 let compiled = compile_query(
1225 &QueryBuilder::nodes("Meeting")
1226 .filter_json_integer_gte("$.priority", 5)
1227 .limit(10)
1228 .into_ast(),
1229 )
1230 .expect("compiled query");
1231
1232 assert_eq!(compiled.driving_table, DrivingTable::Nodes);
1233 assert!(
1234 compiled.sql.contains("json_extract(src.properties, ?"),
1235 "comparison filter must be in base_candidates, got:\n{}",
1236 compiled.sql,
1237 );
1238 assert!(
1239 !compiled.sql.contains("json_extract(n.properties, ?"),
1240 "comparison filter must NOT be in outer WHERE for Nodes driver",
1241 );
1242 assert!(
1243 compiled.sql.contains(">= ?"),
1244 "expected >= operator in SQL, got:\n{}",
1245 compiled.sql,
1246 );
1247 }
1248
1249 #[test]
1250 fn nodes_driver_pushes_source_ref_filter_into_base_candidates() {
1251 let compiled = compile_query(
1252 &QueryBuilder::nodes("Meeting")
1253 .filter_source_ref_eq("ref-123")
1254 .limit(5)
1255 .into_ast(),
1256 )
1257 .expect("compiled query");
1258
1259 assert_eq!(compiled.driving_table, DrivingTable::Nodes);
1260 assert!(
1261 compiled.sql.contains("src.source_ref = ?"),
1262 "source_ref filter must be in base_candidates, got:\n{}",
1263 compiled.sql,
1264 );
1265 assert!(
1266 !compiled.sql.contains("n.source_ref = ?"),
1267 "source_ref filter must NOT be in outer WHERE for Nodes driver",
1268 );
1269 }
1270
1271 #[test]
1272 fn nodes_driver_pushes_multiple_filters_into_base_candidates() {
1273 let compiled = compile_query(
1274 &QueryBuilder::nodes("Meeting")
1275 .filter_logical_id_eq("meeting-1")
1276 .filter_json_text_eq("$.status", "active")
1277 .filter_json_integer_gte("$.priority", 5)
1278 .filter_source_ref_eq("ref-abc")
1279 .limit(1)
1280 .into_ast(),
1281 )
1282 .expect("compiled query");
1283
1284 assert_eq!(compiled.driving_table, DrivingTable::Nodes);
1285 assert!(
1287 compiled.sql.contains("src.logical_id = ?"),
1288 "logical_id filter must be in base_candidates",
1289 );
1290 assert!(
1291 compiled.sql.contains("json_extract(src.properties, ?"),
1292 "JSON filters must be in base_candidates",
1293 );
1294 assert!(
1295 compiled.sql.contains("src.source_ref = ?"),
1296 "source_ref filter must be in base_candidates",
1297 );
1298 use crate::BindValue;
1300 assert_eq!(
1301 compiled
1302 .binds
1303 .iter()
1304 .filter(|b| matches!(b, BindValue::Text(s) if s == "meeting-1"))
1305 .count(),
1306 1,
1307 "logical_id bind must not be duplicated"
1308 );
1309 assert_eq!(
1310 compiled
1311 .binds
1312 .iter()
1313 .filter(|b| matches!(b, BindValue::Text(s) if s == "ref-abc"))
1314 .count(),
1315 1,
1316 "source_ref bind must not be duplicated"
1317 );
1318 }
1319
/// For a text-search query that also carries a JSON filter and a kind filter,
/// the JSON predicate must appear against the `n` alias, while the kind
/// predicate must appear only before the final `SELECT DISTINCT n.row_id`.
#[test]
fn fts_driver_keeps_json_filter_residual_but_fuses_kind() {
    let ast = QueryBuilder::nodes("Meeting")
        .text_search("budget", 5)
        .filter_json_text_eq("$.status", "active")
        .filter_kind_eq("Meeting")
        .limit(5)
        .into_ast();
    let compiled = compile_query(&ast).expect("compiled query");

    assert_eq!(compiled.driving_table, DrivingTable::FtsNodes);

    let json_is_residual = compiled.sql.contains("json_extract(n.properties, ?");
    assert!(
        json_is_residual,
        "JSON filter must stay residual in outer WHERE, got:\n{}",
        compiled.sql,
    );

    // Everything before the final SELECT is treated as the CTE portion;
    // everything after it as the outer query.
    let final_select = "SELECT DISTINCT n.row_id";
    let (cte, outer) = compiled
        .sql
        .split_once(final_select)
        .expect("query has final SELECT");

    let kind_predicate = "AND n.kind = ?";
    let fused_in_cte = cte.contains(kind_predicate);
    assert!(
        fused_in_cte,
        "KindEq must be fused inside base_candidates CTE, got CTE:\n{cte}"
    );
    let leaked_to_outer = outer.contains(kind_predicate);
    assert!(
        !leaked_to_outer,
        "KindEq must NOT appear in outer WHERE for FTS driver, got outer:\n{outer}"
    );
}
1358
/// A text-search query with only a kind filter must emit `AND n.kind = ?`
/// before the final `SELECT DISTINCT n.row_id` and never after it.
#[test]
fn fts_driver_fuses_kind_filter() {
    let ast = QueryBuilder::nodes("Goal")
        .text_search("budget", 5)
        .filter_kind_eq("Goal")
        .limit(5)
        .into_ast();
    let compiled = compile_query(&ast).expect("compiled query");

    assert_eq!(compiled.driving_table, DrivingTable::FtsNodes);

    // Split the statement at the final SELECT: prefix vs. outer query.
    let final_select = "SELECT DISTINCT n.row_id";
    let (cte, outer) = compiled
        .sql
        .split_once(final_select)
        .expect("query has final SELECT");

    let kind_predicate = "AND n.kind = ?";
    let fused_in_cte = cte.contains(kind_predicate);
    assert!(
        fused_in_cte,
        "KindEq must be fused inside base_candidates, got:\n{cte}"
    );
    let leaked_to_outer = outer.contains(kind_predicate);
    assert!(
        !leaked_to_outer,
        "KindEq must NOT be in outer WHERE, got:\n{outer}"
    );
}
1384
/// A vector-search query with a kind filter must emit `AND src.kind = ?`
/// before the final `SELECT DISTINCT n.row_id`, and must not emit
/// `AND n.kind = ?` after it.
#[test]
fn vec_driver_fuses_kind_filter() {
    let ast = QueryBuilder::nodes("Goal")
        .vector_search("budget", 5)
        .filter_kind_eq("Goal")
        .limit(5)
        .into_ast();
    let compiled = compile_query(&ast).expect("compiled query");

    assert_eq!(compiled.driving_table, DrivingTable::VecNodes);

    // Split the statement at the final SELECT: prefix vs. outer query.
    let final_select = "SELECT DISTINCT n.row_id";
    let (cte, outer) = compiled
        .sql
        .split_once(final_select)
        .expect("query has final SELECT");

    // The fused predicate uses the `src` alias; the outer one would use `n`.
    let fused_in_cte = cte.contains("AND src.kind = ?");
    assert!(
        fused_in_cte,
        "KindEq must be fused inside base_candidates, got:\n{cte}"
    );
    let leaked_to_outer = outer.contains("AND n.kind = ?");
    assert!(
        !leaked_to_outer,
        "KindEq must NOT be in outer WHERE, got:\n{outer}"
    );
}
1410
/// The text-search input `User's name` must be bound as two double-quoted
/// terms: `"User's" "name"`.
#[test]
fn fts5_query_bind_uses_rendered_literals() {
    use crate::BindValue;

    let ast = QueryBuilder::nodes("Meeting")
        .text_search("User's name", 5)
        .limit(5)
        .into_ast();
    let compiled = compile_query(&ast).expect("compiled query");

    // Raw string keeps the expected quoting readable.
    let expected = BindValue::Text(r#""User's" "name""#.to_owned());
    assert!(
        compiled.binds.contains(&expected),
        "FTS5 query bind should use rendered literal terms; got {:?}",
        compiled.binds
    );
}
1431
/// The text-search input `ship OR docs` must be bound as `"ship" OR "docs"`,
/// i.e. the uppercase `OR` stays unquoted between the quoted terms.
#[test]
fn fts5_query_bind_supports_or_operator() {
    use crate::BindValue;

    let ast = QueryBuilder::nodes("Meeting")
        .text_search("ship OR docs", 5)
        .limit(5)
        .into_ast();
    let compiled = compile_query(&ast).expect("compiled query");

    let expected = BindValue::Text(r#""ship" OR "docs""#.to_owned());
    assert!(
        compiled.binds.contains(&expected),
        "FTS5 query bind should preserve supported OR; got {:?}",
        compiled.binds
    );
}
1452
/// The text-search input `ship NOT blocked` must be bound as
/// `"ship" NOT "blocked"`, i.e. the infix uppercase `NOT` stays unquoted.
#[test]
fn fts5_query_bind_supports_not_operator() {
    use crate::BindValue;

    let ast = QueryBuilder::nodes("Meeting")
        .text_search("ship NOT blocked", 5)
        .limit(5)
        .into_ast();
    let compiled = compile_query(&ast).expect("compiled query");

    let expected = BindValue::Text(r#""ship" NOT "blocked""#.to_owned());
    assert!(
        compiled.binds.contains(&expected),
        "FTS5 query bind should preserve supported NOT; got {:?}",
        compiled.binds
    );
}
1473
/// When `NOT` is the first token of the search text (`NOT blocked`), the bind
/// must quote it like any other term: `"NOT" "blocked"`.
#[test]
fn fts5_query_bind_literalizes_clause_leading_not() {
    use crate::BindValue;

    let ast = QueryBuilder::nodes("Meeting")
        .text_search("NOT blocked", 5)
        .limit(5)
        .into_ast();
    let compiled = compile_query(&ast).expect("compiled query");

    let expected = BindValue::Text(r#""NOT" "blocked""#.to_owned());
    assert!(
        compiled.binds.contains(&expected),
        "Clause-leading NOT should degrade to literals; got {:?}",
        compiled.binds
    );
}
1494
/// The search text `ship OR NOT blocked` must be bound with every token
/// quoted, including `OR` and `NOT`: `"ship" "OR" "NOT" "blocked"`.
#[test]
fn fts5_query_bind_literalizes_or_not_sequence() {
    use crate::BindValue;

    let ast = QueryBuilder::nodes("Meeting")
        .text_search("ship OR NOT blocked", 5)
        .limit(5)
        .into_ast();
    let compiled = compile_query(&ast).expect("compiled query");

    let expected = BindValue::Text(r#""ship" "OR" "NOT" "blocked""#.to_owned());
    assert!(
        compiled.binds.contains(&expected),
        "`OR NOT` should degrade to literals rather than emit invalid FTS5; got {:?}",
        compiled.binds
    );
}
1514
/// Compiles an AST made of one `Search` step plus a `KindEq` filter and checks
/// the resulting retrieval plan:
///
/// * the strict branch carries the root kind, the search limit, one fusable
///   filter, no residual filters, and an `And` of the three query terms;
/// * a relaxed branch exists, `Or`s the same three terms, and also carries
///   one fusable filter;
/// * the plan is not flagged as degraded at plan time.
#[test]
fn compile_retrieval_plan_accepts_search_step() {
    // Only the names this test actually uses are imported, so no dummy
    // reference is needed to keep an otherwise-unused import alive.
    use crate::{Predicate, QueryAst, QueryStep, TextQuery, compile_retrieval_plan};

    let ast = QueryAst {
        root_kind: "Goal".to_owned(),
        steps: vec![
            QueryStep::Search {
                query: "ship quarterly docs".to_owned(),
                limit: 7,
            },
            QueryStep::Filter(Predicate::KindEq("Goal".to_owned())),
        ],
        expansions: vec![],
        final_limit: None,
    };
    let plan = compile_retrieval_plan(&ast).expect("compiles");

    // Strict branch: root kind, limit and filter partitioning.
    assert_eq!(plan.text.strict.root_kind, "Goal");
    assert_eq!(plan.text.strict.limit, 7);
    assert_eq!(plan.text.strict.fusable_filters.len(), 1);
    assert!(plan.text.strict.residual_filters.is_empty());
    assert_eq!(
        plan.text.strict.text_query,
        TextQuery::And(vec![
            TextQuery::Term("ship".into()),
            TextQuery::Term("quarterly".into()),
            TextQuery::Term("docs".into()),
        ])
    );

    // Relaxed branch: same terms, combined with `Or`.
    let relaxed = plan.text.relaxed.as_ref().expect("relaxed branch present");
    assert_eq!(
        relaxed.text_query,
        TextQuery::Or(vec![
            TextQuery::Term("ship".into()),
            TextQuery::Term("quarterly".into()),
            TextQuery::Term("docs".into()),
        ])
    );
    assert_eq!(relaxed.fusable_filters.len(), 1);
    assert!(!plan.was_degraded_at_plan_time);
}
1563
/// An AST that contains only a kind filter (no search step) must make
/// `compile_retrieval_plan` return `CompileError::MissingSearchStep`.
#[test]
fn compile_retrieval_plan_rejects_ast_without_search_step() {
    use crate::{CompileError, QueryBuilder, compile_retrieval_plan};

    let filter_only = QueryBuilder::nodes("Goal")
        .filter_kind_eq("Goal")
        .into_ast();
    let result = compile_retrieval_plan(&filter_only);

    // Any outcome other than the expected error variant fails the test.
    match result {
        Err(CompileError::MissingSearchStep) => {}
        _ => panic!("expected MissingSearchStep, got {result:?}"),
    }
}
1576
/// An AST with two `Search` steps must make `compile_retrieval_plan` return
/// `CompileError::MultipleSearchSteps`.
#[test]
fn compile_retrieval_plan_rejects_ast_with_multiple_search_steps() {
    use crate::{CompileError, QueryAst, QueryStep, compile_retrieval_plan};

    let first_search = QueryStep::Search {
        query: "alpha".to_owned(),
        limit: 5,
    };
    let second_search = QueryStep::Search {
        query: "bravo".to_owned(),
        limit: 10,
    };
    let ast = QueryAst {
        root_kind: "Goal".to_owned(),
        steps: vec![first_search, second_search],
        expansions: vec![],
        final_limit: None,
    };

    let result = compile_retrieval_plan(&ast);

    // Any outcome other than the expected error variant fails the test.
    match result {
        Err(CompileError::MultipleSearchSteps) => {}
        _ => panic!("expected MultipleSearchSteps, got {result:?}"),
    }
}
1605
/// For several search strings (multi-term, single-term, empty, and
/// whitespace-only) the compiled retrieval plan must have no vector branch.
#[test]
fn compile_retrieval_plan_v1_always_leaves_vector_empty() {
    use crate::{QueryAst, QueryStep, compile_retrieval_plan};

    // Builds a single-search-step AST for the given query text.
    let ast_for = |text: &str| QueryAst {
        root_kind: "Goal".to_owned(),
        steps: vec![QueryStep::Search {
            query: text.to_owned(),
            limit: 10,
        }],
        expansions: vec![],
        final_limit: None,
    };

    let queries = ["ship quarterly docs", "single", "", "   "];
    for query in queries {
        let plan = compile_retrieval_plan(&ast_for(query)).expect("compiles");
        let vector_branch_absent = plan.vector.is_none();
        assert!(
            vector_branch_absent,
            "Phase 12 v1 must always leave the vector branch empty (query = {query:?})"
        );
    }
}
1631
/// The search text `not a ship` (lowercase `not`) must be bound as three
/// quoted terms: `"not" "a" "ship"`.
#[test]
fn fts5_query_bind_preserves_lowercase_not_as_literal_text() {
    use crate::BindValue;

    let ast = QueryBuilder::nodes("Meeting")
        .text_search("not a ship", 5)
        .limit(5)
        .into_ast();
    let compiled = compile_query(&ast).expect("compiled query");

    let expected = BindValue::Text(r#""not" "a" "ship""#.to_owned());
    assert!(
        compiled.binds.contains(&expected),
        "Lowercase not should remain a literal term sequence; got {:?}",
        compiled.binds
    );
}
1652}