use std::fmt::Write;
use crate::fusion::partition_search_filters;
use crate::plan::{choose_driving_table, execution_hints, shape_signature};
use crate::search::{
CompiledRetrievalPlan, CompiledSearch, CompiledSearchPlan, CompiledVectorSearch,
};
use crate::{
ComparisonOp, DrivingTable, ExpansionSlot, Predicate, QueryAst, QueryStep, ScalarValue,
TextQuery, TraverseDirection, derive_relaxed, render_text_query_fts5,
};
/// A positional SQL bind value attached to a compiled statement.
///
/// Values are referenced from the SQL text as `?N`, where `N` is the
/// 1-based position of the value in the statement's bind list.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum BindValue {
    /// UTF-8 text (kinds, logical ids, JSON paths, rendered FTS queries, …).
    Text(String),
    /// 64-bit signed integer.
    Integer(i64),
    /// Boolean value.
    Bool(bool),
}
/// 64-bit hash of a query's structural signature (see `hash_signature` and
/// `shape_signature`); structurally identical queries share a hash even when
/// their search terms differ.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct ShapeHash(pub u64);
/// Result of flat compilation: one SQL statement plus its metadata.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CompiledQuery {
    /// Full SQL text with `?N` positional placeholders.
    pub sql: String,
    /// Bind values in placeholder order (`?1` is `binds[0]`).
    pub binds: Vec<BindValue>,
    /// Structural hash of the originating AST.
    pub shape_hash: ShapeHash,
    /// Table chosen to drive the `base_candidates` CTE.
    pub driving_table: DrivingTable,
    /// Execution hints derived from the AST.
    pub hints: crate::ExecutionHints,
}
/// Result of grouped compilation: a root query plus its expansion slots.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CompiledGroupedQuery {
    /// The root query, compiled with expansions stripped.
    pub root: CompiledQuery,
    /// Expansion slots copied through from the AST, in declaration order.
    pub expansions: Vec<ExpansionSlot>,
    /// Structural hash of the full AST, expansions included.
    pub shape_hash: ShapeHash,
    /// Execution hints derived from the full AST.
    pub hints: crate::ExecutionHints,
}
#[derive(Clone, Debug, PartialEq, Eq, thiserror::Error)]
pub enum CompileError {
#[error("multiple traversal steps are not supported in v1")]
TooManyTraversals,
#[error("flat query compilation does not support expansions; use compile_grouped")]
FlatCompileDoesNotSupportExpansions,
#[error("duplicate expansion slot name: {0}")]
DuplicateExpansionSlot(String),
#[error("expansion slot name must be non-empty")]
EmptyExpansionSlotName,
#[error("too many expansion slots: max {MAX_EXPANSION_SLOTS}, got {0}")]
TooManyExpansionSlots(usize),
#[error("too many bind parameters: max 15, got {0}")]
TooManyBindParameters(usize),
#[error("traversal depth {0} exceeds maximum of {MAX_TRAVERSAL_DEPTH}")]
TraversalTooDeep(usize),
#[error("invalid JSON path: must match $(.key)+ pattern, got {0:?}")]
InvalidJsonPath(String),
#[error("compile_search requires exactly one TextSearch step in the AST")]
MissingTextSearchStep,
#[error("compile_vector_search requires exactly one VectorSearch step in the AST")]
MissingVectorSearchStep,
#[error("compile_retrieval_plan requires exactly one Search step in the AST")]
MissingSearchStep,
#[error("compile_retrieval_plan requires exactly one Search step in the AST, found multiple")]
MultipleSearchSteps,
}
/// Validates that `path` matches the documented `$(.key)+` pattern, where
/// every key is a non-empty run of `[A-Za-z0-9_]`.
///
/// The path is always bound as a SQL parameter, but validating here keeps
/// malformed or hostile paths out of `json_extract` entirely.
///
/// Bug fix: the previous version's closure parsed as
/// `seg.is_empty() || (all_valid && !seg.is_empty())`, making the trailing
/// non-empty check dead code — so paths with empty segments such as `"$."`,
/// `"$.a..b"`, and `"$.a."` were accepted despite violating the `$(.key)+`
/// contract stated in `CompileError::InvalidJsonPath`. Every key must now be
/// non-empty.
///
/// # Errors
/// Returns [`CompileError::InvalidJsonPath`] when the path does not match.
fn validate_json_path(path: &str) -> Result<(), CompileError> {
    let valid = path.strip_prefix("$.").is_some_and(|keys| {
        keys.split('.').all(|key| {
            !key.is_empty() && key.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
        })
    });
    if valid {
        Ok(())
    } else {
        Err(CompileError::InvalidJsonPath(path.to_owned()))
    }
}
/// Appends one fused filter clause to the `base_candidates` CTE text.
///
/// `alias` names the `nodes` table inside the CTE. Each equality predicate
/// pushes its value onto `binds` first, so `binds.len()` is the 1-based
/// placeholder index for the clause it emits. Residual (JSON) predicates
/// must never reach this function.
fn append_fusable_clause(
    sql: &mut String,
    binds: &mut Vec<BindValue>,
    alias: &str,
    predicate: &Predicate,
) {
    // Resolve the predicate to the column it constrains; equality predicates
    // also push their bind value here. Non-equality / unsupported cases
    // return or panic directly.
    let column = match predicate {
        Predicate::KindEq(kind) => {
            binds.push(BindValue::Text(kind.clone()));
            "kind"
        }
        Predicate::LogicalIdEq(logical_id) => {
            binds.push(BindValue::Text(logical_id.clone()));
            "logical_id"
        }
        Predicate::SourceRefEq(source_ref) => {
            binds.push(BindValue::Text(source_ref.clone()));
            "source_ref"
        }
        Predicate::ContentRefEq(uri) => {
            binds.push(BindValue::Text(uri.clone()));
            "content_ref"
        }
        Predicate::ContentRefNotNull => {
            // No bind value for the IS NOT NULL form.
            let _ = write!(sql, "\n AND {alias}.content_ref IS NOT NULL");
            return;
        }
        Predicate::JsonPathEq { .. } | Predicate::JsonPathCompare { .. } => {
            unreachable!("append_fusable_clause received a residual predicate");
        }
    };
    let idx = binds.len();
    let _ = write!(sql, "\n AND {alias}.{column} = ?{idx}");
}
/// Maximum number of `?N` bind parameters a compiled query may carry.
const MAX_BIND_PARAMETERS: usize = 15;
/// Maximum number of named expansion slots per grouped query.
const MAX_EXPANSION_SLOTS: usize = 8;
/// Maximum depth for any `Traverse` step or expansion slot.
const MAX_TRAVERSAL_DEPTH: usize = 50;
/// Compiles a [`QueryAst`] into one flat SQL statement plus bind values.
///
/// The statement is assembled as a `WITH RECURSIVE` pipeline:
/// 1. `base_candidates` — a CTE driven by vector search, FTS, or a plain
///    `nodes` scan, depending on `choose_driving_table`;
/// 2. an optional `traversed` CTE performing a single depth-bounded edge
///    traversal with cycle detection;
/// 3. a final `SELECT` that re-joins `nodes` and applies residual filters.
///
/// # Errors
/// Rejects ASTs with expansions (use `compile_grouped_query`), more than one
/// traversal step, traversal depth above `MAX_TRAVERSAL_DEPTH`, invalid JSON
/// paths, or more than `MAX_BIND_PARAMETERS` binds.
#[allow(clippy::too_many_lines)]
pub fn compile_query(ast: &QueryAst) -> Result<CompiledQuery, CompileError> {
    // Expansion slots require grouped compilation; the flat path rejects them.
    if !ast.expansions.is_empty() {
        return Err(CompileError::FlatCompileDoesNotSupportExpansions);
    }
    // v1 supports at most one Traverse step.
    let traversals = ast
        .steps
        .iter()
        .filter(|step| matches!(step, QueryStep::Traverse { .. }))
        .count();
    if traversals > 1 {
        return Err(CompileError::TooManyTraversals);
    }
    // Bound the recursion depth before emitting the recursive CTE.
    let excessive_depth = ast.steps.iter().find_map(|step| {
        if let QueryStep::Traverse { max_depth, .. } = step
            && *max_depth > MAX_TRAVERSAL_DEPTH
        {
            return Some(*max_depth);
        }
        None
    });
    if let Some(depth) = excessive_depth {
        return Err(CompileError::TraversalTooDeep(depth));
    }
    let driving_table = choose_driving_table(ast);
    let hints = execution_hints(ast);
    let shape_hash = ShapeHash(hash_signature(&shape_signature(ast)));
    // Candidate-set size: a search step's limit wins, then the final limit,
    // then a default of 25.
    let base_limit = ast
        .steps
        .iter()
        .find_map(|step| match step {
            QueryStep::VectorSearch { limit, .. } | QueryStep::TextSearch { limit, .. } => {
                Some(*limit)
            }
            _ => None,
        })
        .or(ast.final_limit)
        .unwrap_or(25);
    let final_limit = ast.final_limit.unwrap_or(base_limit);
    // At most one traversal exists (checked above); capture its parameters.
    let traversal = ast.steps.iter().find_map(|step| {
        if let QueryStep::Traverse {
            direction,
            label,
            max_depth,
        } = step
        {
            Some((*direction, label.as_str(), *max_depth))
        } else {
            None
        }
    });
    // Fusable filters are pushed into the CTE; residual ones are applied in
    // the outer SELECT (search-driven queries only — see below).
    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
    let mut binds = Vec::new();
    // Build the base_candidates CTE for whichever table drives the query.
    let base_candidates = match driving_table {
        DrivingTable::VecNodes => {
            let query = ast
                .steps
                .iter()
                .find_map(|step| {
                    if let QueryStep::VectorSearch { query, .. } = step {
                        Some(query.as_str())
                    } else {
                        None
                    }
                })
                .unwrap_or_else(|| unreachable!("VecNodes chosen but no VectorSearch step in AST"));
            // ?1 = embedding query, ?2 = root kind; vector hits are resolved
            // chunk -> node before filtering.
            binds.push(BindValue::Text(query.to_owned()));
            binds.push(BindValue::Text(ast.root_kind.clone()));
            let mut sql = format!(
                "base_candidates AS (
SELECT DISTINCT src.logical_id
FROM (
SELECT chunk_id FROM vec_nodes_active
WHERE embedding MATCH ?1
LIMIT {base_limit}
) vc
JOIN chunks c ON c.id = vc.chunk_id
JOIN nodes src ON src.logical_id = c.node_logical_id AND src.superseded_at IS NULL
WHERE src.kind = ?2",
            );
            for predicate in &fusable_filters {
                append_fusable_clause(&mut sql, &mut binds, "src", predicate);
            }
            sql.push_str("\n )");
            sql
        }
        DrivingTable::FtsNodes => {
            let text_query = ast
                .steps
                .iter()
                .find_map(|step| {
                    if let QueryStep::TextSearch { query, .. } = step {
                        Some(query)
                    } else {
                        None
                    }
                })
                .unwrap_or_else(|| unreachable!("FtsNodes chosen but no TextSearch step in AST"));
            let rendered = render_text_query_fts5(text_query);
            // The rendered query and root kind are bound twice: once per arm
            // of the chunk-FTS / property-FTS UNION (?1/?2 and ?3/?4).
            binds.push(BindValue::Text(rendered.clone()));
            binds.push(BindValue::Text(ast.root_kind.clone()));
            binds.push(BindValue::Text(rendered));
            binds.push(BindValue::Text(ast.root_kind.clone()));
            let mut sql = String::from(
                "base_candidates AS (
SELECT DISTINCT n.logical_id
FROM (
SELECT src.logical_id
FROM fts_nodes f
JOIN chunks c ON c.id = f.chunk_id
JOIN nodes src ON src.logical_id = c.node_logical_id AND src.superseded_at IS NULL
WHERE fts_nodes MATCH ?1
AND src.kind = ?2
UNION
SELECT fp.node_logical_id AS logical_id
FROM fts_node_properties fp
JOIN nodes src ON src.logical_id = fp.node_logical_id AND src.superseded_at IS NULL
WHERE fts_node_properties MATCH ?3
AND fp.kind = ?4
) u
JOIN nodes n ON n.logical_id = u.logical_id AND n.superseded_at IS NULL
WHERE 1 = 1",
            );
            for predicate in &fusable_filters {
                append_fusable_clause(&mut sql, &mut binds, "n", predicate);
            }
            let _ = write!(
                &mut sql,
                "\n LIMIT {base_limit}\n )"
            );
            sql
        }
        DrivingTable::Nodes => {
            // No search step: every filter is pushed straight into the CTE.
            binds.push(BindValue::Text(ast.root_kind.clone()));
            let mut sql = "base_candidates AS (
SELECT DISTINCT src.logical_id
FROM nodes src
WHERE src.superseded_at IS NULL
AND src.kind = ?1"
                .to_owned();
            for step in &ast.steps {
                if let QueryStep::Filter(predicate) = step {
                    match predicate {
                        Predicate::LogicalIdEq(logical_id) => {
                            binds.push(BindValue::Text(logical_id.clone()));
                            let bind_index = binds.len();
                            let _ = write!(
                                &mut sql,
                                "\n AND src.logical_id = ?{bind_index}"
                            );
                        }
                        Predicate::JsonPathEq { path, value } => {
                            // Path and value are both bound, never inlined.
                            validate_json_path(path)?;
                            binds.push(BindValue::Text(path.clone()));
                            let path_index = binds.len();
                            binds.push(match value {
                                ScalarValue::Text(text) => BindValue::Text(text.clone()),
                                ScalarValue::Integer(integer) => BindValue::Integer(*integer),
                                ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
                            });
                            let value_index = binds.len();
                            let _ = write!(
                                &mut sql,
                                "\n AND json_extract(src.properties, ?{path_index}) = ?{value_index}"
                            );
                        }
                        Predicate::JsonPathCompare { path, op, value } => {
                            validate_json_path(path)?;
                            binds.push(BindValue::Text(path.clone()));
                            let path_index = binds.len();
                            binds.push(match value {
                                ScalarValue::Text(text) => BindValue::Text(text.clone()),
                                ScalarValue::Integer(integer) => BindValue::Integer(*integer),
                                ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
                            });
                            let value_index = binds.len();
                            // The operator is compiler-chosen text, not user
                            // input, so it is safe to splice into the SQL.
                            let operator = match op {
                                ComparisonOp::Gt => ">",
                                ComparisonOp::Gte => ">=",
                                ComparisonOp::Lt => "<",
                                ComparisonOp::Lte => "<=",
                            };
                            let _ = write!(
                                &mut sql,
                                "\n AND json_extract(src.properties, ?{path_index}) {operator} ?{value_index}"
                            );
                        }
                        Predicate::SourceRefEq(source_ref) => {
                            binds.push(BindValue::Text(source_ref.clone()));
                            let bind_index = binds.len();
                            let _ = write!(
                                &mut sql,
                                "\n AND src.source_ref = ?{bind_index}"
                            );
                        }
                        Predicate::ContentRefNotNull => {
                            let _ = write!(
                                &mut sql,
                                "\n AND src.content_ref IS NOT NULL"
                            );
                        }
                        Predicate::ContentRefEq(uri) => {
                            binds.push(BindValue::Text(uri.clone()));
                            let bind_index = binds.len();
                            let _ = write!(
                                &mut sql,
                                "\n AND src.content_ref = ?{bind_index}"
                            );
                        }
                        Predicate::KindEq(_) => {
                            // KindEq is applied in the outer SELECT for the
                            // Nodes driver (see below), not in the CTE.
                        }
                    }
                }
            }
            let _ = write!(
                &mut sql,
                "\n LIMIT {base_limit}\n )"
            );
            sql
        }
    };
    let mut sql = format!("WITH RECURSIVE\n{base_candidates}");
    // The outer SELECT reads from `traversed` when a traversal exists,
    // otherwise directly from `base_candidates`.
    let source_alias = if traversal.is_some() { "t" } else { "bc" };
    if let Some((direction, label, max_depth)) = traversal {
        binds.push(BindValue::Text(label.to_owned()));
        let label_index = binds.len();
        let (join_condition, next_logical_id) = match direction {
            TraverseDirection::Out => ("e.source_logical_id = t.logical_id", "e.target_logical_id"),
            TraverseDirection::In => ("e.target_logical_id = t.logical_id", "e.source_logical_id"),
        };
        // Cycle detection: each row carries a ',id,'-delimited visited list
        // and instr() skips any node already on the path.
        let _ = write!(
            &mut sql,
            ",
traversed(logical_id, depth, visited) AS (
SELECT bc.logical_id, 0, printf(',%s,', bc.logical_id)
FROM base_candidates bc
UNION ALL
SELECT {next_logical_id}, t.depth + 1, t.visited || {next_logical_id} || ','
FROM traversed t
JOIN edges e ON {join_condition}
AND e.kind = ?{label_index}
AND e.superseded_at IS NULL
WHERE t.depth < {max_depth}
AND instr(t.visited, printf(',%s,', {next_logical_id})) = 0
LIMIT {}
)",
            hints.hard_limit
        );
    }
    let _ = write!(
        &mut sql,
        "
SELECT DISTINCT n.row_id, n.logical_id, n.kind, n.properties, n.content_ref
FROM {} {source_alias}
JOIN nodes n ON n.logical_id = {source_alias}.logical_id
AND n.superseded_at IS NULL
WHERE 1 = 1",
        if traversal.is_some() {
            "traversed"
        } else {
            "base_candidates"
        }
    );
    if driving_table == DrivingTable::Nodes {
        // For the Nodes driver only KindEq filters remain for the outer
        // SELECT; everything else was pushed into the CTE above.
        for step in &ast.steps {
            if let QueryStep::Filter(Predicate::KindEq(kind)) = step {
                binds.push(BindValue::Text(kind.clone()));
                let bind_index = binds.len();
                let _ = write!(&mut sql, "\n AND n.kind = ?{bind_index}");
            }
        }
    } else {
        // Search-driven queries: apply residual (JSON) filters here; the
        // fusable filters were already appended inside base_candidates.
        for predicate in &residual_filters {
            match predicate {
                Predicate::JsonPathEq { path, value } => {
                    validate_json_path(path)?;
                    binds.push(BindValue::Text(path.clone()));
                    let path_index = binds.len();
                    binds.push(match value {
                        ScalarValue::Text(text) => BindValue::Text(text.clone()),
                        ScalarValue::Integer(integer) => BindValue::Integer(*integer),
                        ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
                    });
                    let value_index = binds.len();
                    let _ = write!(
                        &mut sql,
                        "\n AND json_extract(n.properties, ?{path_index}) = ?{value_index}",
                    );
                }
                Predicate::JsonPathCompare { path, op, value } => {
                    validate_json_path(path)?;
                    binds.push(BindValue::Text(path.clone()));
                    let path_index = binds.len();
                    binds.push(match value {
                        ScalarValue::Text(text) => BindValue::Text(text.clone()),
                        ScalarValue::Integer(integer) => BindValue::Integer(*integer),
                        ScalarValue::Bool(boolean) => BindValue::Bool(*boolean),
                    });
                    let value_index = binds.len();
                    let operator = match op {
                        ComparisonOp::Gt => ">",
                        ComparisonOp::Gte => ">=",
                        ComparisonOp::Lt => "<",
                        ComparisonOp::Lte => "<=",
                    };
                    let _ = write!(
                        &mut sql,
                        "\n AND json_extract(n.properties, ?{path_index}) {operator} ?{value_index}",
                    );
                }
                Predicate::KindEq(_)
                | Predicate::LogicalIdEq(_)
                | Predicate::SourceRefEq(_)
                | Predicate::ContentRefEq(_)
                | Predicate::ContentRefNotNull => {
                    // Fusable: already emitted inside base_candidates.
                }
            }
        }
    }
    let _ = write!(&mut sql, "\nLIMIT {final_limit}");
    // Enforce the bind budget only after the full statement is assembled so
    // the error reports the true count.
    if binds.len() > MAX_BIND_PARAMETERS {
        return Err(CompileError::TooManyBindParameters(binds.len()));
    }
    Ok(CompiledQuery {
        sql,
        binds,
        shape_hash,
        driving_table,
        hints,
    })
}
/// Compiles a query that carries expansion slots.
///
/// Validates the slots, compiles the root query with the expansions
/// stripped, and carries the slots through unchanged. The shape hash and
/// hints are computed over the full AST, expansions included.
///
/// # Errors
/// Returns a [`CompileError`] for too many slots, an empty or duplicate slot
/// name, an excessive expansion depth, or any root-compilation failure.
pub fn compile_grouped_query(ast: &QueryAst) -> Result<CompiledGroupedQuery, CompileError> {
    let slot_count = ast.expansions.len();
    if slot_count > MAX_EXPANSION_SLOTS {
        return Err(CompileError::TooManyExpansionSlots(slot_count));
    }
    // Validate slots in declaration order so the first offending slot
    // determines which error is reported.
    let mut seen_slots = std::collections::BTreeSet::new();
    for expansion in &ast.expansions {
        if expansion.slot.trim().is_empty() {
            return Err(CompileError::EmptyExpansionSlotName);
        }
        if expansion.max_depth > MAX_TRAVERSAL_DEPTH {
            return Err(CompileError::TraversalTooDeep(expansion.max_depth));
        }
        let is_new = seen_slots.insert(expansion.slot.clone());
        if !is_new {
            return Err(CompileError::DuplicateExpansionSlot(expansion.slot.clone()));
        }
    }
    // The root query is the same AST minus its expansions.
    let root = {
        let mut root_ast = ast.clone();
        root_ast.expansions.clear();
        compile_query(&root_ast)?
    };
    let hints = execution_hints(ast);
    let shape_hash = ShapeHash(hash_signature(&shape_signature(ast)));
    Ok(CompiledGroupedQuery {
        root,
        expansions: ast.expansions.clone(),
        shape_hash,
        hints,
    })
}
pub fn compile_search(ast: &QueryAst) -> Result<CompiledSearch, CompileError> {
let mut text_query = None;
let mut limit = None;
for step in &ast.steps {
match step {
QueryStep::TextSearch {
query,
limit: step_limit,
} => {
text_query = Some(query.clone());
limit = Some(*step_limit);
}
QueryStep::Filter(_)
| QueryStep::Search { .. }
| QueryStep::VectorSearch { .. }
| QueryStep::Traverse { .. } => {
}
}
}
let text_query = text_query.ok_or(CompileError::MissingTextSearchStep)?;
let limit = limit.unwrap_or(25);
let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
Ok(CompiledSearch {
root_kind: ast.root_kind.clone(),
text_query,
limit,
fusable_filters,
residual_filters,
attribution_requested: false,
})
}
/// Builds a two-tier search plan: a strict compiled search plus an optional
/// relaxed variant derived from it via `derive_relaxed`.
///
/// # Errors
/// Propagates any error from [`compile_search`].
#[doc(hidden)]
pub fn compile_search_plan(ast: &QueryAst) -> Result<CompiledSearchPlan, CompileError> {
    let strict = compile_search(ast)?;
    let (relaxed_query, was_degraded_at_plan_time) = derive_relaxed(&strict.text_query);
    // The relaxed branch reuses everything from the strict search except
    // the text query itself.
    let relaxed = match relaxed_query {
        Some(text_query) => Some(CompiledSearch {
            root_kind: strict.root_kind.clone(),
            text_query,
            limit: strict.limit,
            fusable_filters: strict.fusable_filters.clone(),
            residual_filters: strict.residual_filters.clone(),
            attribution_requested: strict.attribution_requested,
        }),
        None => None,
    };
    Ok(CompiledSearchPlan {
        strict,
        relaxed,
        was_degraded_at_plan_time,
    })
}
/// Builds a search plan from caller-provided strict/relaxed text queries,
/// sharing the AST's partitioned filters between both branches.
///
/// `was_degraded_at_plan_time` is always `false` here: the caller supplied
/// the relaxed query explicitly rather than it being derived by this module.
///
/// # Errors
/// Currently infallible; kept as `Result` for interface stability.
pub fn compile_search_plan_from_queries(
    ast: &QueryAst,
    strict: TextQuery,
    relaxed: Option<TextQuery>,
    limit: usize,
    attribution_requested: bool,
) -> Result<CompiledSearchPlan, CompileError> {
    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
    // Build the relaxed branch first from clones, then let the strict
    // branch take ownership of the filter vectors.
    let relaxed_compiled = relaxed.map(|text_query| CompiledSearch {
        root_kind: ast.root_kind.clone(),
        text_query,
        limit,
        fusable_filters: fusable_filters.clone(),
        residual_filters: residual_filters.clone(),
        attribution_requested,
    });
    let strict_compiled = CompiledSearch {
        root_kind: ast.root_kind.clone(),
        text_query: strict,
        limit,
        fusable_filters,
        residual_filters,
        attribution_requested,
    };
    Ok(CompiledSearchPlan {
        strict: strict_compiled,
        relaxed: relaxed_compiled,
        was_degraded_at_plan_time: false,
    })
}
/// Lowers an AST into a vector-search execution description.
///
/// The last `VectorSearch` step supplies the query text and limit; filter
/// steps are split into fusable and residual sets.
///
/// # Errors
/// Returns [`CompileError::MissingVectorSearchStep`] when the AST contains
/// no `VectorSearch` step.
pub fn compile_vector_search(ast: &QueryAst) -> Result<CompiledVectorSearch, CompileError> {
    let mut found: Option<(String, usize)> = None;
    for step in &ast.steps {
        // A later VectorSearch step overrides an earlier one.
        if let QueryStep::VectorSearch { query, limit } = step {
            found = Some((query.clone(), *limit));
        }
    }
    let (query_text, limit) = found.ok_or(CompileError::MissingVectorSearchStep)?;
    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
    Ok(CompiledVectorSearch {
        root_kind: ast.root_kind.clone(),
        query_text,
        limit,
        fusable_filters,
        residual_filters,
        attribution_requested: false,
    })
}
/// Compiles a retrieval plan from an AST carrying exactly one `Search` step.
///
/// The raw query is parsed with `TextQuery::parse`, a relaxed variant is
/// derived where possible, and the vector branch is always left empty in v1.
///
/// # Errors
/// Returns [`CompileError::MissingSearchStep`] when no `Search` step exists
/// and [`CompileError::MultipleSearchSteps`] when more than one does.
pub fn compile_retrieval_plan(ast: &QueryAst) -> Result<CompiledRetrievalPlan, CompileError> {
    // Locate the single Search step, rejecting duplicates as soon as a
    // second one is seen.
    let mut search: Option<(&str, usize)> = None;
    for step in &ast.steps {
        let QueryStep::Search { query, limit } = step else {
            continue;
        };
        if search.is_some() {
            return Err(CompileError::MultipleSearchSteps);
        }
        search = Some((query.as_str(), *limit));
    }
    let Some((raw_query, limit)) = search else {
        return Err(CompileError::MissingSearchStep);
    };
    let strict_text_query = TextQuery::parse(raw_query);
    let (relaxed_text_query, was_degraded_at_plan_time) = derive_relaxed(&strict_text_query);
    let (fusable_filters, residual_filters) = partition_search_filters(&ast.steps);
    // Relaxed branch is built first from clones; the strict branch then
    // takes ownership of the filter vectors.
    let relaxed = relaxed_text_query.map(|text_query| CompiledSearch {
        root_kind: ast.root_kind.clone(),
        text_query,
        limit,
        fusable_filters: fusable_filters.clone(),
        residual_filters: residual_filters.clone(),
        attribution_requested: false,
    });
    let strict = CompiledSearch {
        root_kind: ast.root_kind.clone(),
        text_query: strict_text_query,
        limit,
        fusable_filters,
        residual_filters,
        attribution_requested: false,
    };
    Ok(CompiledRetrievalPlan {
        text: CompiledSearchPlan {
            strict,
            relaxed,
            was_degraded_at_plan_time,
        },
        // v1 never populates the vector branch.
        vector: None,
        was_degraded_at_plan_time,
    })
}
/// 64-bit FNV-1a hash of a shape-signature string.
///
/// Deterministic and platform-independent, so the value is stable enough to
/// serve as a shape-hash key across runs.
fn hash_signature(signature: &str) -> u64 {
    // FNV-1a: XOR each byte into the state, then multiply by the prime.
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;
    signature
        .bytes()
        .fold(FNV_OFFSET_BASIS, |state, byte| {
            (state ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
        })
}
#[cfg(test)]
#[allow(clippy::expect_used, clippy::items_after_statements)]
mod tests {
    //! Unit tests for query compilation: driver selection, SQL shape,
    //! bind-parameter hygiene, validation errors, FTS5 rendering, and
    //! retrieval-plan construction.
    use rstest::rstest;
    use crate::{
        CompileError, DrivingTable, QueryBuilder, TraverseDirection, compile_grouped_query,
        compile_query,
    };
    #[test]
    fn vector_query_compiles_to_chunk_resolution() {
        let compiled = compile_query(
            &QueryBuilder::nodes("Meeting")
                .vector_search("budget", 5)
                .limit(5)
                .into_ast(),
        )
        .expect("compiled query");
        assert_eq!(compiled.driving_table, DrivingTable::VecNodes);
        // Vector hits must be resolved chunk -> node inside the CTE.
        assert!(compiled.sql.contains("JOIN chunks c ON c.id = vc.chunk_id"));
        assert!(
            compiled
                .sql
                .contains("JOIN nodes src ON src.logical_id = c.node_logical_id")
        );
    }
    #[rstest]
    #[case(5, 7)]
    #[case(3, 11)]
    fn structural_limits_change_shape_hash(#[case] left: usize, #[case] right: usize) {
        // Limits are structural: different limits must hash differently.
        let left_compiled = compile_query(
            &QueryBuilder::nodes("Meeting")
                .text_search("budget", left)
                .limit(left)
                .into_ast(),
        )
        .expect("left query");
        let right_compiled = compile_query(
            &QueryBuilder::nodes("Meeting")
                .text_search("budget", right)
                .limit(right)
                .into_ast(),
        )
        .expect("right query");
        assert_ne!(left_compiled.shape_hash, right_compiled.shape_hash);
    }
    #[test]
    fn traversal_query_is_depth_bounded() {
        let compiled = compile_query(
            &QueryBuilder::nodes("Meeting")
                .text_search("budget", 5)
                .traverse(TraverseDirection::Out, "HAS_TASK", 3)
                .limit(10)
                .into_ast(),
        )
        .expect("compiled traversal");
        assert!(compiled.sql.contains("WITH RECURSIVE"));
        assert!(compiled.sql.contains("WHERE t.depth < 3"));
    }
    #[test]
    fn text_search_compiles_to_union_over_chunk_and_property_fts() {
        let compiled = compile_query(
            &QueryBuilder::nodes("Meeting")
                .text_search("budget", 25)
                .limit(25)
                .into_ast(),
        )
        .expect("compiled text search");
        assert_eq!(compiled.driving_table, DrivingTable::FtsNodes);
        assert!(
            compiled.sql.contains("fts_nodes MATCH"),
            "must search chunk-backed FTS"
        );
        assert!(
            compiled.sql.contains("fts_node_properties MATCH"),
            "must search property-backed FTS"
        );
        assert!(compiled.sql.contains("UNION"), "must UNION both sources");
        // Query + kind bound once per UNION arm.
        assert_eq!(compiled.binds.len(), 4);
    }
    #[test]
    fn logical_id_filter_is_compiled() {
        let compiled = compile_query(
            &QueryBuilder::nodes("Meeting")
                .filter_logical_id_eq("meeting-123")
                .filter_json_text_eq("$.status", "active")
                .limit(1)
                .into_ast(),
        )
        .expect("compiled query");
        assert!(compiled.sql.contains("n.logical_id ="));
        assert!(compiled.sql.contains("src.logical_id ="));
        assert!(compiled.sql.contains("json_extract"));
        use crate::BindValue;
        // The logical id must be bound exactly once.
        assert_eq!(
            compiled
                .binds
                .iter()
                .filter(|b| matches!(b, BindValue::Text(s) if s == "meeting-123"))
                .count(),
            1
        );
    }
    #[test]
    fn compile_rejects_invalid_json_path() {
        use crate::{Predicate, QueryStep, ScalarValue};
        // An injection-shaped path must be rejected before SQL assembly.
        let mut ast = QueryBuilder::nodes("Meeting").into_ast();
        ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
            path: "$') OR 1=1 --".to_owned(),
            value: ScalarValue::Text("x".to_owned()),
        }));
        use crate::CompileError;
        let result = compile_query(&ast);
        assert!(
            matches!(result, Err(CompileError::InvalidJsonPath(_))),
            "expected InvalidJsonPath, got {result:?}"
        );
    }
    #[test]
    fn compile_accepts_valid_json_paths() {
        use crate::{Predicate, QueryStep, ScalarValue};
        for valid_path in ["$.status", "$.foo.bar", "$.a_b.c2"] {
            let mut ast = QueryBuilder::nodes("Meeting").into_ast();
            ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
                path: valid_path.to_owned(),
                value: ScalarValue::Text("v".to_owned()),
            }));
            assert!(
                compile_query(&ast).is_ok(),
                "expected valid path {valid_path:?} to compile"
            );
        }
    }
    #[test]
    fn compile_rejects_too_many_bind_parameters() {
        use crate::{Predicate, QueryStep, ScalarValue};
        let mut ast = QueryBuilder::nodes("Meeting").into_ast();
        // 8 JSON filters x 2 binds each + the root kind = 17 binds > 15.
        for i in 0..8 {
            ast.steps.push(QueryStep::Filter(Predicate::JsonPathEq {
                path: format!("$.f{i}"),
                value: ScalarValue::Text("v".to_owned()),
            }));
        }
        use crate::CompileError;
        let result = compile_query(&ast);
        assert!(
            matches!(result, Err(CompileError::TooManyBindParameters(17))),
            "expected TooManyBindParameters(17), got {result:?}"
        );
    }
    #[test]
    fn compile_rejects_excessive_traversal_depth() {
        let result = compile_query(
            &QueryBuilder::nodes("Meeting")
                .text_search("budget", 5)
                .traverse(TraverseDirection::Out, "HAS_TASK", 51)
                .limit(10)
                .into_ast(),
        );
        assert!(
            matches!(result, Err(CompileError::TraversalTooDeep(51))),
            "expected TraversalTooDeep(51), got {result:?}"
        );
    }
    #[test]
    fn grouped_queries_with_same_structure_share_shape_hash() {
        // Search terms differ but the structure is identical, so the
        // shape hashes must match.
        let left = compile_grouped_query(
            &QueryBuilder::nodes("Meeting")
                .text_search("budget", 5)
                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
                .limit(10)
                .into_ast(),
        )
        .expect("left grouped query");
        let right = compile_grouped_query(
            &QueryBuilder::nodes("Meeting")
                .text_search("planning", 5)
                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
                .limit(10)
                .into_ast(),
        )
        .expect("right grouped query");
        assert_eq!(left.shape_hash, right.shape_hash);
    }
    #[test]
    fn compile_grouped_rejects_duplicate_expansion_slot_names() {
        let result = compile_grouped_query(
            &QueryBuilder::nodes("Meeting")
                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
                .expand("tasks", TraverseDirection::Out, "HAS_DECISION", 1)
                .into_ast(),
        );
        assert!(
            matches!(result, Err(CompileError::DuplicateExpansionSlot(ref slot)) if slot == "tasks"),
            "expected DuplicateExpansionSlot(\"tasks\"), got {result:?}"
        );
    }
    #[test]
    fn flat_compile_rejects_queries_with_expansions() {
        let result = compile_query(
            &QueryBuilder::nodes("Meeting")
                .expand("tasks", TraverseDirection::Out, "HAS_TASK", 1)
                .into_ast(),
        );
        assert!(
            matches!(
                result,
                Err(CompileError::FlatCompileDoesNotSupportExpansions)
            ),
            "expected FlatCompileDoesNotSupportExpansions, got {result:?}"
        );
    }
    #[test]
    fn json_path_compiled_as_bind_parameter() {
        let compiled = compile_query(
            &QueryBuilder::nodes("Meeting")
                .filter_json_text_eq("$.status", "active")
                .limit(1)
                .into_ast(),
        )
        .expect("compiled query");
        assert!(
            !compiled.sql.contains("'$.status'"),
            "JSON path must not appear as a SQL string literal"
        );
        assert!(
            compiled.sql.contains("json_extract(src.properties, ?"),
            "JSON path must be a bind parameter (pushed into base_candidates for Nodes driver)"
        );
        use crate::BindValue;
        assert!(
            compiled
                .binds
                .iter()
                .any(|b| matches!(b, BindValue::Text(s) if s == "$.status"))
        );
        assert!(
            compiled
                .binds
                .iter()
                .any(|b| matches!(b, BindValue::Text(s) if s == "active"))
        );
    }
    #[test]
    fn nodes_driver_pushes_json_eq_filter_into_base_candidates() {
        let compiled = compile_query(
            &QueryBuilder::nodes("Meeting")
                .filter_json_text_eq("$.status", "active")
                .limit(5)
                .into_ast(),
        )
        .expect("compiled query");
        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
        assert!(
            compiled.sql.contains("json_extract(src.properties, ?"),
            "json_extract must reference src (base_candidates), got:\n{}",
            compiled.sql,
        );
        assert!(
            !compiled.sql.contains("json_extract(n.properties, ?"),
            "json_extract must NOT appear in outer WHERE for Nodes driver, got:\n{}",
            compiled.sql,
        );
    }
    #[test]
    fn nodes_driver_pushes_json_compare_filter_into_base_candidates() {
        let compiled = compile_query(
            &QueryBuilder::nodes("Meeting")
                .filter_json_integer_gte("$.priority", 5)
                .limit(10)
                .into_ast(),
        )
        .expect("compiled query");
        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
        assert!(
            compiled.sql.contains("json_extract(src.properties, ?"),
            "comparison filter must be in base_candidates, got:\n{}",
            compiled.sql,
        );
        assert!(
            !compiled.sql.contains("json_extract(n.properties, ?"),
            "comparison filter must NOT be in outer WHERE for Nodes driver",
        );
        assert!(
            compiled.sql.contains(">= ?"),
            "expected >= operator in SQL, got:\n{}",
            compiled.sql,
        );
    }
    #[test]
    fn nodes_driver_pushes_source_ref_filter_into_base_candidates() {
        let compiled = compile_query(
            &QueryBuilder::nodes("Meeting")
                .filter_source_ref_eq("ref-123")
                .limit(5)
                .into_ast(),
        )
        .expect("compiled query");
        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
        assert!(
            compiled.sql.contains("src.source_ref = ?"),
            "source_ref filter must be in base_candidates, got:\n{}",
            compiled.sql,
        );
        assert!(
            !compiled.sql.contains("n.source_ref = ?"),
            "source_ref filter must NOT be in outer WHERE for Nodes driver",
        );
    }
    #[test]
    fn nodes_driver_pushes_multiple_filters_into_base_candidates() {
        let compiled = compile_query(
            &QueryBuilder::nodes("Meeting")
                .filter_logical_id_eq("meeting-1")
                .filter_json_text_eq("$.status", "active")
                .filter_json_integer_gte("$.priority", 5)
                .filter_source_ref_eq("ref-abc")
                .limit(1)
                .into_ast(),
        )
        .expect("compiled query");
        assert_eq!(compiled.driving_table, DrivingTable::Nodes);
        assert!(
            compiled.sql.contains("src.logical_id = ?"),
            "logical_id filter must be in base_candidates",
        );
        assert!(
            compiled.sql.contains("json_extract(src.properties, ?"),
            "JSON filters must be in base_candidates",
        );
        assert!(
            compiled.sql.contains("src.source_ref = ?"),
            "source_ref filter must be in base_candidates",
        );
        use crate::BindValue;
        assert_eq!(
            compiled
                .binds
                .iter()
                .filter(|b| matches!(b, BindValue::Text(s) if s == "meeting-1"))
                .count(),
            1,
            "logical_id bind must not be duplicated"
        );
        assert_eq!(
            compiled
                .binds
                .iter()
                .filter(|b| matches!(b, BindValue::Text(s) if s == "ref-abc"))
                .count(),
            1,
            "source_ref bind must not be duplicated"
        );
    }
    #[test]
    fn fts_driver_keeps_json_filter_residual_but_fuses_kind() {
        let compiled = compile_query(
            &QueryBuilder::nodes("Meeting")
                .text_search("budget", 5)
                .filter_json_text_eq("$.status", "active")
                .filter_kind_eq("Meeting")
                .limit(5)
                .into_ast(),
        )
        .expect("compiled query");
        assert_eq!(compiled.driving_table, DrivingTable::FtsNodes);
        assert!(
            compiled.sql.contains("json_extract(n.properties, ?"),
            "JSON filter must stay residual in outer WHERE, got:\n{}",
            compiled.sql,
        );
        // Split at the final SELECT to distinguish CTE from outer WHERE.
        let (cte, outer) = compiled
            .sql
            .split_once("SELECT DISTINCT n.row_id")
            .expect("query has final SELECT");
        assert!(
            cte.contains("AND n.kind = ?"),
            "KindEq must be fused inside base_candidates CTE, got CTE:\n{cte}"
        );
        assert!(
            !outer.contains("AND n.kind = ?"),
            "KindEq must NOT appear in outer WHERE for FTS driver, got outer:\n{outer}"
        );
    }
    #[test]
    fn fts_driver_fuses_kind_filter() {
        let compiled = compile_query(
            &QueryBuilder::nodes("Goal")
                .text_search("budget", 5)
                .filter_kind_eq("Goal")
                .limit(5)
                .into_ast(),
        )
        .expect("compiled query");
        assert_eq!(compiled.driving_table, DrivingTable::FtsNodes);
        let (cte, outer) = compiled
            .sql
            .split_once("SELECT DISTINCT n.row_id")
            .expect("query has final SELECT");
        assert!(
            cte.contains("AND n.kind = ?"),
            "KindEq must be fused inside base_candidates, got:\n{cte}"
        );
        assert!(
            !outer.contains("AND n.kind = ?"),
            "KindEq must NOT be in outer WHERE, got:\n{outer}"
        );
    }
    #[test]
    fn vec_driver_fuses_kind_filter() {
        let compiled = compile_query(
            &QueryBuilder::nodes("Goal")
                .vector_search("budget", 5)
                .filter_kind_eq("Goal")
                .limit(5)
                .into_ast(),
        )
        .expect("compiled query");
        assert_eq!(compiled.driving_table, DrivingTable::VecNodes);
        let (cte, outer) = compiled
            .sql
            .split_once("SELECT DISTINCT n.row_id")
            .expect("query has final SELECT");
        assert!(
            cte.contains("AND src.kind = ?"),
            "KindEq must be fused inside base_candidates, got:\n{cte}"
        );
        assert!(
            !outer.contains("AND n.kind = ?"),
            "KindEq must NOT be in outer WHERE, got:\n{outer}"
        );
    }
    #[test]
    fn fts5_query_bind_uses_rendered_literals() {
        let compiled = compile_query(
            &QueryBuilder::nodes("Meeting")
                .text_search("User's name", 5)
                .limit(5)
                .into_ast(),
        )
        .expect("compiled query");
        use crate::BindValue;
        assert!(
            compiled
                .binds
                .iter()
                .any(|b| matches!(b, BindValue::Text(s) if s == "\"User's\" \"name\"")),
            "FTS5 query bind should use rendered literal terms; got {:?}",
            compiled.binds
        );
    }
    #[test]
    fn fts5_query_bind_supports_or_operator() {
        let compiled = compile_query(
            &QueryBuilder::nodes("Meeting")
                .text_search("ship OR docs", 5)
                .limit(5)
                .into_ast(),
        )
        .expect("compiled query");
        use crate::BindValue;
        assert!(
            compiled
                .binds
                .iter()
                .any(|b| matches!(b, BindValue::Text(s) if s == "\"ship\" OR \"docs\"")),
            "FTS5 query bind should preserve supported OR; got {:?}",
            compiled.binds
        );
    }
    #[test]
    fn fts5_query_bind_supports_not_operator() {
        let compiled = compile_query(
            &QueryBuilder::nodes("Meeting")
                .text_search("ship NOT blocked", 5)
                .limit(5)
                .into_ast(),
        )
        .expect("compiled query");
        use crate::BindValue;
        assert!(
            compiled
                .binds
                .iter()
                .any(|b| matches!(b, BindValue::Text(s) if s == "\"ship\" NOT \"blocked\"")),
            "FTS5 query bind should preserve supported NOT; got {:?}",
            compiled.binds
        );
    }
    #[test]
    fn fts5_query_bind_literalizes_clause_leading_not() {
        let compiled = compile_query(
            &QueryBuilder::nodes("Meeting")
                .text_search("NOT blocked", 5)
                .limit(5)
                .into_ast(),
        )
        .expect("compiled query");
        use crate::BindValue;
        assert!(
            compiled
                .binds
                .iter()
                .any(|b| matches!(b, BindValue::Text(s) if s == "\"NOT\" \"blocked\"")),
            "Clause-leading NOT should degrade to literals; got {:?}",
            compiled.binds
        );
    }
    #[test]
    fn fts5_query_bind_literalizes_or_not_sequence() {
        let compiled = compile_query(
            &QueryBuilder::nodes("Meeting")
                .text_search("ship OR NOT blocked", 5)
                .limit(5)
                .into_ast(),
        )
        .expect("compiled query");
        use crate::BindValue;
        assert!(
            compiled.binds.iter().any(
                |b| matches!(b, BindValue::Text(s) if s == "\"ship\" \"OR\" \"NOT\" \"blocked\"")
            ),
            "`OR NOT` should degrade to literals rather than emit invalid FTS5; got {:?}",
            compiled.binds
        );
    }
    #[test]
    fn compile_retrieval_plan_accepts_search_step() {
        use crate::{
            CompileError, Predicate, QueryAst, QueryStep, TextQuery, compile_retrieval_plan,
        };
        let ast = QueryAst {
            root_kind: "Goal".to_owned(),
            steps: vec![
                QueryStep::Search {
                    query: "ship quarterly docs".to_owned(),
                    limit: 7,
                },
                QueryStep::Filter(Predicate::KindEq("Goal".to_owned())),
            ],
            expansions: vec![],
            final_limit: None,
        };
        let plan = compile_retrieval_plan(&ast).expect("compiles");
        assert_eq!(plan.text.strict.root_kind, "Goal");
        assert_eq!(plan.text.strict.limit, 7);
        assert_eq!(plan.text.strict.fusable_filters.len(), 1);
        assert!(plan.text.strict.residual_filters.is_empty());
        // Strict parse is an AND of terms; relaxed becomes an OR.
        assert_eq!(
            plan.text.strict.text_query,
            TextQuery::And(vec![
                TextQuery::Term("ship".into()),
                TextQuery::Term("quarterly".into()),
                TextQuery::Term("docs".into()),
            ])
        );
        let relaxed = plan.text.relaxed.as_ref().expect("relaxed branch present");
        assert_eq!(
            relaxed.text_query,
            TextQuery::Or(vec![
                TextQuery::Term("ship".into()),
                TextQuery::Term("quarterly".into()),
                TextQuery::Term("docs".into()),
            ])
        );
        assert_eq!(relaxed.fusable_filters.len(), 1);
        assert!(!plan.was_degraded_at_plan_time);
        let _ = std::any::TypeId::of::<CompileError>();
    }
    #[test]
    fn compile_retrieval_plan_rejects_ast_without_search_step() {
        use crate::{CompileError, QueryBuilder, compile_retrieval_plan};
        let ast = QueryBuilder::nodes("Goal")
            .filter_kind_eq("Goal")
            .into_ast();
        let result = compile_retrieval_plan(&ast);
        assert!(
            matches!(result, Err(CompileError::MissingSearchStep)),
            "expected MissingSearchStep, got {result:?}"
        );
    }
    #[test]
    fn compile_retrieval_plan_rejects_ast_with_multiple_search_steps() {
        use crate::{CompileError, QueryAst, QueryStep, compile_retrieval_plan};
        let ast = QueryAst {
            root_kind: "Goal".to_owned(),
            steps: vec![
                QueryStep::Search {
                    query: "alpha".to_owned(),
                    limit: 5,
                },
                QueryStep::Search {
                    query: "bravo".to_owned(),
                    limit: 10,
                },
            ],
            expansions: vec![],
            final_limit: None,
        };
        let result = compile_retrieval_plan(&ast);
        assert!(
            matches!(result, Err(CompileError::MultipleSearchSteps)),
            "expected MultipleSearchSteps, got {result:?}"
        );
    }
    #[test]
    fn compile_retrieval_plan_v1_always_leaves_vector_empty() {
        use crate::{QueryAst, QueryStep, compile_retrieval_plan};
        for query in ["ship quarterly docs", "single", "", " "] {
            let ast = QueryAst {
                root_kind: "Goal".to_owned(),
                steps: vec![QueryStep::Search {
                    query: query.to_owned(),
                    limit: 10,
                }],
                expansions: vec![],
                final_limit: None,
            };
            let plan = compile_retrieval_plan(&ast).expect("compiles");
            assert!(
                plan.vector.is_none(),
                "Phase 12 v1 must always leave the vector branch empty (query = {query:?})"
            );
        }
    }
    #[test]
    fn fts5_query_bind_preserves_lowercase_not_as_literal_text() {
        let compiled = compile_query(
            &QueryBuilder::nodes("Meeting")
                .text_search("not a ship", 5)
                .limit(5)
                .into_ast(),
        )
        .expect("compiled query");
        use crate::BindValue;
        assert!(
            compiled
                .binds
                .iter()
                .any(|b| matches!(b, BindValue::Text(s) if s == "\"not\" \"a\" \"ship\"")),
            "Lowercase not should remain a literal term sequence; got {:?}",
            compiled.binds
        );
    }
}