1use crate::ast::*;
4use crate::error::QueryError;
5use crate::validate::{validate_with_warnings, MAX_DEPTH};
6
/// Relation names that denote synthetic `observed_as_*` edges rather than
/// canonical graph-edge relations.
const SYNTHETIC_RELATIONS: &[&str] = &[
    "observed_as_candidate",
    "observed_as_selected",
    "observed_as_target",
    "observed_as_signal",
];

/// Reports whether `rel` is exactly one of [`SYNTHETIC_RELATIONS`].
///
/// The comparison is case-sensitive and matches the whole name only.
fn is_synthetic(rel: &str) -> bool {
    SYNTHETIC_RELATIONS.iter().any(|&known| known == rel)
}
18
/// Maps a synthetic relation name to the role it stands for.
///
/// `observed_as_<role>` yields `Some("<role>")` for the four known roles
/// (`candidate`, `selected`, `target`, `signal`); every other input,
/// including an unknown suffix, yields `None`.
fn synthetic_role(rel: &str) -> Option<&'static str> {
    let role = match rel.strip_prefix("observed_as_")? {
        "candidate" => "candidate",
        "selected" => "selected",
        "target" => "target",
        "signal" => "signal",
        _ => return None,
    };
    Some(role)
}
28
/// Result of compiling a query: the SQL text plus everything needed to run it.
#[derive(Debug)]
pub struct CompiledQuery {
    /// Generated SQL. Values are never inlined; they are referenced through
    /// numbered `?N` placeholders.
    pub sql: String,
    /// Values to bind. Placeholder `?N` refers to the N-th entry (1-based),
    /// because each placeholder number is taken from `params.len()` right
    /// after the value is pushed.
    pub params: Vec<QueryValue>,
    /// Copy of the query's RETURN items, in their original order.
    pub return_vars: Vec<ReturnItem>,
    /// Warnings reported by `validate_with_warnings` during compilation.
    pub warnings: Vec<String>,
}
37
/// Caller-supplied settings that shape how a query is compiled.
///
/// `Debug` and `Clone` are derived so options can be logged and reused across
/// compilations.
#[derive(Debug, Clone)]
pub struct CompileOptions {
    /// Namespaces the query is restricted to. An empty list adds no namespace
    /// filter to the generated SQL.
    pub scopes: Vec<String>,
    /// Upper bound on the compiled `LIMIT`; also the limit used when the
    /// query does not specify one.
    pub max_limit: usize,
}

impl Default for CompileOptions {
    /// No namespace restriction and a row cap of 500.
    fn default() -> Self {
        Self {
            scopes: Vec::new(),
            max_limit: 500,
        }
    }
}
54
55pub fn compile(query: &GqlQuery, opts: &CompileOptions) -> Result<CompiledQuery, QueryError> {
57 if query.pattern.elements.is_empty() {
58 return Err(QueryError::Compile("empty pattern".into()));
59 }
60
61 let mut query = query.clone();
63 let warnings = validate_with_warnings(&mut query)?;
64
65 let mut compiled = if query.pattern.has_variable_length() {
66 compile_variable_length(&query, opts)?
67 } else {
68 compile_fixed_length(&query, opts)?
69 };
70 compiled.warnings = warnings;
71 Ok(compiled)
72}
73
74fn namespace_filter(alias: &str, opts: &CompileOptions, params: &mut Vec<QueryValue>) -> String {
75 if opts.scopes.is_empty() {
76 String::new()
77 } else if opts.scopes.len() == 1 {
78 params.push(QueryValue::Text(opts.scopes[0].clone()));
79 format!(" AND {alias}.namespace = ?{}", params.len())
80 } else {
81 let placeholders: Vec<String> = opts
82 .scopes
83 .iter()
84 .map(|s| {
85 params.push(QueryValue::Text(s.clone()));
86 format!("?{}", params.len())
87 })
88 .collect();
89 format!(" AND {alias}.namespace IN ({})", placeholders.join(", "))
90 }
91}
92
93fn synthetic_endpoint_node_indices(
95 elements: &[PatternElement],
96) -> (
97 std::collections::HashSet<usize>,
98 std::collections::HashSet<usize>,
99) {
100 let mut source_set = std::collections::HashSet::new();
101 let mut target_set = std::collections::HashSet::new();
102 let mut node_idx = 0usize;
103 let mut prev_node_idx: Option<usize> = None;
104 for element in elements {
105 match element {
106 PatternElement::Node(_) => {
107 prev_node_idx = Some(node_idx);
108 node_idx += 1;
109 }
110 PatternElement::Edge(ep) => {
111 let has_synthetic = ep.relations.iter().any(|r| is_synthetic(r));
112 if has_synthetic {
113 if let Some(src_idx) = prev_node_idx {
114 source_set.insert(src_idx);
115 target_set.insert(node_idx);
117 }
118 }
119 }
120 }
121 }
122 (source_set, target_set)
123}
124
125fn compile_fixed_length(
127 query: &GqlQuery,
128 opts: &CompileOptions,
129) -> Result<CompiledQuery, QueryError> {
130 let mut params: Vec<QueryValue> = Vec::new();
131 let mut from_parts: Vec<String> = Vec::new();
132 let mut join_parts: Vec<String> = Vec::new();
133 let mut where_parts: Vec<String> = Vec::new();
134 let mut select_parts: Vec<String> = Vec::new();
135
136 let mut node_aliases: Vec<String> = Vec::new();
137 let mut edge_aliases: Vec<String> = Vec::new();
138 let mut var_to_alias: std::collections::HashMap<String, (String, VarKind)> =
139 std::collections::HashMap::new();
140
141 let (event_source_indices, note_target_indices) =
144 synthetic_endpoint_node_indices(&query.pattern.elements);
145
146 let mut node_idx = 0usize;
147 let mut edge_idx = 0usize;
148
149 for element in &query.pattern.elements {
150 match element {
151 PatternElement::Node(np) => {
152 let alias = format!("n{node_idx}");
153 node_aliases.push(alias.clone());
154
155 let is_event_source = event_source_indices.contains(&node_idx);
156 let is_note_target = note_target_indices.contains(&node_idx);
157
158 if node_idx == 0 {
159 if is_event_source {
160 from_parts.push(format!("events {alias}"));
161 } else {
162 if !is_note_target {
164 from_parts.push(format!("entities {alias}"));
165 }
166 }
167 }
168
169 if is_event_source {
170 let ns_filter = namespace_filter(&alias, opts, &mut params);
173 if !ns_filter.is_empty() {
174 where_parts.push(ns_filter.trim_start_matches(" AND ").to_string());
175 }
176 if let Some(ref kind) = np.kind {
178 params.push(QueryValue::Text(kind.clone()));
179 where_parts.push(format!("{alias}.kind = ?{}", params.len()));
180 }
181 if np.entity_type.is_some() {
183 return Err(QueryError::Compile(
184 "event nodes do not have an entity_type column".into(),
185 ));
186 }
187 if !np.properties.is_empty() {
188 return Err(QueryError::Compile(
189 "event nodes do not support inline property filters; \
190 use a WHERE clause on verb, outcome, or payload fields"
191 .into(),
192 ));
193 }
194 } else if is_note_target {
195 where_parts.push(format!("{alias}.deleted_at IS NULL"));
197
198 let ns_filter = namespace_filter(&alias, opts, &mut params);
199 if !ns_filter.is_empty() {
200 where_parts.push(ns_filter.trim_start_matches(" AND ").to_string());
201 }
202
203 if let Some(ref kind) = np.kind {
204 params.push(QueryValue::Text(kind.clone()));
205 where_parts.push(format!("{alias}.kind = ?{}", params.len()));
206 }
207
208 if np.entity_type.is_some() {
210 return Err(QueryError::Compile(
211 "observed note targets do not have an entity_type column".into(),
212 ));
213 }
214
215 let mut props: Vec<_> = np.properties.iter().collect();
216 props.sort_by_key(|(k, _)| k.as_str());
217 for (key, val) in props {
218 params.push(QueryValue::Text(val.clone()));
219 if key == "name" || key == "content" {
220 where_parts
221 .push(format!("{alias}.{key} = ?{} COLLATE NOCASE", params.len()));
222 } else {
223 where_parts.push(format!(
224 "json_extract({alias}.properties, '$.{}') = ?{} COLLATE NOCASE",
225 key.replace('\'', "''"),
226 params.len()
227 ));
228 }
229 }
230 } else {
231 where_parts.push(format!("{alias}.deleted_at IS NULL"));
232
233 let ns_filter = namespace_filter(&alias, opts, &mut params);
234 if !ns_filter.is_empty() {
235 where_parts.push(ns_filter.trim_start_matches(" AND ").to_string());
236 }
237
238 if let Some(ref kind) = np.kind {
239 params.push(QueryValue::Text(kind.clone()));
240 where_parts.push(format!("{alias}.kind = ?{}", params.len()));
241 }
242
243 if let Some(ref et) = np.entity_type {
244 params.push(QueryValue::Text(et.clone()));
245 where_parts.push(format!("{alias}.entity_type = ?{}", params.len()));
246 }
247
248 let mut props: Vec<_> = np.properties.iter().collect();
249 props.sort_by_key(|(k, _)| k.as_str());
250 for (key, val) in props {
251 params.push(QueryValue::Text(val.clone()));
252 if key == "name" {
253 where_parts
254 .push(format!("{alias}.name = ?{} COLLATE NOCASE", params.len()));
255 } else {
256 where_parts.push(format!(
257 "json_extract({alias}.properties, '$.{}') = ?{} COLLATE NOCASE",
258 key.replace('\'', "''"),
259 params.len()
260 ));
261 }
262 }
263 }
264
265 if let Some(ref var) = np.variable {
266 let kind = if is_event_source {
267 VarKind::EventNode
268 } else if is_note_target {
269 VarKind::NoteNode
270 } else {
271 VarKind::Node
272 };
273 var_to_alias.insert(var.clone(), (alias.clone(), kind));
274 }
275
276 node_idx += 1;
277 }
278 PatternElement::Edge(ep) => {
279 let e_alias = format!("e{edge_idx}");
280 let prev_node = &node_aliases[node_aliases.len() - 1];
281 let next_alias = format!("n{}", node_idx);
282
283 edge_aliases.push(e_alias.clone());
284
285 let has_synthetic = ep.relations.iter().any(|r| is_synthetic(r));
290 let has_canonical = ep.relations.iter().any(|r| !is_synthetic(r));
291 if has_synthetic && has_canonical {
292 return Err(QueryError::Compile(
293 "cannot mix synthetic observed_as_* relations with canonical edge relations \
294 in a single edge pattern"
295 .into(),
296 ));
297 }
298
299 if has_synthetic {
300 if !matches!(ep.direction, EdgeDirection::Out) {
304 return Err(QueryError::Compile(
305 "synthetic observed_as_* edges are always event → entity (outbound only)".into(),
306 ));
307 }
308 join_parts.push(format!(
309 "JOIN event_observations {e_alias} ON {e_alias}.event_id = {prev_node}.id"
310 ));
311 let roles: Vec<&'static str> = ep
313 .relations
314 .iter()
315 .filter_map(|r| synthetic_role(r))
316 .collect();
317 if roles.len() == 1 {
318 params.push(QueryValue::Text(roles[0].to_string()));
319 where_parts.push(format!("{e_alias}.role = ?{}", params.len()));
320 } else if roles.len() > 1 {
321 let placeholders: Vec<String> = roles
322 .iter()
323 .map(|r| {
324 params.push(QueryValue::Text(r.to_string()));
325 format!("?{}", params.len())
326 })
327 .collect();
328 where_parts
329 .push(format!("{e_alias}.role IN ({})", placeholders.join(", ")));
330 }
331 join_parts.push(format!(
338 "JOIN notes {next_alias} ON {next_alias}.id = {e_alias}.entity_id \
339 AND {e_alias}.referent_kind = 'note'"
340 ));
341 } else {
342 let (source_join, target_join) = match ep.direction {
344 EdgeDirection::Out => (
345 format!("{e_alias}.source_id = {prev_node}.id"),
346 "target_id",
347 ),
348 EdgeDirection::In => (
349 format!("{e_alias}.target_id = {prev_node}.id"),
350 "source_id",
351 ),
352 EdgeDirection::Both => (
353 format!(
354 "({e_alias}.source_id = {prev_node}.id OR {e_alias}.target_id = {prev_node}.id)"
355 ),
356 "CASE_BOTH",
357 ),
358 };
359
360 let next_join_col = if target_join == "CASE_BOTH" {
361 format!(
362 "CASE WHEN {e_alias}.source_id = {prev_node}.id THEN {e_alias}.target_id ELSE {e_alias}.source_id END"
363 )
364 } else {
365 format!("{e_alias}.{target_join}")
366 };
367
368 join_parts.push(format!(
369 "JOIN graph_edges {e_alias} ON {source_join} AND {e_alias}.deleted_at IS NULL"
370 ));
371
372 let ens_filter = namespace_filter(&e_alias, opts, &mut params);
373 if !ens_filter.is_empty() {
374 where_parts.push(ens_filter.trim_start_matches(" AND ").to_string());
375 }
376
377 join_parts.push(format!(
378 "JOIN entities {next_alias} ON {next_alias}.id = {next_join_col}"
379 ));
380
381 if !ep.relations.is_empty() {
382 if ep.relations.len() == 1 {
383 params.push(QueryValue::Text(ep.relations[0].clone()));
384 where_parts.push(format!("{e_alias}.relation = ?{}", params.len()));
385 } else {
386 let placeholders: Vec<String> = ep
387 .relations
388 .iter()
389 .map(|r| {
390 params.push(QueryValue::Text(r.clone()));
391 format!("?{}", params.len())
392 })
393 .collect();
394 where_parts.push(format!(
395 "{e_alias}.relation IN ({})",
396 placeholders.join(", ")
397 ));
398 }
399 }
400 }
401
402 if let Some(ref var) = ep.variable {
403 var_to_alias.insert(var.clone(), (e_alias.clone(), VarKind::Edge));
404 }
405
406 edge_idx += 1;
407 }
408 }
409 }
410
411 if let Some(where_sql) = compile_where_expr(&query.where_clause, &var_to_alias, &mut params)? {
413 where_parts.push(where_sql);
414 }
415
416 for item in &query.return_items {
418 let var = item.variable();
419 if let Some((alias, kind)) = var_to_alias.get(var) {
420 match item {
421 ReturnItem::Property(_, prop) => {
422 let col = property_to_column(prop, kind)?;
423 select_parts.push(format!("{alias}.{col} AS {var}_{prop}"));
424 }
425 ReturnItem::Variable(_) => match kind {
426 VarKind::Node => {
427 select_parts.push(format!(
428 "{alias}.id AS {var}_id, {alias}.namespace AS {var}_namespace, \
429 {alias}.kind AS {var}_kind, {alias}.entity_type AS {var}_entity_type, \
430 {alias}.name AS {var}_name, \
431 {alias}.properties AS {var}_properties, \
432 {alias}.created_at AS {var}_created_at, \
433 {alias}.updated_at AS {var}_updated_at"
434 ));
435 }
436 VarKind::NoteNode => {
437 select_parts.push(format!(
438 "{alias}.id AS {var}_id, {alias}.namespace AS {var}_namespace, \
439 {alias}.kind AS {var}_kind, {alias}.status AS {var}_status, \
440 {alias}.content AS {var}_content, \
441 {alias}.salience AS {var}_salience, \
442 {alias}.properties AS {var}_properties, \
443 {alias}.created_at AS {var}_created_at, \
444 {alias}.updated_at AS {var}_updated_at"
445 ));
446 }
447 VarKind::EventNode => {
448 select_parts.push(format!(
449 "{alias}.id AS {var}_id, {alias}.namespace AS {var}_namespace, \
450 {alias}.verb AS {var}_verb, {alias}.substrate AS {var}_substrate, \
451 {alias}.actor AS {var}_actor, {alias}.kind AS {var}_kind, \
452 {alias}.outcome AS {var}_outcome, \
453 {alias}.payload AS {var}_payload, \
454 {alias}.created_at AS {var}_created_at"
455 ));
456 }
457 VarKind::Edge => {
458 select_parts.push(format!(
459 "{alias}.id AS {var}_id, {alias}.source_id AS {var}_source, \
460 {alias}.target_id AS {var}_target, \
461 {alias}.relation AS {var}_relation, \
462 {alias}.weight AS {var}_weight"
463 ));
464 }
465 },
466 }
467 } else {
468 return Err(QueryError::Compile(format!(
469 "unknown variable '{var}' in RETURN clause"
470 )));
471 }
472 }
473
474 let limit = query.limit.unwrap_or(opts.max_limit).min(opts.max_limit);
475 let limit_i64 = i64::try_from(limit)
476 .map_err(|_| QueryError::InvalidInput("limit exceeds i64::MAX".into()))?;
477 params.push(QueryValue::Integer(limit_i64));
478
479 let sql = format!(
480 "SELECT {} FROM {} {} WHERE {} LIMIT ?{}",
481 select_parts.join(", "),
482 from_parts.join(", "),
483 join_parts.join(" "),
484 where_parts.join(" AND "),
485 params.len(),
486 );
487
488 Ok(CompiledQuery {
489 sql,
490 params,
491 return_vars: query.return_items.clone(),
492 warnings: Vec::new(),
493 })
494}
495
496fn compile_where_expr(
498 expr: &WhereExpr,
499 var_to_alias: &std::collections::HashMap<String, (String, VarKind)>,
500 params: &mut Vec<QueryValue>,
501) -> Result<Option<String>, QueryError> {
502 match expr {
503 WhereExpr::True => Ok(None),
504 WhereExpr::Condition(cond) => {
505 let sql = compile_single_condition(cond, var_to_alias, params)?;
506 Ok(Some(sql))
507 }
508 WhereExpr::And(l, r) => {
509 let ls = compile_where_expr(l, var_to_alias, params)?;
510 let rs = compile_where_expr(r, var_to_alias, params)?;
511 Ok(match (ls, rs) {
512 (None, None) => None,
513 (Some(s), None) | (None, Some(s)) => Some(s),
514 (Some(l), Some(r)) => Some(format!("{l} AND {r}")),
515 })
516 }
517 WhereExpr::Or(l, r) => {
518 let ls = compile_where_expr(l, var_to_alias, params)?;
519 let rs = compile_where_expr(r, var_to_alias, params)?;
520 Ok(match (ls, rs) {
521 (None, None) => None,
522 (Some(s), None) | (None, Some(s)) => Some(s),
523 (Some(l), Some(r)) => Some(format!("({l} OR {r})")),
524 })
525 }
526 }
527}
528
529fn compile_single_condition(
530 cond: &Condition,
531 var_to_alias: &std::collections::HashMap<String, (String, VarKind)>,
532 params: &mut Vec<QueryValue>,
533) -> Result<String, QueryError> {
534 let (alias, kind) = var_to_alias.get(&cond.variable).ok_or_else(|| {
535 QueryError::Compile(format!(
536 "unknown variable '{}' in WHERE clause",
537 cond.variable
538 ))
539 })?;
540
541 let col_expr = match kind {
542 VarKind::Node => {
543 if cond.property == "name"
544 || cond.property == "kind"
545 || cond.property == "entity_type"
546 || cond.property == "namespace"
547 {
548 format!("{alias}.{}", cond.property)
549 } else {
550 format!(
551 "json_extract({alias}.properties, '$.{}')",
552 cond.property.replace('\'', "''")
553 )
554 }
555 }
556 VarKind::NoteNode => {
557 if NOTE_COLUMNS.contains(&cond.property.as_str()) {
558 format!("{alias}.{}", cond.property)
559 } else {
560 format!(
561 "json_extract({alias}.properties, '$.{}')",
562 cond.property.replace('\'', "''")
563 )
564 }
565 }
566 VarKind::EventNode => {
567 if EVENT_COLUMNS.contains(&cond.property.as_str()) {
569 format!("{alias}.{}", cond.property)
570 } else {
571 return Err(QueryError::Validation(format!(
572 "event property '{}' not queryable; valid columns: {}",
573 cond.property,
574 EVENT_COLUMNS.join(", ")
575 )));
576 }
577 }
578 VarKind::Edge => match cond.property.as_str() {
579 "relation" | "weight" => format!("{alias}.{}", cond.property),
580 other => {
581 return Err(QueryError::Validation(format!(
582 "edge property '{other}' not queryable; use 'relation' or 'weight'"
583 )))
584 }
585 },
586 };
587
588 let op_str = match cond.op {
589 CompareOp::Eq => "=",
590 CompareOp::Neq => "!=",
591 CompareOp::Gt => ">",
592 CompareOp::Lt => "<",
593 CompareOp::Gte => ">=",
594 CompareOp::Lte => "<=",
595 CompareOp::Like => "LIKE",
596 };
597
598 let sql = match &cond.value {
599 ConditionValue::String(s) => {
600 params.push(QueryValue::Text(s.clone()));
601 let collate = if matches!(cond.op, CompareOp::Eq | CompareOp::Like) {
602 " COLLATE NOCASE"
603 } else {
604 ""
605 };
606 format!("{col_expr} {op_str} ?{}{}", params.len(), collate)
607 }
608 ConditionValue::Number(n) => {
609 if !n.is_finite() {
610 return Err(QueryError::InvalidInput(
611 "non-finite float (NaN or Infinity) is not a valid query parameter".into(),
612 ));
613 }
614 params.push(QueryValue::Float(*n));
615 format!("{col_expr} {op_str} ?{}", params.len())
616 }
617 ConditionValue::Bool(b) => {
618 params.push(QueryValue::Integer(if *b { 1 } else { 0 }));
619 format!("{col_expr} {op_str} ?{}", params.len())
620 }
621 };
622 Ok(sql)
623}
624
625fn expr_endpoint_set(
626 expr: &WhereExpr,
627 start_var: Option<&str>,
628 end_var: Option<&str>,
629) -> (bool, bool) {
630 match expr {
631 WhereExpr::True => (false, false),
632 WhereExpr::Condition(c) => {
633 let is_start = start_var == Some(c.variable.as_str());
634 let is_end = end_var == Some(c.variable.as_str());
635 (is_start, is_end)
636 }
637 WhereExpr::And(l, r) | WhereExpr::Or(l, r) => {
638 let (ls, le) = expr_endpoint_set(l, start_var, end_var);
639 let (rs, re) = expr_endpoint_set(r, start_var, end_var);
640 (ls || rs, le || re)
641 }
642 }
643}
644
645fn reject_or_spanning_endpoints(
647 expr: &WhereExpr,
648 start: &NodePattern,
649 end: &NodePattern,
650) -> Result<(), QueryError> {
651 let start_var = start.variable.as_deref();
652 let end_var = end.variable.as_deref();
653 reject_or_spanning_impl(expr, start_var, end_var)
654}
655
656fn reject_or_spanning_impl(
657 expr: &WhereExpr,
658 start_var: Option<&str>,
659 end_var: Option<&str>,
660) -> Result<(), QueryError> {
661 match expr {
662 WhereExpr::True | WhereExpr::Condition(_) => Ok(()),
663 WhereExpr::And(l, r) => {
664 reject_or_spanning_impl(l, start_var, end_var)?;
665 reject_or_spanning_impl(r, start_var, end_var)
666 }
667 WhereExpr::Or(l, r) => {
668 let (l_start, l_end) = expr_endpoint_set(l, start_var, end_var);
669 let (r_start, r_end) = expr_endpoint_set(r, start_var, end_var);
670 let spans_start = l_start || r_start;
671 let spans_end = l_end || r_end;
672 if spans_start && spans_end {
673 return Err(QueryError::Unsupported(
674 "WHERE clauses that span both endpoints in a variable-length pattern \
675 are not yet supported; rewrite as separate queries or restrict each \
676 OR branch to one endpoint"
677 .into(),
678 ));
679 }
680 reject_or_spanning_impl(l, start_var, end_var)?;
682 reject_or_spanning_impl(r, start_var, end_var)
683 }
684 }
685}
686
687fn compile_var_len_condition(
688 cond: &Condition,
689 start_var: Option<&str>,
690 end_var: Option<&str>,
691 params: &mut Vec<QueryValue>,
692) -> Result<(String, &'static str), QueryError> {
693 let col_alias = if start_var == Some(cond.variable.as_str()) {
694 "s"
695 } else if end_var == Some(cond.variable.as_str()) {
696 "r"
697 } else {
698 return Err(QueryError::Compile(format!(
699 "variable '{}' in WHERE not supported in variable-length pattern \
700 (only start/end node variables)",
701 cond.variable
702 )));
703 };
704
705 let col_expr =
706 if cond.property == "name" || cond.property == "kind" || cond.property == "entity_type" {
707 format!("{col_alias}.{}", cond.property)
708 } else {
709 format!(
710 "json_extract({col_alias}.properties, '$.{}')",
711 cond.property.replace('\'', "''")
712 )
713 };
714
715 let op_str = match cond.op {
716 CompareOp::Eq => "=",
717 CompareOp::Neq => "!=",
718 CompareOp::Gt => ">",
719 CompareOp::Lt => "<",
720 CompareOp::Gte => ">=",
721 CompareOp::Lte => "<=",
722 CompareOp::Like => "LIKE",
723 };
724
725 let sql = match &cond.value {
726 ConditionValue::String(s) => {
727 params.push(QueryValue::Text(s.clone()));
728 let collate = if matches!(cond.op, CompareOp::Eq | CompareOp::Like) {
729 " COLLATE NOCASE"
730 } else {
731 ""
732 };
733 format!("{col_expr} {op_str} ?{}{collate}", params.len())
734 }
735 ConditionValue::Number(n) => {
736 if !n.is_finite() {
737 return Err(QueryError::InvalidInput(
738 "non-finite float (NaN or Infinity) is not a valid query parameter".into(),
739 ));
740 }
741 params.push(QueryValue::Float(*n));
742 format!("{col_expr} {op_str} ?{}", params.len())
743 }
744 ConditionValue::Bool(b) => {
745 params.push(QueryValue::Integer(if *b { 1 } else { 0 }));
746 format!("{col_expr} {op_str} ?{}", params.len())
747 }
748 };
749 Ok((sql, col_alias))
750}
751
752fn compile_variable_length_where(
754 expr: &WhereExpr,
755 start_var: Option<&str>,
756 end_var: Option<&str>,
757 params: &mut Vec<QueryValue>,
758 start_conditions: &mut Vec<String>,
759 end_conditions: &mut Vec<String>,
760) -> Result<Option<String>, QueryError> {
761 match expr {
762 WhereExpr::True => Ok(None),
763 WhereExpr::Condition(cond) => {
764 let (sql, alias) = compile_var_len_condition(cond, start_var, end_var, params)?;
765 if alias == "s" {
766 start_conditions.push(sql);
767 } else {
768 end_conditions.push(sql);
769 }
770 Ok(None)
771 }
772 WhereExpr::And(l, r) => {
773 compile_variable_length_where(
774 l,
775 start_var,
776 end_var,
777 params,
778 start_conditions,
779 end_conditions,
780 )?;
781 compile_variable_length_where(
782 r,
783 start_var,
784 end_var,
785 params,
786 start_conditions,
787 end_conditions,
788 )?;
789 Ok(None)
790 }
791 WhereExpr::Or(l, r) => {
792 let l_sql = compile_variable_length_where_to_sql(l, start_var, end_var, params)?;
796 let r_sql = compile_variable_length_where_to_sql(r, start_var, end_var, params)?;
797 match (l_sql, r_sql) {
798 (None, None) => {}
799 (Some((ls, la)), None) => {
800 if la == "s" {
801 start_conditions.push(ls);
802 } else {
803 end_conditions.push(ls);
804 }
805 }
806 (None, Some((rs, ra))) => {
807 if ra == "s" {
808 start_conditions.push(rs);
809 } else {
810 end_conditions.push(rs);
811 }
812 }
813 (Some((ls, la)), Some((rs, _ra))) => {
814 let combined = format!("({ls} OR {rs})");
816 if la == "s" {
817 start_conditions.push(combined);
818 } else {
819 end_conditions.push(combined);
820 }
821 }
822 }
823 Ok(None)
824 }
825 }
826}
827
828fn compile_variable_length_where_to_sql(
830 expr: &WhereExpr,
831 start_var: Option<&str>,
832 end_var: Option<&str>,
833 params: &mut Vec<QueryValue>,
834) -> Result<Option<(String, &'static str)>, QueryError> {
835 match expr {
836 WhereExpr::True => Ok(None),
837 WhereExpr::Condition(cond) => {
838 let (sql, alias) = compile_var_len_condition(cond, start_var, end_var, params)?;
839 Ok(Some((sql, alias)))
840 }
841 WhereExpr::And(l, r) => {
842 let ls = compile_variable_length_where_to_sql(l, start_var, end_var, params)?;
843 let rs = compile_variable_length_where_to_sql(r, start_var, end_var, params)?;
844 Ok(match (ls, rs) {
845 (None, None) => None,
846 (Some(s), None) | (None, Some(s)) => Some(s),
847 (Some((lsql, la)), Some((rsql, _))) => Some((format!("{lsql} AND {rsql}"), la)),
848 })
849 }
850 WhereExpr::Or(l, r) => {
851 let ls = compile_variable_length_where_to_sql(l, start_var, end_var, params)?;
852 let rs = compile_variable_length_where_to_sql(r, start_var, end_var, params)?;
853 Ok(match (ls, rs) {
854 (None, None) => None,
855 (Some(s), None) | (None, Some(s)) => Some(s),
856 (Some((lsql, la)), Some((rsql, _))) => Some((format!("({lsql} OR {rsql})"), la)),
857 })
858 }
859 }
860}
861
862fn compile_variable_length(
864 query: &GqlQuery,
865 opts: &CompileOptions,
866) -> Result<CompiledQuery, QueryError> {
867 let mut params: Vec<QueryValue> = Vec::new();
868 let mut var_to_alias: std::collections::HashMap<String, (String, VarKind)> =
869 std::collections::HashMap::new();
870
871 let nodes: Vec<&NodePattern> = query.pattern.nodes().collect();
875 let edges: Vec<&EdgePattern> = query.pattern.edges().collect();
876
877 if nodes.len() != 2 || edges.len() != 1 || query.pattern.elements.len() != 3 {
878 return Err(QueryError::Unsupported(
879 "variable-length patterns must be a single start_node -[*N..M]-> end_node \
880 (mixed fixed/variable chains are not yet implemented)"
881 .into(),
882 ));
883 }
884
885 let start = &nodes[0];
886 let edge = &edges[0];
887 let end = &nodes[1];
888
889 if edge.relations.iter().any(|r| is_synthetic(r)) {
894 return Err(QueryError::Unsupported(
895 "synthetic observed_as_* edges cannot be variable-length; \
896 use a fixed-length edge pattern instead"
897 .into(),
898 ));
899 }
900
901 let max_depth = edge.max_hops.min(MAX_DEPTH);
903 let min_depth = edge.min_hops;
904
905 let mut start_conditions: Vec<String> = vec!["s.deleted_at IS NULL".to_string()];
907 let ns_filter = namespace_filter("s", opts, &mut params);
908 if !ns_filter.is_empty() {
909 start_conditions.push(ns_filter.trim_start_matches(" AND ").to_string());
910 }
911
912 if let Some(ref kind) = start.kind {
913 params.push(QueryValue::Text(kind.clone()));
914 start_conditions.push(format!("s.kind = ?{}", params.len()));
915 }
916 if let Some(ref et) = start.entity_type {
917 params.push(QueryValue::Text(et.clone()));
918 start_conditions.push(format!("s.entity_type = ?{}", params.len()));
919 }
920 let mut start_props: Vec<_> = start.properties.iter().collect();
921 start_props.sort_by_key(|(k, _)| k.as_str());
922 for (key, val) in start_props {
923 params.push(QueryValue::Text(val.clone()));
924 if key == "name" {
925 start_conditions.push(format!("s.name = ?{} COLLATE NOCASE", params.len()));
926 } else {
927 start_conditions.push(format!(
928 "json_extract(s.properties, '$.{}') = ?{} COLLATE NOCASE",
929 key.replace('\'', "''"),
930 params.len()
931 ));
932 }
933 }
934
935 let mut relation_condition = String::new();
937 if !edge.relations.is_empty() {
938 if edge.relations.len() == 1 {
939 params.push(QueryValue::Text(edge.relations[0].clone()));
940 relation_condition = format!(" AND e.relation = ?{}", params.len());
941 } else {
942 let placeholders: Vec<String> = edge
943 .relations
944 .iter()
945 .map(|r| {
946 params.push(QueryValue::Text(r.clone()));
947 format!("?{}", params.len())
948 })
949 .collect();
950 relation_condition = format!(" AND e.relation IN ({})", placeholders.join(", "));
951 }
952 }
953
954 let e_ns_filter = namespace_filter("e", opts, &mut params);
956
957 let (seed_join, seed_next, recurse_join, recurse_next) = match edge.direction {
959 EdgeDirection::Out => (
960 "e.source_id = s.id",
961 "e.target_id",
962 "e.source_id = t.current_id",
963 "e.target_id",
964 ),
965 EdgeDirection::In => (
966 "e.target_id = s.id",
967 "e.source_id",
968 "e.target_id = t.current_id",
969 "e.source_id",
970 ),
971 EdgeDirection::Both => (
972 "(e.source_id = s.id OR e.target_id = s.id)",
973 "CASE WHEN e.source_id = s.id THEN e.target_id ELSE e.source_id END",
974 "(e.source_id = t.current_id OR e.target_id = t.current_id)",
975 "CASE WHEN e.source_id = t.current_id THEN e.target_id ELSE e.source_id END",
976 ),
977 };
978
979 let next_node_ns_filter = namespace_filter("next_node", opts, &mut params);
984
985 let max_depth_i64 = i64::try_from(max_depth)
986 .map_err(|_| QueryError::InvalidInput("max_depth exceeds i64::MAX".into()))?;
987 params.push(QueryValue::Integer(max_depth_i64));
988 let depth_param = params.len();
989
990 let mut end_conditions: Vec<String> = vec!["r.deleted_at IS NULL".to_string()];
994 let r_ns_filter = namespace_filter("r", opts, &mut params);
995 if !r_ns_filter.is_empty() {
996 end_conditions.push(r_ns_filter.trim_start_matches(" AND ").to_string());
997 }
998 if let Some(ref kind) = end.kind {
999 params.push(QueryValue::Text(kind.clone()));
1000 end_conditions.push(format!("r.kind = ?{}", params.len()));
1001 }
1002 if let Some(ref et) = end.entity_type {
1003 params.push(QueryValue::Text(et.clone()));
1004 end_conditions.push(format!("r.entity_type = ?{}", params.len()));
1005 }
1006 let mut end_props: Vec<_> = end.properties.iter().collect();
1007 end_props.sort_by_key(|(k, _)| k.as_str());
1008 for (key, val) in end_props {
1009 params.push(QueryValue::Text(val.clone()));
1010 if key == "name" {
1011 end_conditions.push(format!("r.name = ?{} COLLATE NOCASE", params.len()));
1012 } else {
1013 end_conditions.push(format!(
1014 "json_extract(r.properties, '$.{}') = ?{} COLLATE NOCASE",
1015 key.replace('\'', "''"),
1016 params.len()
1017 ));
1018 }
1019 }
1020
1021 reject_or_spanning_endpoints(&query.where_clause, start, end)?;
1025
1026 if let Some(where_sql) = compile_variable_length_where(
1030 &query.where_clause,
1031 start.variable.as_deref(),
1032 end.variable.as_deref(),
1033 &mut params,
1034 &mut start_conditions,
1035 &mut end_conditions,
1036 )? {
1037 start_conditions.push(where_sql);
1041 }
1042
1043 if min_depth > 0 {
1045 let min_depth_i64 = i64::try_from(min_depth)
1046 .map_err(|_| QueryError::InvalidInput("min_depth exceeds i64::MAX".into()))?;
1047 params.push(QueryValue::Integer(min_depth_i64));
1048 end_conditions.push(format!("t.depth >= ?{}", params.len()));
1049 }
1050
1051 let limit = query.limit.unwrap_or(opts.max_limit).min(opts.max_limit);
1052 let limit_i64 = i64::try_from(limit)
1053 .map_err(|_| QueryError::InvalidInput("limit exceeds i64::MAX".into()))?;
1054 params.push(QueryValue::Integer(limit_i64));
1055 let limit_param = params.len();
1056
1057 if let Some(ref var) = start.variable {
1059 var_to_alias.insert(var.clone(), ("s".to_string(), VarKind::Node));
1060 }
1061 if let Some(ref var) = end.variable {
1062 var_to_alias.insert(var.clone(), ("r".to_string(), VarKind::Node));
1063 }
1064 if let Some(ref var) = edge.variable {
1065 var_to_alias.insert(var.clone(), ("e".to_string(), VarKind::Edge));
1066 }
1067
1068 let mut select_parts: Vec<String> = Vec::new();
1070 let mut has_start = false;
1071
1072 for item in &query.return_items {
1073 let var = item.variable();
1074 if let Some((_, kind)) = var_to_alias.get(var) {
1075 match item {
1076 ReturnItem::Property(_, prop) => {
1077 let is_start = start.variable.as_deref() == Some(var);
1078 if matches!(kind, VarKind::EventNode | VarKind::NoteNode) {
1079 return Err(QueryError::Unsupported(
1080 "synthetic observed_as_* edges cannot be used in variable-length \
1081 patterns; use a fixed-length edge pattern instead"
1082 .into(),
1083 ));
1084 }
1085 if *kind == VarKind::Node {
1086 let tbl = if is_start { "s" } else { "r" };
1087 if is_start {
1088 has_start = true;
1089 }
1090 let col = property_to_column(prop, kind)?;
1091 select_parts.push(format!("{tbl}.{col} AS {var}_{prop}"));
1092 } else {
1093 let col = match prop.as_str() {
1094 "id" => "via_edge",
1095 "relation" => "via_relation",
1096 "weight" => "via_weight",
1097 _ => {
1098 return Err(QueryError::Compile(format!(
1099 "unknown edge property '{prop}' in RETURN projection. \
1100 Valid: id, source_id, target_id, relation, weight"
1101 )));
1102 }
1103 };
1104 select_parts.push(format!("t.{col} AS {var}_{prop}"));
1105 }
1106 }
1107 ReturnItem::Variable(_) => match kind {
1108 VarKind::Node => {
1109 if start.variable.as_deref() == Some(var) {
1110 has_start = true;
1111 select_parts.push(format!(
1112 "s.id AS {var}_id, s.namespace AS {var}_namespace, \
1113 s.kind AS {var}_kind, s.entity_type AS {var}_entity_type, \
1114 s.name AS {var}_name, \
1115 s.properties AS {var}_properties, \
1116 s.created_at AS {var}_created_at, \
1117 s.updated_at AS {var}_updated_at"
1118 ));
1119 } else {
1120 select_parts.push(format!(
1121 "r.id AS {var}_id, r.namespace AS {var}_namespace, \
1122 r.kind AS {var}_kind, r.entity_type AS {var}_entity_type, \
1123 r.name AS {var}_name, \
1124 r.properties AS {var}_properties, \
1125 r.created_at AS {var}_created_at, \
1126 r.updated_at AS {var}_updated_at"
1127 ));
1128 }
1129 }
1130 VarKind::EventNode | VarKind::NoteNode => {
1131 return Err(QueryError::Unsupported(
1134 "synthetic observed_as_* edges cannot be used in variable-length \
1135 patterns; use a fixed-length edge pattern instead"
1136 .into(),
1137 ));
1138 }
1139 VarKind::Edge => {
1140 select_parts.push(format!(
1141 "t.via_edge AS {var}_id, t.via_relation AS {var}_relation, \
1142 t.via_weight AS {var}_weight"
1143 ));
1144 }
1145 },
1146 }
1147 } else {
1148 return Err(QueryError::Compile(format!(
1149 "unknown variable '{var}' in RETURN clause"
1150 )));
1151 }
1152 }
1153
1154 select_parts.push("t.depth AS _depth".to_string());
1156 select_parts.push("t.total_weight AS _total_weight".to_string());
1157
1158 let join_start = if has_start {
1163 "JOIN entities s ON s.id = t.start_id"
1164 } else {
1165 ""
1166 };
1167 let join_end = "JOIN entities r ON r.id = t.current_id";
1168
1169 let next_node_ns_and = if next_node_ns_filter.is_empty() {
1172 String::new()
1173 } else {
1174 format!(" AND {}", next_node_ns_filter.trim_start_matches(" AND "))
1175 };
1176
1177 let sql = format!(
1178 "WITH RECURSIVE traverse(start_id, current_id, depth, path, total_weight, via_edge, via_relation, via_weight) AS (\
1179 SELECT s.id, {seed_next}, 1, s.id || ',' || {seed_next}, e.weight, \
1180 e.id, e.relation, e.weight \
1181 FROM entities s \
1182 JOIN graph_edges e ON {seed_join} AND e.deleted_at IS NULL{e_ns_filter}{relation_condition} \
1183 WHERE {start_where} \
1184 UNION ALL \
1185 SELECT t.start_id, {recurse_next}, t.depth + 1, \
1186 t.path || ',' || {recurse_next}, \
1187 t.total_weight + e.weight, \
1188 e.id, e.relation, e.weight \
1189 FROM traverse t \
1190 JOIN graph_edges e ON {recurse_join} AND e.deleted_at IS NULL{e_ns_filter}{relation_condition} \
1191 JOIN entities next_node ON next_node.id = ({recurse_next}) \
1192 AND next_node.deleted_at IS NULL{next_node_ns_and} \
1193 WHERE t.depth < ?{depth_param} \
1194 AND (',' || t.path || ',') NOT LIKE '%,' || {recurse_next} || ',%' \
1195 ) \
1196 SELECT DISTINCT {select_cols} \
1197 FROM traverse t \
1198 {join_start} {join_end} \
1199 WHERE {end_where} \
1200 ORDER BY t.depth, t.total_weight DESC, t.start_id, t.current_id \
1201 LIMIT ?{limit_param}",
1202 seed_next = seed_next,
1203 seed_join = seed_join,
1204 e_ns_filter = e_ns_filter,
1205 relation_condition = relation_condition,
1206 start_where = start_conditions.join(" AND "),
1207 recurse_next = recurse_next,
1208 recurse_join = recurse_join,
1209 next_node_ns_and = next_node_ns_and,
1210 depth_param = depth_param,
1211 select_cols = select_parts.join(", "),
1212 join_start = join_start,
1213 join_end = join_end,
1214 end_where = end_conditions.join(" AND "),
1215 limit_param = limit_param,
1216 );
1217
1218 Ok(CompiledQuery {
1219 sql,
1220 params,
1221 return_vars: query.return_items.clone(),
1222 warnings: Vec::new(),
1223 })
1224}
1225
/// Classifies a pattern variable so that projections can pick the matching
/// property whitelist (see `property_to_column`).
#[derive(Clone, Copy, PartialEq, Eq)]
enum VarKind {
    /// Graph node variable; properties are checked against `NODE_COLUMNS`.
    Node,
    /// Event variable; properties are checked against `EVENT_COLUMNS`.
    EventNode,
    /// Note variable; properties are checked against `NOTE_COLUMNS`.
    NoteNode,
    /// Edge variable; properties are checked against `EDGE_COLUMNS`.
    Edge,
}
1235
/// Property names `property_to_column` accepts for `VarKind::Node` variables.
/// NOTE(review): presumably mirrors the columns of the `entities` table — confirm against the schema.
const NODE_COLUMNS: &[&str] = &[
    "id",
    "name",
    "kind",
    "entity_type",
    "namespace",
    "description",
    "properties",
    "created_at",
    "updated_at",
];
/// Property names `property_to_column` accepts for `VarKind::NoteNode` variables.
/// NOTE(review): presumably mirrors the note storage columns — confirm against the schema.
const NOTE_COLUMNS: &[&str] = &[
    "id",
    "namespace",
    "kind",
    "status",
    "name",
    "content",
    "salience",
    "decay_factor",
    "properties",
    "created_at",
    "updated_at",
];
/// Property names `property_to_column` accepts for `VarKind::EventNode` variables.
/// NOTE(review): presumably mirrors the columns of the `events` table — confirm against the schema.
const EVENT_COLUMNS: &[&str] = &[
    "id",
    "namespace",
    "verb",
    "substrate",
    "actor",
    "kind",
    "outcome",
    "payload",
    "duration_us",
    "target_id",
    "session_id",
    "created_at",
];
/// Property names `property_to_column` accepts for `VarKind::Edge` variables.
const EDGE_COLUMNS: &[&str] = &["id", "source_id", "target_id", "relation", "weight"];
1277
1278fn property_to_column<'a>(prop: &'a str, kind: &VarKind) -> Result<&'a str, QueryError> {
1279 let (valid, kind_name) = match kind {
1280 VarKind::Node => (NODE_COLUMNS, "node"),
1281 VarKind::NoteNode => (NOTE_COLUMNS, "note"),
1282 VarKind::EventNode => (EVENT_COLUMNS, "event"),
1283 VarKind::Edge => (EDGE_COLUMNS, "edge"),
1284 };
1285 if valid.contains(&prop) {
1286 Ok(prop)
1287 } else {
1288 Err(QueryError::Compile(format!(
1289 "unknown {kind_name} property '{prop}' in RETURN projection. \
1290 Valid: {}",
1291 valid.join(", ")
1292 )))
1293 }
1294}
1295
1296#[cfg(test)]
1301mod tests {
1302 use super::*;
1303 use crate::parsers::gql;
1304
1305 fn opts() -> CompileOptions {
1306 CompileOptions::default()
1307 }
1308
1309 fn scoped(namespace: &str) -> CompileOptions {
1310 CompileOptions {
1311 scopes: vec![namespace.to_string()],
1312 max_limit: 500,
1313 }
1314 }
1315
    #[test]
    // A single-hop pattern compiles to a plain `graph_edges` join with a LIMIT
    // and no recursive CTE, and the RETURN items are echoed back in order.
    fn fixed_length_basic() {
        let q =
            gql::parse("MATCH (a:concept)-[e:introduced_by]->(b:paper) RETURN a, e, b LIMIT 10")
                .unwrap();
        let compiled = compile(&q, &opts()).unwrap();
        assert!(compiled.sql.contains("JOIN graph_edges"));
        assert!(compiled.sql.contains("LIMIT"));
        assert_eq!(
            compiled.return_vars,
            vec![
                ReturnItem::Variable("a".into()),
                ReturnItem::Variable("e".into()),
                ReturnItem::Variable("b".into()),
            ]
        );
        assert!(!compiled.sql.contains("WITH RECURSIVE"));
    }
1335
    #[test]
    // With one scope configured, the SQL must mention `namespace` and the scope
    // value must appear among the bound parameters.
    fn namespace_scoping_injected() {
        let q =
            gql::parse("MATCH (a:concept)-[e:introduced_by]->(b:paper) RETURN a LIMIT 5").unwrap();
        let compiled = compile(&q, &scoped("research")).unwrap();
        assert!(compiled.sql.contains("namespace"));
        let has_ns_param = compiled
            .params
            .iter()
            .any(|p| matches!(p, QueryValue::Text(s) if s == "research"));
        assert!(has_ns_param, "namespace must be a bound parameter");
    }
1350
    #[test]
    // Filtering on `e.source_id` in WHERE must fail to compile; the error has to
    // either name the property or say it is not queryable.
    fn edge_property_whitelist_rejects_unknown() {
        let q = gql::parse("MATCH (a)-[e:introduced_by]->(b) WHERE e.source_id = 'x' RETURN a")
            .unwrap();
        let result = compile(&q, &opts());
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(
            err.contains("source_id") || err.contains("not queryable"),
            "error: {err}"
        );
    }
1364
    #[test]
    // `e.relation` is an accepted edge property in WHERE.
    fn edge_property_relation_allowed() {
        let q = gql::parse("MATCH (a)-[e]->(b) WHERE e.relation = 'extends' RETURN a").unwrap();
        let result = compile(&q, &opts());
        assert!(
            result.is_ok(),
            "relation should be allowed: {:?}",
            result.err()
        );
    }
1375
    #[test]
    // `e.weight` is an accepted edge property in WHERE.
    fn edge_property_weight_allowed() {
        let q = gql::parse("MATCH (a)-[e]->(b) WHERE e.weight > 0.5 RETURN a").unwrap();
        let result = compile(&q, &opts());
        assert!(
            result.is_ok(),
            "weight should be allowed: {:?}",
            result.err()
        );
    }
1386
    #[test]
    // A `*1..3` edge must compile to the recursive `traverse` CTE.
    fn variable_length_uses_cte() {
        let q =
            gql::parse("MATCH (a {name: 'LoRA'})-[:extends*1..3]->(b) RETURN b LIMIT 20").unwrap();
        let compiled = compile(&q, &opts()).unwrap();
        assert!(compiled.sql.contains("WITH RECURSIVE"));
        assert!(compiled.sql.contains("traverse"));
    }
1395
    #[test]
    // An upper hop bound of 50 exceeds the depth cap and must be rejected with
    // `QueryError::InvalidInput`.
    fn depth_cap_at_ten_rejects_above_max() {
        let q = gql::parse("MATCH (a)-[:extends*1..50]->(b) RETURN b").unwrap();
        let err = compile(&q, &opts()).unwrap_err();
        assert!(
            matches!(err, QueryError::InvalidInput(_)),
            "expected InvalidInput for depth > 10, got {err:?}"
        );
    }
1407
1408 #[test]
1409 fn depth_within_cap_compiles() {
1410 let q = gql::parse("MATCH (a)-[:extends*1..10]->(b) RETURN b").unwrap();
1412 let compiled = compile(&q, &opts()).unwrap();
1413 assert!(compiled.sql.contains("WITH RECURSIVE"));
1414 let depth_val = compiled.params.iter().find_map(|p| {
1416 if let QueryValue::Integer(n) = p {
1417 Some(*n)
1418 } else {
1419 None
1420 }
1421 });
1422 assert_eq!(depth_val, Some(10), "depth param should be 10");
1423 }
1424
    #[test]
    // A requested LIMIT above `max_limit` is clamped: the last bound parameter
    // must be the default cap of 500 rather than the requested 1000.
    fn limit_capped_by_max_limit() {
        let q = gql::parse("MATCH (a:concept)-[e]->(b) RETURN a LIMIT 1000").unwrap();
        let compiled = compile(&q, &opts()).unwrap();
        let limit_param = compiled.params.last().unwrap();
        assert!(
            matches!(limit_param, QueryValue::Integer(500)),
            "expected Integer(500), got {limit_param:?}"
        );
    }
1436
1437 #[test]
1438 fn compile_rejects_unknown_relation() {
1439 let q = gql::parse("MATCH (a)-[:not_a_relation]->(b) RETURN a").unwrap();
1440 let err = compile(&q, &opts()).unwrap_err();
1441 let msg = err.to_string();
1442 assert!(msg.contains("not_a_relation"), "msg: {msg}");
1443 }
1444
    #[test]
    // Node kinds are not validated against a fixed list: an arbitrary label such
    // as `gizmo` must reach the bound parameters untouched.
    fn compile_unknown_kind_passes_through() {
        let q = gql::parse("MATCH (a:gizmo)-[:extends]->(b) RETURN a").unwrap();
        let compiled = compile(&q, &opts()).unwrap();
        let has_gizmo = compiled
            .params
            .iter()
            .any(|p| matches!(p, QueryValue::Text(s) if s == "gizmo"));
        assert!(
            has_gizmo,
            "pack-agnostic: unknown kind must pass through into SQL params"
        );
    }
1460
    #[test]
    // The node label `paper` must be bound verbatim as a parameter.
    fn compile_kind_passes_through_unchanged() {
        let q =
            gql::parse("MATCH (a:paper)-[:introduced_by]->(b:concept) RETURN a LIMIT 1").unwrap();
        let compiled = compile(&q, &opts()).unwrap();
        let has_paper = compiled
            .params
            .iter()
            .any(|p| matches!(p, QueryValue::Text(s) if s == "paper"));
        assert!(
            has_paper,
            "kind 'paper' must pass through unchanged into SQL params"
        );
    }
1477
1478 #[test]
1479 fn compile_rejects_namespace_in_where() {
1480 let q =
1481 gql::parse("MATCH (a:concept)-[:extends]->(b) WHERE a.namespace = 'other' RETURN a")
1482 .unwrap();
1483 let err = compile(&q, &opts()).unwrap_err();
1484 assert!(err.to_string().contains("namespace"), "msg: {err}");
1485 }
1486
1487 #[test]
1488 fn compile_rejects_unknown_relation_in_where() {
1489 let q = gql::parse("MATCH (a)-[e:extends]->(b) WHERE e.relation = 'related_to' RETURN a")
1490 .unwrap();
1491 let err = compile(&q, &opts()).unwrap_err();
1492 assert!(err.to_string().contains("related_to"), "msg: {err}");
1493 }
1494
    #[test]
    // A kind compared in WHERE (`a.kind = 'paper'`) must be bound verbatim as a
    // parameter.
    fn compile_kind_in_where_passes_through_unchanged() {
        let q = gql::parse("MATCH (a)-[:extends]->(b) WHERE a.kind = 'paper' RETURN a").unwrap();
        let compiled = compile(&q, &opts()).unwrap();
        let has_paper = compiled
            .params
            .iter()
            .any(|p| matches!(p, QueryValue::Text(s) if s == "paper"));
        assert!(
            has_paper,
            "kind 'paper' must pass through unchanged into SQL params"
        );
    }
1509
    #[test]
    // Returning only the start variable of a variable-length pattern must still
    // leave `JOIN entities r` in the SQL, because end-node conditions refer to `r`.
    fn variable_length_return_start_only_joins_end_entity() {
        let q = gql::parse("MATCH (a:concept)-[:extends*1..3]->(b) RETURN a LIMIT 10").unwrap();
        let compiled = compile(&q, &opts()).unwrap();
        assert!(
            compiled.sql.contains("JOIN entities r"),
            "entities r must always be joined when r.* conditions are emitted; sql: {}",
            compiled.sql
        );
    }
1523
    #[test]
    // A fixed-length hop following a variable-length hop is reported as
    // `QueryError::Unsupported`.
    fn variable_length_trailing_pattern_unsupported() {
        let q = gql::parse("MATCH (a)-[:extends*1..3]->(b)-[:implements]->(c) RETURN b").unwrap();
        let err = compile(&q, &opts()).unwrap_err();
        assert!(
            matches!(err, QueryError::Unsupported(_)),
            "expected Unsupported, got {err:?}"
        );
    }
1533
    #[test]
    // A variable-length hop following a fixed-length hop is reported as
    // `QueryError::Unsupported`.
    fn variable_length_mixed_chain_unsupported() {
        let q = gql::parse("MATCH (a)-[:extends]->(b)-[:implements*1..2]->(c) RETURN c").unwrap();
        let err = compile(&q, &opts()).unwrap_err();
        assert!(matches!(err, QueryError::Unsupported(_)), "got {err:?}");
    }
1542
    #[test]
    // The SPARQL parser itself rejects the `*` path operator with
    // `QueryError::Unsupported`; compilation is never reached.
    fn sparql_star_rejected_as_unsupported() {
        use crate::parsers::sparql;
        let err = sparql::parse("SELECT ?a ?b WHERE { ?a :extends* ?b . }").unwrap_err();
        assert!(matches!(err, QueryError::Unsupported(_)), "got {err:?}");
    }
1549
    #[test]
    // A SPARQL triple `?a :extends ?b` compiles as an outbound edge: subject on
    // `source_id`, object on `target_id`, predicate bound as the relation.
    fn sparql_subject_object_direction_compiles_outbound() {
        use crate::parsers::sparql;

        let q = sparql::parse("SELECT ?a ?b WHERE { ?a :extends ?b . }").unwrap();
        let compiled = compile(&q, &opts()).unwrap();

        assert!(
            compiled
                .sql
                .contains("JOIN graph_edges e0 ON e0.source_id = n0.id"),
            "SPARQL subject must bind graph_edges.source_id; sql: {}",
            compiled.sql
        );
        assert!(
            compiled
                .sql
                .contains("JOIN entities n1 ON n1.id = e0.target_id"),
            "SPARQL object must bind graph_edges.target_id; sql: {}",
            compiled.sql
        );
        assert!(
            compiled.sql.contains("e0.relation = ?1"),
            "SPARQL predicate must bind graph_edges.relation; sql: {}",
            compiled.sql
        );
    }
1579
    #[test]
    // `RETURN a.name, b.name` projects only the requested columns, aliased as
    // `<var>_<prop>`, and does not expand to the full node column set.
    fn return_property_projection_compiles() {
        let q =
            gql::parse("MATCH (a:concept)-[e:extends]->(b:concept) RETURN a.name, b.name LIMIT 5")
                .unwrap();
        let compiled = compile(&q, &opts()).unwrap();
        assert!(
            compiled.sql.contains(".name AS a_name"),
            "sql: {}",
            compiled.sql
        );
        assert!(
            compiled.sql.contains(".name AS b_name"),
            "sql: {}",
            compiled.sql
        );
        assert!(
            !compiled.sql.contains("a_kind"),
            "should not emit full node columns"
        );
    }
1602
    #[test]
    // Projecting a node property outside the whitelist yields a
    // `QueryError::Compile` that names the property.
    fn return_unknown_node_property_rejected() {
        let q = gql::parse("MATCH (a:concept)-[:extends]->(b) RETURN a.domain LIMIT 5").unwrap();
        let err = compile(&q, &opts()).unwrap_err();
        assert!(
            matches!(err, QueryError::Compile(ref msg) if msg.contains("unknown node property 'domain'")),
            "got {err:?}"
        );
    }
1612
    #[test]
    // Projecting an edge property outside the whitelist yields a
    // `QueryError::Compile` that names the property.
    fn return_unknown_edge_property_rejected() {
        let q = gql::parse("MATCH (a)-[e:extends]->(b) RETURN e.label LIMIT 5").unwrap();
        let err = compile(&q, &opts()).unwrap_err();
        assert!(
            matches!(err, QueryError::Compile(ref msg) if msg.contains("unknown edge property 'label'")),
            "got {err:?}"
        );
    }
1622
    #[test]
    // Whitelisted edge properties project as `<var>_<prop>` aliases.
    fn return_valid_edge_property_compiles() {
        let q =
            gql::parse("MATCH (a)-[e:extends]->(b) RETURN e.relation, e.weight LIMIT 5").unwrap();
        let compiled = compile(&q, &opts()).unwrap();
        assert!(
            compiled.sql.contains(".relation AS e_relation"),
            "sql: {}",
            compiled.sql
        );
        assert!(
            compiled.sql.contains(".weight AS e_weight"),
            "sql: {}",
            compiled.sql
        );
    }
1640
    #[test]
    // An inline `entity_type` property compares against the real column with a
    // bound parameter; it must not go through `json_extract`.
    fn entity_type_compiles_as_direct_column_not_json_extract() {
        let q = gql::parse("MATCH (n:document {entity_type: 'paper'})-[:extends]->(m) RETURN n")
            .unwrap();
        let compiled = compile(&q, &opts()).unwrap();
        assert!(
            compiled.sql.contains(".entity_type = ?"),
            "entity_type must compile to a direct column comparison; sql: {}",
            compiled.sql
        );
        assert!(
            !compiled.sql.contains("json_extract"),
            "entity_type must NOT use json_extract; sql: {}",
            compiled.sql
        );
        let has_paper_param = compiled
            .params
            .iter()
            .any(|p| matches!(p, QueryValue::Text(s) if s == "paper"));
        assert!(
            has_paper_param,
            "entity_type value 'paper' must appear as a bound parameter"
        );
    }
1667
    #[test]
    // A WHERE disjunction on a fixed-length pattern produces a SQL ` OR ` with
    // both comparison values bound as parameters.
    fn where_or_compiles_to_sql_or() {
        let q = gql::parse(
            "MATCH (a:concept)-[e:extends]->(b) WHERE a.name = 'LoRA' OR a.name = 'QLoRA' RETURN a",
        )
        .unwrap();
        let compiled = compile(&q, &opts()).unwrap();
        assert!(
            compiled.sql.contains(" OR "),
            "WHERE OR must produce SQL OR; sql: {}",
            compiled.sql
        );
        let has_lora = compiled
            .params
            .iter()
            .any(|p| matches!(p, QueryValue::Text(s) if s == "LoRA"));
        let has_qlora = compiled
            .params
            .iter()
            .any(|p| matches!(p, QueryValue::Text(s) if s == "QLoRA"));
        assert!(has_lora && has_qlora, "both OR values must be bound params");
    }
1692
    #[test]
    // A mixed `AND ... OR ...` WHERE clause compiles and keeps an ` OR ` in the
    // SQL. NOTE(review): only the presence of OR is asserted here, not the
    // grouping of the operands.
    fn where_and_or_precedence() {
        let q = gql::parse(
            "MATCH (a:concept)-[e:extends]->(b) WHERE a.name = 'X' AND a.kind = 'concept' OR b.kind = 'project' RETURN a"
        ).unwrap();
        let compiled = compile(&q, &opts()).unwrap();
        assert!(
            compiled.sql.contains(" OR "),
            "expected OR in sql; sql: {}",
            compiled.sql
        );
    }
1707
    #[test]
    // A synthetic `observed_as_selected` edge joins `event_observations`
    // instead of `graph_edges`, with the role `selected` bound as a parameter.
    fn synthetic_edge_joins_event_observations() {
        let q = gql::parse("MATCH (ev)-[:observed_as_selected]->(m:memory) RETURN ev, m").unwrap();
        let compiled = compile(&q, &opts()).unwrap();
        assert!(
            compiled.sql.contains("event_observations"),
            "synthetic edge must join event_observations; sql: {}",
            compiled.sql
        );
        assert!(
            !compiled.sql.contains("graph_edges"),
            "synthetic edge must NOT join graph_edges; sql: {}",
            compiled.sql
        );
        let has_role_param = compiled
            .params
            .iter()
            .any(|p| matches!(p, QueryValue::Text(s) if s == "selected"));
        assert!(has_role_param, "role 'selected' must be a bound parameter");
    }
1730
    #[test]
    // The source side of a synthetic edge is selected `FROM events`; the
    // statement must not start by joining `entities n0` to `event_observations`.
    fn synthetic_edge_event_source_binds_events_table() {
        let q = gql::parse("MATCH (ev)-[:observed_as_selected]->(m:memory) RETURN ev, m").unwrap();
        let compiled = compile(&q, &opts()).unwrap();
        assert!(
            compiled.sql.contains("FROM events "),
            "CRIT-1: event source must come FROM events table, not entities; sql: {}",
            compiled.sql
        );
        assert!(
            !compiled
                .sql
                .starts_with("SELECT * FROM entities n0 JOIN event_observations"),
            "CRIT-1: must not join events via entities table; sql: {}",
            compiled.sql
        );
    }
1751
    #[test]
    // The `event_observations` join keys on `event_id = n0.id`, where `n0` is
    // the event-side alias.
    fn synthetic_edge_event_observation_join_uses_events_id() {
        let q = gql::parse("MATCH (ev)-[:observed_as_selected]->(m) RETURN m").unwrap();
        let compiled = compile(&q, &opts()).unwrap();
        assert!(
            compiled
                .sql
                .contains("JOIN event_observations e0 ON e0.event_id = n0.id"),
            "CRIT-1: event_observations must join on events.id (n0 is now events); sql: {}",
            compiled.sql
        );
    }
1767
    #[test]
    // Returning the event variable projects event columns (`verb`, `outcome`)
    // and none of the entity-only columns (`name`, `properties`).
    fn synthetic_edge_event_node_projects_event_columns() {
        let q = gql::parse("MATCH (ev)-[:observed_as_selected]->(m) RETURN ev").unwrap();
        let compiled = compile(&q, &opts()).unwrap();
        assert!(
            compiled.sql.contains("ev_verb"),
            "CRIT-1: event variable must project verb column; sql: {}",
            compiled.sql
        );
        assert!(
            compiled.sql.contains("ev_outcome"),
            "CRIT-1: event variable must project outcome column; sql: {}",
            compiled.sql
        );
        assert!(
            !compiled.sql.contains("ev_name,") && !compiled.sql.contains("ev_name "),
            "CRIT-1: event variable must NOT project entity name column; sql: {}",
            compiled.sql
        );
        assert!(
            !compiled.sql.contains("ev_properties"),
            "CRIT-1: event variable must NOT project entity properties column; sql: {}",
            compiled.sql
        );
    }
1795
    #[test]
    // With one scope configured, the scope value must be bound at least twice
    // for a synthetic edge: once per filtered side of the join.
    fn synthetic_edge_namespace_filter_on_events_table() {
        let q = gql::parse("MATCH (ev)-[:observed_as_selected]->(m) RETURN m").unwrap();
        let compiled = compile(&q, &scoped("test-ns")).unwrap();
        let ns_count = compiled
            .params
            .iter()
            .filter(|p| matches!(p, QueryValue::Text(s) if s == "test-ns"))
            .count();
        assert!(
            ns_count >= 2,
            "MIN-2: namespace must be filtered on both events and target; params: {:?}",
            compiled.params
        );
    }
1815
    #[test]
    // `observed_as_candidate` joins `event_observations` with the role
    // `candidate` bound as a parameter.
    fn synthetic_edge_candidate_role() {
        let q = gql::parse("MATCH (ev)-[:observed_as_candidate]->(m) RETURN ev, m").unwrap();
        let compiled = compile(&q, &opts()).unwrap();
        assert!(
            compiled.sql.contains("event_observations"),
            "sql: {}",
            compiled.sql
        );
        let has_candidate = compiled
            .params
            .iter()
            .any(|p| matches!(p, QueryValue::Text(s) if s == "candidate"));
        assert!(has_candidate, "role 'candidate' must be bound");
    }
1831
1832 #[test]
1833 fn synthetic_edge_multi_role() {
1834 let q =
1836 gql::parse("MATCH (ev)-[:observed_as_candidate|observed_as_selected]->(m) RETURN m")
1837 .unwrap();
1838 let compiled = compile(&q, &opts()).unwrap();
1839 assert!(
1840 compiled.sql.contains("event_observations"),
1841 "sql: {}",
1842 compiled.sql
1843 );
1844 assert!(
1845 compiled.sql.contains("IN"),
1846 "multi-role must use IN; sql: {}",
1847 compiled.sql
1848 );
1849 }
1850
    #[test]
    // Alternating a synthetic relation with a regular one in the same edge is a
    // `QueryError::Compile`.
    fn mixed_synthetic_and_canonical_rejected() {
        let q = gql::parse("MATCH (ev)-[:observed_as_selected|extends]->(m) RETURN m").unwrap();
        let err = compile(&q, &opts()).unwrap_err();
        assert!(
            matches!(err, QueryError::Compile(_)),
            "mixed synthetic+canonical must be rejected; got {err:?}"
        );
    }
1860
    #[test]
    // A synthetic edge written in the inbound direction (`<-[...]-`) is a
    // `QueryError::Compile`.
    fn synthetic_edge_inbound_rejected() {
        let q = gql::parse("MATCH (m)<-[:observed_as_selected]-(ev) RETURN m").unwrap();
        let err = compile(&q, &opts()).unwrap_err();
        assert!(
            matches!(err, QueryError::Compile(_)),
            "inbound synthetic edge must be rejected; got {err:?}"
        );
    }
1870
    #[test]
    // In a variable-length pattern, an OR whose operands touch both the start
    // and the end variable is `Unsupported`, and the message must tell the user
    // what to do instead.
    fn variable_length_or_across_endpoints_rejected() {
        let q = gql::parse(
            "MATCH (a)-[:extends*1..3]->(b) WHERE a.name = 'X' OR b.name = 'Y' RETURN a",
        )
        .unwrap();
        let result = compile(&q, &opts());
        assert!(
            matches!(result, Err(QueryError::Unsupported(_))),
            "MAJ-1: OR spanning both endpoints must return Unsupported; got {result:?}"
        );
        let err_msg = result.unwrap_err().to_string();
        assert!(
            err_msg.contains("separate queries") || err_msg.contains("one endpoint"),
            "error must be actionable; got: {err_msg}"
        );
    }
1892
    #[test]
    // An OR whose operands all refer to the same endpoint still compiles for a
    // variable-length pattern.
    fn variable_length_or_single_endpoint_still_works() {
        let q = gql::parse(
            "MATCH (a)-[:extends*1..3]->(b) WHERE a.name = 'X' OR a.name = 'Y' RETURN a",
        )
        .unwrap();
        let result = compile(&q, &opts());
        assert!(
            result.is_ok(),
            "single-endpoint OR must compile; got {result:?}"
        );
    }
1906
    #[test]
    // An AND that constrains both endpoints still compiles for a
    // variable-length pattern.
    fn variable_length_and_across_endpoints_still_works() {
        let q = gql::parse(
            "MATCH (a)-[:extends*1..3]->(b) WHERE a.name = 'X' AND b.name = 'Y' RETURN a",
        )
        .unwrap();
        let result = compile(&q, &opts());
        assert!(
            result.is_ok(),
            "AND across endpoints must compile; got {result:?}"
        );
    }
1920
    #[test]
    // A single-endpoint OR on a variable-length pattern emits a SQL ` OR ` with
    // both comparison values bound as parameters.
    fn test_variable_length_or_compiles_to_or() {
        let q = gql::parse(
            "MATCH (a)-[:extends*1..3]->(b) WHERE a.name = 'LoRA' OR a.name = 'QLoRA' RETURN b",
        )
        .unwrap();
        let compiled = compile(&q, &opts()).unwrap();
        assert!(
            compiled.sql.contains(" OR "),
            "#379: variable-length single-endpoint OR must produce SQL OR; sql: {}",
            compiled.sql
        );
        let has_lora = compiled
            .params
            .iter()
            .any(|p| matches!(p, QueryValue::Text(s) if s == "LoRA"));
        let has_qlora = compiled
            .params
            .iter()
            .any(|p| matches!(p, QueryValue::Text(s) if s == "QLoRA"));
        assert!(has_lora && has_qlora, "both OR values must be bound params");
    }
1949
    #[test]
    // An OR over two properties of the same edge variable in a fixed-length
    // pattern emits a SQL ` OR ` and binds the relation value as a parameter.
    fn test_single_endpoint_or_at_depth_1() {
        let q = gql::parse(
            "MATCH (a)-[r:extends]->(b) WHERE r.weight > 0.5 OR r.relation = 'extends' RETURN a",
        )
        .unwrap();
        let compiled = compile(&q, &opts()).unwrap();
        assert!(
            compiled.sql.contains(" OR "),
            "#379: fixed-length single-endpoint OR must produce SQL OR; sql: {}",
            compiled.sql
        );
        let has_extends = compiled
            .params
            .iter()
            .any(|p| matches!(p, QueryValue::Text(s) if s == "extends"));
        assert!(
            has_extends,
            "relation value 'extends' must be a bound param"
        );
    }
1973
    #[test]
    // A pure AND WHERE clause on a variable-length pattern emits no ` OR ` and
    // binds both comparison values as parameters.
    fn test_and_still_works() {
        let q = gql::parse(
            "MATCH (a)-[:extends*1..3]->(b) WHERE a.name = 'LoRA' AND a.kind = 'concept' RETURN b",
        )
        .unwrap();
        let compiled = compile(&q, &opts()).unwrap();
        assert!(
            !compiled.sql.contains(" OR "),
            "#379: AND must not produce OR; sql: {}",
            compiled.sql
        );
        let has_lora = compiled
            .params
            .iter()
            .any(|p| matches!(p, QueryValue::Text(s) if s == "LoRA"));
        let has_concept = compiled
            .params
            .iter()
            .any(|p| matches!(p, QueryValue::Text(s) if s == "concept"));
        assert!(
            has_lora && has_concept,
            "both AND values must be bound params"
        );
    }
2001
    #[test]
    // `max_limit = usize::MAX` must never surface as a negative LIMIT. Either
    // compilation is refused with `InvalidInput`, or it succeeds with a
    // non-negative integer as the last bound parameter.
    fn max_limit_overflow_returns_error() {
        let q = gql::parse("MATCH (a)-[:extends]->(b) RETURN a").unwrap();
        let opts = CompileOptions {
            scopes: vec![],
            max_limit: usize::MAX,
        };
        let result = compile(&q, &opts);
        match result {
            Err(QueryError::InvalidInput(_)) => {
                // Rejecting the oversized limit outright is an accepted outcome.
            }
            Ok(compiled) => {
                let limit_param = compiled.params.last().unwrap();
                assert!(
                    matches!(limit_param, QueryValue::Integer(n) if *n >= 0),
                    "limit must never be negative; got {limit_param:?}"
                );
            }
            Err(e) => panic!("unexpected error: {e:?}"),
        }
    }
2031
    #[test]
    // `max_limit = 0` is accepted and produces a bound LIMIT of 0.
    fn max_limit_zero_compiles() {
        let q = gql::parse("MATCH (a)-[:extends]->(b) RETURN a").unwrap();
        let opts = CompileOptions {
            scopes: vec![],
            max_limit: 0,
        };
        let compiled = compile(&q, &opts).unwrap();
        let limit_param = compiled.params.last().unwrap();
        assert!(
            matches!(limit_param, QueryValue::Integer(0)),
            "max_limit=0 should produce LIMIT 0; got {limit_param:?}"
        );
    }
2047
    #[test]
    // A synthetic relation with a hop range is `Unsupported`, and the message
    // must make clear that synthetic edges are the cause.
    fn variable_length_synthetic_edge_rejected() {
        let q = gql::parse("MATCH (ev)-[:observed_as_selected*1..3]->(m) RETURN m").unwrap();
        let err = compile(&q, &opts()).unwrap_err();
        assert!(
            matches!(err, QueryError::Unsupported(_)),
            "variable-length synthetic edge must return Unsupported; got {err:?}"
        );
        assert!(
            err.to_string().contains("synthetic") || err.to_string().contains("observed_as"),
            "error should mention synthetic edges: {err}"
        );
    }
2064
    #[test]
    // The recursive member of the CTE joins `entities next_node` and filters
    // `next_node.deleted_at IS NULL`, so deleted nodes are excluded mid-path.
    fn variable_length_recursive_member_joins_next_node_for_deleted_filter() {
        let q = gql::parse("MATCH (a)-[:extends*1..3]->(b) RETURN b").unwrap();
        let compiled = compile(&q, &opts()).unwrap();
        assert!(
            compiled.sql.contains("JOIN entities next_node"),
            "recursive CTE must join entities next_node for deleted-intermediate filtering; sql: {}",
            compiled.sql
        );
        assert!(
            compiled.sql.contains("next_node.deleted_at IS NULL"),
            "recursive CTE must filter next_node.deleted_at IS NULL; sql: {}",
            compiled.sql
        );
    }
2083
    #[test]
    // With a scope configured, the recursive member also filters
    // `next_node.namespace`, so intermediate hops stay inside the scope.
    fn variable_length_recursive_member_namespace_scopes_intermediates() {
        let q = gql::parse("MATCH (a)-[:extends*1..3]->(b) RETURN b").unwrap();
        let compiled = compile(&q, &scoped("test-ns")).unwrap();
        assert!(
            compiled.sql.contains("next_node.namespace"),
            "recursive CTE next_node join must filter namespace; sql: {}",
            compiled.sql
        );
    }
2097
    #[test]
    // A hand-built AST whose pattern starts with an edge (something the parser
    // is not used to produce here) must make `compile` return an error rather
    // than panic.
    fn compile_malformed_ast_returns_error_not_panic() {
        use crate::ast::{EdgeDirection, EdgePattern, GqlQuery, MatchPattern, PatternElement};
        let q = GqlQuery {
            pattern: MatchPattern {
                // Deliberately malformed: a lone edge with no surrounding nodes.
                elements: vec![PatternElement::Edge(EdgePattern {
                    variable: None,
                    relations: vec!["extends".to_string()],
                    direction: EdgeDirection::Out,
                    min_hops: 1,
                    max_hops: 1,
                })],
            },
            where_clause: WhereExpr::True,
            return_items: vec![],
            limit: None,
        };
        let result = compile(&q, &opts());
        assert!(
            result.is_err(),
            "malformed AST (starts with Edge) must return error, not panic"
        );
    }
2124
    #[test]
    // `-[e:extends](b)` lacks the `-`/`->` after the bracket and must be a parse
    // error.
    fn edge_pattern_without_suffix_dash_rejected() {
        let result = gql::parse("MATCH (a)-[e:extends](b) RETURN a");
        assert!(
            result.is_err(),
            "edge pattern without suffix '-' must be rejected as a parse error"
        );
    }
2135
    #[test]
    // Repeating a key inside a node's inline property map is a parse error
    // whose message mentions the duplication or the key.
    fn duplicate_inline_property_rejected() {
        let result = gql::parse("MATCH (n {name: 'A', name: 'B'}) RETURN n");
        assert!(
            result.is_err(),
            "duplicate property 'name' in node props must be rejected"
        );
        let err = result.unwrap_err().to_string();
        assert!(
            err.contains("duplicate") || err.contains("name"),
            "error should mention duplicate or key name: {err}"
        );
    }
2150
    #[test]
    // A relation that looks synthetic (`observed_as_*`) but is not a known one
    // parses fine and is rejected by `compile` with `QueryError::Validation`.
    fn unknown_synthetic_relation_rejected_at_compile() {
        let q = gql::parse("MATCH (a)-[:observed_as_bogus]->(b) RETURN a").unwrap();
        let err = compile(&q, &opts()).unwrap_err();
        assert!(
            matches!(err, QueryError::Validation(_)),
            "unknown synthetic relation must return Validation error; got {err:?}"
        );
    }
2161}