Skip to main content

khive_query/compilers/
sql.rs

1//! Compile GQL AST to parameterized SQL (JOIN chain or recursive CTE).
2
3use crate::ast::*;
4use crate::error::QueryError;
5use crate::validate::{validate_with_warnings, MAX_DEPTH};
6
/// Relation names that denote synthetic `observed_as_*` edges.
///
/// Every entry here has a corresponding arm in `synthetic_role`, which maps the
/// relation name to the role value used when filtering observations.
const SYNTHETIC_RELATIONS: &[&str] = &[
    "observed_as_candidate",
    "observed_as_selected",
    "observed_as_target",
    "observed_as_signal",
];

/// Returns `true` when `rel` is exactly one of the [`SYNTHETIC_RELATIONS`] names.
///
/// The comparison is an exact, case-sensitive string match.
fn is_synthetic(rel: &str) -> bool {
    SYNTHETIC_RELATIONS.iter().any(|known| *known == rel)
}
18
/// Maps a synthetic relation name to the role value it stands for.
///
/// `observed_as_candidate` → `candidate`, `observed_as_selected` → `selected`,
/// `observed_as_target` → `target`, `observed_as_signal` → `signal`.
/// Any other input (including a bare role name without the prefix) yields `None`.
fn synthetic_role(rel: &str) -> Option<&'static str> {
    // All synthetic relations share one prefix; anything without it is not synthetic.
    let suffix = rel.strip_prefix("observed_as_")?;
    let role = match suffix {
        "candidate" => "candidate",
        "selected" => "selected",
        "target" => "target",
        "signal" => "signal",
        _ => return None,
    };
    Some(role)
}
28
29/// Parameterized SQL emitted by the compiler, ready for execution by the runtime.
30#[derive(Debug)]
31pub struct CompiledQuery {
32    pub sql: String,
33    pub params: Vec<QueryValue>,
34    pub return_vars: Vec<ReturnItem>,
35    pub warnings: Vec<String>,
36}
37
/// Caller-supplied settings that scope a query to namespaces and cap its result size.
pub struct CompileOptions {
    /// Namespaces to restrict matches to. An empty list applies no namespace filter
    /// (cross-namespace); a non-empty list filters to exactly these namespaces.
    pub scopes: Vec<String>,
    /// Server-side ceiling on the row limit. The effective limit is
    /// `min(requested, max_limit)`.
    pub max_limit: usize,
}

impl Default for CompileOptions {
    /// No namespace scoping and a limit cap of 500 rows.
    fn default() -> Self {
        let scopes = Vec::new();
        let max_limit = 500;
        Self { scopes, max_limit }
    }
}
54
55/// Compile a `GqlQuery` AST to a parameterized SQL string and bound parameters.
56pub fn compile(query: &GqlQuery, opts: &CompileOptions) -> Result<CompiledQuery, QueryError> {
57    if query.pattern.elements.is_empty() {
58        return Err(QueryError::Compile("empty pattern".into()));
59    }
60
61    // Validate edge relations + structural rules before emitting SQL.
62    let mut query = query.clone();
63    let warnings = validate_with_warnings(&mut query)?;
64
65    let mut compiled = if query.pattern.has_variable_length() {
66        compile_variable_length(&query, opts)?
67    } else {
68        compile_fixed_length(&query, opts)?
69    };
70    compiled.warnings = warnings;
71    Ok(compiled)
72}
73
74fn namespace_filter(alias: &str, opts: &CompileOptions, params: &mut Vec<QueryValue>) -> String {
75    if opts.scopes.is_empty() {
76        String::new()
77    } else if opts.scopes.len() == 1 {
78        params.push(QueryValue::Text(opts.scopes[0].clone()));
79        format!(" AND {alias}.namespace = ?{}", params.len())
80    } else {
81        let placeholders: Vec<String> = opts
82            .scopes
83            .iter()
84            .map(|s| {
85                params.push(QueryValue::Text(s.clone()));
86                format!("?{}", params.len())
87            })
88            .collect();
89        format!(" AND {alias}.namespace IN ({})", placeholders.join(", "))
90    }
91}
92
93/// Returns `(source_indices, target_indices)` for synthetic `observed_as_*` edge endpoints.
94fn synthetic_endpoint_node_indices(
95    elements: &[PatternElement],
96) -> (
97    std::collections::HashSet<usize>,
98    std::collections::HashSet<usize>,
99) {
100    let mut source_set = std::collections::HashSet::new();
101    let mut target_set = std::collections::HashSet::new();
102    let mut node_idx = 0usize;
103    let mut prev_node_idx: Option<usize> = None;
104    for element in elements {
105        match element {
106            PatternElement::Node(_) => {
107                prev_node_idx = Some(node_idx);
108                node_idx += 1;
109            }
110            PatternElement::Edge(ep) => {
111                let has_synthetic = ep.relations.iter().any(|r| is_synthetic(r));
112                if has_synthetic {
113                    if let Some(src_idx) = prev_node_idx {
114                        source_set.insert(src_idx);
115                        // The target is the next node (current node_idx).
116                        target_set.insert(node_idx);
117                    }
118                }
119            }
120        }
121    }
122    (source_set, target_set)
123}
124
125/// Compile fixed-length patterns to a JOIN chain.
126fn compile_fixed_length(
127    query: &GqlQuery,
128    opts: &CompileOptions,
129) -> Result<CompiledQuery, QueryError> {
130    let mut params: Vec<QueryValue> = Vec::new();
131    let mut from_parts: Vec<String> = Vec::new();
132    let mut join_parts: Vec<String> = Vec::new();
133    let mut where_parts: Vec<String> = Vec::new();
134    let mut select_parts: Vec<String> = Vec::new();
135
136    let mut node_aliases: Vec<String> = Vec::new();
137    let mut edge_aliases: Vec<String> = Vec::new();
138    let mut var_to_alias: std::collections::HashMap<String, (String, VarKind)> =
139        std::collections::HashMap::new();
140
141    // Pre-compute which node indices are endpoints of synthetic edges.
142    // Source nodes bind to `events`; target nodes bind to `notes`.
143    let (event_source_indices, note_target_indices) =
144        synthetic_endpoint_node_indices(&query.pattern.elements);
145
146    let mut node_idx = 0usize;
147    let mut edge_idx = 0usize;
148
149    for element in &query.pattern.elements {
150        match element {
151            PatternElement::Node(np) => {
152                let alias = format!("n{node_idx}");
153                node_aliases.push(alias.clone());
154
155                let is_event_source = event_source_indices.contains(&node_idx);
156                let is_note_target = note_target_indices.contains(&node_idx);
157
158                if node_idx == 0 {
159                    if is_event_source {
160                        from_parts.push(format!("events {alias}"));
161                    } else {
162                        // Note targets are joined by the synthetic edge handler, not FROM.
163                        if !is_note_target {
164                            from_parts.push(format!("entities {alias}"));
165                        }
166                    }
167                }
168
169                if is_event_source {
170                    // Events table does not have `deleted_at`; filter is omitted.
171                    // Namespace filter uses the `events.namespace` column directly.
172                    let ns_filter = namespace_filter(&alias, opts, &mut params);
173                    if !ns_filter.is_empty() {
174                        where_parts.push(ns_filter.trim_start_matches(" AND ").to_string());
175                    }
176                    // `kind` on an event node filters events.kind (e.g. "recall_executed").
177                    if let Some(ref kind) = np.kind {
178                        params.push(QueryValue::Text(kind.clone()));
179                        where_parts.push(format!("{alias}.kind = ?{}", params.len()));
180                    }
181                    // entity_type and properties are not columns on events — reject explicitly.
182                    if np.entity_type.is_some() {
183                        return Err(QueryError::Compile(
184                            "event nodes do not have an entity_type column".into(),
185                        ));
186                    }
187                    if !np.properties.is_empty() {
188                        return Err(QueryError::Compile(
189                            "event nodes do not support inline property filters; \
190                             use a WHERE clause on verb, outcome, or payload fields"
191                                .into(),
192                        ));
193                    }
194                } else if is_note_target {
195                    // Note targets: `notes` table (joined by the synthetic edge handler).
196                    where_parts.push(format!("{alias}.deleted_at IS NULL"));
197
198                    let ns_filter = namespace_filter(&alias, opts, &mut params);
199                    if !ns_filter.is_empty() {
200                        where_parts.push(ns_filter.trim_start_matches(" AND ").to_string());
201                    }
202
203                    if let Some(ref kind) = np.kind {
204                        params.push(QueryValue::Text(kind.clone()));
205                        where_parts.push(format!("{alias}.kind = ?{}", params.len()));
206                    }
207
208                    // entity_type does not exist on notes — reject explicitly.
209                    if np.entity_type.is_some() {
210                        return Err(QueryError::Compile(
211                            "observed note targets do not have an entity_type column".into(),
212                        ));
213                    }
214
215                    let mut props: Vec<_> = np.properties.iter().collect();
216                    props.sort_by_key(|(k, _)| k.as_str());
217                    for (key, val) in props {
218                        params.push(QueryValue::Text(val.clone()));
219                        if key == "name" || key == "content" {
220                            where_parts
221                                .push(format!("{alias}.{key} = ?{} COLLATE NOCASE", params.len()));
222                        } else {
223                            where_parts.push(format!(
224                                "json_extract({alias}.properties, '$.{}') = ?{} COLLATE NOCASE",
225                                key.replace('\'', "''"),
226                                params.len()
227                            ));
228                        }
229                    }
230                } else {
231                    where_parts.push(format!("{alias}.deleted_at IS NULL"));
232
233                    let ns_filter = namespace_filter(&alias, opts, &mut params);
234                    if !ns_filter.is_empty() {
235                        where_parts.push(ns_filter.trim_start_matches(" AND ").to_string());
236                    }
237
238                    if let Some(ref kind) = np.kind {
239                        params.push(QueryValue::Text(kind.clone()));
240                        where_parts.push(format!("{alias}.kind = ?{}", params.len()));
241                    }
242
243                    if let Some(ref et) = np.entity_type {
244                        params.push(QueryValue::Text(et.clone()));
245                        where_parts.push(format!("{alias}.entity_type = ?{}", params.len()));
246                    }
247
248                    let mut props: Vec<_> = np.properties.iter().collect();
249                    props.sort_by_key(|(k, _)| k.as_str());
250                    for (key, val) in props {
251                        params.push(QueryValue::Text(val.clone()));
252                        if key == "name" {
253                            where_parts
254                                .push(format!("{alias}.name = ?{} COLLATE NOCASE", params.len()));
255                        } else {
256                            where_parts.push(format!(
257                                "json_extract({alias}.properties, '$.{}') = ?{} COLLATE NOCASE",
258                                key.replace('\'', "''"),
259                                params.len()
260                            ));
261                        }
262                    }
263                }
264
265                if let Some(ref var) = np.variable {
266                    let kind = if is_event_source {
267                        VarKind::EventNode
268                    } else if is_note_target {
269                        VarKind::NoteNode
270                    } else {
271                        VarKind::Node
272                    };
273                    var_to_alias.insert(var.clone(), (alias.clone(), kind));
274                }
275
276                node_idx += 1;
277            }
278            PatternElement::Edge(ep) => {
279                let e_alias = format!("e{edge_idx}");
280                let prev_node = &node_aliases[node_aliases.len() - 1];
281                let next_alias = format!("n{}", node_idx);
282
283                edge_aliases.push(e_alias.clone());
284
285                // Detect synthetic event_observations edges (observed_as_* relations).
286                // A synthetic edge is one whose only relation(s) are observed_as_* names.
287                // Mixed synthetic+canonical relations are rejected: the two tables don't share
288                // a common join key that would make an OR across them meaningful.
289                let has_synthetic = ep.relations.iter().any(|r| is_synthetic(r));
290                let has_canonical = ep.relations.iter().any(|r| !is_synthetic(r));
291                if has_synthetic && has_canonical {
292                    return Err(QueryError::Compile(
293                        "cannot mix synthetic observed_as_* relations with canonical edge relations \
294                         in a single edge pattern"
295                            .into(),
296                    ));
297                }
298
299                if has_synthetic {
300                    // Synthetic edge: join event_observations.
301                    // Direction is always event → entity/note (OUT from the event node).
302                    // The event node is the source (prev_node); the entity/note is the target.
303                    if !matches!(ep.direction, EdgeDirection::Out) {
304                        return Err(QueryError::Compile(
305                            "synthetic observed_as_* edges are always event → entity (outbound only)".into(),
306                        ));
307                    }
308                    join_parts.push(format!(
309                        "JOIN event_observations {e_alias} ON {e_alias}.event_id = {prev_node}.id"
310                    ));
311                    // Roles: collect the unique role values from the synthetic relation names.
312                    let roles: Vec<&'static str> = ep
313                        .relations
314                        .iter()
315                        .filter_map(|r| synthetic_role(r))
316                        .collect();
317                    if roles.len() == 1 {
318                        params.push(QueryValue::Text(roles[0].to_string()));
319                        where_parts.push(format!("{e_alias}.role = ?{}", params.len()));
320                    } else if roles.len() > 1 {
321                        let placeholders: Vec<String> = roles
322                            .iter()
323                            .map(|r| {
324                                params.push(QueryValue::Text(r.to_string()));
325                                format!("?{}", params.len())
326                            })
327                            .collect();
328                        where_parts
329                            .push(format!("{e_alias}.role IN ({})", placeholders.join(", ")));
330                    }
331                    // Join the target node via event_observations.entity_id.
332                    // The `referent_kind` column discriminates between note and entity
333                    // Recall/rerank observations always target notes
334                    // (`referent_kind='note'`); we filter to note substrate and join
335                    // the `notes` table.  An explicit `AND e0.referent_kind='note'`
336                    // prevents cross-substrate ID collisions.
337                    join_parts.push(format!(
338                        "JOIN notes {next_alias} ON {next_alias}.id = {e_alias}.entity_id \
339                         AND {e_alias}.referent_kind = 'note'"
340                    ));
341                } else {
342                    // Standard canonical edge: join graph_edges.
343                    let (source_join, target_join) = match ep.direction {
344                        EdgeDirection::Out => (
345                            format!("{e_alias}.source_id = {prev_node}.id"),
346                            "target_id",
347                        ),
348                        EdgeDirection::In => (
349                            format!("{e_alias}.target_id = {prev_node}.id"),
350                            "source_id",
351                        ),
352                        EdgeDirection::Both => (
353                            format!(
354                                "({e_alias}.source_id = {prev_node}.id OR {e_alias}.target_id = {prev_node}.id)"
355                            ),
356                            "CASE_BOTH",
357                        ),
358                    };
359
360                    let next_join_col = if target_join == "CASE_BOTH" {
361                        format!(
362                            "CASE WHEN {e_alias}.source_id = {prev_node}.id THEN {e_alias}.target_id ELSE {e_alias}.source_id END"
363                        )
364                    } else {
365                        format!("{e_alias}.{target_join}")
366                    };
367
368                    join_parts.push(format!(
369                        "JOIN graph_edges {e_alias} ON {source_join} AND {e_alias}.deleted_at IS NULL"
370                    ));
371
372                    let ens_filter = namespace_filter(&e_alias, opts, &mut params);
373                    if !ens_filter.is_empty() {
374                        where_parts.push(ens_filter.trim_start_matches(" AND ").to_string());
375                    }
376
377                    join_parts.push(format!(
378                        "JOIN entities {next_alias} ON {next_alias}.id = {next_join_col}"
379                    ));
380
381                    if !ep.relations.is_empty() {
382                        if ep.relations.len() == 1 {
383                            params.push(QueryValue::Text(ep.relations[0].clone()));
384                            where_parts.push(format!("{e_alias}.relation = ?{}", params.len()));
385                        } else {
386                            let placeholders: Vec<String> = ep
387                                .relations
388                                .iter()
389                                .map(|r| {
390                                    params.push(QueryValue::Text(r.clone()));
391                                    format!("?{}", params.len())
392                                })
393                                .collect();
394                            where_parts.push(format!(
395                                "{e_alias}.relation IN ({})",
396                                placeholders.join(", ")
397                            ));
398                        }
399                    }
400                }
401
402                if let Some(ref var) = ep.variable {
403                    var_to_alias.insert(var.clone(), (e_alias.clone(), VarKind::Edge));
404                }
405
406                edge_idx += 1;
407            }
408        }
409    }
410
411    // WHERE clause conditions from GQL WHERE (supports AND / OR tree)
412    if let Some(where_sql) = compile_where_expr(&query.where_clause, &var_to_alias, &mut params)? {
413        where_parts.push(where_sql);
414    }
415
416    // SELECT clause
417    for item in &query.return_items {
418        let var = item.variable();
419        if let Some((alias, kind)) = var_to_alias.get(var) {
420            match item {
421                ReturnItem::Property(_, prop) => {
422                    let col = property_to_column(prop, kind)?;
423                    select_parts.push(format!("{alias}.{col} AS {var}_{prop}"));
424                }
425                ReturnItem::Variable(_) => match kind {
426                    VarKind::Node => {
427                        select_parts.push(format!(
428                            "{alias}.id AS {var}_id, {alias}.namespace AS {var}_namespace, \
429                             {alias}.kind AS {var}_kind, {alias}.entity_type AS {var}_entity_type, \
430                             {alias}.name AS {var}_name, \
431                             {alias}.properties AS {var}_properties, \
432                             {alias}.created_at AS {var}_created_at, \
433                             {alias}.updated_at AS {var}_updated_at"
434                        ));
435                    }
436                    VarKind::NoteNode => {
437                        select_parts.push(format!(
438                            "{alias}.id AS {var}_id, {alias}.namespace AS {var}_namespace, \
439                             {alias}.kind AS {var}_kind, {alias}.status AS {var}_status, \
440                             {alias}.content AS {var}_content, \
441                             {alias}.salience AS {var}_salience, \
442                             {alias}.properties AS {var}_properties, \
443                             {alias}.created_at AS {var}_created_at, \
444                             {alias}.updated_at AS {var}_updated_at"
445                        ));
446                    }
447                    VarKind::EventNode => {
448                        select_parts.push(format!(
449                            "{alias}.id AS {var}_id, {alias}.namespace AS {var}_namespace, \
450                             {alias}.verb AS {var}_verb, {alias}.substrate AS {var}_substrate, \
451                             {alias}.actor AS {var}_actor, {alias}.kind AS {var}_kind, \
452                             {alias}.outcome AS {var}_outcome, \
453                             {alias}.payload AS {var}_payload, \
454                             {alias}.created_at AS {var}_created_at"
455                        ));
456                    }
457                    VarKind::Edge => {
458                        select_parts.push(format!(
459                            "{alias}.id AS {var}_id, {alias}.source_id AS {var}_source, \
460                             {alias}.target_id AS {var}_target, \
461                             {alias}.relation AS {var}_relation, \
462                             {alias}.weight AS {var}_weight"
463                        ));
464                    }
465                },
466            }
467        } else {
468            return Err(QueryError::Compile(format!(
469                "unknown variable '{var}' in RETURN clause"
470            )));
471        }
472    }
473
474    let limit = query.limit.unwrap_or(opts.max_limit).min(opts.max_limit);
475    let limit_i64 = i64::try_from(limit)
476        .map_err(|_| QueryError::InvalidInput("limit exceeds i64::MAX".into()))?;
477    params.push(QueryValue::Integer(limit_i64));
478
479    let sql = format!(
480        "SELECT {} FROM {} {} WHERE {} LIMIT ?{}",
481        select_parts.join(", "),
482        from_parts.join(", "),
483        join_parts.join(" "),
484        where_parts.join(" AND "),
485        params.len(),
486    );
487
488    Ok(CompiledQuery {
489        sql,
490        params,
491        return_vars: query.return_items.clone(),
492        warnings: Vec::new(),
493    })
494}
495
496/// Compile a `WhereExpr` tree into a SQL fragment.
497fn compile_where_expr(
498    expr: &WhereExpr,
499    var_to_alias: &std::collections::HashMap<String, (String, VarKind)>,
500    params: &mut Vec<QueryValue>,
501) -> Result<Option<String>, QueryError> {
502    match expr {
503        WhereExpr::True => Ok(None),
504        WhereExpr::Condition(cond) => {
505            let sql = compile_single_condition(cond, var_to_alias, params)?;
506            Ok(Some(sql))
507        }
508        WhereExpr::And(l, r) => {
509            let ls = compile_where_expr(l, var_to_alias, params)?;
510            let rs = compile_where_expr(r, var_to_alias, params)?;
511            Ok(match (ls, rs) {
512                (None, None) => None,
513                (Some(s), None) | (None, Some(s)) => Some(s),
514                (Some(l), Some(r)) => Some(format!("{l} AND {r}")),
515            })
516        }
517        WhereExpr::Or(l, r) => {
518            let ls = compile_where_expr(l, var_to_alias, params)?;
519            let rs = compile_where_expr(r, var_to_alias, params)?;
520            Ok(match (ls, rs) {
521                (None, None) => None,
522                (Some(s), None) | (None, Some(s)) => Some(s),
523                (Some(l), Some(r)) => Some(format!("({l} OR {r})")),
524            })
525        }
526    }
527}
528
529fn compile_single_condition(
530    cond: &Condition,
531    var_to_alias: &std::collections::HashMap<String, (String, VarKind)>,
532    params: &mut Vec<QueryValue>,
533) -> Result<String, QueryError> {
534    let (alias, kind) = var_to_alias.get(&cond.variable).ok_or_else(|| {
535        QueryError::Compile(format!(
536            "unknown variable '{}' in WHERE clause",
537            cond.variable
538        ))
539    })?;
540
541    let col_expr = match kind {
542        VarKind::Node => {
543            if cond.property == "name"
544                || cond.property == "kind"
545                || cond.property == "entity_type"
546                || cond.property == "namespace"
547            {
548                format!("{alias}.{}", cond.property)
549            } else {
550                format!(
551                    "json_extract({alias}.properties, '$.{}')",
552                    cond.property.replace('\'', "''")
553                )
554            }
555        }
556        VarKind::NoteNode => {
557            if NOTE_COLUMNS.contains(&cond.property.as_str()) {
558                format!("{alias}.{}", cond.property)
559            } else {
560                format!(
561                    "json_extract({alias}.properties, '$.{}')",
562                    cond.property.replace('\'', "''")
563                )
564            }
565        }
566        VarKind::EventNode => {
567            // Events table has direct columns only; reject unknown fields.
568            if EVENT_COLUMNS.contains(&cond.property.as_str()) {
569                format!("{alias}.{}", cond.property)
570            } else {
571                return Err(QueryError::Validation(format!(
572                    "event property '{}' not queryable; valid columns: {}",
573                    cond.property,
574                    EVENT_COLUMNS.join(", ")
575                )));
576            }
577        }
578        VarKind::Edge => match cond.property.as_str() {
579            "relation" | "weight" => format!("{alias}.{}", cond.property),
580            other => {
581                return Err(QueryError::Validation(format!(
582                    "edge property '{other}' not queryable; use 'relation' or 'weight'"
583                )))
584            }
585        },
586    };
587
588    let op_str = match cond.op {
589        CompareOp::Eq => "=",
590        CompareOp::Neq => "!=",
591        CompareOp::Gt => ">",
592        CompareOp::Lt => "<",
593        CompareOp::Gte => ">=",
594        CompareOp::Lte => "<=",
595        CompareOp::Like => "LIKE",
596    };
597
598    let sql = match &cond.value {
599        ConditionValue::String(s) => {
600            params.push(QueryValue::Text(s.clone()));
601            let collate = if matches!(cond.op, CompareOp::Eq | CompareOp::Like) {
602                " COLLATE NOCASE"
603            } else {
604                ""
605            };
606            format!("{col_expr} {op_str} ?{}{}", params.len(), collate)
607        }
608        ConditionValue::Number(n) => {
609            if !n.is_finite() {
610                return Err(QueryError::InvalidInput(
611                    "non-finite float (NaN or Infinity) is not a valid query parameter".into(),
612                ));
613            }
614            params.push(QueryValue::Float(*n));
615            format!("{col_expr} {op_str} ?{}", params.len())
616        }
617        ConditionValue::Bool(b) => {
618            params.push(QueryValue::Integer(if *b { 1 } else { 0 }));
619            format!("{col_expr} {op_str} ?{}", params.len())
620        }
621    };
622    Ok(sql)
623}
624
625fn expr_endpoint_set(
626    expr: &WhereExpr,
627    start_var: Option<&str>,
628    end_var: Option<&str>,
629) -> (bool, bool) {
630    match expr {
631        WhereExpr::True => (false, false),
632        WhereExpr::Condition(c) => {
633            let is_start = start_var == Some(c.variable.as_str());
634            let is_end = end_var == Some(c.variable.as_str());
635            (is_start, is_end)
636        }
637        WhereExpr::And(l, r) | WhereExpr::Or(l, r) => {
638            let (ls, le) = expr_endpoint_set(l, start_var, end_var);
639            let (rs, re) = expr_endpoint_set(r, start_var, end_var);
640            (ls || rs, le || re)
641        }
642    }
643}
644
645/// Return `Err(Unsupported)` if any `Or` node spans both endpoint variables.
646fn reject_or_spanning_endpoints(
647    expr: &WhereExpr,
648    start: &NodePattern,
649    end: &NodePattern,
650) -> Result<(), QueryError> {
651    let start_var = start.variable.as_deref();
652    let end_var = end.variable.as_deref();
653    reject_or_spanning_impl(expr, start_var, end_var)
654}
655
656fn reject_or_spanning_impl(
657    expr: &WhereExpr,
658    start_var: Option<&str>,
659    end_var: Option<&str>,
660) -> Result<(), QueryError> {
661    match expr {
662        WhereExpr::True | WhereExpr::Condition(_) => Ok(()),
663        WhereExpr::And(l, r) => {
664            reject_or_spanning_impl(l, start_var, end_var)?;
665            reject_or_spanning_impl(r, start_var, end_var)
666        }
667        WhereExpr::Or(l, r) => {
668            let (l_start, l_end) = expr_endpoint_set(l, start_var, end_var);
669            let (r_start, r_end) = expr_endpoint_set(r, start_var, end_var);
670            let spans_start = l_start || r_start;
671            let spans_end = l_end || r_end;
672            if spans_start && spans_end {
673                return Err(QueryError::Unsupported(
674                    "WHERE clauses that span both endpoints in a variable-length pattern \
675                     are not yet supported; rewrite as separate queries or restrict each \
676                     OR branch to one endpoint"
677                        .into(),
678                ));
679            }
680            // Even if this OR is safe, recurse to catch nested ORs.
681            reject_or_spanning_impl(l, start_var, end_var)?;
682            reject_or_spanning_impl(r, start_var, end_var)
683        }
684    }
685}
686
687fn compile_var_len_condition(
688    cond: &Condition,
689    start_var: Option<&str>,
690    end_var: Option<&str>,
691    params: &mut Vec<QueryValue>,
692) -> Result<(String, &'static str), QueryError> {
693    let col_alias = if start_var == Some(cond.variable.as_str()) {
694        "s"
695    } else if end_var == Some(cond.variable.as_str()) {
696        "r"
697    } else {
698        return Err(QueryError::Compile(format!(
699            "variable '{}' in WHERE not supported in variable-length pattern \
700             (only start/end node variables)",
701            cond.variable
702        )));
703    };
704
705    let col_expr =
706        if cond.property == "name" || cond.property == "kind" || cond.property == "entity_type" {
707            format!("{col_alias}.{}", cond.property)
708        } else {
709            format!(
710                "json_extract({col_alias}.properties, '$.{}')",
711                cond.property.replace('\'', "''")
712            )
713        };
714
715    let op_str = match cond.op {
716        CompareOp::Eq => "=",
717        CompareOp::Neq => "!=",
718        CompareOp::Gt => ">",
719        CompareOp::Lt => "<",
720        CompareOp::Gte => ">=",
721        CompareOp::Lte => "<=",
722        CompareOp::Like => "LIKE",
723    };
724
725    let sql = match &cond.value {
726        ConditionValue::String(s) => {
727            params.push(QueryValue::Text(s.clone()));
728            let collate = if matches!(cond.op, CompareOp::Eq | CompareOp::Like) {
729                " COLLATE NOCASE"
730            } else {
731                ""
732            };
733            format!("{col_expr} {op_str} ?{}{collate}", params.len())
734        }
735        ConditionValue::Number(n) => {
736            if !n.is_finite() {
737                return Err(QueryError::InvalidInput(
738                    "non-finite float (NaN or Infinity) is not a valid query parameter".into(),
739                ));
740            }
741            params.push(QueryValue::Float(*n));
742            format!("{col_expr} {op_str} ?{}", params.len())
743        }
744        ConditionValue::Bool(b) => {
745            params.push(QueryValue::Integer(if *b { 1 } else { 0 }));
746            format!("{col_expr} {op_str} ?{}", params.len())
747        }
748    };
749    Ok((sql, col_alias))
750}
751
752/// Walk the `WhereExpr` tree for variable-length patterns, routing conditions to start or end.
753fn compile_variable_length_where(
754    expr: &WhereExpr,
755    start_var: Option<&str>,
756    end_var: Option<&str>,
757    params: &mut Vec<QueryValue>,
758    start_conditions: &mut Vec<String>,
759    end_conditions: &mut Vec<String>,
760) -> Result<Option<String>, QueryError> {
761    match expr {
762        WhereExpr::True => Ok(None),
763        WhereExpr::Condition(cond) => {
764            let (sql, alias) = compile_var_len_condition(cond, start_var, end_var, params)?;
765            if alias == "s" {
766                start_conditions.push(sql);
767            } else {
768                end_conditions.push(sql);
769            }
770            Ok(None)
771        }
772        WhereExpr::And(l, r) => {
773            compile_variable_length_where(
774                l,
775                start_var,
776                end_var,
777                params,
778                start_conditions,
779                end_conditions,
780            )?;
781            compile_variable_length_where(
782                r,
783                start_var,
784                end_var,
785                params,
786                start_conditions,
787                end_conditions,
788            )?;
789            Ok(None)
790        }
791        WhereExpr::Or(l, r) => {
792            // After reject_or_spanning_endpoints we know this Or does not straddle
793            // both endpoints.  Compile each branch to a SQL string, then combine
794            // with OR and push into the appropriate condition list.
795            let l_sql = compile_variable_length_where_to_sql(l, start_var, end_var, params)?;
796            let r_sql = compile_variable_length_where_to_sql(r, start_var, end_var, params)?;
797            match (l_sql, r_sql) {
798                (None, None) => {}
799                (Some((ls, la)), None) => {
800                    if la == "s" {
801                        start_conditions.push(ls);
802                    } else {
803                        end_conditions.push(ls);
804                    }
805                }
806                (None, Some((rs, ra))) => {
807                    if ra == "s" {
808                        start_conditions.push(rs);
809                    } else {
810                        end_conditions.push(rs);
811                    }
812                }
813                (Some((ls, la)), Some((rs, _ra))) => {
814                    // Both non-None and same alias (guaranteed by the spanning check).
815                    let combined = format!("({ls} OR {rs})");
816                    if la == "s" {
817                        start_conditions.push(combined);
818                    } else {
819                        end_conditions.push(combined);
820                    }
821                }
822            }
823            Ok(None)
824        }
825    }
826}
827
828/// Compile a `WhereExpr` sub-tree to a SQL string plus the endpoint alias (`"s"` or `"r"`).
829fn compile_variable_length_where_to_sql(
830    expr: &WhereExpr,
831    start_var: Option<&str>,
832    end_var: Option<&str>,
833    params: &mut Vec<QueryValue>,
834) -> Result<Option<(String, &'static str)>, QueryError> {
835    match expr {
836        WhereExpr::True => Ok(None),
837        WhereExpr::Condition(cond) => {
838            let (sql, alias) = compile_var_len_condition(cond, start_var, end_var, params)?;
839            Ok(Some((sql, alias)))
840        }
841        WhereExpr::And(l, r) => {
842            let ls = compile_variable_length_where_to_sql(l, start_var, end_var, params)?;
843            let rs = compile_variable_length_where_to_sql(r, start_var, end_var, params)?;
844            Ok(match (ls, rs) {
845                (None, None) => None,
846                (Some(s), None) | (None, Some(s)) => Some(s),
847                (Some((lsql, la)), Some((rsql, _))) => Some((format!("{lsql} AND {rsql}"), la)),
848            })
849        }
850        WhereExpr::Or(l, r) => {
851            let ls = compile_variable_length_where_to_sql(l, start_var, end_var, params)?;
852            let rs = compile_variable_length_where_to_sql(r, start_var, end_var, params)?;
853            Ok(match (ls, rs) {
854                (None, None) => None,
855                (Some(s), None) | (None, Some(s)) => Some(s),
856                (Some((lsql, la)), Some((rsql, _))) => Some((format!("({lsql} OR {rsql})"), la)),
857            })
858        }
859    }
860}
861
862/// Compile variable-length patterns to a recursive CTE.
863fn compile_variable_length(
864    query: &GqlQuery,
865    opts: &CompileOptions,
866) -> Result<CompiledQuery, QueryError> {
867    let mut params: Vec<QueryValue> = Vec::new();
868    let mut var_to_alias: std::collections::HashMap<String, (String, VarKind)> =
869        std::collections::HashMap::new();
870
871    // For variable-length, we expect exactly: start_node -[*N..M]-> end_node.
872    // Mixed fixed+variable chains and additional trailing pattern elements are
873    // not yet supported — reject explicitly rather than silently dropping them.
874    let nodes: Vec<&NodePattern> = query.pattern.nodes().collect();
875    let edges: Vec<&EdgePattern> = query.pattern.edges().collect();
876
877    if nodes.len() != 2 || edges.len() != 1 || query.pattern.elements.len() != 3 {
878        return Err(QueryError::Unsupported(
879            "variable-length patterns must be a single start_node -[*N..M]-> end_node \
880             (mixed fixed/variable chains are not yet implemented)"
881                .into(),
882        ));
883    }
884
885    let start = &nodes[0];
886    let edge = &edges[0];
887    let end = &nodes[1];
888
889    // Synthetic observed_as_* edges join event_observations, which has no
890    // recursive path structure — reject them in variable-length patterns before
891    // attempting CTE compilation (would produce a CTE over graph_edges with an
892    // invalid relation string).
893    if edge.relations.iter().any(|r| is_synthetic(r)) {
894        return Err(QueryError::Unsupported(
895            "synthetic observed_as_* edges cannot be variable-length; \
896             use a fixed-length edge pattern instead"
897                .into(),
898        ));
899    }
900
901    // MAJ-2: depth cap — always parameterized, never injected as literal
902    let max_depth = edge.max_hops.min(MAX_DEPTH);
903    let min_depth = edge.min_hops;
904
905    // Build start-node conditions
906    let mut start_conditions: Vec<String> = vec!["s.deleted_at IS NULL".to_string()];
907    let ns_filter = namespace_filter("s", opts, &mut params);
908    if !ns_filter.is_empty() {
909        start_conditions.push(ns_filter.trim_start_matches(" AND ").to_string());
910    }
911
912    if let Some(ref kind) = start.kind {
913        params.push(QueryValue::Text(kind.clone()));
914        start_conditions.push(format!("s.kind = ?{}", params.len()));
915    }
916    if let Some(ref et) = start.entity_type {
917        params.push(QueryValue::Text(et.clone()));
918        start_conditions.push(format!("s.entity_type = ?{}", params.len()));
919    }
920    let mut start_props: Vec<_> = start.properties.iter().collect();
921    start_props.sort_by_key(|(k, _)| k.as_str());
922    for (key, val) in start_props {
923        params.push(QueryValue::Text(val.clone()));
924        if key == "name" {
925            start_conditions.push(format!("s.name = ?{} COLLATE NOCASE", params.len()));
926        } else {
927            start_conditions.push(format!(
928                "json_extract(s.properties, '$.{}') = ?{} COLLATE NOCASE",
929                key.replace('\'', "''"),
930                params.len()
931            ));
932        }
933    }
934
935    // Relation filter
936    let mut relation_condition = String::new();
937    if !edge.relations.is_empty() {
938        if edge.relations.len() == 1 {
939            params.push(QueryValue::Text(edge.relations[0].clone()));
940            relation_condition = format!(" AND e.relation = ?{}", params.len());
941        } else {
942            let placeholders: Vec<String> = edge
943                .relations
944                .iter()
945                .map(|r| {
946                    params.push(QueryValue::Text(r.clone()));
947                    format!("?{}", params.len())
948                })
949                .collect();
950            relation_condition = format!(" AND e.relation IN ({})", placeholders.join(", "));
951        }
952    }
953
954    // Edge namespace filter
955    let e_ns_filter = namespace_filter("e", opts, &mut params);
956
957    // Direction-dependent JOIN
958    let (seed_join, seed_next, recurse_join, recurse_next) = match edge.direction {
959        EdgeDirection::Out => (
960            "e.source_id = s.id",
961            "e.target_id",
962            "e.source_id = t.current_id",
963            "e.target_id",
964        ),
965        EdgeDirection::In => (
966            "e.target_id = s.id",
967            "e.source_id",
968            "e.target_id = t.current_id",
969            "e.source_id",
970        ),
971        EdgeDirection::Both => (
972            "(e.source_id = s.id OR e.target_id = s.id)",
973            "CASE WHEN e.source_id = s.id THEN e.target_id ELSE e.source_id END",
974            "(e.source_id = t.current_id OR e.target_id = t.current_id)",
975            "CASE WHEN e.source_id = t.current_id THEN e.target_id ELSE e.source_id END",
976        ),
977    };
978
979    // Build the next-intermediate-node namespace filter.
980    // This is applied in the recursive CTE member to prevent traversal through
981    // deleted or out-of-scope intermediate nodes.  Without it, a path like
982    // A -> B_deleted -> C would be returned even when B is soft-deleted.
983    let next_node_ns_filter = namespace_filter("next_node", opts, &mut params);
984
985    let max_depth_i64 = i64::try_from(max_depth)
986        .map_err(|_| QueryError::InvalidInput("max_depth exceeds i64::MAX".into()))?;
987    params.push(QueryValue::Integer(max_depth_i64));
988    let depth_param = params.len();
989
990    // End-node conditions (applied in outer WHERE). `r` is always joined
991    // unconditionally below so these references resolve regardless of whether
992    // the end variable is projected.
993    let mut end_conditions: Vec<String> = vec!["r.deleted_at IS NULL".to_string()];
994    let r_ns_filter = namespace_filter("r", opts, &mut params);
995    if !r_ns_filter.is_empty() {
996        end_conditions.push(r_ns_filter.trim_start_matches(" AND ").to_string());
997    }
998    if let Some(ref kind) = end.kind {
999        params.push(QueryValue::Text(kind.clone()));
1000        end_conditions.push(format!("r.kind = ?{}", params.len()));
1001    }
1002    if let Some(ref et) = end.entity_type {
1003        params.push(QueryValue::Text(et.clone()));
1004        end_conditions.push(format!("r.entity_type = ?{}", params.len()));
1005    }
1006    let mut end_props: Vec<_> = end.properties.iter().collect();
1007    end_props.sort_by_key(|(k, _)| k.as_str());
1008    for (key, val) in end_props {
1009        params.push(QueryValue::Text(val.clone()));
1010        if key == "name" {
1011            end_conditions.push(format!("r.name = ?{} COLLATE NOCASE", params.len()));
1012        } else {
1013            end_conditions.push(format!(
1014                "json_extract(r.properties, '$.{}') = ?{} COLLATE NOCASE",
1015                key.replace('\'', "''"),
1016                params.len()
1017            ));
1018        }
1019    }
1020
1021    // WHERE clause conditions for variable-length patterns.
1022    // OR expressions that span both start and end nodes are not supported — reject
1023    // explicitly with an actionable error message rather than silently converting OR to AND.
1024    reject_or_spanning_endpoints(&query.where_clause, start, end)?;
1025
1026    // Compile the WHERE tree preserving Or/And connectives.  After the spanning
1027    // check above we know every Or node touches at most one endpoint, so we can
1028    // safely route whole sub-trees to start_conditions or end_conditions.
1029    if let Some(where_sql) = compile_variable_length_where(
1030        &query.where_clause,
1031        start.variable.as_deref(),
1032        end.variable.as_deref(),
1033        &mut params,
1034        &mut start_conditions,
1035        &mut end_conditions,
1036    )? {
1037        // A non-None return means the expression spans no variable (WhereExpr::True
1038        // is the only such case and returns None).  This branch is unreachable given
1039        // the reject_or_spanning_endpoints guard above, but handle it safely.
1040        start_conditions.push(where_sql);
1041    }
1042
1043    // MAJ-2: min_depth is always a bound parameter, never a literal
1044    if min_depth > 0 {
1045        let min_depth_i64 = i64::try_from(min_depth)
1046            .map_err(|_| QueryError::InvalidInput("min_depth exceeds i64::MAX".into()))?;
1047        params.push(QueryValue::Integer(min_depth_i64));
1048        end_conditions.push(format!("t.depth >= ?{}", params.len()));
1049    }
1050
1051    let limit = query.limit.unwrap_or(opts.max_limit).min(opts.max_limit);
1052    let limit_i64 = i64::try_from(limit)
1053        .map_err(|_| QueryError::InvalidInput("limit exceeds i64::MAX".into()))?;
1054    params.push(QueryValue::Integer(limit_i64));
1055    let limit_param = params.len();
1056
1057    // Register variables
1058    if let Some(ref var) = start.variable {
1059        var_to_alias.insert(var.clone(), ("s".to_string(), VarKind::Node));
1060    }
1061    if let Some(ref var) = end.variable {
1062        var_to_alias.insert(var.clone(), ("r".to_string(), VarKind::Node));
1063    }
1064    if let Some(ref var) = edge.variable {
1065        var_to_alias.insert(var.clone(), ("e".to_string(), VarKind::Edge));
1066    }
1067
1068    // Build SELECT based on RETURN items
1069    let mut select_parts: Vec<String> = Vec::new();
1070    let mut has_start = false;
1071
1072    for item in &query.return_items {
1073        let var = item.variable();
1074        if let Some((_, kind)) = var_to_alias.get(var) {
1075            match item {
1076                ReturnItem::Property(_, prop) => {
1077                    let is_start = start.variable.as_deref() == Some(var);
1078                    if matches!(kind, VarKind::EventNode | VarKind::NoteNode) {
1079                        return Err(QueryError::Unsupported(
1080                            "synthetic observed_as_* edges cannot be used in variable-length \
1081                             patterns; use a fixed-length edge pattern instead"
1082                                .into(),
1083                        ));
1084                    }
1085                    if *kind == VarKind::Node {
1086                        let tbl = if is_start { "s" } else { "r" };
1087                        if is_start {
1088                            has_start = true;
1089                        }
1090                        let col = property_to_column(prop, kind)?;
1091                        select_parts.push(format!("{tbl}.{col} AS {var}_{prop}"));
1092                    } else {
1093                        let col = match prop.as_str() {
1094                            "id" => "via_edge",
1095                            "relation" => "via_relation",
1096                            "weight" => "via_weight",
1097                            _ => {
1098                                return Err(QueryError::Compile(format!(
1099                                    "unknown edge property '{prop}' in RETURN projection. \
1100                                     Valid: id, source_id, target_id, relation, weight"
1101                                )));
1102                            }
1103                        };
1104                        select_parts.push(format!("t.{col} AS {var}_{prop}"));
1105                    }
1106                }
1107                ReturnItem::Variable(_) => match kind {
1108                    VarKind::Node => {
1109                        if start.variable.as_deref() == Some(var) {
1110                            has_start = true;
1111                            select_parts.push(format!(
1112                                "s.id AS {var}_id, s.namespace AS {var}_namespace, \
1113                                 s.kind AS {var}_kind, s.entity_type AS {var}_entity_type, \
1114                                 s.name AS {var}_name, \
1115                                 s.properties AS {var}_properties, \
1116                                 s.created_at AS {var}_created_at, \
1117                                 s.updated_at AS {var}_updated_at"
1118                            ));
1119                        } else {
1120                            select_parts.push(format!(
1121                                "r.id AS {var}_id, r.namespace AS {var}_namespace, \
1122                                 r.kind AS {var}_kind, r.entity_type AS {var}_entity_type, \
1123                                 r.name AS {var}_name, \
1124                                 r.properties AS {var}_properties, \
1125                                 r.created_at AS {var}_created_at, \
1126                                 r.updated_at AS {var}_updated_at"
1127                            ));
1128                        }
1129                    }
1130                    VarKind::EventNode | VarKind::NoteNode => {
1131                        // Synthetic observed_as_* edges require a fixed-length pattern;
1132                        // variable-length recursion over the events/notes tables is not supported.
1133                        return Err(QueryError::Unsupported(
1134                            "synthetic observed_as_* edges cannot be used in variable-length \
1135                             patterns; use a fixed-length edge pattern instead"
1136                                .into(),
1137                        ));
1138                    }
1139                    VarKind::Edge => {
1140                        select_parts.push(format!(
1141                            "t.via_edge AS {var}_id, t.via_relation AS {var}_relation, \
1142                             t.via_weight AS {var}_weight"
1143                        ));
1144                    }
1145                },
1146            }
1147        } else {
1148            return Err(QueryError::Compile(format!(
1149                "unknown variable '{var}' in RETURN clause"
1150            )));
1151        }
1152    }
1153
1154    // Always include traversal metadata
1155    select_parts.push("t.depth AS _depth".to_string());
1156    select_parts.push("t.total_weight AS _total_weight".to_string());
1157
1158    // `s` is optional (only joined if the start variable is projected); `r` is
1159    // always joined because the outer WHERE always references `r.deleted_at`,
1160    // `r.namespace` (and possibly r.kind / r.properties) regardless of whether
1161    // it appears in RETURN.
1162    let join_start = if has_start {
1163        "JOIN entities s ON s.id = t.start_id"
1164    } else {
1165        ""
1166    };
1167    let join_end = "JOIN entities r ON r.id = t.current_id";
1168
1169    // Build the next-node namespace filter clause (may be empty).
1170    // Already pushed into params by namespace_filter above.
1171    let next_node_ns_and = if next_node_ns_filter.is_empty() {
1172        String::new()
1173    } else {
1174        format!(" AND {}", next_node_ns_filter.trim_start_matches(" AND "))
1175    };
1176
1177    let sql = format!(
1178        "WITH RECURSIVE traverse(start_id, current_id, depth, path, total_weight, via_edge, via_relation, via_weight) AS (\
1179             SELECT s.id, {seed_next}, 1, s.id || ',' || {seed_next}, e.weight, \
1180                    e.id, e.relation, e.weight \
1181             FROM entities s \
1182             JOIN graph_edges e ON {seed_join} AND e.deleted_at IS NULL{e_ns_filter}{relation_condition} \
1183             WHERE {start_where} \
1184             UNION ALL \
1185             SELECT t.start_id, {recurse_next}, t.depth + 1, \
1186                    t.path || ',' || {recurse_next}, \
1187                    t.total_weight + e.weight, \
1188                    e.id, e.relation, e.weight \
1189             FROM traverse t \
1190             JOIN graph_edges e ON {recurse_join} AND e.deleted_at IS NULL{e_ns_filter}{relation_condition} \
1191             JOIN entities next_node ON next_node.id = ({recurse_next}) \
1192                    AND next_node.deleted_at IS NULL{next_node_ns_and} \
1193             WHERE t.depth < ?{depth_param} \
1194               AND (',' || t.path || ',') NOT LIKE '%,' || {recurse_next} || ',%' \
1195         ) \
1196         SELECT DISTINCT {select_cols} \
1197         FROM traverse t \
1198         {join_start} {join_end} \
1199         WHERE {end_where} \
1200         ORDER BY t.depth, t.total_weight DESC, t.start_id, t.current_id \
1201         LIMIT ?{limit_param}",
1202        seed_next = seed_next,
1203        seed_join = seed_join,
1204        e_ns_filter = e_ns_filter,
1205        relation_condition = relation_condition,
1206        start_where = start_conditions.join(" AND "),
1207        recurse_next = recurse_next,
1208        recurse_join = recurse_join,
1209        next_node_ns_and = next_node_ns_and,
1210        depth_param = depth_param,
1211        select_cols = select_parts.join(", "),
1212        join_start = join_start,
1213        join_end = join_end,
1214        end_where = end_conditions.join(" AND "),
1215        limit_param = limit_param,
1216    );
1217
1218    Ok(CompiledQuery {
1219        sql,
1220        params,
1221        return_vars: query.return_items.clone(),
1222        warnings: Vec::new(),
1223    })
1224}
1225
/// Classification of a bound query variable; selects the column whitelist that
/// `property_to_column` applies when one of its properties is projected.
#[derive(Clone, Copy, PartialEq, Eq)]
enum VarKind {
    /// Node variable whose properties are validated against `NODE_COLUMNS`.
    Node,
    /// Node that maps to the `events` table (synthetic `observed_as_*` edge source).
    EventNode,
    /// Node that maps to the `notes` table (synthetic `observed_as_*` edge target).
    NoteNode,
    /// Edge variable whose properties are validated against `EDGE_COLUMNS`.
    Edge,
}
1235
/// Columns available for projection on `VarKind::Node` variables.
const NODE_COLUMNS: &[&str] = &[
    "id",
    "name",
    "kind",
    "entity_type",
    "namespace",
    "description",
    "properties",
    "created_at",
    "updated_at",
];
/// Columns available for projection on `notes` table nodes (synthetic edge targets).
const NOTE_COLUMNS: &[&str] = &[
    "id",
    "namespace",
    "kind",
    "status",
    "name",
    "content",
    "salience",
    "decay_factor",
    "properties",
    "created_at",
    "updated_at",
];
/// Columns available for projection on `events` table nodes (synthetic edge sources).
const EVENT_COLUMNS: &[&str] = &[
    "id",
    "namespace",
    "verb",
    "substrate",
    "actor",
    "kind",
    "outcome",
    "payload",
    "duration_us",
    "target_id",
    "session_id",
    "created_at",
];
/// Columns available for projection on `VarKind::Edge` variables.
const EDGE_COLUMNS: &[&str] = &["id", "source_id", "target_id", "relation", "weight"];
1277
1278fn property_to_column<'a>(prop: &'a str, kind: &VarKind) -> Result<&'a str, QueryError> {
1279    let (valid, kind_name) = match kind {
1280        VarKind::Node => (NODE_COLUMNS, "node"),
1281        VarKind::NoteNode => (NOTE_COLUMNS, "note"),
1282        VarKind::EventNode => (EVENT_COLUMNS, "event"),
1283        VarKind::Edge => (EDGE_COLUMNS, "edge"),
1284    };
1285    if valid.contains(&prop) {
1286        Ok(prop)
1287    } else {
1288        Err(QueryError::Compile(format!(
1289            "unknown {kind_name} property '{prop}' in RETURN projection. \
1290             Valid: {}",
1291            valid.join(", ")
1292        )))
1293    }
1294}
1295
// INLINE TEST JUSTIFICATION: Tests exercise private helpers (compile_fixed_length,
// compile_variable_length, compile_single_condition, compile_var_len_condition) and
// internal types (VarKind), which are reachable here only through `use super::*`;
// moving the tests to crates/khive-query/tests/ would require making those items pub,
// which would widen the public API surface.
1300#[cfg(test)]
1301mod tests {
1302    use super::*;
1303    use crate::parsers::gql;
1304
1305    fn opts() -> CompileOptions {
1306        CompileOptions::default()
1307    }
1308
1309    fn scoped(namespace: &str) -> CompileOptions {
1310        CompileOptions {
1311            scopes: vec![namespace.to_string()],
1312            max_limit: 500,
1313        }
1314    }
1315
1316    #[test]
1317    fn fixed_length_basic() {
1318        let q =
1319            gql::parse("MATCH (a:concept)-[e:introduced_by]->(b:paper) RETURN a, e, b LIMIT 10")
1320                .unwrap();
1321        let compiled = compile(&q, &opts()).unwrap();
1322        assert!(compiled.sql.contains("JOIN graph_edges"));
1323        assert!(compiled.sql.contains("LIMIT"));
1324        assert_eq!(
1325            compiled.return_vars,
1326            vec![
1327                ReturnItem::Variable("a".into()),
1328                ReturnItem::Variable("e".into()),
1329                ReturnItem::Variable("b".into()),
1330            ]
1331        );
1332        // No recursive CTE for fixed-length
1333        assert!(!compiled.sql.contains("WITH RECURSIVE"));
1334    }
1335
1336    #[test]
1337    fn namespace_scoping_injected() {
1338        // Namespace must come from opts, never from the query
1339        let q =
1340            gql::parse("MATCH (a:concept)-[e:introduced_by]->(b:paper) RETURN a LIMIT 5").unwrap();
1341        let compiled = compile(&q, &scoped("research")).unwrap();
1342        assert!(compiled.sql.contains("namespace"));
1343        // The namespace value must appear as a parameter, not a literal in SQL
1344        let has_ns_param = compiled
1345            .params
1346            .iter()
1347            .any(|p| matches!(p, QueryValue::Text(s) if s == "research"));
1348        assert!(has_ns_param, "namespace must be a bound parameter");
1349    }
1350
1351    #[test]
1352    fn edge_property_whitelist_rejects_unknown() {
1353        // MAJ-1: only 'relation' and 'weight' are queryable edge properties
1354        let q = gql::parse("MATCH (a)-[e:introduced_by]->(b) WHERE e.source_id = 'x' RETURN a")
1355            .unwrap();
1356        let result = compile(&q, &opts());
1357        assert!(result.is_err());
1358        let err = result.unwrap_err().to_string();
1359        assert!(
1360            err.contains("source_id") || err.contains("not queryable"),
1361            "error: {err}"
1362        );
1363    }
1364
1365    #[test]
1366    fn edge_property_relation_allowed() {
1367        let q = gql::parse("MATCH (a)-[e]->(b) WHERE e.relation = 'extends' RETURN a").unwrap();
1368        let result = compile(&q, &opts());
1369        assert!(
1370            result.is_ok(),
1371            "relation should be allowed: {:?}",
1372            result.err()
1373        );
1374    }
1375
1376    #[test]
1377    fn edge_property_weight_allowed() {
1378        let q = gql::parse("MATCH (a)-[e]->(b) WHERE e.weight > 0.5 RETURN a").unwrap();
1379        let result = compile(&q, &opts());
1380        assert!(
1381            result.is_ok(),
1382            "weight should be allowed: {:?}",
1383            result.err()
1384        );
1385    }
1386
1387    #[test]
1388    fn variable_length_uses_cte() {
1389        let q =
1390            gql::parse("MATCH (a {name: 'LoRA'})-[:extends*1..3]->(b) RETURN b LIMIT 20").unwrap();
1391        let compiled = compile(&q, &opts()).unwrap();
1392        assert!(compiled.sql.contains("WITH RECURSIVE"));
1393        assert!(compiled.sql.contains("traverse"));
1394    }
1395
1396    #[test]
1397    fn depth_cap_at_ten_rejects_above_max() {
1398        // Exceeding MAX_DEPTH is an InvalidInput error at validation time —
1399        // the compiler never sees a query with depth > 10.
1400        let q = gql::parse("MATCH (a)-[:extends*1..50]->(b) RETURN b").unwrap();
1401        let err = compile(&q, &opts()).unwrap_err();
1402        assert!(
1403            matches!(err, QueryError::InvalidInput(_)),
1404            "expected InvalidInput for depth > 10, got {err:?}"
1405        );
1406    }
1407
1408    #[test]
1409    fn depth_within_cap_compiles() {
1410        // depth *1..10 is at the cap — must compile successfully.
1411        let q = gql::parse("MATCH (a)-[:extends*1..10]->(b) RETURN b").unwrap();
1412        let compiled = compile(&q, &opts()).unwrap();
1413        assert!(compiled.sql.contains("WITH RECURSIVE"));
1414        // The depth parameter must equal 10
1415        let depth_val = compiled.params.iter().find_map(|p| {
1416            if let QueryValue::Integer(n) = p {
1417                Some(*n)
1418            } else {
1419                None
1420            }
1421        });
1422        assert_eq!(depth_val, Some(10), "depth param should be 10");
1423    }
1424
1425    #[test]
1426    fn limit_capped_by_max_limit() {
1427        // Query requests 1000, max_limit is 500 — result should be 500
1428        let q = gql::parse("MATCH (a:concept)-[e]->(b) RETURN a LIMIT 1000").unwrap();
1429        let compiled = compile(&q, &opts()).unwrap();
1430        let limit_param = compiled.params.last().unwrap();
1431        assert!(
1432            matches!(limit_param, QueryValue::Integer(500)),
1433            "expected Integer(500), got {limit_param:?}"
1434        );
1435    }
1436
1437    #[test]
1438    fn compile_rejects_unknown_relation() {
1439        let q = gql::parse("MATCH (a)-[:not_a_relation]->(b) RETURN a").unwrap();
1440        let err = compile(&q, &opts()).unwrap_err();
1441        let msg = err.to_string();
1442        assert!(msg.contains("not_a_relation"), "msg: {msg}");
1443    }
1444
1445    #[test]
1446    fn compile_unknown_kind_passes_through() {
1447        // Pack-agnostic: any string is accepted as an entity kind at the query layer.
1448        // Validation is a pack-handler concern.
1449        let q = gql::parse("MATCH (a:gizmo)-[:extends]->(b) RETURN a").unwrap();
1450        let compiled = compile(&q, &opts()).unwrap();
1451        let has_gizmo = compiled
1452            .params
1453            .iter()
1454            .any(|p| matches!(p, QueryValue::Text(s) if s == "gizmo"));
1455        assert!(
1456            has_gizmo,
1457            "pack-agnostic: unknown kind must pass through into SQL params"
1458        );
1459    }
1460
1461    #[test]
1462    fn compile_kind_passes_through_unchanged() {
1463        // Pack-agnostic: 'paper' is no longer normalized to 'document' at the query layer.
1464        // The string passes through as-is.
1465        let q =
1466            gql::parse("MATCH (a:paper)-[:introduced_by]->(b:concept) RETURN a LIMIT 1").unwrap();
1467        let compiled = compile(&q, &opts()).unwrap();
1468        let has_paper = compiled
1469            .params
1470            .iter()
1471            .any(|p| matches!(p, QueryValue::Text(s) if s == "paper"));
1472        assert!(
1473            has_paper,
1474            "kind 'paper' must pass through unchanged into SQL params"
1475        );
1476    }
1477
1478    #[test]
1479    fn compile_rejects_namespace_in_where() {
1480        let q =
1481            gql::parse("MATCH (a:concept)-[:extends]->(b) WHERE a.namespace = 'other' RETURN a")
1482                .unwrap();
1483        let err = compile(&q, &opts()).unwrap_err();
1484        assert!(err.to_string().contains("namespace"), "msg: {err}");
1485    }
1486
1487    #[test]
1488    fn compile_rejects_unknown_relation_in_where() {
1489        let q = gql::parse("MATCH (a)-[e:extends]->(b) WHERE e.relation = 'related_to' RETURN a")
1490            .unwrap();
1491        let err = compile(&q, &opts()).unwrap_err();
1492        assert!(err.to_string().contains("related_to"), "msg: {err}");
1493    }
1494
1495    #[test]
1496    fn compile_kind_in_where_passes_through_unchanged() {
1497        // Pack-agnostic: kind strings in WHERE conditions pass through as-is.
1498        let q = gql::parse("MATCH (a)-[:extends]->(b) WHERE a.kind = 'paper' RETURN a").unwrap();
1499        let compiled = compile(&q, &opts()).unwrap();
1500        let has_paper = compiled
1501            .params
1502            .iter()
1503            .any(|p| matches!(p, QueryValue::Text(s) if s == "paper"));
1504        assert!(
1505            has_paper,
1506            "kind 'paper' must pass through unchanged into SQL params"
1507        );
1508    }
1509
1510    #[test]
1511    fn variable_length_return_start_only_joins_end_entity() {
1512        // Even when only the start variable is projected, the outer query
1513        // references `r.deleted_at` / `r.namespace`, so entities r must be
1514        // joined unconditionally.
1515        let q = gql::parse("MATCH (a:concept)-[:extends*1..3]->(b) RETURN a LIMIT 10").unwrap();
1516        let compiled = compile(&q, &opts()).unwrap();
1517        assert!(
1518            compiled.sql.contains("JOIN entities r"),
1519            "entities r must always be joined when r.* conditions are emitted; sql: {}",
1520            compiled.sql
1521        );
1522    }
1523
1524    #[test]
1525    fn variable_length_trailing_pattern_unsupported() {
1526        let q = gql::parse("MATCH (a)-[:extends*1..3]->(b)-[:implements]->(c) RETURN b").unwrap();
1527        let err = compile(&q, &opts()).unwrap_err();
1528        assert!(
1529            matches!(err, QueryError::Unsupported(_)),
1530            "expected Unsupported, got {err:?}"
1531        );
1532    }
1533
1534    #[test]
1535    fn variable_length_mixed_chain_unsupported() {
1536        // Mixed fixed + variable in one chain — has_variable_length() triggers
1537        // the variable-length path, which must reject because edges.len() > 1.
1538        let q = gql::parse("MATCH (a)-[:extends]->(b)-[:implements*1..2]->(c) RETURN c").unwrap();
1539        let err = compile(&q, &opts()).unwrap_err();
1540        assert!(matches!(err, QueryError::Unsupported(_)), "got {err:?}");
1541    }
1542
1543    #[test]
1544    fn sparql_star_rejected_as_unsupported() {
1545        use crate::parsers::sparql;
1546        let err = sparql::parse("SELECT ?a ?b WHERE { ?a :extends* ?b . }").unwrap_err();
1547        assert!(matches!(err, QueryError::Unsupported(_)), "got {err:?}");
1548    }
1549
1550    /// Regression guard for ISSUE #231: SPARQL subject→predicate→object direction.
1551    /// `?a :extends ?b` must bind ?a to source_id and ?b to target_id, not swapped.
1552    #[test]
1553    fn sparql_subject_object_direction_compiles_outbound() {
1554        use crate::parsers::sparql;
1555
1556        let q = sparql::parse("SELECT ?a ?b WHERE { ?a :extends ?b . }").unwrap();
1557        let compiled = compile(&q, &opts()).unwrap();
1558
1559        assert!(
1560            compiled
1561                .sql
1562                .contains("JOIN graph_edges e0 ON e0.source_id = n0.id"),
1563            "SPARQL subject must bind graph_edges.source_id; sql: {}",
1564            compiled.sql
1565        );
1566        assert!(
1567            compiled
1568                .sql
1569                .contains("JOIN entities n1 ON n1.id = e0.target_id"),
1570            "SPARQL object must bind graph_edges.target_id; sql: {}",
1571            compiled.sql
1572        );
1573        assert!(
1574            compiled.sql.contains("e0.relation = ?1"),
1575            "SPARQL predicate must bind graph_edges.relation; sql: {}",
1576            compiled.sql
1577        );
1578    }
1579
1580    #[test]
1581    fn return_property_projection_compiles() {
1582        let q =
1583            gql::parse("MATCH (a:concept)-[e:extends]->(b:concept) RETURN a.name, b.name LIMIT 5")
1584                .unwrap();
1585        let compiled = compile(&q, &opts()).unwrap();
1586        // Node aliases are n0, n1; the SQL uses `alias.col AS var_prop`
1587        assert!(
1588            compiled.sql.contains(".name AS a_name"),
1589            "sql: {}",
1590            compiled.sql
1591        );
1592        assert!(
1593            compiled.sql.contains(".name AS b_name"),
1594            "sql: {}",
1595            compiled.sql
1596        );
1597        assert!(
1598            !compiled.sql.contains("a_kind"),
1599            "should not emit full node columns"
1600        );
1601    }
1602
1603    #[test]
1604    fn return_unknown_node_property_rejected() {
1605        let q = gql::parse("MATCH (a:concept)-[:extends]->(b) RETURN a.domain LIMIT 5").unwrap();
1606        let err = compile(&q, &opts()).unwrap_err();
1607        assert!(
1608            matches!(err, QueryError::Compile(ref msg) if msg.contains("unknown node property 'domain'")),
1609            "got {err:?}"
1610        );
1611    }
1612
1613    #[test]
1614    fn return_unknown_edge_property_rejected() {
1615        let q = gql::parse("MATCH (a)-[e:extends]->(b) RETURN e.label LIMIT 5").unwrap();
1616        let err = compile(&q, &opts()).unwrap_err();
1617        assert!(
1618            matches!(err, QueryError::Compile(ref msg) if msg.contains("unknown edge property 'label'")),
1619            "got {err:?}"
1620        );
1621    }
1622
1623    #[test]
1624    fn return_valid_edge_property_compiles() {
1625        let q =
1626            gql::parse("MATCH (a)-[e:extends]->(b) RETURN e.relation, e.weight LIMIT 5").unwrap();
1627        let compiled = compile(&q, &opts()).unwrap();
1628        // Edge alias is e0; SQL: `e0.relation AS e_relation`
1629        assert!(
1630            compiled.sql.contains(".relation AS e_relation"),
1631            "sql: {}",
1632            compiled.sql
1633        );
1634        assert!(
1635            compiled.sql.contains(".weight AS e_weight"),
1636            "sql: {}",
1637            compiled.sql
1638        );
1639    }
1640
1641    #[test]
1642    fn entity_type_compiles_as_direct_column_not_json_extract() {
1643        // entity_type in a NodePattern must become `alias.entity_type = ?N` in the WHERE
1644        // clause — a direct column reference, not json_extract from the properties blob.
1645        let q = gql::parse("MATCH (n:document {entity_type: 'paper'})-[:extends]->(m) RETURN n")
1646            .unwrap();
1647        let compiled = compile(&q, &opts()).unwrap();
1648        assert!(
1649            compiled.sql.contains(".entity_type = ?"),
1650            "entity_type must compile to a direct column comparison; sql: {}",
1651            compiled.sql
1652        );
1653        assert!(
1654            !compiled.sql.contains("json_extract"),
1655            "entity_type must NOT use json_extract; sql: {}",
1656            compiled.sql
1657        );
1658        let has_paper_param = compiled
1659            .params
1660            .iter()
1661            .any(|p| matches!(p, QueryValue::Text(s) if s == "paper"));
1662        assert!(
1663            has_paper_param,
1664            "entity_type value 'paper' must appear as a bound parameter"
1665        );
1666    }
1667
1668    // --- OR support in WHERE clause ---
1669
1670    #[test]
1671    fn where_or_compiles_to_sql_or() {
1672        let q = gql::parse(
1673            "MATCH (a:concept)-[e:extends]->(b) WHERE a.name = 'LoRA' OR a.name = 'QLoRA' RETURN a",
1674        )
1675        .unwrap();
1676        let compiled = compile(&q, &opts()).unwrap();
1677        assert!(
1678            compiled.sql.contains(" OR "),
1679            "WHERE OR must produce SQL OR; sql: {}",
1680            compiled.sql
1681        );
1682        let has_lora = compiled
1683            .params
1684            .iter()
1685            .any(|p| matches!(p, QueryValue::Text(s) if s == "LoRA"));
1686        let has_qlora = compiled
1687            .params
1688            .iter()
1689            .any(|p| matches!(p, QueryValue::Text(s) if s == "QLoRA"));
1690        assert!(has_lora && has_qlora, "both OR values must be bound params");
1691    }
1692
1693    #[test]
1694    fn where_and_or_precedence() {
1695        // `a AND b OR c` should compile as `(a AND b) OR c`
1696        let q = gql::parse(
1697            "MATCH (a:concept)-[e:extends]->(b) WHERE a.name = 'X' AND a.kind = 'concept' OR b.kind = 'project' RETURN a"
1698        ).unwrap();
1699        let compiled = compile(&q, &opts()).unwrap();
1700        // The SQL should contain an OR at the outer level wrapping the AND group
1701        assert!(
1702            compiled.sql.contains(" OR "),
1703            "expected OR in sql; sql: {}",
1704            compiled.sql
1705        );
1706    }
1707
1708    // --- event_observations synthetic edge support ---
1709
1710    #[test]
1711    fn synthetic_edge_joins_event_observations() {
1712        let q = gql::parse("MATCH (ev)-[:observed_as_selected]->(m:memory) RETURN ev, m").unwrap();
1713        let compiled = compile(&q, &opts()).unwrap();
1714        assert!(
1715            compiled.sql.contains("event_observations"),
1716            "synthetic edge must join event_observations; sql: {}",
1717            compiled.sql
1718        );
1719        assert!(
1720            !compiled.sql.contains("graph_edges"),
1721            "synthetic edge must NOT join graph_edges; sql: {}",
1722            compiled.sql
1723        );
1724        let has_role_param = compiled
1725            .params
1726            .iter()
1727            .any(|p| matches!(p, QueryValue::Text(s) if s == "selected"));
1728        assert!(has_role_param, "role 'selected' must be a bound parameter");
1729    }
1730
1731    // CRIT-1 regression: event source node must bind to `events` table, not `entities`.
1732    // Previously `FROM entities n0 JOIN event_observations e0 ON e0.event_id = n0.id`
1733    // was emitted — IDs are disjoint so every query returned zero rows.
1734    #[test]
1735    fn synthetic_edge_event_source_binds_events_table() {
1736        let q = gql::parse("MATCH (ev)-[:observed_as_selected]->(m:memory) RETURN ev, m").unwrap();
1737        let compiled = compile(&q, &opts()).unwrap();
1738        assert!(
1739            compiled.sql.contains("FROM events "),
1740            "CRIT-1: event source must come FROM events table, not entities; sql: {}",
1741            compiled.sql
1742        );
1743        assert!(
1744            !compiled
1745                .sql
1746                .starts_with("SELECT * FROM entities n0 JOIN event_observations"),
1747            "CRIT-1: must not join events via entities table; sql: {}",
1748            compiled.sql
1749        );
1750    }
1751
1752    #[test]
1753    fn synthetic_edge_event_observation_join_uses_events_id() {
1754        // The JOIN must be `event_observations.event_id = events_alias.id`,
1755        // not `event_observations.event_id = entities_alias.id`.
1756        let q = gql::parse("MATCH (ev)-[:observed_as_selected]->(m) RETURN m").unwrap();
1757        let compiled = compile(&q, &opts()).unwrap();
1758        // The event alias is n0; the join must reference n0 against `events` table.
1759        assert!(
1760            compiled
1761                .sql
1762                .contains("JOIN event_observations e0 ON e0.event_id = n0.id"),
1763            "CRIT-1: event_observations must join on events.id (n0 is now events); sql: {}",
1764            compiled.sql
1765        );
1766    }
1767
1768    #[test]
1769    fn synthetic_edge_event_node_projects_event_columns() {
1770        // The event variable in RETURN must select event-table columns (verb, outcome, …),
1771        // not entity columns (name, entity_type, properties, …).
1772        let q = gql::parse("MATCH (ev)-[:observed_as_selected]->(m) RETURN ev").unwrap();
1773        let compiled = compile(&q, &opts()).unwrap();
1774        assert!(
1775            compiled.sql.contains("ev_verb"),
1776            "CRIT-1: event variable must project verb column; sql: {}",
1777            compiled.sql
1778        );
1779        assert!(
1780            compiled.sql.contains("ev_outcome"),
1781            "CRIT-1: event variable must project outcome column; sql: {}",
1782            compiled.sql
1783        );
1784        assert!(
1785            !compiled.sql.contains("ev_name,") && !compiled.sql.contains("ev_name "),
1786            "CRIT-1: event variable must NOT project entity name column; sql: {}",
1787            compiled.sql
1788        );
1789        assert!(
1790            !compiled.sql.contains("ev_properties"),
1791            "CRIT-1: event variable must NOT project entity properties column; sql: {}",
1792            compiled.sql
1793        );
1794    }
1795
1796    #[test]
1797    fn synthetic_edge_namespace_filter_on_events_table() {
1798        // MIN-2: when scoped, the namespace filter must target the events table
1799        // (which has a namespace column) — not rely on entities indirection.
1800        let q = gql::parse("MATCH (ev)-[:observed_as_selected]->(m) RETURN m").unwrap();
1801        let compiled = compile(&q, &scoped("test-ns")).unwrap();
1802        // Both the event alias (n0, now from `events`) and the target alias (n1, from `entities`)
1803        // must have namespace filters.
1804        let ns_count = compiled
1805            .params
1806            .iter()
1807            .filter(|p| matches!(p, QueryValue::Text(s) if s == "test-ns"))
1808            .count();
1809        assert!(
1810            ns_count >= 2,
1811            "MIN-2: namespace must be filtered on both events and target; params: {:?}",
1812            compiled.params
1813        );
1814    }
1815
1816    #[test]
1817    fn synthetic_edge_candidate_role() {
1818        let q = gql::parse("MATCH (ev)-[:observed_as_candidate]->(m) RETURN ev, m").unwrap();
1819        let compiled = compile(&q, &opts()).unwrap();
1820        assert!(
1821            compiled.sql.contains("event_observations"),
1822            "sql: {}",
1823            compiled.sql
1824        );
1825        let has_candidate = compiled
1826            .params
1827            .iter()
1828            .any(|p| matches!(p, QueryValue::Text(s) if s == "candidate"));
1829        assert!(has_candidate, "role 'candidate' must be bound");
1830    }
1831
1832    #[test]
1833    fn synthetic_edge_multi_role() {
1834        // Multiple observed_as_* relations compile to a role IN (...) predicate.
1835        let q =
1836            gql::parse("MATCH (ev)-[:observed_as_candidate|observed_as_selected]->(m) RETURN m")
1837                .unwrap();
1838        let compiled = compile(&q, &opts()).unwrap();
1839        assert!(
1840            compiled.sql.contains("event_observations"),
1841            "sql: {}",
1842            compiled.sql
1843        );
1844        assert!(
1845            compiled.sql.contains("IN"),
1846            "multi-role must use IN; sql: {}",
1847            compiled.sql
1848        );
1849    }
1850
1851    #[test]
1852    fn mixed_synthetic_and_canonical_rejected() {
1853        let q = gql::parse("MATCH (ev)-[:observed_as_selected|extends]->(m) RETURN m").unwrap();
1854        let err = compile(&q, &opts()).unwrap_err();
1855        assert!(
1856            matches!(err, QueryError::Compile(_)),
1857            "mixed synthetic+canonical must be rejected; got {err:?}"
1858        );
1859    }
1860
1861    #[test]
1862    fn synthetic_edge_inbound_rejected() {
1863        let q = gql::parse("MATCH (m)<-[:observed_as_selected]-(ev) RETURN m").unwrap();
1864        let err = compile(&q, &opts()).unwrap_err();
1865        assert!(
1866            matches!(err, QueryError::Compile(_)),
1867            "inbound synthetic edge must be rejected; got {err:?}"
1868        );
1869    }
1870
1871    // --- MAJ-1: OR spanning both endpoints in variable-length patterns must be rejected ---
1872
1873    #[test]
1874    fn variable_length_or_across_endpoints_rejected() {
1875        // MAJ-1: `WHERE a.name='X' OR b.name='Y'` in a variable-length pattern must be
1876        // rejected with Unsupported — not silently compiled to AND.
1877        let q = gql::parse(
1878            "MATCH (a)-[:extends*1..3]->(b) WHERE a.name = 'X' OR b.name = 'Y' RETURN a",
1879        )
1880        .unwrap();
1881        let result = compile(&q, &opts());
1882        assert!(
1883            matches!(result, Err(QueryError::Unsupported(_))),
1884            "MAJ-1: OR spanning both endpoints must return Unsupported; got {result:?}"
1885        );
1886        let err_msg = result.unwrap_err().to_string();
1887        assert!(
1888            err_msg.contains("separate queries") || err_msg.contains("one endpoint"),
1889            "error must be actionable; got: {err_msg}"
1890        );
1891    }
1892
1893    #[test]
1894    fn variable_length_or_single_endpoint_still_works() {
1895        // OR within a single endpoint (same alias) must still compile successfully.
1896        let q = gql::parse(
1897            "MATCH (a)-[:extends*1..3]->(b) WHERE a.name = 'X' OR a.name = 'Y' RETURN a",
1898        )
1899        .unwrap();
1900        let result = compile(&q, &opts());
1901        assert!(
1902            result.is_ok(),
1903            "single-endpoint OR must compile; got {result:?}"
1904        );
1905    }
1906
1907    #[test]
1908    fn variable_length_and_across_endpoints_still_works() {
1909        // AND across endpoints must still compile (the existing behavior is correct for AND).
1910        let q = gql::parse(
1911            "MATCH (a)-[:extends*1..3]->(b) WHERE a.name = 'X' AND b.name = 'Y' RETURN a",
1912        )
1913        .unwrap();
1914        let result = compile(&q, &opts());
1915        assert!(
1916            result.is_ok(),
1917            "AND across endpoints must compile; got {result:?}"
1918        );
1919    }
1920
1921    // --- Regression tests for #379: variable-length WHERE OR must not flatten to AND ---
1922
1923    #[test]
1924    fn test_variable_length_or_compiles_to_or() {
1925        // #379: MATCH (a)-[*1..3 WHERE p1 OR p2]-> in GQL surface maps to a single-endpoint
1926        // OR in the WHERE clause.  The compiled SQL must contain OR, not AND.
1927        let q = gql::parse(
1928            "MATCH (a)-[:extends*1..3]->(b) WHERE a.name = 'LoRA' OR a.name = 'QLoRA' RETURN b",
1929        )
1930        .unwrap();
1931        let compiled = compile(&q, &opts()).unwrap();
1932        // The start_conditions list must contain an OR fragment, not two AND-joined conditions.
1933        assert!(
1934            compiled.sql.contains(" OR "),
1935            "#379: variable-length single-endpoint OR must produce SQL OR; sql: {}",
1936            compiled.sql
1937        );
1938        // Both values must appear as bound parameters.
1939        let has_lora = compiled
1940            .params
1941            .iter()
1942            .any(|p| matches!(p, QueryValue::Text(s) if s == "LoRA"));
1943        let has_qlora = compiled
1944            .params
1945            .iter()
1946            .any(|p| matches!(p, QueryValue::Text(s) if s == "QLoRA"));
1947        assert!(has_lora && has_qlora, "both OR values must be bound params");
1948    }
1949
1950    #[test]
1951    fn test_single_endpoint_or_at_depth_1() {
1952        // #379: single-hop pattern with single-endpoint OR in WHERE.
1953        // The OR must appear in the compiled SQL (not silently become AND).
1954        let q = gql::parse(
1955            "MATCH (a)-[r:extends]->(b) WHERE r.weight > 0.5 OR r.relation = 'extends' RETURN a",
1956        )
1957        .unwrap();
1958        let compiled = compile(&q, &opts()).unwrap();
1959        assert!(
1960            compiled.sql.contains(" OR "),
1961            "#379: fixed-length single-endpoint OR must produce SQL OR; sql: {}",
1962            compiled.sql
1963        );
1964        let has_extends = compiled
1965            .params
1966            .iter()
1967            .any(|p| matches!(p, QueryValue::Text(s) if s == "extends"));
1968        assert!(
1969            has_extends,
1970            "relation value 'extends' must be a bound param"
1971        );
1972    }
1973
1974    #[test]
1975    fn test_and_still_works() {
1976        // #379: regression guard — simple WHERE p1 AND p2 must still emit AND.
1977        let q = gql::parse(
1978            "MATCH (a)-[:extends*1..3]->(b) WHERE a.name = 'LoRA' AND a.kind = 'concept' RETURN b",
1979        )
1980        .unwrap();
1981        let compiled = compile(&q, &opts()).unwrap();
1982        // The SQL must not contain a bare " OR " from the AND expression.
1983        assert!(
1984            !compiled.sql.contains(" OR "),
1985            "#379: AND must not produce OR; sql: {}",
1986            compiled.sql
1987        );
1988        let has_lora = compiled
1989            .params
1990            .iter()
1991            .any(|p| matches!(p, QueryValue::Text(s) if s == "LoRA"));
1992        let has_concept = compiled
1993            .params
1994            .iter()
1995            .any(|p| matches!(p, QueryValue::Text(s) if s == "concept"));
1996        assert!(
1997            has_lora && has_concept,
1998            "both AND values must be bound params"
1999        );
2000    }
2001
2002    // --- Regression tests for P0/P1 correctness fixes ---
2003
2004    /// max_limit overflow: usize::MAX as i64 == -1 on 64-bit, defeating the cap.
2005    #[test]
2006    fn max_limit_overflow_returns_error() {
2007        let q = gql::parse("MATCH (a)-[:extends]->(b) RETURN a").unwrap();
2008        let opts = CompileOptions {
2009            scopes: vec![],
2010            max_limit: usize::MAX,
2011        };
2012        // On 64-bit: usize::MAX > i64::MAX, so try_from must return Err.
2013        // On 32-bit: usize::MAX == u32::MAX which fits in i64, so this may succeed —
2014        // either way we must not produce a negative limit.
2015        let result = compile(&q, &opts);
2016        match result {
2017            Err(QueryError::InvalidInput(_)) => {
2018                // Expected on 64-bit: overflow detected, error returned.
2019            }
2020            Ok(compiled) => {
2021                // On 32-bit: limit fits in i64 — verify it is non-negative.
2022                let limit_param = compiled.params.last().unwrap();
2023                assert!(
2024                    matches!(limit_param, QueryValue::Integer(n) if *n >= 0),
2025                    "limit must never be negative; got {limit_param:?}"
2026                );
2027            }
2028            Err(e) => panic!("unexpected error: {e:?}"),
2029        }
2030    }
2031
2032    /// max_limit=0 with no query limit: query limit defaults to 0, no crash.
2033    #[test]
2034    fn max_limit_zero_compiles() {
2035        let q = gql::parse("MATCH (a)-[:extends]->(b) RETURN a").unwrap();
2036        let opts = CompileOptions {
2037            scopes: vec![],
2038            max_limit: 0,
2039        };
2040        let compiled = compile(&q, &opts).unwrap();
2041        let limit_param = compiled.params.last().unwrap();
2042        assert!(
2043            matches!(limit_param, QueryValue::Integer(0)),
2044            "max_limit=0 should produce LIMIT 0; got {limit_param:?}"
2045        );
2046    }
2047
2048    /// Variable-length synthetic edges must be rejected.
2049    #[test]
2050    fn variable_length_synthetic_edge_rejected() {
2051        // observed_as_selected*1..3 must be rejected — the recursive CTE targets
2052        // graph_edges, which has no event_observations data.
2053        let q = gql::parse("MATCH (ev)-[:observed_as_selected*1..3]->(m) RETURN m").unwrap();
2054        let err = compile(&q, &opts()).unwrap_err();
2055        assert!(
2056            matches!(err, QueryError::Unsupported(_)),
2057            "variable-length synthetic edge must return Unsupported; got {err:?}"
2058        );
2059        assert!(
2060            err.to_string().contains("synthetic") || err.to_string().contains("observed_as"),
2061            "error should mention synthetic edges: {err}"
2062        );
2063    }
2064
2065    /// Variable-length traversal must not pass through deleted intermediate nodes.
2066    /// The compiled SQL must join entities for the next node in the recursive member.
2067    #[test]
2068    fn variable_length_recursive_member_joins_next_node_for_deleted_filter() {
2069        let q = gql::parse("MATCH (a)-[:extends*1..3]->(b) RETURN b").unwrap();
2070        let compiled = compile(&q, &opts()).unwrap();
2071        // The recursive CTE member must join next_node to filter deleted intermediates.
2072        assert!(
2073            compiled.sql.contains("JOIN entities next_node"),
2074            "recursive CTE must join entities next_node for deleted-intermediate filtering; sql: {}",
2075            compiled.sql
2076        );
2077        assert!(
2078            compiled.sql.contains("next_node.deleted_at IS NULL"),
2079            "recursive CTE must filter next_node.deleted_at IS NULL; sql: {}",
2080            compiled.sql
2081        );
2082    }
2083
2084    /// Variable-length traversal with namespace scope: the next_node join must
2085    /// also apply the namespace filter to prevent namespace-crossing intermediates.
2086    #[test]
2087    fn variable_length_recursive_member_namespace_scopes_intermediates() {
2088        let q = gql::parse("MATCH (a)-[:extends*1..3]->(b) RETURN b").unwrap();
2089        let compiled = compile(&q, &scoped("test-ns")).unwrap();
2090        // The next_node join must include a namespace condition.
2091        assert!(
2092            compiled.sql.contains("next_node.namespace"),
2093            "recursive CTE next_node join must filter namespace; sql: {}",
2094            compiled.sql
2095        );
2096    }
2097
2098    /// Public AST panic: compile must return an error for a malformed AST instead
2099    /// of panicking with an out-of-bounds index.
2100    #[test]
2101    fn compile_malformed_ast_returns_error_not_panic() {
2102        use crate::ast::{EdgeDirection, EdgePattern, GqlQuery, MatchPattern, PatternElement};
2103        // An AST that starts with an Edge (no leading Node) is malformed.
2104        let q = GqlQuery {
2105            pattern: MatchPattern {
2106                elements: vec![PatternElement::Edge(EdgePattern {
2107                    variable: None,
2108                    relations: vec!["extends".to_string()],
2109                    direction: EdgeDirection::Out,
2110                    min_hops: 1,
2111                    max_hops: 1,
2112                })],
2113            },
2114            where_clause: WhereExpr::True,
2115            return_items: vec![],
2116            limit: None,
2117        };
2118        let result = compile(&q, &opts());
2119        assert!(
2120            result.is_err(),
2121            "malformed AST (starts with Edge) must return error, not panic"
2122        );
2123    }
2124
2125    /// GQL edge pattern suffix fix: `(a)-[e:extends](b)` must be rejected because
2126    /// the `-` suffix after `]` is required.
2127    #[test]
2128    fn edge_pattern_without_suffix_dash_rejected() {
2129        let result = gql::parse("MATCH (a)-[e:extends](b) RETURN a");
2130        assert!(
2131            result.is_err(),
2132            "edge pattern without suffix '-' must be rejected as a parse error"
2133        );
2134    }
2135
2136    /// Duplicate inline property rejection.
2137    #[test]
2138    fn duplicate_inline_property_rejected() {
2139        let result = gql::parse("MATCH (n {name: 'A', name: 'B'}) RETURN n");
2140        assert!(
2141            result.is_err(),
2142            "duplicate property 'name' in node props must be rejected"
2143        );
2144        let err = result.unwrap_err().to_string();
2145        assert!(
2146            err.contains("duplicate") || err.contains("name"),
2147            "error should mention duplicate or key name: {err}"
2148        );
2149    }
2150
2151    /// Unknown synthetic relation must be rejected at validation.
2152    #[test]
2153    fn unknown_synthetic_relation_rejected_at_compile() {
2154        let q = gql::parse("MATCH (a)-[:observed_as_bogus]->(b) RETURN a").unwrap();
2155        let err = compile(&q, &opts()).unwrap_err();
2156        assert!(
2157            matches!(err, QueryError::Validation(_)),
2158            "unknown synthetic relation must return Validation error; got {err:?}"
2159        );
2160    }
2161}