Skip to main content

khive_query/compilers/
sql.rs

1//! Compile GQL AST to parameterized SQL.
2//!
3//! Two compilation paths:
4//! - Fixed-length patterns (all edges *1..1) → JOIN chain
5//! - Variable-length patterns (any edge *N..M where M>1) → recursive CTE
6//!
7//! Synthetic edge paths (ADR-041):
8//! - Relations prefixed `observed_as_*` join against `event_observations`, not `graph_edges`.
9//!
10//! Security invariants (MAJ-1/MAJ-2/MAJ-3 from critic review):
11//! - Namespace injection: WHERE clause always comes from CompileOptions.scopes, never the query.
12//! - Edge property whitelist: only `relation` and `weight` are queryable edge columns.
13//! - Depth cap: recursive CTE depth capped at MAX_DEPTH; exceeding it errors at validation.
14
15use crate::ast::*;
16use crate::error::QueryError;
17use crate::validate::{validate_with_warnings, MAX_DEPTH};
18
/// Observation roles used by the synthetic edge compiler (ADR-041 §8).
///
/// Each entry is the full relation name as written in a query edge pattern.
const SYNTHETIC_RELATIONS: &[&str] = &[
    "observed_as_candidate",
    "observed_as_selected",
    "observed_as_target",
    "observed_as_signal",
];

/// Reports whether `rel` is exactly one of the synthetic ADR-041 observation
/// relations listed in [`SYNTHETIC_RELATIONS`] (case-sensitive, whole-string match).
fn is_synthetic(rel: &str) -> bool {
    SYNTHETIC_RELATIONS.iter().any(|known| *known == rel)
}
31
/// Maps a synthetic relation name to the `role` value it stands for.
///
/// The role is the part of the relation after the `observed_as_` prefix
/// (`"observed_as_signal"` → `"signal"`). Any string that is not one of the
/// four known synthetic relations yields `None`.
fn synthetic_role(rel: &str) -> Option<&'static str> {
    const ROLES: [&str; 4] = ["candidate", "selected", "target", "signal"];

    let suffix = rel.strip_prefix("observed_as_")?;
    ROLES.iter().copied().find(|role| *role == suffix)
}
42
/// Result of compiling a GQL query: SQL text plus everything needed to run it.
#[derive(Debug)]
pub struct CompiledQuery {
    /// SQL text. Values are never inlined; they appear as numbered `?N` placeholders.
    pub sql: String,
    /// Bound values in placeholder order: `?N` refers to `params[N - 1]`.
    pub params: Vec<QueryValue>,
    /// The RETURN items of the compiled query (copied from the query's `return_items`).
    pub return_vars: Vec<ReturnItem>,
    /// Validation warnings; `compile` fills this in after the SQL has been generated.
    pub warnings: Vec<String>,
}
50
/// Caller-supplied settings that constrain what a compiled query may touch.
///
/// Derives `Debug` and `Clone` so options can be logged and reused across
/// compilations, matching the `Debug` derive on `CompiledQuery`.
#[derive(Debug, Clone)]
pub struct CompileOptions {
    /// Namespace scope. Empty = cross-namespace (all). Non-empty = filter to these namespaces.
    pub scopes: Vec<String>,
    /// Hard limit cap (server-side safety). Query limit is min(requested, max_limit).
    pub max_limit: usize,
}

impl Default for CompileOptions {
    /// Cross-namespace scope (no filter) with a row cap of 500.
    fn default() -> Self {
        Self {
            scopes: Vec::new(),
            max_limit: 500,
        }
    }
}
66
67pub fn compile(query: &GqlQuery, opts: &CompileOptions) -> Result<CompiledQuery, QueryError> {
68    if query.pattern.elements.is_empty() {
69        return Err(QueryError::Compile("empty pattern".into()));
70    }
71
72    // Validate edge relations + structural rules before emitting SQL.
73    let mut query = query.clone();
74    let warnings = validate_with_warnings(&mut query)?;
75
76    let mut compiled = if query.pattern.has_variable_length() {
77        compile_variable_length(&query, opts)?
78    } else {
79        compile_fixed_length(&query, opts)?
80    };
81    compiled.warnings = warnings;
82    Ok(compiled)
83}
84
85fn namespace_filter(alias: &str, opts: &CompileOptions, params: &mut Vec<QueryValue>) -> String {
86    if opts.scopes.is_empty() {
87        String::new()
88    } else if opts.scopes.len() == 1 {
89        params.push(QueryValue::Text(opts.scopes[0].clone()));
90        format!(" AND {alias}.namespace = ?{}", params.len())
91    } else {
92        let placeholders: Vec<String> = opts
93            .scopes
94            .iter()
95            .map(|s| {
96                params.push(QueryValue::Text(s.clone()));
97                format!("?{}", params.len())
98            })
99            .collect();
100        format!(" AND {alias}.namespace IN ({})", placeholders.join(", "))
101    }
102}
103
104/// Identifies node indices that are endpoints of synthetic `observed_as_*` edges.
105///
106/// Returns `(source_indices, target_indices)`:
107/// - `source_indices`: node indices bound to the `events` table (the event source node)
108/// - `target_indices`: node indices bound to the `notes` table (the observed note target node)
109fn synthetic_endpoint_node_indices(
110    elements: &[PatternElement],
111) -> (
112    std::collections::HashSet<usize>,
113    std::collections::HashSet<usize>,
114) {
115    let mut source_set = std::collections::HashSet::new();
116    let mut target_set = std::collections::HashSet::new();
117    let mut node_idx = 0usize;
118    let mut prev_node_idx: Option<usize> = None;
119    for element in elements {
120        match element {
121            PatternElement::Node(_) => {
122                prev_node_idx = Some(node_idx);
123                node_idx += 1;
124            }
125            PatternElement::Edge(ep) => {
126                let has_synthetic = ep.relations.iter().any(|r| is_synthetic(r));
127                if has_synthetic {
128                    if let Some(src_idx) = prev_node_idx {
129                        source_set.insert(src_idx);
130                        // The target is the next node (current node_idx).
131                        target_set.insert(node_idx);
132                    }
133                }
134            }
135        }
136    }
137    (source_set, target_set)
138}
139
140/// Compile fixed-length patterns to a chain of JOINs.
141///
142/// MATCH (a:concept)-[e:introduced_by]->(b:paper) WHERE ... RETURN a, e, b LIMIT 10
143/// →
144/// SELECT a.*, e.*, b.*
145/// FROM entities a
146/// JOIN graph_edges e ON e.source_id = a.id
147/// JOIN entities b ON b.id = e.target_id
148/// WHERE a.kind = 'concept' AND e.relation = 'introduced_by' AND b.kind = 'paper'
149///   AND a.deleted_at IS NULL AND b.deleted_at IS NULL
150/// LIMIT 10
151///
152/// Synthetic `observed_as_*` patterns (ADR-041 §8) route the event-source node
153/// to the `events` table instead of `entities`.
154fn compile_fixed_length(
155    query: &GqlQuery,
156    opts: &CompileOptions,
157) -> Result<CompiledQuery, QueryError> {
158    let mut params: Vec<QueryValue> = Vec::new();
159    let mut from_parts: Vec<String> = Vec::new();
160    let mut join_parts: Vec<String> = Vec::new();
161    let mut where_parts: Vec<String> = Vec::new();
162    let mut select_parts: Vec<String> = Vec::new();
163
164    let mut node_aliases: Vec<String> = Vec::new();
165    let mut edge_aliases: Vec<String> = Vec::new();
166    let mut var_to_alias: std::collections::HashMap<String, (String, VarKind)> =
167        std::collections::HashMap::new();
168
169    // Pre-compute which node indices are endpoints of synthetic edges.
170    // Source nodes bind to `events`; target nodes bind to `notes`.
171    let (event_source_indices, note_target_indices) =
172        synthetic_endpoint_node_indices(&query.pattern.elements);
173
174    let mut node_idx = 0usize;
175    let mut edge_idx = 0usize;
176
177    for element in &query.pattern.elements {
178        match element {
179            PatternElement::Node(np) => {
180                let alias = format!("n{node_idx}");
181                node_aliases.push(alias.clone());
182
183                let is_event_source = event_source_indices.contains(&node_idx);
184                let is_note_target = note_target_indices.contains(&node_idx);
185
186                if node_idx == 0 {
187                    if is_event_source {
188                        from_parts.push(format!("events {alias}"));
189                    } else {
190                        // Note targets are joined by the synthetic edge handler, not FROM.
191                        if !is_note_target {
192                            from_parts.push(format!("entities {alias}"));
193                        }
194                    }
195                }
196
197                if is_event_source {
198                    // Events table does not have `deleted_at`; filter is omitted.
199                    // Namespace filter uses the `events.namespace` column directly.
200                    let ns_filter = namespace_filter(&alias, opts, &mut params);
201                    if !ns_filter.is_empty() {
202                        where_parts.push(ns_filter.trim_start_matches(" AND ").to_string());
203                    }
204                    // `kind` on an event node filters events.kind (e.g. "recall_executed").
205                    if let Some(ref kind) = np.kind {
206                        params.push(QueryValue::Text(kind.clone()));
207                        where_parts.push(format!("{alias}.kind = ?{}", params.len()));
208                    }
209                    // entity_type and properties are not columns on events — reject explicitly.
210                    if np.entity_type.is_some() {
211                        return Err(QueryError::Compile(
212                            "event nodes do not have an entity_type column".into(),
213                        ));
214                    }
215                    if !np.properties.is_empty() {
216                        return Err(QueryError::Compile(
217                            "event nodes do not support inline property filters; \
218                             use a WHERE clause on verb, outcome, or payload fields"
219                                .into(),
220                        ));
221                    }
222                } else if is_note_target {
223                    // Note targets: `notes` table (joined by the synthetic edge handler).
224                    where_parts.push(format!("{alias}.deleted_at IS NULL"));
225
226                    let ns_filter = namespace_filter(&alias, opts, &mut params);
227                    if !ns_filter.is_empty() {
228                        where_parts.push(ns_filter.trim_start_matches(" AND ").to_string());
229                    }
230
231                    if let Some(ref kind) = np.kind {
232                        params.push(QueryValue::Text(kind.clone()));
233                        where_parts.push(format!("{alias}.kind = ?{}", params.len()));
234                    }
235
236                    // entity_type does not exist on notes — reject explicitly.
237                    if np.entity_type.is_some() {
238                        return Err(QueryError::Compile(
239                            "observed note targets do not have an entity_type column".into(),
240                        ));
241                    }
242
243                    for (key, val) in &np.properties {
244                        params.push(QueryValue::Text(val.clone()));
245                        if key == "name" || key == "content" {
246                            where_parts
247                                .push(format!("{alias}.{key} = ?{} COLLATE NOCASE", params.len()));
248                        } else {
249                            where_parts.push(format!(
250                                "json_extract({alias}.properties, '$.{}') = ?{} COLLATE NOCASE",
251                                key.replace('\'', "''"),
252                                params.len()
253                            ));
254                        }
255                    }
256                } else {
257                    where_parts.push(format!("{alias}.deleted_at IS NULL"));
258
259                    let ns_filter = namespace_filter(&alias, opts, &mut params);
260                    if !ns_filter.is_empty() {
261                        where_parts.push(ns_filter.trim_start_matches(" AND ").to_string());
262                    }
263
264                    if let Some(ref kind) = np.kind {
265                        params.push(QueryValue::Text(kind.clone()));
266                        where_parts.push(format!("{alias}.kind = ?{}", params.len()));
267                    }
268
269                    if let Some(ref et) = np.entity_type {
270                        params.push(QueryValue::Text(et.clone()));
271                        where_parts.push(format!("{alias}.entity_type = ?{}", params.len()));
272                    }
273
274                    for (key, val) in &np.properties {
275                        params.push(QueryValue::Text(val.clone()));
276                        if key == "name" {
277                            where_parts
278                                .push(format!("{alias}.name = ?{} COLLATE NOCASE", params.len()));
279                        } else {
280                            where_parts.push(format!(
281                                "json_extract({alias}.properties, '$.{}') = ?{} COLLATE NOCASE",
282                                key.replace('\'', "''"),
283                                params.len()
284                            ));
285                        }
286                    }
287                }
288
289                if let Some(ref var) = np.variable {
290                    let kind = if is_event_source {
291                        VarKind::EventNode
292                    } else if is_note_target {
293                        VarKind::NoteNode
294                    } else {
295                        VarKind::Node
296                    };
297                    var_to_alias.insert(var.clone(), (alias.clone(), kind));
298                }
299
300                node_idx += 1;
301            }
302            PatternElement::Edge(ep) => {
303                let e_alias = format!("e{edge_idx}");
304                let prev_node = &node_aliases[node_aliases.len() - 1];
305                let next_alias = format!("n{}", node_idx);
306
307                edge_aliases.push(e_alias.clone());
308
309                // Detect synthetic event_observations edges (ADR-041 §8).
310                // A synthetic edge is one whose only relation(s) are observed_as_* names.
311                // Mixed synthetic+canonical relations are rejected: the two tables don't share
312                // a common join key that would make an OR across them meaningful.
313                let has_synthetic = ep.relations.iter().any(|r| is_synthetic(r));
314                let has_canonical = ep.relations.iter().any(|r| !is_synthetic(r));
315                if has_synthetic && has_canonical {
316                    return Err(QueryError::Compile(
317                        "cannot mix synthetic observed_as_* relations with canonical edge relations \
318                         in a single edge pattern"
319                            .into(),
320                    ));
321                }
322
323                if has_synthetic {
324                    // Synthetic edge: join event_observations.
325                    // Direction is always event → entity/note (OUT from the event node).
326                    // The event node is the source (prev_node); the entity/note is the target.
327                    if !matches!(ep.direction, EdgeDirection::Out) {
328                        return Err(QueryError::Compile(
329                            "synthetic observed_as_* edges are always event → entity (outbound only)".into(),
330                        ));
331                    }
332                    join_parts.push(format!(
333                        "JOIN event_observations {e_alias} ON {e_alias}.event_id = {prev_node}.id"
334                    ));
335                    // Roles: collect the unique role values from the synthetic relation names.
336                    let roles: Vec<&'static str> = ep
337                        .relations
338                        .iter()
339                        .filter_map(|r| synthetic_role(r))
340                        .collect();
341                    if roles.len() == 1 {
342                        params.push(QueryValue::Text(roles[0].to_string()));
343                        where_parts.push(format!("{e_alias}.role = ?{}", params.len()));
344                    } else if roles.len() > 1 {
345                        let placeholders: Vec<String> = roles
346                            .iter()
347                            .map(|r| {
348                                params.push(QueryValue::Text(r.to_string()));
349                                format!("?{}", params.len())
350                            })
351                            .collect();
352                        where_parts
353                            .push(format!("{e_alias}.role IN ({})", placeholders.join(", ")));
354                    }
355                    // Join the target node via event_observations.entity_id.
356                    // The `referent_kind` column discriminates between note and entity
357                    // substrates.  Per ADR-041, recall/rerank observations always target
358                    // notes (`referent_kind='note'`); we filter to note substrate and join
359                    // the `notes` table.  An explicit `AND e0.referent_kind='note'`
360                    // prevents cross-substrate ID collisions.
361                    join_parts.push(format!(
362                        "JOIN notes {next_alias} ON {next_alias}.id = {e_alias}.entity_id \
363                         AND {e_alias}.referent_kind = 'note'"
364                    ));
365                } else {
366                    // Standard canonical edge: join graph_edges.
367                    let (source_join, target_join) = match ep.direction {
368                        EdgeDirection::Out => (
369                            format!("{e_alias}.source_id = {prev_node}.id"),
370                            "target_id",
371                        ),
372                        EdgeDirection::In => (
373                            format!("{e_alias}.target_id = {prev_node}.id"),
374                            "source_id",
375                        ),
376                        EdgeDirection::Both => (
377                            format!(
378                                "({e_alias}.source_id = {prev_node}.id OR {e_alias}.target_id = {prev_node}.id)"
379                            ),
380                            "CASE_BOTH",
381                        ),
382                    };
383
384                    let next_join_col = if target_join == "CASE_BOTH" {
385                        format!(
386                            "CASE WHEN {e_alias}.source_id = {prev_node}.id THEN {e_alias}.target_id ELSE {e_alias}.source_id END"
387                        )
388                    } else {
389                        format!("{e_alias}.{target_join}")
390                    };
391
392                    join_parts.push(format!(
393                        "JOIN graph_edges {e_alias} ON {source_join} AND {e_alias}.deleted_at IS NULL"
394                    ));
395
396                    let ens_filter = namespace_filter(&e_alias, opts, &mut params);
397                    if !ens_filter.is_empty() {
398                        where_parts.push(ens_filter.trim_start_matches(" AND ").to_string());
399                    }
400
401                    join_parts.push(format!(
402                        "JOIN entities {next_alias} ON {next_alias}.id = {next_join_col}"
403                    ));
404
405                    if !ep.relations.is_empty() {
406                        if ep.relations.len() == 1 {
407                            params.push(QueryValue::Text(ep.relations[0].clone()));
408                            where_parts.push(format!("{e_alias}.relation = ?{}", params.len()));
409                        } else {
410                            let placeholders: Vec<String> = ep
411                                .relations
412                                .iter()
413                                .map(|r| {
414                                    params.push(QueryValue::Text(r.clone()));
415                                    format!("?{}", params.len())
416                                })
417                                .collect();
418                            where_parts.push(format!(
419                                "{e_alias}.relation IN ({})",
420                                placeholders.join(", ")
421                            ));
422                        }
423                    }
424                }
425
426                if let Some(ref var) = ep.variable {
427                    var_to_alias.insert(var.clone(), (e_alias.clone(), VarKind::Edge));
428                }
429
430                edge_idx += 1;
431            }
432        }
433    }
434
435    // WHERE clause conditions from GQL WHERE (supports AND / OR tree — ADR-008)
436    if let Some(where_sql) = compile_where_expr(&query.where_clause, &var_to_alias, &mut params)? {
437        where_parts.push(where_sql);
438    }
439
440    // SELECT clause
441    for item in &query.return_items {
442        let var = item.variable();
443        if let Some((alias, kind)) = var_to_alias.get(var) {
444            match item {
445                ReturnItem::Property(_, prop) => {
446                    let col = property_to_column(prop, kind)?;
447                    select_parts.push(format!("{alias}.{col} AS {var}_{prop}"));
448                }
449                ReturnItem::Variable(_) => match kind {
450                    VarKind::Node => {
451                        select_parts.push(format!(
452                            "{alias}.id AS {var}_id, {alias}.namespace AS {var}_namespace, \
453                             {alias}.kind AS {var}_kind, {alias}.entity_type AS {var}_entity_type, \
454                             {alias}.name AS {var}_name, \
455                             {alias}.properties AS {var}_properties, \
456                             {alias}.created_at AS {var}_created_at, \
457                             {alias}.updated_at AS {var}_updated_at"
458                        ));
459                    }
460                    VarKind::NoteNode => {
461                        select_parts.push(format!(
462                            "{alias}.id AS {var}_id, {alias}.namespace AS {var}_namespace, \
463                             {alias}.kind AS {var}_kind, {alias}.status AS {var}_status, \
464                             {alias}.content AS {var}_content, \
465                             {alias}.salience AS {var}_salience, \
466                             {alias}.properties AS {var}_properties, \
467                             {alias}.created_at AS {var}_created_at, \
468                             {alias}.updated_at AS {var}_updated_at"
469                        ));
470                    }
471                    VarKind::EventNode => {
472                        select_parts.push(format!(
473                            "{alias}.id AS {var}_id, {alias}.namespace AS {var}_namespace, \
474                             {alias}.verb AS {var}_verb, {alias}.substrate AS {var}_substrate, \
475                             {alias}.actor AS {var}_actor, {alias}.kind AS {var}_kind, \
476                             {alias}.outcome AS {var}_outcome, \
477                             {alias}.payload AS {var}_payload, \
478                             {alias}.created_at AS {var}_created_at"
479                        ));
480                    }
481                    VarKind::Edge => {
482                        select_parts.push(format!(
483                            "{alias}.id AS {var}_id, {alias}.source_id AS {var}_source, \
484                             {alias}.target_id AS {var}_target, \
485                             {alias}.relation AS {var}_relation, \
486                             {alias}.weight AS {var}_weight"
487                        ));
488                    }
489                },
490            }
491        } else {
492            return Err(QueryError::Compile(format!(
493                "unknown variable '{var}' in RETURN clause"
494            )));
495        }
496    }
497
498    let limit = query.limit.unwrap_or(opts.max_limit).min(opts.max_limit);
499    params.push(QueryValue::Integer(limit as i64));
500
501    let sql = format!(
502        "SELECT {} FROM {} {} WHERE {} LIMIT ?{}",
503        select_parts.join(", "),
504        from_parts.join(", "),
505        join_parts.join(" "),
506        where_parts.join(" AND "),
507        params.len(),
508    );
509
510    Ok(CompiledQuery {
511        sql,
512        params,
513        return_vars: query.return_items.clone(),
514        warnings: Vec::new(),
515    })
516}
517
518/// Compile a `WhereExpr` tree into a SQL fragment, pushing bound parameters into `params`.
519///
520/// Returns `Ok(None)` for `WhereExpr::True` (no fragment needed), or `Ok(Some(sql))` otherwise.
521/// The caller is responsible for wrapping the result in an AND with the structural predicates.
522fn compile_where_expr(
523    expr: &WhereExpr,
524    var_to_alias: &std::collections::HashMap<String, (String, VarKind)>,
525    params: &mut Vec<QueryValue>,
526) -> Result<Option<String>, QueryError> {
527    match expr {
528        WhereExpr::True => Ok(None),
529        WhereExpr::Condition(cond) => {
530            let sql = compile_single_condition(cond, var_to_alias, params)?;
531            Ok(Some(sql))
532        }
533        WhereExpr::And(l, r) => {
534            let ls = compile_where_expr(l, var_to_alias, params)?;
535            let rs = compile_where_expr(r, var_to_alias, params)?;
536            Ok(match (ls, rs) {
537                (None, None) => None,
538                (Some(s), None) | (None, Some(s)) => Some(s),
539                (Some(l), Some(r)) => Some(format!("{l} AND {r}")),
540            })
541        }
542        WhereExpr::Or(l, r) => {
543            let ls = compile_where_expr(l, var_to_alias, params)?;
544            let rs = compile_where_expr(r, var_to_alias, params)?;
545            Ok(match (ls, rs) {
546                (None, None) => None,
547                (Some(s), None) | (None, Some(s)) => Some(s),
548                (Some(l), Some(r)) => Some(format!("({l} OR {r})")),
549            })
550        }
551    }
552}
553
554/// Compile a single leaf condition to a SQL predicate string.
555fn compile_single_condition(
556    cond: &Condition,
557    var_to_alias: &std::collections::HashMap<String, (String, VarKind)>,
558    params: &mut Vec<QueryValue>,
559) -> Result<String, QueryError> {
560    let (alias, kind) = var_to_alias.get(&cond.variable).ok_or_else(|| {
561        QueryError::Compile(format!(
562            "unknown variable '{}' in WHERE clause",
563            cond.variable
564        ))
565    })?;
566
567    let col_expr = match kind {
568        VarKind::Node => {
569            if cond.property == "name"
570                || cond.property == "kind"
571                || cond.property == "entity_type"
572                || cond.property == "namespace"
573            {
574                format!("{alias}.{}", cond.property)
575            } else {
576                format!(
577                    "json_extract({alias}.properties, '$.{}')",
578                    cond.property.replace('\'', "''")
579                )
580            }
581        }
582        VarKind::NoteNode => {
583            if NOTE_COLUMNS.contains(&cond.property.as_str()) {
584                format!("{alias}.{}", cond.property)
585            } else {
586                format!(
587                    "json_extract({alias}.properties, '$.{}')",
588                    cond.property.replace('\'', "''")
589                )
590            }
591        }
592        VarKind::EventNode => {
593            // Events table has direct columns only; reject unknown fields.
594            if EVENT_COLUMNS.contains(&cond.property.as_str()) {
595                format!("{alias}.{}", cond.property)
596            } else {
597                return Err(QueryError::Validation(format!(
598                    "event property '{}' not queryable; valid columns: {}",
599                    cond.property,
600                    EVENT_COLUMNS.join(", ")
601                )));
602            }
603        }
604        VarKind::Edge => match cond.property.as_str() {
605            "relation" | "weight" => format!("{alias}.{}", cond.property),
606            other => {
607                return Err(QueryError::Validation(format!(
608                    "edge property '{other}' not queryable; use 'relation' or 'weight'"
609                )))
610            }
611        },
612    };
613
614    let op_str = match cond.op {
615        CompareOp::Eq => "=",
616        CompareOp::Neq => "!=",
617        CompareOp::Gt => ">",
618        CompareOp::Lt => "<",
619        CompareOp::Gte => ">=",
620        CompareOp::Lte => "<=",
621        CompareOp::Like => "LIKE",
622    };
623
624    let sql = match &cond.value {
625        ConditionValue::String(s) => {
626            params.push(QueryValue::Text(s.clone()));
627            let collate = if matches!(cond.op, CompareOp::Eq | CompareOp::Like) {
628                " COLLATE NOCASE"
629            } else {
630                ""
631            };
632            format!("{col_expr} {op_str} ?{}{}", params.len(), collate)
633        }
634        ConditionValue::Number(n) => {
635            params.push(QueryValue::Float(*n));
636            format!("{col_expr} {op_str} ?{}", params.len())
637        }
638        ConditionValue::Bool(b) => {
639            params.push(QueryValue::Integer(if *b { 1 } else { 0 }));
640            format!("{col_expr} {op_str} ?{}", params.len())
641        }
642    };
643    Ok(sql)
644}
645
646/// Returns `true` if the given `WhereExpr` subtree references only the start
647/// variable (`start_var`), only the end variable, or neither — but NOT both.
648///
649/// Used to detect OR nodes whose branches reference different endpoints, which
650/// cannot be correctly compiled by the variable-length leaf-routing approach.
651fn expr_endpoint_set(
652    expr: &WhereExpr,
653    start_var: Option<&str>,
654    end_var: Option<&str>,
655) -> (bool, bool) {
656    match expr {
657        WhereExpr::True => (false, false),
658        WhereExpr::Condition(c) => {
659            let is_start = start_var == Some(c.variable.as_str());
660            let is_end = end_var == Some(c.variable.as_str());
661            (is_start, is_end)
662        }
663        WhereExpr::And(l, r) | WhereExpr::Or(l, r) => {
664            let (ls, le) = expr_endpoint_set(l, start_var, end_var);
665            let (rs, re) = expr_endpoint_set(r, start_var, end_var);
666            (ls || rs, le || re)
667        }
668    }
669}
670
671/// Walk the expression tree and return `Err(Unsupported)` if any `Or` node has
672/// branches that span both start and end endpoint variables.  Single-endpoint
673/// ORs (e.g. `a.name='X' OR a.name='Y'`) are fine.
674fn reject_or_spanning_endpoints(
675    expr: &WhereExpr,
676    start: &NodePattern,
677    end: &NodePattern,
678) -> Result<(), QueryError> {
679    let start_var = start.variable.as_deref();
680    let end_var = end.variable.as_deref();
681    reject_or_spanning_impl(expr, start_var, end_var)
682}
683
684fn reject_or_spanning_impl(
685    expr: &WhereExpr,
686    start_var: Option<&str>,
687    end_var: Option<&str>,
688) -> Result<(), QueryError> {
689    match expr {
690        WhereExpr::True | WhereExpr::Condition(_) => Ok(()),
691        WhereExpr::And(l, r) => {
692            reject_or_spanning_impl(l, start_var, end_var)?;
693            reject_or_spanning_impl(r, start_var, end_var)
694        }
695        WhereExpr::Or(l, r) => {
696            let (l_start, l_end) = expr_endpoint_set(l, start_var, end_var);
697            let (r_start, r_end) = expr_endpoint_set(r, start_var, end_var);
698            let spans_start = l_start || r_start;
699            let spans_end = l_end || r_end;
700            if spans_start && spans_end {
701                return Err(QueryError::Unsupported(
702                    "WHERE clauses that span both endpoints in a variable-length pattern \
703                     are not yet supported; rewrite as separate queries or restrict each \
704                     OR branch to one endpoint"
705                        .into(),
706                ));
707            }
708            // Even if this OR is safe, recurse to catch nested ORs.
709            reject_or_spanning_impl(l, start_var, end_var)?;
710            reject_or_spanning_impl(r, start_var, end_var)
711        }
712    }
713}
714
715/// Compile a leaf condition for the variable-length path, routing it to the correct
716/// alias (`s` for start, `r` for end).
717fn compile_var_len_condition(
718    cond: &Condition,
719    start_var: Option<&str>,
720    end_var: Option<&str>,
721    params: &mut Vec<QueryValue>,
722) -> Result<(String, &'static str), QueryError> {
723    let col_alias = if start_var == Some(cond.variable.as_str()) {
724        "s"
725    } else if end_var == Some(cond.variable.as_str()) {
726        "r"
727    } else {
728        return Err(QueryError::Compile(format!(
729            "variable '{}' in WHERE not supported in variable-length pattern \
730             (only start/end node variables)",
731            cond.variable
732        )));
733    };
734
735    let col_expr =
736        if cond.property == "name" || cond.property == "kind" || cond.property == "entity_type" {
737            format!("{col_alias}.{}", cond.property)
738        } else {
739            format!(
740                "json_extract({col_alias}.properties, '$.{}')",
741                cond.property.replace('\'', "''")
742            )
743        };
744
745    let op_str = match cond.op {
746        CompareOp::Eq => "=",
747        CompareOp::Neq => "!=",
748        CompareOp::Gt => ">",
749        CompareOp::Lt => "<",
750        CompareOp::Gte => ">=",
751        CompareOp::Lte => "<=",
752        CompareOp::Like => "LIKE",
753    };
754
755    let sql = match &cond.value {
756        ConditionValue::String(s) => {
757            params.push(QueryValue::Text(s.clone()));
758            let collate = if matches!(cond.op, CompareOp::Eq | CompareOp::Like) {
759                " COLLATE NOCASE"
760            } else {
761                ""
762            };
763            format!("{col_expr} {op_str} ?{}{collate}", params.len())
764        }
765        ConditionValue::Number(n) => {
766            params.push(QueryValue::Float(*n));
767            format!("{col_expr} {op_str} ?{}", params.len())
768        }
769        ConditionValue::Bool(b) => {
770            params.push(QueryValue::Integer(if *b { 1 } else { 0 }));
771            format!("{col_expr} {op_str} ?{}", params.len())
772        }
773    };
774    Ok((sql, col_alias))
775}
776
777/// Walk the `WhereExpr` tree for variable-length patterns, preserving Or/And
778/// connectives and routing each leaf to `start_conditions` or `end_conditions`.
779///
780/// Because `reject_or_spanning_endpoints` has already verified that no `Or` node
781/// straddles both endpoints, every sub-tree roots in at most one endpoint.  When a
782/// sub-tree is purely one endpoint we compile it as a single SQL fragment and push
783/// it directly into that endpoint's condition vec.  The function returns `Ok(None)`
784/// in all handled cases; `Ok(Some(_))` is never produced (the signature reserves it
785/// for `WhereExpr::True` which is a no-op).
786fn compile_variable_length_where(
787    expr: &WhereExpr,
788    start_var: Option<&str>,
789    end_var: Option<&str>,
790    params: &mut Vec<QueryValue>,
791    start_conditions: &mut Vec<String>,
792    end_conditions: &mut Vec<String>,
793) -> Result<Option<String>, QueryError> {
794    match expr {
795        WhereExpr::True => Ok(None),
796        WhereExpr::Condition(cond) => {
797            let (sql, alias) = compile_var_len_condition(cond, start_var, end_var, params)?;
798            if alias == "s" {
799                start_conditions.push(sql);
800            } else {
801                end_conditions.push(sql);
802            }
803            Ok(None)
804        }
805        WhereExpr::And(l, r) => {
806            compile_variable_length_where(
807                l,
808                start_var,
809                end_var,
810                params,
811                start_conditions,
812                end_conditions,
813            )?;
814            compile_variable_length_where(
815                r,
816                start_var,
817                end_var,
818                params,
819                start_conditions,
820                end_conditions,
821            )?;
822            Ok(None)
823        }
824        WhereExpr::Or(l, r) => {
825            // After reject_or_spanning_endpoints we know this Or does not straddle
826            // both endpoints.  Compile each branch to a SQL string, then combine
827            // with OR and push into the appropriate condition list.
828            let l_sql = compile_variable_length_where_to_sql(l, start_var, end_var, params)?;
829            let r_sql = compile_variable_length_where_to_sql(r, start_var, end_var, params)?;
830            match (l_sql, r_sql) {
831                (None, None) => {}
832                (Some((ls, la)), None) => {
833                    if la == "s" {
834                        start_conditions.push(ls);
835                    } else {
836                        end_conditions.push(ls);
837                    }
838                }
839                (None, Some((rs, ra))) => {
840                    if ra == "s" {
841                        start_conditions.push(rs);
842                    } else {
843                        end_conditions.push(rs);
844                    }
845                }
846                (Some((ls, la)), Some((rs, _ra))) => {
847                    // Both non-None and same alias (guaranteed by the spanning check).
848                    let combined = format!("({ls} OR {rs})");
849                    if la == "s" {
850                        start_conditions.push(combined);
851                    } else {
852                        end_conditions.push(combined);
853                    }
854                }
855            }
856            Ok(None)
857        }
858    }
859}
860
861/// Compile a `WhereExpr` sub-tree to a SQL string plus the endpoint alias it
862/// targets (`"s"` or `"r"`).  Returns `Ok(None)` for `WhereExpr::True`.
863///
864/// Used by `compile_variable_length_where` to collect the two sides of an `Or`
865/// before joining them with ` OR `.
866fn compile_variable_length_where_to_sql(
867    expr: &WhereExpr,
868    start_var: Option<&str>,
869    end_var: Option<&str>,
870    params: &mut Vec<QueryValue>,
871) -> Result<Option<(String, &'static str)>, QueryError> {
872    match expr {
873        WhereExpr::True => Ok(None),
874        WhereExpr::Condition(cond) => {
875            let (sql, alias) = compile_var_len_condition(cond, start_var, end_var, params)?;
876            Ok(Some((sql, alias)))
877        }
878        WhereExpr::And(l, r) => {
879            let ls = compile_variable_length_where_to_sql(l, start_var, end_var, params)?;
880            let rs = compile_variable_length_where_to_sql(r, start_var, end_var, params)?;
881            Ok(match (ls, rs) {
882                (None, None) => None,
883                (Some(s), None) | (None, Some(s)) => Some(s),
884                (Some((lsql, la)), Some((rsql, _))) => Some((format!("{lsql} AND {rsql}"), la)),
885            })
886        }
887        WhereExpr::Or(l, r) => {
888            let ls = compile_variable_length_where_to_sql(l, start_var, end_var, params)?;
889            let rs = compile_variable_length_where_to_sql(r, start_var, end_var, params)?;
890            Ok(match (ls, rs) {
891                (None, None) => None,
892                (Some(s), None) | (None, Some(s)) => Some(s),
893                (Some((lsql, la)), Some((rsql, _))) => Some((format!("({lsql} OR {rsql})"), la)),
894            })
895        }
896    }
897}
898
899/// Compile variable-length patterns to a recursive CTE.
900///
901/// Depth is capped at min(requested, 10) — MAJ-2 (parameterized min_depth, not literal).
902fn compile_variable_length(
903    query: &GqlQuery,
904    opts: &CompileOptions,
905) -> Result<CompiledQuery, QueryError> {
906    let mut params: Vec<QueryValue> = Vec::new();
907    let mut var_to_alias: std::collections::HashMap<String, (String, VarKind)> =
908        std::collections::HashMap::new();
909
910    // For variable-length, we expect exactly: start_node -[*N..M]-> end_node.
911    // Mixed fixed+variable chains and additional trailing pattern elements are
912    // not yet supported — reject explicitly rather than silently dropping them.
913    let nodes: Vec<&NodePattern> = query.pattern.nodes().collect();
914    let edges: Vec<&EdgePattern> = query.pattern.edges().collect();
915
916    if nodes.len() != 2 || edges.len() != 1 || query.pattern.elements.len() != 3 {
917        return Err(QueryError::Unsupported(
918            "variable-length patterns must be a single start_node -[*N..M]-> end_node \
919             (mixed fixed/variable chains are not yet implemented)"
920                .into(),
921        ));
922    }
923
924    let start = &nodes[0];
925    let edge = &edges[0];
926    let end = &nodes[1];
927
928    // MAJ-2: depth cap — always parameterized, never injected as literal
929    let max_depth = edge.max_hops.min(MAX_DEPTH);
930    let min_depth = edge.min_hops;
931
932    // Build start-node conditions
933    let mut start_conditions: Vec<String> = vec!["s.deleted_at IS NULL".to_string()];
934    let ns_filter = namespace_filter("s", opts, &mut params);
935    if !ns_filter.is_empty() {
936        start_conditions.push(ns_filter.trim_start_matches(" AND ").to_string());
937    }
938
939    if let Some(ref kind) = start.kind {
940        params.push(QueryValue::Text(kind.clone()));
941        start_conditions.push(format!("s.kind = ?{}", params.len()));
942    }
943    if let Some(ref et) = start.entity_type {
944        params.push(QueryValue::Text(et.clone()));
945        start_conditions.push(format!("s.entity_type = ?{}", params.len()));
946    }
947    for (key, val) in &start.properties {
948        params.push(QueryValue::Text(val.clone()));
949        if key == "name" {
950            start_conditions.push(format!("s.name = ?{} COLLATE NOCASE", params.len()));
951        } else {
952            start_conditions.push(format!(
953                "json_extract(s.properties, '$.{}') = ?{} COLLATE NOCASE",
954                key.replace('\'', "''"),
955                params.len()
956            ));
957        }
958    }
959
960    // Relation filter
961    let mut relation_condition = String::new();
962    if !edge.relations.is_empty() {
963        if edge.relations.len() == 1 {
964            params.push(QueryValue::Text(edge.relations[0].clone()));
965            relation_condition = format!(" AND e.relation = ?{}", params.len());
966        } else {
967            let placeholders: Vec<String> = edge
968                .relations
969                .iter()
970                .map(|r| {
971                    params.push(QueryValue::Text(r.clone()));
972                    format!("?{}", params.len())
973                })
974                .collect();
975            relation_condition = format!(" AND e.relation IN ({})", placeholders.join(", "));
976        }
977    }
978
979    // Edge namespace filter
980    let e_ns_filter = namespace_filter("e", opts, &mut params);
981
982    // Direction-dependent JOIN
983    let (seed_join, seed_next, recurse_join, recurse_next) = match edge.direction {
984        EdgeDirection::Out => (
985            "e.source_id = s.id",
986            "e.target_id",
987            "e.source_id = t.current_id",
988            "e.target_id",
989        ),
990        EdgeDirection::In => (
991            "e.target_id = s.id",
992            "e.source_id",
993            "e.target_id = t.current_id",
994            "e.source_id",
995        ),
996        EdgeDirection::Both => (
997            "(e.source_id = s.id OR e.target_id = s.id)",
998            "CASE WHEN e.source_id = s.id THEN e.target_id ELSE e.source_id END",
999            "(e.source_id = t.current_id OR e.target_id = t.current_id)",
1000            "CASE WHEN e.source_id = t.current_id THEN e.target_id ELSE e.source_id END",
1001        ),
1002    };
1003
1004    params.push(QueryValue::Integer(max_depth as i64));
1005    let depth_param = params.len();
1006
1007    // End-node conditions (applied in outer WHERE). `r` is always joined
1008    // unconditionally below so these references resolve regardless of whether
1009    // the end variable is projected.
1010    let mut end_conditions: Vec<String> = vec!["r.deleted_at IS NULL".to_string()];
1011    let r_ns_filter = namespace_filter("r", opts, &mut params);
1012    if !r_ns_filter.is_empty() {
1013        end_conditions.push(r_ns_filter.trim_start_matches(" AND ").to_string());
1014    }
1015    if let Some(ref kind) = end.kind {
1016        params.push(QueryValue::Text(kind.clone()));
1017        end_conditions.push(format!("r.kind = ?{}", params.len()));
1018    }
1019    if let Some(ref et) = end.entity_type {
1020        params.push(QueryValue::Text(et.clone()));
1021        end_conditions.push(format!("r.entity_type = ?{}", params.len()));
1022    }
1023    for (key, val) in &end.properties {
1024        params.push(QueryValue::Text(val.clone()));
1025        if key == "name" {
1026            end_conditions.push(format!("r.name = ?{} COLLATE NOCASE", params.len()));
1027        } else {
1028            end_conditions.push(format!(
1029                "json_extract(r.properties, '$.{}') = ?{} COLLATE NOCASE",
1030                key.replace('\'', "''"),
1031                params.len()
1032            ));
1033        }
1034    }
1035
1036    // WHERE clause conditions for variable-length patterns.
1037    // OR expressions that span both start and end nodes are not supported — reject
1038    // explicitly with an actionable error message rather than silently converting OR to AND.
1039    reject_or_spanning_endpoints(&query.where_clause, start, end)?;
1040
1041    // Compile the WHERE tree preserving Or/And connectives.  After the spanning
1042    // check above we know every Or node touches at most one endpoint, so we can
1043    // safely route whole sub-trees to start_conditions or end_conditions.
1044    if let Some(where_sql) = compile_variable_length_where(
1045        &query.where_clause,
1046        start.variable.as_deref(),
1047        end.variable.as_deref(),
1048        &mut params,
1049        &mut start_conditions,
1050        &mut end_conditions,
1051    )? {
1052        // A non-None return means the expression spans no variable (WhereExpr::True
1053        // is the only such case and returns None).  This branch is unreachable given
1054        // the reject_or_spanning_endpoints guard above, but handle it safely.
1055        start_conditions.push(where_sql);
1056    }
1057
1058    // MAJ-2: min_depth is always a bound parameter, never a literal
1059    if min_depth > 0 {
1060        params.push(QueryValue::Integer(min_depth as i64));
1061        end_conditions.push(format!("t.depth >= ?{}", params.len()));
1062    }
1063
1064    let limit = query.limit.unwrap_or(opts.max_limit).min(opts.max_limit);
1065    params.push(QueryValue::Integer(limit as i64));
1066    let limit_param = params.len();
1067
1068    // Register variables
1069    if let Some(ref var) = start.variable {
1070        var_to_alias.insert(var.clone(), ("s".to_string(), VarKind::Node));
1071    }
1072    if let Some(ref var) = end.variable {
1073        var_to_alias.insert(var.clone(), ("r".to_string(), VarKind::Node));
1074    }
1075    if let Some(ref var) = edge.variable {
1076        var_to_alias.insert(var.clone(), ("e".to_string(), VarKind::Edge));
1077    }
1078
1079    // Build SELECT based on RETURN items
1080    let mut select_parts: Vec<String> = Vec::new();
1081    let mut has_start = false;
1082
1083    for item in &query.return_items {
1084        let var = item.variable();
1085        if let Some((_, kind)) = var_to_alias.get(var) {
1086            match item {
1087                ReturnItem::Property(_, prop) => {
1088                    let is_start = start.variable.as_deref() == Some(var);
1089                    if matches!(kind, VarKind::EventNode | VarKind::NoteNode) {
1090                        return Err(QueryError::Unsupported(
1091                            "synthetic observed_as_* edges cannot be used in variable-length \
1092                             patterns; use a fixed-length edge pattern instead"
1093                                .into(),
1094                        ));
1095                    }
1096                    if *kind == VarKind::Node {
1097                        let tbl = if is_start { "s" } else { "r" };
1098                        if is_start {
1099                            has_start = true;
1100                        }
1101                        let col = property_to_column(prop, kind)?;
1102                        select_parts.push(format!("{tbl}.{col} AS {var}_{prop}"));
1103                    } else {
1104                        let col = match prop.as_str() {
1105                            "id" => "via_edge",
1106                            "relation" => "via_relation",
1107                            "weight" => "via_weight",
1108                            _ => {
1109                                return Err(QueryError::Compile(format!(
1110                                    "unknown edge property '{prop}' in RETURN projection. \
1111                                     Valid: id, source_id, target_id, relation, weight"
1112                                )));
1113                            }
1114                        };
1115                        select_parts.push(format!("t.{col} AS {var}_{prop}"));
1116                    }
1117                }
1118                ReturnItem::Variable(_) => match kind {
1119                    VarKind::Node => {
1120                        if start.variable.as_deref() == Some(var) {
1121                            has_start = true;
1122                            select_parts.push(format!(
1123                                "s.id AS {var}_id, s.namespace AS {var}_namespace, \
1124                                 s.kind AS {var}_kind, s.entity_type AS {var}_entity_type, \
1125                                 s.name AS {var}_name, \
1126                                 s.properties AS {var}_properties, \
1127                                 s.created_at AS {var}_created_at, \
1128                                 s.updated_at AS {var}_updated_at"
1129                            ));
1130                        } else {
1131                            select_parts.push(format!(
1132                                "r.id AS {var}_id, r.namespace AS {var}_namespace, \
1133                                 r.kind AS {var}_kind, r.entity_type AS {var}_entity_type, \
1134                                 r.name AS {var}_name, \
1135                                 r.properties AS {var}_properties, \
1136                                 r.created_at AS {var}_created_at, \
1137                                 r.updated_at AS {var}_updated_at"
1138                            ));
1139                        }
1140                    }
1141                    VarKind::EventNode | VarKind::NoteNode => {
1142                        // Synthetic observed_as_* edges require a fixed-length pattern;
1143                        // variable-length recursion over the events/notes tables is not supported.
1144                        return Err(QueryError::Unsupported(
1145                            "synthetic observed_as_* edges cannot be used in variable-length \
1146                             patterns; use a fixed-length edge pattern instead"
1147                                .into(),
1148                        ));
1149                    }
1150                    VarKind::Edge => {
1151                        select_parts.push(format!(
1152                            "t.via_edge AS {var}_id, t.via_relation AS {var}_relation, \
1153                             t.via_weight AS {var}_weight"
1154                        ));
1155                    }
1156                },
1157            }
1158        } else {
1159            return Err(QueryError::Compile(format!(
1160                "unknown variable '{var}' in RETURN clause"
1161            )));
1162        }
1163    }
1164
1165    // Always include traversal metadata
1166    select_parts.push("t.depth AS _depth".to_string());
1167    select_parts.push("t.total_weight AS _total_weight".to_string());
1168
1169    // `s` is optional (only joined if the start variable is projected); `r` is
1170    // always joined because the outer WHERE always references `r.deleted_at`,
1171    // `r.namespace` (and possibly r.kind / r.properties) regardless of whether
1172    // it appears in RETURN.
1173    let join_start = if has_start {
1174        "JOIN entities s ON s.id = t.start_id"
1175    } else {
1176        ""
1177    };
1178    let join_end = "JOIN entities r ON r.id = t.current_id";
1179
1180    let sql = format!(
1181        "WITH RECURSIVE traverse(start_id, current_id, depth, path, total_weight, via_edge, via_relation, via_weight) AS (\
1182             SELECT s.id, {seed_next}, 1, s.id || ',' || {seed_next}, e.weight, \
1183                    e.id, e.relation, e.weight \
1184             FROM entities s \
1185             JOIN graph_edges e ON {seed_join} AND e.deleted_at IS NULL{e_ns_filter}{relation_condition} \
1186             WHERE {start_where} \
1187             UNION ALL \
1188             SELECT t.start_id, {recurse_next}, t.depth + 1, \
1189                    t.path || ',' || {recurse_next}, \
1190                    t.total_weight + e.weight, \
1191                    e.id, e.relation, e.weight \
1192             FROM traverse t \
1193             JOIN graph_edges e ON {recurse_join} AND e.deleted_at IS NULL{e_ns_filter}{relation_condition} \
1194             WHERE t.depth < ?{depth_param} \
1195               AND (',' || t.path || ',') NOT LIKE '%,' || {recurse_next} || ',%' \
1196         ) \
1197         SELECT DISTINCT {select_cols} \
1198         FROM traverse t \
1199         {join_start} {join_end} \
1200         WHERE {end_where} \
1201         ORDER BY t.depth, t.total_weight DESC \
1202         LIMIT ?{limit_param}",
1203        seed_next = seed_next,
1204        seed_join = seed_join,
1205        e_ns_filter = e_ns_filter,
1206        relation_condition = relation_condition,
1207        start_where = start_conditions.join(" AND "),
1208        recurse_next = recurse_next,
1209        recurse_join = recurse_join,
1210        depth_param = depth_param,
1211        select_cols = select_parts.join(", "),
1212        join_start = join_start,
1213        join_end = join_end,
1214        end_where = end_conditions.join(" AND "),
1215        limit_param = limit_param,
1216    );
1217
1218    Ok(CompiledQuery {
1219        sql,
1220        params,
1221        return_vars: query.return_items.clone(),
1222        warnings: Vec::new(),
1223    })
1224}
1225
/// What a bound query variable refers to.  `property_to_column` uses it to pick
/// the column whitelist for RETURN projections.
///
/// `Debug` is derived so values can appear in `assert_eq!` and `{:?}` output.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum VarKind {
    /// Ordinary graph node (projected against `NODE_COLUMNS`).
    Node,
    /// Node that maps to the `events` table (synthetic edge source, ADR-041 §8).
    EventNode,
    /// Node that maps to the `notes` table (synthetic edge target, ADR-041 §8).
    NoteNode,
    /// Edge variable (projected against `EDGE_COLUMNS`).
    Edge,
}
1235
/// Columns `property_to_column` accepts when projecting an ordinary node
/// variable (`VarKind::Node`) in a RETURN clause.
const NODE_COLUMNS: &[&str] = &[
    "id",
    "name",
    "kind",
    "entity_type",
    "namespace",
    "description",
    "properties",
    "created_at",
    "updated_at",
];
/// Columns available for projection on `notes` table nodes (ADR-041 §8 targets).
const NOTE_COLUMNS: &[&str] = &[
    "id",
    "namespace",
    "kind",
    "status",
    "name",
    "content",
    "salience",
    "decay_factor",
    "properties",
    "created_at",
    "updated_at",
];
/// Columns available for projection on `events` table nodes (ADR-041 §8).
const EVENT_COLUMNS: &[&str] = &[
    "id",
    "namespace",
    "verb",
    "substrate",
    "actor",
    "kind",
    "outcome",
    "payload",
    "duration_us",
    "target_id",
    "session_id",
    "created_at",
];
/// Columns `property_to_column` accepts when projecting an edge variable
/// (`VarKind::Edge`) in a RETURN clause.
const EDGE_COLUMNS: &[&str] = &["id", "source_id", "target_id", "relation", "weight"];
1277
1278fn property_to_column<'a>(prop: &'a str, kind: &VarKind) -> Result<&'a str, QueryError> {
1279    let (valid, kind_name) = match kind {
1280        VarKind::Node => (NODE_COLUMNS, "node"),
1281        VarKind::NoteNode => (NOTE_COLUMNS, "note"),
1282        VarKind::EventNode => (EVENT_COLUMNS, "event"),
1283        VarKind::Edge => (EDGE_COLUMNS, "edge"),
1284    };
1285    if valid.contains(&prop) {
1286        Ok(prop)
1287    } else {
1288        Err(QueryError::Compile(format!(
1289            "unknown {kind_name} property '{prop}' in RETURN projection. \
1290             Valid: {}",
1291            valid.join(", ")
1292        )))
1293    }
1294}
1295
1296#[cfg(test)]
1297mod tests {
1298    use super::*;
1299    use crate::parsers::gql;
1300
1301    fn opts() -> CompileOptions {
1302        CompileOptions::default()
1303    }
1304
1305    fn scoped(namespace: &str) -> CompileOptions {
1306        CompileOptions {
1307            scopes: vec![namespace.to_string()],
1308            max_limit: 500,
1309        }
1310    }
1311
1312    #[test]
1313    fn fixed_length_basic() {
1314        let q =
1315            gql::parse("MATCH (a:concept)-[e:introduced_by]->(b:paper) RETURN a, e, b LIMIT 10")
1316                .unwrap();
1317        let compiled = compile(&q, &opts()).unwrap();
1318        assert!(compiled.sql.contains("JOIN graph_edges"));
1319        assert!(compiled.sql.contains("LIMIT"));
1320        assert_eq!(
1321            compiled.return_vars,
1322            vec![
1323                ReturnItem::Variable("a".into()),
1324                ReturnItem::Variable("e".into()),
1325                ReturnItem::Variable("b".into()),
1326            ]
1327        );
1328        // No recursive CTE for fixed-length
1329        assert!(!compiled.sql.contains("WITH RECURSIVE"));
1330    }
1331
1332    #[test]
1333    fn namespace_scoping_injected() {
1334        // Namespace must come from opts, never from the query
1335        let q =
1336            gql::parse("MATCH (a:concept)-[e:introduced_by]->(b:paper) RETURN a LIMIT 5").unwrap();
1337        let compiled = compile(&q, &scoped("research")).unwrap();
1338        assert!(compiled.sql.contains("namespace"));
1339        // The namespace value must appear as a parameter, not a literal in SQL
1340        let has_ns_param = compiled
1341            .params
1342            .iter()
1343            .any(|p| matches!(p, QueryValue::Text(s) if s == "research"));
1344        assert!(has_ns_param, "namespace must be a bound parameter");
1345    }
1346
1347    #[test]
1348    fn edge_property_whitelist_rejects_unknown() {
1349        // MAJ-1: only 'relation' and 'weight' are queryable edge properties
1350        let q = gql::parse("MATCH (a)-[e:introduced_by]->(b) WHERE e.source_id = 'x' RETURN a")
1351            .unwrap();
1352        let result = compile(&q, &opts());
1353        assert!(result.is_err());
1354        let err = result.unwrap_err().to_string();
1355        assert!(
1356            err.contains("source_id") || err.contains("not queryable"),
1357            "error: {err}"
1358        );
1359    }
1360
1361    #[test]
1362    fn edge_property_relation_allowed() {
1363        let q = gql::parse("MATCH (a)-[e]->(b) WHERE e.relation = 'extends' RETURN a").unwrap();
1364        let result = compile(&q, &opts());
1365        assert!(
1366            result.is_ok(),
1367            "relation should be allowed: {:?}",
1368            result.err()
1369        );
1370    }
1371
1372    #[test]
1373    fn edge_property_weight_allowed() {
1374        let q = gql::parse("MATCH (a)-[e]->(b) WHERE e.weight > 0.5 RETURN a").unwrap();
1375        let result = compile(&q, &opts());
1376        assert!(
1377            result.is_ok(),
1378            "weight should be allowed: {:?}",
1379            result.err()
1380        );
1381    }
1382
1383    #[test]
1384    fn variable_length_uses_cte() {
1385        let q =
1386            gql::parse("MATCH (a {name: 'LoRA'})-[:extends*1..3]->(b) RETURN b LIMIT 20").unwrap();
1387        let compiled = compile(&q, &opts()).unwrap();
1388        assert!(compiled.sql.contains("WITH RECURSIVE"));
1389        assert!(compiled.sql.contains("traverse"));
1390    }
1391
1392    #[test]
1393    fn depth_cap_at_ten_rejects_above_max() {
1394        // ADR-008 §"Depth limits": exceeding MAX_DEPTH is an InvalidInput error at
1395        // validation time — the compiler never sees a query with depth > 10.
1396        let q = gql::parse("MATCH (a)-[:extends*1..50]->(b) RETURN b").unwrap();
1397        let err = compile(&q, &opts()).unwrap_err();
1398        assert!(
1399            matches!(err, QueryError::InvalidInput(_)),
1400            "expected InvalidInput for depth > 10, got {err:?}"
1401        );
1402    }
1403
1404    #[test]
1405    fn depth_within_cap_compiles() {
1406        // depth *1..10 is at the cap — must compile successfully.
1407        let q = gql::parse("MATCH (a)-[:extends*1..10]->(b) RETURN b").unwrap();
1408        let compiled = compile(&q, &opts()).unwrap();
1409        assert!(compiled.sql.contains("WITH RECURSIVE"));
1410        // The depth parameter must equal 10
1411        let depth_val = compiled.params.iter().find_map(|p| {
1412            if let QueryValue::Integer(n) = p {
1413                Some(*n)
1414            } else {
1415                None
1416            }
1417        });
1418        assert_eq!(depth_val, Some(10), "depth param should be 10");
1419    }
1420
1421    #[test]
1422    fn limit_capped_by_max_limit() {
1423        // Query requests 1000, max_limit is 500 — result should be 500
1424        let q = gql::parse("MATCH (a:concept)-[e]->(b) RETURN a LIMIT 1000").unwrap();
1425        let compiled = compile(&q, &opts()).unwrap();
1426        let limit_param = compiled.params.last().unwrap();
1427        assert!(
1428            matches!(limit_param, QueryValue::Integer(500)),
1429            "expected Integer(500), got {limit_param:?}"
1430        );
1431    }
1432
1433    #[test]
1434    fn compile_rejects_unknown_relation() {
1435        let q = gql::parse("MATCH (a)-[:not_a_relation]->(b) RETURN a").unwrap();
1436        let err = compile(&q, &opts()).unwrap_err();
1437        let msg = err.to_string();
1438        assert!(msg.contains("not_a_relation"), "msg: {msg}");
1439    }
1440
1441    #[test]
1442    fn compile_unknown_kind_passes_through() {
1443        // Pack-agnostic: any string is accepted as an entity kind at the query layer.
1444        // Validation is a pack-handler concern.
1445        let q = gql::parse("MATCH (a:gizmo)-[:extends]->(b) RETURN a").unwrap();
1446        let compiled = compile(&q, &opts()).unwrap();
1447        let has_gizmo = compiled
1448            .params
1449            .iter()
1450            .any(|p| matches!(p, QueryValue::Text(s) if s == "gizmo"));
1451        assert!(
1452            has_gizmo,
1453            "pack-agnostic: unknown kind must pass through into SQL params"
1454        );
1455    }
1456
1457    #[test]
1458    fn compile_kind_passes_through_unchanged() {
1459        // Pack-agnostic: 'paper' is no longer normalized to 'document' at the query layer.
1460        // The string passes through as-is.
1461        let q =
1462            gql::parse("MATCH (a:paper)-[:introduced_by]->(b:concept) RETURN a LIMIT 1").unwrap();
1463        let compiled = compile(&q, &opts()).unwrap();
1464        let has_paper = compiled
1465            .params
1466            .iter()
1467            .any(|p| matches!(p, QueryValue::Text(s) if s == "paper"));
1468        assert!(
1469            has_paper,
1470            "kind 'paper' must pass through unchanged into SQL params"
1471        );
1472    }
1473
1474    #[test]
1475    fn compile_rejects_namespace_in_where() {
1476        let q =
1477            gql::parse("MATCH (a:concept)-[:extends]->(b) WHERE a.namespace = 'other' RETURN a")
1478                .unwrap();
1479        let err = compile(&q, &opts()).unwrap_err();
1480        assert!(err.to_string().contains("namespace"), "msg: {err}");
1481    }
1482
1483    #[test]
1484    fn compile_rejects_unknown_relation_in_where() {
1485        let q = gql::parse("MATCH (a)-[e:extends]->(b) WHERE e.relation = 'related_to' RETURN a")
1486            .unwrap();
1487        let err = compile(&q, &opts()).unwrap_err();
1488        assert!(err.to_string().contains("related_to"), "msg: {err}");
1489    }
1490
1491    #[test]
1492    fn compile_kind_in_where_passes_through_unchanged() {
1493        // Pack-agnostic: kind strings in WHERE conditions pass through as-is.
1494        let q = gql::parse("MATCH (a)-[:extends]->(b) WHERE a.kind = 'paper' RETURN a").unwrap();
1495        let compiled = compile(&q, &opts()).unwrap();
1496        let has_paper = compiled
1497            .params
1498            .iter()
1499            .any(|p| matches!(p, QueryValue::Text(s) if s == "paper"));
1500        assert!(
1501            has_paper,
1502            "kind 'paper' must pass through unchanged into SQL params"
1503        );
1504    }
1505
1506    #[test]
1507    fn variable_length_return_start_only_joins_end_entity() {
1508        // Even when only the start variable is projected, the outer query
1509        // references `r.deleted_at` / `r.namespace`, so entities r must be
1510        // joined unconditionally.
1511        let q = gql::parse("MATCH (a:concept)-[:extends*1..3]->(b) RETURN a LIMIT 10").unwrap();
1512        let compiled = compile(&q, &opts()).unwrap();
1513        assert!(
1514            compiled.sql.contains("JOIN entities r"),
1515            "entities r must always be joined when r.* conditions are emitted; sql: {}",
1516            compiled.sql
1517        );
1518    }
1519
1520    #[test]
1521    fn variable_length_trailing_pattern_unsupported() {
1522        let q = gql::parse("MATCH (a)-[:extends*1..3]->(b)-[:implements]->(c) RETURN b").unwrap();
1523        let err = compile(&q, &opts()).unwrap_err();
1524        assert!(
1525            matches!(err, QueryError::Unsupported(_)),
1526            "expected Unsupported, got {err:?}"
1527        );
1528    }
1529
1530    #[test]
1531    fn variable_length_mixed_chain_unsupported() {
1532        // Mixed fixed + variable in one chain — has_variable_length() triggers
1533        // the variable-length path, which must reject because edges.len() > 1.
1534        let q = gql::parse("MATCH (a)-[:extends]->(b)-[:implements*1..2]->(c) RETURN c").unwrap();
1535        let err = compile(&q, &opts()).unwrap_err();
1536        assert!(matches!(err, QueryError::Unsupported(_)), "got {err:?}");
1537    }
1538
1539    #[test]
1540    fn sparql_star_rejected_as_unsupported() {
1541        use crate::parsers::sparql;
1542        let err = sparql::parse("SELECT ?a ?b WHERE { ?a :extends* ?b . }").unwrap_err();
1543        assert!(matches!(err, QueryError::Unsupported(_)), "got {err:?}");
1544    }
1545
1546    /// Regression guard for ISSUE #231.
1547    ///
1548    /// Verifies the full SPARQL subject→predicate→object direction contract:
1549    ///   ?a :extends ?b  must compile so that ?a binds `source_id` and ?b binds `target_id`.
1550    ///
1551    /// A swap (subject→target_id, object→source_id) would cause a query for
1552    /// A–extends→B to return rows where B–extends→A, silently returning wrong results.
1553    #[test]
1554    fn sparql_subject_object_direction_compiles_outbound() {
1555        use crate::parsers::sparql;
1556
1557        let q = sparql::parse("SELECT ?a ?b WHERE { ?a :extends ?b . }").unwrap();
1558        let compiled = compile(&q, &opts()).unwrap();
1559
1560        assert!(
1561            compiled
1562                .sql
1563                .contains("JOIN graph_edges e0 ON e0.source_id = n0.id"),
1564            "SPARQL subject must bind graph_edges.source_id; sql: {}",
1565            compiled.sql
1566        );
1567        assert!(
1568            compiled
1569                .sql
1570                .contains("JOIN entities n1 ON n1.id = e0.target_id"),
1571            "SPARQL object must bind graph_edges.target_id; sql: {}",
1572            compiled.sql
1573        );
1574        assert!(
1575            compiled.sql.contains("e0.relation = ?1"),
1576            "SPARQL predicate must bind graph_edges.relation; sql: {}",
1577            compiled.sql
1578        );
1579    }
1580
1581    #[test]
1582    fn return_property_projection_compiles() {
1583        let q =
1584            gql::parse("MATCH (a:concept)-[e:extends]->(b:concept) RETURN a.name, b.name LIMIT 5")
1585                .unwrap();
1586        let compiled = compile(&q, &opts()).unwrap();
1587        // Node aliases are n0, n1; the SQL uses `alias.col AS var_prop`
1588        assert!(
1589            compiled.sql.contains(".name AS a_name"),
1590            "sql: {}",
1591            compiled.sql
1592        );
1593        assert!(
1594            compiled.sql.contains(".name AS b_name"),
1595            "sql: {}",
1596            compiled.sql
1597        );
1598        assert!(
1599            !compiled.sql.contains("a_kind"),
1600            "should not emit full node columns"
1601        );
1602    }
1603
1604    #[test]
1605    fn return_unknown_node_property_rejected() {
1606        let q = gql::parse("MATCH (a:concept)-[:extends]->(b) RETURN a.domain LIMIT 5").unwrap();
1607        let err = compile(&q, &opts()).unwrap_err();
1608        assert!(
1609            matches!(err, QueryError::Compile(ref msg) if msg.contains("unknown node property 'domain'")),
1610            "got {err:?}"
1611        );
1612    }
1613
1614    #[test]
1615    fn return_unknown_edge_property_rejected() {
1616        let q = gql::parse("MATCH (a)-[e:extends]->(b) RETURN e.label LIMIT 5").unwrap();
1617        let err = compile(&q, &opts()).unwrap_err();
1618        assert!(
1619            matches!(err, QueryError::Compile(ref msg) if msg.contains("unknown edge property 'label'")),
1620            "got {err:?}"
1621        );
1622    }
1623
1624    #[test]
1625    fn return_valid_edge_property_compiles() {
1626        let q =
1627            gql::parse("MATCH (a)-[e:extends]->(b) RETURN e.relation, e.weight LIMIT 5").unwrap();
1628        let compiled = compile(&q, &opts()).unwrap();
1629        // Edge alias is e0; SQL: `e0.relation AS e_relation`
1630        assert!(
1631            compiled.sql.contains(".relation AS e_relation"),
1632            "sql: {}",
1633            compiled.sql
1634        );
1635        assert!(
1636            compiled.sql.contains(".weight AS e_weight"),
1637            "sql: {}",
1638            compiled.sql
1639        );
1640    }
1641
1642    #[test]
1643    fn entity_type_compiles_as_direct_column_not_json_extract() {
1644        // entity_type in a NodePattern must become `alias.entity_type = ?N` in the WHERE
1645        // clause — a direct column reference, not json_extract from the properties blob.
1646        let q = gql::parse("MATCH (n:document {entity_type: 'paper'})-[:extends]->(m) RETURN n")
1647            .unwrap();
1648        let compiled = compile(&q, &opts()).unwrap();
1649        assert!(
1650            compiled.sql.contains(".entity_type = ?"),
1651            "entity_type must compile to a direct column comparison; sql: {}",
1652            compiled.sql
1653        );
1654        assert!(
1655            !compiled.sql.contains("json_extract"),
1656            "entity_type must NOT use json_extract; sql: {}",
1657            compiled.sql
1658        );
1659        let has_paper_param = compiled
1660            .params
1661            .iter()
1662            .any(|p| matches!(p, QueryValue::Text(s) if s == "paper"));
1663        assert!(
1664            has_paper_param,
1665            "entity_type value 'paper' must appear as a bound parameter"
1666        );
1667    }
1668
1669    // --- F047: OR support in WHERE clause (ADR-008 §"GQL WHERE expression") ---
1670
1671    #[test]
1672    fn where_or_compiles_to_sql_or() {
1673        let q = gql::parse(
1674            "MATCH (a:concept)-[e:extends]->(b) WHERE a.name = 'LoRA' OR a.name = 'QLoRA' RETURN a",
1675        )
1676        .unwrap();
1677        let compiled = compile(&q, &opts()).unwrap();
1678        assert!(
1679            compiled.sql.contains(" OR "),
1680            "WHERE OR must produce SQL OR; sql: {}",
1681            compiled.sql
1682        );
1683        let has_lora = compiled
1684            .params
1685            .iter()
1686            .any(|p| matches!(p, QueryValue::Text(s) if s == "LoRA"));
1687        let has_qlora = compiled
1688            .params
1689            .iter()
1690            .any(|p| matches!(p, QueryValue::Text(s) if s == "QLoRA"));
1691        assert!(has_lora && has_qlora, "both OR values must be bound params");
1692    }
1693
1694    #[test]
1695    fn where_and_or_precedence() {
1696        // `a AND b OR c` should compile as `(a AND b) OR c`
1697        let q = gql::parse(
1698            "MATCH (a:concept)-[e:extends]->(b) WHERE a.name = 'X' AND a.kind = 'concept' OR b.kind = 'project' RETURN a"
1699        ).unwrap();
1700        let compiled = compile(&q, &opts()).unwrap();
1701        // The SQL should contain an OR at the outer level wrapping the AND group
1702        assert!(
1703            compiled.sql.contains(" OR "),
1704            "expected OR in sql; sql: {}",
1705            compiled.sql
1706        );
1707    }
1708
1709    // --- F218: event_observations synthetic edge support (ADR-041 §8) ---
1710
1711    #[test]
1712    fn synthetic_edge_joins_event_observations() {
1713        let q = gql::parse("MATCH (ev)-[:observed_as_selected]->(m:memory) RETURN ev, m").unwrap();
1714        let compiled = compile(&q, &opts()).unwrap();
1715        assert!(
1716            compiled.sql.contains("event_observations"),
1717            "synthetic edge must join event_observations; sql: {}",
1718            compiled.sql
1719        );
1720        assert!(
1721            !compiled.sql.contains("graph_edges"),
1722            "synthetic edge must NOT join graph_edges; sql: {}",
1723            compiled.sql
1724        );
1725        let has_role_param = compiled
1726            .params
1727            .iter()
1728            .any(|p| matches!(p, QueryValue::Text(s) if s == "selected"));
1729        assert!(has_role_param, "role 'selected' must be a bound parameter");
1730    }
1731
1732    // CRIT-1 regression: event source node must bind to `events` table, not `entities`.
1733    // Previously `FROM entities n0 JOIN event_observations e0 ON e0.event_id = n0.id`
1734    // was emitted — IDs are disjoint so every query returned zero rows.
1735    #[test]
1736    fn synthetic_edge_event_source_binds_events_table() {
1737        let q = gql::parse("MATCH (ev)-[:observed_as_selected]->(m:memory) RETURN ev, m").unwrap();
1738        let compiled = compile(&q, &opts()).unwrap();
1739        assert!(
1740            compiled.sql.contains("FROM events "),
1741            "CRIT-1: event source must come FROM events table, not entities; sql: {}",
1742            compiled.sql
1743        );
1744        assert!(
1745            !compiled
1746                .sql
1747                .starts_with("SELECT * FROM entities n0 JOIN event_observations"),
1748            "CRIT-1: must not join events via entities table; sql: {}",
1749            compiled.sql
1750        );
1751    }
1752
1753    #[test]
1754    fn synthetic_edge_event_observation_join_uses_events_id() {
1755        // The JOIN must be `event_observations.event_id = events_alias.id`,
1756        // not `event_observations.event_id = entities_alias.id`.
1757        let q = gql::parse("MATCH (ev)-[:observed_as_selected]->(m) RETURN m").unwrap();
1758        let compiled = compile(&q, &opts()).unwrap();
1759        // The event alias is n0; the join must reference n0 against `events` table.
1760        assert!(
1761            compiled
1762                .sql
1763                .contains("JOIN event_observations e0 ON e0.event_id = n0.id"),
1764            "CRIT-1: event_observations must join on events.id (n0 is now events); sql: {}",
1765            compiled.sql
1766        );
1767    }
1768
1769    #[test]
1770    fn synthetic_edge_event_node_projects_event_columns() {
1771        // The event variable in RETURN must select event-table columns (verb, outcome, …),
1772        // not entity columns (name, entity_type, properties, …).
1773        let q = gql::parse("MATCH (ev)-[:observed_as_selected]->(m) RETURN ev").unwrap();
1774        let compiled = compile(&q, &opts()).unwrap();
1775        assert!(
1776            compiled.sql.contains("ev_verb"),
1777            "CRIT-1: event variable must project verb column; sql: {}",
1778            compiled.sql
1779        );
1780        assert!(
1781            compiled.sql.contains("ev_outcome"),
1782            "CRIT-1: event variable must project outcome column; sql: {}",
1783            compiled.sql
1784        );
1785        assert!(
1786            !compiled.sql.contains("ev_name,") && !compiled.sql.contains("ev_name "),
1787            "CRIT-1: event variable must NOT project entity name column; sql: {}",
1788            compiled.sql
1789        );
1790        assert!(
1791            !compiled.sql.contains("ev_properties"),
1792            "CRIT-1: event variable must NOT project entity properties column; sql: {}",
1793            compiled.sql
1794        );
1795    }
1796
1797    #[test]
1798    fn synthetic_edge_namespace_filter_on_events_table() {
1799        // MIN-2: when scoped, the namespace filter must target the events table
1800        // (which has a namespace column) — not rely on entities indirection.
1801        let q = gql::parse("MATCH (ev)-[:observed_as_selected]->(m) RETURN m").unwrap();
1802        let compiled = compile(&q, &scoped("test-ns")).unwrap();
1803        // Both the event alias (n0, now from `events`) and the target alias (n1, from `entities`)
1804        // must have namespace filters.
1805        let ns_count = compiled
1806            .params
1807            .iter()
1808            .filter(|p| matches!(p, QueryValue::Text(s) if s == "test-ns"))
1809            .count();
1810        assert!(
1811            ns_count >= 2,
1812            "MIN-2: namespace must be filtered on both events and target; params: {:?}",
1813            compiled.params
1814        );
1815    }
1816
1817    #[test]
1818    fn synthetic_edge_candidate_role() {
1819        let q = gql::parse("MATCH (ev)-[:observed_as_candidate]->(m) RETURN ev, m").unwrap();
1820        let compiled = compile(&q, &opts()).unwrap();
1821        assert!(
1822            compiled.sql.contains("event_observations"),
1823            "sql: {}",
1824            compiled.sql
1825        );
1826        let has_candidate = compiled
1827            .params
1828            .iter()
1829            .any(|p| matches!(p, QueryValue::Text(s) if s == "candidate"));
1830        assert!(has_candidate, "role 'candidate' must be bound");
1831    }
1832
1833    #[test]
1834    fn synthetic_edge_multi_role() {
1835        // Multiple observed_as_* relations compile to a role IN (...) predicate.
1836        let q =
1837            gql::parse("MATCH (ev)-[:observed_as_candidate|observed_as_selected]->(m) RETURN m")
1838                .unwrap();
1839        let compiled = compile(&q, &opts()).unwrap();
1840        assert!(
1841            compiled.sql.contains("event_observations"),
1842            "sql: {}",
1843            compiled.sql
1844        );
1845        assert!(
1846            compiled.sql.contains("IN"),
1847            "multi-role must use IN; sql: {}",
1848            compiled.sql
1849        );
1850    }
1851
1852    #[test]
1853    fn mixed_synthetic_and_canonical_rejected() {
1854        let q = gql::parse("MATCH (ev)-[:observed_as_selected|extends]->(m) RETURN m").unwrap();
1855        let err = compile(&q, &opts()).unwrap_err();
1856        assert!(
1857            matches!(err, QueryError::Compile(_)),
1858            "mixed synthetic+canonical must be rejected; got {err:?}"
1859        );
1860    }
1861
1862    #[test]
1863    fn synthetic_edge_inbound_rejected() {
1864        let q = gql::parse("MATCH (m)<-[:observed_as_selected]-(ev) RETURN m").unwrap();
1865        let err = compile(&q, &opts()).unwrap_err();
1866        assert!(
1867            matches!(err, QueryError::Compile(_)),
1868            "inbound synthetic edge must be rejected; got {err:?}"
1869        );
1870    }
1871
1872    // --- MAJ-1: OR spanning both endpoints in variable-length patterns must be rejected ---
1873
1874    #[test]
1875    fn variable_length_or_across_endpoints_rejected() {
1876        // MAJ-1: `WHERE a.name='X' OR b.name='Y'` in a variable-length pattern must be
1877        // rejected with Unsupported — not silently compiled to AND.
1878        let q = gql::parse(
1879            "MATCH (a)-[:extends*1..3]->(b) WHERE a.name = 'X' OR b.name = 'Y' RETURN a",
1880        )
1881        .unwrap();
1882        let result = compile(&q, &opts());
1883        assert!(
1884            matches!(result, Err(QueryError::Unsupported(_))),
1885            "MAJ-1: OR spanning both endpoints must return Unsupported; got {result:?}"
1886        );
1887        let err_msg = result.unwrap_err().to_string();
1888        assert!(
1889            err_msg.contains("separate queries") || err_msg.contains("one endpoint"),
1890            "error must be actionable; got: {err_msg}"
1891        );
1892    }
1893
1894    #[test]
1895    fn variable_length_or_single_endpoint_still_works() {
1896        // OR within a single endpoint (same alias) must still compile successfully.
1897        let q = gql::parse(
1898            "MATCH (a)-[:extends*1..3]->(b) WHERE a.name = 'X' OR a.name = 'Y' RETURN a",
1899        )
1900        .unwrap();
1901        let result = compile(&q, &opts());
1902        assert!(
1903            result.is_ok(),
1904            "single-endpoint OR must compile; got {result:?}"
1905        );
1906    }
1907
1908    #[test]
1909    fn variable_length_and_across_endpoints_still_works() {
1910        // AND across endpoints must still compile (the existing behavior is correct for AND).
1911        let q = gql::parse(
1912            "MATCH (a)-[:extends*1..3]->(b) WHERE a.name = 'X' AND b.name = 'Y' RETURN a",
1913        )
1914        .unwrap();
1915        let result = compile(&q, &opts());
1916        assert!(
1917            result.is_ok(),
1918            "AND across endpoints must compile; got {result:?}"
1919        );
1920    }
1921
1922    // --- Regression tests for #379: variable-length WHERE OR must not flatten to AND ---
1923
1924    #[test]
1925    fn test_variable_length_or_compiles_to_or() {
1926        // #379: MATCH (a)-[*1..3 WHERE p1 OR p2]-> in GQL surface maps to a single-endpoint
1927        // OR in the WHERE clause.  The compiled SQL must contain OR, not AND.
1928        let q = gql::parse(
1929            "MATCH (a)-[:extends*1..3]->(b) WHERE a.name = 'LoRA' OR a.name = 'QLoRA' RETURN b",
1930        )
1931        .unwrap();
1932        let compiled = compile(&q, &opts()).unwrap();
1933        // The start_conditions list must contain an OR fragment, not two AND-joined conditions.
1934        assert!(
1935            compiled.sql.contains(" OR "),
1936            "#379: variable-length single-endpoint OR must produce SQL OR; sql: {}",
1937            compiled.sql
1938        );
1939        // Both values must appear as bound parameters.
1940        let has_lora = compiled
1941            .params
1942            .iter()
1943            .any(|p| matches!(p, QueryValue::Text(s) if s == "LoRA"));
1944        let has_qlora = compiled
1945            .params
1946            .iter()
1947            .any(|p| matches!(p, QueryValue::Text(s) if s == "QLoRA"));
1948        assert!(has_lora && has_qlora, "both OR values must be bound params");
1949    }
1950
1951    #[test]
1952    fn test_single_endpoint_or_at_depth_1() {
1953        // #379: single-hop pattern with single-endpoint OR in WHERE.
1954        // The OR must appear in the compiled SQL (not silently become AND).
1955        let q = gql::parse(
1956            "MATCH (a)-[r:extends]->(b) WHERE r.weight > 0.5 OR r.relation = 'extends' RETURN a",
1957        )
1958        .unwrap();
1959        let compiled = compile(&q, &opts()).unwrap();
1960        assert!(
1961            compiled.sql.contains(" OR "),
1962            "#379: fixed-length single-endpoint OR must produce SQL OR; sql: {}",
1963            compiled.sql
1964        );
1965        let has_extends = compiled
1966            .params
1967            .iter()
1968            .any(|p| matches!(p, QueryValue::Text(s) if s == "extends"));
1969        assert!(
1970            has_extends,
1971            "relation value 'extends' must be a bound param"
1972        );
1973    }
1974
1975    #[test]
1976    fn test_and_still_works() {
1977        // #379: regression guard — simple WHERE p1 AND p2 must still emit AND.
1978        let q = gql::parse(
1979            "MATCH (a)-[:extends*1..3]->(b) WHERE a.name = 'LoRA' AND a.kind = 'concept' RETURN b",
1980        )
1981        .unwrap();
1982        let compiled = compile(&q, &opts()).unwrap();
1983        // The SQL must not contain a bare " OR " from the AND expression.
1984        assert!(
1985            !compiled.sql.contains(" OR "),
1986            "#379: AND must not produce OR; sql: {}",
1987            compiled.sql
1988        );
1989        let has_lora = compiled
1990            .params
1991            .iter()
1992            .any(|p| matches!(p, QueryValue::Text(s) if s == "LoRA"));
1993        let has_concept = compiled
1994            .params
1995            .iter()
1996            .any(|p| matches!(p, QueryValue::Text(s) if s == "concept"));
1997        assert!(
1998            has_lora && has_concept,
1999            "both AND values must be bound params"
2000        );
2001    }
2002}