Skip to main content

khive_query/compilers/
sql.rs

1//! Compile GQL AST to parameterized SQL.
2//!
3//! Two compilation paths:
4//! - Fixed-length patterns (all edges *1..1) → JOIN chain
5//! - Variable-length patterns (any edge *N..M where M>1) → recursive CTE
6//!
7//! Synthetic edge paths (ADR-041):
8//! - Relations prefixed `observed_as_*` join against `event_observations`, not `graph_edges`.
9//!
10//! Security invariants (MAJ-1/MAJ-2/MAJ-3 from critic review):
11//! - Namespace injection: WHERE clause always comes from CompileOptions.scopes, never the query.
12//! - Edge property whitelist: only `relation` and `weight` are queryable edge columns.
13//! - Depth cap: recursive CTE depth capped at MAX_DEPTH; exceeding it errors at validation.
14
15use crate::ast::*;
16use crate::error::QueryError;
17use crate::validate::{validate_with_warnings, MAX_DEPTH};
18
/// Relation names that denote synthetic observation edges (ADR-041 §8).
///
/// Each entry has the shape `observed_as_<role>`.
const SYNTHETIC_RELATIONS: &[&str] = &[
    "observed_as_candidate",
    "observed_as_selected",
    "observed_as_target",
    "observed_as_signal",
];

/// Reports whether `rel` is one of the synthetic ADR-041 observation relations.
///
/// The comparison is an exact, case-sensitive match against
/// `SYNTHETIC_RELATIONS`; a bare `observed_as_` prefix is not enough.
fn is_synthetic(rel: &str) -> bool {
    SYNTHETIC_RELATIONS.iter().any(|known| *known == rel)
}
31
/// Maps a synthetic relation name to the `role` value it stands for.
///
/// `observed_as_<role>` yields `Some("<role>")` for the four known roles
/// (`candidate`, `selected`, `target`, `signal`); every other input yields `None`.
fn synthetic_role(rel: &str) -> Option<&'static str> {
    // Anything without the synthetic prefix cannot be an observation relation.
    match rel.strip_prefix("observed_as_")? {
        "candidate" => Some("candidate"),
        "selected" => Some("selected"),
        "target" => Some("target"),
        "signal" => Some("signal"),
        _ => None,
    }
}
42
/// Output of the SQL compiler: the statement text plus what is needed to run it.
#[derive(Debug)]
pub struct CompiledQuery {
    /// Parameterized SQL text; values are referenced through numbered `?N` placeholders.
    pub sql: String,
    /// Bound values in placeholder order: `?N` refers to the N-th entry (1-based).
    pub params: Vec<QueryValue>,
    /// The RETURN items of the compiled query (the fixed-length path stores a copy of
    /// the query's `return_items`).
    pub return_vars: Vec<ReturnItem>,
    /// Warnings reported by validation; `compile` fills this in after the SQL is built.
    pub warnings: Vec<String>,
}
50
/// Server-side settings applied to every compilation.
///
/// These values come from the caller, never from the query text, so the query
/// cannot widen its own namespace scope or raise its own row limit.
#[derive(Debug, Clone)]
pub struct CompileOptions {
    /// Namespace scope. Empty = cross-namespace (all). Non-empty = filter to these namespaces.
    pub scopes: Vec<String>,
    /// Hard limit cap (server-side safety). Query limit is min(requested, max_limit).
    pub max_limit: usize,
}

impl Default for CompileOptions {
    /// Cross-namespace scope (no namespace filter) with a row cap of 500.
    fn default() -> Self {
        Self {
            scopes: Vec::new(),
            max_limit: 500,
        }
    }
}
66
67pub fn compile(query: &GqlQuery, opts: &CompileOptions) -> Result<CompiledQuery, QueryError> {
68    if query.pattern.elements.is_empty() {
69        return Err(QueryError::Compile("empty pattern".into()));
70    }
71
72    // Validate edge relations + structural rules before emitting SQL.
73    let mut query = query.clone();
74    let warnings = validate_with_warnings(&mut query)?;
75
76    let mut compiled = if query.pattern.has_variable_length() {
77        compile_variable_length(&query, opts)?
78    } else {
79        compile_fixed_length(&query, opts)?
80    };
81    compiled.warnings = warnings;
82    Ok(compiled)
83}
84
85fn namespace_filter(alias: &str, opts: &CompileOptions, params: &mut Vec<QueryValue>) -> String {
86    if opts.scopes.is_empty() {
87        String::new()
88    } else if opts.scopes.len() == 1 {
89        params.push(QueryValue::Text(opts.scopes[0].clone()));
90        format!(" AND {alias}.namespace = ?{}", params.len())
91    } else {
92        let placeholders: Vec<String> = opts
93            .scopes
94            .iter()
95            .map(|s| {
96                params.push(QueryValue::Text(s.clone()));
97                format!("?{}", params.len())
98            })
99            .collect();
100        format!(" AND {alias}.namespace IN ({})", placeholders.join(", "))
101    }
102}
103
104/// Identifies node indices that are endpoints of synthetic `observed_as_*` edges.
105///
106/// Returns `(source_indices, target_indices)`:
107/// - `source_indices`: node indices bound to the `events` table (the event source node)
108/// - `target_indices`: node indices bound to the `notes` table (the observed note target node)
109fn synthetic_endpoint_node_indices(
110    elements: &[PatternElement],
111) -> (
112    std::collections::HashSet<usize>,
113    std::collections::HashSet<usize>,
114) {
115    let mut source_set = std::collections::HashSet::new();
116    let mut target_set = std::collections::HashSet::new();
117    let mut node_idx = 0usize;
118    let mut prev_node_idx: Option<usize> = None;
119    for element in elements {
120        match element {
121            PatternElement::Node(_) => {
122                prev_node_idx = Some(node_idx);
123                node_idx += 1;
124            }
125            PatternElement::Edge(ep) => {
126                let has_synthetic = ep.relations.iter().any(|r| is_synthetic(r));
127                if has_synthetic {
128                    if let Some(src_idx) = prev_node_idx {
129                        source_set.insert(src_idx);
130                        // The target is the next node (current node_idx).
131                        target_set.insert(node_idx);
132                    }
133                }
134            }
135        }
136    }
137    (source_set, target_set)
138}
139
140/// Compile fixed-length patterns to a chain of JOINs.
141///
142/// MATCH (a:concept)-[e:introduced_by]->(b:paper) WHERE ... RETURN a, e, b LIMIT 10
143/// →
144/// SELECT a.*, e.*, b.*
145/// FROM entities a
146/// JOIN graph_edges e ON e.source_id = a.id
147/// JOIN entities b ON b.id = e.target_id
148/// WHERE a.kind = 'concept' AND e.relation = 'introduced_by' AND b.kind = 'paper'
149///   AND a.deleted_at IS NULL AND b.deleted_at IS NULL
150/// LIMIT 10
151///
152/// Synthetic `observed_as_*` patterns (ADR-041 §8) route the event-source node
153/// to the `events` table instead of `entities`.
154fn compile_fixed_length(
155    query: &GqlQuery,
156    opts: &CompileOptions,
157) -> Result<CompiledQuery, QueryError> {
158    let mut params: Vec<QueryValue> = Vec::new();
159    let mut from_parts: Vec<String> = Vec::new();
160    let mut join_parts: Vec<String> = Vec::new();
161    let mut where_parts: Vec<String> = Vec::new();
162    let mut select_parts: Vec<String> = Vec::new();
163
164    let mut node_aliases: Vec<String> = Vec::new();
165    let mut edge_aliases: Vec<String> = Vec::new();
166    let mut var_to_alias: std::collections::HashMap<String, (String, VarKind)> =
167        std::collections::HashMap::new();
168
169    // Pre-compute which node indices are endpoints of synthetic edges.
170    // Source nodes bind to `events`; target nodes bind to `notes`.
171    let (event_source_indices, note_target_indices) =
172        synthetic_endpoint_node_indices(&query.pattern.elements);
173
174    let mut node_idx = 0usize;
175    let mut edge_idx = 0usize;
176
177    for element in &query.pattern.elements {
178        match element {
179            PatternElement::Node(np) => {
180                let alias = format!("n{node_idx}");
181                node_aliases.push(alias.clone());
182
183                let is_event_source = event_source_indices.contains(&node_idx);
184                let is_note_target = note_target_indices.contains(&node_idx);
185
186                if node_idx == 0 {
187                    if is_event_source {
188                        from_parts.push(format!("events {alias}"));
189                    } else {
190                        // Note targets are joined by the synthetic edge handler, not FROM.
191                        if !is_note_target {
192                            from_parts.push(format!("entities {alias}"));
193                        }
194                    }
195                }
196
197                if is_event_source {
198                    // Events table does not have `deleted_at`; filter is omitted.
199                    // Namespace filter uses the `events.namespace` column directly.
200                    let ns_filter = namespace_filter(&alias, opts, &mut params);
201                    if !ns_filter.is_empty() {
202                        where_parts.push(ns_filter.trim_start_matches(" AND ").to_string());
203                    }
204                    // `kind` on an event node filters events.kind (e.g. "recall_executed").
205                    if let Some(ref kind) = np.kind {
206                        params.push(QueryValue::Text(kind.clone()));
207                        where_parts.push(format!("{alias}.kind = ?{}", params.len()));
208                    }
209                    // entity_type and properties are not columns on events — reject explicitly.
210                    if np.entity_type.is_some() {
211                        return Err(QueryError::Compile(
212                            "event nodes do not have an entity_type column".into(),
213                        ));
214                    }
215                    if !np.properties.is_empty() {
216                        return Err(QueryError::Compile(
217                            "event nodes do not support inline property filters; \
218                             use a WHERE clause on verb, outcome, or payload fields"
219                                .into(),
220                        ));
221                    }
222                } else if is_note_target {
223                    // Note targets: `notes` table (joined by the synthetic edge handler).
224                    where_parts.push(format!("{alias}.deleted_at IS NULL"));
225
226                    let ns_filter = namespace_filter(&alias, opts, &mut params);
227                    if !ns_filter.is_empty() {
228                        where_parts.push(ns_filter.trim_start_matches(" AND ").to_string());
229                    }
230
231                    if let Some(ref kind) = np.kind {
232                        params.push(QueryValue::Text(kind.clone()));
233                        where_parts.push(format!("{alias}.kind = ?{}", params.len()));
234                    }
235
236                    // entity_type does not exist on notes — reject explicitly.
237                    if np.entity_type.is_some() {
238                        return Err(QueryError::Compile(
239                            "observed note targets do not have an entity_type column".into(),
240                        ));
241                    }
242
243                    for (key, val) in &np.properties {
244                        params.push(QueryValue::Text(val.clone()));
245                        if key == "name" || key == "content" {
246                            where_parts
247                                .push(format!("{alias}.{key} = ?{} COLLATE NOCASE", params.len()));
248                        } else {
249                            where_parts.push(format!(
250                                "json_extract({alias}.properties, '$.{}') = ?{} COLLATE NOCASE",
251                                key.replace('\'', "''"),
252                                params.len()
253                            ));
254                        }
255                    }
256                } else {
257                    where_parts.push(format!("{alias}.deleted_at IS NULL"));
258
259                    let ns_filter = namespace_filter(&alias, opts, &mut params);
260                    if !ns_filter.is_empty() {
261                        where_parts.push(ns_filter.trim_start_matches(" AND ").to_string());
262                    }
263
264                    if let Some(ref kind) = np.kind {
265                        params.push(QueryValue::Text(kind.clone()));
266                        where_parts.push(format!("{alias}.kind = ?{}", params.len()));
267                    }
268
269                    if let Some(ref et) = np.entity_type {
270                        params.push(QueryValue::Text(et.clone()));
271                        where_parts.push(format!("{alias}.entity_type = ?{}", params.len()));
272                    }
273
274                    for (key, val) in &np.properties {
275                        params.push(QueryValue::Text(val.clone()));
276                        if key == "name" {
277                            where_parts
278                                .push(format!("{alias}.name = ?{} COLLATE NOCASE", params.len()));
279                        } else {
280                            where_parts.push(format!(
281                                "json_extract({alias}.properties, '$.{}') = ?{} COLLATE NOCASE",
282                                key.replace('\'', "''"),
283                                params.len()
284                            ));
285                        }
286                    }
287                }
288
289                if let Some(ref var) = np.variable {
290                    let kind = if is_event_source {
291                        VarKind::EventNode
292                    } else if is_note_target {
293                        VarKind::NoteNode
294                    } else {
295                        VarKind::Node
296                    };
297                    var_to_alias.insert(var.clone(), (alias.clone(), kind));
298                }
299
300                node_idx += 1;
301            }
302            PatternElement::Edge(ep) => {
303                let e_alias = format!("e{edge_idx}");
304                let prev_node = &node_aliases[node_aliases.len() - 1];
305                let next_alias = format!("n{}", node_idx);
306
307                edge_aliases.push(e_alias.clone());
308
309                // Detect synthetic event_observations edges (ADR-041 §8).
310                // A synthetic edge is one whose only relation(s) are observed_as_* names.
311                // Mixed synthetic+canonical relations are rejected: the two tables don't share
312                // a common join key that would make an OR across them meaningful.
313                let has_synthetic = ep.relations.iter().any(|r| is_synthetic(r));
314                let has_canonical = ep.relations.iter().any(|r| !is_synthetic(r));
315                if has_synthetic && has_canonical {
316                    return Err(QueryError::Compile(
317                        "cannot mix synthetic observed_as_* relations with canonical edge relations \
318                         in a single edge pattern"
319                            .into(),
320                    ));
321                }
322
323                if has_synthetic {
324                    // Synthetic edge: join event_observations.
325                    // Direction is always event → entity/note (OUT from the event node).
326                    // The event node is the source (prev_node); the entity/note is the target.
327                    if !matches!(ep.direction, EdgeDirection::Out) {
328                        return Err(QueryError::Compile(
329                            "synthetic observed_as_* edges are always event → entity (outbound only)".into(),
330                        ));
331                    }
332                    join_parts.push(format!(
333                        "JOIN event_observations {e_alias} ON {e_alias}.event_id = {prev_node}.id"
334                    ));
335                    // Roles: collect the unique role values from the synthetic relation names.
336                    let roles: Vec<&'static str> = ep
337                        .relations
338                        .iter()
339                        .filter_map(|r| synthetic_role(r))
340                        .collect();
341                    if roles.len() == 1 {
342                        params.push(QueryValue::Text(roles[0].to_string()));
343                        where_parts.push(format!("{e_alias}.role = ?{}", params.len()));
344                    } else if roles.len() > 1 {
345                        let placeholders: Vec<String> = roles
346                            .iter()
347                            .map(|r| {
348                                params.push(QueryValue::Text(r.to_string()));
349                                format!("?{}", params.len())
350                            })
351                            .collect();
352                        where_parts
353                            .push(format!("{e_alias}.role IN ({})", placeholders.join(", ")));
354                    }
355                    // Join the target node via event_observations.entity_id.
356                    // The `referent_kind` column discriminates between note and entity
357                    // substrates.  Per ADR-041, recall/rerank observations always target
358                    // notes (`referent_kind='note'`); we filter to note substrate and join
359                    // the `notes` table.  An explicit `AND e0.referent_kind='note'`
360                    // prevents cross-substrate ID collisions.
361                    join_parts.push(format!(
362                        "JOIN notes {next_alias} ON {next_alias}.id = {e_alias}.entity_id \
363                         AND {e_alias}.referent_kind = 'note'"
364                    ));
365                } else {
366                    // Standard canonical edge: join graph_edges.
367                    let (source_join, target_join) = match ep.direction {
368                        EdgeDirection::Out => (
369                            format!("{e_alias}.source_id = {prev_node}.id"),
370                            "target_id",
371                        ),
372                        EdgeDirection::In => (
373                            format!("{e_alias}.target_id = {prev_node}.id"),
374                            "source_id",
375                        ),
376                        EdgeDirection::Both => (
377                            format!(
378                                "({e_alias}.source_id = {prev_node}.id OR {e_alias}.target_id = {prev_node}.id)"
379                            ),
380                            "CASE_BOTH",
381                        ),
382                    };
383
384                    let next_join_col = if target_join == "CASE_BOTH" {
385                        format!(
386                            "CASE WHEN {e_alias}.source_id = {prev_node}.id THEN {e_alias}.target_id ELSE {e_alias}.source_id END"
387                        )
388                    } else {
389                        format!("{e_alias}.{target_join}")
390                    };
391
392                    join_parts.push(format!(
393                        "JOIN graph_edges {e_alias} ON {source_join} AND {e_alias}.deleted_at IS NULL"
394                    ));
395
396                    let ens_filter = namespace_filter(&e_alias, opts, &mut params);
397                    if !ens_filter.is_empty() {
398                        where_parts.push(ens_filter.trim_start_matches(" AND ").to_string());
399                    }
400
401                    join_parts.push(format!(
402                        "JOIN entities {next_alias} ON {next_alias}.id = {next_join_col}"
403                    ));
404
405                    if !ep.relations.is_empty() {
406                        if ep.relations.len() == 1 {
407                            params.push(QueryValue::Text(ep.relations[0].clone()));
408                            where_parts.push(format!("{e_alias}.relation = ?{}", params.len()));
409                        } else {
410                            let placeholders: Vec<String> = ep
411                                .relations
412                                .iter()
413                                .map(|r| {
414                                    params.push(QueryValue::Text(r.clone()));
415                                    format!("?{}", params.len())
416                                })
417                                .collect();
418                            where_parts.push(format!(
419                                "{e_alias}.relation IN ({})",
420                                placeholders.join(", ")
421                            ));
422                        }
423                    }
424                }
425
426                if let Some(ref var) = ep.variable {
427                    var_to_alias.insert(var.clone(), (e_alias.clone(), VarKind::Edge));
428                }
429
430                edge_idx += 1;
431            }
432        }
433    }
434
435    // WHERE clause conditions from GQL WHERE (supports AND / OR tree — ADR-008)
436    if let Some(where_sql) = compile_where_expr(&query.where_clause, &var_to_alias, &mut params)? {
437        where_parts.push(where_sql);
438    }
439
440    // SELECT clause
441    for item in &query.return_items {
442        let var = item.variable();
443        if let Some((alias, kind)) = var_to_alias.get(var) {
444            match item {
445                ReturnItem::Property(_, prop) => {
446                    let col = property_to_column(prop, kind)?;
447                    select_parts.push(format!("{alias}.{col} AS {var}_{prop}"));
448                }
449                ReturnItem::Variable(_) => match kind {
450                    VarKind::Node => {
451                        select_parts.push(format!(
452                            "{alias}.id AS {var}_id, {alias}.namespace AS {var}_namespace, \
453                             {alias}.kind AS {var}_kind, {alias}.entity_type AS {var}_entity_type, \
454                             {alias}.name AS {var}_name, \
455                             {alias}.properties AS {var}_properties, \
456                             {alias}.created_at AS {var}_created_at, \
457                             {alias}.updated_at AS {var}_updated_at"
458                        ));
459                    }
460                    VarKind::NoteNode => {
461                        select_parts.push(format!(
462                            "{alias}.id AS {var}_id, {alias}.namespace AS {var}_namespace, \
463                             {alias}.kind AS {var}_kind, {alias}.status AS {var}_status, \
464                             {alias}.content AS {var}_content, \
465                             {alias}.salience AS {var}_salience, \
466                             {alias}.properties AS {var}_properties, \
467                             {alias}.created_at AS {var}_created_at, \
468                             {alias}.updated_at AS {var}_updated_at"
469                        ));
470                    }
471                    VarKind::EventNode => {
472                        select_parts.push(format!(
473                            "{alias}.id AS {var}_id, {alias}.namespace AS {var}_namespace, \
474                             {alias}.verb AS {var}_verb, {alias}.substrate AS {var}_substrate, \
475                             {alias}.actor AS {var}_actor, {alias}.kind AS {var}_kind, \
476                             {alias}.outcome AS {var}_outcome, \
477                             {alias}.payload AS {var}_payload, \
478                             {alias}.created_at AS {var}_created_at"
479                        ));
480                    }
481                    VarKind::Edge => {
482                        select_parts.push(format!(
483                            "{alias}.id AS {var}_id, {alias}.source_id AS {var}_source, \
484                             {alias}.target_id AS {var}_target, \
485                             {alias}.relation AS {var}_relation, \
486                             {alias}.weight AS {var}_weight"
487                        ));
488                    }
489                },
490            }
491        } else {
492            return Err(QueryError::Compile(format!(
493                "unknown variable '{var}' in RETURN clause"
494            )));
495        }
496    }
497
498    let limit = query.limit.unwrap_or(opts.max_limit).min(opts.max_limit);
499    params.push(QueryValue::Integer(limit as i64));
500
501    let sql = format!(
502        "SELECT {} FROM {} {} WHERE {} LIMIT ?{}",
503        select_parts.join(", "),
504        from_parts.join(", "),
505        join_parts.join(" "),
506        where_parts.join(" AND "),
507        params.len(),
508    );
509
510    Ok(CompiledQuery {
511        sql,
512        params,
513        return_vars: query.return_items.clone(),
514        warnings: Vec::new(),
515    })
516}
517
518/// Compile a `WhereExpr` tree into a SQL fragment, pushing bound parameters into `params`.
519///
520/// Returns `Ok(None)` for `WhereExpr::True` (no fragment needed), or `Ok(Some(sql))` otherwise.
521/// The caller is responsible for wrapping the result in an AND with the structural predicates.
522fn compile_where_expr(
523    expr: &WhereExpr,
524    var_to_alias: &std::collections::HashMap<String, (String, VarKind)>,
525    params: &mut Vec<QueryValue>,
526) -> Result<Option<String>, QueryError> {
527    match expr {
528        WhereExpr::True => Ok(None),
529        WhereExpr::Condition(cond) => {
530            let sql = compile_single_condition(cond, var_to_alias, params)?;
531            Ok(Some(sql))
532        }
533        WhereExpr::And(l, r) => {
534            let ls = compile_where_expr(l, var_to_alias, params)?;
535            let rs = compile_where_expr(r, var_to_alias, params)?;
536            Ok(match (ls, rs) {
537                (None, None) => None,
538                (Some(s), None) | (None, Some(s)) => Some(s),
539                (Some(l), Some(r)) => Some(format!("{l} AND {r}")),
540            })
541        }
542        WhereExpr::Or(l, r) => {
543            let ls = compile_where_expr(l, var_to_alias, params)?;
544            let rs = compile_where_expr(r, var_to_alias, params)?;
545            Ok(match (ls, rs) {
546                (None, None) => None,
547                (Some(s), None) | (None, Some(s)) => Some(s),
548                (Some(l), Some(r)) => Some(format!("({l} OR {r})")),
549            })
550        }
551    }
552}
553
554/// Compile a single leaf condition to a SQL predicate string.
555fn compile_single_condition(
556    cond: &Condition,
557    var_to_alias: &std::collections::HashMap<String, (String, VarKind)>,
558    params: &mut Vec<QueryValue>,
559) -> Result<String, QueryError> {
560    let (alias, kind) = var_to_alias.get(&cond.variable).ok_or_else(|| {
561        QueryError::Compile(format!(
562            "unknown variable '{}' in WHERE clause",
563            cond.variable
564        ))
565    })?;
566
567    let col_expr = match kind {
568        VarKind::Node => {
569            if cond.property == "name"
570                || cond.property == "kind"
571                || cond.property == "entity_type"
572                || cond.property == "namespace"
573            {
574                format!("{alias}.{}", cond.property)
575            } else {
576                format!(
577                    "json_extract({alias}.properties, '$.{}')",
578                    cond.property.replace('\'', "''")
579                )
580            }
581        }
582        VarKind::NoteNode => {
583            if NOTE_COLUMNS.contains(&cond.property.as_str()) {
584                format!("{alias}.{}", cond.property)
585            } else {
586                format!(
587                    "json_extract({alias}.properties, '$.{}')",
588                    cond.property.replace('\'', "''")
589                )
590            }
591        }
592        VarKind::EventNode => {
593            // Events table has direct columns only; reject unknown fields.
594            if EVENT_COLUMNS.contains(&cond.property.as_str()) {
595                format!("{alias}.{}", cond.property)
596            } else {
597                return Err(QueryError::Validation(format!(
598                    "event property '{}' not queryable; valid columns: {}",
599                    cond.property,
600                    EVENT_COLUMNS.join(", ")
601                )));
602            }
603        }
604        VarKind::Edge => match cond.property.as_str() {
605            "relation" | "weight" => format!("{alias}.{}", cond.property),
606            other => {
607                return Err(QueryError::Validation(format!(
608                    "edge property '{other}' not queryable; use 'relation' or 'weight'"
609                )))
610            }
611        },
612    };
613
614    let op_str = match cond.op {
615        CompareOp::Eq => "=",
616        CompareOp::Neq => "!=",
617        CompareOp::Gt => ">",
618        CompareOp::Lt => "<",
619        CompareOp::Gte => ">=",
620        CompareOp::Lte => "<=",
621        CompareOp::Like => "LIKE",
622    };
623
624    let sql = match &cond.value {
625        ConditionValue::String(s) => {
626            params.push(QueryValue::Text(s.clone()));
627            let collate = if matches!(cond.op, CompareOp::Eq | CompareOp::Like) {
628                " COLLATE NOCASE"
629            } else {
630                ""
631            };
632            format!("{col_expr} {op_str} ?{}{}", params.len(), collate)
633        }
634        ConditionValue::Number(n) => {
635            params.push(QueryValue::Float(*n));
636            format!("{col_expr} {op_str} ?{}", params.len())
637        }
638        ConditionValue::Bool(b) => {
639            params.push(QueryValue::Integer(if *b { 1 } else { 0 }));
640            format!("{col_expr} {op_str} ?{}", params.len())
641        }
642    };
643    Ok(sql)
644}
645
646/// Returns `true` if the given `WhereExpr` subtree references only the start
647/// variable (`start_var`), only the end variable, or neither — but NOT both.
648///
649/// Used to detect OR nodes whose branches reference different endpoints, which
650/// cannot be correctly compiled by the variable-length leaf-routing approach.
651fn expr_endpoint_set(
652    expr: &WhereExpr,
653    start_var: Option<&str>,
654    end_var: Option<&str>,
655) -> (bool, bool) {
656    match expr {
657        WhereExpr::True => (false, false),
658        WhereExpr::Condition(c) => {
659            let is_start = start_var == Some(c.variable.as_str());
660            let is_end = end_var == Some(c.variable.as_str());
661            (is_start, is_end)
662        }
663        WhereExpr::And(l, r) | WhereExpr::Or(l, r) => {
664            let (ls, le) = expr_endpoint_set(l, start_var, end_var);
665            let (rs, re) = expr_endpoint_set(r, start_var, end_var);
666            (ls || rs, le || re)
667        }
668    }
669}
670
671/// Walk the expression tree and return `Err(Unsupported)` if any `Or` node has
672/// branches that span both start and end endpoint variables.  Single-endpoint
673/// ORs (e.g. `a.name='X' OR a.name='Y'`) are fine.
674fn reject_or_spanning_endpoints(
675    expr: &WhereExpr,
676    start: &NodePattern,
677    end: &NodePattern,
678) -> Result<(), QueryError> {
679    let start_var = start.variable.as_deref();
680    let end_var = end.variable.as_deref();
681    reject_or_spanning_impl(expr, start_var, end_var)
682}
683
684fn reject_or_spanning_impl(
685    expr: &WhereExpr,
686    start_var: Option<&str>,
687    end_var: Option<&str>,
688) -> Result<(), QueryError> {
689    match expr {
690        WhereExpr::True | WhereExpr::Condition(_) => Ok(()),
691        WhereExpr::And(l, r) => {
692            reject_or_spanning_impl(l, start_var, end_var)?;
693            reject_or_spanning_impl(r, start_var, end_var)
694        }
695        WhereExpr::Or(l, r) => {
696            let (l_start, l_end) = expr_endpoint_set(l, start_var, end_var);
697            let (r_start, r_end) = expr_endpoint_set(r, start_var, end_var);
698            let spans_start = l_start || r_start;
699            let spans_end = l_end || r_end;
700            if spans_start && spans_end {
701                return Err(QueryError::Unsupported(
702                    "WHERE clauses that span both endpoints in a variable-length pattern \
703                     are not yet supported; rewrite as separate queries or restrict each \
704                     OR branch to one endpoint"
705                        .into(),
706                ));
707            }
708            // Even if this OR is safe, recurse to catch nested ORs.
709            reject_or_spanning_impl(l, start_var, end_var)?;
710            reject_or_spanning_impl(r, start_var, end_var)
711        }
712    }
713}
714
715/// Compile variable-length patterns to a recursive CTE.
716///
717/// Depth is capped at min(requested, 10) — MAJ-2 (parameterized min_depth, not literal).
718fn compile_variable_length(
719    query: &GqlQuery,
720    opts: &CompileOptions,
721) -> Result<CompiledQuery, QueryError> {
722    let mut params: Vec<QueryValue> = Vec::new();
723    let mut var_to_alias: std::collections::HashMap<String, (String, VarKind)> =
724        std::collections::HashMap::new();
725
726    // For variable-length, we expect exactly: start_node -[*N..M]-> end_node.
727    // Mixed fixed+variable chains and additional trailing pattern elements are
728    // not yet supported — reject explicitly rather than silently dropping them.
729    let nodes: Vec<&NodePattern> = query.pattern.nodes().collect();
730    let edges: Vec<&EdgePattern> = query.pattern.edges().collect();
731
732    if nodes.len() != 2 || edges.len() != 1 || query.pattern.elements.len() != 3 {
733        return Err(QueryError::Unsupported(
734            "variable-length patterns must be a single start_node -[*N..M]-> end_node \
735             (mixed fixed/variable chains are not yet implemented)"
736                .into(),
737        ));
738    }
739
740    let start = &nodes[0];
741    let edge = &edges[0];
742    let end = &nodes[1];
743
744    // MAJ-2: depth cap — always parameterized, never injected as literal
745    let max_depth = edge.max_hops.min(MAX_DEPTH);
746    let min_depth = edge.min_hops;
747
748    // Build start-node conditions
749    let mut start_conditions: Vec<String> = vec!["s.deleted_at IS NULL".to_string()];
750    let ns_filter = namespace_filter("s", opts, &mut params);
751    if !ns_filter.is_empty() {
752        start_conditions.push(ns_filter.trim_start_matches(" AND ").to_string());
753    }
754
755    if let Some(ref kind) = start.kind {
756        params.push(QueryValue::Text(kind.clone()));
757        start_conditions.push(format!("s.kind = ?{}", params.len()));
758    }
759    if let Some(ref et) = start.entity_type {
760        params.push(QueryValue::Text(et.clone()));
761        start_conditions.push(format!("s.entity_type = ?{}", params.len()));
762    }
763    for (key, val) in &start.properties {
764        params.push(QueryValue::Text(val.clone()));
765        if key == "name" {
766            start_conditions.push(format!("s.name = ?{} COLLATE NOCASE", params.len()));
767        } else {
768            start_conditions.push(format!(
769                "json_extract(s.properties, '$.{}') = ?{} COLLATE NOCASE",
770                key.replace('\'', "''"),
771                params.len()
772            ));
773        }
774    }
775
776    // Relation filter
777    let mut relation_condition = String::new();
778    if !edge.relations.is_empty() {
779        if edge.relations.len() == 1 {
780            params.push(QueryValue::Text(edge.relations[0].clone()));
781            relation_condition = format!(" AND e.relation = ?{}", params.len());
782        } else {
783            let placeholders: Vec<String> = edge
784                .relations
785                .iter()
786                .map(|r| {
787                    params.push(QueryValue::Text(r.clone()));
788                    format!("?{}", params.len())
789                })
790                .collect();
791            relation_condition = format!(" AND e.relation IN ({})", placeholders.join(", "));
792        }
793    }
794
795    // Edge namespace filter
796    let e_ns_filter = namespace_filter("e", opts, &mut params);
797
798    // Direction-dependent JOIN
799    let (seed_join, seed_next, recurse_join, recurse_next) = match edge.direction {
800        EdgeDirection::Out => (
801            "e.source_id = s.id",
802            "e.target_id",
803            "e.source_id = t.current_id",
804            "e.target_id",
805        ),
806        EdgeDirection::In => (
807            "e.target_id = s.id",
808            "e.source_id",
809            "e.target_id = t.current_id",
810            "e.source_id",
811        ),
812        EdgeDirection::Both => (
813            "(e.source_id = s.id OR e.target_id = s.id)",
814            "CASE WHEN e.source_id = s.id THEN e.target_id ELSE e.source_id END",
815            "(e.source_id = t.current_id OR e.target_id = t.current_id)",
816            "CASE WHEN e.source_id = t.current_id THEN e.target_id ELSE e.source_id END",
817        ),
818    };
819
820    params.push(QueryValue::Integer(max_depth as i64));
821    let depth_param = params.len();
822
823    // End-node conditions (applied in outer WHERE). `r` is always joined
824    // unconditionally below so these references resolve regardless of whether
825    // the end variable is projected.
826    let mut end_conditions: Vec<String> = vec!["r.deleted_at IS NULL".to_string()];
827    let r_ns_filter = namespace_filter("r", opts, &mut params);
828    if !r_ns_filter.is_empty() {
829        end_conditions.push(r_ns_filter.trim_start_matches(" AND ").to_string());
830    }
831    if let Some(ref kind) = end.kind {
832        params.push(QueryValue::Text(kind.clone()));
833        end_conditions.push(format!("r.kind = ?{}", params.len()));
834    }
835    if let Some(ref et) = end.entity_type {
836        params.push(QueryValue::Text(et.clone()));
837        end_conditions.push(format!("r.entity_type = ?{}", params.len()));
838    }
839    for (key, val) in &end.properties {
840        params.push(QueryValue::Text(val.clone()));
841        if key == "name" {
842            end_conditions.push(format!("r.name = ?{} COLLATE NOCASE", params.len()));
843        } else {
844            end_conditions.push(format!(
845                "json_extract(r.properties, '$.{}') = ?{} COLLATE NOCASE",
846                key.replace('\'', "''"),
847                params.len()
848            ));
849        }
850    }
851
852    // WHERE clause conditions for variable-length patterns.
853    // Each leaf condition is routed to start_conditions (alias s) or end_conditions
854    // (alias r) based on which variable it references.  OR expressions that span
855    // both start and end nodes are not supported — reject explicitly with an
856    // actionable error message rather than silently converting OR to AND.
857    reject_or_spanning_endpoints(&query.where_clause, start, end)?;
858
859    for cond in query.where_clause.conditions() {
860        let col_alias = if start.variable.as_deref() == Some(cond.variable.as_str()) {
861            "s"
862        } else if end.variable.as_deref() == Some(cond.variable.as_str()) {
863            "r"
864        } else {
865            return Err(QueryError::Compile(format!(
866                "variable '{}' in WHERE not supported in variable-length pattern (only start/end node variables)",
867                cond.variable
868            )));
869        };
870
871        let col_expr =
872            if cond.property == "name" || cond.property == "kind" || cond.property == "entity_type"
873            {
874                format!("{col_alias}.{}", cond.property)
875            } else {
876                format!(
877                    "json_extract({col_alias}.properties, '$.{}')",
878                    cond.property.replace('\'', "''")
879                )
880            };
881
882        let op_str = match cond.op {
883            CompareOp::Eq => "=",
884            CompareOp::Neq => "!=",
885            CompareOp::Gt => ">",
886            CompareOp::Lt => "<",
887            CompareOp::Gte => ">=",
888            CompareOp::Lte => "<=",
889            CompareOp::Like => "LIKE",
890        };
891
892        match &cond.value {
893            ConditionValue::String(s) => {
894                params.push(QueryValue::Text(s.clone()));
895                let collate = if matches!(cond.op, CompareOp::Eq | CompareOp::Like) {
896                    " COLLATE NOCASE"
897                } else {
898                    ""
899                };
900                if col_alias == "s" {
901                    start_conditions
902                        .push(format!("{col_expr} {op_str} ?{}{collate}", params.len()));
903                } else {
904                    end_conditions.push(format!("{col_expr} {op_str} ?{}{collate}", params.len()));
905                }
906            }
907            ConditionValue::Number(n) => {
908                params.push(QueryValue::Float(*n));
909                if col_alias == "s" {
910                    start_conditions.push(format!("{col_expr} {op_str} ?{}", params.len()));
911                } else {
912                    end_conditions.push(format!("{col_expr} {op_str} ?{}", params.len()));
913                }
914            }
915            ConditionValue::Bool(b) => {
916                params.push(QueryValue::Integer(if *b { 1 } else { 0 }));
917                if col_alias == "s" {
918                    start_conditions.push(format!("{col_expr} {op_str} ?{}", params.len()));
919                } else {
920                    end_conditions.push(format!("{col_expr} {op_str} ?{}", params.len()));
921                }
922            }
923        }
924    }
925
926    // MAJ-2: min_depth is always a bound parameter, never a literal
927    if min_depth > 0 {
928        params.push(QueryValue::Integer(min_depth as i64));
929        end_conditions.push(format!("t.depth >= ?{}", params.len()));
930    }
931
932    let limit = query.limit.unwrap_or(opts.max_limit).min(opts.max_limit);
933    params.push(QueryValue::Integer(limit as i64));
934    let limit_param = params.len();
935
936    // Register variables
937    if let Some(ref var) = start.variable {
938        var_to_alias.insert(var.clone(), ("s".to_string(), VarKind::Node));
939    }
940    if let Some(ref var) = end.variable {
941        var_to_alias.insert(var.clone(), ("r".to_string(), VarKind::Node));
942    }
943    if let Some(ref var) = edge.variable {
944        var_to_alias.insert(var.clone(), ("e".to_string(), VarKind::Edge));
945    }
946
947    // Build SELECT based on RETURN items
948    let mut select_parts: Vec<String> = Vec::new();
949    let mut has_start = false;
950
951    for item in &query.return_items {
952        let var = item.variable();
953        if let Some((_, kind)) = var_to_alias.get(var) {
954            match item {
955                ReturnItem::Property(_, prop) => {
956                    let is_start = start.variable.as_deref() == Some(var);
957                    if matches!(kind, VarKind::EventNode | VarKind::NoteNode) {
958                        return Err(QueryError::Unsupported(
959                            "synthetic observed_as_* edges cannot be used in variable-length \
960                             patterns; use a fixed-length edge pattern instead"
961                                .into(),
962                        ));
963                    }
964                    if *kind == VarKind::Node {
965                        let tbl = if is_start { "s" } else { "r" };
966                        if is_start {
967                            has_start = true;
968                        }
969                        let col = property_to_column(prop, kind)?;
970                        select_parts.push(format!("{tbl}.{col} AS {var}_{prop}"));
971                    } else {
972                        let col = match prop.as_str() {
973                            "id" => "via_edge",
974                            "relation" => "via_relation",
975                            "weight" => "via_weight",
976                            _ => {
977                                return Err(QueryError::Compile(format!(
978                                    "unknown edge property '{prop}' in RETURN projection. \
979                                     Valid: id, source_id, target_id, relation, weight"
980                                )));
981                            }
982                        };
983                        select_parts.push(format!("t.{col} AS {var}_{prop}"));
984                    }
985                }
986                ReturnItem::Variable(_) => match kind {
987                    VarKind::Node => {
988                        if start.variable.as_deref() == Some(var) {
989                            has_start = true;
990                            select_parts.push(format!(
991                                "s.id AS {var}_id, s.namespace AS {var}_namespace, \
992                                 s.kind AS {var}_kind, s.entity_type AS {var}_entity_type, \
993                                 s.name AS {var}_name, \
994                                 s.properties AS {var}_properties, \
995                                 s.created_at AS {var}_created_at, \
996                                 s.updated_at AS {var}_updated_at"
997                            ));
998                        } else {
999                            select_parts.push(format!(
1000                                "r.id AS {var}_id, r.namespace AS {var}_namespace, \
1001                                 r.kind AS {var}_kind, r.entity_type AS {var}_entity_type, \
1002                                 r.name AS {var}_name, \
1003                                 r.properties AS {var}_properties, \
1004                                 r.created_at AS {var}_created_at, \
1005                                 r.updated_at AS {var}_updated_at"
1006                            ));
1007                        }
1008                    }
1009                    VarKind::EventNode | VarKind::NoteNode => {
1010                        // Synthetic observed_as_* edges require a fixed-length pattern;
1011                        // variable-length recursion over the events/notes tables is not supported.
1012                        return Err(QueryError::Unsupported(
1013                            "synthetic observed_as_* edges cannot be used in variable-length \
1014                             patterns; use a fixed-length edge pattern instead"
1015                                .into(),
1016                        ));
1017                    }
1018                    VarKind::Edge => {
1019                        select_parts.push(format!(
1020                            "t.via_edge AS {var}_id, t.via_relation AS {var}_relation, \
1021                             t.via_weight AS {var}_weight"
1022                        ));
1023                    }
1024                },
1025            }
1026        } else {
1027            return Err(QueryError::Compile(format!(
1028                "unknown variable '{var}' in RETURN clause"
1029            )));
1030        }
1031    }
1032
1033    // Always include traversal metadata
1034    select_parts.push("t.depth AS _depth".to_string());
1035    select_parts.push("t.total_weight AS _total_weight".to_string());
1036
1037    // `s` is optional (only joined if the start variable is projected); `r` is
1038    // always joined because the outer WHERE always references `r.deleted_at`,
1039    // `r.namespace` (and possibly r.kind / r.properties) regardless of whether
1040    // it appears in RETURN.
1041    let join_start = if has_start {
1042        "JOIN entities s ON s.id = t.start_id"
1043    } else {
1044        ""
1045    };
1046    let join_end = "JOIN entities r ON r.id = t.current_id";
1047
1048    let sql = format!(
1049        "WITH RECURSIVE traverse(start_id, current_id, depth, path, total_weight, via_edge, via_relation, via_weight) AS (\
1050             SELECT s.id, {seed_next}, 1, s.id || ',' || {seed_next}, e.weight, \
1051                    e.id, e.relation, e.weight \
1052             FROM entities s \
1053             JOIN graph_edges e ON {seed_join} AND e.deleted_at IS NULL{e_ns_filter}{relation_condition} \
1054             WHERE {start_where} \
1055             UNION ALL \
1056             SELECT t.start_id, {recurse_next}, t.depth + 1, \
1057                    t.path || ',' || {recurse_next}, \
1058                    t.total_weight + e.weight, \
1059                    e.id, e.relation, e.weight \
1060             FROM traverse t \
1061             JOIN graph_edges e ON {recurse_join} AND e.deleted_at IS NULL{e_ns_filter}{relation_condition} \
1062             WHERE t.depth < ?{depth_param} \
1063               AND (',' || t.path || ',') NOT LIKE '%,' || {recurse_next} || ',%' \
1064         ) \
1065         SELECT DISTINCT {select_cols} \
1066         FROM traverse t \
1067         {join_start} {join_end} \
1068         WHERE {end_where} \
1069         ORDER BY t.depth, t.total_weight DESC \
1070         LIMIT ?{limit_param}",
1071        seed_next = seed_next,
1072        seed_join = seed_join,
1073        e_ns_filter = e_ns_filter,
1074        relation_condition = relation_condition,
1075        start_where = start_conditions.join(" AND "),
1076        recurse_next = recurse_next,
1077        recurse_join = recurse_join,
1078        depth_param = depth_param,
1079        select_cols = select_parts.join(", "),
1080        join_start = join_start,
1081        join_end = join_end,
1082        end_where = end_conditions.join(" AND "),
1083        limit_param = limit_param,
1084    );
1085
1086    Ok(CompiledQuery {
1087        sql,
1088        params,
1089        return_vars: query.return_items.clone(),
1090        warnings: Vec::new(),
1091    })
1092}
1093
/// What a pattern variable is bound to; selects the column whitelist used
/// when the variable's properties are projected in RETURN.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum VarKind {
    /// Ordinary graph node.
    Node,
    /// Node that maps to the `events` table (synthetic edge source, ADR-041 §8).
    EventNode,
    /// Node that maps to the `notes` table (synthetic edge target, ADR-041 §8).
    NoteNode,
    /// Graph edge.
    Edge,
}
1103
// RETURN-projection whitelists, one per `VarKind`. Element order is
// significant: it is the order in which valid names are listed in the
// "unknown property" error message.

/// Projectable columns for ordinary nodes (`VarKind::Node`).
const NODE_COLUMNS: &[&str] = &[
    "id",
    "name",
    "kind",
    "entity_type",
    "namespace",
    "description",
    "properties",
    "created_at",
    "updated_at",
];

/// Projectable columns for nodes backed by the `events` table (ADR-041 §8).
const EVENT_COLUMNS: &[&str] = &[
    "id",
    "namespace",
    "verb",
    "substrate",
    "actor",
    "kind",
    "outcome",
    "payload",
    "duration_us",
    "target_id",
    "session_id",
    "created_at",
];

/// Projectable columns for nodes backed by the `notes` table (ADR-041 §8 targets).
const NOTE_COLUMNS: &[&str] = &[
    "id",
    "namespace",
    "kind",
    "status",
    "name",
    "content",
    "salience",
    "decay_factor",
    "properties",
    "created_at",
    "updated_at",
];

/// Projectable columns for edges (`VarKind::Edge`).
const EDGE_COLUMNS: &[&str] = &["id", "source_id", "target_id", "relation", "weight"];
1145
1146fn property_to_column<'a>(prop: &'a str, kind: &VarKind) -> Result<&'a str, QueryError> {
1147    let (valid, kind_name) = match kind {
1148        VarKind::Node => (NODE_COLUMNS, "node"),
1149        VarKind::NoteNode => (NOTE_COLUMNS, "note"),
1150        VarKind::EventNode => (EVENT_COLUMNS, "event"),
1151        VarKind::Edge => (EDGE_COLUMNS, "edge"),
1152    };
1153    if valid.contains(&prop) {
1154        Ok(prop)
1155    } else {
1156        Err(QueryError::Compile(format!(
1157            "unknown {kind_name} property '{prop}' in RETURN projection. \
1158             Valid: {}",
1159            valid.join(", ")
1160        )))
1161    }
1162}
1163
1164#[cfg(test)]
1165mod tests {
1166    use super::*;
1167    use crate::parsers::gql;
1168
1169    fn opts() -> CompileOptions {
1170        CompileOptions::default()
1171    }
1172
1173    fn scoped(namespace: &str) -> CompileOptions {
1174        CompileOptions {
1175            scopes: vec![namespace.to_string()],
1176            max_limit: 500,
1177        }
1178    }
1179
1180    #[test]
1181    fn fixed_length_basic() {
1182        let q =
1183            gql::parse("MATCH (a:concept)-[e:introduced_by]->(b:paper) RETURN a, e, b LIMIT 10")
1184                .unwrap();
1185        let compiled = compile(&q, &opts()).unwrap();
1186        assert!(compiled.sql.contains("JOIN graph_edges"));
1187        assert!(compiled.sql.contains("LIMIT"));
1188        assert_eq!(
1189            compiled.return_vars,
1190            vec![
1191                ReturnItem::Variable("a".into()),
1192                ReturnItem::Variable("e".into()),
1193                ReturnItem::Variable("b".into()),
1194            ]
1195        );
1196        // No recursive CTE for fixed-length
1197        assert!(!compiled.sql.contains("WITH RECURSIVE"));
1198    }
1199
1200    #[test]
1201    fn namespace_scoping_injected() {
1202        // Namespace must come from opts, never from the query
1203        let q =
1204            gql::parse("MATCH (a:concept)-[e:introduced_by]->(b:paper) RETURN a LIMIT 5").unwrap();
1205        let compiled = compile(&q, &scoped("research")).unwrap();
1206        assert!(compiled.sql.contains("namespace"));
1207        // The namespace value must appear as a parameter, not a literal in SQL
1208        let has_ns_param = compiled
1209            .params
1210            .iter()
1211            .any(|p| matches!(p, QueryValue::Text(s) if s == "research"));
1212        assert!(has_ns_param, "namespace must be a bound parameter");
1213    }
1214
1215    #[test]
1216    fn edge_property_whitelist_rejects_unknown() {
1217        // MAJ-1: only 'relation' and 'weight' are queryable edge properties
1218        let q = gql::parse("MATCH (a)-[e:introduced_by]->(b) WHERE e.source_id = 'x' RETURN a")
1219            .unwrap();
1220        let result = compile(&q, &opts());
1221        assert!(result.is_err());
1222        let err = result.unwrap_err().to_string();
1223        assert!(
1224            err.contains("source_id") || err.contains("not queryable"),
1225            "error: {err}"
1226        );
1227    }
1228
1229    #[test]
1230    fn edge_property_relation_allowed() {
1231        let q = gql::parse("MATCH (a)-[e]->(b) WHERE e.relation = 'extends' RETURN a").unwrap();
1232        let result = compile(&q, &opts());
1233        assert!(
1234            result.is_ok(),
1235            "relation should be allowed: {:?}",
1236            result.err()
1237        );
1238    }
1239
1240    #[test]
1241    fn edge_property_weight_allowed() {
1242        let q = gql::parse("MATCH (a)-[e]->(b) WHERE e.weight > 0.5 RETURN a").unwrap();
1243        let result = compile(&q, &opts());
1244        assert!(
1245            result.is_ok(),
1246            "weight should be allowed: {:?}",
1247            result.err()
1248        );
1249    }
1250
1251    #[test]
1252    fn variable_length_uses_cte() {
1253        let q =
1254            gql::parse("MATCH (a {name: 'LoRA'})-[:extends*1..3]->(b) RETURN b LIMIT 20").unwrap();
1255        let compiled = compile(&q, &opts()).unwrap();
1256        assert!(compiled.sql.contains("WITH RECURSIVE"));
1257        assert!(compiled.sql.contains("traverse"));
1258    }
1259
1260    #[test]
1261    fn depth_cap_at_ten_rejects_above_max() {
1262        // ADR-008 §"Depth limits": exceeding MAX_DEPTH is an InvalidInput error at
1263        // validation time — the compiler never sees a query with depth > 10.
1264        let q = gql::parse("MATCH (a)-[:extends*1..50]->(b) RETURN b").unwrap();
1265        let err = compile(&q, &opts()).unwrap_err();
1266        assert!(
1267            matches!(err, QueryError::InvalidInput(_)),
1268            "expected InvalidInput for depth > 10, got {err:?}"
1269        );
1270    }
1271
1272    #[test]
1273    fn depth_within_cap_compiles() {
1274        // depth *1..10 is at the cap — must compile successfully.
1275        let q = gql::parse("MATCH (a)-[:extends*1..10]->(b) RETURN b").unwrap();
1276        let compiled = compile(&q, &opts()).unwrap();
1277        assert!(compiled.sql.contains("WITH RECURSIVE"));
1278        // The depth parameter must equal 10
1279        let depth_val = compiled.params.iter().find_map(|p| {
1280            if let QueryValue::Integer(n) = p {
1281                Some(*n)
1282            } else {
1283                None
1284            }
1285        });
1286        assert_eq!(depth_val, Some(10), "depth param should be 10");
1287    }
1288
1289    #[test]
1290    fn limit_capped_by_max_limit() {
1291        // Query requests 1000, max_limit is 500 — result should be 500
1292        let q = gql::parse("MATCH (a:concept)-[e]->(b) RETURN a LIMIT 1000").unwrap();
1293        let compiled = compile(&q, &opts()).unwrap();
1294        let limit_param = compiled.params.last().unwrap();
1295        assert!(
1296            matches!(limit_param, QueryValue::Integer(500)),
1297            "expected Integer(500), got {limit_param:?}"
1298        );
1299    }
1300
1301    #[test]
1302    fn compile_rejects_unknown_relation() {
1303        let q = gql::parse("MATCH (a)-[:not_a_relation]->(b) RETURN a").unwrap();
1304        let err = compile(&q, &opts()).unwrap_err();
1305        let msg = err.to_string();
1306        assert!(msg.contains("not_a_relation"), "msg: {msg}");
1307    }
1308
1309    #[test]
1310    fn compile_unknown_kind_passes_through() {
1311        // Pack-agnostic: any string is accepted as an entity kind at the query layer.
1312        // Validation is a pack-handler concern.
1313        let q = gql::parse("MATCH (a:gizmo)-[:extends]->(b) RETURN a").unwrap();
1314        let compiled = compile(&q, &opts()).unwrap();
1315        let has_gizmo = compiled
1316            .params
1317            .iter()
1318            .any(|p| matches!(p, QueryValue::Text(s) if s == "gizmo"));
1319        assert!(
1320            has_gizmo,
1321            "pack-agnostic: unknown kind must pass through into SQL params"
1322        );
1323    }
1324
1325    #[test]
1326    fn compile_kind_passes_through_unchanged() {
1327        // Pack-agnostic: 'paper' is no longer normalized to 'document' at the query layer.
1328        // The string passes through as-is.
1329        let q =
1330            gql::parse("MATCH (a:paper)-[:introduced_by]->(b:concept) RETURN a LIMIT 1").unwrap();
1331        let compiled = compile(&q, &opts()).unwrap();
1332        let has_paper = compiled
1333            .params
1334            .iter()
1335            .any(|p| matches!(p, QueryValue::Text(s) if s == "paper"));
1336        assert!(
1337            has_paper,
1338            "kind 'paper' must pass through unchanged into SQL params"
1339        );
1340    }
1341
1342    #[test]
1343    fn compile_rejects_namespace_in_where() {
1344        let q =
1345            gql::parse("MATCH (a:concept)-[:extends]->(b) WHERE a.namespace = 'other' RETURN a")
1346                .unwrap();
1347        let err = compile(&q, &opts()).unwrap_err();
1348        assert!(err.to_string().contains("namespace"), "msg: {err}");
1349    }
1350
1351    #[test]
1352    fn compile_rejects_unknown_relation_in_where() {
1353        let q = gql::parse("MATCH (a)-[e:extends]->(b) WHERE e.relation = 'related_to' RETURN a")
1354            .unwrap();
1355        let err = compile(&q, &opts()).unwrap_err();
1356        assert!(err.to_string().contains("related_to"), "msg: {err}");
1357    }
1358
1359    #[test]
1360    fn compile_kind_in_where_passes_through_unchanged() {
1361        // Pack-agnostic: kind strings in WHERE conditions pass through as-is.
1362        let q = gql::parse("MATCH (a)-[:extends]->(b) WHERE a.kind = 'paper' RETURN a").unwrap();
1363        let compiled = compile(&q, &opts()).unwrap();
1364        let has_paper = compiled
1365            .params
1366            .iter()
1367            .any(|p| matches!(p, QueryValue::Text(s) if s == "paper"));
1368        assert!(
1369            has_paper,
1370            "kind 'paper' must pass through unchanged into SQL params"
1371        );
1372    }
1373
1374    #[test]
1375    fn variable_length_return_start_only_joins_end_entity() {
1376        // Even when only the start variable is projected, the outer query
1377        // references `r.deleted_at` / `r.namespace`, so entities r must be
1378        // joined unconditionally.
1379        let q = gql::parse("MATCH (a:concept)-[:extends*1..3]->(b) RETURN a LIMIT 10").unwrap();
1380        let compiled = compile(&q, &opts()).unwrap();
1381        assert!(
1382            compiled.sql.contains("JOIN entities r"),
1383            "entities r must always be joined when r.* conditions are emitted; sql: {}",
1384            compiled.sql
1385        );
1386    }
1387
1388    #[test]
1389    fn variable_length_trailing_pattern_unsupported() {
1390        let q = gql::parse("MATCH (a)-[:extends*1..3]->(b)-[:implements]->(c) RETURN b").unwrap();
1391        let err = compile(&q, &opts()).unwrap_err();
1392        assert!(
1393            matches!(err, QueryError::Unsupported(_)),
1394            "expected Unsupported, got {err:?}"
1395        );
1396    }
1397
1398    #[test]
1399    fn variable_length_mixed_chain_unsupported() {
1400        // Mixed fixed + variable in one chain — has_variable_length() triggers
1401        // the variable-length path, which must reject because edges.len() > 1.
1402        let q = gql::parse("MATCH (a)-[:extends]->(b)-[:implements*1..2]->(c) RETURN c").unwrap();
1403        let err = compile(&q, &opts()).unwrap_err();
1404        assert!(matches!(err, QueryError::Unsupported(_)), "got {err:?}");
1405    }
1406
1407    #[test]
1408    fn sparql_star_rejected_as_unsupported() {
1409        use crate::parsers::sparql;
1410        let err = sparql::parse("SELECT ?a ?b WHERE { ?a :extends* ?b . }").unwrap_err();
1411        assert!(matches!(err, QueryError::Unsupported(_)), "got {err:?}");
1412    }
1413
1414    /// Regression guard for ISSUE #231.
1415    ///
1416    /// Verifies the full SPARQL subject→predicate→object direction contract:
1417    ///   ?a :extends ?b  must compile so that ?a binds `source_id` and ?b binds `target_id`.
1418    ///
1419    /// A swap (subject→target_id, object→source_id) would cause a query for
1420    /// A–extends→B to return rows where B–extends→A, silently returning wrong results.
1421    #[test]
1422    fn sparql_subject_object_direction_compiles_outbound() {
1423        use crate::parsers::sparql;
1424
1425        let q = sparql::parse("SELECT ?a ?b WHERE { ?a :extends ?b . }").unwrap();
1426        let compiled = compile(&q, &opts()).unwrap();
1427
1428        assert!(
1429            compiled
1430                .sql
1431                .contains("JOIN graph_edges e0 ON e0.source_id = n0.id"),
1432            "SPARQL subject must bind graph_edges.source_id; sql: {}",
1433            compiled.sql
1434        );
1435        assert!(
1436            compiled
1437                .sql
1438                .contains("JOIN entities n1 ON n1.id = e0.target_id"),
1439            "SPARQL object must bind graph_edges.target_id; sql: {}",
1440            compiled.sql
1441        );
1442        assert!(
1443            compiled.sql.contains("e0.relation = ?1"),
1444            "SPARQL predicate must bind graph_edges.relation; sql: {}",
1445            compiled.sql
1446        );
1447    }
1448
1449    #[test]
1450    fn return_property_projection_compiles() {
1451        let q =
1452            gql::parse("MATCH (a:concept)-[e:extends]->(b:concept) RETURN a.name, b.name LIMIT 5")
1453                .unwrap();
1454        let compiled = compile(&q, &opts()).unwrap();
1455        // Node aliases are n0, n1; the SQL uses `alias.col AS var_prop`
1456        assert!(
1457            compiled.sql.contains(".name AS a_name"),
1458            "sql: {}",
1459            compiled.sql
1460        );
1461        assert!(
1462            compiled.sql.contains(".name AS b_name"),
1463            "sql: {}",
1464            compiled.sql
1465        );
1466        assert!(
1467            !compiled.sql.contains("a_kind"),
1468            "should not emit full node columns"
1469        );
1470    }
1471
#[test]
fn return_unknown_node_property_rejected() {
    // `domain` is not an accepted node property, so compilation must fail
    // with a `Compile` error that names it.
    let query = gql::parse("MATCH (a:concept)-[:extends]->(b) RETURN a.domain LIMIT 5").unwrap();
    let err = compile(&query, &opts()).unwrap_err();
    let names_the_property = match &err {
        QueryError::Compile(msg) => msg.contains("unknown node property 'domain'"),
        _ => false,
    };
    assert!(names_the_property, "got {err:?}");
}
1481
#[test]
fn return_unknown_edge_property_rejected() {
    // `label` is not an accepted edge property, so compilation must fail
    // with a `Compile` error that names it.
    let query = gql::parse("MATCH (a)-[e:extends]->(b) RETURN e.label LIMIT 5").unwrap();
    let err = compile(&query, &opts()).unwrap_err();
    let names_the_property = if let QueryError::Compile(reason) = &err {
        reason.contains("unknown edge property 'label'")
    } else {
        false
    };
    assert!(names_the_property, "got {err:?}");
}
1491
#[test]
fn return_valid_edge_property_compiles() {
    let query =
        gql::parse("MATCH (a)-[e:extends]->(b) RETURN e.relation, e.weight LIMIT 5").unwrap();
    let compiled = compile(&query, &opts()).unwrap();
    let sql = compiled.sql.as_str();

    // Both queryable edge columns must be projected as `<edge alias>.<col> AS e_<col>`.
    let projects_relation = sql.contains(".relation AS e_relation");
    assert!(projects_relation, "sql: {sql}");

    let projects_weight = sql.contains(".weight AS e_weight");
    assert!(projects_weight, "sql: {sql}");
}
1509
#[test]
fn entity_type_compiles_as_direct_column_not_json_extract() {
    // An `entity_type` key inside a node pattern must be compared against the
    // `entity_type` column directly (`alias.entity_type = ?N`) rather than being
    // pulled out of the properties blob with json_extract.
    let query = gql::parse("MATCH (n:document {entity_type: 'paper'})-[:extends]->(m) RETURN n")
        .unwrap();
    let compiled = compile(&query, &opts()).unwrap();
    let sql = compiled.sql.as_str();

    let uses_direct_column = sql.contains(".entity_type = ?");
    assert!(
        uses_direct_column,
        "entity_type must compile to a direct column comparison; sql: {sql}"
    );

    let uses_json_extract = sql.contains("json_extract");
    assert!(
        !uses_json_extract,
        "entity_type must NOT use json_extract; sql: {sql}"
    );

    // The literal must travel as a bound parameter, not inline SQL text.
    let mut has_paper_param = false;
    for param in compiled.params.iter() {
        if matches!(param, QueryValue::Text(s) if s == "paper") {
            has_paper_param = true;
            break;
        }
    }
    assert!(
        has_paper_param,
        "entity_type value 'paper' must appear as a bound parameter"
    );
}
1536
1537    // --- F047: OR support in WHERE clause (ADR-008 §"GQL WHERE expression") ---
1538
#[test]
fn where_or_compiles_to_sql_or() {
    let query = gql::parse(
        "MATCH (a:concept)-[e:extends]->(b) WHERE a.name = 'LoRA' OR a.name = 'QLoRA' RETURN a",
    )
    .unwrap();
    let compiled = compile(&query, &opts()).unwrap();
    let sql = compiled.sql.as_str();

    assert!(sql.contains(" OR "), "WHERE OR must produce SQL OR; sql: {sql}");

    // Both operands of the OR must be present among the bound text parameters.
    let is_bound = |needle: &str| {
        compiled
            .params
            .iter()
            .any(|p| matches!(p, QueryValue::Text(s) if s == needle))
    };
    let has_lora = is_bound("LoRA");
    let has_qlora = is_bound("QLoRA");
    assert!(has_lora && has_qlora, "both OR values must be bound params");
}
1561
#[test]
fn where_and_or_precedence() {
    // Input has the shape `a AND b OR c`; the intended grouping is `(a AND b) OR c`.
    let query = gql::parse(
        "MATCH (a:concept)-[e:extends]->(b) WHERE a.name = 'X' AND a.kind = 'concept' OR b.kind = 'project' RETURN a"
    ).unwrap();
    let compiled = compile(&query, &opts()).unwrap();
    let sql = compiled.sql.as_str();

    // NOTE(review): this only confirms that an OR survives compilation; the
    // grouping of the AND operands is not inspected by this assertion.
    let emits_or = sql.contains(" OR ");
    assert!(emits_or, "expected OR in sql; sql: {sql}");
}
1576
1577    // --- F218: event_observations synthetic edge support (ADR-041 §8) ---
1578
#[test]
fn synthetic_edge_joins_event_observations() {
    let query = gql::parse("MATCH (ev)-[:observed_as_selected]->(m:memory) RETURN ev, m").unwrap();
    let compiled = compile(&query, &opts()).unwrap();
    let sql = compiled.sql.as_str();

    // An `observed_as_*` relation must be served by event_observations only.
    let joins_observations = sql.contains("event_observations");
    assert!(
        joins_observations,
        "synthetic edge must join event_observations; sql: {sql}"
    );
    let joins_graph_edges = sql.contains("graph_edges");
    assert!(
        !joins_graph_edges,
        "synthetic edge must NOT join graph_edges; sql: {sql}"
    );

    // The role derived from the relation name travels as a bound parameter.
    let has_role_param = compiled.params.iter().any(|param| match param {
        QueryValue::Text(s) => s == "selected",
        _ => false,
    });
    assert!(has_role_param, "role 'selected' must be a bound parameter");
}
1599
1600    // CRIT-1 regression: event source node must bind to `events` table, not `entities`.
1601    // Previously `FROM entities n0 JOIN event_observations e0 ON e0.event_id = n0.id`
1602    // was emitted — IDs are disjoint so every query returned zero rows.
#[test]
fn synthetic_edge_event_source_binds_events_table() {
    // Regression guard: the source node of an `observed_as_*` edge must be read
    // from `events`; joining event_observations onto `entities` is the bug.
    let query = gql::parse("MATCH (ev)-[:observed_as_selected]->(m:memory) RETURN ev, m").unwrap();
    let compiled = compile(&query, &opts()).unwrap();
    let sql = compiled.sql.as_str();

    assert!(
        sql.contains("FROM events "),
        "CRIT-1: event source must come FROM events table, not entities; sql: {sql}"
    );

    // Look for the offending FROM/JOIN fragment anywhere in the statement. A
    // prefix check anchored on `SELECT *` only fires if the projection is a
    // star, whereas sibling tests expect explicit `… AS var_col` projections,
    // so such a check would pass even on the regressed SQL.
    let joins_via_entities = sql.contains("FROM entities n0 JOIN event_observations");
    assert!(
        !joins_via_entities,
        "CRIT-1: must not join events via entities table; sql: {sql}"
    );
}
1620
#[test]
fn synthetic_edge_event_observation_join_uses_events_id() {
    // event_observations.event_id has to be matched against the id of the
    // events-backed alias, never against an entities-backed alias.
    let query = gql::parse("MATCH (ev)-[:observed_as_selected]->(m) RETURN m").unwrap();
    let compiled = compile(&query, &opts()).unwrap();
    let sql = compiled.sql.as_str();

    // `ev` is the first node, so its alias is n0.
    let expected_join = "JOIN event_observations e0 ON e0.event_id = n0.id";
    assert!(
        sql.contains(expected_join),
        "CRIT-1: event_observations must join on events.id (n0 is now events); sql: {sql}"
    );
}
1636
#[test]
fn synthetic_edge_event_node_projects_event_columns() {
    // Returning the event variable must project event-table columns
    // (verb, outcome, …) and none of the entity columns (name, properties, …).
    let query = gql::parse("MATCH (ev)-[:observed_as_selected]->(m) RETURN ev").unwrap();
    let compiled = compile(&query, &opts()).unwrap();
    let sql = compiled.sql.as_str();

    assert!(
        sql.contains("ev_verb"),
        "CRIT-1: event variable must project verb column; sql: {sql}"
    );
    assert!(
        sql.contains("ev_outcome"),
        "CRIT-1: event variable must project outcome column; sql: {sql}"
    );

    // `ev_name` is matched only when followed by a comma or a space.
    let projects_entity_name = sql.contains("ev_name,") || sql.contains("ev_name ");
    assert!(
        !projects_entity_name,
        "CRIT-1: event variable must NOT project entity name column; sql: {sql}"
    );

    let projects_entity_properties = sql.contains("ev_properties");
    assert!(
        !projects_entity_properties,
        "CRIT-1: event variable must NOT project entity properties column; sql: {sql}"
    );
}
1664
#[test]
fn synthetic_edge_namespace_filter_on_events_table() {
    // MIN-2: with a scoped compile, the namespace filter has to be applied to the
    // events-backed alias (n0) as well as to the entities-backed target (n1).
    let query = gql::parse("MATCH (ev)-[:observed_as_selected]->(m) RETURN m").unwrap();
    let compiled = compile(&query, &scoped("test-ns")).unwrap();

    // Count how many bound parameters carry the namespace value; one per filter.
    let mut ns_count: usize = 0;
    for param in compiled.params.iter() {
        if matches!(param, QueryValue::Text(s) if s == "test-ns") {
            ns_count += 1;
        }
    }
    assert!(
        ns_count >= 2,
        "MIN-2: namespace must be filtered on both events and target; params: {:?}",
        compiled.params
    );
}
1684
#[test]
fn synthetic_edge_candidate_role() {
    // Same shape as the `selected` case, for the `candidate` role.
    let query = gql::parse("MATCH (ev)-[:observed_as_candidate]->(m) RETURN ev, m").unwrap();
    let compiled = compile(&query, &opts()).unwrap();
    let sql = compiled.sql.as_str();

    assert!(sql.contains("event_observations"), "sql: {sql}");

    let has_candidate = compiled.params.iter().any(|param| match param {
        QueryValue::Text(s) => s == "candidate",
        _ => false,
    });
    assert!(has_candidate, "role 'candidate' must be bound");
}
1700
#[test]
fn synthetic_edge_multi_role() {
    // Several `observed_as_*` relations on one edge compile to a `role IN (...)`
    // predicate over event_observations.
    let query =
        gql::parse("MATCH (ev)-[:observed_as_candidate|observed_as_selected]->(m) RETURN m")
            .unwrap();
    let compiled = compile(&query, &opts()).unwrap();
    let sql = compiled.sql.as_str();

    assert!(sql.contains("event_observations"), "sql: {sql}");

    // Match `IN` as a standalone keyword. A plain substring search for "IN" is
    // satisfied by the `JOIN` that is always present in the statement, so it
    // would pass even if no IN predicate were emitted.
    let has_in_keyword = sql
        .split(|c: char| !(c.is_ascii_alphanumeric() || c == '_'))
        .any(|token| token == "IN");
    assert!(has_in_keyword, "multi-role must use IN; sql: {sql}");
}
1719
#[test]
fn mixed_synthetic_and_canonical_rejected() {
    // One edge pattern may not combine an `observed_as_*` relation with a
    // regular relation such as `extends`.
    let query = gql::parse("MATCH (ev)-[:observed_as_selected|extends]->(m) RETURN m").unwrap();
    let err = compile(&query, &opts()).unwrap_err();
    let is_compile_error = match &err {
        QueryError::Compile(_) => true,
        _ => false,
    };
    assert!(
        is_compile_error,
        "mixed synthetic+canonical must be rejected; got {err:?}"
    );
}
1729
#[test]
fn synthetic_edge_inbound_rejected() {
    // The synthetic edge is written right-to-left here (`<-[...]-`), which the
    // compiler must refuse with a `Compile` error.
    let query = gql::parse("MATCH (m)<-[:observed_as_selected]-(ev) RETURN m").unwrap();
    let err = compile(&query, &opts()).unwrap_err();
    let is_compile_error = if let QueryError::Compile(_) = &err {
        true
    } else {
        false
    };
    assert!(
        is_compile_error,
        "inbound synthetic edge must be rejected; got {err:?}"
    );
}
1739
1740    // --- MAJ-1: OR spanning both endpoints in variable-length patterns must be rejected ---
1741
#[test]
fn variable_length_or_across_endpoints_rejected() {
    // MAJ-1: in a variable-length pattern, an OR whose operands reference both
    // endpoints (`a` and `b`) must fail with `Unsupported` instead of being
    // silently compiled as an AND.
    let query = gql::parse(
        "MATCH (a)-[:extends*1..3]->(b) WHERE a.name = 'X' OR b.name = 'Y' RETURN a",
    )
    .unwrap();
    let outcome = compile(&query, &opts());

    let is_unsupported = matches!(&outcome, Err(QueryError::Unsupported(_)));
    assert!(
        is_unsupported,
        "MAJ-1: OR spanning both endpoints must return Unsupported; got {outcome:?}"
    );

    // The message has to tell the caller how to work around the restriction.
    let err_msg = outcome.unwrap_err().to_string();
    let is_actionable = ["separate queries", "one endpoint"]
        .iter()
        .any(|hint| err_msg.contains(*hint));
    assert!(is_actionable, "error must be actionable; got: {err_msg}");
}
1761
#[test]
fn variable_length_or_single_endpoint_still_works() {
    // Both OR operands reference the same endpoint (`a`), which stays supported
    // in a variable-length pattern.
    let query = gql::parse(
        "MATCH (a)-[:extends*1..3]->(b) WHERE a.name = 'X' OR a.name = 'Y' RETURN a",
    )
    .unwrap();
    let outcome = compile(&query, &opts());
    if outcome.is_err() {
        panic!("single-endpoint OR must compile; got {outcome:?}");
    }
}
1775
#[test]
fn variable_length_and_across_endpoints_still_works() {
    // A conjunction may reference both endpoints of a variable-length pattern;
    // only the cross-endpoint OR form is refused.
    let query = gql::parse(
        "MATCH (a)-[:extends*1..3]->(b) WHERE a.name = 'X' AND b.name = 'Y' RETURN a",
    )
    .unwrap();
    let outcome = compile(&query, &opts());
    let compiled_ok = matches!(&outcome, Ok(_));
    assert!(
        compiled_ok,
        "AND across endpoints must compile; got {outcome:?}"
    );
}
1789}