// uni_cypher/locy_ast.rs

use serde::{Deserialize, Serialize};

use crate::ast::{Direction, Expr, Pattern, Query, ReturnClause, UnaryOp};
4
5/// A complete Locy program: optional module header, imports, and body statements.
6#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
7pub struct LocyProgram {
8    pub module: Option<ModuleDecl>,
9    pub uses: Vec<UseDecl>,
10    pub statements: Vec<LocyStatement>,
11}
12
13/// A dotted name like `acme.compliance.rules`.
14#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
15pub struct QualifiedName {
16    pub parts: Vec<String>,
17}
18
19impl std::fmt::Display for QualifiedName {
20    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
21        write!(f, "{}", self.parts.join("."))
22    }
23}
24
25/// `MODULE acme.compliance`
26#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
27pub struct ModuleDecl {
28    pub name: QualifiedName,
29}
30
31/// `USE acme.common` or `USE acme.common { control, reachable }`
32#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
33pub struct UseDecl {
34    pub name: QualifiedName,
35    /// `None` = glob import (all rules), `Some(vec)` = selective imports.
36    pub imports: Option<Vec<String>>,
37}
38
39/// A top-level statement in a Locy program.
40#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
41pub enum LocyStatement {
42    /// A standard Cypher query (passthrough).
43    Cypher(Query),
44    /// `CREATE RULE ... AS ...`
45    Rule(RuleDefinition),
46    /// `QUERY ruleName WHERE expr RETURN ...`
47    GoalQuery(GoalQuery),
48    /// `DERIVE ruleName WHERE ...`
49    DeriveCommand(DeriveCommand),
50    /// `ASSUME { mutations } THEN body`
51    AssumeBlock(AssumeBlock),
52    /// `ABDUCE [NOT] ruleName WHERE expr RETURN ...`
53    AbduceQuery(AbduceQuery),
54    /// `EXPLAIN RULE ruleName WHERE expr RETURN ...`
55    ExplainRule(ExplainRule),
56    /// `CREATE MODEL name AS INPUT (...) FEATURES ... OUTPUT type name USING xervo('...')`
57    /// Phase B neural-predicate preview. The grammar always parses this;
58    /// the compiler rejects it unless `LocyConfig::neural_predicates_preview`
59    /// is set.
60    Model(ModelDefinition),
61    /// `CALIBRATE name ON MATCH pattern [WHERE ...] TARGET expr METHOD method [HOLDOUT 0.2]`
62    /// Phase C C2 calibration statement.
63    Calibrate(CalibrateCommand),
64    /// `VALIDATE name ON MATCH pattern [WHERE ...] TARGET expr METRICS m1, m2, ...`
65    /// Phase C C3 validation statement.
66    Validate(ValidateCommand),
67}
68
69// ═══════════════════════════════════════════════════════════════════════════
70// RULE DEFINITION
71// ═══════════════════════════════════════════════════════════════════════════
72
73/// `CREATE RULE name [PRIORITY n] AS MATCH pattern [WHERE conds] [ALONG ...] [FOLD ...] [WHERE having] [BEST BY ...] YIELD/DERIVE ...`
74#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
75pub struct RuleDefinition {
76    pub name: QualifiedName,
77    pub priority: Option<i64>,
78    pub match_pattern: Pattern,
79    pub where_conditions: Vec<RuleCondition>,
80    pub along: Vec<AlongBinding>,
81    pub fold: Vec<FoldBinding>,
82    /// Post-FOLD filter conditions (HAVING semantics). These filter on
83    /// aggregate results after FOLD computation.
84    pub having: Vec<Expr>,
85    pub best_by: Option<BestByClause>,
86    pub output: RuleOutput,
87}
88
89/// A condition in a rule WHERE clause.
90#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
91pub enum RuleCondition {
92    /// `x IS rule`, `x IS rule TO y`, `(x,y) IS rule`
93    IsReference(IsReference),
94    /// A standard Cypher expression used as a boolean condition.
95    Expression(Expr),
96}
97
98/// An IS rule reference in various forms.
99#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
100pub struct IsReference {
101    pub subjects: Vec<String>,
102    pub rule_name: QualifiedName,
103    pub target: Option<String>,
104    pub negated: bool,
105}
106
107// ═══════════════════════════════════════════════════════════════════════════
108// ALONG (path-carried values)
109// ═══════════════════════════════════════════════════════════════════════════
110
111/// `name = along_expression`
112#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
113pub struct AlongBinding {
114    pub name: String,
115    pub expr: LocyExpr,
116}
117
118/// Locy expression: extends Cypher expressions with `prev.field`.
119#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
120pub enum LocyExpr {
121    /// `prev.fieldName` — reference to previous hop's value.
122    PrevRef(String),
123    /// A standard Cypher expression.
124    Cypher(Expr),
125    /// Binary operation between Locy expressions.
126    BinaryOp {
127        left: Box<LocyExpr>,
128        op: LocyBinaryOp,
129        right: Box<LocyExpr>,
130    },
131    /// Unary operation (NOT, negation).
132    UnaryOp(UnaryOp, Box<LocyExpr>),
133}
134
135#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
136pub enum LocyBinaryOp {
137    Add,
138    Sub,
139    Mul,
140    Div,
141    Mod,
142    Pow,
143    And,
144    Or,
145    Xor,
146    // Comparisons are handled via Cypher expression re-parse
147}
148
149// ═══════════════════════════════════════════════════════════════════════════
150// FOLD (aggregation)
151// ═══════════════════════════════════════════════════════════════════════════
152
153/// `name = fold_expression`
154#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
155pub struct FoldBinding {
156    pub name: String,
157    pub aggregate: Expr,
158}
159
160// ═══════════════════════════════════════════════════════════════════════════
161// BEST BY (optimized selection)
162// ═══════════════════════════════════════════════════════════════════════════
163
164/// Wrapper for the BEST BY clause items.
165#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
166pub struct BestByClause {
167    pub items: Vec<BestByItem>,
168}
169
170/// `expr [ASC|DESC]`
171#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
172pub struct BestByItem {
173    pub expr: Expr,
174    pub ascending: bool,
175}
176
177// ═══════════════════════════════════════════════════════════════════════════
178// YIELD (rule output schema)
179// ═══════════════════════════════════════════════════════════════════════════
180
181/// Either YIELD items or DERIVE clause as a rule's output.
182#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
183pub enum RuleOutput {
184    Yield(YieldClause),
185    Derive(DeriveClause),
186}
187
188/// Wrapper for the YIELD clause items.
189#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
190pub struct YieldClause {
191    pub items: Vec<LocyYieldItem>,
192}
193
194/// A single YIELD item, possibly marked as KEY or PROB.
195#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
196pub struct LocyYieldItem {
197    pub is_key: bool,
198    pub is_prob: bool,
199    pub expr: Expr,
200    pub alias: Option<String>,
201}
202
203/// Default output column name for a YIELD expression (before de-collision).
204///
205/// A bare variable yields its own name, a property access yields the bare
206/// property name (e.g. `a.id` → `id`), and anything else yields `"?"`.
207///
208/// # Examples
209///
210/// ```
211/// use uni_cypher::ast::Expr;
212/// use uni_cypher::locy_ast::default_yield_name;
213///
214/// let var = Expr::Variable("a".to_string());
215/// assert_eq!(default_yield_name(&var), "a");
216/// ```
217pub fn default_yield_name(expr: &Expr) -> String {
218    match expr {
219        Expr::Variable(name) => name.clone(),
220        Expr::Property(_, prop) => prop.clone(),
221        _ => "?".to_string(),
222    }
223}
224
225/// Resolve the output column name for each YIELD item, de-colliding clashes.
226///
227/// Each item's default name is its alias if present, otherwise
228/// [`default_yield_name`]. When two or more un-aliased property accesses would
229/// collapse onto the same bare property name (e.g. `KEY a.id, KEY b.id` both
230/// defaulting to `id`), the colliding ones are qualified as `<var>_<prop>`
231/// (e.g. `a_id`, `b_id`). Explicit aliases always win and are never rewritten,
232/// and non-colliding names are returned unchanged.
233///
234/// This is the single source of truth for YIELD column naming; the type
235/// checker, planner, and SLG resolver all call it so their column names agree
236/// (the names double as the fixpoint join keys).
237///
238/// # Examples
239///
240/// ```
241/// use uni_cypher::ast::Expr;
242/// use uni_cypher::locy_ast::{resolve_yield_column_names, LocyYieldItem};
243///
244/// let prop = |var: &str| LocyYieldItem {
245///     is_key: true,
246///     is_prob: false,
247///     expr: Expr::Property(Box::new(Expr::Variable(var.to_string())), "id".to_string()),
248///     alias: None,
249/// };
250/// let names = resolve_yield_column_names(&[prop("a"), prop("b")]);
251/// assert_eq!(names, vec!["a_id".to_string(), "b_id".to_string()]);
252/// ```
253pub fn resolve_yield_column_names(items: &[LocyYieldItem]) -> Vec<String> {
254    use std::collections::HashMap;
255
256    let base: Vec<String> = items
257        .iter()
258        .map(|item| {
259            item.alias
260                .clone()
261                .unwrap_or_else(|| default_yield_name(&item.expr))
262        })
263        .collect();
264
265    let mut counts: HashMap<&str, usize> = HashMap::new();
266    for name in &base {
267        *counts.entry(name.as_str()).or_default() += 1;
268    }
269
270    base.iter()
271        .enumerate()
272        .map(|(i, name)| {
273            let item = &items[i];
274            if item.alias.is_none()
275                && counts.get(name.as_str()).copied().unwrap_or(0) > 1
276                && let Expr::Property(object, prop) = &item.expr
277                && let Expr::Variable(var) = object.as_ref()
278            {
279                return format!("{var}_{prop}");
280            }
281            name.clone()
282        })
283        .collect()
284}
285
286// ═══════════════════════════════════════════════════════════════════════════
287// DERIVE (graph derivation in rule heads)
288// ═══════════════════════════════════════════════════════════════════════════
289
290/// `DERIVE pattern, pattern, ...` or `DERIVE MERGE a, b`
291#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
292pub enum DeriveClause {
293    Patterns(Vec<DerivePattern>),
294    Merge(String, String),
295}
296
297#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
298pub struct DerivePattern {
299    pub direction: Direction,
300    pub source: DeriveNodeSpec,
301    pub edge: DeriveEdgeSpec,
302    pub target: DeriveNodeSpec,
303}
304
305#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
306pub struct DeriveNodeSpec {
307    pub is_new: bool,
308    pub variable: String,
309    pub labels: Vec<String>,
310    pub properties: Option<Expr>,
311}
312
313#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
314pub struct DeriveEdgeSpec {
315    pub edge_type: String,
316    pub properties: Option<Expr>,
317}
318
319// ═══════════════════════════════════════════════════════════════════════════
320// GOAL-DIRECTED QUERY
321// ═══════════════════════════════════════════════════════════════════════════
322
323/// `QUERY ruleName [WHERE expr] [RETURN ...]`
324#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
325pub struct GoalQuery {
326    pub rule_name: QualifiedName,
327    pub where_expr: Option<Expr>,
328    pub return_clause: Option<ReturnClause>,
329}
330
331// ═══════════════════════════════════════════════════════════════════════════
332// DERIVE COMMAND (top-level)
333// ═══════════════════════════════════════════════════════════════════════════
334
335/// `DERIVE ruleName [WHERE expr]`
336#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
337pub struct DeriveCommand {
338    pub rule_name: QualifiedName,
339    pub where_expr: Option<Expr>,
340}
341
342// ═══════════════════════════════════════════════════════════════════════════
343// ASSUME BLOCK
344// ═══════════════════════════════════════════════════════════════════════════
345
346/// `ASSUME { mutations } THEN body`
347#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
348pub struct AssumeBlock {
349    pub mutations: Vec<crate::ast::Clause>,
350    pub body: Vec<LocyStatement>,
351}
352
353// ═══════════════════════════════════════════════════════════════════════════
354// ABDUCE QUERY
355// ═══════════════════════════════════════════════════════════════════════════
356
357/// `ABDUCE [NOT] ruleName [WHERE expr] [RETURN ...]`
358#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
359pub struct AbduceQuery {
360    pub negated: bool,
361    pub rule_name: QualifiedName,
362    pub where_expr: Option<Expr>,
363    pub return_clause: Option<ReturnClause>,
364}
365
366// ═══════════════════════════════════════════════════════════════════════════
367// EXPLAIN RULE
368// ═══════════════════════════════════════════════════════════════════════════
369
370/// `EXPLAIN RULE ruleName [WHERE expr] [RETURN ...]`
371#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
372pub struct ExplainRule {
373    pub rule_name: QualifiedName,
374    pub where_expr: Option<Expr>,
375    pub return_clause: Option<ReturnClause>,
376}
377
378// ═══════════════════════════════════════════════════════════════════════════
379// CREATE MODEL (neural predicate, Phase B preview)
380// ═══════════════════════════════════════════════════════════════════════════
381
382/// `CREATE MODEL` declaration. Parses the full surface from impl plan §2.1;
383/// `Conformal` / `Dirichlet` calibration methods are deferred to Phase C.
384#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
385pub struct ModelDefinition {
386    pub name: QualifiedName,
387    pub inputs: Vec<InputBinding>,
388    /// Feature expressions evaluated against input bindings. Empty when
389    /// the `FEATURES` clause is omitted (model receives all bound node
390    /// properties — interpretation deferred to the runtime adapter).
391    pub features: Vec<Expr>,
392    /// Phase D D3: `FEATURES (subject, column) FROM rule_name` pulls
393    /// `column` from a prior-derived relation `rule_name` (keyed by
394    /// `subject`) at runtime, and feeds it as a feature alongside any
395    /// `INPUT` bindings. MVP: at most one path-context feature per
396    /// model, mutually exclusive with the expression-`features` form.
397    pub path_context: Option<PathContextFeature>,
398    pub output: OutputBinding,
399    pub xervo_alias: String,
400    /// Phase D D2 follow-up: optional embedder alias surfaced by the
401    /// `USING xervo('classify/X', embedder='alias')` form. When
402    /// `None`, the runtime falls back to the alias `"default"` for
403    /// `semantic_match` query-text embedding.
404    pub embedder_alias: Option<String>,
405    pub calibration: Option<CalibrationMethod>,
406    pub version: Option<String>,
407    pub annotations: ModelAnnotations,
408}
409
410/// One INPUT binding, e.g. `(s:Supplier)`.
411#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
412pub struct InputBinding {
413    pub variable: String,
414    pub label: Option<String>,
415}
416
417/// Phase D D3: `FEATURES (subject_var, column) FROM source_rule`.
418#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
419pub struct PathContextFeature {
420    pub subject_var: String,
421    pub column: String,
422    pub source_rule: String,
423}
424
425/// The OUTPUT declaration, e.g. `OUTPUT PROB risk`.
426#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
427pub struct OutputBinding {
428    pub output_type: OutputType,
429    pub name: String,
430}
431
432#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
433pub enum OutputType {
434    Prob,
435    Score,
436    Label,
437    Vector,
438}
439
440#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
441pub enum CalibrationMethod {
442    PlattScaling,
443    IsotonicRegression,
444    TemperatureScaling,
445    BetaCalibration,
446    None,
447    /// Phase C C1a: split-conformal predictor. The point prediction
448    /// passes through unchanged; the calibrator carries a
449    /// `(1 - alpha)`-quantile of holdout nonconformity scores which
450    /// gates a per-prediction `ConfidenceBand` at inference. `alpha`
451    /// defaults to 0.1 (90% bands) when omitted.
452    Conformal {
453        alpha: f64,
454    },
455    /// Phase D D-C1d: multi-class Dirichlet calibration. The CALIBRATE
456    /// statement collects per-row `(class_index, score_vector)` pairs
457    /// instead of `(prediction, ground_truth)`. Compiler routes this
458    /// through `MulticlassCalibratorFitter` rather than the binary
459    /// `CalibratorFitter` trait. Method-of-moments fit by default.
460    Dirichlet,
461}
462
463/// Statement-level annotations. Currently only `@independent`, which
464/// suppresses Phase-C F2 shared-neural-input warnings. Parsed in Slice
465/// 1+2; semantically meaningful when F2 lands.
466#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
467pub struct ModelAnnotations {
468    pub independent: bool,
469}
470
471// ═══════════════════════════════════════════════════════════════════════════
472// CALIBRATE COMMAND  (Phase C C2)
473// ═══════════════════════════════════════════════════════════════════════════
474
475/// `CALIBRATE` statement. The runtime collects
476/// `(prediction, ground_truth)` pairs by invoking the registered
477/// classifier for `model_name` over the MATCH pattern, fits the
478/// chosen calibrator on a holdout-split, and returns the fitted
479/// transform + holdout metrics.
480#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
481pub struct CalibrateCommand {
482    pub model_name: QualifiedName,
483    pub pattern: Pattern,
484    pub where_expr: Option<Expr>,
485    pub target_expr: Expr,
486    pub method: CalibrationMethod,
487    /// Holdout fraction (must be in `(0, 1)`). `None` → compiler
488    /// resolves to default 0.2.
489    pub holdout: Option<f64>,
490}
491
492// ═══════════════════════════════════════════════════════════════════════════
493// VALIDATE COMMAND  (Phase C C3)
494// ═══════════════════════════════════════════════════════════════════════════
495
496/// `VALIDATE` statement. Runs the named rule, joins its PROB column
497/// output against the TARGET expression (ground truth), and computes
498/// the requested metrics. Unlike CALIBRATE, this never fits anything
499/// — it just measures.
500#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
501pub struct ValidateCommand {
502    pub rule_name: QualifiedName,
503    pub pattern: Pattern,
504    pub where_expr: Option<Expr>,
505    pub target_expr: Expr,
506    pub metrics: Vec<ValidationMetric>,
507}
508
509/// Supported metrics in `VALIDATE METRICS ...`. Each metric is a
510/// proper scoring rule or a calibration-quality summary; see
511/// `crates/uni-locy/src/calibration.rs` for definitions and
512/// numerical references.
513#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
514pub enum ValidationMetric {
515    BrierScore,
516    LogLoss,
517    /// Naive equal-width-binning ECE. Triggers
518    /// `WarningCode::EceBinningBias` (impl plan §3.4) suggesting
519    /// `DebiasedEce` instead.
520    Ece,
521    /// Debiased ECE per Kumar et al. NeurIPS 2019 — recommended.
522    DebiasedEce,
523    Accuracy,
524    Auc,
525}