// uni_cypher/locy_ast.rs
1use serde::{Deserialize, Serialize};
2
3use crate::ast::{Direction, Expr, Pattern, Query, ReturnClause, UnaryOp};
4
5/// A complete Locy program: optional module header, imports, and body statements.
6#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
7pub struct LocyProgram {
8 pub module: Option<ModuleDecl>,
9 pub uses: Vec<UseDecl>,
10 pub statements: Vec<LocyStatement>,
11}
12
13/// A dotted name like `acme.compliance.rules`.
14#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
15pub struct QualifiedName {
16 pub parts: Vec<String>,
17}
18
19impl std::fmt::Display for QualifiedName {
20 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
21 write!(f, "{}", self.parts.join("."))
22 }
23}
24
25/// `MODULE acme.compliance`
26#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
27pub struct ModuleDecl {
28 pub name: QualifiedName,
29}
30
31/// `USE acme.common` or `USE acme.common { control, reachable }`
32#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
33pub struct UseDecl {
34 pub name: QualifiedName,
35 /// `None` = glob import (all rules), `Some(vec)` = selective imports.
36 pub imports: Option<Vec<String>>,
37}
38
39/// A top-level statement in a Locy program.
40#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
41pub enum LocyStatement {
42 /// A standard Cypher query (passthrough).
43 Cypher(Query),
44 /// `CREATE RULE ... AS ...`
45 Rule(RuleDefinition),
46 /// `QUERY ruleName WHERE expr RETURN ...`
47 GoalQuery(GoalQuery),
48 /// `DERIVE ruleName WHERE ...`
49 DeriveCommand(DeriveCommand),
50 /// `ASSUME { mutations } THEN body`
51 AssumeBlock(AssumeBlock),
52 /// `ABDUCE [NOT] ruleName WHERE expr RETURN ...`
53 AbduceQuery(AbduceQuery),
54 /// `EXPLAIN RULE ruleName WHERE expr RETURN ...`
55 ExplainRule(ExplainRule),
56 /// `CREATE MODEL name AS INPUT (...) FEATURES ... OUTPUT type name USING xervo('...')`
57 /// Phase B neural-predicate preview. The grammar always parses this;
58 /// the compiler rejects it unless `LocyConfig::neural_predicates_preview`
59 /// is set.
60 Model(ModelDefinition),
61 /// `CALIBRATE name ON MATCH pattern [WHERE ...] TARGET expr METHOD method [HOLDOUT 0.2]`
62 /// Phase C C2 calibration statement.
63 Calibrate(CalibrateCommand),
64 /// `VALIDATE name ON MATCH pattern [WHERE ...] TARGET expr METRICS m1, m2, ...`
65 /// Phase C C3 validation statement.
66 Validate(ValidateCommand),
67}
68
// ═══════════════════════════════════════════════════════════════════════════
// RULE DEFINITION
// ═══════════════════════════════════════════════════════════════════════════
72
73/// `CREATE RULE name [PRIORITY n] AS MATCH pattern [WHERE conds] [ALONG ...] [FOLD ...] [WHERE having] [BEST BY ...] YIELD/DERIVE ...`
74#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
75pub struct RuleDefinition {
76 pub name: QualifiedName,
77 pub priority: Option<i64>,
78 pub match_pattern: Pattern,
79 pub where_conditions: Vec<RuleCondition>,
80 pub along: Vec<AlongBinding>,
81 pub fold: Vec<FoldBinding>,
82 /// Post-FOLD filter conditions (HAVING semantics). These filter on
83 /// aggregate results after FOLD computation.
84 pub having: Vec<Expr>,
85 pub best_by: Option<BestByClause>,
86 pub output: RuleOutput,
87}
88
89/// A condition in a rule WHERE clause.
90#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
91pub enum RuleCondition {
92 /// `x IS rule`, `x IS rule TO y`, `(x,y) IS rule`
93 IsReference(IsReference),
94 /// A standard Cypher expression used as a boolean condition.
95 Expression(Expr),
96}
97
98/// An IS rule reference in various forms.
99#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
100pub struct IsReference {
101 pub subjects: Vec<String>,
102 pub rule_name: QualifiedName,
103 pub target: Option<String>,
104 pub negated: bool,
105}
106
// ═══════════════════════════════════════════════════════════════════════════
// ALONG (path-carried values)
// ═══════════════════════════════════════════════════════════════════════════
110
111/// `name = along_expression`
112#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
113pub struct AlongBinding {
114 pub name: String,
115 pub expr: LocyExpr,
116}
117
118/// Locy expression: extends Cypher expressions with `prev.field`.
119#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
120pub enum LocyExpr {
121 /// `prev.fieldName` — reference to previous hop's value.
122 PrevRef(String),
123 /// A standard Cypher expression.
124 Cypher(Expr),
125 /// Binary operation between Locy expressions.
126 BinaryOp {
127 left: Box<LocyExpr>,
128 op: LocyBinaryOp,
129 right: Box<LocyExpr>,
130 },
131 /// Unary operation (NOT, negation).
132 UnaryOp(UnaryOp, Box<LocyExpr>),
133}
134
135#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
136pub enum LocyBinaryOp {
137 Add,
138 Sub,
139 Mul,
140 Div,
141 Mod,
142 Pow,
143 And,
144 Or,
145 Xor,
146 // Comparisons are handled via Cypher expression re-parse
147}
148
// ═══════════════════════════════════════════════════════════════════════════
// FOLD (aggregation)
// ═══════════════════════════════════════════════════════════════════════════
152
153/// `name = fold_expression`
154#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
155pub struct FoldBinding {
156 pub name: String,
157 pub aggregate: Expr,
158}
159
// ═══════════════════════════════════════════════════════════════════════════
// BEST BY (optimized selection)
// ═══════════════════════════════════════════════════════════════════════════
163
164/// Wrapper for the BEST BY clause items.
165#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
166pub struct BestByClause {
167 pub items: Vec<BestByItem>,
168}
169
170/// `expr [ASC|DESC]`
171#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
172pub struct BestByItem {
173 pub expr: Expr,
174 pub ascending: bool,
175}
176
// ═══════════════════════════════════════════════════════════════════════════
// YIELD (rule output schema)
// ═══════════════════════════════════════════════════════════════════════════
180
181/// Either YIELD items or DERIVE clause as a rule's output.
182#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
183pub enum RuleOutput {
184 Yield(YieldClause),
185 Derive(DeriveClause),
186}
187
188/// Wrapper for the YIELD clause items.
189#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
190pub struct YieldClause {
191 pub items: Vec<LocyYieldItem>,
192}
193
194/// A single YIELD item, possibly marked as KEY or PROB.
195#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
196pub struct LocyYieldItem {
197 pub is_key: bool,
198 pub is_prob: bool,
199 pub expr: Expr,
200 pub alias: Option<String>,
201}
202
203/// Default output column name for a YIELD expression (before de-collision).
204///
205/// A bare variable yields its own name, a property access yields the bare
206/// property name (e.g. `a.id` → `id`), and anything else yields `"?"`.
207///
208/// # Examples
209///
210/// ```
211/// use uni_cypher::ast::Expr;
212/// use uni_cypher::locy_ast::default_yield_name;
213///
214/// let var = Expr::Variable("a".to_string());
215/// assert_eq!(default_yield_name(&var), "a");
216/// ```
217pub fn default_yield_name(expr: &Expr) -> String {
218 match expr {
219 Expr::Variable(name) => name.clone(),
220 Expr::Property(_, prop) => prop.clone(),
221 _ => "?".to_string(),
222 }
223}
224
225/// Resolve the output column name for each YIELD item, de-colliding clashes.
226///
227/// Each item's default name is its alias if present, otherwise
228/// [`default_yield_name`]. When two or more un-aliased property accesses would
229/// collapse onto the same bare property name (e.g. `KEY a.id, KEY b.id` both
230/// defaulting to `id`), the colliding ones are qualified as `<var>_<prop>`
231/// (e.g. `a_id`, `b_id`). Explicit aliases always win and are never rewritten,
232/// and non-colliding names are returned unchanged.
233///
234/// This is the single source of truth for YIELD column naming; the type
235/// checker, planner, and SLG resolver all call it so their column names agree
236/// (the names double as the fixpoint join keys).
237///
238/// # Examples
239///
240/// ```
241/// use uni_cypher::ast::Expr;
242/// use uni_cypher::locy_ast::{resolve_yield_column_names, LocyYieldItem};
243///
244/// let prop = |var: &str| LocyYieldItem {
245/// is_key: true,
246/// is_prob: false,
247/// expr: Expr::Property(Box::new(Expr::Variable(var.to_string())), "id".to_string()),
248/// alias: None,
249/// };
250/// let names = resolve_yield_column_names(&[prop("a"), prop("b")]);
251/// assert_eq!(names, vec!["a_id".to_string(), "b_id".to_string()]);
252/// ```
253pub fn resolve_yield_column_names(items: &[LocyYieldItem]) -> Vec<String> {
254 use std::collections::HashMap;
255
256 let base: Vec<String> = items
257 .iter()
258 .map(|item| {
259 item.alias
260 .clone()
261 .unwrap_or_else(|| default_yield_name(&item.expr))
262 })
263 .collect();
264
265 let mut counts: HashMap<&str, usize> = HashMap::new();
266 for name in &base {
267 *counts.entry(name.as_str()).or_default() += 1;
268 }
269
270 base.iter()
271 .enumerate()
272 .map(|(i, name)| {
273 let item = &items[i];
274 if item.alias.is_none()
275 && counts.get(name.as_str()).copied().unwrap_or(0) > 1
276 && let Expr::Property(object, prop) = &item.expr
277 && let Expr::Variable(var) = object.as_ref()
278 {
279 return format!("{var}_{prop}");
280 }
281 name.clone()
282 })
283 .collect()
284}
285
// ═══════════════════════════════════════════════════════════════════════════
// DERIVE (graph derivation in rule heads)
// ═══════════════════════════════════════════════════════════════════════════
289
290/// `DERIVE pattern, pattern, ...` or `DERIVE MERGE a, b`
291#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
292pub enum DeriveClause {
293 Patterns(Vec<DerivePattern>),
294 Merge(String, String),
295}
296
297#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
298pub struct DerivePattern {
299 pub direction: Direction,
300 pub source: DeriveNodeSpec,
301 pub edge: DeriveEdgeSpec,
302 pub target: DeriveNodeSpec,
303}
304
305#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
306pub struct DeriveNodeSpec {
307 pub is_new: bool,
308 pub variable: String,
309 pub labels: Vec<String>,
310 pub properties: Option<Expr>,
311}
312
313#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
314pub struct DeriveEdgeSpec {
315 pub edge_type: String,
316 pub properties: Option<Expr>,
317}
318
// ═══════════════════════════════════════════════════════════════════════════
// GOAL-DIRECTED QUERY
// ═══════════════════════════════════════════════════════════════════════════
322
323/// `QUERY ruleName [WHERE expr] [RETURN ...]`
324#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
325pub struct GoalQuery {
326 pub rule_name: QualifiedName,
327 pub where_expr: Option<Expr>,
328 pub return_clause: Option<ReturnClause>,
329}
330
// ═══════════════════════════════════════════════════════════════════════════
// DERIVE COMMAND (top-level)
// ═══════════════════════════════════════════════════════════════════════════
334
335/// `DERIVE ruleName [WHERE expr]`
336#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
337pub struct DeriveCommand {
338 pub rule_name: QualifiedName,
339 pub where_expr: Option<Expr>,
340}
341
// ═══════════════════════════════════════════════════════════════════════════
// ASSUME BLOCK
// ═══════════════════════════════════════════════════════════════════════════
345
346/// `ASSUME { mutations } THEN body`
347#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
348pub struct AssumeBlock {
349 pub mutations: Vec<crate::ast::Clause>,
350 pub body: Vec<LocyStatement>,
351}
352
// ═══════════════════════════════════════════════════════════════════════════
// ABDUCE QUERY
// ═══════════════════════════════════════════════════════════════════════════
356
357/// `ABDUCE [NOT] ruleName [WHERE expr] [RETURN ...]`
358#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
359pub struct AbduceQuery {
360 pub negated: bool,
361 pub rule_name: QualifiedName,
362 pub where_expr: Option<Expr>,
363 pub return_clause: Option<ReturnClause>,
364}
365
// ═══════════════════════════════════════════════════════════════════════════
// EXPLAIN RULE
// ═══════════════════════════════════════════════════════════════════════════
369
370/// `EXPLAIN RULE ruleName [WHERE expr] [RETURN ...]`
371#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
372pub struct ExplainRule {
373 pub rule_name: QualifiedName,
374 pub where_expr: Option<Expr>,
375 pub return_clause: Option<ReturnClause>,
376}
377
// ═══════════════════════════════════════════════════════════════════════════
// CREATE MODEL (neural predicate, Phase B preview)
// ═══════════════════════════════════════════════════════════════════════════
381
382/// `CREATE MODEL` declaration. Parses the full surface from impl plan §2.1;
383/// `Conformal` / `Dirichlet` calibration methods are deferred to Phase C.
384#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
385pub struct ModelDefinition {
386 pub name: QualifiedName,
387 pub inputs: Vec<InputBinding>,
388 /// Feature expressions evaluated against input bindings. Empty when
389 /// the `FEATURES` clause is omitted (model receives all bound node
390 /// properties — interpretation deferred to the runtime adapter).
391 pub features: Vec<Expr>,
392 /// Phase D D3: `FEATURES (subject, column) FROM rule_name` pulls
393 /// `column` from a prior-derived relation `rule_name` (keyed by
394 /// `subject`) at runtime, and feeds it as a feature alongside any
395 /// `INPUT` bindings. MVP: at most one path-context feature per
396 /// model, mutually exclusive with the expression-`features` form.
397 pub path_context: Option<PathContextFeature>,
398 pub output: OutputBinding,
399 pub xervo_alias: String,
400 /// Phase D D2 follow-up: optional embedder alias surfaced by the
401 /// `USING xervo('classify/X', embedder='alias')` form. When
402 /// `None`, the runtime falls back to the alias `"default"` for
403 /// `semantic_match` query-text embedding.
404 pub embedder_alias: Option<String>,
405 pub calibration: Option<CalibrationMethod>,
406 pub version: Option<String>,
407 pub annotations: ModelAnnotations,
408}
409
410/// One INPUT binding, e.g. `(s:Supplier)`.
411#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
412pub struct InputBinding {
413 pub variable: String,
414 pub label: Option<String>,
415}
416
417/// Phase D D3: `FEATURES (subject_var, column) FROM source_rule`.
418#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
419pub struct PathContextFeature {
420 pub subject_var: String,
421 pub column: String,
422 pub source_rule: String,
423}
424
425/// The OUTPUT declaration, e.g. `OUTPUT PROB risk`.
426#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
427pub struct OutputBinding {
428 pub output_type: OutputType,
429 pub name: String,
430}
431
432#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
433pub enum OutputType {
434 Prob,
435 Score,
436 Label,
437 Vector,
438}
439
440#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
441pub enum CalibrationMethod {
442 PlattScaling,
443 IsotonicRegression,
444 TemperatureScaling,
445 BetaCalibration,
446 None,
447 /// Phase C C1a: split-conformal predictor. The point prediction
448 /// passes through unchanged; the calibrator carries a
449 /// `(1 - alpha)`-quantile of holdout nonconformity scores which
450 /// gates a per-prediction `ConfidenceBand` at inference. `alpha`
451 /// defaults to 0.1 (90% bands) when omitted.
452 Conformal {
453 alpha: f64,
454 },
455 /// Phase D D-C1d: multi-class Dirichlet calibration. The CALIBRATE
456 /// statement collects per-row `(class_index, score_vector)` pairs
457 /// instead of `(prediction, ground_truth)`. Compiler routes this
458 /// through `MulticlassCalibratorFitter` rather than the binary
459 /// `CalibratorFitter` trait. Method-of-moments fit by default.
460 Dirichlet,
461}
462
463/// Statement-level annotations. Currently only `@independent`, which
464/// suppresses Phase-C F2 shared-neural-input warnings. Parsed in Slice
465/// 1+2; semantically meaningful when F2 lands.
466#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
467pub struct ModelAnnotations {
468 pub independent: bool,
469}
470
// ═══════════════════════════════════════════════════════════════════════════
// CALIBRATE COMMAND (Phase C C2)
// ═══════════════════════════════════════════════════════════════════════════
474
475/// `CALIBRATE` statement. The runtime collects
476/// `(prediction, ground_truth)` pairs by invoking the registered
477/// classifier for `model_name` over the MATCH pattern, fits the
478/// chosen calibrator on a holdout-split, and returns the fitted
479/// transform + holdout metrics.
480#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
481pub struct CalibrateCommand {
482 pub model_name: QualifiedName,
483 pub pattern: Pattern,
484 pub where_expr: Option<Expr>,
485 pub target_expr: Expr,
486 pub method: CalibrationMethod,
487 /// Holdout fraction (must be in `(0, 1)`). `None` → compiler
488 /// resolves to default 0.2.
489 pub holdout: Option<f64>,
490}
491
// ═══════════════════════════════════════════════════════════════════════════
// VALIDATE COMMAND (Phase C C3)
// ═══════════════════════════════════════════════════════════════════════════
495
496/// `VALIDATE` statement. Runs the named rule, joins its PROB column
497/// output against the TARGET expression (ground truth), and computes
498/// the requested metrics. Unlike CALIBRATE, this never fits anything
499/// — it just measures.
500#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
501pub struct ValidateCommand {
502 pub rule_name: QualifiedName,
503 pub pattern: Pattern,
504 pub where_expr: Option<Expr>,
505 pub target_expr: Expr,
506 pub metrics: Vec<ValidationMetric>,
507}
508
509/// Supported metrics in `VALIDATE METRICS ...`. Each metric is a
510/// proper scoring rule or a calibration-quality summary; see
511/// `crates/uni-locy/src/calibration.rs` for definitions and
512/// numerical references.
513#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
514pub enum ValidationMetric {
515 BrierScore,
516 LogLoss,
517 /// Naive equal-width-binning ECE. Triggers
518 /// `WarningCode::EceBinningBias` (impl plan §3.4) suggesting
519 /// `DebiasedEce` instead.
520 Ece,
521 /// Debiased ECE per Kumar et al. NeurIPS 2019 — recommended.
522 DebiasedEce,
523 Accuracy,
524 Auc,
525}