Skip to main content

uni_cypher/
locy_ast.rs

1use serde::{Deserialize, Serialize};
2
3use crate::ast::{Direction, Expr, Pattern, Query, ReturnClause, UnaryOp};
4
/// A complete Locy program: optional module header, imports, and body statements.
///
/// This is the root node of the Locy AST; every other type in this file is
/// reachable from one of its three fields.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct LocyProgram {
    /// The `MODULE` header (see [`ModuleDecl`]), or `None` when the program has none.
    pub module: Option<ModuleDecl>,
    /// The `USE` imports (see [`UseDecl`]).
    pub uses: Vec<UseDecl>,
    /// The body statements (see [`LocyStatement`]).
    pub statements: Vec<LocyStatement>,
}
12
13/// A dotted name like `acme.compliance.rules`.
14#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
15pub struct QualifiedName {
16    pub parts: Vec<String>,
17}
18
19impl std::fmt::Display for QualifiedName {
20    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
21        write!(f, "{}", self.parts.join("."))
22    }
23}
24
/// `MODULE acme.compliance`
///
/// The optional module header of a [`LocyProgram`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ModuleDecl {
    /// The dotted module name that follows the `MODULE` keyword.
    pub name: QualifiedName,
}
30
/// `USE acme.common` or `USE acme.common { control, reachable }`
///
/// One import declaration of a [`LocyProgram`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct UseDecl {
    /// The dotted name of the module being imported.
    pub name: QualifiedName,
    /// `None` = glob import (all rules), `Some(vec)` = selective imports.
    pub imports: Option<Vec<String>>,
}
38
/// A top-level statement in a Locy program.
///
/// Statements also appear nested: the body of an [`AssumeBlock`] is itself a
/// list of `LocyStatement`s.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum LocyStatement {
    /// A standard Cypher query (passthrough).
    Cypher(Query),
    /// `CREATE RULE ... AS ...`
    Rule(RuleDefinition),
    /// `QUERY ruleName WHERE expr RETURN ...`
    GoalQuery(GoalQuery),
    /// `DERIVE ruleName WHERE ...`
    DeriveCommand(DeriveCommand),
    /// `ASSUME { mutations } THEN body`
    AssumeBlock(AssumeBlock),
    /// `ABDUCE [NOT] ruleName WHERE expr RETURN ...`
    AbduceQuery(AbduceQuery),
    /// `EXPLAIN RULE ruleName WHERE expr RETURN ...`
    ExplainRule(ExplainRule),
    /// `CREATE MODEL name AS INPUT (...) FEATURES ... OUTPUT type name USING xervo('...')`
    /// Phase B neural-predicate preview. The grammar always parses this;
    /// the compiler rejects it unless `LocyConfig::neural_predicates_preview`
    /// is set.
    Model(ModelDefinition),
    /// `CALIBRATE name ON MATCH pattern [WHERE ...] TARGET expr METHOD method [HOLDOUT 0.2]`
    /// Phase C C2 calibration statement.
    Calibrate(CalibrateCommand),
    /// `VALIDATE name ON MATCH pattern [WHERE ...] TARGET expr METRICS m1, m2, ...`
    /// Phase C C3 validation statement.
    Validate(ValidateCommand),
}
68
69// ═══════════════════════════════════════════════════════════════════════════
70// RULE DEFINITION
71// ═══════════════════════════════════════════════════════════════════════════
72
/// `CREATE RULE name [PRIORITY n] AS MATCH pattern [WHERE conds] [ALONG ...] [FOLD ...] [WHERE having] [BEST BY ...] YIELD/DERIVE ...`
///
/// Each field below corresponds to one clause of that syntax.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RuleDefinition {
    /// The (possibly dotted) rule name.
    pub name: QualifiedName,
    /// The `PRIORITY n` value, or `None` when the clause is absent.
    pub priority: Option<i64>,
    /// The `MATCH` pattern.
    pub match_pattern: Pattern,
    /// Conditions of the first `WHERE` clause; each is either an IS rule
    /// reference or a plain Cypher expression (see [`RuleCondition`]).
    pub where_conditions: Vec<RuleCondition>,
    /// The `ALONG` bindings (path-carried values).
    pub along: Vec<AlongBinding>,
    /// The `FOLD` bindings (aggregations).
    pub fold: Vec<FoldBinding>,
    /// Post-FOLD filter conditions (HAVING semantics). These filter on
    /// aggregate results after FOLD computation.
    pub having: Vec<Expr>,
    /// The `BEST BY` clause, or `None` when absent.
    pub best_by: Option<BestByClause>,
    /// The rule head: either `YIELD` items or a `DERIVE` clause.
    pub output: RuleOutput,
}
88
/// A condition in a rule WHERE clause.
///
/// Used as the element type of [`RuleDefinition::where_conditions`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum RuleCondition {
    /// `x IS rule`, `x IS rule TO y`, `(x,y) IS rule`
    IsReference(IsReference),
    /// A standard Cypher expression used as a boolean condition.
    Expression(Expr),
}
97
/// An IS rule reference in various forms.
///
/// The forms listed on [`RuleCondition::IsReference`] are `x IS rule`,
/// `x IS rule TO y` and `(x,y) IS rule`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IsReference {
    /// The variable(s) on the left of `IS` — presumably one entry for `x`
    /// and several for the tuple form `(x,y)`; confirm against the parser.
    pub subjects: Vec<String>,
    /// The rule being referenced.
    pub rule_name: QualifiedName,
    /// Presumably the `y` of the `TO y` form, `None` otherwise — TODO confirm.
    pub target: Option<String>,
    /// Whether the reference is negated. The surface syntax for negation is
    /// not shown in this file — NOTE(review): presumably `IS NOT`; confirm.
    pub negated: bool,
}
106
107// ═══════════════════════════════════════════════════════════════════════════
108// ALONG (path-carried values)
109// ═══════════════════════════════════════════════════════════════════════════
110
/// `name = along_expression`
///
/// One binding of a rule's `ALONG` clause.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct AlongBinding {
    /// The name on the left of `=`.
    pub name: String,
    /// The expression on the right of `=`; a [`LocyExpr`], so it may refer
    /// to `prev.field`.
    pub expr: LocyExpr,
}
117
/// Locy expression: extends Cypher expressions with `prev.field`.
///
/// The tree is recursive: `BinaryOp` and `UnaryOp` box their operands, while
/// `PrevRef` and `Cypher` are the leaves.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum LocyExpr {
    /// `prev.fieldName` — reference to previous hop's value.
    /// The payload is presumably just the `fieldName` part — TODO confirm.
    PrevRef(String),
    /// A standard Cypher expression.
    Cypher(Expr),
    /// Binary operation between Locy expressions.
    BinaryOp {
        /// Left-hand operand.
        left: Box<LocyExpr>,
        /// The operator.
        op: LocyBinaryOp,
        /// Right-hand operand.
        right: Box<LocyExpr>,
    },
    /// Unary operation (NOT, negation).
    UnaryOp(UnaryOp, Box<LocyExpr>),
}
134
135#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
136pub enum LocyBinaryOp {
137    Add,
138    Sub,
139    Mul,
140    Div,
141    Mod,
142    Pow,
143    And,
144    Or,
145    Xor,
146    // Comparisons are handled via Cypher expression re-parse
147}
148
149// ═══════════════════════════════════════════════════════════════════════════
150// FOLD (aggregation)
151// ═══════════════════════════════════════════════════════════════════════════
152
/// `name = fold_expression`
///
/// One binding of a rule's `FOLD` clause.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FoldBinding {
    /// The name on the left of `=`.
    pub name: String,
    /// The fold expression on the right of `=`, stored as a plain Cypher
    /// expression.
    pub aggregate: Expr,
}
159
160// ═══════════════════════════════════════════════════════════════════════════
161// BEST BY (optimized selection)
162// ═══════════════════════════════════════════════════════════════════════════
163
/// Wrapper for the BEST BY clause items.
///
/// Stored in [`RuleDefinition::best_by`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BestByClause {
    /// The `expr [ASC|DESC]` items of the clause.
    pub items: Vec<BestByItem>,
}
169
/// `expr [ASC|DESC]`
///
/// One item of a [`BestByClause`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BestByItem {
    /// The expression to select by.
    pub expr: Expr,
    /// Direction flag — presumably `true` for `ASC`. Which value the parser
    /// uses when neither keyword is written is not visible here; TODO confirm.
    pub ascending: bool,
}
176
177// ═══════════════════════════════════════════════════════════════════════════
178// YIELD (rule output schema)
179// ═══════════════════════════════════════════════════════════════════════════
180
/// Either YIELD items or DERIVE clause as a rule's output.
///
/// Stored in [`RuleDefinition::output`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum RuleOutput {
    /// The rule ends in `YIELD ...`.
    Yield(YieldClause),
    /// The rule ends in `DERIVE ...`.
    Derive(DeriveClause),
}
187
/// Wrapper for the YIELD clause items.
///
/// Carried by [`RuleOutput::Yield`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct YieldClause {
    /// The individual yield items.
    pub items: Vec<LocyYieldItem>,
}
193
/// A single YIELD item, possibly marked as KEY or PROB.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct LocyYieldItem {
    /// Whether the item is marked `KEY`.
    pub is_key: bool,
    /// Whether the item is marked `PROB`.
    pub is_prob: bool,
    /// The yielded expression.
    pub expr: Expr,
    /// Optional alias for the item — presumably from an `AS alias` suffix;
    /// TODO confirm against the grammar.
    pub alias: Option<String>,
}
202
203// ═══════════════════════════════════════════════════════════════════════════
204// DERIVE (graph derivation in rule heads)
205// ═══════════════════════════════════════════════════════════════════════════
206
/// `DERIVE pattern, pattern, ...` or `DERIVE MERGE a, b`
///
/// Carried by [`RuleOutput::Derive`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum DeriveClause {
    /// The `DERIVE pattern, pattern, ...` form.
    Patterns(Vec<DerivePattern>),
    /// The `DERIVE MERGE a, b` form; the two strings are presumably `a` and
    /// `b` in source order — TODO confirm.
    Merge(String, String),
}
213
/// One pattern of a `DERIVE pattern, ...` clause: a source node, an edge and
/// a target node, plus the edge's direction.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct DerivePattern {
    /// Direction of the edge, using the Cypher AST's [`Direction`] type.
    pub direction: Direction,
    /// The node on the source side of the pattern.
    pub source: DeriveNodeSpec,
    /// The edge between the two nodes.
    pub edge: DeriveEdgeSpec,
    /// The node on the target side of the pattern.
    pub target: DeriveNodeSpec,
}
221
/// A node position inside a [`DerivePattern`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct DeriveNodeSpec {
    /// NOTE(review): presumably marks a node that the derivation creates
    /// rather than one already bound by the rule body — confirm against the
    /// grammar/compiler.
    pub is_new: bool,
    /// The node's variable name.
    pub variable: String,
    /// Labels attached to the node (may be empty).
    pub labels: Vec<String>,
    /// Optional properties expression — presumably a map literal; TODO confirm.
    pub properties: Option<Expr>,
}
229
/// The edge position inside a [`DerivePattern`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct DeriveEdgeSpec {
    /// The edge (relationship) type name.
    pub edge_type: String,
    /// Optional properties expression — presumably a map literal; TODO confirm.
    pub properties: Option<Expr>,
}
235
236// ═══════════════════════════════════════════════════════════════════════════
237// GOAL-DIRECTED QUERY
238// ═══════════════════════════════════════════════════════════════════════════
239
/// `QUERY ruleName [WHERE expr] [RETURN ...]`
///
/// Carried by [`LocyStatement::GoalQuery`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct GoalQuery {
    /// The rule being queried.
    pub rule_name: QualifiedName,
    /// The `WHERE` expression, or `None` when omitted.
    pub where_expr: Option<Expr>,
    /// The `RETURN` clause, or `None` when omitted.
    pub return_clause: Option<ReturnClause>,
}
247
248// ═══════════════════════════════════════════════════════════════════════════
249// DERIVE COMMAND (top-level)
250// ═══════════════════════════════════════════════════════════════════════════
251
/// `DERIVE ruleName [WHERE expr]`
///
/// The top-level command carried by [`LocyStatement::DeriveCommand`]; not to
/// be confused with [`DeriveClause`], which is the `DERIVE` head of a rule.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct DeriveCommand {
    /// The rule to derive.
    pub rule_name: QualifiedName,
    /// The `WHERE` expression, or `None` when omitted.
    pub where_expr: Option<Expr>,
}
258
259// ═══════════════════════════════════════════════════════════════════════════
260// ASSUME BLOCK
261// ═══════════════════════════════════════════════════════════════════════════
262
/// `ASSUME { mutations } THEN body`
///
/// Carried by [`LocyStatement::AssumeBlock`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct AssumeBlock {
    /// The clauses inside the `{ ... }` braces, as Cypher AST clauses.
    pub mutations: Vec<crate::ast::Clause>,
    /// The statements after `THEN`; any [`LocyStatement`] may appear,
    /// including further `ASSUME` blocks.
    pub body: Vec<LocyStatement>,
}
269
270// ═══════════════════════════════════════════════════════════════════════════
271// ABDUCE QUERY
272// ═══════════════════════════════════════════════════════════════════════════
273
/// `ABDUCE [NOT] ruleName [WHERE expr] [RETURN ...]`
///
/// Carried by [`LocyStatement::AbduceQuery`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct AbduceQuery {
    /// Whether the optional `NOT` keyword was present.
    pub negated: bool,
    /// The rule the abduction targets.
    pub rule_name: QualifiedName,
    /// The `WHERE` expression, or `None` when omitted.
    pub where_expr: Option<Expr>,
    /// The `RETURN` clause, or `None` when omitted.
    pub return_clause: Option<ReturnClause>,
}
282
283// ═══════════════════════════════════════════════════════════════════════════
284// EXPLAIN RULE
285// ═══════════════════════════════════════════════════════════════════════════
286
/// `EXPLAIN RULE ruleName [WHERE expr] [RETURN ...]`
///
/// Carried by [`LocyStatement::ExplainRule`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ExplainRule {
    /// The rule to explain.
    pub rule_name: QualifiedName,
    /// The `WHERE` expression, or `None` when omitted.
    pub where_expr: Option<Expr>,
    /// The `RETURN` clause, or `None` when omitted.
    pub return_clause: Option<ReturnClause>,
}
294
295// ═══════════════════════════════════════════════════════════════════════════
296// CREATE MODEL (neural predicate, Phase B preview)
297// ═══════════════════════════════════════════════════════════════════════════
298
/// `CREATE MODEL` declaration. Parses the full surface from impl plan §2.1;
/// `Conformal` / `Dirichlet` calibration methods are deferred to Phase C.
///
/// NOTE(review): `CalibrationMethod` in this file already declares both
/// `Conformal` and `Dirichlet` variants (tagged Phase C and Phase D), so the
/// "deferred" remark above may be stale — confirm and update.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ModelDefinition {
    /// The model name following `CREATE MODEL`.
    pub name: QualifiedName,
    /// The `INPUT (...)` bindings.
    pub inputs: Vec<InputBinding>,
    /// Feature expressions evaluated against input bindings. Empty when
    /// the `FEATURES` clause is omitted (model receives all bound node
    /// properties — interpretation deferred to the runtime adapter).
    pub features: Vec<Expr>,
    /// Phase D D3: `FEATURES (subject, column) FROM rule_name` pulls
    /// `column` from a prior-derived relation `rule_name` (keyed by
    /// `subject`) at runtime, and feeds it as a feature alongside any
    /// `INPUT` bindings. MVP: at most one path-context feature per
    /// model, mutually exclusive with the expression-`features` form.
    pub path_context: Option<PathContextFeature>,
    /// The `OUTPUT type name` declaration.
    pub output: OutputBinding,
    /// Presumably the first string argument of `USING xervo('...')` —
    /// TODO confirm against the parser.
    pub xervo_alias: String,
    /// Phase D D2 follow-up: optional embedder alias surfaced by the
    /// `USING xervo('classify/X', embedder='alias')` form. When
    /// `None`, the runtime falls back to the alias `"default"` for
    /// `semantic_match` query-text embedding.
    pub embedder_alias: Option<String>,
    /// Optional calibration method. Note that `CalibrationMethod` has its
    /// own `None` variant, so `Some(CalibrationMethod::None)` and
    /// `Option::None` are distinct values here.
    pub calibration: Option<CalibrationMethod>,
    /// Optional version string; its surface syntax is not shown in this file.
    pub version: Option<String>,
    /// Statement-level annotations such as `@independent`.
    pub annotations: ModelAnnotations,
}
326
/// One INPUT binding, e.g. `(s:Supplier)`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct InputBinding {
    /// The bound variable (`s` in the example).
    pub variable: String,
    /// The node label (`Supplier` in the example), or `None` when the
    /// binding carries no label.
    pub label: Option<String>,
}
333
/// Phase D D3: `FEATURES (subject_var, column) FROM source_rule`.
///
/// Stored in [`ModelDefinition::path_context`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PathContextFeature {
    /// The first element of the parenthesised pair.
    pub subject_var: String,
    /// The second element of the parenthesised pair.
    pub column: String,
    /// The rule named after `FROM`. Stored as a plain `String`, unlike the
    /// `QualifiedName` used for rule names elsewhere in this file.
    pub source_rule: String,
}
341
/// The OUTPUT declaration, e.g. `OUTPUT PROB risk`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct OutputBinding {
    /// The declared output type (`PROB` in the example).
    pub output_type: OutputType,
    /// The output name (`risk` in the example).
    pub name: String,
}
348
/// The type keyword of an `OUTPUT type name` declaration
/// (see [`OutputBinding`]), e.g. `PROB` in `OUTPUT PROB risk`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum OutputType {
    Prob,
    Score,
    Label,
    Vector,
}
356
/// Calibration method, used both by [`ModelDefinition::calibration`] and as
/// the `METHOD` of a [`CalibrateCommand`].
///
/// Only `PartialEq` (not `Eq`) is derived because `Conformal` carries an
/// `f64`.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub enum CalibrationMethod {
    PlattScaling,
    IsotonicRegression,
    TemperatureScaling,
    BetaCalibration,
    /// An explicit "none" method. This is an enum variant, not
    /// `Option::None`; qualify it as `CalibrationMethod::None` where the two
    /// could be confused.
    None,
    /// Phase C C1a: split-conformal predictor. The point prediction
    /// passes through unchanged; the calibrator carries a
    /// `(1 - alpha)`-quantile of holdout nonconformity scores which
    /// gates a per-prediction `ConfidenceBand` at inference. `alpha`
    /// defaults to 0.1 (90% bands) when omitted.
    Conformal {
        /// The field is a plain `f64`, so the 0.1 default must already be
        /// filled in by whoever builds this value — presumably the parser;
        /// TODO confirm.
        alpha: f64,
    },
    /// Phase D D-C1d: multi-class Dirichlet calibration. The CALIBRATE
    /// statement collects per-row `(class_index, score_vector)` pairs
    /// instead of `(prediction, ground_truth)`. Compiler routes this
    /// through `MulticlassCalibratorFitter` rather than the binary
    /// `CalibratorFitter` trait. Method-of-moments fit by default.
    Dirichlet,
}
379
/// Statement-level annotations. Currently only `@independent`, which
/// suppresses Phase-C F2 shared-neural-input warnings. Parsed in Slice
/// 1+2; semantically meaningful when F2 lands.
///
/// Derives `Default`, so a model without annotations has
/// `independent == false`.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct ModelAnnotations {
    /// Whether the `@independent` annotation is set.
    pub independent: bool,
}
387
388// ═══════════════════════════════════════════════════════════════════════════
389// CALIBRATE COMMAND  (Phase C C2)
390// ═══════════════════════════════════════════════════════════════════════════
391
/// `CALIBRATE` statement. The runtime collects
/// `(prediction, ground_truth)` pairs by invoking the registered
/// classifier for `model_name` over the MATCH pattern, fits the
/// chosen calibrator on a holdout-split, and returns the fitted
/// transform + holdout metrics.
///
/// Surface syntax (from [`LocyStatement::Calibrate`]):
/// `CALIBRATE name ON MATCH pattern [WHERE ...] TARGET expr METHOD method [HOLDOUT 0.2]`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CalibrateCommand {
    /// The model named after `CALIBRATE`.
    pub model_name: QualifiedName,
    /// The `ON MATCH` pattern.
    pub pattern: Pattern,
    /// The optional `WHERE` expression.
    pub where_expr: Option<Expr>,
    /// The `TARGET` expression.
    pub target_expr: Expr,
    /// The `METHOD` to fit.
    pub method: CalibrationMethod,
    /// Holdout fraction (must be in `(0, 1)`). `None` → compiler
    /// resolves to default 0.2.
    pub holdout: Option<f64>,
}
408
409// ═══════════════════════════════════════════════════════════════════════════
410// VALIDATE COMMAND  (Phase C C3)
411// ═══════════════════════════════════════════════════════════════════════════
412
/// `VALIDATE` statement. Runs the named rule, joins its PROB column
/// output against the TARGET expression (ground truth), and computes
/// the requested metrics. Unlike CALIBRATE, this never fits anything
/// — it just measures.
///
/// Surface syntax (from [`LocyStatement::Validate`]):
/// `VALIDATE name ON MATCH pattern [WHERE ...] TARGET expr METRICS m1, m2, ...`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ValidateCommand {
    /// The rule named after `VALIDATE`.
    pub rule_name: QualifiedName,
    /// The `ON MATCH` pattern.
    pub pattern: Pattern,
    /// The optional `WHERE` expression.
    pub where_expr: Option<Expr>,
    /// The `TARGET` (ground-truth) expression.
    pub target_expr: Expr,
    /// The metrics listed after `METRICS`.
    pub metrics: Vec<ValidationMetric>,
}
425
/// Supported metrics in `VALIDATE METRICS ...`. Each metric is a
/// proper scoring rule or a calibration-quality summary; see
/// `crates/uni-locy/src/calibration.rs` for definitions and
/// numerical references.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ValidationMetric {
    /// Brier score.
    BrierScore,
    /// Log loss.
    LogLoss,
    /// Naive equal-width-binning ECE. Triggers
    /// `WarningCode::EceBinningBias` (impl plan §3.4) suggesting
    /// `DebiasedEce` instead.
    Ece,
    /// Debiased ECE per Kumar et al. NeurIPS 2019 — recommended.
    DebiasedEce,
    /// Accuracy.
    Accuracy,
    /// AUC — presumably area under the ROC curve; confirm against
    /// `calibration.rs`.
    Auc,
}