// uni_cypher/locy_ast.rs
1use serde::{Deserialize, Serialize};
2
3use crate::ast::{Direction, Expr, Pattern, Query, ReturnClause, UnaryOp};
4
5/// A complete Locy program: optional module header, imports, and body statements.
6#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
7pub struct LocyProgram {
8 pub module: Option<ModuleDecl>,
9 pub uses: Vec<UseDecl>,
10 pub statements: Vec<LocyStatement>,
11}
12
13/// A dotted name like `acme.compliance.rules`.
14#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
15pub struct QualifiedName {
16 pub parts: Vec<String>,
17}
18
19impl std::fmt::Display for QualifiedName {
20 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
21 write!(f, "{}", self.parts.join("."))
22 }
23}
24
25/// `MODULE acme.compliance`
26#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
27pub struct ModuleDecl {
28 pub name: QualifiedName,
29}
30
31/// `USE acme.common` or `USE acme.common { control, reachable }`
32#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
33pub struct UseDecl {
34 pub name: QualifiedName,
35 /// `None` = glob import (all rules), `Some(vec)` = selective imports.
36 pub imports: Option<Vec<String>>,
37}
38
39/// A top-level statement in a Locy program.
40#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
41pub enum LocyStatement {
42 /// A standard Cypher query (passthrough).
43 Cypher(Query),
44 /// `CREATE RULE ... AS ...`
45 Rule(RuleDefinition),
46 /// `QUERY ruleName WHERE expr RETURN ...`
47 GoalQuery(GoalQuery),
48 /// `DERIVE ruleName WHERE ...`
49 DeriveCommand(DeriveCommand),
50 /// `ASSUME { mutations } THEN body`
51 AssumeBlock(AssumeBlock),
52 /// `ABDUCE [NOT] ruleName WHERE expr RETURN ...`
53 AbduceQuery(AbduceQuery),
54 /// `EXPLAIN RULE ruleName WHERE expr RETURN ...`
55 ExplainRule(ExplainRule),
56 /// `CREATE MODEL name AS INPUT (...) FEATURES ... OUTPUT type name USING xervo('...')`
57 /// Phase B neural-predicate preview. The grammar always parses this;
58 /// the compiler rejects it unless `LocyConfig::neural_predicates_preview`
59 /// is set.
60 Model(ModelDefinition),
61 /// `CALIBRATE name ON MATCH pattern [WHERE ...] TARGET expr METHOD method [HOLDOUT 0.2]`
62 /// Phase C C2 calibration statement.
63 Calibrate(CalibrateCommand),
64 /// `VALIDATE name ON MATCH pattern [WHERE ...] TARGET expr METRICS m1, m2, ...`
65 /// Phase C C3 validation statement.
66 Validate(ValidateCommand),
67}
68
// ═══════════════════════════════════════════════════════════════════════════
// RULE DEFINITION
// ═══════════════════════════════════════════════════════════════════════════
72
73/// `CREATE RULE name [PRIORITY n] AS MATCH pattern [WHERE conds] [ALONG ...] [FOLD ...] [WHERE having] [BEST BY ...] YIELD/DERIVE ...`
74#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
75pub struct RuleDefinition {
76 pub name: QualifiedName,
77 pub priority: Option<i64>,
78 pub match_pattern: Pattern,
79 pub where_conditions: Vec<RuleCondition>,
80 pub along: Vec<AlongBinding>,
81 pub fold: Vec<FoldBinding>,
82 /// Post-FOLD filter conditions (HAVING semantics). These filter on
83 /// aggregate results after FOLD computation.
84 pub having: Vec<Expr>,
85 pub best_by: Option<BestByClause>,
86 pub output: RuleOutput,
87}
88
89/// A condition in a rule WHERE clause.
90#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
91pub enum RuleCondition {
92 /// `x IS rule`, `x IS rule TO y`, `(x,y) IS rule`
93 IsReference(IsReference),
94 /// A standard Cypher expression used as a boolean condition.
95 Expression(Expr),
96}
97
98/// An IS rule reference in various forms.
99#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
100pub struct IsReference {
101 pub subjects: Vec<String>,
102 pub rule_name: QualifiedName,
103 pub target: Option<String>,
104 pub negated: bool,
105}
106
// ═══════════════════════════════════════════════════════════════════════════
// ALONG (path-carried values)
// ═══════════════════════════════════════════════════════════════════════════
110
111/// `name = along_expression`
112#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
113pub struct AlongBinding {
114 pub name: String,
115 pub expr: LocyExpr,
116}
117
118/// Locy expression: extends Cypher expressions with `prev.field`.
119#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
120pub enum LocyExpr {
121 /// `prev.fieldName` — reference to previous hop's value.
122 PrevRef(String),
123 /// A standard Cypher expression.
124 Cypher(Expr),
125 /// Binary operation between Locy expressions.
126 BinaryOp {
127 left: Box<LocyExpr>,
128 op: LocyBinaryOp,
129 right: Box<LocyExpr>,
130 },
131 /// Unary operation (NOT, negation).
132 UnaryOp(UnaryOp, Box<LocyExpr>),
133}
134
135#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
136pub enum LocyBinaryOp {
137 Add,
138 Sub,
139 Mul,
140 Div,
141 Mod,
142 Pow,
143 And,
144 Or,
145 Xor,
146 // Comparisons are handled via Cypher expression re-parse
147}
148
// ═══════════════════════════════════════════════════════════════════════════
// FOLD (aggregation)
// ═══════════════════════════════════════════════════════════════════════════
152
153/// `name = fold_expression`
154#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
155pub struct FoldBinding {
156 pub name: String,
157 pub aggregate: Expr,
158}
159
// ═══════════════════════════════════════════════════════════════════════════
// BEST BY (optimized selection)
// ═══════════════════════════════════════════════════════════════════════════
163
164/// Wrapper for the BEST BY clause items.
165#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
166pub struct BestByClause {
167 pub items: Vec<BestByItem>,
168}
169
170/// `expr [ASC|DESC]`
171#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
172pub struct BestByItem {
173 pub expr: Expr,
174 pub ascending: bool,
175}
176
// ═══════════════════════════════════════════════════════════════════════════
// YIELD (rule output schema)
// ═══════════════════════════════════════════════════════════════════════════
180
181/// Either YIELD items or DERIVE clause as a rule's output.
182#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
183pub enum RuleOutput {
184 Yield(YieldClause),
185 Derive(DeriveClause),
186}
187
188/// Wrapper for the YIELD clause items.
189#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
190pub struct YieldClause {
191 pub items: Vec<LocyYieldItem>,
192}
193
194/// A single YIELD item, possibly marked as KEY or PROB.
195#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
196pub struct LocyYieldItem {
197 pub is_key: bool,
198 pub is_prob: bool,
199 pub expr: Expr,
200 pub alias: Option<String>,
201}
202
// ═══════════════════════════════════════════════════════════════════════════
// DERIVE (graph derivation in rule heads)
// ═══════════════════════════════════════════════════════════════════════════
206
207/// `DERIVE pattern, pattern, ...` or `DERIVE MERGE a, b`
208#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
209pub enum DeriveClause {
210 Patterns(Vec<DerivePattern>),
211 Merge(String, String),
212}
213
214#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
215pub struct DerivePattern {
216 pub direction: Direction,
217 pub source: DeriveNodeSpec,
218 pub edge: DeriveEdgeSpec,
219 pub target: DeriveNodeSpec,
220}
221
222#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
223pub struct DeriveNodeSpec {
224 pub is_new: bool,
225 pub variable: String,
226 pub labels: Vec<String>,
227 pub properties: Option<Expr>,
228}
229
230#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
231pub struct DeriveEdgeSpec {
232 pub edge_type: String,
233 pub properties: Option<Expr>,
234}
235
// ═══════════════════════════════════════════════════════════════════════════
// GOAL-DIRECTED QUERY
// ═══════════════════════════════════════════════════════════════════════════
239
240/// `QUERY ruleName [WHERE expr] [RETURN ...]`
241#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
242pub struct GoalQuery {
243 pub rule_name: QualifiedName,
244 pub where_expr: Option<Expr>,
245 pub return_clause: Option<ReturnClause>,
246}
247
// ═══════════════════════════════════════════════════════════════════════════
// DERIVE COMMAND (top-level)
// ═══════════════════════════════════════════════════════════════════════════
251
252/// `DERIVE ruleName [WHERE expr]`
253#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
254pub struct DeriveCommand {
255 pub rule_name: QualifiedName,
256 pub where_expr: Option<Expr>,
257}
258
// ═══════════════════════════════════════════════════════════════════════════
// ASSUME BLOCK
// ═══════════════════════════════════════════════════════════════════════════
262
263/// `ASSUME { mutations } THEN body`
264#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
265pub struct AssumeBlock {
266 pub mutations: Vec<crate::ast::Clause>,
267 pub body: Vec<LocyStatement>,
268}
269
// ═══════════════════════════════════════════════════════════════════════════
// ABDUCE QUERY
// ═══════════════════════════════════════════════════════════════════════════
273
274/// `ABDUCE [NOT] ruleName [WHERE expr] [RETURN ...]`
275#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
276pub struct AbduceQuery {
277 pub negated: bool,
278 pub rule_name: QualifiedName,
279 pub where_expr: Option<Expr>,
280 pub return_clause: Option<ReturnClause>,
281}
282
// ═══════════════════════════════════════════════════════════════════════════
// EXPLAIN RULE
// ═══════════════════════════════════════════════════════════════════════════
286
287/// `EXPLAIN RULE ruleName [WHERE expr] [RETURN ...]`
288#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
289pub struct ExplainRule {
290 pub rule_name: QualifiedName,
291 pub where_expr: Option<Expr>,
292 pub return_clause: Option<ReturnClause>,
293}
294
// ═══════════════════════════════════════════════════════════════════════════
// CREATE MODEL (neural predicate, Phase B preview)
// ═══════════════════════════════════════════════════════════════════════════
298
299/// `CREATE MODEL` declaration. Parses the full surface from impl plan §2.1;
300/// `Conformal` / `Dirichlet` calibration methods are deferred to Phase C.
301#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
302pub struct ModelDefinition {
303 pub name: QualifiedName,
304 pub inputs: Vec<InputBinding>,
305 /// Feature expressions evaluated against input bindings. Empty when
306 /// the `FEATURES` clause is omitted (model receives all bound node
307 /// properties — interpretation deferred to the runtime adapter).
308 pub features: Vec<Expr>,
309 /// Phase D D3: `FEATURES (subject, column) FROM rule_name` pulls
310 /// `column` from a prior-derived relation `rule_name` (keyed by
311 /// `subject`) at runtime, and feeds it as a feature alongside any
312 /// `INPUT` bindings. MVP: at most one path-context feature per
313 /// model, mutually exclusive with the expression-`features` form.
314 pub path_context: Option<PathContextFeature>,
315 pub output: OutputBinding,
316 pub xervo_alias: String,
317 /// Phase D D2 follow-up: optional embedder alias surfaced by the
318 /// `USING xervo('classify/X', embedder='alias')` form. When
319 /// `None`, the runtime falls back to the alias `"default"` for
320 /// `semantic_match` query-text embedding.
321 pub embedder_alias: Option<String>,
322 pub calibration: Option<CalibrationMethod>,
323 pub version: Option<String>,
324 pub annotations: ModelAnnotations,
325}
326
327/// One INPUT binding, e.g. `(s:Supplier)`.
328#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
329pub struct InputBinding {
330 pub variable: String,
331 pub label: Option<String>,
332}
333
334/// Phase D D3: `FEATURES (subject_var, column) FROM source_rule`.
335#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
336pub struct PathContextFeature {
337 pub subject_var: String,
338 pub column: String,
339 pub source_rule: String,
340}
341
342/// The OUTPUT declaration, e.g. `OUTPUT PROB risk`.
343#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
344pub struct OutputBinding {
345 pub output_type: OutputType,
346 pub name: String,
347}
348
349#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
350pub enum OutputType {
351 Prob,
352 Score,
353 Label,
354 Vector,
355}
356
357#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
358pub enum CalibrationMethod {
359 PlattScaling,
360 IsotonicRegression,
361 TemperatureScaling,
362 BetaCalibration,
363 None,
364 /// Phase C C1a: split-conformal predictor. The point prediction
365 /// passes through unchanged; the calibrator carries a
366 /// `(1 - alpha)`-quantile of holdout nonconformity scores which
367 /// gates a per-prediction `ConfidenceBand` at inference. `alpha`
368 /// defaults to 0.1 (90% bands) when omitted.
369 Conformal {
370 alpha: f64,
371 },
372 /// Phase D D-C1d: multi-class Dirichlet calibration. The CALIBRATE
373 /// statement collects per-row `(class_index, score_vector)` pairs
374 /// instead of `(prediction, ground_truth)`. Compiler routes this
375 /// through `MulticlassCalibratorFitter` rather than the binary
376 /// `CalibratorFitter` trait. Method-of-moments fit by default.
377 Dirichlet,
378}
379
380/// Statement-level annotations. Currently only `@independent`, which
381/// suppresses Phase-C F2 shared-neural-input warnings. Parsed in Slice
382/// 1+2; semantically meaningful when F2 lands.
383#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
384pub struct ModelAnnotations {
385 pub independent: bool,
386}
387
// ═══════════════════════════════════════════════════════════════════════════
// CALIBRATE COMMAND (Phase C C2)
// ═══════════════════════════════════════════════════════════════════════════
391
392/// `CALIBRATE` statement. The runtime collects
393/// `(prediction, ground_truth)` pairs by invoking the registered
394/// classifier for `model_name` over the MATCH pattern, fits the
395/// chosen calibrator on a holdout-split, and returns the fitted
396/// transform + holdout metrics.
397#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
398pub struct CalibrateCommand {
399 pub model_name: QualifiedName,
400 pub pattern: Pattern,
401 pub where_expr: Option<Expr>,
402 pub target_expr: Expr,
403 pub method: CalibrationMethod,
404 /// Holdout fraction (must be in `(0, 1)`). `None` → compiler
405 /// resolves to default 0.2.
406 pub holdout: Option<f64>,
407}
408
// ═══════════════════════════════════════════════════════════════════════════
// VALIDATE COMMAND (Phase C C3)
// ═══════════════════════════════════════════════════════════════════════════
412
413/// `VALIDATE` statement. Runs the named rule, joins its PROB column
414/// output against the TARGET expression (ground truth), and computes
415/// the requested metrics. Unlike CALIBRATE, this never fits anything
416/// — it just measures.
417#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
418pub struct ValidateCommand {
419 pub rule_name: QualifiedName,
420 pub pattern: Pattern,
421 pub where_expr: Option<Expr>,
422 pub target_expr: Expr,
423 pub metrics: Vec<ValidationMetric>,
424}
425
426/// Supported metrics in `VALIDATE METRICS ...`. Each metric is a
427/// proper scoring rule or a calibration-quality summary; see
428/// `crates/uni-locy/src/calibration.rs` for definitions and
429/// numerical references.
430#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
431pub enum ValidationMetric {
432 BrierScore,
433 LogLoss,
434 /// Naive equal-width-binning ECE. Triggers
435 /// `WarningCode::EceBinningBias` (impl plan §3.4) suggesting
436 /// `DebiasedEce` instead.
437 Ece,
438 /// Debiased ECE per Kumar et al. NeurIPS 2019 — recommended.
439 DebiasedEce,
440 Accuracy,
441 Auc,
442}