Skip to main content

lemma/planning/
fingerprint.rs

//! Semantic plan fingerprint for content-addressable hashing.
//!
//! Projects ExecutionPlan onto a representation that contains only what the plan actually does.
//! Uses dedicated fingerprint types (no LemmaType, LiteralValue, Arc<LemmaSpec>) so the hash
//! does not depend on external types or other specs. Excludes sources, meta, source locations.
//! Schema is explicit and stable: adding Rust fields does not change hashes for unused content.
//!
//! **Format versioning:** `fingerprint_hash` hashes `LMFP` + big-endian `FINGERPRINT_FORMAT_VERSION`
//! (u32) + postcard(`PlanFingerprint`). Bump the version when the encoded semantics change in a
//! way that must not share hashes with prior formats.
11
12use crate::parsing::ast::{CalendarUnit, DateCalendarKind, DateRelativeKind, DateTimeValue};
13use crate::planning::execution_plan::{Branch, ExecutableRule, ExecutionPlan};
14use crate::planning::semantics::{
15    ArithmeticComputation, ComparisonComputation, Expression, ExpressionKind, FactData, FactPath,
16    LemmaType, LiteralValue, MathematicalComputation, NegationType, RulePath,
17    SemanticConversionTarget, TypeDefiningSpec, TypeExtends, TypeSpecification, ValueKind,
18    VetoExpression,
19};
20use serde::Serialize;
21use sha2::{Digest, Sha256};
22use std::collections::BTreeMap;
23
/// Bumped when the byte layout hashed by [`fingerprint_hash`] changes incompatibly (prefix + postcard).
///
/// [`fingerprint_hash`] feeds this value into the hash as four big-endian bytes, directly after
/// the magic tag and before the postcard payload.
pub const FINGERPRINT_FORMAT_VERSION: u32 = 1;

/// Four-byte tag that [`fingerprint_hash`] places at the very start of the hashed byte stream.
const FINGERPRINT_MAGIC: &[u8; 4] = b"LMFP";
28
/// Fingerprint projection of [`TypeDefiningSpec`], produced by `type_defining_spec_fingerprint`.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum TypeDefiningSpecFingerprint {
    /// Projection of `TypeDefiningSpec::Local`; carries no data.
    Local,
    /// Projection of `TypeDefiningSpec::Import`.
    Import {
        /// Spec identifier: name or name~hash when pinned (e.g. `dep` or `dep~a1b2c3d4`).
        ///
        /// The projection in this file always builds it as `{name}~{resolved_plan_hash}`.
        spec_id: String,
        /// Clone of the imported spec's `effective_from`.
        effective_from: Option<DateTimeValue>,
    },
}
39
/// Fingerprint projection of [`TypeExtends`], produced by `type_extends_fingerprint`.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum TypeExtendsFingerprint {
    /// Projection of `TypeExtends::Primitive`; carries no data.
    Primitive,
    /// Projection of `TypeExtends::Custom`.
    Custom {
        /// Clone of the source variant's `parent`.
        parent: String,
        /// Clone of the source variant's `family`.
        family: String,
        /// Fingerprint of the source variant's `defining_spec`.
        defining_spec: TypeDefiningSpecFingerprint,
    },
}
50
/// Fingerprint projection of [`LemmaType`], produced by `lemma_type_fingerprint`.
#[derive(Debug, Clone, Serialize)]
pub struct LemmaTypeFingerprint {
    /// Clone of the type's `name`.
    pub name: Option<String>,
    /// Clone of the type's `specifications`. This reuses the planner's [`TypeSpecification`]
    /// directly rather than a dedicated fingerprint type, so its serialized form is part of
    /// whatever gets hashed.
    pub specifications: TypeSpecification,
    /// Fingerprint of the type's `extends`.
    pub extends: TypeExtendsFingerprint,
}
57
/// Fingerprint projection of [`LiteralValue`], produced by `literal_value_fingerprint`.
#[derive(Debug, Clone, Serialize)]
pub struct LiteralValueFingerprint {
    /// Clone of the literal's `value`; reuses the planner's [`ValueKind`] directly.
    pub value: ValueKind,
    /// Fingerprint of the literal's `lemma_type`.
    pub lemma_type: LemmaTypeFingerprint,
}
63
/// Semantic fingerprint of an execution plan. Contains only content that affects evaluation.
///
/// Built by [`from_plan`] and serialized with postcard by [`fingerprint_hash`].
// NOTE(review): postcard is presumably a positional (non-self-describing) encoding, in which case
// reordering or adding fields here changes every hash — confirm before editing the layout.
#[derive(Debug, Clone, Serialize)]
pub struct PlanFingerprint {
    /// Clone of the plan's `spec_name`.
    pub spec_name: String,
    /// Clone of the plan's `valid_from`.
    pub valid_from: Option<DateTimeValue>,
    /// One entry per fact, in the iteration order of the plan's `facts`.
    pub facts: Vec<(FactPath, FactFingerprint)>,
    /// One entry per rule, in the order of the plan's `rules`.
    pub rules: Vec<RuleFingerprint>,
    /// Fingerprints of the plan's `named_types`, under the same keys.
    pub named_types: BTreeMap<String, LemmaTypeFingerprint>,
}
73
/// Fingerprint projection of [`FactData`], produced by `fact_fingerprint`.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum FactFingerprint {
    /// Projection of `FactData::Value`.
    Value {
        /// Fingerprint of the fact's literal value.
        value: LiteralValueFingerprint,
        /// Copy of the source variant's `is_default` flag.
        is_default: bool,
    },
    /// Projection of `FactData::TypeDeclaration`.
    TypeDeclaration {
        /// Fingerprint of the source variant's `resolved_type`.
        resolved_type: LemmaTypeFingerprint,
    },
    /// Projection of `FactData::SpecRef`.
    SpecRef {
        /// Spec identifier: name or name~hash when pinned (e.g. `dep` or `dep~a1b2c3d4`).
        ///
        /// `fact_fingerprint` uses `{name}~{hash}` when the source's `resolved_plan_hash` is
        /// `Some`, and the bare spec name otherwise.
        spec_id: String,
        /// Clone of the referenced spec's `effective_from`.
        effective_from: Option<DateTimeValue>,
    },
}
90
/// Fingerprint projection of an [`ExecutableRule`], produced by `rule_fingerprint`.
#[derive(Debug, Clone, Serialize)]
pub struct RuleFingerprint {
    /// Clone of the rule's `path`.
    pub path: RulePath,
    /// Clone of the rule's `name`.
    pub name: String,
    /// One fingerprint per branch, in the order of the rule's `branches`.
    pub branches: Vec<BranchFingerprint>,
    /// Clones of the rule's `needs_facts`, in that collection's iteration order.
    pub needs_facts: Vec<FactPath>,
    /// Fingerprint of the rule's `rule_type`.
    pub rule_type: LemmaTypeFingerprint,
}
99
/// Fingerprint projection of a rule [`Branch`], produced by `branch_fingerprint`.
#[derive(Debug, Clone, Serialize)]
pub struct BranchFingerprint {
    /// Fingerprint of the branch's `condition`; `None` when the branch has no condition.
    pub condition: Option<ExpressionFingerprint>,
    /// Fingerprint of the branch's `result` expression.
    pub result: ExpressionFingerprint,
}
105
/// Fingerprint projection of an [`Expression`], produced by `expression_fingerprint`.
///
/// Only the expression's `kind` is projected; nothing else from [`Expression`] is read.
#[derive(Debug, Clone, Serialize)]
pub struct ExpressionFingerprint {
    /// Fingerprint of the expression's `kind`.
    pub kind: ExpressionKindFingerprint,
}
110
/// Fingerprint projection of [`ExpressionKind`], produced by `expression_kind_fingerprint`.
///
/// Each variant corresponds to the `ExpressionKind` variant of the same name. Nested expressions
/// are replaced by boxed [`ExpressionFingerprint`]s and literals by [`LiteralValueFingerprint`];
/// paths, operators, conversion targets, veto payloads and date kinds/units are reused from the
/// planner/AST types unchanged.
// NOTE(review): postcard presumably encodes enum variants by their index, so reordering or
// inserting variants here would change hashes — confirm before editing the variant list.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum ExpressionKindFingerprint {
    /// A literal value together with its type fingerprint.
    Literal(Box<LiteralValueFingerprint>),
    /// A fact path, cloned as-is.
    FactPath(FactPath),
    /// A rule path, cloned as-is.
    RulePath(RulePath),
    /// The two operand fingerprints, in source order.
    LogicalAnd(Box<ExpressionFingerprint>, Box<ExpressionFingerprint>),
    /// First operand, operator, second operand.
    Arithmetic(
        Box<ExpressionFingerprint>,
        ArithmeticComputation,
        Box<ExpressionFingerprint>,
    ),
    /// First operand, operator, second operand.
    Comparison(
        Box<ExpressionFingerprint>,
        ComparisonComputation,
        Box<ExpressionFingerprint>,
    ),
    /// Inner expression and the conversion target.
    UnitConversion(Box<ExpressionFingerprint>, SemanticConversionTarget),
    /// Inner expression and the negation type.
    LogicalNegation(Box<ExpressionFingerprint>, NegationType),
    /// The computation and its operand.
    MathematicalComputation(MathematicalComputation, Box<ExpressionFingerprint>),
    /// The veto payload, cloned as-is.
    Veto(VetoExpression),
    /// Projection of `ExpressionKind::Now`; carries no data.
    Now,
    /// Kind, first expression, and optional second expression of `ExpressionKind::DateRelative`.
    DateRelative(
        DateRelativeKind,
        Box<ExpressionFingerprint>,
        Option<Box<ExpressionFingerprint>>,
    ),
    /// Kind, unit, and expression of `ExpressionKind::DateCalendar`.
    DateCalendar(DateCalendarKind, CalendarUnit, Box<ExpressionFingerprint>),
}
140
141fn type_defining_spec_fingerprint(ds: &TypeDefiningSpec) -> TypeDefiningSpecFingerprint {
142    match ds {
143        TypeDefiningSpec::Local => TypeDefiningSpecFingerprint::Local,
144        TypeDefiningSpec::Import {
145            spec,
146            resolved_plan_hash,
147        } => TypeDefiningSpecFingerprint::Import {
148            spec_id: format!("{}~{}", spec.name, resolved_plan_hash),
149            effective_from: spec.effective_from.clone(),
150        },
151    }
152}
153
154fn type_extends_fingerprint(e: &TypeExtends) -> TypeExtendsFingerprint {
155    match e {
156        TypeExtends::Primitive => TypeExtendsFingerprint::Primitive,
157        TypeExtends::Custom {
158            parent,
159            family,
160            defining_spec,
161        } => TypeExtendsFingerprint::Custom {
162            parent: parent.clone(),
163            family: family.clone(),
164            defining_spec: type_defining_spec_fingerprint(defining_spec),
165        },
166    }
167}
168
169fn lemma_type_fingerprint(lt: &LemmaType) -> LemmaTypeFingerprint {
170    LemmaTypeFingerprint {
171        name: lt.name.clone(),
172        specifications: lt.specifications.clone(),
173        extends: type_extends_fingerprint(&lt.extends),
174    }
175}
176
177fn literal_value_fingerprint(lv: &LiteralValue) -> LiteralValueFingerprint {
178    LiteralValueFingerprint {
179        value: lv.value.clone(),
180        lemma_type: lemma_type_fingerprint(&lv.lemma_type),
181    }
182}
183
184/// Project ExecutionPlan to semantic fingerprint, excluding sources and meta.
185pub fn from_plan(plan: &ExecutionPlan) -> PlanFingerprint {
186    let facts: Vec<(FactPath, FactFingerprint)> = plan
187        .facts
188        .iter()
189        .map(|(path, data)| (path.clone(), fact_fingerprint(data)))
190        .collect();
191
192    let rules: Vec<RuleFingerprint> = plan.rules.iter().map(rule_fingerprint).collect();
193
194    let named_types: BTreeMap<String, LemmaTypeFingerprint> = plan
195        .named_types
196        .iter()
197        .map(|(k, v)| (k.clone(), lemma_type_fingerprint(v)))
198        .collect();
199
200    PlanFingerprint {
201        spec_name: plan.spec_name.clone(),
202        valid_from: plan.valid_from.clone(),
203        facts,
204        rules,
205        named_types,
206    }
207}
208
209fn fact_fingerprint(data: &FactData) -> FactFingerprint {
210    match data {
211        FactData::Value {
212            value, is_default, ..
213        } => FactFingerprint::Value {
214            value: literal_value_fingerprint(value),
215            is_default: *is_default,
216        },
217        FactData::TypeDeclaration { resolved_type, .. } => FactFingerprint::TypeDeclaration {
218            resolved_type: lemma_type_fingerprint(resolved_type),
219        },
220        FactData::SpecRef {
221            spec,
222            resolved_plan_hash,
223            ..
224        } => FactFingerprint::SpecRef {
225            spec_id: resolved_plan_hash
226                .as_ref()
227                .map(|h| format!("{}~{}", spec.name, h))
228                .unwrap_or_else(|| spec.name.clone()),
229            effective_from: spec.effective_from.clone(),
230        },
231    }
232}
233
234fn rule_fingerprint(rule: &ExecutableRule) -> RuleFingerprint {
235    RuleFingerprint {
236        path: rule.path.clone(),
237        name: rule.name.clone(),
238        branches: rule.branches.iter().map(branch_fingerprint).collect(),
239        needs_facts: rule.needs_facts.iter().cloned().collect(),
240        rule_type: lemma_type_fingerprint(&rule.rule_type),
241    }
242}
243
244fn branch_fingerprint(branch: &Branch) -> BranchFingerprint {
245    BranchFingerprint {
246        condition: branch.condition.as_ref().map(expression_fingerprint),
247        result: expression_fingerprint(&branch.result),
248    }
249}
250
251fn expression_fingerprint(expr: &Expression) -> ExpressionFingerprint {
252    ExpressionFingerprint {
253        kind: expression_kind_fingerprint(&expr.kind),
254    }
255}
256
257fn expression_kind_fingerprint(kind: &ExpressionKind) -> ExpressionKindFingerprint {
258    match kind {
259        ExpressionKind::Literal(lv) => {
260            ExpressionKindFingerprint::Literal(Box::new(literal_value_fingerprint(lv)))
261        }
262        ExpressionKind::FactPath(fp) => ExpressionKindFingerprint::FactPath(fp.clone()),
263        ExpressionKind::RulePath(rp) => ExpressionKindFingerprint::RulePath(rp.clone()),
264        ExpressionKind::LogicalAnd(l, r) => ExpressionKindFingerprint::LogicalAnd(
265            Box::new(expression_fingerprint(l)),
266            Box::new(expression_fingerprint(r)),
267        ),
268        ExpressionKind::Arithmetic(l, op, r) => ExpressionKindFingerprint::Arithmetic(
269            Box::new(expression_fingerprint(l)),
270            op.clone(),
271            Box::new(expression_fingerprint(r)),
272        ),
273        ExpressionKind::Comparison(l, op, r) => ExpressionKindFingerprint::Comparison(
274            Box::new(expression_fingerprint(l)),
275            op.clone(),
276            Box::new(expression_fingerprint(r)),
277        ),
278        ExpressionKind::UnitConversion(inner, target) => ExpressionKindFingerprint::UnitConversion(
279            Box::new(expression_fingerprint(inner)),
280            target.clone(),
281        ),
282        ExpressionKind::LogicalNegation(inner, nt) => ExpressionKindFingerprint::LogicalNegation(
283            Box::new(expression_fingerprint(inner)),
284            nt.clone(),
285        ),
286        ExpressionKind::MathematicalComputation(mc, inner) => {
287            ExpressionKindFingerprint::MathematicalComputation(
288                mc.clone(),
289                Box::new(expression_fingerprint(inner)),
290            )
291        }
292        ExpressionKind::Veto(ve) => ExpressionKindFingerprint::Veto(ve.clone()),
293        ExpressionKind::Now => ExpressionKindFingerprint::Now,
294        ExpressionKind::DateRelative(kind, date_expr, tol) => {
295            ExpressionKindFingerprint::DateRelative(
296                *kind,
297                Box::new(expression_fingerprint(date_expr)),
298                tol.as_ref().map(|t| Box::new(expression_fingerprint(t))),
299            )
300        }
301        ExpressionKind::DateCalendar(kind, unit, date_expr) => {
302            ExpressionKindFingerprint::DateCalendar(
303                *kind,
304                *unit,
305                Box::new(expression_fingerprint(date_expr)),
306            )
307        }
308    }
309}
310
311/// Compute deterministic 8-char hex hash from fingerprint.
312pub fn fingerprint_hash(fp: &PlanFingerprint) -> String {
313    let payload = postcard::to_allocvec(fp).expect("PlanFingerprint serialization");
314    let mut prefixed = Vec::with_capacity(FINGERPRINT_MAGIC.len() + 4 + payload.len());
315    prefixed.extend_from_slice(FINGERPRINT_MAGIC.as_slice());
316    prefixed.extend_from_slice(&FINGERPRINT_FORMAT_VERSION.to_be_bytes());
317    prefixed.extend_from_slice(&payload);
318    let digest = Sha256::digest(&prefixed);
319    let n = (u32::from(digest[0]) << 24)
320        | (u32::from(digest[1]) << 16)
321        | (u32::from(digest[2]) << 8)
322        | u32::from(digest[3]);
323    format!("{:08x}", n)
324}
325
#[cfg(test)]
mod tests {
    use super::*;
    use indexmap::IndexMap;
    use std::collections::HashMap;

    /// Builds a plan with the given name and every collection empty, with no validity bounds.
    fn empty_plan(spec_name: &str) -> ExecutionPlan {
        ExecutionPlan {
            spec_name: spec_name.to_string(),
            facts: IndexMap::new(),
            rules: vec![],
            sources: HashMap::new(),
            meta: HashMap::new(),
            named_types: BTreeMap::new(),
            valid_from: None,
            valid_to: None,
        }
    }

    /// Projecting the same plan twice must give the same fingerprint, field for field.
    #[test]
    fn same_plan_same_fingerprint() {
        let plan = empty_plan("test");
        let fp1 = from_plan(&plan);
        let fp2 = from_plan(&plan);
        assert_eq!(fp1.spec_name, fp2.spec_name);

        // `PlanFingerprint` has no `PartialEq`, so compare the exact bytes that get hashed.
        // Checking `spec_name` alone would miss a difference in any other field.
        let bytes1 = postcard::to_allocvec(&fp1).expect("PlanFingerprint serialization");
        let bytes2 = postcard::to_allocvec(&fp2).expect("PlanFingerprint serialization");
        assert_eq!(bytes1, bytes2);
    }

    /// Hashing the same plan twice gives the same 8-character hex string.
    #[test]
    fn same_plan_same_hash() {
        let plan = empty_plan("test");
        let h1 = fingerprint_hash(&from_plan(&plan));
        let h2 = fingerprint_hash(&from_plan(&plan));
        assert_eq!(h1, h2);
        assert_eq!(h1.len(), 8);
        assert!(h1.chars().all(|c| c.is_ascii_hexdigit()));
    }

    /// The spec name is part of the hashed content.
    #[test]
    fn different_spec_name_different_hash() {
        let h1 = fingerprint_hash(&from_plan(&empty_plan("a")));
        let h2 = fingerprint_hash(&from_plan(&empty_plan("b")));
        assert_ne!(h1, h2);
    }

    /// Golden vectors for `FINGERPRINT_FORMAT_VERSION` + postcard layout. Update when bumping format.
    #[test]
    fn golden_plan_hash_empty_spec_names() {
        assert_eq!(
            fingerprint_hash(&from_plan(&empty_plan("golden_empty"))),
            "fc4c852f"
        );
        assert_eq!(fingerprint_hash(&from_plan(&empty_plan("x"))), "e97e410c");
        let mut p = empty_plan("golden_valid_from");
        p.valid_from = Some(DateTimeValue {
            year: 2024,
            month: 6,
            day: 15,
            hour: 0,
            minute: 0,
            second: 0,
            microsecond: 0,
            timezone: None,
        });
        assert_eq!(fingerprint_hash(&from_plan(&p)), "b301d0c3");
    }
}