Skip to main content

lex_vcs/
operation.rs

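//! The `OperationKind` enum, the `Operation` struct (a kind plus its
//! causal parents and optional intent), and `OperationRecord` (an
//! operation plus its computed `OpId` and resulting stage transition).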
3//!
4//! See `lib.rs` for the design context and #129 for the issue.
5
6use indexmap::IndexSet;
7use serde::{Deserialize, Serialize};
8use std::collections::{BTreeMap, BTreeSet};
9
10use crate::canonical;
11
/// Signature identity of a function or type — the part that stays
/// stable across body edits. Wraps the same string identity
/// `lex-store` uses; we keep it as `String` here so this crate has
/// no dependency on `lex-store`'s internals.
pub type SigId = String;

/// Content hash of a single stage (function body, type def, ...).
/// Same string identity as the file under `<root>/stages/<SigId>/
/// implementations/<StageId>.ast.json`.
pub type StageId = String;

/// Identity of an operation. `(kind, payload, parents, intent_id)`
/// SHA-256 in lowercase hex (64 chars). Two operations with
/// identical payloads, parent sets, and intent produce identical
/// `OpId`s; the store dedupes on this. An absent intent is omitted
/// from the hashed form rather than hashed as a placeholder.
pub type OpId = String;

/// Sorted set of effect-kind strings (e.g. `["fs_write", "io"]`).
/// `BTreeSet` so the canonical form is order-independent for
/// hashing. A declared budget appears as a label of the form
/// `"budget(N)"`; see [`budget_from_effects`].
pub type EffectSet = BTreeSet<String>;

/// Reference to an imported module — either a stdlib name
/// (`std.io`) or a local path (`./helpers`). Kept as a string so
/// this crate doesn't pull in `lex-syntax`'s parser.
pub type ModuleRef = String;
38
/// Version tag for the operation canonical form (#244).
///
/// The pre-image bytes hashed to derive an `OpId` are not stable
/// across schema evolutions: adding a field to `OperationKind` or
/// changing its serde representation rotates every existing `OpId`.
/// This enum tags the encoding used so a long-lived store can detect
/// mismatches and migrate explicitly via [`crate::migrate`].
///
/// **Today only [`Self::V1`] is in production.** Adding a future
/// variant requires:
///
/// 1. A new arm in [`Operation::canonical_bytes_in`].
/// 2. An update to the canonical-form spec in [`crate::canonical`].
/// 3. A `CHANGELOG.md` entry under `### Internal` calling out the
///    `OpId` rotation.
/// 4. A migration recipe via [`crate::migrate::plan_migration`] —
///    the mechanism is encoder-agnostic, but each new variant needs
///    its own `canonical_bytes_in` arm.
///
/// Variant names are serialized in lowercase (`rename_all =
/// "lowercase"`), so `V1` is written as `"v1"`.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, Default,
)]
#[serde(rename_all = "lowercase")]
pub enum OperationFormat {
    /// The original canonical form. It is the `Default`, so records
    /// that carry no version tag deserialize to it, and it is the
    /// only variant [`Self::is_implicit`] reports as implicit.
    #[default]
    V1,
}
65
66impl OperationFormat {
67    /// The format every newly-emitted op uses today.
68    pub const CURRENT: OperationFormat = OperationFormat::V1;
69
70    /// `true` for the implicit format (V1). Used by the
71    /// `skip_serializing_if` hook on [`OperationRecord::format_version`]
72    /// so existing V1 stores keep byte-identical on-disk JSON —
73    /// adding the version field doesn't itself rotate any `OpId`.
74    pub fn is_implicit(&self) -> bool {
75        matches!(self, OperationFormat::V1)
76    }
77}
78
/// Effect of applying an operation on a stage's content-addressed
/// identity. Used as the `produces` field of an [`OperationRecord`]
/// so consumers can answer "after this op, what's the head stage
/// for this SigId?" without rerunning the apply step.
///
/// Serialized as an internally tagged object: the variant name is
/// written in snake_case under a `kind` key (e.g. `import_only`),
/// next to the variant's own fields.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum StageTransition {
    /// New SigId; produces a stage that didn't exist before.
    Create { sig_id: SigId, stage_id: StageId },
    /// Existing SigId; replaces its head stage.
    Replace { sig_id: SigId, from: StageId, to: StageId },
    /// SigId removed; no head stage afterwards.
    Remove { sig_id: SigId, last: StageId },
    /// SigId renamed; same body hash, different signature identity.
    Rename { from: SigId, to: SigId, body_stage_id: StageId },
    /// Import-only change; doesn't touch any stage.
    ImportOnly,
    /// Merge op result. `entries` lists only the sigs whose head
    /// changed relative to the merge op's first parent (`dst_head`):
    /// `Some(stage_id)` sets the head; `None` removes the sig.
    /// Sigs unaffected by the merge are not listed.
    ///
    /// **Canonical-form contract:** `BTreeMap` is load-bearing —
    /// iteration is sorted by `SigId`, so on-disk JSON for two
    /// callers that resolved the same conflicts in different
    /// orders produces byte-identical output. Switching to
    /// `HashMap` here would break canonical stability of the
    /// `OperationRecord` JSON file and is rejected by the
    /// canonical-form spec in `crate::canonical`.
    Merge {
        entries: BTreeMap<SigId, Option<StageId>>,
    },
}
112
/// The kinds of operations that produce stage transitions. Mirrors
/// the initial set in #129; new kinds (`MoveBetweenFiles`,
/// `SplitFunction`, `ExtractType`) can be added later as long as
/// they're appended at the end of this enum or use explicit
/// `#[serde(rename = "...")]` tags so existing `OpId`s stay stable.
///
/// Serialized as an internally tagged object: the variant name is
/// written in snake_case under an `op` key (e.g. `add_function`),
/// next to the variant's own fields. This serialization is part of
/// the hashed canonical form, so field names and order matter.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "op", rename_all = "snake_case")]
pub enum OperationKind {
    /// New function published. `effects` is the effect set declared
    /// in the signature; tracked here (not just inside the stage)
    /// so #130's write-time gate has a cheap path to check effect
    /// changes without rehydrating the AST.
    ///
    /// `budget_cost` (#247) records the function's declared
    /// `[budget(N)]` cost. Optional with `skip_serializing_if`, so
    /// pre-#247 ops without a declared budget continue to hash to
    /// their original `OpId` (additive serialization, same trick
    /// `intent_id` uses). `None` means the function declared no
    /// budget effect; `Some(n)` is the literal `n` from
    /// `[budget(n)]`.
    AddFunction {
        sig_id: SigId,
        stage_id: StageId,
        effects: EffectSet,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        budget_cost: Option<u64>,
    },
    /// Function removed; `last_stage_id` is the head before the
    /// remove (so blame can walk the predecessor without scanning).
    RemoveFunction {
        sig_id: SigId,
        last_stage_id: StageId,
    },
    /// Function body changed; signature unchanged.
    ///
    /// `from_budget` / `to_budget` (#247) record the declared
    /// `[budget(N)]` on each side. Same `Option` + `skip` discipline
    /// as `AddFunction.budget_cost` — pre-#247 ops keep their
    /// `OpId`s. The pair is what `lex op log --budget-drift` reads
    /// to surface "budget grew/shrank" diffs without rehydrating
    /// stages.
    ModifyBody {
        sig_id: SigId,
        from_stage_id: StageId,
        to_stage_id: StageId,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        from_budget: Option<u64>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        to_budget: Option<u64>,
    },
    /// Symbol renamed. The body hash is preserved (`body_stage_id`)
    /// so two renames of the same body collapse to the same OpId
    /// and `lex blame` walks the rename as a single causal event
    /// rather than `delete + add`.
    RenameSymbol {
        from: SigId,
        to: SigId,
        body_stage_id: StageId,
    },
    /// Effect signature changed. Captures both old and new effect
    /// sets so the write-time gate (#130) can verify importers
    /// haven't silently broken.
    ///
    /// `from_budget` / `to_budget` (#247) capture the declared
    /// `[budget(N)]` on each side. ChangeEffectSig usually fires
    /// because the effect *list* changed; #247 makes budget drift
    /// visible without forcing a full effect-set diff.
    ChangeEffectSig {
        sig_id: SigId,
        from_stage_id: StageId,
        to_stage_id: StageId,
        from_effects: EffectSet,
        to_effects: EffectSet,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        from_budget: Option<u64>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        to_budget: Option<u64>,
    },
    /// Import added to a file. `in_file` is the canonical path
    /// (relative to the repo root, forward-slashes) so two
    /// machines hashing the same edit get the same OpId.
    AddImport {
        in_file: String,
        module: ModuleRef,
    },
    /// Import removed from a file. Carries the same `in_file` /
    /// `module` pair as `AddImport`.
    RemoveImport {
        in_file: String,
        module: ModuleRef,
    },
    /// New type published; `stage_id` becomes the head stage for
    /// `sig_id`. Unlike `AddFunction`, carries no effects or budget.
    AddType {
        sig_id: SigId,
        stage_id: StageId,
    },
    /// Type removed; `last_stage_id` plays the same role as in
    /// `RemoveFunction`.
    RemoveType {
        sig_id: SigId,
        last_stage_id: StageId,
    },
    /// Type definition changed from `from_stage_id` to
    /// `to_stage_id` under the same `sig_id`. No budget fields.
    ModifyType {
        sig_id: SigId,
        from_stage_id: StageId,
        to_stage_id: StageId,
    },
    /// Merge of two branch heads. Carries only an informational count
    /// of resolved sigs so two structurally identical merges of
    /// different sizes don't collide on op_id; the per-sig deltas live
    /// in `OperationRecord::produces` (`StageTransition::Merge`).
    Merge {
        resolved: usize,
    },
}
223
224impl OperationKind {
225    /// The `(SigId, Option<StageId>)` an op kind targets, as used by
226    /// `StageTransition::Merge::entries`. Used by the merge-commit
227    /// path (#134) to translate a `Resolution::Custom { op }` into
228    /// the head-map delta the merge op records:
229    ///
230    /// * Adds → `(sig, Some(stage_id))`
231    /// * Modifies → `(sig, Some(to_stage_id))`
232    /// * Removes → `(sig, None)`
233    /// * Renames → `(to_sig, Some(body_stage_id))`
234    /// * `AddImport` / `RemoveImport` / nested `Merge` → `None`
235    ///   (no single sig→stage delta)
236    pub fn merge_target(&self) -> Option<(SigId, Option<StageId>)> {
237        use OperationKind::*;
238        match self {
239            AddFunction { sig_id, stage_id, .. }
240            | AddType { sig_id, stage_id }
241                => Some((sig_id.clone(), Some(stage_id.clone()))),
242            ModifyBody { sig_id, to_stage_id, .. }
243            | ChangeEffectSig { sig_id, to_stage_id, .. }
244            | ModifyType { sig_id, to_stage_id, .. }
245                => Some((sig_id.clone(), Some(to_stage_id.clone()))),
246            RemoveFunction { sig_id, .. }
247            | RemoveType { sig_id, .. }
248                => Some((sig_id.clone(), None)),
249            RenameSymbol { to, body_stage_id, .. }
250                => Some((to.clone(), Some(body_stage_id.clone()))),
251            AddImport { .. } | RemoveImport { .. } | Merge { .. } => None,
252        }
253    }
254
255    /// `(from_budget, to_budget)` for ops that carry a budget delta
256    /// (#247). `(None, None)` for ops where the budget isn't part
257    /// of the canonical payload — `RemoveFunction`, `RenameSymbol`,
258    /// imports, and merges. `AddFunction` reports `(None,
259    /// Some(cost))` for "this is the initial cost." Used by `lex op
260    /// show`, `lex op log --budget-drift`, and `lex audit --budget`.
261    pub fn budget_delta(&self) -> (Option<u64>, Option<u64>) {
262        use OperationKind::*;
263        match self {
264            AddFunction { budget_cost, .. } => (None, *budget_cost),
265            ModifyBody { from_budget, to_budget, .. }
266            | ChangeEffectSig { from_budget, to_budget, .. } => (*from_budget, *to_budget),
267            _ => (None, None),
268        }
269    }
270
271    /// The `SigId` an op touches if it carries a budget — used for
272    /// per-sig audit rollups in `lex audit --budget`. Returns `None`
273    /// for ops without a relevant budget (the same set as the
274    /// `_ => (None, None)` arm of [`Self::budget_delta`]).
275    pub fn budget_sig(&self) -> Option<&SigId> {
276        use OperationKind::*;
277        match self {
278            AddFunction { sig_id, .. }
279            | ModifyBody { sig_id, .. }
280            | ChangeEffectSig { sig_id, .. } => Some(sig_id),
281            _ => None,
282        }
283    }
284}
285
286/// Extract the declared `[budget(N)]` integer from an [`EffectSet`],
287/// if any (#247).
288///
289/// Effect labels in [`EffectSet`] are produced by
290/// [`crate::compute_diff::effect_label`]: a `[budget(50)]`
291/// declaration becomes the literal string `"budget(50)"`. This
292/// helper parses that literal back to the integer; bare `"budget"`
293/// (no arg) returns `None` because the magnitude is unknown. A
294/// stage with multiple budget declarations — which the type-
295/// checker should reject anyway — picks the smallest, conservative
296/// answer for `lex audit --budget`.
297pub fn budget_from_effects(effects: &EffectSet) -> Option<u64> {
298    let mut min_cost: Option<u64> = None;
299    for label in effects {
300        let Some(rest) = label.strip_prefix("budget(") else { continue };
301        let Some(inner) = rest.strip_suffix(')') else { continue };
302        let Ok(n) = inner.parse::<u64>() else { continue };
303        min_cost = Some(min_cost.map(|c| c.min(n)).unwrap_or(n));
304    }
305    min_cost
306}
307
/// The operation as a whole — its kind and the causal predecessors
/// it assumes. The `OpId` is computed from this plus a sorted view
/// of `parents`.
///
/// Operations without parents are valid and represent "applies to
/// the empty repository" or "applies to the synthetic genesis
/// state." `lex store migrate v1→v2` will produce parentless ops
/// for stages it can't trace back to a clear predecessor.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Operation {
    /// What the operation does. Flattened, so the `op` tag and the
    /// kind's payload fields sit at the top level of the JSON
    /// object next to `parents` and `intent_id`.
    #[serde(flatten)]
    pub kind: OperationKind,
    /// Operations whose `produces` this op assumes. Sorted before
    /// hashing for canonical form. Empty for ops against the empty
    /// repo.
    ///
    /// An empty list is omitted from the serialized JSON and is
    /// restored as empty on read (`default`).
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub parents: Vec<OpId>,
    /// The intent that caused this op, if known. Optional because
    /// operations produced outside an agent harness (e.g. a human
    /// running `lex publish` directly) don't have one.
    ///
    /// Including the intent in the canonical hash means the same
    /// logical change made under different intents produces
    /// different `OpId`s — causally distinct events should hash
    /// distinctly. Ops with `intent_id: None` keep their existing
    /// hashes (the field is omitted from the canonical JSON via
    /// `skip_serializing_if`), so this is backwards-compatible
    /// for stores written before #131.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub intent_id: Option<crate::intent::IntentId>,
}
339
340impl Operation {
341    /// Construct an operation against zero or more parents. Caller
342    /// supplies parents in any order; canonicalization sorts them
343    /// before hashing.
344    pub fn new(kind: OperationKind, parents: impl IntoIterator<Item = OpId>) -> Self {
345        let mut parents: Vec<OpId> = parents.into_iter().collect();
346        parents.sort();
347        parents.dedup();
348        Self { kind, parents, intent_id: None }
349    }
350
351    /// Tag this operation with the intent that produced it. The
352    /// builder shape keeps existing call sites untouched; agent
353    /// harnesses that record intent call this once before
354    /// applying the op.
355    pub fn with_intent(mut self, intent_id: impl Into<crate::intent::IntentId>) -> Self {
356        self.intent_id = Some(intent_id.into());
357        self
358    }
359
360    /// Compute this operation's content-addressed identity under the
361    /// current production canonical form ([`OperationFormat::CURRENT`]).
362    ///
363    /// Stable across runs and machines: same `(kind, payload,
364    /// sorted parents, intent_id)` produces the same `OpId`. The
365    /// invariant #129's automatic-dedup behavior relies on.
366    pub fn op_id(&self) -> OpId {
367        self.op_id_in(OperationFormat::CURRENT)
368    }
369
370    /// Compute the `OpId` under a specific canonical-form version.
371    ///
372    /// Used by [`crate::migrate`] to derive new `OpId`s when porting
373    /// a store across format versions. Production code should call
374    /// [`Self::op_id`].
375    pub fn op_id_in(&self, format: OperationFormat) -> OpId {
376        canonical::hash_bytes(&self.canonical_bytes_in(format))
377    }
378
379    /// The byte sequence that gets hashed to produce [`Self::op_id`]
380    /// under the current canonical form. Equivalent to
381    /// `self.canonical_bytes_in(OperationFormat::CURRENT)`.
382    ///
383    /// Exposed (not just consumed by `op_id`) so golden tests can pin
384    /// the exact pre-image. **Not** equal to `serde_json::to_vec(&op)`
385    /// in general — the on-disk JSON skips empty `parents` and
386    /// `None` `intent_id`, while the canonical form always emits a
387    /// (sorted, deduped) `parents` array. See `canonical.rs` for the
388    /// full V1 canonical-form spec.
389    pub fn canonical_bytes(&self) -> Vec<u8> {
390        self.canonical_bytes_in(OperationFormat::CURRENT)
391    }
392
393    /// The pre-image hashed under a specific canonical-form version.
394    ///
395    /// Today every `OperationFormat` variant routes to V1's encoder
396    /// (only V1 exists in production). When V2 lands, this match
397    /// gains an arm and the migration tool's encoder closure routes
398    /// here.
399    pub fn canonical_bytes_in(&self, format: OperationFormat) -> Vec<u8> {
400        match format {
401            OperationFormat::V1 => self.canonical_bytes_v1(),
402        }
403    }
404
405    fn canonical_bytes_v1(&self) -> Vec<u8> {
406        // Build a transient hashable view rather than hashing
407        // `self` directly so the parent ordering is canonical
408        // even if a caller hand-constructs an `Operation` with
409        // unsorted parents.
410        let canonical = CanonicalView {
411            kind: &self.kind,
412            parents: self.parents.iter().collect::<IndexSet<_>>().into_iter().collect::<BTreeSet<_>>(),
413            intent_id: self.intent_id.as_deref(),
414        };
415        serde_json::to_vec(&canonical).expect("canonical serialization")
416    }
417}
418
/// Hashable shadow of [`Operation`] with parents in a `BTreeSet` so
/// the serialization is order-independent regardless of how the
/// caller constructed the live operation. Never persisted; lives
/// only as a transient for hashing.
///
/// Borrows everything from the source operation; only `Serialize`
/// is derived because the view is never read back.
#[derive(Serialize)]
struct CanonicalView<'a> {
    /// The op kind, flattened so its `op` tag and payload fields
    /// appear at the top level of the serialized object.
    #[serde(flatten)]
    kind: &'a OperationKind,
    /// Parent ids as a sorted, duplicate-free set. Unlike
    /// `Operation::parents`, this is always emitted, even when
    /// empty.
    parents: BTreeSet<&'a OpId>,
    /// `skip_serializing_if = "Option::is_none"` keeps existing
    /// `OpId`s stable for ops without an intent — the field is
    /// omitted from the canonical JSON entirely.
    #[serde(skip_serializing_if = "Option::is_none")]
    intent_id: Option<&'a str>,
}
434
/// An operation paired with its computed `OpId` and the resulting
/// stage transition. This is what gets persisted under
/// `<root>/ops/<OpId>.json`.
///
/// `format_version` records the canonical form the `op_id` was
/// computed under. Pre-#244 stores didn't emit this field; reading
/// such records deserializes to [`OperationFormat::V1`] (the
/// implicit pre-versioning format), and writing V1 records continues
/// to omit it (`skip_serializing_if = is_implicit`) so adding the
/// field doesn't rotate any existing `OpId` or change any on-disk
/// byte. Records written under a future format will explicitly
/// carry their version tag.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct OperationRecord {
    /// Content-addressed identity of `op`, as computed by
    /// [`Operation::op_id`] when the record is built via
    /// [`OperationRecord::new`].
    pub op_id: OpId,
    /// Canonical-form version `op_id` was computed under. Omitted
    /// from the JSON when implicit (V1); defaults to V1 on read.
    #[serde(default, skip_serializing_if = "OperationFormat::is_implicit")]
    pub format_version: OperationFormat,
    /// The operation itself, flattened into the record's top-level
    /// JSON object.
    #[serde(flatten)]
    pub op: Operation,
    /// The stage transition that applying `op` results in.
    pub produces: StageTransition,
}
456
457impl OperationRecord {
458    pub fn new(op: Operation, produces: StageTransition) -> Self {
459        let op_id = op.op_id();
460        Self { op_id, format_version: OperationFormat::CURRENT, op, produces }
461    }
462}
463
// Unit tests for `OpId` derivation: determinism, sensitivity to kind /
// parents / intent, serde round-trips, and golden hashes that pin the
// V1 canonical form.
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared fixture: an `AddFunction` for `fac::Int->Int` with no
    /// effects and no budget. The golden-hash tests depend on these
    /// exact field values.
    fn add_factorial() -> OperationKind {
        OperationKind::AddFunction {
            sig_id: "fac::Int->Int".into(),
            stage_id: "abc123".into(),
            effects: BTreeSet::new(),
            budget_cost: None,
        }
    }

    #[test]
    fn identical_operations_have_identical_op_ids() {
        let a = Operation::new(add_factorial(), []);
        let b = Operation::new(add_factorial(), []);
        assert_eq!(a.op_id(), b.op_id());
    }

    #[test]
    fn different_operations_have_different_op_ids() {
        let a = Operation::new(add_factorial(), []);
        let b = Operation::new(
            OperationKind::AddFunction {
                sig_id: "double::Int->Int".into(),
                stage_id: "abc123".into(),
                effects: BTreeSet::new(),
                budget_cost: None,
            },
            [],
        );
        assert_ne!(a.op_id(), b.op_id());
    }

    #[test]
    fn parent_set_changes_op_id() {
        let no_parent = Operation::new(add_factorial(), []);
        let with_parent = Operation::new(add_factorial(), ["op-parent-1".into()]);
        assert_ne!(no_parent.op_id(), with_parent.op_id());
    }

    #[test]
    fn parent_order_does_not_affect_op_id() {
        let a = Operation::new(add_factorial(), ["b".into(), "a".into(), "c".into()]);
        let b = Operation::new(add_factorial(), ["c".into(), "a".into(), "b".into()]);
        assert_eq!(a.op_id(), b.op_id());
        // and the stored form is sorted.
        assert_eq!(a.parents, vec!["a".to_string(), "b".to_string(), "c".to_string()]);
    }

    #[test]
    fn duplicate_parents_are_deduped() {
        let with_dups = Operation::new(
            add_factorial(),
            ["a".into(), "a".into(), "b".into()],
        );
        let no_dups = Operation::new(
            add_factorial(),
            ["a".into(), "b".into()],
        );
        assert_eq!(with_dups.op_id(), no_dups.op_id());
        assert_eq!(with_dups.parents, vec!["a".to_string(), "b".to_string()]);
    }

    #[test]
    fn rename_with_same_body_hashes_equal_across_runs() {
        // Two independent runs producing the same rename against the
        // same parent should produce the same OpId — this is the
        // automatic-dedup property #129 relies on for distributed
        // agents.
        let kind = OperationKind::RenameSymbol {
            from: "parse::Str->Int".into(),
            to: "parse_int::Str->Int".into(),
            body_stage_id: "abc123".into(),
        };
        let a = Operation::new(kind.clone(), ["op-parent".into()]);
        let b = Operation::new(kind, ["op-parent".into()]);
        assert_eq!(a.op_id(), b.op_id());
    }

    #[test]
    fn rename_does_not_collide_with_delete_plus_add() {
        // The whole point of `RenameSymbol` is that it's a different
        // OpId from the (semantically-equivalent) `RemoveFunction +
        // AddFunction` pair. Causal history sees one event, not two.
        let rename = Operation::new(
            OperationKind::RenameSymbol {
                from: "parse::Str->Int".into(),
                to: "parse_int::Str->Int".into(),
                body_stage_id: "abc123".into(),
            },
            ["op-parent".into()],
        );
        let remove = Operation::new(
            OperationKind::RemoveFunction {
                sig_id: "parse::Str->Int".into(),
                last_stage_id: "abc123".into(),
            },
            ["op-parent".into()],
        );
        let add = Operation::new(
            OperationKind::AddFunction {
                sig_id: "parse_int::Str->Int".into(),
                stage_id: "abc123".into(),
                effects: BTreeSet::new(),
                budget_cost: None,
            },
            ["op-parent".into()],
        );
        assert_ne!(rename.op_id(), remove.op_id());
        assert_ne!(rename.op_id(), add.op_id());
    }

    #[test]
    fn effect_set_order_does_not_affect_op_id() {
        // Effects are a BTreeSet so iteration is sorted. Build two
        // ops via different insertion orders and confirm the
        // canonical form is identical.
        let a_effects: EffectSet = ["io".into(), "fs_write".into()].into_iter().collect();
        let b_effects: EffectSet = ["fs_write".into(), "io".into()].into_iter().collect();
        let a = Operation::new(
            OperationKind::AddFunction {
                sig_id: "x".into(), stage_id: "s".into(), effects: a_effects,
                budget_cost: None,
            },
            [],
        );
        let b = Operation::new(
            OperationKind::AddFunction {
                sig_id: "x".into(), stage_id: "s".into(), effects: b_effects,
                budget_cost: None,
            },
            [],
        );
        assert_eq!(a.op_id(), b.op_id());
    }

    #[test]
    fn op_id_is_64_char_lowercase_hex() {
        let id = Operation::new(add_factorial(), []).op_id();
        assert_eq!(id.len(), 64);
        assert!(id.chars().all(|c| c.is_ascii_digit() || ('a'..='f').contains(&c)));
    }

    #[test]
    fn round_trip_through_serde_json() {
        let op = Operation::new(
            OperationKind::ChangeEffectSig {
                sig_id: "f".into(),
                from_stage_id: "old".into(),
                to_stage_id: "new".into(),
                from_effects: BTreeSet::new(),
                to_effects: ["io".into()].into_iter().collect(),
                from_budget: None,
                to_budget: None,
            },
            ["op-parent".into()],
        );
        let json = serde_json::to_string(&op).expect("serialize");
        let back: Operation = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(op, back);
        assert_eq!(op.op_id(), back.op_id());
    }

    #[test]
    fn operation_record_carries_op_id() {
        let op = Operation::new(add_factorial(), []);
        let expected = op.op_id();
        let rec = OperationRecord::new(
            op,
            StageTransition::Create {
                sig_id: "fac::Int->Int".into(),
                stage_id: "abc123".into(),
            },
        );
        assert_eq!(rec.op_id, expected);
    }

    #[test]
    fn intent_id_is_part_of_op_id_canonical_hash() {
        // The dedup property: same `(kind, parents, intent_id)`
        // produces the same OpId. Different intent_ids on
        // otherwise-identical ops produce different OpIds, so
        // causally distinct events (different prompts) hash
        // distinctly.
        let no_intent = Operation::new(add_factorial(), []);
        let with_intent_a = Operation::new(add_factorial(), [])
            .with_intent("intent-a");
        let with_intent_b = Operation::new(add_factorial(), [])
            .with_intent("intent-b");
        let with_intent_a_again = Operation::new(add_factorial(), [])
            .with_intent("intent-a");

        // No-intent op is distinct from any intent-tagged variant.
        assert_ne!(no_intent.op_id(), with_intent_a.op_id());
        // Different intents → different OpIds.
        assert_ne!(with_intent_a.op_id(), with_intent_b.op_id());
        // Same intent → same OpId (the load-bearing dedup invariant).
        assert_eq!(with_intent_a.op_id(), with_intent_a_again.op_id());
    }

    #[test]
    fn op_without_intent_keeps_pre_intent_op_id() {
        // Backwards-compat invariant: an op constructed without an
        // intent must hash to the same value as it would have
        // before #131 added the field. The golden test below pins
        // the exact hash; this one asserts that adding then
        // resetting to None doesn't drift.
        let mut op = Operation::new(add_factorial(), []);
        let baseline = op.op_id();
        op.intent_id = Some("transient".into());
        let with_intent = op.op_id();
        assert_ne!(baseline, with_intent);
        op.intent_id = None;
        let back = op.op_id();
        assert_eq!(baseline, back);
    }

    /// Golden hash. If this changes, the canonical form has shifted
    /// and *every* op_id in every existing store has changed too —
    /// that's a major-version event for the data model and should
    /// be a deliberate decision, not an accident from reordering
    /// fields. Update with care.
    #[test]
    fn canonical_form_is_stable_for_a_known_input() {
        let op = Operation::new(
            OperationKind::AddFunction {
                sig_id: "fac::Int->Int".into(),
                stage_id: "abc123".into(),
                effects: BTreeSet::new(),
                budget_cost: None,
            },
            [],
        );
        assert_eq!(
            op.op_id(),
            "f112990d31ef2a63f3e5ca5680637ed36a54bc7e8230510ae0c0e93fcb39d104"
        );
    }

    #[test]
    fn merge_kind_round_trips() {
        let op = Operation::new(
            OperationKind::Merge { resolved: 3 },
            ["op-a".into(), "op-b".into()],
        );
        let json = serde_json::to_string(&op).expect("ser");
        let back: Operation = serde_json::from_str(&json).expect("de");
        assert_eq!(op, back);
        assert_eq!(op.op_id(), back.op_id());
    }

    #[test]
    fn merge_stage_transition_round_trips() {
        let mut entries = BTreeMap::new();
        entries.insert("sig-a".to_string(), Some("stage-a".to_string()));
        entries.insert("sig-b".to_string(), None); // removed by merge
        let t = StageTransition::Merge { entries };
        let json = serde_json::to_string(&t).expect("ser");
        let back: StageTransition = serde_json::from_str(&json).expect("de");
        assert_eq!(t, back);
    }

    #[test]
    fn merge_resolved_count_changes_op_id() {
        // Two merges with the same parents but different resolved counts
        // must hash differently — keeps structurally distinct merges from
        // colliding on op_id.
        let parents: Vec<OpId> = vec!["op-a".into(), "op-b".into()];
        let one = Operation::new(OperationKind::Merge { resolved: 1 }, parents.clone());
        let two = Operation::new(OperationKind::Merge { resolved: 2 }, parents);
        assert_ne!(one.op_id(), two.op_id());
    }

    #[test]
    fn existing_add_function_op_id_is_unchanged_after_merge_added() {
        // Constructing the new Merge variant in the same enum must not
        // perturb the canonical bytes of existing variants. The golden
        // hash test above (`canonical_form_is_stable_for_a_known_input`)
        // checks the same literal value; this one verifies the property
        // holds even after a Merge op has been built.
        let _merge = Operation::new(
            OperationKind::Merge { resolved: 0 },
            ["op-x".into(), "op-y".into()],
        );
        let op = Operation::new(add_factorial(), []);
        assert_eq!(
            op.op_id(),
            "f112990d31ef2a63f3e5ca5680637ed36a54bc7e8230510ae0c0e93fcb39d104"
        );
    }
}
751            op.op_id(),
752            "f112990d31ef2a63f3e5ca5680637ed36a54bc7e8230510ae0c0e93fcb39d104"
753        );
754    }
755}