Skip to main content

lex_vcs/
operation.rs

//! The [`Operation`] struct, its [`OperationKind`] payload enum, and
//! [`OperationRecord`] (an operation plus its computed `OpId` and the
//! resulting stage transition).
3//!
4//! See `lib.rs` for the design context and #129 for the issue.
5
6use indexmap::IndexSet;
7use serde::{Deserialize, Serialize};
8use std::collections::{BTreeMap, BTreeSet};
9
10use crate::canonical;
11
/// Signature identity of a function or type — the part that stays
/// stable across body edits. Carries the same string identity
/// `lex-store` uses; we keep it as `String` here so this crate has
/// no dependency on `lex-store`'s internals.
///
/// This is a plain type alias, not a newtype: any `String` is
/// accepted wherever a `SigId` is expected.
pub type SigId = String;
17
/// Content hash of a single stage (function body, type def, ...).
/// The same string that names the stage's file under
/// `<root>/stages/<SigId>/implementations/<StageId>.ast.json`.
pub type StageId = String;
22
/// Identity of an operation: SHA-256 over the canonical form of
/// `(kind, payload, sorted parents, intent_id)`, in lowercase hex
/// (64 chars). `intent_id` only participates when it is set. Two
/// operations with identical payloads, parent sets, and intent
/// produce identical `OpId`s; the store dedupes on this.
pub type OpId = String;
28
/// Sorted set of effect-kind strings (e.g. `["fs_write", "io"]`).
/// `BTreeSet` so the canonical form is order-independent for
/// hashing.
///
/// A declared `[budget(N)]` shows up as the label `"budget(N)"`;
/// see [`budget_from_effects`] for parsing it back to an integer.
pub type EffectSet = BTreeSet<String>;
33
/// Reference to an imported module — either a stdlib name
/// (`std.io`) or a local path (`./helpers`). Kept as a string so
/// this crate doesn't pull in `lex-syntax`'s parser.
///
/// Carried by [`OperationKind::AddImport`] and
/// [`OperationKind::RemoveImport`].
pub type ModuleRef = String;
38
/// Version tag for the operation canonical form (#244).
///
/// The pre-image bytes hashed to derive an `OpId` are not stable
/// across schema evolutions: adding a field to `OperationKind` or
/// changing its serde representation rotates every existing `OpId`.
/// This enum tags the encoding used so a long-lived store can detect
/// mismatches and migrate explicitly via [`crate::migrate`].
///
/// **Today only [`Self::V1`] is in production.** Adding a future
/// variant requires:
///
/// 1. A new arm in [`Operation::canonical_bytes_in`].
/// 2. An update to the canonical-form spec in [`crate::canonical`].
/// 3. A `CHANGELOG.md` entry under `### Internal` calling out the
///    `OpId` rotation.
/// 4. A migration recipe via [`crate::migrate::plan_migration`] —
///    the mechanism is encoder-agnostic, but each new variant needs
///    its own `canonical_bytes_in` arm.
///
/// Variants serialize as their lowercased names (`V1` → `"v1"`).
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, Default,
)]
#[serde(rename_all = "lowercase")]
pub enum OperationFormat {
    /// The original canonical form. Also the `Default`, which is
    /// what an [`OperationRecord`] without a `format_version` field
    /// deserializes to.
    #[default]
    V1,
}
65
66impl OperationFormat {
67    /// The format every newly-emitted op uses today.
68    pub const CURRENT: OperationFormat = OperationFormat::V1;
69
70    /// `true` for the implicit format (V1). Used by the
71    /// `skip_serializing_if` hook on [`OperationRecord::format_version`]
72    /// so existing V1 stores keep byte-identical on-disk JSON —
73    /// adding the version field doesn't itself rotate any `OpId`.
74    pub fn is_implicit(&self) -> bool {
75        matches!(self, OperationFormat::V1)
76    }
77}
78
/// Effect of applying an operation on a stage's content-addressed
/// identity. Used as the `produces` field of an [`OperationRecord`]
/// so consumers can answer "after this op, what's the head stage
/// for this SigId?" without rerunning the apply step.
///
/// Serialized as an internally tagged enum: the variant name is
/// written in snake_case into a `"kind"` field (e.g. `"create"`,
/// `"import_only"`) next to the variant's own fields.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum StageTransition {
    /// New SigId; produces a stage that didn't exist before.
    Create { sig_id: SigId, stage_id: StageId },
    /// Existing SigId; replaces its head stage.
    Replace { sig_id: SigId, from: StageId, to: StageId },
    /// SigId removed; no head stage afterwards.
    Remove { sig_id: SigId, last: StageId },
    /// SigId renamed; same body hash, different signature identity.
    Rename { from: SigId, to: SigId, body_stage_id: StageId },
    /// Import-only change; doesn't touch any stage.
    ImportOnly,
    /// Merge op result. `entries` lists only the sigs whose head
    /// changed relative to the merge op's first parent (`dst_head`):
    /// `Some(stage_id)` sets the head; `None` removes the sig.
    /// Sigs unaffected by the merge are not listed.
    ///
    /// **Canonical-form contract:** `BTreeMap` is load-bearing —
    /// iteration is sorted by `SigId`, so on-disk JSON for two
    /// callers that resolved the same conflicts in different
    /// orders produces byte-identical output. Switching to
    /// `HashMap` here would break canonical stability of the
    /// `OperationRecord` JSON file and is rejected by the
    /// canonical-form spec in `crate::canonical`.
    Merge {
        entries: BTreeMap<SigId, Option<StageId>>,
    },
}
112
/// The kinds of operations that produce stage transitions. Mirrors
/// the initial set in #129; new kinds (`MoveBetweenFiles`,
/// `SplitFunction`, `ExtractType`) can be added later as long as
/// they're appended at the end of this enum or use explicit
/// `#[serde(rename = "...")]` tags so existing `OpId`s stay stable.
///
/// Serialized as an internally tagged enum: the variant name goes
/// into an `"op"` field in snake_case (e.g. `"add_function"`) next
/// to the variant's own fields. The serialized JSON is what gets
/// hashed into the `OpId`, and fields are emitted in declaration
/// order — so reordering or renaming fields of an existing variant
/// changes its `OpId`s.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "op", rename_all = "snake_case")]
pub enum OperationKind {
    /// New function published. `effects` is the effect set declared
    /// in the signature; tracked here (not just inside the stage)
    /// so #130's write-time gate has a cheap path to check effect
    /// changes without rehydrating the AST.
    ///
    /// `budget_cost` (#247) records the function's declared
    /// `[budget(N)]` cost. Optional with `skip_serializing_if`, so
    /// pre-#247 ops without a declared budget continue to hash to
    /// their original `OpId` (additive serialization, same trick
    /// `intent_id` uses). `None` means the function declared no
    /// budget effect; `Some(n)` is the literal `n` from
    /// `[budget(n)]`.
    AddFunction {
        sig_id: SigId,
        stage_id: StageId,
        effects: EffectSet,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        budget_cost: Option<u64>,
    },
    /// Function removed; `last_stage_id` is the head before the
    /// remove (so blame can walk the predecessor without scanning).
    RemoveFunction {
        sig_id: SigId,
        last_stage_id: StageId,
    },
    /// Function body changed; signature unchanged.
    ///
    /// `from_budget` / `to_budget` (#247) record the declared
    /// `[budget(N)]` on each side. Same `Option` + `skip` discipline
    /// as `AddFunction.budget_cost` — pre-#247 ops keep their
    /// `OpId`s. The pair is what `lex op log --budget-drift` reads
    /// to surface "budget grew/shrank" diffs without rehydrating
    /// stages.
    ModifyBody {
        sig_id: SigId,
        from_stage_id: StageId,
        to_stage_id: StageId,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        from_budget: Option<u64>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        to_budget: Option<u64>,
    },
    /// Symbol renamed. The body hash is preserved (`body_stage_id`)
    /// so two renames of the same body collapse to the same OpId
    /// and `lex blame` walks the rename as a single causal event
    /// rather than `delete + add`.
    RenameSymbol {
        from: SigId,
        to: SigId,
        body_stage_id: StageId,
    },
    /// Effect signature changed. Captures both old and new effect
    /// sets so the write-time gate (#130) can verify importers
    /// haven't silently broken.
    ///
    /// `from_budget` / `to_budget` (#247) capture the declared
    /// `[budget(N)]` on each side. ChangeEffectSig usually fires
    /// because the effect *list* changed; #247 makes budget drift
    /// visible without forcing a full effect-set diff.
    ChangeEffectSig {
        sig_id: SigId,
        from_stage_id: StageId,
        to_stage_id: StageId,
        from_effects: EffectSet,
        to_effects: EffectSet,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        from_budget: Option<u64>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        to_budget: Option<u64>,
    },
    /// Import added to a file. `in_file` is the canonical path
    /// (relative to the repo root, forward-slashes) so two
    /// machines hashing the same edit get the same OpId.
    AddImport {
        in_file: String,
        module: ModuleRef,
    },
    /// Import removed from a file. Same field shape as
    /// [`Self::AddImport`]; `in_file` presumably follows the same
    /// canonical-path convention.
    RemoveImport {
        in_file: String,
        module: ModuleRef,
    },
    /// Type counterpart of [`Self::AddFunction`]: `stage_id` becomes
    /// the head for `sig_id` (see [`Self::merge_target`]). Carries
    /// no effect set and no budget.
    AddType {
        sig_id: SigId,
        stage_id: StageId,
    },
    /// Type counterpart of [`Self::RemoveFunction`]: `sig_id` has no
    /// head afterwards; `last_stage_id` names the stage it had.
    RemoveType {
        sig_id: SigId,
        last_stage_id: StageId,
    },
    /// Type counterpart of [`Self::ModifyBody`] without the budget
    /// pair: the head for `sig_id` moves from `from_stage_id` to
    /// `to_stage_id`.
    ModifyType {
        sig_id: SigId,
        from_stage_id: StageId,
        to_stage_id: StageId,
    },
    /// Merge of two branch heads. Carries only an informational count
    /// of resolved sigs so two structurally identical merges of
    /// different sizes don't collide on op_id; the per-sig deltas live
    /// in `OperationRecord::produces` (`StageTransition::Merge`).
    Merge {
        resolved: usize,
    },
    /// Typed transform: inlined a `let x := v; body` by
    /// substituting `v` for every unshadowed `x` in `body`, then
    /// replacing the entire `Let` node with the substituted body
    /// (#280). The op records the let-binding's position and the
    /// inlined name; the actual substituted value lives in the
    /// content-addressed `to_stage_id` so the op_id stays compact.
    InlineLet {
        sig_id: SigId,
        from_stage_id: StageId,
        to_stage_id: StageId,
        /// Position of the inlined let-binding. Presumably the same
        /// path-style NodeId format as `RenameLocal::let_node`.
        let_node: String,
        /// Name of the binding that was inlined away.
        binding_name: String,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        from_budget: Option<u64>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        to_budget: Option<u64>,
    },
    /// Typed transform: renamed a `let`-bound local within a fn
    /// body (#280). Records the old/new identifiers and the position
    /// of the let-binding in the AST. Body-shape-stable: the renamed
    /// stage typically hashes near the original.
    RenameLocal {
        sig_id: SigId,
        from_stage_id: StageId,
        to_stage_id: StageId,
        /// Path-style NodeId of the `Let` expression at the time of
        /// the transform.
        let_node: String,
        old_name: String,
        new_name: String,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        from_budget: Option<u64>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        to_budget: Option<u64>,
    },
    /// Typed transform: replaced one arm's body in a `Match`
    /// expression (#280). Semantically a `ModifyBody`, but the op
    /// records *which* arm changed and *where* in the AST — so the
    /// op log reads as a semantic edit history rather than as
    /// opaque hash-to-hash bytes.
    ///
    /// `match_node` is the [`lex_ast::ids::NodeId`] of the Match
    /// expression at the time of the transform. NodeIds aren't
    /// stable across structural edits — they're audit-trail metadata,
    /// not re-derivation keys. The authoritative record of the new
    /// stage is `to_stage_id` (content-addressed).
    ///
    /// `from_budget`/`to_budget` follow the same `skip_if_none`
    /// discipline as [`Self::ModifyBody`]: pre-#280 ops continue
    /// hashing to their original `OpId`s.
    ReplaceMatchArm {
        sig_id: SigId,
        from_stage_id: StageId,
        to_stage_id: StageId,
        /// Path-style NodeId of the Match expression that was
        /// modified, captured at transform time. See
        /// [`lex_ast::ids::NodeId`] for the format.
        match_node: String,
        /// Which arm of the match had its body replaced.
        /// NOTE(review): presumably zero-based — confirm against
        /// the transform that emits this op.
        arm_index: usize,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        from_budget: Option<u64>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        to_budget: Option<u64>,
    },
}
287
288impl OperationKind {
289    /// The `(SigId, Option<StageId>)` an op kind targets, as used by
290    /// `StageTransition::Merge::entries`. Used by the merge-commit
291    /// path (#134) to translate a `Resolution::Custom { op }` into
292    /// the head-map delta the merge op records:
293    ///
294    /// * Adds → `(sig, Some(stage_id))`
295    /// * Modifies → `(sig, Some(to_stage_id))`
296    /// * Removes → `(sig, None)`
297    /// * Renames → `(to_sig, Some(body_stage_id))`
298    /// * `AddImport` / `RemoveImport` / nested `Merge` → `None`
299    ///   (no single sig→stage delta)
300    pub fn merge_target(&self) -> Option<(SigId, Option<StageId>)> {
301        use OperationKind::*;
302        match self {
303            AddFunction { sig_id, stage_id, .. }
304            | AddType { sig_id, stage_id }
305                => Some((sig_id.clone(), Some(stage_id.clone()))),
306            ModifyBody { sig_id, to_stage_id, .. }
307            | ChangeEffectSig { sig_id, to_stage_id, .. }
308            | ModifyType { sig_id, to_stage_id, .. }
309            | ReplaceMatchArm { sig_id, to_stage_id, .. }
310            | RenameLocal { sig_id, to_stage_id, .. }
311            | InlineLet { sig_id, to_stage_id, .. }
312                => Some((sig_id.clone(), Some(to_stage_id.clone()))),
313            RemoveFunction { sig_id, .. }
314            | RemoveType { sig_id, .. }
315                => Some((sig_id.clone(), None)),
316            RenameSymbol { to, body_stage_id, .. }
317                => Some((to.clone(), Some(body_stage_id.clone()))),
318            AddImport { .. } | RemoveImport { .. } | Merge { .. } => None,
319        }
320    }
321
322    /// `(from_budget, to_budget)` for ops that carry a budget delta
323    /// (#247). `(None, None)` for ops where the budget isn't part
324    /// of the canonical payload — `RemoveFunction`, `RenameSymbol`,
325    /// imports, and merges. `AddFunction` reports `(None,
326    /// Some(cost))` for "this is the initial cost." Used by `lex op
327    /// show`, `lex op log --budget-drift`, and `lex audit --budget`.
328    pub fn budget_delta(&self) -> (Option<u64>, Option<u64>) {
329        use OperationKind::*;
330        match self {
331            AddFunction { budget_cost, .. } => (None, *budget_cost),
332            ModifyBody { from_budget, to_budget, .. }
333            | ChangeEffectSig { from_budget, to_budget, .. }
334            | ReplaceMatchArm { from_budget, to_budget, .. }
335            | RenameLocal { from_budget, to_budget, .. }
336            | InlineLet { from_budget, to_budget, .. } => (*from_budget, *to_budget),
337            _ => (None, None),
338        }
339    }
340
341    /// The `SigId` an op touches if it carries a budget — used for
342    /// per-sig audit rollups in `lex audit --budget`. Returns `None`
343    /// for ops without a relevant budget (the same set as the
344    /// `_ => (None, None)` arm of [`Self::budget_delta`]).
345    pub fn budget_sig(&self) -> Option<&SigId> {
346        use OperationKind::*;
347        match self {
348            AddFunction { sig_id, .. }
349            | ModifyBody { sig_id, .. }
350            | ChangeEffectSig { sig_id, .. }
351            | ReplaceMatchArm { sig_id, .. }
352            | RenameLocal { sig_id, .. }
353            | InlineLet { sig_id, .. } => Some(sig_id),
354            _ => None,
355        }
356    }
357}
358
359/// Extract the declared `[budget(N)]` integer from an [`EffectSet`],
360/// if any (#247).
361///
362/// Effect labels in [`EffectSet`] are produced by
363/// [`crate::compute_diff::effect_label`]: a `[budget(50)]`
364/// declaration becomes the literal string `"budget(50)"`. This
365/// helper parses that literal back to the integer; bare `"budget"`
366/// (no arg) returns `None` because the magnitude is unknown. A
367/// stage with multiple budget declarations — which the type-
368/// checker should reject anyway — picks the smallest, conservative
369/// answer for `lex audit --budget`.
370pub fn budget_from_effects(effects: &EffectSet) -> Option<u64> {
371    let mut min_cost: Option<u64> = None;
372    for label in effects {
373        let Some(rest) = label.strip_prefix("budget(") else { continue };
374        let Some(inner) = rest.strip_suffix(')') else { continue };
375        let Ok(n) = inner.parse::<u64>() else { continue };
376        min_cost = Some(min_cost.map(|c| c.min(n)).unwrap_or(n));
377    }
378    min_cost
379}
380
/// The operation as a whole — its kind and the causal predecessors
/// it assumes. The `OpId` is computed from this plus a sorted view
/// of `parents`.
///
/// Operations without parents are valid and represent "applies to
/// the empty repository" or "applies to the synthetic genesis
/// state." `lex store migrate v1→v2` will produce parentless ops
/// for stages it can't trace back to a clear predecessor.
///
/// All fields are public, so an `Operation` can be built by hand
/// without going through [`Operation::new`]; hashing therefore
/// re-canonicalizes `parents` instead of trusting the stored order.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Operation {
    /// What the operation does. Flattened, so the kind's `"op"` tag
    /// and payload fields sit at the top level of the serialized
    /// operation rather than under a `kind` key.
    #[serde(flatten)]
    pub kind: OperationKind,
    /// Operations whose `produces` this op assumes. Sorted before
    /// hashing for canonical form. Empty for ops against the empty
    /// repo.
    ///
    /// Omitted from the serialized form when empty, and defaults to
    /// empty when absent on read.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub parents: Vec<OpId>,
    /// The intent that caused this op, if known. Optional because
    /// operations produced outside an agent harness (e.g. a human
    /// running `lex publish` directly) don't have one.
    ///
    /// Including the intent in the canonical hash means the same
    /// logical change made under different intents produces
    /// different `OpId`s — causally distinct events should hash
    /// distinctly. Ops with `intent_id: None` keep their existing
    /// hashes (the field is omitted from the canonical JSON via
    /// `skip_serializing_if`), so this is backwards-compatible
    /// for stores written before #131.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub intent_id: Option<crate::intent::IntentId>,
}
412
413impl Operation {
414    /// Construct an operation against zero or more parents. Caller
415    /// supplies parents in any order; canonicalization sorts them
416    /// before hashing.
417    pub fn new(kind: OperationKind, parents: impl IntoIterator<Item = OpId>) -> Self {
418        let mut parents: Vec<OpId> = parents.into_iter().collect();
419        parents.sort();
420        parents.dedup();
421        Self { kind, parents, intent_id: None }
422    }
423
424    /// Tag this operation with the intent that produced it. The
425    /// builder shape keeps existing call sites untouched; agent
426    /// harnesses that record intent call this once before
427    /// applying the op.
428    pub fn with_intent(mut self, intent_id: impl Into<crate::intent::IntentId>) -> Self {
429        self.intent_id = Some(intent_id.into());
430        self
431    }
432
433    /// Compute this operation's content-addressed identity under the
434    /// current production canonical form ([`OperationFormat::CURRENT`]).
435    ///
436    /// Stable across runs and machines: same `(kind, payload,
437    /// sorted parents, intent_id)` produces the same `OpId`. The
438    /// invariant #129's automatic-dedup behavior relies on.
439    pub fn op_id(&self) -> OpId {
440        self.op_id_in(OperationFormat::CURRENT)
441    }
442
443    /// Compute the `OpId` under a specific canonical-form version.
444    ///
445    /// Used by [`crate::migrate`] to derive new `OpId`s when porting
446    /// a store across format versions. Production code should call
447    /// [`Self::op_id`].
448    pub fn op_id_in(&self, format: OperationFormat) -> OpId {
449        canonical::hash_bytes(&self.canonical_bytes_in(format))
450    }
451
452    /// The byte sequence that gets hashed to produce [`Self::op_id`]
453    /// under the current canonical form. Equivalent to
454    /// `self.canonical_bytes_in(OperationFormat::CURRENT)`.
455    ///
456    /// Exposed (not just consumed by `op_id`) so golden tests can pin
457    /// the exact pre-image. **Not** equal to `serde_json::to_vec(&op)`
458    /// in general — the on-disk JSON skips empty `parents` and
459    /// `None` `intent_id`, while the canonical form always emits a
460    /// (sorted, deduped) `parents` array. See `canonical.rs` for the
461    /// full V1 canonical-form spec.
462    pub fn canonical_bytes(&self) -> Vec<u8> {
463        self.canonical_bytes_in(OperationFormat::CURRENT)
464    }
465
466    /// The pre-image hashed under a specific canonical-form version.
467    ///
468    /// Today every `OperationFormat` variant routes to V1's encoder
469    /// (only V1 exists in production). When V2 lands, this match
470    /// gains an arm and the migration tool's encoder closure routes
471    /// here.
472    pub fn canonical_bytes_in(&self, format: OperationFormat) -> Vec<u8> {
473        match format {
474            OperationFormat::V1 => self.canonical_bytes_v1(),
475        }
476    }
477
478    fn canonical_bytes_v1(&self) -> Vec<u8> {
479        // Build a transient hashable view rather than hashing
480        // `self` directly so the parent ordering is canonical
481        // even if a caller hand-constructs an `Operation` with
482        // unsorted parents.
483        let canonical = CanonicalView {
484            kind: &self.kind,
485            parents: self.parents.iter().collect::<IndexSet<_>>().into_iter().collect::<BTreeSet<_>>(),
486            intent_id: self.intent_id.as_deref(),
487        };
488        serde_json::to_vec(&canonical).expect("canonical serialization")
489    }
490}
491
/// Hashable shadow of [`Operation`] with parents in a `BTreeSet` so
/// the serialization is order-independent regardless of how the
/// caller constructed the live operation. Never persisted; lives
/// only as a transient for hashing.
///
/// The JSON serialization of this struct is the hashed pre-image,
/// and fields are written in declaration order — reordering or
/// renaming the fields here would change every `OpId`.
#[derive(Serialize)]
struct CanonicalView<'a> {
    /// Flattened, so the kind's `"op"` tag and payload fields appear
    /// at the top level of the canonical JSON.
    #[serde(flatten)]
    kind: &'a OperationKind,
    /// Sorted, duplicate-free parent ids. Deliberately has no
    /// `skip_serializing_if`: the `parents` array is always emitted,
    /// even when empty.
    parents: BTreeSet<&'a OpId>,
    /// `skip_serializing_if = "Option::is_none"` keeps existing
    /// `OpId`s stable for ops without an intent — the field is
    /// omitted from the canonical JSON entirely.
    #[serde(skip_serializing_if = "Option::is_none")]
    intent_id: Option<&'a str>,
}
507
/// An operation paired with its computed `OpId` and the resulting
/// stage transition. This is what gets persisted under
/// `<root>/ops/<OpId>.json`.
///
/// `format_version` records the canonical form the `op_id` was
/// computed under. Pre-#244 stores didn't emit this field; reading
/// such records deserializes to [`OperationFormat::V1`] (the
/// implicit pre-versioning format), and writing V1 records continues
/// to omit it (`skip_serializing_if = is_implicit`) so adding the
/// field doesn't rotate any existing `OpId` or change any on-disk
/// byte. Records written under a future format will explicitly
/// carry their version tag.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct OperationRecord {
    /// Identity of `op`. [`OperationRecord::new`] fills this from
    /// `op.op_id()`; the field is public, so nothing in this type
    /// stops a hand-built record from carrying a mismatched id.
    pub op_id: OpId,
    /// Canonical-form version `op_id` was computed under. Left out
    /// of the JSON when it is the implicit format; defaults to
    /// [`OperationFormat::V1`] when absent on read.
    #[serde(default, skip_serializing_if = "OperationFormat::is_implicit")]
    pub format_version: OperationFormat,
    /// The operation itself. Flattened, so its fields sit next to
    /// `op_id` at the top level of the record.
    #[serde(flatten)]
    pub op: Operation,
    /// The stage transition recorded as the result of `op`.
    pub produces: StageTransition,
}
529
530impl OperationRecord {
531    pub fn new(op: Operation, produces: StageTransition) -> Self {
532        let op_id = op.op_id();
533        Self { op_id, format_version: OperationFormat::CURRENT, op, produces }
534    }
535}
536
537#[cfg(test)]
538mod tests {
539    use super::*;
540
541    fn add_factorial() -> OperationKind {
542        OperationKind::AddFunction {
543            sig_id: "fac::Int->Int".into(),
544            stage_id: "abc123".into(),
545            effects: BTreeSet::new(),
546            budget_cost: None,
547        }
548    }
549
550    #[test]
551    fn identical_operations_have_identical_op_ids() {
552        let a = Operation::new(add_factorial(), []);
553        let b = Operation::new(add_factorial(), []);
554        assert_eq!(a.op_id(), b.op_id());
555    }
556
557    #[test]
558    fn different_operations_have_different_op_ids() {
559        let a = Operation::new(add_factorial(), []);
560        let b = Operation::new(
561            OperationKind::AddFunction {
562                sig_id: "double::Int->Int".into(),
563                stage_id: "abc123".into(),
564                effects: BTreeSet::new(),
565                budget_cost: None,
566            },
567            [],
568        );
569        assert_ne!(a.op_id(), b.op_id());
570    }
571
572    #[test]
573    fn parent_set_changes_op_id() {
574        let no_parent = Operation::new(add_factorial(), []);
575        let with_parent = Operation::new(add_factorial(), ["op-parent-1".into()]);
576        assert_ne!(no_parent.op_id(), with_parent.op_id());
577    }
578
579    #[test]
580    fn parent_order_does_not_affect_op_id() {
581        let a = Operation::new(add_factorial(), ["b".into(), "a".into(), "c".into()]);
582        let b = Operation::new(add_factorial(), ["c".into(), "a".into(), "b".into()]);
583        assert_eq!(a.op_id(), b.op_id());
584        // and the stored form is sorted.
585        assert_eq!(a.parents, vec!["a".to_string(), "b".to_string(), "c".to_string()]);
586    }
587
588    #[test]
589    fn duplicate_parents_are_deduped() {
590        let with_dups = Operation::new(
591            add_factorial(),
592            ["a".into(), "a".into(), "b".into()],
593        );
594        let no_dups = Operation::new(
595            add_factorial(),
596            ["a".into(), "b".into()],
597        );
598        assert_eq!(with_dups.op_id(), no_dups.op_id());
599        assert_eq!(with_dups.parents, vec!["a".to_string(), "b".to_string()]);
600    }
601
602    #[test]
603    fn rename_with_same_body_hashes_equal_across_runs() {
604        // Two independent runs producing the same rename against the
605        // same parent should produce the same OpId — this is the
606        // automatic-dedup property #129 relies on for distributed
607        // agents.
608        let kind = OperationKind::RenameSymbol {
609            from: "parse::Str->Int".into(),
610            to: "parse_int::Str->Int".into(),
611            body_stage_id: "abc123".into(),
612        };
613        let a = Operation::new(kind.clone(), ["op-parent".into()]);
614        let b = Operation::new(kind, ["op-parent".into()]);
615        assert_eq!(a.op_id(), b.op_id());
616    }
617
618    #[test]
619    fn rename_does_not_collide_with_delete_plus_add() {
620        // The whole point of `RenameSymbol` is that it's a different
621        // OpId from the (semantically-equivalent) `RemoveFunction +
622        // AddFunction` pair. Causal history sees one event, not two.
623        let rename = Operation::new(
624            OperationKind::RenameSymbol {
625                from: "parse::Str->Int".into(),
626                to: "parse_int::Str->Int".into(),
627                body_stage_id: "abc123".into(),
628            },
629            ["op-parent".into()],
630        );
631        let remove = Operation::new(
632            OperationKind::RemoveFunction {
633                sig_id: "parse::Str->Int".into(),
634                last_stage_id: "abc123".into(),
635            },
636            ["op-parent".into()],
637        );
638        let add = Operation::new(
639            OperationKind::AddFunction {
640                sig_id: "parse_int::Str->Int".into(),
641                stage_id: "abc123".into(),
642                effects: BTreeSet::new(),
643                budget_cost: None,
644            },
645            ["op-parent".into()],
646        );
647        assert_ne!(rename.op_id(), remove.op_id());
648        assert_ne!(rename.op_id(), add.op_id());
649    }
650
651    #[test]
652    fn effect_set_order_does_not_affect_op_id() {
653        // Effects are a BTreeSet so iteration is sorted. Build two
654        // ops via different insertion orders and confirm the
655        // canonical form is identical.
656        let a_effects: EffectSet = ["io".into(), "fs_write".into()].into_iter().collect();
657        let b_effects: EffectSet = ["fs_write".into(), "io".into()].into_iter().collect();
658        let a = Operation::new(
659            OperationKind::AddFunction {
660                sig_id: "x".into(), stage_id: "s".into(), effects: a_effects,
661                budget_cost: None,
662            },
663            [],
664        );
665        let b = Operation::new(
666            OperationKind::AddFunction {
667                sig_id: "x".into(), stage_id: "s".into(), effects: b_effects,
668                budget_cost: None,
669            },
670            [],
671        );
672        assert_eq!(a.op_id(), b.op_id());
673    }
674
675    #[test]
676    fn op_id_is_64_char_lowercase_hex() {
677        let id = Operation::new(add_factorial(), []).op_id();
678        assert_eq!(id.len(), 64);
679        assert!(id.chars().all(|c| c.is_ascii_digit() || ('a'..='f').contains(&c)));
680    }
681
682    #[test]
683    fn round_trip_through_serde_json() {
684        let op = Operation::new(
685            OperationKind::ChangeEffectSig {
686                sig_id: "f".into(),
687                from_stage_id: "old".into(),
688                to_stage_id: "new".into(),
689                from_effects: BTreeSet::new(),
690                to_effects: ["io".into()].into_iter().collect(),
691                from_budget: None,
692                to_budget: None,
693            },
694            ["op-parent".into()],
695        );
696        let json = serde_json::to_string(&op).expect("serialize");
697        let back: Operation = serde_json::from_str(&json).expect("deserialize");
698        assert_eq!(op, back);
699        assert_eq!(op.op_id(), back.op_id());
700    }
701
702    #[test]
703    fn operation_record_carries_op_id() {
704        let op = Operation::new(add_factorial(), []);
705        let expected = op.op_id();
706        let rec = OperationRecord::new(
707            op,
708            StageTransition::Create {
709                sig_id: "fac::Int->Int".into(),
710                stage_id: "abc123".into(),
711            },
712        );
713        assert_eq!(rec.op_id, expected);
714    }
715
716    #[test]
717    fn intent_id_is_part_of_op_id_canonical_hash() {
718        // The dedup property: same `(kind, parents, intent_id)`
719        // produces the same OpId. Different intent_ids on
720        // otherwise-identical ops produce different OpIds, so
721        // causally distinct events (different prompts) hash
722        // distinctly.
723        let no_intent = Operation::new(add_factorial(), []);
724        let with_intent_a = Operation::new(add_factorial(), [])
725            .with_intent("intent-a");
726        let with_intent_b = Operation::new(add_factorial(), [])
727            .with_intent("intent-b");
728        let with_intent_a_again = Operation::new(add_factorial(), [])
729            .with_intent("intent-a");
730
731        // No-intent op is distinct from any intent-tagged variant.
732        assert_ne!(no_intent.op_id(), with_intent_a.op_id());
733        // Different intents → different OpIds.
734        assert_ne!(with_intent_a.op_id(), with_intent_b.op_id());
735        // Same intent → same OpId (the load-bearing dedup invariant).
736        assert_eq!(with_intent_a.op_id(), with_intent_a_again.op_id());
737    }
738
739    #[test]
740    fn op_without_intent_keeps_pre_intent_op_id() {
741        // Backwards-compat invariant: an op constructed without an
742        // intent must hash to the same value as it would have
743        // before #131 added the field. The golden test below pins
744        // the exact hash; this one asserts that adding then
745        // resetting to None doesn't drift.
746        let mut op = Operation::new(add_factorial(), []);
747        let baseline = op.op_id();
748        op.intent_id = Some("transient".into());
749        let with_intent = op.op_id();
750        assert_ne!(baseline, with_intent);
751        op.intent_id = None;
752        let back = op.op_id();
753        assert_eq!(baseline, back);
754    }
755
756    /// Golden hash. If this changes, the canonical form has shifted
757    /// and *every* op_id in every existing store has changed too —
758    /// that's a major-version event for the data model and should
759    /// be a deliberate decision, not an accident from reordering
760    /// fields. Update with care.
761    #[test]
762    fn canonical_form_is_stable_for_a_known_input() {
763        let op = Operation::new(
764            OperationKind::AddFunction {
765                sig_id: "fac::Int->Int".into(),
766                stage_id: "abc123".into(),
767                effects: BTreeSet::new(),
768                budget_cost: None,
769            },
770            [],
771        );
772        assert_eq!(
773            op.op_id(),
774            "f112990d31ef2a63f3e5ca5680637ed36a54bc7e8230510ae0c0e93fcb39d104"
775        );
776    }
777
778    #[test]
779    fn merge_kind_round_trips() {
780        let op = Operation::new(
781            OperationKind::Merge { resolved: 3 },
782            ["op-a".into(), "op-b".into()],
783        );
784        let json = serde_json::to_string(&op).expect("ser");
785        let back: Operation = serde_json::from_str(&json).expect("de");
786        assert_eq!(op, back);
787        assert_eq!(op.op_id(), back.op_id());
788    }
789
790    #[test]
791    fn merge_stage_transition_round_trips() {
792        let mut entries = BTreeMap::new();
793        entries.insert("sig-a".to_string(), Some("stage-a".to_string()));
794        entries.insert("sig-b".to_string(), None); // removed by merge
795        let t = StageTransition::Merge { entries };
796        let json = serde_json::to_string(&t).expect("ser");
797        let back: StageTransition = serde_json::from_str(&json).expect("de");
798        assert_eq!(t, back);
799    }
800
801    #[test]
802    fn merge_resolved_count_changes_op_id() {
803        // Two merges with the same parents but different resolved counts
804        // must hash differently — keeps structurally distinct merges from
805        // colliding on op_id.
806        let parents: Vec<OpId> = vec!["op-a".into(), "op-b".into()];
807        let one = Operation::new(OperationKind::Merge { resolved: 1 }, parents.clone());
808        let two = Operation::new(OperationKind::Merge { resolved: 2 }, parents);
809        assert_ne!(one.op_id(), two.op_id());
810    }
811
812    #[test]
813    fn existing_add_function_op_id_is_unchanged_after_merge_added() {
814        // Constructing the new Merge variant in the same enum must not
815        // perturb the canonical bytes of existing variants. The golden
816        // hash test below checks the literal value; this one verifies
817        // the property holds even after a Merge op has been built.
818        let _merge = Operation::new(
819            OperationKind::Merge { resolved: 0 },
820            ["op-x".into(), "op-y".into()],
821        );
822        let op = Operation::new(add_factorial(), []);
823        assert_eq!(
824            op.op_id(),
825            "f112990d31ef2a63f3e5ca5680637ed36a54bc7e8230510ae0c0e93fcb39d104"
826        );
827    }
828}