// lex_vcs/operation.rs
//! The `Operation` enum + `OperationRecord` (operation plus its
//! causal parents and resulting `OpId`).
//!
//! See `lib.rs` for the design context and #129 for the issue.

use indexmap::IndexSet;
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, BTreeSet};

use crate::canonical;

/// Signature identity of a function or type — the part that stays
/// stable across body edits. Wraps the same string identity
/// `lex-store` uses; we keep it as `String` here so this crate has
/// no dependency on `lex-store`'s internals.
///
/// NOTE(review): these are plain aliases, not newtypes — the
/// compiler will not catch a `SigId`/`StageId`/`OpId` mix-up.
pub type SigId = String;

/// Content hash of a single stage (function body, type def, ...).
/// Same string identity as the file under `<root>/stages/<SigId>/
/// implementations/<StageId>.ast.json`.
pub type StageId = String;

/// Identity of an operation. `(kind, payload, parents)` SHA-256 in
/// lowercase hex (64 chars). Two operations with identical payloads
/// and parent sets produce identical `OpId`s; the store dedupes on
/// this.
pub type OpId = String;

/// Sorted set of effect-kind strings (e.g. `["fs_write", "io"]`).
/// `BTreeSet` so the canonical form is order-independent for
/// hashing.
pub type EffectSet = BTreeSet<String>;

/// Reference to an imported module — either a stdlib name
/// (`std.io`) or a local path (`./helpers`). Kept as a string so
/// this crate doesn't pull in `lex-syntax`'s parser.
pub type ModuleRef = String;

/// Effect of applying an operation on a stage's content-addressed
/// identity. Used as the `produces` field of an [`OperationRecord`]
/// so consumers can answer "after this op, what's the head stage
/// for this SigId?" without rerunning the apply step.
///
/// Serialized with an internal `"kind"` tag in snake_case, so the
/// variant name is part of the persisted JSON.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum StageTransition {
    /// New SigId; produces a stage that didn't exist before.
    Create { sig_id: SigId, stage_id: StageId },
    /// Existing SigId; replaces its head stage. `from` is the head
    /// being replaced, `to` the new head.
    Replace { sig_id: SigId, from: StageId, to: StageId },
    /// SigId removed; no head stage afterwards. `last` records the
    /// head at the moment of removal.
    Remove { sig_id: SigId, last: StageId },
    /// SigId renamed; same body hash, different signature identity.
    Rename { from: SigId, to: SigId, body_stage_id: StageId },
    /// Import-only change; doesn't touch any stage.
    ImportOnly,
    /// Merge op result. `entries` lists only the sigs whose head
    /// changed relative to the merge op's first parent (`dst_head`):
    /// `Some(stage_id)` sets the head; `None` removes the sig.
    /// Sigs unaffected by the merge are not listed.
    Merge {
        entries: BTreeMap<SigId, Option<StageId>>,
    },
}

/// The kinds of operations that produce stage transitions. Mirrors
/// the initial set in #129; new kinds (`MoveBetweenFiles`,
/// `SplitFunction`, `ExtractType`) can be added later as long as
/// they're appended at the end of this enum or use explicit
/// `#[serde(rename = "...")]` tags so existing `OpId`s stay stable.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "op", rename_all = "snake_case")]
pub enum OperationKind {
    /// New function published. `effects` is the effect set declared
    /// in the signature; tracked here (not just inside the stage)
    /// so #130's write-time gate has a cheap path to check effect
    /// changes without rehydrating the AST.
    AddFunction {
        sig_id: SigId,
        stage_id: StageId,
        effects: EffectSet,
    },
    /// Function removed; `last_stage_id` is the head before the
    /// remove (so blame can walk the predecessor without scanning).
    RemoveFunction {
        sig_id: SigId,
        last_stage_id: StageId,
    },
    /// Function body changed; signature unchanged.
    ModifyBody {
        sig_id: SigId,
        from_stage_id: StageId,
        to_stage_id: StageId,
    },
    /// Symbol renamed. The body hash is preserved (`body_stage_id`)
    /// so two renames of the same body collapse to the same OpId
    /// and `lex blame` walks the rename as a single causal event
    /// rather than `delete + add`.
    RenameSymbol {
        from: SigId,
        to: SigId,
        body_stage_id: StageId,
    },
    /// Effect signature changed. Captures both old and new effect
    /// sets so the write-time gate (#130) can verify importers
    /// haven't silently broken.
    ChangeEffectSig {
        sig_id: SigId,
        from_stage_id: StageId,
        to_stage_id: StageId,
        from_effects: EffectSet,
        to_effects: EffectSet,
    },
    /// Import added to a file. `in_file` is the canonical path
    /// (relative to the repo root, forward-slashes) so two
    /// machines hashing the same edit get the same OpId.
    AddImport {
        in_file: String,
        module: ModuleRef,
    },
    /// Import removed from a file; same payload shape and path
    /// canonicalization rules as `AddImport`.
    RemoveImport {
        in_file: String,
        module: ModuleRef,
    },
    /// New type published. Unlike `AddFunction` there is no effect
    /// set — type definitions carry none in this model.
    AddType {
        sig_id: SigId,
        stage_id: StageId,
    },
    /// Type removed; `last_stage_id` mirrors `RemoveFunction`'s
    /// predecessor bookkeeping.
    RemoveType {
        sig_id: SigId,
        last_stage_id: StageId,
    },
    /// Type definition changed; signature identity unchanged
    /// (mirrors `ModifyBody` for functions).
    ModifyType {
        sig_id: SigId,
        from_stage_id: StageId,
        to_stage_id: StageId,
    },
    /// Merge of two branch heads. Carries only an informational count
    /// of resolved sigs so two structurally identical merges of
    /// different sizes don't collide on op_id; the per-sig deltas live
    /// in `OperationRecord::produces` (`StageTransition::Merge`).
    Merge {
        resolved: usize,
    },
}

146impl OperationKind {
147    /// The `(SigId, Option<StageId>)` an op kind targets, as used by
148    /// `StageTransition::Merge::entries`. Used by the merge-commit
149    /// path (#134) to translate a `Resolution::Custom { op }` into
150    /// the head-map delta the merge op records:
151    ///
152    /// * Adds → `(sig, Some(stage_id))`
153    /// * Modifies → `(sig, Some(to_stage_id))`
154    /// * Removes → `(sig, None)`
155    /// * Renames → `(to_sig, Some(body_stage_id))`
156    /// * `AddImport` / `RemoveImport` / nested `Merge` → `None`
157    ///   (no single sig→stage delta)
158    pub fn merge_target(&self) -> Option<(SigId, Option<StageId>)> {
159        use OperationKind::*;
160        match self {
161            AddFunction { sig_id, stage_id, .. }
162            | AddType { sig_id, stage_id }
163                => Some((sig_id.clone(), Some(stage_id.clone()))),
164            ModifyBody { sig_id, to_stage_id, .. }
165            | ChangeEffectSig { sig_id, to_stage_id, .. }
166            | ModifyType { sig_id, to_stage_id, .. }
167                => Some((sig_id.clone(), Some(to_stage_id.clone()))),
168            RemoveFunction { sig_id, .. }
169            | RemoveType { sig_id, .. }
170                => Some((sig_id.clone(), None)),
171            RenameSymbol { to, body_stage_id, .. }
172                => Some((to.clone(), Some(body_stage_id.clone()))),
173            AddImport { .. } | RemoveImport { .. } | Merge { .. } => None,
174        }
175    }
176}
177
/// The operation as a whole — its kind and the causal predecessors
/// it assumes. The `OpId` is computed from this plus a sorted view
/// of `parents`.
///
/// Operations without parents are valid and represent "applies to
/// the empty repository" or "applies to the synthetic genesis
/// state." `lex store migrate v1→v2` will produce parentless ops
/// for stages it can't trace back to a clear predecessor.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Operation {
    /// Flattened: the kind's tagged payload serializes at this
    /// struct's top level, alongside `parents` / `intent_id`.
    #[serde(flatten)]
    pub kind: OperationKind,
    /// Operations whose `produces` this op assumes. Sorted before
    /// hashing for canonical form. Empty for ops against the empty
    /// repo.
    ///
    /// NOTE(review): `Operation::new` sorts and dedupes, but a
    /// serde-deserialized value keeps whatever order the JSON had;
    /// `op_id` re-canonicalizes through its transient view either
    /// way, so hashing is unaffected.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub parents: Vec<OpId>,
    /// The intent that caused this op, if known. Optional because
    /// operations produced outside an agent harness (e.g. a human
    /// running `lex publish` directly) don't have one.
    ///
    /// Including the intent in the canonical hash means the same
    /// logical change made under different intents produces
    /// different `OpId`s — causally distinct events should hash
    /// distinctly. Ops with `intent_id: None` keep their existing
    /// hashes (the field is omitted from the canonical JSON via
    /// `skip_serializing_if`), so this is backwards-compatible
    /// for stores written before #131.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub intent_id: Option<crate::intent::IntentId>,
}

210impl Operation {
211    /// Construct an operation against zero or more parents. Caller
212    /// supplies parents in any order; canonicalization sorts them
213    /// before hashing.
214    pub fn new(kind: OperationKind, parents: impl IntoIterator<Item = OpId>) -> Self {
215        let mut parents: Vec<OpId> = parents.into_iter().collect();
216        parents.sort();
217        parents.dedup();
218        Self { kind, parents, intent_id: None }
219    }
220
221    /// Tag this operation with the intent that produced it. The
222    /// builder shape keeps existing call sites untouched; agent
223    /// harnesses that record intent call this once before
224    /// applying the op.
225    pub fn with_intent(mut self, intent_id: impl Into<crate::intent::IntentId>) -> Self {
226        self.intent_id = Some(intent_id.into());
227        self
228    }
229
230    /// Compute this operation's content-addressed identity.
231    ///
232    /// Stable across runs and machines: same `(kind, payload,
233    /// sorted parents, intent_id)` produces the same `OpId`. The
234    /// invariant #129's automatic-dedup behavior relies on.
235    pub fn op_id(&self) -> OpId {
236        // Build a transient hashable view rather than hashing
237        // `self` directly so the parent ordering is canonical
238        // even if a caller hand-constructs an `Operation` with
239        // unsorted parents.
240        let canonical = CanonicalView {
241            kind: &self.kind,
242            parents: self.parents.iter().collect::<IndexSet<_>>().into_iter().collect::<BTreeSet<_>>(),
243            intent_id: self.intent_id.as_deref(),
244        };
245        canonical::hash(&canonical)
246    }
247}
248
/// Hashable shadow of [`Operation`] with parents in a `BTreeSet` so
/// the serialization is order-independent regardless of how the
/// caller constructed the live operation. Never persisted; lives
/// only as a transient for hashing.
#[derive(Serialize)]
struct CanonicalView<'a> {
    /// Flattened so the canonical JSON carries the kind's tagged
    /// payload at top level, matching [`Operation`]'s shape.
    #[serde(flatten)]
    kind: &'a OperationKind,
    /// Always serialized — even when empty — unlike
    /// `Operation::parents`, which is skipped when empty; the
    /// canonical bytes therefore always contain a `parents` field.
    parents: BTreeSet<&'a OpId>,
    /// `skip_serializing_if = "Option::is_none"` keeps existing
    /// `OpId`s stable for ops without an intent — the field is
    /// omitted from the canonical JSON entirely.
    #[serde(skip_serializing_if = "Option::is_none")]
    intent_id: Option<&'a str>,
}

/// An operation paired with its computed `OpId` and the resulting
/// stage transition. This is what gets persisted under
/// `<root>/ops/<OpId>.json` once `apply.rs` (next slice of #129)
/// lands.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct OperationRecord {
    /// Content-addressed identity; derived from `op` by
    /// `OperationRecord::new` via `Operation::op_id`.
    pub op_id: OpId,
    /// Flattened: the persisted JSON is the operation's own fields
    /// plus `op_id` and `produces` at the same level.
    #[serde(flatten)]
    pub op: Operation,
    /// The stage-level effect of applying `op`.
    pub produces: StageTransition,
}

277impl OperationRecord {
278    pub fn new(op: Operation, produces: StageTransition) -> Self {
279        let op_id = op.op_id();
280        Self { op_id, op, produces }
281    }
282}
283
// Hash-stability, dedup, and serde round-trip tests for the
// operation model. The golden-hash tests pin the canonical form;
// see the comment on `canonical_form_is_stable_for_a_known_input`.
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared fixture: a simple `AddFunction` kind with no effects.
    fn add_factorial() -> OperationKind {
        OperationKind::AddFunction {
            sig_id: "fac::Int->Int".into(),
            stage_id: "abc123".into(),
            effects: BTreeSet::new(),
        }
    }

    // Dedup invariant: same payload + parents → same OpId.
    #[test]
    fn identical_operations_have_identical_op_ids() {
        let a = Operation::new(add_factorial(), []);
        let b = Operation::new(add_factorial(), []);
        assert_eq!(a.op_id(), b.op_id());
    }

    // Payload differences (here: sig_id) must change the OpId.
    #[test]
    fn different_operations_have_different_op_ids() {
        let a = Operation::new(add_factorial(), []);
        let b = Operation::new(
            OperationKind::AddFunction {
                sig_id: "double::Int->Int".into(),
                stage_id: "abc123".into(),
                effects: BTreeSet::new(),
            },
            [],
        );
        assert_ne!(a.op_id(), b.op_id());
    }

    // Parents are part of the canonical hash.
    #[test]
    fn parent_set_changes_op_id() {
        let no_parent = Operation::new(add_factorial(), []);
        let with_parent = Operation::new(add_factorial(), ["op-parent-1".into()]);
        assert_ne!(no_parent.op_id(), with_parent.op_id());
    }

    // ...but only as a *set*: supplying them in a different order
    // must not change the hash.
    #[test]
    fn parent_order_does_not_affect_op_id() {
        let a = Operation::new(add_factorial(), ["b".into(), "a".into(), "c".into()]);
        let b = Operation::new(add_factorial(), ["c".into(), "a".into(), "b".into()]);
        assert_eq!(a.op_id(), b.op_id());
        // and the stored form is sorted.
        assert_eq!(a.parents, vec!["a".to_string(), "b".to_string(), "c".to_string()]);
    }

    #[test]
    fn duplicate_parents_are_deduped() {
        let with_dups = Operation::new(
            add_factorial(),
            ["a".into(), "a".into(), "b".into()],
        );
        let no_dups = Operation::new(
            add_factorial(),
            ["a".into(), "b".into()],
        );
        assert_eq!(with_dups.op_id(), no_dups.op_id());
        assert_eq!(with_dups.parents, vec!["a".to_string(), "b".to_string()]);
    }

    #[test]
    fn rename_with_same_body_hashes_equal_across_runs() {
        // Two independent runs producing the same rename against the
        // same parent should produce the same OpId — this is the
        // automatic-dedup property #129 relies on for distributed
        // agents.
        let kind = OperationKind::RenameSymbol {
            from: "parse::Str->Int".into(),
            to: "parse_int::Str->Int".into(),
            body_stage_id: "abc123".into(),
        };
        let a = Operation::new(kind.clone(), ["op-parent".into()]);
        let b = Operation::new(kind, ["op-parent".into()]);
        assert_eq!(a.op_id(), b.op_id());
    }

    #[test]
    fn rename_does_not_collide_with_delete_plus_add() {
        // The whole point of `RenameSymbol` is that it's a different
        // OpId from the (semantically-equivalent) `RemoveFunction +
        // AddFunction` pair. Causal history sees one event, not two.
        let rename = Operation::new(
            OperationKind::RenameSymbol {
                from: "parse::Str->Int".into(),
                to: "parse_int::Str->Int".into(),
                body_stage_id: "abc123".into(),
            },
            ["op-parent".into()],
        );
        let remove = Operation::new(
            OperationKind::RemoveFunction {
                sig_id: "parse::Str->Int".into(),
                last_stage_id: "abc123".into(),
            },
            ["op-parent".into()],
        );
        let add = Operation::new(
            OperationKind::AddFunction {
                sig_id: "parse_int::Str->Int".into(),
                stage_id: "abc123".into(),
                effects: BTreeSet::new(),
            },
            ["op-parent".into()],
        );
        assert_ne!(rename.op_id(), remove.op_id());
        assert_ne!(rename.op_id(), add.op_id());
    }

    #[test]
    fn effect_set_order_does_not_affect_op_id() {
        // Effects are a BTreeSet so iteration is sorted. Build two
        // ops via different insertion orders and confirm the
        // canonical form is identical.
        let a_effects: EffectSet = ["io".into(), "fs_write".into()].into_iter().collect();
        let b_effects: EffectSet = ["fs_write".into(), "io".into()].into_iter().collect();
        let a = Operation::new(
            OperationKind::AddFunction {
                sig_id: "x".into(), stage_id: "s".into(), effects: a_effects,
            },
            [],
        );
        let b = Operation::new(
            OperationKind::AddFunction {
                sig_id: "x".into(), stage_id: "s".into(), effects: b_effects,
            },
            [],
        );
        assert_eq!(a.op_id(), b.op_id());
    }

    // Shape check for the `OpId` contract documented on the alias.
    #[test]
    fn op_id_is_64_char_lowercase_hex() {
        let id = Operation::new(add_factorial(), []).op_id();
        assert_eq!(id.len(), 64);
        assert!(id.chars().all(|c| c.is_ascii_digit() || ('a'..='f').contains(&c)));
    }

    // Serde round-trip must preserve both equality and the hash.
    #[test]
    fn round_trip_through_serde_json() {
        let op = Operation::new(
            OperationKind::ChangeEffectSig {
                sig_id: "f".into(),
                from_stage_id: "old".into(),
                to_stage_id: "new".into(),
                from_effects: BTreeSet::new(),
                to_effects: ["io".into()].into_iter().collect(),
            },
            ["op-parent".into()],
        );
        let json = serde_json::to_string(&op).expect("serialize");
        let back: Operation = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(op, back);
        assert_eq!(op.op_id(), back.op_id());
    }

    // `OperationRecord::new` must store the op's own hash.
    #[test]
    fn operation_record_carries_op_id() {
        let op = Operation::new(add_factorial(), []);
        let expected = op.op_id();
        let rec = OperationRecord::new(
            op,
            StageTransition::Create {
                sig_id: "fac::Int->Int".into(),
                stage_id: "abc123".into(),
            },
        );
        assert_eq!(rec.op_id, expected);
    }

    #[test]
    fn intent_id_is_part_of_op_id_canonical_hash() {
        // The dedup property: same `(kind, parents, intent_id)`
        // produces the same OpId. Different intent_ids on
        // otherwise-identical ops produce different OpIds, so
        // causally distinct events (different prompts) hash
        // distinctly.
        let no_intent = Operation::new(add_factorial(), []);
        let with_intent_a = Operation::new(add_factorial(), [])
            .with_intent("intent-a");
        let with_intent_b = Operation::new(add_factorial(), [])
            .with_intent("intent-b");
        let with_intent_a_again = Operation::new(add_factorial(), [])
            .with_intent("intent-a");

        // No-intent op is distinct from any intent-tagged variant.
        assert_ne!(no_intent.op_id(), with_intent_a.op_id());
        // Different intents → different OpIds.
        assert_ne!(with_intent_a.op_id(), with_intent_b.op_id());
        // Same intent → same OpId (the load-bearing dedup invariant).
        assert_eq!(with_intent_a.op_id(), with_intent_a_again.op_id());
    }

    #[test]
    fn op_without_intent_keeps_pre_intent_op_id() {
        // Backwards-compat invariant: an op constructed without an
        // intent must hash to the same value as it would have
        // before #131 added the field. The golden test below pins
        // the exact hash; this one asserts that adding then
        // resetting to None doesn't drift.
        let mut op = Operation::new(add_factorial(), []);
        let baseline = op.op_id();
        op.intent_id = Some("transient".into());
        let with_intent = op.op_id();
        assert_ne!(baseline, with_intent);
        op.intent_id = None;
        let back = op.op_id();
        assert_eq!(baseline, back);
    }

    /// Golden hash. If this changes, the canonical form has shifted
    /// and *every* op_id in every existing store has changed too —
    /// that's a major-version event for the data model and should
    /// be a deliberate decision, not an accident from reordering
    /// fields. Update with care.
    #[test]
    fn canonical_form_is_stable_for_a_known_input() {
        let op = Operation::new(
            OperationKind::AddFunction {
                sig_id: "fac::Int->Int".into(),
                stage_id: "abc123".into(),
                effects: BTreeSet::new(),
            },
            [],
        );
        assert_eq!(
            op.op_id(),
            "f112990d31ef2a63f3e5ca5680637ed36a54bc7e8230510ae0c0e93fcb39d104"
        );
    }

    #[test]
    fn merge_kind_round_trips() {
        let op = Operation::new(
            OperationKind::Merge { resolved: 3 },
            ["op-a".into(), "op-b".into()],
        );
        let json = serde_json::to_string(&op).expect("ser");
        let back: Operation = serde_json::from_str(&json).expect("de");
        assert_eq!(op, back);
        assert_eq!(op.op_id(), back.op_id());
    }

    #[test]
    fn merge_stage_transition_round_trips() {
        let mut entries = BTreeMap::new();
        entries.insert("sig-a".to_string(), Some("stage-a".to_string()));
        entries.insert("sig-b".to_string(), None); // removed by merge
        let t = StageTransition::Merge { entries };
        let json = serde_json::to_string(&t).expect("ser");
        let back: StageTransition = serde_json::from_str(&json).expect("de");
        assert_eq!(t, back);
    }

    #[test]
    fn merge_resolved_count_changes_op_id() {
        // Two merges with the same parents but different resolved counts
        // must hash differently — keeps structurally distinct merges from
        // colliding on op_id.
        let parents: Vec<OpId> = vec!["op-a".into(), "op-b".into()];
        let one = Operation::new(OperationKind::Merge { resolved: 1 }, parents.clone());
        let two = Operation::new(OperationKind::Merge { resolved: 2 }, parents);
        assert_ne!(one.op_id(), two.op_id());
    }

    #[test]
    fn existing_add_function_op_id_is_unchanged_after_merge_added() {
        // Constructing the new Merge variant in the same enum must not
        // perturb the canonical bytes of existing variants. The golden
        // hash test below checks the literal value; this one verifies
        // the property holds even after a Merge op has been built.
        let _merge = Operation::new(
            OperationKind::Merge { resolved: 0 },
            ["op-x".into(), "op-y".into()],
        );
        let op = Operation::new(add_factorial(), []);
        assert_eq!(
            op.op_id(),
            "f112990d31ef2a63f3e5ca5680637ed36a54bc7e8230510ae0c0e93fcb39d104"
        );
    }
}