lex_vcs/operation.rs
1//! The `Operation` enum + `OperationRecord` (operation plus its
2//! causal parents and resulting `OpId`).
3//!
4//! See `lib.rs` for the design context and #129 for the issue.
5
6use indexmap::IndexSet;
7use serde::{Deserialize, Serialize};
8use std::collections::{BTreeMap, BTreeSet};
9
10use crate::canonical;
11
12/// Signature identity of a function or type — the part that stays
13/// stable across body edits. Wraps the same string identity
14/// `lex-store` uses; we keep it as `String` here so this crate has
15/// no dependency on `lex-store`'s internals.
16pub type SigId = String;
17
18/// Content hash of a single stage (function body, type def, ...).
19/// Same string identity as the file under `<root>/stages/<SigId>/
20/// implementations/<StageId>.ast.json`.
21pub type StageId = String;
22
23/// Identity of an operation. `(kind, payload, parents)` SHA-256 in
24/// lowercase hex (64 chars). Two operations with identical payloads
25/// and parent sets produce identical `OpId`s; the store dedupes on
26/// this.
27pub type OpId = String;
28
29/// Sorted set of effect-kind strings (e.g. `["fs_write", "io"]`).
30/// `BTreeSet` so the canonical form is order-independent for
31/// hashing.
32pub type EffectSet = BTreeSet<String>;
33
34/// Reference to an imported module — either a stdlib name
35/// (`std.io`) or a local path (`./helpers`). Kept as a string so
36/// this crate doesn't pull in `lex-syntax`'s parser.
37pub type ModuleRef = String;
38
39/// Effect of applying an operation on a stage's content-addressed
40/// identity. Used as the `produces` field of an [`OperationRecord`]
41/// so consumers can answer "after this op, what's the head stage
42/// for this SigId?" without rerunning the apply step.
43#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
44#[serde(tag = "kind", rename_all = "snake_case")]
45pub enum StageTransition {
46 /// New SigId; produces a stage that didn't exist before.
47 Create { sig_id: SigId, stage_id: StageId },
48 /// Existing SigId; replaces its head stage.
49 Replace { sig_id: SigId, from: StageId, to: StageId },
50 /// SigId removed; no head stage afterwards.
51 Remove { sig_id: SigId, last: StageId },
52 /// SigId renamed; same body hash, different signature identity.
53 Rename { from: SigId, to: SigId, body_stage_id: StageId },
54 /// Import-only change; doesn't touch any stage.
55 ImportOnly,
56 /// Merge op result. `entries` lists only the sigs whose head
57 /// changed relative to the merge op's first parent (`dst_head`):
58 /// `Some(stage_id)` sets the head; `None` removes the sig.
59 /// Sigs unaffected by the merge are not listed.
60 Merge {
61 entries: BTreeMap<SigId, Option<StageId>>,
62 },
63}
64
65/// The kinds of operations that produce stage transitions. Mirrors
66/// the initial set in #129; new kinds (`MoveBetweenFiles`,
67/// `SplitFunction`, `ExtractType`) can be added later as long as
68/// they're appended at the end of this enum or use explicit
69/// `#[serde(rename = "...")]` tags so existing `OpId`s stay stable.
70#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
71#[serde(tag = "op", rename_all = "snake_case")]
72pub enum OperationKind {
73 /// New function published. `effects` is the effect set declared
74 /// in the signature; tracked here (not just inside the stage)
75 /// so #130's write-time gate has a cheap path to check effect
76 /// changes without rehydrating the AST.
77 AddFunction {
78 sig_id: SigId,
79 stage_id: StageId,
80 effects: EffectSet,
81 },
82 /// Function removed; `last_stage_id` is the head before the
83 /// remove (so blame can walk the predecessor without scanning).
84 RemoveFunction {
85 sig_id: SigId,
86 last_stage_id: StageId,
87 },
88 /// Function body changed; signature unchanged.
89 ModifyBody {
90 sig_id: SigId,
91 from_stage_id: StageId,
92 to_stage_id: StageId,
93 },
94 /// Symbol renamed. The body hash is preserved (`body_stage_id`)
95 /// so two renames of the same body collapse to the same OpId
96 /// and `lex blame` walks the rename as a single causal event
97 /// rather than `delete + add`.
98 RenameSymbol {
99 from: SigId,
100 to: SigId,
101 body_stage_id: StageId,
102 },
103 /// Effect signature changed. Captures both old and new effect
104 /// sets so the write-time gate (#130) can verify importers
105 /// haven't silently broken.
106 ChangeEffectSig {
107 sig_id: SigId,
108 from_stage_id: StageId,
109 to_stage_id: StageId,
110 from_effects: EffectSet,
111 to_effects: EffectSet,
112 },
113 /// Import added to a file. `in_file` is the canonical path
114 /// (relative to the repo root, forward-slashes) so two
115 /// machines hashing the same edit get the same OpId.
116 AddImport {
117 in_file: String,
118 module: ModuleRef,
119 },
120 RemoveImport {
121 in_file: String,
122 module: ModuleRef,
123 },
124 AddType {
125 sig_id: SigId,
126 stage_id: StageId,
127 },
128 RemoveType {
129 sig_id: SigId,
130 last_stage_id: StageId,
131 },
132 ModifyType {
133 sig_id: SigId,
134 from_stage_id: StageId,
135 to_stage_id: StageId,
136 },
137 /// Merge of two branch heads. Carries only an informational count
138 /// of resolved sigs so two structurally identical merges of
139 /// different sizes don't collide on op_id; the per-sig deltas live
140 /// in `OperationRecord::produces` (`StageTransition::Merge`).
141 Merge {
142 resolved: usize,
143 },
144}
145
146impl OperationKind {
147 /// The `(SigId, Option<StageId>)` an op kind targets, as used by
148 /// `StageTransition::Merge::entries`. Used by the merge-commit
149 /// path (#134) to translate a `Resolution::Custom { op }` into
150 /// the head-map delta the merge op records:
151 ///
152 /// * Adds → `(sig, Some(stage_id))`
153 /// * Modifies → `(sig, Some(to_stage_id))`
154 /// * Removes → `(sig, None)`
155 /// * Renames → `(to_sig, Some(body_stage_id))`
156 /// * `AddImport` / `RemoveImport` / nested `Merge` → `None`
157 /// (no single sig→stage delta)
158 pub fn merge_target(&self) -> Option<(SigId, Option<StageId>)> {
159 use OperationKind::*;
160 match self {
161 AddFunction { sig_id, stage_id, .. }
162 | AddType { sig_id, stage_id }
163 => Some((sig_id.clone(), Some(stage_id.clone()))),
164 ModifyBody { sig_id, to_stage_id, .. }
165 | ChangeEffectSig { sig_id, to_stage_id, .. }
166 | ModifyType { sig_id, to_stage_id, .. }
167 => Some((sig_id.clone(), Some(to_stage_id.clone()))),
168 RemoveFunction { sig_id, .. }
169 | RemoveType { sig_id, .. }
170 => Some((sig_id.clone(), None)),
171 RenameSymbol { to, body_stage_id, .. }
172 => Some((to.clone(), Some(body_stage_id.clone()))),
173 AddImport { .. } | RemoveImport { .. } | Merge { .. } => None,
174 }
175 }
176}
177
178/// The operation as a whole — its kind and the causal predecessors
179/// it assumes. The `OpId` is computed from this plus a sorted view
180/// of `parents`.
181///
182/// Operations without parents are valid and represent "applies to
183/// the empty repository" or "applies to the synthetic genesis
184/// state." `lex store migrate v1→v2` will produce parentless ops
185/// for stages it can't trace back to a clear predecessor.
186#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
187pub struct Operation {
188 #[serde(flatten)]
189 pub kind: OperationKind,
190 /// Operations whose `produces` this op assumes. Sorted before
191 /// hashing for canonical form. Empty for ops against the empty
192 /// repo.
193 #[serde(default, skip_serializing_if = "Vec::is_empty")]
194 pub parents: Vec<OpId>,
195 /// The intent that caused this op, if known. Optional because
196 /// operations produced outside an agent harness (e.g. a human
197 /// running `lex publish` directly) don't have one.
198 ///
199 /// Including the intent in the canonical hash means the same
200 /// logical change made under different intents produces
201 /// different `OpId`s — causally distinct events should hash
202 /// distinctly. Ops with `intent_id: None` keep their existing
203 /// hashes (the field is omitted from the canonical JSON via
204 /// `skip_serializing_if`), so this is backwards-compatible
205 /// for stores written before #131.
206 #[serde(default, skip_serializing_if = "Option::is_none")]
207 pub intent_id: Option<crate::intent::IntentId>,
208}
209
210impl Operation {
211 /// Construct an operation against zero or more parents. Caller
212 /// supplies parents in any order; canonicalization sorts them
213 /// before hashing.
214 pub fn new(kind: OperationKind, parents: impl IntoIterator<Item = OpId>) -> Self {
215 let mut parents: Vec<OpId> = parents.into_iter().collect();
216 parents.sort();
217 parents.dedup();
218 Self { kind, parents, intent_id: None }
219 }
220
221 /// Tag this operation with the intent that produced it. The
222 /// builder shape keeps existing call sites untouched; agent
223 /// harnesses that record intent call this once before
224 /// applying the op.
225 pub fn with_intent(mut self, intent_id: impl Into<crate::intent::IntentId>) -> Self {
226 self.intent_id = Some(intent_id.into());
227 self
228 }
229
230 /// Compute this operation's content-addressed identity.
231 ///
232 /// Stable across runs and machines: same `(kind, payload,
233 /// sorted parents, intent_id)` produces the same `OpId`. The
234 /// invariant #129's automatic-dedup behavior relies on.
235 pub fn op_id(&self) -> OpId {
236 // Build a transient hashable view rather than hashing
237 // `self` directly so the parent ordering is canonical
238 // even if a caller hand-constructs an `Operation` with
239 // unsorted parents.
240 let canonical = CanonicalView {
241 kind: &self.kind,
242 parents: self.parents.iter().collect::<IndexSet<_>>().into_iter().collect::<BTreeSet<_>>(),
243 intent_id: self.intent_id.as_deref(),
244 };
245 canonical::hash(&canonical)
246 }
247}
248
249/// Hashable shadow of [`Operation`] with parents in a `BTreeSet` so
250/// the serialization is order-independent regardless of how the
251/// caller constructed the live operation. Never persisted; lives
252/// only as a transient for hashing.
253#[derive(Serialize)]
254struct CanonicalView<'a> {
255 #[serde(flatten)]
256 kind: &'a OperationKind,
257 parents: BTreeSet<&'a OpId>,
258 /// `skip_serializing_if = "Option::is_none"` keeps existing
259 /// `OpId`s stable for ops without an intent — the field is
260 /// omitted from the canonical JSON entirely.
261 #[serde(skip_serializing_if = "Option::is_none")]
262 intent_id: Option<&'a str>,
263}
264
265/// An operation paired with its computed `OpId` and the resulting
266/// stage transition. This is what gets persisted under
267/// `<root>/ops/<OpId>.json` once `apply.rs` (next slice of #129)
268/// lands.
269#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
270pub struct OperationRecord {
271 pub op_id: OpId,
272 #[serde(flatten)]
273 pub op: Operation,
274 pub produces: StageTransition,
275}
276
277impl OperationRecord {
278 pub fn new(op: Operation, produces: StageTransition) -> Self {
279 let op_id = op.op_id();
280 Self { op_id, op, produces }
281 }
282}
283
284#[cfg(test)]
285mod tests {
286 use super::*;
287
288 fn add_factorial() -> OperationKind {
289 OperationKind::AddFunction {
290 sig_id: "fac::Int->Int".into(),
291 stage_id: "abc123".into(),
292 effects: BTreeSet::new(),
293 }
294 }
295
296 #[test]
297 fn identical_operations_have_identical_op_ids() {
298 let a = Operation::new(add_factorial(), []);
299 let b = Operation::new(add_factorial(), []);
300 assert_eq!(a.op_id(), b.op_id());
301 }
302
303 #[test]
304 fn different_operations_have_different_op_ids() {
305 let a = Operation::new(add_factorial(), []);
306 let b = Operation::new(
307 OperationKind::AddFunction {
308 sig_id: "double::Int->Int".into(),
309 stage_id: "abc123".into(),
310 effects: BTreeSet::new(),
311 },
312 [],
313 );
314 assert_ne!(a.op_id(), b.op_id());
315 }
316
317 #[test]
318 fn parent_set_changes_op_id() {
319 let no_parent = Operation::new(add_factorial(), []);
320 let with_parent = Operation::new(add_factorial(), ["op-parent-1".into()]);
321 assert_ne!(no_parent.op_id(), with_parent.op_id());
322 }
323
324 #[test]
325 fn parent_order_does_not_affect_op_id() {
326 let a = Operation::new(add_factorial(), ["b".into(), "a".into(), "c".into()]);
327 let b = Operation::new(add_factorial(), ["c".into(), "a".into(), "b".into()]);
328 assert_eq!(a.op_id(), b.op_id());
329 // and the stored form is sorted.
330 assert_eq!(a.parents, vec!["a".to_string(), "b".to_string(), "c".to_string()]);
331 }
332
333 #[test]
334 fn duplicate_parents_are_deduped() {
335 let with_dups = Operation::new(
336 add_factorial(),
337 ["a".into(), "a".into(), "b".into()],
338 );
339 let no_dups = Operation::new(
340 add_factorial(),
341 ["a".into(), "b".into()],
342 );
343 assert_eq!(with_dups.op_id(), no_dups.op_id());
344 assert_eq!(with_dups.parents, vec!["a".to_string(), "b".to_string()]);
345 }
346
347 #[test]
348 fn rename_with_same_body_hashes_equal_across_runs() {
349 // Two independent runs producing the same rename against the
350 // same parent should produce the same OpId — this is the
351 // automatic-dedup property #129 relies on for distributed
352 // agents.
353 let kind = OperationKind::RenameSymbol {
354 from: "parse::Str->Int".into(),
355 to: "parse_int::Str->Int".into(),
356 body_stage_id: "abc123".into(),
357 };
358 let a = Operation::new(kind.clone(), ["op-parent".into()]);
359 let b = Operation::new(kind, ["op-parent".into()]);
360 assert_eq!(a.op_id(), b.op_id());
361 }
362
363 #[test]
364 fn rename_does_not_collide_with_delete_plus_add() {
365 // The whole point of `RenameSymbol` is that it's a different
366 // OpId from the (semantically-equivalent) `RemoveFunction +
367 // AddFunction` pair. Causal history sees one event, not two.
368 let rename = Operation::new(
369 OperationKind::RenameSymbol {
370 from: "parse::Str->Int".into(),
371 to: "parse_int::Str->Int".into(),
372 body_stage_id: "abc123".into(),
373 },
374 ["op-parent".into()],
375 );
376 let remove = Operation::new(
377 OperationKind::RemoveFunction {
378 sig_id: "parse::Str->Int".into(),
379 last_stage_id: "abc123".into(),
380 },
381 ["op-parent".into()],
382 );
383 let add = Operation::new(
384 OperationKind::AddFunction {
385 sig_id: "parse_int::Str->Int".into(),
386 stage_id: "abc123".into(),
387 effects: BTreeSet::new(),
388 },
389 ["op-parent".into()],
390 );
391 assert_ne!(rename.op_id(), remove.op_id());
392 assert_ne!(rename.op_id(), add.op_id());
393 }
394
395 #[test]
396 fn effect_set_order_does_not_affect_op_id() {
397 // Effects are a BTreeSet so iteration is sorted. Build two
398 // ops via different insertion orders and confirm the
399 // canonical form is identical.
400 let a_effects: EffectSet = ["io".into(), "fs_write".into()].into_iter().collect();
401 let b_effects: EffectSet = ["fs_write".into(), "io".into()].into_iter().collect();
402 let a = Operation::new(
403 OperationKind::AddFunction {
404 sig_id: "x".into(), stage_id: "s".into(), effects: a_effects,
405 },
406 [],
407 );
408 let b = Operation::new(
409 OperationKind::AddFunction {
410 sig_id: "x".into(), stage_id: "s".into(), effects: b_effects,
411 },
412 [],
413 );
414 assert_eq!(a.op_id(), b.op_id());
415 }
416
417 #[test]
418 fn op_id_is_64_char_lowercase_hex() {
419 let id = Operation::new(add_factorial(), []).op_id();
420 assert_eq!(id.len(), 64);
421 assert!(id.chars().all(|c| c.is_ascii_digit() || ('a'..='f').contains(&c)));
422 }
423
424 #[test]
425 fn round_trip_through_serde_json() {
426 let op = Operation::new(
427 OperationKind::ChangeEffectSig {
428 sig_id: "f".into(),
429 from_stage_id: "old".into(),
430 to_stage_id: "new".into(),
431 from_effects: BTreeSet::new(),
432 to_effects: ["io".into()].into_iter().collect(),
433 },
434 ["op-parent".into()],
435 );
436 let json = serde_json::to_string(&op).expect("serialize");
437 let back: Operation = serde_json::from_str(&json).expect("deserialize");
438 assert_eq!(op, back);
439 assert_eq!(op.op_id(), back.op_id());
440 }
441
442 #[test]
443 fn operation_record_carries_op_id() {
444 let op = Operation::new(add_factorial(), []);
445 let expected = op.op_id();
446 let rec = OperationRecord::new(
447 op,
448 StageTransition::Create {
449 sig_id: "fac::Int->Int".into(),
450 stage_id: "abc123".into(),
451 },
452 );
453 assert_eq!(rec.op_id, expected);
454 }
455
456 #[test]
457 fn intent_id_is_part_of_op_id_canonical_hash() {
458 // The dedup property: same `(kind, parents, intent_id)`
459 // produces the same OpId. Different intent_ids on
460 // otherwise-identical ops produce different OpIds, so
461 // causally distinct events (different prompts) hash
462 // distinctly.
463 let no_intent = Operation::new(add_factorial(), []);
464 let with_intent_a = Operation::new(add_factorial(), [])
465 .with_intent("intent-a");
466 let with_intent_b = Operation::new(add_factorial(), [])
467 .with_intent("intent-b");
468 let with_intent_a_again = Operation::new(add_factorial(), [])
469 .with_intent("intent-a");
470
471 // No-intent op is distinct from any intent-tagged variant.
472 assert_ne!(no_intent.op_id(), with_intent_a.op_id());
473 // Different intents → different OpIds.
474 assert_ne!(with_intent_a.op_id(), with_intent_b.op_id());
475 // Same intent → same OpId (the load-bearing dedup invariant).
476 assert_eq!(with_intent_a.op_id(), with_intent_a_again.op_id());
477 }
478
479 #[test]
480 fn op_without_intent_keeps_pre_intent_op_id() {
481 // Backwards-compat invariant: an op constructed without an
482 // intent must hash to the same value as it would have
483 // before #131 added the field. The golden test below pins
484 // the exact hash; this one asserts that adding then
485 // resetting to None doesn't drift.
486 let mut op = Operation::new(add_factorial(), []);
487 let baseline = op.op_id();
488 op.intent_id = Some("transient".into());
489 let with_intent = op.op_id();
490 assert_ne!(baseline, with_intent);
491 op.intent_id = None;
492 let back = op.op_id();
493 assert_eq!(baseline, back);
494 }
495
496 /// Golden hash. If this changes, the canonical form has shifted
497 /// and *every* op_id in every existing store has changed too —
498 /// that's a major-version event for the data model and should
499 /// be a deliberate decision, not an accident from reordering
500 /// fields. Update with care.
501 #[test]
502 fn canonical_form_is_stable_for_a_known_input() {
503 let op = Operation::new(
504 OperationKind::AddFunction {
505 sig_id: "fac::Int->Int".into(),
506 stage_id: "abc123".into(),
507 effects: BTreeSet::new(),
508 },
509 [],
510 );
511 assert_eq!(
512 op.op_id(),
513 "f112990d31ef2a63f3e5ca5680637ed36a54bc7e8230510ae0c0e93fcb39d104"
514 );
515 }
516
517 #[test]
518 fn merge_kind_round_trips() {
519 let op = Operation::new(
520 OperationKind::Merge { resolved: 3 },
521 ["op-a".into(), "op-b".into()],
522 );
523 let json = serde_json::to_string(&op).expect("ser");
524 let back: Operation = serde_json::from_str(&json).expect("de");
525 assert_eq!(op, back);
526 assert_eq!(op.op_id(), back.op_id());
527 }
528
529 #[test]
530 fn merge_stage_transition_round_trips() {
531 let mut entries = BTreeMap::new();
532 entries.insert("sig-a".to_string(), Some("stage-a".to_string()));
533 entries.insert("sig-b".to_string(), None); // removed by merge
534 let t = StageTransition::Merge { entries };
535 let json = serde_json::to_string(&t).expect("ser");
536 let back: StageTransition = serde_json::from_str(&json).expect("de");
537 assert_eq!(t, back);
538 }
539
540 #[test]
541 fn merge_resolved_count_changes_op_id() {
542 // Two merges with the same parents but different resolved counts
543 // must hash differently — keeps structurally distinct merges from
544 // colliding on op_id.
545 let parents: Vec<OpId> = vec!["op-a".into(), "op-b".into()];
546 let one = Operation::new(OperationKind::Merge { resolved: 1 }, parents.clone());
547 let two = Operation::new(OperationKind::Merge { resolved: 2 }, parents);
548 assert_ne!(one.op_id(), two.op_id());
549 }
550
551 #[test]
552 fn existing_add_function_op_id_is_unchanged_after_merge_added() {
553 // Constructing the new Merge variant in the same enum must not
554 // perturb the canonical bytes of existing variants. The golden
555 // hash test below checks the literal value; this one verifies
556 // the property holds even after a Merge op has been built.
557 let _merge = Operation::new(
558 OperationKind::Merge { resolved: 0 },
559 ["op-x".into(), "op-y".into()],
560 );
561 let op = Operation::new(add_factorial(), []);
562 assert_eq!(
563 op.op_id(),
564 "f112990d31ef2a63f3e5ca5680637ed36a54bc7e8230510ae0c0e93fcb39d104"
565 );
566 }
567}