lex_vcs/operation.rs
1//! The `Operation` enum + `OperationRecord` (operation plus its
2//! causal parents and resulting `OpId`).
3//!
4//! See `lib.rs` for the design context and #129 for the issue.
5
6use indexmap::IndexSet;
7use serde::{Deserialize, Serialize};
8use std::collections::{BTreeMap, BTreeSet};
9
10use crate::canonical;
11
/// Signature identity of a function or type — the part that stays
/// stable across body edits. Wraps the same string identity
/// `lex-store` uses; we keep it as `String` here so this crate has
/// no dependency on `lex-store`'s internals.
///
/// This is a plain alias, not a newtype: [`StageId`] and [`OpId`]
/// are also `String`, so the compiler will not catch one being
/// passed where another is expected.
pub type SigId = String;
17
/// Content hash of a single stage (function body, type def, ...).
/// Same string identity as the file name under
/// `<root>/stages/<SigId>/implementations/<StageId>.ast.json`.
pub type StageId = String;
22
/// Identity of an operation. `(kind, payload, parents)` — plus the
/// `intent_id` when one is set — SHA-256 in lowercase hex (64
/// chars). Two operations with identical payloads, parent sets and
/// intent produce identical `OpId`s; the store dedupes on this.
///
/// The exact pre-image is whatever [`Operation::canonical_bytes`]
/// returns.
pub type OpId = String;
28
/// Sorted set of effect-kind strings (e.g. `["fs_write", "io"]`).
/// `BTreeSet` so the canonical form is order-independent for
/// hashing: iteration (and therefore serialization) follows the
/// sorted order of the labels, not the order they were inserted in.
pub type EffectSet = BTreeSet<String>;
33
/// Reference to an imported module — either a stdlib name
/// (`std.io`) or a local path (`./helpers`). Kept as a string so
/// this crate doesn't pull in `lex-syntax`'s parser; nothing in this
/// file validates or normalizes the value.
pub type ModuleRef = String;
38
/// Version tag for the operation canonical form (#244).
///
/// The pre-image bytes hashed to derive an `OpId` are not stable
/// across schema evolutions: adding a field to `OperationKind` or
/// changing its serde representation rotates every existing `OpId`.
/// This enum tags the encoding used so a long-lived store can detect
/// mismatches and migrate explicitly via [`crate::migrate`].
///
/// **Today only [`Self::V1`] is in production.** Adding a future
/// variant requires:
///
/// 1. A new arm in [`Operation::canonical_bytes_in`].
/// 2. An update to the canonical-form spec in [`crate::canonical`].
/// 3. A `CHANGELOG.md` entry under `### Internal` calling out the
///    `OpId` rotation.
/// 4. A migration recipe via [`crate::migrate::plan_migration`] —
///    the mechanism is encoder-agnostic, but each new variant needs
///    its own `canonical_bytes_in` arm.
///
/// Variants serialize under their lowercased name
/// (`rename_all = "lowercase"`), so `V1` is written as `"v1"`.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, Default,
)]
#[serde(rename_all = "lowercase")]
pub enum OperationFormat {
    /// The original canonical form. Also the `Default`, which is
    /// what an [`OperationRecord`] without a `format_version` field
    /// deserializes to.
    #[default]
    V1,
}
65
66impl OperationFormat {
67 /// The format every newly-emitted op uses today.
68 pub const CURRENT: OperationFormat = OperationFormat::V1;
69
70 /// `true` for the implicit format (V1). Used by the
71 /// `skip_serializing_if` hook on [`OperationRecord::format_version`]
72 /// so existing V1 stores keep byte-identical on-disk JSON —
73 /// adding the version field doesn't itself rotate any `OpId`.
74 pub fn is_implicit(&self) -> bool {
75 matches!(self, OperationFormat::V1)
76 }
77}
78
/// Effect of applying an operation on a stage's content-addressed
/// identity. Used as the `produces` field of an [`OperationRecord`]
/// so consumers can answer "after this op, what's the head stage
/// for this SigId?" without rerunning the apply step.
///
/// Serialized as an internally tagged enum: the variant name goes
/// in a `"kind"` field in `snake_case` (e.g. `"import_only"`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum StageTransition {
    /// New SigId; produces a stage that didn't exist before.
    Create { sig_id: SigId, stage_id: StageId },
    /// Existing SigId; replaces its head stage (`from` is the old
    /// head, `to` the new one).
    Replace { sig_id: SigId, from: StageId, to: StageId },
    /// SigId removed; no head stage afterwards. `last` is the head
    /// it had before removal.
    Remove { sig_id: SigId, last: StageId },
    /// SigId renamed; same body hash, different signature identity.
    Rename { from: SigId, to: SigId, body_stage_id: StageId },
    /// Import-only change; doesn't touch any stage.
    ImportOnly,
    /// Merge op result. `entries` lists only the sigs whose head
    /// changed relative to the merge op's first parent (`dst_head`):
    /// `Some(stage_id)` sets the head; `None` removes the sig.
    /// Sigs unaffected by the merge are not listed.
    ///
    /// **Canonical-form contract:** `BTreeMap` is load-bearing —
    /// iteration is sorted by `SigId`, so on-disk JSON for two
    /// callers that resolved the same conflicts in different
    /// orders produces byte-identical output. Switching to
    /// `HashMap` here would break canonical stability of the
    /// `OperationRecord` JSON file and is rejected by the
    /// canonical-form spec in `crate::canonical`.
    Merge {
        entries: BTreeMap<SigId, Option<StageId>>,
    },
}
112
/// The kinds of operations that produce stage transitions. Mirrors
/// the initial set in #129; new kinds (`MoveBetweenFiles`,
/// `SplitFunction`, `ExtractType`) can be added later as long as
/// they're appended at the end of this enum or use explicit
/// `#[serde(rename = "...")]` tags so existing `OpId`s stay stable.
///
/// Serialized as an internally tagged enum: the variant name goes
/// in an `"op"` field in `snake_case` (e.g. `"add_function"`). This
/// serialization feeds the `OpId` pre-image, so renaming a variant
/// or a field changes hashes.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "op", rename_all = "snake_case")]
pub enum OperationKind {
    /// New function published. `effects` is the effect set declared
    /// in the signature; tracked here (not just inside the stage)
    /// so #130's write-time gate has a cheap path to check effect
    /// changes without rehydrating the AST.
    ///
    /// `budget_cost` (#247) records the function's declared
    /// `[budget(N)]` cost. Optional with `skip_serializing_if`, so
    /// pre-#247 ops without a declared budget continue to hash to
    /// their original `OpId` (additive serialization, same trick
    /// `intent_id` uses). `None` means the function declared no
    /// budget effect; `Some(n)` is the literal `n` from
    /// `[budget(n)]`.
    AddFunction {
        sig_id: SigId,
        stage_id: StageId,
        effects: EffectSet,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        budget_cost: Option<u64>,
    },
    /// Function removed; `last_stage_id` is the head before the
    /// remove (so blame can walk the predecessor without scanning).
    RemoveFunction {
        sig_id: SigId,
        last_stage_id: StageId,
    },
    /// Function body changed; signature unchanged.
    ///
    /// `from_budget` / `to_budget` (#247) record the declared
    /// `[budget(N)]` on each side. Same `Option` + `skip` discipline
    /// as `AddFunction.budget_cost` — pre-#247 ops keep their
    /// `OpId`s. The pair is what `lex op log --budget-drift` reads
    /// to surface "budget grew/shrank" diffs without rehydrating
    /// stages.
    ModifyBody {
        sig_id: SigId,
        from_stage_id: StageId,
        to_stage_id: StageId,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        from_budget: Option<u64>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        to_budget: Option<u64>,
    },
    /// Symbol renamed. The body hash is preserved (`body_stage_id`)
    /// so two renames of the same body collapse to the same OpId
    /// and `lex blame` walks the rename as a single causal event
    /// rather than `delete + add`.
    RenameSymbol {
        from: SigId,
        to: SigId,
        body_stage_id: StageId,
    },
    /// Effect signature changed. Captures both old and new effect
    /// sets so the write-time gate (#130) can verify importers
    /// haven't silently broken.
    ///
    /// `from_budget` / `to_budget` (#247) capture the declared
    /// `[budget(N)]` on each side. ChangeEffectSig usually fires
    /// because the effect *list* changed; #247 makes budget drift
    /// visible without forcing a full effect-set diff.
    ChangeEffectSig {
        sig_id: SigId,
        from_stage_id: StageId,
        to_stage_id: StageId,
        from_effects: EffectSet,
        to_effects: EffectSet,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        from_budget: Option<u64>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        to_budget: Option<u64>,
    },
    /// Import added to a file. `in_file` is the canonical path
    /// (relative to the repo root, forward-slashes) so two
    /// machines hashing the same edit get the same OpId.
    AddImport {
        in_file: String,
        module: ModuleRef,
    },
    /// Import removed from a file. Same fields as `AddImport`;
    /// presumably `in_file` follows the same canonical-path rule —
    /// this file does not enforce it.
    RemoveImport {
        in_file: String,
        module: ModuleRef,
    },
    /// Type counterpart of `AddFunction`: `sig_id` gains `stage_id`
    /// as its stage. Carries no effect set or budget.
    AddType {
        sig_id: SigId,
        stage_id: StageId,
    },
    /// Type counterpart of `RemoveFunction`: `sig_id` is removed and
    /// `last_stage_id` records the stage it had.
    RemoveType {
        sig_id: SigId,
        last_stage_id: StageId,
    },
    /// Type counterpart of `ModifyBody`: `sig_id` moves from
    /// `from_stage_id` to `to_stage_id`. Carries no budget fields.
    ModifyType {
        sig_id: SigId,
        from_stage_id: StageId,
        to_stage_id: StageId,
    },
    /// Merge of two branch heads. Carries only an informational count
    /// of resolved sigs so two structurally identical merges of
    /// different sizes don't collide on op_id; the per-sig deltas live
    /// in `OperationRecord::produces` (`StageTransition::Merge`).
    Merge {
        resolved: usize,
    },
}
223
224impl OperationKind {
225 /// The `(SigId, Option<StageId>)` an op kind targets, as used by
226 /// `StageTransition::Merge::entries`. Used by the merge-commit
227 /// path (#134) to translate a `Resolution::Custom { op }` into
228 /// the head-map delta the merge op records:
229 ///
230 /// * Adds → `(sig, Some(stage_id))`
231 /// * Modifies → `(sig, Some(to_stage_id))`
232 /// * Removes → `(sig, None)`
233 /// * Renames → `(to_sig, Some(body_stage_id))`
234 /// * `AddImport` / `RemoveImport` / nested `Merge` → `None`
235 /// (no single sig→stage delta)
236 pub fn merge_target(&self) -> Option<(SigId, Option<StageId>)> {
237 use OperationKind::*;
238 match self {
239 AddFunction { sig_id, stage_id, .. }
240 | AddType { sig_id, stage_id }
241 => Some((sig_id.clone(), Some(stage_id.clone()))),
242 ModifyBody { sig_id, to_stage_id, .. }
243 | ChangeEffectSig { sig_id, to_stage_id, .. }
244 | ModifyType { sig_id, to_stage_id, .. }
245 => Some((sig_id.clone(), Some(to_stage_id.clone()))),
246 RemoveFunction { sig_id, .. }
247 | RemoveType { sig_id, .. }
248 => Some((sig_id.clone(), None)),
249 RenameSymbol { to, body_stage_id, .. }
250 => Some((to.clone(), Some(body_stage_id.clone()))),
251 AddImport { .. } | RemoveImport { .. } | Merge { .. } => None,
252 }
253 }
254
255 /// `(from_budget, to_budget)` for ops that carry a budget delta
256 /// (#247). `(None, None)` for ops where the budget isn't part
257 /// of the canonical payload — `RemoveFunction`, `RenameSymbol`,
258 /// imports, and merges. `AddFunction` reports `(None,
259 /// Some(cost))` for "this is the initial cost." Used by `lex op
260 /// show`, `lex op log --budget-drift`, and `lex audit --budget`.
261 pub fn budget_delta(&self) -> (Option<u64>, Option<u64>) {
262 use OperationKind::*;
263 match self {
264 AddFunction { budget_cost, .. } => (None, *budget_cost),
265 ModifyBody { from_budget, to_budget, .. }
266 | ChangeEffectSig { from_budget, to_budget, .. } => (*from_budget, *to_budget),
267 _ => (None, None),
268 }
269 }
270
271 /// The `SigId` an op touches if it carries a budget — used for
272 /// per-sig audit rollups in `lex audit --budget`. Returns `None`
273 /// for ops without a relevant budget (the same set as the
274 /// `_ => (None, None)` arm of [`Self::budget_delta`]).
275 pub fn budget_sig(&self) -> Option<&SigId> {
276 use OperationKind::*;
277 match self {
278 AddFunction { sig_id, .. }
279 | ModifyBody { sig_id, .. }
280 | ChangeEffectSig { sig_id, .. } => Some(sig_id),
281 _ => None,
282 }
283 }
284}
285
286/// Extract the declared `[budget(N)]` integer from an [`EffectSet`],
287/// if any (#247).
288///
289/// Effect labels in [`EffectSet`] are produced by
290/// [`crate::compute_diff::effect_label`]: a `[budget(50)]`
291/// declaration becomes the literal string `"budget(50)"`. This
292/// helper parses that literal back to the integer; bare `"budget"`
293/// (no arg) returns `None` because the magnitude is unknown. A
294/// stage with multiple budget declarations — which the type-
295/// checker should reject anyway — picks the smallest, conservative
296/// answer for `lex audit --budget`.
297pub fn budget_from_effects(effects: &EffectSet) -> Option<u64> {
298 let mut min_cost: Option<u64> = None;
299 for label in effects {
300 let Some(rest) = label.strip_prefix("budget(") else { continue };
301 let Some(inner) = rest.strip_suffix(')') else { continue };
302 let Ok(n) = inner.parse::<u64>() else { continue };
303 min_cost = Some(min_cost.map(|c| c.min(n)).unwrap_or(n));
304 }
305 min_cost
306}
307
/// The operation as a whole — its kind and the causal predecessors
/// it assumes. The `OpId` is computed from this plus a sorted view
/// of `parents`.
///
/// Operations without parents are valid and represent "applies to
/// the empty repository" or "applies to the synthetic genesis
/// state." `lex store migrate v1→v2` will produce parentless ops
/// for stages it can't trace back to a clear predecessor.
///
/// All fields are public, so an `Operation` can be built by hand
/// with unsorted or duplicated `parents`; [`Operation::new`] is the
/// constructor that normalizes them.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Operation {
    /// What the operation does. Flattened, so the kind's `"op"` tag
    /// and payload fields sit at the same JSON level as `parents`
    /// and `intent_id`.
    #[serde(flatten)]
    pub kind: OperationKind,
    /// Operations whose `produces` this op assumes. Sorted before
    /// hashing for canonical form. Empty for ops against the empty
    /// repo. An empty list is omitted from the serialized JSON and
    /// a missing field deserializes back to an empty list.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub parents: Vec<OpId>,
    /// The intent that caused this op, if known. Optional because
    /// operations produced outside an agent harness (e.g. a human
    /// running `lex publish` directly) don't have one.
    ///
    /// Including the intent in the canonical hash means the same
    /// logical change made under different intents produces
    /// different `OpId`s — causally distinct events should hash
    /// distinctly. Ops with `intent_id: None` keep their existing
    /// hashes (the field is omitted from the canonical JSON via
    /// `skip_serializing_if`), so this is backwards-compatible
    /// for stores written before #131.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub intent_id: Option<crate::intent::IntentId>,
}
339
340impl Operation {
341 /// Construct an operation against zero or more parents. Caller
342 /// supplies parents in any order; canonicalization sorts them
343 /// before hashing.
344 pub fn new(kind: OperationKind, parents: impl IntoIterator<Item = OpId>) -> Self {
345 let mut parents: Vec<OpId> = parents.into_iter().collect();
346 parents.sort();
347 parents.dedup();
348 Self { kind, parents, intent_id: None }
349 }
350
351 /// Tag this operation with the intent that produced it. The
352 /// builder shape keeps existing call sites untouched; agent
353 /// harnesses that record intent call this once before
354 /// applying the op.
355 pub fn with_intent(mut self, intent_id: impl Into<crate::intent::IntentId>) -> Self {
356 self.intent_id = Some(intent_id.into());
357 self
358 }
359
360 /// Compute this operation's content-addressed identity under the
361 /// current production canonical form ([`OperationFormat::CURRENT`]).
362 ///
363 /// Stable across runs and machines: same `(kind, payload,
364 /// sorted parents, intent_id)` produces the same `OpId`. The
365 /// invariant #129's automatic-dedup behavior relies on.
366 pub fn op_id(&self) -> OpId {
367 self.op_id_in(OperationFormat::CURRENT)
368 }
369
370 /// Compute the `OpId` under a specific canonical-form version.
371 ///
372 /// Used by [`crate::migrate`] to derive new `OpId`s when porting
373 /// a store across format versions. Production code should call
374 /// [`Self::op_id`].
375 pub fn op_id_in(&self, format: OperationFormat) -> OpId {
376 canonical::hash_bytes(&self.canonical_bytes_in(format))
377 }
378
379 /// The byte sequence that gets hashed to produce [`Self::op_id`]
380 /// under the current canonical form. Equivalent to
381 /// `self.canonical_bytes_in(OperationFormat::CURRENT)`.
382 ///
383 /// Exposed (not just consumed by `op_id`) so golden tests can pin
384 /// the exact pre-image. **Not** equal to `serde_json::to_vec(&op)`
385 /// in general — the on-disk JSON skips empty `parents` and
386 /// `None` `intent_id`, while the canonical form always emits a
387 /// (sorted, deduped) `parents` array. See `canonical.rs` for the
388 /// full V1 canonical-form spec.
389 pub fn canonical_bytes(&self) -> Vec<u8> {
390 self.canonical_bytes_in(OperationFormat::CURRENT)
391 }
392
393 /// The pre-image hashed under a specific canonical-form version.
394 ///
395 /// Today every `OperationFormat` variant routes to V1's encoder
396 /// (only V1 exists in production). When V2 lands, this match
397 /// gains an arm and the migration tool's encoder closure routes
398 /// here.
399 pub fn canonical_bytes_in(&self, format: OperationFormat) -> Vec<u8> {
400 match format {
401 OperationFormat::V1 => self.canonical_bytes_v1(),
402 }
403 }
404
405 fn canonical_bytes_v1(&self) -> Vec<u8> {
406 // Build a transient hashable view rather than hashing
407 // `self` directly so the parent ordering is canonical
408 // even if a caller hand-constructs an `Operation` with
409 // unsorted parents.
410 let canonical = CanonicalView {
411 kind: &self.kind,
412 parents: self.parents.iter().collect::<IndexSet<_>>().into_iter().collect::<BTreeSet<_>>(),
413 intent_id: self.intent_id.as_deref(),
414 };
415 serde_json::to_vec(&canonical).expect("canonical serialization")
416 }
417}
418
/// Hashable shadow of [`Operation`] with parents in a `BTreeSet` so
/// the serialization is order-independent regardless of how the
/// caller constructed the live operation. Never persisted; lives
/// only as a transient for hashing.
///
/// Everything is borrowed from the live `Operation`, so building a
/// view does not clone the kind or the parent ids.
///
/// NOTE(review): the field order below presumably determines the
/// key order of the hashed JSON — do not reorder without checking
/// the golden-hash test in this file.
#[derive(Serialize)]
struct CanonicalView<'a> {
    /// Flattened so the kind's `"op"` tag and payload sit at the top
    /// level of the canonical JSON, matching `Operation`'s layout.
    #[serde(flatten)]
    kind: &'a OperationKind,
    /// Always serialized, even when empty (no `skip_serializing_if`
    /// here, unlike `Operation::parents`).
    parents: BTreeSet<&'a OpId>,
    /// `skip_serializing_if = "Option::is_none"` keeps existing
    /// `OpId`s stable for ops without an intent — the field is
    /// omitted from the canonical JSON entirely.
    #[serde(skip_serializing_if = "Option::is_none")]
    intent_id: Option<&'a str>,
}
434
/// An operation paired with its computed `OpId` and the resulting
/// stage transition. This is what gets persisted under
/// `<root>/ops/<OpId>.json`.
///
/// `format_version` records the canonical form the `op_id` was
/// computed under. Pre-#244 stores didn't emit this field; reading
/// such records deserializes to [`OperationFormat::V1`] (the
/// implicit pre-versioning format), and writing V1 records continues
/// to omit it (`skip_serializing_if = is_implicit`) so adding the
/// field doesn't rotate any existing `OpId` or change any on-disk
/// byte. Records written under a future format will explicitly
/// carry their version tag.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct OperationRecord {
    /// Identity of `op`. [`OperationRecord::new`] fills this from
    /// `op.op_id()`; the derived `Deserialize` takes the value from
    /// the JSON as-is and does not recompute or verify it.
    pub op_id: OpId,
    /// Canonical form `op_id` was computed under; see the type-level
    /// docs for the omit-when-V1 rule.
    #[serde(default, skip_serializing_if = "OperationFormat::is_implicit")]
    pub format_version: OperationFormat,
    /// The operation itself, flattened into the record's top-level
    /// JSON object.
    #[serde(flatten)]
    pub op: Operation,
    /// Stage transition that results from applying `op`; stored
    /// under its own `"produces"` key.
    pub produces: StageTransition,
}
456
457impl OperationRecord {
458 pub fn new(op: Operation, produces: StageTransition) -> Self {
459 let op_id = op.op_id();
460 Self { op_id, format_version: OperationFormat::CURRENT, op, produces }
461 }
462}
463
#[cfg(test)]
mod tests {
    use super::*;

    /// `OpId` of the parentless, intent-free `fac::Int->Int`
    /// `AddFunction` op. If this changes, the canonical form has
    /// shifted and *every* op_id in every existing store has changed
    /// too — that's a major-version event for the data model and
    /// should be a deliberate decision, not an accident from
    /// reordering fields. Update with care.
    const ADD_FACTORIAL_OP_ID: &str =
        "f112990d31ef2a63f3e5ca5680637ed36a54bc7e8230510ae0c0e93fcb39d104";

    /// Effect-free, budget-free `AddFunction` for `sig` at stage
    /// `abc123`.
    fn add_function(sig: &str) -> OperationKind {
        OperationKind::AddFunction {
            sig_id: sig.to_owned(),
            stage_id: "abc123".to_owned(),
            effects: EffectSet::new(),
            budget_cost: None,
        }
    }

    fn add_factorial() -> OperationKind {
        add_function("fac::Int->Int")
    }

    /// Operation with no parents and no intent.
    fn root_op(kind: OperationKind) -> Operation {
        Operation::new(kind, [])
    }

    /// Owned parent ids from string literals.
    fn parent_ids(ids: &[&str]) -> Vec<OpId> {
        ids.iter().map(|id| (*id).to_owned()).collect()
    }

    #[test]
    fn identical_operations_have_identical_op_ids() {
        let first = root_op(add_factorial()).op_id();
        let second = root_op(add_factorial()).op_id();
        assert_eq!(first, second);
    }

    #[test]
    fn different_operations_have_different_op_ids() {
        let factorial = root_op(add_factorial());
        let double = root_op(add_function("double::Int->Int"));
        assert_ne!(factorial.op_id(), double.op_id());
    }

    #[test]
    fn parent_set_changes_op_id() {
        let orphan = root_op(add_factorial());
        let child = Operation::new(add_factorial(), parent_ids(&["op-parent-1"]));
        assert_ne!(orphan.op_id(), child.op_id());
    }

    #[test]
    fn parent_order_does_not_affect_op_id() {
        let bac = Operation::new(add_factorial(), parent_ids(&["b", "a", "c"]));
        let cab = Operation::new(add_factorial(), parent_ids(&["c", "a", "b"]));
        assert_eq!(bac.op_id(), cab.op_id());
        // The constructor also stores the parents sorted.
        assert_eq!(bac.parents, parent_ids(&["a", "b", "c"]));
    }

    #[test]
    fn duplicate_parents_are_deduped() {
        let repeated = Operation::new(add_factorial(), parent_ids(&["a", "a", "b"]));
        let unique = Operation::new(add_factorial(), parent_ids(&["a", "b"]));
        assert_eq!(repeated.op_id(), unique.op_id());
        assert_eq!(repeated.parents, parent_ids(&["a", "b"]));
    }

    #[test]
    fn rename_with_same_body_hashes_equal_across_runs() {
        // Automatic dedup (#129): independent runs that emit the same
        // rename on top of the same parent must agree on the OpId, so
        // distributed agents converge on one op.
        let make = || {
            Operation::new(
                OperationKind::RenameSymbol {
                    from: "parse::Str->Int".to_owned(),
                    to: "parse_int::Str->Int".to_owned(),
                    body_stage_id: "abc123".to_owned(),
                },
                parent_ids(&["op-parent"]),
            )
        };
        assert_eq!(make().op_id(), make().op_id());
    }

    #[test]
    fn rename_does_not_collide_with_delete_plus_add() {
        // `RenameSymbol` exists so that a rename is one causal event
        // with its own OpId, distinct from either half of the
        // semantically equivalent `RemoveFunction` + `AddFunction`.
        let parent = || parent_ids(&["op-parent"]);
        let rename_id = Operation::new(
            OperationKind::RenameSymbol {
                from: "parse::Str->Int".to_owned(),
                to: "parse_int::Str->Int".to_owned(),
                body_stage_id: "abc123".to_owned(),
            },
            parent(),
        )
        .op_id();
        let remove_id = Operation::new(
            OperationKind::RemoveFunction {
                sig_id: "parse::Str->Int".to_owned(),
                last_stage_id: "abc123".to_owned(),
            },
            parent(),
        )
        .op_id();
        let add_id = Operation::new(add_function("parse_int::Str->Int"), parent()).op_id();
        assert_ne!(rename_id, remove_id);
        assert_ne!(rename_id, add_id);
    }

    #[test]
    fn effect_set_order_does_not_affect_op_id() {
        // `EffectSet` is a `BTreeSet`, so the two insertion orders
        // below must yield the same canonical form.
        let op_with = |labels: [&str; 2]| {
            let effects: EffectSet = labels.iter().map(|l| (*l).to_owned()).collect();
            root_op(OperationKind::AddFunction {
                sig_id: "x".to_owned(),
                stage_id: "s".to_owned(),
                effects,
                budget_cost: None,
            })
        };
        let io_first = op_with(["io", "fs_write"]);
        let fs_first = op_with(["fs_write", "io"]);
        assert_eq!(io_first.op_id(), fs_first.op_id());
    }

    #[test]
    fn op_id_is_64_char_lowercase_hex() {
        let op_id = root_op(add_factorial()).op_id();
        assert_eq!(op_id.len(), 64);
        assert!(op_id.chars().all(|c| matches!(c, '0'..='9' | 'a'..='f')));
    }

    #[test]
    fn round_trip_through_serde_json() {
        let mut to_effects = EffectSet::new();
        to_effects.insert("io".to_owned());
        let original = Operation::new(
            OperationKind::ChangeEffectSig {
                sig_id: "f".to_owned(),
                from_stage_id: "old".to_owned(),
                to_stage_id: "new".to_owned(),
                from_effects: EffectSet::new(),
                to_effects,
                from_budget: None,
                to_budget: None,
            },
            parent_ids(&["op-parent"]),
        );
        let encoded = serde_json::to_string(&original).expect("serialize");
        let decoded: Operation = serde_json::from_str(&encoded).expect("deserialize");
        assert_eq!(original, decoded);
        assert_eq!(original.op_id(), decoded.op_id());
    }

    #[test]
    fn operation_record_carries_op_id() {
        let op = root_op(add_factorial());
        let want = op.op_id();
        let produces = StageTransition::Create {
            sig_id: "fac::Int->Int".to_owned(),
            stage_id: "abc123".to_owned(),
        };
        let record = OperationRecord::new(op, produces);
        assert_eq!(record.op_id, want);
    }

    #[test]
    fn intent_id_is_part_of_op_id_canonical_hash() {
        // Dedup keys on `(kind, parents, intent_id)`: equal triples
        // hash equal, and changing only the intent changes the OpId,
        // so causally distinct events (different prompts) stay
        // distinct.
        let tagged = |intent: &str| root_op(add_factorial()).with_intent(intent).op_id();
        let untagged = root_op(add_factorial()).op_id();
        let intent_a = tagged("intent-a");
        let intent_b = tagged("intent-b");
        let intent_a_repeat = tagged("intent-a");

        // An op without an intent differs from any tagged variant.
        assert_ne!(untagged, intent_a);
        // Different intents → different OpIds.
        assert_ne!(intent_a, intent_b);
        // Same intent → same OpId (the load-bearing dedup invariant).
        assert_eq!(intent_a, intent_a_repeat);
    }

    #[test]
    fn op_without_intent_keeps_pre_intent_op_id() {
        // Backwards compatibility with pre-#131 stores: an op with no
        // intent must hash exactly as it did before the field
        // existed. `canonical_form_is_stable_for_a_known_input` pins
        // the literal hash; this test checks that setting an intent
        // and clearing it again leaves no trace.
        let mut op = root_op(add_factorial());
        let before = op.op_id();

        op.intent_id = Some("transient".into());
        let tagged = op.op_id();
        assert_ne!(before, tagged);

        op.intent_id = None;
        let after = op.op_id();
        assert_eq!(before, after);
    }

    /// Golden hash; see [`ADD_FACTORIAL_OP_ID`] for what a change
    /// here means. The input is spelled out inline on purpose so the
    /// pinned pre-image does not depend on the fixture helpers.
    #[test]
    fn canonical_form_is_stable_for_a_known_input() {
        let pinned = Operation::new(
            OperationKind::AddFunction {
                sig_id: "fac::Int->Int".into(),
                stage_id: "abc123".into(),
                effects: BTreeSet::new(),
                budget_cost: None,
            },
            [],
        );
        assert_eq!(pinned.op_id(), ADD_FACTORIAL_OP_ID);
    }

    #[test]
    fn merge_kind_round_trips() {
        let merge = Operation::new(
            OperationKind::Merge { resolved: 3 },
            parent_ids(&["op-a", "op-b"]),
        );
        let encoded = serde_json::to_string(&merge).expect("ser");
        let decoded: Operation = serde_json::from_str(&encoded).expect("de");
        assert_eq!(merge, decoded);
        assert_eq!(merge.op_id(), decoded.op_id());
    }

    #[test]
    fn merge_stage_transition_round_trips() {
        let entries: BTreeMap<SigId, Option<StageId>> = [
            ("sig-a".to_owned(), Some("stage-a".to_owned())),
            // `None` = sig removed by the merge.
            ("sig-b".to_owned(), None),
        ]
        .into_iter()
        .collect();
        let transition = StageTransition::Merge { entries };
        let encoded = serde_json::to_string(&transition).expect("ser");
        let decoded: StageTransition = serde_json::from_str(&encoded).expect("de");
        assert_eq!(transition, decoded);
    }

    #[test]
    fn merge_resolved_count_changes_op_id() {
        // Same parents, different `resolved` counts: the OpIds must
        // differ so structurally distinct merges never collide.
        let merge_id = |resolved: usize| {
            Operation::new(
                OperationKind::Merge { resolved },
                parent_ids(&["op-a", "op-b"]),
            )
            .op_id()
        };
        assert_ne!(merge_id(1), merge_id(2));
    }

    #[test]
    fn existing_add_function_op_id_is_unchanged_after_merge_added() {
        // Adding the `Merge` variant to the enum must not perturb the
        // canonical bytes of the older variants.
        // `canonical_form_is_stable_for_a_known_input` checks the
        // literal value; this test checks it still holds after a
        // `Merge` op has been constructed.
        let _merge = Operation::new(
            OperationKind::Merge { resolved: 0 },
            parent_ids(&["op-x", "op-y"]),
        );
        let add = root_op(add_factorial());
        assert_eq!(add.op_id(), ADD_FACTORIAL_OP_ID);
    }
}
755}