lex_vcs/operation.rs
//! The `OperationKind` enum, the `Operation` struct (a kind plus its
//! causal parents and optional intent), and `OperationRecord` (an
//! operation paired with its `OpId` and resulting stage transition).
3//!
4//! See `lib.rs` for the design context and #129 for the issue.
5
6use indexmap::IndexSet;
7use serde::{Deserialize, Serialize};
8use std::collections::{BTreeMap, BTreeSet};
9
10use crate::canonical;
11
/// Stable identity of a function or type signature: it does not
/// change when only the body is edited. This is the same string
/// identity `lex-store` uses, held as a plain `String` so this crate
/// stays independent of `lex-store`'s internals.
pub type SigId = String;

/// Content hash of one stage (a function body, a type definition,
/// ...). It is the `<StageId>` component of the path
/// `<root>/stages/<SigId>/implementations/<StageId>.ast.json`.
pub type StageId = String;

/// Content-addressed identity of an operation: the SHA-256 of its
/// canonical form, written as 64 lowercase hex characters. The
/// canonical form covers the kind, its payload, the parent set and
/// the optional `intent_id`; operations that agree on all of those
/// share one `OpId`, and the store dedupes on it.
pub type OpId = String;

/// Set of effect-kind strings, e.g. `["fs_write", "io"]`. A
/// `BTreeSet` iterates in sorted order, so the canonical form used
/// for hashing does not depend on insertion order.
pub type EffectSet = BTreeSet<String>;

/// An imported module, named either by stdlib name (`std.io`) or by
/// local path (`./helpers`). Stored as a string so this crate does
/// not have to pull in `lex-syntax`'s parser.
pub type ModuleRef = String;
38
39/// Version tag for the operation canonical form (#244).
40///
41/// The pre-image bytes hashed to derive an `OpId` are not stable
42/// across schema evolutions: adding a field to `OperationKind` or
43/// changing its serde representation rotates every existing `OpId`.
44/// This enum tags the encoding used so a long-lived store can detect
45/// mismatches and migrate explicitly via [`crate::migrate`].
46///
47/// **Today only [`Self::V1`] is in production.** Adding a future
48/// variant requires:
49///
50/// 1. A new arm in [`Operation::canonical_bytes_in`].
51/// 2. An update to the canonical-form spec in [`crate::canonical`].
52/// 3. A `CHANGELOG.md` entry under `### Internal` calling out the
53/// `OpId` rotation.
54/// 4. A migration recipe via [`crate::migrate::plan_migration`] —
55/// the mechanism is encoder-agnostic, but each new variant needs
56/// its own `canonical_bytes_in` arm.
57#[derive(
58 Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, Default,
59)]
60#[serde(rename_all = "lowercase")]
61pub enum OperationFormat {
62 #[default]
63 V1,
64}
65
66impl OperationFormat {
67 /// The format every newly-emitted op uses today.
68 pub const CURRENT: OperationFormat = OperationFormat::V1;
69
70 /// `true` for the implicit format (V1). Used by the
71 /// `skip_serializing_if` hook on [`OperationRecord::format_version`]
72 /// so existing V1 stores keep byte-identical on-disk JSON —
73 /// adding the version field doesn't itself rotate any `OpId`.
74 pub fn is_implicit(&self) -> bool {
75 matches!(self, OperationFormat::V1)
76 }
77}
78
79/// Effect of applying an operation on a stage's content-addressed
80/// identity. Used as the `produces` field of an [`OperationRecord`]
81/// so consumers can answer "after this op, what's the head stage
82/// for this SigId?" without rerunning the apply step.
83#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
84#[serde(tag = "kind", rename_all = "snake_case")]
85pub enum StageTransition {
86 /// New SigId; produces a stage that didn't exist before.
87 Create { sig_id: SigId, stage_id: StageId },
88 /// Existing SigId; replaces its head stage.
89 Replace { sig_id: SigId, from: StageId, to: StageId },
90 /// SigId removed; no head stage afterwards.
91 Remove { sig_id: SigId, last: StageId },
92 /// SigId renamed; same body hash, different signature identity.
93 Rename { from: SigId, to: SigId, body_stage_id: StageId },
94 /// Import-only change; doesn't touch any stage.
95 ImportOnly,
96 /// Merge op result. `entries` lists only the sigs whose head
97 /// changed relative to the merge op's first parent (`dst_head`):
98 /// `Some(stage_id)` sets the head; `None` removes the sig.
99 /// Sigs unaffected by the merge are not listed.
100 ///
101 /// **Canonical-form contract:** `BTreeMap` is load-bearing —
102 /// iteration is sorted by `SigId`, so on-disk JSON for two
103 /// callers that resolved the same conflicts in different
104 /// orders produces byte-identical output. Switching to
105 /// `HashMap` here would break canonical stability of the
106 /// `OperationRecord` JSON file and is rejected by the
107 /// canonical-form spec in `crate::canonical`.
108 Merge {
109 entries: BTreeMap<SigId, Option<StageId>>,
110 },
111}
112
113/// The kinds of operations that produce stage transitions. Mirrors
114/// the initial set in #129; new kinds (`MoveBetweenFiles`,
115/// `SplitFunction`, `ExtractType`) can be added later as long as
116/// they're appended at the end of this enum or use explicit
117/// `#[serde(rename = "...")]` tags so existing `OpId`s stay stable.
118#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
119#[serde(tag = "op", rename_all = "snake_case")]
120pub enum OperationKind {
121 /// New function published. `effects` is the effect set declared
122 /// in the signature; tracked here (not just inside the stage)
123 /// so #130's write-time gate has a cheap path to check effect
124 /// changes without rehydrating the AST.
125 ///
126 /// `budget_cost` (#247) records the function's declared
127 /// `[budget(N)]` cost. Optional with `skip_serializing_if`, so
128 /// pre-#247 ops without a declared budget continue to hash to
129 /// their original `OpId` (additive serialization, same trick
130 /// `intent_id` uses). `None` means the function declared no
131 /// budget effect; `Some(n)` is the literal `n` from
132 /// `[budget(n)]`.
133 AddFunction {
134 sig_id: SigId,
135 stage_id: StageId,
136 effects: EffectSet,
137 #[serde(default, skip_serializing_if = "Option::is_none")]
138 budget_cost: Option<u64>,
139 },
140 /// Function removed; `last_stage_id` is the head before the
141 /// remove (so blame can walk the predecessor without scanning).
142 RemoveFunction {
143 sig_id: SigId,
144 last_stage_id: StageId,
145 },
146 /// Function body changed; signature unchanged.
147 ///
148 /// `from_budget` / `to_budget` (#247) record the declared
149 /// `[budget(N)]` on each side. Same `Option` + `skip` discipline
150 /// as `AddFunction.budget_cost` — pre-#247 ops keep their
151 /// `OpId`s. The pair is what `lex op log --budget-drift` reads
152 /// to surface "budget grew/shrank" diffs without rehydrating
153 /// stages.
154 ModifyBody {
155 sig_id: SigId,
156 from_stage_id: StageId,
157 to_stage_id: StageId,
158 #[serde(default, skip_serializing_if = "Option::is_none")]
159 from_budget: Option<u64>,
160 #[serde(default, skip_serializing_if = "Option::is_none")]
161 to_budget: Option<u64>,
162 },
163 /// Symbol renamed. The body hash is preserved (`body_stage_id`)
164 /// so two renames of the same body collapse to the same OpId
165 /// and `lex blame` walks the rename as a single causal event
166 /// rather than `delete + add`.
167 RenameSymbol {
168 from: SigId,
169 to: SigId,
170 body_stage_id: StageId,
171 },
172 /// Effect signature changed. Captures both old and new effect
173 /// sets so the write-time gate (#130) can verify importers
174 /// haven't silently broken.
175 ///
176 /// `from_budget` / `to_budget` (#247) capture the declared
177 /// `[budget(N)]` on each side. ChangeEffectSig usually fires
178 /// because the effect *list* changed; #247 makes budget drift
179 /// visible without forcing a full effect-set diff.
180 ChangeEffectSig {
181 sig_id: SigId,
182 from_stage_id: StageId,
183 to_stage_id: StageId,
184 from_effects: EffectSet,
185 to_effects: EffectSet,
186 #[serde(default, skip_serializing_if = "Option::is_none")]
187 from_budget: Option<u64>,
188 #[serde(default, skip_serializing_if = "Option::is_none")]
189 to_budget: Option<u64>,
190 },
191 /// Import added to a file. `in_file` is the canonical path
192 /// (relative to the repo root, forward-slashes) so two
193 /// machines hashing the same edit get the same OpId.
194 AddImport {
195 in_file: String,
196 module: ModuleRef,
197 },
198 RemoveImport {
199 in_file: String,
200 module: ModuleRef,
201 },
202 AddType {
203 sig_id: SigId,
204 stage_id: StageId,
205 },
206 RemoveType {
207 sig_id: SigId,
208 last_stage_id: StageId,
209 },
210 ModifyType {
211 sig_id: SigId,
212 from_stage_id: StageId,
213 to_stage_id: StageId,
214 },
215 /// Merge of two branch heads. Carries only an informational count
216 /// of resolved sigs so two structurally identical merges of
217 /// different sizes don't collide on op_id; the per-sig deltas live
218 /// in `OperationRecord::produces` (`StageTransition::Merge`).
219 Merge {
220 resolved: usize,
221 },
222 /// Typed transform: inlined a `let x := v; body` by
223 /// substituting `v` for every unshadowed `x` in `body`, then
224 /// replacing the entire `Let` node with the substituted body
225 /// (#280). The op records the let-binding's position and the
226 /// inlined name; the actual substituted value lives in the
227 /// content-addressed `to_stage_id` so the op_id stays compact.
228 InlineLet {
229 sig_id: SigId,
230 from_stage_id: StageId,
231 to_stage_id: StageId,
232 let_node: String,
233 binding_name: String,
234 #[serde(default, skip_serializing_if = "Option::is_none")]
235 from_budget: Option<u64>,
236 #[serde(default, skip_serializing_if = "Option::is_none")]
237 to_budget: Option<u64>,
238 },
239 /// Typed transform: renamed a `let`-bound local within a fn
240 /// body (#280). Records the old/new identifiers and the position
241 /// of the let-binding in the AST. Body-shape-stable: the renamed
242 /// stage typically hashes near the original.
243 RenameLocal {
244 sig_id: SigId,
245 from_stage_id: StageId,
246 to_stage_id: StageId,
247 /// Path-style NodeId of the `Let` expression at the time of
248 /// the transform.
249 let_node: String,
250 old_name: String,
251 new_name: String,
252 #[serde(default, skip_serializing_if = "Option::is_none")]
253 from_budget: Option<u64>,
254 #[serde(default, skip_serializing_if = "Option::is_none")]
255 to_budget: Option<u64>,
256 },
257 /// Typed transform: replaced one arm's body in a `Match`
258 /// expression (#280). Semantically a `ModifyBody`, but the op
259 /// records *which* arm changed and *where* in the AST — so the
260 /// op log reads as a semantic edit history rather than as
261 /// opaque hash-to-hash bytes.
262 ///
263 /// `match_node` is the [`lex_ast::ids::NodeId`] of the Match
264 /// expression at the time of the transform. NodeIds aren't
265 /// stable across structural edits — they're audit-trail metadata,
266 /// not re-derivation keys. The authoritative record of the new
267 /// stage is `to_stage_id` (content-addressed).
268 ///
269 /// `from_budget`/`to_budget` follow the same `skip_if_none`
270 /// discipline as [`Self::ModifyBody`]: pre-#280 ops continue
271 /// hashing to their original `OpId`s.
272 ReplaceMatchArm {
273 sig_id: SigId,
274 from_stage_id: StageId,
275 to_stage_id: StageId,
276 /// Path-style NodeId of the Match expression that was
277 /// modified, captured at transform time. See
278 /// [`lex_ast::ids::NodeId`] for the format.
279 match_node: String,
280 arm_index: usize,
281 #[serde(default, skip_serializing_if = "Option::is_none")]
282 from_budget: Option<u64>,
283 #[serde(default, skip_serializing_if = "Option::is_none")]
284 to_budget: Option<u64>,
285 },
286}
287
288impl OperationKind {
289 /// The `(SigId, Option<StageId>)` an op kind targets, as used by
290 /// `StageTransition::Merge::entries`. Used by the merge-commit
291 /// path (#134) to translate a `Resolution::Custom { op }` into
292 /// the head-map delta the merge op records:
293 ///
294 /// * Adds → `(sig, Some(stage_id))`
295 /// * Modifies → `(sig, Some(to_stage_id))`
296 /// * Removes → `(sig, None)`
297 /// * Renames → `(to_sig, Some(body_stage_id))`
298 /// * `AddImport` / `RemoveImport` / nested `Merge` → `None`
299 /// (no single sig→stage delta)
300 pub fn merge_target(&self) -> Option<(SigId, Option<StageId>)> {
301 use OperationKind::*;
302 match self {
303 AddFunction { sig_id, stage_id, .. }
304 | AddType { sig_id, stage_id }
305 => Some((sig_id.clone(), Some(stage_id.clone()))),
306 ModifyBody { sig_id, to_stage_id, .. }
307 | ChangeEffectSig { sig_id, to_stage_id, .. }
308 | ModifyType { sig_id, to_stage_id, .. }
309 | ReplaceMatchArm { sig_id, to_stage_id, .. }
310 | RenameLocal { sig_id, to_stage_id, .. }
311 | InlineLet { sig_id, to_stage_id, .. }
312 => Some((sig_id.clone(), Some(to_stage_id.clone()))),
313 RemoveFunction { sig_id, .. }
314 | RemoveType { sig_id, .. }
315 => Some((sig_id.clone(), None)),
316 RenameSymbol { to, body_stage_id, .. }
317 => Some((to.clone(), Some(body_stage_id.clone()))),
318 AddImport { .. } | RemoveImport { .. } | Merge { .. } => None,
319 }
320 }
321
322 /// `(from_budget, to_budget)` for ops that carry a budget delta
323 /// (#247). `(None, None)` for ops where the budget isn't part
324 /// of the canonical payload — `RemoveFunction`, `RenameSymbol`,
325 /// imports, and merges. `AddFunction` reports `(None,
326 /// Some(cost))` for "this is the initial cost." Used by `lex op
327 /// show`, `lex op log --budget-drift`, and `lex audit --budget`.
328 pub fn budget_delta(&self) -> (Option<u64>, Option<u64>) {
329 use OperationKind::*;
330 match self {
331 AddFunction { budget_cost, .. } => (None, *budget_cost),
332 ModifyBody { from_budget, to_budget, .. }
333 | ChangeEffectSig { from_budget, to_budget, .. }
334 | ReplaceMatchArm { from_budget, to_budget, .. }
335 | RenameLocal { from_budget, to_budget, .. }
336 | InlineLet { from_budget, to_budget, .. } => (*from_budget, *to_budget),
337 _ => (None, None),
338 }
339 }
340
341 /// The `SigId` an op touches if it carries a budget — used for
342 /// per-sig audit rollups in `lex audit --budget`. Returns `None`
343 /// for ops without a relevant budget (the same set as the
344 /// `_ => (None, None)` arm of [`Self::budget_delta`]).
345 pub fn budget_sig(&self) -> Option<&SigId> {
346 use OperationKind::*;
347 match self {
348 AddFunction { sig_id, .. }
349 | ModifyBody { sig_id, .. }
350 | ChangeEffectSig { sig_id, .. }
351 | ReplaceMatchArm { sig_id, .. }
352 | RenameLocal { sig_id, .. }
353 | InlineLet { sig_id, .. } => Some(sig_id),
354 _ => None,
355 }
356 }
357}
358
359/// Extract the declared `[budget(N)]` integer from an [`EffectSet`],
360/// if any (#247).
361///
362/// Effect labels in [`EffectSet`] are produced by
363/// [`crate::compute_diff::effect_label`]: a `[budget(50)]`
364/// declaration becomes the literal string `"budget(50)"`. This
365/// helper parses that literal back to the integer; bare `"budget"`
366/// (no arg) returns `None` because the magnitude is unknown. A
367/// stage with multiple budget declarations — which the type-
368/// checker should reject anyway — picks the smallest, conservative
369/// answer for `lex audit --budget`.
370pub fn budget_from_effects(effects: &EffectSet) -> Option<u64> {
371 let mut min_cost: Option<u64> = None;
372 for label in effects {
373 let Some(rest) = label.strip_prefix("budget(") else { continue };
374 let Some(inner) = rest.strip_suffix(')') else { continue };
375 let Ok(n) = inner.parse::<u64>() else { continue };
376 min_cost = Some(min_cost.map(|c| c.min(n)).unwrap_or(n));
377 }
378 min_cost
379}
380
381/// The operation as a whole — its kind and the causal predecessors
382/// it assumes. The `OpId` is computed from this plus a sorted view
383/// of `parents`.
384///
385/// Operations without parents are valid and represent "applies to
386/// the empty repository" or "applies to the synthetic genesis
387/// state." `lex store migrate v1→v2` will produce parentless ops
388/// for stages it can't trace back to a clear predecessor.
389#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
390pub struct Operation {
391 #[serde(flatten)]
392 pub kind: OperationKind,
393 /// Operations whose `produces` this op assumes. Sorted before
394 /// hashing for canonical form. Empty for ops against the empty
395 /// repo.
396 #[serde(default, skip_serializing_if = "Vec::is_empty")]
397 pub parents: Vec<OpId>,
398 /// The intent that caused this op, if known. Optional because
399 /// operations produced outside an agent harness (e.g. a human
400 /// running `lex publish` directly) don't have one.
401 ///
402 /// Including the intent in the canonical hash means the same
403 /// logical change made under different intents produces
404 /// different `OpId`s — causally distinct events should hash
405 /// distinctly. Ops with `intent_id: None` keep their existing
406 /// hashes (the field is omitted from the canonical JSON via
407 /// `skip_serializing_if`), so this is backwards-compatible
408 /// for stores written before #131.
409 #[serde(default, skip_serializing_if = "Option::is_none")]
410 pub intent_id: Option<crate::intent::IntentId>,
411}
412
413impl Operation {
414 /// Construct an operation against zero or more parents. Caller
415 /// supplies parents in any order; canonicalization sorts them
416 /// before hashing.
417 pub fn new(kind: OperationKind, parents: impl IntoIterator<Item = OpId>) -> Self {
418 let mut parents: Vec<OpId> = parents.into_iter().collect();
419 parents.sort();
420 parents.dedup();
421 Self { kind, parents, intent_id: None }
422 }
423
424 /// Tag this operation with the intent that produced it. The
425 /// builder shape keeps existing call sites untouched; agent
426 /// harnesses that record intent call this once before
427 /// applying the op.
428 pub fn with_intent(mut self, intent_id: impl Into<crate::intent::IntentId>) -> Self {
429 self.intent_id = Some(intent_id.into());
430 self
431 }
432
433 /// Compute this operation's content-addressed identity under the
434 /// current production canonical form ([`OperationFormat::CURRENT`]).
435 ///
436 /// Stable across runs and machines: same `(kind, payload,
437 /// sorted parents, intent_id)` produces the same `OpId`. The
438 /// invariant #129's automatic-dedup behavior relies on.
439 pub fn op_id(&self) -> OpId {
440 self.op_id_in(OperationFormat::CURRENT)
441 }
442
443 /// Compute the `OpId` under a specific canonical-form version.
444 ///
445 /// Used by [`crate::migrate`] to derive new `OpId`s when porting
446 /// a store across format versions. Production code should call
447 /// [`Self::op_id`].
448 pub fn op_id_in(&self, format: OperationFormat) -> OpId {
449 canonical::hash_bytes(&self.canonical_bytes_in(format))
450 }
451
452 /// The byte sequence that gets hashed to produce [`Self::op_id`]
453 /// under the current canonical form. Equivalent to
454 /// `self.canonical_bytes_in(OperationFormat::CURRENT)`.
455 ///
456 /// Exposed (not just consumed by `op_id`) so golden tests can pin
457 /// the exact pre-image. **Not** equal to `serde_json::to_vec(&op)`
458 /// in general — the on-disk JSON skips empty `parents` and
459 /// `None` `intent_id`, while the canonical form always emits a
460 /// (sorted, deduped) `parents` array. See `canonical.rs` for the
461 /// full V1 canonical-form spec.
462 pub fn canonical_bytes(&self) -> Vec<u8> {
463 self.canonical_bytes_in(OperationFormat::CURRENT)
464 }
465
466 /// The pre-image hashed under a specific canonical-form version.
467 ///
468 /// Today every `OperationFormat` variant routes to V1's encoder
469 /// (only V1 exists in production). When V2 lands, this match
470 /// gains an arm and the migration tool's encoder closure routes
471 /// here.
472 pub fn canonical_bytes_in(&self, format: OperationFormat) -> Vec<u8> {
473 match format {
474 OperationFormat::V1 => self.canonical_bytes_v1(),
475 }
476 }
477
478 fn canonical_bytes_v1(&self) -> Vec<u8> {
479 // Build a transient hashable view rather than hashing
480 // `self` directly so the parent ordering is canonical
481 // even if a caller hand-constructs an `Operation` with
482 // unsorted parents.
483 let canonical = CanonicalView {
484 kind: &self.kind,
485 parents: self.parents.iter().collect::<IndexSet<_>>().into_iter().collect::<BTreeSet<_>>(),
486 intent_id: self.intent_id.as_deref(),
487 };
488 serde_json::to_vec(&canonical).expect("canonical serialization")
489 }
490}
491
492/// Hashable shadow of [`Operation`] with parents in a `BTreeSet` so
493/// the serialization is order-independent regardless of how the
494/// caller constructed the live operation. Never persisted; lives
495/// only as a transient for hashing.
496#[derive(Serialize)]
497struct CanonicalView<'a> {
498 #[serde(flatten)]
499 kind: &'a OperationKind,
500 parents: BTreeSet<&'a OpId>,
501 /// `skip_serializing_if = "Option::is_none"` keeps existing
502 /// `OpId`s stable for ops without an intent — the field is
503 /// omitted from the canonical JSON entirely.
504 #[serde(skip_serializing_if = "Option::is_none")]
505 intent_id: Option<&'a str>,
506}
507
508/// An operation paired with its computed `OpId` and the resulting
509/// stage transition. This is what gets persisted under
510/// `<root>/ops/<OpId>.json`.
511///
512/// `format_version` records the canonical form the `op_id` was
513/// computed under. Pre-#244 stores didn't emit this field; reading
514/// such records deserializes to [`OperationFormat::V1`] (the
515/// implicit pre-versioning format), and writing V1 records continues
516/// to omit it (`skip_serializing_if = is_implicit`) so adding the
517/// field doesn't rotate any existing `OpId` or change any on-disk
518/// byte. Records written under a future format will explicitly
519/// carry their version tag.
520#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
521pub struct OperationRecord {
522 pub op_id: OpId,
523 #[serde(default, skip_serializing_if = "OperationFormat::is_implicit")]
524 pub format_version: OperationFormat,
525 #[serde(flatten)]
526 pub op: Operation,
527 pub produces: StageTransition,
528}
529
530impl OperationRecord {
531 pub fn new(op: Operation, produces: StageTransition) -> Self {
532 let op_id = op.op_id();
533 Self { op_id, format_version: OperationFormat::CURRENT, op, produces }
534 }
535}
536
#[cfg(test)]
mod tests {
    use super::*;

    /// Golden `OpId` of the parentless, intent-free `add_factorial`
    /// operation. If this value has to change, the canonical form
    /// has shifted and *every* op_id in every existing store has
    /// changed with it. That is a major-version event for the data
    /// model and must be a deliberate decision, not a side effect of
    /// reordering fields. Update with care.
    const ADD_FACTORIAL_OP_ID: &str =
        "f112990d31ef2a63f3e5ca5680637ed36a54bc7e8230510ae0c0e93fcb39d104";

    /// An `AddFunction` kind with no declared budget.
    fn add_function(sig_id: &str, stage_id: &str, effects: EffectSet) -> OperationKind {
        OperationKind::AddFunction {
            sig_id: sig_id.into(),
            stage_id: stage_id.into(),
            effects,
            budget_cost: None,
        }
    }

    /// The fixture most tests share: add `fac` with no effects.
    fn add_factorial() -> OperationKind {
        add_function("fac::Int->Int", "abc123", EffectSet::new())
    }

    /// Build an operation from string-literal parent ids.
    fn op_with(kind: OperationKind, parents: &[&str]) -> Operation {
        Operation::new(kind, parents.iter().map(|p| OpId::from(*p)))
    }

    /// Owned copies of the given ids, for comparing against
    /// `Operation::parents`.
    fn ids(raw: &[&str]) -> Vec<OpId> {
        raw.iter().map(|p| OpId::from(*p)).collect()
    }

    #[test]
    fn identical_operations_have_identical_op_ids() {
        let first = op_with(add_factorial(), &[]);
        let second = op_with(add_factorial(), &[]);
        assert_eq!(first.op_id(), second.op_id());
    }

    #[test]
    fn different_operations_have_different_op_ids() {
        let factorial = op_with(add_factorial(), &[]);
        let double = op_with(
            add_function("double::Int->Int", "abc123", EffectSet::new()),
            &[],
        );
        assert_ne!(factorial.op_id(), double.op_id());
    }

    #[test]
    fn parent_set_changes_op_id() {
        let orphan = op_with(add_factorial(), &[]);
        let child = op_with(add_factorial(), &["op-parent-1"]);
        assert_ne!(orphan.op_id(), child.op_id());
    }

    #[test]
    fn parent_order_does_not_affect_op_id() {
        let bac = op_with(add_factorial(), &["b", "a", "c"]);
        let cab = op_with(add_factorial(), &["c", "a", "b"]);
        assert_eq!(bac.op_id(), cab.op_id());
        // The stored form is sorted as well.
        assert_eq!(bac.parents, ids(&["a", "b", "c"]));
    }

    #[test]
    fn duplicate_parents_are_deduped() {
        let repeated = op_with(add_factorial(), &["a", "a", "b"]);
        let unique = op_with(add_factorial(), &["a", "b"]);
        assert_eq!(repeated.op_id(), unique.op_id());
        assert_eq!(repeated.parents, ids(&["a", "b"]));
    }

    #[test]
    fn rename_with_same_body_hashes_equal_across_runs() {
        // Two independent runs that produce the same rename against
        // the same parent must agree on the OpId. This is the
        // automatic-dedup property #129 relies on for distributed
        // agents.
        let rename = OperationKind::RenameSymbol {
            from: "parse::Str->Int".into(),
            to: "parse_int::Str->Int".into(),
            body_stage_id: "abc123".into(),
        };
        let first_run = op_with(rename.clone(), &["op-parent"]);
        let second_run = op_with(rename, &["op-parent"]);
        assert_eq!(first_run.op_id(), second_run.op_id());
    }

    #[test]
    fn rename_does_not_collide_with_delete_plus_add() {
        // `RenameSymbol` exists precisely so that it gets a different
        // OpId from the semantically equivalent `RemoveFunction` +
        // `AddFunction` pair. Causal history sees one event, not two.
        let parents = ["op-parent"];
        let rename_id = op_with(
            OperationKind::RenameSymbol {
                from: "parse::Str->Int".into(),
                to: "parse_int::Str->Int".into(),
                body_stage_id: "abc123".into(),
            },
            &parents,
        )
        .op_id();
        let remove_id = op_with(
            OperationKind::RemoveFunction {
                sig_id: "parse::Str->Int".into(),
                last_stage_id: "abc123".into(),
            },
            &parents,
        )
        .op_id();
        let add_id = op_with(
            add_function("parse_int::Str->Int", "abc123", EffectSet::new()),
            &parents,
        )
        .op_id();
        assert_ne!(rename_id, remove_id);
        assert_ne!(rename_id, add_id);
    }

    #[test]
    fn effect_set_order_does_not_affect_op_id() {
        // `EffectSet` is a BTreeSet, so iteration is sorted. Build
        // the same set through two insertion orders and confirm the
        // canonical form does not differ.
        let mut io_first = EffectSet::new();
        io_first.insert("io".into());
        io_first.insert("fs_write".into());
        let mut fs_first = EffectSet::new();
        fs_first.insert("fs_write".into());
        fs_first.insert("io".into());

        let a = op_with(add_function("x", "s", io_first), &[]);
        let b = op_with(add_function("x", "s", fs_first), &[]);
        assert_eq!(a.op_id(), b.op_id());
    }

    #[test]
    fn op_id_is_64_char_lowercase_hex() {
        let id = op_with(add_factorial(), &[]).op_id();
        assert_eq!(id.len(), 64);
        assert!(id.chars().all(|c| matches!(c, '0'..='9' | 'a'..='f')));
    }

    #[test]
    fn round_trip_through_serde_json() {
        let mut to_effects = EffectSet::new();
        to_effects.insert("io".into());
        let original = op_with(
            OperationKind::ChangeEffectSig {
                sig_id: "f".into(),
                from_stage_id: "old".into(),
                to_stage_id: "new".into(),
                from_effects: EffectSet::new(),
                to_effects,
                from_budget: None,
                to_budget: None,
            },
            &["op-parent"],
        );
        let json = serde_json::to_string(&original).expect("serialize");
        let decoded: Operation = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(original, decoded);
        assert_eq!(original.op_id(), decoded.op_id());
    }

    #[test]
    fn operation_record_carries_op_id() {
        let op = op_with(add_factorial(), &[]);
        let expected = op.op_id();
        let produces = StageTransition::Create {
            sig_id: "fac::Int->Int".into(),
            stage_id: "abc123".into(),
        };
        let record = OperationRecord::new(op, produces);
        assert_eq!(record.op_id, expected);
    }

    #[test]
    fn intent_id_is_part_of_op_id_canonical_hash() {
        // Dedup property: the same `(kind, parents, intent_id)`
        // yields the same OpId. Otherwise-identical ops with
        // different intent_ids get different OpIds, so causally
        // distinct events (different prompts) hash distinctly.
        let tagged = |intent: &str| op_with(add_factorial(), &[]).with_intent(intent).op_id();
        let untagged = op_with(add_factorial(), &[]).op_id();
        let intent_a = tagged("intent-a");
        let intent_b = tagged("intent-b");
        let intent_a_again = tagged("intent-a");

        // An op without an intent differs from any tagged variant.
        assert_ne!(untagged, intent_a);
        // Different intents give different OpIds.
        assert_ne!(intent_a, intent_b);
        // Same intent gives the same OpId (the load-bearing dedup
        // invariant).
        assert_eq!(intent_a, intent_a_again);
    }

    #[test]
    fn op_without_intent_keeps_pre_intent_op_id() {
        // Backwards-compat invariant: an op built without an intent
        // must hash to the value it had before #131 added the field.
        // The golden test pins the exact hash; this test checks that
        // setting an intent and clearing it again does not drift.
        let mut op = op_with(add_factorial(), &[]);
        let baseline = op.op_id();

        op.intent_id = Some("transient".into());
        assert_ne!(baseline, op.op_id());

        op.intent_id = None;
        assert_eq!(baseline, op.op_id());
    }

    /// Golden-hash check; see [`ADD_FACTORIAL_OP_ID`] for what a
    /// change to the expected value implies.
    #[test]
    fn canonical_form_is_stable_for_a_known_input() {
        let op = op_with(
            add_function("fac::Int->Int", "abc123", EffectSet::new()),
            &[],
        );
        assert_eq!(op.op_id(), ADD_FACTORIAL_OP_ID);
    }

    #[test]
    fn merge_kind_round_trips() {
        let original = op_with(OperationKind::Merge { resolved: 3 }, &["op-a", "op-b"]);
        let json = serde_json::to_string(&original).expect("ser");
        let decoded: Operation = serde_json::from_str(&json).expect("de");
        assert_eq!(original, decoded);
        assert_eq!(original.op_id(), decoded.op_id());
    }

    #[test]
    fn merge_stage_transition_round_trips() {
        let entries: BTreeMap<SigId, Option<StageId>> = [
            ("sig-a".to_string(), Some("stage-a".to_string())),
            // Removed by the merge.
            ("sig-b".to_string(), None),
        ]
        .into_iter()
        .collect();
        let transition = StageTransition::Merge { entries };
        let json = serde_json::to_string(&transition).expect("ser");
        let decoded: StageTransition = serde_json::from_str(&json).expect("de");
        assert_eq!(transition, decoded);
    }

    #[test]
    fn merge_resolved_count_changes_op_id() {
        // Two merges over the same parents but with different
        // resolved counts must hash differently, so structurally
        // distinct merges never collide on op_id.
        let parents = ["op-a", "op-b"];
        let one = op_with(OperationKind::Merge { resolved: 1 }, &parents);
        let two = op_with(OperationKind::Merge { resolved: 2 }, &parents);
        assert_ne!(one.op_id(), two.op_id());
    }

    #[test]
    fn existing_add_function_op_id_is_unchanged_after_merge_added() {
        // Having the Merge variant in the same enum must not perturb
        // the canonical bytes of the existing variants. The golden
        // test checks the literal value; this one checks that it
        // still holds after a Merge op has been constructed.
        let _merge = op_with(OperationKind::Merge { resolved: 0 }, &["op-x", "op-y"]);
        let op = op_with(add_factorial(), &[]);
        assert_eq!(op.op_id(), ADD_FACTORIAL_OP_ID);
    }
}
828}