Skip to main content

objects/object/
state_core.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Core state type.
3
4use chrono::{DateTime, Utc};
5use serde::{Deserialize, Serialize};
6
7use super::{Attribution, ChangeId, ContentHash, StateSignature, Status, Verification};
8
/// A state is an immutable snapshot with rich metadata.
///
/// On-disk encoding is rmp-serde's positional struct format (a fixed-length
/// tuple). This is sensitive to field order: inserting a field in the middle
/// of the tuple breaks every pre-existing on-disk state. The invariant we
/// keep going forward is:
///
/// > **New optional fields are added at the tail of the struct, below
/// > `status`, with `#[serde(default)]`.** Mid-struct inserts are
/// > forbidden. rmp-serde's positional deserializer tolerates missing
/// > trailing fields when they have a `Default` impl, so tail-only growth
/// > is forward-compatible automatically.
///
/// Required (non-optional) fields — `change_id`, `tree`, `parents`,
/// `attribution`, `created_at`, `status` — must never move. Optional fields
/// may be reordered only among themselves, and only at the tail.
///
/// NOTE(review): the optional fields declared *above* `status` (`intent`,
/// `confidence`, `verification`, `signature`) carry no `#[serde(default)]`
/// and sit inside the positional prefix, so presumably they must not move
/// either — confirm before reordering anything above the tail marker.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct State {
    /// Identifier of this state object. Not fed into `update_hash`.
    pub change_id: ChangeId,
    /// Cached result of `compute_hash`, filled lazily by `hash()`.
    /// `#[serde(skip)]` keeps it out of the serialized tuple; mutators that
    /// touch a hashed field reset it to `None`.
    #[serde(skip)]
    content_hash: Option<ContentHash>,
    /// Content hash of the tree this state snapshots.
    pub tree: ContentHash,
    /// Parent states; empty for a root, more than one for a merge.
    pub parents: Vec<ChangeId>,
    /// Principal (and optional agent) credited with this state.
    pub attribution: Attribution,
    /// Optional free-text intent; part of the state hash.
    pub intent: Option<String>,
    /// Optional confidence; `with_confidence` clamps it to `0.0..=1.0`.
    pub confidence: Option<f32>,
    /// Committer time. Only whole seconds (`timestamp()`) are hashed.
    pub created_at: DateTime<Utc>,
    /// Optional verification record; part of the state hash.
    pub verification: Option<Verification>,
    /// Optional authoring signature. Not fed into `update_hash`.
    pub signature: Option<StateSignature>,
    /// Lifecycle status; hashed as a single byte via `Status::to_byte`.
    pub status: Status,
    // --- tail-only optional fields below. Add new fields here, never above. ---
    /// Optional provenance hash; part of the state hash.
    #[serde(default)]
    pub provenance: Option<ContentHash>,
    /// Logical identity of the change. `logical_change_id()` falls back to
    /// `change_id` when this is `None`. Part of the state hash.
    #[serde(default)]
    pub logical_change_id: Option<ChangeId>,
    /// Optional context tree root for code annotations.
    #[serde(default)]
    pub context: Option<ContentHash>,
    /// Authoring timestamp for this state, when distinct from
    /// `created_at`.
    ///
    /// `created_at` is the *committer* time — when the state object
    /// came into being in its current form. We hash that into the
    /// state id so re-imports of the same git history produce
    /// deterministic Heddle hashes. But for blame display we usually
    /// want the *author* time — when someone actually wrote the
    /// change — which survives `git rebase`, cherry-pick, squash-
    /// merge, and `git commit --amend`. The `bridge git ingest`
    /// importer fills this from `git_commit.authored_at`; native
    /// heddle commits leave it `None` and blame falls back to
    /// `created_at`.
    #[serde(default)]
    pub authored_at: Option<DateTime<Utc>>,
    /// Content hash of the state's [`RiskSignalBlob`](crate::object::RiskSignalBlob),
    /// when present. Computed and persisted whenever risk signals fire on a
    /// state. `None` for states from before W1 and for states where no
    /// signals fired.
    ///
    /// **Not part of the state hash.** `update_hash` never reads this field,
    /// so legacy states without it deserialize as `None` and hash
    /// byte-identical to before W1, and attaching signals later does not
    /// re-key the state.
    #[serde(default)]
    pub risk_signals: Option<ContentHash>,
    /// Content hash of the state's [`ReviewSignaturesBlob`](crate::object::ReviewSignaturesBlob),
    /// when reviewers have signed off (read / agent-preview / agent-co-review).
    /// Not part of the state hash.
    #[serde(default)]
    pub review_signatures: Option<ContentHash>,
    /// Content hash of the state's [`DiscussionsBlob`](crate::object::DiscussionsBlob),
    /// when discussions are anchored to this state.
    /// Not part of the state hash.
    #[serde(default)]
    pub discussions: Option<ContentHash>,
    /// Content hash of the state's [`StructuredConflict`](crate::object::StructuredConflict),
    /// when this state captures an unresolved merge conflict as data.
    /// Not part of the state hash.
    #[serde(default)]
    pub structured_conflicts: Option<ContentHash>,
}
85
86impl State {
87    pub fn new(tree: ContentHash, parents: Vec<ChangeId>, attribution: Attribution) -> Self {
88        Self::new_snapshot(tree, parents, attribution)
89    }
90
91    pub fn new_snapshot(
92        tree: ContentHash,
93        parents: Vec<ChangeId>,
94        attribution: Attribution,
95    ) -> Self {
96        let change_id = ChangeId::generate();
97        Self::new_with_logical_change_id(tree, parents, attribution, change_id)
98    }
99
100    pub fn new_merge(tree: ContentHash, parents: Vec<ChangeId>, attribution: Attribution) -> Self {
101        Self::new_snapshot(tree, parents, attribution)
102    }
103
104    pub fn new_refresh_of(
105        tree: ContentHash,
106        parents: Vec<ChangeId>,
107        attribution: Attribution,
108        logical_change_id: ChangeId,
109    ) -> Self {
110        Self::new_with_logical_change_id(tree, parents, attribution, logical_change_id)
111    }
112
113    pub fn new_fork_of(
114        tree: ContentHash,
115        parents: Vec<ChangeId>,
116        attribution: Attribution,
117    ) -> Self {
118        Self::new_snapshot(tree, parents, attribution)
119    }
120
121    pub fn new_collapse_of(
122        tree: ContentHash,
123        parents: Vec<ChangeId>,
124        attribution: Attribution,
125    ) -> Self {
126        Self::new_snapshot(tree, parents, attribution)
127    }
128
129    fn new_with_logical_change_id(
130        tree: ContentHash,
131        parents: Vec<ChangeId>,
132        attribution: Attribution,
133        logical_change_id: ChangeId,
134    ) -> Self {
135        Self {
136            change_id: ChangeId::generate(),
137            logical_change_id: Some(logical_change_id),
138            content_hash: None,
139            tree,
140            parents,
141            attribution,
142            intent: None,
143            confidence: None,
144            created_at: Utc::now(),
145            verification: None,
146            signature: None,
147            provenance: None,
148            context: None,
149            authored_at: None,
150            risk_signals: None,
151            review_signatures: None,
152            discussions: None,
153            structured_conflicts: None,
154            status: Status::Draft,
155        }
156    }
157
158    pub fn with_intent(mut self, intent: impl Into<String>) -> Self {
159        self.intent = Some(intent.into());
160        self.content_hash = None;
161        self
162    }
163
164    pub fn with_confidence(mut self, confidence: f32) -> Self {
165        self.confidence = Some(confidence.clamp(0.0, 1.0));
166        self.content_hash = None;
167        self
168    }
169
170    pub fn with_verification(mut self, verification: Verification) -> Self {
171        self.verification = Some(verification);
172        self.content_hash = None;
173        self
174    }
175
176    pub fn with_signature(mut self, signature: StateSignature) -> Self {
177        self.signature = Some(signature);
178        self
179    }
180
181    pub fn with_provenance(mut self, provenance: ContentHash) -> Self {
182        self.provenance = Some(provenance);
183        self.content_hash = None;
184        self
185    }
186
187    /// Set the context tree root.
188    pub fn with_context(mut self, context: ContentHash) -> Self {
189        self.context = Some(context);
190        self.content_hash = None;
191        self
192    }
193
194    /// Attach a [`RiskSignalBlob`](crate::object::RiskSignalBlob) hash.
195    /// Render-time tick budgeting (selecting which signals to surface) is a
196    /// view over this stored data, not part of storage itself.
197    ///
198    /// **Not part of the state hash.** Risk signals are derived data computed
199    /// *about* a state from the diff against its parent; including them in
200    /// identity would make the same logical state hash differently depending
201    /// on which signals fired. That breaks every "is this the same state?"
202    /// check in the system. See `authored_at` for the same pattern.
203    pub fn with_risk_signals(mut self, risk_signals: ContentHash) -> Self {
204        self.risk_signals = Some(risk_signals);
205        self
206    }
207
208    /// Attach a [`ReviewSignaturesBlob`](crate::object::ReviewSignaturesBlob)
209    /// hash. The state's authoring [`StateSignature`] is unaffected; review
210    /// signatures live alongside it and accumulate over time.
211    ///
212    /// **Not part of the state hash.** Review signatures accumulate
213    /// post-capture; including them in identity would mean every signature
214    /// re-keys the state. See `authored_at` for the same pattern.
215    pub fn with_review_signatures(mut self, review_signatures: ContentHash) -> Self {
216        self.review_signatures = Some(review_signatures);
217        self
218    }
219
220    /// Attach a [`DiscussionsBlob`](crate::object::DiscussionsBlob) hash.
221    ///
222    /// **Not part of the state hash.** Discussions evolve independently of
223    /// the state they're anchored to — appending a turn must not change the
224    /// state's identity. See `authored_at` for the same pattern.
225    pub fn with_discussions(mut self, discussions: ContentHash) -> Self {
226        self.discussions = Some(discussions);
227        self
228    }
229
230    /// Attach a [`StructuredConflict`](crate::object::StructuredConflict) hash.
231    ///
232    /// **Not part of the state hash.** Conflict objects describe the merge's
233    /// disagreement; the state's tree and parents already encode what's being
234    /// merged. See `authored_at` for the same pattern.
235    pub fn with_structured_conflicts(mut self, structured_conflicts: ContentHash) -> Self {
236        self.structured_conflicts = Some(structured_conflicts);
237        self
238    }
239
240    /// Record the authoring timestamp separately from `created_at`.
241    /// Used by the git-ingest importer to preserve the distinction
242    /// between "when the change was originally written" (authored)
243    /// and "when this commit object came into being" (committer time,
244    /// stored in `created_at` so re-imports stay deterministic).
245    /// Native heddle commits leave this `None`; blame display then
246    /// falls back to `created_at`.
247    ///
248    /// **Not part of the state hash.** `created_at` is what hashes;
249    /// this field is purely metadata for display. A re-imported repo
250    /// that picks up updated authored timestamps will produce the
251    /// same Heddle State hashes as before.
252    pub fn with_authored_at(mut self, timestamp: DateTime<Utc>) -> Self {
253        self.authored_at = Some(timestamp);
254        // Intentionally no `content_hash = None` here — authored_at is
255        // not in the hash by design.
256        self
257    }
258
259    pub fn with_status(mut self, status: Status) -> Self {
260        self.status = status;
261        self.content_hash = None;
262        self
263    }
264
265    pub fn with_change_id(mut self, change_id: ChangeId) -> Self {
266        let previous_change_id = self.change_id;
267        self.change_id = change_id;
268        if self.logical_change_id == Some(previous_change_id) || self.logical_change_id.is_none() {
269            self.logical_change_id = Some(change_id);
270            self.content_hash = None;
271        }
272        self
273    }
274
275    pub fn with_logical_change_id(mut self, logical_change_id: ChangeId) -> Self {
276        self.logical_change_id = Some(logical_change_id);
277        self.content_hash = None;
278        self
279    }
280
281    pub fn logical_change_id(&self) -> ChangeId {
282        self.logical_change_id.unwrap_or(self.change_id)
283    }
284
285    pub fn with_timestamp(mut self, timestamp: DateTime<Utc>) -> Self {
286        self.created_at = timestamp;
287        self.content_hash = None;
288        self
289    }
290
291    pub fn compute_hash(&self) -> ContentHash {
292        let content_len = self.hash_len();
293        ContentHash::compute_typed_with_len("state", content_len, |hasher| {
294            self.update_hash(hasher);
295        })
296    }
297
298    pub fn hash(&mut self) -> ContentHash {
299        if self.content_hash.is_none() {
300            self.content_hash = Some(self.compute_hash());
301        }
302        self.content_hash.expect("hash was just computed above")
303    }
304
305    pub fn is_root(&self) -> bool {
306        self.parents.is_empty()
307    }
308
309    pub fn is_merge(&self) -> bool {
310        self.parents.len() > 1
311    }
312
313    pub fn is_agent_authored(&self) -> bool {
314        self.attribution.agent.is_some()
315    }
316
317    pub fn first_parent(&self) -> Option<&ChangeId> {
318        self.parents.first()
319    }
320
321    fn hash_len(&self) -> u64 {
322        let principal = &self.attribution.principal;
323        let mut len = 0u64;
324
325        len += 1;
326        if self.logical_change_id.is_some() {
327            len += 16;
328        }
329
330        len += self.tree.as_bytes().len() as u64;
331        len += 4;
332        len += (self.parents.len() * 16) as u64;
333
334        len += principal.name.len() as u64 + 1;
335        len += principal.email.len() as u64 + 1;
336
337        len += 1;
338        if let Some(agent) = &self.attribution.agent {
339            len += agent.provider.len() as u64 + 1;
340            len += agent.model.len() as u64 + 1;
341
342            len += 1;
343            if let Some(session_id) = &agent.session_id {
344                len += session_id.len() as u64 + 1;
345            }
346
347            len += 1;
348            if let Some(policy_id) = &agent.policy_id {
349                len += policy_id.len() as u64 + 1;
350            }
351        }
352
353        len += 1;
354        if let Some(intent) = &self.intent {
355            len += intent.len() as u64 + 1;
356        }
357
358        len += 1;
359        if self.confidence.is_some() {
360            len += 4;
361        }
362
363        len += 8;
364
365        len += 1;
366        if let Some(verification) = &self.verification {
367            len += verification.hash_len() as u64;
368        }
369
370        len += 1;
371        if self.provenance.is_some() {
372            len += 32;
373        }
374
375        len += 1;
376        if self.context.is_some() {
377            len += 32;
378        }
379
380        len += 1;
381
382        len
383    }
384
385    fn update_hash(&self, hasher: &mut blake3::Hasher) {
386        let principal = &self.attribution.principal;
387
388        if let Some(logical_change_id) = self.logical_change_id {
389            hasher.update(&[1]);
390            hasher.update(logical_change_id.as_bytes());
391        } else {
392            hasher.update(&[0]);
393        }
394
395        hasher.update(self.tree.as_bytes());
396        hasher.update(&(self.parents.len() as u32).to_le_bytes());
397        for parent in &self.parents {
398            hasher.update(parent.as_bytes());
399        }
400
401        hasher.update(principal.name.as_bytes());
402        hasher.update(&[0]);
403        hasher.update(principal.email.as_bytes());
404        hasher.update(&[0]);
405
406        if let Some(agent) = &self.attribution.agent {
407            hasher.update(&[1]);
408            hasher.update(agent.provider.as_bytes());
409            hasher.update(&[0]);
410            hasher.update(agent.model.as_bytes());
411            hasher.update(&[0]);
412            write_optional_string(hasher, &agent.session_id);
413            write_optional_string(hasher, &agent.segment_id);
414            write_optional_string(hasher, &agent.policy_id);
415        } else {
416            hasher.update(&[0]);
417        }
418
419        write_optional_string(hasher, &self.intent);
420
421        if let Some(confidence) = self.confidence {
422            hasher.update(&[1]);
423            hasher.update(&confidence.to_le_bytes());
424        } else {
425            hasher.update(&[0]);
426        }
427
428        hasher.update(&self.created_at.timestamp().to_le_bytes());
429
430        if let Some(verification) = &self.verification {
431            hasher.update(&[1]);
432            verification.update_hasher(hasher);
433        } else {
434            hasher.update(&[0]);
435        }
436
437        if let Some(provenance) = self.provenance {
438            hasher.update(&[1]);
439            hasher.update(provenance.as_bytes());
440        } else {
441            hasher.update(&[0]);
442        }
443
444        if let Some(context) = self.context {
445            hasher.update(&[1]);
446            hasher.update(context.as_bytes());
447        } else {
448            hasher.update(&[0]);
449        }
450
451        hasher.update(&[self.status.to_byte()]);
452    }
453}
454
455fn write_optional_string(hasher: &mut blake3::Hasher, value: &Option<String>) {
456    match value {
457        Some(value) => {
458            hasher.update(&[1]);
459            hasher.update(value.as_bytes());
460            hasher.update(&[0]);
461        }
462        None => {
463            hasher.update(&[0]);
464        }
465    }
466}
467
#[cfg(test)]
mod tests {
    use super::*;
    use crate::object::Principal;

    /// Human-only attribution shared by the fixtures below.
    fn sample_attribution() -> Attribution {
        let alice = Principal::new("Alice", "alice@example.com");
        Attribution::human(alice)
    }

    /// Parentless snapshot over a fixed tree hash.
    fn sample_state() -> State {
        let tree = ContentHash::compute(b"tree");
        State::new_snapshot(tree, vec![], sample_attribution())
    }

    /// Caches the hash of `state`, applies `mutate`, and checks that the
    /// mutated state reports a different hash that matches a from-scratch
    /// recomputation.
    fn assert_mutator_invalidates_cached_hash(
        mut state: State,
        mutate: impl FnOnce(State) -> State,
    ) {
        let hash_before = state.hash();
        let mut mutated = mutate(state);
        let hash_after = mutated.hash();
        assert_ne!(hash_after, hash_before);
        assert_eq!(hash_after, mutated.compute_hash());
    }

    #[test]
    fn new_snapshot_sets_fresh_logical_identity() {
        let snapshot = sample_state();
        let stored_logical_id = snapshot
            .logical_change_id
            .expect("snapshot should set logical identity");
        assert_ne!(snapshot.logical_change_id(), snapshot.change_id);
        assert_eq!(snapshot.logical_change_id(), stored_logical_id);
    }

    #[test]
    fn new_refresh_preserves_explicit_logical_identity() {
        let explicit_id = ChangeId::from_bytes([7; 16]);
        let tree = ContentHash::compute(b"tree");
        let refreshed = State::new_refresh_of(tree, vec![], sample_attribution(), explicit_id);
        assert_eq!(refreshed.logical_change_id(), explicit_id);
        assert_ne!(refreshed.change_id, explicit_id);
    }

    #[test]
    fn new_merge_uses_fresh_logical_identity() {
        let merge_parents = vec![ChangeId::from_bytes([1; 16]), ChangeId::from_bytes([2; 16])];
        let merged = State::new_merge(
            ContentHash::compute(b"tree"),
            merge_parents,
            sample_attribution(),
        );
        let stored_logical_id = merged
            .logical_change_id
            .expect("merge should set logical identity");
        assert_ne!(merged.logical_change_id(), merged.change_id);
        assert_eq!(merged.logical_change_id(), stored_logical_id);
        assert!(merged.is_merge());
    }

    #[test]
    fn with_change_id_invalidates_cached_hash_when_logical_identity_changes() {
        // Pin the logical id to the state's own change id so that replacing
        // the change id drags the logical id along with it.
        let snapshot = sample_state();
        let own_id = snapshot.change_id;
        let mut pinned = snapshot.with_logical_change_id(own_id);
        let hash_before = pinned.hash();
        let replacement = ChangeId::from_bytes([9; 16]);

        let mut swapped = pinned.with_change_id(replacement);

        assert_eq!(swapped.logical_change_id(), replacement);
        assert_ne!(swapped.hash(), hash_before);
        assert_eq!(swapped.hash(), swapped.compute_hash());
    }

    #[test]
    fn agent_segment_is_part_of_state_hash() {
        let tree = ContentHash::compute(b"tree");
        let timestamp = Utc::now();
        let logical_change_id = ChangeId::from_bytes([3; 16]);

        // Everything except the agent segment is held constant.
        let hash_for_segment = |segment: &'static str| {
            let agent = crate::object::Agent::new("openai", "gpt-5").with_session("sess-1", segment);
            let attribution =
                Attribution::with_agent(Principal::new("Alice", "alice@example.com"), agent);
            State::new_snapshot(tree, vec![], attribution)
                .with_logical_change_id(logical_change_id)
                .with_timestamp(timestamp)
                .compute_hash()
        };

        assert_ne!(hash_for_segment("seg-1"), hash_for_segment("seg-2"));
    }

    #[test]
    fn with_intent_invalidates_cached_hash() {
        assert_mutator_invalidates_cached_hash(sample_state(), |s| s.with_intent("capture intent"));
    }

    #[test]
    fn with_confidence_invalidates_cached_hash() {
        assert_mutator_invalidates_cached_hash(sample_state(), |s| s.with_confidence(0.9));
    }

    #[test]
    fn with_verification_invalidates_cached_hash() {
        assert_mutator_invalidates_cached_hash(sample_state(), |s| {
            let verification = Verification::new().with_tests_passed(true);
            s.with_verification(verification)
        });
    }

    #[test]
    fn with_status_invalidates_cached_hash() {
        assert_mutator_invalidates_cached_hash(sample_state(), |s| s.with_status(Status::Published));
    }

    #[test]
    fn with_timestamp_invalidates_cached_hash() {
        assert_mutator_invalidates_cached_hash(sample_state(), |s| {
            let one_second_later = Utc::now() + chrono::Duration::seconds(1);
            s.with_timestamp(one_second_later)
        });
    }

    /// Pins the contract that the W1 tail fields (risk_signals,
    /// review_signatures, discussions, structured_conflicts) stay out of
    /// the state hash. If they were hashed, the same logical state would
    /// hash differently depending on which signals fired, which review
    /// signatures arrived, or whether a discussion was anchored, breaking
    /// every "same state?" check. They persist independently of identity.
    #[test]
    fn w1_tail_fields_are_not_part_of_state_hash() {
        let mut plain = sample_state();
        let plain_hash = plain.hash();

        let mut with_tail_fields = sample_state()
            .with_change_id(plain.change_id)
            .with_logical_change_id(plain.logical_change_id())
            .with_risk_signals(ContentHash::compute(b"risk-signals-blob"))
            .with_review_signatures(ContentHash::compute(b"review-signatures-blob"))
            .with_discussions(ContentHash::compute(b"discussions-blob"))
            .with_structured_conflicts(ContentHash::compute(b"conflicts-blob"));
        // Align the only remaining hashed field that differs by construction.
        with_tail_fields.created_at = plain.created_at;

        assert_eq!(
            with_tail_fields.hash(),
            plain_hash,
            "W1 tail fields must not affect the state hash"
        );
    }
}