1use std::collections::BTreeMap;
6
7use chrono::{DateTime, Utc};
8use serde::{Deserialize, Serialize};
9
10use super::{Attribution, ChangeId, ContentHash, Principal};
11
/// Two-state status flag carried by a `State`.
///
/// `Draft` is the `Default` value. `Status::to_byte` maps the variants to a
/// single byte (`Draft` = 0, `Published` = 1).
///
/// NOTE(review): the variant names/order are presumably part of the serialized
/// form produced by the serde derives — confirm before renaming or reordering.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum Status {
    /// Default variant; encoded as byte `0`.
    #[default]
    Draft,
    /// Encoded as byte `1`.
    Published,
}
21
22impl Status {
23 pub fn to_byte(&self) -> u8 {
24 match self {
25 Status::Draft => 0,
26 Status::Published => 1,
27 }
28 }
29
30 pub fn from_byte(b: u8) -> Option<Self> {
31 match b {
32 0 => Some(Status::Draft),
33 1 => Some(Status::Published),
34 _ => None,
35 }
36 }
37}
38
/// Signature record that can be attached to a `State`.
///
/// All three fields are plain strings; this type performs no validation.
/// NOTE(review): the textual encoding of `public_key` and `signature`
/// (hex, base64, ...) is not visible here — confirm against the signer.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct StateSignature {
    /// Identifier of the signing algorithm.
    pub algorithm: String,
    /// Public key associated with the signature, as text.
    pub public_key: String,
    /// The signature itself, as text.
    pub signature: String,
}
48
49impl StateSignature {
50 pub fn algorithm(&self) -> &str {
51 &self.algorithm
52 }
53}
54
/// Three-way classification of a signature: valid, invalid, or absent.
///
/// NOTE(review): the code that produces these values is not in this file;
/// presumably a signature-verification routine — confirm against callers.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SignatureStatus {
    /// A signature was present and accepted.
    Valid,
    /// A signature was present but rejected.
    Invalid,
    /// No signature was present.
    Unsigned,
}

impl SignatureStatus {
    /// `true` only for [`SignatureStatus::Valid`].
    pub fn is_valid(self) -> bool {
        matches!(self, Self::Valid)
    }

    /// `true` only for [`SignatureStatus::Unsigned`].
    pub fn is_unsigned(self) -> bool {
        matches!(self, Self::Unsigned)
    }
}
72
/// Optional verification metrics attached to a `State`.
///
/// Every scalar is optional; `Verification::is_empty` reports whether nothing
/// at all has been recorded. The whole record is fed into the state hash by
/// `Verification::update_hasher`.
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
pub struct Verification {
    /// Whether the tests passed, if known.
    pub tests_passed: Option<bool>,
    /// Number of failed tests, if known.
    pub tests_failed: Option<u32>,
    /// Coverage figure, if known.
    /// NOTE(review): scale (0–1 vs 0–100) is not established in this file.
    pub coverage_pct: Option<f32>,
    /// Change in coverage, if known (same scale caveat as `coverage_pct`).
    pub coverage_delta: Option<f32>,
    /// Number of lint warnings, if known.
    pub lint_warnings: Option<u32>,
    /// Free-form extra entries, iterated in key order (it is a `BTreeMap`);
    /// defaults to empty when the field is missing on deserialization.
    #[serde(default)]
    pub custom: BTreeMap<String, serde_json::Value>,
}
86
87impl Verification {
88 pub fn new() -> Self {
89 Self::default()
90 }
91
92 pub fn with_tests_passed(mut self, passed: bool) -> Self {
93 self.tests_passed = Some(passed);
94 self
95 }
96
97 pub fn with_tests_failed(mut self, failed: u32) -> Self {
98 self.tests_failed = Some(failed);
99 self
100 }
101
102 pub fn is_empty(&self) -> bool {
103 self.tests_passed.is_none()
104 && self.tests_failed.is_none()
105 && self.coverage_pct.is_none()
106 && self.coverage_delta.is_none()
107 && self.lint_warnings.is_none()
108 && self.custom.is_empty()
109 }
110
111 pub(crate) fn hash_len(&self) -> usize {
112 let mut len = 0;
113 len += 1 + self.tests_passed.map(|_| 1).unwrap_or(0);
114 len += 1 + self.tests_failed.map(|_| 4).unwrap_or(0);
115 len += 1 + self.coverage_pct.map(|_| 4).unwrap_or(0);
116 len += 1 + self.coverage_delta.map(|_| 4).unwrap_or(0);
117 len += 1 + self.lint_warnings.map(|_| 4).unwrap_or(0);
118 len += 4;
119 for (key, value) in &self.custom {
120 let value_bytes = serde_json::to_vec(value).unwrap_or_default();
121 len += 4 + key.len();
122 len += 4 + value_bytes.len();
123 }
124 len
125 }
126
127 pub(crate) fn update_hasher(&self, hasher: &mut blake3::Hasher) {
128 let tests_passed = self.tests_passed.map(u8::from);
129 write_optional_u8(hasher, tests_passed);
130 write_optional_u32(hasher, self.tests_failed);
131 write_optional_f32(hasher, self.coverage_pct);
132 write_optional_f32(hasher, self.coverage_delta);
133 write_optional_u32(hasher, self.lint_warnings);
134 let custom_len = self.custom.len() as u32;
135 hasher.update(&custom_len.to_le_bytes());
136 for (key, value) in &self.custom {
137 let key_bytes = key.as_bytes();
138 let value_bytes = serde_json::to_vec(value).unwrap_or_default();
139 hasher.update(&(key_bytes.len() as u32).to_le_bytes());
140 hasher.update(key_bytes);
141 hasher.update(&(value_bytes.len() as u32).to_le_bytes());
142 hasher.update(&value_bytes);
143 }
144 }
145}
146
147fn write_optional_u8(hasher: &mut blake3::Hasher, value: Option<u8>) {
148 match value {
149 Some(v) => {
150 hasher.update(&[1]);
151 hasher.update(&[v]);
152 }
153 None => {
154 hasher.update(&[0]);
155 }
156 }
157}
158
159fn write_optional_u32(hasher: &mut blake3::Hasher, value: Option<u32>) {
160 match value {
161 Some(v) => {
162 hasher.update(&[1]);
163 hasher.update(&v.to_le_bytes());
164 }
165 None => {
166 hasher.update(&[0]);
167 }
168 }
169}
170
171fn write_optional_f32(hasher: &mut blake3::Hasher, value: Option<f32>) {
172 match value {
173 Some(v) => {
174 hasher.update(&[1]);
175 hasher.update(&v.to_le_bytes());
176 }
177 None => {
178 hasher.update(&[0]);
179 }
180 }
181}
182
/// A recorded state: a tree hash plus parents, attribution and metadata.
///
/// The content hash (see `State::compute_hash`) covers only the fields read by
/// `update_hash_core` and `update_hash_fidelity`; the per-field notes below say
/// which fields those are.
///
/// NOTE(review): field order is presumably significant for positional serde
/// formats (the tests round-trip through `rmp_serde`) — confirm before
/// reordering. The derived `PartialEq` also compares the private hash cache,
/// so an otherwise identical state with a populated cache compares unequal to
/// one without.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct State {
    /// Identifier of this particular state. Not fed into the content hash.
    pub change_id: ChangeId,
    /// Memoized result of `compute_hash`; never serialized, `None` until
    /// `State::hash` is called or after a hash-affecting builder runs.
    #[serde(skip)]
    content_hash: Option<ContentHash>,
    /// Hash of the tree this state points at. Hashed.
    pub tree: ContentHash,
    /// Parent change ids, in order. Hashed (count, then each id).
    pub parents: Vec<ChangeId>,
    /// Principal and optional agent responsible for the state. Hashed.
    pub attribution: Attribution,
    /// Optional free-text intent. Hashed.
    pub intent: Option<String>,
    /// Optional confidence; `with_confidence` clamps it to `0.0..=1.0`. Hashed.
    pub confidence: Option<f32>,
    /// Creation time. Hashed at whole-second resolution (`timestamp()`).
    pub created_at: DateTime<Utc>,
    /// Optional verification metrics. Hashed.
    pub verification: Option<Verification>,
    /// Optional signature. Not fed into the content hash.
    pub signature: Option<StateSignature>,
    /// Draft/published flag. Hashed as a single byte.
    pub status: Status,
    /// Optional provenance object hash. Hashed.
    #[serde(default)]
    pub provenance: Option<ContentHash>,
    /// Logical identity; `State::logical_change_id()` falls back to
    /// `change_id` when this is `None`. Hashed.
    #[serde(default)]
    pub logical_change_id: Option<ChangeId>,
    /// Optional context object hash. Hashed.
    #[serde(default)]
    pub context: Option<ContentHash>,
    /// Optional authoring time. Hashed at whole-second resolution.
    #[serde(default)]
    pub authored_at: Option<DateTime<Utc>>,
    /// Optional risk-signals object hash. Not fed into the content hash.
    #[serde(default)]
    pub risk_signals: Option<ContentHash>,
    /// Optional review-signatures object hash. Not fed into the content hash.
    #[serde(default)]
    pub review_signatures: Option<ContentHash>,
    /// Optional discussions object hash. Not fed into the content hash.
    #[serde(default)]
    pub discussions: Option<ContentHash>,
    /// Optional structured-conflicts object hash. Not fed into the content hash.
    #[serde(default)]
    pub structured_conflicts: Option<ContentHash>,
    /// Optional committer identity. Hashed.
    #[serde(default)]
    pub committer: Option<Principal>,
    /// Author timezone offset. Hashed.
    /// NOTE(review): presumably seconds east of UTC — TODO confirm.
    #[serde(default)]
    pub authored_tz_offset: i32,
    /// Committer timezone offset (same unit caveat as above). Hashed.
    #[serde(default)]
    pub committer_tz_offset: i32,
    /// Optional raw message bytes; need not be valid UTF-8. Hashed.
    #[serde(default)]
    pub raw_message: Option<Vec<u8>>,
    /// Lossiness flag. Not read by the hashing code in this file, although
    /// `with_git_lossy` still clears the cached hash.
    #[serde(default)]
    pub git_lossy: bool,
    /// Extra `(name, value)` header pairs as raw bytes. Hashed in order.
    #[serde(default)]
    pub extra_headers: Vec<(Vec<u8>, Vec<u8>)>,
}
342
343impl State {
344 pub fn new(tree: ContentHash, parents: Vec<ChangeId>, attribution: Attribution) -> Self {
345 Self::new_snapshot(tree, parents, attribution)
346 }
347
348 pub fn new_snapshot(
349 tree: ContentHash,
350 parents: Vec<ChangeId>,
351 attribution: Attribution,
352 ) -> Self {
353 let change_id = ChangeId::generate();
354 Self::new_with_logical_change_id(tree, parents, attribution, change_id)
355 }
356
357 pub fn new_merge(tree: ContentHash, parents: Vec<ChangeId>, attribution: Attribution) -> Self {
358 Self::new_snapshot(tree, parents, attribution)
359 }
360
361 pub fn new_refresh_of(
362 tree: ContentHash,
363 parents: Vec<ChangeId>,
364 attribution: Attribution,
365 logical_change_id: ChangeId,
366 ) -> Self {
367 Self::new_with_logical_change_id(tree, parents, attribution, logical_change_id)
368 }
369
370 pub fn new_fork_of(
371 tree: ContentHash,
372 parents: Vec<ChangeId>,
373 attribution: Attribution,
374 ) -> Self {
375 Self::new_snapshot(tree, parents, attribution)
376 }
377
378 pub fn new_collapse_of(
379 tree: ContentHash,
380 parents: Vec<ChangeId>,
381 attribution: Attribution,
382 ) -> Self {
383 Self::new_snapshot(tree, parents, attribution)
384 }
385
386 fn new_with_logical_change_id(
387 tree: ContentHash,
388 parents: Vec<ChangeId>,
389 attribution: Attribution,
390 logical_change_id: ChangeId,
391 ) -> Self {
392 Self {
393 change_id: ChangeId::generate(),
394 logical_change_id: Some(logical_change_id),
395 content_hash: None,
396 tree,
397 parents,
398 attribution,
399 intent: None,
400 confidence: None,
401 created_at: Utc::now(),
402 verification: None,
403 signature: None,
404 provenance: None,
405 context: None,
406 authored_at: None,
407 risk_signals: None,
408 review_signatures: None,
409 discussions: None,
410 structured_conflicts: None,
411 committer: None,
412 authored_tz_offset: 0,
413 committer_tz_offset: 0,
414 raw_message: None,
415 git_lossy: false,
416 extra_headers: Vec::new(),
417 status: Status::Draft,
418 }
419 }
420
421 pub fn with_intent(mut self, intent: impl Into<String>) -> Self {
422 self.intent = Some(intent.into());
423 self.content_hash = None;
424 self
425 }
426
427 pub fn with_confidence(mut self, confidence: f32) -> Self {
428 self.confidence = Some(confidence.clamp(0.0, 1.0));
429 self.content_hash = None;
430 self
431 }
432
433 pub fn with_verification(mut self, verification: Verification) -> Self {
434 self.verification = Some(verification);
435 self.content_hash = None;
436 self
437 }
438
439 pub fn with_signature(mut self, signature: StateSignature) -> Self {
440 self.signature = Some(signature);
441 self
442 }
443
444 pub fn with_provenance(mut self, provenance: ContentHash) -> Self {
445 self.provenance = Some(provenance);
446 self.content_hash = None;
447 self
448 }
449
450 pub fn with_context(mut self, context: ContentHash) -> Self {
452 self.context = Some(context);
453 self.content_hash = None;
454 self
455 }
456
457 pub fn with_risk_signals(mut self, risk_signals: ContentHash) -> Self {
467 self.risk_signals = Some(risk_signals);
468 self
469 }
470
471 pub fn with_review_signatures(mut self, review_signatures: ContentHash) -> Self {
479 self.review_signatures = Some(review_signatures);
480 self
481 }
482
483 pub fn with_discussions(mut self, discussions: ContentHash) -> Self {
489 self.discussions = Some(discussions);
490 self
491 }
492
493 pub fn with_structured_conflicts(mut self, structured_conflicts: ContentHash) -> Self {
499 self.structured_conflicts = Some(structured_conflicts);
500 self
501 }
502
503 pub fn with_authored_at(mut self, timestamp: DateTime<Utc>) -> Self {
514 self.authored_at = Some(timestamp);
515 self.content_hash = None;
516 self
517 }
518
519 pub fn with_committer(mut self, committer: Principal) -> Self {
524 self.committer = Some(committer);
525 self.content_hash = None;
526 self
527 }
528
529 pub fn with_tz_offsets(mut self, authored: i32, committer: i32) -> Self {
532 self.authored_tz_offset = authored;
533 self.committer_tz_offset = committer;
534 self.content_hash = None;
535 self
536 }
537
538 pub fn with_raw_message(mut self, raw_message: impl AsRef<[u8]>) -> Self {
542 self.raw_message = Some(raw_message.as_ref().to_vec());
543 self.content_hash = None;
544 self
545 }
546
547 pub fn with_git_lossy(mut self, git_lossy: bool) -> Self {
553 self.git_lossy = git_lossy;
554 self.content_hash = None;
555 self
556 }
557
558 pub fn with_extra_headers(mut self, extra_headers: Vec<(Vec<u8>, Vec<u8>)>) -> Self {
562 self.extra_headers = extra_headers;
563 self.content_hash = None;
564 self
565 }
566
567 pub fn with_status(mut self, status: Status) -> Self {
568 self.status = status;
569 self.content_hash = None;
570 self
571 }
572
573 pub fn with_change_id(mut self, change_id: ChangeId) -> Self {
574 let previous_change_id = self.change_id;
575 self.change_id = change_id;
576 if self.logical_change_id == Some(previous_change_id) || self.logical_change_id.is_none() {
577 self.logical_change_id = Some(change_id);
578 self.content_hash = None;
579 }
580 self
581 }
582
583 pub fn with_logical_change_id(mut self, logical_change_id: ChangeId) -> Self {
584 self.logical_change_id = Some(logical_change_id);
585 self.content_hash = None;
586 self
587 }
588
589 pub fn logical_change_id(&self) -> ChangeId {
590 self.logical_change_id.unwrap_or(self.change_id)
591 }
592
593 pub fn with_timestamp(mut self, timestamp: DateTime<Utc>) -> Self {
594 self.created_at = timestamp;
595 self.content_hash = None;
596 self
597 }
598
599 pub fn compute_hash(&self) -> ContentHash {
600 let content_len = self.hash_len();
601 ContentHash::compute_typed_with_len("state", content_len, |hasher| {
602 self.update_hash(hasher);
603 })
604 }
605
606 pub fn compute_hash_pre_fidelity(&self) -> ContentHash {
620 let content_len = self.hash_len_core();
621 ContentHash::compute_typed_with_len("state", content_len, |hasher| {
622 self.update_hash_core(hasher);
623 })
624 }
625
626 pub fn hash(&mut self) -> ContentHash {
627 if self.content_hash.is_none() {
628 self.content_hash = Some(self.compute_hash());
629 }
630 self.content_hash.expect("hash was just computed above")
631 }
632
633 pub fn is_root(&self) -> bool {
634 self.parents.is_empty()
635 }
636
637 pub fn is_merge(&self) -> bool {
638 self.parents.len() > 1
639 }
640
641 pub fn is_agent_authored(&self) -> bool {
642 self.attribution.agent.is_some()
643 }
644
645 pub fn first_parent(&self) -> Option<&ChangeId> {
646 self.parents.first()
647 }
648
649 fn hash_len(&self) -> u64 {
650 self.hash_len_core() + self.hash_len_fidelity()
651 }
652
653 fn hash_len_core(&self) -> u64 {
657 let principal = &self.attribution.principal;
658 let mut len = 0u64;
659
660 len += 1;
661 if self.logical_change_id.is_some() {
662 len += 16;
663 }
664
665 len += self.tree.as_bytes().len() as u64;
666 len += 4;
667 len += (self.parents.len() * 16) as u64;
668
669 len += principal.name.len() as u64 + 1;
670 len += principal.email.len() as u64 + 1;
671
672 len += 1;
673 if let Some(agent) = &self.attribution.agent {
674 len += agent.provider.len() as u64 + 1;
675 len += agent.model.len() as u64 + 1;
676
677 len += 1;
678 if let Some(session_id) = &agent.session_id {
679 len += session_id.len() as u64 + 1;
680 }
681
682 len += 1;
683 if let Some(policy_id) = &agent.policy_id {
684 len += policy_id.len() as u64 + 1;
685 }
686 }
687
688 len += 1;
689 if let Some(intent) = &self.intent {
690 len += intent.len() as u64 + 1;
691 }
692
693 len += 1;
694 if self.confidence.is_some() {
695 len += 4;
696 }
697
698 len += 8;
699
700 len += 1;
701 if let Some(verification) = &self.verification {
702 len += verification.hash_len() as u64;
703 }
704
705 len += 1;
706 if self.provenance.is_some() {
707 len += 32;
708 }
709
710 len += 1;
711 if self.context.is_some() {
712 len += 32;
713 }
714
715 len += 1;
716
717 len
718 }
719
720 fn hash_len_fidelity(&self) -> u64 {
724 let mut len = 0u64;
725
726 len += 1;
729 if let Some(committer) = &self.committer {
730 len += committer.name.len() as u64 + 1;
731 len += committer.email.len() as u64 + 1;
732 }
733 len += 4;
735 len += 4;
736 len += 1;
738 if self.authored_at.is_some() {
739 len += 8;
740 }
741 len += 1;
745 if let Some(raw_message) = &self.raw_message {
746 len += 4 + raw_message.len() as u64;
747 }
748 len += 4;
751 for (key, value) in &self.extra_headers {
752 len += 4 + key.len() as u64;
753 len += 4 + value.len() as u64;
754 }
755
756 len
757 }
758
759 fn update_hash(&self, hasher: &mut blake3::Hasher) {
760 self.update_hash_core(hasher);
761 self.update_hash_fidelity(hasher);
762 }
763
764 fn update_hash_core(&self, hasher: &mut blake3::Hasher) {
769 let principal = &self.attribution.principal;
770
771 if let Some(logical_change_id) = self.logical_change_id {
772 hasher.update(&[1]);
773 hasher.update(logical_change_id.as_bytes());
774 } else {
775 hasher.update(&[0]);
776 }
777
778 hasher.update(self.tree.as_bytes());
779 hasher.update(&(self.parents.len() as u32).to_le_bytes());
780 for parent in &self.parents {
781 hasher.update(parent.as_bytes());
782 }
783
784 hasher.update(principal.name.as_bytes());
785 hasher.update(&[0]);
786 hasher.update(principal.email.as_bytes());
787 hasher.update(&[0]);
788
789 if let Some(agent) = &self.attribution.agent {
790 hasher.update(&[1]);
791 hasher.update(agent.provider.as_bytes());
792 hasher.update(&[0]);
793 hasher.update(agent.model.as_bytes());
794 hasher.update(&[0]);
795 write_optional_string(hasher, &agent.session_id);
796 write_optional_string(hasher, &agent.segment_id);
797 write_optional_string(hasher, &agent.policy_id);
798 } else {
799 hasher.update(&[0]);
800 }
801
802 write_optional_string(hasher, &self.intent);
803
804 if let Some(confidence) = self.confidence {
805 hasher.update(&[1]);
806 hasher.update(&confidence.to_le_bytes());
807 } else {
808 hasher.update(&[0]);
809 }
810
811 hasher.update(&self.created_at.timestamp().to_le_bytes());
812
813 if let Some(verification) = &self.verification {
814 hasher.update(&[1]);
815 verification.update_hasher(hasher);
816 } else {
817 hasher.update(&[0]);
818 }
819
820 if let Some(provenance) = self.provenance {
821 hasher.update(&[1]);
822 hasher.update(provenance.as_bytes());
823 } else {
824 hasher.update(&[0]);
825 }
826
827 if let Some(context) = self.context {
828 hasher.update(&[1]);
829 hasher.update(context.as_bytes());
830 } else {
831 hasher.update(&[0]);
832 }
833
834 hasher.update(&[self.status.to_byte()]);
835 }
836
837 fn update_hash_fidelity(&self, hasher: &mut blake3::Hasher) {
850 if let Some(committer) = &self.committer {
851 hasher.update(&[1]);
852 hasher.update(committer.name.as_bytes());
853 hasher.update(&[0]);
854 hasher.update(committer.email.as_bytes());
855 hasher.update(&[0]);
856 } else {
857 hasher.update(&[0]);
858 }
859
860 hasher.update(&self.authored_tz_offset.to_le_bytes());
861 hasher.update(&self.committer_tz_offset.to_le_bytes());
862
863 if let Some(authored_at) = self.authored_at {
866 hasher.update(&[1]);
867 hasher.update(&authored_at.timestamp().to_le_bytes());
868 } else {
869 hasher.update(&[0]);
870 }
871
872 write_optional_bytes(hasher, &self.raw_message);
873
874 hasher.update(&(self.extra_headers.len() as u32).to_le_bytes());
876 for (key, value) in &self.extra_headers {
877 hasher.update(&(key.len() as u32).to_le_bytes());
878 hasher.update(key);
879 hasher.update(&(value.len() as u32).to_le_bytes());
880 hasher.update(value);
881 }
882 }
883}
884
885fn write_optional_bytes(hasher: &mut blake3::Hasher, value: &Option<Vec<u8>>) {
891 match value {
892 Some(bytes) => {
893 hasher.update(&[1]);
894 hasher.update(&(bytes.len() as u32).to_le_bytes());
895 hasher.update(bytes);
896 }
897 None => {
898 hasher.update(&[0]);
899 }
900 }
901}
902
903fn write_optional_string(hasher: &mut blake3::Hasher, value: &Option<String>) {
904 match value {
905 Some(value) => {
906 hasher.update(&[1]);
907 hasher.update(value.as_bytes());
908 hasher.update(&[0]);
909 }
910 None => {
911 hasher.update(&[0]);
912 }
913 }
914}
915
/// Extracts the non-core headers of a raw git commit object, in wire order.
///
/// The header block is everything before the first blank line (`"\n\n"`), or
/// the whole input when there is none. Each header line is split at its first
/// space into `(name, value)`; a line starting with a space is a continuation
/// and is appended to the previous header's value after a `\n`, with the
/// leading space removed.
///
/// Returned headers:
/// * if a `committer` header exists, every header after the first one;
/// * otherwise every header except `tree`, `parent`, `author` and `committer`.
///
/// Empty lines inside the header block (empty input, a leading newline, or a
/// trailing newline when no blank separator line is present) are skipped rather
/// than being reported as a header with an empty name.
pub fn parse_commit_extension_headers(commit_content: &[u8]) -> Vec<(Vec<u8>, Vec<u8>)> {
    let header_block = match find_subslice(commit_content, b"\n\n") {
        Some(end) => &commit_content[..end],
        None => commit_content,
    };

    let mut headers: Vec<(Vec<u8>, Vec<u8>)> = Vec::new();
    for line in header_block.split(|&byte| byte == b'\n') {
        match line.split_first() {
            // An empty line carries no header; never emit `("", "")` for it.
            None => {}
            // Continuation line: fold into the previous header's value. A
            // continuation with no preceding header is dropped.
            Some((&b' ', continuation)) => {
                if let Some((_, value)) = headers.last_mut() {
                    value.push(b'\n');
                    value.extend_from_slice(continuation);
                }
            }
            // Regular header line: the name ends at the first space.
            Some(_) => {
                let (name, value) = match line.iter().position(|&byte| byte == b' ') {
                    Some(space) => (line[..space].to_vec(), line[space + 1..].to_vec()),
                    None => (line.to_vec(), Vec::new()),
                };
                headers.push((name, value));
            }
        }
    }

    match headers.iter().position(|(name, _)| name == b"committer") {
        // Everything after `committer` is an extension header.
        Some(idx) => headers.split_off(idx + 1),
        // No committer header: fall back to dropping the known core headers.
        None => headers
            .into_iter()
            .filter(|(name, _)| {
                !matches!(
                    name.as_slice(),
                    b"tree" | b"parent" | b"author" | b"committer"
                )
            })
            .collect(),
    }
}

/// Returns the index of the first occurrence of `needle` in `haystack`.
///
/// Returns `None` when `needle` is empty (which also avoids the panic of
/// `windows(0)`), longer than `haystack`, or simply not present.
fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    if needle.is_empty() || needle.len() > haystack.len() {
        return None;
    }
    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}
1004
#[cfg(test)]
mod tests {
    use super::*;
    use crate::object::Principal;

    /// Human-only attribution shared by most fixtures.
    fn sample_attribution() -> Attribution {
        let alice = Principal::new("Alice", "alice@example.com");
        Attribution::human(alice)
    }

    /// Parentless snapshot over a fixed tree, attributed to the sample human.
    fn sample_state() -> State {
        State::new_snapshot(ContentHash::compute(b"tree"), vec![], sample_attribution())
    }

    /// Fresh sample state sharing `base`'s change id, logical id and creation
    /// time, so only mutations applied afterwards can make the hashes differ.
    fn twin_of(base: &State) -> State {
        let mut twin = sample_state()
            .with_change_id(base.change_id)
            .with_logical_change_id(base.logical_change_id());
        twin.created_at = base.created_at;
        twin
    }

    /// Primes the cache, applies `mutate`, then checks that the memoized hash
    /// changed and agrees with a from-scratch computation.
    fn assert_mutator_invalidates_cached_hash(
        mut state: State,
        mutate: impl FnOnce(State) -> State,
    ) {
        let before = state.hash();
        let mut after = mutate(state);
        assert_ne!(after.hash(), before);
        assert_eq!(after.hash(), after.compute_hash());
    }

    #[test]
    fn new_snapshot_sets_fresh_logical_identity() {
        let state = sample_state();
        let stored = state
            .logical_change_id
            .expect("snapshot should set logical identity");
        assert_ne!(state.logical_change_id(), state.change_id);
        assert_eq!(state.logical_change_id(), stored);
    }

    #[test]
    fn new_refresh_preserves_explicit_logical_identity() {
        let explicit = ChangeId::from_bytes([7; 16]);
        let state = State::new_refresh_of(
            ContentHash::compute(b"tree"),
            vec![],
            sample_attribution(),
            explicit,
        );
        assert_eq!(state.logical_change_id(), explicit);
        assert_ne!(state.change_id, explicit);
    }

    #[test]
    fn new_merge_uses_fresh_logical_identity() {
        let parents = vec![ChangeId::from_bytes([1; 16]), ChangeId::from_bytes([2; 16])];
        let state = State::new_merge(ContentHash::compute(b"tree"), parents, sample_attribution());
        let stored = state
            .logical_change_id
            .expect("merge should set logical identity");
        assert_ne!(state.logical_change_id(), state.change_id);
        assert_eq!(state.logical_change_id(), stored);
        assert!(state.is_merge());
    }

    #[test]
    fn with_change_id_invalidates_cached_hash_when_logical_identity_changes() {
        // Make the logical identity track the state's own change id first.
        let seed = sample_state();
        let own_id = seed.change_id;
        let mut state = seed.with_logical_change_id(own_id);
        let original_hash = state.hash();

        let replacement = ChangeId::from_bytes([9; 16]);
        let mut updated = state.with_change_id(replacement);

        assert_eq!(updated.logical_change_id(), replacement);
        assert_ne!(updated.hash(), original_hash);
        assert_eq!(updated.hash(), updated.compute_hash());
    }

    #[test]
    fn agent_segment_is_part_of_state_hash() {
        let tree = ContentHash::compute(b"tree");
        let timestamp = Utc::now();
        let logical_change_id = ChangeId::from_bytes([3; 16]);

        // The two attributions differ only in the agent's segment id.
        let first_attribution = Attribution::with_agent(
            Principal::new("Alice", "alice@example.com"),
            crate::object::Agent::new("openai", "gpt-5").with_session("sess-1", "seg-1"),
        );
        let second_attribution = Attribution::with_agent(
            Principal::new("Alice", "alice@example.com"),
            crate::object::Agent::new("openai", "gpt-5").with_session("sess-1", "seg-2"),
        );

        let first = State::new_snapshot(tree, vec![], first_attribution)
            .with_logical_change_id(logical_change_id)
            .with_timestamp(timestamp);
        let second = State::new_snapshot(tree, vec![], second_attribution)
            .with_logical_change_id(logical_change_id)
            .with_timestamp(timestamp);

        assert_ne!(first.compute_hash(), second.compute_hash());
    }

    #[test]
    fn with_intent_invalidates_cached_hash() {
        assert_mutator_invalidates_cached_hash(sample_state(), |state| {
            state.with_intent("capture intent")
        });
    }

    #[test]
    fn with_confidence_invalidates_cached_hash() {
        assert_mutator_invalidates_cached_hash(sample_state(), |state| state.with_confidence(0.9));
    }

    #[test]
    fn with_verification_invalidates_cached_hash() {
        assert_mutator_invalidates_cached_hash(sample_state(), |state| {
            let verification = Verification::new().with_tests_passed(true);
            state.with_verification(verification)
        });
    }

    #[test]
    fn with_status_invalidates_cached_hash() {
        assert_mutator_invalidates_cached_hash(sample_state(), |state| {
            state.with_status(Status::Published)
        });
    }

    #[test]
    fn with_timestamp_invalidates_cached_hash() {
        // One second later: the hash only sees whole seconds.
        assert_mutator_invalidates_cached_hash(sample_state(), |state| {
            state.with_timestamp(Utc::now() + chrono::Duration::seconds(1))
        });
    }

    #[test]
    fn w1_tail_fields_are_not_part_of_state_hash() {
        let mut bare = sample_state();
        let bare_hash = bare.hash();

        let mut decorated = twin_of(&bare)
            .with_risk_signals(ContentHash::compute(b"risk-signals-blob"))
            .with_review_signatures(ContentHash::compute(b"review-signatures-blob"))
            .with_discussions(ContentHash::compute(b"discussions-blob"))
            .with_structured_conflicts(ContentHash::compute(b"conflicts-blob"));

        assert_eq!(
            decorated.hash(),
            bare_hash,
            "W1 tail fields must not affect the state hash"
        );
    }

    #[test]
    fn fidelity_fields_are_part_of_state_hash() {
        let base = sample_state();
        let base_hash = base.compute_hash();

        let mut with_committer =
            twin_of(&base).with_committer(Principal::new("Carol", "carol@example.com"));
        assert_ne!(
            with_committer.hash(),
            base_hash,
            "committer must affect the state hash"
        );

        // Each remaining fidelity field, applied on its own to a twin of `base`.
        let mutators: [fn(State) -> State; 5] = [
            |s: State| s.with_tz_offsets(3600, -7200),
            |s: State| s.with_authored_at(Utc::now() + chrono::Duration::seconds(1)),
            |s: State| s.with_raw_message("verbatim body\n"),
            |s: State| {
                s.with_extra_headers(vec![(
                    b"gpgsig".to_vec(),
                    b"-----BEGIN PGP SIGNATURE-----\n".to_vec(),
                )])
            },
            |s: State| s.with_extra_headers(vec![(b"mergetag".to_vec(), b"x".to_vec())]),
        ];
        for mutate in mutators {
            let mut decorated = mutate(twin_of(&base));
            assert_ne!(
                decorated.hash(),
                base_hash,
                "fidelity field must affect the state hash"
            );
        }
    }

    #[test]
    fn pre_fidelity_hash_matches_legacy_golden_vector() {
        // Fully pinned fixture: every hashed input is fixed, so the core-only
        // hash must reproduce the recorded digest exactly.
        let attribution = Attribution::with_agent(
            Principal::new("Legacy Author", "legacy@example.com"),
            crate::object::Agent::new("openai", "gpt-5")
                .with_session("session-633", "segment-001")
                .with_policy("policy-legacy"),
        );
        let state = State::new_snapshot(
            ContentHash::compute(b"issue-633-tree"),
            vec![ChangeId::from_bytes([0x11; 16])],
            attribution,
        )
        .with_logical_change_id(ChangeId::from_bytes([0x63; 16]))
        .with_intent("freeze pre-565 hash")
        .with_confidence(0.875)
        .with_timestamp(DateTime::from_timestamp(1_700_000_000, 0).expect("valid timestamp"))
        .with_committer(Principal::new("Legacy Committer", "committer@example.com"))
        .with_tz_offsets(3600, -18000)
        .with_authored_at(DateTime::from_timestamp(1_699_999_000, 0).expect("valid timestamp"))
        .with_raw_message(b"legacy commit message\n")
        .with_extra_headers(vec![(b"encoding".to_vec(), b"UTF-8".to_vec())])
        .with_status(Status::Published);

        let legacy_hash = state.compute_hash_pre_fidelity();
        assert_eq!(
            legacy_hash.to_hex(),
            "b89e1b40e681a1bf88679db7cfcacdafb1f370bc40ed5d50760dae1d4ab49dab",
        );
        assert_ne!(
            legacy_hash,
            state.compute_hash(),
            "fixture must distinguish the pre-#565 legacy path from the current hash",
        );
    }

    #[test]
    fn extra_headers_order_affects_hash() {
        let base = sample_state();

        let forward = vec![
            (b"a".to_vec(), b"1".to_vec()),
            (b"b".to_vec(), b"2".to_vec()),
        ];
        let backward = vec![
            (b"b".to_vec(), b"2".to_vec()),
            (b"a".to_vec(), b"1".to_vec()),
        ];

        let mut one = twin_of(&base).with_extra_headers(forward);
        let mut two = twin_of(&base).with_extra_headers(backward);

        assert_ne!(one.hash(), two.hash());
    }

    #[test]
    fn fidelity_fields_hash_is_stable() {
        let headers = vec![
            (b"gpgsig".to_vec(), b"sig".to_vec()),
            (b"k".to_vec(), b"v".to_vec()),
        ];
        let mut state = sample_state()
            .with_committer(Principal::new("Dave", "dave@example.com"))
            .with_tz_offsets(3600, 0)
            .with_authored_at(Utc::now())
            .with_raw_message("body\n")
            .with_extra_headers(headers);
        assert_eq!(state.hash(), state.compute_hash());
    }

    #[test]
    fn non_utf8_raw_message_is_byte_preserved() {
        let raw = b"caf\xe9\n".to_vec();
        assert!(
            String::from_utf8(raw.clone()).is_err(),
            "test fixture must be invalid UTF-8 to be meaningful"
        );

        let mut state = sample_state().with_raw_message(&raw);
        assert_eq!(
            state.raw_message.as_deref(),
            Some(raw.as_slice()),
            "raw bytes preserved verbatim"
        );

        // Round-trip through MessagePack and compare both bytes and hashes.
        let encoded = rmp_serde::to_vec(&state).expect("serialize state");
        let mut decoded: State = rmp_serde::from_slice(&encoded).expect("deserialize state");
        assert_eq!(decoded.raw_message.as_deref(), Some(raw.as_slice()));
        assert_eq!(state.hash(), decoded.hash());
        assert_eq!(decoded.hash(), decoded.compute_hash());
    }

    #[test]
    fn raw_message_with_nul_byte_changes_hash() {
        let base = sample_state();

        // The messages differ only after an embedded NUL byte.
        let mut a = twin_of(&base).with_raw_message(b"a\x00b");
        let mut b = twin_of(&base).with_raw_message(b"a\x00c");

        assert_ne!(a.hash(), b.hash());
    }

    #[test]
    fn parse_extension_headers_preserves_noncanonical_wire_order() {
        let lines: &[&[u8]] = &[
            b"tree 1111111111111111111111111111111111111111",
            b"parent 2222222222222222222222222222222222222222",
            b"author Alice <alice@example.com> 1700000000 +0000",
            b"committer Bob <bob@example.com> 1700000100 +0000",
            b"x-custom custom value",
            b"gpgsig -----BEGIN PGP SIGNATURE-----",
            b" sig-line-1",
            b" -----END PGP SIGNATURE-----",
            b"encoding ISO-8859-1",
            b"mergetag object 3333333333333333333333333333333333333333",
            b" type commit",
            b" tag sidetag",
            b" tagger Carol <carol@example.com> 1700000050 +0000",
            // Space-only continuation: an empty line inside the mergetag value.
            b" ",
            b" signed side tag",
            // Truly empty line: end of the header block.
            b"",
            b"the commit message",
            b"",
        ];
        let content = lines.join(&b'\n');

        let parsed = parse_commit_extension_headers(&content);

        let expected: Vec<(Vec<u8>, Vec<u8>)> = vec![
            (b"x-custom".to_vec(), b"custom value".to_vec()),
            (
                b"gpgsig".to_vec(),
                b"-----BEGIN PGP SIGNATURE-----\nsig-line-1\n-----END PGP SIGNATURE-----"
                    .to_vec(),
            ),
            (b"encoding".to_vec(), b"ISO-8859-1".to_vec()),
            (
                b"mergetag".to_vec(),
                b"object 3333333333333333333333333333333333333333\ntype commit\ntag sidetag\ntagger Carol <carol@example.com> 1700000050 +0000\n\nsigned side tag".to_vec(),
            ),
        ];

        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_extension_headers_empty_when_only_core_headers() {
        let pieces: [&[u8]; 5] = [
            b"tree 1111111111111111111111111111111111111111\n",
            b"author Alice <alice@example.com> 1700000000 +0000\n",
            b"committer Bob <bob@example.com> 1700000100 +0000\n",
            b"\n",
            b"just a message\n",
        ];
        let content = pieces.concat();
        assert!(parse_commit_extension_headers(&content).is_empty());
    }
}