1use std::collections::BTreeMap;
6
7use chrono::{DateTime, Utc};
8use serde::{Deserialize, Serialize};
9
10use super::{Attribution, ChangeId, ContentHash, Principal};
11
12#[derive(Clone, Copy, Debug, PartialEq, Eq, Default, Serialize, Deserialize)]
16pub enum Status {
17 #[default]
18 Draft,
19 Published,
20}
21
22impl Status {
23 pub fn to_byte(&self) -> u8 {
24 match self {
25 Status::Draft => 0,
26 Status::Published => 1,
27 }
28 }
29
30 pub fn from_byte(b: u8) -> Option<Self> {
31 match b {
32 0 => Some(Status::Draft),
33 1 => Some(Status::Published),
34 _ => None,
35 }
36 }
37}
38
39#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
43pub struct StateSignature {
44 pub algorithm: String,
45 pub public_key: String,
46 pub signature: String,
47}
48
49impl StateSignature {
50 pub fn algorithm(&self) -> &str {
51 &self.algorithm
52 }
53}
54
/// Outcome of checking a state's signature.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SignatureStatus {
    /// A signature is present and verified.
    Valid,
    /// A signature is present but did not verify.
    Invalid,
    /// No signature is present.
    Unsigned,
}

impl SignatureStatus {
    /// `true` only for [`SignatureStatus::Valid`].
    pub fn is_valid(self) -> bool {
        matches!(self, Self::Valid)
    }

    /// `true` only for [`SignatureStatus::Unsigned`].
    pub fn is_unsigned(self) -> bool {
        matches!(self, Self::Unsigned)
    }
}
72
73#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
77pub struct Verification {
78 pub tests_passed: Option<bool>,
79 pub tests_failed: Option<u32>,
80 pub coverage_pct: Option<f32>,
81 pub coverage_delta: Option<f32>,
82 pub lint_warnings: Option<u32>,
83 #[serde(default)]
84 pub custom: BTreeMap<String, serde_json::Value>,
85}
86
87impl Verification {
88 pub fn new() -> Self {
89 Self::default()
90 }
91
92 pub fn with_tests_passed(mut self, passed: bool) -> Self {
93 self.tests_passed = Some(passed);
94 self
95 }
96
97 pub fn with_tests_failed(mut self, failed: u32) -> Self {
98 self.tests_failed = Some(failed);
99 self
100 }
101
102 pub fn is_empty(&self) -> bool {
103 self.tests_passed.is_none()
104 && self.tests_failed.is_none()
105 && self.coverage_pct.is_none()
106 && self.coverage_delta.is_none()
107 && self.lint_warnings.is_none()
108 && self.custom.is_empty()
109 }
110
111 pub(crate) fn hash_len(&self) -> usize {
112 let mut len = 0;
113 len += 1 + self.tests_passed.map(|_| 1).unwrap_or(0);
114 len += 1 + self.tests_failed.map(|_| 4).unwrap_or(0);
115 len += 1 + self.coverage_pct.map(|_| 4).unwrap_or(0);
116 len += 1 + self.coverage_delta.map(|_| 4).unwrap_or(0);
117 len += 1 + self.lint_warnings.map(|_| 4).unwrap_or(0);
118 len += 4;
119 for (key, value) in &self.custom {
120 let value_bytes = serde_json::to_vec(value).unwrap_or_default();
121 len += 4 + key.len();
122 len += 4 + value_bytes.len();
123 }
124 len
125 }
126
127 pub(crate) fn update_hasher(&self, hasher: &mut blake3::Hasher) {
128 let tests_passed = self.tests_passed.map(u8::from);
129 write_optional_u8(hasher, tests_passed);
130 write_optional_u32(hasher, self.tests_failed);
131 write_optional_f32(hasher, self.coverage_pct);
132 write_optional_f32(hasher, self.coverage_delta);
133 write_optional_u32(hasher, self.lint_warnings);
134 let custom_len = self.custom.len() as u32;
135 hasher.update(&custom_len.to_le_bytes());
136 for (key, value) in &self.custom {
137 let key_bytes = key.as_bytes();
138 let value_bytes = serde_json::to_vec(value).unwrap_or_default();
139 hasher.update(&(key_bytes.len() as u32).to_le_bytes());
140 hasher.update(key_bytes);
141 hasher.update(&(value_bytes.len() as u32).to_le_bytes());
142 hasher.update(&value_bytes);
143 }
144 }
145}
146
147fn write_optional_u8(hasher: &mut blake3::Hasher, value: Option<u8>) {
148 match value {
149 Some(v) => {
150 hasher.update(&[1]);
151 hasher.update(&[v]);
152 }
153 None => {
154 hasher.update(&[0]);
155 }
156 }
157}
158
159fn write_optional_u32(hasher: &mut blake3::Hasher, value: Option<u32>) {
160 match value {
161 Some(v) => {
162 hasher.update(&[1]);
163 hasher.update(&v.to_le_bytes());
164 }
165 None => {
166 hasher.update(&[0]);
167 }
168 }
169}
170
171fn write_optional_f32(hasher: &mut blake3::Hasher, value: Option<f32>) {
172 match value {
173 Some(v) => {
174 hasher.update(&[1]);
175 hasher.update(&v.to_le_bytes());
176 }
177 None => {
178 hasher.update(&[0]);
179 }
180 }
181}
182
183#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
202pub struct State {
203 pub change_id: ChangeId,
204 #[serde(skip)]
205 content_hash: Option<ContentHash>,
206 pub tree: ContentHash,
207 pub parents: Vec<ChangeId>,
208 pub attribution: Attribution,
209 pub intent: Option<String>,
210 pub confidence: Option<f32>,
211 pub created_at: DateTime<Utc>,
212 pub verification: Option<Verification>,
213 pub signature: Option<StateSignature>,
214 pub status: Status,
215 #[serde(default)]
217 pub provenance: Option<ContentHash>,
218 #[serde(default)]
219 pub logical_change_id: Option<ChangeId>,
220 #[serde(default)]
222 pub context: Option<ContentHash>,
223 #[serde(default)]
241 pub authored_at: Option<DateTime<Utc>>,
242 #[serde(default)]
251 pub risk_signals: Option<ContentHash>,
252 #[serde(default)]
255 pub review_signatures: Option<ContentHash>,
256 #[serde(default)]
259 pub discussions: Option<ContentHash>,
260 #[serde(default)]
263 pub structured_conflicts: Option<ContentHash>,
264 #[serde(default)]
281 pub committer: Option<Principal>,
282 #[serde(default)]
287 pub authored_tz_offset: i32,
288 #[serde(default)]
291 pub committer_tz_offset: i32,
292 #[serde(default)]
304 pub raw_message: Option<Vec<u8>>,
305 #[serde(default)]
320 pub git_lossy: bool,
321 #[serde(default)]
340 pub extra_headers: Vec<(Vec<u8>, Vec<u8>)>,
341}
342
343impl State {
344 pub fn new(tree: ContentHash, parents: Vec<ChangeId>, attribution: Attribution) -> Self {
345 Self::new_snapshot(tree, parents, attribution)
346 }
347
348 pub fn new_snapshot(
349 tree: ContentHash,
350 parents: Vec<ChangeId>,
351 attribution: Attribution,
352 ) -> Self {
353 let change_id = ChangeId::generate();
354 Self::new_with_logical_change_id(tree, parents, attribution, change_id)
355 }
356
357 pub fn new_merge(tree: ContentHash, parents: Vec<ChangeId>, attribution: Attribution) -> Self {
358 Self::new_snapshot(tree, parents, attribution)
359 }
360
361 pub fn new_refresh_of(
362 tree: ContentHash,
363 parents: Vec<ChangeId>,
364 attribution: Attribution,
365 logical_change_id: ChangeId,
366 ) -> Self {
367 Self::new_with_logical_change_id(tree, parents, attribution, logical_change_id)
368 }
369
370 pub fn new_fork_of(
371 tree: ContentHash,
372 parents: Vec<ChangeId>,
373 attribution: Attribution,
374 ) -> Self {
375 Self::new_snapshot(tree, parents, attribution)
376 }
377
378 pub fn new_collapse_of(
379 tree: ContentHash,
380 parents: Vec<ChangeId>,
381 attribution: Attribution,
382 ) -> Self {
383 Self::new_snapshot(tree, parents, attribution)
384 }
385
386 fn new_with_logical_change_id(
387 tree: ContentHash,
388 parents: Vec<ChangeId>,
389 attribution: Attribution,
390 logical_change_id: ChangeId,
391 ) -> Self {
392 Self {
393 change_id: ChangeId::generate(),
394 logical_change_id: Some(logical_change_id),
395 content_hash: None,
396 tree,
397 parents,
398 attribution,
399 intent: None,
400 confidence: None,
401 created_at: Utc::now(),
402 verification: None,
403 signature: None,
404 provenance: None,
405 context: None,
406 authored_at: None,
407 risk_signals: None,
408 review_signatures: None,
409 discussions: None,
410 structured_conflicts: None,
411 committer: None,
412 authored_tz_offset: 0,
413 committer_tz_offset: 0,
414 raw_message: None,
415 git_lossy: false,
416 extra_headers: Vec::new(),
417 status: Status::Draft,
418 }
419 }
420
421 pub fn with_intent(mut self, intent: impl Into<String>) -> Self {
422 self.intent = Some(intent.into());
423 self.content_hash = None;
424 self
425 }
426
427 pub fn with_confidence(mut self, confidence: f32) -> Self {
428 self.confidence = Some(confidence.clamp(0.0, 1.0));
429 self.content_hash = None;
430 self
431 }
432
433 pub fn with_verification(mut self, verification: Verification) -> Self {
434 self.verification = Some(verification);
435 self.content_hash = None;
436 self
437 }
438
439 pub fn with_signature(mut self, signature: StateSignature) -> Self {
440 self.signature = Some(signature);
441 self
442 }
443
444 pub fn with_provenance(mut self, provenance: ContentHash) -> Self {
445 self.provenance = Some(provenance);
446 self.content_hash = None;
447 self
448 }
449
450 pub fn with_context(mut self, context: ContentHash) -> Self {
452 self.context = Some(context);
453 self.content_hash = None;
454 self
455 }
456
457 pub fn with_risk_signals(mut self, risk_signals: ContentHash) -> Self {
467 self.risk_signals = Some(risk_signals);
468 self
469 }
470
471 pub fn with_review_signatures(mut self, review_signatures: ContentHash) -> Self {
479 self.review_signatures = Some(review_signatures);
480 self
481 }
482
483 pub fn with_discussions(mut self, discussions: ContentHash) -> Self {
489 self.discussions = Some(discussions);
490 self
491 }
492
493 pub fn with_structured_conflicts(mut self, structured_conflicts: ContentHash) -> Self {
499 self.structured_conflicts = Some(structured_conflicts);
500 self
501 }
502
503 pub fn with_authored_at(mut self, timestamp: DateTime<Utc>) -> Self {
514 self.authored_at = Some(timestamp);
515 self.content_hash = None;
516 self
517 }
518
519 pub fn with_committer(mut self, committer: Principal) -> Self {
524 self.committer = Some(committer);
525 self.content_hash = None;
526 self
527 }
528
529 pub fn with_tz_offsets(mut self, authored: i32, committer: i32) -> Self {
532 self.authored_tz_offset = authored;
533 self.committer_tz_offset = committer;
534 self.content_hash = None;
535 self
536 }
537
538 pub fn with_raw_message(mut self, raw_message: impl AsRef<[u8]>) -> Self {
542 self.raw_message = Some(raw_message.as_ref().to_vec());
543 self.content_hash = None;
544 self
545 }
546
547 pub fn with_git_lossy(mut self, git_lossy: bool) -> Self {
553 self.git_lossy = git_lossy;
554 self.content_hash = None;
555 self
556 }
557
558 pub fn with_extra_headers(mut self, extra_headers: Vec<(Vec<u8>, Vec<u8>)>) -> Self {
562 self.extra_headers = extra_headers;
563 self.content_hash = None;
564 self
565 }
566
567 pub fn with_status(mut self, status: Status) -> Self {
568 self.status = status;
569 self.content_hash = None;
570 self
571 }
572
573 pub fn with_change_id(mut self, change_id: ChangeId) -> Self {
574 let previous_change_id = self.change_id;
575 self.change_id = change_id;
576 if self.logical_change_id == Some(previous_change_id) || self.logical_change_id.is_none() {
577 self.logical_change_id = Some(change_id);
578 self.content_hash = None;
579 }
580 self
581 }
582
583 pub fn with_logical_change_id(mut self, logical_change_id: ChangeId) -> Self {
584 self.logical_change_id = Some(logical_change_id);
585 self.content_hash = None;
586 self
587 }
588
589 pub fn logical_change_id(&self) -> ChangeId {
590 self.logical_change_id.unwrap_or(self.change_id)
591 }
592
593 pub fn with_timestamp(mut self, timestamp: DateTime<Utc>) -> Self {
594 self.created_at = timestamp;
595 self.content_hash = None;
596 self
597 }
598
599 pub fn compute_hash(&self) -> ContentHash {
600 let content_len = self.hash_len();
601 ContentHash::compute_typed_with_len("state", content_len, |hasher| {
602 self.update_hash(hasher);
603 })
604 }
605
606 #[doc(hidden)]
613 pub fn compute_hash_for_legacy_signature_migration(&self) -> ContentHash {
614 let content_len = self.hash_len_core();
615 ContentHash::compute_typed_with_len("state", content_len, |hasher| {
616 self.update_hash_core(hasher);
617 })
618 }
619
620 pub fn hash(&mut self) -> ContentHash {
621 if self.content_hash.is_none() {
622 self.content_hash = Some(self.compute_hash());
623 }
624 self.content_hash.expect("hash was just computed above")
625 }
626
627 pub fn is_root(&self) -> bool {
628 self.parents.is_empty()
629 }
630
631 pub fn is_merge(&self) -> bool {
632 self.parents.len() > 1
633 }
634
635 pub fn is_agent_authored(&self) -> bool {
636 self.attribution.agent.is_some()
637 }
638
639 pub fn first_parent(&self) -> Option<&ChangeId> {
640 self.parents.first()
641 }
642
643 fn hash_len(&self) -> u64 {
644 self.hash_len_core() + self.hash_len_fidelity()
645 }
646
647 fn hash_len_core(&self) -> u64 {
651 let principal = &self.attribution.principal;
652 let mut len = 0u64;
653
654 len += 1;
655 if self.logical_change_id.is_some() {
656 len += 16;
657 }
658
659 len += self.tree.as_bytes().len() as u64;
660 len += 4;
661 len += (self.parents.len() * 16) as u64;
662
663 len += principal.name.len() as u64 + 1;
664 len += principal.email.len() as u64 + 1;
665
666 len += 1;
667 if let Some(agent) = &self.attribution.agent {
668 len += agent.provider.len() as u64 + 1;
669 len += agent.model.len() as u64 + 1;
670
671 len += 1;
672 if let Some(session_id) = &agent.session_id {
673 len += session_id.len() as u64 + 1;
674 }
675
676 len += 1;
677 if let Some(policy_id) = &agent.policy_id {
678 len += policy_id.len() as u64 + 1;
679 }
680 }
681
682 len += 1;
683 if let Some(intent) = &self.intent {
684 len += intent.len() as u64 + 1;
685 }
686
687 len += 1;
688 if self.confidence.is_some() {
689 len += 4;
690 }
691
692 len += 8;
693
694 len += 1;
695 if let Some(verification) = &self.verification {
696 len += verification.hash_len() as u64;
697 }
698
699 len += 1;
700 if self.provenance.is_some() {
701 len += 32;
702 }
703
704 len += 1;
705 if self.context.is_some() {
706 len += 32;
707 }
708
709 len += 1;
710
711 len
712 }
713
714 fn hash_len_fidelity(&self) -> u64 {
719 let mut len = 0u64;
720
721 len += 1;
724 if let Some(committer) = &self.committer {
725 len += committer.name.len() as u64 + 1;
726 len += committer.email.len() as u64 + 1;
727 }
728 len += 4;
730 len += 4;
731 len += 1;
733 if self.authored_at.is_some() {
734 len += 8;
735 }
736 len += 1;
740 if let Some(raw_message) = &self.raw_message {
741 len += 4 + raw_message.len() as u64;
742 }
743 len += 4;
746 for (key, value) in &self.extra_headers {
747 len += 4 + key.len() as u64;
748 len += 4 + value.len() as u64;
749 }
750
751 len
752 }
753
754 fn update_hash(&self, hasher: &mut blake3::Hasher) {
755 self.update_hash_core(hasher);
756 self.update_hash_fidelity(hasher);
757 }
758
759 fn update_hash_core(&self, hasher: &mut blake3::Hasher) {
763 let principal = &self.attribution.principal;
764
765 if let Some(logical_change_id) = self.logical_change_id {
766 hasher.update(&[1]);
767 hasher.update(logical_change_id.as_bytes());
768 } else {
769 hasher.update(&[0]);
770 }
771
772 hasher.update(self.tree.as_bytes());
773 hasher.update(&(self.parents.len() as u32).to_le_bytes());
774 for parent in &self.parents {
775 hasher.update(parent.as_bytes());
776 }
777
778 hasher.update(principal.name.as_bytes());
779 hasher.update(&[0]);
780 hasher.update(principal.email.as_bytes());
781 hasher.update(&[0]);
782
783 if let Some(agent) = &self.attribution.agent {
784 hasher.update(&[1]);
785 hasher.update(agent.provider.as_bytes());
786 hasher.update(&[0]);
787 hasher.update(agent.model.as_bytes());
788 hasher.update(&[0]);
789 write_optional_string(hasher, &agent.session_id);
790 write_optional_string(hasher, &agent.segment_id);
791 write_optional_string(hasher, &agent.policy_id);
792 } else {
793 hasher.update(&[0]);
794 }
795
796 write_optional_string(hasher, &self.intent);
797
798 if let Some(confidence) = self.confidence {
799 hasher.update(&[1]);
800 hasher.update(&confidence.to_le_bytes());
801 } else {
802 hasher.update(&[0]);
803 }
804
805 hasher.update(&self.created_at.timestamp().to_le_bytes());
806
807 if let Some(verification) = &self.verification {
808 hasher.update(&[1]);
809 verification.update_hasher(hasher);
810 } else {
811 hasher.update(&[0]);
812 }
813
814 if let Some(provenance) = self.provenance {
815 hasher.update(&[1]);
816 hasher.update(provenance.as_bytes());
817 } else {
818 hasher.update(&[0]);
819 }
820
821 if let Some(context) = self.context {
822 hasher.update(&[1]);
823 hasher.update(context.as_bytes());
824 } else {
825 hasher.update(&[0]);
826 }
827
828 hasher.update(&[self.status.to_byte()]);
829 }
830
831 fn update_hash_fidelity(&self, hasher: &mut blake3::Hasher) {
845 if let Some(committer) = &self.committer {
846 hasher.update(&[1]);
847 hasher.update(committer.name.as_bytes());
848 hasher.update(&[0]);
849 hasher.update(committer.email.as_bytes());
850 hasher.update(&[0]);
851 } else {
852 hasher.update(&[0]);
853 }
854
855 hasher.update(&self.authored_tz_offset.to_le_bytes());
856 hasher.update(&self.committer_tz_offset.to_le_bytes());
857
858 if let Some(authored_at) = self.authored_at {
861 hasher.update(&[1]);
862 hasher.update(&authored_at.timestamp().to_le_bytes());
863 } else {
864 hasher.update(&[0]);
865 }
866
867 write_optional_bytes(hasher, &self.raw_message);
868
869 hasher.update(&(self.extra_headers.len() as u32).to_le_bytes());
871 for (key, value) in &self.extra_headers {
872 hasher.update(&(key.len() as u32).to_le_bytes());
873 hasher.update(key);
874 hasher.update(&(value.len() as u32).to_le_bytes());
875 hasher.update(value);
876 }
877 }
878}
879
880fn write_optional_bytes(hasher: &mut blake3::Hasher, value: &Option<Vec<u8>>) {
886 match value {
887 Some(bytes) => {
888 hasher.update(&[1]);
889 hasher.update(&(bytes.len() as u32).to_le_bytes());
890 hasher.update(bytes);
891 }
892 None => {
893 hasher.update(&[0]);
894 }
895 }
896}
897
898fn write_optional_string(hasher: &mut blake3::Hasher, value: &Option<String>) {
899 match value {
900 Some(value) => {
901 hasher.update(&[1]);
902 hasher.update(value.as_bytes());
903 hasher.update(&[0]);
904 }
905 None => {
906 hasher.update(&[0]);
907 }
908 }
909}
910
/// Extracts the extension headers from raw Git commit content.
///
/// The header section is everything before the first blank line. Each header
/// line is split at its first space into `(name, value)`; a line starting
/// with a space is a continuation and is appended to the previous header's
/// value after a `\n` (with the leading space removed). A continuation with
/// no preceding header is ignored.
///
/// The result contains, in wire order:
/// * every header that follows the first `committer` header, when one exists;
/// * otherwise every header whose name is not `tree`, `parent`, `author` or
///   `committer`.
///
/// A blank line always terminates the header section. This covers empty
/// input, content that begins with a newline, and header-only content that
/// ends in a single trailing newline — none of these produce a header with an
/// empty name.
pub fn parse_commit_extension_headers(commit_content: &[u8]) -> Vec<(Vec<u8>, Vec<u8>)> {
    let header_block = match find_subslice(commit_content, b"\n\n") {
        Some(idx) => &commit_content[..idx],
        None => commit_content,
    };

    let mut headers: Vec<(Vec<u8>, Vec<u8>)> = Vec::new();
    for line in header_block.split(|&b| b == b'\n') {
        // An empty line marks the end of the headers. Without this guard a
        // trailing newline (or empty input) would be recorded as a header
        // with an empty name and value.
        if line.is_empty() {
            break;
        }

        if let Some((&b' ', continuation)) = line.split_first() {
            if let Some((_, value)) = headers.last_mut() {
                value.push(b'\n');
                value.extend_from_slice(continuation);
            }
            continue;
        }

        let (name, value) = match line.iter().position(|&b| b == b' ') {
            Some(sp) => (line[..sp].to_vec(), line[sp + 1..].to_vec()),
            None => (line.to_vec(), Vec::new()),
        };
        headers.push((name, value));
    }

    match headers.iter().position(|(name, _)| name == b"committer") {
        // Everything after the first `committer` header is an extension header.
        Some(idx) => headers.split_off(idx + 1),
        // No `committer` header: fall back to dropping the core header names.
        None => headers
            .into_iter()
            .filter(|(name, _)| {
                !matches!(
                    name.as_slice(),
                    b"tree" | b"parent" | b"author" | b"committer"
                )
            })
            .collect(),
    }
}

/// Returns the index of the first occurrence of `needle` in `haystack`.
///
/// Returns `None` when `needle` is empty or longer than `haystack`.
fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    // `windows(0)` panics, so the empty needle must be rejected up front.
    if needle.is_empty() || needle.len() > haystack.len() {
        return None;
    }
    haystack.windows(needle.len()).position(|w| w == needle)
}
999
#[cfg(test)]
mod tests {
    use super::*;
    use crate::object::Principal;

    /// Human-only attribution shared by most tests.
    fn sample_attribution() -> Attribution {
        let alice = Principal::new("Alice", "alice@example.com");
        Attribution::human(alice)
    }

    /// Root snapshot over a fixed tree hash with the sample attribution.
    fn sample_state() -> State {
        let tree = ContentHash::compute(b"tree");
        State::new_snapshot(tree, Vec::new(), sample_attribution())
    }

    /// Fresh sample state carrying `base`'s change id and logical change id,
    /// so that only the field under test (plus `created_at`, which callers
    /// copy over afterwards) can differ from `base`.
    fn seeded_like(base: &State) -> State {
        sample_state()
            .with_change_id(base.change_id)
            .with_logical_change_id(base.logical_change_id())
    }

    /// Hashes `state`, applies `mutate`, and checks that the cached hash was
    /// dropped: the new hash differs and equals a fresh computation.
    fn assert_mutator_invalidates_cached_hash(
        mut state: State,
        mutate: impl FnOnce(State) -> State,
    ) {
        let before = state.hash();
        let mut after = mutate(state);
        assert_ne!(after.hash(), before);
        assert_eq!(after.hash(), after.compute_hash());
    }

    #[test]
    fn new_snapshot_sets_fresh_logical_identity() {
        let state = sample_state();
        let stored = state
            .logical_change_id
            .expect("snapshot should set logical identity");
        assert_ne!(state.logical_change_id(), state.change_id);
        assert_eq!(state.logical_change_id(), stored);
    }

    #[test]
    fn new_refresh_preserves_explicit_logical_identity() {
        let explicit = ChangeId::from_bytes([7; 16]);
        let tree = ContentHash::compute(b"tree");
        let state = State::new_refresh_of(tree, Vec::new(), sample_attribution(), explicit);
        assert_eq!(state.logical_change_id(), explicit);
        assert_ne!(state.change_id, explicit);
    }

    #[test]
    fn new_merge_uses_fresh_logical_identity() {
        let parents = vec![ChangeId::from_bytes([1; 16]), ChangeId::from_bytes([2; 16])];
        let state = State::new_merge(ContentHash::compute(b"tree"), parents, sample_attribution());
        let stored = state
            .logical_change_id
            .expect("merge should set logical identity");
        assert_ne!(state.logical_change_id(), state.change_id);
        assert_eq!(state.logical_change_id(), stored);
        assert!(state.is_merge());
    }

    #[test]
    fn with_change_id_invalidates_cached_hash_when_logical_identity_changes() {
        // Make the logical id track the change id so that replacing the
        // change id also moves the logical id.
        let snapshot = sample_state();
        let own_id = snapshot.change_id;
        let mut state = snapshot.with_logical_change_id(own_id);
        let original_hash = state.hash();

        let replacement = ChangeId::from_bytes([9; 16]);
        let mut updated = state.with_change_id(replacement);

        assert_eq!(updated.logical_change_id(), replacement);
        assert_ne!(updated.hash(), original_hash);
        assert_eq!(updated.hash(), updated.compute_hash());
    }

    #[test]
    fn agent_segment_is_part_of_state_hash() {
        let tree = ContentHash::compute(b"tree");
        let timestamp = Utc::now();
        let logical_change_id = ChangeId::from_bytes([3; 16]);
        let principal = Principal::new("Alice", "alice@example.com");

        // Two attributions that differ only in the agent segment id.
        let attribution_a = Attribution::with_agent(
            principal.clone(),
            crate::object::Agent::new("openai", "gpt-5").with_session("sess-1", "seg-1"),
        );
        let attribution_b = Attribution::with_agent(
            principal,
            crate::object::Agent::new("openai", "gpt-5").with_session("sess-1", "seg-2"),
        );

        let build = |attribution: Attribution| {
            State::new_snapshot(tree, vec![], attribution)
                .with_logical_change_id(logical_change_id)
                .with_timestamp(timestamp)
        };
        let state_a = build(attribution_a);
        let state_b = build(attribution_b);

        assert_ne!(state_a.compute_hash(), state_b.compute_hash());
    }

    #[test]
    fn with_intent_invalidates_cached_hash() {
        assert_mutator_invalidates_cached_hash(sample_state(), |state| {
            state.with_intent("capture intent")
        });
    }

    #[test]
    fn with_confidence_invalidates_cached_hash() {
        assert_mutator_invalidates_cached_hash(sample_state(), |state| state.with_confidence(0.9));
    }

    #[test]
    fn with_verification_invalidates_cached_hash() {
        assert_mutator_invalidates_cached_hash(sample_state(), |state| {
            let verification = Verification::new().with_tests_passed(true);
            state.with_verification(verification)
        });
    }

    #[test]
    fn with_status_invalidates_cached_hash() {
        assert_mutator_invalidates_cached_hash(sample_state(), |state| {
            state.with_status(Status::Published)
        });
    }

    #[test]
    fn with_timestamp_invalidates_cached_hash() {
        assert_mutator_invalidates_cached_hash(sample_state(), |state| {
            state.with_timestamp(Utc::now() + chrono::Duration::seconds(1))
        });
    }

    #[test]
    fn w1_tail_fields_are_not_part_of_state_hash() {
        let mut bare = sample_state();
        let bare_hash = bare.hash();

        let mut decorated = seeded_like(&bare)
            .with_risk_signals(ContentHash::compute(b"risk-signals-blob"))
            .with_review_signatures(ContentHash::compute(b"review-signatures-blob"))
            .with_discussions(ContentHash::compute(b"discussions-blob"))
            .with_structured_conflicts(ContentHash::compute(b"conflicts-blob"));
        decorated.created_at = bare.created_at;

        assert_eq!(
            decorated.hash(),
            bare_hash,
            "W1 tail fields must not affect the state hash"
        );
    }

    #[test]
    fn fidelity_fields_are_part_of_state_hash() {
        let base = sample_state();
        let base_hash = base.compute_hash();

        let mut with_committer =
            seeded_like(&base).with_committer(Principal::new("Carol", "carol@example.com"));
        with_committer.created_at = base.created_at;
        assert_ne!(
            with_committer.hash(),
            base_hash,
            "committer must affect the state hash"
        );

        // Each remaining fidelity field is exercised on its own.
        let mutators: [fn(State) -> State; 5] = [
            |s: State| s.with_tz_offsets(3600, -7200),
            |s: State| s.with_authored_at(Utc::now() + chrono::Duration::seconds(1)),
            |s: State| s.with_raw_message("verbatim body\n"),
            |s: State| {
                s.with_extra_headers(vec![(
                    b"gpgsig".to_vec(),
                    b"-----BEGIN PGP SIGNATURE-----\n".to_vec(),
                )])
            },
            |s: State| s.with_extra_headers(vec![(b"mergetag".to_vec(), b"x".to_vec())]),
        ];
        for mutate in mutators {
            let mut decorated = mutate(seeded_like(&base));
            decorated.created_at = base.created_at;
            assert_ne!(
                decorated.hash(),
                base_hash,
                "fidelity field must affect the state hash"
            );
        }
    }

    #[test]
    fn legacy_signature_migration_hash_matches_golden_vector() {
        let attribution = Attribution::with_agent(
            Principal::new("Legacy Author", "legacy@example.com"),
            crate::object::Agent::new("openai", "gpt-5")
                .with_session("session-633", "segment-001")
                .with_policy("policy-legacy"),
        );
        let created_at = DateTime::from_timestamp(1_700_000_000, 0).expect("valid timestamp");
        let authored_at = DateTime::from_timestamp(1_699_999_000, 0).expect("valid timestamp");

        let state = State::new_snapshot(
            ContentHash::compute(b"issue-633-tree"),
            vec![ChangeId::from_bytes([0x11; 16])],
            attribution,
        )
        .with_logical_change_id(ChangeId::from_bytes([0x63; 16]))
        .with_intent("freeze pre-565 hash")
        .with_confidence(0.875)
        .with_timestamp(created_at)
        .with_committer(Principal::new("Legacy Committer", "committer@example.com"))
        .with_tz_offsets(3600, -18000)
        .with_authored_at(authored_at)
        .with_raw_message(b"legacy commit message\n")
        .with_extra_headers(vec![(b"encoding".to_vec(), b"UTF-8".to_vec())])
        .with_status(Status::Published);

        let legacy_hash = state.compute_hash_for_legacy_signature_migration();
        assert_eq!(
            legacy_hash.to_hex(),
            "b89e1b40e681a1bf88679db7cfcacdafb1f370bc40ed5d50760dae1d4ab49dab",
        );
        assert_ne!(
            legacy_hash,
            state.compute_hash(),
            "fixture must distinguish the pre-#565 legacy path from the current hash",
        );
    }

    #[test]
    fn extra_headers_order_affects_hash() {
        let base = sample_state();
        let header_a = || (b"a".to_vec(), b"1".to_vec());
        let header_b = || (b"b".to_vec(), b"2".to_vec());

        let mut one = seeded_like(&base).with_extra_headers(vec![header_a(), header_b()]);
        one.created_at = base.created_at;

        let mut two = seeded_like(&base).with_extra_headers(vec![header_b(), header_a()]);
        two.created_at = base.created_at;

        assert_ne!(one.hash(), two.hash());
    }

    #[test]
    fn fidelity_fields_hash_is_stable() {
        let headers = vec![
            (b"gpgsig".to_vec(), b"sig".to_vec()),
            (b"k".to_vec(), b"v".to_vec()),
        ];
        let mut state = sample_state()
            .with_committer(Principal::new("Dave", "dave@example.com"))
            .with_tz_offsets(3600, 0)
            .with_authored_at(Utc::now())
            .with_raw_message("body\n")
            .with_extra_headers(headers);
        assert_eq!(state.hash(), state.compute_hash());
    }

    #[test]
    fn non_utf8_raw_message_is_byte_preserved() {
        let raw = b"caf\xe9\n".to_vec();
        assert!(
            String::from_utf8(raw.clone()).is_err(),
            "test fixture must be invalid UTF-8 to be meaningful"
        );

        let mut state = sample_state().with_raw_message(&raw);
        assert_eq!(
            state.raw_message.as_deref(),
            Some(raw.as_slice()),
            "raw bytes preserved verbatim"
        );

        // Round-trip through MessagePack and confirm bytes and hash survive.
        let encoded = rmp_serde::to_vec(&state).expect("serialize state");
        let mut back: State = rmp_serde::from_slice(&encoded).expect("deserialize state");
        assert_eq!(back.raw_message.as_deref(), Some(raw.as_slice()));
        assert_eq!(state.hash(), back.hash());
        assert_eq!(back.hash(), back.compute_hash());
    }

    #[test]
    fn raw_message_with_nul_byte_changes_hash() {
        let base = sample_state();

        let mut a = seeded_like(&base).with_raw_message(b"a\x00b");
        a.created_at = base.created_at;

        let mut b = seeded_like(&base).with_raw_message(b"a\x00c");
        b.created_at = base.created_at;

        assert_ne!(a.hash(), b.hash());
    }

    #[test]
    fn parse_extension_headers_preserves_noncanonical_wire_order() {
        let lines: &[&[u8]] = &[
            b"tree 1111111111111111111111111111111111111111",
            b"parent 2222222222222222222222222222222222222222",
            b"author Alice <alice@example.com> 1700000000 +0000",
            b"committer Bob <bob@example.com> 1700000100 +0000",
            b"x-custom custom value",
            b"gpgsig -----BEGIN PGP SIGNATURE-----",
            b" sig-line-1",
            b" -----END PGP SIGNATURE-----",
            b"encoding ISO-8859-1",
            b"mergetag object 3333333333333333333333333333333333333333",
            b" type commit",
            b" tag sidetag",
            b" tagger Carol <carol@example.com> 1700000050 +0000",
            // Continuation line holding only the marker space: an embedded blank line.
            b" ",
            b" signed side tag",
            // Blank line ends the headers; the message follows.
            b"",
            b"the commit message",
            b"",
        ];
        let wire: Vec<u8> = lines.join(&b'\n');

        let parsed = parse_commit_extension_headers(&wire);

        let gpgsig_value =
            b"-----BEGIN PGP SIGNATURE-----\nsig-line-1\n-----END PGP SIGNATURE-----".to_vec();
        let mergetag_value = b"object 3333333333333333333333333333333333333333\ntype commit\ntag sidetag\ntagger Carol <carol@example.com> 1700000050 +0000\n\nsigned side tag".to_vec();
        let expected: Vec<(Vec<u8>, Vec<u8>)> = vec![
            (b"x-custom".to_vec(), b"custom value".to_vec()),
            (b"gpgsig".to_vec(), gpgsig_value),
            (b"encoding".to_vec(), b"ISO-8859-1".to_vec()),
            (b"mergetag".to_vec(), mergetag_value),
        ];

        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_extension_headers_empty_when_only_core_headers() {
        let lines: [&[u8]; 6] = [
            b"tree 1111111111111111111111111111111111111111",
            b"author Alice <alice@example.com> 1700000000 +0000",
            b"committer Bob <bob@example.com> 1700000100 +0000",
            b"",
            b"just a message",
            b"",
        ];
        let content: Vec<u8> = lines.join(&b'\n');
        assert!(parse_commit_extension_headers(&content).is_empty());
    }
}