use std::collections::BTreeMap;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use super::{Attribution, ChangeId, ContentHash, Principal};
/// Status marker carried by a state: either a draft or published.
///
/// `Draft` is the `Default` variant. `Status::to_byte` maps the variants to
/// single-byte tags (`Draft` = 0, `Published` = 1).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum Status {
/// Default status; encoded as byte `0`.
#[default]
Draft,
/// Encoded as byte `1`.
Published,
}
impl Status {
pub fn to_byte(&self) -> u8 {
match self {
Status::Draft => 0,
Status::Published => 1,
}
}
pub fn from_byte(b: u8) -> Option<Self> {
match b {
0 => Some(Status::Draft),
1 => Some(Status::Published),
_ => None,
}
}
}
/// Signature record that can be attached to a `State`.
///
/// All three parts are stored as plain strings.
/// NOTE(review): the textual encoding of the key and signature (hex, base64, ...)
/// is not visible in this file — confirm against the signing code.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct StateSignature {
/// Name of the signature algorithm.
pub algorithm: String,
/// Public key of the signer, as a string.
pub public_key: String,
/// The signature value, as a string.
pub signature: String,
}
impl StateSignature {
    /// Borrows the algorithm name recorded in this signature.
    pub fn algorithm(&self) -> &str {
        self.algorithm.as_str()
    }
}
/// Three-way outcome describing the signature state of an object.
///
/// NOTE(review): the code that produces these values lives outside this file;
/// the per-variant descriptions below follow the variant names.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SignatureStatus {
/// A signature is present and was accepted.
Valid,
/// A signature is present but was rejected.
Invalid,
/// No signature is present.
Unsigned,
}
impl SignatureStatus {
pub fn is_valid(self) -> bool {
self == SignatureStatus::Valid
}
pub fn is_unsigned(self) -> bool {
self == SignatureStatus::Unsigned
}
}
/// Optional verification metrics recorded alongside a state.
///
/// Every metric is optional; `Verification::is_empty` reports whether none is
/// set and `custom` has no entries. All fields, including `custom`, feed the
/// state hash through `Verification::update_hasher`.
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
pub struct Verification {
/// Whether the test run passed, if known.
pub tests_passed: Option<bool>,
/// Number of failed tests, if known.
pub tests_failed: Option<u32>,
/// Coverage figure. NOTE(review): presumably a percentage; no range is enforced here.
pub coverage_pct: Option<f32>,
/// Coverage change. NOTE(review): the baseline it is measured against is not visible here.
pub coverage_delta: Option<f32>,
/// Number of lint warnings, if known.
pub lint_warnings: Option<u32>,
/// Free-form extra metrics keyed by name; defaults to empty when absent on deserialize.
#[serde(default)]
pub custom: BTreeMap<String, serde_json::Value>,
}
impl Verification {
    /// Creates a record with every metric unset and no custom entries.
    pub fn new() -> Self {
        Default::default()
    }

    /// Builder: records whether the tests passed.
    pub fn with_tests_passed(self, passed: bool) -> Self {
        Self {
            tests_passed: Some(passed),
            ..self
        }
    }

    /// Builder: records the number of failed tests.
    pub fn with_tests_failed(self, failed: u32) -> Self {
        Self {
            tests_failed: Some(failed),
            ..self
        }
    }

    /// Returns `true` when no metric is set and `custom` has no entries.
    pub fn is_empty(&self) -> bool {
        let any_metric_set = self.tests_passed.is_some()
            || self.tests_failed.is_some()
            || self.coverage_pct.is_some()
            || self.coverage_delta.is_some()
            || self.lint_warnings.is_some();
        !any_metric_set && self.custom.is_empty()
    }

    /// Number of bytes [`Verification::update_hasher`] feeds into the hasher.
    ///
    /// Must stay in lock-step with `update_hasher`: each optional metric costs
    /// one presence byte plus its payload when set, followed by a 4-byte entry
    /// count and, per custom entry, two 4-byte length prefixes plus the key and
    /// the JSON-encoded value.
    pub(crate) fn hash_len(&self) -> usize {
        // Presence byte, plus `width` payload bytes when the metric is set.
        let tagged = |present: bool, width: usize| -> usize {
            if present {
                1 + width
            } else {
                1
            }
        };

        let metrics = tagged(self.tests_passed.is_some(), 1)
            + tagged(self.tests_failed.is_some(), 4)
            + tagged(self.coverage_pct.is_some(), 4)
            + tagged(self.coverage_delta.is_some(), 4)
            + tagged(self.lint_warnings.is_some(), 4);

        let custom_entries: usize = self
            .custom
            .iter()
            .map(|(key, value)| {
                // A value that fails to serialize contributes an empty payload,
                // mirroring what `update_hasher` writes.
                let encoded = serde_json::to_vec(value).unwrap_or_default();
                4 + key.len() + 4 + encoded.len()
            })
            .sum();

        // The literal 4 is the little-endian entry count written before the entries.
        metrics + 4 + custom_entries
    }

    /// Feeds this record into `hasher` in a fixed layout.
    ///
    /// Order: `tests_passed`, `tests_failed`, `coverage_pct`, `coverage_delta`,
    /// `lint_warnings` (each presence-tagged), then the custom entry count and
    /// the entries in `BTreeMap` key order, each as length-prefixed key and
    /// length-prefixed JSON value. All integers are little-endian.
    pub(crate) fn update_hasher(&self, hasher: &mut blake3::Hasher) {
        write_optional_u8(hasher, self.tests_passed.map(u8::from));
        write_optional_u32(hasher, self.tests_failed);
        write_optional_f32(hasher, self.coverage_pct);
        write_optional_f32(hasher, self.coverage_delta);
        write_optional_u32(hasher, self.lint_warnings);

        let entry_count = self.custom.len() as u32;
        hasher.update(&entry_count.to_le_bytes());

        for (key, value) in self.custom.iter() {
            let encoded = serde_json::to_vec(value).unwrap_or_default();
            let key_len = key.len() as u32;
            let value_len = encoded.len() as u32;
            hasher.update(&key_len.to_le_bytes());
            hasher.update(key.as_bytes());
            hasher.update(&value_len.to_le_bytes());
            hasher.update(&encoded);
        }
    }
}
/// Hashes an optional byte: tag `1` followed by the byte when present, tag `0` alone otherwise.
fn write_optional_u8(hasher: &mut blake3::Hasher, value: Option<u8>) {
    if let Some(byte) = value {
        // Tag and payload are fed in one call; the hasher is incremental, so
        // this is the same byte stream as two separate updates.
        hasher.update(&[1, byte]);
    } else {
        hasher.update(&[0]);
    }
}
/// Hashes an optional `u32`: a presence byte (`1`/`0`), then the little-endian value when present.
fn write_optional_u32(hasher: &mut blake3::Hasher, value: Option<u32>) {
    let presence_tag = u8::from(value.is_some());
    hasher.update(&[presence_tag]);
    if let Some(number) = value {
        hasher.update(&number.to_le_bytes());
    }
}
/// Hashes an optional `f32`: a presence byte (`1`/`0`), then the value's
/// little-endian bytes when present.
fn write_optional_f32(hasher: &mut blake3::Hasher, value: Option<f32>) {
    let presence_tag = u8::from(value.is_some());
    hasher.update(&[presence_tag]);
    if let Some(number) = value {
        hasher.update(&number.to_le_bytes());
    }
}
/// A recorded state: a tree hash plus parents, attribution and metadata.
///
/// The content hash (`State::compute_hash`) covers a "core" section and a
/// "fidelity" section. Fields noted below as "not hashed" are absent from both.
/// Field order is significant for positional serde formats; do not reorder.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct State {
/// Identifier of this state. Not hashed itself; only `logical_change_id` is.
pub change_id: ChangeId,
/// Cached result of `compute_hash`; never serialized, filled lazily by `hash()`
/// and reset by the builder methods that change hashed fields.
#[serde(skip)]
content_hash: Option<ContentHash>,
/// Hash of the tree for this state (hashed, core section).
pub tree: ContentHash,
/// Parent change ids in order (hashed). Empty means root; more than one means merge.
pub parents: Vec<ChangeId>,
/// Principal and optional agent responsible for the state (hashed).
pub attribution: Attribution,
/// Optional free-form intent text (hashed).
pub intent: Option<String>,
/// Optional confidence; `with_confidence` clamps it to `0.0..=1.0` (hashed).
pub confidence: Option<f32>,
/// Creation time; hashed at whole-second resolution.
pub created_at: DateTime<Utc>,
/// Optional verification metrics (hashed).
pub verification: Option<Verification>,
/// Optional signature (not hashed).
pub signature: Option<StateSignature>,
/// Draft/published marker (hashed as a single byte).
pub status: Status,
/// Optional provenance blob hash (hashed).
#[serde(default)]
pub provenance: Option<ContentHash>,
/// Logical identity shared across refreshes; `logical_change_id()` falls back
/// to `change_id` when this is `None` (hashed).
#[serde(default)]
pub logical_change_id: Option<ChangeId>,
/// Optional context blob hash (hashed).
#[serde(default)]
pub context: Option<ContentHash>,
/// Optional authoring time; hashed at whole-second resolution (fidelity section).
#[serde(default)]
pub authored_at: Option<DateTime<Utc>>,
/// Optional risk-signals blob hash (not hashed).
#[serde(default)]
pub risk_signals: Option<ContentHash>,
/// Optional review-signatures blob hash (not hashed).
#[serde(default)]
pub review_signatures: Option<ContentHash>,
/// Optional discussions blob hash (not hashed).
#[serde(default)]
pub discussions: Option<ContentHash>,
/// Optional structured-conflicts blob hash (not hashed).
#[serde(default)]
pub structured_conflicts: Option<ContentHash>,
/// Optional committer identity (hashed, fidelity section).
#[serde(default)]
pub committer: Option<Principal>,
/// Author timezone offset (hashed). NOTE(review): presumably seconds east of UTC — confirm.
#[serde(default)]
pub authored_tz_offset: i32,
/// Committer timezone offset (hashed). NOTE(review): presumably seconds east of UTC — confirm.
#[serde(default)]
pub committer_tz_offset: i32,
/// Verbatim message bytes, which may be non-UTF-8 (hashed, length-prefixed).
#[serde(default)]
pub raw_message: Option<Vec<u8>>,
/// Flag set via `with_git_lossy` (not hashed).
/// NOTE(review): presumably marks an import that could not be represented losslessly — confirm.
#[serde(default)]
pub git_lossy: bool,
/// Extra `(name, value)` header pairs; hashed in order, so ordering is significant.
#[serde(default)]
pub extra_headers: Vec<(Vec<u8>, Vec<u8>)>,
}
impl State {
    /// Creates a draft state; shorthand for [`State::new_snapshot`].
    pub fn new(tree: ContentHash, parents: Vec<ChangeId>, attribution: Attribution) -> Self {
        Self::new_snapshot(tree, parents, attribution)
    }

    /// Creates a draft state with a freshly generated logical change id.
    ///
    /// The logical id and the state's own `change_id` are generated
    /// independently, so they are distinct values.
    pub fn new_snapshot(
        tree: ContentHash,
        parents: Vec<ChangeId>,
        attribution: Attribution,
    ) -> Self {
        let fresh_logical_id = ChangeId::generate();
        Self::new_with_logical_change_id(tree, parents, attribution, fresh_logical_id)
    }

    /// Creates a merge state; currently identical to [`State::new_snapshot`].
    pub fn new_merge(tree: ContentHash, parents: Vec<ChangeId>, attribution: Attribution) -> Self {
        Self::new_snapshot(tree, parents, attribution)
    }

    /// Creates a state that keeps an existing logical identity while receiving
    /// a new `change_id`.
    pub fn new_refresh_of(
        tree: ContentHash,
        parents: Vec<ChangeId>,
        attribution: Attribution,
        logical_change_id: ChangeId,
    ) -> Self {
        Self::new_with_logical_change_id(tree, parents, attribution, logical_change_id)
    }

    /// Creates a fork state; currently identical to [`State::new_snapshot`].
    pub fn new_fork_of(
        tree: ContentHash,
        parents: Vec<ChangeId>,
        attribution: Attribution,
    ) -> Self {
        Self::new_snapshot(tree, parents, attribution)
    }

    /// Creates a collapse state; currently identical to [`State::new_snapshot`].
    pub fn new_collapse_of(
        tree: ContentHash,
        parents: Vec<ChangeId>,
        attribution: Attribution,
    ) -> Self {
        Self::new_snapshot(tree, parents, attribution)
    }

    /// Shared constructor: new `change_id`, current time, `Draft` status, and
    /// every optional field unset.
    fn new_with_logical_change_id(
        tree: ContentHash,
        parents: Vec<ChangeId>,
        attribution: Attribution,
        logical_change_id: ChangeId,
    ) -> Self {
        let change_id = ChangeId::generate();
        let created_at = Utc::now();
        Self {
            // Identity.
            change_id,
            logical_change_id: Some(logical_change_id),
            content_hash: None,
            // Content and lineage.
            tree,
            parents,
            attribution,
            created_at,
            status: Status::Draft,
            // Optional metadata starts out empty.
            intent: None,
            confidence: None,
            verification: None,
            signature: None,
            provenance: None,
            context: None,
            risk_signals: None,
            review_signatures: None,
            discussions: None,
            structured_conflicts: None,
            // Fidelity fields.
            authored_at: None,
            committer: None,
            authored_tz_offset: 0,
            committer_tz_offset: 0,
            raw_message: None,
            git_lossy: false,
            extra_headers: Vec::new(),
        }
    }

    /// Builder: sets the intent text and drops the cached hash.
    pub fn with_intent(self, intent: impl Into<String>) -> Self {
        Self {
            intent: Some(intent.into()),
            content_hash: None,
            ..self
        }
    }

    /// Builder: sets the confidence, clamped to `0.0..=1.0`, and drops the cached hash.
    pub fn with_confidence(self, confidence: f32) -> Self {
        let bounded = confidence.clamp(0.0, 1.0);
        Self {
            confidence: Some(bounded),
            content_hash: None,
            ..self
        }
    }

    /// Builder: attaches verification metrics and drops the cached hash.
    pub fn with_verification(self, verification: Verification) -> Self {
        Self {
            verification: Some(verification),
            content_hash: None,
            ..self
        }
    }

    /// Builder: attaches a signature. The signature is not hashed, so the
    /// cached hash is kept.
    pub fn with_signature(self, signature: StateSignature) -> Self {
        Self {
            signature: Some(signature),
            ..self
        }
    }

    /// Builder: sets the provenance hash and drops the cached hash.
    pub fn with_provenance(self, provenance: ContentHash) -> Self {
        Self {
            provenance: Some(provenance),
            content_hash: None,
            ..self
        }
    }

    /// Builder: sets the context hash and drops the cached hash.
    pub fn with_context(self, context: ContentHash) -> Self {
        Self {
            context: Some(context),
            content_hash: None,
            ..self
        }
    }

    /// Builder: sets the risk-signals hash (not hashed; cached hash is kept).
    pub fn with_risk_signals(self, risk_signals: ContentHash) -> Self {
        Self {
            risk_signals: Some(risk_signals),
            ..self
        }
    }

    /// Builder: sets the review-signatures hash (not hashed; cached hash is kept).
    pub fn with_review_signatures(self, review_signatures: ContentHash) -> Self {
        Self {
            review_signatures: Some(review_signatures),
            ..self
        }
    }

    /// Builder: sets the discussions hash (not hashed; cached hash is kept).
    pub fn with_discussions(self, discussions: ContentHash) -> Self {
        Self {
            discussions: Some(discussions),
            ..self
        }
    }

    /// Builder: sets the structured-conflicts hash (not hashed; cached hash is kept).
    pub fn with_structured_conflicts(self, structured_conflicts: ContentHash) -> Self {
        Self {
            structured_conflicts: Some(structured_conflicts),
            ..self
        }
    }

    /// Builder: sets the authoring timestamp and drops the cached hash.
    pub fn with_authored_at(self, timestamp: DateTime<Utc>) -> Self {
        Self {
            authored_at: Some(timestamp),
            content_hash: None,
            ..self
        }
    }

    /// Builder: sets the committer and drops the cached hash.
    pub fn with_committer(self, committer: Principal) -> Self {
        Self {
            committer: Some(committer),
            content_hash: None,
            ..self
        }
    }

    /// Builder: sets both timezone offsets and drops the cached hash.
    pub fn with_tz_offsets(self, authored: i32, committer: i32) -> Self {
        Self {
            authored_tz_offset: authored,
            committer_tz_offset: committer,
            content_hash: None,
            ..self
        }
    }

    /// Builder: stores a byte-for-byte copy of the message and drops the cached hash.
    pub fn with_raw_message(self, raw_message: impl AsRef<[u8]>) -> Self {
        let copied = raw_message.as_ref().to_vec();
        Self {
            raw_message: Some(copied),
            content_hash: None,
            ..self
        }
    }

    /// Builder: sets the `git_lossy` flag and drops the cached hash.
    ///
    /// The flag itself is not part of the hash input; the reset is simply conservative.
    pub fn with_git_lossy(self, git_lossy: bool) -> Self {
        Self {
            git_lossy,
            content_hash: None,
            ..self
        }
    }

    /// Builder: replaces the extra headers and drops the cached hash.
    pub fn with_extra_headers(self, extra_headers: Vec<(Vec<u8>, Vec<u8>)>) -> Self {
        Self {
            extra_headers,
            content_hash: None,
            ..self
        }
    }

    /// Builder: sets the status and drops the cached hash.
    pub fn with_status(self, status: Status) -> Self {
        Self {
            status,
            content_hash: None,
            ..self
        }
    }

    /// Builder: replaces `change_id`.
    ///
    /// When the logical id was unset or equal to the previous `change_id`, it
    /// follows the new id and the cached hash is dropped. An independent
    /// logical id is left untouched, and so is the cached hash, because
    /// `change_id` alone is not hashed.
    pub fn with_change_id(mut self, change_id: ChangeId) -> Self {
        let replaced = std::mem::replace(&mut self.change_id, change_id);
        let logical_tracks_change_id = match self.logical_change_id {
            None => true,
            Some(logical) => logical == replaced,
        };
        if logical_tracks_change_id {
            self.logical_change_id = Some(change_id);
            self.content_hash = None;
        }
        self
    }

    /// Builder: sets an explicit logical change id and drops the cached hash.
    pub fn with_logical_change_id(self, logical_change_id: ChangeId) -> Self {
        Self {
            logical_change_id: Some(logical_change_id),
            content_hash: None,
            ..self
        }
    }

    /// Returns the logical change id, falling back to `change_id` when unset.
    pub fn logical_change_id(&self) -> ChangeId {
        match self.logical_change_id {
            Some(logical) => logical,
            None => self.change_id,
        }
    }

    /// Builder: overrides the creation timestamp and drops the cached hash.
    pub fn with_timestamp(self, timestamp: DateTime<Utc>) -> Self {
        Self {
            created_at: timestamp,
            content_hash: None,
            ..self
        }
    }

    /// Computes the content hash over the core and fidelity sections,
    /// bypassing the cache.
    pub fn compute_hash(&self) -> ContentHash {
        ContentHash::compute_typed_with_len("state", self.hash_len(), |hasher| {
            self.update_hash(hasher)
        })
    }

    /// Computes the hash over the core section only, leaving out the fidelity
    /// fields (committer, timezone offsets, `authored_at`, raw message, extra headers).
    pub fn compute_hash_pre_fidelity(&self) -> ContentHash {
        ContentHash::compute_typed_with_len("state", self.hash_len_core(), |hasher| {
            self.update_hash_core(hasher)
        })
    }

    /// Returns the content hash, computing and caching it on first use.
    pub fn hash(&mut self) -> ContentHash {
        match self.content_hash {
            Some(cached) => cached,
            None => {
                let computed = self.compute_hash();
                self.content_hash = Some(computed);
                computed
            }
        }
    }

    /// `true` when the state has no parents.
    pub fn is_root(&self) -> bool {
        self.parents.first().is_none()
    }

    /// `true` when the state has two or more parents.
    pub fn is_merge(&self) -> bool {
        self.parents.len() >= 2
    }

    /// `true` when the attribution carries an agent.
    pub fn is_agent_authored(&self) -> bool {
        self.attribution.agent.is_some()
    }

    /// First parent, if any.
    pub fn first_parent(&self) -> Option<&ChangeId> {
        self.parents.first()
    }

    /// Length declared for the full hash input: core plus fidelity.
    fn hash_len(&self) -> u64 {
        let core = self.hash_len_core();
        let fidelity = self.hash_len_fidelity();
        core + fidelity
    }

    /// Length declared for the core section written by `update_hash_core`.
    ///
    /// NOTE(review): `update_hash_core` also writes `agent.segment_id`, but it
    /// is not counted here. The existing golden-vector test pins digests
    /// produced with this count, so it is left as-is. Confirm how
    /// `compute_typed_with_len` uses the length before changing it.
    fn hash_len_core(&self) -> u64 {
        // Presence byte, plus `payload` bytes when the value is set.
        let flagged = |present: bool, payload: u64| -> u64 {
            if present {
                1 + payload
            } else {
                1
            }
        };
        // Presence byte, plus NUL-terminated text when set.
        let optional_text = |text: &Option<String>| -> u64 {
            match text {
                Some(text) => 1 + text.len() as u64 + 1,
                None => 1,
            }
        };

        let author = &self.attribution.principal;

        let logical_id = flagged(self.logical_change_id.is_some(), 16);
        let tree = self.tree.as_bytes().len() as u64;
        // 4-byte parent count, then 16 bytes per parent.
        let parents = 4 + (self.parents.len() * 16) as u64;
        let principal = (author.name.len() as u64 + 1) + (author.email.len() as u64 + 1);
        let agent = 1 + match &self.attribution.agent {
            Some(agent) => {
                (agent.provider.len() as u64 + 1)
                    + (agent.model.len() as u64 + 1)
                    + optional_text(&agent.session_id)
                    + optional_text(&agent.policy_id)
            }
            None => 0,
        };
        let intent = optional_text(&self.intent);
        let confidence = flagged(self.confidence.is_some(), 4);
        let created_at = 8u64;
        let verification = 1 + self
            .verification
            .as_ref()
            .map_or(0, |verification| verification.hash_len() as u64);
        let provenance = flagged(self.provenance.is_some(), 32);
        let context = flagged(self.context.is_some(), 32);
        let status = 1u64;

        logical_id
            + tree
            + parents
            + principal
            + agent
            + intent
            + confidence
            + created_at
            + verification
            + provenance
            + context
            + status
    }

    /// Length declared for the fidelity section written by `update_hash_fidelity`.
    fn hash_len_fidelity(&self) -> u64 {
        let committer = match &self.committer {
            Some(committer) => {
                1 + (committer.name.len() as u64 + 1) + (committer.email.len() as u64 + 1)
            }
            None => 1,
        };
        // Two 4-byte timezone offsets.
        let tz_offsets = 4 + 4;
        let authored_at = if self.authored_at.is_some() { 1 + 8 } else { 1 };
        let raw_message = match &self.raw_message {
            Some(bytes) => 1 + 4 + bytes.len() as u64,
            None => 1,
        };
        let header_count = 4;
        let headers: u64 = self
            .extra_headers
            .iter()
            .map(|(key, value)| 4 + key.len() as u64 + 4 + value.len() as u64)
            .sum();

        committer + tz_offsets + authored_at + raw_message + header_count + headers
    }

    /// Feeds the full hash input: core section, then fidelity section.
    fn update_hash(&self, hasher: &mut blake3::Hasher) {
        self.update_hash_core(hasher);
        self.update_hash_fidelity(hasher);
    }

    /// Feeds the core section into `hasher`.
    ///
    /// The write order is part of the hash format and must not change:
    /// logical id, tree, parents, principal, agent, intent, confidence,
    /// created-at seconds, verification, provenance, context, status.
    fn update_hash_core(&self, hasher: &mut blake3::Hasher) {
        match self.logical_change_id {
            Some(logical_id) => {
                hasher.update(&[1]);
                hasher.update(logical_id.as_bytes());
            }
            None => {
                hasher.update(&[0]);
            }
        }

        hasher.update(self.tree.as_bytes());

        let parent_count = self.parents.len() as u32;
        hasher.update(&parent_count.to_le_bytes());
        for parent in self.parents.iter() {
            hasher.update(parent.as_bytes());
        }

        // Mandatory strings are NUL-terminated.
        let author = &self.attribution.principal;
        hasher.update(author.name.as_bytes());
        hasher.update(&[0]);
        hasher.update(author.email.as_bytes());
        hasher.update(&[0]);

        match &self.attribution.agent {
            Some(agent) => {
                hasher.update(&[1]);
                hasher.update(agent.provider.as_bytes());
                hasher.update(&[0]);
                hasher.update(agent.model.as_bytes());
                hasher.update(&[0]);
                write_optional_string(hasher, &agent.session_id);
                write_optional_string(hasher, &agent.segment_id);
                write_optional_string(hasher, &agent.policy_id);
            }
            None => {
                hasher.update(&[0]);
            }
        }

        write_optional_string(hasher, &self.intent);

        match self.confidence {
            Some(confidence) => {
                hasher.update(&[1]);
                hasher.update(&confidence.to_le_bytes());
            }
            None => {
                hasher.update(&[0]);
            }
        }

        // Only whole seconds are hashed; sub-second precision is ignored.
        let created_secs = self.created_at.timestamp();
        hasher.update(&created_secs.to_le_bytes());

        match &self.verification {
            Some(verification) => {
                hasher.update(&[1]);
                verification.update_hasher(hasher);
            }
            None => {
                hasher.update(&[0]);
            }
        }

        match self.provenance {
            Some(provenance) => {
                hasher.update(&[1]);
                hasher.update(provenance.as_bytes());
            }
            None => {
                hasher.update(&[0]);
            }
        }

        match self.context {
            Some(context) => {
                hasher.update(&[1]);
                hasher.update(context.as_bytes());
            }
            None => {
                hasher.update(&[0]);
            }
        }

        let status_tag = self.status.to_byte();
        hasher.update(&[status_tag]);
    }

    /// Feeds the fidelity section into `hasher`.
    ///
    /// Order: committer, authored timezone offset, committer timezone offset,
    /// authored-at seconds, raw message, then the extra headers in their
    /// stored order.
    fn update_hash_fidelity(&self, hasher: &mut blake3::Hasher) {
        match &self.committer {
            Some(committer) => {
                hasher.update(&[1]);
                hasher.update(committer.name.as_bytes());
                hasher.update(&[0]);
                hasher.update(committer.email.as_bytes());
                hasher.update(&[0]);
            }
            None => {
                hasher.update(&[0]);
            }
        }

        hasher.update(&self.authored_tz_offset.to_le_bytes());
        hasher.update(&self.committer_tz_offset.to_le_bytes());

        match self.authored_at {
            Some(authored_at) => {
                let authored_secs = authored_at.timestamp();
                hasher.update(&[1]);
                hasher.update(&authored_secs.to_le_bytes());
            }
            None => {
                hasher.update(&[0]);
            }
        }

        write_optional_bytes(hasher, &self.raw_message);

        let header_count = self.extra_headers.len() as u32;
        hasher.update(&header_count.to_le_bytes());
        for (key, value) in self.extra_headers.iter() {
            // Length prefixes keep arbitrary bytes (including NUL) unambiguous.
            let key_len = key.len() as u32;
            let value_len = value.len() as u32;
            hasher.update(&key_len.to_le_bytes());
            hasher.update(key);
            hasher.update(&value_len.to_le_bytes());
            hasher.update(value);
        }
    }
}
/// Hashes an optional byte string: tag `1`, a little-endian `u32` length and
/// the bytes when present; tag `0` alone otherwise.
fn write_optional_bytes(hasher: &mut blake3::Hasher, value: &Option<Vec<u8>>) {
    if let Some(payload) = value.as_deref() {
        let payload_len = payload.len() as u32;
        hasher.update(&[1]);
        hasher.update(&payload_len.to_le_bytes());
        hasher.update(payload);
    } else {
        hasher.update(&[0]);
    }
}
/// Hashes an optional string: tag `1`, the UTF-8 bytes and a NUL terminator
/// when present; a single `0` byte otherwise.
fn write_optional_string(hasher: &mut blake3::Hasher, value: &Option<String>) {
    if let Some(text) = value {
        hasher.update(&[1]);
        hasher.update(text.as_bytes());
    }
    // This zero byte is the NUL terminator in the `Some` case and the
    // "absent" tag in the `None` case.
    hasher.update(&[0]);
}
/// Extracts the non-core headers of a raw git commit object, in wire order.
///
/// The header block is everything before the first blank line (`"\n\n"`), or
/// the whole input when there is none. Each line is split at its first space
/// into `(name, value)`. A line starting with a space continues the previous
/// header's value: a `\n` plus the line without its leading space is appended.
/// A continuation line with no preceding header is dropped.
///
/// When a `committer` header exists, every header after the first one is
/// returned. Otherwise all headers except `tree`, `parent`, `author` and
/// `committer` are returned.
///
/// Empty lines inside the header block are skipped. They occur for empty
/// input, input that starts with a newline, or input without a blank-line
/// separator that ends in `\n`. Without the skip each would surface as a
/// bogus header with an empty name and value.
pub fn parse_commit_extension_headers(commit_content: &[u8]) -> Vec<(Vec<u8>, Vec<u8>)> {
    let header_block = match find_subslice(commit_content, b"\n\n") {
        Some(end) => &commit_content[..end],
        None => commit_content,
    };

    let mut headers: Vec<(Vec<u8>, Vec<u8>)> = Vec::new();
    for line in header_block.split(|&byte| byte == b'\n') {
        if line.is_empty() {
            // Not a header: a leading or trailing newline artefact.
            continue;
        }
        if let Some((&b' ', continuation)) = line.split_first() {
            // Folded value line; attach it to the header being built.
            if let Some((_, value)) = headers.last_mut() {
                value.push(b'\n');
                value.extend_from_slice(continuation);
            }
            continue;
        }
        let header = match line.iter().position(|&byte| byte == b' ') {
            Some(space) => (line[..space].to_vec(), line[space + 1..].to_vec()),
            None => (line.to_vec(), Vec::new()),
        };
        headers.push(header);
    }

    match headers.iter().position(|(name, _)| name == b"committer") {
        // Everything after `committer` is an extension header, kept verbatim.
        Some(committer) => headers.split_off(committer + 1),
        None => {
            headers.retain(|(name, _)| {
                !matches!(
                    name.as_slice(),
                    b"tree" | b"parent" | b"author" | b"committer"
                )
            });
            headers
        }
    }
}
/// Returns the index of the first occurrence of `needle` in `haystack`.
///
/// Yields `None` for an empty needle or a needle longer than the haystack.
fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    let width = needle.len();
    if width == 0 || haystack.len() < width {
        return None;
    }
    // The guard above makes `haystack.len() - width` safe from underflow.
    let last_start = haystack.len() - width;
    (0..=last_start).find(|&start| &haystack[start..start + width] == needle)
}
// Unit tests for state identity, hash caching, hash composition and
// commit extension-header parsing.
#[cfg(test)]
mod tests {
use super::*;
use crate::object::Principal;
// Human-only attribution shared by most tests.
fn sample_attribution() -> Attribution {
Attribution::human(Principal::new("Alice", "alice@example.com"))
}
// A snapshot gets a logical id that is set and distinct from its change id.
#[test]
fn new_snapshot_sets_fresh_logical_identity() {
let state =
State::new_snapshot(ContentHash::compute(b"tree"), vec![], sample_attribution());
let logical_change_id = state
.logical_change_id
.expect("snapshot should set logical identity");
assert_ne!(state.logical_change_id(), state.change_id);
assert_eq!(state.logical_change_id(), logical_change_id);
}
// A refresh keeps the supplied logical id while receiving a new change id.
#[test]
fn new_refresh_preserves_explicit_logical_identity() {
let logical_change_id = ChangeId::from_bytes([7; 16]);
let state = State::new_refresh_of(
ContentHash::compute(b"tree"),
vec![],
sample_attribution(),
logical_change_id,
);
assert_eq!(state.logical_change_id(), logical_change_id);
assert_ne!(state.change_id, logical_change_id);
}
// A merge behaves like a snapshot for identity and reports `is_merge` with two parents.
#[test]
fn new_merge_uses_fresh_logical_identity() {
let state = State::new_merge(
ContentHash::compute(b"tree"),
vec![ChangeId::from_bytes([1; 16]), ChangeId::from_bytes([2; 16])],
sample_attribution(),
);
let logical_change_id = state
.logical_change_id
.expect("merge should set logical identity");
assert_ne!(state.logical_change_id(), state.change_id);
assert_eq!(state.logical_change_id(), logical_change_id);
assert!(state.is_merge());
}
// When the logical id tracks the change id, replacing the change id must
// move the logical id and drop the cached hash.
#[test]
fn with_change_id_invalidates_cached_hash_when_logical_identity_changes() {
let mut state =
State::new_snapshot(ContentHash::compute(b"tree"), vec![], sample_attribution());
let previous_change_id = state.change_id;
state = state.with_logical_change_id(previous_change_id);
let original_hash = state.hash();
let replacement = ChangeId::from_bytes([9; 16]);
let mut updated = state.with_change_id(replacement);
assert_eq!(updated.logical_change_id(), replacement);
assert_ne!(updated.hash(), original_hash);
assert_eq!(updated.hash(), updated.compute_hash());
}
// Two states differing only in the agent segment id must hash differently.
#[test]
fn agent_segment_is_part_of_state_hash() {
let principal = Principal::new("Alice", "alice@example.com");
let attribution_a = Attribution::with_agent(
principal.clone(),
crate::object::Agent::new("openai", "gpt-5").with_session("sess-1", "seg-1"),
);
let attribution_b = Attribution::with_agent(
principal,
crate::object::Agent::new("openai", "gpt-5").with_session("sess-1", "seg-2"),
);
let tree = ContentHash::compute(b"tree");
let timestamp = Utc::now();
let logical_change_id = ChangeId::from_bytes([3; 16]);
let state_a = State::new_snapshot(tree, vec![], attribution_a)
.with_logical_change_id(logical_change_id)
.with_timestamp(timestamp);
let state_b = State::new_snapshot(tree, vec![], attribution_b)
.with_logical_change_id(logical_change_id)
.with_timestamp(timestamp);
assert_ne!(state_a.compute_hash(), state_b.compute_hash());
}
// Root snapshot with human attribution; ids and timestamp differ on every call.
fn sample_state() -> State {
State::new_snapshot(ContentHash::compute(b"tree"), vec![], sample_attribution())
}
// Primes the hash cache, applies `mutate`, then checks that the cached
// value was dropped and the new hash matches a fresh computation.
fn assert_mutator_invalidates_cached_hash(
mut state: State,
mutate: impl FnOnce(State) -> State,
) {
let original_hash = state.hash();
let mut updated = mutate(state);
assert_ne!(updated.hash(), original_hash);
assert_eq!(updated.hash(), updated.compute_hash());
}
#[test]
fn with_intent_invalidates_cached_hash() {
assert_mutator_invalidates_cached_hash(sample_state(), |state| {
state.with_intent("capture intent")
});
}
#[test]
fn with_confidence_invalidates_cached_hash() {
assert_mutator_invalidates_cached_hash(sample_state(), |state| state.with_confidence(0.9));
}
#[test]
fn with_verification_invalidates_cached_hash() {
assert_mutator_invalidates_cached_hash(sample_state(), |state| {
state.with_verification(Verification::new().with_tests_passed(true))
});
}
#[test]
fn with_status_invalidates_cached_hash() {
assert_mutator_invalidates_cached_hash(sample_state(), |state| {
state.with_status(Status::Published)
});
}
// The hash uses whole seconds, so the timestamp is shifted by a full second.
#[test]
fn with_timestamp_invalidates_cached_hash() {
assert_mutator_invalidates_cached_hash(sample_state(), |state| {
state.with_timestamp(Utc::now() + chrono::Duration::seconds(1))
});
}
// Risk signals, review signatures, discussions and structured conflicts
// must leave the hash unchanged.
#[test]
fn w1_tail_fields_are_not_part_of_state_hash() {
let mut bare = sample_state();
let bare_hash = bare.hash();
let mut decorated = sample_state()
.with_change_id(bare.change_id)
.with_logical_change_id(bare.logical_change_id())
.with_risk_signals(ContentHash::compute(b"risk-signals-blob"))
.with_review_signatures(ContentHash::compute(b"review-signatures-blob"))
.with_discussions(ContentHash::compute(b"discussions-blob"))
.with_structured_conflicts(ContentHash::compute(b"conflicts-blob"));
// Align the only remaining hashed difference between the two states.
decorated.created_at = bare.created_at;
assert_eq!(
decorated.hash(),
bare_hash,
"W1 tail fields must not affect the state hash"
);
}
// Each fidelity field (committer, tz offsets, authored_at, raw message,
// extra headers) must change the hash when set.
#[test]
fn fidelity_fields_are_part_of_state_hash() {
let base = sample_state();
let base_hash = base.compute_hash();
let with_committer = sample_state()
.with_change_id(base.change_id)
.with_logical_change_id(base.logical_change_id());
let mut with_committer =
with_committer.with_committer(Principal::new("Carol", "carol@example.com"));
with_committer.created_at = base.created_at;
assert_ne!(
with_committer.hash(),
base_hash,
"committer must affect the state hash"
);
for mutate in [
|s: State| s.with_tz_offsets(3600, -7200),
|s: State| s.with_authored_at(Utc::now() + chrono::Duration::seconds(1)),
|s: State| s.with_raw_message("verbatim body\n"),
|s: State| {
s.with_extra_headers(vec![(
b"gpgsig".to_vec(),
b"-----BEGIN PGP SIGNATURE-----\n".to_vec(),
)])
},
|s: State| s.with_extra_headers(vec![(b"mergetag".to_vec(), b"x".to_vec())]),
] {
let seeded = sample_state()
.with_change_id(base.change_id)
.with_logical_change_id(base.logical_change_id());
let mut decorated = mutate(seeded);
decorated.created_at = base.created_at;
assert_ne!(
decorated.hash(),
base_hash,
"fidelity field must affect the state hash"
);
}
}
// Golden vector: pins the core-only (pre-fidelity) digest so the legacy
// hash format cannot drift unnoticed.
#[test]
fn pre_fidelity_hash_matches_legacy_golden_vector() {
let state = State::new_snapshot(
ContentHash::compute(b"issue-633-tree"),
vec![ChangeId::from_bytes([0x11; 16])],
Attribution::with_agent(
Principal::new("Legacy Author", "legacy@example.com"),
crate::object::Agent::new("openai", "gpt-5")
.with_session("session-633", "segment-001")
.with_policy("policy-legacy"),
),
)
.with_logical_change_id(ChangeId::from_bytes([0x63; 16]))
.with_intent("freeze pre-565 hash")
.with_confidence(0.875)
.with_timestamp(DateTime::from_timestamp(1_700_000_000, 0).expect("valid timestamp"))
.with_committer(Principal::new("Legacy Committer", "committer@example.com"))
.with_tz_offsets(3600, -18000)
.with_authored_at(DateTime::from_timestamp(1_699_999_000, 0).expect("valid timestamp"))
.with_raw_message(b"legacy commit message\n")
.with_extra_headers(vec![(b"encoding".to_vec(), b"UTF-8".to_vec())])
.with_status(Status::Published);
let legacy_hash = state.compute_hash_pre_fidelity();
assert_eq!(
legacy_hash.to_hex(),
"b89e1b40e681a1bf88679db7cfcacdafb1f370bc40ed5d50760dae1d4ab49dab",
);
assert_ne!(
legacy_hash,
state.compute_hash(),
"fixture must distinguish the pre-#565 legacy path from the current hash",
);
}
// Extra headers are hashed in stored order, so swapping two entries changes the hash.
#[test]
fn extra_headers_order_affects_hash() {
let base = sample_state();
let one = sample_state()
.with_change_id(base.change_id)
.with_logical_change_id(base.logical_change_id());
let mut one = one.with_extra_headers(vec![
(b"a".to_vec(), b"1".to_vec()),
(b"b".to_vec(), b"2".to_vec()),
]);
one.created_at = base.created_at;
let two = sample_state()
.with_change_id(base.change_id)
.with_logical_change_id(base.logical_change_id());
let mut two = two.with_extra_headers(vec![
(b"b".to_vec(), b"2".to_vec()),
(b"a".to_vec(), b"1".to_vec()),
]);
two.created_at = base.created_at;
assert_ne!(one.hash(), two.hash());
}
// The cached hash equals a fresh computation when all fidelity fields are set.
#[test]
fn fidelity_fields_hash_is_stable() {
let mut state = sample_state()
.with_committer(Principal::new("Dave", "dave@example.com"))
.with_tz_offsets(3600, 0)
.with_authored_at(Utc::now())
.with_raw_message("body\n")
.with_extra_headers(vec![
(b"gpgsig".to_vec(), b"sig".to_vec()),
(b"k".to_vec(), b"v".to_vec()),
]);
assert_eq!(state.hash(), state.compute_hash());
}
// Invalid UTF-8 in the raw message survives storage and a MessagePack
// round-trip, and yields the same hash afterwards.
#[test]
fn non_utf8_raw_message_is_byte_preserved() {
let raw = b"caf\xe9\n".to_vec();
assert!(
String::from_utf8(raw.clone()).is_err(),
"test fixture must be invalid UTF-8 to be meaningful"
);
let mut state = sample_state().with_raw_message(&raw);
assert_eq!(
state.raw_message.as_deref(),
Some(raw.as_slice()),
"raw bytes preserved verbatim"
);
let bytes = rmp_serde::to_vec(&state).expect("serialize state");
let back: State = rmp_serde::from_slice(&bytes).expect("deserialize state");
assert_eq!(back.raw_message.as_deref(), Some(raw.as_slice()));
let mut back = back;
assert_eq!(state.hash(), back.hash());
assert_eq!(back.hash(), back.compute_hash());
}
// Bytes after an embedded NUL still influence the hash (the message is length-prefixed).
#[test]
fn raw_message_with_nul_byte_changes_hash() {
let base = sample_state();
let with_nul = sample_state()
.with_change_id(base.change_id)
.with_logical_change_id(base.logical_change_id());
let mut a = with_nul.with_raw_message(b"a\x00b");
a.created_at = base.created_at;
let other = sample_state()
.with_change_id(base.change_id)
.with_logical_change_id(base.logical_change_id());
let mut b = other.with_raw_message(b"a\x00c");
b.created_at = base.created_at;
assert_ne!(a.hash(), b.hash());
}
// Headers after `committer` are returned in wire order with continuation
// lines folded, including a continuation line that holds only a space.
#[test]
fn parse_extension_headers_preserves_noncanonical_wire_order() {
let lines: &[&[u8]] = &[
b"tree 1111111111111111111111111111111111111111",
b"parent 2222222222222222222222222222222222222222",
b"author Alice <alice@example.com> 1700000000 +0000",
b"committer Bob <bob@example.com> 1700000100 +0000",
b"x-custom custom value",
b"gpgsig -----BEGIN PGP SIGNATURE-----",
b" sig-line-1",
b" -----END PGP SIGNATURE-----",
b"encoding ISO-8859-1",
b"mergetag object 3333333333333333333333333333333333333333",
b" type commit",
b" tag sidetag",
b" tagger Carol <carol@example.com> 1700000050 +0000",
b" ", b" signed side tag",
b"", b"the commit message",
b"",
];
let content = lines.join(&b'\n');
let headers = parse_commit_extension_headers(&content);
let expected: Vec<(Vec<u8>, Vec<u8>)> = vec![
(b"x-custom".to_vec(), b"custom value".to_vec()),
(
b"gpgsig".to_vec(),
b"-----BEGIN PGP SIGNATURE-----\nsig-line-1\n-----END PGP SIGNATURE-----"
.to_vec(),
),
(b"encoding".to_vec(), b"ISO-8859-1".to_vec()),
(
b"mergetag".to_vec(),
b"object 3333333333333333333333333333333333333333\ntype commit\ntag sidetag\ntagger Carol <carol@example.com> 1700000050 +0000\n\nsigned side tag".to_vec(),
),
];
assert_eq!(headers, expected);
}
// A commit with only tree/author/committer headers has no extension headers.
#[test]
fn parse_extension_headers_empty_when_only_core_headers() {
let content: &[u8] = b"\
tree 1111111111111111111111111111111111111111\n\
author Alice <alice@example.com> 1700000000 +0000\n\
committer Bob <bob@example.com> 1700000100 +0000\n\
\n\
just a message\n";
assert!(parse_commit_extension_headers(content).is_empty());
}
}