use std::collections::BTreeMap;
use std::fmt::Write as _;
use std::io::Read;
use serde_json::Value;
use sha2::{Digest, Sha256};
use super::errors::HashError;
use super::schema::PeriodicReport;
/// Upper bound, in bytes (256 MiB), on the size of a file that `binary_hash`
/// and `compute_file_sha256_hex` are willing to hash.
const BINARY_HASH_MAX_BYTES: u64 = 256 * 1024 * 1024;
/// Derives the `sha256:<hex>` content hash of a periodic report.
///
/// The report is serialised to a JSON tree, the post-signature fields are
/// overwritten with their fixed placeholders, the tree is canonicalised
/// (object keys sorted recursively) and the compact JSON bytes are hashed.
///
/// # Errors
///
/// Returns a [`HashError`] when either serde_json serialisation step fails.
pub fn compute_content_hash(report: &PeriodicReport) -> Result<String, HashError> {
    let mut tree = serde_json::to_value(report)?;
    blank_content_hash(&mut tree);

    let serialized = serde_json::to_vec(&canonicalize(tree))?;
    Ok(format_sha256(&serialized))
}
/// Placeholder value written over a field that is excluded from the content
/// hash (see `blank_content_hash`).
#[derive(Clone, Copy)]
enum BlankZero {
/// The field is replaced with JSON `null`.
Null,
/// The field is replaced with the empty JSON string `""`.
EmptyString,
}
/// Fields that `blank_content_hash` overwrites before hashing, expressed as
/// `(parent object key, field key, placeholder kind)`. Changing any of these
/// fields on a report therefore leaves its content hash unchanged.
const POST_SIGN_FIELDS: &[(&str, &str, BlankZero)] = &[
// The hash itself cannot be part of its own input.
("integrity", "content_hash", BlankZero::EmptyString),
("integrity", "signature", BlankZero::Null),
("integrity", "binary_attestation", BlankZero::Null),
("report_metadata", "integrity_level", BlankZero::EmptyString),
];
/// Overwrites every field listed in `POST_SIGN_FIELDS` with its placeholder.
///
/// For each entry whose parent key resolves to a JSON object, the field is
/// inserted (or replaced) with either `null` or `""`; the key is never
/// removed. Entries whose parent is missing or is not an object are skipped.
fn blank_content_hash(v: &mut Value) {
    for &(section, key, kind) in POST_SIGN_FIELDS {
        if let Some(Value::Object(members)) = v.get_mut(section) {
            let placeholder = match kind {
                BlankZero::EmptyString => Value::String(String::new()),
                BlankZero::Null => Value::Null,
            };
            members.insert(key.to_owned(), placeholder);
        }
    }
}
/// Returns `v` with the keys of every object, at any depth, inserted in
/// sorted order.
///
/// Arrays keep their element order but have each element canonicalised;
/// scalars are returned unchanged.
fn canonicalize(v: Value) -> Value {
    match v {
        Value::Array(items) => Value::Array(items.into_iter().map(canonicalize).collect()),
        Value::Object(fields) => {
            // A BTreeMap yields its entries in ascending key order, which
            // fixes the insertion order of the rebuilt object.
            let mut ordered: BTreeMap<String, Value> = BTreeMap::new();
            for (key, child) in fields {
                ordered.insert(key, canonicalize(child));
            }
            Value::Object(ordered.into_iter().collect())
        }
        scalar => scalar,
    }
}
/// Hashes `bytes` with SHA-256 and renders the digest as `sha256:` followed
/// by 64 lowercase hexadecimal characters.
fn format_sha256(bytes: &[u8]) -> String {
    const PREFIX: &str = "sha256:";

    let mut rendered = String::with_capacity(PREFIX.len() + 64);
    rendered.push_str(PREFIX);

    Sha256::digest(bytes)
        .into_iter()
        .fold(rendered, |mut hex, octet| {
            // Writing into a `String` cannot fail, so the result is ignored.
            let _ = write!(hex, "{octet:02x}");
            hex
        })
}
/// Computes the SHA-256 digest of the currently running executable.
///
/// The executable is located with `std::env::current_exe` and streamed
/// through the hasher in 8 KiB chunks. The result is formatted as `sha256:`
/// followed by 64 lowercase hexadecimal characters.
///
/// # Errors
///
/// * Any I/O error from locating, opening or reading the executable.
/// * `ErrorKind::InvalidData` when the executable is larger than
///   `BINARY_HASH_MAX_BYTES`. The cap is enforced both from file metadata
///   (fast path) and while streaming, so a digest of a truncated prefix is
///   never returned, even if metadata is unavailable or the file grows
///   between the size check and the read.
pub fn binary_hash() -> std::io::Result<String> {
    let path = std::env::current_exe()?;
    let file = std::fs::File::open(&path)?;

    // Best-effort fast path: when metadata cannot be read the length is
    // treated as 0 and the streaming check below enforces the cap instead.
    let total_len = file.metadata().map_or(0, |m| m.len());
    if total_len > BINARY_HASH_MAX_BYTES {
        return Err(std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            format!(
                "binary at {} exceeds {} byte cap ({} bytes), refusing to hash a truncated view",
                path.display(),
                BINARY_HASH_MAX_BYTES,
                total_len
            ),
        ));
    }

    // Allow exactly one byte past the cap: if the limit is exhausted after
    // reading, the file is strictly larger than the cap.
    let mut reader = std::io::BufReader::new(file).take(BINARY_HASH_MAX_BYTES + 1);
    let mut hasher = Sha256::new();
    let mut buf = [0u8; 8192];
    loop {
        let n = match reader.read(&mut buf) {
            Ok(n) => n,
            // An interrupted read is non-fatal and must simply be retried.
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        if n == 0 {
            break;
        }
        hasher.update(&buf[..n]);
    }

    if reader.limit() == 0 {
        return Err(std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            format!(
                "binary at {} exceeds {} byte cap, refusing to hash a truncated view",
                path.display(),
                BINARY_HASH_MAX_BYTES
            ),
        ));
    }

    let digest = hasher.finalize();
    let mut out = String::with_capacity(7 + 64);
    out.push_str("sha256:");
    for byte in digest {
        // Writing into a `String` cannot fail, so the result is ignored.
        let _ = write!(out, "{byte:02x}");
    }
    Ok(out)
}
/// Computes the SHA-256 digest of the file at `path` as 64 lowercase
/// hexadecimal characters (no `sha256:` prefix).
///
/// The file is streamed through the hasher in 8 KiB chunks.
///
/// # Errors
///
/// * Any I/O error from opening or reading the file.
/// * `ErrorKind::InvalidData` when the file is larger than
///   `BINARY_HASH_MAX_BYTES`. The cap is enforced both from file metadata
///   (fast path) and while streaming, so a digest of a truncated prefix is
///   never returned, even if metadata is unavailable or the file grows
///   between the size check and the read.
pub fn compute_file_sha256_hex(path: &std::path::Path) -> std::io::Result<String> {
    let file = std::fs::File::open(path)?;

    // Best-effort fast path: when metadata cannot be read the length is
    // treated as 0 and the streaming check below enforces the cap instead.
    let total_len = file.metadata().map_or(0, |m| m.len());
    if total_len > BINARY_HASH_MAX_BYTES {
        return Err(std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            format!(
                "file at {} exceeds {} byte cap ({} bytes)",
                path.display(),
                BINARY_HASH_MAX_BYTES,
                total_len
            ),
        ));
    }

    // Allow exactly one byte past the cap: if the limit is exhausted after
    // reading, the file is strictly larger than the cap.
    let mut reader = std::io::BufReader::new(file).take(BINARY_HASH_MAX_BYTES + 1);
    let mut hasher = Sha256::new();
    let mut buf = [0u8; 8192];
    loop {
        let n = match reader.read(&mut buf) {
            Ok(n) => n,
            // An interrupted read is non-fatal and must simply be retried.
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        if n == 0 {
            break;
        }
        hasher.update(&buf[..n]);
    }

    if reader.limit() == 0 {
        return Err(std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            format!(
                "file at {} exceeds {} byte cap",
                path.display(),
                BINARY_HASH_MAX_BYTES
            ),
        ));
    }

    let digest = hasher.finalize();
    let mut out = String::with_capacity(64);
    for byte in digest {
        // Writing into a `String` cannot fail, so the result is ignored.
        let _ = write!(out, "{byte:02x}");
    }
    Ok(out)
}
// Unit tests for the content-hash pipeline: determinism, sensitivity to
// report content, insensitivity to the blanked post-signature fields, and
// key-order canonicalisation.
#[cfg(test)]
mod tests {
use super::*;
use crate::report::periodic::schema::{
Application, Confidentiality, PeriodicReport, ReportIntent,
};
use crate::report::periodic::test_fixtures;
// Builds the shared fixture: an official, public report holding a single
// G1 application.
fn sample_report() -> PeriodicReport {
test_fixtures::sample_report(
ReportIntent::Official,
Confidentiality::Public,
vec![Application::G1(test_fixtures::sample_g1_application())],
)
}
// Hashing the same report repeatedly yields the same value, formatted as
// "sha256:" followed by 64 hexadecimal characters.
#[test]
fn hash_is_deterministic() {
let r = sample_report();
let first = compute_content_hash(&r).unwrap();
for _ in 0..100 {
assert_eq!(compute_content_hash(&r).unwrap(), first);
}
assert!(first.starts_with("sha256:"));
assert_eq!(first.len(), 7 + 64);
assert!(first[7..].chars().all(|c| c.is_ascii_hexdigit()));
}
// Uses floats with non-terminating binary expansions to check that
// serialising to JSON text and parsing back does not perturb the hash.
#[test]
fn content_hash_survives_json_roundtrip() {
let mut r = sample_report();
r.aggregate.total_energy_kwh = 2.0 / 3.0;
r.aggregate.total_carbon_kgco2eq = 1.0 / 7.0;
r.aggregate.canonical_waste.energy_kwh = 100.0 * 5.0 / 6.0 * 1e-7;
r.aggregate.canonical_waste.carbon_kgco2eq = 10.0 * 5.0 / 6.0 / 1000.0;
r.aggregate.operational_waste.energy_kwh = 1.0 / 11.0;
let before = compute_content_hash(&r).unwrap();
let json = serde_json::to_string(&r).unwrap();
let reparsed: PeriodicReport = serde_json::from_str(&json).unwrap();
let after = compute_content_hash(&reparsed).unwrap();
assert_eq!(before, after, "content_hash must survive a JSON round-trip");
}
// A small change to an aggregate value must change the hash.
#[test]
fn hash_changes_on_aggregate_mutation() {
let r = sample_report();
let baseline = compute_content_hash(&r).unwrap();
let mut mutated = r.clone();
mutated.aggregate.total_energy_kwh += 0.000_001;
let after = compute_content_hash(&mutated).unwrap();
assert_ne!(baseline, after);
}
// The stored content_hash is blanked before hashing, so its current value
// has no influence on the result.
#[test]
fn hash_ignores_existing_content_hash() {
let mut r = sample_report();
r.integrity.content_hash = "sha256:aaaa".to_string();
let first = compute_content_hash(&r).unwrap();
r.integrity.content_hash = "sha256:bbbb".to_string();
let second = compute_content_hash(&r).unwrap();
assert_eq!(first, second);
}
// Two objects that differ only in key order serialise to identical bytes
// after canonicalisation.
#[test]
fn canonicalize_is_key_order_invariant() {
let a = serde_json::json!({ "alpha": 1, "beta": 2, "gamma": 3 });
let b = serde_json::json!({ "gamma": 3, "alpha": 1, "beta": 2 });
let ca = canonicalize(a);
let cb = canonicalize(b);
assert_eq!(
serde_json::to_vec(&ca).unwrap(),
serde_json::to_vec(&cb).unwrap()
);
}
// Key sorting also applies to objects nested inside objects and arrays.
#[test]
fn canonicalize_recurses_into_nested_objects() {
let a = serde_json::json!({
"outer": { "z": 1, "a": 2 },
"list": [{ "b": 1, "a": 2 }]
});
let b = serde_json::json!({
"list": [{ "a": 2, "b": 1 }],
"outer": { "a": 2, "z": 1 }
});
assert_eq!(
serde_json::to_vec(&canonicalize(a)).unwrap(),
serde_json::to_vec(&canonicalize(b)).unwrap(),
);
}
// Blanking keeps the "content_hash" key present and sets it to "".
#[test]
fn hash_blanks_content_hash_without_removing_key() {
let r = sample_report();
let mut v = serde_json::to_value(&r).unwrap();
blank_content_hash(&mut v);
let integrity = v.get("integrity").and_then(Value::as_object).unwrap();
assert!(integrity.contains_key("content_hash"));
assert_eq!(
integrity.get("content_hash"),
Some(&Value::String(String::new()))
);
}
// Checks format_sha256 against the SHA-256 digest of the empty input.
#[test]
fn format_sha256_known_vector() {
let empty = format_sha256(&[]);
assert_eq!(
empty,
"sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
);
}
// Pins the exact set and order of blanked fields so that any change to
// POST_SIGN_FIELDS has to be made deliberately here as well.
#[test]
fn post_sign_fields_inventory_is_locked() {
let expected: &[(&str, &str)] = &[
("integrity", "content_hash"),
("integrity", "signature"),
("integrity", "binary_attestation"),
("report_metadata", "integrity_level"),
];
let actual: Vec<(&str, &str)> = POST_SIGN_FIELDS.iter().map(|(p, f, _)| (*p, *f)).collect();
assert_eq!(actual, expected);
}
// Mutates one field at a time across several report sections; each
// mutation must change the hash. The third assert_ne! argument names the
// mutated field in the failure message.
#[test]
fn hash_changes_when_any_non_blanked_field_mutates() {
use chrono::Datelike as _;
let r = sample_report();
let baseline = compute_content_hash(&r).unwrap();
let mut m = r.clone();
m.organisation.name = format!("{}-edited", m.organisation.name);
assert_ne!(
compute_content_hash(&m).unwrap(),
baseline,
"organisation.name"
);
let mut m = r.clone();
m.aggregate.total_energy_kwh += 1.0;
assert_ne!(
compute_content_hash(&m).unwrap(),
baseline,
"aggregate.total_energy_kwh"
);
let mut m = r.clone();
m.aggregate.canonical_waste.n_plus_one_threshold += 1;
assert_ne!(
compute_content_hash(&m).unwrap(),
baseline,
"aggregate.canonical_waste.n_plus_one_threshold"
);
let mut m = r.clone();
m.methodology.sci_specification = format!("{}-v2", m.methodology.sci_specification);
assert_ne!(
compute_content_hash(&m).unwrap(),
baseline,
"methodology.sci_specification"
);
let mut m = r.clone();
// Clamping to 28 keeps the day valid in every month.
// NOTE(review): if the fixture's from_date falls on day 28, the expression
// below yields 28 again, the date is unchanged and the assertion fails —
// confirm the fixture never uses day 28.
m.period.from_date = m
.period
.from_date
.with_day(m.period.from_date.day().wrapping_add(1).min(28))
.unwrap();
assert_ne!(
compute_content_hash(&m).unwrap(),
baseline,
"period.from_date"
);
}
// Populating the signature, the binary attestation and the integrity
// level (all listed in POST_SIGN_FIELDS) must leave the hash unchanged.
#[test]
fn hash_is_invariant_under_post_sign_locator_addition() {
use crate::report::periodic::schema::{
BinaryAttestationMetadata, IntegrityLevel, SignatureMetadata,
};
let r = sample_report();
let baseline = compute_content_hash(&r).unwrap();
let mut signed = r.clone();
signed.report_metadata.integrity_level = IntegrityLevel::Signed;
signed.integrity.signature = Some(SignatureMetadata {
format: "sigstore-cosign-intoto-v1".to_string(),
bundle_url: "https://example.fr/bundle.sig".to_string(),
signer_identity: "ci@example.fr".to_string(),
signer_issuer: "https://accounts.google.com".to_string(),
rekor_url: "https://rekor.sigstore.dev".to_string(),
rekor_log_index: 42,
signed_at: "2026-05-14T12:00:00Z".to_string(),
});
assert_eq!(compute_content_hash(&signed).unwrap(), baseline);
signed.report_metadata.integrity_level = IntegrityLevel::SignedWithAttestation;
signed.integrity.binary_attestation = Some(BinaryAttestationMetadata {
format: "slsa-provenance-v1".to_string(),
attestation_url: "https://gh/p.intoto.jsonl".to_string(),
builder_id: "https://github.com/actions/runner".to_string(),
git_tag: "v0.7.0".to_string(),
git_commit: "deadbeef".to_string(),
slsa_level: "L2".to_string(),
});
assert_eq!(compute_content_hash(&signed).unwrap(), baseline);
}
// The fixture serialises without the three keys checked below; assigning
// the default TemporalCoverage keeps the hash equal to the baseline, while
// a populated value changes it.
// NOTE(review): presumably default values of these fields are skipped
// during serialisation — confirm against the schema's serde attributes.
#[test]
fn hash_unaffected_by_absent_v1_2_fields_but_changes_when_populated() {
use crate::report::periodic::schema::TemporalCoverage;
let r = sample_report();
let baseline = compute_content_hash(&r).unwrap();
let v = serde_json::to_value(&r).unwrap();
assert!(v["aggregate"].get("temporal_coverage").is_none());
assert!(v["scope_manifest"].get("coverage_basis").is_none());
assert!(v["integrity"].get("cross_period_log").is_none());
let mut same = r.clone();
same.aggregate.temporal_coverage = TemporalCoverage::default();
assert_eq!(compute_content_hash(&same).unwrap(), baseline);
let mut populated = r;
populated.aggregate.temporal_coverage = TemporalCoverage {
temporal_coverage: 0.5,
observed_days: 45,
days_in_period: 90,
largest_gap_days: 10,
};
assert_ne!(compute_content_hash(&populated).unwrap(), baseline);
}
}