use super::envelopes::{
build_capture_envelope_json, build_publish_envelope_json, build_transform_envelope_json,
verify_publish_envelope_json,
};
#[cfg(feature = "bao-range-proofs")]
use super::hashing::bao_range_proof_package_json;
use super::hashing::{
batch_blake3_hash_file_roots, blake3_content_root, blake3_hash, blake3_hash_file_root,
blake3_hash_mt, build_checkpoint_manifest_json, build_delivery_proof_package_json,
full_root_proof_package_json, verify_delivery_proof_package_json,
verify_delivery_proof_package_report_json, verify_full_root_proof_package_json,
};
use super::manifests::{compute_source_root_hash_json, validate_source_manifest_json};
use crate::checkpoint::{canonical_checkpoint_manifest_payload_bytes, CheckpointManifest};
use crate::content::{compute_content_descriptor_hash, RangeProofPackage};
use crate::envelope::{make_attestation, ArtifactRecord};
use crate::transform_pipeline::{build_transform_envelope, TransformStageInput};
use crate::trust::TrustPolicy;
use serde_json::Value;
use std::io::Write;
use tempfile::NamedTempFile;
/// Fixed 64-hex-character signing key shared by the tests in this module.
///
/// It is passed as the signing-key argument to `make_attestation` and to the
/// `build_*_envelope_json` bindings so that signatures are reproducible.
/// Test fixture only.
const TEST_SIGNING_KEY_HEX: &str =
"4f3edf983ac636a65a842ce7c78d9aa706d3b113bce036f9a4f5762b76f70f18";
/// Builds a signed transform envelope whose single output artifact carries
/// `content_root_hash`, and returns the pieces the delivery-proof tests consume.
///
/// Returns `(lineage_envelopes, signature_bundle, lineage_refs)`, each a
/// one-element vector:
/// * the envelope serialized to JSON,
/// * the envelope's attestation serialized to JSON,
/// * a `transform://<subject_id>` reference.
///
/// # Panics
///
/// Panics if creating an attestation or serializing to JSON fails.
fn signed_transform_envelope_bundle(
    content_root_hash: &str,
) -> (Vec<String>, Vec<String>, Vec<String>) {
    const SIGNER: &str = "transformer";
    const ISSUED_AT: &str = "2026-01-01T00:00:00Z";

    // Attestation over an empty payload. It supplies the `key_id` for the stage
    // input and is handed to `build_transform_envelope`; the envelope's
    // attestation list is overwritten with the real signature further down.
    let seed = make_attestation(SIGNER, TEST_SIGNING_KEY_HEX, ISSUED_AT, b"")
        .expect("seed attestation");

    let stage = TransformStageInput {
        schema_version: "1.0.0".to_string(),
        issued_at: ISSUED_AT.to_string(),
        subject_id: "transform-1".to_string(),
        transform_job_id: "job-1".to_string(),
        transform_stage: "raw_to_release".to_string(),
        input_refs: vec!["obj://raw/1".to_string()],
        output_refs: vec!["obj://release/1".to_string()],
        input_artifacts: vec![ArtifactRecord {
            artifact_id: "input-1".to_string(),
            artifact_ref: "obj://raw/1".to_string(),
            content_root_hash: "input-root".to_string(),
        }],
        output_artifacts: vec![ArtifactRecord {
            artifact_id: "artifact-1".to_string(),
            artifact_ref: "obj://release/1".to_string(),
            content_root_hash: content_root_hash.to_string(),
        }],
        toolchain: "rust".to_string(),
        parameters_ref: "cfg://1".to_string(),
        parameters_hash: "cfg-hash".to_string(),
        determinism_profile: "det-v1".to_string(),
        runtime_env_ref: Some("oci://img".to_string()),
        runtime_env_hash: Some("img-hash".to_string()),
        provenance_start_mode: "transport_capture".to_string(),
        source_manifest_ref: None,
        source_manifest_hash: None,
        source_root_hash: None,
        transform_spec_ref: None,
        transform_spec_hash: None,
        chunking_profile_ref: None,
        chunking_profile_hash: None,
        execution_manifest_ref: None,
        execution_manifest_hash: None,
        runtime_manifest_ref: None,
        runtime_manifest_hash: None,
        key_id: seed.key_id.clone(),
    };

    // `seed` is not needed afterwards, so it is moved rather than cloned.
    let mut envelope = build_transform_envelope(&stage, seed);

    // Sign the envelope's canonical attestation payload.
    let attestation = make_attestation(
        SIGNER,
        TEST_SIGNING_KEY_HEX,
        ISSUED_AT,
        &envelope.canonical_attestation_payload_bytes(),
    )
    .expect("transform attestation");

    // Serialize the attestation before moving it into the envelope so that no
    // clone is required.
    let attestation_json =
        serde_json::to_string(&attestation).expect("serialize transform attestation");
    envelope.attestations = vec![attestation];

    let envelope_json = serde_json::to_string(&envelope).expect("serialize transform envelope");
    let lineage_ref = format!("transform://{}", envelope.subject_id);

    (vec![envelope_json], vec![attestation_json], vec![lineage_ref])
}
/// Signs an unsigned checkpoint manifest with the test key.
///
/// The manifest is parsed from `unsigned_checkpoint_json`, an attestation is
/// made over its canonical payload bytes, and the serialized attestation becomes
/// the sole entry of `checkpoint_signature_bundle`. The updated manifest is
/// returned as JSON.
///
/// # Panics
///
/// Panics if parsing, attestation creation, or serialization fails.
fn signed_checkpoint_manifest_json(unsigned_checkpoint_json: &str) -> String {
    let mut manifest: CheckpointManifest =
        serde_json::from_str(unsigned_checkpoint_json).expect("parse checkpoint manifest");

    let payload = canonical_checkpoint_manifest_payload_bytes(&manifest);
    let signature = make_attestation(
        "checkpoint-signer",
        TEST_SIGNING_KEY_HEX,
        "2026-01-01T00:00:00Z",
        &payload,
    )
    .expect("checkpoint attestation");

    let signature_json =
        serde_json::to_string(&signature).expect("serialize checkpoint attestation");
    manifest.checkpoint_signature_bundle = vec![signature_json];

    serde_json::to_string(&manifest).expect("serialize signed checkpoint manifest")
}
/// `blake3_hash` and `blake3_hash_mt` must return the same value for the same
/// input (the second argument `4` is presumably a thread count; confirm against
/// the binding's signature).
#[test]
fn python_binding_hash_functions_match() {
    let input = b"hello-python-binding";
    assert_eq!(blake3_hash(input), blake3_hash_mt(input, 4));
}
/// Hashing a small temp file through `blake3_content_root` succeeds and yields
/// a result of length 32.
#[test]
fn python_binding_blake3_content_root_returns_32_bytes() {
    let mut tmp = NamedTempFile::new().expect("temp file");
    tmp.write_all(b"binding-file-content").expect("write");

    let root = blake3_content_root(tmp.path().to_str().expect("utf8 path"), 4, Some(2))
        .expect("content root");

    assert_eq!(root.len(), 32);
}
/// Hashing a small temp file through `blake3_hash_file_root` succeeds and
/// yields a result of length 32.
#[test]
fn python_binding_blake3_hash_file_root_returns_32_bytes() {
    let mut tmp = NamedTempFile::new().expect("temp file");
    tmp.write_all(b"streaming-bindings-content")
        .expect("write");

    let root = blake3_hash_file_root(tmp.path().to_str().expect("utf8 path"), 4, Some(2))
        .expect("streaming hash file");

    assert_eq!(root.len(), 32);
}
/// Batch hashing two temp files returns exactly two results, each of length 32.
#[test]
fn python_binding_batch_blake3_hash_file_roots_returns_roots() {
    let mut first = NamedTempFile::new().expect("temp file a");
    first.write_all(b"file-a").expect("write a");
    let mut second = NamedTempFile::new().expect("temp file b");
    second.write_all(b"file-b").expect("write b");

    let paths: Vec<String> = [&first, &second]
        .iter()
        .map(|file| file.path().to_string_lossy().into_owned())
        .collect();

    let roots = batch_blake3_hash_file_roots(paths, 4, Some(2)).expect("batch root hashing");

    assert_eq!(roots.len(), 2);
    for root in &roots {
        assert_eq!(root.len(), 32);
    }
}
/// Builds a capture envelope for a `"source"`-role session with two payload
/// segments and checks that `capture_role` and `capture_actor_id` appear
/// unchanged in the resulting JSON.
#[test]
fn python_binding_build_capture_envelope_json_supports_source_capture() {
    let session_json = serde_json::json!({
        "schema_version": "1.0.0",
        "issued_at": "2026-01-01T00:00:00Z",
        "subject_id": "capture-1",
        "capture_role": "source",
        "capture_actor_id": "sensor-1",
        "capture_system_id": "sensor-pipeline-1",
        "capture_window": "2026-01-01T00:00:00Z/2026-01-01T00:05:00Z",
        "input_refs": [],
        "output_refs": ["obj://raw/1"],
        "content_descriptor_ref": null,
        "content_descriptor_hash": null,
        "key_id": "seed-key",
        "policy_profile_id": null
    })
    .to_string();
    let segments_json = serde_json::json!([
        {"segment_id": "seg-1", "payload": [97, 98, 99]},
        {"segment_id": "seg-2", "payload": [100, 101, 102]}
    ])
    .to_string();

    let envelope_json = build_capture_envelope_json(
        &session_json,
        &segments_json,
        "sensor-1",
        TEST_SIGNING_KEY_HEX,
        "2026-01-01T00:00:00Z",
    )
    .expect("build capture envelope");
    let envelope: Value = serde_json::from_str(&envelope_json).expect("parse capture");

    // A missing or non-string field reads as "" and therefore fails the check.
    for (field, expected) in [("capture_role", "source"), ("capture_actor_id", "sensor-1")] {
        assert_eq!(
            envelope
                .get(field)
                .and_then(Value::as_str)
                .unwrap_or_default(),
            expected,
            "unexpected value for `{}`",
            field
        );
    }
}
/// Builds a publish envelope, trusts the key id it reports, verifies it in
/// `"sampled"` mode, and expects the signature, trust, and lineage states of the
/// report to be `"Passed"`.
///
/// Signs with the module-level `TEST_SIGNING_KEY_HEX` so the key has a single
/// definition across all tests.
#[test]
fn python_binding_build_publish_and_verify_json_roundtrip() {
    let publish_input = serde_json::json!({
        "schema_version": "1.0.0",
        "issued_at": "2026-01-01T00:20:00Z",
        "subject_id": "publish-1",
        "dataset_id": "sst",
        "dataset_version": "v1",
        "input_refs": ["obj://zarr/1"],
        "output_refs": ["obj://release/1"],
        "published_artifacts": [{
            "artifact_id": "artifact-1",
            "content_root_hash": "0909090909090909090909090909090909090909090909090909090909090909",
            "content_descriptor_ref": null,
            "content_descriptor_hash": null,
            "media_type": "application/vnd+zarr"
        }],
        "primary_artifact_id": "artifact-1",
        "checkpoint_manifest_ref": "checkpoint://1",
        "checkpoint_manifest_hash": "0909090909090909090909090909090909090909090909090909090909090909",
        "checkpoint_id": "checkpoint-1",
        "checkpoint_log_root_hash": "0909090909090909090909090909090909090909090909090909090909090909",
        "lineage_refs": ["capture://1", "transform://1"],
        "verification_policy_id": "verify-default",
        "key_id": "demo-key-id",
        "stac_refs": [],
        "reward_context_ref": null,
        "reward_context_hash": null,
        "provenance_start_mode": "transport_capture",
        "bootstrap_origin_label": null,
        "reward_eligible": false
    });

    let envelope_json = build_publish_envelope_json(
        &publish_input.to_string(),
        "publisher",
        TEST_SIGNING_KEY_HEX,
        "2026-01-01T00:00:00Z",
    )
    .expect("build publish json envelope");

    // Trust whatever key id the built envelope reports rather than the
    // placeholder supplied in the input.
    let envelope_value: Value = serde_json::from_str(&envelope_json).expect("parse envelope");
    let key_id = envelope_value
        .get("key_id")
        .and_then(Value::as_str)
        .expect("key_id present")
        .to_string();
    let mut trust_policy = TrustPolicy::new();
    trust_policy.allow_key_with_reason(&key_id, "2026-01-01T00:00:00Z", "roundtrip test");
    let trust_policy_json = serde_json::to_string(&trust_policy).expect("serialize trust policy");

    let report_json = verify_publish_envelope_json(&envelope_json, "sampled", &trust_policy_json)
        .expect("verify publish envelope");
    let report_value: Value = serde_json::from_str(&report_json).expect("parse report");

    // A missing or non-string state reads as "" and therefore fails the check.
    for state in ["signature_state", "trust_state", "lineage_state"] {
        assert_eq!(
            report_value
                .get(state)
                .and_then(Value::as_str)
                .unwrap_or_default(),
            "Passed",
            "unexpected value for `{}`",
            state
        );
    }
}
/// A publish input with `provenance_start_mode` set to `"dataset_bootstrap"` and
/// a null `bootstrap_origin_label` must be rejected, and the error text must
/// mention `bootstrap_origin_label`.
///
/// Signs with the module-level `TEST_SIGNING_KEY_HEX` so the key has a single
/// definition across all tests.
#[test]
fn python_binding_rejects_dataset_bootstrap_publish_without_origin_label() {
    let publish_input = serde_json::json!({
        "schema_version": "1.0.0",
        "issued_at": "2026-01-01T00:20:00Z",
        "subject_id": "publish-bootstrap",
        "dataset_id": "sst",
        "dataset_version": "v1",
        "input_refs": ["obj://zarr/1"],
        "output_refs": ["obj://release/1"],
        "published_artifacts": [{
            "artifact_id": "artifact-1",
            "content_root_hash": "0909090909090909090909090909090909090909090909090909090909090909",
            "content_descriptor_ref": null,
            "content_descriptor_hash": null,
            "media_type": "application/vnd+zarr"
        }],
        "primary_artifact_id": "artifact-1",
        "checkpoint_manifest_ref": "checkpoint://1",
        "checkpoint_manifest_hash": "0909090909090909090909090909090909090909090909090909090909090909",
        "checkpoint_id": "checkpoint-1",
        "checkpoint_log_root_hash": "0909090909090909090909090909090909090909090909090909090909090909",
        "lineage_refs": ["transform://1"],
        "verification_policy_id": "verify-default",
        "key_id": "demo-key-id",
        "stac_refs": [],
        "reward_context_ref": null,
        "reward_context_hash": null,
        "provenance_start_mode": "dataset_bootstrap",
        "bootstrap_origin_label": null,
        "reward_eligible": false
    });

    let err = build_publish_envelope_json(
        &publish_input.to_string(),
        "publisher",
        TEST_SIGNING_KEY_HEX,
        "2026-01-01T00:00:00Z",
    )
    .expect_err("bootstrap publish without origin label must fail");

    assert!(err.to_string().contains("bootstrap_origin_label"));
}
/// A transform input with `provenance_start_mode` set to `"dataset_bootstrap"`
/// but null source-manifest and transform-spec fields must be rejected, and the
/// error text must mention both `source_manifest_ref` and `transform_spec_ref`.
///
/// Signs with the module-level `TEST_SIGNING_KEY_HEX` so the key has a single
/// definition across all tests.
#[test]
fn python_binding_rejects_dataset_bootstrap_transform_without_source_and_spec_fields() {
    let transform_input = serde_json::json!({
        "schema_version": "1.0.0",
        "issued_at": "2026-01-01T00:10:00Z",
        "subject_id": "transform-bootstrap",
        "transform_job_id": "job-bootstrap",
        "transform_stage": "nc_to_zarr",
        "input_refs": ["source://sst"],
        "output_refs": ["zarr://sst"],
        "input_artifacts": [{
            "artifact_id": "input-1",
            "artifact_ref": "source://sst",
            "content_root_hash": "0101010101010101010101010101010101010101010101010101010101010101"
        }],
        "output_artifacts": [{
            "artifact_id": "output-1",
            "artifact_ref": "zarr://sst",
            "content_root_hash": "0202020202020202020202020202020202020202020202020202020202020202"
        }],
        "toolchain": "python-xarray",
        "parameters_ref": "params://1",
        "parameters_hash": "0303030303030303030303030303030303030303030303030303030303030303",
        "determinism_profile": "strict-v1",
        "runtime_env_ref": null,
        "runtime_env_hash": null,
        "provenance_start_mode": "dataset_bootstrap",
        "source_manifest_ref": null,
        "source_manifest_hash": null,
        "source_root_hash": null,
        "transform_spec_ref": null,
        "transform_spec_hash": null,
        "execution_manifest_ref": null,
        "execution_manifest_hash": null,
        "runtime_manifest_ref": null,
        "runtime_manifest_hash": null,
        "key_id": "demo-key-id"
    });

    let err = build_transform_envelope_json(
        &transform_input.to_string(),
        "publisher",
        TEST_SIGNING_KEY_HEX,
        "2026-01-01T00:00:00Z",
    )
    .expect_err("bootstrap transform without required fields must fail");

    // Render the error once and check it names both missing fields.
    let message = err.to_string();
    assert!(message.contains("source_manifest_ref"));
    assert!(message.contains("transform_spec_ref"));
}
/// End-to-end roundtrip over `Cargo.toml` using a full-root proof package:
/// build and verify the proof package, wrap it in a signed checkpoint and a
/// delivery package, then check that the delivery report passes every state and
/// that verification returns the file's original bytes.
#[test]
fn python_binding_full_root_checkpoint_and_delivery_proof_roundtrip() {
    const FIXTURE_PATH: &str = "Cargo.toml";

    let expected = std::fs::read(FIXTURE_PATH).expect("read Cargo.toml");

    // Full-root proof package: build, parse, and verify against the file.
    let package_json =
        full_root_proof_package_json(FIXTURE_PATH, 16, Some(1)).expect("full-root proof package");
    let package: RangeProofPackage =
        serde_json::from_str(&package_json).expect("parse full-root proof package");
    let verified_full = verify_full_root_proof_package_json(&package_json, FIXTURE_PATH)
        .expect("verify full-root proof package");
    assert_eq!(verified_full, expected);

    // Lineage material bound to the package's content root.
    let descriptor = &package.content_descriptor;
    let (lineage_envelopes, signature_bundle, lineage_refs) =
        signed_transform_envelope_bundle(&descriptor.content_root_hash);

    let descriptor_hash_hex = hex::encode(compute_content_descriptor_hash(descriptor).0);
    let published_artifacts_json = serde_json::json!([
        {
            "artifact_id": package.artifact_id,
            "content_root_hash": descriptor.content_root_hash,
            "content_descriptor_ref": "descriptor://artifact-1",
            "content_descriptor_hash": descriptor_hash_hex,
            "media_type": descriptor.media_type
        }
    ])
    .to_string();

    // Checkpoint manifest: build unsigned, then sign with the test key.
    let unsigned_checkpoint_json = build_checkpoint_manifest_json(
        "checkpoint-1",
        "2026-01-01T00:00:00Z/2026-01-01T01:00:00Z",
        &published_artifacts_json,
        lineage_refs,
        Vec::new(),
        None,
    )
    .expect("checkpoint json");
    let checkpoint_json = signed_checkpoint_manifest_json(&unsigned_checkpoint_json);
    let checkpoint_manifest: CheckpointManifest =
        serde_json::from_str(&checkpoint_json).expect("parse signed checkpoint manifest");

    let delivery_json = build_delivery_proof_package_json(
        &package_json,
        &checkpoint_json,
        lineage_envelopes,
        signature_bundle,
    )
    .expect("delivery package json");

    // The report must show "Passed" for every state it exposes here.
    let report_json = verify_delivery_proof_package_report_json(
        &delivery_json,
        Some(FIXTURE_PATH),
        Some(&checkpoint_manifest.checkpoint_log_root_hash),
    )
    .expect("delivery package report");
    let report: Value = serde_json::from_str(&report_json).expect("parse delivery report");
    for state in [
        "signature_state",
        "binding_state",
        "lineage_state",
        "content_state",
        "checkpoint_state",
    ] {
        assert_eq!(
            report
                .get(state)
                .and_then(Value::as_str)
                .unwrap_or_default(),
            "Passed",
            "unexpected value for `{}`",
            state
        );
    }

    // Plain verification must hand back the original file contents.
    let verified = verify_delivery_proof_package_json(
        &delivery_json,
        Some(FIXTURE_PATH),
        Some(&checkpoint_manifest.checkpoint_log_root_hash),
    )
    .expect("verify delivery package");
    assert_eq!(verified, expected);
}
/// Roundtrip over `Cargo.toml` using a bao range proof package (arguments
/// `0, 32, 16, Some(1)`): wrap it in a signed checkpoint and a delivery package,
/// verify without a file path, and expect a result of length 32.
#[cfg(feature = "bao-range-proofs")]
#[test]
fn python_binding_checkpoint_and_delivery_proof_roundtrip() {
    let package_json = bao_range_proof_package_json("Cargo.toml", 0, 32, 16, Some(1))
        .expect("range proof package");
    let package: RangeProofPackage =
        serde_json::from_str(&package_json).expect("parse bao range package");

    // Lineage material bound to the package's content root.
    let descriptor = &package.content_descriptor;
    let (lineage_envelopes, signature_bundle, lineage_refs) =
        signed_transform_envelope_bundle(&descriptor.content_root_hash);

    let descriptor_hash_hex = hex::encode(compute_content_descriptor_hash(descriptor).0);
    let published_artifacts_json = serde_json::json!([
        {
            "artifact_id": package.artifact_id,
            "content_root_hash": descriptor.content_root_hash,
            "content_descriptor_ref": "descriptor://artifact-1",
            "content_descriptor_hash": descriptor_hash_hex,
            "media_type": descriptor.media_type
        }
    ])
    .to_string();

    // Checkpoint manifest: build unsigned, then sign with the test key.
    let unsigned_checkpoint_json = build_checkpoint_manifest_json(
        "checkpoint-1",
        "2026-01-01T00:00:00Z/2026-01-01T01:00:00Z",
        &published_artifacts_json,
        lineage_refs,
        Vec::new(),
        None,
    )
    .expect("checkpoint json");
    let checkpoint_json = signed_checkpoint_manifest_json(&unsigned_checkpoint_json);
    let checkpoint_manifest: CheckpointManifest =
        serde_json::from_str(&checkpoint_json).expect("parse signed checkpoint manifest");

    let delivery_json = build_delivery_proof_package_json(
        &package_json,
        &checkpoint_json,
        lineage_envelopes,
        signature_bundle,
    )
    .expect("delivery package json");

    let verified = verify_delivery_proof_package_json(
        &delivery_json,
        None,
        Some(&checkpoint_manifest.checkpoint_log_root_hash),
    )
    .expect("verify delivery package");
    assert_eq!(verified.len(), 32);
}
/// Computes a source root hash for two file entries, checks that a manifest
/// carrying that hash validates, then bumps `source_file_count` to 3 and
/// expects validation to fail with an error naming `source_file_count`.
#[test]
fn python_binding_source_manifest_root_and_validation() {
    let files = serde_json::json!([
        {
            "source_uri": "s3://bucket/b.nc",
            "content_hash": "h2",
            "byte_length": 2,
            "observed_mtime": "2026-01-02T00:00:00Z"
        },
        {
            "source_uri": "s3://bucket/a.nc",
            "content_hash": "h1",
            "byte_length": 1,
            "observed_mtime": "2026-01-01T00:00:00Z"
        }
    ]);
    let files_json = files.to_string();
    let root_hash = compute_source_root_hash_json(&files_json).expect("compute source root hash");

    let mut manifest = serde_json::json!({
        "manifest_id": "m-1",
        "manifest_created_at": "2026-01-01T00:00:00Z",
        "source_dataset_id": "dataset-1",
        "source_files": files,
        "source_file_count": 2,
        "source_root_hash": root_hash,
    });
    let valid_json = manifest.to_string();
    validate_source_manifest_json(&valid_json).expect("manifest validates");

    // Tamper with the declared count only; the two file entries stay as they were.
    manifest["source_file_count"] = serde_json::json!(3);
    let tampered_json = manifest.to_string();
    let err =
        validate_source_manifest_json(&tampered_json).expect_err("tampered manifest must fail");

    let message = err.to_string();
    assert!(message.contains("source_file_count"));
}