trazaeo 0.5.2

Open-source provenance SDK and specification for verifiable EO and climate data workflows
Documentation
use crate::checkpoint::CheckpointArtifact;
use crate::envelope::attestation::Attestation;
use crate::envelope::validation::{
    canonical_signed_bytes, push_if_blank, push_if_blank_option, push_if_empty_vec,
    validate_capture_role, validate_ref_hash_pair, validate_rfc3339, validate_start_mode,
    ValidationResult,
};

/// Envelope whose `envelope_type` is expected to be `"capture"`.
///
/// The per-field notes below name the check that `CaptureEnvelope::validate`
/// applies to each field; the helper checks themselves live in
/// `crate::envelope::validation`. Fields without a note are not inspected by
/// that method.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
pub struct CaptureEnvelope {
    /// Run through `push_if_blank`.
    pub schema_version: String,
    /// Must equal `"capture"`.
    pub envelope_type: String,
    /// Run through `validate_rfc3339`.
    pub issued_at: String,
    pub subject_id: String,
    /// Run through `push_if_blank` and `validate_capture_role`.
    pub capture_role: String,
    /// Run through `push_if_blank`.
    pub capture_actor_id: String,
    /// Run through `push_if_blank`.
    pub capture_system_id: String,
    /// Run through `push_if_blank`.
    pub capture_window: String,
    /// Run through `push_if_empty_vec`; length must equal that of `segment_hashes`.
    pub segment_ids: Vec<String>,
    pub input_refs: Vec<String>,
    /// Run through `push_if_empty_vec`.
    pub output_refs: Vec<String>,
    /// Run through `push_if_empty_vec`; length must equal that of `segment_ids`.
    pub segment_hashes: Vec<String>,
    pub rolling_hash_state: Option<String>,
    /// Run through `push_if_blank`.
    pub content_root_hash: String,
    /// Run through `push_if_blank`.
    pub content_commitment_profile: String,
    /// Must be greater than zero.
    pub chunk_size: usize,
    /// Must be greater than zero.
    pub leaf_count: usize,
    /// Checked together with `content_descriptor_hash` by `validate_ref_hash_pair`.
    pub content_descriptor_ref: Option<String>,
    /// Checked together with `content_descriptor_ref` by `validate_ref_hash_pair`.
    pub content_descriptor_hash: Option<String>,
    /// Must not be empty. Each entry's `signature` is cleared when building the
    /// attestation payload bytes.
    pub attestations: Vec<Attestation>,
    /// Run through `push_if_blank`.
    pub key_id: String,
    pub policy_profile_id: Option<String>,
}

/// One artifact entry, carrying an id, a reference and a content root hash.
///
/// Used as the element type of `TransformEnvelope::input_artifacts` and
/// `TransformEnvelope::output_artifacts`. None of the validators visible in
/// this file inspect the individual fields of a record.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
pub struct ArtifactRecord {
    pub artifact_id: String,
    pub artifact_ref: String,
    pub content_root_hash: String,
}

/// Envelope whose `envelope_type` is expected to be `"transform"`.
///
/// The per-field notes below name the check that `TransformEnvelope::validate`
/// applies to each field; the helper checks themselves live in
/// `crate::envelope::validation`. "Dataset modes" means a
/// `provenance_start_mode` of `"dataset_bootstrap"` or `"dataset_incremental"`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
pub struct TransformEnvelope {
    /// Run through `push_if_blank`.
    pub schema_version: String,
    /// Must equal `"transform"`.
    pub envelope_type: String,
    /// Run through `validate_rfc3339`.
    pub issued_at: String,
    /// Run through `push_if_blank`.
    pub subject_id: String,
    /// Run through `push_if_blank`.
    pub transform_job_id: String,
    /// Run through `push_if_blank`.
    pub transform_stage: String,
    /// Run through `push_if_empty_vec`.
    pub input_refs: Vec<String>,
    /// Run through `push_if_empty_vec`.
    pub output_refs: Vec<String>,
    /// Length must equal that of `input_artifacts`.
    pub input_artifact_roots: Vec<String>,
    /// Length must equal that of `output_artifacts`.
    pub output_artifact_roots: Vec<String>,
    /// Run through `push_if_empty_vec`.
    pub input_artifacts: Vec<ArtifactRecord>,
    /// Run through `push_if_empty_vec`.
    pub output_artifacts: Vec<ArtifactRecord>,
    /// Run through `push_if_blank`.
    pub toolchain: String,
    /// Run through `push_if_blank`.
    pub parameters_ref: String,
    /// Run through `push_if_blank`.
    pub parameters_hash: String,
    /// Run through `push_if_blank`.
    pub determinism_profile: String,
    /// Checked together with `runtime_env_hash` by `validate_ref_hash_pair`.
    pub runtime_env_ref: Option<String>,
    /// Checked together with `runtime_env_ref` by `validate_ref_hash_pair`.
    pub runtime_env_hash: Option<String>,
    /// Run through `validate_start_mode`; also selects the extra dataset-mode checks.
    pub provenance_start_mode: String,
    /// Paired with `source_manifest_hash`; additionally run through
    /// `push_if_blank_option` in dataset modes.
    pub source_manifest_ref: Option<String>,
    /// Paired with `source_manifest_ref`; additionally run through
    /// `push_if_blank_option` in dataset modes.
    pub source_manifest_hash: Option<String>,
    /// Run through `push_if_blank_option` in dataset modes.
    pub source_root_hash: Option<String>,
    /// Paired with `transform_spec_hash`; additionally run through
    /// `push_if_blank_option` in dataset modes.
    pub transform_spec_ref: Option<String>,
    /// Paired with `transform_spec_ref`; additionally run through
    /// `push_if_blank_option` in dataset modes.
    pub transform_spec_hash: Option<String>,
    /// Checked together with `chunking_profile_hash` by `validate_ref_hash_pair`.
    pub chunking_profile_ref: Option<String>,
    /// Checked together with `chunking_profile_ref` by `validate_ref_hash_pair`.
    pub chunking_profile_hash: Option<String>,
    /// Checked together with `execution_manifest_hash` by `validate_ref_hash_pair`.
    pub execution_manifest_ref: Option<String>,
    /// Checked together with `execution_manifest_ref` by `validate_ref_hash_pair`.
    pub execution_manifest_hash: Option<String>,
    /// Checked together with `runtime_manifest_hash` by `validate_ref_hash_pair`.
    pub runtime_manifest_ref: Option<String>,
    /// Checked together with `runtime_manifest_ref` by `validate_ref_hash_pair`.
    pub runtime_manifest_hash: Option<String>,
    /// Must not be empty. Each entry's `signature` is cleared when building the
    /// attestation payload bytes.
    pub attestations: Vec<Attestation>,
    /// Run through `push_if_blank`.
    pub key_id: String,
}

/// Envelope whose `envelope_type` is expected to be `"publish"`.
///
/// The per-field notes below name the check that `PublishEnvelope::validate`
/// applies to each field; the helper checks themselves live in
/// `crate::envelope::validation`. Fields without a note are not inspected by
/// that method.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
pub struct PublishEnvelope {
    /// Run through `push_if_blank`.
    pub schema_version: String,
    /// Must equal `"publish"`.
    pub envelope_type: String,
    /// Run through `validate_rfc3339`.
    pub issued_at: String,
    /// Run through `push_if_blank`.
    pub subject_id: String,
    /// Run through `push_if_blank`.
    pub dataset_id: String,
    /// Run through `push_if_blank`.
    pub dataset_version: String,
    /// Run through `push_if_empty_vec`.
    pub input_refs: Vec<String>,
    /// Run through `push_if_empty_vec`.
    pub output_refs: Vec<String>,
    /// Run through `push_if_empty_vec`; one entry's `artifact_id` must equal
    /// `primary_artifact_id`.
    pub published_artifacts: Vec<CheckpointArtifact>,
    /// Run through `push_if_blank`; must match the `artifact_id` of an entry in
    /// `published_artifacts`.
    pub primary_artifact_id: String,
    /// Run through `push_if_blank`.
    pub checkpoint_manifest_ref: String,
    /// Run through `push_if_blank`.
    pub checkpoint_manifest_hash: String,
    /// Run through `push_if_blank`.
    pub checkpoint_id: String,
    /// Run through `push_if_blank`.
    pub checkpoint_log_root_hash: String,
    /// Run through `push_if_empty_vec`.
    pub lineage_refs: Vec<String>,
    /// Run through `push_if_blank`.
    pub verification_policy_id: String,
    /// Must not be empty. Each entry's `signature` is cleared when building the
    /// attestation payload bytes.
    pub attestations: Vec<Attestation>,
    /// Run through `push_if_blank`.
    pub key_id: String,
    pub stac_refs: Vec<String>,
    /// Checked together with `reward_context_hash` by `validate_ref_hash_pair`.
    pub reward_context_ref: Option<String>,
    /// Checked together with `reward_context_ref` by `validate_ref_hash_pair`.
    pub reward_context_hash: Option<String>,
    /// Run through `validate_start_mode`; the value `"dataset_bootstrap"` makes
    /// `bootstrap_origin_label` subject to `push_if_blank_option`.
    pub provenance_start_mode: String,
    /// Run through `push_if_blank_option` only when `provenance_start_mode` is
    /// `"dataset_bootstrap"`.
    pub bootstrap_origin_label: Option<String>,
    pub reward_eligible: bool,
}

impl CaptureEnvelope {
    /// Canonical bytes of this envelope with every attestation's `signature`
    /// emptied.
    ///
    /// The signatures are cleared on a clone, so `self` is left unchanged.
    pub fn canonical_attestation_payload_bytes(&self) -> Vec<u8> {
        let mut unsigned = self.clone();
        unsigned
            .attestations
            .iter_mut()
            .for_each(|attestation| attestation.signature.clear());
        canonical_signed_bytes(&unsigned)
    }

    /// Canonical bytes of the envelope exactly as it stands, attestation
    /// signatures included.
    pub fn canonical_signed_bytes(&self) -> Vec<u8> {
        // Calls the free function imported from `envelope::validation`, not this method.
        canonical_signed_bytes(self)
    }

    /// Runs every capture-envelope check and reports all failures at once.
    ///
    /// Returns `Ok(())` when no check produced a message, otherwise `Err` with
    /// the collected messages in the order the checks run.
    ///
    /// NOTE(review): unlike the transform and publish validators in this file,
    /// `subject_id` and `input_refs` are not checked here — confirm that this
    /// is intentional.
    pub fn validate(&self) -> ValidationResult {
        let mut issues = Vec::new();

        // Scalar fields handed to the blank check.
        push_if_blank(&mut issues, "schema_version", &self.schema_version);
        push_if_blank(&mut issues, "capture_role", &self.capture_role);
        push_if_blank(&mut issues, "capture_actor_id", &self.capture_actor_id);
        push_if_blank(&mut issues, "capture_system_id", &self.capture_system_id);
        push_if_blank(&mut issues, "capture_window", &self.capture_window);
        push_if_blank(&mut issues, "content_root_hash", &self.content_root_hash);
        push_if_blank(
            &mut issues,
            "content_commitment_profile",
            &self.content_commitment_profile,
        );
        push_if_blank(&mut issues, "key_id", &self.key_id);

        validate_rfc3339(&mut issues, "issued_at", &self.issued_at);

        // Collections handed to the empty check.
        push_if_empty_vec(&mut issues, "segment_ids", &self.segment_ids);
        push_if_empty_vec(&mut issues, "segment_hashes", &self.segment_hashes);
        push_if_empty_vec(&mut issues, "output_refs", &self.output_refs);

        if self.envelope_type.as_str() != "capture" {
            issues.push(String::from("envelope_type must be 'capture'"));
        }

        validate_capture_role(&mut issues, &self.capture_role);

        if self.attestations.is_empty() {
            issues.push(String::from("attestations must not be empty"));
        }

        // Each segment id needs exactly one hash alongside it.
        let id_count = self.segment_ids.len();
        let hash_count = self.segment_hashes.len();
        if id_count != hash_count {
            issues.push(String::from(
                "segment_ids and segment_hashes length must match",
            ));
        }

        if self.chunk_size == 0 {
            issues.push(String::from("chunk_size must be greater than zero"));
        }
        if self.leaf_count == 0 {
            issues.push(String::from("leaf_count must be greater than zero"));
        }

        validate_ref_hash_pair(
            &mut issues,
            "content_descriptor_ref",
            "content_descriptor_hash",
            &self.content_descriptor_ref,
            &self.content_descriptor_hash,
        );

        if !issues.is_empty() {
            return Err(issues);
        }
        Ok(())
    }
}

impl TransformEnvelope {
    /// Canonical bytes of this envelope with every attestation's `signature`
    /// emptied.
    ///
    /// The signatures are cleared on a clone, so `self` is left unchanged.
    pub fn canonical_attestation_payload_bytes(&self) -> Vec<u8> {
        let mut unsigned = self.clone();
        unsigned
            .attestations
            .iter_mut()
            .for_each(|attestation| attestation.signature.clear());
        canonical_signed_bytes(&unsigned)
    }

    /// Canonical bytes of the envelope exactly as it stands, attestation
    /// signatures included.
    pub fn canonical_signed_bytes(&self) -> Vec<u8> {
        // Calls the free function imported from `envelope::validation`, not this method.
        canonical_signed_bytes(self)
    }

    /// Runs every transform-envelope check and reports all failures at once.
    ///
    /// Returns `Ok(())` when no check produced a message, otherwise `Err` with
    /// the collected messages in the order the checks run.
    pub fn validate(&self) -> ValidationResult {
        let mut issues = Vec::new();

        // Scalar fields handed to the blank check.
        push_if_blank(&mut issues, "schema_version", &self.schema_version);
        push_if_blank(&mut issues, "subject_id", &self.subject_id);
        push_if_blank(&mut issues, "transform_job_id", &self.transform_job_id);
        push_if_blank(&mut issues, "transform_stage", &self.transform_stage);
        push_if_blank(&mut issues, "toolchain", &self.toolchain);
        push_if_blank(&mut issues, "parameters_ref", &self.parameters_ref);
        push_if_blank(&mut issues, "parameters_hash", &self.parameters_hash);
        push_if_blank(
            &mut issues,
            "determinism_profile",
            &self.determinism_profile,
        );
        push_if_blank(&mut issues, "key_id", &self.key_id);

        validate_rfc3339(&mut issues, "issued_at", &self.issued_at);
        validate_start_mode(&mut issues, &self.provenance_start_mode);

        // Collections handed to the empty check.
        push_if_empty_vec(&mut issues, "input_refs", &self.input_refs);
        push_if_empty_vec(&mut issues, "output_refs", &self.output_refs);
        push_if_empty_vec(&mut issues, "input_artifacts", &self.input_artifacts);
        push_if_empty_vec(&mut issues, "output_artifacts", &self.output_artifacts);

        if self.envelope_type.as_str() != "transform" {
            issues.push(String::from("envelope_type must be 'transform'"));
        }
        if self.attestations.is_empty() {
            issues.push(String::from("attestations must not be empty"));
        }

        // The root lists must line up one-to-one with the artifact lists.
        let input_counts_match = self.input_artifact_roots.len() == self.input_artifacts.len();
        if !input_counts_match {
            issues.push(String::from(
                "input_artifact_roots and input_artifacts length must match",
            ));
        }
        let output_counts_match = self.output_artifact_roots.len() == self.output_artifacts.len();
        if !output_counts_match {
            issues.push(String::from(
                "output_artifact_roots and output_artifacts length must match",
            ));
        }

        // Optional reference/hash pairs.
        validate_ref_hash_pair(
            &mut issues,
            "runtime_env_ref",
            "runtime_env_hash",
            &self.runtime_env_ref,
            &self.runtime_env_hash,
        );
        validate_ref_hash_pair(
            &mut issues,
            "source_manifest_ref",
            "source_manifest_hash",
            &self.source_manifest_ref,
            &self.source_manifest_hash,
        );
        validate_ref_hash_pair(
            &mut issues,
            "transform_spec_ref",
            "transform_spec_hash",
            &self.transform_spec_ref,
            &self.transform_spec_hash,
        );
        validate_ref_hash_pair(
            &mut issues,
            "chunking_profile_ref",
            "chunking_profile_hash",
            &self.chunking_profile_ref,
            &self.chunking_profile_hash,
        );
        validate_ref_hash_pair(
            &mut issues,
            "execution_manifest_ref",
            "execution_manifest_hash",
            &self.execution_manifest_ref,
            &self.execution_manifest_hash,
        );
        validate_ref_hash_pair(
            &mut issues,
            "runtime_manifest_ref",
            "runtime_manifest_hash",
            &self.runtime_manifest_ref,
            &self.runtime_manifest_hash,
        );

        // Both dataset start modes get the extra source/spec checks.
        let start_mode = self.provenance_start_mode.as_str();
        if start_mode == "dataset_bootstrap" || start_mode == "dataset_incremental" {
            push_if_blank_option(
                &mut issues,
                "source_manifest_ref",
                &self.source_manifest_ref,
            );
            push_if_blank_option(
                &mut issues,
                "source_manifest_hash",
                &self.source_manifest_hash,
            );
            push_if_blank_option(&mut issues, "source_root_hash", &self.source_root_hash);
            push_if_blank_option(&mut issues, "transform_spec_ref", &self.transform_spec_ref);
            push_if_blank_option(
                &mut issues,
                "transform_spec_hash",
                &self.transform_spec_hash,
            );
        }

        if !issues.is_empty() {
            return Err(issues);
        }
        Ok(())
    }
}

impl PublishEnvelope {
    /// Canonical bytes of this envelope with every attestation's `signature`
    /// emptied.
    ///
    /// The signatures are cleared on a clone, so `self` is left unchanged.
    pub fn canonical_attestation_payload_bytes(&self) -> Vec<u8> {
        let mut unsigned = self.clone();
        unsigned
            .attestations
            .iter_mut()
            .for_each(|attestation| attestation.signature.clear());
        canonical_signed_bytes(&unsigned)
    }

    /// Canonical bytes of the envelope exactly as it stands, attestation
    /// signatures included.
    pub fn canonical_signed_bytes(&self) -> Vec<u8> {
        // Calls the free function imported from `envelope::validation`, not this method.
        canonical_signed_bytes(self)
    }

    /// Runs every publish-envelope check and reports all failures at once.
    ///
    /// Returns `Ok(())` when no check produced a message, otherwise `Err` with
    /// the collected messages in the order the checks run.
    pub fn validate(&self) -> ValidationResult {
        let mut issues = Vec::new();

        // Scalar fields handed to the blank check.
        push_if_blank(&mut issues, "schema_version", &self.schema_version);
        push_if_blank(&mut issues, "subject_id", &self.subject_id);
        push_if_blank(&mut issues, "dataset_id", &self.dataset_id);
        push_if_blank(&mut issues, "dataset_version", &self.dataset_version);
        push_if_blank(
            &mut issues,
            "primary_artifact_id",
            &self.primary_artifact_id,
        );
        push_if_blank(
            &mut issues,
            "checkpoint_manifest_ref",
            &self.checkpoint_manifest_ref,
        );
        push_if_blank(
            &mut issues,
            "checkpoint_manifest_hash",
            &self.checkpoint_manifest_hash,
        );
        push_if_blank(&mut issues, "checkpoint_id", &self.checkpoint_id);
        push_if_blank(
            &mut issues,
            "checkpoint_log_root_hash",
            &self.checkpoint_log_root_hash,
        );
        push_if_blank(
            &mut issues,
            "verification_policy_id",
            &self.verification_policy_id,
        );
        push_if_blank(&mut issues, "key_id", &self.key_id);

        validate_rfc3339(&mut issues, "issued_at", &self.issued_at);
        validate_start_mode(&mut issues, &self.provenance_start_mode);

        // Collections handed to the empty check.
        push_if_empty_vec(&mut issues, "input_refs", &self.input_refs);
        push_if_empty_vec(&mut issues, "output_refs", &self.output_refs);
        push_if_empty_vec(
            &mut issues,
            "published_artifacts",
            &self.published_artifacts,
        );
        push_if_empty_vec(&mut issues, "lineage_refs", &self.lineage_refs);

        if self.envelope_type.as_str() != "publish" {
            issues.push(String::from("envelope_type must be 'publish'"));
        }
        if self.attestations.is_empty() {
            issues.push(String::from("attestations must not be empty"));
        }

        validate_ref_hash_pair(
            &mut issues,
            "reward_context_ref",
            "reward_context_hash",
            &self.reward_context_ref,
            &self.reward_context_hash,
        );

        // The primary artifact has to be one of the published ones.
        let primary_is_published = self
            .published_artifacts
            .iter()
            .any(|candidate| candidate.artifact_id == self.primary_artifact_id);
        if !primary_is_published {
            issues.push(String::from(
                "primary_artifact_id must appear in published_artifacts",
            ));
        }

        // Only the bootstrap start mode requires an origin label.
        if self.provenance_start_mode.as_str() == "dataset_bootstrap" {
            push_if_blank_option(
                &mut issues,
                "bootstrap_origin_label",
                &self.bootstrap_origin_label,
            );
        }

        if !issues.is_empty() {
            return Err(issues);
        }
        Ok(())
    }
}