// trazaeo 0.5.2
//
// Open-source provenance SDK and specification for verifiable EO and climate data workflows.
// Documentation
use crate::content::{build_content_descriptor, ContentDescriptor, ContentDescriptorInput};
use crate::envelope::{ArtifactRecord, Attestation, TransformEnvelope};
use crate::hashing::hash_file_streaming;
use serde::{Deserialize, Serialize};
use std::io;
use std::path::Path;

/// Caller-supplied description of a single transform stage.
///
/// [`build_transform_envelope`] clones every field of this struct into the
/// field of the same name on a [`TransformEnvelope`]; that builder performs no
/// validation or normalisation of the values.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct TransformStageInput {
    /// Schema version string (the unit tests use `"1.0.0"`).
    pub schema_version: String,
    /// Issue timestamp as a string. The unit tests use an RFC 3339 UTC value
    /// (`2026-01-01T00:00:00Z`) — NOTE(review): the format is not enforced here.
    pub issued_at: String,
    /// Identifier of the envelope subject (e.g. `"transform-1"` in the tests).
    pub subject_id: String,
    /// Identifier of the transform job (e.g. `"job-1"` in the tests).
    pub transform_job_id: String,
    /// Name of the stage within the job (e.g. `"raw_to_nc"` in the tests).
    pub transform_stage: String,
    /// References to the stage inputs; the tests use `obj://…` style strings.
    pub input_refs: Vec<String>,
    /// References to the stage outputs; the tests use `obj://…` style strings.
    pub output_refs: Vec<String>,
    /// Records for the input artifacts. Their `content_root_hash` values are
    /// also collected, in order, into the envelope's `input_artifact_roots`.
    pub input_artifacts: Vec<ArtifactRecord>,
    /// Records for the output artifacts. Their `content_root_hash` values are
    /// also collected, in order, into the envelope's `output_artifact_roots`.
    pub output_artifacts: Vec<ArtifactRecord>,
    /// Toolchain label (e.g. `"rust"` in the tests).
    pub toolchain: String,
    /// Reference to the stage parameters (e.g. `"cfg://1"` in the tests).
    pub parameters_ref: String,
    /// Hash string paired with `parameters_ref` — presumably the hash of the
    /// referenced parameters; TODO confirm the expected encoding.
    pub parameters_hash: String,
    /// Determinism profile label (e.g. `"det-v1"` in the tests).
    pub determinism_profile: String,
    /// Optional runtime environment reference (e.g. `"oci://img"` in the tests).
    pub runtime_env_ref: Option<String>,
    /// Optional hash string paired with `runtime_env_ref`.
    pub runtime_env_hash: Option<String>,
    /// Provenance start mode label (e.g. `"transport_capture"` in the tests).
    /// NOTE(review): the set of accepted values is not visible in this file.
    pub provenance_start_mode: String,
    /// Optional source manifest reference.
    pub source_manifest_ref: Option<String>,
    /// Optional hash string paired with `source_manifest_ref`.
    pub source_manifest_hash: Option<String>,
    /// Optional source root hash.
    pub source_root_hash: Option<String>,
    /// Optional transform specification reference.
    pub transform_spec_ref: Option<String>,
    /// Optional hash string paired with `transform_spec_ref`.
    pub transform_spec_hash: Option<String>,
    /// Optional chunking profile reference.
    pub chunking_profile_ref: Option<String>,
    /// Optional hash string paired with `chunking_profile_ref`.
    pub chunking_profile_hash: Option<String>,
    /// Optional execution manifest reference.
    pub execution_manifest_ref: Option<String>,
    /// Optional hash string paired with `execution_manifest_ref`.
    pub execution_manifest_hash: Option<String>,
    /// Optional runtime manifest reference.
    pub runtime_manifest_ref: Option<String>,
    /// Optional hash string paired with `runtime_manifest_ref`.
    pub runtime_manifest_hash: Option<String>,
    /// Key identifier copied to the envelope's `key_id` — presumably the
    /// signing key; verify against how `Attestation::key_id` is checked.
    pub key_id: String,
}

/// Builds transform envelope.
pub fn build_transform_envelope(
    stage: &TransformStageInput,
    attestation: Attestation,
) -> TransformEnvelope {
    let input_artifact_roots = stage
        .input_artifacts
        .iter()
        .map(|artifact| artifact.content_root_hash.clone())
        .collect();
    let output_artifact_roots = stage
        .output_artifacts
        .iter()
        .map(|artifact| artifact.content_root_hash.clone())
        .collect();

    TransformEnvelope {
        schema_version: stage.schema_version.clone(),
        envelope_type: "transform".to_string(),
        issued_at: stage.issued_at.clone(),
        subject_id: stage.subject_id.clone(),
        transform_job_id: stage.transform_job_id.clone(),
        transform_stage: stage.transform_stage.clone(),
        input_refs: stage.input_refs.clone(),
        output_refs: stage.output_refs.clone(),
        input_artifact_roots,
        output_artifact_roots,
        input_artifacts: stage.input_artifacts.clone(),
        output_artifacts: stage.output_artifacts.clone(),
        toolchain: stage.toolchain.clone(),
        parameters_ref: stage.parameters_ref.clone(),
        parameters_hash: stage.parameters_hash.clone(),
        determinism_profile: stage.determinism_profile.clone(),
        runtime_env_ref: stage.runtime_env_ref.clone(),
        runtime_env_hash: stage.runtime_env_hash.clone(),
        provenance_start_mode: stage.provenance_start_mode.clone(),
        source_manifest_ref: stage.source_manifest_ref.clone(),
        source_manifest_hash: stage.source_manifest_hash.clone(),
        source_root_hash: stage.source_root_hash.clone(),
        transform_spec_ref: stage.transform_spec_ref.clone(),
        transform_spec_hash: stage.transform_spec_hash.clone(),
        chunking_profile_ref: stage.chunking_profile_ref.clone(),
        chunking_profile_hash: stage.chunking_profile_hash.clone(),
        execution_manifest_ref: stage.execution_manifest_ref.clone(),
        execution_manifest_hash: stage.execution_manifest_hash.clone(),
        runtime_manifest_ref: stage.runtime_manifest_ref.clone(),
        runtime_manifest_hash: stage.runtime_manifest_hash.clone(),
        attestations: vec![attestation],
        key_id: stage.key_id.clone(),
    }
}

/// Hashes the file at `path` and wraps the result in a [`ContentDescriptor`].
///
/// The file size is read from filesystem metadata, the content root comes from
/// [`hash_file_streaming`], and the leaf count is computed as
/// `ceil(byte_length / chunk_size)` (so an empty file reports zero leaves).
///
/// The descriptor always carries `media_type = "application/octet-stream"` and
/// `created_at = "1970-01-01T00:00:00Z"`.
/// NOTE(review): the fixed epoch timestamp presumably keeps descriptors
/// reproducible across runs — confirm before making it configurable.
///
/// # Parameters
/// - `path`: file to describe.
/// - `artifact_id`: identifier stored in the descriptor.
/// - `chunk_size`: chunk size in bytes passed to the hasher; must be non-zero.
/// - `threads`: thread count passed through to [`hash_file_streaming`].
///
/// # Errors
/// - Any I/O error from reading the file metadata or from hashing.
/// - [`io::ErrorKind::InvalidInput`] when `chunk_size` is zero (previously this
///   reached a division by zero).
/// - [`io::ErrorKind::InvalidData`] when the leaf count does not fit in `usize`
///   on the current target (previously silently truncated).
pub fn describe_artifact<P: AsRef<Path>>(
    path: P,
    artifact_id: &str,
    chunk_size: usize,
    threads: usize,
) -> io::Result<ContentDescriptor> {
    let path_ref = path.as_ref();
    // Metadata is read first so a missing/unreadable path reports the same
    // error as before, regardless of the other arguments.
    let byte_length = std::fs::metadata(path_ref)?.len();

    if chunk_size == 0 {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "chunk_size must be greater than zero",
        ));
    }

    // `usize -> u64` is a lossless widening on every supported target; the
    // narrowing back to `usize` is checked instead of truncating.
    let leaf_count = usize::try_from(byte_length.div_ceil(chunk_size as u64)).map_err(|_| {
        io::Error::new(
            io::ErrorKind::InvalidData,
            "leaf count does not fit in usize",
        )
    })?;

    // Only the root is needed here; the per-chunk hashes are discarded.
    let (_hashes, root) = hash_file_streaming(path_ref, chunk_size, threads)?;

    Ok(build_content_descriptor(ContentDescriptorInput {
        artifact_id,
        root,
        chunk_size,
        leaf_count,
        byte_length,
        media_type: "application/octet-stream",
        created_at: "1970-01-01T00:00:00Z",
    }))
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Returns a stage input with one input artifact, one output artifact,
    /// a runtime environment ref/hash, and every other optional field unset.
    fn stage_input() -> TransformStageInput {
        let raw_artifact = ArtifactRecord {
            artifact_id: String::from("input-1"),
            artifact_ref: String::from("obj://raw/1"),
            content_root_hash: String::from("root-in"),
        };
        let nc_artifact = ArtifactRecord {
            artifact_id: String::from("output-1"),
            artifact_ref: String::from("obj://nc/1"),
            content_root_hash: String::from("root-out"),
        };

        TransformStageInput {
            schema_version: String::from("1.0.0"),
            issued_at: String::from("2026-01-01T00:00:00Z"),
            subject_id: String::from("transform-1"),
            transform_job_id: String::from("job-1"),
            transform_stage: String::from("raw_to_nc"),
            input_refs: vec![String::from("obj://raw/1")],
            output_refs: vec![String::from("obj://nc/1")],
            input_artifacts: vec![raw_artifact],
            output_artifacts: vec![nc_artifact],
            toolchain: String::from("rust"),
            parameters_ref: String::from("cfg://1"),
            parameters_hash: String::from("cfg-hash"),
            determinism_profile: String::from("det-v1"),
            runtime_env_ref: Some(String::from("oci://img")),
            runtime_env_hash: Some(String::from("img-hash")),
            provenance_start_mode: String::from("transport_capture"),
            source_manifest_ref: None,
            source_manifest_hash: None,
            source_root_hash: None,
            transform_spec_ref: None,
            transform_spec_hash: None,
            chunking_profile_ref: None,
            chunking_profile_hash: None,
            execution_manifest_ref: None,
            execution_manifest_hash: None,
            runtime_manifest_ref: None,
            runtime_manifest_hash: None,
            key_id: String::from("key-1"),
        }
    }

    /// Returns a placeholder attestation whose `key_id` matches `stage_input()`.
    fn attestation() -> Attestation {
        Attestation {
            signer_id: String::from("transformer"),
            key_id: String::from("key-1"),
            signature: String::from("sig"),
            signed_at: String::from("2026-01-01T00:00:00Z"),
        }
    }

    /// The built envelope keeps the stage name and passes its own validation.
    #[test]
    fn build_transform_envelope_validates() {
        let stage = stage_input();
        let envelope = build_transform_envelope(&stage, attestation());

        assert_eq!(envelope.transform_stage, "raw_to_nc");
        assert!(envelope.validate().is_ok());
    }

    /// Describing a small temp file yields a descriptor with the requested id
    /// and a non-empty content root hash.
    #[test]
    fn describe_artifact_returns_descriptor() {
        let mut tmp = NamedTempFile::new().expect("temp file");
        tmp.write_all(b"artifact-content").expect("write");

        // 16 bytes hashed in 4-byte chunks on 2 threads.
        let described = describe_artifact(tmp.path(), "artifact-1", 4, 2).expect("hash");

        assert_eq!(described.artifact_id, "artifact-1");
        assert!(!described.content_root_hash.is_empty());
    }
}