// trazaeo 0.5.0
//
// Open-source provenance SDK and specification for verifiable EO and climate data workflows.
// Documentation
use crate::error::{TrazaeoError, TrazaeoResult};
use crate::utils::Hash;
use serde::{Deserialize, Serialize};

/// One published artifact listed in a [`CheckpointManifest`].
///
/// The whole struct is serialized as part of the manifest when the manifest
/// hash is computed. Within this module only `artifact_id` is read directly
/// (it is hashed into the artifact's Merkle leaf and used for lookups).
///
/// NOTE: serde's derive emits fields in declaration order, so reordering the
/// fields changes the serialized manifest bytes and therefore every hash
/// derived from them.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct CheckpointArtifact {
    /// Identifier of the artifact; fed into `checkpoint_leaf_hash` and matched
    /// by `build_checkpoint_inclusion_proof_for_artifact`.
    pub artifact_id: String,
    /// Root hash of the artifact content, stored as a string.
    // NOTE(review): encoding (presumably hex) is not enforced here — confirm.
    pub content_root_hash: String,
    /// Optional reference to a content descriptor.
    pub content_descriptor_ref: Option<String>,
    /// Optional hash of the content descriptor.
    pub content_descriptor_hash: Option<String>,
    /// Media type string of the artifact (the tests use
    /// `application/octet-stream`).
    pub media_type: String,
}

/// Manifest describing one checkpoint: its artifacts, lineage, signatures and
/// the Merkle root computed over the artifact leaves.
///
/// The manifest is serialized with `serde_json` for hashing and for the
/// canonical signing payload.
///
/// NOTE: serde's derive emits fields in declaration order, so reordering the
/// fields changes the serialized bytes and therefore the manifest hash, the
/// binding hash and the log root.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct CheckpointManifest {
    /// Identifier of the checkpoint; validation rejects a blank value.
    pub checkpoint_id: String,
    /// Time window covered by the checkpoint; validation rejects a blank value.
    // NOTE(review): the tests use an ISO 8601 `start/end` interval, but the
    // format is not checked anywhere in this module — confirm.
    pub checkpoint_time_window: String,
    /// Hex-encoded Merkle root over the artifact leaves, as set by
    /// `build_checkpoint_manifest`. Cleared before the binding hash is taken.
    pub checkpoint_log_root_hash: String,
    /// Optional reference to the preceding checkpoint.
    pub prior_checkpoint_ref: Option<String>,
    /// Signature entries. Cleared before computing the binding hash and the
    /// canonical payload; validation requires at least one non-blank entry.
    pub checkpoint_signature_bundle: Vec<String>,
    /// Artifacts published under this checkpoint; one Merkle leaf each, in
    /// this order. Validation requires at least one.
    pub published_artifacts: Vec<CheckpointArtifact>,
    /// Lineage references; validation requires at least one.
    pub lineage_refs: Vec<String>,
}

/// Merkle inclusion proof for a single leaf of the checkpoint log tree.
///
/// Produced by `make_checkpoint_inclusion_proof` and consumed by
/// `verify_checkpoint_inclusion`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct CheckpointInclusionProof {
    /// Zero-based position of the leaf in the leaf list; its parity at each
    /// level decides whether the running hash is the left or right child.
    pub leaf_index: usize,
    /// Hex-encoded 32-byte hash of the leaf being proven.
    pub leaf_hash: String,
    /// Hex-encoded 32-byte sibling hashes, ordered from the leaf level upward.
    pub sibling_hashes: Vec<String>,
}

/// Assembles a [`CheckpointManifest`] and fills in its log root hash.
///
/// The manifest is first created with an empty `checkpoint_log_root_hash`.
/// Its binding hash (which ignores the root hash and the signature bundle) is
/// then combined with each artifact id to form one Merkle leaf per published
/// artifact, in list order. The hex-encoded root of that tree is stored in
/// `checkpoint_log_root_hash`.
///
/// No validation is performed here; call [`validate_checkpoint_manifest`] on
/// the result if required. With an empty `published_artifacts` list the root
/// is the fixed "empty log" hash returned by [`compute_checkpoint_log_root`].
///
/// # Panics
///
/// Panics if the manifest cannot be serialized to JSON while computing the
/// binding hash.
pub fn build_checkpoint_manifest(
    checkpoint_id: &str,
    checkpoint_time_window: &str,
    prior_checkpoint_ref: Option<&str>,
    checkpoint_signature_bundle: Vec<String>,
    published_artifacts: Vec<CheckpointArtifact>,
    lineage_refs: Vec<String>,
) -> CheckpointManifest {
    let mut manifest = CheckpointManifest {
        checkpoint_id: checkpoint_id.to_string(),
        checkpoint_time_window: checkpoint_time_window.to_string(),
        checkpoint_log_root_hash: String::new(),
        prior_checkpoint_ref: prior_checkpoint_ref.map(str::to_string),
        checkpoint_signature_bundle,
        published_artifacts,
        lineage_refs,
    };
    // The binding hash is the same for every leaf, so hex-encode it once
    // instead of once per artifact.
    let binding_hash_hex = hex::encode(compute_checkpoint_binding_hash(&manifest).0);
    let leaves = manifest
        .published_artifacts
        .iter()
        .map(|artifact| checkpoint_leaf_hash(&artifact.artifact_id, &binding_hash_hex))
        .collect::<Vec<_>>();
    manifest.checkpoint_log_root_hash = hex::encode(compute_checkpoint_log_root(&leaves).0);
    manifest
}

/// Checks the structural requirements of a checkpoint manifest.
///
/// The rules are evaluated in a fixed order and the first violation is
/// reported:
///
/// 1. `checkpoint_id` is not blank,
/// 2. `checkpoint_time_window` is not blank,
/// 3. `checkpoint_log_root_hash` is not blank,
/// 4. `published_artifacts` is not empty,
/// 5. `lineage_refs` is not empty,
/// 6. `checkpoint_signature_bundle` is not empty,
/// 7. no entry of `checkpoint_signature_bundle` is blank.
///
/// Only presence is checked: the log root is not recomputed and the contents
/// of individual artifacts and lineage references are not inspected.
///
/// # Errors
///
/// Returns an invalid-input error naming the first rule that failed.
pub fn validate_checkpoint_manifest(manifest: &CheckpointManifest) -> TrazaeoResult<()> {
    const OPERATION: &str = "validate checkpoint manifest";

    let is_blank = |text: &str| text.trim().is_empty();
    let any_blank_signature = manifest
        .checkpoint_signature_bundle
        .iter()
        .any(|entry| is_blank(entry));

    // (rule violated?, message) pairs, in reporting priority order.
    let rules: [(bool, &'static str); 7] = [
        (
            is_blank(&manifest.checkpoint_id),
            "checkpoint_id must not be empty",
        ),
        (
            is_blank(&manifest.checkpoint_time_window),
            "checkpoint_time_window must not be empty",
        ),
        (
            is_blank(&manifest.checkpoint_log_root_hash),
            "checkpoint_log_root_hash must not be empty",
        ),
        (
            manifest.published_artifacts.is_empty(),
            "published_artifacts must not be empty",
        ),
        (
            manifest.lineage_refs.is_empty(),
            "lineage_refs must not be empty",
        ),
        (
            manifest.checkpoint_signature_bundle.is_empty(),
            "checkpoint_signature_bundle must not be empty",
        ),
        (
            any_blank_signature,
            "checkpoint_signature_bundle entries must not be blank",
        ),
    ];

    for (violated, detail) in rules {
        if violated {
            return Err(TrazaeoError::invalid_input(OPERATION, detail));
        }
    }
    Ok(())
}

/// Computes checkpoint binding hash.
pub fn compute_checkpoint_binding_hash(manifest: &CheckpointManifest) -> Hash {
    let mut normalized = manifest.clone();
    normalized.checkpoint_log_root_hash.clear();
    normalized.checkpoint_signature_bundle.clear();
    compute_checkpoint_manifest_hash(&normalized)
}

/// Returns canonical checkpoint bytes for signature verification.
pub fn canonical_checkpoint_manifest_payload_bytes(manifest: &CheckpointManifest) -> Vec<u8> {
    let mut normalized = manifest.clone();
    normalized.checkpoint_signature_bundle.clear();
    serde_json::to_vec(&normalized).expect("checkpoint manifest serialization should succeed")
}

/// Produces the Merkle inclusion proof for one artifact of a manifest.
///
/// The manifest is validated first. Leaves are then rebuilt from every
/// published artifact id and the manifest's binding hash, and the proof is
/// generated for the first artifact whose id equals `artifact_id`.
///
/// # Errors
///
/// * Any error from [`validate_checkpoint_manifest`].
/// * An invalid-input error when no published artifact has `artifact_id`.
/// * An external error when proof construction itself yields nothing.
pub fn build_checkpoint_inclusion_proof_for_artifact(
    manifest: &CheckpointManifest,
    artifact_id: &str,
) -> TrazaeoResult<CheckpointInclusionProof> {
    const OPERATION: &str = "build checkpoint inclusion proof";

    validate_checkpoint_manifest(manifest)?;

    let binding_hash_hex = hex::encode(compute_checkpoint_binding_hash(manifest).0);

    // Single pass: hash every artifact into a leaf and remember where the
    // first matching id sits.
    let mut leaf_hashes = Vec::with_capacity(manifest.published_artifacts.len());
    let mut located = None;
    for (position, artifact) in manifest.published_artifacts.iter().enumerate() {
        if located.is_none() && artifact.artifact_id == artifact_id {
            located = Some(position);
        }
        leaf_hashes.push(checkpoint_leaf_hash(
            &artifact.artifact_id,
            &binding_hash_hex,
        ));
    }

    let Some(leaf_position) = located else {
        return Err(TrazaeoError::invalid_input(
            OPERATION,
            "artifact_id not present in checkpoint manifest",
        ));
    };

    match make_checkpoint_inclusion_proof(&leaf_hashes, leaf_position) {
        Some(proof) => Ok(proof),
        None => Err(TrazaeoError::external(
            OPERATION,
            "failed to build checkpoint inclusion proof",
        )),
    }
}

/// Computes the hash of an interior Merkle node from its two children.
///
/// The digest is BLAKE3 over the ASCII tag `node`, then the left child's
/// bytes, then the right child's bytes. Argument order therefore matters.
fn merkle_parent(left: &Hash, right: &Hash) -> Hash {
    let digest = blake3::Hasher::new()
        .update(b"node")
        .update(&left.0)
        .update(&right.0)
        .finalize();
    Hash(*digest.as_bytes())
}

/// Computes the Merkle leaf hash for one artifact of a checkpoint.
///
/// The digest is BLAKE3 over the ASCII tag `leaf`, then the UTF-8 bytes of
/// `artifact_id`, then the UTF-8 bytes of `checkpoint_manifest_hash`. Within
/// this module the second argument is the hex-encoded binding hash of the
/// manifest.
///
/// The two strings are fed back-to-back with no separator or length prefix,
/// so the result depends only on their concatenation.
pub fn checkpoint_leaf_hash(artifact_id: &str, checkpoint_manifest_hash: &str) -> Hash {
    let digest = blake3::Hasher::new()
        .update(b"leaf")
        .update(artifact_id.as_bytes())
        .update(checkpoint_manifest_hash.as_bytes())
        .finalize();
    Hash(*digest.as_bytes())
}

/// Folds a list of leaf hashes into the Merkle root of the checkpoint log.
///
/// * No leaves: returns the BLAKE3 hash of `checkpoint-log-empty`.
/// * One leaf: returns that leaf unchanged.
/// * Otherwise: adjacent nodes are combined pairwise, level by level, until
///   one node remains. When a level has an odd number of nodes, the last node
///   is paired with itself.
pub fn compute_checkpoint_log_root(leaves: &[Hash]) -> Hash {
    let mut layer = match leaves {
        [] => return Hash(*blake3::hash(b"checkpoint-log-empty").as_bytes()),
        _ => leaves.to_vec(),
    };
    while layer.len() > 1 {
        layer = layer
            .chunks(2)
            // `chunks` never yields an empty slice; a lone trailing node is
            // both the first and the last element of its chunk.
            .map(|pair| merkle_parent(&pair[0], &pair[pair.len() - 1]))
            .collect();
    }
    // Exactly one node is left at this point.
    layer.swap_remove(0)
}

/// Hashes the full manifest exactly as given.
///
/// The manifest is serialized with `serde_json` and the resulting bytes are
/// hashed with BLAKE3. Every field participates, including the log root hash
/// and the signature bundle.
///
/// # Panics
///
/// Panics if the manifest cannot be serialized to JSON.
pub fn compute_checkpoint_manifest_hash(manifest: &CheckpointManifest) -> Hash {
    let digest = serde_json::to_vec(manifest)
        .map(|encoded| blake3::hash(&encoded))
        .expect("checkpoint manifest serialization should succeed");
    Hash(*digest.as_bytes())
}

/// Builds the inclusion proof for the leaf at `leaf_index`.
///
/// Walks the tree from the leaf level upward using the same pairing rule as
/// [`compute_checkpoint_log_root`], recording the hex-encoded sibling at each
/// level. When the tracked node is the unpaired last node of an odd-sized
/// level, its own hash is recorded as the sibling.
///
/// Returns `None` when `leaf_index` is out of range, which includes the case
/// of an empty leaf list. A single-leaf list yields a proof with no siblings.
pub fn make_checkpoint_inclusion_proof(
    leaves: &[Hash],
    leaf_index: usize,
) -> Option<CheckpointInclusionProof> {
    let leaf_hash = hex::encode(leaves.get(leaf_index)?.0);

    let mut sibling_hashes = Vec::new();
    let mut layer = leaves.to_vec();
    let mut position = leaf_index;
    while layer.len() > 1 {
        // Flipping the low bit gives the neighbour on either side; clamping
        // handles an even position at the very end of an odd-sized level.
        let neighbour = (position ^ 1).min(layer.len() - 1);
        sibling_hashes.push(hex::encode(layer[neighbour].0));

        layer = layer
            .chunks(2)
            .map(|pair| merkle_parent(&pair[0], &pair[pair.len() - 1]))
            .collect();
        position >>= 1;
    }

    Some(CheckpointInclusionProof {
        leaf_index,
        leaf_hash,
        sibling_hashes,
    })
}

/// Checks that `proof` links its leaf hash to `root`.
///
/// Starting from the decoded leaf hash, each sibling is combined with the
/// running hash: the running hash is the left child when the current index is
/// even and the right child when it is odd. The index is halved after every
/// level.
///
/// Returns `true` only if
///
/// * the leaf hash and every sibling decode from hex to exactly 32 bytes,
/// * the recomputed hash equals `root`, and
/// * the index has been reduced to zero once all siblings are consumed.
///
/// The last condition ties `leaf_index` to the proof depth. Only the parity
/// of the index is used at each level, so without it any index that agrees
/// with the real one in its low bits would be accepted as well. Proofs built
/// by [`make_checkpoint_inclusion_proof`] always satisfy it, because their
/// index is smaller than the number of leaves.
pub fn verify_checkpoint_inclusion(root: &Hash, proof: &CheckpointInclusionProof) -> bool {
    /// Decodes a hex string into a 32-byte digest, or `None` if malformed.
    fn decode_digest(encoded: &str) -> Option<[u8; 32]> {
        let bytes = hex::decode(encoded).ok()?;
        <[u8; 32]>::try_from(bytes.as_slice()).ok()
    }

    let Some(mut current) = decode_digest(&proof.leaf_hash) else {
        return false;
    };
    let mut index = proof.leaf_index;
    for sibling in &proof.sibling_hashes {
        let Some(sibling_hash) = decode_digest(sibling) else {
            return false;
        };
        let (left, right) = if index % 2 == 0 {
            (Hash(current), Hash(sibling_hash))
        } else {
            (Hash(sibling_hash), Hash(current))
        };
        current = merkle_parent(&left, &right).0;
        index /= 2;
    }
    // A leftover non-zero index means `leaf_index` cannot belong to a tree of
    // this depth, even if the hashes happen to chain up to the root.
    index == 0 && current == root.0
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Tests that checkpoint root and inclusion proof verify.
    ///
    /// Every leaf position is checked; index 2 covers the odd-sized level
    /// where the last node is paired with itself.
    #[test]
    fn checkpoint_root_and_inclusion_proof_verify() {
        let leaves = vec![
            checkpoint_leaf_hash("a", "m1"),
            checkpoint_leaf_hash("b", "m2"),
            checkpoint_leaf_hash("c", "m3"),
        ];
        let root = compute_checkpoint_log_root(&leaves);
        for index in 0..leaves.len() {
            let proof = make_checkpoint_inclusion_proof(&leaves, index).expect("proof");
            assert_eq!(proof.leaf_index, index);
            assert!(verify_checkpoint_inclusion(&root, &proof));
        }
    }

    /// Tests that a single-leaf tree yields a sibling-free proof that verifies.
    #[test]
    fn checkpoint_single_leaf_proof_has_no_siblings() {
        let leaves = vec![checkpoint_leaf_hash("only", "m")];
        let root = compute_checkpoint_log_root(&leaves);
        let proof = make_checkpoint_inclusion_proof(&leaves, 0).expect("proof");
        assert!(proof.sibling_hashes.is_empty());
        assert!(verify_checkpoint_inclusion(&root, &proof));
    }

    /// Tests that out-of-range indices, wrong roots and altered proofs fail.
    #[test]
    fn checkpoint_inclusion_proof_rejects_bad_inputs() {
        let leaves = vec![
            checkpoint_leaf_hash("a", "m1"),
            checkpoint_leaf_hash("b", "m2"),
            checkpoint_leaf_hash("c", "m3"),
        ];
        let root = compute_checkpoint_log_root(&leaves);

        assert!(make_checkpoint_inclusion_proof(&leaves, leaves.len()).is_none());
        assert!(make_checkpoint_inclusion_proof(&[], 0).is_none());

        let proof = make_checkpoint_inclusion_proof(&leaves, 1).expect("proof");

        // Root of a different tree.
        let other_root = compute_checkpoint_log_root(&leaves[..2]);
        assert!(!verify_checkpoint_inclusion(&other_root, &proof));

        // Leaf hash that is not valid hex.
        let mut malformed = proof.clone();
        malformed.leaf_hash = "not-hex".to_string();
        assert!(!verify_checkpoint_inclusion(&root, &malformed));

        // Siblings applied in the wrong order.
        let mut reordered = proof.clone();
        reordered.sibling_hashes.reverse();
        assert!(!verify_checkpoint_inclusion(&root, &reordered));
    }

    /// Tests that checkpoint manifest builder sets log root.
    #[test]
    fn checkpoint_manifest_builder_sets_log_root() {
        let manifest = build_checkpoint_manifest(
            "checkpoint-1",
            "2026-01-01T00:00:00Z/2026-01-01T01:00:00Z",
            None,
            vec!["sig-1".to_string()],
            vec![CheckpointArtifact {
                artifact_id: "artifact-1".to_string(),
                content_root_hash: "root-1".to_string(),
                content_descriptor_ref: None,
                content_descriptor_hash: None,
                media_type: "application/octet-stream".to_string(),
            }],
            vec!["capture://1".to_string()],
        );
        assert!(validate_checkpoint_manifest(&manifest).is_ok());
        assert!(!manifest.checkpoint_log_root_hash.is_empty());
        // A 32-byte root is 64 hex characters.
        assert_eq!(manifest.checkpoint_log_root_hash.len(), 64);
    }

    /// Tests that a manifest without published artifacts fails validation.
    #[test]
    fn checkpoint_manifest_without_artifacts_is_invalid() {
        let manifest = build_checkpoint_manifest(
            "checkpoint-1",
            "2026-01-01T00:00:00Z/2026-01-01T01:00:00Z",
            None,
            vec!["sig-1".to_string()],
            Vec::new(),
            vec!["capture://1".to_string()],
        );
        assert!(validate_checkpoint_manifest(&manifest).is_err());
    }
}