use crate::content::{build_content_descriptor, ContentDescriptor, ContentDescriptorInput};
use crate::envelope::{ArtifactRecord, Attestation, TransformEnvelope};
use crate::hashing::hash_file_streaming;
use serde::{Deserialize, Serialize};
use std::io;
use std::path::Path;
/// Caller-supplied description of one transform stage.
///
/// [`build_transform_envelope`] clones every field of this struct into the
/// same-named field of the resulting [`TransformEnvelope`]. Nothing in this
/// file validates the values; notes marked "presumably" are assumptions to be
/// confirmed against the envelope schema.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct TransformStageInput {
/// Schema version string (the tests in this file use `1.0.0`).
pub schema_version: String,
/// Issue timestamp as a string; presumably RFC 3339 (the tests use
/// `2026-01-01T00:00:00Z`). The format is not checked here.
pub issued_at: String,
/// Identifier of the envelope's subject.
pub subject_id: String,
/// Identifier of the transform job; presumably the job this stage belongs to.
pub transform_job_id: String,
/// Name of the stage (the tests use `raw_to_nc`).
pub transform_stage: String,
/// References to the stage's inputs (the tests use `obj://...` strings).
pub input_refs: Vec<String>,
/// References to the stage's outputs (the tests use `obj://...` strings).
pub output_refs: Vec<String>,
/// Records for the input artifacts. Besides being copied as-is, each
/// record's `content_root_hash` is collected, in order, into the envelope's
/// `input_artifact_roots`.
pub input_artifacts: Vec<ArtifactRecord>,
/// Records for the output artifacts. Besides being copied as-is, each
/// record's `content_root_hash` is collected, in order, into the envelope's
/// `output_artifact_roots`.
pub output_artifacts: Vec<ArtifactRecord>,
/// Toolchain label (the tests use `rust`).
pub toolchain: String,
/// Reference to the parameters used by the stage (the tests use `cfg://1`).
pub parameters_ref: String,
/// Presumably a hash of the referenced parameters; algorithm and encoding
/// are not established in this file.
pub parameters_hash: String,
/// Determinism profile label (the tests use `det-v1`).
pub determinism_profile: String,
/// Optional reference to the runtime environment (the tests use `oci://img`).
pub runtime_env_ref: Option<String>,
/// Optional hash accompanying `runtime_env_ref`.
pub runtime_env_hash: Option<String>,
/// Provenance start mode (the tests use `transport_capture`); the set of
/// accepted values is defined outside this file.
pub provenance_start_mode: String,
/// Optional source manifest reference.
pub source_manifest_ref: Option<String>,
/// Optional hash accompanying `source_manifest_ref`.
pub source_manifest_hash: Option<String>,
/// Optional source root hash.
pub source_root_hash: Option<String>,
/// Optional transform spec reference.
pub transform_spec_ref: Option<String>,
/// Optional hash accompanying `transform_spec_ref`.
pub transform_spec_hash: Option<String>,
/// Optional execution manifest reference.
pub execution_manifest_ref: Option<String>,
/// Optional hash accompanying `execution_manifest_ref`.
pub execution_manifest_hash: Option<String>,
/// Optional runtime manifest reference.
pub runtime_manifest_ref: Option<String>,
/// Optional hash accompanying `runtime_manifest_ref`.
pub runtime_manifest_hash: Option<String>,
/// Key identifier stored on the envelope itself; this is a separate value
/// from the `key_id` carried inside each `Attestation`.
pub key_id: String,
}
pub fn build_transform_envelope(
stage: &TransformStageInput,
attestation: Attestation,
) -> TransformEnvelope {
let input_artifact_roots = stage
.input_artifacts
.iter()
.map(|artifact| artifact.content_root_hash.clone())
.collect();
let output_artifact_roots = stage
.output_artifacts
.iter()
.map(|artifact| artifact.content_root_hash.clone())
.collect();
TransformEnvelope {
schema_version: stage.schema_version.clone(),
envelope_type: "transform".to_string(),
issued_at: stage.issued_at.clone(),
subject_id: stage.subject_id.clone(),
transform_job_id: stage.transform_job_id.clone(),
transform_stage: stage.transform_stage.clone(),
input_refs: stage.input_refs.clone(),
output_refs: stage.output_refs.clone(),
input_artifact_roots,
output_artifact_roots,
input_artifacts: stage.input_artifacts.clone(),
output_artifacts: stage.output_artifacts.clone(),
toolchain: stage.toolchain.clone(),
parameters_ref: stage.parameters_ref.clone(),
parameters_hash: stage.parameters_hash.clone(),
determinism_profile: stage.determinism_profile.clone(),
runtime_env_ref: stage.runtime_env_ref.clone(),
runtime_env_hash: stage.runtime_env_hash.clone(),
provenance_start_mode: stage.provenance_start_mode.clone(),
source_manifest_ref: stage.source_manifest_ref.clone(),
source_manifest_hash: stage.source_manifest_hash.clone(),
source_root_hash: stage.source_root_hash.clone(),
transform_spec_ref: stage.transform_spec_ref.clone(),
transform_spec_hash: stage.transform_spec_hash.clone(),
execution_manifest_ref: stage.execution_manifest_ref.clone(),
execution_manifest_hash: stage.execution_manifest_hash.clone(),
runtime_manifest_ref: stage.runtime_manifest_ref.clone(),
runtime_manifest_hash: stage.runtime_manifest_hash.clone(),
attestations: vec![attestation],
key_id: stage.key_id.clone(),
}
}
/// Hashes the file at `path` and builds a [`ContentDescriptor`] for it.
///
/// The file is hashed with `hash_file_streaming(path, chunk_size, threads)`
/// and the resulting root is placed in the descriptor together with:
///
/// * `byte_length` — the file size reported by `std::fs::metadata`;
/// * `leaf_count` — `ceil(byte_length / chunk_size)` (zero for an empty file);
/// * `media_type` — always `"application/octet-stream"`;
/// * `created_at` — always `"1970-01-01T00:00:00Z"`.
///
/// The size is read from metadata *before* hashing, so if the file is
/// modified concurrently the length/leaf count and the root may disagree.
///
/// # Errors
///
/// * [`io::ErrorKind::InvalidInput`] if `chunk_size` is zero (previously this
///   panicked with a division by zero).
/// * [`io::ErrorKind::InvalidData`] if the leaf count does not fit in `usize`
///   (previously this was silently truncated by an `as` cast).
/// * Any error returned by `std::fs::metadata` or `hash_file_streaming`.
pub fn describe_artifact<P: AsRef<Path>>(
    path: P,
    artifact_id: &str,
    chunk_size: usize,
    threads: usize,
) -> io::Result<ContentDescriptor> {
    // Reject a zero chunk size before any I/O: `div_ceil(0)` below would
    // panic, and a zero-sized chunk is meaningless for the hasher as well.
    if chunk_size == 0 {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "chunk_size must be greater than zero",
        ));
    }

    let path_ref = path.as_ref();
    let byte_length = std::fs::metadata(path_ref)?.len();
    let (_hashes, root) = hash_file_streaming(path_ref, chunk_size, threads)?;

    // `usize -> u64` is a lossless widening on every supported target; the
    // way back is checked so narrow targets get an error, not a wrong count.
    let leaf_count = usize::try_from(byte_length.div_ceil(chunk_size as u64)).map_err(|_| {
        io::Error::new(
            io::ErrorKind::InvalidData,
            "leaf count does not fit in usize",
        )
    })?;

    Ok(build_content_descriptor(ContentDescriptorInput {
        artifact_id,
        root,
        chunk_size,
        leaf_count,
        byte_length,
        media_type: "application/octet-stream",
        // NOTE(review): fixed epoch timestamp rather than the wall clock —
        // presumably to keep descriptors reproducible; confirm with callers.
        created_at: "1970-01-01T00:00:00Z",
    }))
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Builds an artifact record from its three string components.
    fn artifact(id: &str, reference: &str, root: &str) -> ArtifactRecord {
        ArtifactRecord {
            artifact_id: String::from(id),
            artifact_ref: String::from(reference),
            content_root_hash: String::from(root),
        }
    }

    /// Stage fixture with one input and one output artifact; every optional
    /// manifest/spec field is left unset, only the runtime env pair is filled.
    fn sample_stage() -> TransformStageInput {
        TransformStageInput {
            schema_version: String::from("1.0.0"),
            issued_at: String::from("2026-01-01T00:00:00Z"),
            subject_id: String::from("transform-1"),
            transform_job_id: String::from("job-1"),
            transform_stage: String::from("raw_to_nc"),
            input_refs: vec![String::from("obj://raw/1")],
            output_refs: vec![String::from("obj://nc/1")],
            input_artifacts: vec![artifact("input-1", "obj://raw/1", "root-in")],
            output_artifacts: vec![artifact("output-1", "obj://nc/1", "root-out")],
            toolchain: String::from("rust"),
            parameters_ref: String::from("cfg://1"),
            parameters_hash: String::from("cfg-hash"),
            determinism_profile: String::from("det-v1"),
            runtime_env_ref: Some(String::from("oci://img")),
            runtime_env_hash: Some(String::from("img-hash")),
            provenance_start_mode: String::from("transport_capture"),
            source_manifest_ref: None,
            source_manifest_hash: None,
            source_root_hash: None,
            transform_spec_ref: None,
            transform_spec_hash: None,
            execution_manifest_ref: None,
            execution_manifest_hash: None,
            runtime_manifest_ref: None,
            runtime_manifest_hash: None,
            key_id: String::from("key-1"),
        }
    }

    /// Attestation fixture signed with the same key id as the stage fixture.
    fn sample_attestation() -> Attestation {
        Attestation {
            signer_id: String::from("transformer"),
            key_id: String::from("key-1"),
            signature: String::from("sig"),
            signed_at: String::from("2026-01-01T00:00:00Z"),
        }
    }

    #[test]
    fn build_transform_envelope_validates() {
        let stage = sample_stage();
        let envelope = build_transform_envelope(&stage, sample_attestation());

        assert_eq!(envelope.transform_stage, "raw_to_nc");
        assert!(envelope.validate().is_ok());
    }

    #[test]
    fn describe_artifact_returns_descriptor() {
        let mut tmp = NamedTempFile::new().expect("temp file");
        tmp.write_all(b"artifact-content").expect("write");

        // 16 bytes hashed in 4-byte chunks across 2 threads.
        let described = describe_artifact(tmp.path(), "artifact-1", 4, 2).expect("hash");

        assert_eq!(described.artifact_id, "artifact-1");
        assert!(!described.content_root_hash.is_empty());
    }
}