Skip to main content

shape_runtime/
hashing.rs

1//! SHA256 hashing utilities for reproducibility artifacts
2//!
3//! Provides deterministic hashing for:
4//! - Script content
5//! - Data files/checksums
6//! - Parameter configurations
7//! - Combined artifact hashes
8
9use sha2::{Digest, Sha256};
10use std::collections::BTreeMap;
11use std::io::{self, Read};
12use std::path::Path;
13
/// A hash digest stored as text, conventionally in the form `sha256:<hex>`.
///
/// The inner `String` is public, so a value constructed directly (instead of
/// through [`HashDigest::from_hex`]) may lack the `sha256:` prefix;
/// [`HashDigest::hex`] copes with both forms.
///
/// The derived `PartialEq`/`Eq` compare the stored strings verbatim, so a
/// prefixed and an unprefixed digest are *not* `==`. Use
/// [`HashDigest::matches`] to compare only the hex part.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct HashDigest(pub String);
17
18impl HashDigest {
19    /// Create from raw hex string (adds sha256: prefix if missing)
20    pub fn from_hex(hex: &str) -> Self {
21        if hex.starts_with("sha256:") {
22            HashDigest(hex.to_string())
23        } else {
24            HashDigest(format!("sha256:{}", hex))
25        }
26    }
27
28    /// Get the raw hex string without prefix
29    pub fn hex(&self) -> &str {
30        self.0.strip_prefix("sha256:").unwrap_or(&self.0)
31    }
32
33    /// Get the full string with prefix
34    pub fn full(&self) -> &str {
35        &self.0
36    }
37
38    /// Check if this hash matches another (prefix-agnostic)
39    pub fn matches(&self, other: &HashDigest) -> bool {
40        self.hex() == other.hex()
41    }
42}
43
44impl std::fmt::Display for HashDigest {
45    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
46        write!(f, "{}", self.0)
47    }
48}
49
50/// Hash a string (script content, etc.)
51pub fn hash_string(content: &str) -> HashDigest {
52    let mut hasher = Sha256::new();
53    hasher.update(content.as_bytes());
54    let result = hasher.finalize();
55    HashDigest::from_hex(&hex::encode(result))
56}
57
58/// Hash a byte slice
59pub fn hash_bytes(data: &[u8]) -> HashDigest {
60    let mut hasher = Sha256::new();
61    hasher.update(data);
62    let result = hasher.finalize();
63    HashDigest::from_hex(&hex::encode(result))
64}
65
66/// Hash a file by reading its contents
67pub fn hash_file(path: &Path) -> io::Result<HashDigest> {
68    let mut file = std::fs::File::open(path)?;
69    let mut hasher = Sha256::new();
70    let mut buffer = [0u8; 8192];
71
72    loop {
73        let bytes_read = file.read(&mut buffer)?;
74        if bytes_read == 0 {
75            break;
76        }
77        hasher.update(&buffer[..bytes_read]);
78    }
79
80    let result = hasher.finalize();
81    Ok(HashDigest::from_hex(&hex::encode(result)))
82}
83
84/// Hash parameters in a deterministic way (sorted keys)
85pub fn hash_parameters(params: &BTreeMap<String, serde_json::Value>) -> HashDigest {
86    // Serialize to JSON with sorted keys for determinism
87    let json = serde_json::to_string(params).unwrap_or_default();
88    hash_string(&json)
89}
90
91/// Combine multiple hashes into one
92pub fn combine_hashes(hashes: &[&HashDigest]) -> HashDigest {
93    let mut hasher = Sha256::new();
94    for hash in hashes {
95        hasher.update(hash.hex().as_bytes());
96        hasher.update(b"|"); // Separator
97    }
98    let result = hasher.finalize();
99    HashDigest::from_hex(&hex::encode(result))
100}
101
102/// Hash multiple files and combine
103pub fn hash_data_files(paths: &[&Path]) -> io::Result<HashDigest> {
104    let mut hashes = Vec::new();
105    for path in paths {
106        hashes.push(hash_file(path)?);
107    }
108
109    let refs: Vec<&HashDigest> = hashes.iter().collect();
110    Ok(combine_hashes(&refs))
111}
112
/// Minimal lowercase hex encoder (kept local to avoid an extra dependency).
///
/// Only encoding is provided.
mod hex {
    /// ASCII digit for each nibble value `0..=15`.
    const HEX_CHARS: &[u8; 16] = b"0123456789abcdef";

    /// Encode bytes as lowercase hex: two characters per byte, high nibble first.
    pub fn encode(data: impl AsRef<[u8]>) -> String {
        let input = data.as_ref();
        let mut encoded = String::with_capacity(input.len() * 2);
        encoded.extend(
            input
                .iter()
                .flat_map(|&byte| [byte >> 4, byte & 0x0f])
                .map(|nibble| char::from(HEX_CHARS[usize::from(nibble)])),
        );
        encoded
    }
}
127
/// Unit tests for the hashing helpers.
///
/// Besides self-consistency checks, these pin the output against published
/// SHA-256 test vectors so that a wrong hasher or a broken hex encoder cannot
/// pass unnoticed.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_hash_string() {
        let hash = hash_string("hello world");
        assert!(hash.full().starts_with("sha256:"));
        assert_eq!(hash.hex().len(), 64); // SHA256 produces 32 bytes = 64 hex chars
    }

    #[test]
    fn test_hash_string_known_vectors() {
        // Standard SHA-256 test vectors for "" and "abc".
        assert_eq!(
            hash_string("").hex(),
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        );
        assert_eq!(
            hash_string("abc").hex(),
            "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
        );
    }

    #[test]
    fn test_hash_bytes_agrees_with_hash_string() {
        assert_eq!(hash_bytes(b"test content"), hash_string("test content"));
    }

    #[test]
    fn test_hash_deterministic() {
        let hash1 = hash_string("test content");
        let hash2 = hash_string("test content");
        assert_eq!(hash1, hash2);
    }

    #[test]
    fn test_hash_different_content() {
        let hash1 = hash_string("content a");
        let hash2 = hash_string("content b");
        assert_ne!(hash1, hash2);
    }

    #[test]
    fn test_hash_file_matches_hash_bytes() {
        // Larger than the 8 KiB read buffer so several chunks are exercised.
        let content = vec![0xabu8; 20_000];
        let path = std::env::temp_dir().join(format!(
            "hashing_test_hash_file_{}.bin",
            std::process::id()
        ));
        std::fs::write(&path, &content).expect("temp file should be writable");

        let result = hash_file(&path);
        // Clean up before asserting so a failure does not leave the file behind.
        let _ = std::fs::remove_file(&path);

        assert_eq!(result.expect("hashing should succeed"), hash_bytes(&content));
    }

    #[test]
    fn test_combine_hashes() {
        let h1 = hash_string("first");
        let h2 = hash_string("second");
        let combined = combine_hashes(&[&h1, &h2]);

        // Combined should be different from individual
        assert_ne!(combined, h1);
        assert_ne!(combined, h2);

        // Combining should be deterministic
        let combined2 = combine_hashes(&[&h1, &h2]);
        assert_eq!(combined, combined2);
    }

    #[test]
    fn test_combine_hashes_order_sensitive() {
        let h1 = hash_string("first");
        let h2 = hash_string("second");
        assert_ne!(combine_hashes(&[&h1, &h2]), combine_hashes(&[&h2, &h1]));
    }

    #[test]
    fn test_hash_parameters() {
        let mut params = BTreeMap::new();
        params.insert("period".to_string(), serde_json::json!(20));
        params.insert("threshold".to_string(), serde_json::json!(0.5));

        let hash1 = hash_parameters(&params);
        let hash2 = hash_parameters(&params);
        assert_eq!(hash1, hash2);
    }

    #[test]
    fn test_hash_digest_matches() {
        let h1 = HashDigest::from_hex("abc123");
        let h2 = HashDigest::from_hex("sha256:abc123");
        assert!(h1.matches(&h2));
    }
}