Skip to main content

cuenv_ci/compiler/
digest.rs

//! Runtime digest computation for cache keys
//!
//! Computes content-addressable digests for task execution based on:
//! - Input file hashes
//! - Command and arguments
//! - Environment variables
//! - Runtime configuration (flake.lock, output path)
//! - Secret fingerprints (salted SHA-256)
9
10use sha2::{Digest, Sha256};
11use std::collections::BTreeMap;
12
13/// Runtime digest builder for cache key computation
14pub struct DigestBuilder {
15    hasher: Sha256,
16}
17
18impl DigestBuilder {
19    /// Create a new digest builder
20    #[must_use]
21    pub fn new() -> Self {
22        Self {
23            hasher: Sha256::new(),
24        }
25    }
26
27    /// Add command to digest
28    pub fn add_command(&mut self, command: &[String]) -> &mut Self {
29        for arg in command {
30            self.hasher.update(arg.as_bytes());
31            self.hasher.update([0u8]); // separator
32        }
33        self
34    }
35
36    /// Add environment variables to digest (sorted by key for determinism)
37    pub fn add_env(&mut self, env: &BTreeMap<String, String>) -> &mut Self {
38        // BTreeMap is already sorted by key
39        let sorted: Vec<_> = env.iter().collect();
40
41        for (key, value) in sorted {
42            self.hasher.update(key.as_bytes());
43            self.hasher.update([b'=']);
44            self.hasher.update(value.as_bytes());
45            self.hasher.update([0u8]); // separator
46        }
47        self
48    }
49
50    /// Add input file patterns to digest
51    pub fn add_inputs(&mut self, inputs: &[String]) -> &mut Self {
52        for input in inputs {
53            self.hasher.update(input.as_bytes());
54            self.hasher.update([0u8]); // separator
55        }
56        self
57    }
58
59    /// Add runtime configuration to digest
60    pub fn add_runtime(&mut self, flake: &str, output: &str, system: &str) -> &mut Self {
61        self.hasher.update(flake.as_bytes());
62        self.hasher.update([0u8]);
63        self.hasher.update(output.as_bytes());
64        self.hasher.update([0u8]);
65        self.hasher.update(system.as_bytes());
66        self.hasher.update([0u8]);
67        self
68    }
69
70    /// Add secret fingerprints to digest (HMAC-SHA256 with system salt)
71    ///
72    /// # Arguments
73    /// * `secrets` - Map of secret names to their values
74    /// * `salt` - System-wide salt for HMAC computation
75    pub fn add_secret_fingerprints(
76        &mut self,
77        secrets: &BTreeMap<String, String>,
78        salt: &str,
79    ) -> &mut Self {
80        // BTreeMap is already sorted by key
81        let sorted: Vec<_> = secrets.iter().collect();
82
83        for (key, value) in sorted {
84            // Compute HMAC-SHA256(key + value, salt)
85            let mut hmac = Sha256::new();
86            hmac.update(salt.as_bytes());
87            hmac.update(key.as_bytes());
88            hmac.update(value.as_bytes());
89            let fingerprint = hmac.finalize();
90
91            // Add fingerprint to overall digest
92            self.hasher.update(fingerprint);
93        }
94        self
95    }
96
97    /// Add a UUID for impure flake inputs (forces cache miss)
98    pub fn add_impurity_uuid(&mut self, uuid: &str) -> &mut Self {
99        self.hasher.update(b"IMPURE:");
100        self.hasher.update(uuid.as_bytes());
101        self.hasher.update([0u8]);
102        self
103    }
104
105    /// Finalize and return hex-encoded digest
106    #[must_use]
107    pub fn finalize(self) -> String {
108        let result = self.hasher.finalize();
109        format!("sha256:{}", hex::encode(result))
110    }
111}
112
113impl Default for DigestBuilder {
114    fn default() -> Self {
115        Self::new()
116    }
117}
118
119/// Compute task runtime digest
120#[must_use]
121pub fn compute_task_digest(
122    command: &[String],
123    env: &BTreeMap<String, String>,
124    inputs: &[String],
125    runtime_digest: Option<&str>,
126    secret_fingerprints: Option<&BTreeMap<String, String>>,
127    system_salt: Option<&str>,
128) -> String {
129    let mut builder = DigestBuilder::new();
130
131    builder.add_command(command);
132    builder.add_env(env);
133    builder.add_inputs(inputs);
134
135    if let Some(runtime) = runtime_digest {
136        builder.hasher.update(runtime.as_bytes());
137    }
138
139    if let Some(secrets) = secret_fingerprints
140        && let Some(salt) = system_salt
141    {
142        builder.add_secret_fingerprints(secrets, salt);
143    }
144
145    builder.finalize()
146}
147
#[cfg(test)]
mod tests {
    use super::*;

    /// Turn string literals into the owned `Vec<String>` the API expects.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| (*item).to_string()).collect()
    }

    /// Build a string map from literal pairs, inserting in the given order.
    fn map_of(pairs: &[(&str, &str)]) -> BTreeMap<String, String> {
        pairs
            .iter()
            .map(|(key, value)| ((*key).to_string(), (*value).to_string()))
            .collect()
    }

    /// Digest with neither a runtime digest nor secrets.
    fn plain_digest(
        command: &[String],
        env: &BTreeMap<String, String>,
        inputs: &[String],
    ) -> String {
        compute_task_digest(command, env, inputs, None, None, None)
    }

    /// Digest of a `deploy` task whose only secret is `API_KEY = secret_value`.
    fn deploy_digest(secret_value: &str) -> String {
        let secrets = map_of(&[("API_KEY", secret_value)]);
        compute_task_digest(
            &owned(&["deploy"]),
            &BTreeMap::new(),
            &[],
            None,
            Some(&secrets),
            Some("system-wide-salt"),
        )
    }

    /// Digest of an `echo` command marked impure with `uuid`.
    fn impure_digest(uuid: &str) -> String {
        let mut builder = DigestBuilder::new();
        builder
            .add_command(&owned(&["echo"]))
            .add_impurity_uuid(uuid);
        builder.finalize()
    }

    #[test]
    fn test_digest_deterministic() {
        let command = owned(&["cargo", "build"]);
        let env = map_of(&[("RUST_LOG", "debug")]);
        let inputs = owned(&["src/**/*.rs"]);

        let first = plain_digest(&command, &env, &inputs);
        let second = plain_digest(&command, &env, &inputs);

        assert_eq!(first, second);
        assert!(first.starts_with("sha256:"));
    }

    #[test]
    fn test_digest_changes_with_command() {
        let env = BTreeMap::new();

        let echo = plain_digest(&owned(&["echo"]), &env, &[]);
        let ls = plain_digest(&owned(&["ls"]), &env, &[]);

        assert_ne!(echo, ls);
    }

    #[test]
    fn test_digest_changes_with_env() {
        let command = owned(&["echo"]);

        let with_value1 = plain_digest(&command, &map_of(&[("KEY", "value1")]), &[]);
        let with_value2 = plain_digest(&command, &map_of(&[("KEY", "value2")]), &[]);

        assert_ne!(with_value1, with_value2);
    }

    #[test]
    fn test_digest_env_order_independent() {
        let command = owned(&["echo"]);

        // Same entries, opposite insertion order.
        let a_then_b = map_of(&[("A", "1"), ("B", "2")]);
        let b_then_a = map_of(&[("B", "2"), ("A", "1")]);

        assert_eq!(
            plain_digest(&command, &a_then_b, &[]),
            plain_digest(&command, &b_then_a, &[]),
        );
    }

    #[test]
    fn test_secret_fingerprints() {
        // Changing only the secret value must change the digest.
        assert_ne!(deploy_digest("secret123"), deploy_digest("secret456"));
    }

    #[test]
    fn test_secret_fingerprints_deterministic() {
        assert_eq!(deploy_digest("secret123"), deploy_digest("secret123"));
    }

    #[test]
    fn test_impurity_uuid() {
        let first = impure_digest("550e8400-e29b-41d4-a716-446655440000");
        let second = impure_digest("550e8400-e29b-41d4-a716-446655440001");

        assert_ne!(first, second);
    }
}
263
#[cfg(test)]
mod proptest_tests {
    use super::*;
    use proptest::prelude::*;

    /// Digest of `cmd` under `env` with no inputs, runtime digest, or secrets.
    fn digest_of(cmd: &[String], env: &BTreeMap<String, String>) -> String {
        compute_task_digest(cmd, env, &[], None, None, None)
    }

    proptest! {
        /// Property: hashing the same command and env twice gives the same digest
        #[test]
        fn digest_is_deterministic(
            cmd in prop::collection::vec("[a-z]+", 1..5),
            key in "[A-Z_]+",
            value in "[a-zA-Z0-9]+",
        ) {
            let env = BTreeMap::from([(key, value)]);

            prop_assert_eq!(digest_of(&cmd, &env), digest_of(&cmd, &env));
        }

        /// Property: two distinct single-word commands never share a digest
        #[test]
        fn different_commands_produce_different_digests(
            cmd1 in "[a-z]+",
            cmd2 in "[a-z]+",
        ) {
            prop_assume!(cmd1 != cmd2);

            let env = BTreeMap::new();
            let left = digest_of(&[cmd1], &env);
            let right = digest_of(&[cmd2], &env);

            prop_assert_ne!(left, right);
        }

        /// Property: changing the value of one env variable changes the digest
        #[test]
        fn different_env_values_produce_different_digests(
            key in "[A-Z]+",
            value1 in "[a-z]+",
            value2 in "[a-z]+",
        ) {
            prop_assume!(value1 != value2);

            let cmd = vec!["test".to_string()];
            let left = digest_of(&cmd, &BTreeMap::from([(key.clone(), value1)]));
            let right = digest_of(&cmd, &BTreeMap::from([(key, value2)]));

            prop_assert_ne!(left, right);
        }

        /// Property: the order in which env entries are inserted is irrelevant
        #[test]
        fn env_order_is_irrelevant(
            env in prop::collection::btree_map("[A-Z]+", "[a-z]+", 2..5),
        ) {
            let cmd = vec!["test".to_string()];

            // Rebuild the same map twice: once inserting in ascending key
            // order, once in descending key order.
            let ascending: BTreeMap<String, String> = env
                .iter()
                .map(|(k, v)| (k.clone(), v.clone()))
                .collect();
            let descending: BTreeMap<String, String> = env
                .iter()
                .rev()
                .map(|(k, v)| (k.clone(), v.clone()))
                .collect();

            prop_assert_eq!(digest_of(&cmd, &ascending), digest_of(&cmd, &descending));
        }

        /// Property: every digest is `sha256:` followed by 64 hex characters
        #[test]
        fn digest_has_correct_format(
            cmd in prop::collection::vec("[a-z]+", 1..3),
        ) {
            let digest = digest_of(&cmd, &BTreeMap::new());

            prop_assert!(digest.starts_with("sha256:"));
            // SHA-256 is 32 bytes, i.e. 64 hex characters after the prefix.
            prop_assert_eq!(digest.len(), "sha256:".len() + 64);
        }
    }
}