// cuenv_ci/compiler/digest.rs

//! Runtime digest computation for cache keys.
//!
//! Computes content-addressable digests for task execution from:
//! - Input entries (file patterns or hashes, supplied by the caller as strings)
//! - Command and arguments
//! - Environment variables
//! - Runtime configuration (flake.lock, output path, system)
//! - Secret fingerprints (salted, so raw secret values are never fed into the digest directly)
9
10use sha2::{Digest, Sha256};
11use std::collections::HashMap;
12
13/// Runtime digest builder for cache key computation
14pub struct DigestBuilder {
15    hasher: Sha256,
16}
17
18impl DigestBuilder {
19    /// Create a new digest builder
20    #[must_use]
21    pub fn new() -> Self {
22        Self {
23            hasher: Sha256::new(),
24        }
25    }
26
27    /// Add command to digest
28    pub fn add_command(&mut self, command: &[String]) -> &mut Self {
29        for arg in command {
30            self.hasher.update(arg.as_bytes());
31            self.hasher.update([0u8]); // separator
32        }
33        self
34    }
35
36    /// Add environment variables to digest (sorted by key for determinism)
37    pub fn add_env(&mut self, env: &HashMap<String, String>) -> &mut Self {
38        let mut sorted: Vec<_> = env.iter().collect();
39        sorted.sort_by_key(|(k, _)| *k);
40
41        for (key, value) in sorted {
42            self.hasher.update(key.as_bytes());
43            self.hasher.update([b'=']);
44            self.hasher.update(value.as_bytes());
45            self.hasher.update([0u8]); // separator
46        }
47        self
48    }
49
50    /// Add input file patterns to digest
51    pub fn add_inputs(&mut self, inputs: &[String]) -> &mut Self {
52        for input in inputs {
53            self.hasher.update(input.as_bytes());
54            self.hasher.update([0u8]); // separator
55        }
56        self
57    }
58
59    /// Add runtime configuration to digest
60    pub fn add_runtime(&mut self, flake: &str, output: &str, system: &str) -> &mut Self {
61        self.hasher.update(flake.as_bytes());
62        self.hasher.update([0u8]);
63        self.hasher.update(output.as_bytes());
64        self.hasher.update([0u8]);
65        self.hasher.update(system.as_bytes());
66        self.hasher.update([0u8]);
67        self
68    }
69
70    /// Add secret fingerprints to digest (HMAC-SHA256 with system salt)
71    ///
72    /// # Arguments
73    /// * `secrets` - Map of secret names to their values
74    /// * `salt` - System-wide salt for HMAC computation
75    pub fn add_secret_fingerprints(
76        &mut self,
77        secrets: &HashMap<String, String>,
78        salt: &str,
79    ) -> &mut Self {
80        let mut sorted: Vec<_> = secrets.iter().collect();
81        sorted.sort_by_key(|(k, _)| *k);
82
83        for (key, value) in sorted {
84            // Compute HMAC-SHA256(key + value, salt)
85            let mut hmac = Sha256::new();
86            hmac.update(salt.as_bytes());
87            hmac.update(key.as_bytes());
88            hmac.update(value.as_bytes());
89            let fingerprint = hmac.finalize();
90
91            // Add fingerprint to overall digest
92            self.hasher.update(fingerprint);
93        }
94        self
95    }
96
97    /// Add a UUID for impure flake inputs (forces cache miss)
98    pub fn add_impurity_uuid(&mut self, uuid: &str) -> &mut Self {
99        self.hasher.update(b"IMPURE:");
100        self.hasher.update(uuid.as_bytes());
101        self.hasher.update([0u8]);
102        self
103    }
104
105    /// Finalize and return hex-encoded digest
106    #[must_use]
107    pub fn finalize(self) -> String {
108        let result = self.hasher.finalize();
109        format!("sha256:{}", hex::encode(result))
110    }
111}
112
113impl Default for DigestBuilder {
114    fn default() -> Self {
115        Self::new()
116    }
117}
118
119/// Compute task runtime digest
120#[must_use]
121#[allow(clippy::implicit_hasher)] // Standard HashMap is fine for digest computation
122pub fn compute_task_digest(
123    command: &[String],
124    env: &HashMap<String, String>,
125    inputs: &[String],
126    runtime_digest: Option<&str>,
127    secret_fingerprints: Option<&HashMap<String, String>>,
128    system_salt: Option<&str>,
129) -> String {
130    let mut builder = DigestBuilder::new();
131
132    builder.add_command(command);
133    builder.add_env(env);
134    builder.add_inputs(inputs);
135
136    if let Some(runtime) = runtime_digest {
137        builder.hasher.update(runtime.as_bytes());
138    }
139
140    if let Some(secrets) = secret_fingerprints
141        && let Some(salt) = system_salt
142    {
143        builder.add_secret_fingerprints(secrets, salt);
144    }
145
146    builder.finalize()
147}
148
#[cfg(test)]
mod tests {
    use super::*;

    /// Turn string literals into the owned `Vec<String>` the API expects.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| (*item).to_string()).collect()
    }

    /// Build a map holding exactly one `key -> value` entry.
    fn single_entry(key: &str, value: &str) -> HashMap<String, String> {
        HashMap::from([(key.to_string(), value.to_string())])
    }

    /// Digest with no runtime digest, secrets, or salt.
    fn plain_digest(
        command: &[String],
        env: &HashMap<String, String>,
        inputs: &[String],
    ) -> String {
        compute_task_digest(command, env, inputs, None, None, None)
    }

    /// Digest for a `deploy` command whose only varying part is the secret map.
    fn deploy_digest(secrets: &HashMap<String, String>) -> String {
        let command = owned(&["deploy"]);
        let env = HashMap::new();
        let inputs: Vec<String> = Vec::new();
        compute_task_digest(
            &command,
            &env,
            &inputs,
            None,
            Some(secrets),
            Some("system-wide-salt"),
        )
    }

    #[test]
    fn test_digest_deterministic() {
        let command = owned(&["cargo", "build"]);
        let env = single_entry("RUST_LOG", "debug");
        let inputs = owned(&["src/**/*.rs"]);

        let first = plain_digest(&command, &env, &inputs);
        let second = plain_digest(&command, &env, &inputs);

        assert_eq!(first, second);
        assert!(first.starts_with("sha256:"));
    }

    #[test]
    fn test_digest_changes_with_command() {
        let env = HashMap::new();
        let inputs: Vec<String> = Vec::new();

        let echo = plain_digest(&owned(&["echo"]), &env, &inputs);
        let ls = plain_digest(&owned(&["ls"]), &env, &inputs);

        assert_ne!(echo, ls);
    }

    #[test]
    fn test_digest_changes_with_env() {
        let command = owned(&["echo"]);
        let inputs: Vec<String> = Vec::new();

        let with_value1 = plain_digest(&command, &single_entry("KEY", "value1"), &inputs);
        let with_value2 = plain_digest(&command, &single_entry("KEY", "value2"), &inputs);

        assert_ne!(with_value1, with_value2);
    }

    #[test]
    fn test_digest_env_order_independent() {
        let command = owned(&["echo"]);
        let inputs: Vec<String> = Vec::new();

        // Same entries, inserted A-then-B and B-then-A.
        let mut a_first = HashMap::new();
        a_first.insert("A".to_string(), "1".to_string());
        a_first.insert("B".to_string(), "2".to_string());

        let mut b_first = HashMap::new();
        b_first.insert("B".to_string(), "2".to_string());
        b_first.insert("A".to_string(), "1".to_string());

        assert_eq!(
            plain_digest(&command, &a_first, &inputs),
            plain_digest(&command, &b_first, &inputs),
        );
    }

    #[test]
    fn test_secret_fingerprints() {
        let original = deploy_digest(&single_entry("API_KEY", "secret123"));
        // Same secret name, different value.
        let rotated = deploy_digest(&single_entry("API_KEY", "secret456"));

        // Digests should differ when secret changes
        assert_ne!(original, rotated);
    }

    #[test]
    fn test_secret_fingerprints_deterministic() {
        let secrets = single_entry("API_KEY", "secret123");

        assert_eq!(deploy_digest(&secrets), deploy_digest(&secrets));
    }

    #[test]
    fn test_impurity_uuid() {
        // Identical command, only the impurity UUID varies.
        let digest_for = |uuid: &str| {
            let mut builder = DigestBuilder::new();
            builder.add_command(&["echo".to_string()]);
            builder.add_impurity_uuid(uuid);
            builder.finalize()
        };

        let first = digest_for("550e8400-e29b-41d4-a716-446655440000");
        let second = digest_for("550e8400-e29b-41d4-a716-446655440001");

        assert_ne!(first, second);
    }
}
264
#[cfg(test)]
mod proptest_tests {
    use super::*;
    use proptest::prelude::*;

    /// Digest of `command` + `env` with no inputs, runtime digest, or secrets.
    fn basic_digest(command: &[String], env: &HashMap<String, String>) -> String {
        let no_inputs: Vec<String> = Vec::new();
        compute_task_digest(command, env, &no_inputs, None, None, None)
    }

    proptest! {
        /// Property: hashing the same inputs twice yields the same digest
        #[test]
        fn digest_is_deterministic(
            cmd in prop::collection::vec("[a-z]+", 1..5),
            key in "[A-Z_]+",
            value in "[a-zA-Z0-9]+",
        ) {
            let env = HashMap::from([(key, value)]);

            let first = basic_digest(&cmd, &env);
            let second = basic_digest(&cmd, &env);

            prop_assert_eq!(first, second);
        }

        /// Property: two distinct single-word commands never share a digest
        #[test]
        fn different_commands_produce_different_digests(
            cmd1 in "[a-z]+",
            cmd2 in "[a-z]+",
        ) {
            prop_assume!(cmd1 != cmd2);

            let env = HashMap::new();

            let first = basic_digest(&[cmd1], &env);
            let second = basic_digest(&[cmd2], &env);

            prop_assert_ne!(first, second);
        }

        /// Property: changing the value of an env variable changes the digest
        #[test]
        fn different_env_values_produce_different_digests(
            key in "[A-Z]+",
            value1 in "[a-z]+",
            value2 in "[a-z]+",
        ) {
            prop_assume!(value1 != value2);

            let cmd = vec!["test".to_string()];
            let env1 = HashMap::from([(key.clone(), value1)]);
            let env2 = HashMap::from([(key, value2)]);

            prop_assert_ne!(basic_digest(&cmd, &env1), basic_digest(&cmd, &env2));
        }

        /// Property: the order in which env entries are inserted does not matter
        #[test]
        fn env_order_is_irrelevant(
            env in prop::collection::hash_map("[A-Z]+", "[a-z]+", 2..5),
        ) {
            let cmd = vec!["test".to_string()];

            // Snapshot the entries once, then rebuild the map forwards and backwards.
            let pairs: Vec<(String, String)> = env.into_iter().collect();
            let forward: HashMap<String, String> = pairs.iter().cloned().collect();
            let backward: HashMap<String, String> = pairs.into_iter().rev().collect();

            prop_assert_eq!(basic_digest(&cmd, &forward), basic_digest(&cmd, &backward));
        }

        /// Property: every digest is `sha256:` followed by 64 hex characters
        #[test]
        fn digest_has_correct_format(
            cmd in prop::collection::vec("[a-z]+", 1..3),
        ) {
            let env = HashMap::new();

            let digest = basic_digest(&cmd, &env);

            prop_assert!(digest.starts_with("sha256:"));
            // SHA-256 output is 32 bytes, i.e. 64 hex characters after the prefix.
            prop_assert_eq!(digest.len(), "sha256:".len() + 64);
        }
    }
}