Skip to main content

haz_cache/key/
mod.rs

1//! Cache-key type and builder.
2//!
3//! [`CacheKey`] is the 32-byte content-addressed identity of a task
4//! action under a particular set of inputs, predecessor streams,
5//! and resolved environment (`CACHE-001`). [`CacheKeyBuilder`]
6//! drives the canonical serialisation of `CACHE-004..009` through a
7//! single [`Hasher`] and produces the final [`CacheKey`].
8
9pub mod components;
10pub mod prefix;
11
12use haz_domain::action::TaskAction;
13use haz_domain::settings::cache::HashAlgo;
14
15use crate::hasher::Hasher;
16use crate::hex::{self, HexError};
17use crate::key::components::{
18    contribute_action, contribute_env, contribute_input_files, contribute_predecessors,
19};
20
21pub use crate::key::components::{EnvContribution, InputFile, PredecessorStreams};
22pub use crate::key::prefix::{CHAPTER_REVISION, hash_function_id, schema_version_prefix};
23
24/// The cache-key identity of a task under a given set of inputs
25/// (`CACHE-001`).
26///
27/// Width is 32 bytes: both specification-recognised hash functions
28/// (`CACHE-002`) emit 32-byte digests. A future hash function with
29/// a different width would require a chapter revision (`CACHE-003`)
30/// and is out of scope here.
31///
32/// [`CacheKey`] is `Copy` because its only field is a 32-byte
33/// array; passing it by value avoids accidental borrow lifetimes
34/// in caller code that threads keys through scheduling layers.
35#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
36pub struct CacheKey([u8; 32]);
37
38impl CacheKey {
39    /// The 32 bytes of this key.
40    #[must_use]
41    pub const fn as_bytes(&self) -> &[u8; 32] {
42        &self.0
43    }
44
45    /// Wrap `bytes` directly as a [`CacheKey`]. Intended for callers
46    /// that already obtained 32 bytes from a trusted source (e.g.
47    /// hex-decoded manifest, network protocol). The cache library's
48    /// invariants on a key are byte-level only; no further checking
49    /// is performed.
50    #[must_use]
51    pub const fn from_bytes(bytes: [u8; 32]) -> Self {
52        Self(bytes)
53    }
54
55    /// Hexadecimal encoding (lowercase, 64 ASCII characters) used
56    /// for on-disk paths per `CACHE-010` and the manifest's `key`
57    /// field per `CACHE-011`.
58    #[must_use]
59    pub fn to_hex(&self) -> String {
60        hex::encode_32(&self.0)
61    }
62
63    /// Decode a 64-character hexadecimal string into a
64    /// [`CacheKey`]. Mirror of [`CacheKey::to_hex`].
65    ///
66    /// # Errors
67    ///
68    /// Returns [`HexError`] when the input is not 64 hex
69    /// characters.
70    pub fn from_hex(s: &str) -> Result<Self, HexError> {
71        Ok(Self(hex::decode_32(s)?))
72    }
73}
74
75/// The full set of inputs to a cache-key derivation
76/// (`CACHE-004..008`).
77///
78/// Carries borrowed references throughout; the builder consumes
79/// nothing and the caller retains ownership of every component.
80pub struct CacheKeyInputs<'a> {
81    /// The task's declared action (`CACHE-005`).
82    pub action: &'a TaskAction,
83    /// The files matched by the task's `inputs` patterns, each
84    /// paired with its content hash under the active hash function
85    /// (`CACHE-006`).
86    pub input_files: &'a [InputFile<'a>],
87    /// The hard-edge predecessors with their captured stream
88    /// hashes (`CACHE-007`).
89    pub hard_predecessors: &'a [PredecessorStreams<'a>],
90    /// The resolved environment contribution (`CACHE-008`).
91    pub env: &'a EnvContribution<'a>,
92}
93
94/// Driver for cache-key derivation.
95///
96/// Construct with [`CacheKeyBuilder::new`] (which carries the
97/// active [`HashAlgo`] and writes the schema-version prefix per
98/// `CACHE-003`), then call [`CacheKeyBuilder::finish`] to consume
99/// the canonical [`CacheKeyInputs`] and emit a [`CacheKey`].
100///
101/// One builder produces one key; the type is not reusable. To
102/// derive several keys, construct a fresh builder per key. The
103/// canonical-byte sequence depends on the prefix being the very
104/// first contribution; the new-builder/finish pairing makes this
105/// invariant structural.
106pub struct CacheKeyBuilder {
107    hasher: Hasher,
108}
109
110impl CacheKeyBuilder {
111    /// Construct a fresh builder under `algo`.
112    ///
113    /// The schema-version prefix (`CACHE-003`) is written into the
114    /// hasher immediately: `[CHAPTER_REVISION, hash_function_id]`.
115    /// Any later `finish` call therefore derives a key under the
116    /// composite of `(chapter_revision, hash_function_id)` for that
117    /// algo, even when no other component is supplied.
118    #[must_use]
119    pub fn new(algo: HashAlgo) -> Self {
120        let mut hasher = Hasher::new(algo);
121        hasher.update(&schema_version_prefix(algo));
122        Self { hasher }
123    }
124
125    /// Consume the builder, contribute every `CACHE-004` component
126    /// in canonical order (`CACHE-009`), and return the finalised
127    /// [`CacheKey`].
128    #[must_use]
129    pub fn finish(mut self, inputs: &CacheKeyInputs<'_>) -> CacheKey {
130        contribute_action(&mut self.hasher, inputs.action);
131        contribute_input_files(&mut self.hasher, inputs.input_files);
132        contribute_predecessors(&mut self.hasher, inputs.hard_predecessors);
133        contribute_env(&mut self.hasher, inputs.env);
134        CacheKey(self.hasher.finalize())
135    }
136}
137
#[cfg(test)]
mod tests {
    // Behavioural tests for `CacheKey` and `CacheKeyBuilder`.
    //
    // Tests tied to a specification clause carry the clause id as a
    // `cache_NNN_` prefix; tests of plain type-level properties are
    // unprefixed.

    use std::collections::BTreeMap;

    use haz_domain::action::{ShellType, TaskAction};
    use haz_domain::env::EnvVarName;
    use haz_domain::name::{ProjectName, TaskName};
    use haz_domain::settings::cache::HashAlgo;
    use nonempty::NonEmpty;

    use crate::key::components::{EnvContribution, InputFile, PredecessorStreams};
    use crate::key::{CacheKey, CacheKeyBuilder, CacheKeyInputs};

    // ----- Shared fixtures -----

    /// Single derivation path used by every `key_*` helper.
    ///
    /// Passing `None` for `env` stands for an environment with no
    /// host variables and no overrides.
    fn derive(
        algo: HashAlgo,
        action: &TaskAction,
        input_files: &[InputFile<'_>],
        hard_predecessors: &[PredecessorStreams<'_>],
        env: Option<&EnvContribution<'_>>,
    ) -> CacheKey {
        let no_host: BTreeMap<EnvVarName, Option<String>> = BTreeMap::new();
        let no_overrides: BTreeMap<EnvVarName, String> = BTreeMap::new();
        let empty_env = EnvContribution {
            from_host: &no_host,
            overrides: &no_overrides,
        };
        let inputs = CacheKeyInputs {
            action,
            input_files,
            hard_predecessors,
            env: env.unwrap_or(&empty_env),
        };
        CacheKeyBuilder::new(algo).finish(&inputs)
    }

    /// Key for `action` under `algo` with every other component empty.
    fn key_of(action: &TaskAction, algo: HashAlgo) -> CacheKey {
        derive(algo, action, &[], &[], None)
    }

    /// Blake3 key for the `true` command with the given input files.
    fn key_with_inputs(files: &[InputFile<'_>]) -> CacheKey {
        let action = cmd(&["true"]);
        derive(HashAlgo::Blake3, &action, files, &[], None)
    }

    /// Blake3 key for the `true` command with the given hard-edge
    /// predecessors.
    fn key_with_predecessors(preds: &[PredecessorStreams<'_>]) -> CacheKey {
        let action = cmd(&["true"]);
        derive(HashAlgo::Blake3, &action, &[], preds, None)
    }

    /// Blake3 key for the `true` command under the given environment.
    fn key_with_env(env: &EnvContribution<'_>) -> CacheKey {
        let action = cmd(&["true"]);
        derive(HashAlgo::Blake3, &action, &[], &[], Some(env))
    }

    /// Command action whose argv is `args`; panics on an empty slice.
    fn cmd(args: &[&str]) -> TaskAction {
        let argv: Vec<String> = args.iter().map(|s| (*s).to_owned()).collect();
        TaskAction::Command(NonEmpty::from_vec(argv).expect("non-empty argv"))
    }

    /// Shell action running `script` under `shell_type`.
    fn shell(script: &str, shell_type: ShellType) -> TaskAction {
        TaskAction::Shell {
            script: script.to_owned(),
            shell: shell_type,
        }
    }

    /// Environment variable name; panics if `s` is rejected.
    fn name(s: &str) -> EnvVarName {
        EnvVarName::try_new(s).unwrap()
    }

    /// Input file at `path` whose content hash is `fill` repeated.
    fn file(path: &str, fill: u8) -> InputFile<'_> {
        InputFile {
            workspace_absolute_path: path,
            content_hash: [fill; 32],
        }
    }

    /// Predecessor whose stream hashes are the given fill bytes
    /// repeated.
    fn pred<'a>(
        project: &'a ProjectName,
        task: &'a TaskName,
        stdout_fill: u8,
        stderr_fill: u8,
    ) -> PredecessorStreams<'a> {
        PredecessorStreams {
            project,
            task,
            stdout_hash: [stdout_fill; 32],
            stderr_hash: [stderr_fill; 32],
        }
    }

    // ----- CacheKey type -----

    #[test]
    fn cache_010_to_hex_is_64_lowercase_chars() {
        let key = key_of(&cmd(&["true"]), HashAlgo::Blake3);
        let h = key.to_hex();
        assert_eq!(h.len(), 64);
        assert!(h.bytes().all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f')));
    }

    #[test]
    fn cache_010_hex_round_trips_through_from_hex() {
        // `from_hex` is documented as the mirror of `to_hex`.
        let key = key_of(&cmd(&["true"]), HashAlgo::Blake3);
        let decoded = CacheKey::from_hex(&key.to_hex());
        assert_eq!(decoded.ok(), Some(key));
    }

    #[test]
    fn from_hex_rejects_malformed_input() {
        // Anything other than exactly 64 hex characters is an error.
        assert!(CacheKey::from_hex("").is_err());
        assert!(CacheKey::from_hex(&"0".repeat(63)).is_err());
        assert!(CacheKey::from_hex(&"0".repeat(65)).is_err());
        assert!(CacheKey::from_hex(&"g".repeat(64)).is_err());
    }

    #[test]
    fn from_bytes_round_trips_through_as_bytes() {
        let raw = [0x5A; 32];
        assert_eq!(CacheKey::from_bytes(raw).as_bytes(), &raw);
    }

    #[test]
    fn cache_key_is_copy() {
        // `key` is still usable after being assigned to `copied`,
        // which only compiles while the type stays `Copy`.
        let key = key_of(&cmd(&["true"]), HashAlgo::Blake3);
        let copied = key;
        assert_eq!(key.as_bytes(), copied.as_bytes());
    }

    // ----- Schema-version prefix dependency -----

    #[test]
    fn cache_002_blake3_and_sha256_keys_diverge_on_same_inputs() {
        let action = cmd(&["true"]);
        let blake = key_of(&action, HashAlgo::Blake3);
        let sha = key_of(&action, HashAlgo::Sha256);
        assert_ne!(
            blake.as_bytes(),
            sha.as_bytes(),
            "hash_function_id byte must be in the prefix per CACHE-003"
        );
    }

    // ----- CACHE-005 task action -----

    #[test]
    fn cache_005_command_and_shell_with_same_text_diverge() {
        // Tag byte 0x01 (command) vs 0x02 (shell) keeps them
        // distinct even when the textual content is identical.
        let cmd_key = key_of(&cmd(&["foo"]), HashAlgo::Blake3);
        let shell_key = key_of(&shell("foo", ShellType::Sh), HashAlgo::Blake3);
        assert_ne!(cmd_key.as_bytes(), shell_key.as_bytes());
    }

    #[test]
    fn cache_005_shell_type_change_changes_key() {
        let sh_key = key_of(&shell("echo hi", ShellType::Sh), HashAlgo::Blake3);
        let bash_key = key_of(&shell("echo hi", ShellType::Bash), HashAlgo::Blake3);
        assert_ne!(sh_key.as_bytes(), bash_key.as_bytes());
    }

    #[test]
    fn cache_005_argv_order_changes_key() {
        // A change in argv order is a change to the command and
        // MUST produce a different key.
        let a = key_of(&cmd(&["echo", "a", "b"]), HashAlgo::Blake3);
        let b = key_of(&cmd(&["echo", "b", "a"]), HashAlgo::Blake3);
        assert_ne!(a.as_bytes(), b.as_bytes());
    }

    #[test]
    fn cache_005_empty_string_argument_is_distinct_from_no_argument() {
        // CACHE-005 length prefixes make a single empty argument
        // distinct from the absence of that argument.
        let with_empty = key_of(&cmd(&["echo", ""]), HashAlgo::Blake3);
        let without_arg = key_of(&cmd(&["echo"]), HashAlgo::Blake3);
        assert_ne!(with_empty.as_bytes(), without_arg.as_bytes());
    }

    // ----- CACHE-006 input files -----

    #[test]
    fn cache_006_input_file_order_does_not_matter() {
        // CACHE-009 requires byte-wise ascending order on
        // workspace-absolute paths; the builder sorts internally
        // so caller order is irrelevant.
        let ab = key_with_inputs(&[file("/p/a", 0xAA), file("/p/b", 0xBB)]);
        let ba = key_with_inputs(&[file("/p/b", 0xBB), file("/p/a", 0xAA)]);
        assert_eq!(ab.as_bytes(), ba.as_bytes());
    }

    #[test]
    fn cache_006_input_file_count_changes_key() {
        let with_one = key_with_inputs(&[file("/p/a", 0xAA)]);
        let empty = key_with_inputs(&[]);
        assert_ne!(with_one.as_bytes(), empty.as_bytes());
    }

    #[test]
    fn cache_006_input_file_path_change_changes_key() {
        let original = key_with_inputs(&[file("/p/a", 0xAA)]);
        let renamed = key_with_inputs(&[file("/p/b", 0xAA)]);
        assert_ne!(original.as_bytes(), renamed.as_bytes());
    }

    #[test]
    fn cache_006_input_file_content_change_changes_key() {
        let original = key_with_inputs(&[file("/p/a", 0xAA)]);
        let edited = key_with_inputs(&[file("/p/a", 0xBB)]);
        assert_ne!(original.as_bytes(), edited.as_bytes());
    }

    // ----- CACHE-007 hard-edge predecessors -----

    #[test]
    fn cache_007_predecessor_order_does_not_matter() {
        let p_a = ProjectName::try_new("alpha").unwrap();
        let p_b = ProjectName::try_new("beta").unwrap();
        let t_x = TaskName::try_new("x").unwrap();
        let t_y = TaskName::try_new("y").unwrap();

        let ab = key_with_predecessors(&[
            pred(&p_a, &t_x, 0x01, 0x02),
            pred(&p_b, &t_y, 0x03, 0x04),
        ]);
        let ba = key_with_predecessors(&[
            pred(&p_b, &t_y, 0x03, 0x04),
            pred(&p_a, &t_x, 0x01, 0x02),
        ]);
        assert_eq!(ab.as_bytes(), ba.as_bytes());
    }

    #[test]
    fn cache_007_predecessor_stdout_stderr_swap_changes_key() {
        // CACHE-007 keeps the two streams distinct: swapping
        // stdout and stderr hashes for the same predecessor MUST
        // yield a different key.
        let p = ProjectName::try_new("alpha").unwrap();
        let t = TaskName::try_new("x").unwrap();
        let original = key_with_predecessors(&[pred(&p, &t, 0x01, 0x02)]);
        let swapped = key_with_predecessors(&[pred(&p, &t, 0x02, 0x01)]);
        assert_ne!(original.as_bytes(), swapped.as_bytes());
    }

    // ----- CACHE-008 environment -----

    #[test]
    fn cache_008_from_host_value_change_changes_key() {
        let mut a_host = BTreeMap::new();
        a_host.insert(name("PATH"), Some("/usr/bin".to_owned()));
        let mut b_host = BTreeMap::new();
        b_host.insert(name("PATH"), Some("/usr/local/bin".to_owned()));
        let overrides = BTreeMap::new();
        let a = key_with_env(&EnvContribution {
            from_host: &a_host,
            overrides: &overrides,
        });
        let b = key_with_env(&EnvContribution {
            from_host: &b_host,
            overrides: &overrides,
        });
        assert_ne!(a.as_bytes(), b.as_bytes());
    }

    #[test]
    fn cache_008_from_host_absent_differs_from_empty_string() {
        // CACHE-008's 0x00 absent marker keeps "host did not set
        // the variable" distinct from "host set it to the empty
        // string".
        let mut absent_host = BTreeMap::new();
        absent_host.insert(name("X"), None);
        let mut empty_host = BTreeMap::new();
        empty_host.insert(name("X"), Some(String::new()));
        let overrides = BTreeMap::new();
        let absent = key_with_env(&EnvContribution {
            from_host: &absent_host,
            overrides: &overrides,
        });
        let empty = key_with_env(&EnvContribution {
            from_host: &empty_host,
            overrides: &overrides,
        });
        assert_ne!(absent.as_bytes(), empty.as_bytes());
    }

    #[test]
    fn cache_008_override_wins_over_from_host_on_name_collision() {
        // Same override is present in both runs; from_host's value
        // for the collided name differs. The key MUST be the same:
        // overrides win and the from_host value for the collided
        // name does not contribute (CACHE-008).
        let mut host_a = BTreeMap::new();
        host_a.insert(name("X"), Some("host-a".to_owned()));
        let mut host_b = BTreeMap::new();
        host_b.insert(name("X"), Some("host-b".to_owned()));
        let mut overrides = BTreeMap::new();
        overrides.insert(name("X"), "fixed".to_owned());

        let a = key_with_env(&EnvContribution {
            from_host: &host_a,
            overrides: &overrides,
        });
        let b = key_with_env(&EnvContribution {
            from_host: &host_b,
            overrides: &overrides,
        });
        assert_eq!(a.as_bytes(), b.as_bytes());
    }

    #[test]
    fn cache_008_from_host_and_override_with_same_bytes_still_distinct() {
        // CACHE-008 keeps the two contributions distinct even when
        // a name appears under one and produces a byte-identical
        // value under the other. A name in from_host with value
        // "v" vs the same name in overrides with value "v" must
        // yield different keys.
        let mut host = BTreeMap::new();
        host.insert(name("X"), Some("v".to_owned()));
        let empty_overrides = BTreeMap::new();
        let only_host = key_with_env(&EnvContribution {
            from_host: &host,
            overrides: &empty_overrides,
        });

        let empty_host = BTreeMap::new();
        let mut overrides = BTreeMap::new();
        overrides.insert(name("X"), "v".to_owned());
        let only_overrides = key_with_env(&EnvContribution {
            from_host: &empty_host,
            overrides: &overrides,
        });

        assert_ne!(only_host.as_bytes(), only_overrides.as_bytes());
    }

    #[test]
    fn cache_008_empty_env_is_distinct_from_any_named_env() {
        let empty_host = BTreeMap::new();
        let empty_overrides = BTreeMap::new();
        let empty = key_with_env(&EnvContribution {
            from_host: &empty_host,
            overrides: &empty_overrides,
        });

        let mut single_entry = BTreeMap::new();
        single_entry.insert(name("X"), None);
        let one_absent = key_with_env(&EnvContribution {
            from_host: &single_entry,
            overrides: &empty_overrides,
        });

        assert_ne!(empty.as_bytes(), one_absent.as_bytes());
    }

    // ----- Smoke / soundness -----

    #[test]
    fn cache_001_identical_inputs_yield_identical_keys() {
        // Cache-key determinism (CACHE-001): two builders fed the
        // same inputs in the same order produce the same key.
        let a = key_of(&cmd(&["echo", "hi"]), HashAlgo::Blake3);
        let b = key_of(&cmd(&["echo", "hi"]), HashAlgo::Blake3);
        assert_eq!(a.as_bytes(), b.as_bytes());
    }

    #[test]
    fn cache_001_task_identity_does_not_contribute() {
        // The builder takes no project/task identity. Two keys
        // derived for what would be different (project, task)
        // pairs in production code, but with identical components,
        // collide by design (CACHE-001 content addressing). This
        // test is a structural reminder: the API surface lacks any
        // means to inject identity, so its body necessarily matches
        // the determinism test above.
        let a = key_of(&cmd(&["echo", "hi"]), HashAlgo::Blake3);
        let b = key_of(&cmd(&["echo", "hi"]), HashAlgo::Blake3);
        assert_eq!(a.as_bytes(), b.as_bytes());
    }
}