omicron_zone_package/
cache.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
5//! Tracks inputs and outputs by digest to help caching.
6//!
7//! When a package is built, we keep track of all the inputs
8//! used to construct that package, as well as the output file
9//! name. This information is then converted into an [ArtifactManifest],
10//! which tracks the digests of all those inputs, and this manifest
11//! is written to the [CACHE_SUBDIRECTORY] within the output directory.
12//!
13//! When re-building, we can look up this manifest: if all the inputs
14//! to build a package are the same, the output should be the same, so
15//! we can use the cached output to avoid an unnecessary package construction
16//! step.
17
18use crate::digest::{DefaultDigest, Digest, FileDigester};
19use crate::input::{BuildInput, BuildInputs};
20
21use anyhow::{anyhow, bail, Context};
22use camino::{Utf8Path, Utf8PathBuf};
23use serde::{Deserialize, Serialize};
24use std::marker::PhantomData;
25use thiserror::Error;
26use tokio::fs::File;
27use tokio::io::{AsyncReadExt, AsyncWriteExt};
28
29pub const CACHE_SUBDIRECTORY: &str = "manifest-cache";
30
31pub type Inputs = Vec<BuildInput>;
32
// Ordered collection of build inputs, each paired with its digest.
//
// It's not actually a map, because serde doesn't like enum keys.
//
// This has the side-effect that changing the order of input files
// changes the package. The derived equality on the inner `Vec` is
// position-sensitive, so two maps holding the same entries in a
// different order compare as unequal.
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
struct InputMap(Vec<InputEntry>);
39
// One element of an `InputMap`: a build input and the digest recorded for it.
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
struct InputEntry {
    // The build input being tracked.
    key: BuildInput,
    // Digest of the input's file contents, or `None` when the input
    // reports no input path to digest.
    value: Option<Digest>,
}
45
/// Describes how a single artifact was produced: the inputs that went
/// into it (with their digests) and the path of the resulting output.
///
/// The type parameter `D` selects the digest implementation used when the
/// manifest is constructed.
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct ArtifactManifest<D = DefaultDigest> {
    // All inputs, which create this artifact
    inputs: InputMap,

    // Output, created by this artifact
    output_path: Utf8PathBuf,

    // Which digest is being used?
    //
    // Zero-sized marker; `D` is only used at construction time to compute
    // the input digests.
    phantom: PhantomData<D>,
}
57
58impl<D: FileDigester> ArtifactManifest<D> {
59    /// Reads all inputs and outputs, collecting their digests.
60    async fn new(inputs: &BuildInputs, output_path: Utf8PathBuf) -> anyhow::Result<Self> {
61        let result = Self::new_internal(inputs, output_path, None).await?;
62        Ok(result)
63    }
64
65    // If the optional "compare_with" field is supplied, construction
66    // of the ArtifactManifest exits early if any of the inputs are not
67    // equal to the digests found in "compare_with". This helps improve
68    // the "cache miss" case, by allowing us to stop calculating hashes
69    // as soon as we find any divergence.
70    async fn new_internal(
71        inputs: &BuildInputs,
72        output_path: Utf8PathBuf,
73        compare_with: Option<&Self>,
74    ) -> Result<Self, CacheError> {
75        let input_entry_tasks = inputs.0.iter().cloned().enumerate().map(|(i, input)| {
76            let expected_input = compare_with.map(|manifest| &manifest.inputs.0[i]);
77            async move {
78                let digest = if let Some(input_path) = input.input_path() {
79                    Some(D::get_digest(input_path).await?)
80                } else {
81                    None
82                };
83                let input = InputEntry {
84                    key: input.clone(),
85                    value: digest,
86                };
87
88                if let Some(expected_input) = expected_input {
89                    if *expected_input != input {
90                        CacheError::miss(format!(
91                            "Differing build inputs.\nSaw {:#?}\nExpected {:#?})",
92                            input, expected_input
93                        ));
94                    }
95                };
96
97                Ok::<_, CacheError>(input)
98            }
99        });
100
101        let inputs = InputMap(futures::future::try_join_all(input_entry_tasks).await?);
102
103        Ok(Self {
104            inputs,
105            output_path,
106            phantom: PhantomData,
107        })
108    }
109
110    // Writes a manifest file to a particular location.
111    async fn write_to(&self, path: &Utf8PathBuf) -> anyhow::Result<()> {
112        let Some(extension) = path.extension() else {
113            bail!("Missing extension?");
114        };
115        if extension != "json" {
116            bail!("JSON encoding is all we know. Write to a '.json' file?");
117        }
118        let serialized =
119            serde_json::to_string(&self).context("Failed to serialize ArtifactManifest to JSON")?;
120
121        let mut f = File::create(path).await?;
122        f.write_all(serialized.as_bytes()).await?;
123        Ok(())
124    }
125
126    // Reads a manifest file to a particular location.
127    //
128    // Does not validate whether or not any corresponding artifacts exist.
129    async fn read_from(path: &Utf8PathBuf) -> Result<Self, CacheError> {
130        let Some(extension) = path.extension() else {
131            return Err(anyhow!("Missing extension?").into());
132        };
133        if extension != "json" {
134            return Err(anyhow!("JSON encoding is all we know. Read from a '.json' file?").into());
135        }
136
137        let mut f = match File::open(path).await {
138            Ok(f) => f,
139            Err(e) => {
140                if matches!(e.kind(), std::io::ErrorKind::NotFound) {
141                    return Err(CacheError::miss(format!("File {} not found", path)));
142                } else {
143                    return Err(anyhow!(e).into());
144                }
145            }
146        };
147        let mut buffer = String::new();
148        f.read_to_string(&mut buffer)
149            .await
150            .map_err(|e| anyhow!(e))?;
151
152        // In the case that we cannot read the manifest, treat it as "missing".
153        // This will force a rebuild anyway.
154        let Ok(manifest) = serde_json::from_str(&buffer) else {
155            return Err(CacheError::miss(format!(
156                "Cannot parse manifest at {}",
157                path
158            )));
159        };
160        Ok(manifest)
161    }
162}
163
/// Errors that can be returned when looking up cached artifacts.
///
/// The two variants separate "nothing usable in the cache" from genuine
/// failures, so callers can decide whether to rebuild or to abort.
#[derive(Error, Debug)]
pub enum CacheError {
    /// Identifies that cache lookup has failed, for a wide number of reasons,
    /// but that we should probably try to continue with package building
    /// anyway.
    ///
    /// `reason` is a human-readable explanation of why the lookup missed.
    #[error("Cache Miss: {reason}")]
    CacheMiss { reason: String },

    /// Other errors, which could indicate a more fundamental problem.
    ///
    /// These errors encourage callers to exit immediately, rather than
    /// treating the failure like a "miss".
    ///
    /// Any [anyhow::Error] converts into this variant, so `?` can be used
    /// on `anyhow` results in functions returning [CacheError].
    #[error(transparent)]
    Other(#[from] anyhow::Error),
}
180
181impl CacheError {
182    // Convenience wrapper
183    fn miss<T: Into<String>>(t: T) -> Self {
184        CacheError::CacheMiss { reason: t.into() }
185    }
186}
187
/// Provides access to a set of manifests describing packages.
///
/// Provides two primary operations:
/// - [Self::lookup]: Support for finding previously-built packages
/// - [Self::update]: Support for updating a package's latest manifest
pub struct Cache {
    // When true, lookups always report a miss and updates do nothing.
    disabled: bool,
    // Directory in which the per-artifact manifest files are stored.
    cache_directory: Utf8PathBuf,
}
197
198impl Cache {
199    /// Ensures the cache directory exists within the output directory
200    pub async fn new(output_directory: &Utf8Path) -> anyhow::Result<Self> {
201        let cache_directory = output_directory.join(CACHE_SUBDIRECTORY);
202        tokio::fs::create_dir_all(&cache_directory).await?;
203        Ok(Self {
204            disabled: false,
205            cache_directory,
206        })
207    }
208
209    /// If "disable" is true, causes cache operations to be no-ops.
210    /// Otherwise, causes the cache to act normally.
211    pub fn set_disable(&mut self, disable: bool) {
212        self.disabled = disable;
213    }
214
215    /// Looks up an entry from the cache.
216    ///
217    /// Confirms that the artifact exists.
218    pub async fn lookup(
219        &self,
220        inputs: &BuildInputs,
221        output_path: &Utf8Path,
222    ) -> Result<ArtifactManifest, CacheError> {
223        if self.disabled {
224            return Err(CacheError::miss("Cache disabled"));
225        }
226
227        let artifact_filename = output_path
228            .file_name()
229            .ok_or_else(|| CacheError::Other(anyhow!("Output has no file name")))?;
230        let mut manifest_filename = String::from(artifact_filename);
231        manifest_filename.push_str(".json");
232
233        let manifest_path = self.cache_directory.join(manifest_filename);
234
235        // Look up the manifest file in the cache
236        let manifest = ArtifactManifest::read_from(&manifest_path).await?;
237
238        // Do a quick check if the input files are different.
239        //
240        // We'll actually validate the digests later, but this lets us bail
241        // early if any files were added or removed.
242        if inputs
243            .0
244            .iter()
245            .ne(manifest.inputs.0.iter().map(|entry| &entry.key))
246        {
247            return Err(CacheError::miss("Set of inputs has changed"));
248        }
249        if output_path != manifest.output_path {
250            return Err(CacheError::miss(format!(
251                "Output path changed from {} -> {}",
252                manifest.output_path, output_path,
253            )));
254        }
255
256        // Confirm the output file exists
257        if !tokio::fs::try_exists(&output_path)
258            .await
259            .map_err(|e| CacheError::miss(format!("Cannot locate output artifact: {e}")))?
260        {
261            return Err(CacheError::miss("Output does not exist"));
262        }
263
264        // Confirm the output matches.
265        let Some(observed_filename) = manifest.output_path.file_name() else {
266            return Err(CacheError::miss(format!(
267                "Missing output file name from manifest {}",
268                manifest.output_path
269            )));
270        };
271        if observed_filename != artifact_filename {
272            return Err(CacheError::miss(format!(
273                "Wrong output name in manifest (saw {}, expected {})",
274                observed_filename, artifact_filename
275            )));
276        }
277
278        // Finally, compare the manifests, including their digests.
279        //
280        // This calculation bails out early if any inputs don't match.
281        let calculated_manifest =
282            ArtifactManifest::new_internal(inputs, output_path.to_path_buf(), Some(&manifest))
283                .await?;
284
285        // This is a hard stop-gap against any other differences in the
286        // manifests. The error message here is worse (we don't know "why"),
287        // but it's a quick check that's protective.
288        if calculated_manifest != manifest {
289            return Err(CacheError::miss("Manifests appear different"));
290        }
291
292        Ok(manifest)
293    }
294
295    /// Updates an artifact's entry within the cache
296    pub async fn update(
297        &self,
298        inputs: &BuildInputs,
299        output_path: &Utf8Path,
300    ) -> Result<(), CacheError> {
301        if self.disabled {
302            // Return immediately, regardless of the input. We have nothing to
303            // calculate, and nothing to save.
304            return Ok(());
305        }
306
307        // This call actually acquires the digests for all inputs
308        let manifest =
309            ArtifactManifest::<DefaultDigest>::new(inputs, output_path.to_path_buf()).await?;
310
311        let Some(artifact_filename) = manifest.output_path.file_name() else {
312            return Err(anyhow!("Bad manifest: Missing output name").into());
313        };
314
315        let mut manifest_filename = String::from(artifact_filename);
316        manifest_filename.push_str(".json");
317        let manifest_path = self.cache_directory.join(manifest_filename);
318        manifest.write_to(&manifest_path).await?;
319
320        Ok(())
321    }
322}
323
#[cfg(test)]
mod test {
    use super::*;
    use crate::input::MappedPath;
    use camino::Utf8PathBuf;
    use camino_tempfile::{tempdir, Utf8TempDir};

    // Temporary directories plus the input/output file paths used by a test.
    //
    // The directory handles are stored so the directories outlive the test
    // body.
    struct CacheTest {
        _input_dir: Utf8TempDir,
        output_dir: Utf8TempDir,

        input_path: Utf8PathBuf,
        output_path: Utf8PathBuf,
    }

    impl CacheTest {
        // Creates fresh temporary directories; no files are written yet.
        fn new() -> Self {
            let input_dir = tempdir().unwrap();
            let output_dir = tempdir().unwrap();
            Self {
                input_path: input_dir.path().join("binary.exe"),
                output_path: output_dir.path().join("output.tar.gz"),
                _input_dir: input_dir,
                output_dir,
            }
        }

        // (Over)writes the input file.
        async fn create_input(&self, contents: &str) {
            tokio::fs::write(&self.input_path, contents).await.unwrap()
        }

        // (Over)writes the output file.
        async fn create_output(&self, contents: &str) {
            tokio::fs::write(&self.output_path, contents).await.unwrap()
        }

        // Deletes the output file.
        async fn remove_output(&self) {
            tokio::fs::remove_file(&self.output_path).await.unwrap()
        }

        // Build inputs consisting of just the test's input file.
        fn single_file_inputs(&self) -> BuildInputs {
            let mapping = MappedPath {
                from: self.input_path.to_path_buf(),
                to: Utf8PathBuf::from("/very/important/file"),
            };
            BuildInputs(vec![BuildInput::add_file(mapping).unwrap()])
        }

        // A cache living inside the test's output directory.
        async fn cache(&self) -> Cache {
            Cache::new(self.output_dir.path()).await.unwrap()
        }
    }

    // Asserts that `err` is a cache miss whose reason mentions `needle`.
    fn expect_miss_containing(err: &CacheError, needle: &str) {
        match err {
            CacheError::CacheMiss { reason } => {
                assert!(reason.contains(needle), "{}", reason);
            }
            CacheError::Other(_) => panic!("Unexpected error: {}", err),
        }
    }

    fn expect_missing_manifest(err: &CacheError, file: &str) {
        expect_miss_containing(err, &format!("{file}.json not found"));
    }

    fn expect_cache_disabled(err: &CacheError) {
        expect_miss_containing(err, "Cache disabled");
    }

    fn expect_changed_manifests(err: &CacheError) {
        expect_miss_containing(err, "Manifests appear different");
    }

    fn expect_missing_output(err: &CacheError) {
        expect_miss_containing(err, "Output does not exist");
    }

    #[tokio::test]
    async fn test_cache_lookup_misses_before_update() {
        let test = CacheTest::new();

        test.create_input("Hi I'm the input file").await;
        let inputs = test.single_file_inputs();
        let cache = test.cache().await;

        // Nothing has been recorded yet, so the manifest cannot be found.
        let err = cache.lookup(&inputs, &test.output_path).await.unwrap_err();
        expect_missing_manifest(&err, "output.tar.gz");

        // Producing the output is not enough on its own...
        test.create_output("Hi I'm the output file").await;

        // ...because "cache.update" was never called.
        let err = cache.lookup(&inputs, &test.output_path).await.unwrap_err();
        expect_missing_manifest(&err, "output.tar.gz");
    }

    #[tokio::test]
    async fn test_cache_lookup_hits_after_update() {
        let test = CacheTest::new();

        test.create_input("Hi I'm the input file").await;
        let inputs = test.single_file_inputs();

        // The output the build would have produced.
        test.create_output("Hi I'm the output file").await;

        let cache = test.cache().await;

        // Recording the build makes the following lookup a hit.
        cache.update(&inputs, &test.output_path).await.unwrap();
        cache.lookup(&inputs, &test.output_path).await.unwrap();

        // Changing the input's contents turns it back into a miss.
        test.create_input("hi i'M tHe InPuT fIlE").await;
        let err = cache.lookup(&inputs, &test.output_path).await.unwrap_err();
        expect_changed_manifests(&err);
    }

    #[tokio::test]
    async fn test_cache_lookup_misses_after_removing_output() {
        let test = CacheTest::new();

        test.create_input("Hi I'm the input file").await;
        let inputs = test.single_file_inputs();

        // The output the build would have produced.
        test.create_output("Hi I'm the output file").await;

        let cache = test.cache().await;

        // Recording the build makes the following lookup a hit.
        cache.update(&inputs, &test.output_path).await.unwrap();
        cache.lookup(&inputs, &test.output_path).await.unwrap();

        // Deleting the output must cause a miss, even though every input
        // is unchanged.
        test.remove_output().await;
        let err = cache.lookup(&inputs, &test.output_path).await.unwrap_err();
        expect_missing_output(&err);
    }

    #[tokio::test]
    async fn test_cache_disabled_always_misses() {
        let test = CacheTest::new();

        test.create_input("Hi I'm the input file").await;
        let inputs = test.single_file_inputs();

        // The output the build would have produced.
        test.create_output("Hi I'm the output file").await;

        let mut cache = test.cache().await;
        cache.set_disable(true);

        // Updating a disabled cache succeeds but records nothing.
        cache.update(&inputs, &test.output_path).await.unwrap();

        // Lookups on a disabled cache always miss.
        let err = cache.lookup(&inputs, &test.output_path).await.unwrap_err();
        expect_cache_disabled(&err);
    }
}