Skip to main content

microsandbox_image/cache/
store.rs

1//! Global on-disk image and layer cache.
2
3use std::path::{Path, PathBuf};
4
5use oci_client::Reference;
6use serde::{Deserialize, Serialize};
7use sha2::{Digest as Sha2Digest, Sha256};
8
9use crate::{
10    config::ImageConfig,
11    digest::Digest,
12    error::{ImageError, ImageResult},
13};
14
15//--------------------------------------------------------------------------------------------------
16// Constants
17//--------------------------------------------------------------------------------------------------
18
/// Subdirectory for per-layer EROFS images (keyed by diff_id).
///
/// `GlobalCache::tar_path` also places compressed layer tarballs
/// (`<digest>.tar.gz`) in this directory.
const LAYERS_DIR: &str = "layers";

/// Subdirectory for fsmeta EROFS images (keyed by manifest digest).
const FSMETA_DIR: &str = "fsmeta";

/// Subdirectory for VMDK descriptors (keyed by manifest digest).
const VMDK_DIR: &str = "vmdk";

/// Subdirectory for cached manifest + config metadata.
///
/// Entries are named after the hex SHA-256 of the image reference string
/// (see `image_cache_key`); the per-reference pull lock files live here too.
const MANIFESTS_DIR: &str = "manifests";

/// Subdirectory for transient staging (downloads, work dirs).
const TMP_DIR: &str = "tmp";

/// EROFS images are emitted in 4 KiB filesystem blocks.
///
/// The artifact validity checks in this file require a cached image's size to
/// be a non-zero multiple of this value.
const EROFS_ALIGNMENT_BYTES: u64 = 4096;
36
37//--------------------------------------------------------------------------------------------------
38// Types
39//--------------------------------------------------------------------------------------------------
40
/// On-disk global cache for OCI layers and EROFS images.
///
/// The value itself is only a bundle of directory paths; cloning it is cheap
/// and does not touch the filesystem.
///
/// Layout:
/// ```text
/// ~/.microsandbox/cache/manifests/<sha256-of-ref>.json       # manifest + config metadata
/// ~/.microsandbox/cache/manifests/<sha256-of-ref>.json.part  # in-flight metadata write
/// ~/.microsandbox/cache/manifests/<sha256-of-ref>.lock       # pull lock file
/// ~/.microsandbox/cache/tmp/<blob>.part                      # partial downloads
/// ~/.microsandbox/cache/tmp/<blob>.download.lock             # download flock files
/// ~/.microsandbox/cache/tmp/<blob>.work/                     # materialization work dirs
/// ~/.microsandbox/cache/layers/<blob_safe>.tar.gz            # cached compressed layer tarball
/// ~/.microsandbox/cache/layers/<diff_id_safe>.erofs          # per-layer EROFS
/// ~/.microsandbox/cache/layers/<diff_id_safe>.erofs.lock     # materialization flock
/// ~/.microsandbox/cache/fsmeta/<manifest_safe>.erofs         # fsmeta EROFS (fsmerge metadata)
/// ~/.microsandbox/cache/fsmeta/<manifest_safe>.erofs.lock    # materialization flock
/// ~/.microsandbox/cache/vmdk/<manifest_safe>.vmdk            # VMDK descriptor
/// ~/.microsandbox/cache/vmdk/<manifest_safe>.vmdk.lock       # materialization flock
/// ```
///
/// The `~/.microsandbox/cache` prefix above is illustrative: the actual root is
/// whatever directory is handed to the constructor.
#[derive(Debug, Clone)]
pub struct GlobalCache {
    /// Root of the layer EROFS cache (`~/.microsandbox/cache/layers/`).
    layers_dir: PathBuf,

    /// Root of the fsmeta EROFS cache (`~/.microsandbox/cache/fsmeta/`).
    fsmeta_dir: PathBuf,

    /// Root of the VMDK descriptor cache (`~/.microsandbox/cache/vmdk/`).
    vmdk_dir: PathBuf,

    /// Root of the manifest metadata cache (`~/.microsandbox/cache/manifests/`).
    manifests_dir: PathBuf,

    /// Root of the transient staging area (`~/.microsandbox/cache/tmp/`).
    tmp_dir: PathBuf,
}
72
/// Cached metadata for a pulled image reference.
///
/// Stored as JSON (via serde) in the manifests directory, one file per image
/// reference. A file that no longer deserializes into this struct is treated
/// as a cache miss by the readers in this module rather than as an error.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedImageMetadata {
    /// Content-addressable digest of the resolved manifest.
    pub manifest_digest: String,
    /// Content-addressable digest of the config blob.
    pub config_digest: String,
    /// Raw resolved image manifest JSON.
    pub raw_manifest_json: String,
    /// Raw image config JSON.
    pub raw_config_json: String,
    /// Parsed OCI image configuration.
    pub config: ImageConfig,
    /// Layer metadata in bottom-to-top order.
    pub layers: Vec<CachedLayerMetadata>,
}
89
/// Cached metadata for a single layer descriptor.
///
/// Serialized as part of [`CachedImageMetadata::layers`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedLayerMetadata {
    /// Compressed layer digest from the manifest (blob digest).
    pub digest: String,
    /// OCI media type of the layer blob.
    pub media_type: Option<String>,
    /// Compressed blob size in bytes.
    pub size_bytes: Option<u64>,
    /// Uncompressed diff ID from the image config.
    pub diff_id: String,
}
102
103//--------------------------------------------------------------------------------------------------
104// Methods
105//--------------------------------------------------------------------------------------------------
106
107impl GlobalCache {
108    /// Create a new GlobalCache using the provided cache directory.
109    ///
110    /// Creates all subdirectories if they don't exist.
111    pub fn new(cache_dir: &Path) -> ImageResult<Self> {
112        let layers_dir = cache_dir.join(LAYERS_DIR);
113        let fsmeta_dir = cache_dir.join(FSMETA_DIR);
114        let vmdk_dir = cache_dir.join(VMDK_DIR);
115        let manifests_dir = cache_dir.join(MANIFESTS_DIR);
116        let tmp_dir = cache_dir.join(TMP_DIR);
117
118        for dir in [
119            &layers_dir,
120            &fsmeta_dir,
121            &vmdk_dir,
122            &manifests_dir,
123            &tmp_dir,
124        ] {
125            std::fs::create_dir_all(dir).map_err(|e| ImageError::Cache {
126                path: dir.clone(),
127                source: e,
128            })?;
129        }
130
131        Ok(Self {
132            layers_dir,
133            fsmeta_dir,
134            vmdk_dir,
135            manifests_dir,
136            tmp_dir,
137        })
138    }
139
140    /// Create a new GlobalCache using async filesystem operations.
141    pub async fn new_async(cache_dir: &Path) -> ImageResult<Self> {
142        let layers_dir = cache_dir.join(LAYERS_DIR);
143        let fsmeta_dir = cache_dir.join(FSMETA_DIR);
144        let vmdk_dir = cache_dir.join(VMDK_DIR);
145        let manifests_dir = cache_dir.join(MANIFESTS_DIR);
146        let tmp_dir = cache_dir.join(TMP_DIR);
147
148        for dir in [
149            &layers_dir,
150            &fsmeta_dir,
151            &vmdk_dir,
152            &manifests_dir,
153            &tmp_dir,
154        ] {
155            tokio::fs::create_dir_all(dir)
156                .await
157                .map_err(|e| ImageError::Cache {
158                    path: dir.clone(),
159                    source: e,
160                })?;
161        }
162
163        Ok(Self {
164            layers_dir,
165            fsmeta_dir,
166            vmdk_dir,
167            manifests_dir,
168            tmp_dir,
169        })
170    }
171
172    // ── Layer EROFS paths (keyed by diff_id) ─────────────────────────
173
174    /// Root layer EROFS cache directory.
175    pub fn layers_dir(&self) -> &Path {
176        &self.layers_dir
177    }
178
179    /// Path to the per-layer EROFS image for a given diff_id.
180    pub fn layer_erofs_path(&self, diff_id: &Digest) -> PathBuf {
181        self.layers_dir
182            .join(format!("{}.erofs", diff_id.to_path_safe()))
183    }
184
185    /// Path to the materialization lock for a layer EROFS image.
186    pub fn layer_erofs_lock_path(&self, diff_id: &Digest) -> PathBuf {
187        self.layers_dir
188            .join(format!("{}.erofs.lock", diff_id.to_path_safe()))
189    }
190
191    /// Check if a layer EROFS image exists.
192    pub fn is_layer_materialized(&self, diff_id: &Digest) -> bool {
193        is_valid_erofs_artifact(&self.layer_erofs_path(diff_id))
194    }
195
196    /// Check if all given layer diff_ids have materialized EROFS images.
197    pub fn all_layers_materialized(&self, diff_ids: &[Digest]) -> bool {
198        diff_ids.iter().all(|d| self.is_layer_materialized(d))
199    }
200
201    // ── fsmeta EROFS paths (keyed by manifest digest) ─────────────────
202
203    /// Root fsmeta EROFS cache directory.
204    pub fn fsmeta_dir(&self) -> &Path {
205        &self.fsmeta_dir
206    }
207
208    /// Path to the fsmeta EROFS image for a given manifest digest.
209    pub fn fsmeta_erofs_path(&self, manifest_digest: &Digest) -> PathBuf {
210        self.fsmeta_dir
211            .join(format!("{}.erofs", manifest_digest.to_path_safe()))
212    }
213
214    /// Path to the materialization lock for a fsmeta EROFS image.
215    pub fn fsmeta_erofs_lock_path(&self, manifest_digest: &Digest) -> PathBuf {
216        self.fsmeta_dir
217            .join(format!("{}.erofs.lock", manifest_digest.to_path_safe()))
218    }
219
220    /// Check if a fsmeta EROFS image exists.
221    pub fn is_fsmeta_materialized(&self, manifest_digest: &Digest) -> bool {
222        is_valid_erofs_artifact(&self.fsmeta_erofs_path(manifest_digest))
223    }
224
225    // ── VMDK descriptor paths (keyed by manifest digest) ────────────
226
227    /// Root VMDK cache directory.
228    pub fn vmdk_dir(&self) -> &Path {
229        &self.vmdk_dir
230    }
231
232    /// Path to the VMDK descriptor for a given manifest digest.
233    pub fn vmdk_path(&self, manifest_digest: &Digest) -> PathBuf {
234        self.vmdk_dir
235            .join(format!("{}.vmdk", manifest_digest.to_path_safe()))
236    }
237
238    /// Path to the materialization lock for a VMDK descriptor.
239    pub fn vmdk_lock_path(&self, manifest_digest: &Digest) -> PathBuf {
240        self.vmdk_dir
241            .join(format!("{}.vmdk.lock", manifest_digest.to_path_safe()))
242    }
243
244    /// Check if a VMDK descriptor exists for a given manifest digest.
245    pub fn is_vmdk_materialized(&self, manifest_digest: &Digest) -> bool {
246        self.vmdk_path(manifest_digest).exists()
247    }
248
249    // ── Staging/tmp paths (downloads, work dirs) ─────────────────────
250
251    /// Root staging directory.
252    pub fn tmp_dir(&self) -> &Path {
253        &self.tmp_dir
254    }
255
256    /// Path to the partial download file for a blob.
257    pub fn part_path(&self, blob_digest: &Digest) -> PathBuf {
258        self.tmp_dir
259            .join(format!("{}.part", blob_digest.to_path_safe()))
260    }
261
262    /// Path to the download lock file for a blob.
263    pub fn download_lock_path(&self, blob_digest: &Digest) -> PathBuf {
264        self.tmp_dir
265            .join(format!("{}.download.lock", blob_digest.to_path_safe()))
266    }
267
268    /// Path to the materialization work directory for an EROFS build.
269    pub fn work_dir(&self, key: &Digest) -> PathBuf {
270        self.tmp_dir.join(format!("{}.work", key.to_path_safe()))
271    }
272
273    // ── Manifest metadata cache ──────────────────────────────────────
274
275    /// Root manifest metadata directory.
276    pub fn manifests_dir(&self) -> &Path {
277        &self.manifests_dir
278    }
279
280    /// Path to the pull lock file for an image reference.
281    pub fn image_lock_path(&self, reference: &Reference) -> PathBuf {
282        self.manifests_dir
283            .join(format!("{}.lock", image_cache_key(reference)))
284    }
285
286    /// Read cached metadata for an image reference.
287    pub fn read_image_metadata(
288        &self,
289        reference: &Reference,
290    ) -> ImageResult<Option<CachedImageMetadata>> {
291        let path = self.image_metadata_path(reference);
292
293        let data = match std::fs::read_to_string(&path) {
294            Ok(data) => data,
295            Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
296            Err(e) => return Err(ImageError::Cache { path, source: e }),
297        };
298
299        parse_cached_image_metadata(&path, &data)
300    }
301
302    /// Read cached metadata for an image reference using async filesystem I/O.
303    pub async fn read_image_metadata_async(
304        &self,
305        reference: &Reference,
306    ) -> ImageResult<Option<CachedImageMetadata>> {
307        let path = self.image_metadata_path(reference);
308
309        let data = match tokio::fs::read_to_string(&path).await {
310            Ok(data) => data,
311            Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
312            Err(e) => return Err(ImageError::Cache { path, source: e }),
313        };
314
315        parse_cached_image_metadata(&path, &data)
316    }
317
318    /// Write cached metadata for an image reference.
319    #[cfg_attr(not(test), allow(dead_code))]
320    pub(crate) fn write_image_metadata(
321        &self,
322        reference: &Reference,
323        metadata: &CachedImageMetadata,
324    ) -> ImageResult<()> {
325        let path = self.image_metadata_path(reference);
326        let temp_path = path.with_extension("json.part");
327        let payload = serde_json::to_vec(metadata).map_err(|e| {
328            ImageError::ConfigParse(format!("failed to serialize cached image metadata: {e}"))
329        })?;
330
331        std::fs::write(&temp_path, payload).map_err(|e| ImageError::Cache {
332            path: temp_path.clone(),
333            source: e,
334        })?;
335        std::fs::rename(&temp_path, &path).map_err(|e| ImageError::Cache { path, source: e })?;
336
337        Ok(())
338    }
339
340    /// Write cached metadata for an image reference using async filesystem I/O.
341    pub(crate) async fn write_image_metadata_async(
342        &self,
343        reference: &Reference,
344        metadata: &CachedImageMetadata,
345    ) -> ImageResult<()> {
346        let path = self.image_metadata_path(reference);
347        let temp_path = path.with_extension("json.part");
348        let payload = serde_json::to_vec(metadata).map_err(|e| {
349            ImageError::ConfigParse(format!("failed to serialize cached image metadata: {e}"))
350        })?;
351
352        tokio::fs::write(&temp_path, payload)
353            .await
354            .map_err(|e| ImageError::Cache {
355                path: temp_path.clone(),
356                source: e,
357            })?;
358        tokio::fs::rename(&temp_path, &path)
359            .await
360            .map_err(|e| ImageError::Cache { path, source: e })?;
361
362        Ok(())
363    }
364
365    /// Delete cached metadata for an image reference.
366    pub fn delete_image_metadata(&self, reference: &Reference) -> ImageResult<()> {
367        let path = self.image_metadata_path(reference);
368        match std::fs::remove_file(&path) {
369            Ok(()) => Ok(()),
370            Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
371            Err(e) => Err(ImageError::Cache { path, source: e }),
372        }
373    }
374
375    /// Delete cached metadata for an image reference using async filesystem I/O.
376    pub async fn delete_image_metadata_async(&self, reference: &Reference) -> ImageResult<()> {
377        let path = self.image_metadata_path(reference);
378        match tokio::fs::remove_file(&path).await {
379            Ok(()) => Ok(()),
380            Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
381            Err(e) => Err(ImageError::Cache { path, source: e }),
382        }
383    }
384
385    /// Path to the cached metadata file for an image reference.
386    fn image_metadata_path(&self, reference: &Reference) -> PathBuf {
387        self.manifests_dir
388            .join(format!("{}.json", image_cache_key(reference)))
389    }
390
391    // ── Blob cache paths ──────────────────────────────────────────────
392
393    /// Path to the cached compressed tarball for a layer blob.
394    pub fn tar_path(&self, digest: &Digest) -> PathBuf {
395        self.layers_dir
396            .join(format!("{}.tar.gz", digest.to_path_safe()))
397    }
398}
399
400//--------------------------------------------------------------------------------------------------
401// Functions
402//--------------------------------------------------------------------------------------------------
403
404fn image_cache_key(reference: &Reference) -> String {
405    let mut hasher = Sha256::new();
406    hasher.update(reference.to_string().as_bytes());
407    hex::encode(hasher.finalize())
408}
409
410fn parse_cached_image_metadata(
411    path: &Path,
412    data: &str,
413) -> ImageResult<Option<CachedImageMetadata>> {
414    match serde_json::from_str::<CachedImageMetadata>(data) {
415        Ok(metadata) => Ok(Some(metadata)),
416        Err(e) => {
417            tracing::warn!(
418                path = %path.display(),
419                error = %e,
420                "corrupt image metadata cache, ignoring"
421            );
422            Ok(None)
423        }
424    }
425}
426
427pub(crate) fn is_valid_erofs_artifact(path: &Path) -> bool {
428    match std::fs::metadata(path) {
429        Ok(meta) => {
430            let len = meta.len();
431            len > 0 && len % EROFS_ALIGNMENT_BYTES == 0
432        }
433        Err(_) => false,
434    }
435}
436
437pub(crate) async fn is_valid_erofs_artifact_async(path: &Path) -> bool {
438    match tokio::fs::metadata(path).await {
439        Ok(meta) => {
440            let len = meta.len();
441            len > 0 && len % EROFS_ALIGNMENT_BYTES == 0
442        }
443        Err(_) => false,
444    }
445}