microsandbox-image 0.4.1

OCI image pulling, layer extraction, and caching for microsandbox.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
//! Global on-disk image and layer cache.

use std::path::{Path, PathBuf};

use oci_client::Reference;
use serde::{Deserialize, Serialize};
use sha2::{Digest as Sha2Digest, Sha256};

use crate::{
    config::ImageConfig,
    digest::Digest,
    error::{ImageError, ImageResult},
};

//--------------------------------------------------------------------------------------------------
// Constants
//--------------------------------------------------------------------------------------------------

/// Subdirectory for per-layer EROFS images (keyed by diff_id).
///
/// The per-layer materialization lock files and the compressed layer tarballs
/// returned by `GlobalCache::tar_path` are placed in this directory as well.
const LAYERS_DIR: &str = "layers";

/// Subdirectory for fsmeta EROFS images and their lock files (keyed by manifest digest).
const FSMETA_DIR: &str = "fsmeta";

/// Subdirectory for VMDK descriptors and their lock files (keyed by manifest digest).
const VMDK_DIR: &str = "vmdk";

/// Subdirectory for cached manifest + config metadata.
///
/// Files here are named after the SHA-256 of the image reference string, not
/// after a content digest.
const MANIFESTS_DIR: &str = "manifests";

/// Subdirectory for transient staging (partial downloads, download locks, work dirs).
const TMP_DIR: &str = "tmp";

/// EROFS images are emitted in 4 KiB filesystem blocks.
///
/// The artifact validity checks use this to reject files whose length is not a
/// whole number of blocks.
const EROFS_ALIGNMENT_BYTES: u64 = 4096;

//--------------------------------------------------------------------------------------------------
// Types
//--------------------------------------------------------------------------------------------------

/// On-disk global cache for OCI layers and EROFS images.
///
/// The cache root is whatever directory is handed to the constructor; the
/// layout below shows it as `~/.microsandbox/cache` for illustration.
///
/// Layout:
/// ```text
/// ~/.microsandbox/cache/manifests/<sha256-of-ref>.json       # manifest + config metadata
/// ~/.microsandbox/cache/manifests/<sha256-of-ref>.json.part  # metadata being written (renamed into place)
/// ~/.microsandbox/cache/manifests/<sha256-of-ref>.lock       # per-reference pull lock
/// ~/.microsandbox/cache/tmp/<blob>.part                      # partial downloads
/// ~/.microsandbox/cache/tmp/<blob>.download.lock             # download flock files
/// ~/.microsandbox/cache/tmp/<blob>.work/                     # materialization work dirs
/// ~/.microsandbox/cache/layers/<blob>.tar.gz                 # compressed layer tarball
/// ~/.microsandbox/cache/layers/<diff_id_safe>.erofs          # per-layer EROFS
/// ~/.microsandbox/cache/layers/<diff_id_safe>.erofs.lock     # materialization flock
/// ~/.microsandbox/cache/fsmeta/<manifest_safe>.erofs         # fsmeta EROFS (fsmerge metadata)
/// ~/.microsandbox/cache/fsmeta/<manifest_safe>.erofs.lock    # materialization flock
/// ~/.microsandbox/cache/vmdk/<manifest_safe>.vmdk            # VMDK descriptor
/// ~/.microsandbox/cache/vmdk/<manifest_safe>.vmdk.lock       # materialization flock
/// ```
///
/// This type only computes paths and reads/writes the metadata JSON; the lock
/// files listed above are named here but acquired by callers.
pub struct GlobalCache {
    /// Root of the layer EROFS cache (`~/.microsandbox/cache/layers/`).
    layers_dir: PathBuf,

    /// Root of the fsmeta EROFS cache (`~/.microsandbox/cache/fsmeta/`).
    fsmeta_dir: PathBuf,

    /// Root of the VMDK descriptor cache (`~/.microsandbox/cache/vmdk/`).
    vmdk_dir: PathBuf,

    /// Root of the manifest metadata cache (`~/.microsandbox/cache/manifests/`).
    manifests_dir: PathBuf,

    /// Root of the transient staging area (`~/.microsandbox/cache/tmp/`).
    tmp_dir: PathBuf,
}

/// Cached metadata for a pulled image reference.
///
/// Stored as JSON in the manifests directory, one file per image reference.
/// The field names are part of that on-disk format: a file that no longer
/// deserializes into this struct is treated as a cache miss by the readers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedImageMetadata {
    /// Content-addressable digest of the resolved manifest.
    pub manifest_digest: String,
    /// Content-addressable digest of the config blob.
    pub config_digest: String,
    /// Parsed OCI image configuration.
    pub config: ImageConfig,
    /// Layer metadata in bottom-to-top order.
    pub layers: Vec<CachedLayerMetadata>,
}

/// Cached metadata for a single layer descriptor.
///
/// Embedded in [`CachedImageMetadata::layers`] and serialized along with it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedLayerMetadata {
    /// Compressed layer digest from the manifest (blob digest).
    pub digest: String,
    /// OCI media type of the layer blob, when one was recorded.
    pub media_type: Option<String>,
    /// Compressed blob size in bytes, when one was recorded.
    pub size_bytes: Option<u64>,
    /// Uncompressed diff ID from the image config.
    pub diff_id: String,
}

//--------------------------------------------------------------------------------------------------
// Methods
//--------------------------------------------------------------------------------------------------

impl GlobalCache {
    /// Create a new GlobalCache using the provided cache directory.
    ///
    /// Creates all subdirectories if they don't exist.
    pub fn new(cache_dir: &Path) -> ImageResult<Self> {
        let layers_dir = cache_dir.join(LAYERS_DIR);
        let fsmeta_dir = cache_dir.join(FSMETA_DIR);
        let vmdk_dir = cache_dir.join(VMDK_DIR);
        let manifests_dir = cache_dir.join(MANIFESTS_DIR);
        let tmp_dir = cache_dir.join(TMP_DIR);

        for dir in [
            &layers_dir,
            &fsmeta_dir,
            &vmdk_dir,
            &manifests_dir,
            &tmp_dir,
        ] {
            std::fs::create_dir_all(dir).map_err(|e| ImageError::Cache {
                path: dir.clone(),
                source: e,
            })?;
        }

        Ok(Self {
            layers_dir,
            fsmeta_dir,
            vmdk_dir,
            manifests_dir,
            tmp_dir,
        })
    }

    /// Create a new GlobalCache using async filesystem operations.
    pub async fn new_async(cache_dir: &Path) -> ImageResult<Self> {
        let layers_dir = cache_dir.join(LAYERS_DIR);
        let fsmeta_dir = cache_dir.join(FSMETA_DIR);
        let vmdk_dir = cache_dir.join(VMDK_DIR);
        let manifests_dir = cache_dir.join(MANIFESTS_DIR);
        let tmp_dir = cache_dir.join(TMP_DIR);

        for dir in [
            &layers_dir,
            &fsmeta_dir,
            &vmdk_dir,
            &manifests_dir,
            &tmp_dir,
        ] {
            tokio::fs::create_dir_all(dir)
                .await
                .map_err(|e| ImageError::Cache {
                    path: dir.clone(),
                    source: e,
                })?;
        }

        Ok(Self {
            layers_dir,
            fsmeta_dir,
            vmdk_dir,
            manifests_dir,
            tmp_dir,
        })
    }

    // ── Layer EROFS paths (keyed by diff_id) ─────────────────────────

    /// Root layer EROFS cache directory.
    pub fn layers_dir(&self) -> &Path {
        &self.layers_dir
    }

    /// Path to the per-layer EROFS image for a given diff_id.
    pub fn layer_erofs_path(&self, diff_id: &Digest) -> PathBuf {
        self.layers_dir
            .join(format!("{}.erofs", diff_id.to_path_safe()))
    }

    /// Path to the materialization lock for a layer EROFS image.
    pub fn layer_erofs_lock_path(&self, diff_id: &Digest) -> PathBuf {
        self.layers_dir
            .join(format!("{}.erofs.lock", diff_id.to_path_safe()))
    }

    /// Check if a layer EROFS image exists.
    pub fn is_layer_materialized(&self, diff_id: &Digest) -> bool {
        is_valid_erofs_artifact(&self.layer_erofs_path(diff_id))
    }

    /// Check if all given layer diff_ids have materialized EROFS images.
    pub fn all_layers_materialized(&self, diff_ids: &[Digest]) -> bool {
        diff_ids.iter().all(|d| self.is_layer_materialized(d))
    }

    // ── fsmeta EROFS paths (keyed by manifest digest) ─────────────────

    /// Root fsmeta EROFS cache directory.
    pub fn fsmeta_dir(&self) -> &Path {
        &self.fsmeta_dir
    }

    /// Path to the fsmeta EROFS image for a given manifest digest.
    pub fn fsmeta_erofs_path(&self, manifest_digest: &Digest) -> PathBuf {
        self.fsmeta_dir
            .join(format!("{}.erofs", manifest_digest.to_path_safe()))
    }

    /// Path to the materialization lock for a fsmeta EROFS image.
    pub fn fsmeta_erofs_lock_path(&self, manifest_digest: &Digest) -> PathBuf {
        self.fsmeta_dir
            .join(format!("{}.erofs.lock", manifest_digest.to_path_safe()))
    }

    /// Check if a fsmeta EROFS image exists.
    pub fn is_fsmeta_materialized(&self, manifest_digest: &Digest) -> bool {
        is_valid_erofs_artifact(&self.fsmeta_erofs_path(manifest_digest))
    }

    // ── VMDK descriptor paths (keyed by manifest digest) ────────────

    /// Root VMDK cache directory.
    pub fn vmdk_dir(&self) -> &Path {
        &self.vmdk_dir
    }

    /// Path to the VMDK descriptor for a given manifest digest.
    pub fn vmdk_path(&self, manifest_digest: &Digest) -> PathBuf {
        self.vmdk_dir
            .join(format!("{}.vmdk", manifest_digest.to_path_safe()))
    }

    /// Path to the materialization lock for a VMDK descriptor.
    pub fn vmdk_lock_path(&self, manifest_digest: &Digest) -> PathBuf {
        self.vmdk_dir
            .join(format!("{}.vmdk.lock", manifest_digest.to_path_safe()))
    }

    /// Check if a VMDK descriptor exists for a given manifest digest.
    pub fn is_vmdk_materialized(&self, manifest_digest: &Digest) -> bool {
        self.vmdk_path(manifest_digest).exists()
    }

    // ── Staging/tmp paths (downloads, work dirs) ─────────────────────

    /// Root staging directory.
    pub fn tmp_dir(&self) -> &Path {
        &self.tmp_dir
    }

    /// Path to the partial download file for a blob.
    pub fn part_path(&self, blob_digest: &Digest) -> PathBuf {
        self.tmp_dir
            .join(format!("{}.part", blob_digest.to_path_safe()))
    }

    /// Path to the download lock file for a blob.
    pub fn download_lock_path(&self, blob_digest: &Digest) -> PathBuf {
        self.tmp_dir
            .join(format!("{}.download.lock", blob_digest.to_path_safe()))
    }

    /// Path to the materialization work directory for an EROFS build.
    pub fn work_dir(&self, key: &Digest) -> PathBuf {
        self.tmp_dir.join(format!("{}.work", key.to_path_safe()))
    }

    // ── Manifest metadata cache ──────────────────────────────────────

    /// Root manifest metadata directory.
    pub fn manifests_dir(&self) -> &Path {
        &self.manifests_dir
    }

    /// Path to the pull lock file for an image reference.
    pub fn image_lock_path(&self, reference: &Reference) -> PathBuf {
        self.manifests_dir
            .join(format!("{}.lock", image_cache_key(reference)))
    }

    /// Read cached metadata for an image reference.
    pub fn read_image_metadata(
        &self,
        reference: &Reference,
    ) -> ImageResult<Option<CachedImageMetadata>> {
        let path = self.image_metadata_path(reference);

        let data = match std::fs::read_to_string(&path) {
            Ok(data) => data,
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
            Err(e) => return Err(ImageError::Cache { path, source: e }),
        };

        parse_cached_image_metadata(&path, &data)
    }

    /// Read cached metadata for an image reference using async filesystem I/O.
    pub async fn read_image_metadata_async(
        &self,
        reference: &Reference,
    ) -> ImageResult<Option<CachedImageMetadata>> {
        let path = self.image_metadata_path(reference);

        let data = match tokio::fs::read_to_string(&path).await {
            Ok(data) => data,
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
            Err(e) => return Err(ImageError::Cache { path, source: e }),
        };

        parse_cached_image_metadata(&path, &data)
    }

    /// Write cached metadata for an image reference.
    #[cfg_attr(not(test), allow(dead_code))]
    pub(crate) fn write_image_metadata(
        &self,
        reference: &Reference,
        metadata: &CachedImageMetadata,
    ) -> ImageResult<()> {
        let path = self.image_metadata_path(reference);
        let temp_path = path.with_extension("json.part");
        let payload = serde_json::to_vec(metadata).map_err(|e| {
            ImageError::ConfigParse(format!("failed to serialize cached image metadata: {e}"))
        })?;

        std::fs::write(&temp_path, payload).map_err(|e| ImageError::Cache {
            path: temp_path.clone(),
            source: e,
        })?;
        std::fs::rename(&temp_path, &path).map_err(|e| ImageError::Cache { path, source: e })?;

        Ok(())
    }

    /// Write cached metadata for an image reference using async filesystem I/O.
    pub(crate) async fn write_image_metadata_async(
        &self,
        reference: &Reference,
        metadata: &CachedImageMetadata,
    ) -> ImageResult<()> {
        let path = self.image_metadata_path(reference);
        let temp_path = path.with_extension("json.part");
        let payload = serde_json::to_vec(metadata).map_err(|e| {
            ImageError::ConfigParse(format!("failed to serialize cached image metadata: {e}"))
        })?;

        tokio::fs::write(&temp_path, payload)
            .await
            .map_err(|e| ImageError::Cache {
                path: temp_path.clone(),
                source: e,
            })?;
        tokio::fs::rename(&temp_path, &path)
            .await
            .map_err(|e| ImageError::Cache { path, source: e })?;

        Ok(())
    }

    /// Delete cached metadata for an image reference.
    pub fn delete_image_metadata(&self, reference: &Reference) -> ImageResult<()> {
        let path = self.image_metadata_path(reference);
        match std::fs::remove_file(&path) {
            Ok(()) => Ok(()),
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
            Err(e) => Err(ImageError::Cache { path, source: e }),
        }
    }

    /// Delete cached metadata for an image reference using async filesystem I/O.
    pub async fn delete_image_metadata_async(&self, reference: &Reference) -> ImageResult<()> {
        let path = self.image_metadata_path(reference);
        match tokio::fs::remove_file(&path).await {
            Ok(()) => Ok(()),
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
            Err(e) => Err(ImageError::Cache { path, source: e }),
        }
    }

    /// Path to the cached metadata file for an image reference.
    fn image_metadata_path(&self, reference: &Reference) -> PathBuf {
        self.manifests_dir
            .join(format!("{}.json", image_cache_key(reference)))
    }

    // ── Blob cache paths ──────────────────────────────────────────────

    /// Path to the cached compressed tarball for a layer blob.
    pub fn tar_path(&self, digest: &Digest) -> PathBuf {
        self.layers_dir
            .join(format!("{}.tar.gz", digest.to_path_safe()))
    }
}

//--------------------------------------------------------------------------------------------------
// Functions
//--------------------------------------------------------------------------------------------------

/// Derive the file-name stem used for everything cached under `reference`.
///
/// The key is the hex-encoded SHA-256 of the reference's string form, which
/// keeps registry separators such as `/` and `:` out of file names.
fn image_cache_key(reference: &Reference) -> String {
    let canonical = reference.to_string();
    let digest = Sha256::digest(canonical.as_bytes());
    hex::encode(digest)
}

/// Decode the JSON text of a cached metadata file.
///
/// A file that does not deserialize is treated as a cache miss rather than an
/// error: a warning naming `path` is logged and `Ok(None)` is returned. This
/// function never returns `Err`.
fn parse_cached_image_metadata(
    path: &Path,
    data: &str,
) -> ImageResult<Option<CachedImageMetadata>> {
    let decoded: Result<CachedImageMetadata, _> = serde_json::from_str(data);

    let metadata = decoded
        .map_err(|err| {
            tracing::warn!(
                path = %path.display(),
                error = %err,
                "corrupt image metadata cache, ignoring"
            );
        })
        .ok();

    Ok(metadata)
}

/// Cheap sanity check that `path` holds a completely written EROFS image.
///
/// Returns `true` only when `path` resolves to a regular file whose length is
/// non-zero and a whole number of EROFS blocks. Any metadata error (including
/// a missing file) yields `false`. The file's contents are not inspected.
///
/// Uses blocking filesystem I/O.
pub(crate) fn is_valid_erofs_artifact(path: &Path) -> bool {
    std::fs::metadata(path)
        .map(|meta| {
            let len = meta.len();
            // Directories commonly report a block-aligned size (e.g. 4096), so the
            // length test alone would accept them; insist on a regular file.
            meta.is_file() && len > 0 && len % EROFS_ALIGNMENT_BYTES == 0
        })
        .unwrap_or(false)
}

/// Async counterpart of the EROFS artifact sanity check.
///
/// Returns `true` only when `path` resolves to a regular file whose length is
/// non-zero and a whole number of EROFS blocks. Any metadata error (including
/// a missing file) yields `false`. The file's contents are not inspected.
///
/// Queries metadata through `tokio::fs`.
pub(crate) async fn is_valid_erofs_artifact_async(path: &Path) -> bool {
    tokio::fs::metadata(path)
        .await
        .map(|meta| {
            let len = meta.len();
            // Directories commonly report a block-aligned size (e.g. 4096), so the
            // length test alone would accept them; insist on a regular file.
            meta.is_file() && len > 0 && len % EROFS_ALIGNMENT_BYTES == 0
        })
        .unwrap_or(false)
}