Skip to main content

microsandbox_image/archive/
docker.rs

1//! Container image archive import/export.
2
3use std::collections::{BTreeMap, HashMap, HashSet};
4use std::fs::{File, OpenOptions};
5use std::io::{self, BufWriter, Read, Write};
6use std::path::{Path, PathBuf};
7use std::sync::{
8    Arc,
9    atomic::{AtomicU64, Ordering},
10};
11
12use serde::{Deserialize, Serialize};
13use sha2::{Digest as Sha2Digest, Sha256};
14
15use crate::{
16    CachedImageMetadata, CachedLayerMetadata, Digest, GlobalCache, ImageConfig, ImageError,
17    ImageResult, Platform, Reference, Registry,
18    erofs::{ErofsEntryKind, ErofsReader},
19    path_bytes::{os_str_bytes, os_string_from_vec, path_bytes},
20    tar::Compression,
21};
22
23//--------------------------------------------------------------------------------------------------
24// Constants
25//--------------------------------------------------------------------------------------------------
26
/// Media type written for image config descriptors in generated OCI manifests.
const OCI_CONFIG_MEDIA_TYPE: &str = "application/vnd.oci.image.config.v1+json";
/// Media type of an OCI image manifest.
const OCI_MANIFEST_MEDIA_TYPE: &str = "application/vnd.oci.image.manifest.v1+json";
/// Media type of an OCI image index (`index.json`).
const OCI_INDEX_MEDIA_TYPE: &str = "application/vnd.oci.image.index.v1+json";
/// Media type of an uncompressed tar layer.
const OCI_LAYER_MEDIA_TYPE: &str = "application/vnd.oci.image.layer.v1.tar";
/// Media type of a gzip-compressed tar layer.
const OCI_LAYER_GZIP_MEDIA_TYPE: &str = "application/vnd.oci.image.layer.v1.tar+gzip";
/// Media type of a zstd-compressed tar layer.
const OCI_LAYER_ZSTD_MEDIA_TYPE: &str = "application/vnd.oci.image.layer.v1.tar+zstd";
/// Index annotation key under which the image reference is recorded.
const OCI_REF_NAME_ANNOTATION: &str = "org.opencontainers.image.ref.name";
/// Upper bound (16 MiB) on a metadata entry, such as a config blob, read into memory.
const ARCHIVE_METADATA_MAX_BYTES: u64 = 16 * 1024 * 1024;
/// Upper bound (10 GiB) for a single layer blob.
// NOTE(review): the enforcement site is outside this view -- confirm where it is applied.
const ARCHIVE_LAYER_MAX_BYTES: u64 = 10 * 1024 * 1024 * 1024;
/// Upper bound on the number of tar entries scanned in one archive.
// NOTE(review): presumably checked by `enforce_archive_entry_count` -- confirm.
const ARCHIVE_MAX_ENTRY_COUNT: u64 = 1_000_000;
/// Process-wide counter, starting at zero.
// NOTE(review): presumably used to build unique temp-file names -- confirm at the use site.
static TEMP_FILE_COUNTER: AtomicU64 = AtomicU64::new(0);
38
39//--------------------------------------------------------------------------------------------------
40// Types
41//--------------------------------------------------------------------------------------------------
42
43/// Options for importing image archives.
44#[derive(Debug, Clone, Default)]
45pub struct ImageLoadOptions {
46    /// Extra tags to apply to the first image in the archive.
47    pub tags: Vec<String>,
48    /// Optional sink for materialization progress events. `None` stays silent.
49    pub progress: Option<crate::progress::PullProgressSender>,
50}
51
/// On-disk layout selected when writing an image archive.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ImageArchiveFormat {
    /// Archive compatible with `docker save` output. This is the default.
    #[default]
    Docker,
    /// Archive following the OCI Image Layout.
    Oci,
}
61
62/// One loaded image reference and its cached metadata.
63#[derive(Debug, Clone)]
64pub struct LoadedImage {
65    /// Image reference imported into the local cache.
66    pub reference: String,
67    /// Cached image metadata to persist in the database.
68    pub metadata: CachedImageMetadata,
69}
70
71/// Image data needed to export a Docker archive.
72#[derive(Debug, Clone)]
73pub struct ImageSaveRequest {
74    /// Image reference to write as a Docker `RepoTags` entry.
75    pub reference: String,
76    /// Image config fields.
77    pub config: ImageSaveConfig,
78    /// Raw image config JSON to preserve non-runtime metadata on export.
79    pub raw_config_json: String,
80    /// Ordered layer list, bottom-to-top.
81    pub layers: Vec<ImageSaveLayer>,
82}
83
/// Config values consulted when synthesizing the image config of an exported image.
#[derive(Debug, Clone, Default)]
pub struct ImageSaveConfig {
    /// CPU architecture the image targets, if known.
    pub architecture: Option<String>,
    /// Operating system the image targets, if known.
    pub os: Option<String>,
    /// Environment variable entries.
    pub env: Vec<String>,
    /// Entrypoint argv, if one is set.
    pub entrypoint: Option<Vec<String>>,
    /// Default command argv, if one is set.
    pub cmd: Option<Vec<String>>,
    /// Working directory, if one is set.
    pub working_dir: Option<String>,
    /// User to run as, if one is set.
    pub user: Option<String>,
    /// Image labels, keyed by label name.
    pub labels: BTreeMap<String, String>,
}
104
/// Identifies one layer of an image being exported.
#[derive(Debug, Clone)]
pub struct ImageSaveLayer {
    /// Diff ID the layer was originally cached under.
    pub diff_id: String,
}
111
112#[derive(Debug)]
113struct PreparedLoadedImage {
114    reference: String,
115    metadata: CachedImageMetadata,
116}
117
118#[derive(Debug)]
119struct PreparedArchiveLoad {
120    images: Vec<PreparedLoadedImage>,
121    staged_layers: HashMap<String, PathBuf>,
122}
123
/// Tracks staged layer files so they are deleted again unless ownership is explicitly taken.
#[derive(Debug)]
struct StagedLayerGuard {
    /// Staged file path for each layer digest.
    paths: HashMap<String, PathBuf>,
    /// When `true`, dropping the guard removes every tracked file.
    cleanup_on_drop: bool,
}
129
/// Facts recorded about one layer blob pulled out of an archive.
#[derive(Debug)]
struct LayerBlobInfo {
    /// Digest of the blob.
    digest: String,
    /// Media type recorded for the blob.
    media_type: String,
    /// Blob size in bytes.
    size_bytes: u64,
    /// Location of the staged copy on disk.
    path: PathBuf,
}
137
138#[derive(Debug, Deserialize)]
139struct DockerManifestEntry {
140    #[serde(rename = "Config")]
141    config: String,
142    #[serde(rename = "RepoTags")]
143    repo_tags: Option<Vec<String>>,
144    #[serde(rename = "Layers")]
145    layers: Vec<String>,
146}
147
148#[derive(Debug, Serialize)]
149struct DockerManifestOut {
150    #[serde(rename = "Config")]
151    config: String,
152    #[serde(rename = "RepoTags")]
153    repo_tags: Vec<String>,
154    #[serde(rename = "Layers")]
155    layers: Vec<String>,
156}
157
/// A layer tarball regenerated on disk for export.
#[derive(Debug)]
struct GeneratedLayer {
    /// Diff ID of the regenerated tarball; written into the exported config and descriptors.
    diff_id: String,
    /// Hex digest used to name the layer inside the archive.
    hex: String,
    /// Temporary file holding the tarball; removed once the export finishes.
    path: PathBuf,
    /// Tarball size in bytes.
    size: u64,
}
165
166struct DigestingWriter<W> {
167    inner: W,
168    hasher: Sha256,
169    written: u64,
170}
171
172//--------------------------------------------------------------------------------------------------
173// Methods
174//--------------------------------------------------------------------------------------------------
175
176impl<W> DigestingWriter<W> {
177    fn new(inner: W) -> Self {
178        Self {
179            inner,
180            hasher: Sha256::new(),
181            written: 0,
182        }
183    }
184
185    fn finish(self) -> (W, String, u64) {
186        (
187            self.inner,
188            hex::encode(self.hasher.finalize()),
189            self.written,
190        )
191    }
192}
193
194impl StagedLayerGuard {
195    fn new() -> Self {
196        Self {
197            paths: HashMap::new(),
198            cleanup_on_drop: true,
199        }
200    }
201
202    fn track(&mut self, digest: String, path: PathBuf) -> PathBuf {
203        if let Some(existing_path) = self.paths.get(&digest) {
204            let _ = std::fs::remove_file(&path);
205            return existing_path.clone();
206        }
207
208        self.paths.insert(digest, path.clone());
209        path
210    }
211
212    fn into_inner(mut self) -> HashMap<String, PathBuf> {
213        self.cleanup_on_drop = false;
214        std::mem::take(&mut self.paths)
215    }
216}
217
218//--------------------------------------------------------------------------------------------------
219// Trait Implementations
220//--------------------------------------------------------------------------------------------------
221
222impl<W: Write> Write for DigestingWriter<W> {
223    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
224        let written = self.inner.write(buf)?;
225        self.hasher.update(&buf[..written]);
226        self.written += written as u64;
227        Ok(written)
228    }
229
230    fn flush(&mut self) -> io::Result<()> {
231        self.inner.flush()
232    }
233}
234
235impl Drop for StagedLayerGuard {
236    fn drop(&mut self) {
237        if !self.cleanup_on_drop {
238            return;
239        }
240
241        for path in self.paths.values() {
242            let _ = std::fs::remove_file(path);
243        }
244    }
245}
246
247//--------------------------------------------------------------------------------------------------
248// Functions
249//--------------------------------------------------------------------------------------------------
250
251/// Load a Docker image archive into the microsandbox image cache.
252pub async fn load_archive(
253    cache_dir: &Path,
254    input: &Path,
255    options: ImageLoadOptions,
256) -> ImageResult<Vec<LoadedImage>> {
257    let cache_dir_for_blocking = cache_dir.to_path_buf();
258    let input = input.to_path_buf();
259    let progress = options.progress.clone();
260    let prepared = tokio::task::spawn_blocking(move || {
261        load_archive_blocking(&cache_dir_for_blocking, &input, options)
262    })
263    .await
264    .map_err(|e| ImageError::Io(io::Error::other(e)))??;
265
266    let cache = GlobalCache::new_async(cache_dir).await?;
267    let registry = Registry::new(Platform::host_linux(), cache)?;
268    let PreparedArchiveLoad {
269        images,
270        staged_layers,
271    } = prepared;
272    let cleanup_paths = staged_layers.values().cloned().collect::<Vec<_>>();
273    let staged_layers = Arc::new(staged_layers);
274    let cache = GlobalCache::new_async(cache_dir).await?;
275    let mut loaded = Vec::with_capacity(images.len());
276
277    let result = async {
278        for image in images {
279            let reference: Reference = image
280                .reference
281                .parse()
282                .map_err(|e| ImageError::ManifestParse(format!("invalid image reference: {e}")))?;
283
284            registry
285                .materialize_cached_layers_from_paths(
286                    &reference,
287                    &image.metadata,
288                    false,
289                    Arc::clone(&staged_layers),
290                    progress.clone(),
291                )
292                .await?;
293
294            cache
295                .write_image_metadata_async(&reference, &image.metadata)
296                .await?;
297
298            loaded.push(LoadedImage {
299                reference: image.reference,
300                metadata: image.metadata,
301            });
302        }
303
304        Ok(loaded)
305    }
306    .await;
307
308    for path in cleanup_paths {
309        let _ = tokio::fs::remove_file(path).await;
310    }
311
312    result
313}
314
315/// Save images as a Docker-compatible image archive.
316pub fn save_docker_archive(
317    cache: &GlobalCache,
318    output: &Path,
319    images: &[ImageSaveRequest],
320) -> ImageResult<()> {
321    save_archive(cache, output, images, ImageArchiveFormat::Docker)
322}
323
324/// Save images as a container image archive.
325pub fn save_archive(
326    cache: &GlobalCache,
327    output: &Path,
328    images: &[ImageSaveRequest],
329    format: ImageArchiveFormat,
330) -> ImageResult<()> {
331    match format {
332        ImageArchiveFormat::Docker => save_docker_archive_inner(cache, output, images),
333        ImageArchiveFormat::Oci => save_oci_archive_inner(cache, output, images),
334    }
335}
336
337fn save_docker_archive_inner(
338    cache: &GlobalCache,
339    output: &Path,
340    images: &[ImageSaveRequest],
341) -> ImageResult<()> {
342    if images.is_empty() {
343        return Err(ImageError::ManifestParse(
344            "at least one image reference is required".into(),
345        ));
346    }
347
348    let output_file = File::create(output).map_err(|e| ImageError::Cache {
349        path: output.to_path_buf(),
350        source: e,
351    })?;
352    let mut archive = tar::Builder::new(BufWriter::new(output_file));
353    let mut generated_layers: HashMap<String, GeneratedLayer> = HashMap::new();
354    let mut appended_layers: HashSet<String> = HashSet::new();
355    let mut manifest_entries = Vec::with_capacity(images.len());
356    let mut config_entries = Vec::with_capacity(images.len());
357
358    for image in images {
359        let mut layer_paths = Vec::with_capacity(image.layers.len());
360        let mut regenerated_diff_ids = Vec::with_capacity(image.layers.len());
361
362        for layer in &image.layers {
363            let generated = match generated_layers.get(&layer.diff_id) {
364                Some(generated) => generated,
365                None => {
366                    let generated = generate_layer_tar(cache, layer)?;
367                    generated_layers.insert(layer.diff_id.clone(), generated);
368                    generated_layers.get(&layer.diff_id).unwrap()
369                }
370            };
371
372            regenerated_diff_ids.push(generated.diff_id.clone());
373            layer_paths.push(format!("{}/layer.tar", generated.hex));
374        }
375
376        let config_bytes =
377            docker_config_json(&image.config, &image.raw_config_json, &regenerated_diff_ids)?;
378        let config_hex = sha256_hex(&config_bytes);
379        let config_name = format!("{config_hex}.json");
380
381        config_entries.push((config_name.clone(), config_bytes));
382
383        manifest_entries.push(DockerManifestOut {
384            config: config_name,
385            repo_tags: vec![image.reference.clone()],
386            layers: layer_paths,
387        });
388    }
389
390    let manifest_bytes = serde_json::to_vec_pretty(&manifest_entries)
391        .map_err(|e| ImageError::ConfigParse(format!("serialize docker manifest: {e}")))?;
392    append_bytes(&mut archive, "manifest.json", &manifest_bytes)?;
393
394    for (config_name, config_bytes) in config_entries {
395        append_bytes(&mut archive, &config_name, &config_bytes)?;
396    }
397
398    for image in images {
399        for layer in &image.layers {
400            let generated = generated_layers.get(&layer.diff_id).ok_or_else(|| {
401                ImageError::ManifestParse(format!("missing generated layer {}", layer.diff_id))
402            })?;
403            if appended_layers.insert(generated.hex.clone()) {
404                append_layer_entries(&mut archive, generated)?;
405            }
406        }
407    }
408
409    archive.finish().map_err(ImageError::Io)?;
410
411    for layer in generated_layers.values() {
412        let _ = std::fs::remove_file(&layer.path);
413    }
414
415    Ok(())
416}
417
418fn save_oci_archive_inner(
419    cache: &GlobalCache,
420    output: &Path,
421    images: &[ImageSaveRequest],
422) -> ImageResult<()> {
423    if images.is_empty() {
424        return Err(ImageError::ManifestParse(
425            "at least one image reference is required".into(),
426        ));
427    }
428
429    let output_file = File::create(output).map_err(|e| ImageError::Cache {
430        path: output.to_path_buf(),
431        source: e,
432    })?;
433    let mut archive = tar::Builder::new(BufWriter::new(output_file));
434    let mut generated_layers: HashMap<String, GeneratedLayer> = HashMap::new();
435    let mut appended_metadata_blobs: HashSet<String> = HashSet::new();
436    let mut appended_layer_blobs: HashSet<String> = HashSet::new();
437    let mut layer_blob_order = Vec::new();
438    let mut metadata_blobs = Vec::new();
439    let mut index_manifests = Vec::with_capacity(images.len());
440
441    for image in images {
442        let mut layer_descriptors = Vec::with_capacity(image.layers.len());
443        let mut regenerated_diff_ids = Vec::with_capacity(image.layers.len());
444
445        for layer in &image.layers {
446            let generated = match generated_layers.get(&layer.diff_id) {
447                Some(generated) => generated,
448                None => {
449                    let generated = generate_layer_tar(cache, layer)?;
450                    generated_layers.insert(layer.diff_id.clone(), generated);
451                    generated_layers.get(&layer.diff_id).unwrap()
452                }
453            };
454
455            regenerated_diff_ids.push(generated.diff_id.clone());
456            if appended_layer_blobs.insert(generated.hex.clone()) {
457                layer_blob_order.push(layer.diff_id.clone());
458            }
459            layer_descriptors.push(serde_json::json!({
460                "mediaType": OCI_LAYER_MEDIA_TYPE,
461                "digest": generated.diff_id,
462                "size": generated.size,
463            }));
464        }
465
466        let config_bytes =
467            docker_config_json(&image.config, &image.raw_config_json, &regenerated_diff_ids)?;
468        let config_hex = sha256_hex(&config_bytes);
469        if appended_metadata_blobs.insert(config_hex.clone()) {
470            metadata_blobs.push((config_hex.clone(), config_bytes.clone()));
471        }
472
473        let manifest_bytes = serde_json::to_vec(&serde_json::json!({
474            "schemaVersion": 2,
475            "mediaType": OCI_MANIFEST_MEDIA_TYPE,
476            "config": {
477                "mediaType": OCI_CONFIG_MEDIA_TYPE,
478                "digest": format!("sha256:{config_hex}"),
479                "size": config_bytes.len(),
480            },
481            "layers": layer_descriptors,
482        }))
483        .map_err(|e| ImageError::ManifestParse(format!("serialize OCI manifest: {e}")))?;
484        let manifest_hex = sha256_hex(&manifest_bytes);
485        if appended_metadata_blobs.insert(manifest_hex.clone()) {
486            metadata_blobs.push((manifest_hex.clone(), manifest_bytes.clone()));
487        }
488
489        index_manifests.push(serde_json::json!({
490            "mediaType": OCI_MANIFEST_MEDIA_TYPE,
491            "digest": format!("sha256:{manifest_hex}"),
492            "size": manifest_bytes.len(),
493            "platform": {
494                "architecture": image.config.architecture.as_deref().unwrap_or("amd64"),
495                "os": image.config.os.as_deref().unwrap_or("linux"),
496            },
497            "annotations": {
498                (OCI_REF_NAME_ANNOTATION): image.reference.clone(),
499            },
500        }));
501    }
502
503    let index_bytes = serde_json::to_vec_pretty(&serde_json::json!({
504        "schemaVersion": 2,
505        "mediaType": OCI_INDEX_MEDIA_TYPE,
506        "manifests": index_manifests,
507    }))
508    .map_err(|e| ImageError::ManifestParse(format!("serialize OCI index: {e}")))?;
509
510    append_bytes(
511        &mut archive,
512        "oci-layout",
513        br#"{"imageLayoutVersion":"1.0.0"}"#,
514    )?;
515    append_bytes(&mut archive, "index.json", &index_bytes)?;
516    append_directory(&mut archive, "blobs")?;
517    append_directory(&mut archive, "blobs/sha256")?;
518
519    for (hex, bytes) in metadata_blobs {
520        append_blob_bytes(&mut archive, &hex, &bytes)?;
521    }
522
523    for diff_id in layer_blob_order {
524        let generated = generated_layers.get(&diff_id).ok_or_else(|| {
525            ImageError::ManifestParse(format!("missing generated layer {diff_id}"))
526        })?;
527        append_blob_file(
528            &mut archive,
529            &generated.hex,
530            &generated.path,
531            generated.size,
532        )?;
533    }
534
535    archive.finish().map_err(ImageError::Io)?;
536
537    for layer in generated_layers.values() {
538        let _ = std::fs::remove_file(&layer.path);
539    }
540
541    Ok(())
542}
543
544fn load_archive_blocking(
545    cache_dir: &Path,
546    input: &Path,
547    options: ImageLoadOptions,
548) -> ImageResult<PreparedArchiveLoad> {
549    // A `docker save` archive carries a `manifest.json` (and often an `oci-layout` compat shim). Prefer the Docker path when `manifest.json` is present: it derives the image
550    // name from `RepoTags` and handles the layer layout that `docker save` actually writes. The OCI path is for archives that ship only an OCI layout.
551    if let Some(manifest_json) = read_archive_entry(input, "manifest.json")? {
552        let manifest: Vec<DockerManifestEntry> = serde_json::from_slice(&manifest_json)
553            .map_err(|e| ImageError::ManifestParse(format!("docker manifest.json: {e}")))?;
554        return load_docker_archive_blocking(cache_dir, input, options, manifest);
555    }
556
557    if read_archive_entry(input, "oci-layout")?.is_some() {
558        return load_oci_archive_blocking(cache_dir, input, options);
559    }
560
561    Err(ImageError::ManifestParse(
562        "archive missing manifest.json or oci-layout".into(),
563    ))
564}
565
566fn load_docker_archive_blocking(
567    cache_dir: &Path,
568    input: &Path,
569    options: ImageLoadOptions,
570    manifest: Vec<DockerManifestEntry>,
571) -> ImageResult<PreparedArchiveLoad> {
572    let cache = GlobalCache::new(cache_dir)?;
573    if manifest.is_empty() {
574        return Err(ImageError::ManifestParse(
575            "docker archive manifest is empty".into(),
576        ));
577    }
578
579    let required_configs = manifest
580        .iter()
581        .map(|image| image.config.clone())
582        .collect::<HashSet<_>>();
583    let required_layers = manifest
584        .iter()
585        .flat_map(|image| image.layers.iter().cloned())
586        .collect::<HashSet<_>>();
587
588    // Early cache gate: when this exact image is already materialized, skip staging (and re-hashing) every layer blob. `docker save` names images via RepoTags, so we look up
589    // the cached metadata by reference, confirm the archive's content still matches it (diff_ids -- guards against a rebuilt tag reusing the name), and verify the EROFS/VMDK
590    // artifacts survive. On a hit the cached metadata is reused verbatim, which also keys fsmeta/VMDK by the manifest digest recorded at materialization time -- so a `pull`
591    // followed by a `load` of the same image still hits. Only the small config blob is read (seekably, via `read_archive_entries`); layer bytes are never touched.
592    'early_gate: {
593        let config_blobs = read_archive_entries(input, &required_configs)?;
594        let mut early_images = Vec::new();
595        for (image_index, image) in manifest.iter().enumerate() {
596            let Some(config_bytes) = config_blobs.get(&image.config) else {
597                break 'early_gate;
598            };
599            let (_, diff_ids) = ImageConfig::parse(config_bytes)?;
600            if diff_ids.len() != image.layers.len() {
601                break 'early_gate;
602            }
603            let config_digest = format!("sha256:{}", sha256_hex(config_bytes));
604
605            let mut refs = image
606                .repo_tags
607                .clone()
608                .unwrap_or_default()
609                .into_iter()
610                .filter(|tag| tag != "<none>:<none>")
611                .collect::<Vec<_>>();
612            if image_index == 0 {
613                refs.extend(options.tags.iter().cloned());
614            }
615            refs.sort();
616            refs.dedup();
617            if refs.is_empty() {
618                break 'early_gate;
619            }
620
621            // Cached metadata under any of this image's refs whose recorded content (diff_ids) still equals the archive's.
622            let mut cached = None;
623            for reference in &refs {
624                let Ok(parsed) = reference.parse::<Reference>() else {
625                    break 'early_gate;
626                };
627                if let Some(metadata) = cache.read_image_metadata(&parsed)? {
628                    let cached_diff_ids = metadata
629                        .layers
630                        .iter()
631                        .map(|layer| layer.diff_id.clone())
632                        .collect::<Vec<_>>();
633                    if metadata.config_digest == config_digest && cached_diff_ids == diff_ids {
634                        cached = Some(metadata);
635                        break;
636                    }
637                }
638            }
639            let Some(metadata) = cached else {
640                break 'early_gate;
641            };
642
643            let Ok(manifest_digest) = metadata.manifest_digest.parse::<crate::Digest>() else {
644                break 'early_gate;
645            };
646            if !crate::cache::is_valid_erofs_artifact(&cache.fsmeta_erofs_path(&manifest_digest))
647                || !cache.vmdk_path(&manifest_digest).exists()
648            {
649                break 'early_gate;
650            }
651            let mut layers_present = true;
652            for diff_id_str in &diff_ids {
653                let Ok(diff_id) = diff_id_str.parse::<crate::Digest>() else {
654                    layers_present = false;
655                    break;
656                };
657                if !crate::cache::is_valid_erofs_artifact(&cache.layer_erofs_path(&diff_id)) {
658                    layers_present = false;
659                    break;
660                }
661            }
662            if !layers_present {
663                break 'early_gate;
664            }
665
666            for reference in refs {
667                early_images.push(PreparedLoadedImage {
668                    reference,
669                    metadata: metadata.clone(),
670                });
671            }
672        }
673        if !archive_contains_entries(input, &required_layers)? {
674            break 'early_gate;
675        }
676        return Ok(PreparedArchiveLoad {
677            images: early_images,
678            staged_layers: HashMap::new(),
679        });
680    }
681
682    let file = File::open(input).map_err(|e| ImageError::Cache {
683        path: input.to_path_buf(),
684        source: e,
685    })?;
686    let mut archive = tar::Archive::new(file);
687    let mut configs: HashMap<String, Vec<u8>> = HashMap::new();
688    let mut layers: HashMap<String, LayerBlobInfo> = HashMap::new();
689    let mut staged_layers = StagedLayerGuard::new();
690    let mut temp_counter = 0u64;
691    let mut entry_count = 0u64;
692
693    for entry in archive.entries().map_err(ImageError::Io)? {
694        let mut entry = entry.map_err(ImageError::Io)?;
695        entry_count += 1;
696        enforce_archive_entry_count(entry_count)?;
697        let path = normalized_archive_path(&entry)?;
698
699        if required_configs.contains(&path) {
700            let data = read_entry_to_vec(&mut entry, &path, ARCHIVE_METADATA_MAX_BYTES)?;
701            configs.insert(path, data);
702            continue;
703        }
704
705        if required_layers.contains(&path) {
706            let mut info = extract_layer_blob(&cache, &path, &mut entry, temp_counter)?;
707            temp_counter += 1;
708            info.path = staged_layers.track(info.digest.clone(), info.path);
709            verify_docker_layer_path_digest(&path, &info.digest)?;
710            layers.insert(path, info);
711            continue;
712        }
713    }
714
715    let mut loaded = Vec::new();
716    for (image_index, image) in manifest.into_iter().enumerate() {
717        let config_bytes = configs.get(&image.config).ok_or_else(|| {
718            ImageError::ConfigParse(format!("docker archive missing config {}", image.config))
719        })?;
720        let (config, diff_ids) = ImageConfig::parse(config_bytes)?;
721
722        if diff_ids.len() != image.layers.len() {
723            return Err(ImageError::ManifestParse(format!(
724                "layer count mismatch: config has {} diff_ids but archive manifest has {} layers",
725                diff_ids.len(),
726                image.layers.len()
727            )));
728        }
729
730        let config_digest = format!("sha256:{}", sha256_hex(config_bytes));
731        let mut layer_metadata = Vec::with_capacity(image.layers.len());
732        let mut manifest_layers = Vec::with_capacity(image.layers.len());
733
734        for (position, layer_path) in image.layers.iter().enumerate() {
735            let layer = layers.get(layer_path).ok_or_else(|| {
736                ImageError::ManifestParse(format!("docker archive missing layer {layer_path}"))
737            })?;
738            let diff_id = diff_ids[position].clone();
739            layer_metadata.push(CachedLayerMetadata {
740                digest: layer.digest.clone(),
741                media_type: Some(layer.media_type.clone()),
742                size_bytes: Some(layer.size_bytes),
743                diff_id,
744            });
745            manifest_layers.push(serde_json::json!({
746                "mediaType": layer.media_type,
747                "digest": layer.digest,
748                "size": layer.size_bytes,
749            }));
750        }
751
752        let manifest_bytes = serde_json::to_vec(&serde_json::json!({
753            "schemaVersion": 2,
754            "mediaType": OCI_MANIFEST_MEDIA_TYPE,
755            "config": {
756                "mediaType": OCI_CONFIG_MEDIA_TYPE,
757                "digest": config_digest,
758                "size": config_bytes.len(),
759            },
760            "layers": manifest_layers,
761        }))
762        .map_err(|e| ImageError::ManifestParse(format!("serialize manifest: {e}")))?;
763        let manifest_digest = format!("sha256:{}", sha256_hex(&manifest_bytes));
764
765        let metadata = CachedImageMetadata {
766            manifest_digest,
767            config_digest,
768            raw_manifest_json: json_bytes_to_string(&manifest_bytes, "docker manifest")?,
769            raw_config_json: json_bytes_to_string(config_bytes, "docker config")?,
770            config,
771            layers: layer_metadata,
772        };
773
774        let mut refs = image
775            .repo_tags
776            .unwrap_or_default()
777            .into_iter()
778            .filter(|tag| tag != "<none>:<none>")
779            .collect::<Vec<_>>();
780
781        if image_index == 0 {
782            refs.extend(options.tags.iter().cloned());
783        }
784
785        refs.sort();
786        refs.dedup();
787
788        if refs.is_empty() {
789            return Err(ImageError::ManifestParse(
790                "docker archive image has no tags; pass --tag to name it".into(),
791            ));
792        }
793
794        for reference in refs {
795            let _: Reference = reference.parse().map_err(|e| {
796                ImageError::ManifestParse(format!("invalid image reference {reference}: {e}"))
797            })?;
798            loaded.push(PreparedLoadedImage {
799                reference,
800                metadata: metadata.clone(),
801            });
802        }
803    }
804
805    Ok(PreparedArchiveLoad {
806        images: loaded,
807        staged_layers: staged_layers.into_inner(),
808    })
809}
810
/// Prepares the images contained in an OCI image-layout tar archive for import.
///
/// The archive at `input` is inspected in several passes:
///
/// 1. `oci-layout` and `index.json` are read and parsed; the index manifests are filtered by
///    `selectable_oci_manifests`.
/// 2. Each selected manifest blob is read, checked against its index descriptor (size and
///    digest), and parsed. The config and layer blob paths it references are collected.
/// 3. A warm-cache fast path tries to build the result from the manifest and config blobs alone.
/// 4. Otherwise every required layer blob is staged to a temporary file and verified against
///    its manifest descriptor.
///
/// Reference names come from each descriptor's `org.opencontainers.image.ref.name` annotation;
/// `options.tags` are added to the first selected image only.
///
/// # Errors
///
/// Returns an error when a required archive member is missing, a blob does not match its
/// descriptor, JSON parsing fails, the config's `diff_ids` count differs from the manifest's
/// layer count, an image ends up with no reference name, or a reference fails to parse.
fn load_oci_archive_blocking(
    cache_dir: &Path,
    input: &Path,
    options: ImageLoadOptions,
) -> ImageResult<PreparedArchiveLoad> {
    let cache = GlobalCache::new(cache_dir)?;
    // The layout marker is parsed only to validate it; the parsed value is discarded.
    let layout_json = read_archive_entry(input, "oci-layout")?
        .ok_or_else(|| ImageError::ManifestParse("OCI layout missing oci-layout".into()))?;
    serde_json::from_slice::<oci_spec::image::OciLayout>(&layout_json)
        .map_err(|e| ImageError::ManifestParse(format!("oci-layout: {e}")))?;

    let index_json = read_archive_entry(input, "index.json")?
        .ok_or_else(|| ImageError::ManifestParse("OCI layout missing index.json".into()))?;
    let index: oci_spec::image::ImageIndex = serde_json::from_slice(&index_json)
        .map_err(|e| ImageError::ManifestParse(format!("OCI index.json: {e}")))?;
    let manifest_descriptors = selectable_oci_manifests(index.manifests())?;
    if manifest_descriptors.is_empty() {
        return Err(ImageError::ManifestParse(
            "OCI layout contains no image manifests for the host platform".into(),
        ));
    }

    // Fetch all selected manifest blobs in a single archive pass.
    let manifest_paths = manifest_descriptors
        .iter()
        .map(|descriptor| blob_path_from_digest(descriptor.digest().as_ref()))
        .collect::<ImageResult<HashSet<_>>>()?;
    let manifest_blobs = read_archive_entries(input, &manifest_paths)?;
    let mut manifests = Vec::with_capacity(manifest_descriptors.len());
    let mut required_configs = HashSet::new();
    let mut required_layers = HashSet::new();

    for descriptor in &manifest_descriptors {
        let manifest_path = blob_path_from_digest(descriptor.digest().as_ref())?;
        let manifest_bytes = manifest_blobs.get(&manifest_path).ok_or_else(|| {
            ImageError::ManifestParse(format!("OCI layout missing manifest blob {manifest_path}"))
        })?;
        verify_descriptor_blob(descriptor, manifest_bytes)?;
        let manifest: oci_spec::image::ImageManifest = serde_json::from_slice(manifest_bytes)
            .map_err(|e| ImageError::ManifestParse(format!("OCI image manifest: {e}")))?;

        required_configs.insert(blob_path_from_digest(manifest.config().digest().as_ref())?);
        for layer in manifest.layers() {
            required_layers.insert(blob_path_from_digest(layer.digest().as_ref())?);
        }
        manifests.push((descriptor.clone(), manifest, manifest_bytes.clone()));
    }

    // Fast path: skip re-importing an image that is already fully materialized.
    //
    // A `CachedImageMetadata` can be built from the small manifest and config blobs alone:
    // `manifest_digest` is `sha256(manifest_bytes)`; each layer's digest, media type, and size
    // come from the manifest descriptors; the diff_ids come from the config. When the fsmeta,
    // VMDK, and every layer EROFS are already cached, return here without staging (and
    // re-hashing) any layer blob. Any miss breaks out to the full staging path below.
    //
    // Only the config blobs are read here, via `read_archive_entries`, which walks the archive
    // with `entries_with_seek` so layer data is seeked over rather than read.
    //
    // NOTE(review): this path records each layer's media type from the manifest descriptor,
    // while the full path records the type `extract_layer_blob` derives from the blob's leading
    // bytes — confirm the two are expected to agree.
    'early_gate: {
        let config_blobs = read_archive_entries(input, &required_configs)?;
        let mut early_images = Vec::new();
        for (image_index, (descriptor, manifest, manifest_bytes)) in manifests.iter().enumerate() {
            let config_path = blob_path_from_digest(manifest.config().digest().as_ref())?;
            let Some(config_bytes) = config_blobs.get(&config_path) else {
                break 'early_gate;
            };
            // A config blob that fails verification is a hard error, not a cache miss.
            verify_descriptor_blob(manifest.config(), config_bytes)?;
            let (config, diff_ids) = ImageConfig::parse(config_bytes)?;
            if diff_ids.len() != manifest.layers().len() {
                break 'early_gate;
            }
            let manifest_digest_str = format!("sha256:{}", sha256_hex(manifest_bytes));
            let Ok(manifest_digest) = manifest_digest_str.parse::<crate::Digest>() else {
                break 'early_gate;
            };
            if !crate::cache::is_valid_erofs_artifact(&cache.fsmeta_erofs_path(&manifest_digest))
                || !cache.vmdk_path(&manifest_digest).exists()
            {
                break 'early_gate;
            }

            let mut layer_metadata = Vec::with_capacity(manifest.layers().len());
            for (position, layer_descriptor) in manifest.layers().iter().enumerate() {
                // Indexing is in bounds: the lengths were compared above.
                let diff_id_str = diff_ids[position].clone();
                let Ok(diff_id) = diff_id_str.parse::<crate::Digest>() else {
                    break 'early_gate;
                };
                if !crate::cache::is_valid_erofs_artifact(&cache.layer_erofs_path(&diff_id)) {
                    break 'early_gate;
                }
                layer_metadata.push(CachedLayerMetadata {
                    digest: layer_descriptor.digest().to_string(),
                    media_type: Some(layer_descriptor.media_type().to_string()),
                    size_bytes: Some(layer_descriptor.size()),
                    diff_id: diff_id_str,
                });
            }

            let metadata = CachedImageMetadata {
                manifest_digest: manifest_digest_str,
                config_digest: manifest.config().digest().to_string(),
                raw_manifest_json: json_bytes_to_string(manifest_bytes, "OCI manifest")?,
                raw_config_json: json_bytes_to_string(config_bytes, "OCI config")?,
                config,
                layers: layer_metadata,
            };

            let mut refs = descriptor
                .annotations()
                .as_ref()
                .and_then(|annotations| annotations.get(OCI_REF_NAME_ANNOTATION))
                .cloned()
                .into_iter()
                .collect::<Vec<_>>();
            // Caller-supplied tags name the first selected image only.
            if image_index == 0 {
                refs.extend(options.tags.iter().cloned());
            }
            refs.sort();
            refs.dedup();
            // Missing or invalid references fall through so the full path reports the error.
            if refs.is_empty() {
                break 'early_gate;
            }
            for reference in refs {
                if reference.parse::<Reference>().is_err() {
                    break 'early_gate;
                }
                early_images.push(PreparedLoadedImage {
                    reference,
                    metadata: metadata.clone(),
                });
            }
        }
        // The archive must still contain every layer member its manifests reference; this is a
        // header-only scan.
        if !archive_contains_entries(input, &required_layers)? {
            break 'early_gate;
        }
        return Ok(PreparedArchiveLoad {
            images: early_images,
            staged_layers: HashMap::new(),
        });
    }

    // Full path: some layer (or fsmeta/VMDK) is missing — stage every layer blob so
    // the materialize step can build what isn't cached.
    let file = File::open(input).map_err(|e| ImageError::Cache {
        path: input.to_path_buf(),
        source: e,
    })?;
    let mut archive = tar::Archive::new(file);
    let mut configs: HashMap<String, Vec<u8>> = HashMap::new();
    let mut layers: HashMap<String, LayerBlobInfo> = HashMap::new();
    // NOTE(review): presumably the guard removes staged files if this function exits early —
    // confirm against `StagedLayerGuard`.
    let mut staged_layers = StagedLayerGuard::new();
    let mut temp_counter = 0u64;
    let mut entry_count = 0u64;

    // Single sequential pass: configs are buffered in memory, layers are streamed to temp files.
    for entry in archive.entries().map_err(ImageError::Io)? {
        let mut entry = entry.map_err(ImageError::Io)?;
        entry_count += 1;
        enforce_archive_entry_count(entry_count)?;
        let path = normalized_archive_path(&entry)?;

        if required_configs.contains(&path) {
            let data = read_entry_to_vec(&mut entry, &path, ARCHIVE_METADATA_MAX_BYTES)?;
            configs.insert(path, data);
            continue;
        }

        if required_layers.contains(&path) {
            let mut info = extract_layer_blob(&cache, &path, &mut entry, temp_counter)?;
            temp_counter += 1;
            // The guard takes the staged path and hands back the path to record.
            info.path = staged_layers.track(info.digest.clone(), info.path);
            layers.insert(path, info);
            continue;
        }
    }

    let mut loaded = Vec::new();
    for (image_index, (descriptor, manifest, manifest_bytes)) in manifests.into_iter().enumerate() {
        let config_path = blob_path_from_digest(manifest.config().digest().as_ref())?;
        let config_bytes = configs.get(&config_path).ok_or_else(|| {
            ImageError::ConfigParse(format!("OCI layout missing config blob {config_path}"))
        })?;
        verify_descriptor_blob(manifest.config(), config_bytes)?;
        let (config, diff_ids) = ImageConfig::parse(config_bytes)?;

        if diff_ids.len() != manifest.layers().len() {
            return Err(ImageError::ManifestParse(format!(
                "layer count mismatch: config has {} diff_ids but OCI manifest has {} layers",
                diff_ids.len(),
                manifest.layers().len()
            )));
        }

        let mut layer_metadata = Vec::with_capacity(manifest.layers().len());
        for (position, layer_descriptor) in manifest.layers().iter().enumerate() {
            let layer_path = blob_path_from_digest(layer_descriptor.digest().as_ref())?;
            let layer = layers.get(&layer_path).ok_or_else(|| {
                ImageError::ManifestParse(format!("OCI layout missing layer blob {layer_path}"))
            })?;
            // Compares the staged blob's measured size and digest with the manifest descriptor.
            verify_layer_descriptor(layer_descriptor, layer)?;
            layer_metadata.push(CachedLayerMetadata {
                digest: layer.digest.clone(),
                media_type: Some(layer.media_type.clone()),
                size_bytes: Some(layer.size_bytes),
                diff_id: diff_ids[position].clone(),
            });
        }

        let metadata = CachedImageMetadata {
            manifest_digest: format!("sha256:{}", sha256_hex(&manifest_bytes)),
            config_digest: manifest.config().digest().to_string(),
            raw_manifest_json: json_bytes_to_string(&manifest_bytes, "OCI manifest")?,
            raw_config_json: json_bytes_to_string(config_bytes, "OCI config")?,
            config,
            layers: layer_metadata,
        };

        let mut refs = descriptor
            .annotations()
            .as_ref()
            .and_then(|annotations| annotations.get(OCI_REF_NAME_ANNOTATION))
            .cloned()
            .into_iter()
            .collect::<Vec<_>>();

        // Caller-supplied tags name the first selected image only.
        if image_index == 0 {
            refs.extend(options.tags.iter().cloned());
        }

        refs.sort();
        refs.dedup();

        if refs.is_empty() {
            return Err(ImageError::ManifestParse(
                "OCI layout image has no ref.name annotation; pass --tag to name it".into(),
            ));
        }

        for reference in refs {
            // Parsed only for validation; the string form is what gets stored.
            let _: Reference = reference.parse().map_err(|e| {
                ImageError::ManifestParse(format!("invalid image reference {reference}: {e}"))
            })?;
            loaded.push(PreparedLoadedImage {
                reference,
                metadata: metadata.clone(),
            });
        }
    }

    Ok(PreparedArchiveLoad {
        images: loaded,
        staged_layers: staged_layers.into_inner(),
    })
}
1062
1063fn read_archive_entry(input: &Path, wanted_path: &str) -> ImageResult<Option<Vec<u8>>> {
1064    let file = File::open(input).map_err(|e| ImageError::Cache {
1065        path: input.to_path_buf(),
1066        source: e,
1067    })?;
1068    let mut archive = tar::Archive::new(file);
1069    let mut entry_count = 0u64;
1070
1071    // `entries_with_seek` skips over each entry's data via the header size field instead of reading through it. In an OCI `docker save` tar the metadata sits at the tail
1072    // (after the blobs), so this reaches it in O(headers) rather than a full read of every layer blob.
1073    for entry in archive.entries_with_seek().map_err(ImageError::Io)? {
1074        let mut entry = entry.map_err(ImageError::Io)?;
1075        entry_count += 1;
1076        enforce_archive_entry_count(entry_count)?;
1077        let path = normalized_archive_path(&entry)?;
1078        if path != wanted_path {
1079            continue;
1080        }
1081
1082        let data = read_entry_to_vec(&mut entry, &path, ARCHIVE_METADATA_MAX_BYTES)?;
1083        return Ok(Some(data));
1084    }
1085
1086    Ok(None)
1087}
1088
1089fn read_archive_entries(
1090    input: &Path,
1091    wanted_paths: &HashSet<String>,
1092) -> ImageResult<HashMap<String, Vec<u8>>> {
1093    let file = File::open(input).map_err(|e| ImageError::Cache {
1094        path: input.to_path_buf(),
1095        source: e,
1096    })?;
1097    let mut archive = tar::Archive::new(file);
1098    let mut entries = HashMap::new();
1099    let mut entry_count = 0u64;
1100
1101    // Seek past unwanted entry data (see `read_archive_entry`).
1102    for entry in archive.entries_with_seek().map_err(ImageError::Io)? {
1103        let mut entry = entry.map_err(ImageError::Io)?;
1104        entry_count += 1;
1105        enforce_archive_entry_count(entry_count)?;
1106        let path = normalized_archive_path(&entry)?;
1107        if !wanted_paths.contains(&path) {
1108            continue;
1109        }
1110
1111        let data = read_entry_to_vec(&mut entry, &path, ARCHIVE_METADATA_MAX_BYTES)?;
1112        entries.insert(path, data);
1113        if entries.len() == wanted_paths.len() {
1114            break;
1115        }
1116    }
1117
1118    Ok(entries)
1119}
1120
1121fn archive_contains_entries(input: &Path, wanted_paths: &HashSet<String>) -> ImageResult<bool> {
1122    if wanted_paths.is_empty() {
1123        return Ok(true);
1124    }
1125
1126    let file = File::open(input).map_err(|e| ImageError::Cache {
1127        path: input.to_path_buf(),
1128        source: e,
1129    })?;
1130    let mut archive = tar::Archive::new(file);
1131    let mut entries = HashSet::new();
1132    let mut entry_count = 0u64;
1133
1134    // The warm gate trusts cached layer EROFS contents, but the archive still needs
1135    // to contain the layer members it advertises. Header-only scanning preserves the
1136    // warm path's main win: layer bytes are seeked over, not read or re-hashed.
1137    for entry in archive.entries_with_seek().map_err(ImageError::Io)? {
1138        let entry = entry.map_err(ImageError::Io)?;
1139        entry_count += 1;
1140        enforce_archive_entry_count(entry_count)?;
1141        let path = normalized_archive_path(&entry)?;
1142        if wanted_paths.contains(&path) {
1143            entries.insert(path);
1144            if entries.len() == wanted_paths.len() {
1145                return Ok(true);
1146            }
1147        }
1148    }
1149
1150    Ok(false)
1151}
1152
1153fn selectable_oci_manifests(
1154    descriptors: &[oci_spec::image::Descriptor],
1155) -> ImageResult<Vec<oci_spec::image::Descriptor>> {
1156    let host = Platform::host_linux();
1157    let selected = descriptors
1158        .iter()
1159        .filter(|descriptor| is_oci_image_manifest_descriptor(descriptor))
1160        .filter(|descriptor| descriptor_matches_platform(descriptor, &host))
1161        .cloned()
1162        .collect();
1163
1164    Ok(selected)
1165}
1166
1167fn is_oci_image_manifest_descriptor(descriptor: &oci_spec::image::Descriptor) -> bool {
1168    matches!(
1169        descriptor.media_type(),
1170        oci_spec::image::MediaType::ImageManifest
1171    ) || descriptor.media_type().to_string()
1172        == "application/vnd.docker.distribution.manifest.v2+json"
1173}
1174
1175fn descriptor_matches_platform(descriptor: &oci_spec::image::Descriptor, host: &Platform) -> bool {
1176    let Some(platform) = descriptor.platform() else {
1177        return true;
1178    };
1179
1180    if *platform.os() != host.os || *platform.architecture() != host.arch {
1181        return false;
1182    }
1183
1184    match (&host.variant, platform.variant()) {
1185        (Some(host_variant), Some(descriptor_variant)) => host_variant == descriptor_variant,
1186        (Some(_), None) => false,
1187        (None, _) => true,
1188    }
1189}
1190
1191fn blob_path_from_digest(digest: &str) -> ImageResult<String> {
1192    let digest: Digest = digest.parse()?;
1193    Ok(format!("blobs/{}/{}", digest.algorithm(), digest.hex()))
1194}
1195
1196fn verify_descriptor_blob(
1197    descriptor: &oci_spec::image::Descriptor,
1198    bytes: &[u8],
1199) -> ImageResult<()> {
1200    if descriptor.size() != bytes.len() as u64 {
1201        return Err(ImageError::ManifestParse(format!(
1202            "OCI blob {} size mismatch: descriptor has {}, archive has {}",
1203            descriptor.digest(),
1204            descriptor.size(),
1205            bytes.len()
1206        )));
1207    }
1208
1209    verify_digest_bytes(descriptor.digest().as_ref(), bytes)
1210}
1211
1212fn verify_layer_descriptor(
1213    descriptor: &oci_spec::image::Descriptor,
1214    layer: &LayerBlobInfo,
1215) -> ImageResult<()> {
1216    if descriptor.size() != layer.size_bytes {
1217        return Err(ImageError::ManifestParse(format!(
1218            "OCI layer {} size mismatch: descriptor has {}, archive has {}",
1219            descriptor.digest(),
1220            descriptor.size(),
1221            layer.size_bytes
1222        )));
1223    }
1224
1225    if descriptor.digest().to_string() != layer.digest {
1226        return Err(ImageError::ManifestParse(format!(
1227            "OCI layer digest mismatch: descriptor has {}, archive has {}",
1228            descriptor.digest(),
1229            layer.digest
1230        )));
1231    }
1232
1233    Ok(())
1234}
1235
1236fn verify_digest_bytes(digest: &str, bytes: &[u8]) -> ImageResult<()> {
1237    let digest: Digest = digest.parse()?;
1238    if digest.algorithm() != "sha256" {
1239        return Err(ImageError::ManifestParse(format!(
1240            "unsupported OCI digest algorithm: {}",
1241            digest.algorithm()
1242        )));
1243    }
1244
1245    let actual = sha256_hex(bytes);
1246    if actual != digest.hex() {
1247        return Err(ImageError::ManifestParse(format!(
1248            "OCI blob digest mismatch: expected {}, got sha256:{actual}",
1249            digest
1250        )));
1251    }
1252
1253    Ok(())
1254}
1255
1256fn verify_docker_layer_path_digest(path: &str, digest: &str) -> ImageResult<()> {
1257    let Some(hex) = path.strip_prefix("blobs/sha256/") else {
1258        return Ok(());
1259    };
1260    if hex.contains('/') {
1261        return Ok(());
1262    }
1263
1264    let expected = format!("sha256:{hex}");
1265    if expected != digest {
1266        return Err(ImageError::ManifestParse(format!(
1267            "docker archive layer path {path} digest mismatch: expected {expected}, got {digest}"
1268        )));
1269    }
1270
1271    Ok(())
1272}
1273
1274fn create_unique_temp_file(dir: &Path, prefix: &str, suffix: &str) -> ImageResult<(File, PathBuf)> {
1275    for _ in 0..128 {
1276        let id = TEMP_FILE_COUNTER.fetch_add(1, Ordering::Relaxed);
1277        let path = dir.join(format!("{prefix}-{}-{id}{suffix}", std::process::id()));
1278        match OpenOptions::new().write(true).create_new(true).open(&path) {
1279            Ok(file) => return Ok((file, path)),
1280            Err(e) if e.kind() == io::ErrorKind::AlreadyExists => continue,
1281            Err(e) => {
1282                return Err(ImageError::Cache { path, source: e });
1283            }
1284        }
1285    }
1286
1287    Err(ImageError::Cache {
1288        path: dir.to_path_buf(),
1289        source: io::Error::new(
1290            io::ErrorKind::AlreadyExists,
1291            "could not allocate a unique temporary image archive file",
1292        ),
1293    })
1294}
1295
1296fn extract_layer_blob(
1297    cache: &GlobalCache,
1298    path: &str,
1299    entry: &mut tar::Entry<'_, File>,
1300    counter: u64,
1301) -> ImageResult<LayerBlobInfo> {
1302    let declared_size = entry.header().size().map_err(ImageError::Io)?;
1303    if declared_size > ARCHIVE_LAYER_MAX_BYTES {
1304        return Err(ImageError::ManifestParse(format!(
1305            "archive layer {path} is {declared_size} bytes; max is {ARCHIVE_LAYER_MAX_BYTES}"
1306        )));
1307    }
1308
1309    let (mut temp, temp_path) =
1310        create_unique_temp_file(cache.tmp_dir(), &format!("load-{counter}"), ".blob")?;
1311    let result = (|| {
1312        let mut hasher = Sha256::new();
1313        let mut size = 0u64;
1314        let mut magic = Vec::with_capacity(4);
1315        let mut buf = [0u8; 64 * 1024];
1316
1317        loop {
1318            let read = entry.read(&mut buf).map_err(ImageError::Io)?;
1319            if read == 0 {
1320                break;
1321            }
1322            if magic.len() < 4 {
1323                let take = (4 - magic.len()).min(read);
1324                magic.extend_from_slice(&buf[..take]);
1325            }
1326            hasher.update(&buf[..read]);
1327            temp.write_all(&buf[..read])
1328                .map_err(|e| ImageError::Cache {
1329                    path: temp_path.clone(),
1330                    source: e,
1331                })?;
1332            size += read as u64;
1333            if size > ARCHIVE_LAYER_MAX_BYTES {
1334                return Err(ImageError::ManifestParse(format!(
1335                    "archive layer {path} exceeds {ARCHIVE_LAYER_MAX_BYTES} bytes"
1336                )));
1337            }
1338        }
1339        temp.flush().map_err(|e| ImageError::Cache {
1340            path: temp_path.clone(),
1341            source: e,
1342        })?;
1343        drop(temp);
1344
1345        let digest = Digest::new("sha256", hex::encode(hasher.finalize()));
1346        let staged_path = temp_path.clone();
1347
1348        let media_type = match Compression::detect(&magic) {
1349            Compression::None => OCI_LAYER_MEDIA_TYPE,
1350            Compression::Gzip => OCI_LAYER_GZIP_MEDIA_TYPE,
1351            Compression::Zstd => OCI_LAYER_ZSTD_MEDIA_TYPE,
1352        };
1353
1354        tracing::debug!(path, digest = %digest, size, "loaded layer blob from docker archive");
1355
1356        Ok(LayerBlobInfo {
1357            digest: digest.to_string(),
1358            media_type: media_type.to_string(),
1359            size_bytes: size,
1360            path: staged_path,
1361        })
1362    })();
1363
1364    if result.is_err() {
1365        let _ = std::fs::remove_file(&temp_path);
1366    }
1367
1368    result
1369}
1370
1371fn generate_layer_tar(cache: &GlobalCache, layer: &ImageSaveLayer) -> ImageResult<GeneratedLayer> {
1372    let diff_id: Digest = layer.diff_id.parse()?;
1373    let erofs_path = cache.layer_erofs_path(&diff_id);
1374    let file = File::open(&erofs_path).map_err(|e| ImageError::Cache {
1375        path: erofs_path.clone(),
1376        source: e,
1377    })?;
1378    let mut reader = ErofsReader::new(file).map_err(ImageError::Io)?;
1379    let (temp_file, temp_path) = create_unique_temp_file(cache.tmp_dir(), "save", ".layer.tar")?;
1380    let result = (|| {
1381        let digesting = DigestingWriter::new(BufWriter::new(temp_file));
1382        let mut builder = tar::Builder::new(digesting);
1383        let mut hardlinks: HashMap<u32, PathBuf> = HashMap::new();
1384
1385        reader.walk_entries::<ImageError, _>(|reader, entry| {
1386            if entry.path.as_os_str().is_empty() {
1387                return Ok(());
1388            }
1389
1390            if entry.kind == ErofsEntryKind::CharDevice && entry.rdev == Some((0, 0)) {
1391                append_whiteout(&mut builder, &entry)?;
1392                return Ok(());
1393            }
1394
1395            append_erofs_entry(&mut builder, reader, &entry, &mut hardlinks)?;
1396
1397            if entry.kind == ErofsEntryKind::Directory && entry.is_opaque() {
1398                append_opaque_marker(&mut builder, &entry)?;
1399            }
1400            Ok(())
1401        })?;
1402
1403        let digesting = builder.into_inner().map_err(ImageError::Io)?;
1404        let (mut file, hex, size) = digesting.finish();
1405        file.flush().map_err(|e| ImageError::Cache {
1406            path: temp_path.clone(),
1407            source: e,
1408        })?;
1409
1410        Ok(GeneratedLayer {
1411            diff_id: format!("sha256:{hex}"),
1412            hex,
1413            path: temp_path.clone(),
1414            size,
1415        })
1416    })();
1417
1418    if result.is_err() {
1419        let _ = std::fs::remove_file(&temp_path);
1420    }
1421
1422    result
1423}
1424
1425fn append_erofs_entry<W: Write>(
1426    builder: &mut tar::Builder<DigestingWriter<W>>,
1427    reader: &mut ErofsReader,
1428    entry: &crate::erofs::ErofsTreeEntry,
1429    hardlinks: &mut HashMap<u32, PathBuf>,
1430) -> ImageResult<()> {
1431    let mut header = tar::Header::new_gnu();
1432    apply_header_metadata(&mut header, entry);
1433
1434    match entry.kind {
1435        ErofsEntryKind::RegularFile => {
1436            if let Some(first_path) = hardlinks.get(&entry.nid) {
1437                header.set_entry_type(tar::EntryType::Link);
1438                header.set_size(0);
1439                header.set_link_name(first_path).map_err(ImageError::Io)?;
1440                header.set_cksum();
1441                builder
1442                    .append_data(&mut header, &entry.path, io::empty())
1443                    .map_err(ImageError::Io)?;
1444                return Ok(());
1445            }
1446
1447            hardlinks.insert(entry.nid, entry.path.clone());
1448            header.set_entry_type(tar::EntryType::Regular);
1449            header.set_size(entry.size);
1450            header.set_cksum();
1451            let mut data = reader.file_data_reader(entry.nid).map_err(ImageError::Io)?;
1452            builder
1453                .append_data(&mut header, &entry.path, &mut data)
1454                .map_err(ImageError::Io)?;
1455        }
1456        ErofsEntryKind::Directory => {
1457            header.set_entry_type(tar::EntryType::Directory);
1458            header.set_size(0);
1459            header.set_cksum();
1460            builder
1461                .append_data(&mut header, &entry.path, io::empty())
1462                .map_err(ImageError::Io)?;
1463        }
1464        ErofsEntryKind::Symlink => {
1465            header.set_entry_type(tar::EntryType::Symlink);
1466            header.set_size(0);
1467            let target = reader.read_link_by_nid(entry.nid).map_err(ImageError::Io)?;
1468            header
1469                .set_link_name_literal(target)
1470                .map_err(ImageError::Io)?;
1471            header.set_cksum();
1472            builder
1473                .append_data(&mut header, &entry.path, io::empty())
1474                .map_err(ImageError::Io)?;
1475        }
1476        ErofsEntryKind::CharDevice | ErofsEntryKind::BlockDevice => {
1477            header.set_entry_type(if entry.kind == ErofsEntryKind::CharDevice {
1478                tar::EntryType::Char
1479            } else {
1480                tar::EntryType::Block
1481            });
1482            header.set_size(0);
1483            if let Some((major, minor)) = entry.rdev {
1484                header.set_device_major(major).map_err(ImageError::Io)?;
1485                header.set_device_minor(minor).map_err(ImageError::Io)?;
1486            }
1487            header.set_cksum();
1488            builder
1489                .append_data(&mut header, &entry.path, io::empty())
1490                .map_err(ImageError::Io)?;
1491        }
1492        ErofsEntryKind::Fifo => {
1493            header.set_entry_type(tar::EntryType::Fifo);
1494            header.set_size(0);
1495            header.set_cksum();
1496            builder
1497                .append_data(&mut header, &entry.path, io::empty())
1498                .map_err(ImageError::Io)?;
1499        }
1500        ErofsEntryKind::Socket => {
1501            header.set_entry_type(tar::EntryType::new(0o140));
1502            header.set_size(0);
1503            header.set_cksum();
1504            builder
1505                .append_data(&mut header, &entry.path, io::empty())
1506                .map_err(ImageError::Io)?;
1507        }
1508    }
1509
1510    Ok(())
1511}
1512
1513fn append_whiteout<W: Write>(
1514    builder: &mut tar::Builder<DigestingWriter<W>>,
1515    entry: &crate::erofs::ErofsTreeEntry,
1516) -> ImageResult<()> {
1517    let Some(file_name) = entry.path.file_name() else {
1518        return Ok(());
1519    };
1520    let mut path = entry.path.clone();
1521    let mut whiteout_name = b".wh.".to_vec();
1522    whiteout_name.extend_from_slice(os_str_bytes(file_name));
1523    path.set_file_name(os_string_from_vec(whiteout_name).map_err(ImageError::Io)?);
1524    append_empty_file(builder, &path, entry)
1525}
1526
1527fn append_opaque_marker<W: Write>(
1528    builder: &mut tar::Builder<DigestingWriter<W>>,
1529    entry: &crate::erofs::ErofsTreeEntry,
1530) -> ImageResult<()> {
1531    let path = entry.path.join(".wh..wh..opq");
1532    append_empty_file(builder, &path, entry)
1533}
1534
1535fn append_empty_file<W: Write>(
1536    builder: &mut tar::Builder<DigestingWriter<W>>,
1537    path: &Path,
1538    entry: &crate::erofs::ErofsTreeEntry,
1539) -> ImageResult<()> {
1540    let mut header = tar::Header::new_gnu();
1541    apply_header_metadata(&mut header, entry);
1542    header.set_mode(0o000);
1543    header.set_entry_type(tar::EntryType::Regular);
1544    header.set_size(0);
1545    header.set_cksum();
1546    builder
1547        .append_data(&mut header, path, io::empty())
1548        .map_err(ImageError::Io)
1549}
1550
1551fn append_layer_entries<W: Write>(
1552    archive: &mut tar::Builder<W>,
1553    layer: &GeneratedLayer,
1554) -> ImageResult<()> {
1555    append_bytes(archive, &format!("{}/VERSION", layer.hex), b"1.0\n")?;
1556    append_bytes(archive, &format!("{}/json", layer.hex), b"{}")?;
1557
1558    let mut file = File::open(&layer.path).map_err(|e| ImageError::Cache {
1559        path: layer.path.clone(),
1560        source: e,
1561    })?;
1562    let mut header = tar::Header::new_gnu();
1563    header.set_entry_type(tar::EntryType::Regular);
1564    header.set_mode(0o644);
1565    header.set_uid(0);
1566    header.set_gid(0);
1567    header.set_mtime(0);
1568    header.set_size(layer.size);
1569    header.set_cksum();
1570    archive
1571        .append_data(&mut header, format!("{}/layer.tar", layer.hex), &mut file)
1572        .map_err(ImageError::Io)
1573}
1574
1575fn append_blob_file<W: Write>(
1576    archive: &mut tar::Builder<W>,
1577    hex: &str,
1578    path: &Path,
1579    size: u64,
1580) -> ImageResult<()> {
1581    let mut file = File::open(path).map_err(|e| ImageError::Cache {
1582        path: path.to_path_buf(),
1583        source: e,
1584    })?;
1585    let mut header = tar::Header::new_gnu();
1586    header.set_entry_type(tar::EntryType::Regular);
1587    header.set_mode(0o644);
1588    header.set_uid(0);
1589    header.set_gid(0);
1590    header.set_mtime(0);
1591    header.set_size(size);
1592    header.set_cksum();
1593    archive
1594        .append_data(&mut header, format!("blobs/sha256/{hex}"), &mut file)
1595        .map_err(ImageError::Io)
1596}
1597
1598fn append_blob_bytes<W: Write>(
1599    archive: &mut tar::Builder<W>,
1600    hex: &str,
1601    bytes: &[u8],
1602) -> ImageResult<()> {
1603    append_bytes(archive, &format!("blobs/sha256/{hex}"), bytes)
1604}
1605
1606fn append_directory<W: Write>(archive: &mut tar::Builder<W>, path: &str) -> ImageResult<()> {
1607    let mut header = tar::Header::new_gnu();
1608    header.set_entry_type(tar::EntryType::Directory);
1609    header.set_mode(0o755);
1610    header.set_uid(0);
1611    header.set_gid(0);
1612    header.set_mtime(0);
1613    header.set_size(0);
1614    header.set_cksum();
1615    archive
1616        .append_data(&mut header, path, io::empty())
1617        .map_err(ImageError::Io)
1618}
1619
1620fn append_bytes<W: Write>(
1621    archive: &mut tar::Builder<W>,
1622    path: &str,
1623    bytes: &[u8],
1624) -> ImageResult<()> {
1625    let mut header = tar::Header::new_gnu();
1626    header.set_entry_type(tar::EntryType::Regular);
1627    header.set_mode(0o644);
1628    header.set_uid(0);
1629    header.set_gid(0);
1630    header.set_mtime(0);
1631    header.set_size(bytes.len() as u64);
1632    header.set_cksum();
1633    archive
1634        .append_data(&mut header, path, bytes)
1635        .map_err(ImageError::Io)
1636}
1637
1638fn enforce_archive_entry_count(count: u64) -> ImageResult<()> {
1639    if count > ARCHIVE_MAX_ENTRY_COUNT {
1640        return Err(ImageError::ManifestParse(format!(
1641            "archive has more than {ARCHIVE_MAX_ENTRY_COUNT} entries"
1642        )));
1643    }
1644
1645    Ok(())
1646}
1647
1648fn read_entry_to_vec(
1649    entry: &mut tar::Entry<'_, File>,
1650    path: &str,
1651    max_bytes: u64,
1652) -> ImageResult<Vec<u8>> {
1653    let declared_size = entry.header().size().map_err(ImageError::Io)?;
1654    if declared_size > max_bytes {
1655        return Err(ImageError::ManifestParse(format!(
1656            "archive metadata entry {path} is {declared_size} bytes; max is {max_bytes}"
1657        )));
1658    }
1659
1660    let mut data = Vec::with_capacity(declared_size as usize);
1661    entry.read_to_end(&mut data).map_err(ImageError::Io)?;
1662    Ok(data)
1663}
1664
1665fn json_bytes_to_string(bytes: &[u8], context: &str) -> ImageResult<String> {
1666    std::str::from_utf8(bytes)
1667        .map(str::to_owned)
1668        .map_err(|e| ImageError::ConfigParse(format!("{context} is not UTF-8 JSON: {e}")))
1669}
1670
1671fn docker_config_json(
1672    config: &ImageSaveConfig,
1673    raw_config_json: &str,
1674    diff_ids: &[String],
1675) -> ImageResult<Vec<u8>> {
1676    if !raw_config_json.is_empty() {
1677        let mut config_json: serde_json::Value = serde_json::from_str(raw_config_json)
1678            .map_err(|e| ImageError::ConfigParse(format!("parse raw image config: {e}")))?;
1679        let Some(object) = config_json.as_object_mut() else {
1680            return Err(ImageError::ConfigParse(
1681                "raw image config JSON is not an object".into(),
1682            ));
1683        };
1684        object.insert(
1685            "rootfs".into(),
1686            serde_json::json!({
1687                "type": "layers",
1688                "diff_ids": diff_ids,
1689            }),
1690        );
1691        object.entry("architecture").or_insert_with(|| {
1692            serde_json::json!(config.architecture.as_deref().unwrap_or("amd64"))
1693        });
1694        object
1695            .entry("os")
1696            .or_insert_with(|| serde_json::json!(config.os.as_deref().unwrap_or("linux")));
1697        return serde_json::to_vec(&config_json)
1698            .map_err(|e| ImageError::ConfigParse(format!("serialize image config: {e}")));
1699    }
1700
1701    let config_json = serde_json::json!({
1702        "architecture": config.architecture.as_deref().unwrap_or("amd64"),
1703        "os": config.os.as_deref().unwrap_or("linux"),
1704        "config": {
1705            "Env": config.env,
1706            "Entrypoint": config.entrypoint,
1707            "Cmd": config.cmd,
1708            "WorkingDir": config.working_dir,
1709            "User": config.user,
1710            "Labels": if config.labels.is_empty() {
1711                serde_json::Value::Null
1712            } else {
1713                serde_json::to_value(&config.labels)
1714                    .map_err(|e| ImageError::ConfigParse(format!("serialize labels: {e}")))?
1715            },
1716        },
1717        "rootfs": {
1718            "type": "layers",
1719            "diff_ids": diff_ids,
1720        },
1721        "history": diff_ids
1722            .iter()
1723            .map(|_| serde_json::json!({"created_by": "microsandbox image save"}))
1724            .collect::<Vec<_>>(),
1725    });
1726
1727    serde_json::to_vec(&config_json)
1728        .map_err(|e| ImageError::ConfigParse(format!("serialize image config: {e}")))
1729}
1730
1731fn apply_header_metadata(header: &mut tar::Header, entry: &crate::erofs::ErofsTreeEntry) {
1732    header.set_mode((entry.metadata.mode & 0o7777) as u32);
1733    header.set_uid(entry.metadata.uid as u64);
1734    header.set_gid(entry.metadata.gid as u64);
1735    header.set_mtime(entry.metadata.mtime);
1736}
1737
1738fn normalized_archive_path(entry: &tar::Entry<'_, File>) -> ImageResult<String> {
1739    let path = entry.path().map_err(ImageError::Io)?;
1740    let bytes = path_bytes(path.as_ref());
1741    let normalized = if let Some(stripped) = bytes.strip_prefix(b"./") {
1742        stripped
1743    } else {
1744        bytes
1745    };
1746    String::from_utf8(normalized.to_vec())
1747        .map_err(|_| ImageError::ManifestParse("archive path is not valid UTF-8".into()))
1748}
1749
1750fn sha256_hex(bytes: &[u8]) -> String {
1751    hex::encode(Sha256::digest(bytes))
1752}
1753
1754//--------------------------------------------------------------------------------------------------
1755// Tests
1756//--------------------------------------------------------------------------------------------------
1757
1758#[cfg(test)]
1759mod tests {
1760    use std::collections::BTreeMap;
1761    use std::io::Cursor;
1762
1763    use tempfile::tempdir;
1764
1765    use super::*;
1766
1767    #[test]
1768    fn docker_archive_load_save_load_roundtrip() {
1769        let runtime = tokio::runtime::Builder::new_current_thread()
1770            .enable_all()
1771            .build()
1772            .unwrap();
1773        let temp = tempdir().unwrap();
1774        let input = temp.path().join("image.tar");
1775        write_test_docker_archive(&input, "tiny:latest");
1776
1777        let first_cache = temp.path().join("cache-1");
1778        let loaded = runtime
1779            .block_on(load_archive(
1780                &first_cache,
1781                &input,
1782                ImageLoadOptions::default(),
1783            ))
1784            .unwrap();
1785
1786        assert_eq!(loaded.len(), 1);
1787        assert_eq!(loaded[0].reference, "tiny:latest");
1788
1789        let saved = temp.path().join("saved.tar");
1790        let request = save_request_from_loaded(&loaded[0]);
1791        let cache = GlobalCache::new(&first_cache).unwrap();
1792        save_docker_archive(&cache, &saved, &[request]).unwrap();
1793
1794        let second_cache = temp.path().join("cache-2");
1795        let reloaded = runtime
1796            .block_on(load_archive(
1797                &second_cache,
1798                &saved,
1799                ImageLoadOptions::default(),
1800            ))
1801            .unwrap();
1802
1803        assert_eq!(reloaded.len(), 1);
1804        assert_eq!(reloaded[0].reference, "tiny:latest");
1805        assert_eq!(
1806            reloaded[0].metadata.config.cmd,
1807            Some(vec!["cat".into(), "/hello.txt".into()])
1808        );
1809    }
1810
1811    #[test]
1812    fn docker_archive_loads_manifest_blob_paths() {
1813        let runtime = tokio::runtime::Builder::new_current_thread()
1814            .enable_all()
1815            .build()
1816            .unwrap();
1817        let temp = tempdir().unwrap();
1818        let input = temp.path().join("blob-paths.tar");
1819        write_test_docker_blob_archive_from_layer(&input, "blob-paths:latest", simple_layer_tar());
1820
1821        let loaded = runtime
1822            .block_on(load_archive(
1823                &temp.path().join("cache"),
1824                &input,
1825                ImageLoadOptions::default(),
1826            ))
1827            .unwrap();
1828
1829        assert_eq!(loaded.len(), 1);
1830        assert_eq!(loaded[0].reference, "blob-paths:latest");
1831        assert_eq!(
1832            loaded[0].metadata.config.cmd,
1833            Some(vec!["cat".into(), "/hello.txt".into()])
1834        );
1835    }
1836
1837    #[test]
1838    fn docker_archive_rejects_mismatched_blob_layer_path() {
1839        let runtime = tokio::runtime::Builder::new_current_thread()
1840            .enable_all()
1841            .build()
1842            .unwrap();
1843        let temp = tempdir().unwrap();
1844        let input = temp.path().join("bad-blob-path.tar");
1845        let layer_bytes = simple_layer_tar();
1846        let diff_id = format!("sha256:{}", sha256_hex(&layer_bytes));
1847        let config_bytes = test_config_bytes(&diff_id);
1848        let config_name = format!("blobs/sha256/{}", sha256_hex(&config_bytes));
1849        let layer_name = format!("blobs/sha256/{:064x}", 1u8);
1850
1851        write_test_docker_archive_entries(
1852            &input,
1853            "bad-blob-path:latest",
1854            config_name,
1855            layer_name,
1856            config_bytes,
1857            layer_bytes,
1858        );
1859
1860        let err = runtime
1861            .block_on(load_archive(
1862                &temp.path().join("cache"),
1863                &input,
1864                ImageLoadOptions::default(),
1865            ))
1866            .unwrap_err();
1867
1868        assert!(err.to_string().contains("digest mismatch"));
1869    }
1870
1871    #[test]
1872    fn oci_layout_archive_load_save_load_roundtrip() {
1873        let runtime = tokio::runtime::Builder::new_current_thread()
1874            .enable_all()
1875            .build()
1876            .unwrap();
1877        let temp = tempdir().unwrap();
1878        let input = temp.path().join("oci-layout.tar");
1879        write_test_oci_archive_from_layer(&input, "oci-layout:latest", simple_layer_tar());
1880
1881        let first_cache = temp.path().join("cache-1");
1882        let loaded = runtime
1883            .block_on(load_archive(
1884                &first_cache,
1885                &input,
1886                ImageLoadOptions::default(),
1887            ))
1888            .unwrap();
1889
1890        assert_eq!(loaded.len(), 1);
1891        assert_eq!(loaded[0].reference, "oci-layout:latest");
1892
1893        let saved = temp.path().join("saved-oci-layout.tar");
1894        let request = save_request_from_loaded(&loaded[0]);
1895        let cache = GlobalCache::new(&first_cache).unwrap();
1896        save_archive(&cache, &saved, &[request], ImageArchiveFormat::Oci).unwrap();
1897
1898        let index_bytes = read_archive_entry(&saved, "index.json").unwrap().unwrap();
1899        let index: oci_spec::image::ImageIndex = serde_json::from_slice(&index_bytes).unwrap();
1900        assert_eq!(index.manifests().len(), 1);
1901        assert_eq!(
1902            index.manifests()[0]
1903                .annotations()
1904                .as_ref()
1905                .unwrap()
1906                .get(OCI_REF_NAME_ANNOTATION),
1907            Some(&"oci-layout:latest".to_string())
1908        );
1909
1910        let second_cache = temp.path().join("cache-2");
1911        let reloaded = runtime
1912            .block_on(load_archive(
1913                &second_cache,
1914                &saved,
1915                ImageLoadOptions::default(),
1916            ))
1917            .unwrap();
1918
1919        assert_eq!(reloaded.len(), 1);
1920        assert_eq!(reloaded[0].reference, "oci-layout:latest");
1921    }
1922
1923    #[test]
1924    fn docker_archive_save_preserves_layer_semantics() {
1925        let runtime = tokio::runtime::Builder::new_current_thread()
1926            .enable_all()
1927            .build()
1928            .unwrap();
1929        let temp = tempdir().unwrap();
1930        let input = temp.path().join("complex.tar");
1931        let layer_bytes = complex_layer_tar();
1932        write_test_docker_archive_from_layer(&input, "complex:latest", layer_bytes);
1933
1934        let first_cache = temp.path().join("cache-1");
1935        let loaded = runtime
1936            .block_on(load_archive(
1937                &first_cache,
1938                &input,
1939                ImageLoadOptions::default(),
1940            ))
1941            .unwrap();
1942
1943        let saved = temp.path().join("saved-complex.tar");
1944        let request = save_request_from_loaded(&loaded[0]);
1945        let cache = GlobalCache::new(&first_cache).unwrap();
1946        save_docker_archive(&cache, &saved, &[request]).unwrap();
1947
1948        let entries = saved_layer_entries(&saved);
1949        let config_entry = entries.get("etc/config.txt").unwrap();
1950        let config_link_entry = entries.get("etc/config.link").unwrap();
1951        let regular_config_paths = [
1952            ("etc/config.txt", config_entry),
1953            ("etc/config.link", config_link_entry),
1954        ]
1955        .into_iter()
1956        .filter(|(_, entry)| entry.entry_type == tar::EntryType::Regular)
1957        .collect::<Vec<_>>();
1958        let hardlink_config_paths = [
1959            ("etc/config.txt", config_entry),
1960            ("etc/config.link", config_link_entry),
1961        ]
1962        .into_iter()
1963        .filter(|(_, entry)| entry.entry_type == tar::EntryType::Link)
1964        .collect::<Vec<_>>();
1965
1966        assert_eq!(regular_config_paths.len(), 1);
1967        assert_eq!(hardlink_config_paths.len(), 1);
1968        assert_eq!(regular_config_paths[0].1.data, b"shared config\n");
1969        assert_eq!(
1970            hardlink_config_paths[0].1.link_name.as_deref(),
1971            Some(regular_config_paths[0].0)
1972        );
1973        assert_eq!(regular_config_paths[0].1.mode, 0o640);
1974        assert_eq!(regular_config_paths[0].1.uid, 1000);
1975        assert_eq!(regular_config_paths[0].1.gid, 1001);
1976        assert_eq!(regular_config_paths[0].1.mtime, 42);
1977
1978        let symlink_entry = entries.get("bin/config").unwrap();
1979        assert_eq!(symlink_entry.entry_type, tar::EntryType::Symlink);
1980        assert_eq!(
1981            symlink_entry.link_name.as_deref(),
1982            Some("../etc/config.txt")
1983        );
1984
1985        let whiteout_entry = entries.get("var/.wh.deleted").unwrap();
1986        assert_eq!(whiteout_entry.entry_type, tar::EntryType::Regular);
1987        assert!(whiteout_entry.data.is_empty());
1988
1989        let opaque_entry = entries.get("cache/.wh..wh..opq").unwrap();
1990        assert_eq!(opaque_entry.entry_type, tar::EntryType::Regular);
1991        assert!(opaque_entry.data.is_empty());
1992
1993        let second_cache = temp.path().join("cache-2");
1994        let reloaded = runtime
1995            .block_on(load_archive(
1996                &second_cache,
1997                &saved,
1998                ImageLoadOptions::default(),
1999            ))
2000            .unwrap();
2001
2002        assert_eq!(reloaded.len(), 1);
2003        assert_eq!(reloaded[0].reference, "complex:latest");
2004    }
2005
2006    #[test]
2007    fn docker_archive_save_preserves_raw_config_fields() {
2008        let runtime = tokio::runtime::Builder::new_current_thread()
2009            .enable_all()
2010            .build()
2011            .unwrap();
2012        let temp = tempdir().unwrap();
2013        let input = temp.path().join("config-fidelity.tar");
2014        let layer_bytes = simple_layer_tar();
2015        let diff_id = format!("sha256:{}", sha256_hex(&layer_bytes));
2016        let config_bytes = serde_json::to_vec(&serde_json::json!({
2017            "architecture": "arm64",
2018            "os": "linux",
2019            "author": "microsandbox-test",
2020            "config": {
2021                "Env": ["PATH=/usr/bin"],
2022                "Cmd": ["cat", "/hello.txt"],
2023            },
2024            "rootfs": {
2025                "type": "layers",
2026                "diff_ids": [diff_id],
2027            },
2028            "history": [{
2029                "created_by": "fixture",
2030                "comment": "keep me",
2031            }],
2032        }))
2033        .unwrap();
2034        let config_name = format!("{}.json", sha256_hex(&config_bytes));
2035
2036        write_test_docker_archive_entries(
2037            &input,
2038            "config-fidelity:latest",
2039            config_name,
2040            "layer/layer.tar".into(),
2041            config_bytes,
2042            layer_bytes,
2043        );
2044
2045        let first_cache = temp.path().join("cache-1");
2046        let loaded = runtime
2047            .block_on(load_archive(
2048                &first_cache,
2049                &input,
2050                ImageLoadOptions::default(),
2051            ))
2052            .unwrap();
2053        let saved = temp.path().join("saved-config-fidelity.tar");
2054        let request = save_request_from_loaded(&loaded[0]);
2055        let cache = GlobalCache::new(&first_cache).unwrap();
2056        save_docker_archive(&cache, &saved, &[request]).unwrap();
2057
2058        let manifest_bytes = read_archive_entry(&saved, "manifest.json")
2059            .unwrap()
2060            .unwrap();
2061        let manifest: Vec<DockerManifestEntry> = serde_json::from_slice(&manifest_bytes).unwrap();
2062        let saved_config = read_archive_entry(&saved, &manifest[0].config)
2063            .unwrap()
2064            .unwrap();
2065        let saved_config: serde_json::Value = serde_json::from_slice(&saved_config).unwrap();
2066
2067        assert_eq!(saved_config["author"], "microsandbox-test");
2068        assert_eq!(saved_config["history"][0]["comment"], "keep me");
2069    }
2070
2071    #[test]
2072    fn oci_load_hits_early_cache_gate_when_materialized() {
2073        let runtime = tokio::runtime::Builder::new_current_thread()
2074            .enable_all()
2075            .build()
2076            .unwrap();
2077        let temp = tempdir().unwrap();
2078        let input = temp.path().join("gate.tar");
2079        write_test_oci_archive_from_layer(&input, "gate:latest", simple_layer_tar());
2080        let cache = temp.path().join("cache");
2081
2082        // First load materializes the image (layer EROFS + fsmeta + VMDK).
2083        let loaded = runtime
2084            .block_on(load_archive(&cache, &input, ImageLoadOptions::default()))
2085            .unwrap();
2086        assert_eq!(loaded.len(), 1);
2087
2088        // A second load must take the early cache gate: the metadata is rebuilt from the manifest + config blobs alone, and no layer blob is staged.
2089        let prepared =
2090            load_oci_archive_blocking(&cache, &input, ImageLoadOptions::default()).unwrap();
2091        assert!(
2092            prepared.staged_layers.is_empty(),
2093            "early cache gate should skip staging layer blobs on a warm cache"
2094        );
2095        assert_eq!(prepared.images.len(), 1);
2096        assert_eq!(prepared.images[0].reference, "gate:latest");
2097        assert_eq!(
2098            prepared.images[0].metadata.manifest_digest,
2099            loaded[0].metadata.manifest_digest,
2100        );
2101    }
2102
2103    #[test]
2104    fn docker_load_hits_early_cache_gate_when_materialized() {
2105        let runtime = tokio::runtime::Builder::new_current_thread()
2106            .enable_all()
2107            .build()
2108            .unwrap();
2109        let temp = tempdir().unwrap();
2110        let input = temp.path().join("gate.tar");
2111        write_test_docker_archive(&input, "gate-docker:latest");
2112        let cache = temp.path().join("cache");
2113
2114        // First load materializes the image and records its metadata by reference.
2115        let loaded = runtime
2116            .block_on(load_archive(&cache, &input, ImageLoadOptions::default()))
2117            .unwrap();
2118        assert_eq!(loaded.len(), 1);
2119
2120        // A second load must take the early cache gate: the cached metadata is found by reference (RepoTags) and no layer blob is staged.
2121        let prepared = load_archive_blocking(&cache, &input, ImageLoadOptions::default()).unwrap();
2122        assert!(
2123            prepared.staged_layers.is_empty(),
2124            "early cache gate should skip staging layer blobs on a warm cache"
2125        );
2126        assert_eq!(prepared.images.len(), 1);
2127        assert_eq!(prepared.images[0].reference, "gate-docker:latest");
2128        assert_eq!(
2129            prepared.images[0].metadata.manifest_digest,
2130            loaded[0].metadata.manifest_digest,
2131        );
2132    }
2133
2134    #[test]
2135    fn docker_load_misses_gate_after_content_change() {
2136        let runtime = tokio::runtime::Builder::new_current_thread()
2137            .enable_all()
2138            .build()
2139            .unwrap();
2140        let temp = tempdir().unwrap();
2141        let cache = temp.path().join("cache");
2142
2143        // Materialize `app:latest` from one layer, recording metadata under that ref.
2144        let first = temp.path().join("first.tar");
2145        write_test_docker_archive_from_layer(&first, "app:latest", simple_layer_tar());
2146        runtime
2147            .block_on(load_archive(&cache, &first, ImageLoadOptions::default()))
2148            .unwrap();
2149
2150        // Rebuild `app:latest` with different content (a new layer) and reload. The diff_ids no longer match the cached metadata, so the gate must fall through to
2151        // staging rather than reuse the stale image.
2152        let second = temp.path().join("second.tar");
2153        write_test_docker_archive_from_layer(&second, "app:latest", complex_layer_tar());
2154        let prepared = load_archive_blocking(&cache, &second, ImageLoadOptions::default()).unwrap();
2155        assert!(
2156            !prepared.staged_layers.is_empty(),
2157            "a rebuilt tag with new content must not hit the gate"
2158        );
2159    }
2160
2161    #[test]
2162    fn docker_load_misses_gate_after_config_change() {
2163        let runtime = tokio::runtime::Builder::new_current_thread()
2164            .enable_all()
2165            .build()
2166            .unwrap();
2167        let temp = tempdir().unwrap();
2168        let cache = temp.path().join("cache");
2169        let layer_bytes = simple_layer_tar();
2170        let diff_id = format!("sha256:{}", sha256_hex(&layer_bytes));
2171
2172        // Materialize the first image under `app:latest`.
2173        let first = temp.path().join("first.tar");
2174        write_test_docker_archive_from_layer(&first, "app:latest", layer_bytes.clone());
2175        runtime
2176            .block_on(load_archive(&cache, &first, ImageLoadOptions::default()))
2177            .unwrap();
2178
2179        // Rebuild the same filesystem layer with a different image config. Diff IDs
2180        // alone would hit the old gate, but the config digest must force a miss.
2181        let second = temp.path().join("second.tar");
2182        let config_bytes = test_config_bytes_with_cmd(&diff_id, &["sh", "-c", "echo changed"]);
2183        write_test_docker_archive_entries(
2184            &second,
2185            "app:latest",
2186            format!("{}.json", sha256_hex(&config_bytes)),
2187            "layer/layer.tar".into(),
2188            config_bytes,
2189            layer_bytes,
2190        );
2191
2192        let prepared = load_archive_blocking(&cache, &second, ImageLoadOptions::default()).unwrap();
2193        assert!(
2194            !prepared.staged_layers.is_empty(),
2195            "a config-only rebuild must not reuse stale cached metadata"
2196        );
2197        let expected_cmd = vec!["sh".into(), "-c".into(), "echo changed".into()];
2198        assert_eq!(
2199            prepared.images[0].metadata.config.cmd.as_ref(),
2200            Some(&expected_cmd)
2201        );
2202    }
2203
2204    #[test]
2205    fn docker_load_rejects_warm_archive_missing_layer_entry() {
2206        let runtime = tokio::runtime::Builder::new_current_thread()
2207            .enable_all()
2208            .build()
2209            .unwrap();
2210        let temp = tempdir().unwrap();
2211        let cache = temp.path().join("cache");
2212        let layer_bytes = simple_layer_tar();
2213
2214        let valid = temp.path().join("valid.tar");
2215        write_test_docker_archive_from_layer(&valid, "app:latest", layer_bytes.clone());
2216        runtime
2217            .block_on(load_archive(&cache, &valid, ImageLoadOptions::default()))
2218            .unwrap();
2219
2220        let missing_layer = temp.path().join("missing-layer.tar");
2221        write_test_docker_archive_without_layer(&missing_layer, "app:latest", layer_bytes);
2222        let err =
2223            load_archive_blocking(&cache, &missing_layer, ImageLoadOptions::default()).unwrap_err();
2224        match err {
2225            ImageError::ManifestParse(message) => {
2226                assert!(message.contains("docker archive missing layer layer/layer.tar"));
2227            }
2228            other => panic!("expected missing layer error, got {other:?}"),
2229        }
2230    }
2231
2232    #[test]
2233    fn oci_load_rejects_warm_archive_missing_layer_entry() {
2234        let runtime = tokio::runtime::Builder::new_current_thread()
2235            .enable_all()
2236            .build()
2237            .unwrap();
2238        let temp = tempdir().unwrap();
2239        let cache = temp.path().join("cache");
2240        let layer_bytes = simple_layer_tar();
2241
2242        let valid = temp.path().join("valid.tar");
2243        write_test_oci_archive_from_layer(&valid, "app:latest", layer_bytes.clone());
2244        runtime
2245            .block_on(load_archive(&cache, &valid, ImageLoadOptions::default()))
2246            .unwrap();
2247
2248        let missing_layer = temp.path().join("missing-layer.tar");
2249        write_test_oci_archive_without_layer(&missing_layer, "app:latest", layer_bytes);
2250        let err = load_oci_archive_blocking(&cache, &missing_layer, ImageLoadOptions::default())
2251            .unwrap_err();
2252        match err {
2253            ImageError::ManifestParse(message) => {
2254                assert!(message.contains("OCI layout missing layer blob"));
2255            }
2256            other => panic!("expected missing layer error, got {other:?}"),
2257        }
2258    }
2259
2260    #[test]
2261    fn oci_load_rejects_warm_archive_with_mismatched_config_blob() {
2262        let runtime = tokio::runtime::Builder::new_current_thread()
2263            .enable_all()
2264            .build()
2265            .unwrap();
2266        let temp = tempdir().unwrap();
2267        let cache = temp.path().join("cache");
2268        let layer_bytes = simple_layer_tar();
2269        let diff_id = format!("sha256:{}", sha256_hex(&layer_bytes));
2270
2271        let valid = temp.path().join("valid.tar");
2272        write_test_oci_archive_from_layer(&valid, "app:latest", layer_bytes.clone());
2273        runtime
2274            .block_on(load_archive(&cache, &valid, ImageLoadOptions::default()))
2275            .unwrap();
2276
2277        let bad_config = test_config_bytes_with_cmd(&diff_id, &["sh", "-c", "echo changed"]);
2278        let corrupt = temp.path().join("corrupt-config.tar");
2279        write_test_oci_archive_with_config_blob(&corrupt, "app:latest", layer_bytes, bad_config);
2280        let err =
2281            load_oci_archive_blocking(&cache, &corrupt, ImageLoadOptions::default()).unwrap_err();
2282        match err {
2283            ImageError::ManifestParse(message) => {
2284                assert!(message.contains("OCI blob sha256:"));
2285            }
2286            other => panic!("expected config descriptor error, got {other:?}"),
2287        }
2288    }
2289
2290    fn write_test_docker_archive(path: &Path, reference: &str) {
2291        write_test_docker_archive_from_layer(path, reference, simple_layer_tar());
2292    }
2293
2294    fn write_test_docker_archive_from_layer(path: &Path, reference: &str, layer_bytes: Vec<u8>) {
2295        let diff_id = format!("sha256:{}", sha256_hex(&layer_bytes));
2296        let config_bytes = test_config_bytes(&diff_id);
2297        let config_name = format!("{}.json", sha256_hex(&config_bytes));
2298
2299        write_test_docker_archive_entries(
2300            path,
2301            reference,
2302            config_name,
2303            "layer/layer.tar".into(),
2304            config_bytes,
2305            layer_bytes,
2306        );
2307    }
2308
2309    fn write_test_docker_archive_without_layer(path: &Path, reference: &str, layer_bytes: Vec<u8>) {
2310        let diff_id = format!("sha256:{}", sha256_hex(&layer_bytes));
2311        let config_bytes = test_config_bytes(&diff_id);
2312        let config_name = format!("{}.json", sha256_hex(&config_bytes));
2313        let layer_name = "layer/layer.tar".to_string();
2314        let manifest_bytes = serde_json::to_vec(&vec![DockerManifestOut {
2315            config: config_name.clone(),
2316            repo_tags: vec![reference.into()],
2317            layers: vec![layer_name],
2318        }])
2319        .unwrap();
2320
2321        let file = File::create(path).unwrap();
2322        let mut archive = tar::Builder::new(file);
2323        append_bytes(&mut archive, &config_name, &config_bytes).unwrap();
2324        append_bytes(&mut archive, "manifest.json", &manifest_bytes).unwrap();
2325        archive.finish().unwrap();
2326    }
2327
2328    fn write_test_docker_blob_archive_from_layer(
2329        path: &Path,
2330        reference: &str,
2331        layer_bytes: Vec<u8>,
2332    ) {
2333        let diff_id = format!("sha256:{}", sha256_hex(&layer_bytes));
2334        let config_bytes = test_config_bytes(&diff_id);
2335        let config_name = format!("blobs/sha256/{}", sha256_hex(&config_bytes));
2336        let layer_name = format!("blobs/sha256/{}", sha256_hex(&layer_bytes));
2337
2338        write_test_docker_archive_entries(
2339            path,
2340            reference,
2341            config_name,
2342            layer_name,
2343            config_bytes,
2344            layer_bytes,
2345        );
2346    }
2347
2348    fn write_test_oci_archive_from_layer(path: &Path, reference: &str, layer_bytes: Vec<u8>) {
2349        let diff_id = format!("sha256:{}", sha256_hex(&layer_bytes));
2350        let config_bytes = test_config_bytes(&diff_id);
2351        let config_hex = sha256_hex(&config_bytes);
2352        let layer_hex = sha256_hex(&layer_bytes);
2353        let manifest_bytes = serde_json::to_vec(&serde_json::json!({
2354            "schemaVersion": 2,
2355            "mediaType": OCI_MANIFEST_MEDIA_TYPE,
2356            "config": {
2357                "mediaType": OCI_CONFIG_MEDIA_TYPE,
2358                "digest": format!("sha256:{config_hex}"),
2359                "size": config_bytes.len(),
2360            },
2361            "layers": [{
2362                "mediaType": OCI_LAYER_MEDIA_TYPE,
2363                "digest": format!("sha256:{layer_hex}"),
2364                "size": layer_bytes.len(),
2365            }],
2366        }))
2367        .unwrap();
2368        let manifest_hex = sha256_hex(&manifest_bytes);
2369        let host = Platform::host_linux();
2370        let index_bytes = serde_json::to_vec(&serde_json::json!({
2371            "schemaVersion": 2,
2372            "mediaType": OCI_INDEX_MEDIA_TYPE,
2373            "manifests": [{
2374                "mediaType": OCI_MANIFEST_MEDIA_TYPE,
2375                "digest": format!("sha256:{manifest_hex}"),
2376                "size": manifest_bytes.len(),
2377                "platform": {
2378                    "architecture": host.arch.to_string(),
2379                    "os": host.os.to_string(),
2380                },
2381                "annotations": {
2382                    (OCI_REF_NAME_ANNOTATION): reference,
2383                },
2384            }],
2385        }))
2386        .unwrap();
2387
2388        let file = File::create(path).unwrap();
2389        let mut archive = tar::Builder::new(file);
2390        append_bytes(
2391            &mut archive,
2392            "oci-layout",
2393            br#"{"imageLayoutVersion":"1.0.0"}"#,
2394        )
2395        .unwrap();
2396        append_bytes(&mut archive, "index.json", &index_bytes).unwrap();
2397        append_bytes(
2398            &mut archive,
2399            &format!("blobs/sha256/{config_hex}"),
2400            &config_bytes,
2401        )
2402        .unwrap();
2403        append_bytes(
2404            &mut archive,
2405            &format!("blobs/sha256/{manifest_hex}"),
2406            &manifest_bytes,
2407        )
2408        .unwrap();
2409        append_bytes(
2410            &mut archive,
2411            &format!("blobs/sha256/{layer_hex}"),
2412            &layer_bytes,
2413        )
2414        .unwrap();
2415        archive.finish().unwrap();
2416    }
2417
2418    fn write_test_oci_archive_without_layer(path: &Path, reference: &str, layer_bytes: Vec<u8>) {
2419        let diff_id = format!("sha256:{}", sha256_hex(&layer_bytes));
2420        let config_bytes = test_config_bytes(&diff_id);
2421        let config_hex = sha256_hex(&config_bytes);
2422        let layer_hex = sha256_hex(&layer_bytes);
2423        let manifest_bytes = serde_json::to_vec(&serde_json::json!({
2424            "schemaVersion": 2,
2425            "mediaType": OCI_MANIFEST_MEDIA_TYPE,
2426            "config": {
2427                "mediaType": OCI_CONFIG_MEDIA_TYPE,
2428                "digest": format!("sha256:{config_hex}"),
2429                "size": config_bytes.len(),
2430            },
2431            "layers": [{
2432                "mediaType": OCI_LAYER_MEDIA_TYPE,
2433                "digest": format!("sha256:{layer_hex}"),
2434                "size": layer_bytes.len(),
2435            }],
2436        }))
2437        .unwrap();
2438        let manifest_hex = sha256_hex(&manifest_bytes);
2439        let index_bytes = test_oci_index_bytes(reference, &manifest_hex, manifest_bytes.len());
2440
2441        let file = File::create(path).unwrap();
2442        let mut archive = tar::Builder::new(file);
2443        append_bytes(
2444            &mut archive,
2445            "oci-layout",
2446            br#"{"imageLayoutVersion":"1.0.0"}"#,
2447        )
2448        .unwrap();
2449        append_bytes(&mut archive, "index.json", &index_bytes).unwrap();
2450        append_bytes(
2451            &mut archive,
2452            &format!("blobs/sha256/{config_hex}"),
2453            &config_bytes,
2454        )
2455        .unwrap();
2456        append_bytes(
2457            &mut archive,
2458            &format!("blobs/sha256/{manifest_hex}"),
2459            &manifest_bytes,
2460        )
2461        .unwrap();
2462        archive.finish().unwrap();
2463    }
2464
2465    fn write_test_oci_archive_with_config_blob(
2466        path: &Path,
2467        reference: &str,
2468        layer_bytes: Vec<u8>,
2469        stored_config_bytes: Vec<u8>,
2470    ) {
2471        let diff_id = format!("sha256:{}", sha256_hex(&layer_bytes));
2472        let manifest_config_bytes = test_config_bytes(&diff_id);
2473        let config_hex = sha256_hex(&manifest_config_bytes);
2474        let layer_hex = sha256_hex(&layer_bytes);
2475        let manifest_bytes = serde_json::to_vec(&serde_json::json!({
2476            "schemaVersion": 2,
2477            "mediaType": OCI_MANIFEST_MEDIA_TYPE,
2478            "config": {
2479                "mediaType": OCI_CONFIG_MEDIA_TYPE,
2480                "digest": format!("sha256:{config_hex}"),
2481                "size": manifest_config_bytes.len(),
2482            },
2483            "layers": [{
2484                "mediaType": OCI_LAYER_MEDIA_TYPE,
2485                "digest": format!("sha256:{layer_hex}"),
2486                "size": layer_bytes.len(),
2487            }],
2488        }))
2489        .unwrap();
2490        let manifest_hex = sha256_hex(&manifest_bytes);
2491        let index_bytes = test_oci_index_bytes(reference, &manifest_hex, manifest_bytes.len());
2492
2493        let file = File::create(path).unwrap();
2494        let mut archive = tar::Builder::new(file);
2495        append_bytes(
2496            &mut archive,
2497            "oci-layout",
2498            br#"{"imageLayoutVersion":"1.0.0"}"#,
2499        )
2500        .unwrap();
2501        append_bytes(&mut archive, "index.json", &index_bytes).unwrap();
2502        append_bytes(
2503            &mut archive,
2504            &format!("blobs/sha256/{config_hex}"),
2505            &stored_config_bytes,
2506        )
2507        .unwrap();
2508        append_bytes(
2509            &mut archive,
2510            &format!("blobs/sha256/{manifest_hex}"),
2511            &manifest_bytes,
2512        )
2513        .unwrap();
2514        append_bytes(
2515            &mut archive,
2516            &format!("blobs/sha256/{layer_hex}"),
2517            &layer_bytes,
2518        )
2519        .unwrap();
2520        archive.finish().unwrap();
2521    }
2522
2523    fn simple_layer_tar() -> Vec<u8> {
2524        let mut layer_bytes = Vec::new();
2525        {
2526            let mut layer = tar::Builder::new(&mut layer_bytes);
2527            let data = b"hello from archive\n";
2528            let mut header = tar::Header::new_gnu();
2529            header.set_entry_type(tar::EntryType::Regular);
2530            header.set_mode(0o644);
2531            header.set_uid(0);
2532            header.set_gid(0);
2533            header.set_mtime(0);
2534            header.set_size(data.len() as u64);
2535            header.set_cksum();
2536            layer
2537                .append_data(&mut header, "hello.txt", Cursor::new(data))
2538                .unwrap();
2539            layer.finish().unwrap();
2540        }
2541
2542        layer_bytes
2543    }
2544
2545    fn test_config_bytes(diff_id: &str) -> Vec<u8> {
2546        test_config_bytes_with_cmd(diff_id, &["cat", "/hello.txt"])
2547    }
2548
2549    fn test_config_bytes_with_cmd(diff_id: &str, cmd: &[&str]) -> Vec<u8> {
2550        serde_json::to_vec(&serde_json::json!({
2551            "architecture": "arm64",
2552            "os": "linux",
2553            "config": {
2554                "Env": ["PATH=/usr/bin"],
2555                "Cmd": cmd,
2556            },
2557            "rootfs": {
2558                "type": "layers",
2559                "diff_ids": [diff_id],
2560            },
2561        }))
2562        .unwrap()
2563    }
2564
2565    fn test_oci_index_bytes(reference: &str, manifest_hex: &str, manifest_len: usize) -> Vec<u8> {
2566        let host = Platform::host_linux();
2567        serde_json::to_vec(&serde_json::json!({
2568            "schemaVersion": 2,
2569            "mediaType": OCI_INDEX_MEDIA_TYPE,
2570            "manifests": [{
2571                "mediaType": OCI_MANIFEST_MEDIA_TYPE,
2572                "digest": format!("sha256:{manifest_hex}"),
2573                "size": manifest_len,
2574                "platform": {
2575                    "architecture": host.arch.to_string(),
2576                    "os": host.os.to_string(),
2577                },
2578                "annotations": {
2579                    (OCI_REF_NAME_ANNOTATION): reference,
2580                },
2581            }],
2582        }))
2583        .unwrap()
2584    }
2585
2586    fn write_test_docker_archive_entries(
2587        path: &Path,
2588        reference: &str,
2589        config_name: String,
2590        layer_name: String,
2591        config_bytes: Vec<u8>,
2592        layer_bytes: Vec<u8>,
2593    ) {
2594        let manifest_bytes = serde_json::to_vec(&vec![DockerManifestOut {
2595            config: config_name.clone(),
2596            repo_tags: vec![reference.into()],
2597            layers: vec![layer_name.clone()],
2598        }])
2599        .unwrap();
2600
2601        let file = File::create(path).unwrap();
2602        let mut archive = tar::Builder::new(file);
2603        append_bytes(&mut archive, &config_name, &config_bytes).unwrap();
2604        append_bytes(&mut archive, "manifest.json", &manifest_bytes).unwrap();
2605
2606        let mut header = tar::Header::new_gnu();
2607        header.set_entry_type(tar::EntryType::Regular);
2608        header.set_mode(0o644);
2609        header.set_uid(0);
2610        header.set_gid(0);
2611        header.set_mtime(0);
2612        header.set_size(layer_bytes.len() as u64);
2613        header.set_cksum();
2614        archive
2615            .append_data(&mut header, layer_name, Cursor::new(layer_bytes))
2616            .unwrap();
2617        archive.finish().unwrap();
2618    }
2619
2620    fn complex_layer_tar() -> Vec<u8> {
2621        let mut layer_bytes = Vec::new();
2622        {
2623            let mut layer = tar::Builder::new(&mut layer_bytes);
2624            append_test_dir(&mut layer, "bin", 0o755, 0, 0, 1);
2625            append_test_dir(&mut layer, "cache", 0o755, 0, 0, 1);
2626            append_test_dir(&mut layer, "etc", 0o755, 0, 0, 1);
2627            append_test_dir(&mut layer, "var", 0o755, 0, 0, 1);
2628            append_test_file(
2629                &mut layer,
2630                "etc/config.txt",
2631                b"shared config\n",
2632                0o640,
2633                1000,
2634                1001,
2635                42,
2636            );
2637            append_test_hardlink(&mut layer, "etc/config.link", "etc/config.txt");
2638            append_test_symlink(&mut layer, "bin/config", "../etc/config.txt");
2639            append_test_file(&mut layer, "var/.wh.deleted", b"", 0o000, 0, 0, 1);
2640            append_test_file(&mut layer, "cache/.wh..wh..opq", b"", 0o000, 0, 0, 1);
2641            layer.finish().unwrap();
2642        }
2643        layer_bytes
2644    }
2645
2646    fn append_test_dir(
2647        layer: &mut tar::Builder<&mut Vec<u8>>,
2648        path: &str,
2649        mode: u32,
2650        uid: u64,
2651        gid: u64,
2652        mtime: u64,
2653    ) {
2654        let mut header = tar::Header::new_gnu();
2655        header.set_entry_type(tar::EntryType::Directory);
2656        header.set_mode(mode);
2657        header.set_uid(uid);
2658        header.set_gid(gid);
2659        header.set_mtime(mtime);
2660        header.set_size(0);
2661        header.set_cksum();
2662        layer.append_data(&mut header, path, io::empty()).unwrap();
2663    }
2664
2665    fn append_test_file(
2666        layer: &mut tar::Builder<&mut Vec<u8>>,
2667        path: &str,
2668        data: &[u8],
2669        mode: u32,
2670        uid: u64,
2671        gid: u64,
2672        mtime: u64,
2673    ) {
2674        let mut header = tar::Header::new_gnu();
2675        header.set_entry_type(tar::EntryType::Regular);
2676        header.set_mode(mode);
2677        header.set_uid(uid);
2678        header.set_gid(gid);
2679        header.set_mtime(mtime);
2680        header.set_size(data.len() as u64);
2681        header.set_cksum();
2682        layer
2683            .append_data(&mut header, path, Cursor::new(data))
2684            .unwrap();
2685    }
2686
2687    fn append_test_hardlink(layer: &mut tar::Builder<&mut Vec<u8>>, path: &str, target: &str) {
2688        let mut header = tar::Header::new_gnu();
2689        header.set_entry_type(tar::EntryType::Link);
2690        header.set_link_name(target).unwrap();
2691        header.set_size(0);
2692        header.set_cksum();
2693        layer.append_data(&mut header, path, io::empty()).unwrap();
2694    }
2695
2696    fn append_test_symlink(layer: &mut tar::Builder<&mut Vec<u8>>, path: &str, target: &str) {
2697        let mut header = tar::Header::new_gnu();
2698        header.set_entry_type(tar::EntryType::Symlink);
2699        header.set_link_name(target).unwrap();
2700        header.set_mode(0o777);
2701        header.set_size(0);
2702        header.set_cksum();
2703        layer.append_data(&mut header, path, io::empty()).unwrap();
2704    }
2705
2706    #[derive(Debug)]
2707    struct SavedLayerEntry {
2708        entry_type: tar::EntryType,
2709        link_name: Option<String>,
2710        mode: u32,
2711        uid: u64,
2712        gid: u64,
2713        mtime: u64,
2714        data: Vec<u8>,
2715    }
2716
2717    fn saved_layer_entries(path: &Path) -> BTreeMap<String, SavedLayerEntry> {
2718        let file = File::open(path).unwrap();
2719        let mut archive = tar::Archive::new(file);
2720        let mut layer_bytes = None;
2721
2722        for entry in archive.entries().unwrap() {
2723            let mut entry = entry.unwrap();
2724            let entry_path = entry.path().unwrap().to_string_lossy().into_owned();
2725            if entry_path.ends_with("/layer.tar") {
2726                assert!(layer_bytes.is_none());
2727                let mut data = Vec::new();
2728                entry.read_to_end(&mut data).unwrap();
2729                layer_bytes = Some(data);
2730            }
2731        }
2732
2733        let layer_bytes = layer_bytes.unwrap();
2734        let mut layer = tar::Archive::new(Cursor::new(layer_bytes));
2735        let mut entries = BTreeMap::new();
2736
2737        for entry in layer.entries().unwrap() {
2738            let mut entry = entry.unwrap();
2739            let path = entry.path().unwrap().to_string_lossy().into_owned();
2740            let header = entry.header();
2741            let entry_type = header.entry_type();
2742            let mode = header.mode().unwrap();
2743            let uid = header.uid().unwrap();
2744            let gid = header.gid().unwrap();
2745            let mtime = header.mtime().unwrap();
2746            let link_name = if matches!(entry_type, tar::EntryType::Link | tar::EntryType::Symlink)
2747            {
2748                Some(String::from_utf8_lossy(entry.link_name_bytes().unwrap().as_ref()).into())
2749            } else {
2750                None
2751            };
2752            let mut data = Vec::new();
2753            entry.read_to_end(&mut data).unwrap();
2754
2755            entries.insert(
2756                path,
2757                SavedLayerEntry {
2758                    entry_type,
2759                    link_name,
2760                    mode,
2761                    uid,
2762                    gid,
2763                    mtime,
2764                    data,
2765                },
2766            );
2767        }
2768
2769        entries
2770    }
2771
2772    fn save_request_from_loaded(image: &LoadedImage) -> ImageSaveRequest {
2773        let host = Platform::host_linux();
2774        ImageSaveRequest {
2775            reference: image.reference.clone(),
2776            config: ImageSaveConfig {
2777                architecture: Some(host.arch.to_string()),
2778                os: Some(host.os.to_string()),
2779                env: image.metadata.config.env.clone(),
2780                entrypoint: image.metadata.config.entrypoint.clone(),
2781                cmd: image.metadata.config.cmd.clone(),
2782                working_dir: image.metadata.config.working_dir.clone(),
2783                user: image.metadata.config.user.clone(),
2784                labels: image
2785                    .metadata
2786                    .config
2787                    .labels
2788                    .iter()
2789                    .map(|(key, value)| (key.clone(), value.clone()))
2790                    .collect(),
2791            },
2792            raw_config_json: image.metadata.raw_config_json.clone(),
2793            layers: image
2794                .metadata
2795                .layers
2796                .iter()
2797                .map(|layer| ImageSaveLayer {
2798                    diff_id: layer.diff_id.clone(),
2799                })
2800                .collect(),
2801        }
2802    }
2803}