zlayer-builder 0.13.0

Dockerfile parsing and buildah-based container image building
Documentation
//! Assemble an OCI **image-layout archive (tar)** — the on-disk format that
//! [`zlayer_registry::import_image`] consumes (`oci-layout` + `index.json` +
//! `blobs/sha256/*`).
//!
//! Build backends that do NOT shell out to `buildah` (the macOS Seatbelt
//! [`crate::backend::SandboxBackend`], and the Windows HCS export) use this to
//! hand a freshly built image to the local-registry import without a
//! `buildah push … oci-archive:` step. The assembly is OS-agnostic so it can be
//! unit-tested off macOS.

use std::path::Path;

use tar::Builder;

use crate::error::{BuildError, Result};

/// One content-addressed blob to place under `blobs/sha256/` in the archive.
///
/// This is a borrowed view only — it owns neither the digest string nor the
/// blob bytes — so it is `Copy` and can be passed around or re-used freely.
#[derive(Clone, Copy)]
pub(crate) struct ArchiveBlob<'a> {
    /// Digest in canonical `sha256:<hex>` form.
    pub digest: &'a str,
    /// Raw blob bytes.
    pub bytes: &'a [u8],
}

/// Write a single-layer OCI image-layout archive (tar) to `dest`.
///
/// The archive contains the `oci-layout` marker, an `index.json` with exactly
/// one descriptor — pointing at `manifest`, with size `manifest_size` and the
/// annotation `org.opencontainers.image.ref.name = tag` — and one
/// `blobs/sha256/*` entry for each of the `manifest`, `config`, and `layer`
/// blobs. This performs blocking filesystem + tar work, so call it from
/// `spawn_blocking`.
//
// Live callers of this three-blob wrapper are the macOS `SandboxBackend`
// (cfg(macos) + `cache` feature) and the in-module `#[cfg(test)]` regression.
// The module still compiles on Windows (the HCS export goes through
// `write_oci_image_layout_archive_multi`), but nothing calls this wrapper
// there, so the dead_code warning is silenced rather than changing the compile
// set.
#[cfg_attr(target_os = "windows", allow(dead_code))]
pub(crate) fn write_oci_image_layout_archive(
    dest: &Path,
    tag: &str,
    manifest: &ArchiveBlob<'_>,
    manifest_size: i64,
    config: &ArchiveBlob<'_>,
    layer: &ArchiveBlob<'_>,
) -> Result<()> {
    // A single-layer image is just the general multi-blob case where the
    // content set is the config followed by one layer.
    let content = [config, layer].map(|blob| ArchiveBlob {
        digest: blob.digest,
        bytes: blob.bytes,
    });
    write_oci_image_layout_archive_multi(dest, tag, manifest, manifest_size, &content)
}

/// Write an OCI image-layout archive (tar) to `dest` carrying an arbitrary set of
/// content blobs.
///
/// Same layout as [`write_oci_image_layout_archive`] (`oci-layout` + a
/// single-manifest `index.json` annotated with `tag`), but `blobs` is the full
/// ordered set of `blobs/sha256/*` entries to embed — typically the image config
/// followed by every **non-foreign** layer. WCOW foreign / nondistributable base
/// layers are deliberately NOT included here (their bytes are non-distributable);
/// the manifest blob still references them by digest + `urls[]` so the runtime
/// rehydrates them from MCR, and the importer skips them.
///
/// Every blob path is written at most once: blobs are de-duplicated by digest,
/// so a config/layer that happens to share bytes — or an entry in `blobs` that
/// repeats the manifest's own digest — does not produce a second tar entry.
///
/// Blocking (fs + tar) — call it from `spawn_blocking`.
///
/// # Errors
///
/// Returns [`BuildError::IoError`] if `index.json` cannot be serialized, if
/// `dest` cannot be created, or if appending an entry / finalizing the tar
/// stream fails.
pub(crate) fn write_oci_image_layout_archive_multi(
    dest: &Path,
    tag: &str,
    manifest: &ArchiveBlob<'_>,
    manifest_size: i64,
    blobs: &[ArchiveBlob<'_>],
) -> Result<()> {
    /// File name of a blob under `blobs/sha256/`: the digest without its
    /// `sha256:` prefix (a digest lacking the prefix is used verbatim).
    /// Borrows from the digest instead of allocating a copy per blob.
    fn blob_name(digest: &str) -> &str {
        digest.strip_prefix("sha256:").unwrap_or(digest)
    }

    // Single-manifest OCI image index. The tag is recorded as the ref.name
    // annotation; `import_image` also accepts an explicit tag override.
    let index = serde_json::json!({
        "schemaVersion": 2,
        "mediaType": "application/vnd.oci.image.index.v1+json",
        "manifests": [{
            "mediaType": "application/vnd.oci.image.manifest.v1+json",
            "digest": manifest.digest,
            "size": manifest_size,
            "annotations": { "org.opencontainers.image.ref.name": tag },
        }],
    });
    // Serialize before touching the filesystem so a serialization failure does
    // not leave an empty file behind at `dest`.
    let index_bytes = serde_json::to_vec(&index).map_err(|e| {
        BuildError::IoError(std::io::Error::other(format!(
            "failed to serialize OCI index.json: {e}"
        )))
    })?;

    let file = std::fs::File::create(dest).map_err(|e| {
        BuildError::IoError(std::io::Error::new(
            e.kind(),
            format!("failed to create OCI archive {}: {e}", dest.display()),
        ))
    })?;
    let mut ar = Builder::new(file);

    append_entry(&mut ar, "oci-layout", br#"{"imageLayoutVersion":"1.0.0"}"#)?;
    append_entry(&mut ar, "index.json", &index_bytes)?;

    let manifest_name = blob_name(manifest.digest);
    append_entry(
        &mut ar,
        &format!("blobs/sha256/{manifest_name}"),
        manifest.bytes,
    )?;

    // A blob path is content-addressed by its digest, so writing the same digest
    // twice would just be a redundant (and tar-noisy) duplicate entry. Track what
    // we've emitted — starting with the manifest written just above — and skip
    // repeats.
    let mut seen = std::collections::HashSet::with_capacity(blobs.len() + 1);
    seen.insert(manifest_name);
    for blob in blobs {
        let name = blob_name(blob.digest);
        // `insert` returns `false` when the name was already present.
        if seen.insert(name) {
            append_entry(&mut ar, &format!("blobs/sha256/{name}"), blob.bytes)?;
        }
    }

    // `finish` writes the tar end-of-archive marker.
    ar.finish().map_err(|e| {
        BuildError::IoError(std::io::Error::new(
            e.kind(),
            format!("failed to finalize OCI archive: {e}"),
        ))
    })?;
    Ok(())
}

/// Add `data` to the tar archive as a regular file named `name`.
///
/// The entry gets a GNU-format header with mode `0o644` and a size equal to
/// `data.len()`. Any I/O failure is returned as [`BuildError::IoError`],
/// keeping the original error kind and naming the entry that failed.
fn append_entry(ar: &mut Builder<std::fs::File>, name: &str, data: &[u8]) -> Result<()> {
    // Attach the entry name to whatever I/O error the tar writer reports.
    let wrap_err = |err: std::io::Error| {
        BuildError::IoError(std::io::Error::new(
            err.kind(),
            format!("failed to append {name} to OCI archive: {err}"),
        ))
    };

    let mut hdr = tar::Header::new_gnu();
    hdr.set_mode(0o644);
    hdr.set_size(data.len() as u64);
    // `append_data` sets the path and recomputes the checksum itself; computing
    // it here as well just keeps the header self-consistent in the meantime.
    hdr.set_cksum();

    ar.append_data(&mut hdr, name, data).map_err(wrap_err)
}

#[cfg(test)]
mod tests {
    use super::*;
    use sha2::{Digest, Sha256};
    use std::io::Read;

    /// Canonical `sha256:<hex>` digest of `bytes`.
    fn digest(bytes: &[u8]) -> String {
        let hash = Sha256::digest(bytes);
        format!("sha256:{hash:x}")
    }

    /// Regression for the macOS `zlayer build` failure. The sandbox backend has
    /// to emit an OCI image-layout archive that
    /// `zlayer_registry::import_from_oci_layout` can import — it keys off
    /// `oci-layout`, reads `index.json` `manifests[0]`, and resolves blobs at
    /// `blobs/sha256/<hex>` — with NO buildah involvement.
    #[test]
    fn writes_importable_single_manifest_layout() {
        let tag = "repro/buildah-fail:latest";
        let layer_bytes: &[u8] = b"fake-layer-tar-gz";
        let config_bytes: &[u8] = br#"{"architecture":"arm64","os":"linux"}"#;
        let manifest_bytes: &[u8] =
            br#"{"schemaVersion":2,"mediaType":"application/vnd.oci.image.manifest.v1+json"}"#;

        let layer_digest = digest(layer_bytes);
        let config_digest = digest(config_bytes);
        let manifest_digest = digest(manifest_bytes);
        let manifest_size = i64::try_from(manifest_bytes.len()).unwrap();

        let tmp = tempfile::tempdir().unwrap();
        let archive_path = tmp.path().join("img.tar");
        write_oci_image_layout_archive(
            &archive_path,
            tag,
            &ArchiveBlob {
                digest: &manifest_digest,
                bytes: manifest_bytes,
            },
            manifest_size,
            &ArchiveBlob {
                digest: &config_digest,
                bytes: config_bytes,
            },
            &ArchiveBlob {
                digest: &layer_digest,
                bytes: layer_bytes,
            },
        )
        .unwrap();

        // Load every tar entry back as (path, contents).
        let mut archive = tar::Archive::new(std::fs::File::open(&archive_path).unwrap());
        let files: std::collections::HashMap<String, Vec<u8>> = archive
            .entries()
            .unwrap()
            .map(|entry| {
                let mut entry = entry.unwrap();
                let path = entry.path().unwrap().to_string_lossy().into_owned();
                let mut contents = Vec::new();
                entry.read_to_end(&mut contents).unwrap();
                (path, contents)
            })
            .collect();

        // Layout marker: exactly what `import_from_oci_layout` looks for.
        assert_eq!(
            files.get("oci-layout").map(Vec::as_slice),
            Some(br#"{"imageLayoutVersion":"1.0.0"}"#.as_slice())
        );

        // Manifest, config and layer must each be addressable by bare hex digest.
        for blob_digest in [&manifest_digest, &config_digest, &layer_digest] {
            let hex = blob_digest.strip_prefix("sha256:").unwrap();
            assert!(files.contains_key(&format!("blobs/sha256/{hex}")));
        }

        // The index's single descriptor points at the manifest and carries the tag.
        let index_json = files.get("index.json").unwrap();
        let index: serde_json::Value = serde_json::from_slice(index_json).unwrap();
        let descriptor = &index["manifests"][0];
        assert_eq!(descriptor["digest"], manifest_digest);
        assert_eq!(descriptor["size"], manifest_size);
        assert_eq!(
            descriptor["annotations"]["org.opencontainers.image.ref.name"],
            tag
        );
    }
}