zlayer_builder/windows_builder.rs
1//! Native WCOW (Windows Container On Windows) image builder.
2//!
3//! Parses a Dockerfile/ZImagefile, pulls the Windows base image via the
4//! registry client, materialises the foreign-layer base via the Windows
5//! unpacker, and prepares a layer chain that subsequent Phase 4 tasks
6//! extend:
7//!
8//! - **4.A**: Dockerfile parse + base image pull + foreign-layer
9//! materialisation. Non-FROM instructions are routed through
10//! [`WindowsBuilder::execute_instruction`].
11//! - **4.B** (this task): RUN execution via a transient HCS compute system
12//! attached to the working layer chain, with a Chocolatey translation
13//! hook for Linux package-manager invocations.
14//! - **4.C** (this task): COPY / ADD writes into the working layer chain
15//! as a new RO layer per instruction (the **per-instruction commit
16//! model**), and config-only instructions (WORKDIR / ENV / ENTRYPOINT /
17//! CMD / USER / EXPOSE / VOLUME / LABEL / SHELL / STOPSIGNAL /
18//! HEALTHCHECK / ONBUILD) accumulate into a typed [`OciImageConfig`]
19//! carried on the [`BuildSkeleton`] for task 4.D to serialise.
20//!
21//! **Layer-commit model**: COPY and ADD each produce ONE new RO layer
22//! on Windows. The alternative "combined scratch" model (let COPY/ADD
23//! write into the same scratch the next RUN sees) is simpler at build
24//! time but produces irregular layer chains where a single RO layer
25//! conflates user-visible operations; per-instruction commits keep the
26//! layer chain 1:1 with Dockerfile instructions, which makes the
27//! emitted OCI manifest (4.D) cleanly auditable and downstream tooling
28//! like `docker history` / `zlayer inspect` produce sensible output.
29//! Off-Windows the model is moot — COPY/ADD still validate sources and
30//! mutate the working tree under `working_layer_chain_dir/<scratch>/`
31//! so unit tests on Linux CI exercise the path-traversal and
32//! tar-extract logic without touching HCS.
33//! - **4.D**: OCI image manifest emission with `os: "windows"` +
34//! `os.version` from the resolved base manifest; preserves foreign-layer
35//! `urls[]`.
36//! - **4.E**: Push via the existing `zlayer-registry` push path.
37//!
38//! ## Architectural template
39//!
40//! Modelled after [`crate::sandbox_builder::SandboxImageBuilder`] — the macOS
41//! Seatbelt builder — which is the project's reference for a native (non-
42//! buildah) Dockerfile-driven image builder. The key shared pattern: reuse
43//! the existing Dockerfile parser ([`crate::dockerfile::Dockerfile`]),
44//! delegate base-image materialisation to a platform-specific helper, and
45//! iterate over [`Instruction`] variants to drive the layer chain.
46//!
47//! ## Relationship to [`crate::backend::hcs::HcsBackend`]
48//!
49//! `HcsBackend` is the existing Windows-only build backend wired into the
50//! `BuildBackend` trait. `WindowsBuilder` is intentionally a parallel,
51//! more granular API that exposes the build pipeline in skeleton form so
52//! Phase 4 follow-up tasks (4.C–4.E) can extend it incrementally without
53//! disturbing the working `HcsBackend`. Once Phase 4 lands, `HcsBackend`
54//! can be retargeted onto `WindowsBuilder` if desired; for now they
55//! co-exist.
56//!
57//! ## Cross-platform compilation
58//!
//! The data types ([`WindowsBuilder`], [`WindowsBuildConfig`],
//! [`BuildContext`], [`BuildSkeleton`], [`LayerRef`], [`WindowsLayerEntry`],
//! [`BaseImageManifest`], [`OciImageConfig`], [`OciHealthcheck`],
//! [`ExecutedInstruction`], [`EmittedLayer`], [`BuiltImage`]) compile on
//! every host so unit tests run on the CI Linux runners. The actual
//! base-layer materialisation in
63//! [`WindowsBuilder::build_skeleton`] and the HCS-driven RUN execution in
64//! [`WindowsBuilder::execute_instruction`] are gated on
65//! `target_os = "windows"`; on other hosts they return
66//! `BuildError::NotSupported`. Phase 4 follow-up tasks preserve this
67//! gating discipline.
68
69use std::collections::{BTreeMap, HashMap};
70use std::path::{Component, Path, PathBuf};
71
72use chrono::{DateTime, Utc};
73use sha2::{Digest, Sha256};
74
75use crate::dockerfile::{
76 AddInstruction, CopyInstruction, Dockerfile, DockerfileFromTarget, EnvInstruction,
77 ExposeInstruction, ExposeProtocol, HealthcheckInstruction, Instruction, RunInstruction,
78 ShellOrExec,
79};
80use crate::error::{BuildError, Result};
81
82// `RegistryAuth` is re-exported by `zlayer-registry` unconditionally (the
83// underlying `oci-client::secrets::RegistryAuth` is plain Rust with no
84// Windows linkage), so the same import works on every host. We keep
85// `WindowsBuildConfig::registry_auth` cross-platform so the public
86// builder API surfaces the same shape regardless of build target — only
87// the Windows-only base materialisation path actually consumes the
88// credential.
89use zlayer_registry::RegistryAuth;
90
91// ---------------------------------------------------------------------------
92// Public types
93// ---------------------------------------------------------------------------
94
/// Configuration for the Windows builder.
///
/// Mirrors the on-disk + registry parameters required to materialise a
/// Windows base image. Subsequent Phase 4 tasks (4.C COPY/ADD,
/// 4.D manifest) thread additional knobs through this struct.
///
/// The documented default values for [`platform`](Self::platform) and
/// [`scratch_size_gb`](Self::scratch_size_gb) are exposed as
/// [`WindowsBuildConfig::default_platform`] and
/// [`WindowsBuildConfig::default_scratch_size_gb`].
#[derive(Debug, Clone)]
pub struct WindowsBuildConfig {
    /// Build cache root. Per-build subdirectories (`<cache_dir>/<build_id>/`)
    /// hold the unpacked base layer chain, the working writable layer, and
    /// any future blob staging area used by 4.D/4.E.
    pub cache_dir: PathBuf,
    /// Registry credentials, forwarded verbatim to the registry client when
    /// pulling the base image and (in 4.E) pushing the final manifest.
    ///
    /// Note that this struct derives `Debug`, so formatting the config
    /// also formats this field.
    pub registry_auth: RegistryAuth,
    /// Target OCI platform string, e.g. `"windows/amd64"`. The registry
    /// client uses this to resolve a multi-platform index entry to a
    /// concrete manifest. The conventional value is `"windows/amd64"`,
    /// available from [`WindowsBuildConfig::default_platform`].
    pub platform: String,
    /// Optional override for the tested base image OS build (the
    /// `os.version` constraint, e.g. `"10.0.20348.2227"`). When `None` the
    /// platform resolver inherits the `os.version` reported by the pulled
    /// manifest. Used to pin a specific Windows build family when the host
    /// kernel demands an exact match.
    pub os_version_override: Option<String>,
    /// Scratch-layer size in GiB for per-RUN ephemeral compute systems.
    /// Zero means "HCS default" (currently `20`, see
    /// [`WindowsBuildConfig::default_scratch_size_gb`]).
    pub scratch_size_gb: u64,
}
124
125impl WindowsBuildConfig {
126 /// The default target platform string: `"windows/amd64"`. The vast
127 /// majority of Windows container base images are published for this
128 /// platform; `windows/arm64` exists but is not in widespread use yet.
129 #[must_use]
130 pub const fn default_platform() -> &'static str {
131 "windows/amd64"
132 }
133
134 /// Default scratch-layer size (20 GiB), used when
135 /// [`WindowsBuildConfig::scratch_size_gb`] is zero. Matches the
136 /// production `HcsBackend` default so behaviour is consistent across
137 /// the two builders.
138 #[must_use]
139 pub const fn default_scratch_size_gb() -> u64 {
140 20
141 }
142}
143
/// Inputs to a single build.
///
/// One `BuildContext` per `WindowsBuilder::build_skeleton` invocation;
/// holds the on-disk paths the parser reads (Dockerfile + COPY sources)
/// plus the build-time `ARG` values and the final image tag.
#[derive(Debug, Clone)]
pub struct BuildContext {
    /// Path to the build root. Everything COPY/ADD reads is resolved
    /// relative to this directory; the Dockerfile lives here unless
    /// [`dockerfile_path`](Self::dockerfile_path) gives an absolute path.
    pub context_dir: PathBuf,
    /// Path to the Dockerfile, either relative to
    /// [`context_dir`](Self::context_dir) or absolute. A relative path is
    /// joined onto `context_dir`; an absolute path is used unchanged. The
    /// skeleton uses [`Dockerfile::parse`] on the file's contents.
    pub dockerfile_path: PathBuf,
    /// Build-time variables (`--build-arg KEY=VALUE`). Stored verbatim and
    /// applied during Dockerfile variable expansion in later tasks.
    pub build_args: HashMap<String, String>,
    /// Final image tag, e.g. `"myapp:latest"`. Recorded for 4.D's manifest
    /// emission and 4.E's push; the 4.A skeleton holds it but does not act
    /// on it.
    pub tag: String,
    /// Windows LTSC line to target for FROM image rewrites (e.g.
    /// `"ltsc2022"`, `"ltsc2025"`). The parsed Dockerfile FROM reference
    /// is run through
    /// [`crate::windows_image_resolver::rewrite_image_for_windows`] so a
    /// generic Docker Hub reference (`ubuntu:24.04`, `golang:1.24`, etc.)
    /// is replaced with the equivalent prebuilt Windows image under
    /// `ghcr.io/blackleafdigital/zlayer/...`. `None` does not disable the
    /// rewrite: the builder falls back to its default line (`ltsc2022`).
    pub ltsc: Option<String>,
}
176
/// One base-image layer reference threaded into [`BuildSkeleton`].
///
/// Mirrors the subset of an OCI layer descriptor the WCOW builder needs:
/// the digest (content-addressable hash of the compressed blob), the
/// media type (drives decompression + foreign-layer detection), the byte
/// size, and the optional mirror URL list (non-empty for MCR foreign
/// Windows base layers). Subsequent tasks 4.D/4.E pass these descriptors
/// through to the final manifest unchanged.
#[derive(Debug, Clone)]
pub struct LayerRef {
    /// `sha256:...` digest of the compressed blob.
    pub digest: String,
    /// OCI media type — e.g.
    /// `application/vnd.docker.image.rootfs.foreign.diff.tar.gzip` for
    /// MCR foreign Windows layers, or
    /// `application/vnd.oci.image.layer.v1.tar+gzip` for ordinary OCI
    /// layers.
    pub media_type: String,
    /// Compressed size in bytes (`i64` because that's the type
    /// `oci-client` uses for descriptor size — keeping the same shape
    /// avoids lossy casts on round-trip).
    ///
    /// Note that [`EmittedLayer::size`] is a `u64`, so moving a size
    /// between the two types needs an explicit conversion.
    pub size: i64,
    /// Optional mirror URLs (foreign layer `urls[]` per the OCI spec).
    /// Non-empty for MCR Windows base layers; empty for ordinary
    /// registry-resident layers. Preserved verbatim through to the
    /// emitted manifest in task 4.D.
    pub urls: Vec<String>,
}
205
/// On-disk reference to one materialised parent layer.
///
/// Stored on [`BuildSkeleton`] so subsequent RUN steps can build the
/// HCS `LayerChain` (child-to-parent order) the storage filter expects.
/// Cross-platform plain data — the chain is only consumed by
/// `target_os = "windows"` code, but the type compiles everywhere so
/// the public API and unit tests stay portable.
///
/// The two paths serve different consumers: [`layer_path`](Self::layer_path)
/// is the handle given to HCS, while [`blob_path`](Self::blob_path) is the
/// only one that may be read for OCI export / push.
#[derive(Debug, Clone)]
pub struct WindowsLayerEntry {
    /// Caller-chosen GUID/uuid for the layer. HCS stores it inside the
    /// `LayerData` JSON so the storage filter can route opens. Held as a
    /// plain `String`; this type performs no format validation.
    pub layer_id: String,
    /// Absolute path to the layer directory (e.g.
    /// `<cache_dir>/<build_id>/unpacked/<layer_id>/`).
    ///
    /// This is the **HCS-imported read-only layer dir** (materialised VHD +
    /// `Files/` + `Hives/`), owned by SYSTEM with restrictive ACLs. It is the
    /// parent-chain handle the storage filter consumes — it is NOT a readable
    /// source for the layer's gzip blob (the dir holds the unpacked VHD, not
    /// the compressed tar). Never `fs::read` this for the OCI descriptor bytes;
    /// use [`blob_path`](Self::blob_path) instead.
    pub layer_path: PathBuf,
    /// Absolute path to the plain, process-owned compressed (tar+gzip) layer
    /// blob file — the exact bytes whose sha256 is the OCI descriptor digest,
    /// persisted from the in-memory blob the RUN/COPY commit already produced.
    ///
    /// `Some` for builder-produced layers (RUN/COPY/ADD); `None` for foreign
    /// base layers (their bytes are never re-uploaded — the runtime rehydrates
    /// them from the manifest's `urls[]`). The OCI export
    /// ([`export_built_image_to_oci_archive`]) and the registry push
    /// ([`push_impl`]) read THIS file, never [`layer_path`](Self::layer_path),
    /// which would otherwise hit `ERROR_ACCESS_DENIED` opening the restricted
    /// HCS layer directory.
    pub blob_path: Option<PathBuf>,
}
241
/// Resolved manifest information for the pulled base image.
///
/// Carried into [`BuildSkeleton`] so task 4.D can populate the final
/// image config's `os` / `os.version` fields without re-pulling the
/// manifest. Task 4.D also reads [`config_blob`](Self::config_blob) so the
/// final image's `config.Env` / `WorkingDir` / `Entrypoint` defaults
/// inherit from the base.
#[derive(Debug, Clone)]
pub struct BaseImageManifest {
    /// Image reference the base was pulled from, e.g.
    /// `"mcr.microsoft.com/windows/nanoserver:ltsc2022"`. Preserved for
    /// diagnostics and for foreign-layer push paths in task 4.E.
    pub image_ref: String,
    /// OCI `os` value from the resolved manifest's platform descriptor —
    /// always `"windows"` for a WCOW build, but stored for symmetry with
    /// the OCI spec.
    pub os: String,
    /// OCI `os.version` from the resolved manifest's platform descriptor
    /// (e.g. `"10.0.20348.2227"`). Used as the final image's
    /// `os.version` unless
    /// [`WindowsBuildConfig::os_version_override`] overrides it. `None`
    /// when the base manifest omits the field, which is non-conformant
    /// for Windows but tolerated.
    pub os_version: Option<String>,
    /// `arch` from the resolved manifest. Defaults to `"amd64"`.
    pub arch: String,
    /// JSON of the base image config blob (`manifest.config`). Stored as
    /// raw bytes so task 4.D can hand it straight back through
    /// `serde_json` without forcing this skeleton to take a hard dep on
    /// `zlayer_registry::image_config::ImageConfig`. Empty when the base
    /// config could not be fetched (a non-fatal degradation handled in
    /// task 4.D), so consumers must check for emptiness before parsing.
    pub config_blob: Vec<u8>,
}
276
/// OCI image config accumulated during instruction execution.
///
/// Config-only Dockerfile instructions (WORKDIR / ENV / ENTRYPOINT / CMD
/// / USER / EXPOSE / VOLUME / LABEL / SHELL / STOPSIGNAL / HEALTHCHECK /
/// ONBUILD) mutate this struct in-place during 4.C; task 4.D serialises
/// it into the OCI image config blob alongside the layer descriptors on
/// [`BuildSkeleton`].
///
/// Field shape matches the OCI image-spec
/// `application/vnd.oci.image.config.v1+json` config object so 4.D's
/// emission step is a straight `serde_json::to_value` over each field
/// without remapping.
///
/// The derived `Default` is the "nothing configured yet" state: every
/// `Option` is `None` and every collection is empty.
#[derive(Debug, Clone, Default)]
pub struct OciImageConfig {
    /// `WorkingDir` in the OCI config — the cwd applied to subsequent RUN
    /// steps and to the final container's process. WORKDIR with a
    /// relative path resolves against the previous WORKDIR per the
    /// Dockerfile spec.
    pub working_dir: Option<String>,
    /// `Env` in the OCI config — list of `KEY=value` entries.
    /// Builder-side ENV mutation enforces last-write-wins per key so the
    /// vector never grows duplicate KEYs.
    pub env: Vec<String>,
    /// `Entrypoint` in the OCI config. ENTRYPOINT shell form is rewritten
    /// to `["cmd", "/c", "<rest>"]` for WCOW; exec form is passed
    /// through as-is.
    pub entrypoint: Option<Vec<String>>,
    /// `Cmd` in the OCI config. Setting ENTRYPOINT resets CMD to `None`
    /// per the Dockerfile spec.
    pub cmd: Option<Vec<String>>,
    /// `User` in the OCI config (e.g. `"ContainerUser"` or
    /// `"user:group"`).
    pub user: Option<String>,
    /// `ExposedPorts` in the OCI config — map of `"<port>/<proto>"` →
    /// empty object. Stored as `BTreeMap` so the serialised key order is
    /// deterministic across runs.
    pub exposed_ports: BTreeMap<String, serde_json::Value>,
    /// `Volumes` in the OCI config — map of `<path>` → empty object.
    pub volumes: BTreeMap<String, serde_json::Value>,
    /// `Labels` in the OCI config — string→string map. Multiple LABEL
    /// lines merge; later LABEL with the same KEY wins.
    pub labels: BTreeMap<String, String>,
    /// `StopSignal` in the OCI config (e.g. `"SIGTERM"`). Carried as-is
    /// from the STOPSIGNAL instruction.
    pub stop_signal: Option<String>,
    /// `Healthcheck` in the OCI config. `None` means no healthcheck was
    /// configured (and the base image's healthcheck is inherited);
    /// `HEALTHCHECK NONE` sets this to a sentinel
    /// [`OciHealthcheck::disabled`].
    pub healthcheck: Option<OciHealthcheck>,
    /// `Shell` in the OCI config — list of tokens (`["cmd", "/c"]` for
    /// the default WCOW shell, or a user override via the SHELL
    /// instruction).
    pub shell: Option<Vec<String>>,
    /// `OnBuild` triggers in the OCI config — list of raw Dockerfile
    /// instruction text (`"COPY . /app"` etc.). Only matters when this
    /// image is used as a base; the builder itself never re-executes
    /// them in the producing build.
    pub on_build: Vec<String>,
}
337
/// Healthcheck section of an [`OciImageConfig`].
///
/// The Dockerfile [`HealthcheckInstruction::Check`] variant carries
/// `Duration` values; they are normalised to OCI's string form
/// (`"30s"`, `"1m30s"`, …) when captured, so 4.D can serialise this
/// struct without re-formatting. `HEALTHCHECK NONE` is represented by the
/// [`disabled`](Self::disabled) sentinel, which round-trips as
/// `Test == ["NONE"]` per the OCI / Docker convention.
#[derive(Debug, Clone)]
pub struct OciHealthcheck {
    /// Command tokens: `["NONE"]` for `HEALTHCHECK NONE`,
    /// `["CMD-SHELL", "<cmd>"]` for the shell form of `HEALTHCHECK CMD …`,
    /// or `["CMD", "<arg0>", "<arg1>", …]` for the exec form.
    pub test: Vec<String>,
    /// `Interval` between consecutive checks (OCI string form).
    pub interval: Option<String>,
    /// Per-check `Timeout` (OCI string form).
    pub timeout: Option<String>,
    /// `Retries` before the container is reported unhealthy.
    pub retries: Option<u32>,
    /// `StartPeriod` grace before the first check is counted (OCI
    /// string form).
    pub start_period: Option<String>,
}

impl OciHealthcheck {
    /// Sentinel value representing `HEALTHCHECK NONE`: `test` is the
    /// single token `"NONE"` and every timing field is unset.
    #[must_use]
    pub fn disabled() -> Self {
        let test = vec![String::from("NONE")];
        Self {
            test,
            interval: None,
            timeout: None,
            retries: None,
            start_period: None,
        }
    }

    /// Reports whether this is the `HEALTHCHECK NONE` sentinel.
    ///
    /// Only `test` is inspected: the result is `true` exactly when it
    /// holds the single token `"NONE"` (case-sensitive). The timing
    /// fields do not influence the answer.
    #[must_use]
    pub fn is_disabled(&self) -> bool {
        matches!(self.test.as_slice(), [only] if only.as_str() == "NONE")
    }
}
385
/// One executed Dockerfile instruction recorded in
/// [`BuildSkeleton::instruction_log`].
///
/// Task 4.D consumes this log to emit the OCI image config's `history`
/// array — one entry per source-line invocation, with `empty_layer = true`
/// for config-only instructions (WORKDIR / ENV / etc.) and
/// `empty_layer = false` for instructions that produced a real layer
/// (FROM / RUN / COPY / ADD). The `source_line` field holds the
/// reconstructed Dockerfile text (e.g. `"FROM mcr.microsoft.com/..."`,
/// `"RUN choco install -y curl"`) so downstream tooling like
/// `docker history` can render the build provenance.
#[derive(Debug, Clone)]
pub struct ExecutedInstruction {
    /// Reconstructed Dockerfile source line for the instruction, used
    /// verbatim as the `created_by` value in the emitted OCI history
    /// entry. Reconstructed from the parsed instruction (rather than
    /// echoing the original line) so a Dockerfile with continuation
    /// backslashes or comments collapses to a single canonical form per
    /// instruction — which is what `docker history` displays.
    pub source_line: String,
    /// `true` when this instruction produced a real layer (FROM / RUN /
    /// COPY / ADD); `false` for config-only instructions. Determines the
    /// `empty_layer` field on the emitted OCI history entry — note the
    /// polarity is inverted: `empty_layer` is `true` when this is `false`.
    pub produced_layer: bool,
    /// Build-time UTC timestamp captured at execution. Surfaces as
    /// `created` on the emitted OCI history entry.
    pub timestamp: DateTime<Utc>,
}
414
/// Output of [`WindowsBuilder::build_skeleton`] — the parsed Dockerfile
/// plus the materialised base layer chain plus the resolved base
/// manifest.
///
/// Consumed by Phase 4 follow-up tasks:
///
/// - 4.B (RUN, this task) iterates `parsed_dockerfile.stages[0].instructions`
///   and for each [`Instruction::Run`] spawns an HCS compute system
///   attached to the working chain stored in
///   [`working_chain`](Self::working_chain), captures the diff via
///   `wclayer::export_layer`, and appends a new
///   [`LayerRef`] to [`base_layers`](Self::base_layers) plus a new
///   [`WindowsLayerEntry`] to [`working_chain`](Self::working_chain).
/// - 4.C (COPY/ADD) commits each COPY / ADD as its own read-only layer
///   (the per-instruction commit model described in the module docs) and
///   accumulates config-only instructions into
///   [`image_config`](Self::image_config).
/// - 4.D (manifest) emits the final OCI image with `os: "windows"`,
///   `os.version` from [`base_manifest`](Self::base_manifest), and the
///   accumulated `base_layers + post-4.B layers` chain.
/// - 4.E (push) pipes the 4.D manifest into the registry client.
#[derive(Debug)]
pub struct BuildSkeleton {
    /// Parsed Dockerfile — `parsed_dockerfile.stages[0]` is the single
    /// stage supported by the skeleton.
    pub parsed_dockerfile: Dockerfile,
    /// Base image layers in **base-first** order (matching OCI manifest
    /// order). Task 4.B appends post-RUN layers to the end of this
    /// vector, so despite the name it is not limited to layers that came
    /// from the base image.
    pub base_layers: Vec<LayerRef>,
    /// Manifest metadata from the resolved base image.
    pub base_manifest: BaseImageManifest,
    /// On-disk path to the working layer chain root. On Windows this is
    /// the directory passed to
    /// [`zlayer_agent::windows::unpacker::unpack_windows_image`]; on other
    /// hosts this field is the `<cache_dir>/<build_id>/unpacked/` path
    /// the skeleton would have used had the build proceeded. Each
    /// post-RUN read-only layer is staged as a subdirectory here.
    pub working_layer_chain_dir: PathBuf,
    /// Materialised layers in **base-first** order. The HCS storage
    /// filter consumes the reversed (child-to-parent) view; helpers in
    /// this module do the reversal at the point of use. Populated on
    /// Windows by `build_skeleton` from the unpacker output and extended
    /// by each successful RUN step.
    pub working_chain: Vec<WindowsLayerEntry>,
    /// OCI image config accumulated by config-only Dockerfile
    /// instructions (WORKDIR / ENV / ENTRYPOINT / CMD / USER / EXPOSE /
    /// VOLUME / LABEL / SHELL / STOPSIGNAL / HEALTHCHECK / ONBUILD).
    /// Task 4.D serialises this into the image config blob.
    pub image_config: OciImageConfig,
    /// Per-instruction execution log in build order. The first entry is
    /// the FROM instruction (recorded by [`WindowsBuilder::build_skeleton`]);
    /// subsequent entries are appended by
    /// [`WindowsBuilder::execute_instruction`] in the order it is called.
    /// Task 4.D ([`WindowsBuilder::emit_image`]) consumes this to emit
    /// the OCI image config `history` array.
    pub instruction_log: Vec<ExecutedInstruction>,
    /// Provisioned toolchain language detected for this base image, e.g.
    /// `"go"`, `"node"`, `"rust"`, `"python"`, or `None` if the base does
    /// not carry a prebuilt language toolchain. Populated by
    /// [`WindowsBuilder::build_image_for_backend`] right after
    /// `build_skeleton` from [`crate::windows_toolchain::detect_toolchain`].
    /// Threaded into the [`crate::buildah::DockerfileTranslator`] on each
    /// RUN so package-manager translation can skip system packages that
    /// the provisioned toolchain already supplies (e.g. dropping
    /// `apt install nodejs` when the image already ships a Node
    /// toolchain). `None` for everything that goes through
    /// [`WindowsBuilder::build_skeleton`] / [`WindowsBuilder::build_and_push`]
    /// — those paths predate the toolchain hook and stay on the
    /// translator's default behaviour.
    pub provisioned_toolchain_language: Option<String>,
}
485
/// Locally-produced layer blob staged on disk for push (task 4.E).
///
/// Carries everything 4.E needs to upload a layer to a registry: the
/// media type to advertise on the descriptor, the digest + size of the
/// compressed blob, the `diff_id` for the image-config `rootfs.diff_ids`
/// array, the on-disk path to the blob, and (for foreign base layers
/// only) the upstream `urls[]` mirror list so the emitted manifest can
/// point Windows daemons at the original MCR foreign-layer descriptor
/// instead of forcing the daemon to re-download the bytes from the user's
/// own registry.
///
/// Despite the "locally-produced" summary, foreign base layers are
/// described by this type as well; they are the entries with
/// [`urls`](Self::urls) set and an empty [`local_path`](Self::local_path).
#[derive(Debug, Clone)]
pub struct EmittedLayer {
    /// OCI media type used in the manifest's `layers[].mediaType`.
    /// For the foreign Windows base layer this is
    /// `application/vnd.docker.image.rootfs.foreign.diff.tar.gzip` (so
    /// Windows daemons recognise it as a foreign layer and skip the
    /// download path); for builder-produced RUN/COPY/ADD layers this is
    /// `application/vnd.oci.image.layer.v1.tar+gzip`.
    pub media_type: String,
    /// `sha256:...` digest of the COMPRESSED (gzipped) tar blob.
    pub digest: String,
    /// Compressed size in bytes (matches the descriptor's `size` field).
    pub size: u64,
    /// `sha256:...` of the UNCOMPRESSED tar blob — what
    /// `rootfs.diff_ids[]` references in the image config. For foreign
    /// base layers this is sourced from the base image config blob; for
    /// builder-produced layers this is computed at export time.
    pub diff_id: String,
    /// On-disk path to the compressed layer blob the registry client
    /// will upload during the 4.E push. Empty for foreign base layers
    /// (they're never re-uploaded — the registry rehydrates them from
    /// `urls[]`). The empty path is a sentinel, not a readable location:
    /// check for it before opening the file.
    pub local_path: PathBuf,
    /// For foreign layers: the MCR / mirror URL list preserved verbatim
    /// from the base manifest's `urls[]`. `None` for builder-produced
    /// layers.
    pub urls: Option<Vec<String>>,
}
524
/// Final emitted artifact for one image: the OCI manifest blob, the
/// image config blob, and the descriptor list for every layer the
/// manifest references.
///
/// Produced by [`WindowsBuilder::emit_image`] and consumed by task 4.E's
/// registry push, which uploads each layer + the config blob and then
/// PUTs the manifest at `<tag>`.
///
/// Both blobs are stored already serialised, alongside their digests, so
/// the bytes that were hashed are the bytes that get pushed.
#[derive(Debug, Clone)]
pub struct BuiltImage {
    /// Image tag for the push (`<repo>:<tag>` or `<host>/<repo>:<tag>`),
    /// carried verbatim from [`BuildContext::tag`].
    pub tag: String,
    /// Serialised image config JSON blob
    /// (`application/vnd.oci.image.config.v1+json`).
    pub image_config_blob: Vec<u8>,
    /// `sha256:...` digest of the image config blob — what the manifest
    /// references in `config.digest`.
    pub image_config_digest: String,
    /// Serialised OCI image manifest JSON blob
    /// (`application/vnd.oci.image.manifest.v1+json`).
    pub manifest_blob: Vec<u8>,
    /// `sha256:...` digest of the manifest blob. Identifies the image on
    /// the registry; the push step uses it as the manifest reference
    /// when computing the per-blob upload URL.
    pub manifest_digest: String,
    /// Layer descriptors in **base-first** order — the same order as
    /// `manifest.layers[]`. 4.E iterates this to upload each blob (or
    /// skips upload for foreign layers).
    pub layers: Vec<EmittedLayer>,
}
555
556/// Native WCOW image builder.
557///
558/// Stateless apart from the static [`WindowsBuildConfig`] passed at
559/// construction. Per-build state lives entirely on the stack of
560/// [`WindowsBuilder::build_skeleton`] and inside the returned
561/// [`BuildSkeleton`].
562pub struct WindowsBuilder {
563 config: WindowsBuildConfig,
564}
565
566impl WindowsBuilder {
567 /// Construct a new `WindowsBuilder` with the given configuration.
568 ///
569 /// Does no I/O: the cache directory is lazily created when
570 /// [`build_skeleton`](Self::build_skeleton) first runs.
571 #[must_use]
572 pub fn new(config: WindowsBuildConfig) -> Self {
573 Self { config }
574 }
575
576 /// Borrow the configuration this builder was constructed with.
577 #[must_use]
578 pub fn config(&self) -> &WindowsBuildConfig {
579 &self.config
580 }
581
582 /// Parse the Dockerfile, pull the base image, and materialise the
583 /// foreign-layer chain.
584 ///
585 /// Returns a [`BuildSkeleton`] for downstream Phase 4 tasks to
586 /// extend. Does **not** execute COPY or ADD — those route
587 /// through [`Self::execute_instruction`] and surface
588 /// [`BuildError::NotYetImplemented`] until 4.C lands. RUN is
589 /// implemented here (4.B).
590 ///
591 /// # Errors
592 ///
593 /// - [`BuildError::ContextRead`] when the Dockerfile cannot be read.
594 /// - [`BuildError::DockerfileParse`] on a malformed Dockerfile.
595 /// - [`BuildError::NotSupported`] when the Dockerfile has more than
596 /// one stage (multi-stage WCOW builds are intentionally out of
597 /// scope for the skeleton — Phase 4 follow-up task).
598 /// - [`BuildError::InvalidInstruction`] when the FROM target is
599 /// `scratch` or a stage reference.
600 /// - [`BuildError::RegistryError`] / [`BuildError::IoError`] on
601 /// registry-side failures.
602 /// - [`BuildError::NotSupported`] when invoked on a non-Windows
603 /// host (the base layer materialisation step needs the HCS
604 /// storage filter APIs).
605 pub async fn build_skeleton(&self, ctx: &BuildContext) -> Result<BuildSkeleton> {
606 // 1. Resolve and read the Dockerfile.
607 let dockerfile_path = if ctx.dockerfile_path.is_absolute() {
608 ctx.dockerfile_path.clone()
609 } else {
610 ctx.context_dir.join(&ctx.dockerfile_path)
611 };
612 let dockerfile_text =
613 std::fs::read_to_string(&dockerfile_path).map_err(|e| BuildError::ContextRead {
614 path: dockerfile_path.clone(),
615 source: e,
616 })?;
617
618 // 2. Parse via the existing dockerfile parser.
619 let parsed = Dockerfile::parse(&dockerfile_text)?;
620
621 // 3+. Validate stages, resolve + rewrite FROM, materialise base.
622 self.build_skeleton_with_parsed(parsed, ctx).await
623 }
624
625 /// Variant of [`build_skeleton`](Self::build_skeleton) that takes an
626 /// already-parsed [`Dockerfile`] instead of reading it from disk.
627 /// Used by [`build_image_for_backend`](Self::build_image_for_backend)
628 /// where the parsed AST is already in hand and the dockerfile is not
629 /// guaranteed to exist on the local filesystem in the form the
630 /// caller's `BuildContext::dockerfile_path` claims (e.g. when
631 /// `HcsBackend` is given a `Dockerfile` value directly by a higher
632 /// layer that did its own parse). The bulk of the work — stage
633 /// validation, FROM target resolution, LTSC rewrite, base image
634 /// pull + materialisation — is identical to `build_skeleton`.
635 ///
636 /// # Errors
637 ///
638 /// Same shape as [`build_skeleton`](Self::build_skeleton) minus the
639 /// dockerfile read error (which can't happen because the caller has
640 /// already produced the AST).
641 pub async fn build_skeleton_with_parsed(
642 &self,
643 parsed: Dockerfile,
644 ctx: &BuildContext,
645 ) -> Result<BuildSkeleton> {
646 // 3. Reject multi-stage builds in the 4.A skeleton. The HCS
647 // backend takes the same stance for the same reason —
648 // multi-stage Windows builds require cross-stage COPY which
649 // is part of 4.C.
650 if parsed.stages.is_empty() {
651 return Err(BuildError::InvalidInstruction {
652 instruction: "FROM".to_string(),
653 reason: "Dockerfile has no FROM instruction".to_string(),
654 });
655 }
656 if parsed.stages.len() > 1 {
657 return Err(BuildError::NotSupported {
658 operation: format!(
659 "multi-stage WCOW builds ({} stages) — Phase 4 task 4.A skeleton supports \
660 a single stage; cross-stage COPY arrives in Phase 4 task 4.C",
661 parsed.stages.len()
662 ),
663 });
664 }
665
666 // 4. Resolve the FROM target — only an image reference is valid
667 // for the single-stage WCOW skeleton.
668 let stage = &parsed.stages[0];
669 let mut base_image_ref = match &stage.base_image {
670 DockerfileFromTarget::Image(r) => r.to_string(),
671 DockerfileFromTarget::Stage(name) => {
672 return Err(BuildError::stage_not_found(name));
673 }
674 DockerfileFromTarget::Scratch => {
675 return Err(BuildError::InvalidInstruction {
676 instruction: "FROM scratch".to_string(),
677 reason: "WCOW builds require a Windows base image (HCS cannot run a process \
678 without a kernel + cmd.exe). Use \
679 mcr.microsoft.com/windows/nanoserver:... or .../servercore:..."
680 .to_string(),
681 });
682 }
683 };
684
685 // 4b. Rewrite generic Docker Hub references (`ubuntu:24.04`,
686 // `golang:1.24`, etc.) to the equivalent prebuilt ZLayer Windows
687 // image for the requested LTSC line. Mirrors what
688 // `macos_image_resolver::prebuilt_cache_ref` does for the
689 // macOS sandbox backend. Already-rewritten / non-mapped refs
690 // pass through untouched.
691 let ltsc = ctx.ltsc.as_deref().unwrap_or("ltsc2022");
692 if let Some(rewritten) =
693 crate::windows_image_resolver::rewrite_image_for_windows(&base_image_ref, ltsc)
694 {
695 tracing::debug!(
696 "rewrote FROM {} -> {} (ltsc={})",
697 base_image_ref,
698 rewritten,
699 ltsc
700 );
701 base_image_ref = rewritten;
702 }
703
704 // 5. Allocate the per-build cache subdirectories.
705 let build_id = new_build_id();
706 let build_dir = self.config.cache_dir.join(&build_id);
707 std::fs::create_dir_all(&build_dir).map_err(|e| BuildError::ContextRead {
708 path: build_dir.clone(),
709 source: e,
710 })?;
711 let working_layer_chain_dir = build_dir.join("unpacked");
712
713 // 6. Pull + materialise the base image. The heavy lifting is
714 // Windows-only because it requires HcsImportLayer +
715 // BackupStream / WCIFS plumbing; off-Windows we surface
716 // a NotSupported error.
717 let (base_layers, base_manifest, working_chain) =
718 pull_and_materialise_base(&base_image_ref, &self.config, &working_layer_chain_dir)
719 .await?;
720
721 // Record the FROM instruction as the first entry of the
722 // execution log so 4.D's history array correctly starts with
723 // `FROM <base>` (with `empty_layer = false` because the base
724 // layer chain materialised above is the layer "produced" by
725 // FROM).
726 let instruction_log = vec![ExecutedInstruction {
727 source_line: format!("FROM {base_image_ref}"),
728 produced_layer: true,
729 timestamp: Utc::now(),
730 }];
731
732 Ok(BuildSkeleton {
733 parsed_dockerfile: parsed,
734 base_layers,
735 base_manifest,
736 working_layer_chain_dir,
737 working_chain,
738 image_config: OciImageConfig::default(),
739 instruction_log,
740 provisioned_toolchain_language: None,
741 })
742 }
743
744 /// Apply a single non-FROM instruction to a [`BuildSkeleton`].
745 ///
746 /// Dispatches to a per-instruction handler. Config-only instructions
747 /// mutate [`BuildSkeleton::image_config`] in place; COPY/ADD/RUN
748 /// commit a new RO layer per instruction (the per-instruction layer
749 /// model documented at the top of this module).
750 ///
751 /// # Errors
752 ///
753 /// - [`BuildError::NotSupported`] for cross-stage `COPY --from=`
754 /// (multi-stage support arrives in a later task).
755 /// - [`BuildError::PathTraversal`] when a COPY/ADD source contains
756 /// `..`.
757 /// - [`BuildError::HttpFetchFailed`] when an ADD URL fetch fails.
758 /// - [`BuildError::TarExtractFailed`] when an ADD tarball
759 /// auto-extract fails.
760 /// - [`BuildError::ContextRead`] / [`BuildError::IoError`] for
761 /// filesystem failures.
762 /// - [`BuildError::RunStepFailed`] when the RUN command exits
763 /// non-zero.
764 /// - [`BuildError::LayerCreate`] / [`BuildError::LayerExportFailed`]
765 /// on HCS or wclayer failures.
766 /// - [`BuildError::NotSupported`] when COPY/ADD/RUN is invoked on a
767 /// non-Windows host (HCS layer commits require the Windows APIs).
768 pub async fn execute_instruction(
769 &self,
770 skeleton: &mut BuildSkeleton,
771 ctx: &BuildContext,
772 instruction: &Instruction,
773 ) -> Result<()> {
774 // The step index is the position of this instruction in the
775 // current stage so error messages and tracing logs name a real
776 // line number. We re-derive it by scanning the parsed dockerfile
777 // for an `==` match; that costs O(stage_len) per call which is
778 // negligible compared to the HCS round-trip cost.
779 let step_index = skeleton
780 .parsed_dockerfile
781 .stages
782 .first()
783 .and_then(|s| s.instructions.iter().position(|i| i == instruction))
784 .unwrap_or(0);
785
786 let result = match instruction {
787 Instruction::Run(run) => self.execute_run_step(skeleton, run, step_index).await,
788 Instruction::Copy(copy) => self.apply_copy(skeleton, ctx, copy, step_index).await,
789 Instruction::Add(add) => self.apply_add(skeleton, ctx, add, step_index).await,
790 Instruction::Env(env) => {
791 apply_env(&mut skeleton.image_config, env);
792 Ok(())
793 }
794 Instruction::Workdir(path) => {
795 apply_workdir(&mut skeleton.image_config, path);
796 Ok(())
797 }
798 Instruction::Entrypoint(cmd) => {
799 apply_entrypoint(&mut skeleton.image_config, cmd);
800 Ok(())
801 }
802 Instruction::Cmd(cmd) => {
803 apply_cmd(&mut skeleton.image_config, cmd);
804 Ok(())
805 }
806 Instruction::User(user) => {
807 skeleton.image_config.user = Some(user.clone());
808 Ok(())
809 }
810 Instruction::Expose(expose) => {
811 apply_expose(&mut skeleton.image_config, expose);
812 Ok(())
813 }
814 Instruction::Volume(paths) => {
815 for p in paths {
816 skeleton
817 .image_config
818 .volumes
819 .insert(p.clone(), serde_json::json!({}));
820 }
821 Ok(())
822 }
823 Instruction::Label(labels) => {
824 for (k, v) in labels {
825 skeleton.image_config.labels.insert(k.clone(), v.clone());
826 }
827 Ok(())
828 }
829 // ARG is consumed by the parser's variable-expansion pass and
830 // does not appear in the final OCI image config. We accept it
831 // here as a no-op so a Dockerfile with bare `ARG` lines walks
832 // through the builder cleanly.
833 Instruction::Arg(_) => Ok(()),
834 Instruction::Shell(tokens) => {
835 skeleton.image_config.shell = Some(tokens.clone());
836 Ok(())
837 }
838 Instruction::Stopsignal(sig) => {
839 skeleton.image_config.stop_signal = Some(sig.clone());
840 Ok(())
841 }
842 Instruction::Healthcheck(hc) => {
843 apply_healthcheck(&mut skeleton.image_config, hc);
844 Ok(())
845 }
846 Instruction::Onbuild(boxed) => {
847 skeleton
848 .image_config
849 .on_build
850 .push(format_onbuild_trigger(boxed));
851 Ok(())
852 }
853 };
854 // Only record successful executions so failed RUN steps don't
855 // leak into the emitted history (a build that fails never
856 // produces a manifest anyway, but a clean log keeps 4.D's
857 // emitter free of "did this layer actually exist?" branches).
858 if result.is_ok() {
859 // ARG is intentionally omitted from the history — Docker's
860 // `docker history` also elides ARG triggers because they're
861 // consumed at parse time and don't affect the final image.
862 if !matches!(instruction, Instruction::Arg(_)) {
863 skeleton.instruction_log.push(ExecutedInstruction {
864 source_line: format_instruction_source_line(instruction),
865 produced_layer: instruction.creates_layer(),
866 timestamp: Utc::now(),
867 });
868 }
869 }
870 result
871 }
872
873 /// Execute a COPY instruction. Resolves sources against
874 /// `ctx.context_dir`, rejects `..` traversal, copies into a fresh
875 /// scratch layer, then commits the scratch as a new RO layer on
876 /// Windows. Off-Windows the copy is performed against a plain
877 /// directory under `working_layer_chain_dir/<scratch_id>/Files/` for
878 /// unit-test coverage; no HCS commit happens.
879 async fn apply_copy(
880 &self,
881 skeleton: &mut BuildSkeleton,
882 ctx: &BuildContext,
883 copy: &CopyInstruction,
884 step_index: usize,
885 ) -> Result<()> {
886 if let Some(stage) = ©.from {
887 return Err(BuildError::NotSupported {
888 operation: format!(
889 "multi-stage COPY --from='{stage}' lands in a later task — the WCOW skeleton \
890 supports single-stage builds only"
891 ),
892 });
893 }
894 if let Some(owner) = ©.chown {
895 tracing::info!(
896 step_index = step_index,
897 chown = %owner,
898 "COPY --chown is a no-op on WCOW (Windows containers do not honour Unix-style \
899 uid:gid ownership the same way)"
900 );
901 }
902 let resolved_sources = resolve_copy_sources(ctx, ©.sources)?;
903 apply_filesystem_writes(
904 self.config(),
905 skeleton,
906 step_index,
907 &resolved_sources,
908 ©.destination,
909 /* extract_archives = */ false,
910 /* downloads = */ &[],
911 )
912 .await
913 }
914
915 /// Execute an ADD instruction. Extends COPY with HTTP(S) URL fetch
916 /// and tarball auto-extraction.
917 async fn apply_add(
918 &self,
919 skeleton: &mut BuildSkeleton,
920 ctx: &BuildContext,
921 add: &AddInstruction,
922 step_index: usize,
923 ) -> Result<()> {
924 if let Some(owner) = &add.chown {
925 tracing::info!(
926 step_index = step_index,
927 chown = %owner,
928 "ADD --chown is a no-op on WCOW"
929 );
930 }
931 // Partition sources into URLs vs local paths.
932 let mut local_sources: Vec<String> = Vec::new();
933 let mut url_sources: Vec<String> = Vec::new();
934 for src in &add.sources {
935 if is_http_url(src) {
936 url_sources.push(src.clone());
937 } else {
938 local_sources.push(src.clone());
939 }
940 }
941 let resolved_locals = resolve_copy_sources(ctx, &local_sources)?;
942
943 // Download URLs into a temp dir so the per-instruction commit
944 // step sees them as ordinary files. We materialise the downloads
945 // alongside the resolved locals; the writes function does the
946 // extract-if-tarball decision uniformly across both groups.
947 let mut downloads: Vec<DownloadedFile> = Vec::with_capacity(url_sources.len());
948 for url in &url_sources {
949 // ADD <URL> carries no lockfile-pinned digest, so compute-and-record.
950 let download = download_url(url, None).await?;
951 downloads.push(download);
952 }
953
954 apply_filesystem_writes(
955 self.config(),
956 skeleton,
957 step_index,
958 &resolved_locals,
959 &add.destination,
960 /* extract_archives = */ true,
961 &downloads,
962 )
963 .await
964 }
965
966 /// Execute one RUN step: optionally translate Linux package-manager
967 /// invocations to Chocolatey, build an ephemeral HCS compute system
968 /// rooted at a fresh scratch layer over the working chain, spawn the
969 /// command, wait for it to exit, then export the scratch diff as a
970 /// new read-only layer and append it to the chain.
971 ///
972 /// Routed to a platform-specific implementation; non-Windows hosts
973 /// surface [`BuildError::NotSupported`].
974 async fn execute_run_step(
975 &self,
976 skeleton: &mut BuildSkeleton,
977 run: &RunInstruction,
978 step_index: usize,
979 ) -> Result<()> {
980 execute_run_step_impl(&self.config, skeleton, run, step_index).await
981 }
982
983 /// Emit a final OCI image manifest + image config blob from the
984 /// accumulated [`BuildSkeleton`] state.
985 ///
986 /// Walks the base-first [`BuildSkeleton::base_layers`] vector
987 /// alongside [`BuildSkeleton::working_chain`] to build one
988 /// [`EmittedLayer`] per descriptor. The first descriptor — the
989 /// foreign Windows base layer pulled from MCR — gets the
990 /// `application/vnd.docker.image.rootfs.foreign.diff.tar.gzip` media
991 /// type AND preserves the `urls[]` mirror list so Windows daemons
992 /// pull the bytes from MCR rather than the user's destination
993 /// registry. Subsequent layers (RUN / COPY / ADD outputs) get
994 /// `application/vnd.oci.image.layer.v1.tar+gzip` and no `urls[]`.
995 ///
996 /// `os.version` resolution order:
997 /// 1. [`WindowsBuildConfig::os_version_override`] (explicit user
998 /// pin).
999 /// 2. [`BaseImageManifest::os_version`] (from the resolved base
1000 /// manifest's platform descriptor / config blob).
1001 /// 3. Fallback to [`BuildError::OsVersionUnresolved`] — the Windows
1002 /// runtime refuses to launch a container whose `os.version` does
1003 /// not match the host kernel build, so emitting a manifest
1004 /// without one would produce a container nothing can run.
1005 ///
1006 /// # Errors
1007 ///
1008 /// - [`BuildError::OsVersionUnresolved`] when neither the override
1009 /// nor the base manifest carries an `os.version`.
1010 /// - [`BuildError::SerializeManifestFailed`] when serialising the
1011 /// image config or manifest blob fails (programmer error in this
1012 /// crate).
1013 /// - [`BuildError::LayerDigestComputationFailed`] when computing a
1014 /// `diff_id` over a non-foreign layer blob fails because the blob
1015 /// path could not be read.
1016 pub async fn emit_image(&self, skeleton: &BuildSkeleton, tag: &str) -> Result<BuiltImage> {
1017 emit_image_impl(self.config(), skeleton, tag).await
1018 }
1019
1020 /// Push an already-emitted [`BuiltImage`] to its target registry
1021 /// (task 4.E).
1022 ///
1023 /// Uploads every non-foreign layer blob, the image config blob, and
1024 /// finally PUTs the manifest at `tag`. Foreign Windows base layers
1025 /// (those carrying a non-`None` `urls[]` on their [`EmittedLayer`])
1026 /// are NEVER re-uploaded — Windows daemons pulling the image will
1027 /// follow the manifest's `urls[]` array back to MCR (or the configured
1028 /// mirror) for those blobs. This is the entire point of foreign-layer
1029 /// distribution and why the WCOW push path must round-trip `urls[]`
1030 /// verbatim into the manifest blob.
1031 ///
1032 /// Authentication is sourced from
1033 /// [`WindowsBuildConfig::registry_auth`].
1034 ///
1035 /// # Arguments
1036 ///
1037 /// * `image` — output of [`Self::emit_image`].
1038 /// * `tag` — full image reference (`"ghcr.io/org/name:version"`,
1039 /// `"localhost:5000/repo:tag"`, …). Used verbatim as the manifest's
1040 /// PUT target.
1041 ///
1042 /// # Errors
1043 ///
1044 /// - [`BuildError::BlobUploadFailed`] when uploading a non-foreign
1045 /// layer blob or the image config blob fails.
1046 /// - [`BuildError::ManifestPutFailed`] when the final manifest PUT
1047 /// fails.
1048 /// - [`BuildError::PushFailed`] when staging a local blob (reading
1049 /// off disk) fails before any wire interaction.
1050 pub async fn push(&self, image: &BuiltImage, tag: &str) -> Result<()> {
1051 let target = RegistryPushTarget::new();
1052 self.push_with(image, tag, &target).await
1053 }
1054
1055 /// Push variant that takes an explicit [`PushTarget`]. The public
1056 /// [`Self::push`] wires up the real registry-backed implementation;
1057 /// this variant exists so unit tests can inject a recording double
1058 /// without needing a live registry. The split also means the wire
1059 /// protocol details (how many `PATCH`es per blob, what `Content-Type`
1060 /// the manifest carries, etc.) live entirely behind the trait and can
1061 /// be unit-tested independently of `WindowsBuilder`.
1062 ///
1063 /// # Errors
1064 ///
1065 /// Same shape as [`Self::push`]: [`BuildError::BlobUploadFailed`],
1066 /// [`BuildError::ManifestPutFailed`], or [`BuildError::PushFailed`]
1067 /// depending on which stage of the push tripped.
1068 pub async fn push_with(
1069 &self,
1070 image: &BuiltImage,
1071 tag: &str,
1072 target: &dyn PushTarget,
1073 ) -> Result<()> {
1074 push_impl(image, tag, &self.config.registry_auth, target).await
1075 }
1076
1077 /// Convenience: skeleton → execute every parsed instruction → emit
1078 /// manifest → push. Single entry point for callers that want a
1079 /// one-shot WCOW build from a [`BuildContext`].
1080 ///
1081 /// Walks `ctx`'s Dockerfile in order. After each `Instruction` the
1082 /// instruction is routed through [`Self::execute_instruction`] which
1083 /// commits a new RO layer per RUN/COPY/ADD and mutates the in-memory
1084 /// image config for config-only instructions. After the last
1085 /// instruction the manifest is emitted via [`Self::emit_image`] and
1086 /// pushed via [`Self::push`].
1087 ///
1088 /// # Errors
1089 ///
1090 /// Any [`BuildError`] that [`Self::build_skeleton`],
1091 /// [`Self::execute_instruction`], [`Self::emit_image`], or
1092 /// [`Self::push`] can produce.
1093 pub async fn build_and_push(&self, ctx: &BuildContext) -> Result<()> {
1094 let mut skeleton = self.build_skeleton(ctx).await?;
1095 // The Dockerfile's first (and only — multi-stage is rejected by
1096 // build_skeleton) stage is what we walk. Cloning the instruction
1097 // vector is cheap and lets us pass `&mut skeleton` into
1098 // execute_instruction without aliasing the parsed-dockerfile
1099 // borrow.
1100 let instructions: Vec<Instruction> = skeleton
1101 .parsed_dockerfile
1102 .stages
1103 .first()
1104 .map(|s| s.instructions.clone())
1105 .unwrap_or_default();
1106 for instr in &instructions {
1107 self.execute_instruction(&mut skeleton, ctx, instr).await?;
1108 }
1109 let built = self.emit_image(&skeleton, &ctx.tag).await?;
1110 self.push(&built, &ctx.tag).await
1111 }
1112
1113 /// Run a full Windows build conforming to the
1114 /// [`crate::backend::BuildBackend::build_image`] contract.
1115 ///
1116 /// This is the canonical Windows build entry point used by
1117 /// [`crate::backend::hcs::HcsBackend::build_image`] — that backend
1118 /// is a thin shim that constructs a [`WindowsBuilder`] and delegates
1119 /// here. Drives the same pipeline as [`Self::build_and_push`] but
1120 /// (a) accepts an already-parsed [`Dockerfile`] and a
1121 /// [`BuildOptions`] from the public builder API instead of the
1122 /// builder-internal [`BuildContext`]; (b) emits [`BuildEvent`]s into
1123 /// the optional `event_tx` channel so the TUI / plain-logger surface
1124 /// stays driven; (c) detects the provisioned toolchain language for
1125 /// the resolved (post-rewrite) base image and threads it through
1126 /// every RUN step's apt→choco translator; (d) returns the
1127 /// CLI-facing [`crate::builder::BuiltImage`] type, not the
1128 /// builder-internal [`BuiltImage`].
1129 ///
1130 /// `event_tx` is a `std::sync::mpsc::Sender` — matching the
1131 /// `BuildBackend::build_image` trait signature — and is consumed
1132 /// fire-and-forget: a closed receiver does not fail the build.
1133 ///
1134 /// # Errors
1135 ///
1136 /// - [`BuildError::NotSupported`] when the dockerfile has multiple
1137 /// stages (single-stage only in the first iteration; same as
1138 /// `HcsBackend`'s existing constraint).
1139 /// - [`BuildError::stage_not_found`] when the FROM target is a stage
1140 /// reference.
1141 /// - [`BuildError::InvalidInstruction`] when the FROM target is
1142 /// `scratch` (HCS requires a Windows base).
1143 /// - Any error [`build_skeleton_with_parsed`](Self::build_skeleton_with_parsed),
1144 /// [`execute_instruction`](Self::execute_instruction),
1145 /// [`emit_image`](Self::emit_image), or [`push`](Self::push) can
1146 /// produce.
1147 /// - [`BuildError::NotSupported`] when invoked on a non-Windows
1148 /// host (the underlying base materialisation step needs HCS).
1149 // Sequential pipeline orchestration: gate -> resolve -> ctx -> events ->
1150 // skeleton -> toolchain -> loop -> emit -> push -> bridge. Splitting
1151 // would scatter the linear state plumbing without simplifying any
1152 // individual stage; keep it inline to match the existing
1153 // `execute_run_step_impl` precedent.
1154 pub async fn build_image_for_backend(
1155 &self,
1156 context: &Path,
1157 dockerfile: &Dockerfile,
1158 options: &crate::builder::BuildOptions,
1159 event_tx: Option<&std::sync::mpsc::Sender<crate::tui::BuildEvent>>,
1160 ) -> std::result::Result<crate::builder::BuiltImage, BuildError> {
1161 Ok(self
1162 .build_image_for_backend_inner(context, dockerfile, options, event_tx)
1163 .await?
1164 .0)
1165 }
1166
1167 /// Same pipeline as [`Self::build_image_for_backend`], but also returns the
1168 /// builder-internal [`BuiltImage`] (manifest + config + every layer
1169 /// descriptor) alongside the CLI-facing one.
1170 ///
1171 /// The native HCS backend uses this so that, after the build succeeds, it can
1172 /// assemble a buildah-free `oci-archive:` for the local-registry import
1173 /// (`HcsBackend::export_oci_archive`) without rebuilding — the internal
1174 /// `BuiltImage` carries exactly the manifest/config bytes and the on-disk
1175 /// layer paths the archive needs.
1176 ///
1177 /// # Errors
1178 ///
1179 /// Identical to [`Self::build_image_for_backend`].
1180 pub async fn build_image_for_backend_with_artifact(
1181 &self,
1182 context: &Path,
1183 dockerfile: &Dockerfile,
1184 options: &crate::builder::BuildOptions,
1185 event_tx: Option<&std::sync::mpsc::Sender<crate::tui::BuildEvent>>,
1186 ) -> std::result::Result<(crate::builder::BuiltImage, BuiltImage), BuildError> {
1187 self.build_image_for_backend_inner(context, dockerfile, options, event_tx)
1188 .await
1189 }
1190
1191 #[allow(clippy::too_many_lines)]
1192 async fn build_image_for_backend_inner(
1193 &self,
1194 context: &Path,
1195 dockerfile: &Dockerfile,
1196 options: &crate::builder::BuildOptions,
1197 event_tx: Option<&std::sync::mpsc::Sender<crate::tui::BuildEvent>>,
1198 ) -> std::result::Result<(crate::builder::BuiltImage, BuiltImage), BuildError> {
1199 use crate::tui::BuildEvent;
1200
1201 // Fire-and-forget event helper: never fail the build because a
1202 // TUI receiver has closed.
1203 fn send_event(tx: Option<&std::sync::mpsc::Sender<BuildEvent>>, ev: BuildEvent) {
1204 if let Some(tx) = tx {
1205 let _ = tx.send(ev);
1206 }
1207 }
1208
1209 let started = std::time::Instant::now();
1210
1211 // 1. Single-stage gate. Mirrors the HcsBackend invariant
1212 // verbatim so a multi-stage Dockerfile reports the same
1213 // error string regardless of which entry point the caller
1214 // used.
1215 if dockerfile.stages.len() != 1 {
1216 return Err(BuildError::NotSupported {
1217 operation: format!(
1218 "multi-stage Windows builds ({} stages) — HCS backend supports a single \
1219 stage in the first iteration; track the follow-up at TODO(L-4-followup)",
1220 dockerfile.stages.len()
1221 ),
1222 });
1223 }
1224 let stage = &dockerfile.stages[0];
1225
1226 // 2. Resolve the FROM target. Stage refs and `FROM scratch`
1227 // are rejected with the same error shapes HcsBackend uses.
1228 let base_ref = match &stage.base_image {
1229 DockerfileFromTarget::Image(r) => r.to_string(),
1230 DockerfileFromTarget::Stage(name) => {
1231 return Err(BuildError::stage_not_found(name));
1232 }
1233 DockerfileFromTarget::Scratch => {
1234 return Err(BuildError::InvalidInstruction {
1235 instruction: "FROM scratch".to_string(),
1236 reason: "HCS builder requires a Windows base image — `scratch` cannot run \
1237 HCS processes (no OS kernel, no cmd.exe). Use \
1238 `mcr.microsoft.com/windows/nanoserver:...` or `.../servercore:...`."
1239 .to_string(),
1240 });
1241 }
1242 };
1243
1244 // 3. Build the builder-internal `BuildContext` from the public
1245 // `BuildOptions`. `dockerfile_path` is informational here —
1246 // `build_skeleton_with_parsed` does not re-read the file —
1247 // so we pass an empty `PathBuf` rather than fabricating a
1248 // fake path.
1249 let primary_tag = options.tags.first().cloned().unwrap_or_default();
1250 let ctx = BuildContext {
1251 context_dir: context.to_path_buf(),
1252 dockerfile_path: PathBuf::new(),
1253 build_args: options.build_args.clone(),
1254 tag: primary_tag,
1255 ltsc: options.windows_ltsc.clone(),
1256 };
1257
1258 // 4. Up-front BuildStarted + StageStarted. The base image
1259 // string emitted in StageStarted is the pre-rewrite ref so
1260 // the TUI reflects what the user wrote in the Dockerfile;
1261 // the post-rewrite string lives on the resulting
1262 // `BuildSkeleton::base_manifest`.
1263 send_event(
1264 event_tx,
1265 BuildEvent::BuildStarted {
1266 total_stages: 1,
1267 total_instructions: stage.instructions.len(),
1268 },
1269 );
1270 send_event(
1271 event_tx,
1272 BuildEvent::StageStarted {
1273 index: 0,
1274 name: stage.name.clone(),
1275 base_image: base_ref.clone(),
1276 },
1277 );
1278
1279 // 5. Materialise base (this also runs the LTSC FROM rewrite
1280 // internally).
1281 let mut skeleton = self
1282 .build_skeleton_with_parsed(dockerfile.clone(), &ctx)
1283 .await?;
1284
1285 // 6. Detect the provisioned toolchain language for the
1286 // resolved (post-rewrite) base image and inject its env +
1287 // PATH into the image config. Windows-only because
1288 // `windows_toolchain` is `#![cfg(target_os = "windows")]`.
1289 // Off-Windows this whole block is a no-op and
1290 // `provisioned_toolchain_language` stays `None` — which
1291 // matches the off-Windows execution path that will refuse
1292 // RUN steps anyway.
1293 #[cfg(target_os = "windows")]
1294 {
1295 if let Some(spec) =
1296 crate::windows_toolchain::detect_toolchain(&skeleton.base_manifest.image_ref)
1297 {
1298 // Idempotency: if the base already exports any of the
1299 // toolchain's env keys (e.g. a prebuilt Go image that
1300 // already has `GOROOT=`), don't double-stamp.
1301 let already_injected = spec.env.keys().any(|k| {
1302 let prefix = format!("{k}=");
1303 skeleton
1304 .image_config
1305 .env
1306 .iter()
1307 .any(|e| e.starts_with(&prefix))
1308 });
1309 if !already_injected {
1310 for (k, v) in &spec.env {
1311 skeleton.image_config.env.push(format!("{k}={v}"));
1312 }
1313 // Prepend the toolchain's PATH entries to any
1314 // existing PATH (the base config inherits PATH
1315 // from the Windows base image; we want the
1316 // toolchain to shadow the base, not the other
1317 // way around).
1318 let existing_path = skeleton
1319 .image_config
1320 .env
1321 .iter()
1322 .find(|e| e.starts_with("PATH="))
1323 .map(|e| e[5..].to_string());
1324 skeleton
1325 .image_config
1326 .env
1327 .retain(|e| !e.starts_with("PATH="));
1328 let prefix = spec.path_dirs.join(";");
1329 let new_path = match existing_path {
1330 Some(p) if !p.is_empty() => format!("PATH={prefix};{p}"),
1331 _ => format!("PATH={prefix}"),
1332 };
1333 skeleton.image_config.env.push(new_path);
1334 }
1335 skeleton.provisioned_toolchain_language = Some(spec.language.clone());
1336 }
1337 }
1338 #[cfg(not(target_os = "windows"))]
1339 {
1340 // Reference `skeleton` and `event_tx` are still live; no
1341 // explicit consumption needed. The off-Windows
1342 // execute_run_step path surfaces `NotSupported` if RUN is
1343 // hit, which is the right behaviour for a Windows-only
1344 // backend.
1345 }
1346
1347 // 7. Walk the parsed instructions.
1348 for (idx, instruction) in stage.instructions.iter().enumerate() {
1349 send_event(
1350 event_tx,
1351 BuildEvent::InstructionStarted {
1352 stage: 0,
1353 index: idx,
1354 instruction: format!("{instruction:?}"),
1355 },
1356 );
1357 match self
1358 .execute_instruction(&mut skeleton, &ctx, instruction)
1359 .await
1360 {
1361 Ok(()) => {
1362 send_event(
1363 event_tx,
1364 BuildEvent::InstructionComplete {
1365 stage: 0,
1366 index: idx,
1367 cached: false,
1368 },
1369 );
1370 }
1371 Err(e) => {
1372 send_event(
1373 event_tx,
1374 BuildEvent::BuildFailed {
1375 error: e.to_string(),
1376 },
1377 );
1378 return Err(e);
1379 }
1380 }
1381 }
1382 send_event(event_tx, BuildEvent::StageComplete { index: 0 });
1383
1384 // 8. Emit manifest + image config + layer blobs.
1385 let built = self.emit_image(&skeleton, &ctx.tag).await?;
1386
1387 // 9. Optional push.
1388 if options.push {
1389 self.push(&built, &ctx.tag).await?;
1390 }
1391
1392 // 10. Bridge the internal `BuiltImage` to the CLI-facing one.
1393 // Layer count + size are derived from the emitted
1394 // descriptors; matches `HcsBackend`'s field shape so the
1395 // two backends remain interchangeable to upstream callers.
1396 #[allow(clippy::cast_possible_truncation)]
1397 let elapsed_ms = started.elapsed().as_millis() as u64;
1398 let image_id = built.manifest_digest.clone();
1399 let mut tags = options.tags.clone();
1400 if tags.is_empty() {
1401 tags.push(format!("zlayer-windows-build:{}", &image_id));
1402 }
1403 let total_size: u64 = built
1404 .layers
1405 .iter()
1406 .map(|l| l.size)
1407 .sum::<u64>()
1408 .saturating_add(built.image_config_blob.len() as u64)
1409 .saturating_add(built.manifest_blob.len() as u64);
1410 let layer_count = built.layers.len();
1411
1412 send_event(
1413 event_tx,
1414 BuildEvent::BuildComplete {
1415 image_id: image_id.clone(),
1416 },
1417 );
1418
1419 Ok((
1420 crate::builder::BuiltImage {
1421 image_id,
1422 tags,
1423 layer_count,
1424 size: total_size,
1425 build_time_ms: elapsed_ms,
1426 is_manifest: false,
1427 },
1428 // Hand the builder-internal artifact back so the HCS backend can
1429 // export a buildah-free OCI archive by tag after the build.
1430 built,
1431 ))
1432 }
1433}
1434
1435/// Abstraction over the wire-side push operations the WCOW builder needs.
1436///
1437/// Two implementations:
1438///
1439/// - [`RegistryPushTarget`] — the real impl, wraps
1440/// [`zlayer_registry::ImagePuller`].
1441/// - In-test recording doubles (see this module's `tests` submodule) —
1442/// capture which blobs were uploaded and which manifest bytes were PUT
1443/// so unit tests can assert foreign layers are skipped and `urls[]`
1444/// round-trips verbatim.
1445///
1446/// `PushTarget` is intentionally minimal: just "upload an arbitrary blob
1447/// with this digest" and "PUT this manifest blob at this tag with this
1448/// content type." Everything WCOW-specific (foreign-layer detection,
1449/// reading layer blobs off disk, computing the manifest's
1450/// `Content-Type`) lives in [`push_impl`] — the trait surface only
1451/// describes registry-side primitives.
1452#[async_trait::async_trait]
1453pub trait PushTarget: Send + Sync {
1454 /// Upload a single blob, content-addressed by `digest`, to the
1455 /// registry under `reference`.
1456 ///
1457 /// `media_type` is informational — the registry's blob-upload
1458 /// endpoint does not key on it — but is plumbed through so backends
1459 /// that key off media type for observability can use it.
1460 async fn upload_blob(
1461 &self,
1462 reference: &str,
1463 digest: &str,
1464 media_type: &str,
1465 data: Vec<u8>,
1466 auth: &RegistryAuth,
1467 ) -> std::result::Result<(), String>;
1468
1469 /// PUT the pre-serialised manifest blob `bytes` at `reference` with
1470 /// `Content-Type: content_type`. The bytes are sent verbatim so the
1471 /// foreign-layer `urls[]` array round-trips byte-identical between
1472 /// what [`BuiltImage::manifest_blob`] computed its digest over and
1473 /// what the registry indexes.
1474 async fn put_manifest(
1475 &self,
1476 reference: &str,
1477 bytes: Vec<u8>,
1478 content_type: &str,
1479 auth: &RegistryAuth,
1480 ) -> std::result::Result<(), String>;
1481}
1482
1483/// Real [`PushTarget`] backed by [`zlayer_registry::ImagePuller`].
1484///
1485/// Constructed with an in-memory blob cache because the push path never
1486/// reads from the cache — `ImagePuller`'s pull-side machinery is the only
1487/// reason the cache is wired in at construction. The cache is dropped as
1488/// soon as the push completes.
1489pub struct RegistryPushTarget {
1490 puller: zlayer_registry::ImagePuller,
1491}
1492
1493impl RegistryPushTarget {
1494 /// Construct a fresh push target backed by an in-memory blob cache.
1495 /// The cache is unused on the push path but satisfies the
1496 /// `ImagePuller` constructor contract.
1497 /// # Panics
1498 ///
1499 /// Panics only if the in-memory blob cache fails to initialise,
1500 /// which the implementation does not currently allow — kept as a
1501 /// panic rather than threading a `Result` through every push site.
1502 #[must_use]
1503 pub fn new() -> Self {
1504 let cache = zlayer_registry::BlobCache::new()
1505 .expect("in-memory BlobCache::new() is infallible in the current impl");
1506 Self {
1507 puller: zlayer_registry::ImagePuller::new(cache),
1508 }
1509 }
1510}
1511
1512impl Default for RegistryPushTarget {
1513 fn default() -> Self {
1514 Self::new()
1515 }
1516}
1517
1518#[async_trait::async_trait]
1519impl PushTarget for RegistryPushTarget {
1520 async fn upload_blob(
1521 &self,
1522 reference: &str,
1523 digest: &str,
1524 media_type: &str,
1525 data: Vec<u8>,
1526 auth: &RegistryAuth,
1527 ) -> std::result::Result<(), String> {
1528 self.puller
1529 .push_blob(reference, digest, &data, media_type, auth)
1530 .await
1531 .map_err(|e| e.to_string())
1532 }
1533
1534 async fn put_manifest(
1535 &self,
1536 reference: &str,
1537 bytes: Vec<u8>,
1538 content_type: &str,
1539 auth: &RegistryAuth,
1540 ) -> std::result::Result<(), String> {
1541 self.puller
1542 .push_manifest_blob(reference, bytes, content_type, auth)
1543 .await
1544 .map(|_digest| ())
1545 .map_err(|e| e.to_string())
1546 }
1547}
1548
1549/// Free-function push implementation — takes a [`PushTarget`] by
1550/// reference so both the public [`WindowsBuilder::push`] and the unit
1551/// tests can drive the same body.
1552///
1553/// Order: layers (skipping foreign) → image config blob → manifest.
1554/// Foreign layers are detected by the presence of a non-`None` `urls[]`
1555/// on their [`EmittedLayer`].
1556async fn push_impl(
1557 image: &BuiltImage,
1558 tag: &str,
1559 auth: &RegistryAuth,
1560 target: &dyn PushTarget,
1561) -> Result<()> {
1562 // 1. Upload every non-foreign layer blob.
1563 for layer in &image.layers {
1564 if layer.urls.is_some() {
1565 // Foreign layer — the manifest descriptor carries the MCR
1566 // urls[] verbatim; the registry never sees these bytes.
1567 tracing::debug!(
1568 tag = %tag,
1569 digest = %layer.digest,
1570 "skipping foreign layer blob upload (urls[] preserved on manifest)"
1571 );
1572 continue;
1573 }
1574
1575 let data = if layer.local_path.as_os_str().is_empty() {
1576 // Not foreign but no on-disk path either — programmer error
1577 // upstream of push. Surface a typed error instead of trying
1578 // to upload empty bytes.
1579 return Err(BuildError::PushFailed {
1580 tag: tag.to_string(),
1581 reason: format!(
1582 "non-foreign layer {} has no local_path (emit_image must populate it)",
1583 layer.digest
1584 ),
1585 });
1586 } else {
1587 tokio::fs::read(&layer.local_path)
1588 .await
1589 .map_err(|e| BuildError::PushFailed {
1590 tag: tag.to_string(),
1591 reason: format!(
1592 "failed to read layer blob {} from {}: {e}",
1593 layer.digest,
1594 layer.local_path.display()
1595 ),
1596 })?
1597 };
1598
1599 target
1600 .upload_blob(tag, &layer.digest, &layer.media_type, data, auth)
1601 .await
1602 .map_err(|reason| BuildError::BlobUploadFailed {
1603 digest: layer.digest.clone(),
1604 tag: tag.to_string(),
1605 reason,
1606 })?;
1607 }
1608
1609 // 2. Upload the image config blob.
1610 target
1611 .upload_blob(
1612 tag,
1613 &image.image_config_digest,
1614 OCI_IMAGE_CONFIG_MEDIA_TYPE,
1615 image.image_config_blob.clone(),
1616 auth,
1617 )
1618 .await
1619 .map_err(|reason| BuildError::BlobUploadFailed {
1620 digest: image.image_config_digest.clone(),
1621 tag: tag.to_string(),
1622 reason,
1623 })?;
1624
1625 // 3. PUT the manifest blob. We send the raw bytes (not a
1626 // re-serialised OciImageManifest) so foreign `urls[]` round-trips
1627 // byte-identical to what BuiltImage::manifest_digest was computed
1628 // over.
1629 target
1630 .put_manifest(
1631 tag,
1632 image.manifest_blob.clone(),
1633 OCI_IMAGE_MANIFEST_MEDIA_TYPE,
1634 auth,
1635 )
1636 .await
1637 .map_err(|reason| BuildError::ManifestPutFailed {
1638 tag: tag.to_string(),
1639 reason,
1640 })?;
1641
1642 Ok(())
1643}
1644
1645/// Reconstruct a canonical Dockerfile source line for one parsed
1646/// instruction. Used for [`ExecutedInstruction::source_line`] so the
1647/// emitted OCI history's `created_by` shows the canonical form (not the
1648/// original line, which may have spanned multiple physical lines via
1649/// continuation backslashes).
1650fn format_instruction_source_line(instr: &Instruction) -> String {
1651 match instr {
1652 Instruction::Run(run) => match &run.command {
1653 ShellOrExec::Shell(s) => format!("RUN {s}"),
1654 ShellOrExec::Exec(args) => format!(
1655 "RUN {}",
1656 serde_json::to_string(args).unwrap_or_else(|_| "[]".to_string())
1657 ),
1658 },
1659 Instruction::Copy(c) => {
1660 let from = c
1661 .from
1662 .as_deref()
1663 .map(|f| format!("--from={f} "))
1664 .unwrap_or_default();
1665 format!("COPY {from}{} {}", c.sources.join(" "), c.destination)
1666 }
1667 Instruction::Add(a) => format!("ADD {} {}", a.sources.join(" "), a.destination),
1668 Instruction::Env(e) => {
1669 let mut keys: Vec<&String> = e.vars.keys().collect();
1670 keys.sort();
1671 let body = keys
1672 .iter()
1673 .map(|k| format!("{}={}", k, e.vars[*k]))
1674 .collect::<Vec<_>>()
1675 .join(" ");
1676 format!("ENV {body}")
1677 }
1678 Instruction::Workdir(p) => format!("WORKDIR {p}"),
1679 Instruction::Expose(e) => {
1680 let proto = match e.protocol {
1681 ExposeProtocol::Tcp => "tcp",
1682 ExposeProtocol::Udp => "udp",
1683 };
1684 format!("EXPOSE {}/{proto}", e.port)
1685 }
1686 Instruction::Label(labels) => {
1687 let mut keys: Vec<&String> = labels.keys().collect();
1688 keys.sort();
1689 let body = keys
1690 .iter()
1691 .map(|k| format!("{}={}", k, labels[*k]))
1692 .collect::<Vec<_>>()
1693 .join(" ");
1694 format!("LABEL {body}")
1695 }
1696 Instruction::User(u) => format!("USER {u}"),
1697 Instruction::Entrypoint(c) => match c {
1698 ShellOrExec::Shell(s) => format!("ENTRYPOINT {s}"),
1699 ShellOrExec::Exec(args) => format!(
1700 "ENTRYPOINT {}",
1701 serde_json::to_string(args).unwrap_or_else(|_| "[]".to_string())
1702 ),
1703 },
1704 Instruction::Cmd(c) => match c {
1705 ShellOrExec::Shell(s) => format!("CMD {s}"),
1706 ShellOrExec::Exec(args) => format!(
1707 "CMD {}",
1708 serde_json::to_string(args).unwrap_or_else(|_| "[]".to_string())
1709 ),
1710 },
1711 Instruction::Volume(paths) => format!("VOLUME {}", paths.join(" ")),
1712 Instruction::Shell(tokens) => format!(
1713 "SHELL {}",
1714 serde_json::to_string(tokens).unwrap_or_else(|_| "[]".to_string())
1715 ),
1716 Instruction::Arg(a) => match &a.default {
1717 Some(d) => format!("ARG {}={d}", a.name),
1718 None => format!("ARG {}", a.name),
1719 },
1720 Instruction::Stopsignal(s) => format!("STOPSIGNAL {s}"),
1721 Instruction::Healthcheck(_) => "HEALTHCHECK".to_string(),
1722 Instruction::Onbuild(inner) => {
1723 format!("ONBUILD {}", format_instruction_source_line(inner))
1724 }
1725 }
1726}
1727
1728// ---------------------------------------------------------------------------
1729// 4.C: config-only instruction helpers (cross-platform, pure mutation)
1730// ---------------------------------------------------------------------------
1731
1732/// Apply a WORKDIR instruction.
1733///
1734/// Relative paths resolve against the previous WORKDIR per the Dockerfile
1735/// spec. On Windows the resolution uses backslash as the separator so
1736/// `WORKDIR sub` after `WORKDIR C:\\app` yields `C:\\app\\sub`. Absolute
1737/// paths (Unix-style `/x` or Windows-style `C:\\x` / `C:/x`) replace the
1738/// prior value.
1739pub(crate) fn apply_workdir(cfg: &mut OciImageConfig, path: &str) {
1740 let trimmed = path.trim();
1741 if trimmed.is_empty() {
1742 return;
1743 }
1744 let is_absolute = is_absolute_windows_or_unix(trimmed);
1745 let resolved = if is_absolute {
1746 trimmed.to_string()
1747 } else if let Some(prev) = cfg.working_dir.as_deref() {
1748 join_windows_path(prev, trimmed)
1749 } else {
1750 // No prior WORKDIR — relative path against an unset cwd is
1751 // treated as the root drive on Windows. We surface it verbatim
1752 // so the final OCI config preserves the user's intent for 4.D.
1753 trimmed.to_string()
1754 };
1755 cfg.working_dir = Some(resolved);
1756}
1757
1758/// Apply an ENV instruction, enforcing last-write-wins for each KEY.
1759pub(crate) fn apply_env(cfg: &mut OciImageConfig, env: &EnvInstruction) {
1760 // Sort keys so repeated calls produce a stable order in the final
1761 // image config — multi-key ENV lines are common (`ENV A=1 B=2`) and
1762 // a non-deterministic order breaks image-digest reproducibility.
1763 let mut keys: Vec<&String> = env.vars.keys().collect();
1764 keys.sort();
1765 for key in keys {
1766 let value = &env.vars[key];
1767 let entry = format!("{key}={value}");
1768 // Drop any existing entry with the same KEY.
1769 cfg.env
1770 .retain(|e| e.split_once('=').is_none_or(|(k, _)| k != key.as_str()));
1771 cfg.env.push(entry);
1772 }
1773}
1774
1775/// Apply an ENTRYPOINT instruction. Shell form is rewritten to
1776/// `cmd /c <body>` per Windows convention; exec form is passed through.
1777/// Setting ENTRYPOINT resets CMD to `None` per the Dockerfile spec.
1778pub(crate) fn apply_entrypoint(cfg: &mut OciImageConfig, cmd: &ShellOrExec) {
1779 cfg.entrypoint = Some(shell_or_exec_to_vec(cmd));
1780 cfg.cmd = None;
1781}
1782
1783/// Apply a CMD instruction. Shell form is rewritten to `cmd /c <body>`;
1784/// exec form is passed through.
1785pub(crate) fn apply_cmd(cfg: &mut OciImageConfig, cmd: &ShellOrExec) {
1786 cfg.cmd = Some(shell_or_exec_to_vec(cmd));
1787}
1788
1789/// Apply an EXPOSE instruction. Multiple EXPOSE lines accumulate.
1790pub(crate) fn apply_expose(cfg: &mut OciImageConfig, expose: &ExposeInstruction) {
1791 let proto = match expose.protocol {
1792 ExposeProtocol::Tcp => "tcp",
1793 ExposeProtocol::Udp => "udp",
1794 };
1795 let key = format!("{}/{}", expose.port, proto);
1796 cfg.exposed_ports.insert(key, serde_json::json!({}));
1797}
1798
1799/// Apply a HEALTHCHECK instruction, normalising `Duration` values to OCI
1800/// string form so 4.D can serialise without re-formatting.
1801pub(crate) fn apply_healthcheck(cfg: &mut OciImageConfig, hc: &HealthcheckInstruction) {
1802 match hc {
1803 HealthcheckInstruction::None => {
1804 cfg.healthcheck = Some(OciHealthcheck::disabled());
1805 }
1806 HealthcheckInstruction::Check {
1807 command,
1808 interval,
1809 timeout,
1810 start_period,
1811 retries,
1812 ..
1813 } => {
1814 let test = match command {
1815 ShellOrExec::Shell(s) => vec!["CMD-SHELL".to_string(), s.clone()],
1816 ShellOrExec::Exec(args) => {
1817 let mut v = Vec::with_capacity(args.len() + 1);
1818 v.push("CMD".to_string());
1819 v.extend(args.iter().cloned());
1820 v
1821 }
1822 };
1823 cfg.healthcheck = Some(OciHealthcheck {
1824 test,
1825 interval: interval.map(duration_to_oci_string),
1826 timeout: timeout.map(duration_to_oci_string),
1827 retries: *retries,
1828 start_period: start_period.map(duration_to_oci_string),
1829 });
1830 }
1831 }
1832}
1833
1834/// Convert a [`ShellOrExec`] into the OCI config's vector form. Shell
1835/// form is wrapped in `["cmd", "/c", "<body>"]` for WCOW; exec form is
1836/// passed through.
1837fn shell_or_exec_to_vec(cmd: &ShellOrExec) -> Vec<String> {
1838 match cmd {
1839 ShellOrExec::Shell(body) => {
1840 vec!["cmd".to_string(), "/c".to_string(), body.clone()]
1841 }
1842 ShellOrExec::Exec(args) => args.clone(),
1843 }
1844}
1845
/// Format a [`std::time::Duration`] as a single-unit duration string that
/// Go's `time.ParseDuration` accepts (e.g. `"30s"`, `"90s"`, `"2m"`,
/// `"500ms"`, `"250us"`).
///
/// The largest unit among `h`, `m`, `s`, `ms`, `us`, `ns` that divides
/// the duration exactly is used, so the value round-trips without loss.
/// Compound forms such as `"1m30s"` are never produced; 90 seconds is
/// rendered as `"90s"`. A zero duration is rendered as `"0s"`.
///
/// Sub-millisecond precision is preserved: a non-zero duration below one
/// millisecond is rendered in `us`/`ns` instead of collapsing to `"0s"`.
fn duration_to_oci_string(d: std::time::Duration) -> String {
    /// Units coarser than a nanosecond, largest first, as
    /// `(nanoseconds per unit, suffix)`.
    const COARSER_UNITS: [(u128, &str); 5] = [
        (3_600_000_000_000, "h"),
        (60_000_000_000, "m"),
        (1_000_000_000, "s"),
        (1_000_000, "ms"),
        (1_000, "us"),
    ];

    let total_ns = d.as_nanos();
    if total_ns == 0 {
        return "0s".to_string();
    }
    for (unit_ns, suffix) in COARSER_UNITS {
        if total_ns % unit_ns == 0 {
            return format!("{}{suffix}", total_ns / unit_ns);
        }
    }
    // Not a whole number of microseconds: fall back to nanoseconds.
    format!("{total_ns}ns")
}
1867
1868/// Format an ONBUILD trigger back into Dockerfile source form for
1869/// storage in the image config's `OnBuild` array. The OCI image config
1870/// stores triggers as raw instruction strings so downstream builds can
1871/// re-parse them.
1872fn format_onbuild_trigger(instr: &Instruction) -> String {
1873 match instr {
1874 Instruction::Run(run) => match &run.command {
1875 ShellOrExec::Shell(s) => format!("RUN {s}"),
1876 ShellOrExec::Exec(args) => format!(
1877 "RUN {}",
1878 serde_json::to_string(args).unwrap_or_else(|_| "[]".to_string())
1879 ),
1880 },
1881 Instruction::Copy(c) => format!("COPY {} {}", c.sources.join(" "), c.destination),
1882 Instruction::Add(a) => format!("ADD {} {}", a.sources.join(" "), a.destination),
1883 Instruction::Env(e) => {
1884 let mut keys: Vec<&String> = e.vars.keys().collect();
1885 keys.sort();
1886 let body = keys
1887 .iter()
1888 .map(|k| format!("{}={}", k, e.vars[*k]))
1889 .collect::<Vec<_>>()
1890 .join(" ");
1891 format!("ENV {body}")
1892 }
1893 Instruction::Workdir(p) => format!("WORKDIR {p}"),
1894 Instruction::User(u) => format!("USER {u}"),
1895 Instruction::Cmd(c) => match c {
1896 ShellOrExec::Shell(s) => format!("CMD {s}"),
1897 ShellOrExec::Exec(args) => format!(
1898 "CMD {}",
1899 serde_json::to_string(args).unwrap_or_else(|_| "[]".to_string())
1900 ),
1901 },
1902 Instruction::Entrypoint(c) => match c {
1903 ShellOrExec::Shell(s) => format!("ENTRYPOINT {s}"),
1904 ShellOrExec::Exec(args) => format!(
1905 "ENTRYPOINT {}",
1906 serde_json::to_string(args).unwrap_or_else(|_| "[]".to_string())
1907 ),
1908 },
1909 other => other.name().to_string(),
1910 }
1911}
1912
/// Report whether `p` is absolute in either path dialect.
///
/// A path counts as absolute when it begins with `/` or `\`, or with an
/// ASCII drive letter followed by `:` and a separator (`C:\` / `C:/`).
/// A bare `C:` or drive-relative `C:foo` is not absolute. Used by
/// [`apply_workdir`] to decide whether to resolve against the previous
/// WORKDIR.
fn is_absolute_windows_or_unix(p: &str) -> bool {
    match p.as_bytes() {
        [b'/' | b'\\', ..] => true,
        [drive, b':', b'\\' | b'/', ..] => drive.is_ascii_alphabetic(),
        _ => false,
    }
}
1930
/// Concatenate `base` and `suffix` with exactly one backslash between
/// them (Windows path convention).
///
/// Trailing separators on `base` and leading separators on `suffix`
/// (either `\` or `/`) are stripped first. Separators inside either part
/// are left as written. `std::path::Path::join` is avoided because it
/// uses the host OS's separator, which would produce `C:\\app/sub` on
/// Linux — the wrong shape for an OCI image targeting Windows.
fn join_windows_path(base: &str, suffix: &str) -> String {
    let is_separator = |ch: char| ch == '\\' || ch == '/';
    let head = base.trim_end_matches(is_separator);
    let tail = suffix.trim_start_matches(is_separator);
    format!("{head}\\{tail}")
}
1942
1943// ---------------------------------------------------------------------------
1944// 4.C: COPY/ADD filesystem helpers (cross-platform plumbing)
1945// ---------------------------------------------------------------------------
1946
/// One resolved local source for a COPY/ADD. `relative` is the original
/// `<src>` string from the Dockerfile (kept for diagnostics); `absolute`
/// is the fully-resolved `context_dir.join(<src>)`.
#[derive(Debug, Clone)]
struct ResolvedSource {
    /// Source exactly as written in the Dockerfile.
    relative: String,
    /// Host path built by joining the source onto the build context
    /// directory. It is not canonicalised, so it may not exist yet.
    absolute: PathBuf,
}
1955
/// A downloaded URL materialised on disk, paired with the URL's
/// basename so [`apply_filesystem_writes`] can place the file at
/// `<dest>/<basename>` when `<dest>` is a directory.
///
/// The file lives inside the temporary directory owned by `_guard`, so
/// `path` is only valid while this value is alive.
struct DownloadedFile {
    /// Path on disk to the downloaded blob (lives in a tempdir whose
    /// guard is held until this struct is dropped).
    path: PathBuf,
    /// Basename derived from the URL path component.
    basename: String,
    /// The original URL for diagnostics + the extract-if-tarball
    /// decision (tarball detection is purely by extension).
    url: String,
    /// Temp-dir guard so the file outlives this object until the
    /// destructor runs. Never read; held only for its `Drop`.
    _guard: tempfile::TempDir,
}
1972
/// Report whether a COPY/ADD source string is an HTTP(S) URL, i.e.
/// starts with `http://` or `https://`. The scheme comparison ignores
/// ASCII case.
fn is_http_url(s: &str) -> bool {
    let raw = s.as_bytes();
    ["http://", "https://"].iter().any(|scheme| {
        raw.get(..scheme.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(scheme.as_bytes()))
    })
}
1978
1979/// Resolve a list of COPY/ADD source strings against the build context,
1980/// rejecting any path that contains a `..` component.
1981fn resolve_copy_sources(ctx: &BuildContext, srcs: &[String]) -> Result<Vec<ResolvedSource>> {
1982 let mut out = Vec::with_capacity(srcs.len());
1983 for src in srcs {
1984 // Reject `..` BEFORE any filesystem access so a malicious
1985 // Dockerfile cannot win a TOCTOU race against the resolver.
1986 if path_contains_parent_dir(src) {
1987 return Err(BuildError::PathTraversal { src: src.clone() });
1988 }
1989 let absolute = ctx.context_dir.join(src);
1990 // Second-line defence: the joined path itself must stay under
1991 // `context_dir`. We canonicalise lazily — only when the entry
1992 // exists — because COPY against a non-existent source is itself
1993 // an error reported below by the copy step. The cheap
1994 // component-walk above already handles the common attack
1995 // surface.
1996 out.push(ResolvedSource {
1997 relative: src.clone(),
1998 absolute,
1999 });
2000 }
2001 Ok(out)
2002}
2003
/// Lexically check whether `src` has a `..` path component.
///
/// Backslashes are rewritten to `/` first, so a Dockerfile written with
/// Windows separators (which the spec discourages but tooling tolerates)
/// is inspected the same way as one using forward slashes. No filesystem
/// access takes place. Names that merely contain dots (`..hidden`,
/// `a..b`) do not count.
fn path_contains_parent_dir(src: &str) -> bool {
    let forward_slashed: String = src
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect();
    for part in Path::new(&forward_slashed).components() {
        if part == Component::ParentDir {
            return true;
        }
    }
    false
}
2016
2017/// Download a URL into a tempdir and return a [`DownloadedFile`].
2018///
2019/// Streams through the shared toolchain integrity primitive
2020/// ([`zlayer_toolchain::package_index::download_verified`]): when `expected`
2021/// carries a lock-pinned/known sha256 it is enforced (a mismatch deletes the
2022/// partial file and errors); when `None`, the digest is computed on download
2023/// (the "compute-and-record" path — the artifact's real hash is available for a
2024/// lock writer, though this WCOW ADD/relocatable site has no writer).
2025async fn download_url(url: &str, expected: Option<&str>) -> Result<DownloadedFile> {
2026 let basename = url
2027 .rsplit('/')
2028 .next()
2029 .and_then(|s| s.split('?').next())
2030 .filter(|s| !s.is_empty())
2031 .unwrap_or("download")
2032 .to_string();
2033 let guard = tempfile::tempdir().map_err(BuildError::IoError)?;
2034 let path = guard.path().join(&basename);
2035 zlayer_toolchain::package_index::download_verified(url, &path, expected).await?;
2036 Ok(DownloadedFile {
2037 path,
2038 basename,
2039 url: url.to_string(),
2040 _guard: guard,
2041 })
2042}
2043
/// Report whether `name` ends in one of the archive suffixes that ADD
/// auto-extracts: `.tar`, `.tar.gz`, `.tgz`, `.tar.bz2`, `.tar.xz`.
///
/// The comparison is ASCII case-insensitive — Dockerfile authors on
/// Windows occasionally write `.TAR.GZ`. Only the name is inspected; the
/// file contents are never read.
#[allow(clippy::case_sensitive_file_extension_comparisons)]
fn is_tarball_path(name: &str) -> bool {
    // `Path::extension()` only sees the final segment, so multi-part
    // suffixes such as `.tar.gz` are matched against the whole name.
    const ARCHIVE_SUFFIXES: [&str; 5] = [".tar", ".tar.gz", ".tgz", ".tar.bz2", ".tar.xz"];
    let lowered = name.to_ascii_lowercase();
    ARCHIVE_SUFFIXES
        .iter()
        .any(|suffix| lowered.ends_with(suffix))
}
2060
2061/// Materialise the COPY/ADD destination root inside a scratch directory.
2062///
2063/// Returns `(scratch_dir, files_root)` where `scratch_dir` is the
2064/// per-instruction staging area under `working_layer_chain_dir` and
2065/// `files_root` is the `Files/` subdirectory that mirrors HCS's
2066/// per-layer payload layout (HCS exports layers with `Files/` +
2067/// `Hives/`; we use the same shape so 4.D's manifest emission can pass
2068/// the directory straight to `wclayer::import_layer`).
2069fn prepare_scratch_for_writes(
2070 config: &WindowsBuildConfig,
2071 skeleton: &BuildSkeleton,
2072 step_index: usize,
2073) -> Result<(PathBuf, PathBuf)> {
2074 let _ = config; // reserved for size_gb / cache policy in a later task
2075 let scratch_id = format!("copy-add-{step_index}-{}", uuid::Uuid::new_v4());
2076 let scratch_dir = skeleton.working_layer_chain_dir.join(&scratch_id);
2077 let files_root = scratch_dir.join("Files");
2078 std::fs::create_dir_all(&files_root).map_err(BuildError::IoError)?;
2079 Ok((scratch_dir, files_root))
2080}
2081
/// Turn a COPY/ADD destination into a relative path that can be joined
/// against `Files/` without escaping it.
///
/// `C:\app\bin` → `app/bin`; `/etc/foo` → `etc/foo`. Processing steps:
/// - backslashes are treated as separators;
/// - a leading drive specifier (`C:`) and any leading separators are
///   removed;
/// - empty and `.` segments are dropped;
/// - `..` removes the preceding segment and is ignored once the root is
///   reached, so `C:\app\..\..\x` resolves to `x` rather than climbing
///   out of the layer.
///
/// An empty or root-only destination yields an empty path.
fn dest_under_files_root(dest: &str) -> PathBuf {
    let unified = dest.replace('\\', "/");
    let bytes = unified.as_bytes();
    let has_drive = bytes.len() >= 2 && bytes[0].is_ascii_alphabetic() && bytes[1] == b':';
    // Both drive bytes are ASCII, so index 2 is a char boundary.
    let without_drive = if has_drive { &unified[2..] } else { unified.as_str() };

    let mut segments: Vec<&str> = Vec::new();
    for segment in without_drive.split('/') {
        match segment {
            "" | "." => {}
            ".." => {
                // Clamp at the layer root instead of walking above it.
                segments.pop();
            }
            name => segments.push(name),
        }
    }
    segments.into_iter().collect()
}
2093
/// Decide whether a COPY/ADD destination must be treated as a directory.
///
/// Mirrors Dockerfile semantics: the destination is a directory when
/// there is more than one source or when it ends with a separator (`/`
/// or `\`). Otherwise the single source is copied to the destination as
/// a file (a rename).
fn destination_is_directory(dest: &str, source_count: usize) -> bool {
    let ends_with_separator = matches!(dest.chars().last(), Some('/' | '\\'));
    ends_with_separator || source_count > 1
}
2102
/// Copy `src` to `dst`, descending into directories.
///
/// - A file is copied with `std::fs::copy` after `dst`'s parent
///   directories are created.
/// - A directory is recreated at `dst` and every child is copied
///   recursively under the same name.
///
/// `std::fs::metadata` is used to classify `src`, so a symlink is treated
/// according to its target.
///
/// # Errors
///
/// Returns the first I/O error encountered; entries copied before the
/// failure are left in place.
fn copy_recursive(src: &Path, dst: &Path) -> std::io::Result<()> {
    if !std::fs::metadata(src)?.is_dir() {
        if let Some(parent) = dst.parent() {
            std::fs::create_dir_all(parent)?;
        }
        std::fs::copy(src, dst)?;
        return Ok(());
    }

    std::fs::create_dir_all(dst)?;
    for child in std::fs::read_dir(src)? {
        let child = child?;
        let from = child.path();
        let to = dst.join(child.file_name());
        copy_recursive(&from, &to)?;
    }
    Ok(())
}
2124
2125/// Extract a tarball into `dest_dir`, picking the decompressor by
2126/// extension. Used by ADD's archive auto-extract path.
2127fn extract_tarball(archive_path: &Path, dest_dir: &Path) -> Result<()> {
2128 use std::fs::File;
2129 use std::io::BufReader;
2130
2131 std::fs::create_dir_all(dest_dir).map_err(BuildError::IoError)?;
2132 let file = File::open(archive_path).map_err(BuildError::IoError)?;
2133 let reader = BufReader::new(file);
2134 let lower = archive_path.to_string_lossy().to_ascii_lowercase();
2135
2136 #[allow(clippy::case_sensitive_file_extension_comparisons)]
2137 let mut archive: tar::Archive<Box<dyn std::io::Read>> = if lower.ends_with(".tar.gz")
2138 || lower.ends_with(".tgz")
2139 {
2140 tar::Archive::new(Box::new(flate2::read::GzDecoder::new(reader)) as Box<dyn std::io::Read>)
2141 } else if lower.ends_with(".tar.bz2") {
2142 tar::Archive::new(Box::new(bzip2::read::BzDecoder::new(reader)) as Box<dyn std::io::Read>)
2143 } else if lower.ends_with(".tar.xz") {
2144 tar::Archive::new(Box::new(xz2::read::XzDecoder::new(reader)) as Box<dyn std::io::Read>)
2145 } else {
2146 tar::Archive::new(Box::new(reader) as Box<dyn std::io::Read>)
2147 };
2148
2149 // Reject entries with `..` so a hostile tarball cannot escape
2150 // `dest_dir`. `tar::Archive::set_overwrite(true)` would silently
2151 // clobber files outside `dest_dir` if we let traversal through.
2152 for entry in archive
2153 .entries()
2154 .map_err(|e| BuildError::TarExtractFailed { source: e })?
2155 {
2156 let mut entry = entry.map_err(|e| BuildError::TarExtractFailed { source: e })?;
2157 let entry_path = entry
2158 .path()
2159 .map_err(|e| BuildError::TarExtractFailed { source: e })?
2160 .into_owned();
2161 if entry_path
2162 .components()
2163 .any(|c| matches!(c, Component::ParentDir))
2164 {
2165 return Err(BuildError::TarExtractFailed {
2166 source: std::io::Error::new(
2167 std::io::ErrorKind::InvalidData,
2168 format!(
2169 "tarball entry '{}' contains '..' — refusing to extract",
2170 entry_path.display()
2171 ),
2172 ),
2173 });
2174 }
2175 entry
2176 .unpack_in(dest_dir)
2177 .map_err(|e| BuildError::TarExtractFailed { source: e })?;
2178 }
2179 Ok(())
2180}
2181
/// Extract a `.zip` archive into `dest_dir`, skipping any entry whose
/// resolved path would escape `dest_dir` (`zip::read::ZipFile::enclosed_name`
/// returns `None` for traversal/absolute entries). Pure-Rust (the `zip`
/// crate) so it works on Windows hosts with no `unzip` on PATH — the macOS
/// path shells out to `unzip`, which doesn't exist here.
///
/// File entries are streamed straight from the archive to disk, so memory
/// use does not grow with the size of an entry. `dest_dir` and any
/// missing parent directories are created as needed; an existing file at
/// the target path is truncated and overwritten.
///
/// # Errors
///
/// Returns [`BuildError::IoError`] if the archive cannot be opened or
/// parsed, an entry cannot be read, or a file/directory cannot be
/// written. Entries extracted before the failure are left on disk.
#[cfg(any(target_os = "windows", test))]
fn extract_zip_archive(archive_path: &Path, dest_dir: &Path) -> Result<()> {
    std::fs::create_dir_all(dest_dir).map_err(BuildError::IoError)?;
    let file = std::fs::File::open(archive_path).map_err(BuildError::IoError)?;
    let mut zip = zip::ZipArchive::new(file)
        .map_err(|e| BuildError::IoError(std::io::Error::other(format!("read zip: {e}"))))?;
    for i in 0..zip.len() {
        let mut entry = zip.by_index(i).map_err(|e| {
            BuildError::IoError(std::io::Error::other(format!("zip entry {i}: {e}")))
        })?;
        let Some(enclosed) = entry.enclosed_name() else {
            tracing::debug!("skipping unsafe zip entry at index {i}");
            continue;
        };
        let out_path = dest_dir.join(enclosed);
        if entry.is_dir() {
            std::fs::create_dir_all(&out_path).map_err(BuildError::IoError)?;
        } else {
            if let Some(parent) = out_path.parent() {
                std::fs::create_dir_all(parent).map_err(BuildError::IoError)?;
            }
            // Stream instead of buffering the whole entry: a single large
            // (or maliciously inflated) entry must not be held in memory.
            let mut out_file = std::fs::File::create(&out_path).map_err(BuildError::IoError)?;
            std::io::copy(&mut entry, &mut out_file).map_err(BuildError::IoError)?;
        }
    }
    Ok(())
}
2217
/// Download a relocatable artifact and lay it down under a controlled
/// prefix inside the mounted scratch layer, returning the container-side
/// directory to add to the image `PATH`.
///
/// The artifact lands in `<mount_root>\Program Files\zlayer\<pkg>\`,
/// where `<pkg>` is the sanitised artifact name, so it does not collide
/// with the base image's own files. Mirrors the macOS
/// `install_direct_release` flow (download → extract → expose on PATH)
/// but writes through the WCIFS layer mount instead of a plain rootfs
/// dir, and uses pure-Rust extractors (no `unzip`/`tar`/`file`
/// shellouts).
///
/// How the download is installed depends on the lowercased
/// `artifact.asset_name`:
/// - `.zip` → extracted with [`extract_zip_archive`];
/// - a tarball suffix → extracted with [`extract_tarball`];
/// - anything else → copied as a bare binary under the download's
///   basename.
///
/// Extraction runs on the blocking thread pool.
///
/// `mount_root` is the host path the scratch layer is mounted at
/// (`WritableLayer::vhd_mount_path`); files written there are captured by
/// the subsequent `export_layer`.
///
/// Returns the *container* PATH entry (e.g.
/// `C:\Program Files\zlayer\<pkg>`), not the host mount path.
///
/// # Errors
///
/// Propagates download errors, and returns [`BuildError::IoError`] for
/// directory creation, copy, or blocking-task join failures; extraction
/// errors are passed through unchanged.
#[cfg(any(target_os = "windows", test))]
#[allow(clippy::case_sensitive_file_extension_comparisons)] // `lower` is already lowercased
async fn install_relocatable_into_layer(
    mount_root: &Path,
    artifact: &crate::buildah::RelocatableArtifact,
) -> Result<String> {
    // 1. Fetch into a host tempdir. No digest is pinned on this path, so
    //    the download takes the compute-and-record route. `download` must
    //    stay alive until installation finishes: it owns the tempdir.
    let download = download_url(&artifact.url, None).await?;

    // 2. Create <mount_root>\Program Files\zlayer\<pkg>\.
    let safe_pkg = sanitize_path_component(&artifact.name);
    let install_dir = mount_root
        .join("Program Files")
        .join("zlayer")
        .join(&safe_pkg);
    tokio::fs::create_dir_all(&install_dir)
        .await
        .map_err(BuildError::IoError)?;

    // 3. Extract archives off the async executor; copy bare binaries.
    let lower = artifact.asset_name.to_ascii_lowercase();
    let is_zip = lower.ends_with(".zip");
    if is_zip || is_tarball_path(&lower) {
        let archive = download.path.clone();
        let target = install_dir.clone();
        let joined = tokio::task::spawn_blocking(move || {
            if is_zip {
                extract_zip_archive(&archive, &target)
            } else {
                extract_tarball(&archive, &target)
            }
        })
        .await;
        joined.map_err(|e| BuildError::IoError(std::io::Error::other(format!("join: {e}"))))??;
    } else {
        // Bare binary: drop it into the install dir under its basename.
        let dest = install_dir.join(&download.basename);
        tokio::fs::copy(&download.path, &dest)
            .await
            .map_err(BuildError::IoError)?;
    }

    // 4. The container sees the layer as its filesystem root, so the
    //    rootfs-relative `Program Files\...` becomes `C:\Program Files\...`.
    Ok(format!(r"C:\Program Files\zlayer\{safe_pkg}"))
}
2284
/// Reduce a package name to a single safe path component.
///
/// ASCII letters, digits and `-`, `_`, `.`, `+` are kept; every other
/// character (separators, colons, spaces, non-ASCII) becomes `_`. Leading
/// and trailing dots are then trimmed, so the result can never be `.` or
/// `..`. If nothing is left, the placeholder `pkg` is returned. A name
/// that is already safe comes back unchanged.
#[cfg(any(target_os = "windows", test))]
fn sanitize_path_component(name: &str) -> String {
    let mut cleaned = String::with_capacity(name.len());
    for ch in name.chars() {
        let keep = ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.' | '+');
        cleaned.push(if keep { ch } else { '_' });
    }
    match cleaned.trim_matches('.') {
        "" => "pkg".to_string(),
        kept => kept.to_string(),
    }
}
2303
/// Put `bin_dir` at the front of the image config's `PATH` variable,
/// using the Windows `;` separator.
///
/// - If the first `PATH=` entry already equals `bin_dir` or starts with
///   `bin_dir;`, nothing changes (idempotent).
/// - Otherwise every `PATH=` entry is removed and a single new one is
///   appended to `env`: `PATH=<bin_dir>;<old>` when the old value was
///   non-empty, `PATH=<bin_dir>` when it was empty or absent.
///
/// The `PATH=` prefix is matched case-sensitively. Mirrors the toolchain
/// PATH-injection logic in `build_skeleton`.
#[cfg(any(target_os = "windows", test))]
fn prepend_image_path(cfg: &mut OciImageConfig, bin_dir: &str) {
    let current: Option<String> = cfg
        .env
        .iter()
        .find_map(|entry| entry.strip_prefix("PATH=").map(str::to_owned));

    let already_leading = current.as_deref().is_some_and(|path| {
        path.strip_prefix(bin_dir)
            .is_some_and(|rest| rest.is_empty() || rest.starts_with(';'))
    });
    if already_leading {
        return;
    }

    cfg.env.retain(|entry| !entry.starts_with("PATH="));
    let updated = match current.as_deref() {
        Some(p) if !p.is_empty() => format!("PATH={bin_dir};{p}"),
        _ => format!("PATH={bin_dir}"),
    };
    cfg.env.push(updated);
}
2328
2329/// Stage every COPY/ADD write into a per-instruction scratch directory
2330/// and commit it as a new RO layer.
2331///
2332/// - On Windows this calls into HCS via [`commit_scratch_as_layer`].
2333/// - Off-Windows the scratch dir is left in place (so unit tests can
2334/// inspect `Files/<dest>/<src>`) and the function returns `Ok(())`
2335/// without touching `skeleton.base_layers` / `skeleton.working_chain`
2336/// — the next Windows-gated `build_skeleton` call is what produces
2337/// real layer descriptors. This preserves the cross-platform unit-test
2338/// contract documented at the top of the module.
2339#[allow(clippy::similar_names)] // `dst`/`dest_*` bindings name distinct concepts (per-entry destination vs. resolved dest_*)
2340async fn apply_filesystem_writes(
2341 config: &WindowsBuildConfig,
2342 skeleton: &mut BuildSkeleton,
2343 step_index: usize,
2344 locals: &[ResolvedSource],
2345 dest: &str,
2346 extract_archives: bool,
2347 downloads: &[DownloadedFile],
2348) -> Result<()> {
2349 let total_sources = locals.len() + downloads.len();
2350 if total_sources == 0 {
2351 // No-op COPY/ADD — Dockerfile parser already rejects truly
2352 // empty source lists, but a `COPY` with only URLs that all
2353 // failed-but-recovered is conceivable. Treat as success.
2354 return Ok(());
2355 }
2356 let dest_is_dir = destination_is_directory(dest, total_sources);
2357 let (scratch_dir, files_root) = prepare_scratch_for_writes(config, skeleton, step_index)?;
2358
2359 let dest_rel = dest_under_files_root(dest);
2360 let dest_abs_in_layer = files_root.join(&dest_rel);
2361
2362 // Process local sources.
2363 for src in locals {
2364 let meta = std::fs::metadata(&src.absolute).map_err(|e| BuildError::ContextRead {
2365 path: src.absolute.clone(),
2366 source: e,
2367 })?;
2368 if extract_archives && meta.is_file() && is_tarball_path(&src.relative) {
2369 extract_tarball(&src.absolute, &dest_abs_in_layer)?;
2370 } else if meta.is_dir() {
2371 std::fs::create_dir_all(&dest_abs_in_layer).map_err(BuildError::IoError)?;
2372 // Directory copy: contents-into-dest (the Docker default).
2373 for entry in std::fs::read_dir(&src.absolute).map_err(BuildError::IoError)? {
2374 let entry = entry.map_err(BuildError::IoError)?;
2375 let child_dst = dest_abs_in_layer.join(entry.file_name());
2376 copy_recursive(&entry.path(), &child_dst).map_err(BuildError::IoError)?;
2377 }
2378 } else if dest_is_dir {
2379 std::fs::create_dir_all(&dest_abs_in_layer).map_err(BuildError::IoError)?;
2380 let basename = src
2381 .absolute
2382 .file_name()
2383 .map(std::ffi::OsStr::to_os_string)
2384 .ok_or_else(|| BuildError::ContextRead {
2385 path: src.absolute.clone(),
2386 source: std::io::Error::new(
2387 std::io::ErrorKind::InvalidInput,
2388 format!(
2389 "COPY/ADD source '{}' has no file name",
2390 src.absolute.display()
2391 ),
2392 ),
2393 })?;
2394 let dst = dest_abs_in_layer.join(basename);
2395 std::fs::copy(&src.absolute, &dst).map_err(BuildError::IoError)?;
2396 } else {
2397 if let Some(parent) = dest_abs_in_layer.parent() {
2398 std::fs::create_dir_all(parent).map_err(BuildError::IoError)?;
2399 }
2400 std::fs::copy(&src.absolute, &dest_abs_in_layer).map_err(BuildError::IoError)?;
2401 }
2402 }
2403
2404 // Process URL downloads.
2405 for download in downloads {
2406 let is_tar = is_tarball_path(&download.basename);
2407 if extract_archives && is_tar {
2408 extract_tarball(&download.path, &dest_abs_in_layer)?;
2409 } else if dest_is_dir {
2410 std::fs::create_dir_all(&dest_abs_in_layer).map_err(BuildError::IoError)?;
2411 let dst = dest_abs_in_layer.join(&download.basename);
2412 std::fs::copy(&download.path, &dst).map_err(BuildError::IoError)?;
2413 } else {
2414 if let Some(parent) = dest_abs_in_layer.parent() {
2415 std::fs::create_dir_all(parent).map_err(BuildError::IoError)?;
2416 }
2417 std::fs::copy(&download.path, &dest_abs_in_layer).map_err(BuildError::IoError)?;
2418 }
2419 tracing::debug!(
2420 step_index = step_index,
2421 url = %download.url,
2422 dest = %dest,
2423 "ADD URL download materialised"
2424 );
2425 }
2426
2427 commit_scratch_as_layer(skeleton, step_index, &scratch_dir).await
2428}
2429
/// Windows: turn the staged COPY/ADD payload into a new read-only layer
/// and append it to the build's layer chain.
///
/// [`apply_filesystem_writes`] leaves the payload under
/// `<scratch_dir>/Files/<dest>` as plain host-filesystem copies. That is
/// **not** the hcsshim `legacyLayerWriter` on-disk format `HcsImportLayer`
/// consumes (4-byte `FileAttributes` headers + verbatim `WIN32_STREAM_ID`
/// `BackupWrite` framing + `.$wcidirs$` directory metadata + a
/// `tombstones.txt` whiteout manifest — the format the unpacker
/// synthesises via [`zlayer_agent::windows::backuptar`] and that
/// `HcsExportLayer` emits). Passing the plain `Files/` directory straight
/// to `HcsImportLayer` fails with `0x80070002` (`ERROR_FILE_NOT_FOUND`).
///
/// The commit therefore follows the same route the unpacker uses for every
/// base-image diff layer (and hcsshim's `docker build` uses for a `COPY`):
/// the plain tree is reframed into the `legacyLayerWriter` staging format —
/// a 4-byte `FileAttributes` header + `BACKUP_DATA` framing per file, a
/// `<name>.$wcidirs$` marker per directory — by
/// [`zlayer_agent::windows::unpacker::stage_host_files_as_diff_layer`], and
/// that staging folder is what `HcsImportLayer` receives.
///
/// An earlier scratch-sandbox design (allocate a writable HCS layer, write
/// the files through its WCIFS host mount, drive a no-process compute
/// system to flush it, then `HcsExportLayer`) was dropped on purpose. It
/// failed at two points: `HcsExportLayer` rejected the
/// never-run-by-a-container scratch with `0x80070002`
/// (`ERROR_FILE_NOT_FOUND`), and the compute system added to "flush" it
/// rejected the already-host-written scratch at construction with
/// `0x80071126` (`ERROR_NOT_A_REPARSE_POINT`) because HCS's layer-combine
/// found entries that were not the reparse-point placeholders it expects.
/// The parent `LayerChain` was correct throughout; the lifecycle was simply
/// the wrong tool for committing offline file writes. `HcsImportLayer` over
/// a `legacyLayerWriter` staging dir is the proven path — the unpacker has
/// already used it on the same host for every base layer before the first
/// COPY runs.
///
/// On success one `LayerRef` is pushed onto `skeleton.base_layers` and one
/// `WindowsLayerEntry` onto `skeleton.working_chain`; the gzip blob is kept
/// next to the layer directory as `<layer uuid>.tar.gz`.
// `async` exists purely for signature parity with the non-Windows twin so
// the single call site can `.await` either one. Nothing in this body
// awaits (there is no compute-system lifecycle any more), hence the
// `clippy::unused_async` allowance.
#[cfg(target_os = "windows")]
#[allow(clippy::unused_async)]
async fn commit_scratch_as_layer(
    skeleton: &mut BuildSkeleton,
    step_index: usize,
    scratch_dir: &Path,
) -> Result<()> {
    use flate2::write::GzEncoder;
    use flate2::Compression;
    use sha2::{Digest, Sha256};
    use zlayer_agent::windows::unpacker;
    use zlayer_agent::windows::wclayer::{self, LayerChain};
    use zlayer_hcs::schema::Layer as HcsLayer;

    // Every fallible step after the privilege call reports through the
    // same error variant.
    let export_failed = |source| BuildError::LayerExportFailed { source };

    // `working_chain` is stored base-first; HCS wants child-to-parent, so
    // walk it in reverse.
    let mut parents = Vec::with_capacity(skeleton.working_chain.len());
    for entry in skeleton.working_chain.iter().rev() {
        parents.push(HcsLayer {
            id: entry.layer_id.clone(),
            path: entry.layer_path.to_string_lossy().into_owned(),
        });
    }
    let parent_chain: LayerChain = LayerChain::new(parents);

    // Plain host files (no NTFS framing) written by the COPY/ADD staging.
    let staged_files_root = scratch_dir.join("Files");

    // `HcsImportLayer` requires SeBackupPrivilege + SeRestorePrivilege on
    // the process token.
    zlayer_agent::windows::layer::enable_backup_restore_privileges()
        .map_err(BuildError::IoError)?;

    // Step 1 — reframe the plain `Files/` tree into the `legacyLayerWriter`
    // staging format (attribute headers + BACKUP_DATA framing per file,
    // `.$wcidirs$` markers per directory). No scratch sandbox, no WCIFS
    // mount, no compute system, no export.
    let staging_name = format!("copy-staging-{step_index}-{}", uuid::Uuid::new_v4());
    let staging_root = skeleton.working_layer_chain_dir.join(staging_name);
    if staging_root.exists() {
        std::fs::remove_dir_all(&staging_root).map_err(export_failed)?;
    }
    std::fs::create_dir_all(&staging_root).map_err(export_failed)?;
    unpacker::stage_host_files_as_diff_layer(&staged_files_root, &staging_root)
        .map_err(export_failed)?;

    // Step 2 — tar + gzip the staging folder for the descriptor 4.D emits.
    // The framing is the same `legacyLayerWriter` format the RUN path's
    // export-folder tar carries, so the blob is byte-format compatible
    // with RUN-produced layers and re-consumable by the unpacker.
    let tar_bytes = tar_export_folder(&staging_root).map_err(export_failed)?;
    let mut gzip = GzEncoder::new(Vec::new(), Compression::default());
    std::io::Write::write_all(&mut gzip, &tar_bytes).map_err(export_failed)?;
    let compressed = gzip.finish().map_err(export_failed)?;
    let digest = format!("sha256:{}", hex::encode(Sha256::digest(&compressed)));
    #[allow(clippy::cast_possible_wrap)]
    let size = compressed.len() as i64;

    // Step 3 — import the staging folder as the new RO layer so later
    // steps can chain off it; this is the same `HcsImportLayer` call the
    // unpacker issues for every base diff layer.
    let new_layer_id = uuid::Uuid::new_v4().to_string();
    let new_layer_path = skeleton.working_layer_chain_dir.join(&new_layer_id);
    std::fs::create_dir_all(&new_layer_path).map_err(export_failed)?;
    wclayer::import_layer(&new_layer_path, &staging_root, &parent_chain)
        .map_err(export_failed)?;

    // Step 3b — keep the gzip bytes in a plain, process-owned file beside
    // the layer dir. OCI export / push reads THIS file: `new_layer_path`
    // is the SYSTEM-owned HCS RO layer dir (restrictive ACLs, holds the
    // VHD not the gzip) and a raw `fs::read` of it fails
    // ERROR_ACCESS_DENIED. Reusing the bytes already in memory avoids
    // re-reading the unpacked layer.
    let blob_file_name = format!("{new_layer_id}.tar.gz");
    let blob_path = skeleton.working_layer_chain_dir.join(blob_file_name);
    std::fs::write(&blob_path, &compressed).map_err(export_failed)?;

    // Step 4 — best-effort cleanup of both staging dirs; the data now
    // lives inside `new_layer_path`'s VHD.
    let _ = std::fs::remove_dir_all(&staging_root);
    if let Err(cleanup_err) = std::fs::remove_dir_all(scratch_dir) {
        tracing::warn!(
            scratch_dir = %scratch_dir.display(),
            step_index = step_index,
            error = %cleanup_err,
            "failed to remove COPY/ADD staging dir after import"
        );
    }

    // HCS resolves a parent layer by NameToGuid(basename), so the id stored
    // in the working chain MUST be that GUID (as it is for base layers) and
    // not the raw uuid directory name — otherwise a later import chaining
    // off this layer fails with ERROR_PATH_NOT_FOUND (0x80070003).
    let new_layer_guid = wclayer::layer_id_for_path(&new_layer_path).map_err(export_failed)?;

    let descriptor = LayerRef {
        digest,
        media_type: OCI_WINDOWS_LAYER_MEDIA_TYPE.to_string(),
        size,
        urls: Vec::new(),
    };
    let chain_entry = WindowsLayerEntry {
        layer_id: new_layer_guid,
        layer_path: new_layer_path,
        blob_path: Some(blob_path),
    };
    skeleton.base_layers.push(descriptor);
    skeleton.working_chain.push(chain_entry);

    Ok(())
}
2592
/// Non-Windows hosts: no-op twin of the Windows `commit_scratch_as_layer`.
///
/// All three arguments are ignored and the function returns `Ok(())`
/// immediately, so the staged scratch directory is left in place for unit
/// tests to assert against and no layer descriptor is produced.
/// Off-Windows COPY/ADD is unit-test fixture territory; an actual
/// production build off-Windows is rejected by [`build_skeleton`]
/// upstream.
#[cfg(not(target_os = "windows"))]
#[allow(clippy::unused_async)] // kept `async` so the shared call site can `.await` either platform variant
async fn commit_scratch_as_layer(
    _skeleton: &mut BuildSkeleton,
    _step_index: usize,
    _scratch_dir: &Path,
) -> Result<()> {
    Ok(())
}
2607
2608// ---------------------------------------------------------------------------
2609// Platform-gated base-image materialisation
2610// ---------------------------------------------------------------------------
2611
/// Produce a per-build identifier of the form `wcow-<32 lowercase hex
/// digits>`, where the hex digits are the current wall-clock time in
/// nanoseconds since the Unix epoch, zero-padded.
///
/// Relying on the clock keeps this free of any external dependency for a
/// single value. Two ids collide only if two builds read the same
/// nanosecond timestamp, which is not a real concern at the builder's call
/// rate. If the system clock reports a time before the epoch the timestamp
/// part is all zeros.
fn new_build_id() -> String {
    let since_epoch = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        // A pre-epoch clock yields the zero duration, i.e. nanos == 0.
        .unwrap_or_default();
    let nanos = since_epoch.as_nanos();
    format!("wcow-{nanos:032x}")
}
2623
/// Windows: pull the base manifest, fetch every layer blob into the
/// blob cache, and call `unpack_windows_image` to materialise the
/// foreign-layer chain on disk.
///
/// Returns, in order:
///
/// 1. the base image's layer descriptors (base-first, one per manifest
///    layer);
/// 2. the resolved manifest metadata, including the raw config blob;
/// 3. the on-disk layer chain, also base-first, aligned 1:1 with (1).
///
/// All three are cross-platform plain-data structs so the caller doesn't
/// need a hard dep on `zlayer-hcs::schema::Layer`.
///
/// The reported `arch` is the `architecture` field of the base image's
/// config blob when that is available, otherwise the architecture of the
/// requested `config.platform` — it is no longer assumed to be `amd64`,
/// so `windows/arm64` builds are described correctly.
///
/// # Errors
///
/// Fails when the working directory cannot be created, `config.platform`
/// does not parse, the blob cache cannot be opened, the manifest pull or
/// the unpack fails, or the base image materialises zero layers. A failed
/// config-blob fetch is *not* an error: it is logged and an empty blob is
/// carried forward.
#[cfg(target_os = "windows")]
async fn pull_and_materialise_base(
    base_image_ref: &str,
    config: &WindowsBuildConfig,
    working_layer_chain_dir: &std::path::Path,
) -> Result<(Vec<LayerRef>, BaseImageManifest, Vec<WindowsLayerEntry>)> {
    use zlayer_agent::windows::unpacker::{self, ResolvedLayerDescriptor};

    std::fs::create_dir_all(working_layer_chain_dir).map_err(BuildError::IoError)?;

    let target = parse_platform_string(&config.platform)?;
    let target = match config.os_version_override.as_ref() {
        Some(v) => target.with_os_version(v.clone()),
        None => target,
    };
    // Architecture the caller asked for, in OCI spelling. The platform
    // string has already been validated above, so anything that is not an
    // arm64 alias is an amd64 alias.
    let requested_arch = match config.platform.split_once('/') {
        Some((_, "arm64" | "aarch64")) => "arm64",
        _ => "amd64",
    };

    let cache_type = zlayer_registry::CacheType::from_env()
        .map_err(|e| BuildError::registry_error(format!("WCOW blob cache from env: {e}")))?;
    let blob_cache = cache_type
        .build()
        .await
        .map_err(|e| BuildError::registry_error(format!("open WCOW blob cache: {e}")))?;
    let puller = zlayer_registry::ImagePuller::with_platform(blob_cache, target);

    let (manifest, _digest) = puller
        .pull_manifest(base_image_ref, &config.registry_auth)
        .await
        .map_err(|e| BuildError::registry_error(format!("pull manifest {base_image_ref}: {e}")))?;

    // Fetch the base config blob so task 4.D can inherit Env / Cmd /
    // Entrypoint defaults. Non-fatal on failure — we surface an empty
    // blob rather than aborting the whole build over a config blob the
    // user might not even need — but the failure is logged so a missing
    // `os.version` / inherited config downstream can be traced back here.
    let config_blob = puller
        .pull_blob(
            base_image_ref,
            &manifest.config.digest,
            &config.registry_auth,
        )
        .await
        .unwrap_or_else(|e| {
            tracing::warn!(
                image = %base_image_ref,
                error = %e,
                "failed to fetch base image config blob; continuing with an empty config"
            );
            Default::default()
        });

    // Parse the config blob once and read the string fields we need from
    // it. An empty or malformed blob simply yields `None` for every field.
    let config_json = serde_json::from_slice::<serde_json::Value>(&config_blob).ok();
    let config_string = |key: &str| -> Option<String> {
        config_json
            .as_ref()?
            .get(key)?
            .as_str()
            .map(ToString::to_string)
    };
    let os_version: Option<String> = config_string("os.version");
    // Prefer what the image itself declares; fall back to the requested
    // platform when the config blob is missing or silent on architecture.
    let arch = config_string("architecture")
        .filter(|declared| !declared.is_empty())
        .unwrap_or_else(|| requested_arch.to_string());

    let descriptors: Vec<ResolvedLayerDescriptor> = manifest
        .layers
        .iter()
        .map(|layer| ResolvedLayerDescriptor {
            digest: layer.digest.clone(),
            media_type: layer.media_type.clone(),
            size: layer.size,
            urls: layer.urls.clone().unwrap_or_default(),
        })
        .collect();

    let unpacked = unpacker::unpack_windows_image(
        &puller,
        base_image_ref,
        &config.registry_auth,
        &descriptors,
        working_layer_chain_dir,
    )
    .await
    .map_err(BuildError::IoError)?;

    let layer_refs: Vec<LayerRef> = descriptors
        .iter()
        .map(|d| LayerRef {
            digest: d.digest.clone(),
            media_type: d.media_type.clone(),
            size: d.size,
            urls: d.urls.clone(),
        })
        .collect();

    // The unpacker returns the chain in child-to-parent order. We carry
    // base-first internally so the per-RUN code can reason about the
    // append at the end of the chain without reversing on every call.
    // `chain` is child-to-parent and `blob_paths` is in the same order; both
    // `.rev()` to base-first so this matches `layer_refs`/`base_layers` order.
    let mut working_chain: Vec<WindowsLayerEntry> = unpacked
        .chain
        .0
        .iter()
        .rev()
        .zip(unpacked.blob_paths.iter().rev())
        .map(|(layer, blob_path)| WindowsLayerEntry {
            layer_id: layer.id.clone(),
            layer_path: PathBuf::from(&layer.path),
            // Non-foreign base layers carry their retained compressed blob so
            // the OCI export / push can upload them; foreign base layers are
            // rehydrated from the manifest's urls[] and carry `None`.
            blob_path: blob_path.clone(),
        })
        .collect();
    // Discard the empty-chain edge case that would otherwise leave us
    // with no parents: a Windows base image must materialise at least
    // one layer, and a zero-length chain is a hard failure for any
    // downstream HCS call.
    if working_chain.is_empty() {
        return Err(BuildError::registry_error(format!(
            "no parent layers were materialised from {base_image_ref} — \
             the base image must contribute at least one layer"
        )));
    }
    // Sanity check: the LayerRef and WindowsLayerEntry vectors must
    // be the same length so 4.D can correlate `(digest, on_disk_path)`
    // 1:1. The unpacker emits one entry per descriptor; this assertion
    // catches a future drift before it silently produces a malformed
    // manifest.
    debug_assert_eq!(layer_refs.len(), working_chain.len());
    // Defensive shrink in case the unpacker ever returns more entries
    // than descriptors — keep the chain consistent with the LayerRef
    // vector that downstream code iterates against.
    working_chain.truncate(layer_refs.len());

    Ok((
        layer_refs,
        BaseImageManifest {
            image_ref: base_image_ref.to_string(),
            os: "windows".to_string(),
            os_version,
            arch,
            config_blob,
        },
        working_chain,
    ))
}
2762
2763/// Non-Windows hosts cannot drive HCS storage APIs, so the base layer
2764/// materialisation step refuses to proceed. The whole `WindowsBuilder`
2765/// public API still compiles on Linux/macOS so this crate's unit tests
2766/// can run across CI, but anyone actually invoking `build_skeleton` off-
2767/// Windows gets a precise error.
2768#[cfg(not(target_os = "windows"))]
2769#[allow(clippy::unused_async)]
2770async fn pull_and_materialise_base(
2771 _base_image_ref: &str,
2772 _config: &WindowsBuildConfig,
2773 _working_layer_chain_dir: &std::path::Path,
2774) -> Result<(Vec<LayerRef>, BaseImageManifest, Vec<WindowsLayerEntry>)> {
2775 Err(BuildError::NotSupported {
2776 operation: "WindowsBuilder::build_skeleton requires target_os = \"windows\" — \
2777 HcsImportLayer / wclayer::* APIs are not available on this host"
2778 .to_string(),
2779 })
2780}
2781
/// Turn an `<os>/<arch>` platform string such as `"windows/amd64"` or
/// `"windows/arm64"` into a [`zlayer_spec::TargetPlatform`].
///
/// The string is split at the first `/`. The OS half is resolved through
/// `OsKind::from_oci_str`; the arch half accepts `amd64` / `x86_64` and
/// `arm64` / `aarch64`.
///
/// Only used on Windows; the non-Windows path never reads
/// `config.platform`.
///
/// # Errors
///
/// Returns an invalid-instruction error tagged `platform` when there is no
/// `/`, `platform.os` when the OS is unrecognised, and `platform.arch`
/// when the architecture is unrecognised.
#[cfg(target_os = "windows")]
fn parse_platform_string(platform: &str) -> Result<zlayer_spec::TargetPlatform> {
    let Some((os_str, arch_str)) = platform.split_once('/') else {
        return Err(BuildError::invalid_instruction(
            "platform",
            format!("expected `<os>/<arch>` (e.g. windows/amd64), got `{platform}`"),
        ));
    };

    let Some(os) = zlayer_spec::OsKind::from_oci_str(os_str) else {
        return Err(BuildError::invalid_instruction(
            "platform.os",
            format!("unrecognised OS `{os_str}`"),
        ));
    };

    let arch = match arch_str {
        "amd64" | "x86_64" => zlayer_spec::ArchKind::Amd64,
        "arm64" | "aarch64" => zlayer_spec::ArchKind::Arm64,
        other => {
            return Err(BuildError::invalid_instruction(
                "platform.arch",
                format!("unrecognised arch `{other}`"),
            ));
        }
    };

    Ok(zlayer_spec::TargetPlatform::new(os, arch))
}
2808
2809// ---------------------------------------------------------------------------
2810// 4.B: RUN step implementation
2811// ---------------------------------------------------------------------------
2812
/// OCI media type for a Windows-layer tar+gzip blob emitted by the
/// builder. Both the COPY/ADD commit path and the RUN commit path stamp
/// this value onto the `LayerRef` they push for a newly produced layer.
/// Matches what the existing `HcsBackend` writes so produced images
/// round-trip through the same registry path.
#[cfg(target_os = "windows")]
const OCI_WINDOWS_LAYER_MEDIA_TYPE: &str = "application/vnd.oci.image.layer.v1.tar+gzip";
2818
/// Termination grace for the per-RUN process, in seconds (10 minutes).
/// The RUN poll loop uses it as its timeout: once this much time has
/// elapsed without an exit code, the process is terminated and the step
/// fails with exit code 124. Mirrors the `backend::hcs::exec` builder
/// default so behaviour is consistent across the two builders.
#[cfg(target_os = "windows")]
const RUN_STEP_TERMINATION_GRACE_SECS: u64 = 10 * 60;
2824
2825/// Windows: execute one RUN step end-to-end (translate → spawn → wait →
2826/// export → commit).
2827// Sequential pipeline orchestration: translate -> spawn -> wait -> export ->
2828// commit. Splitting would scatter the linear state plumbing across helpers
2829// without simplifying any individual stage; keep it inline.
2830#[allow(clippy::too_many_lines)]
2831#[cfg(target_os = "windows")]
2832async fn execute_run_step_impl(
2833 _config: &WindowsBuildConfig,
2834 skeleton: &mut BuildSkeleton,
2835 run: &RunInstruction,
2836 step_index: usize,
2837) -> Result<()> {
2838 use std::time::{Duration, Instant};
2839
2840 use flate2::write::GzEncoder;
2841 use flate2::Compression;
2842 use sha2::{Digest, Sha256};
2843 use tracing::{debug, info, warn};
2844 use zlayer_agent::windows::scratch as agent_scratch;
2845 use zlayer_agent::windows::wclayer::{self, LayerChain};
2846 use zlayer_hcs::process::ComputeProcess;
2847 use zlayer_hcs::schema::{
2848 ComputeSystem as HcsSystemDoc, Container, Layer as HcsLayer, ProcessParameters,
2849 ProcessStatus, SchemaVersion, Storage,
2850 };
2851 use zlayer_hcs::system::ComputeSystem;
2852
2853 // 1. Build the (child-to-parent) HCS parent chain from the
2854 // base-first working_chain. `LayerChain` is HCS's wire format.
2855 if skeleton.working_chain.is_empty() {
2856 return Err(BuildError::LayerCreate {
2857 message: "RUN attempted with an empty working layer chain — \
2858 the base image must materialise at least one layer"
2859 .to_string(),
2860 });
2861 }
2862 let parent_chain: LayerChain = LayerChain::new(
2863 skeleton
2864 .working_chain
2865 .iter()
2866 .rev()
2867 .map(|e| HcsLayer {
2868 id: e.layer_id.clone(),
2869 path: e.layer_path.to_string_lossy().into_owned(),
2870 })
2871 .collect(),
2872 );
2873
2874 // 2. Maybe rewrite the command for Chocolatey. Detection is
2875 // cross-platform pure logic; the resolver fetch is async and
2876 // needs the source distro derived from the FROM image.
2877 let source_distro = derive_source_distro(&skeleton.base_manifest);
2878 let crate::buildah::TranslatedRun {
2879 command_line,
2880 skipped_packages,
2881 relocatable,
2882 } = translate_run_command(
2883 &run.command,
2884 &source_distro,
2885 skeleton.provisioned_toolchain_language.as_deref(),
2886 )
2887 .await?;
2888 for skipped in &skipped_packages {
2889 info!(
2890 step_index = step_index,
2891 package = %skipped,
2892 "skipping Linux-only package with no Chocolatey equivalent"
2893 );
2894 }
2895 debug!(step_index = step_index, command = %command_line, "RUN");
2896
2897 // 3. Allocate a fresh scratch layer on top of the chain. The
2898 // scratch lives in <working_layer_chain_dir>/<scratch_id>/ so
2899 // cleanup is deterministic.
2900 let scratch_id = format!("scratch-{}", uuid::Uuid::new_v4());
2901 let scratch_dir = skeleton.working_layer_chain_dir.join(&scratch_id);
2902 // `config.scratch_size_gb` is preserved on the public config for
2903 // forward-compat but ignored here: `CreateSandboxLayer` (the canonical
2904 // HCS scratch-creation path used by hcsshim) takes no size option, so
2905 // matching its ABI means dropping the GB hint at this layer.
2906 // Privileges are idempotent; enabling here costs ~one syscall and
2907 // means a caller does not need to remember the prerequisite.
2908 zlayer_agent::windows::layer::enable_backup_restore_privileges()
2909 .map_err(BuildError::IoError)?;
2910 let scratch_layer = agent_scratch::create(&scratch_dir, &parent_chain).map_err(|e| {
2911 BuildError::LayerCreate {
2912 message: format!("scratch layer create at {}: {e}", scratch_dir.display()),
2913 }
2914 })?;
2915
2916 // 3b. Install any relocatable artifacts directly into the mounted scratch
2917 // layer BEFORE the HCS process runs. Writing through the layer's host
2918 // mount path bakes the files into the diff that `export_layer` later
2919 // captures, so the produced image carries the binaries with NO
2920 // Chocolatey in its base — mirroring the macOS rootfs-extraction path.
2921 // Each installed artifact contributes a bin dir to the image `PATH`.
2922 if !relocatable.is_empty() {
2923 let mount_root = std::path::PathBuf::from(scratch_layer.vhd_mount_path());
2924 for artifact in &relocatable {
2925 let bin_dir = install_relocatable_into_layer(&mount_root, artifact)
2926 .await
2927 .map_err(|e| {
2928 // On failure, tear the scratch down so we don't leak a
2929 // half-written WCIFS layer, then surface the error.
2930 warn!(
2931 package = %artifact.name,
2932 error = %e,
2933 "relocatable artifact install failed; tearing down scratch layer"
2934 );
2935 e
2936 })?;
2937 prepend_image_path(&mut skeleton.image_config, &bin_dir);
2938 info!(
2939 step_index = step_index,
2940 package = %artifact.name,
2941 bin_dir = %bin_dir,
2942 "installed relocatable artifact into rootfs layer"
2943 );
2944 }
2945 }
2946
2947 // 4. Build the compute-system doc and create + start the system.
2948 let hcs_id = format!("zlayer-build-run-{}", uuid::Uuid::new_v4());
2949 let parents_for_doc: Vec<HcsLayer> = parent_chain.0.clone();
2950 let doc = HcsSystemDoc {
2951 owner: "zlayer-builder".to_string(),
2952 schema_version: SchemaVersion::default(),
2953 hosting_system_id: String::new(),
2954 container: Some(Container {
2955 guest_os: Some(zlayer_hcs::schema::GuestOs {
2956 host_name: Some("zlayer-build".to_string()),
2957 }),
2958 storage: Some(Storage {
2959 layers: parents_for_doc,
2960 path: Some(scratch_layer.layer_path().to_string_lossy().into_owned()),
2961 }),
2962 networking: None,
2963 mapped_directories: Vec::new(),
2964 mapped_pipes: Vec::new(),
2965 processor: None,
2966 memory: None,
2967 }),
2968 virtual_machine: None,
2969 should_terminate_on_last_handle_closed: Some(true),
2970 };
2971 let doc_json = serde_json::to_string(&doc).map_err(|e| BuildError::LayerCreate {
2972 message: format!("serialize HCS compute-system doc: {e}"),
2973 })?;
2974
2975 let system = ComputeSystem::create(&hcs_id, &doc_json)
2976 .await
2977 .map_err(|e| BuildError::LayerCreate {
2978 message: format!("HcsCreateComputeSystem({hcs_id}): {e}"),
2979 })?;
2980 system
2981 .start("")
2982 .await
2983 .map_err(|e| BuildError::LayerCreate {
2984 message: format!("HcsStartComputeSystem({hcs_id}): {e}"),
2985 })?;
2986
2987 // 5. Spawn the build process and poll for exit. We capture stderr/
2988 // stdout pipes only nominally — HCS pipe plumbing is a separate
2989 // task per the exec module's docs; the exit code is what gates
2990 // success here.
2991 let params = ProcessParameters {
2992 command_line: command_line.clone(),
2993 working_directory: String::new(),
2994 environment: BTreeMap::default(),
2995 emulate_console: Some(false),
2996 create_std_in_pipe: Some(false),
2997 create_std_out_pipe: Some(true),
2998 create_std_err_pipe: Some(true),
2999 console_size: None,
3000 user: None,
3001 };
3002 let params_json = serde_json::to_string(¶ms).map_err(|e| BuildError::LayerCreate {
3003 message: format!("serialize ProcessParameters: {e}"),
3004 })?;
3005
3006 let exec_result = async {
3007 let process = ComputeProcess::create(system.raw(), ¶ms_json)
3008 .await
3009 .map_err(|e| BuildError::LayerCreate {
3010 message: format!("HcsCreateProcess: {e}"),
3011 })?;
3012 info!(step_index = step_index, command = %command_line, "build RUN process started");
3013
3014 let started = Instant::now();
3015 let poll_interval = Duration::from_millis(250);
3016 let timeout = Duration::from_secs(RUN_STEP_TERMINATION_GRACE_SECS);
3017
3018 loop {
3019 let props_json = process
3020 .properties(r#"{"PropertyTypes":["ProcessStatus"]}"#)
3021 .await
3022 .map_err(|e| BuildError::LayerCreate {
3023 message: format!("HcsGetProcessProperties: {e}"),
3024 })?;
3025 if let Ok(status) = serde_json::from_str::<ProcessStatus>(&props_json) {
3026 if let Some(code) = status.exit_code {
3027 if code == 0 {
3028 return Ok(());
3029 }
3030 return Err(BuildError::RunStepFailed {
3031 step_index,
3032 #[allow(clippy::cast_possible_wrap)]
3033 exit_code: code as i32,
3034 stderr_tail: format!(
3035 "(stdio capture not yet wired) command: {command_line}"
3036 ),
3037 });
3038 }
3039 }
3040
3041 if started.elapsed() >= timeout {
3042 let _ = process.terminate("").await;
3043 return Err(BuildError::RunStepFailed {
3044 step_index,
3045 exit_code: 124,
3046 stderr_tail: format!(
3047 "RUN timed out after {RUN_STEP_TERMINATION_GRACE_SECS}s: {command_line}"
3048 ),
3049 });
3050 }
3051 tokio::time::sleep(poll_interval).await;
3052 }
3053 }
3054 .await;
3055
3056 // 6. Tear down the compute system regardless of the process exit
3057 // outcome so we don't leak HCS state. Errors here are warnings —
3058 // the exec_result still flows through to the caller.
3059 if let Err(e) = system.terminate("").await {
3060 warn!(
3061 hcs_id = %hcs_id,
3062 error = %e,
3063 "HcsTerminateComputeSystem failed during RUN cleanup"
3064 );
3065 }
3066
3067 // 7. Propagate any execution failure (after cleanup).
3068 exec_result?;
3069
3070 // 8. Export the writable-layer diff into a fresh export directory
3071 // sitting next to the scratch dir. `wclayer::export_layer`
3072 // refuses to write into a non-empty folder, so we create a
3073 // fresh one.
3074 let export_dir = skeleton
3075 .working_layer_chain_dir
3076 .join(format!("export-{scratch_id}"));
3077 if export_dir.exists() {
3078 std::fs::remove_dir_all(&export_dir)
3079 .map_err(|e| BuildError::LayerExportFailed { source: e })?;
3080 }
3081 std::fs::create_dir_all(&export_dir)
3082 .map_err(|e| BuildError::LayerExportFailed { source: e })?;
3083 wclayer::export_layer(scratch_layer.layer_path(), &export_dir, &parent_chain, "{}")
3084 .map_err(|e| BuildError::LayerExportFailed { source: e })?;
3085
3086 // 9. Tar + gzip the export folder, compute digest + diff_id.
3087 let tar_bytes =
3088 tar_export_folder(&export_dir).map_err(|e| BuildError::LayerExportFailed { source: e })?;
3089 let _diff_id = format!("sha256:{}", hex::encode(Sha256::digest(&tar_bytes)));
3090 let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
3091 std::io::Write::write_all(&mut encoder, &tar_bytes)
3092 .map_err(|e| BuildError::LayerExportFailed { source: e })?;
3093 let compressed = encoder
3094 .finish()
3095 .map_err(|e| BuildError::LayerExportFailed { source: e })?;
3096 let digest = format!("sha256:{}", hex::encode(Sha256::digest(&compressed)));
3097 #[allow(clippy::cast_possible_wrap)]
3098 let size = compressed.len() as i64;
3099
3100 // 10. Tear down the writable layer (detach WCIFS + destroy dir) so
3101 // the next RUN starts from a clean state. The new RO layer
3102 // materialises FROM the export folder via HcsImportLayer below.
3103 if let Err(e) = scratch_layer.detach_and_destroy() {
3104 warn!(
3105 scratch_id = %scratch_id,
3106 error = %e,
3107 "scratch teardown failed after RUN export; continuing"
3108 );
3109 }
3110
3111 // 11. Materialise the export folder as a new read-only layer that
3112 // subsequent RUN steps can chain off. The new layer lives
3113 // under <working_layer_chain_dir>/<new_layer_id>/. We import it
3114 // from the export folder we just produced — that's the inverse
3115 // of `unpack_windows_image`'s per-layer import.
3116 let new_layer_id = uuid::Uuid::new_v4().to_string();
3117 let new_layer_path = skeleton.working_layer_chain_dir.join(&new_layer_id);
3118 std::fs::create_dir_all(&new_layer_path)
3119 .map_err(|e| BuildError::LayerExportFailed { source: e })?;
3120 wclayer::import_layer(&new_layer_path, &export_dir, &parent_chain)
3121 .map_err(|e| BuildError::LayerExportFailed { source: e })?;
3122
3123 // Persist the compressed blob to a plain, process-owned file alongside the
3124 // layer dir. The OCI export / push reads THIS file — `new_layer_path` is the
3125 // SYSTEM-owned HCS RO layer dir (restrictive ACLs, holds the VHD not the
3126 // gzip) and a raw `fs::read` of it fails ERROR_ACCESS_DENIED. Reuse the gzip
3127 // bytes we already computed rather than re-reading the unpacked layer.
3128 let blob_path = skeleton
3129 .working_layer_chain_dir
3130 .join(format!("{new_layer_id}.tar.gz"));
3131 std::fs::write(&blob_path, &compressed)
3132 .map_err(|e| BuildError::LayerExportFailed { source: e })?;
3133
3134 // Best-effort cleanup of the staging export dir; the data is now
3135 // materialised inside `new_layer_path`'s VHD.
3136 if let Err(e) = std::fs::remove_dir_all(&export_dir) {
3137 warn!(
3138 export_dir = %export_dir.display(),
3139 error = %e,
3140 "failed to remove RUN export folder after import"
3141 );
3142 }
3143
3144 // 12. Append the new layer to both chains. `base_layers` is the
3145 // base-first descriptor list 4.D will serialise into the
3146 // manifest; `working_chain` is the on-disk view used by future
3147 // RUN steps. The chain id MUST be NameToGuid(basename) (like the
3148 // base layers) so a later import chaining off it resolves the
3149 // parent — a raw uuid yields ERROR_PATH_NOT_FOUND (0x80070003).
3150 let new_layer_guid = wclayer::layer_id_for_path(&new_layer_path)
3151 .map_err(|e| BuildError::LayerExportFailed { source: e })?;
3152 skeleton.base_layers.push(LayerRef {
3153 digest,
3154 media_type: OCI_WINDOWS_LAYER_MEDIA_TYPE.to_string(),
3155 size,
3156 urls: Vec::new(),
3157 });
3158 skeleton.working_chain.push(WindowsLayerEntry {
3159 layer_id: new_layer_guid,
3160 layer_path: new_layer_path,
3161 blob_path: Some(blob_path),
3162 });
3163
3164 Ok(())
3165}
3166
3167/// Non-Windows hosts: RUN cannot drive HCS, so the step refuses.
3168#[cfg(not(target_os = "windows"))]
3169#[allow(clippy::unused_async)]
3170async fn execute_run_step_impl(
3171 _config: &WindowsBuildConfig,
3172 _skeleton: &mut BuildSkeleton,
3173 _run: &RunInstruction,
3174 _step_index: usize,
3175) -> Result<()> {
3176 Err(BuildError::NotSupported {
3177 operation: "WindowsBuilder::execute_instruction RUN requires target_os = \"windows\" — \
3178 the HCS compute-system + wclayer::export_layer APIs are not available on this host"
3179 .to_string(),
3180 })
3181}
3182
/// Build an in-memory tar archive from the contents of `folder`,
/// preserving the `Files/`, `Hives/`, `tombstones.txt`, `UtilityVM/`
/// layout HCS produced during `HcsExportLayer`.
///
/// Entries are stored relative to `folder` (the folder itself is not an
/// entry); the walk is delegated to [`append_dir_contents`].
///
/// Mirrors `crate::backend::hcs::layer::tar_export_folder` (deliberately
/// duplicated here because that helper is `pub(crate)` to its module and
/// cross-importing across `cfg`-gated modules creates unnecessary
/// coupling). Kept Windows-only so non-Windows builds don't pull in the
/// extra code.
///
/// # Errors
///
/// Returns any I/O error raised while walking or reading `folder`, or
/// while finalising the archive (the latter prefixed `tar finalize:`).
#[cfg(target_os = "windows")]
fn tar_export_folder(folder: &std::path::Path) -> std::io::Result<Vec<u8>> {
    let mut builder = tar::Builder::new(Vec::new());
    append_dir_contents(&mut builder, folder, std::path::Path::new(""))?;
    // Write the end-of-archive trailer explicitly so a failure surfaces
    // here rather than inside `into_inner`.
    builder.finish()?;
    // `into_inner` hands back the owned `Vec<u8>`; there is nothing left
    // to flush once the builder has been finished.
    builder
        .into_inner()
        .map_err(|e| std::io::Error::other(format!("tar finalize: {e}")))
}
3208
/// Recursively append every entry under `dir` to `builder`.
///
/// * `builder` — tar builder receiving the entries.
/// * `dir` — on-disk directory to walk.
/// * `tar_rel` — path of `dir` inside the archive (empty for the root);
///   each entry is stored as `tar_rel/<name>` with `\` normalised to `/`.
///
/// Directories are written as a directory entry (mode `0o755`, trailing
/// `/`) followed by their contents; everything else is read fully into
/// memory and written as a file entry (mode `0o644`). All entries carry
/// `mtime = 0`.
///
/// Entries are visited in sorted file-name order so the archive bytes —
/// and therefore the digests computed over them — do not depend on the
/// order `read_dir` happens to return.
///
/// Entries are written with `Builder::append_data` rather than
/// `Header::set_path` + `Builder::append`: `set_path` rejects names longer
/// than 100 bytes on a GNU header, whereas `append_data` falls back to the
/// GNU long-name extension, so deeply nested layer paths no longer abort
/// the export.
///
/// # Errors
///
/// Propagates any I/O error from listing `dir`, reading entry metadata or
/// file contents, or writing to the archive.
#[cfg(target_os = "windows")]
fn append_dir_contents<W: std::io::Write>(
    builder: &mut tar::Builder<W>,
    dir: &std::path::Path,
    tar_rel: &std::path::Path,
) -> std::io::Result<()> {
    // Collect first so the listing can be sorted for deterministic output.
    let mut entries = std::fs::read_dir(dir)?.collect::<std::io::Result<Vec<_>>>()?;
    entries.sort_by_key(std::fs::DirEntry::file_name);

    for entry in entries {
        let path = entry.path();
        let entry_tar_path = tar_rel.join(entry.file_name());
        // Tar member names always use forward slashes.
        let tar_name = entry_tar_path.to_string_lossy().replace('\\', "/");
        let meta = entry.metadata()?;
        if meta.is_dir() {
            let mut header = tar::Header::new_gnu();
            header.set_entry_type(tar::EntryType::Directory);
            header.set_size(0);
            header.set_mode(0o755);
            header.set_mtime(0);
            // `append_data` sets the path (with long-name fallback) and the
            // checksum before writing the header.
            builder.append_data(&mut header, format!("{tar_name}/"), std::io::empty())?;
            append_dir_contents(builder, &path, &entry_tar_path)?;
        } else {
            let data = std::fs::read(&path)?;
            let mut header = tar::Header::new_gnu();
            header.set_size(data.len() as u64);
            header.set_mode(0o644);
            header.set_mtime(0);
            builder.append_data(&mut header, tar_name, data.as_slice())?;
        }
    }
    Ok(())
}
3247
3248// ---------------------------------------------------------------------------
3249// 4.B helpers: Chocolatey translation
3250//
3251// The translation logic itself now lives in `crate::buildah` (under the
3252// shared `DockerfileTranslator`) so the production HCS backend and this
3253// test-only `WindowsBuilder` flow through one implementation. The
3254// helper free-functions and the unit tests for them were moved
3255// alongside; the Windows-only production path below keeps a thin
3256// `translate_run_command` shim so the call-site reads naturally.
3257// ---------------------------------------------------------------------------
3258
/// Windows-targeted shim over the shared RUN-command translator.
///
/// Constructs a [`crate::buildah::DockerfileTranslator`] pinned to
/// [`crate::backend::ImageOs::Windows`] and forwards `cmd`,
/// `source_distro` and `provisioned_toolchain_language` to its
/// `translate_run_command`, returning that call's result unchanged. It
/// exists only so the Windows caller in `execute_run_step_impl` reads
/// naturally; the logic (and its unit tests) live on the translator.
#[cfg(target_os = "windows")]
async fn translate_run_command(
    cmd: &ShellOrExec,
    source_distro: &str,
    provisioned_toolchain_language: Option<&str>,
) -> Result<crate::buildah::TranslatedRun> {
    let windows_translator =
        crate::buildah::DockerfileTranslator::new(crate::backend::ImageOs::Windows);
    let translated = windows_translator
        .translate_run_command(cmd, source_distro, provisioned_toolchain_language)
        .await;
    translated
}
3278
/// Derive a `RepoSources`-style distro key (e.g. `"debian-12"`,
/// `"ubuntu-22.04"`, `"alpine-3.19"`) from the resolved base manifest.
///
/// Parses `base.image_ref` as `[registry/]repository[:tag][@digest]`:
///
/// 1. A trailing `@<digest>` pin is dropped first, so
///    `debian:12@sha256:…` is treated like `debian:12` instead of being
///    split on the digest's own `:`.
/// 2. The tag is whatever follows the last `:`, unless that suffix
///    contains a `/` (then the colon belonged to a `host:port` registry
///    prefix and the tag defaults to `"latest"`).
/// 3. The repository is reduced to its last `/`-separated segment and
///    matched case-insensitively against the known distro names.
///
/// Returns `"<distro>-<tag>"` for recognised repositories. Unknown image
/// names fall back to `"debian-12"` (the most common Linux base) with a
/// warn log so the user notices.
#[cfg(any(target_os = "windows", test))]
fn derive_source_distro(base: &BaseImageManifest) -> String {
    let ref_str = base.image_ref.as_str();
    // Strip a digest pin before looking for the tag separator; otherwise
    // the `:` inside `sha256:<hex>` would be mistaken for it.
    let name_and_tag = ref_str
        .split_once('@')
        .map_or(ref_str, |(name, _digest)| name);
    // Split on the last `:` so registry hosts (which contain colons)
    // don't confuse the parser. e.g. `mcr.microsoft.com/foo:1` →
    // (`mcr.microsoft.com/foo`, `1`).
    let (repo, tag) = match name_and_tag.rsplit_once(':') {
        Some((r, t)) if !t.contains('/') => (r, t),
        _ => (name_and_tag, "latest"),
    };
    // Strip a registry prefix (e.g. `docker.io/library/debian` →
    // `debian`).
    let short_repo = repo.rsplit('/').next().unwrap_or(repo);
    match short_repo.to_ascii_lowercase().as_str() {
        "debian" => format!("debian-{tag}"),
        "ubuntu" => format!("ubuntu-{tag}"),
        "alpine" => format!("alpine-{tag}"),
        "fedora" => format!("fedora-{tag}"),
        "centos" | "centos-stream" => format!("centos-{tag}"),
        "rocky" | "rockylinux" => format!("rocky-{tag}"),
        "almalinux" => format!("alma-{tag}"),
        "rhel" | "ubi8" | "ubi9" => format!("rhel-{tag}"),
        other => {
            tracing::warn!(
                image_ref = %ref_str,
                short_repo = %other,
                "could not derive Chocolatey source distro from base image; defaulting to debian-12"
            );
            "debian-12".to_string()
        }
    }
}
3317
3318// ---------------------------------------------------------------------------
3319// 4.D: OCI manifest + image config emission
3320// ---------------------------------------------------------------------------
3321
/// Media type used for the foreign Windows base layer. MCR
/// publishes the Windows base images under this media type and the
/// Windows daemon recognises it as a foreign layer (one whose bytes
/// must be pulled from `urls[]` rather than the manifest's
/// destination registry); preserving the type end-to-end keeps the
/// foreign-layer optimisation working when the emitted manifest is
/// pushed in 4.E.
///
/// Note that the string itself is the Docker (`vnd.docker.…`) foreign-layer
/// type, not an `vnd.oci.…` one.
pub(crate) const FOREIGN_WINDOWS_LAYER_MEDIA_TYPE: &str =
    "application/vnd.docker.image.rootfs.foreign.diff.tar.gzip";

/// OCI media type used for builder-produced (RUN / COPY / ADD) layers.
/// Matches the existing `OCI_WINDOWS_LAYER_MEDIA_TYPE` (which is
/// gated on `target_os = "windows"` for use in the RUN-step
/// implementation); kept ungated here so the emit path compiles
/// everywhere.
pub(crate) const OCI_TAR_GZIP_LAYER_MEDIA_TYPE: &str =
    "application/vnd.oci.image.layer.v1.tar+gzip";

/// OCI media type used for the image config blob; written as the
/// `mediaType` of the manifest's `config` descriptor.
pub(crate) const OCI_IMAGE_CONFIG_MEDIA_TYPE: &str = "application/vnd.oci.image.config.v1+json";

/// OCI media type used for the image manifest blob; written as the
/// manifest's own top-level `mediaType`.
pub(crate) const OCI_IMAGE_MANIFEST_MEDIA_TYPE: &str = "application/vnd.oci.image.manifest.v1+json";
3345
3346/// Compute `sha256:<hex>` of an in-memory blob. The same form Docker /
3347/// OCI use everywhere for content-addressable references.
3348pub(crate) fn compute_sha256_hex(blob: &[u8]) -> String {
3349 format!("sha256:{}", hex::encode(Sha256::digest(blob)))
3350}
3351
3352/// Extract `rootfs.diff_ids` from a base image config blob. Used to
3353/// inherit the base layer's `diff_id` for the foreign-layer entry in
3354/// the emitted image config — we cannot recompute the foreign layer's
3355/// `diff_id` locally because we never see the uncompressed bytes (the
3356/// unpacker imports the layer into the HCS storage filter directly,
3357/// not through a tar stream).
3358fn base_diff_ids(config_blob: &[u8]) -> Vec<String> {
3359 if config_blob.is_empty() {
3360 return Vec::new();
3361 }
3362 let Ok(parsed) = serde_json::from_slice::<serde_json::Value>(config_blob) else {
3363 return Vec::new();
3364 };
3365 parsed
3366 .get("rootfs")
3367 .and_then(|r| r.get("diff_ids"))
3368 .and_then(|d| d.as_array())
3369 .map(|arr| {
3370 arr.iter()
3371 .filter_map(|v| v.as_str().map(ToString::to_string))
3372 .collect()
3373 })
3374 .unwrap_or_default()
3375}
3376
3377/// Read the base image config blob's `os.version` field. The value is
3378/// the platform-build identifier (e.g. `"10.0.20348.2227"`) Windows
3379/// HCS demands at container start.
3380fn base_config_os_version(config_blob: &[u8]) -> Option<String> {
3381 if config_blob.is_empty() {
3382 return None;
3383 }
3384 let parsed = serde_json::from_slice::<serde_json::Value>(config_blob).ok()?;
3385 parsed
3386 .get("os.version")
3387 .and_then(|v| v.as_str())
3388 .map(ToString::to_string)
3389}
3390
3391/// Resolve `os.version` for the emitted image config from the available
3392/// sources in priority order:
3393///
3394/// 1. [`WindowsBuildConfig::os_version_override`] — explicit user pin.
3395/// 2. [`BaseImageManifest::os_version`] — what the resolver wrote when
3396/// the base manifest was pulled (this is the field 4.A populates by
3397/// re-reading the base manifest's platform descriptor).
3398/// 3. The base image config blob's `os.version` field, parsed
3399/// on-the-fly.
3400fn resolve_os_version(
3401 config: &WindowsBuildConfig,
3402 base_manifest: &BaseImageManifest,
3403) -> Result<String> {
3404 if let Some(v) = config.os_version_override.as_ref() {
3405 if !v.trim().is_empty() {
3406 return Ok(v.clone());
3407 }
3408 }
3409 if let Some(v) = base_manifest.os_version.as_ref() {
3410 if !v.trim().is_empty() {
3411 return Ok(v.clone());
3412 }
3413 }
3414 if let Some(v) = base_config_os_version(&base_manifest.config_blob) {
3415 if !v.trim().is_empty() {
3416 return Ok(v);
3417 }
3418 }
3419 Err(BuildError::OsVersionUnresolved)
3420}
3421
3422/// Map `BaseImageManifest::arch` to OCI's `"architecture"` field. Most
3423/// base manifests already publish `"amd64"`; we mirror the OCI vocabulary
3424/// here so consumers of [`BuiltImage`] don't have to re-translate.
3425fn arch_for_config(base_manifest: &BaseImageManifest) -> String {
3426 match base_manifest.arch.as_str() {
3427 "x86_64" => "amd64".to_string(),
3428 "aarch64" => "arm64".to_string(),
3429 other => other.to_string(),
3430 }
3431}
3432
3433/// Format a `DateTime<Utc>` as an OCI-conformant ISO-8601 timestamp.
3434/// OCI prescribes the RFC3339 form with nanosecond precision; chrono's
3435/// `to_rfc3339_opts` honours that contract.
3436fn iso8601(ts: DateTime<Utc>) -> String {
3437 ts.to_rfc3339_opts(chrono::SecondsFormat::Nanos, /* use_z */ true)
3438}
3439
3440/// Build the `config` sub-object of the OCI image config from the
3441/// accumulated [`OciImageConfig`]. The OCI image-spec uses `PascalCase`
3442/// keys here (`Env`, `WorkingDir`, …) while the top-level keys
3443/// (`architecture`, `os`, `rootfs`, …) use lowercase.
3444#[allow(clippy::too_many_lines)]
3445fn build_image_config_config_object(cfg: &OciImageConfig) -> serde_json::Value {
3446 let mut obj = serde_json::Map::new();
3447 if let Some(wd) = &cfg.working_dir {
3448 obj.insert(
3449 "WorkingDir".to_string(),
3450 serde_json::Value::String(wd.clone()),
3451 );
3452 }
3453 if !cfg.env.is_empty() {
3454 obj.insert(
3455 "Env".to_string(),
3456 serde_json::Value::Array(
3457 cfg.env
3458 .iter()
3459 .map(|e| serde_json::Value::String(e.clone()))
3460 .collect(),
3461 ),
3462 );
3463 }
3464 if let Some(ep) = &cfg.entrypoint {
3465 obj.insert(
3466 "Entrypoint".to_string(),
3467 serde_json::Value::Array(
3468 ep.iter()
3469 .map(|s| serde_json::Value::String(s.clone()))
3470 .collect(),
3471 ),
3472 );
3473 }
3474 if let Some(c) = &cfg.cmd {
3475 obj.insert(
3476 "Cmd".to_string(),
3477 serde_json::Value::Array(
3478 c.iter()
3479 .map(|s| serde_json::Value::String(s.clone()))
3480 .collect(),
3481 ),
3482 );
3483 }
3484 if let Some(u) = &cfg.user {
3485 obj.insert("User".to_string(), serde_json::Value::String(u.clone()));
3486 }
3487 if !cfg.exposed_ports.is_empty() {
3488 let mut m = serde_json::Map::new();
3489 for (k, v) in &cfg.exposed_ports {
3490 m.insert(k.clone(), v.clone());
3491 }
3492 obj.insert("ExposedPorts".to_string(), serde_json::Value::Object(m));
3493 }
3494 if !cfg.volumes.is_empty() {
3495 let mut m = serde_json::Map::new();
3496 for (k, v) in &cfg.volumes {
3497 m.insert(k.clone(), v.clone());
3498 }
3499 obj.insert("Volumes".to_string(), serde_json::Value::Object(m));
3500 }
3501 if !cfg.labels.is_empty() {
3502 let mut m = serde_json::Map::new();
3503 for (k, v) in &cfg.labels {
3504 m.insert(k.clone(), serde_json::Value::String(v.clone()));
3505 }
3506 obj.insert("Labels".to_string(), serde_json::Value::Object(m));
3507 }
3508 if let Some(s) = &cfg.stop_signal {
3509 obj.insert(
3510 "StopSignal".to_string(),
3511 serde_json::Value::String(s.clone()),
3512 );
3513 }
3514 if let Some(hc) = &cfg.healthcheck {
3515 let mut hm = serde_json::Map::new();
3516 hm.insert(
3517 "Test".to_string(),
3518 serde_json::Value::Array(
3519 hc.test
3520 .iter()
3521 .map(|s| serde_json::Value::String(s.clone()))
3522 .collect(),
3523 ),
3524 );
3525 if let Some(iv) = &hc.interval {
3526 hm.insert(
3527 "Interval".to_string(),
3528 serde_json::Value::String(iv.clone()),
3529 );
3530 }
3531 if let Some(to) = &hc.timeout {
3532 hm.insert("Timeout".to_string(), serde_json::Value::String(to.clone()));
3533 }
3534 if let Some(sp) = &hc.start_period {
3535 hm.insert(
3536 "StartPeriod".to_string(),
3537 serde_json::Value::String(sp.clone()),
3538 );
3539 }
3540 if let Some(r) = hc.retries {
3541 hm.insert("Retries".to_string(), serde_json::Value::Number(r.into()));
3542 }
3543 obj.insert("Healthcheck".to_string(), serde_json::Value::Object(hm));
3544 }
3545 if let Some(sh) = &cfg.shell {
3546 obj.insert(
3547 "Shell".to_string(),
3548 serde_json::Value::Array(
3549 sh.iter()
3550 .map(|s| serde_json::Value::String(s.clone()))
3551 .collect(),
3552 ),
3553 );
3554 }
3555 if !cfg.on_build.is_empty() {
3556 obj.insert(
3557 "OnBuild".to_string(),
3558 serde_json::Value::Array(
3559 cfg.on_build
3560 .iter()
3561 .map(|s| serde_json::Value::String(s.clone()))
3562 .collect(),
3563 ),
3564 );
3565 }
3566 serde_json::Value::Object(obj)
3567}
3568
3569/// Assemble the full image config JSON blob from the accumulated
3570/// skeleton state.
3571fn build_image_config_blob(
3572 skeleton: &BuildSkeleton,
3573 os_version: &str,
3574 layers: &[EmittedLayer],
3575 architecture: &str,
3576) -> Result<Vec<u8>> {
3577 let mut root = serde_json::Map::new();
3578 root.insert(
3579 "architecture".to_string(),
3580 serde_json::Value::String(architecture.to_string()),
3581 );
3582 root.insert(
3583 "os".to_string(),
3584 serde_json::Value::String("windows".to_string()),
3585 );
3586 root.insert(
3587 "os.version".to_string(),
3588 serde_json::Value::String(os_version.to_string()),
3589 );
3590 root.insert(
3591 "config".to_string(),
3592 build_image_config_config_object(&skeleton.image_config),
3593 );
3594
3595 // rootfs.diff_ids in base-first order — one entry per emitted layer.
3596 let diff_ids: Vec<serde_json::Value> = layers
3597 .iter()
3598 .map(|l| serde_json::Value::String(l.diff_id.clone()))
3599 .collect();
3600 let mut rootfs = serde_json::Map::new();
3601 rootfs.insert(
3602 "type".to_string(),
3603 serde_json::Value::String("layers".to_string()),
3604 );
3605 rootfs.insert("diff_ids".to_string(), serde_json::Value::Array(diff_ids));
3606 root.insert("rootfs".to_string(), serde_json::Value::Object(rootfs));
3607
3608 // history in build order.
3609 let history: Vec<serde_json::Value> = skeleton
3610 .instruction_log
3611 .iter()
3612 .map(|entry| {
3613 let mut h = serde_json::Map::new();
3614 h.insert(
3615 "created".to_string(),
3616 serde_json::Value::String(iso8601(entry.timestamp)),
3617 );
3618 h.insert(
3619 "created_by".to_string(),
3620 serde_json::Value::String(entry.source_line.clone()),
3621 );
3622 if !entry.produced_layer {
3623 h.insert("empty_layer".to_string(), serde_json::Value::Bool(true));
3624 }
3625 serde_json::Value::Object(h)
3626 })
3627 .collect();
3628 root.insert("history".to_string(), serde_json::Value::Array(history));
3629
3630 serde_json::to_vec(&serde_json::Value::Object(root))
3631 .map_err(|e| BuildError::SerializeManifestFailed { source: e })
3632}
3633
3634/// Assemble the OCI image manifest JSON blob.
3635fn build_manifest_blob(
3636 image_config_digest: &str,
3637 image_config_size: u64,
3638 layers: &[EmittedLayer],
3639) -> Result<Vec<u8>> {
3640 let mut root = serde_json::Map::new();
3641 root.insert(
3642 "schemaVersion".to_string(),
3643 serde_json::Value::Number(2.into()),
3644 );
3645 root.insert(
3646 "mediaType".to_string(),
3647 serde_json::Value::String(OCI_IMAGE_MANIFEST_MEDIA_TYPE.to_string()),
3648 );
3649 let mut cfg = serde_json::Map::new();
3650 cfg.insert(
3651 "mediaType".to_string(),
3652 serde_json::Value::String(OCI_IMAGE_CONFIG_MEDIA_TYPE.to_string()),
3653 );
3654 cfg.insert(
3655 "digest".to_string(),
3656 serde_json::Value::String(image_config_digest.to_string()),
3657 );
3658 cfg.insert(
3659 "size".to_string(),
3660 serde_json::Value::Number(image_config_size.into()),
3661 );
3662 root.insert("config".to_string(), serde_json::Value::Object(cfg));
3663
3664 let layer_descriptors: Vec<serde_json::Value> = layers
3665 .iter()
3666 .map(|l| {
3667 let mut m = serde_json::Map::new();
3668 m.insert(
3669 "mediaType".to_string(),
3670 serde_json::Value::String(l.media_type.clone()),
3671 );
3672 m.insert(
3673 "digest".to_string(),
3674 serde_json::Value::String(l.digest.clone()),
3675 );
3676 m.insert("size".to_string(), serde_json::Value::Number(l.size.into()));
3677 if let Some(urls) = &l.urls {
3678 if !urls.is_empty() {
3679 m.insert(
3680 "urls".to_string(),
3681 serde_json::Value::Array(
3682 urls.iter()
3683 .map(|u| serde_json::Value::String(u.clone()))
3684 .collect(),
3685 ),
3686 );
3687 }
3688 }
3689 serde_json::Value::Object(m)
3690 })
3691 .collect();
3692 root.insert(
3693 "layers".to_string(),
3694 serde_json::Value::Array(layer_descriptors),
3695 );
3696
3697 serde_json::to_vec(&serde_json::Value::Object(root))
3698 .map_err(|e| BuildError::SerializeManifestFailed { source: e })
3699}
3700
3701/// Build [`EmittedLayer`] entries from the skeleton's base-first
3702/// `base_layers` vector, threading the foreign-layer `urls[]` for the
3703/// base layer and inheriting `diff_ids` from the base image config blob.
3704///
3705/// `working_chain[i].layer_path` is the on-disk path for the i-th layer.
3706/// For the foreign base layer the path is the unpacked HCS folder (used
3707/// only for diagnostics — 4.E does not re-upload foreign bytes); for
3708/// builder-produced layers it is the imported RO layer folder.
3709fn build_emitted_layers(skeleton: &BuildSkeleton) -> Vec<EmittedLayer> {
3710 let base_diff_ids = base_diff_ids(&skeleton.base_manifest.config_blob);
3711 let mut layers = Vec::with_capacity(skeleton.base_layers.len());
3712 for (idx, layer_ref) in skeleton.base_layers.iter().enumerate() {
3713 let is_foreign = layer_ref.media_type == FOREIGN_WINDOWS_LAYER_MEDIA_TYPE
3714 || layer_ref.media_type.contains("foreign.diff.tar.gzip");
3715 let media_type = if is_foreign {
3716 FOREIGN_WINDOWS_LAYER_MEDIA_TYPE.to_string()
3717 } else {
3718 OCI_TAR_GZIP_LAYER_MEDIA_TYPE.to_string()
3719 };
3720 let diff_id = if is_foreign {
3721 // Foreign layers: inherit from the base image config blob
3722 // by positional index. If the base config didn't expose
3723 // diff_ids (degraded path), fall back to the digest itself
3724 // — Docker / containerd both tolerate this on push and the
3725 // foreign layer never re-extracts locally anyway.
3726 base_diff_ids
3727 .get(idx)
3728 .cloned()
3729 .unwrap_or_else(|| layer_ref.digest.clone())
3730 } else {
3731 // Builder-produced layer: the RUN/COPY/ADD path stored its
3732 // diff_id alongside the descriptor in earlier tasks IF we
3733 // had a slot; in the current shape we re-derive it from
3734 // the on-disk export folder by tar-archiving it and
3735 // hashing. The on-disk folder lives at
3736 // `working_chain[idx].layer_path`. To avoid double-tarring
3737 // (the RUN step already produced the gzip bytes whose
3738 // digest is `layer_ref.digest`), we use the COMPRESSED
3739 // digest as the diff_id when no uncompressed source is
3740 // available. This is the same fallback containerd takes
3741 // when it can't recompute and is acceptable here because
3742 // the diff_id only matters for unpack equality checks on
3743 // pull — and the actual unpack happens through HCS, not
3744 // through a tar diff anyway. Future task: persist the
3745 // uncompressed digest alongside the compressed digest in
3746 // `LayerRef` so this fallback is never taken.
3747 layer_ref.digest.clone()
3748 };
3749 // The OCI descriptor's blob bytes come from the plain, process-owned
3750 // gzip file the RUN/COPY commit persisted (`blob_path`), NOT the
3751 // SYSTEM-owned HCS RO layer dir (`layer_path`) — `fs::read`ing the
3752 // latter fails ERROR_ACCESS_DENIED and it holds the VHD, not the gzip.
3753 // Foreign base layers have `blob_path = None`; their empty `local_path`
3754 // is correct (they're skipped on export/push, rehydrated from urls[]).
3755 let local_path = skeleton
3756 .working_chain
3757 .get(idx)
3758 .and_then(|e| e.blob_path.clone())
3759 .unwrap_or_default();
3760 let urls = if is_foreign && !layer_ref.urls.is_empty() {
3761 Some(layer_ref.urls.clone())
3762 } else {
3763 None
3764 };
3765 #[allow(clippy::cast_sign_loss)]
3766 let size = layer_ref.size.max(0) as u64;
3767 layers.push(EmittedLayer {
3768 media_type,
3769 digest: layer_ref.digest.clone(),
3770 size,
3771 diff_id,
3772 local_path,
3773 urls,
3774 });
3775 }
3776 layers
3777}
3778
3779/// Internal implementation of [`WindowsBuilder::emit_image`] — split out
3780/// as a free function so unit tests can construct a one-off
3781/// [`BuildSkeleton`] fixture and exercise the emit path without holding
3782/// a `WindowsBuilder` reference.
3783///
3784/// `async` is preserved so the public API remains `async fn` (4.E will
3785/// add disk I/O to recompute per-layer `diff_id`s for builder-produced
3786/// layers, at which point the body becomes genuinely async).
3787#[allow(clippy::unused_async)]
3788pub(crate) async fn emit_image_impl(
3789 config: &WindowsBuildConfig,
3790 skeleton: &BuildSkeleton,
3791 tag: &str,
3792) -> Result<BuiltImage> {
3793 let os_version = resolve_os_version(config, &skeleton.base_manifest)?;
3794 let architecture = arch_for_config(&skeleton.base_manifest);
3795
3796 let layers = build_emitted_layers(skeleton);
3797 let image_config_blob = build_image_config_blob(skeleton, &os_version, &layers, &architecture)?;
3798 let image_config_digest = compute_sha256_hex(&image_config_blob);
3799
3800 #[allow(clippy::cast_possible_truncation)]
3801 let image_config_size = image_config_blob.len() as u64;
3802 let manifest_blob = build_manifest_blob(&image_config_digest, image_config_size, &layers)?;
3803 let manifest_digest = compute_sha256_hex(&manifest_blob);
3804
3805 Ok(BuiltImage {
3806 tag: tag.to_string(),
3807 image_config_blob,
3808 image_config_digest,
3809 manifest_blob,
3810 manifest_digest,
3811 layers,
3812 })
3813}
3814
/// Write an already-emitted [`BuiltImage`] to `dest` as a buildah-free
/// `oci-archive:` tar, in the on-disk layout
/// [`zlayer_registry::import_image`] consumes (`oci-layout` + `index.json` +
/// `blobs/sha256/*`).
///
/// Foreign-layer handling mirrors [`WindowsBuilder::push`]: every
/// **non-foreign** layer blob is read from [`EmittedLayer::local_path`] (the
/// gzip the build wrote) and embedded, while foreign / nondistributable
/// Windows base layers are never carried. The manifest blob is written
/// verbatim (`image.manifest_blob`) so each foreign descriptor's `urls[]`
/// round-trips byte-identical; the importer ([`import_from_oci_layout`]) skips
/// those layers and the runtime rehydrates them from MCR via
/// `ImagePuller::pull_blob_with_urls`.
///
/// The inputs are cloned and the assembly runs on tokio's blocking pool
/// (`spawn_blocking`), since it is synchronous file I/O.
///
/// [`import_from_oci_layout`]: zlayer_registry::import_image
///
/// # Errors
///
/// - [`BuildError::PushFailed`] when a non-foreign layer is missing its
///   `local_path` or its on-disk blob cannot be read.
/// - [`BuildError::IoError`] when the tar assembly or the blocking task fails.
//
// Gated to the same configs as the `oci_archive` module it delegates to: every
// Windows build (the HCS backend calls this) and any test build (so it's
// exercised off Windows). A plain non-cache macOS/Linux `cargo check` compiles
// neither this nor `oci_archive`, so the reference stays resolvable.
#[cfg(any(target_os = "windows", test))]
pub async fn export_built_image_to_oci_archive(
    image: &BuiltImage,
    tag: &str,
    dest: &Path,
) -> Result<()> {
    // The blocking closure must be `'static`, so it owns copies of the inputs.
    let (image, tag, dest) = (image.clone(), tag.to_string(), dest.to_path_buf());
    let joined =
        tokio::task::spawn_blocking(move || write_built_image_oci_archive(&image, &tag, &dest))
            .await;
    match joined {
        Ok(outcome) => outcome,
        Err(e) => Err(BuildError::IoError(std::io::Error::other(format!(
            "WCOW OCI archive assembly task failed: {e}"
        )))),
    }
}
3860
/// Synchronous core of [`export_built_image_to_oci_archive`], kept separate
/// from the async wrapper so unit tests can call it without a tokio runtime.
///
/// Reads every non-foreign layer blob from its `local_path`, then hands the
/// manifest, the config blob and those layer blobs (config first, layers in
/// manifest order) to `oci_archive::write_oci_image_layout_archive_multi`.
///
/// A layer counts as foreign — and is skipped with a debug log — when it has
/// a non-empty `urls` list or its media type contains
/// `foreign.diff.tar.gzip` or `nondistributable`.
///
/// # Errors
///
/// [`BuildError::PushFailed`] when a non-foreign layer has an empty
/// `local_path` or its blob cannot be read; otherwise whatever the archive
/// writer returns.
#[cfg(any(target_os = "windows", test))]
fn write_built_image_oci_archive(image: &BuiltImage, tag: &str, dest: &Path) -> Result<()> {
    // Owned `(digest, bytes)` pairs for the carried (non-foreign) layers; the
    // borrowed `ArchiveBlob`s assembled below point into this vector.
    let mut carried: Vec<(String, Vec<u8>)> = Vec::with_capacity(image.layers.len());
    for layer in &image.layers {
        let has_urls = matches!(&layer.urls, Some(urls) if !urls.is_empty());
        // The canonical foreign media type contains the first marker, so the
        // substring test also covers exact equality with it.
        let foreign_media_type = ["foreign.diff.tar.gzip", "nondistributable"]
            .iter()
            .any(|marker| layer.media_type.contains(*marker));
        if has_urls || foreign_media_type {
            // Mirror `push`: never carry foreign base-layer bytes; the manifest
            // descriptor's `urls[]` points the runtime back at MCR.
            tracing::debug!(
                tag = %tag,
                digest = %layer.digest,
                "skipping foreign layer in OCI archive (urls[] preserved on manifest)"
            );
            continue;
        }

        if layer.local_path.as_os_str().is_empty() {
            let reason = format!(
                "non-foreign layer {} has no local_path (emit_image must populate it)",
                layer.digest
            );
            return Err(BuildError::PushFailed {
                tag: tag.to_string(),
                reason,
            });
        }

        let bytes = match std::fs::read(&layer.local_path) {
            Ok(bytes) => bytes,
            Err(e) => {
                return Err(BuildError::PushFailed {
                    tag: tag.to_string(),
                    reason: format!(
                        "failed to read layer blob {} from {}: {e}",
                        layer.digest,
                        layer.local_path.display()
                    ),
                });
            }
        };
        carried.push((layer.digest.clone(), bytes));
    }

    // Config blob first, then each carried layer (manifest order).
    let config_blob = crate::oci_archive::ArchiveBlob {
        digest: &image.image_config_digest,
        bytes: &image.image_config_blob,
    };
    let blobs: Vec<crate::oci_archive::ArchiveBlob<'_>> = std::iter::once(config_blob)
        .chain(
            carried
                .iter()
                .map(|(digest, bytes)| crate::oci_archive::ArchiveBlob { digest, bytes }),
        )
        .collect();

    let manifest = crate::oci_archive::ArchiveBlob {
        digest: &image.manifest_digest,
        bytes: &image.manifest_blob,
    };
    // Saturate rather than wrap if the length ever exceeded `i64::MAX`.
    let manifest_size = i64::try_from(image.manifest_blob.len()).unwrap_or(i64::MAX);
    crate::oci_archive::write_oci_image_layout_archive_multi(
        dest,
        tag,
        &manifest,
        manifest_size,
        &blobs,
    )
}
3926
3927// ---------------------------------------------------------------------------
3928// Tests
3929// ---------------------------------------------------------------------------
3930
3931#[cfg(test)]
3932mod tests {
3933 use super::*;
3934
3935 use crate::dockerfile::{AddInstruction, CopyInstruction, EnvInstruction, ShellOrExec};
3936
/// Regression test for the buildah-free WCOW OCI-archive exporter (the
/// no-buildah Windows `zlayer build` failure where the export fell through
/// to `buildah push … oci-archive:`).
///
/// Exports a [`BuiltImage`] made of one foreign base layer (empty
/// `local_path`, `urls[]` set) and one builder-produced layer whose gzip blob
/// sits on disk, then reads the tar back and checks that:
///
/// - `oci-layout` and `index.json` are present;
/// - the manifest, config, and builder-layer blobs are embedded;
/// - the foreign base-layer blob is NOT embedded;
/// - `index.json` references the manifest digest and carries the tag in the
///   `org.opencontainers.image.ref.name` annotation.
#[test]
fn oci_archive_export_embeds_real_layers_and_skips_foreign() {
    use std::io::Read;

    const TAG: &str = "zlayer-wcow-export-test:latest";
    // Foreign base layer (nanoserver): no on-disk blob, carries urls[].
    const FOREIGN_DIGEST: &str =
        "sha256:deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef";

    let digest_of = |bytes: &[u8]| format!("sha256:{}", compute_sha256_hex(bytes));

    let tmp = tempfile::tempdir().expect("tmpdir");

    // Builder-produced layer: the gzip blob lives on disk at `local_path`,
    // exactly as `emit_image` / the push path expect.
    let real_layer_bytes = b"real-builder-layer-gzip".as_slice();
    let real_layer_path = tmp.path().join("layer0.tar.gz");
    std::fs::write(&real_layer_path, real_layer_bytes).expect("write layer blob");
    let real_digest = digest_of(real_layer_bytes);

    let config_blob = br#"{"architecture":"amd64","os":"windows"}"#.to_vec();
    let config_digest = digest_of(config_blob.as_slice());
    let manifest_blob =
        br#"{"schemaVersion":2,"mediaType":"application/vnd.oci.image.manifest.v1+json"}"#
            .to_vec();
    let manifest_digest = digest_of(manifest_blob.as_slice());

    let foreign_layer = EmittedLayer {
        media_type: FOREIGN_WINDOWS_LAYER_MEDIA_TYPE.to_string(),
        digest: FOREIGN_DIGEST.to_string(),
        size: 100,
        diff_id: FOREIGN_DIGEST.to_string(),
        local_path: PathBuf::new(),
        urls: Some(vec![
            "https://mcr.microsoft.com/v2/windows/nanoserver/blobs/sha256:deadbeef".to_string(),
        ]),
    };
    let builder_layer = EmittedLayer {
        media_type: OCI_TAR_GZIP_LAYER_MEDIA_TYPE.to_string(),
        digest: real_digest.clone(),
        size: real_layer_bytes.len() as u64,
        diff_id: real_digest.clone(),
        local_path: real_layer_path,
        urls: None,
    };
    let image = BuiltImage {
        tag: TAG.to_string(),
        image_config_blob: config_blob.clone(),
        image_config_digest: config_digest.clone(),
        manifest_blob: manifest_blob.clone(),
        manifest_digest: manifest_digest.clone(),
        layers: vec![foreign_layer, builder_layer],
    };

    let dest = tmp.path().join("export.tar");
    write_built_image_oci_archive(&image, TAG, &dest).expect("export oci archive");

    // Slurp the tar back into an entry-name -> contents map.
    let mut archive = tar::Archive::new(std::fs::File::open(&dest).expect("open archive"));
    let archived: std::collections::HashMap<String, Vec<u8>> = archive
        .entries()
        .expect("entries")
        .map(|item| {
            let mut item = item.expect("entry");
            let name = item.path().expect("path").to_string_lossy().to_string();
            let mut contents = Vec::new();
            item.read_to_end(&mut contents).expect("read entry");
            (name, contents)
        })
        .collect();

    // Archive member name for a `sha256:<hex>` digest.
    let blob_entry =
        |digest: &str| format!("blobs/sha256/{}", digest.strip_prefix("sha256:").unwrap());

    // Layout markers + manifest/config/real-layer blobs present.
    for marker in ["oci-layout", "index.json"] {
        assert!(archived.contains_key(marker));
    }
    assert!(archived.contains_key(&blob_entry(&manifest_digest)));
    assert!(archived.contains_key(&blob_entry(&config_digest)));
    assert!(
        archived.contains_key(&blob_entry(&real_digest)),
        "non-foreign builder layer blob must be embedded"
    );

    // The foreign base-layer blob must NOT be carried.
    assert!(
        !archived.contains_key(&blob_entry(FOREIGN_DIGEST)),
        "foreign/nondistributable base layer bytes must never be embedded in the archive"
    );

    // index.json points at the manifest with the tag annotation.
    let index: serde_json::Value =
        serde_json::from_slice(archived.get("index.json").unwrap()).unwrap();
    let descriptor = &index["manifests"][0];
    assert_eq!(descriptor["digest"], manifest_digest);
    assert_eq!(
        descriptor["annotations"]["org.opencontainers.image.ref.name"],
        TAG
    );
}
4038
4039 fn dummy_config() -> WindowsBuildConfig {
4040 WindowsBuildConfig {
4041 cache_dir: std::env::temp_dir().join("zlayer-wcow-skeleton-tests"),
4042 registry_auth: RegistryAuth::Anonymous,
4043 platform: WindowsBuildConfig::default_platform().to_string(),
4044 os_version_override: None,
4045 scratch_size_gb: 0,
4046 }
4047 }
4048
4049 fn dummy_skeleton() -> BuildSkeleton {
4050 // Build a minimally-populated skeleton for instruction-routing
4051 // tests. We never call `build_skeleton` (which would touch HCS
4052 // and the network) — the goal is to verify `execute_instruction`
4053 // routing decisions in isolation.
4054 let parsed = Dockerfile::parse("FROM mcr.microsoft.com/windows/nanoserver:ltsc2022\n")
4055 .expect("parse fixture");
4056 BuildSkeleton {
4057 parsed_dockerfile: parsed,
4058 base_layers: Vec::new(),
4059 base_manifest: BaseImageManifest {
4060 image_ref: "mcr.microsoft.com/windows/nanoserver:ltsc2022".into(),
4061 os: "windows".into(),
4062 os_version: None,
4063 arch: "amd64".into(),
4064 config_blob: Vec::new(),
4065 },
4066 working_layer_chain_dir: std::env::temp_dir().join("zlayer-wcow-skeleton-tests/x"),
4067 working_chain: Vec::new(),
4068 image_config: OciImageConfig::default(),
4069 instruction_log: vec![ExecutedInstruction {
4070 source_line: "FROM mcr.microsoft.com/windows/nanoserver:ltsc2022".to_string(),
4071 produced_layer: true,
4072 timestamp: Utc::now(),
4073 }],
4074 provisioned_toolchain_language: None,
4075 }
4076 }
4077
4078 /// Build a fresh [`BuildContext`] + [`BuildSkeleton`] pair backed by
4079 /// a per-test tempdir, plus the tempdir guard. Used by every 4.C
4080 /// COPY/ADD test so each invocation has an isolated context dir and
4081 /// `working_layer_chain_dir` no test ever races against another.
4082 fn ctx_and_skeleton_in_tempdir() -> (BuildContext, BuildSkeleton, tempfile::TempDir) {
4083 let tmp = tempfile::tempdir().expect("tmpdir");
4084 let context_dir = tmp.path().join("context");
4085 std::fs::create_dir_all(&context_dir).expect("mk context");
4086 let chain_dir = tmp.path().join("chain");
4087 std::fs::create_dir_all(&chain_dir).expect("mk chain");
4088 let ctx = BuildContext {
4089 context_dir,
4090 dockerfile_path: PathBuf::from("Dockerfile"),
4091 build_args: HashMap::new(),
4092 tag: "zlayer-wcow-test:latest".to_string(),
4093 ltsc: None,
4094 };
4095 let mut skel = dummy_skeleton();
4096 skel.working_layer_chain_dir = chain_dir;
4097 (ctx, skel, tmp)
4098 }
4099
/// Smoke test: a builder created from a config exposes that config's
/// platform and cache dir unchanged, and the default platform string is
/// still `windows/amd64`.
#[test]
fn new_smoke() {
    let cfg = dummy_config();
    let expected_platform = cfg.platform.clone();
    let builder = WindowsBuilder::new(cfg);

    let seen = builder.config();
    assert_eq!(seen.platform, expected_platform);
    assert!(seen.cache_dir.ends_with("zlayer-wcow-skeleton-tests"));

    let default_platform = WindowsBuildConfig::default_platform();
    assert_eq!(
        default_platform, "windows/amd64",
        "default platform string drift would silently break MCR base resolution"
    );
}
4115
/// The simplest possible WCOW Dockerfile (a lone `FROM` on the nanoserver
/// image) must parse to exactly one stage whose base is an image reference
/// that still contains the `mcr.microsoft.com/windows/nanoserver` prefix.
///
/// This is a plain synchronous test: the body only calls the synchronous
/// parser and never awaits, so it does not need a Tokio runtime.
#[test]
fn build_skeleton_with_simple_dockerfile_parses_one_stage() {
    let parsed = Dockerfile::parse("FROM mcr.microsoft.com/windows/nanoserver:ltsc2022\n")
        .expect("parse the simplest possible WCOW Dockerfile");
    assert_eq!(
        parsed.stages.len(),
        1,
        "single-stage WCOW Dockerfile must parse to exactly one stage"
    );

    let stage = &parsed.stages[0];
    match &stage.base_image {
        DockerfileFromTarget::Image(r) => {
            assert!(
                r.to_string()
                    .contains("mcr.microsoft.com/windows/nanoserver"),
                "image ref round-trip lost the registry prefix: {r}"
            );
        }
        other => panic!("expected Image FROM target, got {other:?}"),
    }
}
4137
4138 #[tokio::test]
4139 async fn execute_instruction_copy_from_multi_stage_is_unsupported() {
4140 // Multi-stage COPY --from=builder is intentionally rejected with
4141 // a typed `NotSupported` error until a later task lands
4142 // multi-stage support. This is the documented 4.C behaviour.
4143 let builder = WindowsBuilder::new(dummy_config());
4144 let (ctx, mut skel, _guard) = ctx_and_skeleton_in_tempdir();
4145 let copy = Instruction::Copy(
4146 CopyInstruction::new(vec!["app.exe".to_string()], "C:\\app\\app.exe".to_string())
4147 .from_stage("builder"),
4148 );
4149 let err = builder
4150 .execute_instruction(&mut skel, &ctx, ©)
4151 .await
4152 .expect_err("multi-stage COPY --from must surface NotSupported");
4153 assert!(
4154 matches!(err, BuildError::NotSupported { ref operation } if operation.contains("multi-stage")),
4155 "COPY --from error must explain multi-stage gap, got: {err}"
4156 );
4157 }
4158
4159 #[tokio::test]
4160 async fn execute_instruction_env_records_kv() {
4161 let builder = WindowsBuilder::new(dummy_config());
4162 let (ctx, mut skel, _guard) = ctx_and_skeleton_in_tempdir();
4163 let mut vars = HashMap::new();
4164 vars.insert("APP_HOME".to_string(), "C:\\app".to_string());
4165 let env = Instruction::Env(EnvInstruction { vars });
4166 builder
4167 .execute_instruction(&mut skel, &ctx, &env)
4168 .await
4169 .expect("ENV must succeed and accumulate into image_config");
4170 assert_eq!(skel.image_config.env, vec!["APP_HOME=C:\\app".to_string()]);
4171 }
4172
4173 #[tokio::test]
4174 async fn execute_instruction_workdir_and_entrypoint_mutate_config() {
4175 let builder = WindowsBuilder::new(dummy_config());
4176 let (ctx, mut skel, _guard) = ctx_and_skeleton_in_tempdir();
4177 builder
4178 .execute_instruction(
4179 &mut skel,
4180 &ctx,
4181 &Instruction::Workdir("C:\\app".to_string()),
4182 )
4183 .await
4184 .expect("WORKDIR must succeed");
4185 assert_eq!(skel.image_config.working_dir.as_deref(), Some("C:\\app"));
4186 builder
4187 .execute_instruction(
4188 &mut skel,
4189 &ctx,
4190 &Instruction::Entrypoint(ShellOrExec::Exec(vec!["C:\\app\\app.exe".to_string()])),
4191 )
4192 .await
4193 .expect("ENTRYPOINT must succeed");
4194 assert_eq!(
4195 skel.image_config.entrypoint.as_deref(),
4196 Some(["C:\\app\\app.exe".to_string()].as_slice())
4197 );
4198 }
4199
4200 // -----------------------------------------------------------------
4201 // 4.C: config-only instruction helpers
4202 // -----------------------------------------------------------------
4203
/// `apply_workdir` joins a relative path onto the previously recorded
/// working directory, while drive-absolute and forward-slash-absolute paths
/// replace it outright.
#[test]
fn apply_workdir_relative_resolves_against_previous() {
    let mut cfg = OciImageConfig::default();
    // Apply one WORKDIR and report the resulting working directory.
    let mut workdir_after = |dir: &str| {
        apply_workdir(&mut cfg, dir);
        cfg.working_dir.clone()
    };

    // Relative path resolves against the earlier absolute one.
    workdir_after("C:\\app");
    assert_eq!(workdir_after("sub").as_deref(), Some("C:\\app\\sub"));
    // A drive-absolute path replaces the accumulated directory.
    assert_eq!(workdir_after("D:\\other").as_deref(), Some("D:\\other"));
    // Forward-slash absolute Unix path is treated as absolute.
    assert_eq!(workdir_after("/data").as_deref(), Some("/data"));
}
4217
/// A later `ENV` for an already-set KEY replaces the earlier value rather
/// than adding a second entry; new KEYs are added alongside it.
#[test]
fn apply_env_replaces_existing_key() {
    let mut cfg = OciImageConfig::default();

    let first = EnvInstruction {
        vars: HashMap::from([("FOO".to_string(), "1".to_string())]),
    };
    apply_env(&mut cfg, &first);

    let second = EnvInstruction {
        vars: HashMap::from([
            ("FOO".to_string(), "2".to_string()),
            ("BAR".to_string(), "baz".to_string()),
        ]),
    };
    apply_env(&mut cfg, &second);

    // FOO must have been replaced (last write wins), and the new BAR sits
    // alongside it.
    let has = |entry: &str| cfg.env.iter().any(|e| e == entry);
    assert!(has("FOO=2"), "{:?}", cfg.env);
    assert!(has("BAR=baz"), "{:?}", cfg.env);
    assert!(!has("FOO=1"), "{:?}", cfg.env);

    // No duplicate KEYs.
    let foo_count = cfg
        .env
        .iter()
        .map(String::as_str)
        .filter(|e| e.starts_with("FOO="))
        .count();
    assert_eq!(foo_count, 1, "ENV must enforce single KEY: {:?}", cfg.env);
}
4237
4238 // -----------------------------------------------------------------
4239 // Relocatable-artifact installer (Windows parity with macOS rootfs
4240 // extraction). These run on every platform via the `test` cfg gate.
4241 // -----------------------------------------------------------------
4242
/// Table-driven check of `sanitize_path_component`: plain names pass
/// through, separators and `:` become `_`, leading dots are trimmed, and an
/// input that sanitises to nothing falls back to `pkg`.
#[test]
fn sanitize_path_component_strips_separators_and_traversal() {
    let cases = [
        ("curl", "curl"),
        ("ca-certificates", "ca-certificates"),
        ("foo/bar", "foo_bar"),
        // `..\evil` → chars `.`,`.`,`\`→`_`,`evil` = `.._evil`; leading `.`s
        // are then trimmed, leaving `_evil` (no separator, no traversal).
        ("..\\evil", "_evil"),
        ("c:bad", "c_bad"),
        ("...", "pkg"),
        ("", "pkg"),
    ];
    for (raw, expected) in cases {
        assert_eq!(sanitize_path_component(raw), expected);
    }
}
4258
/// `prepend_image_path` creates `PATH` when absent, prepends with a `;`
/// separator when present (keeping a single `PATH` entry), and is a no-op
/// when the same directory already leads the value.
#[test]
fn prepend_image_path_creates_and_prepends() {
    // Every `PATH=` entry currently in the config, in order.
    fn path_entries(cfg: &OciImageConfig) -> Vec<&String> {
        cfg.env.iter().filter(|e| e.starts_with("PATH=")).collect()
    }

    let mut cfg = OciImageConfig::default();

    // No PATH yet → creates one.
    prepend_image_path(&mut cfg, r"C:\Program Files\zlayer\curl");
    assert!(
        cfg.env
            .iter()
            .any(|e| e == r"PATH=C:\Program Files\zlayer\curl"),
        "{:?}",
        cfg.env
    );

    // Existing PATH → prepend with `;` separator, single PATH entry.
    cfg.env.clear();
    cfg.env.push(r"PATH=C:\Windows\System32".to_string());
    prepend_image_path(&mut cfg, r"C:\Program Files\zlayer\jq");
    {
        let entries = path_entries(&cfg);
        let first = *entries.first().expect("PATH present");
        assert_eq!(first, r"PATH=C:\Program Files\zlayer\jq;C:\Windows\System32");
        assert_eq!(entries.len(), 1, "exactly one PATH entry");
    }

    // Idempotent: prepending the same leading dir again is a no-op.
    prepend_image_path(&mut cfg, r"C:\Program Files\zlayer\jq");
    let entries = path_entries(&cfg);
    let first = *entries.first().expect("PATH present");
    assert_eq!(
        first,
        r"PATH=C:\Program Files\zlayer\jq;C:\Windows\System32"
    );
}
4299
/// `extract_zip_archive` must write well-formed entries under the
/// destination and must not let a `../` entry land outside it.
#[test]
fn extract_zip_archive_writes_files_and_rejects_traversal() {
    use std::io::Write;

    let tmp = tempfile::tempdir().unwrap();
    let zip_path = tmp.path().join("payload.zip");
    let dest = tmp.path().join("out");

    // Fixture: one safe entry followed by one traversal entry, which must be
    // skipped rather than extracted.
    {
        let mut zw = zip::ZipWriter::new(std::fs::File::create(&zip_path).unwrap());
        let opts: zip::write::FileOptions<'_, ()> = zip::write::FileOptions::default();
        let entries = [
            ("bin/tool.exe", b"MZ binary".as_slice()),
            ("../escape.txt", b"nope".as_slice()),
        ];
        for (name, payload) in entries {
            zw.start_file(name, opts).unwrap();
            zw.write_all(payload).unwrap();
        }
        zw.finish().unwrap();
    }

    extract_zip_archive(&zip_path, &dest).expect("extract succeeds");

    let tool = dest.join("bin/tool.exe");
    assert!(tool.exists(), "safe entry extracted");
    assert_eq!(std::fs::read(&tool).unwrap(), b"MZ binary");

    // The traversal entry must NOT have escaped the dest dir: `dest` is
    // `<tmp>/out`, so an escape would show up as `<tmp>/escape.txt`.
    let escaped = tmp.path().join("escape.txt");
    assert!(!escaped.exists(), "traversal entry must be rejected");
}
4329
4330 #[tokio::test]
4331 async fn install_relocatable_into_layer_zip_lands_under_program_files() {
4332 use std::io::Write;
4333
4334 // Build a tiny zip artifact in-memory.
4335 let mut zip_buf = std::io::Cursor::new(Vec::new());
4336 {
4337 let mut zw = zip::ZipWriter::new(&mut zip_buf);
4338 let opts: zip::write::FileOptions<'_, ()> = zip::write::FileOptions::default();
4339 zw.start_file("jq.exe", opts).unwrap();
4340 zw.write_all(b"FAKEJQ").unwrap();
4341 zw.finish().unwrap();
4342 }
4343 let zip_bytes = zip_buf.into_inner();
4344
4345 // Serve it over a one-shot localhost HTTP server.
4346 let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
4347 let addr = listener.local_addr().unwrap();
4348 let body = zip_bytes.clone();
4349 let server = tokio::spawn(async move {
4350 let (mut sock, _) = listener.accept().await.unwrap();
4351 // Drain the request line/headers (best-effort).
4352 let mut buf = [0u8; 1024];
4353 let _ = tokio::io::AsyncReadExt::read(&mut sock, &mut buf).await;
4354 let header = format!(
4355 "HTTP/1.1 200 OK\r\nContent-Length: {}\r\nContent-Type: application/zip\r\nConnection: close\r\n\r\n",
4356 body.len()
4357 );
4358 tokio::io::AsyncWriteExt::write_all(&mut sock, header.as_bytes())
4359 .await
4360 .unwrap();
4361 tokio::io::AsyncWriteExt::write_all(&mut sock, &body)
4362 .await
4363 .unwrap();
4364 tokio::io::AsyncWriteExt::flush(&mut sock).await.unwrap();
4365 });
4366
4367 let mount = tempfile::tempdir().unwrap();
4368 let artifact = crate::buildah::RelocatableArtifact {
4369 name: "jq".to_string(),
4370 url: format!("http://{addr}/jq.zip"),
4371 asset_name: "jq.zip".to_string(),
4372 };
4373 let bin_dir = install_relocatable_into_layer(mount.path(), &artifact)
4374 .await
4375 .expect("install succeeds");
4376 server.await.unwrap();
4377
4378 assert_eq!(bin_dir, r"C:\Program Files\zlayer\jq");
4379 let extracted = mount
4380 .path()
4381 .join("Program Files")
4382 .join("zlayer")
4383 .join("jq")
4384 .join("jq.exe");
4385 assert!(extracted.exists(), "jq.exe must land under the prefix");
4386 assert_eq!(std::fs::read(&extracted).unwrap(), b"FAKEJQ");
4387 }
4388
/// Setting `ENTRYPOINT` after `CMD` must record the entrypoint argv and
/// clear the previously recorded `CMD`.
#[test]
fn apply_entrypoint_resets_cmd_per_spec() {
    let mut cfg = OciImageConfig::default();

    let cmd = ShellOrExec::Exec(vec!["bash".to_string()]);
    apply_cmd(&mut cfg, &cmd);
    assert!(cfg.cmd.is_some());

    let entrypoint = ShellOrExec::Exec(vec!["C:\\app\\app.exe".to_string()]);
    apply_entrypoint(&mut cfg, &entrypoint);

    let expected_argv = ["C:\\app\\app.exe".to_string()];
    assert_eq!(cfg.entrypoint.as_deref(), Some(expected_argv.as_slice()));
    assert!(
        cfg.cmd.is_none(),
        "ENTRYPOINT must reset CMD per Dockerfile spec"
    );
}
4407
/// Each `EXPOSE` adds a `<port>/<proto>` key to `exposed_ports`; three
/// distinct exposures yield exactly three keys.
#[test]
fn apply_expose_accumulates_ports() {
    let mut cfg = OciImageConfig::default();

    let exposures = [
        ExposeInstruction::tcp(80),
        ExposeInstruction::tcp(443),
        ExposeInstruction::udp(53),
    ];
    for exposure in &exposures {
        apply_expose(&mut cfg, exposure);
    }

    for key in ["80/tcp", "443/tcp", "53/udp"] {
        assert!(cfg.exposed_ports.contains_key(key));
    }
    assert_eq!(cfg.exposed_ports.len(), 3);
}
4419
/// Inserting the same label KEY twice keeps only the later value.
#[test]
fn apply_label_last_value_wins() {
    let mut cfg = OciImageConfig::default();

    // Direct mutation simulates `Instruction::Label(...)` dispatch in
    // execute_instruction. The contract is: a later LABEL with the same KEY
    // overrides the earlier one.
    for owner in ["alice", "bob"] {
        cfg.labels
            .insert("maintainer".to_string(), owner.to_string());
    }

    let maintainer = cfg.labels.get("maintainer").map(String::as_str);
    assert_eq!(maintainer, Some("bob"));
}
4435
/// `HEALTHCHECK NONE` must populate a disabled healthcheck, and a
/// shell-form `HEALTHCHECK` must be recorded as a `CMD-SHELL` test with its
/// interval, timeout, and retries rendered into the config.
#[test]
fn apply_healthcheck_disabled_and_check_round_trip() {
    use std::time::Duration;

    let mut cfg = OciImageConfig::default();

    // HEALTHCHECK NONE
    apply_healthcheck(&mut cfg, &HealthcheckInstruction::None);
    let disabled = cfg
        .healthcheck
        .as_ref()
        .expect("HEALTHCHECK NONE must populate config");
    assert!(disabled.is_disabled());

    // HEALTHCHECK with a shell-form command
    let check = HealthcheckInstruction::Check {
        command: ShellOrExec::Shell("curl -f http://localhost/".to_string()),
        interval: Some(Duration::from_secs(30)),
        timeout: Some(Duration::from_secs(5)),
        start_period: None,
        start_interval: None,
        retries: Some(3),
    };
    apply_healthcheck(&mut cfg, &check);

    let recorded = cfg.healthcheck.as_ref().expect("healthcheck populated");
    let expected_test = vec![
        "CMD-SHELL".to_string(),
        "curl -f http://localhost/".to_string(),
    ];
    assert_eq!(recorded.test, expected_test);
    assert_eq!(recorded.interval.as_deref(), Some("30s"));
    assert_eq!(recorded.timeout.as_deref(), Some("5s"));
    assert_eq!(recorded.retries, Some(3));
}
4467
4468 // -----------------------------------------------------------------
4469 // 4.C: COPY/ADD filesystem semantics
4470 // -----------------------------------------------------------------
4471
/// Find the `Files` directory of the COPY/ADD scratch under `chain_dir`.
///
/// The scratch directory name starts with `copy-add-` followed by a
/// non-deterministic id (uuid), so the directory is scanned and the first
/// matching subdirectory wins. Returns `<scratch>/Files` without checking
/// that it exists.
///
/// # Panics
///
/// Panics if `chain_dir` cannot be read, if a directory entry cannot be
/// read, or if no `copy-add-*` subdirectory is found.
fn locate_scratch_files(chain_dir: &std::path::Path) -> PathBuf {
    let is_scratch_dir = |candidate: &PathBuf| {
        candidate.is_dir()
            && candidate
                .file_name()
                .and_then(|name| name.to_str())
                .is_some_and(|name| name.starts_with("copy-add-"))
    };

    std::fs::read_dir(chain_dir)
        .expect("read chain dir")
        .map(|entry| entry.expect("read dir entry").path())
        .find(is_scratch_dir)
        .map(|scratch| scratch.join("Files"))
        .unwrap_or_else(|| panic!("no copy-add-* scratch dir under {}", chain_dir.display()))
}
4490
4491 #[tokio::test]
4492 #[cfg_attr(
4493 windows,
4494 ignore = "exercises the off-Windows COPY materialization path (commit is a no-op \
4495 off-Windows). On Windows execute_instruction commits via real HcsImportLayer, \
4496 which needs a base layer present; that path is covered by the layer e2e."
4497 )]
4498 async fn apply_copy_simple_file_writes_to_scratch() {
4499 let builder = WindowsBuilder::new(dummy_config());
4500 let (ctx, mut skel, _guard) = ctx_and_skeleton_in_tempdir();
4501 std::fs::write(ctx.context_dir.join("hello.txt"), b"hello").unwrap();
4502 let copy = Instruction::Copy(CopyInstruction::new(
4503 vec!["hello.txt".to_string()],
4504 "C:\\app\\hello.txt".to_string(),
4505 ));
4506 builder
4507 .execute_instruction(&mut skel, &ctx, ©)
4508 .await
4509 .expect("COPY of a simple file must succeed off-Windows");
4510 let files = locate_scratch_files(&skel.working_layer_chain_dir);
4511 let copied = files.join("app").join("hello.txt");
4512 assert!(copied.is_file(), "expected file at {}", copied.display());
4513 assert_eq!(std::fs::read(&copied).unwrap(), b"hello");
4514 }
4515
4516 #[tokio::test]
4517 async fn apply_copy_rejects_parent_dir_traversal() {
4518 let builder = WindowsBuilder::new(dummy_config());
4519 let (ctx, mut skel, _guard) = ctx_and_skeleton_in_tempdir();
4520 let copy = Instruction::Copy(CopyInstruction::new(
4521 vec!["../secrets".to_string()],
4522 "C:\\".to_string(),
4523 ));
4524 let err = builder
4525 .execute_instruction(&mut skel, &ctx, ©)
4526 .await
4527 .expect_err("COPY with `..` must be rejected");
4528 assert!(
4529 matches!(err, BuildError::PathTraversal { ref src } if src == "../secrets"),
4530 "expected PathTraversal, got: {err}"
4531 );
4532 }
4533
4534 #[tokio::test]
4535 #[cfg_attr(
4536 windows,
4537 ignore = "exercises the off-Windows COPY materialization path (commit is a no-op \
4538 off-Windows). On Windows execute_instruction commits via real HcsImportLayer, \
4539 which needs a base layer present; that path is covered by the layer e2e."
4540 )]
4541 async fn apply_copy_directory_recursive() {
4542 let builder = WindowsBuilder::new(dummy_config());
4543 let (ctx, mut skel, _guard) = ctx_and_skeleton_in_tempdir();
4544 let src_dir = ctx.context_dir.join("payload");
4545 std::fs::create_dir_all(src_dir.join("nested")).unwrap();
4546 std::fs::write(src_dir.join("a.txt"), b"A").unwrap();
4547 std::fs::write(src_dir.join("nested").join("b.txt"), b"B").unwrap();
4548
4549 let copy = Instruction::Copy(CopyInstruction::new(
4550 vec!["payload".to_string()],
4551 "C:\\opt\\payload\\".to_string(),
4552 ));
4553 builder
4554 .execute_instruction(&mut skel, &ctx, ©)
4555 .await
4556 .expect("recursive COPY must succeed");
4557 let files = locate_scratch_files(&skel.working_layer_chain_dir);
4558 assert!(files.join("opt/payload/a.txt").is_file());
4559 assert!(files.join("opt/payload/nested/b.txt").is_file());
4560 }
4561
4562 #[tokio::test]
4563 #[cfg_attr(
4564 windows,
4565 ignore = "exercises the off-Windows ADD tarball-extract materialization path (commit is \
4566 a no-op off-Windows). On Windows execute_instruction commits via real \
4567 HcsImportLayer, which needs a base layer present; covered by the layer e2e."
4568 )]
4569 async fn apply_add_tarball_extracts() {
4570 use flate2::write::GzEncoder;
4571 use flate2::Compression;
4572 let builder = WindowsBuilder::new(dummy_config());
4573 let (ctx, mut skel, _guard) = ctx_and_skeleton_in_tempdir();
4574
4575 // Build a tiny .tar.gz fixture containing one file `inside.txt`.
4576 let tar_bytes = {
4577 let mut tar_builder = tar::Builder::new(Vec::new());
4578 let payload = b"INSIDE\n";
4579 let mut header = tar::Header::new_gnu();
4580 header.set_size(payload.len() as u64);
4581 header.set_mode(0o644);
4582 header.set_mtime(0);
4583 header.set_path("inside.txt").unwrap();
4584 header.set_cksum();
4585 tar_builder.append(&header, payload.as_ref()).unwrap();
4586 tar_builder.finish().unwrap();
4587 tar_builder.into_inner().unwrap()
4588 };
4589 let mut gz = GzEncoder::new(Vec::new(), Compression::default());
4590 std::io::Write::write_all(&mut gz, &tar_bytes).unwrap();
4591 let gz_bytes = gz.finish().unwrap();
4592 std::fs::write(ctx.context_dir.join("payload.tar.gz"), gz_bytes).unwrap();
4593
4594 let add = Instruction::Add(AddInstruction::new(
4595 vec!["payload.tar.gz".to_string()],
4596 "C:\\opt\\extracted\\".to_string(),
4597 ));
4598 builder
4599 .execute_instruction(&mut skel, &ctx, &add)
4600 .await
4601 .expect("ADD must extract a tarball");
4602 let files = locate_scratch_files(&skel.working_layer_chain_dir);
4603 let extracted = files.join("opt/extracted/inside.txt");
4604 assert!(extracted.is_file(), "expected {}", extracted.display());
4605 assert_eq!(std::fs::read(&extracted).unwrap(), b"INSIDE\n");
4606 }
4607
4608 #[tokio::test]
4609 #[ignore = "live network — exercises ADD URL fetch against example.com"]
4610 async fn apply_add_http_url_downloads() {
4611 let builder = WindowsBuilder::new(dummy_config());
4612 let (ctx, mut skel, _guard) = ctx_and_skeleton_in_tempdir();
4613 let add = Instruction::Add(AddInstruction::new(
4614 vec!["https://example.com/".to_string()],
4615 "C:\\downloads\\".to_string(),
4616 ));
4617 builder
4618 .execute_instruction(&mut skel, &ctx, &add)
4619 .await
4620 .expect("ADD URL must succeed when the network is reachable");
4621 let files = locate_scratch_files(&skel.working_layer_chain_dir);
4622 // The basename "" from `example.com/` becomes the fallback
4623 // `download`; either form is acceptable.
4624 assert!(
4625 files.join("downloads").is_dir(),
4626 "expected downloads/ dir under {}",
4627 files.display()
4628 );
4629 }
4630
/// `path_contains_parent_dir` flags `..` as a path component with either
/// separator, and ignores `..` embedded inside an ordinary name.
#[test]
fn path_traversal_detection_flavours() {
    let cases = [
        ("../etc", true),
        ("foo/../bar", true),
        ("foo\\..\\bar", true),
        ("foo/bar", false),
        // no separator → ordinary name
        ("foo..bar", false),
    ];
    for (candidate, expected) in cases {
        assert_eq!(
            path_contains_parent_dir(candidate),
            expected,
            "unexpected verdict for {candidate:?}"
        );
    }
}
4639
/// `dest_under_files_root` turns a container destination into a relative
/// path: the drive prefix or leading `/` is dropped, and an already-relative
/// path is returned as-is.
#[test]
fn dest_under_files_root_strips_drive() {
    let cases = [
        ("C:\\app\\bin", "app/bin"),
        ("/etc/passwd", "etc/passwd"),
        ("relative/x", "relative/x"),
    ];
    for (dest, expected) in cases {
        assert_eq!(dest_under_files_root(dest), PathBuf::from(expected));
    }
}
4655
/// Pins the strings `duration_to_oci_string` produces for second, minute,
/// hour, and millisecond durations. Note that 90 s stays `90s` while 60 s
/// becomes `1m`.
#[test]
fn duration_to_oci_string_shapes() {
    use std::time::Duration;

    let cases = [
        (Duration::from_secs(30), "30s"),
        (Duration::from_secs(90), "90s"),
        (Duration::from_secs(60), "1m"),
        (Duration::from_millis(500), "500ms"),
        (Duration::from_secs(3600), "1h"),
    ];
    for (duration, expected) in cases {
        assert_eq!(duration_to_oci_string(duration), expected);
    }
}
4665
4666 // -----------------------------------------------------------------
4667 // 4.B: Chocolatey detection / translation
4668 //
4669 // The detect/split/rejoin/wrap unit tests, plus the
4670 // `translate_run_apt_to_choco_with_in_memory_shard` end-to-end
4671 // exercise, were moved to `crate::buildah::tests` alongside the
4672 // helpers themselves.
4673 // -----------------------------------------------------------------
4674
/// `derive_source_distro` maps known Linux base-image refs to a
/// `<distro>-<version>` string and falls back to `debian-12` for a ref it
/// does not recognise.
#[test]
fn derive_source_distro_known_bases() {
    let cases = [
        ("debian:12", "debian-12"),
        ("docker.io/library/ubuntu:22.04", "ubuntu-22.04"),
        ("alpine:3.19", "alpine-3.19"),
        // Unknown short repo → defaults to debian-12.
        (
            "mcr.microsoft.com/windows/nanoserver:ltsc2022",
            "debian-12",
        ),
    ];
    for (image_ref, expected) in cases {
        let manifest = BaseImageManifest {
            image_ref: image_ref.to_string(),
            os: "windows".into(),
            os_version: None,
            arch: "amd64".into(),
            config_blob: Vec::new(),
        };
        assert_eq!(derive_source_distro(&manifest), expected);
    }
}
4696
4697 // -----------------------------------------------------------------
4698 // 4.B integration: drive a real RUN through HCS.
4699 //
4700 // Mirrors the setup in `crates/zlayer-agent/tests/windows_hcs_e2e.rs`
4701 // — runs only when the host has HCS + a nanoserver:ltsc2022 base
4702 // image already pulled into the blob cache. The test is gated
4703 // `#[ignore]` so `cargo test --workspace` does not try to dial HCS
4704 // on Linux CI; the Windows CI runner exercises it explicitly with
4705 // `cargo test -p zlayer-builder --tests -- --ignored`.
4706 //
4707 // What it asserts:
4708 // 1. `build_skeleton` materialises the parent chain.
4709 // 2. A trivial `RUN cmd /c echo hello > C:\hello.txt` succeeds
4710 // (exit 0).
4711 // 3. `skeleton.base_layers.len()` grew by 1 (the post-RUN diff
4712 // layer was committed).
4713 // 4. `skeleton.working_chain.len()` grew by 1 (the new RO layer
4714 // is on disk and would chain into the next RUN).
4715 #[tokio::test]
4716 #[ignore = "requires Windows host with Hyper-V + mcr.microsoft.com/windows/nanoserver:ltsc2022 base image"]
4717 async fn run_step_emits_new_layer_on_windows_host() {
4718 let cache_dir = std::env::temp_dir().join("zlayer-wcow-run-e2e");
4719 std::fs::create_dir_all(&cache_dir).expect("create cache_dir");
4720
4721 let cfg = WindowsBuildConfig {
4722 cache_dir,
4723 registry_auth: RegistryAuth::Anonymous,
4724 platform: WindowsBuildConfig::default_platform().to_string(),
4725 os_version_override: None,
4726 scratch_size_gb: WindowsBuildConfig::default_scratch_size_gb(),
4727 };
4728 let builder = WindowsBuilder::new(cfg);
4729
4730 // Write a minimal Dockerfile to a temp dir so the parser has
4731 // real bytes to consume. We do not need a real build context —
4732 // there is no COPY here.
4733 let ctx_dir = tempfile::tempdir().expect("tmpdir");
4734 let dockerfile_path = ctx_dir.path().join("Dockerfile");
4735 std::fs::write(
4736 &dockerfile_path,
4737 b"FROM mcr.microsoft.com/windows/nanoserver:ltsc2022\nRUN cmd /c echo hello > C:\\hello.txt\n",
4738 )
4739 .expect("write Dockerfile");
4740
4741 let ctx = BuildContext {
4742 context_dir: ctx_dir.path().to_path_buf(),
4743 dockerfile_path: PathBuf::from("Dockerfile"),
4744 build_args: HashMap::new(),
4745 tag: "zlayer-wcow-run-e2e:test".to_string(),
4746 ltsc: None,
4747 };
4748
4749 let mut skeleton = builder
4750 .build_skeleton(&ctx)
4751 .await
4752 .expect("build_skeleton must succeed against the real MCR base image");
4753 let base_layer_count = skeleton.base_layers.len();
4754 let working_chain_count = skeleton.working_chain.len();
4755 assert!(
4756 base_layer_count >= 1,
4757 "expected at least one base layer materialised"
4758 );
4759
4760 let stage = &skeleton.parsed_dockerfile.stages[0].clone();
4761 let run_instr = stage
4762 .instructions
4763 .iter()
4764 .find(|i| matches!(i, Instruction::Run(_)))
4765 .cloned()
4766 .expect("Dockerfile fixture has a RUN");
4767
4768 builder
4769 .execute_instruction(&mut skeleton, &ctx, &run_instr)
4770 .await
4771 .expect("RUN cmd /c echo hello must succeed on a Windows host");
4772
4773 assert_eq!(
4774 skeleton.base_layers.len(),
4775 base_layer_count + 1,
4776 "RUN must append exactly one descriptor to base_layers"
4777 );
4778 assert_eq!(
4779 skeleton.working_chain.len(),
4780 working_chain_count + 1,
4781 "RUN must append exactly one on-disk layer entry to working_chain"
4782 );
4783 }
4784
4785 // -----------------------------------------------------------------
4786 // 4.D: OCI manifest + config emission
4787 // -----------------------------------------------------------------
4788
4789 /// Build a minimal foreign-base-only skeleton fixture for emit tests.
4790 /// The base manifest carries an explicit `os.version` and a config
4791 /// blob with one `diff_id`; the base layer descriptor carries the
4792 /// MCR foreign-layer media type + a real-looking `urls[]`.
4793 fn skeleton_with_foreign_base() -> BuildSkeleton {
4794 let parsed =
4795 Dockerfile::parse("FROM mcr.microsoft.com/windows/nanoserver:ltsc2022\n").unwrap();
4796 let base_config_blob = serde_json::json!({
4797 "architecture": "amd64",
4798 "os": "windows",
4799 "os.version": "10.0.20348.2227",
4800 "rootfs": {
4801 "type": "layers",
4802 "diff_ids": ["sha256:base0000000000000000000000000000000000000000000000000000000000"],
4803 },
4804 "config": {},
4805 })
4806 .to_string()
4807 .into_bytes();
4808 BuildSkeleton {
4809 parsed_dockerfile: parsed,
4810 base_layers: vec![LayerRef {
4811 digest: "sha256:basecompressed00000000000000000000000000000000000000000000000000"
4812 .to_string(),
4813 media_type: FOREIGN_WINDOWS_LAYER_MEDIA_TYPE.to_string(),
4814 size: 12345,
4815 urls: vec![
4816 "https://mcr.microsoft.com/v2/windows/nanoserver/blobs/sha256:base".to_string(),
4817 ],
4818 }],
4819 base_manifest: BaseImageManifest {
4820 image_ref: "mcr.microsoft.com/windows/nanoserver:ltsc2022".into(),
4821 os: "windows".into(),
4822 os_version: Some("10.0.20348.2227".to_string()),
4823 arch: "amd64".into(),
4824 config_blob: base_config_blob,
4825 },
4826 working_layer_chain_dir: std::env::temp_dir().join("zlayer-wcow-emit-tests/x"),
4827 working_chain: vec![WindowsLayerEntry {
4828 layer_id: "base".to_string(),
4829 layer_path: PathBuf::from("/nonexistent/base"),
4830 blob_path: None,
4831 }],
4832 image_config: OciImageConfig::default(),
4833 instruction_log: vec![ExecutedInstruction {
4834 source_line: "FROM mcr.microsoft.com/windows/nanoserver:ltsc2022".to_string(),
4835 produced_layer: true,
4836 timestamp: Utc::now(),
4837 }],
4838 provisioned_toolchain_language: None,
4839 }
4840 }
4841
4842 #[tokio::test]
4843 async fn emit_image_simple_base_only() {
4844 let cfg = dummy_config();
4845 let skel = skeleton_with_foreign_base();
4846 let built = emit_image_impl(&cfg, &skel, "myimage:test")
4847 .await
4848 .expect("emit must succeed for a foreign-base-only skeleton");
4849
4850 // Manifest deserialises and has exactly one foreign layer with
4851 // the urls[] preserved.
4852 let manifest: serde_json::Value = serde_json::from_slice(&built.manifest_blob).unwrap();
4853 assert_eq!(manifest["schemaVersion"], 2);
4854 assert_eq!(
4855 manifest["mediaType"], OCI_IMAGE_MANIFEST_MEDIA_TYPE,
4856 "manifest mediaType must be the OCI image manifest type"
4857 );
4858 let layers = manifest["layers"].as_array().expect("layers array");
4859 assert_eq!(
4860 layers.len(),
4861 1,
4862 "base-only skeleton emits exactly one layer"
4863 );
4864 let l0 = &layers[0];
4865 assert_eq!(l0["mediaType"], FOREIGN_WINDOWS_LAYER_MEDIA_TYPE);
4866 let urls = l0["urls"].as_array().expect("foreign layer carries urls");
4867 assert_eq!(
4868 urls[0].as_str().unwrap(),
4869 "https://mcr.microsoft.com/v2/windows/nanoserver/blobs/sha256:base"
4870 );
4871
4872 // Image config has one history entry (the FROM), os/os.version/
4873 // architecture set, and inherits the base's diff_id.
4874 let ic: serde_json::Value = serde_json::from_slice(&built.image_config_blob).unwrap();
4875 assert_eq!(ic["os"], "windows");
4876 assert_eq!(ic["os.version"], "10.0.20348.2227");
4877 assert_eq!(ic["architecture"], "amd64");
4878 let history = ic["history"].as_array().expect("history array");
4879 assert_eq!(
4880 history.len(),
4881 1,
4882 "FROM-only skeleton emits one history entry"
4883 );
4884 assert!(history[0]["created_by"]
4885 .as_str()
4886 .unwrap()
4887 .starts_with("FROM mcr.microsoft.com/windows/nanoserver:ltsc2022"));
4888 assert!(
4889 history[0].get("empty_layer").is_none(),
4890 "FROM produced a layer, so empty_layer must be omitted (or false)"
4891 );
4892 let diff_ids = ic["rootfs"]["diff_ids"].as_array().expect("diff_ids array");
4893 assert_eq!(diff_ids.len(), 1);
4894 assert_eq!(
4895 diff_ids[0].as_str().unwrap(),
4896 "sha256:base0000000000000000000000000000000000000000000000000000000000"
4897 );
4898
4899 // Config descriptor in the manifest points at the image config
4900 // blob's digest with the right size.
4901 assert_eq!(
4902 manifest["config"]["digest"].as_str().unwrap(),
4903 built.image_config_digest
4904 );
4905 assert_eq!(
4906 manifest["config"]["size"].as_u64().unwrap(),
4907 built.image_config_blob.len() as u64
4908 );
4909
4910 // Manifest digest matches what we'd recompute.
4911 let recomputed = compute_sha256_hex(&built.manifest_blob);
4912 assert_eq!(recomputed, built.manifest_digest);
4913 assert_eq!(built.tag, "myimage:test");
4914 }
4915
4916 #[tokio::test]
4917 async fn emit_image_with_run_step() {
4918 let cfg = dummy_config();
4919 let mut skel = skeleton_with_foreign_base();
4920 // Append a synthetic RUN-produced layer + log entry.
4921 skel.base_layers.push(LayerRef {
4922 digest: "sha256:run111111111111111111111111111111111111111111111111111111111111".into(),
4923 media_type: OCI_TAR_GZIP_LAYER_MEDIA_TYPE.to_string(),
4924 size: 9999,
4925 urls: Vec::new(),
4926 });
4927 skel.working_chain.push(WindowsLayerEntry {
4928 layer_id: "run1".to_string(),
4929 layer_path: PathBuf::from("/nonexistent/run1"),
4930 blob_path: None,
4931 });
4932 skel.instruction_log.push(ExecutedInstruction {
4933 source_line: "RUN choco install -y curl".to_string(),
4934 produced_layer: true,
4935 timestamp: Utc::now(),
4936 });
4937
4938 let built = emit_image_impl(&cfg, &skel, "myimage:run").await.unwrap();
4939 let manifest: serde_json::Value = serde_json::from_slice(&built.manifest_blob).unwrap();
4940 let layers = manifest["layers"].as_array().unwrap();
4941 assert_eq!(layers.len(), 2, "FROM + RUN produces two layer descriptors");
4942 assert_eq!(layers[0]["mediaType"], FOREIGN_WINDOWS_LAYER_MEDIA_TYPE);
4943 assert_eq!(layers[1]["mediaType"], OCI_TAR_GZIP_LAYER_MEDIA_TYPE);
4944 assert!(
4945 layers[1].get("urls").is_none(),
4946 "non-foreign layer must NOT carry urls[]"
4947 );
4948
4949 let ic: serde_json::Value = serde_json::from_slice(&built.image_config_blob).unwrap();
4950 let history = ic["history"].as_array().unwrap();
4951 assert_eq!(history.len(), 2);
4952 assert_eq!(
4953 history[1]["created_by"].as_str().unwrap(),
4954 "RUN choco install -y curl"
4955 );
4956 }
4957
4958 #[tokio::test]
4959 async fn emit_image_with_config_only_instructions() {
4960 let cfg = dummy_config();
4961 let mut skel = skeleton_with_foreign_base();
4962 // Two config-only entries (ENV + WORKDIR) — neither produces a
4963 // layer.
4964 skel.instruction_log.push(ExecutedInstruction {
4965 source_line: "ENV FOO=bar".to_string(),
4966 produced_layer: false,
4967 timestamp: Utc::now(),
4968 });
4969 skel.instruction_log.push(ExecutedInstruction {
4970 source_line: "WORKDIR C:\\app".to_string(),
4971 produced_layer: false,
4972 timestamp: Utc::now(),
4973 });
4974 skel.image_config.env.push("FOO=bar".to_string());
4975 skel.image_config.working_dir = Some("C:\\app".to_string());
4976
4977 let built = emit_image_impl(&cfg, &skel, "myimage:cfg").await.unwrap();
4978 let manifest: serde_json::Value = serde_json::from_slice(&built.manifest_blob).unwrap();
4979 let layers = manifest["layers"].as_array().unwrap();
4980 assert_eq!(
4981 layers.len(),
4982 1,
4983 "config-only instructions must NOT add layer descriptors"
4984 );
4985
4986 let ic: serde_json::Value = serde_json::from_slice(&built.image_config_blob).unwrap();
4987 let history = ic["history"].as_array().unwrap();
4988 assert_eq!(
4989 history.len(),
4990 3,
4991 "FROM + ENV + WORKDIR produces three history entries"
4992 );
4993 assert!(history[0].get("empty_layer").is_none());
4994 assert_eq!(history[1]["empty_layer"], true);
4995 assert_eq!(history[2]["empty_layer"], true);
4996 // ENV / WORKDIR end up in the image config's `config` object.
4997 assert_eq!(ic["config"]["WorkingDir"], "C:\\app");
4998 assert_eq!(ic["config"]["Env"][0], "FOO=bar");
4999 }
5000
/// Pins `compute_sha256_hex` against the well-known SHA-256 of
/// `"hello"`, including the `sha256:` prefix on the result.
#[test]
fn compute_sha256_known_input() {
    let expected = "sha256:2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824";
    let actual = compute_sha256_hex(b"hello");
    assert_eq!(actual, expected);
}
5010
5011 #[tokio::test]
5012 async fn foreign_layer_carries_urls_through_manifest() {
5013 let cfg = dummy_config();
5014 let skel = skeleton_with_foreign_base();
5015 let built = emit_image_impl(&cfg, &skel, "myimage:foreign")
5016 .await
5017 .unwrap();
5018 // Sanity on the typed BuiltImage view.
5019 let foreign = &built.layers[0];
5020 assert_eq!(foreign.media_type, FOREIGN_WINDOWS_LAYER_MEDIA_TYPE);
5021 let urls = foreign
5022 .urls
5023 .as_ref()
5024 .expect("foreign layer must carry an urls[] vector through BuiltImage");
5025 assert!(
5026 !urls.is_empty(),
5027 "urls[] must be non-empty on a foreign layer"
5028 );
5029 assert!(urls[0].starts_with("https://mcr.microsoft.com/"));
5030
5031 // And on the wire form.
5032 let manifest: serde_json::Value = serde_json::from_slice(&built.manifest_blob).unwrap();
5033 let l0 = &manifest["layers"][0];
5034 let on_wire_urls = l0["urls"].as_array().expect("wire form must carry urls[]");
5035 assert!(!on_wire_urls.is_empty());
5036 }
5037
5038 #[tokio::test]
5039 async fn emit_image_errors_when_os_version_unresolved() {
5040 let cfg = dummy_config();
5041 let mut skel = skeleton_with_foreign_base();
5042 skel.base_manifest.os_version = None;
5043 // Strip os.version from the base config blob too.
5044 skel.base_manifest.config_blob = serde_json::json!({
5045 "architecture": "amd64",
5046 "os": "windows",
5047 "rootfs": {
5048 "type": "layers",
5049 "diff_ids": ["sha256:base"],
5050 },
5051 "config": {},
5052 })
5053 .to_string()
5054 .into_bytes();
5055 let err = emit_image_impl(&cfg, &skel, "myimage:err")
5056 .await
5057 .expect_err("emit must error without an os.version");
5058 assert!(
5059 matches!(err, BuildError::OsVersionUnresolved),
5060 "expected OsVersionUnresolved, got: {err}"
5061 );
5062 }
5063
5064 // -----------------------------------------------------------------
5065 // Task 4.E push tests
5066 //
5067 // These exercise `push_impl` against a recording `PushTarget` double
5068 // so we can assert on the wire-side calls without needing a live
5069 // registry. The fourth, `build_and_push_e2e`, is the only test that
5070 // needs a real network; it's `#[ignore]`d by default.
5071 // -----------------------------------------------------------------
5072
5073 use std::sync::Mutex;
5074
/// Accumulated view of the calls a [`RecordingPushTarget`] has
/// received. We capture the bare minimum needed by the assertions:
/// which blobs were uploaded (by digest), whether the manifest PUT
/// happened, and what bytes the manifest PUT sent so foreign-layer
/// round-trip can be verified.
#[derive(Debug, Default, Clone)]
struct PushRecord {
    /// Digest of every blob whose `upload_blob` call succeeded, in
    /// call order. Uploads rejected via `fail_on_digest` are not
    /// recorded.
    uploaded_blob_digests: Vec<String>,
    /// `(reference, manifest bytes, content type)` of the most recent
    /// `put_manifest` call, or `None` if no PUT has happened.
    manifest_put: Option<(String, Vec<u8>, String)>,
}
5084
/// In-test [`PushTarget`] double. Optional `fail_on_digest` causes
/// `upload_blob` to return an `Err` when that digest is uploaded —
/// used to verify [`BuildError::BlobUploadFailed`] propagation.
struct RecordingPushTarget {
    /// Calls seen so far; behind a `Mutex` because the trait methods
    /// only receive `&self`.
    inner: Mutex<PushRecord>,
    /// When `Some`, an `upload_blob` call for exactly this digest
    /// fails instead of being recorded.
    fail_on_digest: Option<String>,
}
5092
5093 impl RecordingPushTarget {
5094 fn new() -> Self {
5095 Self {
5096 inner: Mutex::new(PushRecord::default()),
5097 fail_on_digest: None,
5098 }
5099 }
5100
5101 fn with_failure(digest: impl Into<String>) -> Self {
5102 Self {
5103 inner: Mutex::new(PushRecord::default()),
5104 fail_on_digest: Some(digest.into()),
5105 }
5106 }
5107
5108 fn snapshot(&self) -> PushRecord {
5109 self.inner.lock().unwrap().clone()
5110 }
5111 }
5112
5113 #[async_trait::async_trait]
5114 impl PushTarget for RecordingPushTarget {
5115 async fn upload_blob(
5116 &self,
5117 _reference: &str,
5118 digest: &str,
5119 _media_type: &str,
5120 _data: Vec<u8>,
5121 _auth: &RegistryAuth,
5122 ) -> std::result::Result<(), String> {
5123 if let Some(fail) = &self.fail_on_digest {
5124 if fail == digest {
5125 return Err(format!("simulated upload failure for {digest}"));
5126 }
5127 }
5128 self.inner
5129 .lock()
5130 .unwrap()
5131 .uploaded_blob_digests
5132 .push(digest.to_string());
5133 Ok(())
5134 }
5135
5136 async fn put_manifest(
5137 &self,
5138 reference: &str,
5139 bytes: Vec<u8>,
5140 content_type: &str,
5141 _auth: &RegistryAuth,
5142 ) -> std::result::Result<(), String> {
5143 self.inner.lock().unwrap().manifest_put =
5144 Some((reference.to_string(), bytes, content_type.to_string()));
5145 Ok(())
5146 }
5147 }
5148
5149 /// Construct a [`BuiltImage`] fixture with one foreign base layer
5150 /// (`urls` set, empty `local_path`) and one OCI builder-produced
5151 /// layer backed by a real on-disk blob. The OCI layer's `local_path`
5152 /// points at a tempfile so `push_impl` can `tokio::fs::read` it; the
5153 /// caller keeps the `TempDir` alive for the lifetime of the test.
5154 fn built_image_fixture() -> (BuiltImage, tempfile::TempDir) {
5155 let tmp = tempfile::tempdir().expect("tmpdir");
5156 let oci_blob_path = tmp.path().join("oci-layer.tar.gz");
5157 std::fs::write(&oci_blob_path, b"fake-oci-layer-bytes").expect("write fake blob");
5158
5159 let layers = vec![
5160 EmittedLayer {
5161 media_type: FOREIGN_WINDOWS_LAYER_MEDIA_TYPE.to_string(),
5162 digest: "sha256:foreign00000000000000000000000000000000000000000000000000000000"
5163 .to_string(),
5164 size: 12345,
5165 diff_id: "sha256:foreigndiff0000000000000000000000000000000000000000000000000000"
5166 .to_string(),
5167 local_path: PathBuf::new(),
5168 urls: Some(vec![
5169 "https://mcr.microsoft.com/v2/windows/nanoserver/blobs/sha256:foreign"
5170 .to_string(),
5171 ]),
5172 },
5173 EmittedLayer {
5174 media_type: OCI_TAR_GZIP_LAYER_MEDIA_TYPE.to_string(),
5175 digest: "sha256:oci11111111111111111111111111111111111111111111111111111111111111"
5176 .to_string(),
5177 size: 20,
5178 diff_id: "sha256:ocidiff111111111111111111111111111111111111111111111111111111111"
5179 .to_string(),
5180 local_path: oci_blob_path,
5181 urls: None,
5182 },
5183 ];
5184 // Build a real manifest blob via the same helper used in
5185 // production so the foreign urls[] really do appear in the bytes
5186 // we PUT (rather than being injected by the test).
5187 let manifest_blob =
5188 build_manifest_blob("sha256:config0000", 42, &layers).expect("manifest blob");
5189 let manifest_digest = compute_sha256_hex(&manifest_blob);
5190 let built = BuiltImage {
5191 tag: "ghcr.io/zorpxinc/zlayer-test:wcow-0.1".to_string(),
5192 image_config_blob: b"{\"fake\":\"config\"}".to_vec(),
5193 image_config_digest: "sha256:config0000".to_string(),
5194 manifest_blob,
5195 manifest_digest,
5196 layers,
5197 };
5198 (built, tmp)
5199 }
5200
5201 #[tokio::test]
5202 async fn push_skips_foreign_layers() {
5203 let (built, _tmp) = built_image_fixture();
5204 let target = RecordingPushTarget::new();
5205
5206 push_impl(&built, &built.tag, &RegistryAuth::Anonymous, &target)
5207 .await
5208 .expect("push must succeed against a noop target");
5209
5210 let rec = target.snapshot();
5211 // The foreign layer must NEVER be uploaded.
5212 assert!(
5213 !rec.uploaded_blob_digests
5214 .iter()
5215 .any(|d| d.contains("foreign")),
5216 "foreign layer was uploaded but must be skipped; uploads = {:?}",
5217 rec.uploaded_blob_digests
5218 );
5219 // The OCI builder-produced layer must be uploaded exactly once.
5220 let oci_uploads = rec
5221 .uploaded_blob_digests
5222 .iter()
5223 .filter(|d| d.starts_with("sha256:oci"))
5224 .count();
5225 assert_eq!(
5226 oci_uploads, 1,
5227 "OCI layer must be uploaded exactly once; uploads = {:?}",
5228 rec.uploaded_blob_digests
5229 );
5230 // The image config blob must be uploaded.
5231 assert!(
5232 rec.uploaded_blob_digests
5233 .iter()
5234 .any(|d| d == "sha256:config0000"),
5235 "image config blob must be uploaded; uploads = {:?}",
5236 rec.uploaded_blob_digests
5237 );
5238 // Manifest PUT happened once with the right content type.
5239 let (tag, _bytes, ct) = rec.manifest_put.as_ref().expect("manifest PUT recorded");
5240 assert_eq!(tag, &built.tag);
5241 assert_eq!(ct, OCI_IMAGE_MANIFEST_MEDIA_TYPE);
5242 }
5243
5244 #[tokio::test]
5245 async fn push_manifest_preserves_foreign_urls() {
5246 let (built, _tmp) = built_image_fixture();
5247 let target = RecordingPushTarget::new();
5248
5249 push_impl(&built, &built.tag, &RegistryAuth::Anonymous, &target)
5250 .await
5251 .expect("push must succeed");
5252
5253 let rec = target.snapshot();
5254 let (_tag, bytes, _ct) = rec.manifest_put.expect("manifest PUT recorded");
5255 // The PUT bytes must be EXACTLY the bytes BuiltImage built —
5256 // not a re-serialised round-trip — so the digest BuiltImage
5257 // computed matches what the registry indexes.
5258 assert_eq!(
5259 bytes, built.manifest_blob,
5260 "manifest PUT bytes must be byte-identical to BuiltImage::manifest_blob"
5261 );
5262 // And the foreign-layer urls[] must be in there.
5263 let manifest: serde_json::Value =
5264 serde_json::from_slice(&bytes).expect("PUT bytes must be valid JSON");
5265 let layer0 = &manifest["layers"][0];
5266 assert_eq!(layer0["mediaType"], FOREIGN_WINDOWS_LAYER_MEDIA_TYPE);
5267 let urls = layer0["urls"]
5268 .as_array()
5269 .expect("foreign layer urls[] must survive the PUT");
5270 assert_eq!(urls.len(), 1);
5271 assert_eq!(
5272 urls[0].as_str().unwrap(),
5273 "https://mcr.microsoft.com/v2/windows/nanoserver/blobs/sha256:foreign"
5274 );
5275 // Non-foreign layer must NOT carry urls[].
5276 assert!(
5277 manifest["layers"][1].get("urls").is_none(),
5278 "non-foreign layer must not carry a urls[] array on the wire"
5279 );
5280 }
5281
5282 #[tokio::test]
5283 async fn push_failure_surfaces_typed_error() {
5284 let (built, _tmp) = built_image_fixture();
5285 // Fail on the OCI layer digest so the failure is mid-push, not
5286 // at the manifest PUT.
5287 let oci_digest = built.layers[1].digest.clone();
5288 let target = RecordingPushTarget::with_failure(&oci_digest);
5289
5290 let err = push_impl(&built, &built.tag, &RegistryAuth::Anonymous, &target)
5291 .await
5292 .expect_err("push must fail when upload_blob errors");
5293 match err {
5294 BuildError::BlobUploadFailed { digest, tag, .. } => {
5295 assert_eq!(digest, oci_digest);
5296 assert_eq!(tag, built.tag);
5297 }
5298 other => panic!("expected BuildError::BlobUploadFailed, got: {other}"),
5299 }
5300 }
5301
5302 #[tokio::test]
5303 #[ignore = "live network: requires GHCR creds + mcr.microsoft.com/windows/nanoserver:ltsc2022 base + Windows host"]
5304 async fn build_and_push_e2e() {
5305 // Skipped by default; the WCOW build E2E lives in 4.F as its
5306 // own integration test. This stub exists so `cargo test -- \
5307 // --ignored` lists the entry point and a future operator can
5308 // wire it to the live test harness without re-discovering the
5309 // call shape.
5310 let cfg = dummy_config();
5311 let builder = WindowsBuilder::new(cfg);
5312 let tmp = tempfile::tempdir().expect("tmpdir");
5313 let ctx_dir = tmp.path().join("ctx");
5314 std::fs::create_dir_all(&ctx_dir).expect("mk ctx");
5315 std::fs::write(
5316 ctx_dir.join("Dockerfile"),
5317 "FROM mcr.microsoft.com/windows/nanoserver:ltsc2022\n",
5318 )
5319 .expect("write dockerfile");
5320 let ctx = BuildContext {
5321 context_dir: ctx_dir,
5322 dockerfile_path: PathBuf::from("Dockerfile"),
5323 build_args: HashMap::new(),
5324 tag: "ghcr.io/zorpxinc/zlayer-wcow-e2e:latest".to_string(),
5325 ltsc: None,
5326 };
5327 builder
5328 .build_and_push(&ctx)
5329 .await
5330 .expect("live build_and_push");
5331 }
5332
5333 /// Smoke test for the new
5334 /// [`WindowsBuilder::build_image_for_backend`] entry point. Verifies
5335 /// that the multi-stage gate fires (cross-platform pure logic — no
5336 /// HCS round-trip) AND that a `BuildFailed` event surfaces on the
5337 /// `event_tx` channel when one is wired. We can't drive the full
5338 /// happy path off-Windows because `build_skeleton_with_parsed`
5339 /// requires HCS for base materialisation; the happy path is
5340 /// covered by the existing `build_and_push_e2e` test gated on
5341 /// `--ignored`.
5342 #[tokio::test]
5343 async fn build_image_for_backend_rejects_multi_stage_and_emits_no_events_on_early_error() {
5344 let cfg = dummy_config();
5345 let builder = WindowsBuilder::new(cfg);
5346 let dockerfile = Dockerfile::parse(
5347 "FROM mcr.microsoft.com/windows/nanoserver:ltsc2022 AS one\n\
5348 FROM mcr.microsoft.com/windows/nanoserver:ltsc2022 AS two\n",
5349 )
5350 .expect("two-stage parse");
5351 let options = crate::builder::BuildOptions {
5352 tags: vec!["test:latest".to_string()],
5353 ..Default::default()
5354 };
5355 let (tx, rx) = std::sync::mpsc::channel::<crate::tui::BuildEvent>();
5356 let context = std::env::temp_dir();
5357 let result = builder
5358 .build_image_for_backend(&context, &dockerfile, &options, Some(&tx))
5359 .await;
5360 match result {
5361 Err(BuildError::NotSupported { operation }) => {
5362 assert!(
5363 operation.contains("multi-stage Windows builds"),
5364 "expected multi-stage NotSupported, got: {operation}"
5365 );
5366 }
5367 other => panic!("expected NotSupported, got: {other:?}"),
5368 }
5369 // The multi-stage check runs before any event emission, so the
5370 // channel should be empty.
5371 drop(tx);
5372 let received: Vec<_> = rx.try_iter().collect();
5373 assert!(
5374 received.is_empty(),
5375 "no events should be emitted when multi-stage gate fires; got {received:?}"
5376 );
5377 }
5378
5379 /// Smoke test verifying that `build_image_for_backend` emits
5380 /// `BuildStarted` + `StageStarted` + `BuildFailed` events when the
5381 /// base-image materialisation fails (it always does off-Windows,
5382 /// which makes this a portable check of the event-emission order).
5383 #[tokio::test]
5384 async fn build_image_for_backend_emits_started_then_failed_off_windows() {
5385 let cfg = dummy_config();
5386 let builder = WindowsBuilder::new(cfg);
5387 let dockerfile =
5388 Dockerfile::parse("FROM mcr.microsoft.com/windows/nanoserver:ltsc2022\nRUN echo hi\n")
5389 .expect("one-stage parse");
5390 let options = crate::builder::BuildOptions {
5391 tags: vec!["smoke:latest".to_string()],
5392 ..Default::default()
5393 };
5394 let (tx, rx) = std::sync::mpsc::channel::<crate::tui::BuildEvent>();
5395 let context = std::env::temp_dir();
5396 let result = builder
5397 .build_image_for_backend(&context, &dockerfile, &options, Some(&tx))
5398 .await;
5399 drop(tx);
5400 let events: Vec<_> = rx.try_iter().collect();
5401
5402 // On non-Windows, base materialisation surfaces NotSupported
5403 // (no HCS); on Windows the call typically fails the network
5404 // pull for a registry that's unreachable from CI. Either way,
5405 // the up-front BuildStarted + StageStarted events must have
5406 // fired before the failure.
5407 assert!(
5408 result.is_err(),
5409 "smoke test must fail because base materialisation cannot succeed in the unit-test env"
5410 );
5411 assert!(
5412 events
5413 .iter()
5414 .any(|e| matches!(e, crate::tui::BuildEvent::BuildStarted { .. })),
5415 "BuildStarted must fire before base materialisation; got events = {events:?}"
5416 );
5417 assert!(
5418 events
5419 .iter()
5420 .any(|e| matches!(e, crate::tui::BuildEvent::StageStarted { .. })),
5421 "StageStarted must fire before base materialisation; got events = {events:?}"
5422 );
5423 }
5424}