zlayer-agent 0.12.9

Container runtime agent using libcontainer/youki
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
#![cfg(target_os = "windows")]
//! Windows-gated end-to-end tests for the HCS (Host Compute Service)
//! container runtime under **Hyper-V isolation**.
//!
//! These tests mirror `tests/windows_hcs_e2e.rs` (the W2 process-isolation
//! suite) but pin `isolation: hyperv` on every `ServiceSpec`. They require a
//! real Windows 10/11 or Windows Server host with the **Hyper-V** feature
//! AND the **Windows Containers** feature enabled (the latter installs the
//! UVM boot files under
//! `%ProgramData%\Microsoft\Windows\Hyper-V\Containers\` that
//! `zlayer_agent::windows::uvm::Uvm::create` probes for), plus outbound
//! network access to `mcr.microsoft.com` to pull the nanoserver layers.
//!
//! They are `#[ignore]` by default — run with:
//!
//! ```powershell
//! cargo test --package zlayer-agent --test windows_hcs_hyperv_e2e -- --ignored --nocapture
//! ```
//!
//! # Why this file exists
//!
//! W3 wires Hyper-V isolation end-to-end through the same `Runtime` trait
//! surface that process isolation already used. The compute-system doc
//! switches from a populated `Container` body to a populated
//! `VirtualMachine` body backed by a per-container UVM scratch VHDX. Without
//! this file, that Hyper-V code path has no automated coverage of the full
//! `pull_image → create_container → start_container → poll → exec → stop →
//! remove` lifecycle on a real Hyper-V host. Test 1 pins the single-container
//! lifecycle, test 2 proves `exec` round-trips through the UVM, and test 3
//! exercises two independent UVMs in parallel to make sure scratch VHDX
//! allocation and teardown do not collide.
#![allow(
    clippy::too_many_lines,
    clippy::used_underscore_binding,
    clippy::doc_markdown
)]

use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;

use zlayer_agent::runtimes::hcs::{HcsConfig, HcsRuntime};
use zlayer_agent::{ContainerId, ContainerState, Runtime};
use zlayer_spec::{DeploymentSpec, ServiceSpec};

/// MCR nanoserver tag known to be available for ltsc2022. Chosen because
/// it is small (~120 MB compressed) and exits-on-empty-entrypoint friendly.
/// The same tag also boots fine inside the Hyper-V UVM that the Windows
/// Containers feature ships, so we use it for both isolation modes.
///
/// Operators can override at runtime with `ZLAYER_HCS_TEST_IMAGE` — most
/// commonly to swap to `mcr.microsoft.com/windows/servercore:ltsc2022` for
/// A/B testing whether a Hyper-V dial-out failure is image-specific (the
/// stripped nanoserver UVM may be missing a DLL that `vmcomputeagent.exe`
/// depends on) versus host-side.
const TEST_IMAGE_DEFAULT: &str = "mcr.microsoft.com/windows/nanoserver:ltsc2022";

fn test_image() -> String {
    std::env::var("ZLAYER_HCS_TEST_IMAGE").unwrap_or_else(|_| TEST_IMAGE_DEFAULT.to_string())
}

/// Generate a unique short suffix so parallel CI shards do not collide on
/// the per-test storage root or HCS system id.
#[allow(clippy::cast_possible_truncation)]
fn unique_name(prefix: &str) -> String {
    let pid = std::process::id();
    let ts = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_millis() as u64
        % 1_000_000;
    format!("{prefix}-{pid}-{ts}")
}

/// Allocate a per-test storage root under the OS temp dir. We deliberately
/// avoid `tempfile::TempDir` so that a teardown failure leaves the directory
/// (and the scratch VHDX inside it) visible for post-mortem on CI — the
/// explicit `remove_dir_all` calls in each test perform the cleanup.
fn fresh_storage_root(tag: &str) -> PathBuf {
    std::env::temp_dir().join(format!("zlayer-hcs-hyperv-e2e-{}", unique_name(tag)))
}

/// Build the test-local `HcsConfig`. Every spec in this file sets
/// `isolation: hyperv` explicitly, so the runtime's image-aware resolver
/// (`resolve_isolation_for_image`) routes them through the Hyper-V path
/// regardless of the host's Windows build.
///
/// `slice_cidr` is set to a tiny per-test /28 — Part A made overlay
/// attachment a hard requirement (no more silent degrade), so every
/// `create_container` needs a slice to allocate an IP from. The slice is
/// scoped to the test's `tag` so concurrent tests don't collide on IPs.
fn test_config(tag: &str) -> (HcsConfig, PathBuf) {
    let storage_root = fresh_storage_root(tag);
    let slice: ipnet::IpNet = "10.220.99.16/28"
        .parse()
        .expect("test slice CIDR must be valid");
    let cfg = HcsConfig {
        storage_root: storage_root.clone(),
        default_scratch_size_gb: 20,
        slice_cidr: Some(slice),
        ..HcsConfig::default()
    };
    let _ = tag; // tag is currently used only for storage root naming
    (cfg, storage_root)
}

/// Construct a fresh `HcsRuntime` wrapped in `Arc` together with the
/// per-test storage root the caller is responsible for cleaning up. We do
/// this in a single helper because all three tests share the exact same
/// setup boilerplate — extracting it keeps each test focused on its
/// assertion shape.
async fn make_runtime(tag: &str) -> (Arc<HcsRuntime>, PathBuf) {
    let (cfg, storage_root) = test_config(tag);
    let runtime = HcsRuntime::new(cfg)
        .await
        .expect("HcsRuntime::new must succeed on a Windows host with HCS available");
    (Arc::new(runtime), storage_root)
}

/// Parse a single-service `DeploymentSpec` YAML and pop the named service
/// out. Mirrors the helper pattern used in `windows_hcs_e2e.rs`.
fn spec_from_yaml(yaml: &str, service: &str) -> ServiceSpec {
    serde_yaml::from_str::<DeploymentSpec>(yaml)
        .expect("test YAML must parse into DeploymentSpec")
        .services
        .remove(service)
        .unwrap_or_else(|| panic!("test YAML is missing service {service:?}"))
}

/// Build a short-lived spec that exits with code 0 immediately, pinned to
/// Hyper-V isolation. `isolation: hyperv` is `serde(rename_all = "kebab-case")`
/// for [`zlayer_spec::IsolationMode::Hyperv`].
fn echo_spec_hyperv() -> ServiceSpec {
    let image = test_image();
    let yaml = format!(
        r#"
version: v1
deployment: hcs-hyperv-e2e
services:
  echo:
    rtype: service
    isolation: hyperv
    image:
      name: {image}
    command:
      entrypoint: ["cmd", "/c", "echo", "hello from hyperv"]
    endpoints:
      - name: dummy
        protocol: tcp
        port: 8080
    scale:
      mode: fixed
      replicas: 1
"#
    );
    spec_from_yaml(&yaml, "echo")
}

/// Build a long-lived spec used for the exec round-trip test, also pinned
/// to Hyper-V. `ping -n 60` keeps the container alive ~60 seconds — plenty
/// for `cmd /c hostname` to round-trip through the UVM.
fn long_lived_spec_hyperv() -> ServiceSpec {
    let image = test_image();
    let yaml = format!(
        r#"
version: v1
deployment: hcs-hyperv-e2e
services:
  longlived:
    rtype: service
    isolation: hyperv
    image:
      name: {image}
    command:
      entrypoint: ["cmd", "/c", "ping", "-n", "60", "127.0.0.1"]
    endpoints:
      - name: dummy
        protocol: tcp
        port: 8080
    scale:
      mode: fixed
      replicas: 1
"#
    );
    spec_from_yaml(&yaml, "longlived")
}

/// Poll `container_state` until it matches the expected variant or the
/// budget is exhausted. For `Exited` we match the variant (not the code)
/// because the caller asserts on the code separately. `Running` is matched
/// exactly. Returns the last observed state on success so callers can
/// inspect the exit code.
async fn wait_for_state(
    runtime: &dyn Runtime,
    id: &ContainerId,
    expected: ContainerState,
    budget: Duration,
) -> Result<ContainerState, String> {
    let start = std::time::Instant::now();
    let poll = Duration::from_millis(200);
    let mut last: Option<ContainerState> = None;

    while start.elapsed() < budget {
        match runtime.container_state(id).await {
            Ok(state) => {
                let matches = match (&state, &expected) {
                    (ContainerState::Exited { .. }, ContainerState::Exited { .. }) => true,
                    (a, b) => a == b,
                };
                if matches {
                    return Ok(state);
                }
                last = Some(state);
            }
            Err(e) => return Err(format!("container_state error: {e}")),
        }
        tokio::time::sleep(poll).await;
    }

    Err(format!(
        "timed out after {budget:?} waiting for {expected:?}; last observed = {last:?}"
    ))
}

/// Best-effort directory removal that swallows errors. Used in teardown
/// where we must not mask the original assertion failure. The per-container
/// scratch VHDX inside is dropped by `Uvm::Drop` when the runtime tears
/// down its `ContainerEntry`, so by the time we get here only an empty
/// (or near-empty) directory should remain.
fn rm_storage_root(root: &PathBuf) {
    let _ = std::fs::remove_dir_all(root);
}

/// Test 1: end-to-end smoke for the Hyper-V isolation path. Pull the image,
/// create a Hyper-V-isolated container with a short-lived entrypoint, start
/// it, poll for exit, and remove it. Asserts the captured exit code is 0
/// because `cmd /c echo` exits cleanly once the line is printed.
///
/// Reaching `Exited { code: 0 }` here implies the UVM scratch VHDX was
/// allocated, the `VirtualMachine` compute-system doc was accepted by HCS,
/// the UVM booted, the container started inside it, and the UVM tore down
/// cleanly on `remove_container` (otherwise `Uvm::Drop` would log a leak).
#[tokio::test(flavor = "multi_thread")]
#[ignore = "requires Windows host with Hyper-V feature + Windows Containers feature enabled, plus nanoserver:ltsc2022 layers"]
async fn windows_hcs_hyperv_smoke_create_start_stop_remove() {
    let outcome = tokio::time::timeout(Duration::from_secs(900), async {
        let (runtime, storage_root) = make_runtime("smoke").await;

        runtime
            .pull_image(&test_image())
            .await
            .expect("pull_image must succeed before create_container");

        let id = ContainerId::new(unique_name("smoke"), 1);
        let spec = echo_spec_hyperv();

        let create_result = runtime.create_container(&id, &spec).await;

        let runtime_for_body = runtime.clone();
        let id_for_body = id.clone();
        let body = async move {
            create_result.expect("create_container must succeed under Hyper-V isolation");
            runtime_for_body
                .start_container(&id_for_body)
                .await
                .expect("start_container must succeed under Hyper-V isolation");

            let final_state = wait_for_state(
                runtime_for_body.as_ref(),
                &id_for_body,
                ContainerState::Exited { code: 0 },
                Duration::from_secs(180),
            )
            .await
            .expect("Hyper-V container must reach Exited within 180s");

            match final_state {
                ContainerState::Exited { code } => {
                    assert_eq!(code, 0, "echo entrypoint must exit with code 0");
                }
                other => panic!("expected Exited, got {other:?}"),
            }
        };

        let body_outcome = std::panic::AssertUnwindSafe(body)
            .catch_unwind_async()
            .await;

        let _ = runtime.remove_container(&id).await;
        rm_storage_root(&storage_root);

        if let Err(p) = body_outcome {
            std::panic::resume_unwind(p);
        }
    })
    .await;

    outcome
        .expect("windows_hcs_hyperv_smoke_create_start_stop_remove exceeded the 900s outer budget");
}

/// Test 2: `exec` round-trips through the Hyper-V UVM into the container.
/// `cmd /c hostname` is a safe builtin available in nanoserver. We assert
/// exit code 0 and non-empty stdout — the HCS exec path returns
/// authoritative exit codes from `HcsGetProcessProperties`, and `hostname`
/// always prints at least the container's NetBIOS name + newline, so a
/// non-empty stdout means the exec actually ran inside the UVM (vs.
/// shorting out at the runtime boundary).
#[tokio::test(flavor = "multi_thread")]
#[ignore = "requires Windows host with Hyper-V feature + Windows Containers feature enabled, plus nanoserver:ltsc2022 layers"]
async fn windows_hcs_hyperv_exec_inside_container() {
    let outcome = tokio::time::timeout(Duration::from_secs(900), async {
        let (runtime, storage_root) = make_runtime("exec").await;

        runtime
            .pull_image(&test_image())
            .await
            .expect("pull_image must succeed before create_container");

        let id = ContainerId::new(unique_name("exec"), 1);
        let spec = long_lived_spec_hyperv();

        let create_result = runtime.create_container(&id, &spec).await;

        let runtime_for_body = runtime.clone();
        let id_for_body = id.clone();
        let body = async move {
            create_result.expect("create_container must succeed under Hyper-V isolation");
            runtime_for_body
                .start_container(&id_for_body)
                .await
                .expect("start_container must succeed under Hyper-V isolation");

            wait_for_state(
                runtime_for_body.as_ref(),
                &id_for_body,
                ContainerState::Running,
                Duration::from_secs(120),
            )
            .await
            .expect("Hyper-V container must reach Running within 120s");

            let cmd: Vec<String> = vec!["cmd".into(), "/c".into(), "hostname".into()];
            let (exit_code, stdout, _stderr) = runtime_for_body
                .exec(&id_for_body, &cmd)
                .await
                .expect("exec must succeed against a Running Hyper-V container");

            assert_eq!(
                exit_code, 0,
                "`cmd /c hostname` must exit with code 0 inside the UVM",
            );
            assert!(
                !stdout.is_empty(),
                "`cmd /c hostname` must print at least the container hostname to stdout",
            );
        };

        let body_outcome = std::panic::AssertUnwindSafe(body)
            .catch_unwind_async()
            .await;

        let _ = runtime.stop_container(&id, Duration::from_secs(5)).await;
        let _ = runtime.remove_container(&id).await;
        rm_storage_root(&storage_root);

        if let Err(p) = body_outcome {
            std::panic::resume_unwind(p);
        }
    })
    .await;

    outcome.expect("windows_hcs_hyperv_exec_inside_container exceeded the 900s outer budget");
}

/// Test 3: two independent Hyper-V containers run concurrently inside the
/// same daemon. Each container gets its own UVM with its own scratch VHDX,
/// and tearing one down must not affect the other. This catches scratch
/// VHDX path collisions, UVM ID collisions, and any cross-container
/// resource sharing that would silently corrupt one side.
///
/// We create both, start both in parallel, wait for both to reach
/// `Running`, then stop and remove them independently and verify each
/// finishes its full lifecycle without interfering with the other.
#[tokio::test(flavor = "multi_thread")]
#[ignore = "requires Windows host with Hyper-V feature + Windows Containers feature enabled, plus nanoserver:ltsc2022 layers"]
async fn windows_hcs_hyperv_concurrent_pair() {
    let outcome = tokio::time::timeout(Duration::from_secs(1200), async {
        let (runtime, storage_root) = make_runtime("pair").await;

        runtime
            .pull_image(&test_image())
            .await
            .expect("pull_image must succeed before create_container");

        let id_a = ContainerId::new(unique_name("pair-a"), 1);
        let id_b = ContainerId::new(unique_name("pair-b"), 1);
        let spec_a = long_lived_spec_hyperv();
        let spec_b = long_lived_spec_hyperv();

        // Create both containers serially — `create_container` mutates the
        // shared `containers` map under a write lock, so back-to-back
        // serial creates are the correct shape (not concurrent).
        let create_a = runtime.create_container(&id_a, &spec_a).await;
        let create_b = runtime.create_container(&id_b, &spec_b).await;

        let runtime_for_body = runtime.clone();
        let id_a_for_body = id_a.clone();
        let id_other_for_body = id_b.clone();
        let body = async move {
            create_a.expect("create_container(a) must succeed under Hyper-V isolation");
            create_b.expect("create_container(b) must succeed under Hyper-V isolation");

            // Start both UVMs in parallel — this is the part that would
            // collide if scratch VHDX paths or UVM IDs were shared.
            let (start_a, start_b) = tokio::join!(
                runtime_for_body.start_container(&id_a_for_body),
                runtime_for_body.start_container(&id_other_for_body),
            );
            start_a.expect("start_container(a) must succeed");
            start_b.expect("start_container(b) must succeed");

            // Wait for both to reach Running in parallel.
            let (wait_a, wait_b) = tokio::join!(
                wait_for_state(
                    runtime_for_body.as_ref(),
                    &id_a_for_body,
                    ContainerState::Running,
                    Duration::from_secs(180),
                ),
                wait_for_state(
                    runtime_for_body.as_ref(),
                    &id_other_for_body,
                    ContainerState::Running,
                    Duration::from_secs(180),
                ),
            );
            wait_a.expect("container a must reach Running within 180s");
            wait_b.expect("container b must reach Running within 180s");

            // Stop and remove `a` first, then verify `b` is still Running.
            // This is the critical assertion — tearing down one UVM must
            // not affect the other.
            runtime_for_body
                .stop_container(&id_a_for_body, Duration::from_secs(10))
                .await
                .expect("stop_container(a) must succeed independently of b");
            runtime_for_body
                .remove_container(&id_a_for_body)
                .await
                .expect("remove_container(a) must succeed independently of b");

            let state_b = runtime_for_body
                .container_state(&id_other_for_body)
                .await
                .expect("container_state(b) must still be queryable after a is gone");
            assert_eq!(
                state_b,
                ContainerState::Running,
                "tearing down container a must not affect container b",
            );

            // Now tear down `b`.
            runtime_for_body
                .stop_container(&id_other_for_body, Duration::from_secs(10))
                .await
                .expect("stop_container(b) must succeed");
            runtime_for_body
                .remove_container(&id_other_for_body)
                .await
                .expect("remove_container(b) must succeed");
        };

        let body_outcome = std::panic::AssertUnwindSafe(body)
            .catch_unwind_async()
            .await;

        // Best-effort cleanup in case the body panicked partway through.
        let _ = runtime.stop_container(&id_a, Duration::from_secs(5)).await;
        let _ = runtime.remove_container(&id_a).await;
        let _ = runtime.stop_container(&id_b, Duration::from_secs(5)).await;
        let _ = runtime.remove_container(&id_b).await;
        rm_storage_root(&storage_root);

        if let Err(p) = body_outcome {
            std::panic::resume_unwind(p);
        }
    })
    .await;

    outcome.expect("windows_hcs_hyperv_concurrent_pair exceeded the 1200s outer budget");
}

/// Local async-aware `catch_unwind` adapter. `std::panic::catch_unwind`
/// cannot be wrapped around an `async` block directly because the future
/// must be polled, not just called once. This helper polls the wrapped
/// future and catches any panic raised inside `poll`, converting it into
/// the same `Err` shape `catch_unwind` produces for sync code.
trait CatchUnwindAsyncExt: std::future::Future + Sized {
    async fn catch_unwind_async(
        self,
    ) -> Result<Self::Output, Box<dyn std::any::Any + Send + 'static>>;
}

impl<F> CatchUnwindAsyncExt for std::panic::AssertUnwindSafe<F>
where
    F: std::future::Future,
{
    async fn catch_unwind_async(
        self,
    ) -> Result<F::Output, Box<dyn std::any::Any + Send + 'static>> {
        use std::future::Future;
        use std::pin::Pin;
        use std::task::Poll;

        let mut inner = Box::pin(self.0);
        std::future::poll_fn(move |cx| {
            let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
                Pin::new(&mut inner).as_mut().poll(cx)
            }));
            match result {
                Ok(Poll::Pending) => Poll::Pending,
                Ok(Poll::Ready(v)) => Poll::Ready(Ok(v)),
                Err(panic) => Poll::Ready(Err(panic)),
            }
        })
        .await
    }
}