#![cfg(target_os = "windows")]
#![allow(
clippy::too_many_lines,
clippy::used_underscore_binding,
clippy::doc_markdown
)]
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use zlayer_agent::runtimes::hcs::{HcsConfig, HcsRuntime};
use zlayer_agent::{ContainerId, ContainerState, Runtime};
use zlayer_spec::{DeploymentSpec, ServiceSpec};
const TEST_IMAGE: &str = "mcr.microsoft.com/windows/nanoserver:ltsc2025";
/// Builds a name of the form `{prefix}-{pid}-{millis}-{seq}` that is unique
/// within this process.
///
/// * `pid` is the current process id.
/// * `millis` is the wall-clock time since the Unix epoch in milliseconds,
///   reduced modulo 1_000_000 to keep the name short (falls back to `0` if the
///   clock reports a time before the epoch).
/// * `seq` is a process-wide monotonically increasing counter; it guarantees
///   that two calls made within the same millisecond still differ.
fn unique_name(prefix: &str) -> String {
    // Shared by every caller in this process; only uniqueness matters, so
    // relaxed ordering is sufficient.
    static SEQUENCE: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    let pid = std::process::id();
    // Take the modulus while still in u128 so no truncating cast is required.
    let ts = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_millis()
        % 1_000_000;
    let seq = SEQUENCE.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    format!("{prefix}-{pid}-{ts}-{seq}")
}
/// Returns a path under the system temp directory named
/// `zlayer-hcs-e2e-<unique_name(tag)>`.
///
/// Only the path is computed here; nothing is created on disk.
fn fresh_storage_root(tag: &str) -> PathBuf {
    let leaf = format!("zlayer-hcs-e2e-{}", unique_name(tag));
    let mut root = std::env::temp_dir();
    root.push(leaf);
    root
}
/// Builds an [`HcsConfig`] pointing at a fresh storage root for `tag`.
///
/// The scratch size is set to 20 GB; every other field keeps its
/// `HcsConfig::default()` value. The storage root path is returned alongside
/// the config so the caller can remove it afterwards.
fn test_config(tag: &str) -> (HcsConfig, PathBuf) {
    let root = fresh_storage_root(tag);
    (
        HcsConfig {
            default_scratch_size_gb: 20,
            storage_root: root.clone(),
            ..HcsConfig::default()
        },
        root,
    )
}
/// Parses `yaml` as a [`DeploymentSpec`] and returns the entry named
/// `service` from its `services` collection.
///
/// # Panics
///
/// Panics if the YAML does not deserialize into a `DeploymentSpec`, or if it
/// contains no service called `service`.
fn spec_from_yaml(yaml: &str, service: &str) -> ServiceSpec {
    let mut deployment: DeploymentSpec =
        serde_yaml::from_str(yaml).expect("test YAML must parse into DeploymentSpec");
    match deployment.services.remove(service) {
        Some(spec) => spec,
        None => panic!("test YAML is missing service {service:?}"),
    }
}
/// Returns the `echo` service spec used by the short-lived lifecycle test.
///
/// The spec is written as an inline YAML deployment (with `TEST_IMAGE`
/// interpolated as the image name) and parsed through `spec_from_yaml`. Its
/// entrypoint is `cmd /c echo "hello from hcs"`; the YAML also declares one
/// TCP endpoint on port 8080 and a fixed scale of one replica.
///
/// # Panics
///
/// Panics (inside `spec_from_yaml`) if the YAML fails to parse or lacks the
/// `echo` service.
fn echo_spec() -> ServiceSpec {
let yaml = format!(
r#"
version: v1
deployment: hcs-e2e
services:
echo:
rtype: service
image:
name: {TEST_IMAGE}
command:
entrypoint: ["cmd", "/c", "echo", "hello from hcs"]
endpoints:
- name: dummy
protocol: tcp
port: 8080
scale:
mode: fixed
replicas: 1
"#
);
// Pull the single service definition back out of the parsed deployment.
spec_from_yaml(&yaml, "echo")
}
/// Returns the `longlived` service spec used by the exec test.
///
/// The spec is written as an inline YAML deployment (with `TEST_IMAGE`
/// interpolated as the image name) and parsed through `spec_from_yaml`. Its
/// entrypoint is `cmd /c ping -n 60 127.0.0.1`; the YAML also declares one
/// TCP endpoint on port 8080 and a fixed scale of one replica.
///
/// # Panics
///
/// Panics (inside `spec_from_yaml`) if the YAML fails to parse or lacks the
/// `longlived` service.
fn long_lived_spec() -> ServiceSpec {
let yaml = format!(
r#"
version: v1
deployment: hcs-e2e
services:
longlived:
rtype: service
image:
name: {TEST_IMAGE}
command:
entrypoint: ["cmd", "/c", "ping", "-n", "60", "127.0.0.1"]
endpoints:
- name: dummy
protocol: tcp
port: 8080
scale:
mode: fixed
replicas: 1
"#
);
// Pull the single service definition back out of the parsed deployment.
spec_from_yaml(&yaml, "longlived")
}
/// Polls `runtime.container_state(id)` until the container reaches `expected`
/// or `budget` has elapsed.
///
/// The state is sampled every 200 ms. When `expected` is
/// `ContainerState::Exited { .. }`, any observed `Exited` state counts as a
/// match regardless of its exit code; all other states are compared with `==`.
///
/// # Returns
///
/// * `Ok(state)` — the first observed state that matched.
///
/// # Errors
///
/// * `container_state` itself failed — the message carries the underlying
///   error and polling stops immediately.
/// * `budget` ran out — the message names the budget, the expected state and
///   the last state that was observed (if any).
async fn wait_for_state(
    runtime: &dyn Runtime,
    id: &ContainerId,
    expected: ContainerState,
    budget: Duration,
) -> Result<ContainerState, String> {
    let poll_interval = Duration::from_millis(200);
    let started = std::time::Instant::now();
    let mut last: Option<ContainerState> = None;

    loop {
        if started.elapsed() >= budget {
            break;
        }

        let observed = runtime
            .container_state(id)
            .await
            .map_err(|e| format!("container_state error: {e}"))?;

        // `Exited` matches on the variant alone; the caller inspects the code.
        let both_exited = matches!(
            (&observed, &expected),
            (ContainerState::Exited { .. }, ContainerState::Exited { .. })
        );
        if both_exited || observed == expected {
            return Ok(observed);
        }

        last = Some(observed);
        tokio::time::sleep(poll_interval).await;
    }

    Err(format!(
        "timed out after {budget:?} waiting for {expected:?}; last observed = {last:?}"
    ))
}
/// Best-effort recursive removal of a test storage root.
///
/// Never panics and never fails the test: a directory that does not exist is
/// ignored silently, and any other removal error is reported on stderr so a
/// leftover directory is at least visible in the test output.
///
/// Accepts any `&Path`; existing callers passing `&PathBuf` keep working via
/// deref coercion.
fn rm_storage_root(root: &std::path::Path) {
    match std::fs::remove_dir_all(root) {
        Ok(()) => {}
        // Nothing to clean up (never created, or already removed).
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
        Err(e) => eprintln!(
            "warning: failed to remove storage root {}: {e}",
            root.display()
        ),
    }
}
/// Pulls `TEST_IMAGE` into a fresh storage root and asserts the pull succeeds.
///
/// The assertion runs under `catch_unwind` so that the storage root is removed
/// before a failure is re-raised. The whole scenario is capped at 600 seconds.
#[tokio::test(flavor = "multi_thread")]
#[ignore = "requires Windows host with HCS + outbound access to mcr.microsoft.com"]
async fn test_pull_nanoserver_image() {
    let scenario = async {
        let (cfg, storage_root) = test_config("pull");
        let runtime = HcsRuntime::new(cfg)
            .await
            .expect("HcsRuntime::new must succeed on a Windows host with HCS available");
        let pull_result = runtime.pull_image(TEST_IMAGE).await;

        // Capture a failed assertion instead of unwinding straight away, so
        // the cleanup below always runs.
        let verdict = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
            assert!(
                pull_result.is_ok(),
                "pull_image({TEST_IMAGE}) must succeed; got {pull_result:?}",
            );
        }));

        rm_storage_root(&storage_root);

        match verdict {
            Ok(()) => {}
            Err(payload) => std::panic::resume_unwind(payload),
        }
    };

    tokio::time::timeout(Duration::from_secs(600), scenario)
        .await
        .expect("test_pull_nanoserver_image exceeded the 600s outer budget");
}
#[tokio::test(flavor = "multi_thread")]
#[ignore = "requires Windows host with HCS + outbound access to mcr.microsoft.com"]
async fn test_run_short_lived_container_lifecycle() {
let outcome = tokio::time::timeout(Duration::from_secs(600), async {
let (cfg, storage_root) = test_config("life");
let runtime: Arc<HcsRuntime> = Arc::new(
HcsRuntime::new(cfg)
.await
.expect("HcsRuntime::new must succeed"),
);
runtime
.pull_image(TEST_IMAGE)
.await
.expect("pull_image must succeed before create_container");
let id = ContainerId::new(unique_name("life"), 1);
let spec = echo_spec();
let create_result = runtime.create_container(&id, &spec).await;
let runtime_for_body = runtime.clone();
let id_for_body = id.clone();
let body = async move {
create_result.expect("create_container must succeed");
runtime_for_body
.start_container(&id_for_body)
.await
.expect("start_container must succeed");
let final_state = wait_for_state(
runtime_for_body.as_ref(),
&id_for_body,
ContainerState::Exited { code: 0 },
Duration::from_secs(60),
)
.await
.expect("container must reach Exited within 60s");
match final_state {
ContainerState::Exited { code } => {
assert_eq!(code, 0, "echo entrypoint must exit with code 0");
}
other => panic!("expected Exited, got {other:?}"),
}
};
let body_outcome = std::panic::AssertUnwindSafe(body)
.catch_unwind_async()
.await;
let _ = runtime.remove_container(&id).await;
rm_storage_root(&storage_root);
if let Err(p) = body_outcome {
std::panic::resume_unwind(p);
}
})
.await;
outcome.expect("test_run_short_lived_container_lifecycle exceeded the 600s outer budget");
}
#[tokio::test(flavor = "multi_thread")]
#[ignore = "requires Windows host with HCS + outbound access to mcr.microsoft.com"]
async fn test_exec_into_running_container() {
let outcome = tokio::time::timeout(Duration::from_secs(600), async {
let (cfg, storage_root) = test_config("exec");
let runtime: Arc<HcsRuntime> = Arc::new(
HcsRuntime::new(cfg)
.await
.expect("HcsRuntime::new must succeed"),
);
runtime
.pull_image(TEST_IMAGE)
.await
.expect("pull_image must succeed before create_container");
let id = ContainerId::new(unique_name("exec"), 1);
let spec = long_lived_spec();
let create_result = runtime.create_container(&id, &spec).await;
let runtime_for_body = runtime.clone();
let id_for_body = id.clone();
let body = async move {
create_result.expect("create_container must succeed");
runtime_for_body
.start_container(&id_for_body)
.await
.expect("start_container must succeed");
wait_for_state(
runtime_for_body.as_ref(),
&id_for_body,
ContainerState::Running,
Duration::from_secs(30),
)
.await
.expect("container must reach Running within 30s");
let cmd: Vec<String> = vec!["cmd".into(), "/c".into(), "exit".into(), "7".into()];
let (exit_code, _stdout, _stderr) = runtime_for_body
.exec(&id_for_body, &cmd)
.await
.expect("exec must succeed against a Running container");
assert_eq!(
exit_code, 7,
"exec must round-trip the child's exit code through HCS",
);
};
let body_outcome = std::panic::AssertUnwindSafe(body)
.catch_unwind_async()
.await;
let _ = runtime.stop_container(&id, Duration::from_secs(5)).await;
let _ = runtime.remove_container(&id).await;
rm_storage_root(&storage_root);
if let Err(p) = body_outcome {
std::panic::resume_unwind(p);
}
})
.await;
outcome.expect("test_exec_into_running_container exceeded the 600s outer budget");
}
/// Extension trait that drives a future to completion while capturing any
/// panic raised during one of its polls.
trait CatchUnwindAsyncExt: std::future::Future + Sized {
    /// Resolves to `Ok(output)` when the future completes, or to
    /// `Err(payload)` carrying the panic payload when a poll panicked.
    async fn catch_unwind_async(
        self,
    ) -> Result<Self::Output, Box<dyn std::any::Any + Send + 'static>>;
}

/// Implemented for `AssertUnwindSafe<F>` so the caller explicitly vouches for
/// the unwind safety of the wrapped future.
impl<F> CatchUnwindAsyncExt for std::panic::AssertUnwindSafe<F>
where
    F: std::future::Future,
{
    async fn catch_unwind_async(
        self,
    ) -> Result<F::Output, Box<dyn std::any::Any + Send + 'static>> {
        use std::future::Future;
        use std::task::Poll;

        // Boxing pins the wrapped future so it can be polled repeatedly from
        // inside the closure below.
        let mut pinned = Box::pin(self.0);

        std::future::poll_fn(move |cx| {
            // Each individual poll runs under `catch_unwind`; a panic ends
            // the wrapper immediately with the captured payload.
            let polled = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
                pinned.as_mut().poll(cx)
            }));
            match polled {
                Err(payload) => Poll::Ready(Err(payload)),
                Ok(Poll::Ready(value)) => Poll::Ready(Ok(value)),
                Ok(Poll::Pending) => Poll::Pending,
            }
        })
        .await
    }
}