// Source: zlayer_agent/lib.rs

1//! `ZLayer` Agent - Container Runtime
2//!
3//! Manages container lifecycle, health checking, init actions, and proxy integration.
4
5pub mod auth;
6pub mod autoscale_controller;
7pub mod bundle;
8pub mod cdi;
9pub mod cgroups_stats;
10pub mod container_supervisor;
11pub mod cron_scheduler;
12pub mod dependency;
13pub mod env;
14pub mod error;
15pub mod gpu_detector;
16pub mod gpu_metrics;
17pub mod gpu_sharing;
18pub mod health;
19pub mod init;
20pub mod job;
21pub mod metrics_providers;
22pub mod netlink;
23pub mod overlay_manager;
24pub mod proxy_manager;
25pub mod runtime;
26pub mod runtimes;
27pub mod service;
28pub mod stabilization;
29pub mod storage_manager;
30
31#[cfg(target_os = "windows")]
32pub mod windows;
33
34pub use autoscale_controller::{has_adaptive_scaling, AutoscaleController};
35pub use bundle::*;
36pub use container_supervisor::{
37    ContainerSupervisor, SupervisedContainer, SupervisedState, SupervisorConfig, SupervisorEvent,
38};
39pub use cron_scheduler::{CronJobInfo, CronScheduler};
40pub use dependency::{
41    DependencyConditionChecker, DependencyError, DependencyGraph, DependencyNode, DependencyWaiter,
42    WaitResult,
43};
44pub use env::{
45    resolve_env_value, resolve_env_vars, resolve_env_with_secrets, EnvResolutionError, ResolvedEnv,
46};
47pub use error::*;
48pub use gpu_detector::{detect_gpus, GpuInfo};
49pub use health::*;
50pub use init::{BackoffConfig, InitOrchestrator};
51pub use job::{
52    JobExecution, JobExecutionId, JobExecutor, JobExecutorConfig, JobStatus, JobTrigger,
53};
54pub use metrics_providers::{RuntimeStatsProvider, ServiceManagerContainerProvider};
55pub use overlay_manager::{make_interface_name, OverlayManager};
56pub use proxy_manager::{ProxyManager, ProxyManagerConfig};
57pub use runtime::*;
58pub use runtimes::{create_runtime_for_image, detect_image_artifact_type};
59
60// Youki runtime types are only available on Linux with the `youki-runtime` feature.
61#[cfg(all(target_os = "linux", feature = "youki-runtime"))]
62pub use runtimes::{YoukiConfig, YoukiRuntime};
63
64#[cfg(feature = "docker")]
65pub use runtimes::DockerRuntime;
66
67#[cfg(feature = "wasm")]
68pub use runtimes::{WasmConfig, WasmRuntime};
69
70#[cfg(target_os = "macos")]
71pub use runtimes::macos_sandbox::SandboxRuntime;
72#[cfg(target_os = "macos")]
73pub use runtimes::macos_vm::VmRuntime;
74
75pub use service::*;
76pub use stabilization::{
77    wait_for_stabilization, ServiceHealthSummary, StabilizationOutcome, StabilizationResult,
78};
79pub use storage_manager::{StorageError, StorageManager, VolumeInfo};
80
81#[cfg(target_os = "macos")]
82use std::path::PathBuf;
83use std::sync::Arc;
84
/// Settings for the sandbox-based container runtime on macOS.
///
/// The runtime relies on Apple's sandbox framework (`sandbox_init` /
/// `sandbox-exec`) for process isolation. It is the preferred macOS runtime
/// when Docker is unavailable or unwanted.
#[cfg(target_os = "macos")]
#[derive(Clone, Debug)]
pub struct MacSandboxConfig {
    /// Root directory holding container data and rootfs trees.
    pub data_dir: PathBuf,
    /// Directory that receives container logs.
    pub log_dir: PathBuf,
    /// When `true`, containers are granted GPU access (Metal/MPS).
    pub gpu_access: bool,
}
100
#[cfg(target_os = "macos")]
impl Default for MacSandboxConfig {
    /// Build the default config from the system-wide `ZLayer` directory
    /// layout, with GPU access switched on.
    fn default() -> Self {
        let layout = zlayer_paths::ZLayerDirs::system_default();
        let data_dir = layout.data_dir().to_path_buf();
        let log_dir = layout.logs();

        Self {
            data_dir,
            log_dir,
            gpu_access: true,
        }
    }
}
112
/// Configuration for selecting and configuring a container runtime
///
/// Which variants exist depends on the target OS and on the enabled cargo
/// features (`youki-runtime`, `docker`, `wasm`); only `Auto` and `Mock` are
/// always present.
#[derive(Debug, Clone, Default)]
pub enum RuntimeConfig {
    /// Automatically select the best available runtime
    ///
    /// Selection logic (the first runtime that initializes wins):
    /// - On Linux: bundled libcontainer runtime (no external binary needed,
    ///   requires the `youki-runtime` feature), then Docker
    /// - On macOS: sandbox runtime with the VM runtime (libkrun) as an optional
    ///   delegate, then the VM runtime on its own, then Docker
    /// - On Windows: native HCS runtime with an optional WSL2 delegate, then Docker
    /// - The Docker fallback is only attempted when the `docker` feature is enabled
    /// - If no runtime can be initialized, returns an error
    #[default]
    Auto,
    /// Use the mock runtime for testing and development
    Mock,
    /// Use youki/libcontainer as the container runtime (Linux only, requires the `youki-runtime` feature)
    #[cfg(all(target_os = "linux", feature = "youki-runtime"))]
    Youki(YoukiConfig),
    /// Use Docker daemon as the container runtime (cross-platform)
    #[cfg(feature = "docker")]
    Docker,
    /// Use WebAssembly runtime with wasmtime for WASM workloads
    #[cfg(feature = "wasm")]
    Wasm(WasmConfig),
    /// Use macOS sandbox-based container runtime
    #[cfg(target_os = "macos")]
    MacSandbox(MacSandboxConfig),
    /// Use macOS Virtualization.framework for full VM isolation
    #[cfg(target_os = "macos")]
    MacVm,
    /// WSL2 backend (deprecated).
    ///
    /// Preserved for one release for back-compat with existing `runtime: wsl2`
    /// configs. No real WSL2-specific backend was ever shipped — this variant
    /// was a stub that suggested using Docker Desktop with the WSL2 backend.
    /// It is now dispatched as [`RuntimeConfig::Hcs`] with a default config.
    #[cfg(target_os = "windows")]
    #[deprecated(
        note = "Wsl2 is deprecated in favor of Hcs (native Windows containers via the \
                Host Compute Service). This variant is preserved for one release and \
                currently aliases to Hcs with a default config at dispatch time."
    )]
    Wsl2,
    /// Native Windows container runtime via the Host Compute Service (HCS).
    ///
    /// Windows-only. Drives containers directly against the Windows HCS API
    /// (see [`crate::runtimes::hcs`]) without requiring Docker Desktop or a
    /// WSL2 VM.
    #[cfg(target_os = "windows")]
    Hcs(crate::runtimes::hcs::HcsConfig),
}
162
/// Probe the local Docker daemon.
///
/// Connects with the platform's default local settings and then pings the
/// daemon. Every failure is logged at debug level and reported as "not
/// available" rather than returned as an error.
///
/// # Returns
/// `true` if the connection and the ping both succeed, `false` otherwise
#[cfg(feature = "docker")]
pub async fn is_docker_available() -> bool {
    use bollard::Docker;

    let client = match Docker::connect_with_local_defaults() {
        Ok(client) => client,
        Err(err) => {
            tracing::debug!(error = %err, "Failed to connect to Docker daemon");
            return false;
        }
    };

    if let Err(err) = client.ping().await {
        tracing::debug!(error = %err, "Docker daemon ping failed");
        return false;
    }

    tracing::debug!("Docker daemon is available");
    true
}
191
/// Docker availability probe for builds without the `docker` feature.
///
/// Docker support is not compiled in, so the answer is always `false`.
#[cfg(not(feature = "docker"))]
// Kept `async` so the signature matches the `docker`-enabled variant.
#[allow(clippy::unused_async)]
pub async fn is_docker_available() -> bool {
    false
}
198
/// Report whether the WASM runtime was compiled into this binary.
///
/// The answer is fixed at build time: `true` when the `wasm` feature (and
/// with it the wasmtime runtime) is enabled, `false` otherwise.
///
/// # Example
///
/// ```
/// use zlayer_agent::is_wasm_available;
///
/// if is_wasm_available() {
///     println!("WASM runtime is available");
/// } else {
///     println!("WASM runtime is not compiled in");
/// }
/// ```
#[must_use]
pub fn is_wasm_available() -> bool {
    cfg!(feature = "wasm")
}
227
228/// Create a runtime based on the provided configuration
229///
230/// # Arguments
231/// * `config` - The runtime configuration specifying which runtime to use
232///
233/// # Returns
234/// An `Arc<dyn Runtime + Send + Sync>` that can be used with `ServiceManager`
235///
236/// # Errors
237/// Returns `AgentError` if the runtime cannot be initialized (e.g., failed to create
238/// required directories, no runtime available for Auto mode)
239///
240/// # Runtime Selection for Auto Mode
241///
242/// When `RuntimeConfig::Auto` is specified:
243/// - **Linux**: Uses bundled libcontainer runtime (no external binary needed), falls back to Docker
244/// - **macOS**: Uses sandbox runtime (native Metal/MPS), falls back to VM runtime (libkrun), then Docker
245/// - **Windows**: Uses Docker directly
246/// - If no runtime can be initialized, returns an error
247///
248/// # Example
249/// ```no_run
250/// use zlayer_agent::{RuntimeConfig, create_runtime};
251///
252/// # async fn example() -> Result<(), zlayer_agent::AgentError> {
253/// let runtime = create_runtime(RuntimeConfig::Auto, None).await?;
254/// # Ok(())
255/// # }
256/// ```
257pub async fn create_runtime(
258    config: RuntimeConfig,
259    auth_ctx: Option<ContainerAuthContext>,
260) -> Result<Arc<dyn Runtime + Send + Sync>> {
261    match config {
262        RuntimeConfig::Auto => create_auto_runtime(auth_ctx).await,
263        RuntimeConfig::Mock => Ok(Arc::new(MockRuntime::new())),
264        #[cfg(all(target_os = "linux", feature = "youki-runtime"))]
265        RuntimeConfig::Youki(youki_config) => {
266            let runtime = YoukiRuntime::new(youki_config, auth_ctx).await?;
267            Ok(Arc::new(runtime))
268        }
269        #[cfg(feature = "docker")]
270        RuntimeConfig::Docker => {
271            let runtime = DockerRuntime::new(auth_ctx).await?;
272            Ok(Arc::new(runtime))
273        }
274        #[cfg(feature = "wasm")]
275        RuntimeConfig::Wasm(wasm_config) => {
276            let runtime = WasmRuntime::new(wasm_config, auth_ctx).await?;
277            Ok(Arc::new(runtime))
278        }
279        #[cfg(target_os = "macos")]
280        RuntimeConfig::MacSandbox(config) => {
281            let primary: Arc<dyn Runtime> = Arc::new(runtimes::macos_sandbox::SandboxRuntime::new(
282                config,
283                auth_ctx.clone(),
284            )?);
285            let delegate: Option<Arc<dyn Runtime>> = match runtimes::macos_vm::VmRuntime::new(
286                auth_ctx,
287            ) {
288                Ok(rt) => {
289                    tracing::info!(
290                            "macOS VM (libkrun) delegate available — Linux containers will execute in a micro-VM"
291                        );
292                    Some(Arc::new(rt))
293                }
294                Err(e) => {
295                    tracing::warn!(
296                        error = %e,
297                        "macOS VM delegate unavailable; node will only run mac-native containers"
298                    );
299                    None
300                }
301            };
302            Ok(Arc::new(runtimes::composite::CompositeRuntime::new(
303                primary, delegate,
304            )))
305        }
306        #[cfg(target_os = "macos")]
307        RuntimeConfig::MacVm => Ok(Arc::new(runtimes::macos_vm::VmRuntime::new(auth_ctx)?)),
308        #[cfg(target_os = "windows")]
309        #[allow(deprecated)]
310        RuntimeConfig::Wsl2 => {
311            tracing::warn!(
312                "RuntimeConfig::Wsl2 is deprecated; treating as RuntimeConfig::Hcs with default config"
313            );
314            Box::pin(create_runtime(
315                RuntimeConfig::Hcs(crate::runtimes::hcs::HcsConfig::default()),
316                auth_ctx,
317            ))
318            .await
319        }
320        #[cfg(target_os = "windows")]
321        RuntimeConfig::Hcs(hcs_config) => {
322            let primary: Arc<dyn Runtime> =
323                Arc::new(crate::runtimes::hcs::HcsRuntime::new(hcs_config).await?);
324
325            #[cfg(feature = "wsl")]
326            let delegate: Option<Arc<dyn Runtime>> =
327                match runtimes::wsl2_delegate::Wsl2DelegateRuntime::try_new().await {
328                    Ok(Some(rt)) => {
329                        tracing::info!(
330                            "WSL2 delegate runtime available — Linux containers will execute inside the zlayer distro"
331                        );
332                        Some(Arc::new(rt))
333                    }
334                    Ok(None) => {
335                        tracing::info!(
336                            "WSL2 not available; node will only run Windows-image containers"
337                        );
338                        None
339                    }
340                    Err(e) => {
341                        tracing::warn!(
342                            error = %e,
343                            "WSL2 delegate setup failed; node will only run Windows-image containers"
344                        );
345                        None
346                    }
347                };
348            #[cfg(not(feature = "wsl"))]
349            let delegate: Option<Arc<dyn Runtime>> = None;
350
351            Ok(Arc::new(runtimes::composite::CompositeRuntime::new(
352                primary, delegate,
353            )))
354        }
355    }
356}
357
358/// Automatically select and create the best available runtime
359///
360/// Selection logic:
361/// - On Linux: Uses bundled libcontainer runtime directly (no external binary needed), falls back to Docker
362/// - On macOS: `SandboxRuntime` (native Metal/MPS) → `VmRuntime` (libkrun Linux compat with GPU) → Docker
363/// - On Windows: Use Docker directly
364/// - Returns an error if no runtime can be initialized
365#[cfg_attr(
366    not(all(target_os = "linux", feature = "youki-runtime")),
367    allow(clippy::unused_async)
368)]
369#[cfg_attr(
370    not(any(
371        all(target_os = "linux", feature = "youki-runtime"),
372        target_os = "macos",
373        feature = "docker"
374    )),
375    allow(unused_variables)
376)]
377#[allow(clippy::too_many_lines)]
378async fn create_auto_runtime(
379    auth_ctx: Option<ContainerAuthContext>,
380) -> Result<Arc<dyn Runtime + Send + Sync>> {
381    tracing::info!("Auto-selecting container runtime");
382
383    // On Linux, use bundled libcontainer runtime (no daemon overhead, no external binary needed)
384    #[cfg(all(target_os = "linux", feature = "youki-runtime"))]
385    {
386        match YoukiRuntime::new(YoukiConfig::default(), auth_ctx.clone()).await {
387            Ok(runtime) => {
388                tracing::info!("Using bundled libcontainer runtime (Linux-native, no daemon)");
389                return Ok(Arc::new(runtime));
390            }
391            Err(e) => {
392                tracing::warn!(error = %e, "Failed to initialize libcontainer runtime, trying Docker");
393            }
394        }
395    }
396
397    // On macOS, build a composite runtime:
398    //   primary  = SandboxRuntime (native Metal/MPS), when available
399    //   delegate = VmRuntime (libkrun Linux compat), when available
400    // If at least the primary is available, return the composite. Otherwise
401    // (e.g. sandbox init failed), fall through to Docker.
402    #[cfg(target_os = "macos")]
403    {
404        let primary: Option<Arc<dyn Runtime>> = match runtimes::macos_sandbox::SandboxRuntime::new(
405            MacSandboxConfig::default(),
406            auth_ctx.clone(),
407        ) {
408            Ok(rt) => Some(Arc::new(rt)),
409            Err(e) => {
410                tracing::warn!("macOS sandbox runtime unavailable: {e}");
411                None
412            }
413        };
414        let delegate: Option<Arc<dyn Runtime>> = match runtimes::macos_vm::VmRuntime::new(
415            auth_ctx.clone(),
416        ) {
417            Ok(rt) => {
418                tracing::info!(
419                        "macOS VM (libkrun) delegate available — Linux containers will execute in a micro-VM"
420                    );
421                Some(Arc::new(rt))
422            }
423            Err(e) => {
424                tracing::warn!("macOS VM runtime (libkrun) unavailable: {e}");
425                None
426            }
427        };
428
429        if let Some(p) = primary {
430            return Ok(Arc::new(runtimes::composite::CompositeRuntime::new(
431                p, delegate,
432            )));
433        }
434        // If sandbox failed but VM succeeded, use the VM runtime on its own —
435        // it's still the best available native macOS path before falling back
436        // to Docker.
437        if let Some(d) = delegate {
438            return Ok(d);
439        }
440    }
441
442    // On Windows, build a composite runtime:
443    //   primary  = HcsRuntime (native Windows containers), when available
444    //   delegate = Wsl2DelegateRuntime (Linux containers via WSL2), when available
445    // If the primary is available, return the composite. Otherwise fall
446    // through to Docker.
447    #[cfg(target_os = "windows")]
448    {
449        let primary: Option<Arc<dyn Runtime>> =
450            match crate::runtimes::hcs::HcsRuntime::new(crate::runtimes::hcs::HcsConfig::default())
451                .await
452            {
453                Ok(rt) => {
454                    tracing::info!(
455                        "Using native Windows HCS runtime (no Docker Desktop / WSL2 required)"
456                    );
457                    Some(Arc::new(rt))
458                }
459                Err(e) => {
460                    tracing::warn!(error = %e, "HCS runtime unavailable, falling back to Docker");
461                    None
462                }
463            };
464
465        #[cfg(feature = "wsl")]
466        let delegate: Option<Arc<dyn Runtime>> =
467            match runtimes::wsl2_delegate::Wsl2DelegateRuntime::try_new().await {
468                Ok(Some(rt)) => {
469                    tracing::info!(
470                        "WSL2 delegate runtime available — Linux containers will execute inside the zlayer distro"
471                    );
472                    Some(Arc::new(rt))
473                }
474                Ok(None) => {
475                    tracing::info!(
476                        "WSL2 not available; node will only run Windows-image containers"
477                    );
478                    None
479                }
480                Err(e) => {
481                    tracing::warn!(
482                        error = %e,
483                        "WSL2 delegate setup failed; node will only run Windows-image containers"
484                    );
485                    None
486                }
487            };
488        #[cfg(not(feature = "wsl"))]
489        let delegate: Option<Arc<dyn Runtime>> = None;
490
491        if let Some(p) = primary {
492            return Ok(Arc::new(runtimes::composite::CompositeRuntime::new(
493                p, delegate,
494            )));
495        }
496    }
497
498    // On non-Linux or if libcontainer failed, try Docker
499    #[cfg(feature = "docker")]
500    {
501        if is_docker_available().await {
502            tracing::info!("Selected Docker runtime");
503            let runtime = DockerRuntime::new(auth_ctx).await?;
504            return Ok(Arc::new(runtime));
505        }
506        tracing::debug!("Docker daemon not available");
507    }
508
509    // No runtime available
510    #[cfg(all(target_os = "linux", feature = "docker"))]
511    {
512        Err(AgentError::Configuration(
513            "Bundled libcontainer runtime failed to initialize and Docker daemon is not available."
514                .to_string(),
515        ))
516    }
517
518    #[cfg(all(target_os = "linux", not(feature = "docker")))]
519    {
520        Err(AgentError::Configuration(
521            "Bundled libcontainer runtime failed to initialize. Enable the 'docker' feature for an alternative."
522                .to_string(),
523        ))
524    }
525
526    #[cfg(all(not(target_os = "linux"), feature = "docker"))]
527    {
528        Err(AgentError::Configuration(
529            "No container runtime available. Start the Docker daemon.".to_string(),
530        ))
531    }
532
533    #[cfg(all(not(target_os = "linux"), not(feature = "docker")))]
534    {
535        Err(AgentError::Configuration(
536            "No container runtime available. Enable the 'docker' feature and start the Docker daemon.".to_string(),
537        ))
538    }
539}