Skip to main content

zlayer_types/spec/
types.rs

1//! `ZLayer` V1 Service Specification Types
2//!
3//! This module defines all types for parsing and validating `ZLayer` deployment specs.
4
5mod duration {
6    use humantime::format_duration;
7    use serde::{Deserialize, Deserializer, Serializer};
8    use std::time::Duration;
9
10    #[allow(clippy::ref_option)]
11    pub fn serialize<S>(duration: &Option<Duration>, serializer: S) -> Result<S::Ok, S::Error>
12    where
13        S: Serializer,
14    {
15        match duration {
16            Some(d) => serializer.serialize_str(&format_duration(*d).to_string()),
17            None => serializer.serialize_none(),
18        }
19    }
20
21    pub fn deserialize<'de, D>(deserializer: D) -> Result<Option<Duration>, D::Error>
22    where
23        D: Deserializer<'de>,
24    {
25        use serde::de::Error;
26        let s: Option<String> = Option::deserialize(deserializer)?;
27        match s {
28            Some(s) => humantime::parse_duration(&s)
29                .map(Some)
30                .map_err(|e| D::Error::custom(format!("invalid duration: {e}"))),
31            None => Ok(None),
32        }
33    }
34
35    pub mod option {
36        pub use super::*;
37    }
38
39    /// Serde module for required (non-Option) Duration fields
40    pub mod required {
41        use humantime::format_duration;
42        use serde::{Deserialize, Deserializer, Serializer};
43        use std::time::Duration;
44
45        pub fn serialize<S>(duration: &Duration, serializer: S) -> Result<S::Ok, S::Error>
46        where
47            S: Serializer,
48        {
49            serializer.serialize_str(&format_duration(*duration).to_string())
50        }
51
52        pub fn deserialize<'de, D>(deserializer: D) -> Result<Duration, D::Error>
53        where
54            D: Deserializer<'de>,
55        {
56            use serde::de::Error;
57            let s: String = String::deserialize(deserializer)?;
58            humantime::parse_duration(&s)
59                .map_err(|e| D::Error::custom(format!("invalid duration: {e}")))
60        }
61    }
62}
63
64use serde::{Deserialize, Serialize};
65use std::collections::HashMap;
66use validator::Validate;
67
/// How service replicas are allocated to nodes.
///
/// Serialized in `snake_case` (`shared`, `dedicated`, `exclusive`);
/// [`Default`] yields [`NodeMode::Shared`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum NodeMode {
    /// Containers placed on any node with capacity (default, bin-packing)
    #[default]
    Shared,
    /// Each replica gets its own dedicated node (1:1 mapping)
    Dedicated,
    /// Service is the ONLY thing on its nodes (no other services)
    Exclusive,
}
80
/// Service type - determines runtime behavior and scaling model.
///
/// Serialized in `snake_case` (e.g. `standard`, `wasm_http`,
/// `wasm_rate_limiter`, `job`); [`Default`] yields [`ServiceType::Standard`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ServiceType {
    /// Standard long-running container service
    #[default]
    Standard,
    /// WASM-based HTTP service (wasi:http/incoming-handler)
    WasmHttp,
    /// WASM-based general plugin (zlayer:plugin handler - full host access)
    WasmPlugin,
    /// WASM-based stateless request/response transformer
    WasmTransformer,
    /// WASM-based authenticator plugin (secrets + KV + HTTP)
    WasmAuthenticator,
    /// WASM-based rate limiter (KV + metrics)
    WasmRateLimiter,
    /// WASM-based request/response middleware
    WasmMiddleware,
    /// WASM-based custom router
    WasmRouter,
    /// Run-to-completion job
    Job,
}
105
/// Storage performance tier.
///
/// Serialized in `snake_case` (`local`, `cached`, `network`);
/// [`Default`] yields [`StorageTier::Local`].
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum StorageTier {
    /// Direct local filesystem (SSD/NVMe) - SQLite-safe, fast fsync
    #[default]
    Local,
    /// bcache-backed tiered storage (SSD cache + slower backend)
    Cached,
    /// NFS/network storage - NOT SQLite-safe (will warn)
    Network,
}
118
/// Node selection constraints for service placement.
///
/// Unknown keys are rejected on deserialization (`deny_unknown_fields`).
/// Both maps default to empty when omitted and are left out of the
/// serialized output when empty.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema)]
#[serde(deny_unknown_fields)]
pub struct NodeSelector {
    /// Required labels that nodes must have (all must match)
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub labels: HashMap<String, String>,
    /// Preferred labels (soft constraint, nodes with these are preferred)
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub prefer_labels: HashMap<String, String>,
}
130
/// Affinity hint for a single replica group's placement.
///
/// Three behaviors:
/// - `Spread`: try to put each replica on a different node (default).
/// - `Pack`: bin-pack onto the fewest nodes that can fit.
/// - `Pin`: pin all replicas to a single node, identified either by
///   node id (`"id=2"`) or label match (`"role=database"`).
///
/// Variant names are serialized in `snake_case` (`spread`, `pack`, `pin`).
/// The selector string carried by `Pin` is stored verbatim; this type does
/// not parse or validate it.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub enum GroupAffinity {
    /// Default: spread across distinct nodes.
    #[default]
    Spread,
    /// Pack onto fewest nodes.
    Pack,
    /// Pin to a specific node selector.
    ///
    /// Examples:
    /// - `Pin("id=2")` — exact node id match
    /// - `Pin("zone=us-east-1a")` — label match
    Pin(String),
}
153
/// Regex for [`ReplicaGroup::role`] validation. A valid DNS label: starts with
/// a lowercase letter, then any mix of lowercase letters, digits, or
/// internal hyphens, ending with a letter or digit. 1-30 chars total.
///
/// The length bound is encoded in the pattern itself: one leading letter,
/// up to 28 middle characters, and one trailing letter/digit (1 + 28 + 1 = 30).
/// A single-letter role matches because the middle/trailing group is optional.
/// Compiled lazily on first use; the `expect` can only fire if the literal
/// below is edited into an invalid pattern.
static REPLICA_GROUP_ROLE_RE: std::sync::LazyLock<regex::Regex> = std::sync::LazyLock::new(|| {
    regex::Regex::new(r"^[a-z]([a-z0-9-]{0,28}[a-z0-9])?$").expect("valid regex literal")
});
160
/// One named replica group within a service.
///
/// When `ServiceSpec.replica_groups` is set, the service is composed of one
/// or more groups, each with its own count, optional overrides, and
/// affinity hint. Containers in each group get DNS names of the form
/// `<role>.<service>.<deployment>.zlayer.internal` and proxy backends
/// can target a single role via `EndpointSpec.target_role`.
///
/// Backward compat: services without `replica_groups` are treated as a
/// single implicit group `{role: "default", count: <scale.replicas>}`.
///
/// Field-level rules (`role` format, `count >= 1`) are checked by the derived
/// `Validate` impl. Uniqueness of `role` across groups is NOT checked here;
/// see [`validate_unique_replica_group_roles`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Validate)]
#[serde(deny_unknown_fields)]
pub struct ReplicaGroup {
    /// Group identifier. Becomes part of container IDs and DNS names.
    /// Must be a valid DNS label: lowercase letters, digits, and hyphens;
    /// must start with a lowercase letter and must not end with a hyphen;
    /// ≤ 30 chars. Enforced by [`REPLICA_GROUP_ROLE_RE`].
    #[validate(length(min = 1, max = 30))]
    #[validate(regex(path = *REPLICA_GROUP_ROLE_RE))]
    pub role: String,

    /// Number of replicas in this group. Must be at least 1.
    #[validate(range(min = 1))]
    pub count: u32,

    /// Image override (inherits `ServiceSpec.image` when None).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub image: Option<ImageSpec>,

    /// Environment variables MERGED on top of `ServiceSpec.env`. Entries
    /// in this map win on conflict (group overrides service default).
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub env: HashMap<String, String>,

    /// Command override (inherits `ServiceSpec.command` when None).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub command: Option<CommandSpec>,

    /// Resources override (inherits `ServiceSpec.resources` when None).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub resources: Option<ResourcesSpec>,

    /// Affinity hint for placement of this group's replicas.
    /// Defaults to [`GroupAffinity::Spread`] when omitted.
    #[serde(default)]
    pub affinity: GroupAffinity,
}
206
207/// Validate that no two [`ReplicaGroup`]s share the same `role` within a
208/// single [`ServiceSpec`].
209///
210/// Called from the deploy handler before storing the spec; not wired into
211/// the `Validate` derive on `ServiceSpec` because validator 0.19's `custom`
212/// only sees the field type (`Option<Vec<ReplicaGroup>>`) and not the
213/// surrounding struct.
214///
215/// # Errors
216/// Returns the duplicated role name on first collision.
217pub fn validate_unique_replica_group_roles(groups: &[ReplicaGroup]) -> Result<(), String> {
218    let mut seen = std::collections::HashSet::new();
219    for g in groups {
220        if !seen.insert(g.role.as_str()) {
221            return Err(g.role.clone());
222        }
223    }
224    Ok(())
225}
226
/// Operating system a service needs to run on.
///
/// Mirrors the OS half of an OCI platform descriptor.
///
/// Two string spellings exist and they differ for macOS:
/// - The serde representation is the lowercased variant name
///   (`"linux"`, `"windows"`, `"macos"`), per `rename_all = "lowercase"`.
/// - The OCI / Go `GOOS` spelling (`"linux"`, `"windows"`, `"darwin"`) is
///   produced by [`OsKind::as_oci_str`] and parsed by [`OsKind::from_oci_str`].
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize, utoipa::ToSchema,
)]
#[serde(rename_all = "lowercase")]
pub enum OsKind {
    /// Linux (`"linux"` in both serde and OCI form).
    Linux,
    /// Windows (`"windows"` in both serde and OCI form).
    Windows,
    /// macOS (`"macos"` in serde form, `"darwin"` in OCI form).
    Macos,
}
240
241impl OsKind {
242    /// Canonical OCI-style string (`"linux"` / `"windows"` / `"darwin"`).
243    /// This is the same convention `Runtime.platform_resolver` uses.
244    #[must_use]
245    pub const fn as_oci_str(self) -> &'static str {
246        match self {
247            OsKind::Linux => "linux",
248            OsKind::Windows => "windows",
249            OsKind::Macos => "darwin",
250        }
251    }
252
253    /// Detect from `std::env::consts::OS`. Unknown values return `None`.
254    #[must_use]
255    pub fn from_rust_os(s: &str) -> Option<Self> {
256        match s {
257            "linux" => Some(Self::Linux),
258            "windows" => Some(Self::Windows),
259            "macos" => Some(Self::Macos),
260            _ => None,
261        }
262    }
263
264    /// Parse the OCI-canonical OS string as written in an image manifest's
265    /// `config.os` field (lowercase: `"linux"` / `"windows"` / `"darwin"`).
266    /// Unknown or empty values return `None`.
267    ///
268    /// This is the inverse of [`Self::as_oci_str`] and is used by the
269    /// registry's manifest-OS inspection (see `fetch_image_os`).
270    #[must_use]
271    pub fn from_oci_str(s: &str) -> Option<Self> {
272        match s {
273            "linux" => Some(Self::Linux),
274            "windows" => Some(Self::Windows),
275            "darwin" => Some(Self::Macos),
276            _ => None,
277        }
278    }
279}
280
/// CPU architecture a service needs. Mirrors the arch half of an OCI platform.
///
/// The serde representation is the lowercased variant name (`"amd64"`,
/// `"arm64"`), which coincides with the strings returned by
/// [`ArchKind::as_oci_str`].
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize, utoipa::ToSchema,
)]
#[serde(rename_all = "lowercase")]
pub enum ArchKind {
    /// 64-bit x86 (Rust's `x86_64`).
    Amd64,
    /// 64-bit ARM (Rust's `aarch64`).
    Arm64,
}
290
291impl ArchKind {
292    /// Canonical OCI-style string (`"amd64"` / `"arm64"`).
293    #[must_use]
294    pub const fn as_oci_str(self) -> &'static str {
295        match self {
296            ArchKind::Amd64 => "amd64",
297            ArchKind::Arm64 => "arm64",
298        }
299    }
300
301    /// Detect from `std::env::consts::ARCH`. Unknown values return `None`.
302    #[must_use]
303    pub fn from_rust_arch(s: &str) -> Option<Self> {
304        match s {
305            "x86_64" => Some(Self::Amd64),
306            "aarch64" => Some(Self::Arm64),
307            _ => None,
308        }
309    }
310}
311
/// Platform a service targets. `None` on `ServiceSpec.platform` means
/// "any agent is acceptable" (preserves backward compatibility).
///
/// On the wire the fields are `os`, `arch`, and (only when set) `osVersion`.
//
// NOTE: no `Copy`. `os_version: Option<String>` rules it out. `OsKind` / `ArchKind`
// are still `Copy`, so field-level borrows stay ergonomic.
#[derive(
    Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize, utoipa::ToSchema,
)]
pub struct TargetPlatform {
    /// Target operating system.
    pub os: OsKind,
    /// Target CPU architecture.
    pub arch: ArchKind,
    /// Optional OS version constraint — primarily for Windows multi-platform
    /// images, where `platform.os.version` in the OCI index distinguishes build
    /// families (e.g. `10.0.26100.*` for Server 2025 / Win11 24H2,
    /// `10.0.20348.*` for Server 2022). When set on a Windows target the
    /// registry platform resolver prefers manifest entries whose `os.version`
    /// matches this value exactly or shares a `major.minor.build` prefix.
    /// Unused on Linux/macOS platforms.
    ///
    /// Serialized under the camelCase key `osVersion`; omitted when `None`.
    #[serde(default, rename = "osVersion", skip_serializing_if = "Option::is_none")]
    pub os_version: Option<String>,
}
333
334impl TargetPlatform {
335    #[must_use]
336    pub const fn new(os: OsKind, arch: ArchKind) -> Self {
337        Self {
338            os,
339            arch,
340            os_version: None,
341        }
342    }
343
344    /// Constrain the platform to a specific `os.version` string.
345    ///
346    /// Applies to Windows targets: the registry resolver matches manifest
347    /// entries whose `platform.os.version` equals this value or starts with it
348    /// (treated as a `major.minor.build` prefix). Has no effect on Linux/macOS.
349    #[must_use]
350    pub fn with_os_version(mut self, v: impl Into<String>) -> Self {
351        self.os_version = Some(v.into());
352        self
353    }
354
355    /// Canonical OCI-style string (`"linux/amd64"`, `"windows/arm64"`).
356    ///
357    /// Does NOT include `os_version` — use [`Self::as_detailed_str`] when the
358    /// version matters (e.g. for error/log messages that need to distinguish
359    /// between Windows build families).
360    #[must_use]
361    pub fn as_oci_str(self) -> String {
362        format!("{}/{}", self.os.as_oci_str(), self.arch.as_oci_str())
363    }
364
365    /// Like [`Self::as_oci_str`] but appends ` (os.version=…)` when an
366    /// `os_version` constraint is set. Intended for diagnostics, not for
367    /// matching against manifest entries.
368    #[must_use]
369    pub fn as_detailed_str(&self) -> String {
370        match &self.os_version {
371            Some(v) => format!(
372                "{}/{} (os.version={v})",
373                self.os.as_oci_str(),
374                self.arch.as_oci_str()
375            ),
376            None => format!("{}/{}", self.os.as_oci_str(), self.arch.as_oci_str()),
377        }
378    }
379}
380
381impl std::fmt::Display for TargetPlatform {
382    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
383        write!(f, "{}/{}", self.os.as_oci_str(), self.arch.as_oci_str())
384    }
385}
386
/// Explicit capability declarations for WASM modules.
/// Controls which host interfaces are linked and available to the component.
///
/// Unknown keys are rejected (`deny_unknown_fields`). Fields omitted from the
/// input fall back to a per-field serde default: `#[serde(default)]` yields
/// `false`; `default_true` is defined outside this view and presumably
/// returns `true`, which would match the `Default` impl — TODO confirm.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
#[allow(clippy::struct_excessive_bools)]
pub struct WasmCapabilities {
    /// Config interface access (zlayer:plugin/config)
    #[serde(default = "default_true")]
    pub config: bool,
    /// Key-value storage access (zlayer:plugin/keyvalue)
    #[serde(default = "default_true")]
    pub keyvalue: bool,
    /// Logging access (zlayer:plugin/logging)
    #[serde(default = "default_true")]
    pub logging: bool,
    /// Secrets access (zlayer:plugin/secrets)
    #[serde(default)]
    pub secrets: bool,
    /// Metrics emission (zlayer:plugin/metrics)
    #[serde(default = "default_true")]
    pub metrics: bool,
    /// HTTP client for outgoing requests (wasi:http/outgoing-handler)
    #[serde(default)]
    pub http_client: bool,
    /// WASI CLI access (args, env, stdio)
    #[serde(default)]
    pub cli: bool,
    /// WASI filesystem access
    #[serde(default)]
    pub filesystem: bool,
    /// WASI sockets access (TCP/UDP)
    #[serde(default)]
    pub sockets: bool,
}
421
422impl Default for WasmCapabilities {
423    fn default() -> Self {
424        Self {
425            config: true,
426            keyvalue: true,
427            logging: true,
428            secrets: false,
429            metrics: true,
430            http_client: false,
431            cli: false,
432            filesystem: false,
433            sockets: false,
434        }
435    }
436}
437
/// Pre-opened directory for WASM filesystem access.
///
/// Unknown keys are rejected (`deny_unknown_fields`). Both paths are stored
/// as plain strings; this type performs no validation on them.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct WasmPreopen {
    /// Host path to mount
    pub source: String,
    /// Guest path (visible to WASM module)
    pub target: String,
    /// Read-only access (default: false)
    #[serde(default)]
    pub readonly: bool,
}
450
/// Comprehensive configuration for all WASM service types.
///
/// Replaces the previous `WasmHttpConfig` with resource limits, capability
/// declarations, networking controls, and storage configuration.
///
/// Unknown keys are rejected (`deny_unknown_fields`). Every field has a serde
/// default, so an empty mapping deserializes successfully. Duration fields
/// are read and written as `humantime` strings via the `duration` module.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
#[allow(clippy::struct_excessive_bools)]
pub struct WasmConfig {
    // --- Instance Management ---
    /// Minimum number of warm instances to keep ready (default: 0)
    #[serde(default = "default_min_instances")]
    pub min_instances: u32,
    /// Maximum number of instances to scale to (default: 10)
    #[serde(default = "default_max_instances")]
    pub max_instances: u32,
    /// Time before idle instances are terminated (default: 300 seconds)
    #[serde(default = "default_idle_timeout", with = "duration::required")]
    pub idle_timeout: std::time::Duration,
    /// Maximum time for a single request (default: 30 seconds)
    #[serde(default = "default_request_timeout", with = "duration::required")]
    pub request_timeout: std::time::Duration,

    // --- Resource Limits ---
    /// Maximum linear memory (e.g., "64Mi", "256Mi").
    /// Stored as the raw string; not parsed by this type.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_memory: Option<String>,
    /// Maximum fuel (instruction count limit, 0 = unlimited)
    #[serde(default)]
    pub max_fuel: u64,
    /// Epoch interval for cooperative preemption
    #[serde(
        default,
        skip_serializing_if = "Option::is_none",
        with = "duration::option"
    )]
    pub epoch_interval: Option<std::time::Duration>,

    // --- Capabilities ---
    /// Explicit capability grants (overrides world defaults when restricting)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub capabilities: Option<WasmCapabilities>,

    // --- Networking ---
    /// Allow outgoing HTTP requests (default: true)
    #[serde(default = "default_true")]
    pub allow_http_outgoing: bool,
    /// Allowed outgoing HTTP hosts (empty = all allowed)
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub allowed_hosts: Vec<String>,
    /// Allow raw TCP sockets (default: false)
    #[serde(default)]
    pub allow_tcp: bool,
    /// Allow raw UDP sockets (default: false)
    #[serde(default)]
    pub allow_udp: bool,

    // --- Storage ---
    /// Pre-opened directories (host path -> guest path)
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub preopens: Vec<WasmPreopen>,
    /// Enable KV store access (default: true)
    #[serde(default = "default_true")]
    pub kv_enabled: bool,
    /// KV store namespace (default: service name)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub kv_namespace: Option<String>,
    /// KV store max value size in bytes (default: 1 MiB = 1,048,576 bytes)
    #[serde(default = "default_kv_max_value_size")]
    pub kv_max_value_size: u64,

    // --- Secrets ---
    /// Secret names accessible to this WASM module
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub secrets: Vec<String>,

    // --- Performance ---
    /// Pre-compile on deploy to reduce cold start (default: true)
    #[serde(default = "default_true")]
    pub precompile: bool,
}
531
/// Serde default for `WasmConfig::kv_max_value_size`: 1 MiB, in bytes.
fn default_kv_max_value_size() -> u64 {
    const BYTES_PER_MIB: u64 = 1024 * 1024;
    BYTES_PER_MIB
}
535
536impl Default for WasmConfig {
537    fn default() -> Self {
538        Self {
539            min_instances: default_min_instances(),
540            max_instances: default_max_instances(),
541            idle_timeout: default_idle_timeout(),
542            request_timeout: default_request_timeout(),
543            max_memory: None,
544            max_fuel: 0,
545            epoch_interval: None,
546            capabilities: None,
547            allow_http_outgoing: true,
548            allowed_hosts: Vec::new(),
549            allow_tcp: false,
550            allow_udp: false,
551            preopens: Vec::new(),
552            kv_enabled: true,
553            kv_namespace: None,
554            kv_max_value_size: default_kv_max_value_size(),
555            secrets: Vec::new(),
556            precompile: true,
557        }
558    }
559}
560
/// Configuration for WASM HTTP services with instance pooling.
///
/// Deprecated in favour of `WasmConfig`; a `From<WasmHttpConfig>` conversion
/// exists for migrating values. Unknown keys are rejected
/// (`deny_unknown_fields`). Durations are read and written as `humantime`
/// strings.
#[deprecated(note = "Use WasmConfig instead")]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct WasmHttpConfig {
    /// Minimum number of warm instances to keep ready (default: 0)
    #[serde(default = "default_min_instances")]
    pub min_instances: u32,
    /// Maximum number of instances to scale to (default: 10)
    #[serde(default = "default_max_instances")]
    pub max_instances: u32,
    /// Time before idle instances are terminated (default: 300 seconds)
    #[serde(default = "default_idle_timeout", with = "duration::required")]
    pub idle_timeout: std::time::Duration,
    /// Maximum time for a single request (default: 30 seconds)
    #[serde(default = "default_request_timeout", with = "duration::required")]
    pub request_timeout: std::time::Duration,
}
579
/// Serde default for `min_instances`: keep no warm instances.
fn default_min_instances() -> u32 {
    const NO_WARM_INSTANCES: u32 = 0;
    NO_WARM_INSTANCES
}
583
/// Serde default for `max_instances`: scale up to ten instances.
fn default_max_instances() -> u32 {
    const INSTANCE_CEILING: u32 = 10;
    INSTANCE_CEILING
}
587
/// Serde default for `idle_timeout`: five minutes.
fn default_idle_timeout() -> std::time::Duration {
    const IDLE_MINUTES: u64 = 5;
    std::time::Duration::from_secs(IDLE_MINUTES * 60)
}
591
/// Serde default for `request_timeout`: thirty seconds.
fn default_request_timeout() -> std::time::Duration {
    const REQUEST_SECONDS: u64 = 30;
    std::time::Duration::new(REQUEST_SECONDS, 0)
}
595
596#[allow(deprecated)]
597impl Default for WasmHttpConfig {
598    fn default() -> Self {
599        Self {
600            min_instances: default_min_instances(),
601            max_instances: default_max_instances(),
602            idle_timeout: default_idle_timeout(),
603            request_timeout: default_request_timeout(),
604        }
605    }
606}
607
608#[allow(deprecated)]
609impl From<WasmHttpConfig> for WasmConfig {
610    fn from(old: WasmHttpConfig) -> Self {
611        Self {
612            min_instances: old.min_instances,
613            max_instances: old.max_instances,
614            idle_timeout: old.idle_timeout,
615            request_timeout: old.request_timeout,
616            ..Default::default()
617        }
618    }
619}
620
621impl ServiceType {
622    /// Returns true if this is any WASM service type
623    #[must_use]
624    pub fn is_wasm(&self) -> bool {
625        matches!(
626            self,
627            ServiceType::WasmHttp
628                | ServiceType::WasmPlugin
629                | ServiceType::WasmTransformer
630                | ServiceType::WasmAuthenticator
631                | ServiceType::WasmRateLimiter
632                | ServiceType::WasmMiddleware
633                | ServiceType::WasmRouter
634        )
635    }
636
637    /// Returns the default capabilities for this WASM service type.
638    /// Returns None for non-WASM types.
639    #[must_use]
640    pub fn default_wasm_capabilities(&self) -> Option<WasmCapabilities> {
641        match self {
642            ServiceType::WasmHttp | ServiceType::WasmRouter => Some(WasmCapabilities {
643                config: true,
644                keyvalue: true,
645                logging: true,
646                secrets: false,
647                metrics: false,
648                http_client: true,
649                cli: false,
650                filesystem: false,
651                sockets: false,
652            }),
653            ServiceType::WasmPlugin => Some(WasmCapabilities {
654                config: true,
655                keyvalue: true,
656                logging: true,
657                secrets: true,
658                metrics: true,
659                http_client: true,
660                cli: true,
661                filesystem: true,
662                sockets: false,
663            }),
664            ServiceType::WasmTransformer => Some(WasmCapabilities {
665                config: false,
666                keyvalue: false,
667                logging: true,
668                secrets: false,
669                metrics: false,
670                http_client: false,
671                cli: true,
672                filesystem: false,
673                sockets: false,
674            }),
675            ServiceType::WasmAuthenticator => Some(WasmCapabilities {
676                config: true,
677                keyvalue: false,
678                logging: true,
679                secrets: true,
680                metrics: false,
681                http_client: true,
682                cli: false,
683                filesystem: false,
684                sockets: false,
685            }),
686            ServiceType::WasmRateLimiter => Some(WasmCapabilities {
687                config: true,
688                keyvalue: true,
689                logging: true,
690                secrets: false,
691                metrics: true,
692                http_client: false,
693                cli: true,
694                filesystem: false,
695                sockets: false,
696            }),
697            ServiceType::WasmMiddleware => Some(WasmCapabilities {
698                config: true,
699                keyvalue: false,
700                logging: true,
701                secrets: false,
702                metrics: false,
703                http_client: true,
704                cli: false,
705                filesystem: false,
706                sockets: false,
707            }),
708            _ => None,
709        }
710    }
711}
712
/// Serde default for `ApiSpec::bind`: all IPv4 interfaces, port 3669.
fn default_api_bind() -> String {
    String::from("0.0.0.0:3669")
}
716
/// API server configuration (embedded in deploy/up flows).
///
/// Every field has a serde default, so an empty mapping is valid. Unlike
/// most spec types in this module, this struct does not set
/// `deny_unknown_fields`, so unrecognized keys are not rejected by serde.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ApiSpec {
    /// Enable the API server (default: true)
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Bind address (default: "0.0.0.0:3669")
    #[serde(default = "default_api_bind")]
    pub bind: String,
    /// JWT secret (reads `ZLAYER_JWT_SECRET` env var if not set)
    #[serde(default)]
    pub jwt_secret: Option<String>,
    /// Enable Swagger UI (default: true)
    #[serde(default = "default_true")]
    pub swagger: bool,
}
733
734impl Default for ApiSpec {
735    fn default() -> Self {
736        Self {
737            enabled: true,
738            bind: default_api_bind(),
739            jwt_secret: None,
740            swagger: true,
741        }
742    }
743}
744
/// Top-level deployment specification.
///
/// Unknown top-level keys are rejected (`deny_unknown_fields`). `version`
/// and `deployment` are required; every other section defaults to empty.
/// The derived `Validate` impl runs the custom checks on `version` and
/// `deployment` and recurses into `services` and `externals`; `tunnels` and
/// `api` carry no `validate` attribute here.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Validate)]
#[serde(deny_unknown_fields)]
pub struct DeploymentSpec {
    /// Spec version (must be "v1")
    #[validate(custom(function = "crate::spec::validate::validate_version_wrapper"))]
    pub version: String,

    /// Deployment name (used for overlays, DNS)
    #[validate(custom(function = "crate::spec::validate::validate_deployment_name_wrapper"))]
    pub deployment: String,

    /// Service definitions, keyed by service name
    #[serde(default)]
    #[validate(nested)]
    pub services: HashMap<String, ServiceSpec>,

    /// External service definitions (proxy backends without containers)
    ///
    /// External services register static backend addresses with the proxy
    /// for host/path-based routing without starting any containers.
    /// Useful for proxying to services running outside of `ZLayer`
    /// (e.g., on other machines reachable via VPN).
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    #[validate(nested)]
    pub externals: HashMap<String, ExternalSpec>,

    /// Top-level tunnel definitions (not tied to service endpoints)
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub tunnels: HashMap<String, TunnelDefinition>,

    /// API server configuration (enabled by default)
    #[serde(default)]
    pub api: ApiSpec,
}
780
/// External service specification (proxy backend without a container)
///
/// Defines a service that is not managed by `ZLayer` but should be proxied
/// through `ZLayer`'s reverse proxy. The proxy registers static backend
/// addresses and routes traffic based on endpoint host/path matching.
///
/// Unknown keys are rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Validate)]
#[serde(deny_unknown_fields)]
pub struct ExternalSpec {
    /// Static backend addresses (e.g., `["100.64.1.5:8096", "192.168.1.10:8096"]`)
    ///
    /// These are the upstream addresses the proxy will forward traffic to.
    /// At least one backend is required. Only the list length is validated
    /// here; the address strings themselves are not parsed by this type.
    #[validate(length(min = 1, message = "at least one backend address is required"))]
    pub backends: Vec<String>,

    /// Endpoint definitions (proxy bindings)
    ///
    /// Defines how public/internal traffic is routed to this external service.
    #[serde(default)]
    #[validate(nested)]
    pub endpoints: Vec<EndpointSpec>,

    /// Health check configuration
    ///
    /// When specified, the proxy will health-check backends and remove
    /// unhealthy ones from the rotation.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub health: Option<HealthSpec>,
}
810
/// Top-level tunnel definition (not tied to a service endpoint).
///
/// Unknown keys are rejected (`deny_unknown_fields`). `from`, `to`,
/// `local_port` and `remote_port` are required; `protocol` and `expose`
/// fall back to their `Default` values when omitted.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct TunnelDefinition {
    /// Source node
    pub from: String,

    /// Destination node
    pub to: String,

    /// Local port on source
    pub local_port: u16,

    /// Remote port on destination
    pub remote_port: u16,

    /// Protocol (tcp/udp, defaults to tcp)
    #[serde(default)]
    pub protocol: TunnelProtocol,

    /// Exposure type (defaults to internal)
    #[serde(default)]
    pub expose: ExposeType,
}
835
/// Protocol for tunnel connections (tcp or udp only).
///
/// Serialized as the lowercased variant name (`tcp`, `udp`);
/// [`Default`] yields [`TunnelProtocol::Tcp`].
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "lowercase")]
pub enum TunnelProtocol {
    /// TCP tunnel (default).
    #[default]
    Tcp,
    /// UDP tunnel.
    Udp,
}
844
/// Log output configuration for services and jobs.
///
/// Every field has a serde default, so an empty mapping is valid. This
/// struct does not set `deny_unknown_fields`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct LogsConfig {
    /// Where to write logs: "disk" (default) or "memory".
    /// Held as a free-form string; this type does not restrict it to those
    /// two values.
    #[serde(default = "default_logs_destination")]
    pub destination: String,

    /// Maximum log size in bytes (default: 100MB, i.e. 100 * 1024 * 1024)
    #[serde(default = "default_logs_max_size")]
    pub max_size_bytes: u64,

    /// Log retention in seconds (default: 7 days, i.e. 604,800 seconds)
    #[serde(default = "default_logs_retention")]
    pub retention_secs: u64,
}
860
/// Serde default for `LogsConfig::destination`: write logs to disk.
fn default_logs_destination() -> String {
    String::from("disk")
}
864
/// Serde default for `LogsConfig::max_size_bytes`: 100 MiB, in bytes.
fn default_logs_max_size() -> u64 {
    const BYTES_PER_MIB: u64 = 1024 * 1024;
    100 * BYTES_PER_MIB
}
868
/// Serde default for `LogsConfig::retention_secs`: seven days, in seconds.
fn default_logs_retention() -> u64 {
    const SECONDS_PER_DAY: u64 = 24 * 60 * 60;
    7 * SECONDS_PER_DAY
}
872
873impl Default for LogsConfig {
874    fn default() -> Self {
875        Self {
876            destination: default_logs_destination(),
877            max_size_bytes: default_logs_max_size(),
878            retention_secs: default_logs_retention(),
879        }
880    }
881}
882
883/// Network mode for a service container.
884///
885/// Mirrors Docker's `HostConfig.NetworkMode` semantics. Accepts both an
886/// enum-tagged form (e.g. `network_mode: { bridge: { name: my-net } }`) and a
887/// string form (e.g. `"host"`, `"bridge:my-net"`, `"container:abc123"`).
888#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, utoipa::ToSchema)]
889#[serde(rename_all = "lowercase")]
890pub enum NetworkMode {
891    /// Default networking (overlay / bridge as configured by the platform).
892    #[default]
893    Default,
894    /// Share the host network namespace (Docker `--network host`).
895    Host,
896    /// Disable networking entirely (Docker `--network none`).
897    None,
898    /// Attach to a Docker bridge network. When `name` is `None`, uses the
899    /// default `bridge` network.
900    Bridge {
901        #[serde(default)]
902        name: Option<String>,
903    },
904    /// Attach to another container's network namespace
905    /// (Docker `--network container:<id>`).
906    Container { id: String },
907}
908
909/// String-or-enum deserializer for [`NetworkMode`].
910///
911/// Accepts the same strings Docker accepts on `HostConfig.NetworkMode`:
912/// `"default"`, `"host"`, `"none"`, `"bridge"`, `"bridge:<name>"`, and
913/// `"container:<id>"`. Also accepts the enum-tagged YAML/JSON form produced by
914/// the derived [`Serialize`] impl (e.g. `bridge: { name: my-net }`).
915fn deserialize_network_mode<'de, D>(deserializer: D) -> Result<NetworkMode, D::Error>
916where
917    D: serde::Deserializer<'de>,
918{
919    use serde::de::Error;
920
921    /// Inline mirror of [`NetworkMode`] used purely for the "object" form.
922    /// We re-deserialize the captured YAML/JSON value into this and then map
923    /// it back, which correctly drives `deserialize_enum` even when the input
924    /// originally came from a `deserialize_any` path.
925    #[derive(Deserialize)]
926    #[serde(rename_all = "lowercase")]
927    enum Inner {
928        Default,
929        Host,
930        None,
931        Bridge {
932            #[serde(default)]
933            name: Option<String>,
934        },
935        Container {
936            id: String,
937        },
938    }
939
940    impl From<Inner> for NetworkMode {
941        fn from(i: Inner) -> Self {
942            match i {
943                Inner::Default => Self::Default,
944                Inner::Host => Self::Host,
945                Inner::None => Self::None,
946                Inner::Bridge { name } => Self::Bridge { name },
947                Inner::Container { id } => Self::Container { id },
948            }
949        }
950    }
951
952    // Capture the input as a self-describing serde value so we can branch
953    // on whether it is a string (Docker-style) or an externally-tagged
954    // enum (`{ bridge: { name } }`-style).
955    let value = serde_yaml::Value::deserialize(deserializer)?;
956
957    if let Some(s) = value.as_str() {
958        return match s {
959            "default" => Ok(NetworkMode::Default),
960            "host" => Ok(NetworkMode::Host),
961            "none" => Ok(NetworkMode::None),
962            "bridge" => Ok(NetworkMode::Bridge { name: None }),
963            _ => {
964                if let Some(rest) = s.strip_prefix("bridge:") {
965                    if rest.is_empty() {
966                        Ok(NetworkMode::Bridge { name: None })
967                    } else {
968                        Ok(NetworkMode::Bridge {
969                            name: Some(rest.to_string()),
970                        })
971                    }
972                } else if let Some(rest) = s.strip_prefix("container:") {
973                    if rest.is_empty() {
974                        Err(D::Error::custom(
975                            "network mode \"container:<id>\" requires a non-empty id",
976                        ))
977                    } else {
978                        Ok(NetworkMode::Container {
979                            id: rest.to_string(),
980                        })
981                    }
982                } else {
983                    Err(D::Error::custom(format!("unknown network mode: {s}")))
984                }
985            }
986        };
987    }
988
989    let inner: Inner = serde_yaml::from_value(value).map_err(D::Error::custom)?;
990    Ok(NetworkMode::from(inner))
991}
992
993/// Container isolation mode (Windows containers only; ignored on Linux/macOS).
994///
995/// * `Auto` (default) — runtime picks: Hyper-V on Windows client SKUs, Process on Server with matching build.
996/// * `Process` — shared host kernel (fast, requires container OS build to match host).
997/// * `Hyperv` — utility VM (stronger boundary, cross-version compatible).
998#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
999#[serde(rename_all = "kebab-case")]
1000pub enum IsolationMode {
1001    #[default]
1002    Auto,
1003    Process,
1004    Hyperv,
1005}
1006
1007/// Per-process resource limit (Docker `--ulimit` style).
1008#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, utoipa::ToSchema)]
1009#[serde(deny_unknown_fields)]
1010pub struct UlimitSpec {
1011    /// Soft limit.
1012    #[serde(default)]
1013    pub soft: i64,
1014    /// Hard limit.
1015    #[serde(default)]
1016    pub hard: i64,
1017}
1018
1019/// Per-service specification
1020#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Validate)]
1021#[serde(from = "ServiceSpecCompat")]
1022#[allow(clippy::struct_excessive_bools)]
1023pub struct ServiceSpec {
1024    /// Resource type (service, job, cron)
1025    #[serde(default = "default_resource_type")]
1026    pub rtype: ResourceType,
1027
1028    /// Cron schedule expression (only for rtype: cron)
1029    /// Uses 7-field cron syntax: "sec min hour day-of-month month day-of-week year"
1030    /// Examples:
1031    ///   - "0 0 0 * * * *" (daily at midnight)
1032    ///   - "0 */5 * * * * *" (every 5 minutes)
1033    ///   - "0 0 12 * * MON-FRI *" (weekdays at noon)
1034    #[serde(default, skip_serializing_if = "Option::is_none")]
1035    #[validate(custom(function = "crate::spec::validate::validate_schedule_wrapper"))]
1036    pub schedule: Option<String>,
1037
1038    /// Container image specification
1039    #[validate(nested)]
1040    pub image: ImageSpec,
1041
1042    /// Resource limits
1043    #[serde(default)]
1044    #[validate(nested)]
1045    pub resources: ResourcesSpec,
1046
1047    /// Environment variables for the service
1048    ///
1049    /// Values can be:
1050    /// - Plain strings: `"value"`
1051    /// - Host env refs: `$E:VAR_NAME`
1052    /// - Secret refs: `$S:secret-name` or `$S:@service/secret-name`
1053    #[serde(default)]
1054    pub env: HashMap<String, String>,
1055
1056    /// Command override (entrypoint, args, workdir)
1057    #[serde(default)]
1058    pub command: CommandSpec,
1059
1060    /// Network configuration
1061    #[serde(default)]
1062    pub network: ServiceNetworkSpec,
1063
1064    /// Endpoint definitions (proxy bindings)
1065    #[serde(default)]
1066    #[validate(nested)]
1067    pub endpoints: Vec<EndpointSpec>,
1068
1069    /// Scaling configuration
1070    #[serde(default)]
1071    #[validate(custom(function = "crate::spec::validate::validate_scale_spec"))]
1072    pub scale: ScaleSpec,
1073
1074    /// Heterogeneous replica groups within this service.
1075    ///
1076    /// When set, the service is composed of multiple named groups (e.g.
1077    /// `primary` + `read` + `cache`) instead of a flat `scale.replicas`.
1078    /// Each group inherits `ServiceSpec` defaults (image, env, command,
1079    /// resources) and overrides per-group fields.
1080    ///
1081    /// When `None` (default), the service uses `scale` directly with an
1082    /// implicit single group `{role: "default", count: <scale.replicas>}`.
1083    /// This is the backward-compatible path used by all existing
1084    /// specifications.
1085    ///
1086    /// Cross-group role uniqueness is validated separately by
1087    /// [`validate_unique_replica_group_roles`] from the deploy handler.
1088    #[serde(default, skip_serializing_if = "Option::is_none")]
1089    #[validate(nested)]
1090    pub replica_groups: Option<Vec<ReplicaGroup>>,
1091
1092    /// Dependency specifications
1093    #[serde(default)]
1094    pub depends: Vec<DependsSpec>,
1095
1096    /// Health check configuration
1097    #[serde(default = "default_health")]
1098    pub health: HealthSpec,
1099
1100    /// Init actions (pre-start lifecycle steps)
1101    #[serde(default)]
1102    pub init: InitSpec,
1103
1104    /// Error handling policies
1105    #[serde(default)]
1106    pub errors: ErrorsSpec,
1107
1108    /// Container lifecycle policy (e.g., delete-on-exit).
1109    ///
1110    /// Purely declarative on this type; downstream layers (agent / API /
1111    /// scheduler) read this field to decide whether to clean up the
1112    /// container record after termination.
1113    #[serde(default)]
1114    pub lifecycle: LifecycleSpec,
1115
1116    /// Container isolation mode (Windows containers only; ignored on Linux/macOS).
1117    #[serde(default, skip_serializing_if = "Option::is_none")]
1118    pub isolation: Option<IsolationMode>,
1119
1120    /// Device passthrough (e.g., /dev/kvm for VMs)
1121    #[serde(default)]
1122    pub devices: Vec<DeviceSpec>,
1123
1124    /// Storage mounts for the container
1125    #[serde(default, skip_serializing_if = "Vec::is_empty")]
1126    pub storage: Vec<StorageSpec>,
1127
1128    /// Host-to-container port mappings (Docker's `-p host:container/proto`).
1129    ///
1130    /// Each entry publishes a container port on the host. When `host_port` is
1131    /// `None` (or zero), the daemon assigns an ephemeral host port.
1132    #[serde(default, skip_serializing_if = "Vec::is_empty")]
1133    pub port_mappings: Vec<PortMapping>,
1134
1135    /// Linux capabilities to add (e.g., `SYS_ADMIN`, `NET_ADMIN`).
1136    ///
1137    /// Also accepts the Docker-compatible alias `cap_add` on input.
1138    #[serde(default, alias = "cap_add", skip_serializing_if = "Vec::is_empty")]
1139    pub capabilities: Vec<String>,
1140
1141    /// Linux capabilities to drop (Docker `--cap-drop`).
1142    #[serde(default, skip_serializing_if = "Vec::is_empty")]
1143    pub cap_drop: Vec<String>,
1144
1145    /// Run container in privileged mode (all capabilities + all devices)
1146    #[serde(default)]
1147    pub privileged: bool,
1148
1149    /// Node allocation mode (shared, dedicated, exclusive)
1150    #[serde(default)]
1151    pub node_mode: NodeMode,
1152
1153    /// Node selection constraints (required/preferred labels)
1154    #[serde(default, skip_serializing_if = "Option::is_none")]
1155    pub node_selector: Option<NodeSelector>,
1156
1157    /// Placement affinity for this service's replicas when the service is NOT
1158    /// composed of `replica_groups` (each group carries its own affinity).
1159    ///
1160    /// `None` (the default) preserves historical shared-mode behavior:
1161    /// bin-pack / concentrate consecutive replicas onto the fewest nodes that
1162    /// fit. Set to `spread` for same-service anti-affinity (replicas land on
1163    /// distinct nodes for higher availability), `pack` to concentrate
1164    /// explicitly, or `pin` to bind all replicas to one node.
1165    ///
1166    /// Note: capacity always wins — a replica that does not fit on a node is
1167    /// placed elsewhere regardless of affinity.
1168    #[serde(default, skip_serializing_if = "Option::is_none")]
1169    pub affinity: Option<GroupAffinity>,
1170
1171    /// Target platform for this service. When `None` (default), the service is
1172    /// eligible to run on any agent regardless of OS/architecture. When `Some`,
1173    /// the scheduler will only place replicas on agents whose platform matches.
1174    #[serde(default, skip_serializing_if = "Option::is_none")]
1175    pub platform: Option<TargetPlatform>,
1176
1177    /// Service type (standard, `wasm_http`, `wasm_plugin`, etc.)
1178    #[serde(default)]
1179    pub service_type: ServiceType,
1180
1181    /// WASM configuration (used when `service_type` is any Wasm* variant)
1182    /// Also accepts the deprecated `wasm_http` key for backward compatibility.
1183    #[serde(default, skip_serializing_if = "Option::is_none", alias = "wasm_http")]
1184    pub wasm: Option<WasmConfig>,
1185
1186    /// Log output configuration. If not set, uses platform defaults.
1187    #[serde(default, skip_serializing_if = "Option::is_none")]
1188    pub logs: Option<LogsConfig>,
1189
1190    /// Use host networking (container shares host network namespace)
1191    ///
1192    /// When true, the container will NOT get its own network namespace.
1193    /// This is set programmatically via the `--host-network` CLI flag, not in YAML specs.
1194    #[serde(skip)]
1195    pub host_network: bool,
1196
1197    /// Container hostname (maps to Docker's `--hostname`).
1198    ///
1199    /// When set, the container's `/etc/hostname` and initial kernel hostname
1200    /// are configured to this value. Ignored when `host_network` is true
1201    /// (the container inherits the host's hostname).
1202    #[serde(default, skip_serializing_if = "Option::is_none")]
1203    pub hostname: Option<String>,
1204
1205    /// Additional DNS servers for the container (maps to Docker's `--dns`).
1206    ///
1207    /// Each entry must be a plausible IPv4 or IPv6 address. Forwarded to the
1208    /// container runtime as resolver addresses ahead of the platform defaults.
1209    /// Ignored when `host_network` is true.
1210    #[serde(default, skip_serializing_if = "Vec::is_empty")]
1211    pub dns: Vec<String>,
1212
1213    /// Extra `hostname:ip` entries appended to `/etc/hosts` (maps to Docker's
1214    /// `--add-host`).
1215    ///
1216    /// Each entry must be in the form `"<hostname>:<ip>"`. The special literal
1217    /// `host-gateway` is accepted as the `<ip>` half (resolved by Docker /
1218    /// bollard to the host-visible gateway address, commonly used with
1219    /// `host.docker.internal:host-gateway`).
1220    #[serde(default, skip_serializing_if = "Vec::is_empty")]
1221    pub extra_hosts: Vec<String>,
1222
1223    /// Container restart policy (Docker-style).
1224    ///
1225    /// Controls when the runtime should automatically restart the container
1226    /// after it exits. Maps to Docker's `HostConfig.RestartPolicy`. Named
1227    /// `ContainerRestartPolicy` to avoid colliding with `ZLayer`'s existing
1228    /// `PanicPolicy` (which controls post-panic behavior, not runtime-level
1229    /// restarts).
1230    #[serde(default, skip_serializing_if = "Option::is_none")]
1231    pub restart_policy: Option<ContainerRestartPolicy>,
1232
1233    /// Free-form key/value labels attached to the container
1234    /// (Docker `--label`).
1235    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
1236    pub labels: HashMap<String, String>,
1237
1238    /// User and group override for the container's main process
1239    /// (Docker `--user uid:gid`).
1240    #[serde(default, skip_serializing_if = "Option::is_none")]
1241    pub user: Option<String>,
1242
1243    /// Signal sent to the container's main process to request a graceful
1244    /// shutdown (Docker `--stop-signal`). Accepts e.g. `"SIGTERM"` or `"15"`.
1245    #[serde(default, skip_serializing_if = "Option::is_none")]
1246    pub stop_signal: Option<String>,
1247
1248    /// Grace period to wait between the stop signal and a forced kill
1249    /// (Docker `--stop-timeout`).
1250    #[serde(
1251        default,
1252        with = "duration::option",
1253        skip_serializing_if = "Option::is_none"
1254    )]
1255    pub stop_grace_period: Option<std::time::Duration>,
1256
1257    /// Kernel sysctl overrides (Docker `--sysctl`).
1258    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
1259    pub sysctls: HashMap<String, String>,
1260
1261    /// Per-process ulimits (Docker `--ulimit`).
1262    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
1263    pub ulimits: HashMap<String, UlimitSpec>,
1264
1265    /// Security options such as `apparmor=...`, `seccomp=...`,
1266    /// `no-new-privileges:true` (Docker `--security-opt`).
1267    #[serde(default, skip_serializing_if = "Vec::is_empty")]
1268    pub security_opt: Vec<String>,
1269
1270    /// PID namespace mode (Docker `--pid`). Accepts e.g. `"host"` or
1271    /// `"container:<id>"`.
1272    #[serde(default, skip_serializing_if = "Option::is_none")]
1273    pub pid_mode: Option<String>,
1274
1275    /// IPC namespace mode (Docker `--ipc`). Accepts e.g. `"host"`,
1276    /// `"shareable"`, `"private"`, or `"container:<id>"`.
1277    #[serde(default, skip_serializing_if = "Option::is_none")]
1278    pub ipc_mode: Option<String>,
1279
1280    /// Network mode (Docker `--network`). Accepts both the enum-tagged form
1281    /// and the Docker-style strings (`"host"`, `"none"`, `"bridge"`,
1282    /// `"bridge:<name>"`, `"container:<id>"`).
1283    #[serde(default, deserialize_with = "deserialize_network_mode")]
1284    pub network_mode: NetworkMode,
1285
1286    /// Additional groups to add to the container process
1287    /// (Docker `--group-add`).
1288    #[serde(default, skip_serializing_if = "Vec::is_empty")]
1289    pub extra_groups: Vec<String>,
1290
1291    /// Mount the container's root filesystem read-only (Docker `--read-only`).
1292    #[serde(default)]
1293    pub read_only_root_fs: bool,
1294
1295    /// Run a Docker-supplied init process (PID 1) inside the container
1296    /// (Docker `--init`). Distinct from [`ServiceSpec::init`] which controls
1297    /// `ZLayer`'s pre-start init actions.
1298    #[serde(default, skip_serializing_if = "Option::is_none")]
1299    pub init_container: Option<bool>,
1300
1301    /// Allocate a TTY for the container's main process (Docker `--tty`,
1302    /// compose `tty: true`).
1303    #[serde(default)]
1304    pub tty: bool,
1305
1306    /// Keep STDIN open even when nothing is attached (Docker `--interactive`,
1307    /// compose `stdin_open: true`).
1308    #[serde(default)]
1309    pub stdin_open: bool,
1310
1311    /// User namespace mode (Docker `--userns`). Accepts e.g. `"host"` or
1312    /// a remap-spec name configured on the daemon.
1313    #[serde(default, skip_serializing_if = "Option::is_none")]
1314    pub userns_mode: Option<String>,
1315
1316    /// Cgroup parent path (Docker `--cgroup-parent`). When set, the runtime
1317    /// places the container under the given cgroup hierarchy.
1318    #[serde(default, skip_serializing_if = "Option::is_none")]
1319    pub cgroup_parent: Option<String>,
1320
1321    /// Container ports exposed but not published to the host (compose
1322    /// `expose:`). Each entry is a port string, optionally `port/proto`
1323    /// (e.g. `"3000"`, `"8080/tcp"`). Treated as documentation by the
1324    /// runtime; downstream networking layers may use this list to allow
1325    /// inter-service traffic without publishing to the host.
1326    #[serde(default, skip_serializing_if = "Vec::is_empty")]
1327    pub expose: Vec<String>,
1328
1329    /// Per-service overlay-network configuration.
1330    ///
1331    /// When `None` (default), the daemon uses the cluster-level overlay
1332    /// default. When `Some`, the service opts into an explicit mode /
1333    /// parent. See [`crate::overlay::OverlayConfig`] for the v0.51
1334    /// implementation status.
1335    #[serde(default, skip_serializing_if = "Option::is_none")]
1336    pub overlay: Option<crate::overlay::OverlayConfig>,
1337
1338    /// Policy for making this service's exposed ports reachable on the node's
1339    /// loopback (`127.0.0.1:<port>`) for same-node consumers — the GitHub
1340    /// Actions "service published to localhost" convention. See
1341    /// [`LocalhostReachability`]. Default [`LocalhostReachability::Auto`].
1342    #[serde(default, skip_serializing_if = "LocalhostReachability::is_default")]
1343    pub localhost_reachability: LocalhostReachability,
1344}
1345
1346/// How a service's exposed ports are made reachable on the node's loopback
1347/// (`127.0.0.1:<port>`) for same-service / same-node consumers.
1348///
1349/// `127.0.0.1` always means *this container's own* loopback — isolated per
1350/// container on Linux (youki netns), macOS VZ, and Windows HCS; shared with the
1351/// host on the macOS seatbelt / libkrun runtimes. This setting never rewrites a
1352/// container's own loopback. It controls only whether the daemon ALSO binds the
1353/// service's exposed port on the *node's* loopback and L4-forwards it to the
1354/// container, so a consumer that shares the node loopback can reach the service
1355/// at `localhost:<port>`.
1356#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
1357#[serde(rename_all = "snake_case")]
1358pub enum LocalhostReachability {
1359    /// Publish to the node loopback only when the service is effectively
1360    /// single-member (no replica groups, scaling disabled or capped at one
1361    /// replica). A multi-member service is not a "pod", so name-based overlay
1362    /// DNS (`<service>.service.local`) stays the addressing path to avoid an
1363    /// ambiguous single loopback port fronting many replicas. Default.
1364    #[default]
1365    Auto,
1366    /// Always publish each exposed port on the node loopback.
1367    Always,
1368    /// Never publish to the node loopback (name / overlay addressing only).
1369    Never,
1370}
1371
1372impl LocalhostReachability {
1373    /// True for the serde default ([`LocalhostReachability::Auto`]); used to
1374    /// skip serializing the field when it carries the default value.
1375    #[must_use]
1376    pub fn is_default(&self) -> bool {
1377        matches!(self, Self::Auto)
1378    }
1379}
1380
1381/// Deserialization shim for [`ServiceSpec`].
1382///
1383/// Mirrors `ServiceSpec`'s field shape so that the derived `Deserialize` impl
1384/// can pick up the YAML/JSON value, then [`From::from`] folds the deprecated
1385/// `host_network: bool` flag into the typed [`NetworkMode`] before handing the
1386/// finalized struct back to the caller.
1387#[derive(Deserialize)]
1388#[serde(deny_unknown_fields)]
1389#[allow(clippy::struct_excessive_bools)]
1390struct ServiceSpecCompat {
1391    #[serde(default = "default_resource_type")]
1392    rtype: ResourceType,
1393    #[serde(default)]
1394    schedule: Option<String>,
1395    image: ImageSpec,
1396    #[serde(default)]
1397    resources: ResourcesSpec,
1398    #[serde(default)]
1399    env: HashMap<String, String>,
1400    #[serde(default)]
1401    command: CommandSpec,
1402    #[serde(default)]
1403    network: ServiceNetworkSpec,
1404    #[serde(default)]
1405    endpoints: Vec<EndpointSpec>,
1406    #[serde(default)]
1407    scale: ScaleSpec,
1408    #[serde(default)]
1409    replica_groups: Option<Vec<ReplicaGroup>>,
1410    #[serde(default)]
1411    depends: Vec<DependsSpec>,
1412    #[serde(default = "default_health")]
1413    health: HealthSpec,
1414    #[serde(default)]
1415    init: InitSpec,
1416    #[serde(default)]
1417    errors: ErrorsSpec,
1418    #[serde(default)]
1419    lifecycle: LifecycleSpec,
1420    #[serde(default)]
1421    isolation: Option<IsolationMode>,
1422    #[serde(default)]
1423    devices: Vec<DeviceSpec>,
1424    #[serde(default)]
1425    storage: Vec<StorageSpec>,
1426    #[serde(default)]
1427    port_mappings: Vec<PortMapping>,
1428    #[serde(default, alias = "cap_add")]
1429    capabilities: Vec<String>,
1430    #[serde(default)]
1431    cap_drop: Vec<String>,
1432    #[serde(default)]
1433    privileged: bool,
1434    #[serde(default)]
1435    node_mode: NodeMode,
1436    #[serde(default)]
1437    node_selector: Option<NodeSelector>,
1438    #[serde(default)]
1439    affinity: Option<GroupAffinity>,
1440    #[serde(default)]
1441    platform: Option<TargetPlatform>,
1442    #[serde(default)]
1443    service_type: ServiceType,
1444    #[serde(default, alias = "wasm_http")]
1445    wasm: Option<WasmConfig>,
1446    #[serde(default)]
1447    logs: Option<LogsConfig>,
1448    /// Backwards-compat shim: when `host_network: true` is present in the input,
1449    /// it is folded into `network_mode = NetworkMode::Host` during conversion.
1450    #[serde(default)]
1451    host_network: Option<bool>,
1452    #[serde(default)]
1453    hostname: Option<String>,
1454    #[serde(default)]
1455    dns: Vec<String>,
1456    #[serde(default)]
1457    extra_hosts: Vec<String>,
1458    #[serde(default)]
1459    restart_policy: Option<ContainerRestartPolicy>,
1460    #[serde(default)]
1461    labels: HashMap<String, String>,
1462    #[serde(default)]
1463    user: Option<String>,
1464    #[serde(default)]
1465    stop_signal: Option<String>,
1466    #[serde(default, with = "duration::option")]
1467    stop_grace_period: Option<std::time::Duration>,
1468    #[serde(default)]
1469    sysctls: HashMap<String, String>,
1470    #[serde(default)]
1471    ulimits: HashMap<String, UlimitSpec>,
1472    #[serde(default)]
1473    security_opt: Vec<String>,
1474    #[serde(default)]
1475    pid_mode: Option<String>,
1476    #[serde(default)]
1477    ipc_mode: Option<String>,
1478    #[serde(default, deserialize_with = "deserialize_network_mode")]
1479    network_mode: NetworkMode,
1480    #[serde(default)]
1481    extra_groups: Vec<String>,
1482    #[serde(default)]
1483    read_only_root_fs: bool,
1484    #[serde(default)]
1485    init_container: Option<bool>,
1486    #[serde(default)]
1487    tty: bool,
1488    #[serde(default)]
1489    stdin_open: bool,
1490    #[serde(default)]
1491    userns_mode: Option<String>,
1492    #[serde(default)]
1493    cgroup_parent: Option<String>,
1494    #[serde(default)]
1495    expose: Vec<String>,
1496    #[serde(default)]
1497    overlay: Option<crate::overlay::OverlayConfig>,
1498    #[serde(default)]
1499    localhost_reachability: LocalhostReachability,
1500}
1501
1502impl From<ServiceSpecCompat> for ServiceSpec {
1503    fn from(c: ServiceSpecCompat) -> Self {
1504        // If the deprecated `host_network: true` flag is set, fold it into
1505        // the typed network mode unless the caller already supplied a
1506        // non-default value. This keeps existing in-process callers and
1507        // any legacy YAML that still emits `host_network: true` working.
1508        let network_mode = match (c.host_network, &c.network_mode) {
1509            (Some(true), NetworkMode::Default) => NetworkMode::Host,
1510            _ => c.network_mode,
1511        };
1512        let host_network = c.host_network.unwrap_or(false) || network_mode == NetworkMode::Host;
1513
1514        Self {
1515            rtype: c.rtype,
1516            schedule: c.schedule,
1517            image: c.image,
1518            resources: c.resources,
1519            env: c.env,
1520            command: c.command,
1521            network: c.network,
1522            endpoints: c.endpoints,
1523            scale: c.scale,
1524            replica_groups: c.replica_groups,
1525            depends: c.depends,
1526            health: c.health,
1527            init: c.init,
1528            errors: c.errors,
1529            lifecycle: c.lifecycle,
1530            isolation: c.isolation,
1531            devices: c.devices,
1532            storage: c.storage,
1533            port_mappings: c.port_mappings,
1534            capabilities: c.capabilities,
1535            cap_drop: c.cap_drop,
1536            privileged: c.privileged,
1537            node_mode: c.node_mode,
1538            node_selector: c.node_selector,
1539            affinity: c.affinity,
1540            platform: c.platform,
1541            service_type: c.service_type,
1542            wasm: c.wasm,
1543            logs: c.logs,
1544            host_network,
1545            hostname: c.hostname,
1546            dns: c.dns,
1547            extra_hosts: c.extra_hosts,
1548            restart_policy: c.restart_policy,
1549            labels: c.labels,
1550            user: c.user,
1551            stop_signal: c.stop_signal,
1552            stop_grace_period: c.stop_grace_period,
1553            sysctls: c.sysctls,
1554            ulimits: c.ulimits,
1555            security_opt: c.security_opt,
1556            pid_mode: c.pid_mode,
1557            ipc_mode: c.ipc_mode,
1558            network_mode,
1559            extra_groups: c.extra_groups,
1560            read_only_root_fs: c.read_only_root_fs,
1561            init_container: c.init_container,
1562            tty: c.tty,
1563            stdin_open: c.stdin_open,
1564            userns_mode: c.userns_mode,
1565            cgroup_parent: c.cgroup_parent,
1566            expose: c.expose,
1567            overlay: c.overlay,
1568            localhost_reachability: c.localhost_reachability,
1569        }
1570    }
1571}
1572
1573impl ServiceSpec {
1574    /// True when this service is effectively a single member: it has no
1575    /// (multi-member) replica groups and a scale policy that cannot exceed one
1576    /// replica (`Fixed { 0 | 1 }`, `Adaptive { max <= 1 }`, or `Manual`).
1577    ///
1578    /// Used by [`LocalhostReachability::Auto`] to decide whether publishing the
1579    /// service's ports on the node loopback is unambiguous — a genuine
1580    /// multi-member service would put several backends behind one loopback port,
1581    /// so name-based overlay DNS is the correct addressing for those instead.
1582    #[must_use]
1583    pub fn is_single_member(&self) -> bool {
1584        if let Some(groups) = &self.replica_groups {
1585            let total: u32 = groups.iter().map(|g| g.count).sum();
1586            return groups.len() <= 1 && total <= 1;
1587        }
1588        match &self.scale {
1589            ScaleSpec::Fixed { replicas } => *replicas <= 1,
1590            ScaleSpec::Adaptive { max, .. } => *max <= 1,
1591            ScaleSpec::Manual => true,
1592        }
1593    }
1594
1595    /// Whether the daemon should publish this service's exposed ports on the
1596    /// node loopback (`127.0.0.1:<port>`), per its [`LocalhostReachability`]
1597    /// policy. `Auto` publishes only for effectively single-member services
1598    /// (see [`ServiceSpec::is_single_member`]).
1599    #[must_use]
1600    pub fn publish_to_node_loopback(&self) -> bool {
1601        match self.localhost_reachability {
1602            LocalhostReachability::Always => true,
1603            LocalhostReachability::Never => false,
1604            LocalhostReachability::Auto => self.is_single_member(),
1605        }
1606    }
1607
1608    /// Construct a minimally-populated [`ServiceSpec`] with just the two
1609    /// fields callers always have to supply explicitly: the logical service
1610    /// name (used for diagnostics / labels at the call site — this struct
1611    /// does not carry the service name itself; it is the key in
1612    /// [`DeploymentSpec::services`]) and the container image. Every other
1613    /// field is filled in from [`Default::default`].
1614    ///
1615    /// Intended for tests and one-off in-memory fixtures. Production code
1616    /// paths that build a `ServiceSpec` from user input should still go
1617    /// through `serde` deserialization or an explicit struct literal so that
1618    /// every field is consciously set.
1619    ///
1620    /// # Examples
1621    /// ```ignore
1622    /// let spec = ServiceSpec::minimal("api", "ghcr.io/acme/api:1.2");
1623    /// ```
1624    ///
1625    /// # Panics
1626    /// Panics only if the fixed fallback string `"scratch:latest"` cannot
1627    /// be parsed as an [`ImageReference`] — which would indicate a bug in
1628    /// the OCI reference parser, not in caller input.
1629    #[must_use]
1630    pub fn minimal(_name: impl Into<String>, image: impl Into<String>) -> Self {
1631        use std::str::FromStr;
1632        let image_str = image.into();
1633        let image_ref = crate::ImageRef::from_str(&image_str).unwrap_or_else(|_| {
1634            crate::ImageRef::from_str("scratch:latest")
1635                .expect("'scratch:latest' is a valid image reference")
1636        });
1637        Self {
1638            image: ImageSpec {
1639                name: image_ref,
1640                pull_policy: default_pull_policy(),
1641                source_policy: None,
1642            },
1643            ..Self::default()
1644        }
1645    }
1646}
1647
/// Command override specification (Section 5.5)
///
/// Every field is optional: an omitted key deserializes to `None`, and a
/// `None` field is left out again when serializing. Keys other than the
/// three below are rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(deny_unknown_fields)]
pub struct CommandSpec {
    /// Override image ENTRYPOINT
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub entrypoint: Option<Vec<String>>,

    /// Override image CMD
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub args: Option<Vec<String>>,

    /// Override working directory
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub workdir: Option<String>,
}
1664
/// Fallback resource type: [`ResourceType::Service`].
// NOTE(review): presumably a `#[serde(default = "...")]` target for a
// `ResourceType` field declared elsewhere in this file — confirm.
fn default_resource_type() -> ResourceType {
    ResourceType::Service
}
1668
1669fn default_health() -> HealthSpec {
1670    HealthSpec {
1671        start_grace: Some(std::time::Duration::from_secs(5)),
1672        interval: None,
1673        timeout: None,
1674        retries: 3,
1675        check: HealthCheck::Tcp { port: 0 },
1676    }
1677}
1678
/// Resource type - determines container lifecycle
///
/// Serialized in lowercase (`service`, `job`, `cron`). `Service` is the
/// `Default` variant.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "lowercase")]
pub enum ResourceType {
    /// Long-running container, receives traffic, load-balanced
    #[default]
    Service,
    /// Run-to-completion, triggered by endpoint/CLI/internal system
    Job,
    /// Scheduled run-to-completion, time-triggered
    Cron,
}
1691
/// Per-image override for the registry resolution chain order.
///
/// The puller's default chain is LOCAL store → local CACHE → shared S3 tier →
/// the ref's own registry (URL) → last-resort default registry. A spec/compose
/// entry may pin a different behavior; `None` on [`ImageSpec`] == [`Self::LocalFirst`].
///
/// Serialized in `snake_case`: `local_first`, `s3_first`, `remote_only`,
/// `local_only`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum SourcePolicy {
    /// Default chain order (local store → cache → S3 → URL → fallback).
    #[default]
    LocalFirst,
    /// Probe the shared S3 tier BEFORE the local in-process cache (otherwise
    /// the default order). Useful when S3 is the fleet's canonical warm pool.
    S3First,
    /// Skip every local/cached/S3 source — always resolve from the ref's own
    /// registry (or the configured default registry for a bare name).
    RemoteOnly,
    /// Resolve ONLY from local sources (local store + cache); never touch S3,
    /// the network, or the default-registry fallback. A miss is an error.
    LocalOnly,
}
1713
/// Container image specification
///
/// Only `name` is required when deserializing; unknown keys are rejected
/// (`deny_unknown_fields`). `Validate` is derived, but no field here carries
/// a `#[validate(...)]` attribute.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Validate)]
#[serde(deny_unknown_fields)]
pub struct ImageSpec {
    /// Image name (e.g., "ghcr.io/org/api:latest")
    pub name: crate::ImageRef,

    /// When to pull the image
    ///
    /// Defaults to [`PullPolicy::IfNotPresent`] (via `default_pull_policy`)
    /// when the key is omitted.
    #[serde(default = "default_pull_policy")]
    pub pull_policy: PullPolicy,

    /// Optional override for the registry resolution chain order.
    /// `None` is treated as [`SourcePolicy::LocalFirst`] (the default chain).
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_policy: Option<SourcePolicy>,
}
1730
/// Serde default for [`ImageSpec::pull_policy`]: [`PullPolicy::IfNotPresent`].
fn default_pull_policy() -> PullPolicy {
    PullPolicy::IfNotPresent
}
1734
1735impl Default for ImageSpec {
1736    /// Placeholder default used by [`ServiceSpec::default`] (and downstream
1737    /// tests). The wrapped reference (`scratch:latest`) is not meaningful on
1738    /// its own — every real construction path should override this via
1739    /// [`ServiceSpec::minimal`] or an explicit literal. The point of having a
1740    /// `Default` is to make `ServiceSpec` itself `Default`-able so adding a new
1741    /// optional field on it does not force every existing literal site to be
1742    /// touched.
1743    fn default() -> Self {
1744        use std::str::FromStr;
1745        Self {
1746            name: crate::ImageRef::from_str("scratch:latest")
1747                .expect("'scratch:latest' is a valid image reference"),
1748            pull_policy: default_pull_policy(),
1749            source_policy: None,
1750        }
1751    }
1752}
1753
/// Image pull policy
///
/// Serialized in `snake_case`: `always`, `newer`, `if_not_present`, `never`.
/// The enum has no `Default` impl; `default_pull_policy` supplies
/// `IfNotPresent` where a default is needed.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum PullPolicy {
    /// Always pull the image, even if cached.
    Always,
    /// Resolve remote digest; pull and recreate when it differs from local/running.
    Newer,
    /// Use the local image if present; otherwise pull. Never contact a
    /// registry for revalidation when the image is already cached locally.
    /// This is the literal Docker/Kubernetes semantics — no silent upgrade
    /// to `Newer` for `:latest` tags (set `pull_policy: newer` explicitly
    /// when you want redeploy-picks-up-new-latest behavior).
    IfNotPresent,
    /// Never pull, use local image only.
    Never,
}
1771
/// Device passthrough specification
// Only `path` is required when deserializing. Omitted `read` / `write` keys
// default to `true` (via `default_true`); an omitted `mknod` defaults to
// `false`. Unknown keys are rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Validate, utoipa::ToSchema)]
#[serde(deny_unknown_fields)]
pub struct DeviceSpec {
    /// Host device path (e.g., /dev/kvm, /dev/net/tun)
    // Validation rejects an empty string; nothing else about the path is
    // checked by this attribute.
    #[validate(length(min = 1, message = "device path cannot be empty"))]
    pub path: String,

    /// Allow read access
    #[serde(default = "default_true")]
    pub read: bool,

    /// Allow write access
    #[serde(default = "default_true")]
    pub write: bool,

    /// Allow mknod (create device nodes)
    #[serde(default)]
    pub mknod: bool,
}
1792
/// Serde default helper that yields `true`; used for the `read` and `write`
/// flags of [`DeviceSpec`].
fn default_true() -> bool {
    true
}
1796
/// Storage mount specification
///
/// Internally tagged: the `type` key selects the variant, spelled in
/// `snake_case` (`bind`, `named`, `anonymous`, `tmpfs`, `s3`). Unknown keys
/// are rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields, tag = "type", rename_all = "snake_case")]
pub enum StorageSpec {
    /// Bind mount from host path to container
    Bind {
        /// Host path to mount from.
        source: String,
        /// Mount point inside the container.
        target: String,
        /// Mount read-only; defaults to `false` when omitted.
        #[serde(default)]
        readonly: bool,
    },
    /// Named persistent storage volume
    Named {
        /// Volume name.
        name: String,
        /// Mount point inside the container.
        target: String,
        /// Mount read-only; defaults to `false` when omitted.
        #[serde(default)]
        readonly: bool,
        /// Performance tier (default: local, SQLite-safe)
        #[serde(default)]
        tier: StorageTier,
        /// Optional size limit (e.g., "1Gi", "512Mi")
        #[serde(default, skip_serializing_if = "Option::is_none")]
        size: Option<String>,
    },
    /// Anonymous storage (auto-named, container lifecycle)
    Anonymous {
        /// Mount point inside the container.
        target: String,
        /// Performance tier (default: local)
        #[serde(default)]
        tier: StorageTier,
    },
    /// Memory-backed tmpfs mount
    Tmpfs {
        /// Mount point inside the container.
        target: String,
        /// Optional size limit, kept as a string. Unlike `Named::size` this
        /// field has no `skip_serializing_if`, so `None` is still serialized.
        #[serde(default)]
        size: Option<String>,
        /// Optional numeric mode.
        // NOTE(review): presumably the permission bits of the mount — confirm.
        #[serde(default)]
        mode: Option<u32>,
    },
    /// S3-backed FUSE mount
    S3 {
        /// Bucket to mount.
        bucket: String,
        /// Optional prefix.
        // NOTE(review): presumably a key prefix within the bucket — confirm.
        #[serde(default)]
        prefix: Option<String>,
        /// Mount point inside the container.
        target: String,
        /// Mount read-only; defaults to `false` when omitted.
        #[serde(default)]
        readonly: bool,
        /// Optional endpoint override.
        // NOTE(review): presumably a custom S3-compatible endpoint URL — confirm.
        #[serde(default)]
        endpoint: Option<String>,
        /// Optional credentials value.
        // NOTE(review): unclear from here whether this is a secret name, a
        // path, or inline credentials — confirm against the consumer.
        #[serde(default)]
        credentials: Option<String>,
    },
}
1850
/// Resource limits (upper bounds, not reservations)
///
/// Every field is optional. Only `cpu` and `memory` carry `#[validate]`
/// attributes; the numeric ranges mentioned on other fields (e.g.
/// `memory_swappiness`, `blkio_weight`) are documentation only as far as this
/// struct is concerned. `cpu` and `memory` have no `skip_serializing_if`, so
/// they are serialized even when `None`; the remaining fields are omitted
/// when `None`. The struct derives `PartialEq` but not `Eq` because `cpu` is
/// an `f64`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default, Validate)]
#[serde(deny_unknown_fields)]
pub struct ResourcesSpec {
    /// CPU limit (cores, e.g., 0.5, 1, 2)
    #[serde(default)]
    #[validate(custom(function = "crate::spec::validate::validate_cpu_option_wrapper"))]
    pub cpu: Option<f64>,

    /// Memory limit (e.g., "512Mi", "1Gi", "2Gi")
    #[serde(default)]
    #[validate(custom(function = "crate::spec::validate::validate_memory_option_wrapper"))]
    pub memory: Option<String>,

    /// GPU resource request
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub gpu: Option<GpuSpec>,

    /// Maximum number of processes the container may spawn
    /// (Docker `--pids-limit`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub pids_limit: Option<i64>,

    /// CPUs that the container is allowed to execute on (Docker `--cpuset-cpus`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cpuset: Option<String>,

    /// Relative CPU shares (Docker `--cpu-shares`). Default weight is 1024.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cpu_shares: Option<u32>,

    /// Total memory limit including swap (Docker `--memory-swap`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub memory_swap: Option<String>,

    /// Soft memory limit (Docker `--memory-reservation`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub memory_reservation: Option<String>,

    /// Container memory swappiness, 0-100 (Docker `--memory-swappiness`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub memory_swappiness: Option<u8>,

    /// OOM-killer score adjustment (Docker `--oom-score-adj`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub oom_score_adj: Option<i32>,

    /// Disable the OOM killer for the container (Docker `--oom-kill-disable`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub oom_kill_disable: Option<bool>,

    /// Block IO weight, 10-1000 (Docker `--blkio-weight`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub blkio_weight: Option<u16>,
}
1906
/// Scheduling policy for GPU workloads
///
/// Serialized in `kebab-case`: `best-effort`, `gang`, `spread`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "kebab-case")]
pub enum SchedulingPolicy {
    /// Place as many replicas as possible; partial placement is acceptable (default)
    #[default]
    BestEffort,
    /// All replicas must be placed or none are; prevents partial GPU job deployment
    Gang,
    /// Spread replicas across nodes to maximize GPU distribution
    Spread,
}
1919
/// GPU sharing mode controlling how GPU resources are multiplexed.
///
/// Serialized in `kebab-case`: `exclusive`, `mps`, `time-slice`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "kebab-case")]
pub enum GpuSharingMode {
    /// Whole GPU per container (default). No sharing.
    #[default]
    Exclusive,
    /// NVIDIA Multi-Process Service: concurrent GPU compute sharing.
    /// Multiple containers run GPU kernels simultaneously with hardware isolation.
    Mps,
    /// NVIDIA time-slicing: round-robin GPU access across containers.
    /// Lower overhead than MPS but no concurrent execution.
    TimeSlice,
}
1934
/// Configuration for distributed GPU job coordination.
///
/// When enabled on a multi-replica GPU service, `ZLayer` injects standard
/// distributed training environment variables (`MASTER_ADDR`, `MASTER_PORT`,
/// `WORLD_SIZE`, `RANK`, `LOCAL_RANK`) so frameworks like `PyTorch`, `Horovod`,
/// and `DeepSpeed` can coordinate automatically.
///
/// Both fields are optional in the spec. `backend` is a free-form string: no
/// `#[validate(...)]` attribute on this struct restricts it to the values
/// listed below.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Validate)]
#[serde(deny_unknown_fields)]
pub struct DistributedConfig {
    /// Communication backend: "nccl" (default), "gloo", or "mpi"
    #[serde(default = "default_dist_backend")]
    pub backend: String,
    /// Port for rank-0 master coordination (default: 29500)
    #[serde(default = "default_dist_port")]
    pub master_port: u16,
}
1951
/// Serde default for `DistributedConfig::backend`: the `"nccl"` backend.
fn default_dist_backend() -> String {
    String::from("nccl")
}
1955
/// Serde default for `DistributedConfig::master_port`: port 29500.
fn default_dist_port() -> u16 {
    29_500
}
1959
/// GPU resource specification
///
/// Supported vendors:
/// - `nvidia` - NVIDIA GPUs via NVIDIA Container Toolkit (default)
/// - `amd` - AMD GPUs via `ROCm` (/dev/kfd + /dev/dri/renderD*)
/// - `intel` - Intel GPUs via VAAPI/i915 (/dev/dri/renderD*)
/// - `apple` - Apple Silicon GPUs via Metal/MPS (macOS only)
///
/// Unknown vendors fall back to DRI render node passthrough.
///
/// ## GPU mode (macOS only)
///
/// When `vendor` is `"apple"`, the `mode` field controls how GPU access is provided:
/// - `"native"` -- Seatbelt sandbox with direct Metal/MPS access (lowest overhead)
/// - `"vm"` -- libkrun micro-VM with GPU forwarding (stronger isolation)
/// - `None` (default) -- Auto-select based on platform and vendor
///
/// On Linux, `mode` is ignored; GPU passthrough always uses device node binding.
///
/// ## Deserialization
///
/// Every field is optional in the spec: `count` defaults to 1, `vendor` to
/// `"nvidia"`, and all `Option` fields to `None` (which are also omitted when
/// serializing). `vendor` and `mode` are plain strings, and no field here
/// carries a `#[validate(...)]` attribute, so the derived `Validate` impl adds
/// no field-level checks. Unknown keys are rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Validate)]
#[serde(deny_unknown_fields)]
pub struct GpuSpec {
    /// Number of GPUs to request
    #[serde(default = "default_gpu_count")]
    pub count: u32,
    /// GPU vendor (`nvidia`, `amd`, `intel`, `apple`) - defaults to `nvidia`
    #[serde(default = "default_gpu_vendor")]
    pub vendor: String,
    /// GPU access mode (macOS only): `"native"`, `"vm"`, or `None` for auto-select
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mode: Option<String>,
    /// Pin to a specific GPU model (e.g. "A100", "H100").
    /// Substring match against detected GPU model names.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model: Option<String>,
    /// Scheduling policy for GPU workloads.
    /// - `best-effort` (default): place what fits
    /// - `gang`: all-or-nothing for distributed jobs
    /// - `spread`: distribute across nodes
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub scheduling: Option<SchedulingPolicy>,
    /// Distributed GPU job coordination.
    /// When set, injects `MASTER_ADDR`, `WORLD_SIZE`, `RANK`, `LOCAL_RANK` env vars.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub distributed: Option<DistributedConfig>,
    /// GPU sharing mode: exclusive (default), mps, or time-slice.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub sharing: Option<GpuSharingMode>,
    /// Host directory for the NVIDIA MPS control pipe.
    ///
    /// Only consulted when `sharing == Mps`. Defaults to `/tmp/nvidia-mps`
    /// when unset. The directory MUST exist on the host (created by the
    /// `nvidia-cuda-mps-control` daemon). It is bind-mounted into the
    /// container at the same path and exported as `CUDA_MPS_PIPE_DIRECTORY`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mps_pipe_dir: Option<String>,
    /// Host directory for NVIDIA MPS log output.
    ///
    /// Only consulted when `sharing == Mps`. Defaults to `/tmp/nvidia-log`
    /// when unset. The directory MUST exist on the host. It is bind-mounted
    /// into the container and exported as `CUDA_MPS_LOG_DIRECTORY`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mps_log_dir: Option<String>,
    /// CUDA device index this replica should see when `sharing == TimeSlice`.
    ///
    /// Emitted as `CUDA_VISIBLE_DEVICES=<slice_index>`, overriding the default
    /// 0..count visibility list. Use this together with a host-side NVIDIA
    /// time-slicing config to advertise a single physical GPU as multiple
    /// virtual slices.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub time_slice_index: Option<u32>,
    /// Optional host path to a NVIDIA time-slicing config YAML.
    ///
    /// When set, the file is bind-mounted read-only at
    /// `/etc/nvidia/gpu-time-slicing.yaml` inside the container so tools that
    /// inspect the slicing topology (e.g. monitoring sidecars) can read it.
    /// The file is not interpreted by `ZLayer` — it's purely informational for
    /// the workload.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub time_slicing_config_path: Option<String>,
}
2040
/// Serde default for `GpuSpec::count`: a single GPU.
fn default_gpu_count() -> u32 {
    1
}
2044
/// Serde default for `GpuSpec::vendor`: `"nvidia"`.
fn default_gpu_vendor() -> String {
    String::from("nvidia")
}
2048
/// Per-service network configuration (overlay + join policy).
///
/// Both keys are optional: a missing `overlays` is filled from
/// `OverlayConfig::default()` and a missing `join` from
/// `JoinPolicy::default()`. Unknown keys are rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
#[derive(Default)]
pub struct ServiceNetworkSpec {
    /// Overlay network configuration
    #[serde(default)]
    pub overlays: OverlayConfig,

    /// Join policy (who can join this service)
    #[serde(default)]
    pub join: JoinPolicy,
}
2062
2063/// Overlay network configuration
2064#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
2065#[serde(deny_unknown_fields)]
2066pub struct OverlayConfig {
2067    /// Service-scoped overlay (service replicas only)
2068    #[serde(default)]
2069    pub service: OverlaySettings,
2070
2071    /// Global overlay (all services in deployment)
2072    #[serde(default)]
2073    pub global: OverlaySettings,
2074}
2075
2076impl Default for OverlayConfig {
2077    fn default() -> Self {
2078        Self {
2079            service: OverlaySettings {
2080                enabled: true,
2081                encrypted: true,
2082                isolated: true,
2083            },
2084            global: OverlaySettings {
2085                enabled: true,
2086                encrypted: true,
2087                isolated: false,
2088            },
2089        }
2090    }
2091}
2092
/// Overlay network settings
///
/// Note: the derived `Default` yields `enabled: false`, `encrypted: false`,
/// `isolated: false`, whereas deserializing a value with those keys omitted
/// yields `enabled: true` and `encrypted: true` through the per-field serde
/// defaults below. Code that needs the "on by default" behaviour should not
/// rely on `OverlaySettings::default()`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(deny_unknown_fields)]
pub struct OverlaySettings {
    /// Enable this overlay
    #[serde(default = "default_enabled")]
    pub enabled: bool,

    /// Use encryption
    #[serde(default = "default_encrypted")]
    pub encrypted: bool,

    /// Isolate from other services/groups
    #[serde(default)]
    pub isolated: bool,
}
2109
/// Serde default for `OverlaySettings::enabled`: overlays are on unless
/// explicitly disabled.
fn default_enabled() -> bool {
    true
}
2113
/// Serde default for `OverlaySettings::encrypted`: encryption is on unless
/// explicitly disabled.
fn default_encrypted() -> bool {
    true
}
2117
/// Join policy - controls who can join a service
///
/// Both keys are optional; omitted keys default to [`JoinMode::Token`] and
/// [`JoinScope::Service`], the same values the `Default` impl produces.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct JoinPolicy {
    /// Join mode
    #[serde(default = "default_join_mode")]
    pub mode: JoinMode,

    /// Scope of join
    #[serde(default = "default_join_scope")]
    pub scope: JoinScope,
}
2130
2131impl Default for JoinPolicy {
2132    fn default() -> Self {
2133        Self {
2134            mode: default_join_mode(),
2135            scope: default_join_scope(),
2136        }
2137    }
2138}
2139
/// Default join mode: [`JoinMode::Token`]. Used both as the serde default for
/// `JoinPolicy::mode` and by `JoinPolicy::default()`.
fn default_join_mode() -> JoinMode {
    JoinMode::Token
}
2143
/// Default join scope: [`JoinScope::Service`]. Used both as the serde default
/// for `JoinPolicy::scope` and by `JoinPolicy::default()`.
fn default_join_scope() -> JoinScope {
    JoinScope::Service
}
2147
/// Join mode
///
/// Serialized in `snake_case`: `open`, `token`, `closed`. The enum has no
/// `Default` impl; `default_join_mode` supplies `Token` where needed.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum JoinMode {
    /// Any trusted node in deployment can self-enroll
    Open,
    /// Requires a join key (recommended)
    Token,
    /// Only control-plane/scheduler can place replicas
    Closed,
}
2159
/// Join scope
///
/// Serialized in `snake_case`: `service`, `global`. The enum has no `Default`
/// impl; `default_join_scope` supplies `Service` where needed.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum JoinScope {
    /// Join this specific service
    Service,
    /// Join all services in deployment
    Global,
}
2169
/// Endpoint specification (proxy binding)
///
/// `name`, `protocol` and `port` are required when deserializing; every other
/// key is optional. Unknown keys are rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Validate)]
#[serde(deny_unknown_fields)]
pub struct EndpointSpec {
    /// Endpoint name (for routing)
    #[validate(length(min = 1, message = "endpoint name cannot be empty"))]
    pub name: String,

    /// Protocol
    pub protocol: Protocol,

    /// Proxy listen port (external-facing port)
    #[validate(custom(function = "crate::spec::validate::validate_port_wrapper"))]
    pub port: u16,

    /// Container port the service actually listens on.
    /// Defaults to `port` when not specified.
    ///
    /// Read it through [`EndpointSpec::target_port`], which applies that
    /// fallback.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub target_port: Option<u16>,

    /// URL path prefix (for http/https/websocket)
    ///
    /// Unlike the other optional fields, this one has no
    /// `skip_serializing_if`, so a `None` value is still emitted when
    /// serializing.
    pub path: Option<String>,

    /// Host pattern for routing (e.g. "api.example.com" or "*.example.com").
    /// `None` means match any host.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub host: Option<String>,

    /// Exposure type
    ///
    /// Defaults to [`ExposeType::Internal`] (via `default_expose`) when
    /// omitted.
    #[serde(default = "default_expose")]
    pub expose: ExposeType,

    /// Optional stream (L4) proxy configuration
    /// Only applicable when protocol is tcp or udp
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stream: Option<StreamEndpointConfig>,

    /// Restrict this endpoint to backends in a specific replica role.
    ///
    /// When `Some`, only containers whose `replica_groups.role` matches this
    /// value receive traffic from this endpoint. When `None` (default), the
    /// endpoint accepts all containers of the service (legacy behavior).
    ///
    /// Validation: when set, the role MUST appear in the parent
    /// `ServiceSpec.replica_groups` (enforced at deploy time in the API
    /// handler, not via derive(Validate)).
    ///
    /// Example (a postgres service with primary + read replicas):
    ///
    /// ```yaml
    /// endpoints:
    ///   - name: write
    ///     port: 5432
    ///     protocol: tcp
    ///     target_role: primary
    ///   - name: read
    ///     port: 5433
    ///     protocol: tcp
    ///     target_role: read
    /// ```
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub target_role: Option<String>,

    /// Optional tunnel configuration for this endpoint
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tunnel: Option<EndpointTunnelConfig>,
}
2237
2238impl EndpointSpec {
2239    /// Returns the port the container actually listens on.
2240    /// Falls back to `port` when `target_port` is not specified.
2241    #[must_use]
2242    pub fn target_port(&self) -> u16 {
2243        self.target_port.unwrap_or(self.port)
2244    }
2245}
2246
/// Tunnel configuration for an endpoint
///
/// Every key is optional. The derived `Default` and the serde defaults agree:
/// tunneling disabled, `remote_port` 0, and all `Option` fields `None`.
/// Unknown keys are rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(deny_unknown_fields)]
pub struct EndpointTunnelConfig {
    /// Enable tunneling for this endpoint
    #[serde(default)]
    pub enabled: bool,

    /// Source node name (defaults to service's node)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub from: Option<String>,

    /// Destination node name (defaults to cluster ingress)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub to: Option<String>,

    /// Remote port to expose (0 = auto-assign)
    #[serde(default)]
    pub remote_port: u16,

    /// Override exposure for tunnel (public/internal)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub expose: Option<ExposeType>,

    /// On-demand access configuration
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub access: Option<TunnelAccessConfig>,
}
2275
/// On-demand access settings for `zlayer tunnel access`
///
/// Every key is optional; omitted booleans default to `false`. Unknown keys
/// are rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(deny_unknown_fields)]
pub struct TunnelAccessConfig {
    /// Allow on-demand access via CLI
    #[serde(default)]
    pub enabled: bool,

    /// Maximum session duration (e.g., "4h", "30m")
    ///
    /// Stored as the raw string; it is not parsed into a `Duration` by this
    /// type.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_ttl: Option<String>,

    /// Log all access sessions
    #[serde(default)]
    pub audit: bool,
}
2292
/// Serde default for `EndpointSpec::expose`: [`ExposeType::Internal`].
fn default_expose() -> ExposeType {
    ExposeType::Internal
}
2296
/// Protocol type
///
/// Serialized in lowercase: `http`, `https`, `tcp`, `udp`, `websocket`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum Protocol {
    Http,
    Https,
    Tcp,
    Udp,
    Websocket,
}
2307
/// Exposure type
///
/// Serialized in lowercase: `public`, `internal`. `Internal` is the `Default`
/// variant, matching `default_expose`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "lowercase")]
pub enum ExposeType {
    Public,
    #[default]
    Internal,
}
2316
/// Stream (L4) proxy configuration for TCP/UDP endpoints
///
/// Every key is optional; omitted booleans default to `false` and omitted
/// `Option` fields to `None`. Unknown keys are rejected
/// (`deny_unknown_fields`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(deny_unknown_fields)]
pub struct StreamEndpointConfig {
    /// Enable TLS termination for TCP (auto-provision cert)
    #[serde(default)]
    pub tls: bool,

    /// Enable PROXY protocol for passing client IP
    #[serde(default)]
    pub proxy_protocol: bool,

    /// Custom session timeout for UDP (default: 60s)
    /// Format: duration string like "60s", "5m"
    ///
    /// Stored as the raw string; it is not parsed into a `Duration` by this
    /// type.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub session_timeout: Option<String>,

    /// Health check configuration for L4
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub health_check: Option<StreamHealthCheck>,
}
2338
/// Health check types for stream (L4) endpoints
///
/// Internally tagged: the `type` key selects the variant, spelled in
/// `snake_case` (`tcp_connect`, `udp_probe`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum StreamHealthCheck {
    /// TCP connect check - verifies port is accepting connections
    TcpConnect,
    /// UDP probe - sends request and optionally validates response
    UdpProbe {
        /// Request payload to send (can use hex escapes like \\xFF)
        request: String,
        /// Expected response pattern (optional regex)
        #[serde(default, skip_serializing_if = "Option::is_none")]
        expect: Option<String>,
    },
}
2354
/// Scaling configuration
///
/// Internally tagged: the `mode` key selects the variant (`adaptive`,
/// `fixed`, `manual`). Unknown keys are rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "mode", rename_all = "lowercase", deny_unknown_fields)]
pub enum ScaleSpec {
    /// Adaptive scaling with metrics
    #[serde(rename = "adaptive")]
    Adaptive {
        /// Minimum replicas
        min: u32,

        /// Maximum replicas
        max: u32,

        /// Cooldown period between scale events
        ///
        /// Written as a human-readable duration string and parsed with
        /// `humantime`; `None` when omitted.
        #[serde(default, with = "duration::option")]
        cooldown: Option<std::time::Duration>,

        /// Target metrics for scaling
        #[serde(default)]
        targets: ScaleTargets,
    },

    /// Fixed number of replicas
    #[serde(rename = "fixed")]
    Fixed { replicas: u32 },

    /// Manual scaling (no automatic scaling)
    #[serde(rename = "manual")]
    Manual,
}
2385
2386impl Default for ScaleSpec {
2387    fn default() -> Self {
2388        Self::Adaptive {
2389            min: 1,
2390            max: 10,
2391            cooldown: Some(std::time::Duration::from_secs(30)),
2392            targets: ScaleTargets::default(),
2393        }
2394    }
2395}
2396
/// Target metrics for adaptive scaling
///
/// Every target is optional; the derived `Default` leaves all of them `None`.
/// The 0-100 ranges below are documentation only as far as this struct is
/// concerned — no validation attribute enforces them here.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
#[derive(Default)]
pub struct ScaleTargets {
    /// CPU percentage threshold (0-100)
    #[serde(default)]
    pub cpu: Option<u8>,

    /// Memory percentage threshold (0-100)
    #[serde(default)]
    pub memory: Option<u8>,

    /// Requests per second threshold
    #[serde(default)]
    pub rps: Option<u32>,
}
2414
/// Dependency specification
///
/// Only `service` is required when deserializing. Unknown keys are rejected
/// (`deny_unknown_fields`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct DependsSpec {
    /// Service name to depend on
    pub service: String,

    /// Condition for dependency
    ///
    /// Defaults to [`DependencyCondition::Healthy`] when omitted.
    #[serde(default = "default_condition")]
    pub condition: DependencyCondition,

    /// Maximum time to wait
    ///
    /// Written as a human-readable duration string and parsed with
    /// `humantime`. Defaults to 300 seconds when the key is omitted.
    #[serde(default = "default_timeout", with = "duration::option")]
    pub timeout: Option<std::time::Duration>,

    /// Action on timeout
    ///
    /// Defaults to [`TimeoutAction::Fail`] when omitted.
    #[serde(default = "default_on_timeout")]
    pub on_timeout: TimeoutAction,
}
2434
/// Serde default for `DependsSpec::condition`:
/// [`DependencyCondition::Healthy`].
fn default_condition() -> DependencyCondition {
    DependencyCondition::Healthy
}
2438
/// Serde default for `DependsSpec::timeout`: five minutes.
///
/// The value is wrapped in `Some` (hence the clippy allow) because the field
/// it backs is an `Option<Duration>`.
#[allow(clippy::unnecessary_wraps)]
fn default_timeout() -> Option<std::time::Duration> {
    let five_minutes = std::time::Duration::from_secs(5 * 60);
    Some(five_minutes)
}
2443
/// Serde default for `DependsSpec::on_timeout`: [`TimeoutAction::Fail`].
fn default_on_timeout() -> TimeoutAction {
    TimeoutAction::Fail
}
2447
/// Dependency condition
///
/// Serialized in lowercase: `started`, `healthy`, `ready`. The enum has no
/// `Default` impl; `default_condition` supplies `Healthy` where needed.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum DependencyCondition {
    /// Container process exists
    Started,
    /// Health check passes
    Healthy,
    /// Service is available for routing
    Ready,
}
2459
/// Timeout action
///
/// Selected by `DependsSpec::on_timeout`. Serialized in lowercase: `fail`,
/// `warn`, `continue`. The enum has no `Default` impl; `default_on_timeout`
/// supplies `Fail` where needed.
// NOTE(review): the exact runtime effect of each variant is implemented by
// the consumer of `DependsSpec`, not here — confirm before documenting it.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum TimeoutAction {
    Fail,
    Warn,
    Continue,
}
2468
/// Health check specification
///
/// Only `check` is required when deserializing. The three durations are
/// written as human-readable strings and parsed with `humantime`; each is
/// `None` when its key is omitted. Unknown keys are rejected
/// (`deny_unknown_fields`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct HealthSpec {
    /// Grace period before first check
    #[serde(default, with = "duration::option")]
    pub start_grace: Option<std::time::Duration>,

    /// Interval between checks
    #[serde(default, with = "duration::option")]
    pub interval: Option<std::time::Duration>,

    /// Timeout per check
    #[serde(default, with = "duration::option")]
    pub timeout: Option<std::time::Duration>,

    /// Number of retries before marking unhealthy
    ///
    /// Defaults to 3 when omitted.
    #[serde(default = "default_retries")]
    pub retries: u32,

    /// Health check type and parameters
    pub check: HealthCheck,
}
2492
/// Serde default for `HealthSpec::retries`: three retries.
fn default_retries() -> u32 {
    3
}
2496
impl Default for HealthSpec {
    /// Delegates to [`default_health`]: a 5-second start grace, no explicit
    /// interval or timeout, 3 retries, and a TCP check against port 0 ("use
    /// first endpoint"). [`default_health`] is the serde fallback when no
    /// `health:` block is supplied in a deployment spec.
    ///
    /// This is not identical to deserializing a `health:` block that omits
    /// `start_grace`: the per-field serde default for that key is `None`,
    /// whereas this impl yields `Some(5s)`.
    fn default() -> Self {
        default_health()
    }
}
2506
2507/// Health check type
2508#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
2509#[serde(tag = "type", rename_all = "lowercase")]
2510pub enum HealthCheck {
2511    /// TCP port check
2512    Tcp {
2513        /// Port to check (0 = use first endpoint)
2514        port: u16,
2515    },
2516
2517    /// HTTP check
2518    Http {
2519        /// URL to check
2520        url: String,
2521        /// Expected status code
2522        #[serde(default = "default_expect_status")]
2523        expect_status: u16,
2524    },
2525
2526    /// Command check
2527    Command {
2528        /// Command to run
2529        command: String,
2530    },
2531}
2532
/// Serde default for the `expect_status` of an HTTP health check: `200`.
fn default_expect_status() -> u16 {
    const HTTP_OK: u16 = 200;
    HTTP_OK
}
2536
2537/// Init actions specification
2538#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
2539#[serde(deny_unknown_fields)]
2540#[derive(Default)]
2541pub struct InitSpec {
2542    /// Init steps to run before container starts
2543    #[serde(default)]
2544    pub steps: Vec<InitStep>,
2545}
2546
2547/// Lifecycle policy for service / job / cron containers.
2548///
2549/// Currently exposes a single `delete_on_exit` knob that, when `true`,
2550/// instructs higher layers to remove the container record (and its bundle)
2551/// once it has terminated. Other layers consume this field; this type is
2552/// purely descriptive.
2553#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema)]
2554#[serde(deny_unknown_fields)]
2555pub struct LifecycleSpec {
2556    /// When true, terminated containers (and their bundles) are removed
2557    /// automatically rather than retained for inspection. Defaults to
2558    /// `false`, preserving the historical retain-on-exit behavior.
2559    #[serde(default)]
2560    pub delete_on_exit: bool,
2561}
2562
2563/// Init action step
2564#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
2565#[serde(deny_unknown_fields)]
2566pub struct InitStep {
2567    /// Step identifier
2568    pub id: String,
2569
2570    /// Action to perform (e.g., "`init.wait_tcp`")
2571    pub uses: String,
2572
2573    /// Parameters for the action
2574    #[serde(default)]
2575    pub with: InitParams,
2576
2577    /// Number of retries
2578    #[serde(default)]
2579    pub retry: Option<u32>,
2580
2581    /// Maximum time for this step
2582    #[serde(default, with = "duration::option")]
2583    pub timeout: Option<std::time::Duration>,
2584
2585    /// Action on failure
2586    #[serde(default = "default_on_failure")]
2587    pub on_failure: FailureAction,
2588}
2589
2590fn default_on_failure() -> FailureAction {
2591    FailureAction::Fail
2592}
2593
2594/// Init action parameters
2595pub type InitParams = std::collections::HashMap<String, serde_json::Value>;
2596
2597/// Failure action for init steps
2598#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
2599#[serde(rename_all = "lowercase")]
2600pub enum FailureAction {
2601    Fail,
2602    Warn,
2603    Continue,
2604}
2605
2606/// Error handling policies
2607#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
2608#[serde(deny_unknown_fields)]
2609#[derive(Default)]
2610pub struct ErrorsSpec {
2611    /// Init failure policy
2612    #[serde(default)]
2613    pub on_init_failure: InitFailurePolicy,
2614
2615    /// Panic/restart policy
2616    #[serde(default)]
2617    pub on_panic: PanicPolicy,
2618}
2619
2620/// Init failure policy
2621#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
2622#[serde(deny_unknown_fields)]
2623pub struct InitFailurePolicy {
2624    #[serde(default = "default_init_action")]
2625    pub action: InitFailureAction,
2626}
2627
2628impl Default for InitFailurePolicy {
2629    fn default() -> Self {
2630        Self {
2631            action: default_init_action(),
2632        }
2633    }
2634}
2635
2636fn default_init_action() -> InitFailureAction {
2637    InitFailureAction::Fail
2638}
2639
2640/// Init failure action
2641#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
2642#[serde(rename_all = "lowercase")]
2643pub enum InitFailureAction {
2644    Fail,
2645    Restart,
2646    Backoff,
2647}
2648
2649/// Panic policy
2650#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
2651#[serde(deny_unknown_fields)]
2652pub struct PanicPolicy {
2653    #[serde(default = "default_panic_action")]
2654    pub action: PanicAction,
2655}
2656
2657impl Default for PanicPolicy {
2658    fn default() -> Self {
2659        Self {
2660            action: default_panic_action(),
2661        }
2662    }
2663}
2664
2665fn default_panic_action() -> PanicAction {
2666    PanicAction::Restart
2667}
2668
2669/// Panic action
2670#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
2671#[serde(rename_all = "lowercase")]
2672pub enum PanicAction {
2673    Restart,
2674    Shutdown,
2675    Isolate,
2676}
2677
2678// ==========================================================================
2679// Network / Access Control types
2680// ==========================================================================
2681
2682/// A network policy defines an access control group with membership rules
2683/// and service access policies.
2684#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
2685pub struct NetworkPolicySpec {
2686    /// Unique network name.
2687    pub name: String,
2688
2689    /// Human-readable description.
2690    #[serde(default, skip_serializing_if = "Option::is_none")]
2691    pub description: Option<String>,
2692
2693    /// CIDR ranges that belong to this network (e.g., "10.200.0.0/16", "192.168.1.0/24").
2694    #[serde(default)]
2695    pub cidrs: Vec<String>,
2696
2697    /// Named members (users, groups, nodes) of this network.
2698    #[serde(default)]
2699    pub members: Vec<NetworkMember>,
2700
2701    /// Access rules defining which services this network can reach.
2702    #[serde(default)]
2703    pub access_rules: Vec<AccessRule>,
2704}
2705
2706/// A member of a network.
2707#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
2708pub struct NetworkMember {
2709    /// Member identifier (username, group name, node ID, or CIDR).
2710    pub name: String,
2711    /// Type of member.
2712    #[serde(default)]
2713    pub kind: MemberKind,
2714}
2715
2716/// Type of network member.
2717#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
2718#[serde(rename_all = "lowercase")]
2719pub enum MemberKind {
2720    /// An individual user identity.
2721    #[default]
2722    User,
2723    /// A group of users.
2724    Group,
2725    /// A specific cluster node.
2726    Node,
2727    /// A CIDR range (redundant with NetworkPolicySpec.cidrs but allows per-member CIDR).
2728    Cidr,
2729}
2730
2731/// An access rule determining what a network can reach.
2732#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
2733pub struct AccessRule {
2734    /// Target service name, or "*" for all services.
2735    #[serde(default = "wildcard")]
2736    pub service: String,
2737
2738    /// Target deployment name, or "*" for all deployments.
2739    #[serde(default = "wildcard")]
2740    pub deployment: String,
2741
2742    /// Specific ports allowed. None means all ports.
2743    #[serde(default, skip_serializing_if = "Option::is_none")]
2744    pub ports: Option<Vec<u16>>,
2745
2746    /// Whether to allow or deny access.
2747    #[serde(default)]
2748    pub action: AccessAction,
2749}
2750
/// Serde default for the `service` / `deployment` fields of [`AccessRule`]:
/// the match-everything pattern `"*"`.
fn wildcard() -> String {
    String::from("*")
}
2754
2755/// Access control action.
2756#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
2757#[serde(rename_all = "lowercase")]
2758pub enum AccessAction {
2759    /// Allow access (default).
2760    #[default]
2761    Allow,
2762    /// Deny access.
2763    Deny,
2764}
2765
2766// ==========================================================================
2767// Container bridge / overlay network types (Docker-compatible)
2768// ==========================================================================
2769//
2770// These types model user-defined bridge or overlay networks that standalone
2771// containers can attach to — the Docker-style "docker network create" model.
2772// They are intentionally named `BridgeNetwork*` to avoid colliding with the
2773// CIDR-ACL `NetworkPolicySpec` types above, which model a completely
2774// different concept (access-control groups).
2775
2776/// A user-defined bridge or overlay network that containers can attach to.
2777#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, utoipa::ToSchema)]
2778pub struct BridgeNetwork {
2779    /// Opaque server-generated identifier (UUID v4).
2780    pub id: String,
2781
2782    /// Human-readable, unique name (must match `^[a-z0-9][a-z0-9_-]{0,63}$`).
2783    pub name: String,
2784
2785    /// Driver backing the network (bridge vs. overlay).
2786    #[serde(default)]
2787    pub driver: BridgeNetworkDriver,
2788
2789    /// IPv4/IPv6 subnet in CIDR notation (e.g. `"10.240.0.0/24"`).
2790    #[serde(default, skip_serializing_if = "Option::is_none")]
2791    pub subnet: Option<String>,
2792
2793    /// Arbitrary key/value labels for filtering and grouping.
2794    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
2795    pub labels: HashMap<String, String>,
2796
2797    /// If true, containers attached to this network cannot reach the outside
2798    /// world — only other containers on the same network.
2799    #[serde(default)]
2800    pub internal: bool,
2801
2802    /// Creation timestamp (UTC, RFC 3339).
2803    #[schema(value_type = String, format = "date-time")]
2804    pub created_at: chrono::DateTime<chrono::Utc>,
2805}
2806
2807/// Backing driver for a [`BridgeNetwork`].
2808#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default, utoipa::ToSchema)]
2809#[serde(rename_all = "lowercase")]
2810pub enum BridgeNetworkDriver {
2811    /// Linux bridge on the local host (single-host, default).
2812    #[default]
2813    Bridge,
2814    /// Overlay network spanning multiple hosts.
2815    Overlay,
2816}
2817
2818/// A container attached to a [`BridgeNetwork`].
2819#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, utoipa::ToSchema)]
2820pub struct BridgeNetworkAttachment {
2821    /// Runtime-provided container id.
2822    pub container_id: String,
2823
2824    /// Container name, if known.
2825    #[serde(default, skip_serializing_if = "Option::is_none")]
2826    pub container_name: Option<String>,
2827
2828    /// DNS aliases the container can be reached by on this network.
2829    #[serde(default, skip_serializing_if = "Vec::is_empty")]
2830    pub aliases: Vec<String>,
2831
2832    /// Assigned IPv4 address on the network (if any).
2833    #[serde(default, skip_serializing_if = "Option::is_none")]
2834    pub ipv4: Option<String>,
2835}
2836
2837// ==========================================================================
2838// Registry auth (inline, not persisted) — §3.10 of ZLAYER_SDK_FIXES.md
2839// ==========================================================================
2840//
2841// Inline credentials a client can attach to a single pull or container-create
2842// request without first POSTing them to `/api/v1/credentials/registry`. The
2843// daemon uses them exactly once — they are never logged, never persisted, and
2844// never echoed back on a response.
2845//
2846// For requests that instead want to reuse an already-stored credential, the
2847// `CreateContainerRequest` / `PullImageRequest` DTOs also accept a
2848// `registry_credential_id` pointing at the `RegistryCredentialStore`. Inline
2849// `RegistryAuth` takes precedence when both are provided.
2850
2851/// Inline Docker/OCI registry credentials attached to a single pull request.
2852///
2853/// Prefer persistent credentials via `/api/v1/credentials/registry` for
2854/// long-lived services. Use this inline form for one-off pulls (e.g. CI
2855/// runners fetching a private image for a single job) where persisting a
2856/// credential is undesirable.
2857#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, utoipa::ToSchema)]
2858pub struct RegistryAuth {
2859    /// Username for the registry (for basic auth) or a placeholder
2860    /// identifier when `auth_type == Token`.
2861    pub username: String,
2862    /// Password or bearer token. **Never** logged or returned on any
2863    /// response — consumed once and dropped.
2864    pub password: String,
2865    /// Which authentication scheme to use against the registry.
2866    #[serde(default = "default_registry_auth_type")]
2867    pub auth_type: RegistryAuthType,
2868}
2869
2870/// Authentication scheme for a [`RegistryAuth`].
2871#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default, utoipa::ToSchema)]
2872#[serde(rename_all = "snake_case")]
2873pub enum RegistryAuthType {
2874    /// HTTP Basic authentication (username + password). Default.
2875    #[default]
2876    Basic,
2877    /// Bearer token authentication. `password` carries the token; `username`
2878    /// is typically a placeholder such as `"oauth2accesstoken"` or `"<token>"`.
2879    Token,
2880}
2881
2882/// Serde default for [`RegistryAuth::auth_type`]. Kept as a free function so
2883/// `#[serde(default = "...")]` can reference it.
2884#[must_use]
2885pub fn default_registry_auth_type() -> RegistryAuthType {
2886    RegistryAuthType::Basic
2887}
2888
2889// ==========================================================================
2890// Container restart policy (Docker-style) — §3.4 of ZLAYER_SDK_FIXES.md
2891// ==========================================================================
2892//
2893// Named `ContainerRestartPolicy` / `ContainerRestartKind` rather than
2894// `RestartPolicy` / `RestartKind` to avoid colliding with ZLayer's existing
2895// `PanicPolicy`/`PanicAction` types and to make the runtime-level (as opposed
2896// to panic-driven) nature of this policy explicit.
2897
2898/// Container-runtime-level restart policy.
2899///
2900/// Maps onto Docker's `HostConfig.RestartPolicy`. Distinct from
2901/// [`PanicPolicy`], which governs what `ZLayer` does in response to an
2902/// application panic (it does not set a Docker restart policy).
2903#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, utoipa::ToSchema)]
2904#[serde(rename_all = "snake_case", deny_unknown_fields)]
2905pub struct ContainerRestartPolicy {
2906    /// Which restart policy to apply.
2907    pub kind: ContainerRestartKind,
2908
2909    /// For `on_failure` only: maximum number of restart attempts before
2910    /// giving up. Ignored by other kinds. `None` means "retry forever".
2911    #[serde(default, skip_serializing_if = "Option::is_none")]
2912    pub max_attempts: Option<u32>,
2913
2914    /// Humantime-formatted delay between restarts (e.g. `"500ms"`,
2915    /// `"2s"`). Accepted for forward-compatibility but currently ignored
2916    /// by the Docker backend: bollard's `RestartPolicy` has no per-kind
2917    /// delay field. When set, the runtime emits a warning.
2918    #[serde(default, skip_serializing_if = "Option::is_none")]
2919    pub delay: Option<String>,
2920}
2921
2922/// Which flavor of container restart policy to apply.
2923#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, utoipa::ToSchema)]
2924#[serde(rename_all = "snake_case")]
2925pub enum ContainerRestartKind {
2926    /// Never restart (Docker's `"no"`).
2927    No,
2928    /// Always restart (Docker's `"always"`).
2929    Always,
2930    /// Restart unless the user explicitly stopped the container
2931    /// (Docker's `"unless-stopped"`).
2932    UnlessStopped,
2933    /// Restart only when the container exits with a non-zero code
2934    /// (Docker's `"on-failure"`). Respects `max_attempts`.
2935    OnFailure,
2936}
2937
2938// ==========================================================================
2939// Port mappings (Docker-style container port publishing)
2940// ==========================================================================
2941
2942/// Transport protocol for a published container port.
2943#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, utoipa::ToSchema)]
2944#[serde(rename_all = "snake_case")]
2945pub enum PortProtocol {
2946    /// TCP (default).
2947    Tcp,
2948    /// UDP.
2949    Udp,
2950}
2951
2952impl Default for PortProtocol {
2953    fn default() -> Self {
2954        default_port_protocol()
2955    }
2956}
2957
2958impl PortProtocol {
2959    /// Return the lowercase string form Docker uses in port-binding keys
2960    /// (e.g. `"tcp"` or `"udp"`).
2961    #[must_use]
2962    pub fn as_str(&self) -> &'static str {
2963        match self {
2964            PortProtocol::Tcp => "tcp",
2965            PortProtocol::Udp => "udp",
2966        }
2967    }
2968}
2969
2970fn default_port_protocol() -> PortProtocol {
2971    PortProtocol::Tcp
2972}
2973
/// Serde default for [`PortMapping::host_ip`]: `"0.0.0.0"`, i.e. bind on all
/// interfaces.
fn default_host_ip() -> String {
    String::from("0.0.0.0")
}
2977
2978/// A single host-to-container port publish rule (Docker's `-p`).
2979///
2980/// When `host_port` is `None` (or explicitly `Some(0)`), the container runtime
2981/// assigns an ephemeral host port. `host_ip` defaults to `"0.0.0.0"` to bind
2982/// on all interfaces.
2983#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, utoipa::ToSchema)]
2984#[serde(rename_all = "snake_case")]
2985pub struct PortMapping {
2986    /// Host port. `None` (or zero) means "assign an ephemeral port".
2987    #[serde(default, skip_serializing_if = "Option::is_none")]
2988    pub host_port: Option<u16>,
2989    /// Container-side port.
2990    pub container_port: u16,
2991    /// Transport protocol (defaults to TCP).
2992    #[serde(default = "default_port_protocol")]
2993    pub protocol: PortProtocol,
2994    /// Host interface to bind on. Defaults to `"0.0.0.0"` (all interfaces).
2995    #[serde(default = "default_host_ip", skip_serializing_if = "String::is_empty")]
2996    pub host_ip: String,
2997}
2998
2999#[cfg(test)]
3000mod tests {
3001    use super::*;
3002
3003    #[test]
3004    fn service_spec_default_round_trips_through_json() {
3005        // Building `ServiceSpec::default()` must succeed (no panics on the
3006        // placeholder image reference) and the result must round-trip through
3007        // serde_json so callers can store / transport a default spec without
3008        // surprises.
3009        let spec = ServiceSpec::default();
3010
3011        // Sanity on a handful of fields that depend on custom Default impls.
3012        assert_eq!(spec.rtype, ResourceType::Service);
3013        assert_eq!(spec.image.pull_policy, PullPolicy::IfNotPresent);
3014        assert_eq!(spec.health.retries, 3);
3015        assert_eq!(spec.network_mode, NetworkMode::Default);
3016        assert!(spec.env.is_empty());
3017        assert!(spec.endpoints.is_empty());
3018        assert!(spec.overlay.is_none());
3019
3020        let json = serde_json::to_string(&spec).expect("serialize default ServiceSpec");
3021        let parsed: ServiceSpec =
3022            serde_json::from_str(&json).expect("re-parse default ServiceSpec");
3023        assert_eq!(spec, parsed);
3024    }
3025
3026    #[test]
3027    fn service_spec_minimal_sets_name_and_image() {
3028        let spec = ServiceSpec::minimal("api", "ghcr.io/acme/api:1.2");
3029        assert_eq!(spec.image.name.repository(), "acme/api");
3030        assert_eq!(spec.image.name.tag(), Some("1.2"));
3031        // Everything else should match Default exactly.
3032        let baseline = ServiceSpec::default();
3033        assert_eq!(spec.rtype, baseline.rtype);
3034        assert_eq!(spec.scale, baseline.scale);
3035        assert_eq!(spec.network_mode, baseline.network_mode);
3036    }
3037
3038    #[test]
3039    fn port_mapping_defaults_via_serde() {
3040        // Minimal JSON: only container_port. host_port omitted, protocol defaults
3041        // to "tcp", host_ip defaults to "0.0.0.0".
3042        let json = r#"{"container_port": 8080}"#;
3043        let m: PortMapping = serde_json::from_str(json).expect("parse minimal PortMapping");
3044        assert_eq!(m.container_port, 8080);
3045        assert_eq!(m.host_port, None);
3046        assert_eq!(m.protocol, PortProtocol::Tcp);
3047        assert_eq!(m.host_ip, "0.0.0.0");
3048    }
3049
3050    #[test]
3051    fn port_mapping_skips_none_host_port_and_empty_host_ip() {
3052        let m = PortMapping {
3053            host_port: None,
3054            container_port: 443,
3055            protocol: PortProtocol::Tcp,
3056            host_ip: String::new(),
3057        };
3058        let s = serde_json::to_string(&m).expect("serialize");
3059        // host_port = None should be skipped, host_ip = "" should be skipped.
3060        assert!(!s.contains("host_port"), "host_port should be skipped: {s}");
3061        assert!(!s.contains("host_ip"), "host_ip should be skipped: {s}");
3062        assert!(s.contains("\"container_port\":443"));
3063        assert!(s.contains("\"protocol\":\"tcp\""));
3064    }
3065
3066    #[test]
3067    fn test_parse_simple_spec() {
3068        let yaml = r"
3069version: v1
3070deployment: test
3071services:
3072  hello:
3073    rtype: service
3074    image:
3075      name: hello-world:latest
3076    endpoints:
3077      - name: http
3078        protocol: http
3079        port: 8080
3080        expose: public
3081";
3082
3083        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3084        assert_eq!(spec.version, "v1");
3085        assert_eq!(spec.deployment, "test");
3086        assert!(spec.services.contains_key("hello"));
3087    }
3088
3089    #[test]
3090    fn test_parse_duration() {
3091        let yaml = r"
3092version: v1
3093deployment: test
3094services:
3095  test:
3096    rtype: service
3097    image:
3098      name: test:latest
3099    health:
3100      timeout: 30s
3101      interval: 1m
3102      start_grace: 5s
3103      check:
3104        type: tcp
3105        port: 8080
3106";
3107
3108        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3109        let health = &spec.services["test"].health;
3110        assert_eq!(health.timeout, Some(std::time::Duration::from_secs(30)));
3111        assert_eq!(health.interval, Some(std::time::Duration::from_secs(60)));
3112        assert_eq!(health.start_grace, Some(std::time::Duration::from_secs(5)));
3113        match &health.check {
3114            HealthCheck::Tcp { port } => assert_eq!(*port, 8080),
3115            _ => panic!("Expected TCP health check"),
3116        }
3117    }
3118
3119    #[test]
3120    fn test_parse_adaptive_scale() {
3121        let yaml = r"
3122version: v1
3123deployment: test
3124services:
3125  test:
3126    rtype: service
3127    image:
3128      name: test:latest
3129    scale:
3130      mode: adaptive
3131      min: 2
3132      max: 10
3133      cooldown: 15s
3134      targets:
3135        cpu: 70
3136        rps: 800
3137";
3138
3139        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3140        let scale = &spec.services["test"].scale;
3141        match scale {
3142            ScaleSpec::Adaptive {
3143                min,
3144                max,
3145                cooldown,
3146                targets,
3147            } => {
3148                assert_eq!(*min, 2);
3149                assert_eq!(*max, 10);
3150                assert_eq!(*cooldown, Some(std::time::Duration::from_secs(15)));
3151                assert_eq!(targets.cpu, Some(70));
3152                assert_eq!(targets.rps, Some(800));
3153            }
3154            _ => panic!("Expected Adaptive scale mode"),
3155        }
3156    }
3157
3158    #[test]
3159    fn test_node_mode_default() {
3160        let yaml = r"
3161version: v1
3162deployment: test
3163services:
3164  hello:
3165    rtype: service
3166    image:
3167      name: hello-world:latest
3168";
3169
3170        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3171        assert_eq!(spec.services["hello"].node_mode, NodeMode::Shared);
3172        assert!(spec.services["hello"].node_selector.is_none());
3173    }
3174
3175    #[test]
3176    fn test_node_mode_dedicated() {
3177        let yaml = r"
3178version: v1
3179deployment: test
3180services:
3181  api:
3182    rtype: service
3183    image:
3184      name: api:latest
3185    node_mode: dedicated
3186";
3187
3188        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3189        assert_eq!(spec.services["api"].node_mode, NodeMode::Dedicated);
3190    }
3191
3192    #[test]
3193    fn test_node_mode_exclusive() {
3194        let yaml = r"
3195version: v1
3196deployment: test
3197services:
3198  database:
3199    rtype: service
3200    image:
3201      name: postgres:15
3202    node_mode: exclusive
3203";
3204
3205        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3206        assert_eq!(spec.services["database"].node_mode, NodeMode::Exclusive);
3207    }
3208
3209    #[test]
3210    fn test_node_selector_with_labels() {
3211        let yaml = r#"
3212version: v1
3213deployment: test
3214services:
3215  ml-worker:
3216    rtype: service
3217    image:
3218      name: ml-worker:latest
3219    node_mode: dedicated
3220    node_selector:
3221      labels:
3222        gpu: "true"
3223        zone: us-east
3224      prefer_labels:
3225        storage: ssd
3226"#;
3227
3228        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3229        let service = &spec.services["ml-worker"];
3230        assert_eq!(service.node_mode, NodeMode::Dedicated);
3231
3232        let selector = service.node_selector.as_ref().unwrap();
3233        assert_eq!(selector.labels.get("gpu"), Some(&"true".to_string()));
3234        assert_eq!(selector.labels.get("zone"), Some(&"us-east".to_string()));
3235        assert_eq!(
3236            selector.prefer_labels.get("storage"),
3237            Some(&"ssd".to_string())
3238        );
3239    }
3240
3241    #[test]
3242    fn test_node_mode_serialization_roundtrip() {
3243        use serde_json;
3244
3245        // Test all variants serialize/deserialize correctly
3246        let modes = [NodeMode::Shared, NodeMode::Dedicated, NodeMode::Exclusive];
3247        let expected_json = ["\"shared\"", "\"dedicated\"", "\"exclusive\""];
3248
3249        for (mode, expected) in modes.iter().zip(expected_json.iter()) {
3250            let json = serde_json::to_string(mode).unwrap();
3251            assert_eq!(&json, *expected, "Serialization failed for {mode:?}");
3252
3253            let deserialized: NodeMode = serde_json::from_str(&json).unwrap();
3254            assert_eq!(deserialized, *mode, "Roundtrip failed for {mode:?}");
3255        }
3256    }
3257
3258    #[test]
3259    fn test_node_selector_empty() {
3260        let yaml = r"
3261version: v1
3262deployment: test
3263services:
3264  api:
3265    rtype: service
3266    image:
3267      name: api:latest
3268    node_selector:
3269      labels: {}
3270";
3271
3272        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3273        let selector = spec.services["api"].node_selector.as_ref().unwrap();
3274        assert!(selector.labels.is_empty());
3275        assert!(selector.prefer_labels.is_empty());
3276    }
3277
3278    #[test]
3279    fn test_mixed_node_modes_in_deployment() {
3280        let yaml = r"
3281version: v1
3282deployment: test
3283services:
3284  redis:
3285    rtype: service
3286    image:
3287      name: redis:alpine
3288    # Default shared mode
3289  api:
3290    rtype: service
3291    image:
3292      name: api:latest
3293    node_mode: dedicated
3294  database:
3295    rtype: service
3296    image:
3297      name: postgres:15
3298    node_mode: exclusive
3299    node_selector:
3300      labels:
3301        storage: ssd
3302";
3303
3304        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3305        assert_eq!(spec.services["redis"].node_mode, NodeMode::Shared);
3306        assert_eq!(spec.services["api"].node_mode, NodeMode::Dedicated);
3307        assert_eq!(spec.services["database"].node_mode, NodeMode::Exclusive);
3308
3309        let db_selector = spec.services["database"].node_selector.as_ref().unwrap();
3310        assert_eq!(db_selector.labels.get("storage"), Some(&"ssd".to_string()));
3311    }
3312
3313    #[test]
3314    fn test_storage_bind_mount() {
3315        let yaml = r"
3316version: v1
3317deployment: test
3318services:
3319  app:
3320    image:
3321      name: app:latest
3322    storage:
3323      - type: bind
3324        source: /host/data
3325        target: /app/data
3326        readonly: true
3327";
3328        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3329        let storage = &spec.services["app"].storage;
3330        assert_eq!(storage.len(), 1);
3331        match &storage[0] {
3332            StorageSpec::Bind {
3333                source,
3334                target,
3335                readonly,
3336            } => {
3337                assert_eq!(source, "/host/data");
3338                assert_eq!(target, "/app/data");
3339                assert!(*readonly);
3340            }
3341            _ => panic!("Expected Bind storage"),
3342        }
3343    }
3344
3345    #[test]
3346    fn test_storage_named_with_tier() {
3347        let yaml = r"
3348version: v1
3349deployment: test
3350services:
3351  app:
3352    image:
3353      name: app:latest
3354    storage:
3355      - type: named
3356        name: my-data
3357        target: /app/data
3358        tier: cached
3359";
3360        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3361        let storage = &spec.services["app"].storage;
3362        match &storage[0] {
3363            StorageSpec::Named {
3364                name, target, tier, ..
3365            } => {
3366                assert_eq!(name, "my-data");
3367                assert_eq!(target, "/app/data");
3368                assert_eq!(*tier, StorageTier::Cached);
3369            }
3370            _ => panic!("Expected Named storage"),
3371        }
3372    }
3373
3374    #[test]
3375    fn test_storage_anonymous() {
3376        let yaml = r"
3377version: v1
3378deployment: test
3379services:
3380  app:
3381    image:
3382      name: app:latest
3383    storage:
3384      - type: anonymous
3385        target: /app/cache
3386";
3387        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3388        let storage = &spec.services["app"].storage;
3389        match &storage[0] {
3390            StorageSpec::Anonymous { target, tier } => {
3391                assert_eq!(target, "/app/cache");
3392                assert_eq!(*tier, StorageTier::Local); // default
3393            }
3394            _ => panic!("Expected Anonymous storage"),
3395        }
3396    }
3397
3398    #[test]
3399    fn test_storage_tmpfs() {
3400        let yaml = r"
3401version: v1
3402deployment: test
3403services:
3404  app:
3405    image:
3406      name: app:latest
3407    storage:
3408      - type: tmpfs
3409        target: /app/tmp
3410        size: 256Mi
3411        mode: 1777
3412";
3413        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3414        let storage = &spec.services["app"].storage;
3415        match &storage[0] {
3416            StorageSpec::Tmpfs { target, size, mode } => {
3417                assert_eq!(target, "/app/tmp");
3418                assert_eq!(size.as_deref(), Some("256Mi"));
3419                assert_eq!(*mode, Some(1777));
3420            }
3421            _ => panic!("Expected Tmpfs storage"),
3422        }
3423    }
3424
3425    #[test]
3426    fn test_storage_s3() {
3427        let yaml = r"
3428version: v1
3429deployment: test
3430services:
3431  app:
3432    image:
3433      name: app:latest
3434    storage:
3435      - type: s3
3436        bucket: my-bucket
3437        prefix: models/
3438        target: /app/models
3439        readonly: true
3440        endpoint: https://s3.us-west-2.amazonaws.com
3441        credentials: aws-creds
3442";
3443        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3444        let storage = &spec.services["app"].storage;
3445        match &storage[0] {
3446            StorageSpec::S3 {
3447                bucket,
3448                prefix,
3449                target,
3450                readonly,
3451                endpoint,
3452                credentials,
3453            } => {
3454                assert_eq!(bucket, "my-bucket");
3455                assert_eq!(prefix.as_deref(), Some("models/"));
3456                assert_eq!(target, "/app/models");
3457                assert!(*readonly);
3458                assert_eq!(
3459                    endpoint.as_deref(),
3460                    Some("https://s3.us-west-2.amazonaws.com")
3461                );
3462                assert_eq!(credentials.as_deref(), Some("aws-creds"));
3463            }
3464            _ => panic!("Expected S3 storage"),
3465        }
3466    }
3467
3468    #[test]
3469    fn test_storage_multiple_types() {
3470        let yaml = r"
3471version: v1
3472deployment: test
3473services:
3474  app:
3475    image:
3476      name: app:latest
3477    storage:
3478      - type: bind
3479        source: /etc/config
3480        target: /app/config
3481        readonly: true
3482      - type: named
3483        name: app-data
3484        target: /app/data
3485      - type: tmpfs
3486        target: /app/tmp
3487";
3488        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3489        let storage = &spec.services["app"].storage;
3490        assert_eq!(storage.len(), 3);
3491        assert!(matches!(&storage[0], StorageSpec::Bind { .. }));
3492        assert!(matches!(&storage[1], StorageSpec::Named { .. }));
3493        assert!(matches!(&storage[2], StorageSpec::Tmpfs { .. }));
3494    }
3495
3496    #[test]
3497    fn test_storage_tier_default() {
3498        let yaml = r"
3499version: v1
3500deployment: test
3501services:
3502  app:
3503    image:
3504      name: app:latest
3505    storage:
3506      - type: named
3507        name: data
3508        target: /data
3509";
3510        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3511        match &spec.services["app"].storage[0] {
3512            StorageSpec::Named { tier, .. } => {
3513                assert_eq!(*tier, StorageTier::Local); // default should be Local
3514            }
3515            _ => panic!("Expected Named storage"),
3516        }
3517    }
3518
3519    // ==========================================================================
3520    // Tunnel configuration tests
3521    // ==========================================================================
3522
3523    #[test]
3524    fn test_endpoint_tunnel_config_basic() {
3525        let yaml = r"
3526version: v1
3527deployment: test
3528services:
3529  api:
3530    image:
3531      name: api:latest
3532    endpoints:
3533      - name: http
3534        protocol: http
3535        port: 8080
3536        tunnel:
3537          enabled: true
3538          remote_port: 8080
3539";
3540        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3541        let endpoint = &spec.services["api"].endpoints[0];
3542        let tunnel = endpoint.tunnel.as_ref().unwrap();
3543        assert!(tunnel.enabled);
3544        assert_eq!(tunnel.remote_port, 8080);
3545        assert!(tunnel.from.is_none());
3546        assert!(tunnel.to.is_none());
3547    }
3548
3549    #[test]
3550    fn test_endpoint_tunnel_config_full() {
3551        let yaml = r"
3552version: v1
3553deployment: test
3554services:
3555  api:
3556    image:
3557      name: api:latest
3558    endpoints:
3559      - name: http
3560        protocol: http
3561        port: 8080
3562        tunnel:
3563          enabled: true
3564          from: node-1
3565          to: ingress-node
3566          remote_port: 9000
3567          expose: public
3568          access:
3569            enabled: true
3570            max_ttl: 4h
3571            audit: true
3572";
3573        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3574        let endpoint = &spec.services["api"].endpoints[0];
3575        let tunnel = endpoint.tunnel.as_ref().unwrap();
3576        assert!(tunnel.enabled);
3577        assert_eq!(tunnel.from, Some("node-1".to_string()));
3578        assert_eq!(tunnel.to, Some("ingress-node".to_string()));
3579        assert_eq!(tunnel.remote_port, 9000);
3580        assert_eq!(tunnel.expose, Some(ExposeType::Public));
3581
3582        let access = tunnel.access.as_ref().unwrap();
3583        assert!(access.enabled);
3584        assert_eq!(access.max_ttl, Some("4h".to_string()));
3585        assert!(access.audit);
3586    }
3587
3588    #[test]
3589    fn test_top_level_tunnel_definition() {
3590        let yaml = r"
3591version: v1
3592deployment: test
3593services: {}
3594tunnels:
3595  db-tunnel:
3596    from: app-node
3597    to: db-node
3598    local_port: 5432
3599    remote_port: 5432
3600    protocol: tcp
3601    expose: internal
3602";
3603        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3604        let tunnel = spec.tunnels.get("db-tunnel").unwrap();
3605        assert_eq!(tunnel.from, "app-node");
3606        assert_eq!(tunnel.to, "db-node");
3607        assert_eq!(tunnel.local_port, 5432);
3608        assert_eq!(tunnel.remote_port, 5432);
3609        assert_eq!(tunnel.protocol, TunnelProtocol::Tcp);
3610        assert_eq!(tunnel.expose, ExposeType::Internal);
3611    }
3612
3613    #[test]
3614    fn test_top_level_tunnel_defaults() {
3615        let yaml = r"
3616version: v1
3617deployment: test
3618services: {}
3619tunnels:
3620  simple-tunnel:
3621    from: node-a
3622    to: node-b
3623    local_port: 3000
3624    remote_port: 3000
3625";
3626        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3627        let tunnel = spec.tunnels.get("simple-tunnel").unwrap();
3628        assert_eq!(tunnel.protocol, TunnelProtocol::Tcp); // default
3629        assert_eq!(tunnel.expose, ExposeType::Internal); // default
3630    }
3631
3632    #[test]
3633    fn test_tunnel_protocol_udp() {
3634        let yaml = r"
3635version: v1
3636deployment: test
3637services: {}
3638tunnels:
3639  udp-tunnel:
3640    from: node-a
3641    to: node-b
3642    local_port: 5353
3643    remote_port: 5353
3644    protocol: udp
3645";
3646        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3647        let tunnel = spec.tunnels.get("udp-tunnel").unwrap();
3648        assert_eq!(tunnel.protocol, TunnelProtocol::Udp);
3649    }
3650
3651    #[test]
3652    fn test_endpoint_without_tunnel() {
3653        let yaml = r"
3654version: v1
3655deployment: test
3656services:
3657  api:
3658    image:
3659      name: api:latest
3660    endpoints:
3661      - name: http
3662        protocol: http
3663        port: 8080
3664";
3665        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3666        let endpoint = &spec.services["api"].endpoints[0];
3667        assert!(endpoint.tunnel.is_none());
3668    }
3669
3670    #[test]
3671    fn test_deployment_without_tunnels() {
3672        let yaml = r"
3673version: v1
3674deployment: test
3675services:
3676  api:
3677    image:
3678      name: api:latest
3679";
3680        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3681        assert!(spec.tunnels.is_empty());
3682    }
3683
3684    // ==========================================================================
3685    // ApiSpec tests
3686    // ==========================================================================
3687
3688    #[test]
3689    fn test_spec_without_api_block_uses_defaults() {
3690        let yaml = r"
3691version: v1
3692deployment: test
3693services:
3694  hello:
3695    image:
3696      name: hello-world:latest
3697";
3698        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3699        assert!(spec.api.enabled);
3700        assert_eq!(spec.api.bind, "0.0.0.0:3669");
3701        assert!(spec.api.jwt_secret.is_none());
3702        assert!(spec.api.swagger);
3703    }
3704
3705    #[test]
3706    fn test_spec_with_explicit_api_block() {
3707        let yaml = r#"
3708version: v1
3709deployment: test
3710services:
3711  hello:
3712    image:
3713      name: hello-world:latest
3714api:
3715  enabled: false
3716  bind: "127.0.0.1:9090"
3717  jwt_secret: "my-secret"
3718  swagger: false
3719"#;
3720        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3721        assert!(!spec.api.enabled);
3722        assert_eq!(spec.api.bind, "127.0.0.1:9090");
3723        assert_eq!(spec.api.jwt_secret, Some("my-secret".to_string()));
3724        assert!(!spec.api.swagger);
3725    }
3726
3727    #[test]
3728    fn test_spec_with_partial_api_block() {
3729        let yaml = r#"
3730version: v1
3731deployment: test
3732services:
3733  hello:
3734    image:
3735      name: hello-world:latest
3736api:
3737  bind: "0.0.0.0:3000"
3738"#;
3739        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3740        assert!(spec.api.enabled); // default true
3741        assert_eq!(spec.api.bind, "0.0.0.0:3000");
3742        assert!(spec.api.jwt_secret.is_none()); // default None
3743        assert!(spec.api.swagger); // default true
3744    }
3745
3746    // ==========================================================================
3747    // NetworkPolicySpec tests
3748    // ==========================================================================
3749
3750    #[test]
3751    fn test_network_policy_spec_roundtrip() {
3752        let spec = NetworkPolicySpec {
3753            name: "corp-vpn".to_string(),
3754            description: Some("Corporate VPN network".to_string()),
3755            cidrs: vec!["10.200.0.0/16".to_string()],
3756            members: vec![
3757                NetworkMember {
3758                    name: "alice".to_string(),
3759                    kind: MemberKind::User,
3760                },
3761                NetworkMember {
3762                    name: "ops-team".to_string(),
3763                    kind: MemberKind::Group,
3764                },
3765                NetworkMember {
3766                    name: "node-01".to_string(),
3767                    kind: MemberKind::Node,
3768                },
3769            ],
3770            access_rules: vec![
3771                AccessRule {
3772                    service: "api-gateway".to_string(),
3773                    deployment: "*".to_string(),
3774                    ports: Some(vec![443, 8080]),
3775                    action: AccessAction::Allow,
3776                },
3777                AccessRule {
3778                    service: "*".to_string(),
3779                    deployment: "staging".to_string(),
3780                    ports: None,
3781                    action: AccessAction::Deny,
3782                },
3783            ],
3784        };
3785
3786        let yaml = serde_yaml::to_string(&spec).unwrap();
3787        let deserialized: NetworkPolicySpec = serde_yaml::from_str(&yaml).unwrap();
3788        assert_eq!(spec, deserialized);
3789    }
3790
3791    #[test]
3792    fn test_network_policy_spec_defaults() {
3793        let yaml = r"
3794name: minimal
3795";
3796        let spec: NetworkPolicySpec = serde_yaml::from_str(yaml).unwrap();
3797        assert_eq!(spec.name, "minimal");
3798        assert!(spec.description.is_none());
3799        assert!(spec.cidrs.is_empty());
3800        assert!(spec.members.is_empty());
3801        assert!(spec.access_rules.is_empty());
3802    }
3803
3804    #[test]
3805    fn test_access_rule_defaults() {
3806        let yaml = "{}";
3807        let rule: AccessRule = serde_yaml::from_str(yaml).unwrap();
3808        assert_eq!(rule.service, "*");
3809        assert_eq!(rule.deployment, "*");
3810        assert!(rule.ports.is_none());
3811        assert_eq!(rule.action, AccessAction::Allow);
3812    }
3813
3814    #[test]
3815    fn test_member_kind_defaults_to_user() {
3816        let yaml = r"
3817name: bob
3818";
3819        let member: NetworkMember = serde_yaml::from_str(yaml).unwrap();
3820        assert_eq!(member.name, "bob");
3821        assert_eq!(member.kind, MemberKind::User);
3822    }
3823
3824    #[test]
3825    fn test_member_kind_variants() {
3826        for (input, expected) in [
3827            ("user", MemberKind::User),
3828            ("group", MemberKind::Group),
3829            ("node", MemberKind::Node),
3830            ("cidr", MemberKind::Cidr),
3831        ] {
3832            let yaml = format!("name: test\nkind: {input}");
3833            let member: NetworkMember = serde_yaml::from_str(&yaml).unwrap();
3834            assert_eq!(member.kind, expected);
3835        }
3836    }
3837
3838    #[test]
3839    fn test_access_action_variants() {
3840        // Test via a wrapper struct since bare enums need a YAML tag
3841        #[derive(Debug, Deserialize)]
3842        struct Wrapper {
3843            action: AccessAction,
3844        }
3845
3846        let allow: Wrapper = serde_yaml::from_str("action: allow").unwrap();
3847        let deny: Wrapper = serde_yaml::from_str("action: deny").unwrap();
3848
3849        assert_eq!(allow.action, AccessAction::Allow);
3850        assert_eq!(deny.action, AccessAction::Deny);
3851    }
3852
3853    #[test]
3854    fn test_network_policy_spec_default_impl() {
3855        let spec = NetworkPolicySpec::default();
3856        assert_eq!(spec.name, "");
3857        assert!(spec.description.is_none());
3858        assert!(spec.cidrs.is_empty());
3859        assert!(spec.members.is_empty());
3860        assert!(spec.access_rules.is_empty());
3861    }
3862
3863    #[test]
3864    fn container_restart_policy_serde_roundtrip_all_kinds() {
3865        // Exercise every `ContainerRestartKind` variant via a JSON roundtrip.
3866        // Covers the `snake_case` rename (`unless_stopped`, `on_failure`) and
3867        // the optional `max_attempts` / `delay` fields. Validates the wire
3868        // format the API will expose under `/v1/containers`.
3869        let cases = [
3870            (
3871                ContainerRestartPolicy {
3872                    kind: ContainerRestartKind::No,
3873                    max_attempts: None,
3874                    delay: None,
3875                },
3876                r#"{"kind":"no"}"#,
3877            ),
3878            (
3879                ContainerRestartPolicy {
3880                    kind: ContainerRestartKind::Always,
3881                    max_attempts: None,
3882                    delay: Some("500ms".to_string()),
3883                },
3884                r#"{"kind":"always","delay":"500ms"}"#,
3885            ),
3886            (
3887                ContainerRestartPolicy {
3888                    kind: ContainerRestartKind::UnlessStopped,
3889                    max_attempts: None,
3890                    delay: None,
3891                },
3892                r#"{"kind":"unless_stopped"}"#,
3893            ),
3894            (
3895                ContainerRestartPolicy {
3896                    kind: ContainerRestartKind::OnFailure,
3897                    max_attempts: Some(5),
3898                    delay: None,
3899                },
3900                r#"{"kind":"on_failure","max_attempts":5}"#,
3901            ),
3902        ];
3903
3904        for (value, expected_json) in &cases {
3905            let serialized = serde_json::to_string(value).expect("serialize");
3906            assert_eq!(&serialized, expected_json, "serialize mismatch");
3907            let round: ContainerRestartPolicy =
3908                serde_json::from_str(&serialized).expect("deserialize");
3909            assert_eq!(&round, value, "roundtrip mismatch");
3910        }
3911    }
3912
3913    // -- §3.10: RegistryAuth ------------------------------------------------
3914
3915    #[test]
3916    fn registry_auth_type_serializes_snake_case() {
3917        assert_eq!(
3918            serde_json::to_string(&RegistryAuthType::Basic).unwrap(),
3919            "\"basic\""
3920        );
3921        assert_eq!(
3922            serde_json::to_string(&RegistryAuthType::Token).unwrap(),
3923            "\"token\""
3924        );
3925    }
3926
3927    #[test]
3928    fn registry_auth_default_auth_type_is_basic() {
3929        // When `auth_type` is omitted on the wire, the serde default kicks in.
3930        let json = r#"{"username":"u","password":"p"}"#;
3931        let parsed: RegistryAuth = serde_json::from_str(json).expect("parse");
3932        assert_eq!(parsed.auth_type, RegistryAuthType::Basic);
3933        assert_eq!(parsed.username, "u");
3934        assert_eq!(parsed.password, "p");
3935    }
3936
3937    #[test]
3938    fn registry_auth_serde_roundtrip_both_variants() {
3939        for variant in [RegistryAuthType::Basic, RegistryAuthType::Token] {
3940            let cred = RegistryAuth {
3941                username: "ci-bot".to_string(),
3942                password: "s3cret".to_string(),
3943                auth_type: variant,
3944            };
3945            let serialized = serde_json::to_string(&cred).expect("serialize");
3946            let back: RegistryAuth = serde_json::from_str(&serialized).expect("deserialize");
3947            assert_eq!(back, cred, "roundtrip mismatch for {variant:?}");
3948        }
3949    }
3950
3951    #[test]
3952    fn registry_auth_explicit_token_type_parses() {
3953        let json = r#"{"username":"oauth2accesstoken","password":"ghp_abc","auth_type":"token"}"#;
3954        let parsed: RegistryAuth = serde_json::from_str(json).expect("parse");
3955        assert_eq!(parsed.auth_type, RegistryAuthType::Token);
3956    }
3957
3958    #[test]
3959    fn target_platform_as_oci_str() {
3960        assert_eq!(
3961            TargetPlatform::new(OsKind::Linux, ArchKind::Amd64).as_oci_str(),
3962            "linux/amd64"
3963        );
3964        assert_eq!(
3965            TargetPlatform::new(OsKind::Windows, ArchKind::Arm64).as_oci_str(),
3966            "windows/arm64"
3967        );
3968        assert_eq!(
3969            TargetPlatform::new(OsKind::Macos, ArchKind::Arm64).as_oci_str(),
3970            "darwin/arm64"
3971        );
3972    }
3973
3974    #[test]
3975    fn os_kind_from_rust_consts() {
3976        assert_eq!(OsKind::from_rust_os("linux"), Some(OsKind::Linux));
3977        assert_eq!(OsKind::from_rust_os("windows"), Some(OsKind::Windows));
3978        assert_eq!(OsKind::from_rust_os("macos"), Some(OsKind::Macos));
3979        assert_eq!(OsKind::from_rust_os("freebsd"), None);
3980    }
3981
3982    #[test]
3983    fn arch_kind_from_rust_consts() {
3984        assert_eq!(ArchKind::from_rust_arch("x86_64"), Some(ArchKind::Amd64));
3985        assert_eq!(ArchKind::from_rust_arch("aarch64"), Some(ArchKind::Arm64));
3986        assert_eq!(ArchKind::from_rust_arch("riscv64"), None);
3987    }
3988
3989    #[test]
3990    fn service_spec_platform_yaml_round_trip_none() {
3991        // Omitting `platform` from YAML should deserialize as None without error,
3992        // even though ServiceSpec has `#[serde(deny_unknown_fields)]`.
3993        let yaml = r"
3994version: v1
3995deployment: test
3996services:
3997  app:
3998    rtype: service
3999    image:
4000      name: nginx:latest
4001";
4002        let spec: DeploymentSpec = serde_yaml::from_str(yaml).expect("yaml parse");
4003        assert!(spec.services["app"].platform.is_none());
4004    }
4005
4006    #[test]
4007    fn service_spec_platform_yaml_round_trip_some() {
4008        let yaml = r"
4009version: v1
4010deployment: test
4011services:
4012  app:
4013    rtype: service
4014    image:
4015      name: nginx:latest
4016    platform:
4017      os: windows
4018      arch: amd64
4019";
4020        let spec: DeploymentSpec = serde_yaml::from_str(yaml).expect("yaml parse");
4021        assert_eq!(
4022            spec.services["app"].platform,
4023            Some(TargetPlatform::new(OsKind::Windows, ArchKind::Amd64))
4024        );
4025    }
4026
4027    #[test]
4028    fn service_spec_platform_serializes_omitted_when_none() {
4029        // Build a minimal ServiceSpec via YAML to avoid enumerating every field
4030        // (ServiceSpec has no Default impl and no named-struct helper).
4031        let yaml = r"
4032version: v1
4033deployment: test
4034services:
4035  app:
4036    rtype: service
4037    image:
4038      name: nginx:latest
4039";
4040        let mut spec: DeploymentSpec = serde_yaml::from_str(yaml).expect("yaml parse");
4041        let service = spec.services.get_mut("app").expect("service present");
4042        service.platform = None;
4043        let rendered = serde_yaml::to_string(service).expect("render");
4044        assert!(
4045            !rendered.contains("platform"),
4046            "platform must be omitted when None: {rendered}"
4047        );
4048    }
4049
4050    #[test]
4051    fn target_platform_os_version_builder() {
4052        let p =
4053            TargetPlatform::new(OsKind::Windows, ArchKind::Amd64).with_os_version("10.0.26100.1");
4054        assert_eq!(p.os_version.as_deref(), Some("10.0.26100.1"));
4055        assert_eq!(p.os, OsKind::Windows);
4056        assert_eq!(p.arch, ArchKind::Amd64);
4057    }
4058
4059    #[test]
4060    fn target_platform_os_version_yaml_roundtrip() {
4061        let yaml = "os: windows\narch: amd64\nosVersion: 10.0.26100.1\n";
4062        let p: TargetPlatform = serde_yaml::from_str(yaml).expect("yaml parse");
4063        assert_eq!(p.os_version.as_deref(), Some("10.0.26100.1"));
4064        assert_eq!(p.os, OsKind::Windows);
4065        assert_eq!(p.arch, ArchKind::Amd64);
4066    }
4067
4068    #[test]
4069    fn target_platform_os_version_yaml_omits_when_none() {
4070        let p = TargetPlatform::new(OsKind::Linux, ArchKind::Amd64);
4071        let rendered = serde_yaml::to_string(&p).expect("render");
4072        assert!(
4073            !rendered.contains("osVersion"),
4074            "osVersion must be omitted when None: {rendered}"
4075        );
4076    }
4077
4078    #[test]
4079    fn target_platform_as_detailed_str_includes_version() {
4080        let without = TargetPlatform::new(OsKind::Windows, ArchKind::Amd64).as_detailed_str();
4081        assert_eq!(without, "windows/amd64");
4082
4083        let with = TargetPlatform::new(OsKind::Windows, ArchKind::Amd64)
4084            .with_os_version("10.0.26100.1")
4085            .as_detailed_str();
4086        assert_eq!(with, "windows/amd64 (os.version=10.0.26100.1)");
4087    }
4088
4089    #[test]
4090    fn target_platform_display_ignores_version() {
4091        // Display deliberately stays terse so existing log lines don't change.
4092        let p =
4093            TargetPlatform::new(OsKind::Windows, ArchKind::Amd64).with_os_version("10.0.26100.1");
4094        assert_eq!(format!("{p}"), "windows/amd64");
4095    }
4096
4097    // ----------------------------------------------------------------------
4098    // Phase 1 Task 1.1: Docker-compat ServiceSpec/ResourcesSpec extensions.
4099    // ----------------------------------------------------------------------
4100
4101    /// Build a minimal-but-valid `ServiceSpec` for round-trip tests.
4102    fn fixture_service_spec_full() -> ServiceSpec {
4103        let yaml = r"
4104version: v1
4105deployment: phase1-task1
4106services:
4107  hello:
4108    rtype: service
4109    image:
4110      name: hello-world:latest
4111";
4112        let spec: DeploymentSpec = serde_yaml::from_str(yaml).expect("parse fixture");
4113        spec.services.get("hello").expect("hello service").clone()
4114    }
4115
4116    #[test]
4117    fn service_spec_round_trip_with_all_new_fields() {
4118        let mut spec = fixture_service_spec_full();
4119        spec.labels
4120            .insert("zlayer.team".to_string(), "platform".to_string());
4121        spec.user = Some("1000:1000".to_string());
4122        spec.stop_signal = Some("SIGTERM".to_string());
4123        spec.stop_grace_period = Some(std::time::Duration::from_secs(30));
4124        spec.sysctls
4125            .insert("net.core.somaxconn".to_string(), "1024".to_string());
4126        spec.ulimits.insert(
4127            "nofile".to_string(),
4128            UlimitSpec {
4129                soft: 65_536,
4130                hard: 65_536,
4131            },
4132        );
4133        spec.security_opt.push("no-new-privileges:true".to_string());
4134        spec.pid_mode = Some("host".to_string());
4135        spec.ipc_mode = Some("private".to_string());
4136        spec.network_mode = NetworkMode::Bridge {
4137            name: Some("custom-net".to_string()),
4138        };
4139        spec.cap_drop.push("NET_RAW".to_string());
4140        spec.extra_groups.push("docker".to_string());
4141        spec.read_only_root_fs = true;
4142        spec.init_container = Some(true);
4143        spec.resources.pids_limit = Some(2048);
4144        spec.resources.cpuset = Some("0-3".to_string());
4145        spec.resources.cpu_shares = Some(1024);
4146        spec.resources.memory_swap = Some("2Gi".to_string());
4147        spec.resources.memory_reservation = Some("256Mi".to_string());
4148        spec.resources.memory_swappiness = Some(10);
4149        spec.resources.oom_score_adj = Some(-500);
4150        spec.resources.oom_kill_disable = Some(false);
4151        spec.resources.blkio_weight = Some(500);
4152
4153        let yaml = serde_yaml::to_string(&spec).expect("serialize");
4154        let round: ServiceSpec = serde_yaml::from_str(&yaml).expect("deserialize");
4155        assert_eq!(spec, round, "round-trip mismatch:\n{yaml}");
4156    }
4157
4158    #[test]
4159    fn network_mode_string_form_round_trip() {
4160        let cases: &[(&str, NetworkMode)] = &[
4161            ("default", NetworkMode::Default),
4162            ("host", NetworkMode::Host),
4163            ("none", NetworkMode::None),
4164            ("bridge", NetworkMode::Bridge { name: None }),
4165            (
4166                "bridge:custom",
4167                NetworkMode::Bridge {
4168                    name: Some("custom".to_string()),
4169                },
4170            ),
4171            (
4172                "container:abc123",
4173                NetworkMode::Container {
4174                    id: "abc123".to_string(),
4175                },
4176            ),
4177        ];
4178
4179        for (input, expected) in cases {
4180            #[derive(Deserialize)]
4181            struct Wrap {
4182                #[serde(deserialize_with = "deserialize_network_mode")]
4183                m: NetworkMode,
4184            }
4185            let yaml = format!("m: \"{input}\"\n");
4186            let parsed: Wrap = serde_yaml::from_str(&yaml).expect("parse network mode");
4187            assert_eq!(&parsed.m, expected, "mismatch for {input}");
4188        }
4189    }
4190
4191    #[test]
4192    fn ulimit_spec_round_trip() {
4193        let u = UlimitSpec {
4194            soft: 1024,
4195            hard: 65_536,
4196        };
4197        let yaml = serde_yaml::to_string(&u).expect("serialize");
4198        let parsed: UlimitSpec = serde_yaml::from_str(&yaml).expect("parse");
4199        assert_eq!(u, parsed);
4200    }
4201
4202    #[test]
4203    fn host_network_true_yaml_promotes_to_network_mode_host() {
4204        let yaml = r"
4205version: v1
4206deployment: bc-test
4207services:
4208  hello:
4209    rtype: service
4210    image:
4211      name: hello-world:latest
4212    host_network: true
4213";
4214        let dep: DeploymentSpec = serde_yaml::from_str(yaml).expect("parse");
4215        let svc = dep.services.get("hello").expect("hello service");
4216        assert_eq!(svc.network_mode, NetworkMode::Host);
4217        // The legacy bool stays mirrored so in-process callers that still
4218        // read `host_network` continue to work.
4219        assert!(svc.host_network);
4220    }
4221
4222    #[test]
4223    fn capabilities_yaml_alias_cap_add_round_trip() {
4224        // Forward-compat: ZLayer keeps the field named `capabilities`, but the
4225        // Docker-style key `cap_add` must also deserialize into it.
4226        let yaml = r"
4227version: v1
4228deployment: cap-test
4229services:
4230  hello:
4231    rtype: service
4232    image:
4233      name: hello-world:latest
4234    cap_add:
4235      - NET_ADMIN
4236      - SYS_PTRACE
4237";
4238        let dep: DeploymentSpec = serde_yaml::from_str(yaml).expect("parse cap_add alias");
4239        let svc = dep.services.get("hello").expect("hello service");
4240        assert_eq!(
4241            svc.capabilities,
4242            vec!["NET_ADMIN".to_string(), "SYS_PTRACE".to_string()]
4243        );
4244    }
4245
4246    #[test]
4247    fn lifecycle_omitted_defaults_to_false() {
4248        // When `lifecycle` is absent from the YAML/JSON entirely, the
4249        // deserialized service must fall back to `LifecycleSpec::default()`,
4250        // i.e. `delete_on_exit: false` — the historical retain-on-exit
4251        // behavior. This guards against accidental policy flips when the
4252        // field is added to existing specs.
4253        let yaml = r"
4254version: v1
4255deployment: lifecycle-default-test
4256services:
4257  app:
4258    rtype: service
4259    image:
4260      name: hello-world:latest
4261";
4262        let dep: DeploymentSpec = serde_yaml::from_str(yaml).expect("parse spec without lifecycle");
4263        let svc = dep.services.get("app").expect("app service");
4264        assert_eq!(svc.lifecycle, LifecycleSpec::default());
4265        assert!(!svc.lifecycle.delete_on_exit);
4266    }
4267
4268    #[test]
4269    fn lifecycle_delete_on_exit_round_trips() {
4270        // `lifecycle.delete_on_exit: true` must survive a full YAML
4271        // deserialize → serialize → deserialize cycle, and the explicit
4272        // value must propagate into the parsed `ServiceSpec`.
4273        let yaml = r"
4274version: v1
4275deployment: lifecycle-delete-test
4276services:
4277  app:
4278    rtype: service
4279    image:
4280      name: hello-world:latest
4281    lifecycle:
4282      delete_on_exit: true
4283";
4284        let dep: DeploymentSpec = serde_yaml::from_str(yaml).expect("parse spec with lifecycle");
4285        let svc = dep.services.get("app").expect("app service");
4286        assert!(svc.lifecycle.delete_on_exit);
4287
4288        // Round-trip via YAML to confirm Serialize emits the field and
4289        // Deserialize folds it back identically.
4290        let dumped = serde_yaml::to_string(&dep).expect("serialize spec with lifecycle");
4291        let reparsed: DeploymentSpec =
4292            serde_yaml::from_str(&dumped).expect("reparse round-tripped spec");
4293        let reparsed_svc = reparsed.services.get("app").expect("app service after rt");
4294        assert!(reparsed_svc.lifecycle.delete_on_exit);
4295        assert_eq!(svc.lifecycle, reparsed_svc.lifecycle);
4296    }
4297}
4298
4299#[cfg(test)]
4300mod replica_group_tests {
4301    use super::{
4302        validate_unique_replica_group_roles, EndpointSpec, GroupAffinity, LocalhostReachability,
4303        ReplicaGroup, ScaleSpec, ScaleTargets, ServiceSpec, REPLICA_GROUP_ROLE_RE,
4304    };
4305
4306    #[test]
4307    fn yaml_roundtrip_basic_group() {
4308        let yaml = r"
4309role: primary
4310count: 1
4311env:
4312  POSTGRES_REPLICATION_MODE: primary
4313affinity: spread
4314";
4315        let group: ReplicaGroup = serde_yaml::from_str(yaml).expect("parse basic group");
4316        assert_eq!(group.role, "primary");
4317        assert_eq!(group.count, 1);
4318        assert_eq!(group.affinity, GroupAffinity::Spread);
4319        assert_eq!(
4320            group.env.get("POSTGRES_REPLICATION_MODE"),
4321            Some(&"primary".to_string())
4322        );
4323    }
4324
4325    #[test]
4326    fn yaml_default_affinity_is_spread() {
4327        let yaml = "role: x\ncount: 2\n";
4328        let group: ReplicaGroup = serde_yaml::from_str(yaml).expect("parse minimal group");
4329        assert_eq!(group.affinity, GroupAffinity::Spread);
4330    }
4331
4332    #[test]
4333    fn role_regex_accepts_valid_labels() {
4334        for ok in ["a", "primary", "read-only", "x1", "ab-cd-ef"] {
4335            assert!(
4336                REPLICA_GROUP_ROLE_RE.is_match(ok),
4337                "regex should accept: {ok}"
4338            );
4339        }
4340    }
4341
4342    #[test]
4343    fn role_regex_rejects_invalid_labels() {
4344        for bad in [
4345            "",
4346            "-primary",
4347            "primary-",
4348            "Primary",
4349            "0primary",
4350            "primary_role",
4351            "this-is-way-too-long-of-a-role-name-here",
4352        ] {
4353            assert!(
4354                !REPLICA_GROUP_ROLE_RE.is_match(bad),
4355                "regex should reject: {bad}"
4356            );
4357        }
4358    }
4359
4360    #[test]
4361    fn group_affinity_pin_roundtrips_via_serde_yaml() {
4362        // Externally-tagged enum with a single string payload serializes as
4363        // a mapping `pin: <value>` under snake_case naming.
4364        let pinned = GroupAffinity::Pin("id=2".to_string());
4365        let dumped = serde_yaml::to_string(&pinned).expect("serialize pin");
4366        let reparsed: GroupAffinity = serde_yaml::from_str(&dumped).expect("reparse pin");
4367        match reparsed {
4368            GroupAffinity::Pin(s) => assert_eq!(s, "id=2"),
4369            other => panic!("expected Pin, got {other:?}"),
4370        }
4371    }
4372
4373    #[test]
4374    fn unique_role_validator_rejects_duplicates() {
4375        let mk = |role: &str| ReplicaGroup {
4376            role: role.to_string(),
4377            count: 1,
4378            image: None,
4379            env: std::collections::HashMap::new(),
4380            command: None,
4381            resources: None,
4382            affinity: GroupAffinity::Spread,
4383        };
4384        assert!(validate_unique_replica_group_roles(&[mk("a"), mk("b")]).is_ok());
4385        let err = validate_unique_replica_group_roles(&[mk("a"), mk("a")])
4386            .expect_err("duplicate should fail");
4387        assert_eq!(err, "a");
4388    }
4389
4390    #[test]
4391    fn endpoint_target_role_yaml_roundtrip() {
4392        let yaml = "name: read\nprotocol: tcp\nport: 5433\ntarget_role: read\n";
4393        let ep: EndpointSpec = serde_yaml::from_str(yaml).unwrap();
4394        assert_eq!(ep.target_role, Some("read".to_string()));
4395    }
4396
4397    #[test]
4398    fn endpoint_without_target_role_is_none() {
4399        let yaml = "name: any\nprotocol: tcp\nport: 5432\n";
4400        let ep: EndpointSpec = serde_yaml::from_str(yaml).unwrap();
4401        assert_eq!(ep.target_role, None);
4402    }
4403
4404    // ==========================================================================
4405    // LocalhostReachability / single-member publishing tests
4406    // ==========================================================================
4407
4408    fn spec_with_scale(scale: ScaleSpec) -> ServiceSpec {
4409        let mut s = ServiceSpec::minimal("svc", "scratch:latest");
4410        s.scale = scale;
4411        s
4412    }
4413
4414    fn replica_group(role: &str, count: u32) -> ReplicaGroup {
4415        ReplicaGroup {
4416            role: role.to_string(),
4417            count,
4418            image: None,
4419            env: std::collections::HashMap::new(),
4420            command: None,
4421            resources: None,
4422            affinity: GroupAffinity::Spread,
4423        }
4424    }
4425
4426    #[test]
4427    fn is_single_member_across_scale_modes() {
4428        assert!(spec_with_scale(ScaleSpec::Fixed { replicas: 1 }).is_single_member());
4429        assert!(spec_with_scale(ScaleSpec::Fixed { replicas: 0 }).is_single_member());
4430        assert!(!spec_with_scale(ScaleSpec::Fixed { replicas: 3 }).is_single_member());
4431
4432        let adaptive = |min, max| ScaleSpec::Adaptive {
4433            min,
4434            max,
4435            cooldown: None,
4436            targets: ScaleTargets::default(),
4437        };
4438        assert!(spec_with_scale(adaptive(1, 1)).is_single_member());
4439        assert!(!spec_with_scale(adaptive(1, 5)).is_single_member());
4440
4441        assert!(spec_with_scale(ScaleSpec::Manual).is_single_member());
4442    }
4443
4444    #[test]
4445    fn is_single_member_with_replica_groups() {
4446        // One group, total 1 -> single member.
4447        let mut s = ServiceSpec::minimal("svc", "scratch:latest");
4448        s.replica_groups = Some(vec![replica_group("only", 1)]);
4449        assert!(s.is_single_member());
4450
4451        // One group, total 2 -> multi member.
4452        s.replica_groups = Some(vec![replica_group("only", 2)]);
4453        assert!(!s.is_single_member());
4454
4455        // Two groups, total 2 -> multi member.
4456        s.replica_groups = Some(vec![replica_group("a", 1), replica_group("b", 1)]);
4457        assert!(!s.is_single_member());
4458
4459        // replica_groups takes precedence over scale.
4460        s.scale = ScaleSpec::Fixed { replicas: 1 };
4461        s.replica_groups = Some(vec![replica_group("a", 1), replica_group("b", 1)]);
4462        assert!(!s.is_single_member());
4463    }
4464
4465    #[test]
4466    fn publish_to_node_loopback_override_matrix() {
4467        // Single-member base spec.
4468        let single = spec_with_scale(ScaleSpec::Fixed { replicas: 1 });
4469        // Multi-member base spec.
4470        let multi = spec_with_scale(ScaleSpec::Fixed { replicas: 3 });
4471
4472        // Auto: follows single-member-ness.
4473        let mut s = single.clone();
4474        s.localhost_reachability = LocalhostReachability::Auto;
4475        assert!(s.publish_to_node_loopback());
4476        let mut m = multi.clone();
4477        m.localhost_reachability = LocalhostReachability::Auto;
4478        assert!(!m.publish_to_node_loopback());
4479
4480        // Always: publishes regardless of member count.
4481        let mut s = single.clone();
4482        s.localhost_reachability = LocalhostReachability::Always;
4483        assert!(s.publish_to_node_loopback());
4484        let mut m = multi.clone();
4485        m.localhost_reachability = LocalhostReachability::Always;
4486        assert!(m.publish_to_node_loopback());
4487
4488        // Never: never publishes regardless of member count.
4489        let mut s = single;
4490        s.localhost_reachability = LocalhostReachability::Never;
4491        assert!(!s.publish_to_node_loopback());
4492        let mut m = multi;
4493        m.localhost_reachability = LocalhostReachability::Never;
4494        assert!(!m.publish_to_node_loopback());
4495    }
4496
4497    #[test]
4498    fn localhost_reachability_default_is_auto() {
4499        assert_eq!(
4500            LocalhostReachability::default(),
4501            LocalhostReachability::Auto
4502        );
4503        assert!(LocalhostReachability::Auto.is_default());
4504        assert!(!LocalhostReachability::Always.is_default());
4505        assert!(!LocalhostReachability::Never.is_default());
4506        // A minimal spec defaults to Auto reachability, but the default scale
4507        // is Adaptive { max: 10 } (multi-member), so Auto does NOT publish.
4508        let minimal = ServiceSpec::minimal("svc", "scratch:latest");
4509        assert_eq!(minimal.localhost_reachability, LocalhostReachability::Auto);
4510        assert!(!minimal.is_single_member());
4511        assert!(!minimal.publish_to_node_loopback());
4512    }
4513}