Skip to main content

zlayer_types/spec/
types.rs

1//! `ZLayer` V1 Service Specification Types
2//!
3//! This module defines all types for parsing and validating `ZLayer` deployment specs.
4
5mod duration {
6    use humantime::format_duration;
7    use serde::{Deserialize, Deserializer, Serializer};
8    use std::time::Duration;
9
10    #[allow(clippy::ref_option)]
11    pub fn serialize<S>(duration: &Option<Duration>, serializer: S) -> Result<S::Ok, S::Error>
12    where
13        S: Serializer,
14    {
15        match duration {
16            Some(d) => serializer.serialize_str(&format_duration(*d).to_string()),
17            None => serializer.serialize_none(),
18        }
19    }
20
21    pub fn deserialize<'de, D>(deserializer: D) -> Result<Option<Duration>, D::Error>
22    where
23        D: Deserializer<'de>,
24    {
25        use serde::de::Error;
26        let s: Option<String> = Option::deserialize(deserializer)?;
27        match s {
28            Some(s) => humantime::parse_duration(&s)
29                .map(Some)
30                .map_err(|e| D::Error::custom(format!("invalid duration: {e}"))),
31            None => Ok(None),
32        }
33    }
34
35    pub mod option {
36        pub use super::*;
37    }
38
39    /// Serde module for required (non-Option) Duration fields
40    pub mod required {
41        use humantime::format_duration;
42        use serde::{Deserialize, Deserializer, Serializer};
43        use std::time::Duration;
44
45        pub fn serialize<S>(duration: &Duration, serializer: S) -> Result<S::Ok, S::Error>
46        where
47            S: Serializer,
48        {
49            serializer.serialize_str(&format_duration(*duration).to_string())
50        }
51
52        pub fn deserialize<'de, D>(deserializer: D) -> Result<Duration, D::Error>
53        where
54            D: Deserializer<'de>,
55        {
56            use serde::de::Error;
57            let s: String = String::deserialize(deserializer)?;
58            humantime::parse_duration(&s)
59                .map_err(|e| D::Error::custom(format!("invalid duration: {e}")))
60        }
61    }
62}
63
64use crate::secrets::SecretScope;
65use serde::{Deserialize, Serialize};
66use std::collections::HashMap;
67use validator::Validate;
68
/// How service replicas are allocated to nodes.
///
/// Serialized in `snake_case` (`shared`, `dedicated`, `exclusive`).
/// `Shared` is the `Default`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum NodeMode {
    /// Containers placed on any node with capacity (default, bin-packing)
    #[default]
    Shared,
    /// Each replica gets its own dedicated node (1:1 mapping)
    Dedicated,
    /// Service is the ONLY thing on its nodes (no other services)
    Exclusive,
}
81
/// Service type - determines runtime behavior and scaling model.
///
/// Serialized in `snake_case` (e.g. `standard`, `wasm_http`,
/// `wasm_rate_limiter`, `job`). `Standard` is the `Default`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ServiceType {
    /// Standard long-running container service
    #[default]
    Standard,
    /// WASM-based HTTP service (wasi:http/incoming-handler)
    WasmHttp,
    /// WASM-based general plugin (zlayer:plugin handler - full host access)
    WasmPlugin,
    /// WASM-based stateless request/response transformer
    WasmTransformer,
    /// WASM-based authenticator plugin (secrets + KV + HTTP)
    WasmAuthenticator,
    /// WASM-based rate limiter (KV + metrics)
    WasmRateLimiter,
    /// WASM-based request/response middleware
    WasmMiddleware,
    /// WASM-based custom router
    WasmRouter,
    /// Run-to-completion job
    Job,
}
106
/// Storage performance tier.
///
/// Serialized in `snake_case` (`local`, `cached`, `network`).
/// `Local` is the `Default`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum StorageTier {
    /// Direct local filesystem (SSD/NVMe) - SQLite-safe, fast fsync
    #[default]
    Local,
    /// bcache-backed tiered storage (SSD cache + slower backend)
    Cached,
    /// NFS/network storage - NOT SQLite-safe (will warn)
    Network,
}
119
/// Node selection constraints for service placement.
///
/// Unknown keys are rejected on deserialization. Both maps default to empty
/// when absent and are omitted from serialized output when empty.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema)]
#[serde(deny_unknown_fields)]
pub struct NodeSelector {
    /// Required labels that nodes must have (all must match)
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub labels: HashMap<String, String>,
    /// Preferred labels (soft constraint, nodes with these are preferred)
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub prefer_labels: HashMap<String, String>,
}
131
/// Affinity hint for a single replica group's placement.
///
/// Three behaviors:
/// - `Spread`: try to put each replica on a different node (default).
/// - `Pack`: bin-pack onto the fewest nodes that can fit.
/// - `Pin`: pin all replicas to a single node, identified either by
///   node id (`"id=2"`) or label match (`"role=database"`).
///
/// Variant names are serialized in `snake_case` (`spread`, `pack`, `pin`).
/// No `#[serde(tag = ...)]` is set, so serde's default externally tagged
/// representation applies and `Pin` carries its selector string as the value.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub enum GroupAffinity {
    /// Default: spread across distinct nodes.
    #[default]
    Spread,
    /// Pack onto fewest nodes.
    Pack,
    /// Pin to a specific node selector.
    ///
    /// Examples:
    /// - `Pin("id=2")` — exact node id match
    /// - `Pin("zone=us-east-1a")` — label match
    Pin(String),
}
154
/// Regex for [`ReplicaGroup::role`] validation. A valid DNS label: starts with
/// a lowercase letter, then any mix of lowercase letters, digits, or
/// internal hyphens, ending with a letter or digit. 1-30 chars total.
///
/// Length budget of the pattern: 1 leading letter, then an optional group of
/// up to 28 middle characters plus 1 final letter/digit, giving a maximum of
/// 30. A single letter on its own is accepted because the group is optional.
///
/// The regex is compiled lazily on first access.
static REPLICA_GROUP_ROLE_RE: std::sync::LazyLock<regex::Regex> = std::sync::LazyLock::new(|| {
    // The pattern is a fixed literal, so `expect` only fires if the literal
    // itself is edited into something invalid.
    regex::Regex::new(r"^[a-z]([a-z0-9-]{0,28}[a-z0-9])?$").expect("valid regex literal")
});
161
/// One named replica group within a service.
///
/// When `ServiceSpec.replica_groups` is set, the service is composed of one
/// or more groups, each with its own count, optional overrides, and
/// affinity hint. Containers in each group get DNS names of the form
/// `<role>.<service>.<deployment>.zlayer.internal` and proxy backends
/// can target a single role via `EndpointSpec.target_role`.
///
/// Backward compat: services without `replica_groups` are treated as a
/// single implicit group `{role: "default", count: <scale.replicas>}`.
///
/// Unknown keys are rejected on deserialization. Uniqueness of `role` across
/// groups is NOT checked by the derived `Validate`; see
/// [`validate_unique_replica_group_roles`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Validate)]
#[serde(deny_unknown_fields)]
pub struct ReplicaGroup {
    /// Group identifier. Becomes part of container IDs and DNS names.
    /// Must be a valid DNS label: lowercase letters, digits, and hyphens;
    /// must not start or end with a hyphen; ≤ 30 chars.
    // Both the length rule and the regex bound the value to 1-30 characters;
    // the regex additionally restricts the character set and position.
    #[validate(length(min = 1, max = 30))]
    #[validate(regex(path = *REPLICA_GROUP_ROLE_RE))]
    pub role: String,

    /// Number of replicas in this group. Must be at least 1.
    #[validate(range(min = 1))]
    pub count: u32,

    /// Image override (inherits `ServiceSpec.image` when None).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub image: Option<ImageSpec>,

    /// Environment variables MERGED on top of `ServiceSpec.env`. Entries
    /// in this map win on conflict (group overrides service default).
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub env: HashMap<String, String>,

    /// Command override (inherits `ServiceSpec.command` when None).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub command: Option<CommandSpec>,

    /// Resources override (inherits `ServiceSpec.resources` when None).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub resources: Option<ResourcesSpec>,

    /// Affinity hint for placement of this group's replicas.
    /// Defaults to [`GroupAffinity::Spread`] when omitted.
    #[serde(default)]
    pub affinity: GroupAffinity,
}
207
208/// Validate that no two [`ReplicaGroup`]s share the same `role` within a
209/// single [`ServiceSpec`].
210///
211/// Called from the deploy handler before storing the spec; not wired into
212/// the `Validate` derive on `ServiceSpec` because validator 0.19's `custom`
213/// only sees the field type (`Option<Vec<ReplicaGroup>>`) and not the
214/// surrounding struct.
215///
216/// # Errors
217/// Returns the duplicated role name on first collision.
218pub fn validate_unique_replica_group_roles(groups: &[ReplicaGroup]) -> Result<(), String> {
219    let mut seen = std::collections::HashSet::new();
220    for g in groups {
221        if !seen.insert(g.role.as_str()) {
222            return Err(g.role.clone());
223        }
224    }
225    Ok(())
226}
227
/// Operating system a service needs to run on.
///
/// Mirrors the OS half of an OCI platform descriptor. [`OsKind::as_oci_str`]
/// yields the canonical OCI strings, which match Go's `GOOS` values
/// (`"linux"`, `"windows"`, `"darwin"`).
///
/// NOTE(review): the serde representation comes from
/// `rename_all = "lowercase"`, so `Macos` is written and read as `"macos"`,
/// not `"darwin"`. Confirm this difference from the OCI string is intended.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize, utoipa::ToSchema,
)]
#[serde(rename_all = "lowercase")]
pub enum OsKind {
    /// Linux (`"linux"` in both serde and OCI form).
    Linux,
    /// Windows (`"windows"` in both serde and OCI form).
    Windows,
    /// macOS (`"macos"` in serde form, `"darwin"` in OCI form).
    Macos,
}
241
242impl OsKind {
243    /// Canonical OCI-style string (`"linux"` / `"windows"` / `"darwin"`).
244    /// This is the same convention `Runtime.platform_resolver` uses.
245    #[must_use]
246    pub const fn as_oci_str(self) -> &'static str {
247        match self {
248            OsKind::Linux => "linux",
249            OsKind::Windows => "windows",
250            OsKind::Macos => "darwin",
251        }
252    }
253
254    /// Detect from `std::env::consts::OS`. Unknown values return `None`.
255    #[must_use]
256    pub fn from_rust_os(s: &str) -> Option<Self> {
257        match s {
258            "linux" => Some(Self::Linux),
259            "windows" => Some(Self::Windows),
260            "macos" => Some(Self::Macos),
261            _ => None,
262        }
263    }
264
265    /// Parse the OCI-canonical OS string as written in an image manifest's
266    /// `config.os` field (lowercase: `"linux"` / `"windows"` / `"darwin"`).
267    /// Unknown or empty values return `None`.
268    ///
269    /// This is the inverse of [`Self::as_oci_str`] and is used by the
270    /// registry's manifest-OS inspection (see `fetch_image_os`).
271    #[must_use]
272    pub fn from_oci_str(s: &str) -> Option<Self> {
273        match s {
274            "linux" => Some(Self::Linux),
275            "windows" => Some(Self::Windows),
276            "darwin" => Some(Self::Macos),
277            _ => None,
278        }
279    }
280}
281
/// CPU architecture a service needs. Mirrors the arch half of an OCI platform.
///
/// Serialized in lowercase (`"amd64"`, `"arm64"`), which is the same spelling
/// [`ArchKind::as_oci_str`] returns.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize, utoipa::ToSchema,
)]
#[serde(rename_all = "lowercase")]
pub enum ArchKind {
    /// 64-bit x86 (`x86_64` in Rust's `std::env::consts::ARCH`).
    Amd64,
    /// 64-bit ARM (`aarch64` in Rust's `std::env::consts::ARCH`).
    Arm64,
}
291
292impl ArchKind {
293    /// Canonical OCI-style string (`"amd64"` / `"arm64"`).
294    #[must_use]
295    pub const fn as_oci_str(self) -> &'static str {
296        match self {
297            ArchKind::Amd64 => "amd64",
298            ArchKind::Arm64 => "arm64",
299        }
300    }
301
302    /// Detect from `std::env::consts::ARCH`. Unknown values return `None`.
303    #[must_use]
304    pub fn from_rust_arch(s: &str) -> Option<Self> {
305        match s {
306            "x86_64" => Some(Self::Amd64),
307            "aarch64" => Some(Self::Arm64),
308            _ => None,
309        }
310    }
311}
312
/// Platform a service targets. `None` on `ServiceSpec.platform` means
/// "any agent is acceptable" (preserves backward compatibility).
///
/// On the wire the fields are `os`, `arch`, and the optional camelCase
/// `osVersion`, which is omitted from serialized output when unset.
//
// NOTE: no `Copy`. `os_version: Option<String>` rules it out. `OsKind` / `ArchKind`
// are still `Copy`, so field-level borrows stay ergonomic.
#[derive(
    Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize, utoipa::ToSchema,
)]
pub struct TargetPlatform {
    /// Target operating system.
    pub os: OsKind,
    /// Target CPU architecture.
    pub arch: ArchKind,
    /// Optional OS version constraint — primarily for Windows multi-platform
    /// images, where `platform.os.version` in the OCI index distinguishes build
    /// families (e.g. `10.0.26100.*` for Server 2025 / Win11 24H2,
    /// `10.0.20348.*` for Server 2022). When set on a Windows target the
    /// registry platform resolver prefers manifest entries whose `os.version`
    /// matches this value exactly or shares a `major.minor.build` prefix.
    /// Unused on Linux/macOS platforms.
    #[serde(default, rename = "osVersion", skip_serializing_if = "Option::is_none")]
    pub os_version: Option<String>,
}
334
335impl TargetPlatform {
336    #[must_use]
337    pub const fn new(os: OsKind, arch: ArchKind) -> Self {
338        Self {
339            os,
340            arch,
341            os_version: None,
342        }
343    }
344
345    /// Constrain the platform to a specific `os.version` string.
346    ///
347    /// Applies to Windows targets: the registry resolver matches manifest
348    /// entries whose `platform.os.version` equals this value or starts with it
349    /// (treated as a `major.minor.build` prefix). Has no effect on Linux/macOS.
350    #[must_use]
351    pub fn with_os_version(mut self, v: impl Into<String>) -> Self {
352        self.os_version = Some(v.into());
353        self
354    }
355
356    /// Canonical OCI-style string (`"linux/amd64"`, `"windows/arm64"`).
357    ///
358    /// Does NOT include `os_version` — use [`Self::as_detailed_str`] when the
359    /// version matters (e.g. for error/log messages that need to distinguish
360    /// between Windows build families).
361    #[must_use]
362    pub fn as_oci_str(self) -> String {
363        format!("{}/{}", self.os.as_oci_str(), self.arch.as_oci_str())
364    }
365
366    /// Like [`Self::as_oci_str`] but appends ` (os.version=…)` when an
367    /// `os_version` constraint is set. Intended for diagnostics, not for
368    /// matching against manifest entries.
369    #[must_use]
370    pub fn as_detailed_str(&self) -> String {
371        match &self.os_version {
372            Some(v) => format!(
373                "{}/{} (os.version={v})",
374                self.os.as_oci_str(),
375                self.arch.as_oci_str()
376            ),
377            None => format!("{}/{}", self.os.as_oci_str(), self.arch.as_oci_str()),
378        }
379    }
380}
381
382impl std::fmt::Display for TargetPlatform {
383    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
384        write!(f, "{}/{}", self.os.as_oci_str(), self.arch.as_oci_str())
385    }
386}
387
/// Explicit capability declarations for WASM modules.
/// Controls which host interfaces are linked and available to the component.
///
/// Unknown keys are rejected on deserialization. Each field may be omitted:
/// fields annotated with `default_true` fall back to that helper's value, and
/// the rest fall back to `false` (`bool::default()`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
// One independent on/off switch per host interface; bools are the natural fit.
#[allow(clippy::struct_excessive_bools)]
pub struct WasmCapabilities {
    /// Config interface access (zlayer:plugin/config)
    #[serde(default = "default_true")]
    pub config: bool,
    /// Key-value storage access (zlayer:plugin/keyvalue)
    #[serde(default = "default_true")]
    pub keyvalue: bool,
    /// Logging access (zlayer:plugin/logging)
    #[serde(default = "default_true")]
    pub logging: bool,
    /// Secrets access (zlayer:plugin/secrets)
    #[serde(default)]
    pub secrets: bool,
    /// Metrics emission (zlayer:plugin/metrics)
    #[serde(default = "default_true")]
    pub metrics: bool,
    /// HTTP client for outgoing requests (wasi:http/outgoing-handler)
    #[serde(default)]
    pub http_client: bool,
    /// WASI CLI access (args, env, stdio)
    #[serde(default)]
    pub cli: bool,
    /// WASI filesystem access
    #[serde(default)]
    pub filesystem: bool,
    /// WASI sockets access (TCP/UDP)
    #[serde(default)]
    pub sockets: bool,
}
422
423impl Default for WasmCapabilities {
424    fn default() -> Self {
425        Self {
426            config: true,
427            keyvalue: true,
428            logging: true,
429            secrets: false,
430            metrics: true,
431            http_client: false,
432            cli: false,
433            filesystem: false,
434            sockets: false,
435        }
436    }
437}
438
/// Pre-opened directory for WASM filesystem access.
///
/// `source` and `target` are required; unknown keys are rejected on
/// deserialization.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct WasmPreopen {
    /// Host path to mount
    pub source: String,
    /// Guest path (visible to WASM module)
    pub target: String,
    /// Read-only access (default: false)
    #[serde(default)]
    pub readonly: bool,
}
451
/// Comprehensive configuration for all WASM service types.
///
/// Replaces the previous `WasmHttpConfig` with resource limits, capability
/// declarations, networking controls, and storage configuration.
///
/// Every field has a serde default, so an empty map deserializes
/// successfully. Unknown keys are rejected. Durations are read and written
/// as human-readable strings through the `duration` serde helpers.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
// Independent feature toggles; bools are the natural fit.
#[allow(clippy::struct_excessive_bools)]
pub struct WasmConfig {
    // --- Instance Management ---
    /// Minimum number of warm instances to keep ready
    #[serde(default = "default_min_instances")]
    pub min_instances: u32,
    /// Maximum number of instances to scale to
    #[serde(default = "default_max_instances")]
    pub max_instances: u32,
    /// Time before idle instances are terminated
    #[serde(default = "default_idle_timeout", with = "duration::required")]
    pub idle_timeout: std::time::Duration,
    /// Maximum time for a single request
    #[serde(default = "default_request_timeout", with = "duration::required")]
    pub request_timeout: std::time::Duration,

    // --- Resource Limits ---
    /// Maximum linear memory (e.g., "64Mi", "256Mi")
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_memory: Option<String>,
    /// Maximum fuel (instruction count limit, 0 = unlimited)
    #[serde(default)]
    pub max_fuel: u64,
    /// Epoch interval for cooperative preemption
    #[serde(
        default,
        skip_serializing_if = "Option::is_none",
        with = "duration::option"
    )]
    pub epoch_interval: Option<std::time::Duration>,

    // --- Capabilities ---
    /// Explicit capability grants (overrides world defaults when restricting)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub capabilities: Option<WasmCapabilities>,

    // --- Networking ---
    /// Allow outgoing HTTP requests (default: true)
    #[serde(default = "default_true")]
    pub allow_http_outgoing: bool,
    /// Allowed outgoing HTTP hosts (empty = all allowed)
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub allowed_hosts: Vec<String>,
    /// Allow raw TCP sockets (default: false)
    #[serde(default)]
    pub allow_tcp: bool,
    /// Allow raw UDP sockets (default: false)
    #[serde(default)]
    pub allow_udp: bool,

    // --- Storage ---
    /// Pre-opened directories (host path -> guest path)
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub preopens: Vec<WasmPreopen>,
    /// Enable KV store access (default: true)
    #[serde(default = "default_true")]
    pub kv_enabled: bool,
    /// KV store namespace (default: service name)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub kv_namespace: Option<String>,
    /// KV store max value size in bytes (default: 1 MiB = 1,048,576 bytes)
    #[serde(default = "default_kv_max_value_size")]
    pub kv_max_value_size: u64,

    // --- Secrets ---
    /// Secret names accessible to this WASM module
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub secrets: Vec<String>,

    // --- Performance ---
    /// Pre-compile on deploy to reduce cold start (default: true)
    #[serde(default = "default_true")]
    pub precompile: bool,
}
532
/// Serde default for `WasmConfig::kv_max_value_size`: 1 MiB, in bytes.
fn default_kv_max_value_size() -> u64 {
    const BYTES_PER_MIB: u64 = 1024 * 1024;
    BYTES_PER_MIB
}
536
537impl Default for WasmConfig {
538    fn default() -> Self {
539        Self {
540            min_instances: default_min_instances(),
541            max_instances: default_max_instances(),
542            idle_timeout: default_idle_timeout(),
543            request_timeout: default_request_timeout(),
544            max_memory: None,
545            max_fuel: 0,
546            epoch_interval: None,
547            capabilities: None,
548            allow_http_outgoing: true,
549            allowed_hosts: Vec::new(),
550            allow_tcp: false,
551            allow_udp: false,
552            preopens: Vec::new(),
553            kv_enabled: true,
554            kv_namespace: None,
555            kv_max_value_size: default_kv_max_value_size(),
556            secrets: Vec::new(),
557            precompile: true,
558        }
559    }
560}
561
/// Configuration for WASM HTTP services with instance pooling.
///
/// Deprecated in favor of `WasmConfig`; a `From<WasmHttpConfig>` conversion
/// into `WasmConfig` exists in this file. All four fields share their serde
/// defaults with the same-named `WasmConfig` fields.
#[deprecated(note = "Use WasmConfig instead")]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct WasmHttpConfig {
    /// Minimum number of warm instances to keep ready
    #[serde(default = "default_min_instances")]
    pub min_instances: u32,
    /// Maximum number of instances to scale to
    #[serde(default = "default_max_instances")]
    pub max_instances: u32,
    /// Time before idle instances are terminated
    #[serde(default = "default_idle_timeout", with = "duration::required")]
    pub idle_timeout: std::time::Duration,
    /// Maximum time for a single request
    #[serde(default = "default_request_timeout", with = "duration::required")]
    pub request_timeout: std::time::Duration,
}
580
/// Serde default for `min_instances`: no warm instances are required.
fn default_min_instances() -> u32 {
    const NO_WARM_INSTANCES: u32 = 0;
    NO_WARM_INSTANCES
}
584
/// Serde default for `max_instances`: scale up to ten instances.
fn default_max_instances() -> u32 {
    const INSTANCE_CEILING: u32 = 10;
    INSTANCE_CEILING
}
588
/// Serde default for `idle_timeout`: five minutes.
fn default_idle_timeout() -> std::time::Duration {
    use std::time::Duration;
    const FIVE_MINUTES_SECS: u64 = 5 * 60;
    Duration::from_secs(FIVE_MINUTES_SECS)
}
592
/// Serde default for `request_timeout`: thirty seconds.
fn default_request_timeout() -> std::time::Duration {
    use std::time::Duration;
    const THIRTY_SECS: u64 = 30;
    Duration::from_secs(THIRTY_SECS)
}
596
597#[allow(deprecated)]
598impl Default for WasmHttpConfig {
599    fn default() -> Self {
600        Self {
601            min_instances: default_min_instances(),
602            max_instances: default_max_instances(),
603            idle_timeout: default_idle_timeout(),
604            request_timeout: default_request_timeout(),
605        }
606    }
607}
608
609#[allow(deprecated)]
610impl From<WasmHttpConfig> for WasmConfig {
611    fn from(old: WasmHttpConfig) -> Self {
612        Self {
613            min_instances: old.min_instances,
614            max_instances: old.max_instances,
615            idle_timeout: old.idle_timeout,
616            request_timeout: old.request_timeout,
617            ..Default::default()
618        }
619    }
620}
621
622impl ServiceType {
623    /// Returns true if this is any WASM service type
624    #[must_use]
625    pub fn is_wasm(&self) -> bool {
626        matches!(
627            self,
628            ServiceType::WasmHttp
629                | ServiceType::WasmPlugin
630                | ServiceType::WasmTransformer
631                | ServiceType::WasmAuthenticator
632                | ServiceType::WasmRateLimiter
633                | ServiceType::WasmMiddleware
634                | ServiceType::WasmRouter
635        )
636    }
637
638    /// Returns the default capabilities for this WASM service type.
639    /// Returns None for non-WASM types.
640    #[must_use]
641    pub fn default_wasm_capabilities(&self) -> Option<WasmCapabilities> {
642        match self {
643            ServiceType::WasmHttp | ServiceType::WasmRouter => Some(WasmCapabilities {
644                config: true,
645                keyvalue: true,
646                logging: true,
647                secrets: false,
648                metrics: false,
649                http_client: true,
650                cli: false,
651                filesystem: false,
652                sockets: false,
653            }),
654            ServiceType::WasmPlugin => Some(WasmCapabilities {
655                config: true,
656                keyvalue: true,
657                logging: true,
658                secrets: true,
659                metrics: true,
660                http_client: true,
661                cli: true,
662                filesystem: true,
663                sockets: false,
664            }),
665            ServiceType::WasmTransformer => Some(WasmCapabilities {
666                config: false,
667                keyvalue: false,
668                logging: true,
669                secrets: false,
670                metrics: false,
671                http_client: false,
672                cli: true,
673                filesystem: false,
674                sockets: false,
675            }),
676            ServiceType::WasmAuthenticator => Some(WasmCapabilities {
677                config: true,
678                keyvalue: false,
679                logging: true,
680                secrets: true,
681                metrics: false,
682                http_client: true,
683                cli: false,
684                filesystem: false,
685                sockets: false,
686            }),
687            ServiceType::WasmRateLimiter => Some(WasmCapabilities {
688                config: true,
689                keyvalue: true,
690                logging: true,
691                secrets: false,
692                metrics: true,
693                http_client: false,
694                cli: true,
695                filesystem: false,
696                sockets: false,
697            }),
698            ServiceType::WasmMiddleware => Some(WasmCapabilities {
699                config: true,
700                keyvalue: false,
701                logging: true,
702                secrets: false,
703                metrics: false,
704                http_client: true,
705                cli: false,
706                filesystem: false,
707                sockets: false,
708            }),
709            _ => None,
710        }
711    }
712}
713
/// Serde default for `ApiSpec::bind`: all IPv4 interfaces on port 3669.
fn default_api_bind() -> String {
    String::from("0.0.0.0:3669")
}
717
/// API server configuration (embedded in deploy/up flows).
///
/// Every field has a serde default, so the whole section may be omitted.
///
/// NOTE(review): unlike most spec structs in this file, this one does not set
/// `deny_unknown_fields`, so misspelled keys are silently ignored. Confirm
/// that is intended.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ApiSpec {
    /// Enable the API server (default: true)
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Bind address (default: "0.0.0.0:3669")
    #[serde(default = "default_api_bind")]
    pub bind: String,
    /// JWT secret (reads `ZLAYER_JWT_SECRET` env var if not set)
    // NOTE(review): no `skip_serializing` attribute is present, so a
    // configured secret is included whenever this struct is serialized (and
    // in `Debug` output). Verify that no caller logs or echoes the spec.
    #[serde(default)]
    pub jwt_secret: Option<String>,
    /// Enable Swagger UI (default: true)
    #[serde(default = "default_true")]
    pub swagger: bool,
}
734
735impl Default for ApiSpec {
736    fn default() -> Self {
737        Self {
738            enabled: true,
739            bind: default_api_bind(),
740            jwt_secret: None,
741            swagger: true,
742        }
743    }
744}
745
/// Top-level deployment specification.
///
/// `version` and `deployment` are required; every other section defaults
/// when absent. Unknown top-level keys are rejected on deserialization.
/// The derived `Validate` runs the custom checks on `version` and
/// `deployment` and recurses into `services` and `externals`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Validate)]
#[serde(deny_unknown_fields)]
pub struct DeploymentSpec {
    /// Spec version (must be "v1")
    #[validate(custom(function = "crate::spec::validate::validate_version_wrapper"))]
    pub version: String,

    /// Deployment name (used for overlays, DNS)
    #[validate(custom(function = "crate::spec::validate::validate_deployment_name_wrapper"))]
    pub deployment: String,

    /// Service definitions, keyed by service name
    #[serde(default)]
    #[validate(nested)]
    pub services: HashMap<String, ServiceSpec>,

    /// External service definitions (proxy backends without containers)
    ///
    /// External services register static backend addresses with the proxy
    /// for host/path-based routing without starting any containers.
    /// Useful for proxying to services running outside of `ZLayer`
    /// (e.g., on other machines reachable via VPN).
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    #[validate(nested)]
    pub externals: HashMap<String, ExternalSpec>,

    /// Top-level tunnel definitions (not tied to service endpoints)
    // No `#[validate(nested)]` here: `TunnelDefinition` does not derive
    // `Validate`.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub tunnels: HashMap<String, TunnelDefinition>,

    /// API server configuration (enabled by default)
    #[serde(default)]
    pub api: ApiSpec,

    /// Environment name whose secrets `$S:` references resolve against.
    /// Resolved to a concrete scope and stamped onto each service at deploy time.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub environment: Option<String>,

    /// Project name that scopes `environment` (matches how secrets were imported,
    /// e.g. `--env dev --project zatabase`). `None` => global environment.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub project: Option<String>,
}
791
/// External service specification (proxy backend without a container)
///
/// Defines a service that is not managed by `ZLayer` but should be proxied
/// through `ZLayer`'s reverse proxy. The proxy registers static backend
/// addresses and routes traffic based on endpoint host/path matching.
///
/// Unknown keys are rejected on deserialization.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Validate)]
#[serde(deny_unknown_fields)]
pub struct ExternalSpec {
    /// Static backend addresses (e.g., `["100.64.1.5:8096", "192.168.1.10:8096"]`)
    ///
    /// These are the upstream addresses the proxy will forward traffic to.
    /// At least one backend is required.
    // Only the list length is validated here; the address strings themselves
    // are not checked by this attribute.
    #[validate(length(min = 1, message = "at least one backend address is required"))]
    pub backends: Vec<String>,

    /// Endpoint definitions (proxy bindings)
    ///
    /// Defines how public/internal traffic is routed to this external service.
    #[serde(default)]
    #[validate(nested)]
    pub endpoints: Vec<EndpointSpec>,

    /// Health check configuration
    ///
    /// When specified, the proxy will health-check backends and remove
    /// unhealthy ones from the rotation.
    // NOTE(review): no `#[validate(nested)]` on this field, so the derived
    // `Validate` does not descend into `HealthSpec` — confirm whether
    // `HealthSpec` has validation rules that should run here.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub health: Option<HealthSpec>,
}
821
/// Top-level tunnel definition (not tied to a service endpoint).
///
/// `from`, `to`, `local_port`, and `remote_port` are required; `protocol`
/// and `expose` fall back to their types' `Default`. Unknown keys are
/// rejected on deserialization.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct TunnelDefinition {
    /// Source node
    pub from: String,

    /// Destination node
    pub to: String,

    /// Local port on source
    pub local_port: u16,

    /// Remote port on destination
    pub remote_port: u16,

    /// Protocol (tcp/udp, defaults to tcp)
    #[serde(default)]
    pub protocol: TunnelProtocol,

    /// Exposure type (defaults to internal)
    #[serde(default)]
    pub expose: ExposeType,
}
846
/// Protocol for tunnel connections (tcp or udp only).
///
/// Serialized in lowercase (`"tcp"`, `"udp"`). `Tcp` is the `Default`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "lowercase")]
pub enum TunnelProtocol {
    /// TCP tunnel (default).
    #[default]
    Tcp,
    /// UDP tunnel.
    Udp,
}
855
/// Log output configuration for services and jobs.
///
/// Every field has a serde default, so the section may be omitted or
/// partially specified.
///
/// NOTE(review): this struct does not set `deny_unknown_fields`, so unknown
/// keys are ignored rather than rejected. Confirm that is intended.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct LogsConfig {
    /// Where to write logs: "disk" (default) or "memory"
    // NOTE(review): stored as a free-form `String`; nothing in this struct
    // restricts it to the two documented values — presumably checked by the
    // consumer, verify.
    #[serde(default = "default_logs_destination")]
    pub destination: String,

    /// Maximum log size in bytes (default: 100MB)
    #[serde(default = "default_logs_max_size")]
    pub max_size_bytes: u64,

    /// Log retention in seconds (default: 7 days)
    #[serde(default = "default_logs_retention")]
    pub retention_secs: u64,
}
871
/// Serde default for the log destination: logs are written to `"disk"`.
fn default_logs_destination() -> String {
    String::from("disk")
}
875
/// Serde default for the maximum log size: 100 MiB expressed in bytes.
fn default_logs_max_size() -> u64 {
    const BYTES_PER_MIB: u64 = 1024 * 1024;
    100 * BYTES_PER_MIB
}
879
/// Serde default for log retention: seven days expressed in seconds.
fn default_logs_retention() -> u64 {
    const SECS_PER_DAY: u64 = 24 * 60 * 60;
    7 * SECS_PER_DAY
}
883
884impl Default for LogsConfig {
885    fn default() -> Self {
886        Self {
887            destination: default_logs_destination(),
888            max_size_bytes: default_logs_max_size(),
889            retention_secs: default_logs_retention(),
890        }
891    }
892}
893
/// Network mode for a service container.
///
/// Mirrors Docker's `HostConfig.NetworkMode` semantics. Accepts both an
/// enum-tagged form (e.g. `network_mode: { bridge: { name: my-net } }`) and a
/// string form (e.g. `"host"`, `"bridge:my-net"`, `"container:abc123"`).
///
/// The compound string forms (`"bridge:<name>"`, `"container:<id>"`) are
/// parsed by `deserialize_network_mode`, which a field opts into with
/// `#[serde(deserialize_with = "deserialize_network_mode")]`. The derived
/// `Deserialize` on this enum only understands the variant names, which
/// `rename_all = "lowercase"` lowercases on the wire.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, utoipa::ToSchema)]
#[serde(rename_all = "lowercase")]
pub enum NetworkMode {
    /// Default networking (overlay / bridge as configured by the platform).
    #[default]
    Default,
    /// Share the host network namespace (Docker `--network host`).
    Host,
    /// Disable networking entirely (Docker `--network none`).
    None,
    /// Attach to a Docker bridge network. When `name` is `None`, uses the
    /// default `bridge` network.
    Bridge {
        /// Bridge network name; an omitted key deserializes to `None`.
        #[serde(default)]
        name: Option<String>,
    },
    /// Attach to another container's network namespace
    /// (Docker `--network container:<id>`).
    Container { id: String },
}
919
920/// String-or-enum deserializer for [`NetworkMode`].
921///
922/// Accepts the same strings Docker accepts on `HostConfig.NetworkMode`:
923/// `"default"`, `"host"`, `"none"`, `"bridge"`, `"bridge:<name>"`, and
924/// `"container:<id>"`. Also accepts the enum-tagged YAML/JSON form produced by
925/// the derived [`Serialize`] impl (e.g. `bridge: { name: my-net }`).
926#[allow(clippy::too_many_lines)]
927fn deserialize_network_mode<'de, D>(deserializer: D) -> Result<NetworkMode, D::Error>
928where
929    D: serde::Deserializer<'de>,
930{
931    use serde::de::Error;
932
933    // Parse a Docker-style string form: "default" | "host" | "none" | "bridge"
934    // | "bridge:<name>" | "container:<id>".
935    fn from_str<E: Error>(s: &str) -> Result<NetworkMode, E> {
936        match s {
937            "default" => Ok(NetworkMode::Default),
938            "host" => Ok(NetworkMode::Host),
939            "none" => Ok(NetworkMode::None),
940            "bridge" => Ok(NetworkMode::Bridge { name: None }),
941            _ => {
942                if let Some(rest) = s.strip_prefix("bridge:") {
943                    if rest.is_empty() {
944                        Ok(NetworkMode::Bridge { name: None })
945                    } else {
946                        Ok(NetworkMode::Bridge {
947                            name: Some(rest.to_string()),
948                        })
949                    }
950                } else if let Some(rest) = s.strip_prefix("container:") {
951                    if rest.is_empty() {
952                        Err(E::custom(
953                            "network mode \"container:<id>\" requires a non-empty id",
954                        ))
955                    } else {
956                        Ok(NetworkMode::Container {
957                            id: rest.to_string(),
958                        })
959                    }
960                } else {
961                    Err(E::custom(format!("unknown network mode: {s}")))
962                }
963            }
964        }
965    }
966
967    // A `Visitor` driven through `deserialize_any` is the only FORMAT-AGNOSTIC
968    // way to accept both forms across formats: for the externally-tagged object
969    // form (`{ bridge: { name } }`), serde_json presents it to `deserialize_any`
970    // as a MAP (`visit_map`) while serde_yaml presents it as an ENUM
971    // (`visit_enum`), so BOTH must be handled. The previous `serde_yaml::Value`
972    // capture only worked under serde_yaml (it rejected a serde_json map for an
973    // externally-tagged enum with "invalid type: map, expected a Value::Tagged
974    // enum"), and a `#[serde(untagged)]` helper only worked under serde_json
975    // (serde_yaml errors with "untagged ... enums do not support enum input").
976    // The deployment store round-trips records through serde_json, so it must work
977    // there; YAML specs must keep working too — hence the hand-written visitor.
978    #[derive(Deserialize)]
979    struct BridgeFields {
980        #[serde(default)]
981        name: Option<String>,
982    }
983    #[derive(Deserialize)]
984    struct ContainerFields {
985        id: String,
986    }
987
988    struct NetworkModeVisitor;
989
990    impl<'de> serde::de::Visitor<'de> for NetworkModeVisitor {
991        type Value = NetworkMode;
992
993        fn expecting(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
994            f.write_str(
995                "a network mode string (\"host\", \"bridge:<name>\", …) or an \
996                 externally-tagged map ({ bridge: { name } } / { container: { id } })",
997            )
998        }
999
1000        fn visit_str<E: Error>(self, s: &str) -> Result<NetworkMode, E> {
1001            from_str(s)
1002        }
1003
1004        fn visit_string<E: Error>(self, s: String) -> Result<NetworkMode, E> {
1005            from_str(&s)
1006        }
1007
1008        // serde_json drives the externally-tagged object form through here.
1009        fn visit_map<A>(self, mut map: A) -> Result<NetworkMode, A::Error>
1010        where
1011            A: serde::de::MapAccess<'de>,
1012        {
1013            let key: String = map
1014                .next_key()?
1015                .ok_or_else(|| A::Error::custom("empty network mode map"))?;
1016            let mode = match key.as_str() {
1017                "bridge" => {
1018                    let b: BridgeFields = map.next_value()?;
1019                    NetworkMode::Bridge { name: b.name }
1020                }
1021                "container" => {
1022                    let c: ContainerFields = map.next_value()?;
1023                    NetworkMode::Container { id: c.id }
1024                }
1025                // Unit variants never serialize as a map (the derived `Serialize`
1026                // emits a bare string), but accept the lenient form anyway.
1027                "default" => {
1028                    let _: serde::de::IgnoredAny = map.next_value()?;
1029                    NetworkMode::Default
1030                }
1031                "host" => {
1032                    let _: serde::de::IgnoredAny = map.next_value()?;
1033                    NetworkMode::Host
1034                }
1035                "none" => {
1036                    let _: serde::de::IgnoredAny = map.next_value()?;
1037                    NetworkMode::None
1038                }
1039                other => {
1040                    return Err(A::Error::custom(format!(
1041                        "unknown network mode variant: {other}"
1042                    )));
1043                }
1044            };
1045            // Reject a second key (an externally-tagged enum has exactly one).
1046            if map.next_key::<String>()?.is_some() {
1047                return Err(A::Error::custom(
1048                    "network mode map must have exactly one variant key",
1049                ));
1050            }
1051            Ok(mode)
1052        }
1053
1054        // serde_yaml drives the externally-tagged object form through here
1055        // (it models the serialized enum as enum input, not a plain map).
1056        fn visit_enum<A>(self, data: A) -> Result<NetworkMode, A::Error>
1057        where
1058            A: serde::de::EnumAccess<'de>,
1059        {
1060            use serde::de::VariantAccess;
1061            let (tag, va): (String, _) = data.variant()?;
1062            match tag.as_str() {
1063                "default" => {
1064                    va.unit_variant()?;
1065                    Ok(NetworkMode::Default)
1066                }
1067                "host" => {
1068                    va.unit_variant()?;
1069                    Ok(NetworkMode::Host)
1070                }
1071                "none" => {
1072                    va.unit_variant()?;
1073                    Ok(NetworkMode::None)
1074                }
1075                "bridge" => {
1076                    let b: BridgeFields = va.newtype_variant()?;
1077                    Ok(NetworkMode::Bridge { name: b.name })
1078                }
1079                "container" => {
1080                    let c: ContainerFields = va.newtype_variant()?;
1081                    Ok(NetworkMode::Container { id: c.id })
1082                }
1083                other => Err(<A::Error as Error>::custom(format!(
1084                    "unknown network mode variant: {other}"
1085                ))),
1086            }
1087        }
1088    }
1089
1090    deserializer.deserialize_any(NetworkModeVisitor)
1091}
1092
/// Container isolation mode (Windows containers only; ignored on Linux/macOS).
///
/// * `Auto` (default) — runtime picks: Hyper-V on Windows client SKUs, Process on Server with matching build.
/// * `Process` — shared host kernel (fast, requires container OS build to match host).
/// * `Hyperv` — utility VM (stronger boundary, cross-version compatible).
///
/// Variant names are kebab-cased on the wire (`rename_all = "kebab-case"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "kebab-case")]
pub enum IsolationMode {
    /// Let the runtime choose; this is the `Default` value.
    #[default]
    Auto,
    /// Process isolation (shared host kernel).
    Process,
    /// Hyper-V isolation (utility VM).
    Hyperv,
}
1106
/// Selects which execution backend runs a service's containers. `Auto` keeps
/// today's behavior (image-marker / platform.os / image-OS auto-detection).
/// macOS: `Sandbox` = Seatbelt native-process sandbox; `Vz` = native-macOS
/// guest VM; `VzLinux` = Linux-guest VZ VM; `Vm` = libkrun micro-VM. Honored
/// by the macOS `CompositeRuntime`; ignored where it doesn't apply.
///
/// Variant names are kebab-cased on the wire (`rename_all = "kebab-case"`),
/// e.g. `VzLinux` is written `vz-linux`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "kebab-case")]
pub enum RuntimeIsolation {
    /// Auto-detect the backend; this is the `Default` value.
    #[default]
    Auto,
    /// Seatbelt native-process sandbox.
    Sandbox,
    /// Native-macOS guest VM.
    Vz,
    /// Linux-guest VZ VM.
    VzLinux,
    /// libkrun micro-VM.
    Vm,
}
1122
1123impl RuntimeIsolation {
1124    /// The `com.zlayer.isolation` label value, or `None` for `Auto`.
1125    #[must_use]
1126    pub fn label_value(self) -> Option<&'static str> {
1127        match self {
1128            RuntimeIsolation::Auto => None,
1129            RuntimeIsolation::Sandbox => Some("sandbox"),
1130            RuntimeIsolation::Vz => Some("vz"),
1131            RuntimeIsolation::VzLinux => Some("vz-linux"),
1132            RuntimeIsolation::Vm => Some("vm"),
1133        }
1134    }
1135}
1136
/// Per-process resource limit (Docker `--ulimit` style).
///
/// # Default-fill rules (mirrors Docker `--ulimit`)
///
/// Docker requires `soft` and lets `hard` default to `soft`. We are liberal in
/// what we accept so that an omitted bound never yields the footgun of
/// `hard < soft` (which makes `setrlimit` fail with `EINVAL`, or pins the hard
/// cap at `0`):
///
/// * both present  → used as-is.
/// * `hard` absent  → `hard = soft` (Docker's documented behavior).
/// * `soft` absent  → `soft = hard` (a lone bound applies to both; never
///   produces `soft > hard`).
/// * both absent    → both `0` (preserves the `Default` shape).
///
/// The public fields stay `soft: i64, hard: i64`; the fill happens during
/// deserialization so consumers always see a fully-populated, consistent pair.
///
/// Only `Serialize` is derived here; `Deserialize` is implemented by hand so
/// the rules above can tell an omitted bound apart from an explicit `0`.
#[derive(Debug, Clone, Default, Serialize, PartialEq, Eq, utoipa::ToSchema)]
#[serde(deny_unknown_fields)]
pub struct UlimitSpec {
    /// Soft limit. If omitted in the input, defaults to `hard` (or `0` when
    /// both are omitted).
    #[serde(default)]
    pub soft: i64,
    /// Hard limit. If omitted in the input, defaults to `soft` (Docker's
    /// `--ulimit` behavior), so an omitted hard cap never falls below the soft
    /// limit.
    #[serde(default)]
    pub hard: i64,
}
1167
1168impl<'de> Deserialize<'de> for UlimitSpec {
1169    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1170    where
1171        D: serde::Deserializer<'de>,
1172    {
1173        /// Shadow of [`UlimitSpec`] with optional bounds so we can tell an
1174        /// omitted field apart from an explicit `0` and apply the Docker-style
1175        /// default-fill rules afterwards.
1176        #[derive(Deserialize)]
1177        #[serde(deny_unknown_fields)]
1178        struct Shadow {
1179            #[serde(default)]
1180            soft: Option<i64>,
1181            #[serde(default)]
1182            hard: Option<i64>,
1183        }
1184
1185        let Shadow { soft, hard } = Shadow::deserialize(deserializer)?;
1186        Ok(match (soft, hard) {
1187            (Some(soft), Some(hard)) => Self { soft, hard },
1188            // `hard` omitted: mirror Docker — hard defaults to soft.
1189            (Some(soft), None) => Self { soft, hard: soft },
1190            // `soft` omitted: a lone bound applies to both (never soft > hard).
1191            (None, Some(hard)) => Self { soft: hard, hard },
1192            (None, None) => Self::default(),
1193        })
1194    }
1195}
1196
/// Per-service specification
///
/// Serialization uses the derived impl and the field attributes below.
/// Deserialization is routed through `ServiceSpecCompat`: the container-level
/// `#[serde(from = "ServiceSpecCompat")]` makes serde deserialize that shim
/// first and then convert it into this struct via `From`, so the accepted
/// input shape is governed by the shim.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Validate)]
#[serde(from = "ServiceSpecCompat")]
// The struct carries several independent boolean flags (`privileged`,
// `host_network`, `read_only_root_fs`, `tty`, `stdin_open`), which trips this
// lint.
#[allow(clippy::struct_excessive_bools)]
pub struct ServiceSpec {
    /// Owning deployment name (the `zlayer up` / deploy request's deployment).
    ///
    /// Identifies which deployment this service instance belongs to so that
    /// service discovery (DNS) can be scoped per-deployment: two deployments
    /// each with a service named `postgres` must NOT clobber each other's DNS
    /// records. Populated by the deploy orchestrator before the service is
    /// registered with the `ServiceManager`. `None` for standalone /
    /// single-deployment callers (the daemon falls back to its global zone),
    /// kept optional with a serde default for wire back-compat.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub deployment: Option<String>,

    /// Resolved secret scope for this service, stamped by the deploy handler from
    /// the deployment's `environment`/`project`. Carries the env/project ids so the
    /// agent can resolve `$S:` refs without re-looking-up the environment. Not
    /// authored in YAML; populated server-side. MUST round-trip across restart.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub secret_scope: Option<SecretScope>,

    /// Resource type (service, job, cron)
    #[serde(default = "default_resource_type")]
    pub rtype: ResourceType,

    /// Cron schedule expression (only for rtype: cron)
    /// Uses 7-field cron syntax: "sec min hour day-of-month month day-of-week year"
    /// Examples:
    ///   - "0 0 0 * * * *" (daily at midnight)
    ///   - "0 */5 * * * * *" (every 5 minutes)
    ///   - "0 0 12 * * MON-FRI *" (weekdays at noon)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    #[validate(custom(function = "crate::spec::validate::validate_schedule_wrapper"))]
    pub schedule: Option<String>,

    /// Container image specification
    ///
    /// The only field without a serde default attribute on this struct.
    #[validate(nested)]
    pub image: ImageSpec,

    /// Resource limits
    #[serde(default)]
    #[validate(nested)]
    pub resources: ResourcesSpec,

    /// Environment variables for the service
    ///
    /// Values can be:
    /// - Plain strings: `"value"`
    /// - Host env refs: `$E:VAR_NAME`
    /// - Secret refs: `$S:secret-name` or `$S:@service/secret-name`
    #[serde(default)]
    pub env: HashMap<String, String>,

    /// Command override (entrypoint, args, workdir)
    #[serde(default)]
    pub command: CommandSpec,

    /// Network configuration
    #[serde(default)]
    pub network: ServiceNetworkSpec,

    /// Endpoint definitions (proxy bindings)
    #[serde(default)]
    #[validate(nested)]
    pub endpoints: Vec<EndpointSpec>,

    /// Scaling configuration
    #[serde(default)]
    #[validate(custom(function = "crate::spec::validate::validate_scale_spec"))]
    pub scale: ScaleSpec,

    /// Heterogeneous replica groups within this service.
    ///
    /// When set, the service is composed of multiple named groups (e.g.
    /// `primary` + `read` + `cache`) instead of a flat `scale.replicas`.
    /// Each group inherits `ServiceSpec` defaults (image, env, command,
    /// resources) and overrides per-group fields.
    ///
    /// When `None` (default), the service uses `scale` directly with an
    /// implicit single group `{role: "default", count: <scale.replicas>}`.
    /// This is the backward-compatible path used by all existing
    /// specifications.
    ///
    /// Cross-group role uniqueness is validated separately by
    /// [`validate_unique_replica_group_roles`] from the deploy handler.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    #[validate(nested)]
    pub replica_groups: Option<Vec<ReplicaGroup>>,

    /// Dependency specifications
    #[serde(default)]
    pub depends: Vec<DependsSpec>,

    /// Health check configuration
    #[serde(default = "default_health")]
    pub health: HealthSpec,

    /// Init actions (pre-start lifecycle steps)
    #[serde(default)]
    pub init: InitSpec,

    /// Error handling policies
    #[serde(default)]
    pub errors: ErrorsSpec,

    /// Container lifecycle policy (e.g., delete-on-exit).
    ///
    /// Purely declarative on this type; downstream layers (agent / API /
    /// scheduler) read this field to decide whether to clean up the
    /// container record after termination.
    #[serde(default)]
    pub lifecycle: LifecycleSpec,

    /// Container isolation mode (Windows containers only; ignored on Linux/macOS).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub isolation: Option<IsolationMode>,

    /// macOS execution backend selector (see [`RuntimeIsolation`]). `None` =
    /// `Auto`. Maps to the `com.zlayer.isolation` label the runtime consults.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub runtime: Option<RuntimeIsolation>,

    /// Device passthrough (e.g., /dev/kvm for VMs)
    #[serde(default)]
    pub devices: Vec<DeviceSpec>,

    /// Storage mounts for the container
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub storage: Vec<StorageSpec>,

    /// Host-to-container port mappings (Docker's `-p host:container/proto`).
    ///
    /// Each entry publishes a container port on the host. When `host_port` is
    /// `None` (or zero), the daemon assigns an ephemeral host port.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub port_mappings: Vec<PortMapping>,

    /// Linux capabilities to add (e.g., `SYS_ADMIN`, `NET_ADMIN`).
    ///
    /// Also accepts the Docker-compatible alias `cap_add` on input.
    #[serde(default, alias = "cap_add", skip_serializing_if = "Vec::is_empty")]
    pub capabilities: Vec<String>,

    /// Linux capabilities to drop (Docker `--cap-drop`).
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub cap_drop: Vec<String>,

    /// Run container in privileged mode (all capabilities + all devices)
    #[serde(default)]
    pub privileged: bool,

    /// Node allocation mode (shared, dedicated, exclusive)
    #[serde(default)]
    pub node_mode: NodeMode,

    /// Node selection constraints (required/preferred labels)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub node_selector: Option<NodeSelector>,

    /// Placement affinity for this service's replicas when the service is NOT
    /// composed of `replica_groups` (each group carries its own affinity).
    ///
    /// `None` (the default) preserves historical shared-mode behavior:
    /// bin-pack / concentrate consecutive replicas onto the fewest nodes that
    /// fit. Set to `spread` for same-service anti-affinity (replicas land on
    /// distinct nodes for higher availability), `pack` to concentrate
    /// explicitly, or `pin` to bind all replicas to one node.
    ///
    /// Note: capacity always wins — a replica that does not fit on a node is
    /// placed elsewhere regardless of affinity.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub affinity: Option<GroupAffinity>,

    /// Target platform for this service. When `None` (default), the service is
    /// eligible to run on any agent regardless of OS/architecture. When `Some`,
    /// the scheduler will only place replicas on agents whose platform matches.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub platform: Option<TargetPlatform>,

    /// Service type (standard, `wasm_http`, `wasm_plugin`, etc.)
    #[serde(default)]
    pub service_type: ServiceType,

    /// WASM configuration (used when `service_type` is any Wasm* variant)
    /// Also accepts the deprecated `wasm_http` key for backward compatibility.
    #[serde(default, skip_serializing_if = "Option::is_none", alias = "wasm_http")]
    pub wasm: Option<WasmConfig>,

    /// Log output configuration. If not set, uses platform defaults.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub logs: Option<LogsConfig>,

    /// Use host networking (container shares host network namespace)
    ///
    /// When true, the container will NOT get its own network namespace.
    /// This is set programmatically via the `--host-network` CLI flag, not in YAML specs.
    ///
    /// Marked `#[serde(skip)]`, so the derived `Serialize` never writes it.
    /// NOTE(review): the `ServiceSpecCompat` shim is documented as folding a
    /// deprecated `host_network` input flag into `network_mode` — confirm how
    /// the two fields interact after conversion.
    #[serde(skip)]
    pub host_network: bool,

    /// Container hostname (maps to Docker's `--hostname`).
    ///
    /// When set, the container's `/etc/hostname` and initial kernel hostname
    /// are configured to this value. Ignored when `host_network` is true
    /// (the container inherits the host's hostname).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub hostname: Option<String>,

    /// Additional DNS servers for the container (maps to Docker's `--dns`).
    ///
    /// Each entry must be a plausible IPv4 or IPv6 address. Forwarded to the
    /// container runtime as resolver addresses ahead of the platform defaults.
    /// Ignored when `host_network` is true.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub dns: Vec<String>,

    /// DNS `search` domains for the container's `/etc/resolv.conf` (maps to
    /// Docker's `--dns-search`).
    ///
    /// Used by overlay service discovery: the deploy path stamps the
    /// per-deployment search domain (`<deployment>.<zone> <zone>`) so a bare
    /// `<svc>` / `<svc>.service` query expands to the registered
    /// deployment-scoped FQDN. Ignored when `host_network` is true.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub dns_search: Vec<String>,

    /// Extra `hostname:ip` entries appended to `/etc/hosts` (maps to Docker's
    /// `--add-host`).
    ///
    /// Each entry must be in the form `"<hostname>:<ip>"`. The special literal
    /// `host-gateway` is accepted as the `<ip>` half (resolved by Docker /
    /// bollard to the host-visible gateway address, commonly used with
    /// `host.docker.internal:host-gateway`).
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub extra_hosts: Vec<String>,

    /// Container restart policy (Docker-style).
    ///
    /// Controls when the runtime should automatically restart the container
    /// after it exits. Maps to Docker's `HostConfig.RestartPolicy`. Named
    /// `ContainerRestartPolicy` to avoid colliding with `ZLayer`'s existing
    /// `PanicPolicy` (which controls post-panic behavior, not runtime-level
    /// restarts).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub restart_policy: Option<ContainerRestartPolicy>,

    /// Free-form key/value labels attached to the container
    /// (Docker `--label`).
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub labels: HashMap<String, String>,

    /// User and group override for the container's main process
    /// (Docker `--user uid:gid`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub user: Option<String>,

    /// Signal sent to the container's main process to request a graceful
    /// shutdown (Docker `--stop-signal`). Accepts e.g. `"SIGTERM"` or `"15"`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_signal: Option<String>,

    /// Grace period to wait between the stop signal and a forced kill
    /// (Docker `--stop-timeout`).
    ///
    /// Serialized through the `duration::option` helper module as a
    /// human-readable duration string rather than a raw number.
    #[serde(
        default,
        with = "duration::option",
        skip_serializing_if = "Option::is_none"
    )]
    pub stop_grace_period: Option<std::time::Duration>,

    /// Kernel sysctl overrides (Docker `--sysctl`).
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub sysctls: HashMap<String, String>,

    /// Per-process ulimits (Docker `--ulimit`).
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub ulimits: HashMap<String, UlimitSpec>,

    /// Security options such as `apparmor=...`, `seccomp=...`,
    /// `no-new-privileges:true` (Docker `--security-opt`).
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub security_opt: Vec<String>,

    /// PID namespace mode (Docker `--pid`). Accepts e.g. `"host"` or
    /// `"container:<id>"`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub pid_mode: Option<String>,

    /// IPC namespace mode (Docker `--ipc`). Accepts e.g. `"host"`,
    /// `"shareable"`, `"private"`, or `"container:<id>"`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ipc_mode: Option<String>,

    /// Network mode (Docker `--network`). Accepts both the enum-tagged form
    /// and the Docker-style strings (`"host"`, `"none"`, `"bridge"`,
    /// `"bridge:<name>"`, `"container:<id>"`).
    #[serde(default, deserialize_with = "deserialize_network_mode")]
    pub network_mode: NetworkMode,

    /// Additional groups to add to the container process
    /// (Docker `--group-add`).
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub extra_groups: Vec<String>,

    /// Mount the container's root filesystem read-only (Docker `--read-only`).
    #[serde(default)]
    pub read_only_root_fs: bool,

    /// Run a Docker-supplied init process (PID 1) inside the container
    /// (Docker `--init`). Distinct from [`ServiceSpec::init`] which controls
    /// `ZLayer`'s pre-start init actions.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub init_container: Option<bool>,

    /// Allocate a TTY for the container's main process (Docker `--tty`,
    /// compose `tty: true`).
    #[serde(default)]
    pub tty: bool,

    /// Keep STDIN open even when nothing is attached (Docker `--interactive`,
    /// compose `stdin_open: true`).
    #[serde(default)]
    pub stdin_open: bool,

    /// User namespace mode (Docker `--userns`). Accepts e.g. `"host"` or
    /// a remap-spec name configured on the daemon.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub userns_mode: Option<String>,

    /// Cgroup parent path (Docker `--cgroup-parent`). When set, the runtime
    /// places the container under the given cgroup hierarchy.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cgroup_parent: Option<String>,

    /// Container ports exposed but not published to the host (compose
    /// `expose:`). Each entry is a port string, optionally `port/proto`
    /// (e.g. `"3000"`, `"8080/tcp"`). Treated as documentation by the
    /// runtime; downstream networking layers may use this list to allow
    /// inter-service traffic without publishing to the host.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub expose: Vec<String>,

    /// Per-service overlay-network configuration.
    ///
    /// When `None` (default), the daemon uses the cluster-level overlay
    /// default. When `Some`, the service opts into an explicit mode /
    /// parent. See [`crate::overlay::OverlayConfig`] for the v0.51
    /// implementation status.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub overlay: Option<crate::overlay::OverlayConfig>,

    /// Policy for making this service's exposed ports reachable on the node's
    /// loopback (`127.0.0.1:<port>`) for same-node consumers — the GitHub
    /// Actions "service published to localhost" convention. See
    /// [`LocalhostReachability`]. Default [`LocalhostReachability::Auto`].
    ///
    /// Omitted from serialized output while it holds the default value
    /// (`LocalhostReachability::is_default`).
    #[serde(default, skip_serializing_if = "LocalhostReachability::is_default")]
    pub localhost_reachability: LocalhostReachability,
}
1557
/// How a service's exposed ports are made reachable on the node's loopback
/// (`127.0.0.1:<port>`) for same-service / same-node consumers.
///
/// `127.0.0.1` always means *this container's own* loopback — isolated per
/// container on Linux (youki netns), macOS VZ, and Windows HCS; shared with the
/// host on the macOS seatbelt / libkrun runtimes. This setting never rewrites a
/// container's own loopback. It controls only whether the daemon ALSO binds the
/// service's exposed port on the *node's* loopback and L4-forwards it to the
/// container, so a consumer that shares the node loopback can reach the service
/// at `localhost:<port>`.
///
/// Variant names are snake_cased on the wire (`rename_all = "snake_case"`):
/// `auto`, `always`, `never`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum LocalhostReachability {
    /// Publish to the node loopback only when the service is effectively
    /// single-member (no replica groups, scaling disabled or capped at one
    /// replica). A multi-member service is not a "pod", so name-based overlay
    /// DNS (`<service>.service.local`) stays the addressing path to avoid an
    /// ambiguous single loopback port fronting many replicas. Default.
    #[default]
    Auto,
    /// Always publish each exposed port on the node loopback.
    Always,
    /// Never publish to the node loopback (name / overlay addressing only).
    Never,
}
1583
1584impl LocalhostReachability {
1585    /// True for the serde default ([`LocalhostReachability::Auto`]); used to
1586    /// skip serializing the field when it carries the default value.
1587    #[must_use]
1588    pub fn is_default(&self) -> bool {
1589        matches!(self, Self::Auto)
1590    }
1591}
1592
/// Deserialization shim for [`ServiceSpec`].
///
/// Mirrors `ServiceSpec`'s field shape so that the derived `Deserialize` impl
/// can pick up the YAML/JSON value, then [`From::from`] folds the deprecated
/// `host_network` flag into the typed [`NetworkMode`] before handing the
/// finalized struct back to the caller.
///
/// Notes for maintainers:
/// - `deny_unknown_fields` is set, so any input key that is not listed here
///   (or covered by an `alias`) is rejected at parse time.
/// - `image` is the only field without a `default`, i.e. the only required key.
/// - There is deliberately no `dns_search` field: the `From` conversion always
///   produces an empty `dns_search` on the resulting `ServiceSpec`.
/// - Every field added to `ServiceSpec` that should be user-settable needs a
///   twin here and a mapping line in the `From` impl.
#[derive(Deserialize)]
#[serde(deny_unknown_fields)]
#[allow(clippy::struct_excessive_bools)]
struct ServiceSpecCompat {
    // --- identity / kind -------------------------------------------------
    #[serde(default)]
    deployment: Option<String>,
    #[serde(default)]
    secret_scope: Option<SecretScope>,
    // Falls back to `default_resource_type()` when the key is absent.
    #[serde(default = "default_resource_type")]
    rtype: ResourceType,
    #[serde(default)]
    schedule: Option<String>,
    // Required: no serde default is declared for the image.
    image: ImageSpec,

    // --- runtime shape ---------------------------------------------------
    #[serde(default)]
    resources: ResourcesSpec,
    #[serde(default)]
    env: HashMap<String, String>,
    #[serde(default)]
    command: CommandSpec,
    #[serde(default)]
    network: ServiceNetworkSpec,
    #[serde(default)]
    endpoints: Vec<EndpointSpec>,
    #[serde(default)]
    scale: ScaleSpec,
    #[serde(default)]
    replica_groups: Option<Vec<ReplicaGroup>>,
    #[serde(default)]
    depends: Vec<DependsSpec>,
    // Falls back to `default_health()` when the key is absent.
    #[serde(default = "default_health")]
    health: HealthSpec,
    #[serde(default)]
    init: InitSpec,
    #[serde(default)]
    errors: ErrorsSpec,
    #[serde(default)]
    lifecycle: LifecycleSpec,
    #[serde(default)]
    isolation: Option<IsolationMode>,
    // NOTE(review): `skip_serializing_if` has no effect here because this
    // struct only derives `Deserialize`; presumably copied from `ServiceSpec`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    runtime: Option<RuntimeIsolation>,
    #[serde(default)]
    devices: Vec<DeviceSpec>,
    #[serde(default)]
    storage: Vec<StorageSpec>,
    #[serde(default)]
    port_mappings: Vec<PortMapping>,
    // Also accepted under the key `cap_add`.
    #[serde(default, alias = "cap_add")]
    capabilities: Vec<String>,
    #[serde(default)]
    cap_drop: Vec<String>,
    #[serde(default)]
    privileged: bool,

    // --- placement -------------------------------------------------------
    #[serde(default)]
    node_mode: NodeMode,
    #[serde(default)]
    node_selector: Option<NodeSelector>,
    #[serde(default)]
    affinity: Option<GroupAffinity>,
    #[serde(default)]
    platform: Option<TargetPlatform>,
    #[serde(default)]
    service_type: ServiceType,
    // Also accepted under the key `wasm_http`.
    #[serde(default, alias = "wasm_http")]
    wasm: Option<WasmConfig>,
    #[serde(default)]
    logs: Option<LogsConfig>,

    // --- container options -----------------------------------------------
    /// Backwards-compat shim: when `host_network: true` is present in the input,
    /// it is folded into `network_mode = NetworkMode::Host` during conversion.
    /// `None` means the key was absent from the input.
    #[serde(default)]
    host_network: Option<bool>,
    #[serde(default)]
    hostname: Option<String>,
    #[serde(default)]
    dns: Vec<String>,
    #[serde(default)]
    extra_hosts: Vec<String>,
    #[serde(default)]
    restart_policy: Option<ContainerRestartPolicy>,
    #[serde(default)]
    labels: HashMap<String, String>,
    #[serde(default)]
    user: Option<String>,
    #[serde(default)]
    stop_signal: Option<String>,
    // Parsed from a human-readable duration string by the `duration` serde
    // module at the top of this file; a missing or null value yields `None`.
    #[serde(default, with = "duration::option")]
    stop_grace_period: Option<std::time::Duration>,
    #[serde(default)]
    sysctls: HashMap<String, String>,
    #[serde(default)]
    ulimits: HashMap<String, UlimitSpec>,
    #[serde(default)]
    security_opt: Vec<String>,
    #[serde(default)]
    pid_mode: Option<String>,
    #[serde(default)]
    ipc_mode: Option<String>,
    // Parsed by the custom `deserialize_network_mode` function (defined
    // elsewhere in this file); absent key yields `NetworkMode::default()`.
    #[serde(default, deserialize_with = "deserialize_network_mode")]
    network_mode: NetworkMode,
    #[serde(default)]
    extra_groups: Vec<String>,
    #[serde(default)]
    read_only_root_fs: bool,
    #[serde(default)]
    init_container: Option<bool>,
    #[serde(default)]
    tty: bool,
    #[serde(default)]
    stdin_open: bool,
    #[serde(default)]
    userns_mode: Option<String>,
    #[serde(default)]
    cgroup_parent: Option<String>,
    #[serde(default)]
    expose: Vec<String>,
    #[serde(default)]
    overlay: Option<crate::overlay::OverlayConfig>,
    // Absent key yields `LocalhostReachability::Auto` (the `#[default]` variant).
    #[serde(default)]
    localhost_reachability: LocalhostReachability,
}
1719
1720impl From<ServiceSpecCompat> for ServiceSpec {
1721    fn from(c: ServiceSpecCompat) -> Self {
1722        // If the deprecated `host_network: true` flag is set, fold it into
1723        // the typed network mode unless the caller already supplied a
1724        // non-default value. This keeps existing in-process callers and
1725        // any legacy YAML that still emits `host_network: true` working.
1726        let network_mode = match (c.host_network, &c.network_mode) {
1727            (Some(true), NetworkMode::Default) => NetworkMode::Host,
1728            _ => c.network_mode,
1729        };
1730        let host_network = c.host_network.unwrap_or(false) || network_mode == NetworkMode::Host;
1731
1732        Self {
1733            deployment: c.deployment,
1734            secret_scope: c.secret_scope,
1735            rtype: c.rtype,
1736            schedule: c.schedule,
1737            image: c.image,
1738            resources: c.resources,
1739            env: c.env,
1740            command: c.command,
1741            network: c.network,
1742            endpoints: c.endpoints,
1743            scale: c.scale,
1744            replica_groups: c.replica_groups,
1745            depends: c.depends,
1746            health: c.health,
1747            init: c.init,
1748            errors: c.errors,
1749            lifecycle: c.lifecycle,
1750            isolation: c.isolation,
1751            runtime: c.runtime,
1752            devices: c.devices,
1753            storage: c.storage,
1754            port_mappings: c.port_mappings,
1755            capabilities: c.capabilities,
1756            cap_drop: c.cap_drop,
1757            privileged: c.privileged,
1758            node_mode: c.node_mode,
1759            node_selector: c.node_selector,
1760            affinity: c.affinity,
1761            platform: c.platform,
1762            service_type: c.service_type,
1763            wasm: c.wasm,
1764            logs: c.logs,
1765            host_network,
1766            hostname: c.hostname,
1767            dns: c.dns,
1768            // Search domains are stamped programmatically by the deploy path
1769            // (per-deployment overlay search domain), not carried on the source
1770            // container spec.
1771            dns_search: Vec::new(),
1772            extra_hosts: c.extra_hosts,
1773            restart_policy: c.restart_policy,
1774            labels: c.labels,
1775            user: c.user,
1776            stop_signal: c.stop_signal,
1777            stop_grace_period: c.stop_grace_period,
1778            sysctls: c.sysctls,
1779            ulimits: c.ulimits,
1780            security_opt: c.security_opt,
1781            pid_mode: c.pid_mode,
1782            ipc_mode: c.ipc_mode,
1783            network_mode,
1784            extra_groups: c.extra_groups,
1785            read_only_root_fs: c.read_only_root_fs,
1786            init_container: c.init_container,
1787            tty: c.tty,
1788            stdin_open: c.stdin_open,
1789            userns_mode: c.userns_mode,
1790            cgroup_parent: c.cgroup_parent,
1791            expose: c.expose,
1792            overlay: c.overlay,
1793            localhost_reachability: c.localhost_reachability,
1794        }
1795    }
1796}
1797
1798impl ServiceSpec {
1799    /// True when this service is effectively a single member: it has no
1800    /// (multi-member) replica groups and a scale policy that cannot exceed one
1801    /// replica (`Fixed { 0 | 1 }`, `Adaptive { max <= 1 }`, or `Manual`).
1802    ///
1803    /// Used by [`LocalhostReachability::Auto`] to decide whether publishing the
1804    /// service's ports on the node loopback is unambiguous — a genuine
1805    /// multi-member service would put several backends behind one loopback port,
1806    /// so name-based overlay DNS is the correct addressing for those instead.
1807    #[must_use]
1808    pub fn is_single_member(&self) -> bool {
1809        if let Some(groups) = &self.replica_groups {
1810            let total: u32 = groups.iter().map(|g| g.count).sum();
1811            return groups.len() <= 1 && total <= 1;
1812        }
1813        match &self.scale {
1814            ScaleSpec::Fixed { replicas } => *replicas <= 1,
1815            ScaleSpec::Adaptive { max, .. } => *max <= 1,
1816            ScaleSpec::Manual => true,
1817        }
1818    }
1819
1820    /// Whether the daemon should publish this service's exposed ports on the
1821    /// node loopback (`127.0.0.1:<port>`), per its [`LocalhostReachability`]
1822    /// policy. `Auto` publishes only for effectively single-member services
1823    /// (see [`ServiceSpec::is_single_member`]).
1824    #[must_use]
1825    pub fn publish_to_node_loopback(&self) -> bool {
1826        match self.localhost_reachability {
1827            LocalhostReachability::Always => true,
1828            LocalhostReachability::Never => false,
1829            LocalhostReachability::Auto => self.is_single_member(),
1830        }
1831    }
1832
1833    /// Construct a minimally-populated [`ServiceSpec`] with just the two
1834    /// fields callers always have to supply explicitly: the logical service
1835    /// name (used for diagnostics / labels at the call site — this struct
1836    /// does not carry the service name itself; it is the key in
1837    /// [`DeploymentSpec::services`]) and the container image. Every other
1838    /// field is filled in from [`Default::default`].
1839    ///
1840    /// Intended for tests and one-off in-memory fixtures. Production code
1841    /// paths that build a `ServiceSpec` from user input should still go
1842    /// through `serde` deserialization or an explicit struct literal so that
1843    /// every field is consciously set.
1844    ///
1845    /// # Examples
1846    /// ```ignore
1847    /// let spec = ServiceSpec::minimal("api", "ghcr.io/acme/api:1.2");
1848    /// ```
1849    ///
1850    /// # Panics
1851    /// Panics only if the fixed fallback string `"scratch:latest"` cannot
1852    /// be parsed as an [`ImageReference`] — which would indicate a bug in
1853    /// the OCI reference parser, not in caller input.
1854    #[must_use]
1855    pub fn minimal(_name: impl Into<String>, image: impl Into<String>) -> Self {
1856        use std::str::FromStr;
1857        let image_str = image.into();
1858        let image_ref = crate::ImageRef::from_str(&image_str).unwrap_or_else(|_| {
1859            crate::ImageRef::from_str("scratch:latest")
1860                .expect("'scratch:latest' is a valid image reference")
1861        });
1862        Self {
1863            image: ImageSpec {
1864                name: image_ref,
1865                pull_policy: default_pull_policy(),
1866                source_policy: None,
1867            },
1868            ..Self::default()
1869        }
1870    }
1871}
1872
/// Command override specification (Section 5.5)
///
/// Every field is optional; `None` means "no override requested" and is
/// omitted from serialized output. Unknown keys are rejected
/// (`deny_unknown_fields`). The derived `Default` leaves all three unset.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(deny_unknown_fields)]
pub struct CommandSpec {
    /// Override image ENTRYPOINT
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub entrypoint: Option<Vec<String>>,

    /// Override image CMD
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub args: Option<Vec<String>>,

    /// Override working directory
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub workdir: Option<String>,
}
1889
/// Serde default for the `rtype` field: a plain long-running
/// [`ResourceType::Service`].
fn default_resource_type() -> ResourceType {
    ResourceType::Service
}
1893
1894fn default_health() -> HealthSpec {
1895    HealthSpec {
1896        start_grace: Some(std::time::Duration::from_secs(5)),
1897        interval: None,
1898        timeout: None,
1899        retries: 3,
1900        check: HealthCheck::Tcp { port: 0 },
1901    }
1902}
1903
/// Resource type - determines container lifecycle
///
/// (De)serialized in lowercase: `service`, `job`, `cron`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "lowercase")]
pub enum ResourceType {
    /// Long-running container, receives traffic, load-balanced. This is the
    /// default variant.
    #[default]
    Service,
    /// Run-to-completion, triggered by endpoint/CLI/internal system
    Job,
    /// Scheduled run-to-completion, time-triggered
    Cron,
}
1916
/// Per-image override for the registry resolution chain order.
///
/// The puller's default chain is LOCAL store → local CACHE → shared S3 tier →
/// the ref's own registry (URL) → last-resort default registry. A spec/compose
/// entry may pin a different behavior; `None` on [`ImageSpec`] == [`Self::LocalFirst`].
///
/// (De)serialized in `snake_case`: `local_first`, `s3_first`, `remote_only`,
/// `local_only`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum SourcePolicy {
    /// Default chain order (local store → cache → S3 → URL → fallback).
    #[default]
    LocalFirst,
    /// Probe the shared S3 tier BEFORE the local in-process cache (otherwise
    /// the default order). Useful when S3 is the fleet's canonical warm pool.
    S3First,
    /// Skip every local/cached/S3 source — always resolve from the ref's own
    /// registry (or the configured default registry for a bare name).
    RemoteOnly,
    /// Resolve ONLY from local sources (local store + cache); never touch S3,
    /// the network, or the default-registry fallback. A miss is an error.
    LocalOnly,
}
1938
/// Container image specification
///
/// Only `name` is required in the input; unknown keys are rejected
/// (`deny_unknown_fields`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Validate)]
#[serde(deny_unknown_fields)]
pub struct ImageSpec {
    /// Image name (e.g., "ghcr.io/org/api:latest")
    pub name: crate::ImageRef,

    /// When to pull the image. Defaults to [`PullPolicy::IfNotPresent`] when
    /// the key is absent.
    #[serde(default = "default_pull_policy")]
    pub pull_policy: PullPolicy,

    /// Optional override for the registry resolution chain order.
    /// `None` is treated as [`SourcePolicy::LocalFirst`] (the default chain)
    /// and is omitted from serialized output.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_policy: Option<SourcePolicy>,
}
1955
/// Serde default for [`ImageSpec::pull_policy`]: use a locally present image
/// and pull only when it is missing.
fn default_pull_policy() -> PullPolicy {
    PullPolicy::IfNotPresent
}
1959
1960impl Default for ImageSpec {
1961    /// Placeholder default used by [`ServiceSpec::default`] (and downstream
1962    /// tests). The wrapped reference (`scratch:latest`) is not meaningful on
1963    /// its own — every real construction path should override this via
1964    /// [`ServiceSpec::minimal`] or an explicit literal. The point of having a
1965    /// `Default` is to make `ServiceSpec` itself `Default`-able so adding a new
1966    /// optional field on it does not force every existing literal site to be
1967    /// touched.
1968    fn default() -> Self {
1969        use std::str::FromStr;
1970        Self {
1971            name: crate::ImageRef::from_str("scratch:latest")
1972                .expect("'scratch:latest' is a valid image reference"),
1973            pull_policy: default_pull_policy(),
1974            source_policy: None,
1975        }
1976    }
1977}
1978
/// Image pull policy
///
/// (De)serialized in `snake_case`: `always`, `newer`, `if_not_present`,
/// `never`. The enum has no `Default` impl; [`ImageSpec`] supplies
/// `IfNotPresent` through its own serde default function.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum PullPolicy {
    /// Always pull the image, even if cached.
    Always,
    /// Resolve remote digest; pull and recreate when it differs from local/running.
    Newer,
    /// Use the local image if present; otherwise pull. Never contact a
    /// registry for revalidation when the image is already cached locally.
    /// This is the literal Docker/Kubernetes semantics — no silent upgrade
    /// to `Newer` for `:latest` tags (set `pull_policy: newer` explicitly
    /// when you want redeploy-picks-up-new-latest behavior).
    IfNotPresent,
    /// Never pull, use local image only.
    Never,
}
1996
// Maintainer notes (kept as plain comments because this type derives
// `utoipa::ToSchema`, and the `///` text below presumably feeds the
// generated API schema — confirm before rewording it):
// - `path` is the only required key and must be non-empty to pass validation.
// - `read` and `write` default to `true` when omitted; `mknod` defaults to `false`.
// - Unknown keys are rejected (`deny_unknown_fields`).
/// Device passthrough specification
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Validate, utoipa::ToSchema)]
#[serde(deny_unknown_fields)]
pub struct DeviceSpec {
    /// Host device path (e.g., /dev/kvm, /dev/net/tun)
    #[validate(length(min = 1, message = "device path cannot be empty"))]
    pub path: String,

    /// Allow read access
    #[serde(default = "default_true")]
    pub read: bool,

    /// Allow write access
    #[serde(default = "default_true")]
    pub write: bool,

    /// Allow mknod (create device nodes)
    #[serde(default)]
    pub mknod: bool,
}
2017
/// Serde default helper for boolean fields that should be `true` when the
/// key is omitted (e.g. [`DeviceSpec::read`] and [`DeviceSpec::write`]).
fn default_true() -> bool {
    true
}
2021
/// Storage mount specification
///
/// Internally tagged: the variant is selected by a `type` key whose value is
/// the `snake_case` variant name (`bind`, `named`, `anonymous`, `tmpfs`,
/// `s3`); the remaining keys of the same map are the variant's fields.
/// Unknown keys are rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields, tag = "type", rename_all = "snake_case")]
pub enum StorageSpec {
    /// Bind mount from host path to container
    Bind {
        /// Path being mounted (the host side, per the variant description).
        source: String,
        /// Mount point inside the container.
        target: String,
        /// Mount read-only; defaults to `false`.
        #[serde(default)]
        readonly: bool,
    },
    /// Named persistent storage volume
    Named {
        /// Volume name.
        name: String,
        /// Mount point inside the container.
        target: String,
        /// Mount read-only; defaults to `false`.
        #[serde(default)]
        readonly: bool,
        /// Performance tier (default: local, SQLite-safe)
        #[serde(default)]
        tier: StorageTier,
        /// Optional size limit (e.g., "1Gi", "512Mi")
        #[serde(default, skip_serializing_if = "Option::is_none")]
        size: Option<String>,
    },
    /// Anonymous storage (auto-named, container lifecycle)
    Anonymous {
        /// Mount point inside the container.
        target: String,
        /// Performance tier (default: local)
        #[serde(default)]
        tier: StorageTier,
    },
    /// Memory-backed tmpfs mount
    Tmpfs {
        /// Mount point inside the container.
        target: String,
        /// Optional size limit; presumably the same "1Gi"/"512Mi" format as
        /// `Named::size` — confirm against the mount code.
        #[serde(default)]
        size: Option<String>,
        /// Optional mount mode; presumably permission bits — confirm.
        #[serde(default)]
        mode: Option<u32>,
    },
    /// S3-backed FUSE mount
    S3 {
        /// Bucket to mount.
        bucket: String,
        /// Optional key prefix within the bucket.
        #[serde(default)]
        prefix: Option<String>,
        /// Mount point inside the container.
        target: String,
        /// Mount read-only; defaults to `false`.
        #[serde(default)]
        readonly: bool,
        /// Optional endpoint override (e.g. for a non-default S3 endpoint).
        #[serde(default)]
        endpoint: Option<String>,
        /// Optional credentials reference. NOTE(review): whether this is a
        /// secret name or an inline value is not visible here — confirm.
        #[serde(default)]
        credentials: Option<String>,
    },
}
2075
/// Resource limits (upper bounds, not reservations)
///
/// Every field is optional and the derived `Default` leaves all of them
/// unset. Only `cpu` and `memory` carry a validation rule in this
/// definition; the value ranges quoted on the other fields are
/// documentation, not checks declared here. `Eq` is not derived because
/// `cpu` is an `f64`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default, Validate)]
#[serde(deny_unknown_fields)]
pub struct ResourcesSpec {
    /// CPU limit (cores, e.g., 0.5, 1, 2)
    #[serde(default)]
    #[validate(custom(function = "crate::spec::validate::validate_cpu_option_wrapper"))]
    pub cpu: Option<f64>,

    /// Memory limit (e.g., "512Mi", "1Gi", "2Gi")
    #[serde(default)]
    #[validate(custom(function = "crate::spec::validate::validate_memory_option_wrapper"))]
    pub memory: Option<String>,

    /// GPU resource request
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub gpu: Option<GpuSpec>,

    /// Maximum number of processes the container may spawn
    /// (Docker `--pids-limit`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub pids_limit: Option<i64>,

    /// CPUs that the container is allowed to execute on (Docker `--cpuset-cpus`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cpuset: Option<String>,

    /// Relative CPU shares (Docker `--cpu-shares`). Default weight is 1024.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cpu_shares: Option<u32>,

    /// Total memory limit including swap (Docker `--memory-swap`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub memory_swap: Option<String>,

    /// Soft memory limit (Docker `--memory-reservation`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub memory_reservation: Option<String>,

    /// Container memory swappiness, 0-100 (Docker `--memory-swappiness`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub memory_swappiness: Option<u8>,

    /// OOM-killer score adjustment (Docker `--oom-score-adj`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub oom_score_adj: Option<i32>,

    /// Disable the OOM killer for the container (Docker `--oom-kill-disable`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub oom_kill_disable: Option<bool>,

    /// Block IO weight, 10-1000 (Docker `--blkio-weight`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub blkio_weight: Option<u16>,
}
2131
/// Scheduling policy for GPU workloads
///
/// (De)serialized in `kebab-case`: `best-effort`, `gang`, `spread`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "kebab-case")]
pub enum SchedulingPolicy {
    /// Place as many replicas as possible; partial placement is acceptable (default)
    #[default]
    BestEffort,
    /// All replicas must be placed or none are; prevents partial GPU job deployment
    Gang,
    /// Spread replicas across nodes to maximize GPU distribution
    Spread,
}
2144
/// GPU sharing mode controlling how GPU resources are multiplexed.
///
/// (De)serialized in `kebab-case`: `exclusive`, `mps`, `time-slice`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "kebab-case")]
pub enum GpuSharingMode {
    /// Whole GPU per container (default). No sharing.
    #[default]
    Exclusive,
    /// NVIDIA Multi-Process Service: concurrent GPU compute sharing.
    /// Multiple containers run GPU kernels simultaneously with hardware isolation.
    Mps,
    /// NVIDIA time-slicing: round-robin GPU access across containers.
    /// Lower overhead than MPS but no concurrent execution.
    TimeSlice,
}
2159
/// Configuration for distributed GPU job coordination.
///
/// When enabled on a multi-replica GPU service, `ZLayer` injects standard
/// distributed training environment variables (`MASTER_ADDR`, `MASTER_PORT`,
/// `WORLD_SIZE`, `RANK`, `LOCAL_RANK`) so frameworks like `PyTorch`, `Horovod`,
/// and `DeepSpeed` can coordinate automatically.
///
/// Both fields are optional in the input. There is no `Default` impl, so the
/// defaults below apply only through deserialization.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Validate)]
#[serde(deny_unknown_fields)]
pub struct DistributedConfig {
    /// Communication backend: "nccl" (default), "gloo", or "mpi".
    ///
    /// Stored as a free-form string; no validation rule restricting it to
    /// the listed values is declared on this field.
    #[serde(default = "default_dist_backend")]
    pub backend: String,
    /// Port for rank-0 master coordination (default: 29500)
    #[serde(default = "default_dist_port")]
    pub master_port: u16,
}
2176
/// Serde default for [`DistributedConfig::backend`]: the `"nccl"` backend.
fn default_dist_backend() -> String {
    String::from("nccl")
}
2180
/// Serde default for [`DistributedConfig::master_port`]: port 29500.
fn default_dist_port() -> u16 {
    29_500
}
2184
/// Role of a node within a pipeline-parallel inference swarm.
///
/// (De)serialized in `kebab-case`: `stage`, `coordinator`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "kebab-case")]
pub enum SwarmRole {
    /// Holds a contiguous block of model layers and streams activations onward (default).
    #[default]
    Stage,
    /// Orchestrates the swarm: accepts requests and drives the ring of stages.
    Coordinator,
}
2195
/// A peer stage in a pipeline-parallel inference swarm.
///
/// The peer's layers form the half-open range `[layer_start, layer_end)`.
/// All three fields are required, and no validation rule (e.g.
/// `layer_start < layer_end`) is declared in this definition.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Validate)]
#[serde(deny_unknown_fields)]
pub struct SwarmPeer {
    /// The `ZLayer` service name of the peer stage.
    pub service: String,
    /// First model layer (inclusive) held by this peer.
    pub layer_start: u32,
    /// Last model layer (exclusive) held by this peer.
    pub layer_end: u32,
}
2207
/// Describes one node's contiguous layer-block in a pipeline-parallel swarm.
///
/// Activations stream coordinator -> stages in ring order: the coordinator
/// hands the input to the first stage, each stage runs its layer-block and
/// forwards the result to the next peer, and the final stage returns the
/// output back to the coordinator.
///
/// This replica's layers form the half-open range
/// `[layer_start, layer_end)` out of `layer_count` total layers. No rule
/// relating those three numbers is declared in this definition; only the
/// `peers` entries are validated (nested).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Validate)]
#[serde(deny_unknown_fields)]
pub struct ShardingSpec {
    /// Identifier shared by every node participating in this swarm.
    pub swarm_id: String,
    /// First model layer (inclusive) held by this replica.
    pub layer_start: u32,
    /// Last model layer (exclusive) held by this replica.
    pub layer_end: u32,
    /// Total number of layers in the full model.
    pub layer_count: u32,
    /// Role of this node within the swarm (defaults to `stage`).
    #[serde(default)]
    pub role: SwarmRole,
    /// Optional reference to the model/shard manifest this node loads.
    /// Omitted from serialized output when unset.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub manifest_ref: Option<String>,
    /// Ring peers this node streams activations to/from. Defaults to an
    /// empty list; each entry is validated as a nested [`SwarmPeer`].
    #[serde(default)]
    #[validate(nested)]
    pub peers: Vec<SwarmPeer>,
    /// The coordinator service name driving this swarm.
    /// Omitted from serialized output when unset.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub coordinator: Option<String>,
}
2239
/// GPU resource specification
///
/// Supported vendors:
/// - `nvidia` - NVIDIA GPUs via NVIDIA Container Toolkit (default)
/// - `amd` - AMD GPUs via `ROCm` (/dev/kfd + /dev/dri/renderD*)
/// - `intel` - Intel GPUs via VAAPI/i915 (/dev/dri/renderD*)
/// - `apple` - Apple Silicon GPUs via Metal/MPS (macOS only)
///
/// Unknown vendors fall back to DRI render node passthrough.
///
/// ## GPU mode (macOS only)
///
/// When `vendor` is `"apple"`, the `mode` field controls how GPU access is provided:
/// - `"native"` -- Seatbelt sandbox with direct Metal/MPS access (lowest overhead)
/// - `"vm"` -- libkrun micro-VM with GPU forwarding (stronger isolation)
/// - `None` (default) -- Auto-select based on platform and vendor
///
/// On Linux, `mode` is ignored; GPU passthrough always uses device node binding.
///
/// ## Schema notes
///
/// Every field is optional in the input (`count` defaults to 1, `vendor` to
/// `nvidia`, everything else to unset), and unset optional fields are omitted
/// from serialized output. `vendor` and `mode` are free-form strings; no
/// field-level validation rule is declared in this definition. There is no
/// `Default` impl, so the defaults apply only through deserialization.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Validate)]
#[serde(deny_unknown_fields)]
pub struct GpuSpec {
    /// Number of GPUs to request
    #[serde(default = "default_gpu_count")]
    pub count: u32,
    /// GPU vendor (`nvidia`, `amd`, `intel`, `apple`) - defaults to `nvidia`
    #[serde(default = "default_gpu_vendor")]
    pub vendor: String,
    /// GPU access mode (macOS only): `"native"`, `"vm"`, or `None` for auto-select
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mode: Option<String>,
    /// Pin to a specific GPU model (e.g. "A100", "H100").
    /// Substring match against detected GPU model names.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model: Option<String>,
    /// Scheduling policy for GPU workloads.
    /// - `best-effort` (default): place what fits
    /// - `gang`: all-or-nothing for distributed jobs
    /// - `spread`: distribute across nodes
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub scheduling: Option<SchedulingPolicy>,
    /// Distributed GPU job coordination.
    /// When set, injects `MASTER_ADDR`, `WORLD_SIZE`, `RANK`, `LOCAL_RANK` env vars.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub distributed: Option<DistributedConfig>,
    /// GPU sharing mode: exclusive (default), mps, or time-slice.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub sharing: Option<GpuSharingMode>,
    /// Host directory for the NVIDIA MPS control pipe.
    ///
    /// Only consulted when `sharing == Mps`. Defaults to `/tmp/nvidia-mps`
    /// when unset. The directory MUST exist on the host (created by the
    /// `nvidia-cuda-mps-control` daemon). It is bind-mounted into the
    /// container at the same path and exported as `CUDA_MPS_PIPE_DIRECTORY`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mps_pipe_dir: Option<String>,
    /// Host directory for NVIDIA MPS log output.
    ///
    /// Only consulted when `sharing == Mps`. Defaults to `/tmp/nvidia-log`
    /// when unset. The directory MUST exist on the host. It is bind-mounted
    /// into the container and exported as `CUDA_MPS_LOG_DIRECTORY`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mps_log_dir: Option<String>,
    /// CUDA device index this replica should see when `sharing == TimeSlice`.
    ///
    /// Emitted as `CUDA_VISIBLE_DEVICES=<slice_index>`, overriding the default
    /// 0..count visibility list. Use this together with a host-side NVIDIA
    /// time-slicing config to advertise a single physical GPU as multiple
    /// virtual slices.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub time_slice_index: Option<u32>,
    /// Optional host path to a NVIDIA time-slicing config YAML.
    ///
    /// When set, the file is bind-mounted read-only at
    /// `/etc/nvidia/gpu-time-slicing.yaml` inside the container so tools that
    /// inspect the slicing topology (e.g. monitoring sidecars) can read it.
    /// The file is not interpreted by `ZLayer` — it's purely informational for
    /// the workload.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub time_slicing_config_path: Option<String>,
    /// Pipeline-parallel sharding: this replica holds a contiguous block of
    /// model layers and streams activations to ring peers. Set for swarm-inference
    /// stage/coordinator services.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub sharding: Option<ShardingSpec>,
}
2325
/// Serde default for [`GpuSpec::count`]: request a single GPU.
fn default_gpu_count() -> u32 {
    1
}
2329
/// Serde default for [`GpuSpec::vendor`]: the `"nvidia"` vendor.
fn default_gpu_vendor() -> String {
    String::from("nvidia")
}
2333
/// Per-service network configuration (overlay + join policy).
///
/// Both sections are optional in the input; a missing section takes that
/// type's `Default` value, and the derived `Default` for this struct does
/// the same for both.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
#[derive(Default)]
pub struct ServiceNetworkSpec {
    /// Overlay network configuration
    #[serde(default)]
    pub overlays: OverlayConfig,

    /// Join policy (who can join this service)
    #[serde(default)]
    pub join: JoinPolicy,
}
2347
2348/// Overlay network configuration
2349#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
2350#[serde(deny_unknown_fields)]
2351pub struct OverlayConfig {
2352    /// Service-scoped overlay (service replicas only)
2353    #[serde(default)]
2354    pub service: OverlaySettings,
2355
2356    /// Global overlay (all services in deployment)
2357    #[serde(default)]
2358    pub global: OverlaySettings,
2359}
2360
2361impl Default for OverlayConfig {
2362    fn default() -> Self {
2363        Self {
2364            service: OverlaySettings {
2365                enabled: true,
2366                encrypted: true,
2367                isolated: true,
2368            },
2369            global: OverlaySettings {
2370                enabled: true,
2371                encrypted: true,
2372                isolated: false,
2373            },
2374        }
2375    }
2376}
2377
/// Settings for a single overlay network.
///
/// Note the two different "defaults": the derived [`Default`] yields
/// `false` for every field, whereas deserializing an empty map yields
/// `enabled = true` and `encrypted = true` through the per-field serde
/// defaults below.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(deny_unknown_fields)]
pub struct OverlaySettings {
    /// Enable this overlay. `true` when the key is omitted.
    #[serde(default = "default_enabled")]
    pub enabled: bool,

    /// Use encryption. `true` when the key is omitted.
    #[serde(default = "default_encrypted")]
    pub encrypted: bool,

    /// Isolate from other services/groups. `false` when the key is omitted.
    #[serde(default)]
    pub isolated: bool,
}
2394
/// Serde default for [`OverlaySettings::enabled`]: an overlay is enabled
/// unless the spec says otherwise.
fn default_enabled() -> bool {
    true
}
2398
/// Serde default for [`OverlaySettings::encrypted`]: an overlay is
/// encrypted unless the spec says otherwise.
fn default_encrypted() -> bool {
    true
}
2402
/// Join policy - controls who can join a service.
///
/// Both keys are optional when deserializing; unknown keys are rejected.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct JoinPolicy {
    /// Join mode. Filled in by `default_join_mode` when omitted.
    #[serde(default = "default_join_mode")]
    pub mode: JoinMode,

    /// Scope of join. Filled in by `default_join_scope` when omitted.
    #[serde(default = "default_join_scope")]
    pub scope: JoinScope,
}
2415
2416impl Default for JoinPolicy {
2417    fn default() -> Self {
2418        Self {
2419            mode: default_join_mode(),
2420            scope: default_join_scope(),
2421        }
2422    }
2423}
2424
/// Default join mode: [`JoinMode::Token`]. Used both as the serde default
/// for `JoinPolicy::mode` and by `JoinPolicy::default`.
fn default_join_mode() -> JoinMode {
    JoinMode::Token
}
2428
/// Default join scope: [`JoinScope::Service`]. Used both as the serde
/// default for `JoinPolicy::scope` and by `JoinPolicy::default`.
fn default_join_scope() -> JoinScope {
    JoinScope::Service
}
2432
/// Join mode.
///
/// Serialized in `snake_case`: `open`, `token`, `closed`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum JoinMode {
    /// Any trusted node in deployment can self-enroll
    Open,
    /// Requires a join key (recommended)
    Token,
    /// Only control-plane/scheduler can place replicas
    Closed,
}
2444
/// Join scope.
///
/// Serialized in `snake_case`: `service`, `global`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum JoinScope {
    /// Join this specific service
    Service,
    /// Join all services in deployment
    Global,
}
2454
/// Endpoint specification (proxy binding).
///
/// `name`, `protocol` and `port` are required; every other key is optional.
/// Unknown keys are rejected when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Validate)]
#[serde(deny_unknown_fields)]
pub struct EndpointSpec {
    /// Endpoint name (for routing). Validation rejects an empty name.
    #[validate(length(min = 1, message = "endpoint name cannot be empty"))]
    pub name: String,

    /// Protocol
    pub protocol: Protocol,

    /// Proxy listen port (external-facing port)
    #[validate(custom(function = "crate::spec::validate::validate_port_wrapper"))]
    pub port: u16,

    /// Container port the service actually listens on.
    /// Defaults to `port` when not specified; read it through the
    /// `target_port()` accessor to get that fallback applied.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub target_port: Option<u16>,

    /// URL path prefix (for http/https/websocket).
    ///
    /// Unlike the other optional fields this one carries no
    /// `skip_serializing_if`, so `None` is written out explicitly when
    /// serialized.
    pub path: Option<String>,

    /// Host pattern for routing (e.g. "api.example.com" or "*.example.com").
    /// `None` means match any host.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub host: Option<String>,

    /// Exposure type. Filled in by `default_expose` when omitted.
    #[serde(default = "default_expose")]
    pub expose: ExposeType,

    /// Optional stream (L4) proxy configuration
    /// Only applicable when protocol is tcp or udp
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stream: Option<StreamEndpointConfig>,

    /// Restrict this endpoint to backends in a specific replica role.
    ///
    /// When `Some`, only containers whose `replica_groups.role` matches this
    /// value receive traffic from this endpoint. When `None` (default), the
    /// endpoint accepts all containers of the service (legacy behavior).
    ///
    /// Validation: when set, the role MUST appear in the parent
    /// `ServiceSpec.replica_groups` (enforced at deploy time in the API
    /// handler, not via derive(Validate)).
    ///
    /// Example (a postgres service with primary + read replicas):
    ///
    /// ```yaml
    /// endpoints:
    ///   - name: write
    ///     port: 5432
    ///     protocol: tcp
    ///     target_role: primary
    ///   - name: read
    ///     port: 5433
    ///     protocol: tcp
    ///     target_role: read
    /// ```
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub target_role: Option<String>,

    /// Optional tunnel configuration for this endpoint
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tunnel: Option<EndpointTunnelConfig>,
}
2522
2523impl EndpointSpec {
2524    /// Returns the port the container actually listens on.
2525    /// Falls back to `port` when `target_port` is not specified.
2526    #[must_use]
2527    pub fn target_port(&self) -> u16 {
2528        self.target_port.unwrap_or(self.port)
2529    }
2530}
2531
/// Tunnel configuration for an endpoint.
///
/// Every key is optional. The derived [`Default`] and an empty map both
/// give `enabled = false`, `remote_port = 0` and `None` for the rest.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(deny_unknown_fields)]
pub struct EndpointTunnelConfig {
    /// Enable tunneling for this endpoint
    #[serde(default)]
    pub enabled: bool,

    /// Source node name (defaults to service's node)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub from: Option<String>,

    /// Destination node name (defaults to cluster ingress)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub to: Option<String>,

    /// Remote port to expose (0 = auto-assign)
    #[serde(default)]
    pub remote_port: u16,

    /// Override exposure for tunnel (public/internal)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub expose: Option<ExposeType>,

    /// On-demand access configuration
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub access: Option<TunnelAccessConfig>,
}
2560
/// On-demand access settings for `zlayer tunnel access`.
///
/// Every key is optional; both booleans default to `false`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(deny_unknown_fields)]
pub struct TunnelAccessConfig {
    /// Allow on-demand access via CLI
    #[serde(default)]
    pub enabled: bool,

    /// Maximum session duration (e.g., "4h", "30m").
    ///
    /// Stored as the raw string; this type does not parse or validate it.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_ttl: Option<String>,

    /// Log all access sessions
    #[serde(default)]
    pub audit: bool,
}
2577
/// Serde default for `EndpointSpec::expose`: endpoints are
/// [`ExposeType::Internal`] unless the spec says otherwise.
fn default_expose() -> ExposeType {
    ExposeType::Internal
}
2581
/// Protocol type.
///
/// Serialized in lowercase: `http`, `https`, `tcp`, `udp`, `websocket`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum Protocol {
    /// Serialized as `http`.
    Http,
    /// Serialized as `https`.
    Https,
    /// Serialized as `tcp`.
    Tcp,
    /// Serialized as `udp`.
    Udp,
    /// Serialized as `websocket`.
    Websocket,
}
2592
/// Exposure type.
///
/// Serialized in lowercase: `public`, `internal`. The derived [`Default`]
/// is [`ExposeType::Internal`].
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "lowercase")]
pub enum ExposeType {
    /// Serialized as `public`.
    Public,
    /// Serialized as `internal`; the default variant.
    #[default]
    Internal,
}
2601
/// Stream (L4) proxy configuration for TCP/UDP endpoints.
///
/// Every key is optional; both booleans default to `false`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(deny_unknown_fields)]
pub struct StreamEndpointConfig {
    /// Enable TLS termination for TCP (auto-provision cert)
    #[serde(default)]
    pub tls: bool,

    /// Enable PROXY protocol for passing client IP
    #[serde(default)]
    pub proxy_protocol: bool,

    /// Custom session timeout for UDP (default: 60s)
    /// Format: duration string like "60s", "5m"
    ///
    /// Kept as the raw string here; use `session_timeout_duration()` to get
    /// it parsed.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub session_timeout: Option<String>,

    /// Health check configuration for L4
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub health_check: Option<StreamHealthCheck>,
}
2623
2624impl StreamEndpointConfig {
2625    /// Parse the `session_timeout` string field into a [`std::time::Duration`].
2626    ///
2627    /// Accepts humantime-style durations (e.g. `"5m"`, `"90s"`, `"1h30m"`).
2628    /// Returns `None` when the field is unset or fails to parse (a malformed
2629    /// value falls back to the proxy's default session timeout rather than
2630    /// erroring at runtime).
2631    #[must_use]
2632    pub fn session_timeout_duration(&self) -> Option<std::time::Duration> {
2633        self.session_timeout
2634            .as_deref()
2635            .and_then(|s| humantime::parse_duration(s).ok())
2636    }
2637}
2638
/// Health check types for stream (L4) endpoints.
///
/// Internally tagged: the `type` key selects the variant, with values in
/// `snake_case` (`tcp_connect`, `udp_probe`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum StreamHealthCheck {
    /// TCP connect check - verifies port is accepting connections
    TcpConnect,
    /// UDP probe - sends request and optionally validates response
    UdpProbe {
        /// Request payload to send (can use hex escapes like \\xFF)
        request: String,
        /// Expected response pattern (optional regex)
        #[serde(default, skip_serializing_if = "Option::is_none")]
        expect: Option<String>,
    },
}
2654
/// Scaling configuration.
///
/// Internally tagged: the `mode` key selects the variant (`adaptive`,
/// `fixed` or `manual`). Unknown keys are rejected.
// The `Adaptive` variant is intentionally large (full adaptive/vertical/
// predictive config) while `Fixed`/`Manual` are tiny; this spec type is
// constructed rarely and read-mostly, so the size asymmetry is fine.
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "mode", rename_all = "lowercase", deny_unknown_fields)]
pub enum ScaleSpec {
    /// Adaptive scaling with metrics. `min` and `max` are required; all
    /// other keys are optional.
    #[serde(rename = "adaptive")]
    Adaptive {
        /// Minimum replicas (may be 0 to enable scale-to-zero)
        min: u32,

        /// Maximum replicas
        max: u32,

        /// Cooldown period between scale events, written as a humantime
        /// duration string. `None` when omitted.
        #[serde(default, with = "duration::option")]
        cooldown: Option<std::time::Duration>,

        /// Target metrics for scaling
        #[serde(default)]
        targets: ScaleTargets,

        /// Scale-up/scale-down velocity & stabilization behavior.
        ///
        /// Mirrors the Kubernetes HPA v2 `behavior` field: bounds how fast
        /// replicas may be added/removed and how long recent recommendations
        /// are remembered to prevent flapping. Defaults to a conservative
        /// symmetric policy when omitted.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        behavior: Option<ScaleBehavior>,

        /// Event-driven scaling triggers (KEDA-style scalers: queue depth,
        /// Kafka lag, cron windows, Prometheus queries). The desired replica
        /// count is the maximum across all triggers and metric targets. A
        /// non-zero trigger gates wake-from-zero. Consumed in Phase 2.
        #[serde(default, skip_serializing_if = "Vec::is_empty")]
        triggers: Vec<ScaleTrigger>,

        /// Idle window after which a service with `min: 0` scales to zero.
        /// `None` disables scale-to-zero even when `min == 0`. Consumed in Phase 2.
        ///
        /// Note the camelCase key: this is `idleWindow` on the wire.
        #[serde(
            default,
            rename = "idleWindow",
            with = "duration::option",
            skip_serializing_if = "Option::is_none"
        )]
        idle_window: Option<std::time::Duration>,

        /// Vertical (right-sizing) autoscaling for this service's containers.
        /// Consumed in Phase 3.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        vertical: Option<VerticalScaleSpec>,

        /// Predictive/proactive scaling that establishes a forecast-driven
        /// replica floor ahead of anticipated load. Consumed in Phase 4.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        predictive: Option<PredictiveSpec>,
    },

    /// Fixed number of replicas, given by the required `replicas` key.
    #[serde(rename = "fixed")]
    Fixed { replicas: u32 },

    /// Manual scaling (no automatic scaling)
    #[serde(rename = "manual")]
    Manual,
}
2725
2726impl Default for ScaleSpec {
2727    fn default() -> Self {
2728        Self::Adaptive {
2729            min: 1,
2730            max: 10,
2731            cooldown: Some(std::time::Duration::from_secs(30)),
2732            targets: ScaleTargets::default(),
2733            behavior: None,
2734            triggers: Vec::new(),
2735            idle_window: None,
2736            vertical: None,
2737            predictive: None,
2738        }
2739    }
2740}
2741
2742/// Target metrics for adaptive scaling
2743#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
2744#[serde(deny_unknown_fields)]
2745#[derive(Default)]
2746pub struct ScaleTargets {
2747    /// CPU percentage threshold (0-100)
2748    #[serde(default)]
2749    pub cpu: Option<u8>,
2750
2751    /// Memory percentage threshold (0-100)
2752    #[serde(default)]
2753    pub memory: Option<u8>,
2754
2755    /// Requests per second threshold
2756    #[serde(default)]
2757    pub rps: Option<u32>,
2758
2759    /// Custom application metrics (scraped from the service itself, e.g. via a
2760    /// Prometheus endpoint the app exposes). Kubernetes-HPA "custom metrics".
2761    #[serde(default, skip_serializing_if = "Vec::is_empty")]
2762    pub custom: Vec<MetricTarget>,
2763
2764    /// External metrics (sourced from outside the service: another service's
2765    /// metrics endpoint, a queue, a database). Kubernetes-HPA "external metrics".
2766    #[serde(default, skip_serializing_if = "Vec::is_empty")]
2767    pub external: Vec<MetricTarget>,
2768}
2769
/// How a metric's value is compared against its target.
///
/// Serialized in `snake_case`: `average_value`, `value`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum MetricKind {
    /// Target is an average value per replica (total / replicas vs target).
    /// This is the default variant.
    #[default]
    AverageValue,
    /// Target is a raw aggregate value (compared directly, replica-independent).
    Value,
}
2780
/// A single custom or external metric target.
///
/// `name` and `target` are required; `source` and `kind` are optional.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct MetricTarget {
    /// Metric name as reported by the metrics source.
    pub name: String,

    /// Logical source identifier (e.g. `prometheus`, a service name, a queue).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,

    /// Target value. Scale up when the (per-replica or aggregate) value meets
    /// or exceeds this; scale down when comfortably below (hysteresis).
    ///
    /// `OrderedFloat` so the spec keeps `Eq`; serializes as a plain number.
    pub target: ordered_float::OrderedFloat<f64>,

    /// How the value is compared. Defaults to per-replica average
    /// ([`MetricKind::AverageValue`]).
    #[serde(default)]
    pub kind: MetricKind,
}
2802
/// Scale-up/scale-down velocity & stabilization behavior (HPA v2 `behavior`).
///
/// The keys are camelCase on the wire (`scaleUp`, `scaleDown`); both are
/// optional and omitted from the output when unset.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(deny_unknown_fields)]
pub struct ScaleBehavior {
    /// Behavior when adding replicas.
    #[serde(default, rename = "scaleUp", skip_serializing_if = "Option::is_none")]
    pub scale_up: Option<ScaleDirection>,

    /// Behavior when removing replicas.
    #[serde(default, rename = "scaleDown", skip_serializing_if = "Option::is_none")]
    pub scale_down: Option<ScaleDirection>,
}
2815
/// Velocity policy for one scaling direction.
///
/// Every key is optional. The type does not implement `Default`, so a
/// value only exists when the spec supplies the enclosing block.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct ScaleDirection {
    /// How long to remember prior recommendations and pick the most
    /// conservative (highest on scale-down, lowest on scale-up) to damp
    /// flapping. Kubernetes default: 0s scale-up, 300s scale-down.
    ///
    /// Written as a humantime duration string under the camelCase key
    /// `stabilizationWindow`.
    #[serde(
        default,
        rename = "stabilizationWindow",
        with = "duration::option",
        skip_serializing_if = "Option::is_none"
    )]
    pub stabilization_window: Option<std::time::Duration>,

    /// Rate-limit policies; the `select` policy chooses among them.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub policies: Vec<ScalePolicy>,

    /// Which policy value to apply when multiple are given. Defaults to
    /// [`PolicySelect::Max`].
    #[serde(default)]
    pub select: PolicySelect,
}
2839
/// A single rate-limit policy: at most `value` Pods/Percent per `period`.
///
/// `type` and `value` are required; `period` is optional.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct ScalePolicy {
    /// Whether `value` is an absolute pod count or a percentage of current.
    /// Appears as the `type` key on the wire.
    #[serde(rename = "type")]
    pub policy_type: PolicyType,

    /// The allowed change magnitude.
    pub value: u32,

    /// The period the `value` applies over, written as a humantime
    /// duration string. `None` when omitted.
    #[serde(
        default,
        with = "duration::option",
        skip_serializing_if = "Option::is_none"
    )]
    pub period: Option<std::time::Duration>,
}
2859
/// Unit of a [`ScalePolicy`] value.
///
/// Serialized in `snake_case`: `pods`, `percent`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum PolicyType {
    /// Absolute number of replicas.
    Pods,
    /// Percentage of the current replica count.
    Percent,
}
2869
/// How to combine multiple [`ScalePolicy`] entries.
///
/// Serialized in `snake_case`: `max`, `min`, `disabled`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum PolicySelect {
    /// Allow the largest change among the policies (HPA default).
    /// This is the default variant.
    #[default]
    Max,
    /// Allow the smallest change among the policies (most conservative).
    Min,
    /// Disable scaling in this direction.
    Disabled,
}
2882
/// An event-driven scaling trigger (KEDA-style scaler). Consumed in Phase 2.
///
/// No `deny_unknown_fields`: serde's `flatten` collects the variant fields into
/// an internal map, which is incompatible with `deny_unknown_fields` (it would
/// reject the flattened keys as "unknown").
///
/// On the wire a trigger is a single map: the `type` tag and the fields of
/// the selected [`ScaleTriggerKind`] variant sit next to the optional `min`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ScaleTrigger {
    /// Scaler type discriminator and its configuration.
    #[serde(flatten)]
    pub kind: ScaleTriggerKind,

    /// Optional replica floor this trigger asserts while active (e.g. a cron
    /// window that guarantees a warm baseline).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub min: Option<u32>,
}
2899
2900/// The concrete kind of an event-driven scaler.
2901#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
2902#[serde(tag = "type", rename_all = "snake_case")]
2903pub enum ScaleTriggerKind {
2904    /// Scale on the depth of a queue (Redis list, AMQP, SQS, …).
2905    Queue {
2906        /// Connection/source URL of the queue backend.
2907        source: String,
2908        /// Queue/key name to measure.
2909        key: String,
2910        /// Target depth handled per replica.
2911        target: u64,
2912    },
2913    /// Scale on a Kafka consumer-group lag.
2914    Kafka {
2915        /// Bootstrap brokers.
2916        brokers: String,
2917        /// Topic to measure.
2918        topic: String,
2919        /// Consumer group whose lag is measured.
2920        group: String,
2921        /// Target lag handled per replica.
2922        target: u64,
2923    },
2924    /// Scale on observed requests-per-second from the proxy.
2925    HttpRps {
2926        /// Target RPS handled per replica.
2927        target: u32,
2928    },
2929    /// Scale on a Prometheus query result.
2930    Prometheus {
2931        /// Prometheus base URL.
2932        source: String,
2933        /// `PromQL` query returning a scalar.
2934        query: String,
2935        /// Target value handled per replica.
2936        target: ordered_float::OrderedFloat<f64>,
2937    },
2938    /// Maintain a replica floor during a recurring time window.
2939    Cron {
2940        /// Cron schedule (start of the window).
2941        schedule: String,
2942        /// Window duration.
2943        #[serde(with = "duration::option")]
2944        duration: Option<std::time::Duration>,
2945    },
2946}
2947
/// Vertical (right-sizing) autoscaling configuration. Consumed in Phase 3.
///
/// Every key is optional. The four bound keys are camelCase on the wire
/// (`minCpuMillis`, `maxCpuMillis`, `minMemoryMib`, `maxMemoryMib`) and are
/// omitted from the output when unset.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct VerticalScaleSpec {
    /// Operating mode. Defaults to [`VerticalMode::Off`].
    #[serde(default)]
    pub mode: VerticalMode,

    /// Lower bound on recommended CPU millicores (never recommend below).
    #[serde(
        default,
        rename = "minCpuMillis",
        skip_serializing_if = "Option::is_none"
    )]
    pub min_cpu_millis: Option<u32>,

    /// Upper bound on recommended CPU millicores.
    #[serde(
        default,
        rename = "maxCpuMillis",
        skip_serializing_if = "Option::is_none"
    )]
    pub max_cpu_millis: Option<u32>,

    /// Lower bound on recommended memory in MiB.
    #[serde(
        default,
        rename = "minMemoryMib",
        skip_serializing_if = "Option::is_none"
    )]
    pub min_memory_mib: Option<u32>,

    /// Upper bound on recommended memory in MiB.
    #[serde(
        default,
        rename = "maxMemoryMib",
        skip_serializing_if = "Option::is_none"
    )]
    pub max_memory_mib: Option<u32>,

    /// Target headroom percentile to size to (e.g. 90 = size to p90 usage).
    /// Filled in by `default_vertical_percentile` when omitted.
    #[serde(default = "default_vertical_percentile")]
    pub percentile: u8,
}
2992
/// Serde default for `VerticalScaleSpec::percentile`: size to p90 usage.
fn default_vertical_percentile() -> u8 {
    const DEFAULT_PERCENTILE: u8 = 90;
    DEFAULT_PERCENTILE
}
2996
/// Vertical autoscaling operating mode.
///
/// Serialized in `snake_case`: `off`, `recommend`, `auto`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum VerticalMode {
    /// Disabled. This is the default variant.
    #[default]
    Off,
    /// Compute and report recommendations but never apply them.
    Recommend,
    /// Compute and automatically apply recommendations.
    Auto,
}
3009
/// Predictive/proactive scaling configuration. Consumed in Phase 4.
///
/// Every key is optional.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct PredictiveSpec {
    /// Whether predictive scaling is enabled. `false` when omitted.
    #[serde(default)]
    pub enabled: bool,

    /// Forecasting method. Defaults to [`ForecastMethod::Ewma`].
    #[serde(default)]
    pub method: ForecastMethod,

    /// How far ahead to forecast, written as a humantime duration string.
    /// `None` when omitted.
    #[serde(
        default,
        with = "duration::option",
        skip_serializing_if = "Option::is_none"
    )]
    pub horizon: Option<std::time::Duration>,
}
3030
/// Forecasting method for predictive scaling.
///
/// Serialized in `snake_case`: `ewma`, `holt_winters`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum ForecastMethod {
    /// Exponentially weighted moving average (level only).
    /// This is the default variant.
    #[default]
    Ewma,
    /// Holt-Winters triple exponential smoothing (level + trend + seasonality).
    HoltWinters,
}
3041
/// Dependency specification.
///
/// Only `service` is required; the remaining keys are filled in by the
/// `default_condition`, `default_timeout` and `default_on_timeout` helpers
/// when omitted.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct DependsSpec {
    /// Service name to depend on
    pub service: String,

    /// Condition for dependency
    #[serde(default = "default_condition")]
    pub condition: DependencyCondition,

    /// Maximum time to wait, written as a humantime duration string.
    #[serde(default = "default_timeout", with = "duration::option")]
    pub timeout: Option<std::time::Duration>,

    /// Action on timeout
    #[serde(default = "default_on_timeout")]
    pub on_timeout: TimeoutAction,
}
3061
/// Serde default for `DependsSpec::condition`:
/// [`DependencyCondition::Healthy`].
fn default_condition() -> DependencyCondition {
    DependencyCondition::Healthy
}
3065
/// Serde default for `DependsSpec::timeout`: five minutes.
// Always `Some`, but serde's `default = "..."` requires a function returning
// the field's own type (`Option<Duration>`), hence the lint exemption.
#[allow(clippy::unnecessary_wraps)]
fn default_timeout() -> Option<std::time::Duration> {
    const DEFAULT_WAIT_SECS: u64 = 300;
    Some(std::time::Duration::from_secs(DEFAULT_WAIT_SECS))
}
3070
/// Serde default for `DependsSpec::on_timeout`: [`TimeoutAction::Fail`].
fn default_on_timeout() -> TimeoutAction {
    TimeoutAction::Fail
}
3074
/// Dependency condition.
///
/// Serialized in lowercase: `started`, `healthy`, `ready`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum DependencyCondition {
    /// Container process exists
    Started,
    /// Health check passes
    Healthy,
    /// Service is available for routing
    Ready,
}
3086
/// Timeout action: what to do when a dependency wait times out.
///
/// Serialized in lowercase: `fail`, `warn`, `continue`. The effect of each
/// action is implemented by whatever consumes the spec, not here.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum TimeoutAction {
    /// Serialized as `fail`.
    Fail,
    /// Serialized as `warn`.
    Warn,
    /// Serialized as `continue`.
    Continue,
}
3095
/// Health check specification.
///
/// Only `check` is required. The three durations are humantime strings and
/// deserialize to `None` when omitted.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct HealthSpec {
    /// Grace period before first check
    #[serde(default, with = "duration::option")]
    pub start_grace: Option<std::time::Duration>,

    /// Interval between checks
    #[serde(default, with = "duration::option")]
    pub interval: Option<std::time::Duration>,

    /// Timeout per check
    #[serde(default, with = "duration::option")]
    pub timeout: Option<std::time::Duration>,

    /// Number of retries before marking unhealthy. Filled in by
    /// `default_retries` when omitted.
    #[serde(default = "default_retries")]
    pub retries: u32,

    /// Health check type and parameters
    pub check: HealthCheck,
}
3119
/// Serde default for `HealthSpec::retries`: three attempts.
fn default_retries() -> u32 {
    const DEFAULT_RETRIES: u32 = 3;
    DEFAULT_RETRIES
}
3123
impl Default for HealthSpec {
    /// Delegates to [`default_health`], the serde fallback used when no
    /// `health:` block is supplied in a deployment spec.
    ///
    /// Per the original note this yields a 5-second start grace, 3 retries,
    /// and a TCP check against port 0 ("use first endpoint").
    ///
    /// NOTE(review): `default_health` is defined outside this section, so
    /// those values are not verified here. Also, a `health:` block that
    /// omits `start_grace` deserializes it to `None` via the per-field
    /// serde default, which is not the 5-second value described above —
    /// confirm which is intended.
    fn default() -> Self {
        default_health()
    }
}
3133
/// Health check type.
///
/// Internally tagged: the `type` key selects the variant, with lowercase
/// values (`tcp`, `http`, `command`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum HealthCheck {
    /// TCP port check
    Tcp {
        /// Port to check (0 = use first endpoint)
        port: u16,
    },

    /// HTTP check
    Http {
        /// URL to check
        url: String,
        /// Expected status code. Filled in by `default_expect_status` when
        /// omitted.
        #[serde(default = "default_expect_status")]
        expect_status: u16,
    },

    /// Command check
    Command {
        /// Command to run
        command: String,
    },
}
3159
/// Serde default for the HTTP health check's `expect_status`: `200`.
fn default_expect_status() -> u16 {
    const HTTP_OK: u16 = 200;
    HTTP_OK
}
3163
3164/// Init actions specification
3165#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
3166#[serde(deny_unknown_fields)]
3167#[derive(Default)]
3168pub struct InitSpec {
3169    /// Init steps to run before container starts
3170    #[serde(default)]
3171    pub steps: Vec<InitStep>,
3172}
3173
/// Lifecycle policy for service / job / cron containers.
///
/// Currently exposes a single `delete_on_exit` knob that, when `true`,
/// instructs higher layers to remove the container record (and its bundle)
/// once it has terminated. Other layers consume this field; this type is
/// purely descriptive.
///
/// Besides the serde traits this type derives `utoipa::ToSchema`. Unknown
/// keys are rejected when deserializing.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema)]
#[serde(deny_unknown_fields)]
pub struct LifecycleSpec {
    /// When true, terminated containers (and their bundles) are removed
    /// automatically rather than retained for inspection. Defaults to
    /// `false`, preserving the historical retain-on-exit behavior.
    #[serde(default)]
    pub delete_on_exit: bool,
}
3189
/// Init action step.
///
/// `id` and `uses` are required; the remaining keys are optional.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct InitStep {
    /// Step identifier
    pub id: String,

    /// Action to perform (e.g., "`init.wait_tcp`")
    pub uses: String,

    /// Parameters for the action: a free-form map of JSON values. Empty
    /// when omitted.
    #[serde(default)]
    pub with: InitParams,

    /// Number of retries
    #[serde(default)]
    pub retry: Option<u32>,

    /// Maximum time for this step, written as a humantime duration string.
    /// `None` when omitted.
    #[serde(default, with = "duration::option")]
    pub timeout: Option<std::time::Duration>,

    /// Action on failure. Filled in by `default_on_failure` when omitted.
    #[serde(default = "default_on_failure")]
    pub on_failure: FailureAction,
}
3216
/// Serde default for `InitStep::on_failure`: [`FailureAction::Fail`].
fn default_on_failure() -> FailureAction {
    FailureAction::Fail
}
3220
/// Init action parameters: parameter name mapped to an arbitrary JSON value.
/// This type does not constrain which keys or value shapes are allowed.
pub type InitParams = std::collections::HashMap<String, serde_json::Value>;
3223
/// Failure action for init steps.
///
/// Serialized in lowercase: `fail`, `warn`, `continue`. The effect of each
/// action is implemented by whatever runs the init steps, not here.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum FailureAction {
    /// Serialized as `fail`.
    Fail,
    /// Serialized as `warn`.
    Warn,
    /// Serialized as `continue`.
    Continue,
}
3232
3233/// Error handling policies
3234#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
3235#[serde(deny_unknown_fields)]
3236#[derive(Default)]
3237pub struct ErrorsSpec {
3238    /// Init failure policy
3239    #[serde(default)]
3240    pub on_init_failure: InitFailurePolicy,
3241
3242    /// Panic/restart policy
3243    #[serde(default)]
3244    pub on_panic: PanicPolicy,
3245}
3246
/// Init failure policy.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct InitFailurePolicy {
    /// Action to take when init fails. Filled in by `default_init_action`
    /// when omitted.
    #[serde(default = "default_init_action")]
    pub action: InitFailureAction,
}
3254
3255impl Default for InitFailurePolicy {
3256    fn default() -> Self {
3257        Self {
3258            action: default_init_action(),
3259        }
3260    }
3261}
3262
/// Default init-failure action: [`InitFailureAction::Fail`]. Used both as
/// the serde default for `InitFailurePolicy::action` and by its `Default`.
fn default_init_action() -> InitFailureAction {
    InitFailureAction::Fail
}
3266
/// Init failure action.
///
/// Serialized in lowercase: `fail`, `restart`, `backoff`. The effect of
/// each action is implemented by whatever consumes the spec, not here.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum InitFailureAction {
    /// Serialized as `fail`.
    Fail,
    /// Serialized as `restart`.
    Restart,
    /// Serialized as `backoff`.
    Backoff,
}
3275
/// Panic policy.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct PanicPolicy {
    /// Action to take on panic. Filled in by `default_panic_action` when
    /// omitted.
    #[serde(default = "default_panic_action")]
    pub action: PanicAction,
}
3283
3284impl Default for PanicPolicy {
3285    fn default() -> Self {
3286        Self {
3287            action: default_panic_action(),
3288        }
3289    }
3290}
3291
/// Default panic action: [`PanicAction::Restart`]. Used both as the serde
/// default for `PanicPolicy::action` and by its `Default`.
fn default_panic_action() -> PanicAction {
    PanicAction::Restart
}
3295
/// Panic action.
///
/// Variants are (de)serialized in lowercase: `"restart"`, `"shutdown"`,
/// `"isolate"`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum PanicAction {
    /// Wire value `"restart"`.
    Restart,
    /// Wire value `"shutdown"`.
    Shutdown,
    /// Wire value `"isolate"`.
    Isolate,
}
3304
3305// ==========================================================================
3306// Network / Access Control types
3307// ==========================================================================
3308
3309/// A network policy defines an access control group with membership rules
3310/// and service access policies.
3311#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
3312pub struct NetworkPolicySpec {
3313    /// Unique network name.
3314    pub name: String,
3315
3316    /// Human-readable description.
3317    #[serde(default, skip_serializing_if = "Option::is_none")]
3318    pub description: Option<String>,
3319
3320    /// CIDR ranges that belong to this network (e.g., "10.200.0.0/16", "192.168.1.0/24").
3321    #[serde(default)]
3322    pub cidrs: Vec<String>,
3323
3324    /// Named members (users, groups, nodes) of this network.
3325    #[serde(default)]
3326    pub members: Vec<NetworkMember>,
3327
3328    /// Access rules defining which services this network can reach.
3329    #[serde(default)]
3330    pub access_rules: Vec<AccessRule>,
3331}
3332
/// A member of a network.
///
/// `kind` is optional on the wire and defaults to [`MemberKind::User`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct NetworkMember {
    /// Member identifier (username, group name, node ID, or CIDR).
    pub name: String,
    /// Type of member; tells how `name` is to be interpreted.
    #[serde(default)]
    pub kind: MemberKind,
}
3342
/// Type of network member.
///
/// Variants are (de)serialized in lowercase (`"user"`, `"group"`, `"node"`,
/// `"cidr"`); the default is [`MemberKind::User`].
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "lowercase")]
pub enum MemberKind {
    /// An individual user identity.
    #[default]
    User,
    /// A group of users.
    Group,
    /// A specific cluster node.
    Node,
    /// A CIDR range (redundant with `NetworkPolicySpec::cidrs`, but allows a
    /// per-member CIDR).
    Cidr,
}
3357
/// An access rule determining what a network can reach.
///
/// Every field is optional on the wire: `service` and `deployment` default
/// to the `"*"` wildcard, `ports` to `None`, and `action` to
/// [`AccessAction::Allow`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct AccessRule {
    /// Target service name, or "*" for all services.
    #[serde(default = "wildcard")]
    pub service: String,

    /// Target deployment name, or "*" for all deployments.
    #[serde(default = "wildcard")]
    pub deployment: String,

    /// Specific ports allowed. None means all ports. Omitted from the output
    /// when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ports: Option<Vec<u16>>,

    /// Whether to allow or deny access.
    #[serde(default)]
    pub action: AccessAction,
}
3377
/// Serde default yielding the `"*"` match-everything pattern.
fn wildcard() -> String {
    String::from("*")
}
3381
/// Access control action.
///
/// Variants are (de)serialized in lowercase (`"allow"`, `"deny"`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "lowercase")]
pub enum AccessAction {
    /// Allow access (default).
    #[default]
    Allow,
    /// Deny access.
    Deny,
}
3392
3393// ==========================================================================
3394// Container bridge / overlay network types (Docker-compatible)
3395// ==========================================================================
3396//
3397// These types model user-defined bridge or overlay networks that standalone
3398// containers can attach to — the Docker-style "docker network create" model.
3399// They are intentionally named `BridgeNetwork*` to avoid colliding with the
3400// CIDR-ACL `NetworkPolicySpec` types above, which model a completely
3401// different concept (access-control groups).
3402
/// A user-defined bridge or overlay network that containers can attach to.
///
/// `id`, `name`, and `created_at` are required when deserializing; the
/// remaining fields fall back to their defaults when absent.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, utoipa::ToSchema)]
pub struct BridgeNetwork {
    /// Opaque server-generated identifier (UUID v4).
    pub id: String,

    /// Human-readable, unique name (must match `^[a-z0-9][a-z0-9_-]{0,63}$`).
    pub name: String,

    /// Driver backing the network (bridge vs. overlay). Defaults to
    /// [`BridgeNetworkDriver::Bridge`].
    #[serde(default)]
    pub driver: BridgeNetworkDriver,

    /// IPv4/IPv6 subnet in CIDR notation (e.g. `"10.240.0.0/24"`). Omitted
    /// from the output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub subnet: Option<String>,

    /// Arbitrary key/value labels for filtering and grouping. Omitted from
    /// the output when empty.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub labels: HashMap<String, String>,

    /// If true, containers attached to this network cannot reach the outside
    /// world — only other containers on the same network.
    #[serde(default)]
    pub internal: bool,

    /// Inter-network isolation (Docker-style): members of this network reach
    /// each other and the internet (egress), but NOT other networks' members
    /// or arbitrary cluster services. Distinct from `internal`, which blocks
    /// egress entirely. When false, the network rides the flat cluster overlay
    /// mesh (today's behavior).
    #[serde(default)]
    pub isolated: bool,

    /// Creation timestamp (UTC, RFC 3339). Exposed as a `date-time` string in
    /// the generated schema.
    #[schema(value_type = String, format = "date-time")]
    pub created_at: chrono::DateTime<chrono::Utc>,
}
3441
/// Backing driver for a [`BridgeNetwork`].
///
/// Variants are (de)serialized in lowercase (`"bridge"`, `"overlay"`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default, utoipa::ToSchema)]
#[serde(rename_all = "lowercase")]
pub enum BridgeNetworkDriver {
    /// Linux bridge on the local host (single-host, default).
    #[default]
    Bridge,
    /// Overlay network spanning multiple hosts.
    Overlay,
}
3452
/// A container attached to a [`BridgeNetwork`].
///
/// Only `container_id` is required; optional fields are omitted from the
/// serialized output when unset or empty.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, utoipa::ToSchema)]
pub struct BridgeNetworkAttachment {
    /// Runtime-provided container id.
    pub container_id: String,

    /// Container name, if known.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub container_name: Option<String>,

    /// DNS aliases the container can be reached by on this network.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub aliases: Vec<String>,

    /// Assigned IPv4 address on the network (if any), kept as a string.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ipv4: Option<String>,
}
3471
3472// ==========================================================================
3473// Registry auth (inline, not persisted) — §3.10 of ZLAYER_SDK_FIXES.md
3474// ==========================================================================
3475//
3476// Inline credentials a client can attach to a single pull or container-create
3477// request without first POSTing them to `/api/v1/credentials/registry`. The
3478// daemon uses them exactly once — they are never logged, never persisted, and
3479// never echoed back on a response.
3480//
3481// For requests that instead want to reuse an already-stored credential, the
3482// `CreateContainerRequest` / `PullImageRequest` DTOs also accept a
3483// `registry_credential_id` pointing at the `RegistryCredentialStore`. Inline
3484// `RegistryAuth` takes precedence when both are provided.
3485
3486/// Inline Docker/OCI registry credentials attached to a single pull request.
3487///
3488/// Prefer persistent credentials via `/api/v1/credentials/registry` for
3489/// long-lived services. Use this inline form for one-off pulls (e.g. CI
3490/// runners fetching a private image for a single job) where persisting a
3491/// credential is undesirable.
3492#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, utoipa::ToSchema)]
3493pub struct RegistryAuth {
3494    /// Username for the registry (for basic auth) or a placeholder
3495    /// identifier when `auth_type == Token`.
3496    pub username: String,
3497    /// Password or bearer token. **Never** logged or returned on any
3498    /// response — consumed once and dropped.
3499    pub password: String,
3500    /// Which authentication scheme to use against the registry.
3501    #[serde(default = "default_registry_auth_type")]
3502    pub auth_type: RegistryAuthType,
3503}
3504
/// Authentication scheme for a [`RegistryAuth`].
///
/// Variants are (de)serialized in snake_case (`"basic"`, `"token"`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default, utoipa::ToSchema)]
#[serde(rename_all = "snake_case")]
pub enum RegistryAuthType {
    /// HTTP Basic authentication (username + password). Default.
    #[default]
    Basic,
    /// Bearer token authentication. `password` carries the token; `username`
    /// is typically a placeholder such as `"oauth2accesstoken"` or `"<token>"`.
    Token,
}
3516
/// Serde default for [`RegistryAuth::auth_type`]. Kept as a free function so
/// `#[serde(default = "...")]` can reference it.
///
/// Returns [`RegistryAuthType::Basic`].
#[must_use]
pub fn default_registry_auth_type() -> RegistryAuthType {
    RegistryAuthType::Basic
}
3523
3524// ==========================================================================
3525// Container restart policy (Docker-style) — §3.4 of ZLAYER_SDK_FIXES.md
3526// ==========================================================================
3527//
3528// Named `ContainerRestartPolicy` / `ContainerRestartKind` rather than
3529// `RestartPolicy` / `RestartKind` to avoid colliding with ZLayer's existing
3530// `PanicPolicy`/`PanicAction` types and to make the runtime-level (as opposed
3531// to panic-driven) nature of this policy explicit.
3532
/// Container-runtime-level restart policy.
///
/// Maps onto Docker's `HostConfig.RestartPolicy`. Distinct from
/// [`PanicPolicy`], which governs what `ZLayer` does in response to an
/// application panic (it does not set a Docker restart policy).
///
/// `kind` is required; unknown fields are rejected during deserialization.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, utoipa::ToSchema)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct ContainerRestartPolicy {
    /// Which restart policy to apply.
    pub kind: ContainerRestartKind,

    /// For `on_failure` only: maximum number of restart attempts before
    /// giving up. Ignored by other kinds. `None` means "retry forever".
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_attempts: Option<u32>,

    /// Humantime-formatted delay between restarts (e.g. `"500ms"`,
    /// `"2s"`). Accepted for forward-compatibility but currently ignored
    /// by the Docker backend: bollard's `RestartPolicy` has no per-kind
    /// delay field. When set, the runtime emits a warning.
    ///
    /// Stored as a raw string; this type does not parse or validate it.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub delay: Option<String>,
}
3556
/// Which flavor of container restart policy to apply.
///
/// Note the wire format is snake_case (`"no"`, `"always"`,
/// `"unless_stopped"`, `"on_failure"`), which differs from Docker's own
/// hyphenated spellings quoted on the variants below.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, utoipa::ToSchema)]
#[serde(rename_all = "snake_case")]
pub enum ContainerRestartKind {
    /// Never restart (Docker's `"no"`).
    No,
    /// Always restart (Docker's `"always"`).
    Always,
    /// Restart unless the user explicitly stopped the container
    /// (Docker's `"unless-stopped"`).
    UnlessStopped,
    /// Restart only when the container exits with a non-zero code
    /// (Docker's `"on-failure"`). Respects `max_attempts`.
    OnFailure,
}
3572
3573// ==========================================================================
3574// Port mappings (Docker-style container port publishing)
3575// ==========================================================================
3576
3577/// Transport protocol for a published container port.
3578#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, utoipa::ToSchema)]
3579#[serde(rename_all = "snake_case")]
3580pub enum PortProtocol {
3581    /// TCP (default).
3582    Tcp,
3583    /// UDP.
3584    Udp,
3585}
3586
3587impl Default for PortProtocol {
3588    fn default() -> Self {
3589        default_port_protocol()
3590    }
3591}
3592
3593impl PortProtocol {
3594    /// Return the lowercase string form Docker uses in port-binding keys
3595    /// (e.g. `"tcp"` or `"udp"`).
3596    #[must_use]
3597    pub fn as_str(&self) -> &'static str {
3598        match self {
3599            PortProtocol::Tcp => "tcp",
3600            PortProtocol::Udp => "udp",
3601        }
3602    }
3603}
3604
3605fn default_port_protocol() -> PortProtocol {
3606    PortProtocol::Tcp
3607}
3608
/// Serde default for the host bind address: `"0.0.0.0"`.
fn default_host_ip() -> String {
    String::from("0.0.0.0")
}
3612
/// A single host-to-container port publish rule (Docker's `-p`).
///
/// When `host_port` is `None` (or explicitly `Some(0)`), the container runtime
/// assigns an ephemeral host port. `host_ip` defaults to `"0.0.0.0"` to bind
/// on all interfaces.
///
/// Only `container_port` is required when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, utoipa::ToSchema)]
#[serde(rename_all = "snake_case")]
pub struct PortMapping {
    /// Host port. `None` (or zero) means "assign an ephemeral port".
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub host_port: Option<u16>,
    /// Container-side port.
    pub container_port: u16,
    /// Transport protocol (defaults to TCP).
    #[serde(default = "default_port_protocol")]
    pub protocol: PortProtocol,
    /// Host interface to bind on. Defaults to `"0.0.0.0"` (all interfaces).
    ///
    /// An empty string is skipped on serialize, and a missing field
    /// deserializes to `"0.0.0.0"`, so an empty `host_ip` does not survive a
    /// serialize/deserialize round-trip unchanged.
    #[serde(default = "default_host_ip", skip_serializing_if = "String::is_empty")]
    pub host_ip: String,
}
3633
3634#[cfg(test)]
3635mod tests {
3636    use super::*;
3637
3638    #[test]
3639    fn runtime_isolation_serde_kebab_roundtrip() {
3640        // kebab-case serialization matches the strings the macOS composite matches on.
3641        assert_eq!(
3642            serde_json::to_string(&RuntimeIsolation::Auto).unwrap(),
3643            "\"auto\""
3644        );
3645        assert_eq!(
3646            serde_json::to_string(&RuntimeIsolation::Sandbox).unwrap(),
3647            "\"sandbox\""
3648        );
3649        assert_eq!(
3650            serde_json::to_string(&RuntimeIsolation::Vz).unwrap(),
3651            "\"vz\""
3652        );
3653        assert_eq!(
3654            serde_json::to_string(&RuntimeIsolation::VzLinux).unwrap(),
3655            "\"vz-linux\""
3656        );
3657        assert_eq!(
3658            serde_json::to_string(&RuntimeIsolation::Vm).unwrap(),
3659            "\"vm\""
3660        );
3661
3662        assert_eq!(
3663            serde_json::from_str::<RuntimeIsolation>("\"vz-linux\"").unwrap(),
3664            RuntimeIsolation::VzLinux
3665        );
3666        assert_eq!(
3667            serde_json::from_str::<RuntimeIsolation>("\"vm\"").unwrap(),
3668            RuntimeIsolation::Vm
3669        );
3670        assert_eq!(
3671            serde_json::from_str::<RuntimeIsolation>("\"sandbox\"").unwrap(),
3672            RuntimeIsolation::Sandbox
3673        );
3674    }
3675
3676    #[test]
3677    fn runtime_isolation_label_value_mapping() {
3678        assert_eq!(RuntimeIsolation::Auto.label_value(), None);
3679        assert_eq!(RuntimeIsolation::Sandbox.label_value(), Some("sandbox"));
3680        assert_eq!(RuntimeIsolation::Vz.label_value(), Some("vz"));
3681        assert_eq!(RuntimeIsolation::VzLinux.label_value(), Some("vz-linux"));
3682        assert_eq!(RuntimeIsolation::Vm.label_value(), Some("vm"));
3683    }
3684
3685    #[test]
3686    fn stream_endpoint_session_timeout_parses() {
3687        let cfg = StreamEndpointConfig {
3688            session_timeout: Some("5m".to_string()),
3689            ..Default::default()
3690        };
3691        assert_eq!(
3692            cfg.session_timeout_duration(),
3693            Some(std::time::Duration::from_secs(300))
3694        );
3695
3696        let cfg = StreamEndpointConfig {
3697            session_timeout: Some("90s".to_string()),
3698            ..Default::default()
3699        };
3700        assert_eq!(
3701            cfg.session_timeout_duration(),
3702            Some(std::time::Duration::from_secs(90))
3703        );
3704
3705        // Unset -> None.
3706        let cfg = StreamEndpointConfig::default();
3707        assert_eq!(cfg.session_timeout_duration(), None);
3708
3709        // Malformed -> None (falls back to proxy default).
3710        let cfg = StreamEndpointConfig {
3711            session_timeout: Some("not-a-duration".to_string()),
3712            ..Default::default()
3713        };
3714        assert_eq!(cfg.session_timeout_duration(), None);
3715    }
3716
3717    #[test]
3718    fn service_spec_secret_scope_survives_compat_roundtrip() {
3719        // The stamped secret scope MUST survive serialize -> deserialize
3720        // (which routes through ServiceSpecCompat + From), proving a restart
3721        // re-reading the persisted StoredDeployment JSON doesn't wipe it.
3722        let spec = ServiceSpec {
3723            secret_scope: Some(SecretScope::for_env(Some("zatabase"), "env-uuid-1")),
3724            ..Default::default()
3725        };
3726
3727        let json = serde_json::to_string(&spec).expect("serialize service spec");
3728        let round: ServiceSpec = serde_json::from_str(&json).expect("deserialize service spec");
3729
3730        assert_eq!(
3731            round.secret_scope,
3732            Some(SecretScope::for_env(Some("zatabase"), "env-uuid-1")),
3733            "secret_scope lost across the ServiceSpecCompat path:\n{json}"
3734        );
3735        assert_eq!(
3736            spec, round,
3737            "full service spec round-trip mismatch:\n{json}"
3738        );
3739    }
3740
3741    #[test]
3742    fn deployment_spec_environment_project_round_trip() {
3743        let spec = DeploymentSpec {
3744            version: "v1".to_string(),
3745            deployment: "my-deploy".to_string(),
3746            services: HashMap::new(),
3747            externals: HashMap::new(),
3748            tunnels: HashMap::new(),
3749            api: ApiSpec::default(),
3750            environment: Some("dev".to_string()),
3751            project: Some("zatabase".to_string()),
3752        };
3753
3754        let json = serde_json::to_string(&spec).expect("serialize deployment spec");
3755        let round: DeploymentSpec =
3756            serde_json::from_str(&json).expect("deserialize deployment spec");
3757
3758        assert_eq!(round.environment, Some("dev".to_string()));
3759        assert_eq!(round.project, Some("zatabase".to_string()));
3760        assert_eq!(spec, round, "deployment spec round-trip mismatch:\n{json}");
3761
3762        // Old stored JSON without the new fields must still deserialize
3763        // (deny_unknown_fields rejects EXTRA fields, not MISSING ones).
3764        let legacy = r#"{"version":"v1","deployment":"legacy"}"#;
3765        let parsed: DeploymentSpec =
3766            serde_json::from_str(legacy).expect("deserialize legacy deployment spec");
3767        assert!(parsed.environment.is_none());
3768        assert!(parsed.project.is_none());
3769    }
3770
3771    #[test]
3772    fn service_spec_default_round_trips_through_json() {
3773        // Building `ServiceSpec::default()` must succeed (no panics on the
3774        // placeholder image reference) and the result must round-trip through
3775        // serde_json so callers can store / transport a default spec without
3776        // surprises.
3777        let spec = ServiceSpec::default();
3778
3779        // Sanity on a handful of fields that depend on custom Default impls.
3780        assert_eq!(spec.rtype, ResourceType::Service);
3781        assert_eq!(spec.image.pull_policy, PullPolicy::IfNotPresent);
3782        assert_eq!(spec.health.retries, 3);
3783        assert_eq!(spec.network_mode, NetworkMode::Default);
3784        assert!(spec.env.is_empty());
3785        assert!(spec.endpoints.is_empty());
3786        assert!(spec.overlay.is_none());
3787
3788        let json = serde_json::to_string(&spec).expect("serialize default ServiceSpec");
3789        let parsed: ServiceSpec =
3790            serde_json::from_str(&json).expect("re-parse default ServiceSpec");
3791        assert_eq!(spec, parsed);
3792    }
3793
3794    #[test]
3795    fn service_spec_deployment_field_serde_round_trips() {
3796        // Absent `deployment` => None, and the field is omitted on serialize
3797        // (skip_serializing_if = Option::is_none) so the wire stays back-compat.
3798        let yaml_without = "image:\n  name: nginx:latest\n";
3799        let parsed: ServiceSpec =
3800            serde_yaml::from_str(yaml_without).expect("parse spec without deployment");
3801        assert_eq!(parsed.deployment, None);
3802        let reser = serde_json::to_string(&parsed).expect("serialize");
3803        assert!(
3804            !reser.contains("\"deployment\""),
3805            "absent deployment must not be serialized: {reser}"
3806        );
3807
3808        // Present `deployment` round-trips through both YAML-in and JSON.
3809        let yaml_with = "deployment: my-app\nimage:\n  name: nginx:latest\n";
3810        let parsed_with: ServiceSpec =
3811            serde_yaml::from_str(yaml_with).expect("parse spec with deployment");
3812        assert_eq!(parsed_with.deployment.as_deref(), Some("my-app"));
3813        let json = serde_json::to_string(&parsed_with).expect("serialize with deployment");
3814        let reparsed: ServiceSpec = serde_json::from_str(&json).expect("re-parse");
3815        assert_eq!(reparsed.deployment.as_deref(), Some("my-app"));
3816        assert_eq!(parsed_with, reparsed);
3817    }
3818
3819    #[test]
3820    fn service_spec_minimal_sets_name_and_image() {
3821        let spec = ServiceSpec::minimal("api", "ghcr.io/acme/api:1.2");
3822        assert_eq!(spec.image.name.repository(), "acme/api");
3823        assert_eq!(spec.image.name.tag(), Some("1.2"));
3824        // Everything else should match Default exactly.
3825        let baseline = ServiceSpec::default();
3826        assert_eq!(spec.rtype, baseline.rtype);
3827        assert_eq!(spec.scale, baseline.scale);
3828        assert_eq!(spec.network_mode, baseline.network_mode);
3829    }
3830
3831    #[test]
3832    fn port_mapping_defaults_via_serde() {
3833        // Minimal JSON: only container_port. host_port omitted, protocol defaults
3834        // to "tcp", host_ip defaults to "0.0.0.0".
3835        let json = r#"{"container_port": 8080}"#;
3836        let m: PortMapping = serde_json::from_str(json).expect("parse minimal PortMapping");
3837        assert_eq!(m.container_port, 8080);
3838        assert_eq!(m.host_port, None);
3839        assert_eq!(m.protocol, PortProtocol::Tcp);
3840        assert_eq!(m.host_ip, "0.0.0.0");
3841    }
3842
3843    #[test]
3844    fn port_mapping_skips_none_host_port_and_empty_host_ip() {
3845        let m = PortMapping {
3846            host_port: None,
3847            container_port: 443,
3848            protocol: PortProtocol::Tcp,
3849            host_ip: String::new(),
3850        };
3851        let s = serde_json::to_string(&m).expect("serialize");
3852        // host_port = None should be skipped, host_ip = "" should be skipped.
3853        assert!(!s.contains("host_port"), "host_port should be skipped: {s}");
3854        assert!(!s.contains("host_ip"), "host_ip should be skipped: {s}");
3855        assert!(s.contains("\"container_port\":443"));
3856        assert!(s.contains("\"protocol\":\"tcp\""));
3857    }
3858
3859    #[test]
3860    fn test_parse_simple_spec() {
3861        let yaml = r"
3862version: v1
3863deployment: test
3864services:
3865  hello:
3866    rtype: service
3867    image:
3868      name: hello-world:latest
3869    endpoints:
3870      - name: http
3871        protocol: http
3872        port: 8080
3873        expose: public
3874";
3875
3876        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3877        assert_eq!(spec.version, "v1");
3878        assert_eq!(spec.deployment, "test");
3879        assert!(spec.services.contains_key("hello"));
3880    }
3881
3882    #[test]
3883    fn test_parse_duration() {
3884        let yaml = r"
3885version: v1
3886deployment: test
3887services:
3888  test:
3889    rtype: service
3890    image:
3891      name: test:latest
3892    health:
3893      timeout: 30s
3894      interval: 1m
3895      start_grace: 5s
3896      check:
3897        type: tcp
3898        port: 8080
3899";
3900
3901        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3902        let health = &spec.services["test"].health;
3903        assert_eq!(health.timeout, Some(std::time::Duration::from_secs(30)));
3904        assert_eq!(health.interval, Some(std::time::Duration::from_secs(60)));
3905        assert_eq!(health.start_grace, Some(std::time::Duration::from_secs(5)));
3906        match &health.check {
3907            HealthCheck::Tcp { port } => assert_eq!(*port, 8080),
3908            _ => panic!("Expected TCP health check"),
3909        }
3910    }
3911
3912    #[test]
3913    fn test_parse_adaptive_scale() {
3914        let yaml = r"
3915version: v1
3916deployment: test
3917services:
3918  test:
3919    rtype: service
3920    image:
3921      name: test:latest
3922    scale:
3923      mode: adaptive
3924      min: 2
3925      max: 10
3926      cooldown: 15s
3927      targets:
3928        cpu: 70
3929        rps: 800
3930";
3931
3932        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3933        let scale = &spec.services["test"].scale;
3934        match scale {
3935            ScaleSpec::Adaptive {
3936                min,
3937                max,
3938                cooldown,
3939                targets,
3940                ..
3941            } => {
3942                assert_eq!(*min, 2);
3943                assert_eq!(*max, 10);
3944                assert_eq!(*cooldown, Some(std::time::Duration::from_secs(15)));
3945                assert_eq!(targets.cpu, Some(70));
3946                assert_eq!(targets.rps, Some(800));
3947            }
3948            _ => panic!("Expected Adaptive scale mode"),
3949        }
3950    }
3951
3952    #[test]
3953    fn test_node_mode_default() {
3954        let yaml = r"
3955version: v1
3956deployment: test
3957services:
3958  hello:
3959    rtype: service
3960    image:
3961      name: hello-world:latest
3962";
3963
3964        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3965        assert_eq!(spec.services["hello"].node_mode, NodeMode::Shared);
3966        assert!(spec.services["hello"].node_selector.is_none());
3967    }
3968
3969    #[test]
3970    fn test_node_mode_dedicated() {
3971        let yaml = r"
3972version: v1
3973deployment: test
3974services:
3975  api:
3976    rtype: service
3977    image:
3978      name: api:latest
3979    node_mode: dedicated
3980";
3981
3982        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
3983        assert_eq!(spec.services["api"].node_mode, NodeMode::Dedicated);
3984    }
3985
3986    #[test]
3987    fn test_node_mode_exclusive() {
3988        let yaml = r"
3989version: v1
3990deployment: test
3991services:
3992  database:
3993    rtype: service
3994    image:
3995      name: postgres:15
3996    node_mode: exclusive
3997";
3998
3999        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
4000        assert_eq!(spec.services["database"].node_mode, NodeMode::Exclusive);
4001    }
4002
4003    #[test]
4004    fn test_node_selector_with_labels() {
4005        let yaml = r#"
4006version: v1
4007deployment: test
4008services:
4009  ml-worker:
4010    rtype: service
4011    image:
4012      name: ml-worker:latest
4013    node_mode: dedicated
4014    node_selector:
4015      labels:
4016        gpu: "true"
4017        zone: us-east
4018      prefer_labels:
4019        storage: ssd
4020"#;
4021
4022        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
4023        let service = &spec.services["ml-worker"];
4024        assert_eq!(service.node_mode, NodeMode::Dedicated);
4025
4026        let selector = service.node_selector.as_ref().unwrap();
4027        assert_eq!(selector.labels.get("gpu"), Some(&"true".to_string()));
4028        assert_eq!(selector.labels.get("zone"), Some(&"us-east".to_string()));
4029        assert_eq!(
4030            selector.prefer_labels.get("storage"),
4031            Some(&"ssd".to_string())
4032        );
4033    }
4034
4035    #[test]
4036    fn test_node_mode_serialization_roundtrip() {
4037        use serde_json;
4038
4039        // Test all variants serialize/deserialize correctly
4040        let modes = [NodeMode::Shared, NodeMode::Dedicated, NodeMode::Exclusive];
4041        let expected_json = ["\"shared\"", "\"dedicated\"", "\"exclusive\""];
4042
4043        for (mode, expected) in modes.iter().zip(expected_json.iter()) {
4044            let json = serde_json::to_string(mode).unwrap();
4045            assert_eq!(&json, *expected, "Serialization failed for {mode:?}");
4046
4047            let deserialized: NodeMode = serde_json::from_str(&json).unwrap();
4048            assert_eq!(deserialized, *mode, "Roundtrip failed for {mode:?}");
4049        }
4050    }
4051
4052    #[test]
4053    fn test_node_selector_empty() {
4054        let yaml = r"
4055version: v1
4056deployment: test
4057services:
4058  api:
4059    rtype: service
4060    image:
4061      name: api:latest
4062    node_selector:
4063      labels: {}
4064";
4065
4066        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
4067        let selector = spec.services["api"].node_selector.as_ref().unwrap();
4068        assert!(selector.labels.is_empty());
4069        assert!(selector.prefer_labels.is_empty());
4070    }
4071
4072    #[test]
4073    fn test_mixed_node_modes_in_deployment() {
4074        let yaml = r"
4075version: v1
4076deployment: test
4077services:
4078  redis:
4079    rtype: service
4080    image:
4081      name: redis:alpine
4082    # Default shared mode
4083  api:
4084    rtype: service
4085    image:
4086      name: api:latest
4087    node_mode: dedicated
4088  database:
4089    rtype: service
4090    image:
4091      name: postgres:15
4092    node_mode: exclusive
4093    node_selector:
4094      labels:
4095        storage: ssd
4096";
4097
4098        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
4099        assert_eq!(spec.services["redis"].node_mode, NodeMode::Shared);
4100        assert_eq!(spec.services["api"].node_mode, NodeMode::Dedicated);
4101        assert_eq!(spec.services["database"].node_mode, NodeMode::Exclusive);
4102
4103        let db_selector = spec.services["database"].node_selector.as_ref().unwrap();
4104        assert_eq!(db_selector.labels.get("storage"), Some(&"ssd".to_string()));
4105    }
4106
4107    #[test]
4108    fn test_storage_bind_mount() {
4109        let yaml = r"
4110version: v1
4111deployment: test
4112services:
4113  app:
4114    image:
4115      name: app:latest
4116    storage:
4117      - type: bind
4118        source: /host/data
4119        target: /app/data
4120        readonly: true
4121";
4122        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
4123        let storage = &spec.services["app"].storage;
4124        assert_eq!(storage.len(), 1);
4125        match &storage[0] {
4126            StorageSpec::Bind {
4127                source,
4128                target,
4129                readonly,
4130            } => {
4131                assert_eq!(source, "/host/data");
4132                assert_eq!(target, "/app/data");
4133                assert!(*readonly);
4134            }
4135            _ => panic!("Expected Bind storage"),
4136        }
4137    }
4138
4139    #[test]
4140    fn test_storage_named_with_tier() {
4141        let yaml = r"
4142version: v1
4143deployment: test
4144services:
4145  app:
4146    image:
4147      name: app:latest
4148    storage:
4149      - type: named
4150        name: my-data
4151        target: /app/data
4152        tier: cached
4153";
4154        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
4155        let storage = &spec.services["app"].storage;
4156        match &storage[0] {
4157            StorageSpec::Named {
4158                name, target, tier, ..
4159            } => {
4160                assert_eq!(name, "my-data");
4161                assert_eq!(target, "/app/data");
4162                assert_eq!(*tier, StorageTier::Cached);
4163            }
4164            _ => panic!("Expected Named storage"),
4165        }
4166    }
4167
4168    #[test]
4169    fn test_storage_anonymous() {
4170        let yaml = r"
4171version: v1
4172deployment: test
4173services:
4174  app:
4175    image:
4176      name: app:latest
4177    storage:
4178      - type: anonymous
4179        target: /app/cache
4180";
4181        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
4182        let storage = &spec.services["app"].storage;
4183        match &storage[0] {
4184            StorageSpec::Anonymous { target, tier } => {
4185                assert_eq!(target, "/app/cache");
4186                assert_eq!(*tier, StorageTier::Local); // default
4187            }
4188            _ => panic!("Expected Anonymous storage"),
4189        }
4190    }
4191
4192    #[test]
4193    fn test_storage_tmpfs() {
4194        let yaml = r"
4195version: v1
4196deployment: test
4197services:
4198  app:
4199    image:
4200      name: app:latest
4201    storage:
4202      - type: tmpfs
4203        target: /app/tmp
4204        size: 256Mi
4205        mode: 1777
4206";
4207        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
4208        let storage = &spec.services["app"].storage;
4209        match &storage[0] {
4210            StorageSpec::Tmpfs { target, size, mode } => {
4211                assert_eq!(target, "/app/tmp");
4212                assert_eq!(size.as_deref(), Some("256Mi"));
4213                assert_eq!(*mode, Some(1777));
4214            }
4215            _ => panic!("Expected Tmpfs storage"),
4216        }
4217    }
4218
4219    #[test]
4220    fn test_storage_s3() {
4221        let yaml = r"
4222version: v1
4223deployment: test
4224services:
4225  app:
4226    image:
4227      name: app:latest
4228    storage:
4229      - type: s3
4230        bucket: my-bucket
4231        prefix: models/
4232        target: /app/models
4233        readonly: true
4234        endpoint: https://s3.us-west-2.amazonaws.com
4235        credentials: aws-creds
4236";
4237        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
4238        let storage = &spec.services["app"].storage;
4239        match &storage[0] {
4240            StorageSpec::S3 {
4241                bucket,
4242                prefix,
4243                target,
4244                readonly,
4245                endpoint,
4246                credentials,
4247            } => {
4248                assert_eq!(bucket, "my-bucket");
4249                assert_eq!(prefix.as_deref(), Some("models/"));
4250                assert_eq!(target, "/app/models");
4251                assert!(*readonly);
4252                assert_eq!(
4253                    endpoint.as_deref(),
4254                    Some("https://s3.us-west-2.amazonaws.com")
4255                );
4256                assert_eq!(credentials.as_deref(), Some("aws-creds"));
4257            }
4258            _ => panic!("Expected S3 storage"),
4259        }
4260    }
4261
4262    #[test]
4263    fn test_storage_multiple_types() {
4264        let yaml = r"
4265version: v1
4266deployment: test
4267services:
4268  app:
4269    image:
4270      name: app:latest
4271    storage:
4272      - type: bind
4273        source: /etc/config
4274        target: /app/config
4275        readonly: true
4276      - type: named
4277        name: app-data
4278        target: /app/data
4279      - type: tmpfs
4280        target: /app/tmp
4281";
4282        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
4283        let storage = &spec.services["app"].storage;
4284        assert_eq!(storage.len(), 3);
4285        assert!(matches!(&storage[0], StorageSpec::Bind { .. }));
4286        assert!(matches!(&storage[1], StorageSpec::Named { .. }));
4287        assert!(matches!(&storage[2], StorageSpec::Tmpfs { .. }));
4288    }
4289
4290    #[test]
4291    fn test_storage_tier_default() {
4292        let yaml = r"
4293version: v1
4294deployment: test
4295services:
4296  app:
4297    image:
4298      name: app:latest
4299    storage:
4300      - type: named
4301        name: data
4302        target: /data
4303";
4304        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
4305        match &spec.services["app"].storage[0] {
4306            StorageSpec::Named { tier, .. } => {
4307                assert_eq!(*tier, StorageTier::Local); // default should be Local
4308            }
4309            _ => panic!("Expected Named storage"),
4310        }
4311    }
4312
4313    // ==========================================================================
4314    // Tunnel configuration tests
4315    // ==========================================================================
4316
4317    #[test]
4318    fn test_endpoint_tunnel_config_basic() {
4319        let yaml = r"
4320version: v1
4321deployment: test
4322services:
4323  api:
4324    image:
4325      name: api:latest
4326    endpoints:
4327      - name: http
4328        protocol: http
4329        port: 8080
4330        tunnel:
4331          enabled: true
4332          remote_port: 8080
4333";
4334        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
4335        let endpoint = &spec.services["api"].endpoints[0];
4336        let tunnel = endpoint.tunnel.as_ref().unwrap();
4337        assert!(tunnel.enabled);
4338        assert_eq!(tunnel.remote_port, 8080);
4339        assert!(tunnel.from.is_none());
4340        assert!(tunnel.to.is_none());
4341    }
4342
4343    #[test]
4344    fn test_endpoint_tunnel_config_full() {
4345        let yaml = r"
4346version: v1
4347deployment: test
4348services:
4349  api:
4350    image:
4351      name: api:latest
4352    endpoints:
4353      - name: http
4354        protocol: http
4355        port: 8080
4356        tunnel:
4357          enabled: true
4358          from: node-1
4359          to: ingress-node
4360          remote_port: 9000
4361          expose: public
4362          access:
4363            enabled: true
4364            max_ttl: 4h
4365            audit: true
4366";
4367        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
4368        let endpoint = &spec.services["api"].endpoints[0];
4369        let tunnel = endpoint.tunnel.as_ref().unwrap();
4370        assert!(tunnel.enabled);
4371        assert_eq!(tunnel.from, Some("node-1".to_string()));
4372        assert_eq!(tunnel.to, Some("ingress-node".to_string()));
4373        assert_eq!(tunnel.remote_port, 9000);
4374        assert_eq!(tunnel.expose, Some(ExposeType::Public));
4375
4376        let access = tunnel.access.as_ref().unwrap();
4377        assert!(access.enabled);
4378        assert_eq!(access.max_ttl, Some("4h".to_string()));
4379        assert!(access.audit);
4380    }
4381
4382    #[test]
4383    fn test_top_level_tunnel_definition() {
4384        let yaml = r"
4385version: v1
4386deployment: test
4387services: {}
4388tunnels:
4389  db-tunnel:
4390    from: app-node
4391    to: db-node
4392    local_port: 5432
4393    remote_port: 5432
4394    protocol: tcp
4395    expose: internal
4396";
4397        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
4398        let tunnel = spec.tunnels.get("db-tunnel").unwrap();
4399        assert_eq!(tunnel.from, "app-node");
4400        assert_eq!(tunnel.to, "db-node");
4401        assert_eq!(tunnel.local_port, 5432);
4402        assert_eq!(tunnel.remote_port, 5432);
4403        assert_eq!(tunnel.protocol, TunnelProtocol::Tcp);
4404        assert_eq!(tunnel.expose, ExposeType::Internal);
4405    }
4406
4407    #[test]
4408    fn test_top_level_tunnel_defaults() {
4409        let yaml = r"
4410version: v1
4411deployment: test
4412services: {}
4413tunnels:
4414  simple-tunnel:
4415    from: node-a
4416    to: node-b
4417    local_port: 3000
4418    remote_port: 3000
4419";
4420        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
4421        let tunnel = spec.tunnels.get("simple-tunnel").unwrap();
4422        assert_eq!(tunnel.protocol, TunnelProtocol::Tcp); // default
4423        assert_eq!(tunnel.expose, ExposeType::Internal); // default
4424    }
4425
4426    #[test]
4427    fn test_tunnel_protocol_udp() {
4428        let yaml = r"
4429version: v1
4430deployment: test
4431services: {}
4432tunnels:
4433  udp-tunnel:
4434    from: node-a
4435    to: node-b
4436    local_port: 5353
4437    remote_port: 5353
4438    protocol: udp
4439";
4440        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
4441        let tunnel = spec.tunnels.get("udp-tunnel").unwrap();
4442        assert_eq!(tunnel.protocol, TunnelProtocol::Udp);
4443    }
4444
4445    #[test]
4446    fn test_endpoint_without_tunnel() {
4447        let yaml = r"
4448version: v1
4449deployment: test
4450services:
4451  api:
4452    image:
4453      name: api:latest
4454    endpoints:
4455      - name: http
4456        protocol: http
4457        port: 8080
4458";
4459        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
4460        let endpoint = &spec.services["api"].endpoints[0];
4461        assert!(endpoint.tunnel.is_none());
4462    }
4463
4464    #[test]
4465    fn test_deployment_without_tunnels() {
4466        let yaml = r"
4467version: v1
4468deployment: test
4469services:
4470  api:
4471    image:
4472      name: api:latest
4473";
4474        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
4475        assert!(spec.tunnels.is_empty());
4476    }
4477
4478    // ==========================================================================
4479    // ApiSpec tests
4480    // ==========================================================================
4481
4482    #[test]
4483    fn test_spec_without_api_block_uses_defaults() {
4484        let yaml = r"
4485version: v1
4486deployment: test
4487services:
4488  hello:
4489    image:
4490      name: hello-world:latest
4491";
4492        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
4493        assert!(spec.api.enabled);
4494        assert_eq!(spec.api.bind, "0.0.0.0:3669");
4495        assert!(spec.api.jwt_secret.is_none());
4496        assert!(spec.api.swagger);
4497    }
4498
4499    #[test]
4500    fn test_spec_with_explicit_api_block() {
4501        let yaml = r#"
4502version: v1
4503deployment: test
4504services:
4505  hello:
4506    image:
4507      name: hello-world:latest
4508api:
4509  enabled: false
4510  bind: "127.0.0.1:9090"
4511  jwt_secret: "my-secret"
4512  swagger: false
4513"#;
4514        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
4515        assert!(!spec.api.enabled);
4516        assert_eq!(spec.api.bind, "127.0.0.1:9090");
4517        assert_eq!(spec.api.jwt_secret, Some("my-secret".to_string()));
4518        assert!(!spec.api.swagger);
4519    }
4520
4521    #[test]
4522    fn test_spec_with_partial_api_block() {
4523        let yaml = r#"
4524version: v1
4525deployment: test
4526services:
4527  hello:
4528    image:
4529      name: hello-world:latest
4530api:
4531  bind: "0.0.0.0:3000"
4532"#;
4533        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
4534        assert!(spec.api.enabled); // default true
4535        assert_eq!(spec.api.bind, "0.0.0.0:3000");
4536        assert!(spec.api.jwt_secret.is_none()); // default None
4537        assert!(spec.api.swagger); // default true
4538    }
4539
4540    // ==========================================================================
4541    // NetworkPolicySpec tests
4542    // ==========================================================================
4543
4544    #[test]
4545    fn test_network_policy_spec_roundtrip() {
4546        let spec = NetworkPolicySpec {
4547            name: "corp-vpn".to_string(),
4548            description: Some("Corporate VPN network".to_string()),
4549            cidrs: vec!["10.200.0.0/16".to_string()],
4550            members: vec![
4551                NetworkMember {
4552                    name: "alice".to_string(),
4553                    kind: MemberKind::User,
4554                },
4555                NetworkMember {
4556                    name: "ops-team".to_string(),
4557                    kind: MemberKind::Group,
4558                },
4559                NetworkMember {
4560                    name: "node-01".to_string(),
4561                    kind: MemberKind::Node,
4562                },
4563            ],
4564            access_rules: vec![
4565                AccessRule {
4566                    service: "api-gateway".to_string(),
4567                    deployment: "*".to_string(),
4568                    ports: Some(vec![443, 8080]),
4569                    action: AccessAction::Allow,
4570                },
4571                AccessRule {
4572                    service: "*".to_string(),
4573                    deployment: "staging".to_string(),
4574                    ports: None,
4575                    action: AccessAction::Deny,
4576                },
4577            ],
4578        };
4579
4580        let yaml = serde_yaml::to_string(&spec).unwrap();
4581        let deserialized: NetworkPolicySpec = serde_yaml::from_str(&yaml).unwrap();
4582        assert_eq!(spec, deserialized);
4583    }
4584
4585    #[test]
4586    fn test_network_policy_spec_defaults() {
4587        let yaml = r"
4588name: minimal
4589";
4590        let spec: NetworkPolicySpec = serde_yaml::from_str(yaml).unwrap();
4591        assert_eq!(spec.name, "minimal");
4592        assert!(spec.description.is_none());
4593        assert!(spec.cidrs.is_empty());
4594        assert!(spec.members.is_empty());
4595        assert!(spec.access_rules.is_empty());
4596    }
4597
4598    #[test]
4599    fn test_access_rule_defaults() {
4600        let yaml = "{}";
4601        let rule: AccessRule = serde_yaml::from_str(yaml).unwrap();
4602        assert_eq!(rule.service, "*");
4603        assert_eq!(rule.deployment, "*");
4604        assert!(rule.ports.is_none());
4605        assert_eq!(rule.action, AccessAction::Allow);
4606    }
4607
4608    #[test]
4609    fn test_member_kind_defaults_to_user() {
4610        let yaml = r"
4611name: bob
4612";
4613        let member: NetworkMember = serde_yaml::from_str(yaml).unwrap();
4614        assert_eq!(member.name, "bob");
4615        assert_eq!(member.kind, MemberKind::User);
4616    }
4617
4618    #[test]
4619    fn test_member_kind_variants() {
4620        for (input, expected) in [
4621            ("user", MemberKind::User),
4622            ("group", MemberKind::Group),
4623            ("node", MemberKind::Node),
4624            ("cidr", MemberKind::Cidr),
4625        ] {
4626            let yaml = format!("name: test\nkind: {input}");
4627            let member: NetworkMember = serde_yaml::from_str(&yaml).unwrap();
4628            assert_eq!(member.kind, expected);
4629        }
4630    }
4631
4632    #[test]
4633    fn test_access_action_variants() {
4634        // Test via a wrapper struct since bare enums need a YAML tag
4635        #[derive(Debug, Deserialize)]
4636        struct Wrapper {
4637            action: AccessAction,
4638        }
4639
4640        let allow: Wrapper = serde_yaml::from_str("action: allow").unwrap();
4641        let deny: Wrapper = serde_yaml::from_str("action: deny").unwrap();
4642
4643        assert_eq!(allow.action, AccessAction::Allow);
4644        assert_eq!(deny.action, AccessAction::Deny);
4645    }
4646
4647    #[test]
4648    fn test_network_policy_spec_default_impl() {
4649        let spec = NetworkPolicySpec::default();
4650        assert_eq!(spec.name, "");
4651        assert!(spec.description.is_none());
4652        assert!(spec.cidrs.is_empty());
4653        assert!(spec.members.is_empty());
4654        assert!(spec.access_rules.is_empty());
4655    }
4656
4657    #[test]
4658    fn container_restart_policy_serde_roundtrip_all_kinds() {
4659        // Exercise every `ContainerRestartKind` variant via a JSON roundtrip.
4660        // Covers the `snake_case` rename (`unless_stopped`, `on_failure`) and
4661        // the optional `max_attempts` / `delay` fields. Validates the wire
4662        // format the API will expose under `/v1/containers`.
4663        let cases = [
4664            (
4665                ContainerRestartPolicy {
4666                    kind: ContainerRestartKind::No,
4667                    max_attempts: None,
4668                    delay: None,
4669                },
4670                r#"{"kind":"no"}"#,
4671            ),
4672            (
4673                ContainerRestartPolicy {
4674                    kind: ContainerRestartKind::Always,
4675                    max_attempts: None,
4676                    delay: Some("500ms".to_string()),
4677                },
4678                r#"{"kind":"always","delay":"500ms"}"#,
4679            ),
4680            (
4681                ContainerRestartPolicy {
4682                    kind: ContainerRestartKind::UnlessStopped,
4683                    max_attempts: None,
4684                    delay: None,
4685                },
4686                r#"{"kind":"unless_stopped"}"#,
4687            ),
4688            (
4689                ContainerRestartPolicy {
4690                    kind: ContainerRestartKind::OnFailure,
4691                    max_attempts: Some(5),
4692                    delay: None,
4693                },
4694                r#"{"kind":"on_failure","max_attempts":5}"#,
4695            ),
4696        ];
4697
4698        for (value, expected_json) in &cases {
4699            let serialized = serde_json::to_string(value).expect("serialize");
4700            assert_eq!(&serialized, expected_json, "serialize mismatch");
4701            let round: ContainerRestartPolicy =
4702                serde_json::from_str(&serialized).expect("deserialize");
4703            assert_eq!(&round, value, "roundtrip mismatch");
4704        }
4705    }
4706
4707    // -- §3.10: RegistryAuth ------------------------------------------------
4708
4709    #[test]
4710    fn registry_auth_type_serializes_snake_case() {
4711        assert_eq!(
4712            serde_json::to_string(&RegistryAuthType::Basic).unwrap(),
4713            "\"basic\""
4714        );
4715        assert_eq!(
4716            serde_json::to_string(&RegistryAuthType::Token).unwrap(),
4717            "\"token\""
4718        );
4719    }
4720
4721    #[test]
4722    fn registry_auth_default_auth_type_is_basic() {
4723        // When `auth_type` is omitted on the wire, the serde default kicks in.
4724        let json = r#"{"username":"u","password":"p"}"#;
4725        let parsed: RegistryAuth = serde_json::from_str(json).expect("parse");
4726        assert_eq!(parsed.auth_type, RegistryAuthType::Basic);
4727        assert_eq!(parsed.username, "u");
4728        assert_eq!(parsed.password, "p");
4729    }
4730
4731    #[test]
4732    fn registry_auth_serde_roundtrip_both_variants() {
4733        for variant in [RegistryAuthType::Basic, RegistryAuthType::Token] {
4734            let cred = RegistryAuth {
4735                username: "ci-bot".to_string(),
4736                password: "s3cret".to_string(),
4737                auth_type: variant,
4738            };
4739            let serialized = serde_json::to_string(&cred).expect("serialize");
4740            let back: RegistryAuth = serde_json::from_str(&serialized).expect("deserialize");
4741            assert_eq!(back, cred, "roundtrip mismatch for {variant:?}");
4742        }
4743    }
4744
4745    #[test]
4746    fn registry_auth_explicit_token_type_parses() {
4747        let json = r#"{"username":"oauth2accesstoken","password":"ghp_abc","auth_type":"token"}"#;
4748        let parsed: RegistryAuth = serde_json::from_str(json).expect("parse");
4749        assert_eq!(parsed.auth_type, RegistryAuthType::Token);
4750    }
4751
4752    #[test]
4753    fn target_platform_as_oci_str() {
4754        assert_eq!(
4755            TargetPlatform::new(OsKind::Linux, ArchKind::Amd64).as_oci_str(),
4756            "linux/amd64"
4757        );
4758        assert_eq!(
4759            TargetPlatform::new(OsKind::Windows, ArchKind::Arm64).as_oci_str(),
4760            "windows/arm64"
4761        );
4762        assert_eq!(
4763            TargetPlatform::new(OsKind::Macos, ArchKind::Arm64).as_oci_str(),
4764            "darwin/arm64"
4765        );
4766    }
4767
4768    #[test]
4769    fn os_kind_from_rust_consts() {
4770        assert_eq!(OsKind::from_rust_os("linux"), Some(OsKind::Linux));
4771        assert_eq!(OsKind::from_rust_os("windows"), Some(OsKind::Windows));
4772        assert_eq!(OsKind::from_rust_os("macos"), Some(OsKind::Macos));
4773        assert_eq!(OsKind::from_rust_os("freebsd"), None);
4774    }
4775
4776    #[test]
4777    fn arch_kind_from_rust_consts() {
4778        assert_eq!(ArchKind::from_rust_arch("x86_64"), Some(ArchKind::Amd64));
4779        assert_eq!(ArchKind::from_rust_arch("aarch64"), Some(ArchKind::Arm64));
4780        assert_eq!(ArchKind::from_rust_arch("riscv64"), None);
4781    }
4782
4783    #[test]
4784    fn service_spec_platform_yaml_round_trip_none() {
4785        // Omitting `platform` from YAML should deserialize as None without error,
4786        // even though ServiceSpec has `#[serde(deny_unknown_fields)]`.
4787        let yaml = r"
4788version: v1
4789deployment: test
4790services:
4791  app:
4792    rtype: service
4793    image:
4794      name: nginx:latest
4795";
4796        let spec: DeploymentSpec = serde_yaml::from_str(yaml).expect("yaml parse");
4797        assert!(spec.services["app"].platform.is_none());
4798    }
4799
4800    #[test]
4801    fn service_spec_platform_yaml_round_trip_some() {
4802        let yaml = r"
4803version: v1
4804deployment: test
4805services:
4806  app:
4807    rtype: service
4808    image:
4809      name: nginx:latest
4810    platform:
4811      os: windows
4812      arch: amd64
4813";
4814        let spec: DeploymentSpec = serde_yaml::from_str(yaml).expect("yaml parse");
4815        assert_eq!(
4816            spec.services["app"].platform,
4817            Some(TargetPlatform::new(OsKind::Windows, ArchKind::Amd64))
4818        );
4819    }
4820
4821    #[test]
4822    fn service_spec_platform_serializes_omitted_when_none() {
4823        // Build a minimal ServiceSpec via YAML to avoid enumerating every field
4824        // (ServiceSpec has no Default impl and no named-struct helper).
4825        let yaml = r"
4826version: v1
4827deployment: test
4828services:
4829  app:
4830    rtype: service
4831    image:
4832      name: nginx:latest
4833";
4834        let mut spec: DeploymentSpec = serde_yaml::from_str(yaml).expect("yaml parse");
4835        let service = spec.services.get_mut("app").expect("service present");
4836        service.platform = None;
4837        let rendered = serde_yaml::to_string(service).expect("render");
4838        assert!(
4839            !rendered.contains("platform"),
4840            "platform must be omitted when None: {rendered}"
4841        );
4842    }
4843
4844    #[test]
4845    fn target_platform_os_version_builder() {
4846        let p =
4847            TargetPlatform::new(OsKind::Windows, ArchKind::Amd64).with_os_version("10.0.26100.1");
4848        assert_eq!(p.os_version.as_deref(), Some("10.0.26100.1"));
4849        assert_eq!(p.os, OsKind::Windows);
4850        assert_eq!(p.arch, ArchKind::Amd64);
4851    }
4852
4853    #[test]
4854    fn target_platform_os_version_yaml_roundtrip() {
4855        let yaml = "os: windows\narch: amd64\nosVersion: 10.0.26100.1\n";
4856        let p: TargetPlatform = serde_yaml::from_str(yaml).expect("yaml parse");
4857        assert_eq!(p.os_version.as_deref(), Some("10.0.26100.1"));
4858        assert_eq!(p.os, OsKind::Windows);
4859        assert_eq!(p.arch, ArchKind::Amd64);
4860    }
4861
4862    #[test]
4863    fn target_platform_os_version_yaml_omits_when_none() {
4864        let p = TargetPlatform::new(OsKind::Linux, ArchKind::Amd64);
4865        let rendered = serde_yaml::to_string(&p).expect("render");
4866        assert!(
4867            !rendered.contains("osVersion"),
4868            "osVersion must be omitted when None: {rendered}"
4869        );
4870    }
4871
4872    #[test]
4873    fn target_platform_as_detailed_str_includes_version() {
4874        let without = TargetPlatform::new(OsKind::Windows, ArchKind::Amd64).as_detailed_str();
4875        assert_eq!(without, "windows/amd64");
4876
4877        let with = TargetPlatform::new(OsKind::Windows, ArchKind::Amd64)
4878            .with_os_version("10.0.26100.1")
4879            .as_detailed_str();
4880        assert_eq!(with, "windows/amd64 (os.version=10.0.26100.1)");
4881    }
4882
4883    #[test]
4884    fn target_platform_display_ignores_version() {
4885        // Display deliberately stays terse so existing log lines don't change.
4886        let p =
4887            TargetPlatform::new(OsKind::Windows, ArchKind::Amd64).with_os_version("10.0.26100.1");
4888        assert_eq!(format!("{p}"), "windows/amd64");
4889    }
4890
4891    // ----------------------------------------------------------------------
4892    // Phase 1 Task 1.1: Docker-compat ServiceSpec/ResourcesSpec extensions.
4893    // ----------------------------------------------------------------------
4894
4895    /// Build a minimal-but-valid `ServiceSpec` for round-trip tests.
4896    fn fixture_service_spec_full() -> ServiceSpec {
4897        let yaml = r"
4898version: v1
4899deployment: phase1-task1
4900services:
4901  hello:
4902    rtype: service
4903    image:
4904      name: hello-world:latest
4905";
4906        let spec: DeploymentSpec = serde_yaml::from_str(yaml).expect("parse fixture");
4907        spec.services.get("hello").expect("hello service").clone()
4908    }
4909
4910    #[test]
4911    fn service_spec_round_trip_with_all_new_fields() {
4912        let mut spec = fixture_service_spec_full();
4913        spec.labels
4914            .insert("zlayer.team".to_string(), "platform".to_string());
4915        spec.user = Some("1000:1000".to_string());
4916        spec.stop_signal = Some("SIGTERM".to_string());
4917        spec.stop_grace_period = Some(std::time::Duration::from_secs(30));
4918        spec.sysctls
4919            .insert("net.core.somaxconn".to_string(), "1024".to_string());
4920        spec.ulimits.insert(
4921            "nofile".to_string(),
4922            UlimitSpec {
4923                soft: 65_536,
4924                hard: 65_536,
4925            },
4926        );
4927        spec.security_opt.push("no-new-privileges:true".to_string());
4928        spec.pid_mode = Some("host".to_string());
4929        spec.ipc_mode = Some("private".to_string());
4930        spec.network_mode = NetworkMode::Bridge {
4931            name: Some("custom-net".to_string()),
4932        };
4933        spec.cap_drop.push("NET_RAW".to_string());
4934        spec.extra_groups.push("docker".to_string());
4935        spec.read_only_root_fs = true;
4936        spec.init_container = Some(true);
4937        spec.resources.pids_limit = Some(2048);
4938        spec.resources.cpuset = Some("0-3".to_string());
4939        spec.resources.cpu_shares = Some(1024);
4940        spec.resources.memory_swap = Some("2Gi".to_string());
4941        spec.resources.memory_reservation = Some("256Mi".to_string());
4942        spec.resources.memory_swappiness = Some(10);
4943        spec.resources.oom_score_adj = Some(-500);
4944        spec.resources.oom_kill_disable = Some(false);
4945        spec.resources.blkio_weight = Some(500);
4946
4947        let yaml = serde_yaml::to_string(&spec).expect("serialize");
4948        let round: ServiceSpec = serde_yaml::from_str(&yaml).expect("deserialize");
4949        assert_eq!(spec, round, "round-trip mismatch:\n{yaml}");
4950    }
4951
4952    #[test]
4953    fn network_mode_string_form_round_trip() {
4954        let cases: &[(&str, NetworkMode)] = &[
4955            ("default", NetworkMode::Default),
4956            ("host", NetworkMode::Host),
4957            ("none", NetworkMode::None),
4958            ("bridge", NetworkMode::Bridge { name: None }),
4959            (
4960                "bridge:custom",
4961                NetworkMode::Bridge {
4962                    name: Some("custom".to_string()),
4963                },
4964            ),
4965            (
4966                "container:abc123",
4967                NetworkMode::Container {
4968                    id: "abc123".to_string(),
4969                },
4970            ),
4971        ];
4972
4973        for (input, expected) in cases {
4974            #[derive(Deserialize)]
4975            struct Wrap {
4976                #[serde(deserialize_with = "deserialize_network_mode")]
4977                m: NetworkMode,
4978            }
4979            let yaml = format!("m: \"{input}\"\n");
4980            let parsed: Wrap = serde_yaml::from_str(&yaml).expect("parse network mode");
4981            assert_eq!(&parsed.m, expected, "mismatch for {input}");
4982        }
4983    }
4984
4985    /// Regression: the deployment store persists records as `serde_json`, but the
4986    /// `network_mode` deserializer used to be hard-wired to `serde_yaml`. A struct
4987    /// variant (`Bridge { name: Some }` / `Container { id }`) — exactly what
4988    /// `zlayer run --network <name>` produces — serialized to a JSON map and then
4989    /// failed to read back with "invalid type: map, expected a `Value::Tagged`
4990    /// enum", poisoning the whole `deployments.db`. This test drives the field
4991    /// through `serde_json` (the real storage format) for every variant.
4992    #[test]
4993    fn network_mode_json_round_trip_all_variants() {
4994        #[derive(Serialize, Deserialize, PartialEq, Debug)]
4995        struct Wrap {
4996            #[serde(deserialize_with = "deserialize_network_mode")]
4997            m: NetworkMode,
4998        }
4999
5000        let cases = [
5001            NetworkMode::Default,
5002            NetworkMode::Host,
5003            NetworkMode::None,
5004            NetworkMode::Bridge { name: None },
5005            NetworkMode::Bridge {
5006                name: Some("custom-net".to_string()),
5007            },
5008            NetworkMode::Container {
5009                id: "abc123".to_string(),
5010            },
5011        ];
5012
5013        for m in cases {
5014            let wrap = Wrap { m: m.clone() };
5015            let json = serde_json::to_string(&wrap).expect("serialize json");
5016            let round: Wrap = serde_json::from_str(&json).expect("deserialize json");
5017            assert_eq!(round.m, m, "json round-trip mismatch:\n{json}");
5018        }
5019    }
5020
5021    /// Regression: a full `ServiceSpec` carrying a struct-variant `network_mode`
5022    /// must survive the `serde_json` round-trip the storage layer performs.
5023    #[test]
5024    fn service_spec_json_round_trip_with_struct_network_mode() {
5025        let spec = ServiceSpec {
5026            network_mode: NetworkMode::Bridge {
5027                name: Some("custom-net".to_string()),
5028            },
5029            ..Default::default()
5030        };
5031
5032        let json = serde_json::to_string(&spec).expect("serialize json");
5033        let round: ServiceSpec = serde_json::from_str(&json).expect("deserialize json");
5034        assert_eq!(spec, round, "json round-trip mismatch:\n{json}");
5035    }
5036
5037    #[test]
5038    fn ulimit_spec_round_trip() {
5039        let u = UlimitSpec {
5040            soft: 1024,
5041            hard: 65_536,
5042        };
5043        let yaml = serde_yaml::to_string(&u).expect("serialize");
5044        let parsed: UlimitSpec = serde_yaml::from_str(&yaml).expect("parse");
5045        assert_eq!(u, parsed);
5046    }
5047
5048    #[test]
5049    fn ulimit_spec_full_form() {
5050        let parsed: UlimitSpec =
5051            serde_yaml::from_str("soft: 100000\nhard: 200000\n").expect("parse");
5052        assert_eq!(
5053            parsed,
5054            UlimitSpec {
5055                soft: 100_000,
5056                hard: 200_000,
5057            }
5058        );
5059    }
5060
5061    #[test]
5062    fn ulimit_spec_soft_only_defaults_hard_to_soft() {
5063        // The reported footgun: `{ nofile: { soft: 100000 } }` must NOT yield
5064        // hard = 0 (which is < soft and breaks setrlimit). Mirror Docker:
5065        // omitted hard defaults to soft.
5066        let parsed: UlimitSpec = serde_yaml::from_str("soft: 100000\n").expect("parse");
5067        assert_eq!(
5068            parsed,
5069            UlimitSpec {
5070                soft: 100_000,
5071                hard: 100_000,
5072            }
5073        );
5074    }
5075
5076    #[test]
5077    fn ulimit_spec_hard_only_defaults_soft_to_hard() {
5078        // A lone bound applies to both; never produces soft > hard.
5079        let parsed: UlimitSpec = serde_yaml::from_str("hard: 100000\n").expect("parse");
5080        assert_eq!(
5081            parsed,
5082            UlimitSpec {
5083                soft: 100_000,
5084                hard: 100_000,
5085            }
5086        );
5087    }
5088
5089    #[test]
5090    fn ulimit_spec_both_absent_is_zero() {
5091        let parsed: UlimitSpec = serde_yaml::from_str("{}\n").expect("parse");
5092        assert_eq!(parsed, UlimitSpec { soft: 0, hard: 0 });
5093    }
5094
5095    #[test]
5096    fn ulimit_spec_explicit_zero_hard_is_preserved() {
5097        // An explicit `hard: 0` is distinct from an omitted hard and is kept
5098        // as-is (the caller asked for an unlimited/zero hard cap on purpose).
5099        let parsed: UlimitSpec = serde_yaml::from_str("soft: 100000\nhard: 0\n").expect("parse");
5100        assert_eq!(
5101            parsed,
5102            UlimitSpec {
5103                soft: 100_000,
5104                hard: 0,
5105            }
5106        );
5107    }
5108
5109    #[test]
5110    fn ulimit_spec_in_service_map_soft_only() {
5111        // End-to-end through the ServiceSpec.ulimits map, matching the YAML
5112        // shape `ulimits: { nofile: { soft: 100000 } }`.
5113        #[derive(Deserialize)]
5114        struct Wrap {
5115            ulimits: std::collections::HashMap<String, UlimitSpec>,
5116        }
5117        let yaml = r"
5118ulimits:
5119  nofile:
5120    soft: 100000
5121";
5122        let parsed: Wrap = serde_yaml::from_str(yaml).expect("parse");
5123        assert_eq!(
5124            parsed.ulimits.get("nofile"),
5125            Some(&UlimitSpec {
5126                soft: 100_000,
5127                hard: 100_000,
5128            })
5129        );
5130    }
5131
5132    #[test]
5133    fn host_network_true_yaml_promotes_to_network_mode_host() {
5134        let yaml = r"
5135version: v1
5136deployment: bc-test
5137services:
5138  hello:
5139    rtype: service
5140    image:
5141      name: hello-world:latest
5142    host_network: true
5143";
5144        let dep: DeploymentSpec = serde_yaml::from_str(yaml).expect("parse");
5145        let svc = dep.services.get("hello").expect("hello service");
5146        assert_eq!(svc.network_mode, NetworkMode::Host);
5147        // The legacy bool stays mirrored so in-process callers that still
5148        // read `host_network` continue to work.
5149        assert!(svc.host_network);
5150    }
5151
5152    #[test]
5153    fn capabilities_yaml_alias_cap_add_round_trip() {
5154        // Forward-compat: ZLayer keeps the field named `capabilities`, but the
5155        // Docker-style key `cap_add` must also deserialize into it.
5156        let yaml = r"
5157version: v1
5158deployment: cap-test
5159services:
5160  hello:
5161    rtype: service
5162    image:
5163      name: hello-world:latest
5164    cap_add:
5165      - NET_ADMIN
5166      - SYS_PTRACE
5167";
5168        let dep: DeploymentSpec = serde_yaml::from_str(yaml).expect("parse cap_add alias");
5169        let svc = dep.services.get("hello").expect("hello service");
5170        assert_eq!(
5171            svc.capabilities,
5172            vec!["NET_ADMIN".to_string(), "SYS_PTRACE".to_string()]
5173        );
5174    }
5175
5176    #[test]
5177    fn lifecycle_omitted_defaults_to_false() {
5178        // When `lifecycle` is absent from the YAML/JSON entirely, the
5179        // deserialized service must fall back to `LifecycleSpec::default()`,
5180        // i.e. `delete_on_exit: false` — the historical retain-on-exit
5181        // behavior. This guards against accidental policy flips when the
5182        // field is added to existing specs.
5183        let yaml = r"
5184version: v1
5185deployment: lifecycle-default-test
5186services:
5187  app:
5188    rtype: service
5189    image:
5190      name: hello-world:latest
5191";
5192        let dep: DeploymentSpec = serde_yaml::from_str(yaml).expect("parse spec without lifecycle");
5193        let svc = dep.services.get("app").expect("app service");
5194        assert_eq!(svc.lifecycle, LifecycleSpec::default());
5195        assert!(!svc.lifecycle.delete_on_exit);
5196    }
5197
5198    #[test]
5199    fn lifecycle_delete_on_exit_round_trips() {
5200        // `lifecycle.delete_on_exit: true` must survive a full YAML
5201        // deserialize → serialize → deserialize cycle, and the explicit
5202        // value must propagate into the parsed `ServiceSpec`.
5203        let yaml = r"
5204version: v1
5205deployment: lifecycle-delete-test
5206services:
5207  app:
5208    rtype: service
5209    image:
5210      name: hello-world:latest
5211    lifecycle:
5212      delete_on_exit: true
5213";
5214        let dep: DeploymentSpec = serde_yaml::from_str(yaml).expect("parse spec with lifecycle");
5215        let svc = dep.services.get("app").expect("app service");
5216        assert!(svc.lifecycle.delete_on_exit);
5217
5218        // Round-trip via YAML to confirm Serialize emits the field and
5219        // Deserialize folds it back identically.
5220        let dumped = serde_yaml::to_string(&dep).expect("serialize spec with lifecycle");
5221        let reparsed: DeploymentSpec =
5222            serde_yaml::from_str(&dumped).expect("reparse round-tripped spec");
5223        let reparsed_svc = reparsed.services.get("app").expect("app service after rt");
5224        assert!(reparsed_svc.lifecycle.delete_on_exit);
5225        assert_eq!(svc.lifecycle, reparsed_svc.lifecycle);
5226    }
5227}
5228
5229#[cfg(test)]
5230mod replica_group_tests {
5231    use super::{
5232        validate_unique_replica_group_roles, EndpointSpec, GroupAffinity, LocalhostReachability,
5233        ReplicaGroup, ScaleSpec, ScaleTargets, ServiceSpec, REPLICA_GROUP_ROLE_RE,
5234    };
5235
5236    #[test]
5237    fn yaml_roundtrip_basic_group() {
5238        let yaml = r"
5239role: primary
5240count: 1
5241env:
5242  POSTGRES_REPLICATION_MODE: primary
5243affinity: spread
5244";
5245        let group: ReplicaGroup = serde_yaml::from_str(yaml).expect("parse basic group");
5246        assert_eq!(group.role, "primary");
5247        assert_eq!(group.count, 1);
5248        assert_eq!(group.affinity, GroupAffinity::Spread);
5249        assert_eq!(
5250            group.env.get("POSTGRES_REPLICATION_MODE"),
5251            Some(&"primary".to_string())
5252        );
5253    }
5254
5255    #[test]
5256    fn yaml_default_affinity_is_spread() {
5257        let yaml = "role: x\ncount: 2\n";
5258        let group: ReplicaGroup = serde_yaml::from_str(yaml).expect("parse minimal group");
5259        assert_eq!(group.affinity, GroupAffinity::Spread);
5260    }
5261
5262    #[test]
5263    fn role_regex_accepts_valid_labels() {
5264        for ok in ["a", "primary", "read-only", "x1", "ab-cd-ef"] {
5265            assert!(
5266                REPLICA_GROUP_ROLE_RE.is_match(ok),
5267                "regex should accept: {ok}"
5268            );
5269        }
5270    }
5271
5272    #[test]
5273    fn role_regex_rejects_invalid_labels() {
5274        for bad in [
5275            "",
5276            "-primary",
5277            "primary-",
5278            "Primary",
5279            "0primary",
5280            "primary_role",
5281            "this-is-way-too-long-of-a-role-name-here",
5282        ] {
5283            assert!(
5284                !REPLICA_GROUP_ROLE_RE.is_match(bad),
5285                "regex should reject: {bad}"
5286            );
5287        }
5288    }
5289
5290    #[test]
5291    fn group_affinity_pin_roundtrips_via_serde_yaml() {
5292        // Externally-tagged enum with a single string payload serializes as
5293        // a mapping `pin: <value>` under snake_case naming.
5294        let pinned = GroupAffinity::Pin("id=2".to_string());
5295        let dumped = serde_yaml::to_string(&pinned).expect("serialize pin");
5296        let reparsed: GroupAffinity = serde_yaml::from_str(&dumped).expect("reparse pin");
5297        match reparsed {
5298            GroupAffinity::Pin(s) => assert_eq!(s, "id=2"),
5299            other => panic!("expected Pin, got {other:?}"),
5300        }
5301    }
5302
5303    #[test]
5304    fn unique_role_validator_rejects_duplicates() {
5305        let mk = |role: &str| ReplicaGroup {
5306            role: role.to_string(),
5307            count: 1,
5308            image: None,
5309            env: std::collections::HashMap::new(),
5310            command: None,
5311            resources: None,
5312            affinity: GroupAffinity::Spread,
5313        };
5314        assert!(validate_unique_replica_group_roles(&[mk("a"), mk("b")]).is_ok());
5315        let err = validate_unique_replica_group_roles(&[mk("a"), mk("a")])
5316            .expect_err("duplicate should fail");
5317        assert_eq!(err, "a");
5318    }
5319
5320    #[test]
5321    fn endpoint_target_role_yaml_roundtrip() {
5322        let yaml = "name: read\nprotocol: tcp\nport: 5433\ntarget_role: read\n";
5323        let ep: EndpointSpec = serde_yaml::from_str(yaml).unwrap();
5324        assert_eq!(ep.target_role, Some("read".to_string()));
5325    }
5326
5327    #[test]
5328    fn endpoint_without_target_role_is_none() {
5329        let yaml = "name: any\nprotocol: tcp\nport: 5432\n";
5330        let ep: EndpointSpec = serde_yaml::from_str(yaml).unwrap();
5331        assert_eq!(ep.target_role, None);
5332    }
5333
5334    // ==========================================================================
5335    // LocalhostReachability / single-member publishing tests
5336    // ==========================================================================
5337
5338    fn spec_with_scale(scale: ScaleSpec) -> ServiceSpec {
5339        let mut s = ServiceSpec::minimal("svc", "scratch:latest");
5340        s.scale = scale;
5341        s
5342    }
5343
5344    fn replica_group(role: &str, count: u32) -> ReplicaGroup {
5345        ReplicaGroup {
5346            role: role.to_string(),
5347            count,
5348            image: None,
5349            env: std::collections::HashMap::new(),
5350            command: None,
5351            resources: None,
5352            affinity: GroupAffinity::Spread,
5353        }
5354    }
5355
5356    #[test]
5357    fn is_single_member_across_scale_modes() {
5358        assert!(spec_with_scale(ScaleSpec::Fixed { replicas: 1 }).is_single_member());
5359        assert!(spec_with_scale(ScaleSpec::Fixed { replicas: 0 }).is_single_member());
5360        assert!(!spec_with_scale(ScaleSpec::Fixed { replicas: 3 }).is_single_member());
5361
5362        let adaptive = |min, max| ScaleSpec::Adaptive {
5363            min,
5364            max,
5365            cooldown: None,
5366            targets: ScaleTargets::default(),
5367            behavior: None,
5368            triggers: Vec::new(),
5369            idle_window: None,
5370            vertical: None,
5371            predictive: None,
5372        };
5373        assert!(spec_with_scale(adaptive(1, 1)).is_single_member());
5374        assert!(!spec_with_scale(adaptive(1, 5)).is_single_member());
5375
5376        assert!(spec_with_scale(ScaleSpec::Manual).is_single_member());
5377    }
5378
5379    #[test]
5380    fn is_single_member_with_replica_groups() {
5381        // One group, total 1 -> single member.
5382        let mut s = ServiceSpec::minimal("svc", "scratch:latest");
5383        s.replica_groups = Some(vec![replica_group("only", 1)]);
5384        assert!(s.is_single_member());
5385
5386        // One group, total 2 -> multi member.
5387        s.replica_groups = Some(vec![replica_group("only", 2)]);
5388        assert!(!s.is_single_member());
5389
5390        // Two groups, total 2 -> multi member.
5391        s.replica_groups = Some(vec![replica_group("a", 1), replica_group("b", 1)]);
5392        assert!(!s.is_single_member());
5393
5394        // replica_groups takes precedence over scale.
5395        s.scale = ScaleSpec::Fixed { replicas: 1 };
5396        s.replica_groups = Some(vec![replica_group("a", 1), replica_group("b", 1)]);
5397        assert!(!s.is_single_member());
5398    }
5399
5400    #[test]
5401    fn publish_to_node_loopback_override_matrix() {
5402        // Single-member base spec.
5403        let single = spec_with_scale(ScaleSpec::Fixed { replicas: 1 });
5404        // Multi-member base spec.
5405        let multi = spec_with_scale(ScaleSpec::Fixed { replicas: 3 });
5406
5407        // Auto: follows single-member-ness.
5408        let mut s = single.clone();
5409        s.localhost_reachability = LocalhostReachability::Auto;
5410        assert!(s.publish_to_node_loopback());
5411        let mut m = multi.clone();
5412        m.localhost_reachability = LocalhostReachability::Auto;
5413        assert!(!m.publish_to_node_loopback());
5414
5415        // Always: publishes regardless of member count.
5416        let mut s = single.clone();
5417        s.localhost_reachability = LocalhostReachability::Always;
5418        assert!(s.publish_to_node_loopback());
5419        let mut m = multi.clone();
5420        m.localhost_reachability = LocalhostReachability::Always;
5421        assert!(m.publish_to_node_loopback());
5422
5423        // Never: never publishes regardless of member count.
5424        let mut s = single;
5425        s.localhost_reachability = LocalhostReachability::Never;
5426        assert!(!s.publish_to_node_loopback());
5427        let mut m = multi;
5428        m.localhost_reachability = LocalhostReachability::Never;
5429        assert!(!m.publish_to_node_loopback());
5430    }
5431
5432    #[test]
5433    fn localhost_reachability_default_is_auto() {
5434        assert_eq!(
5435            LocalhostReachability::default(),
5436            LocalhostReachability::Auto
5437        );
5438        assert!(LocalhostReachability::Auto.is_default());
5439        assert!(!LocalhostReachability::Always.is_default());
5440        assert!(!LocalhostReachability::Never.is_default());
5441        // A minimal spec defaults to Auto reachability, but the default scale
5442        // is Adaptive { max: 10 } (multi-member), so Auto does NOT publish.
5443        let minimal = ServiceSpec::minimal("svc", "scratch:latest");
5444        assert_eq!(minimal.localhost_reachability, LocalhostReachability::Auto);
5445        assert!(!minimal.is_single_member());
5446        assert!(!minimal.publish_to_node_loopback());
5447    }
5448}