Skip to main content

zlayer_spec/
types.rs

1//! `ZLayer` V1 Service Specification Types
2//!
3//! This module defines all types for parsing and validating `ZLayer` deployment specs.
4
5mod duration {
6    use humantime::format_duration;
7    use serde::{Deserialize, Deserializer, Serializer};
8    use std::time::Duration;
9
10    #[allow(clippy::ref_option)]
11    pub fn serialize<S>(duration: &Option<Duration>, serializer: S) -> Result<S::Ok, S::Error>
12    where
13        S: Serializer,
14    {
15        match duration {
16            Some(d) => serializer.serialize_str(&format_duration(*d).to_string()),
17            None => serializer.serialize_none(),
18        }
19    }
20
21    pub fn deserialize<'de, D>(deserializer: D) -> Result<Option<Duration>, D::Error>
22    where
23        D: Deserializer<'de>,
24    {
25        use serde::de::Error;
26        let s: Option<String> = Option::deserialize(deserializer)?;
27        match s {
28            Some(s) => humantime::parse_duration(&s)
29                .map(Some)
30                .map_err(|e| D::Error::custom(format!("invalid duration: {e}"))),
31            None => Ok(None),
32        }
33    }
34
35    pub mod option {
36        pub use super::*;
37    }
38
39    /// Serde module for required (non-Option) Duration fields
40    pub mod required {
41        use humantime::format_duration;
42        use serde::{Deserialize, Deserializer, Serializer};
43        use std::time::Duration;
44
45        pub fn serialize<S>(duration: &Duration, serializer: S) -> Result<S::Ok, S::Error>
46        where
47            S: Serializer,
48        {
49            serializer.serialize_str(&format_duration(*duration).to_string())
50        }
51
52        pub fn deserialize<'de, D>(deserializer: D) -> Result<Duration, D::Error>
53        where
54            D: Deserializer<'de>,
55        {
56            use serde::de::Error;
57            let s: String = String::deserialize(deserializer)?;
58            humantime::parse_duration(&s)
59                .map_err(|e| D::Error::custom(format!("invalid duration: {e}")))
60        }
61    }
62}
63
64use serde::{Deserialize, Serialize};
65use std::collections::HashMap;
66use validator::Validate;
67
68/// How service replicas are allocated to nodes
69#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
70#[serde(rename_all = "snake_case")]
71pub enum NodeMode {
72    /// Containers placed on any node with capacity (default, bin-packing)
73    #[default]
74    Shared,
75    /// Each replica gets its own dedicated node (1:1 mapping)
76    Dedicated,
77    /// Service is the ONLY thing on its nodes (no other services)
78    Exclusive,
79}
80
81/// Service type - determines runtime behavior and scaling model
82#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
83#[serde(rename_all = "snake_case")]
84pub enum ServiceType {
85    /// Standard long-running container service
86    #[default]
87    Standard,
88    /// WASM-based HTTP service (wasi:http/incoming-handler)
89    WasmHttp,
90    /// WASM-based general plugin (zlayer:plugin handler - full host access)
91    WasmPlugin,
92    /// WASM-based stateless request/response transformer
93    WasmTransformer,
94    /// WASM-based authenticator plugin (secrets + KV + HTTP)
95    WasmAuthenticator,
96    /// WASM-based rate limiter (KV + metrics)
97    WasmRateLimiter,
98    /// WASM-based request/response middleware
99    WasmMiddleware,
100    /// WASM-based custom router
101    WasmRouter,
102    /// Run-to-completion job
103    Job,
104}
105
106/// Storage performance tier
107#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
108#[serde(rename_all = "snake_case")]
109pub enum StorageTier {
110    /// Direct local filesystem (SSD/NVMe) - SQLite-safe, fast fsync
111    #[default]
112    Local,
113    /// bcache-backed tiered storage (SSD cache + slower backend)
114    Cached,
115    /// NFS/network storage - NOT SQLite-safe (will warn)
116    Network,
117}
118
119/// Node selection constraints for service placement
120#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
121#[serde(deny_unknown_fields)]
122pub struct NodeSelector {
123    /// Required labels that nodes must have (all must match)
124    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
125    pub labels: HashMap<String, String>,
126    /// Preferred labels (soft constraint, nodes with these are preferred)
127    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
128    pub prefer_labels: HashMap<String, String>,
129}
130
131/// Explicit capability declarations for WASM modules.
132/// Controls which host interfaces are linked and available to the component.
133#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
134#[serde(deny_unknown_fields)]
135#[allow(clippy::struct_excessive_bools)]
136pub struct WasmCapabilities {
137    /// Config interface access (zlayer:plugin/config)
138    #[serde(default = "default_true")]
139    pub config: bool,
140    /// Key-value storage access (zlayer:plugin/keyvalue)
141    #[serde(default = "default_true")]
142    pub keyvalue: bool,
143    /// Logging access (zlayer:plugin/logging)
144    #[serde(default = "default_true")]
145    pub logging: bool,
146    /// Secrets access (zlayer:plugin/secrets)
147    #[serde(default)]
148    pub secrets: bool,
149    /// Metrics emission (zlayer:plugin/metrics)
150    #[serde(default = "default_true")]
151    pub metrics: bool,
152    /// HTTP client for outgoing requests (wasi:http/outgoing-handler)
153    #[serde(default)]
154    pub http_client: bool,
155    /// WASI CLI access (args, env, stdio)
156    #[serde(default)]
157    pub cli: bool,
158    /// WASI filesystem access
159    #[serde(default)]
160    pub filesystem: bool,
161    /// WASI sockets access (TCP/UDP)
162    #[serde(default)]
163    pub sockets: bool,
164}
165
166impl Default for WasmCapabilities {
167    fn default() -> Self {
168        Self {
169            config: true,
170            keyvalue: true,
171            logging: true,
172            secrets: false,
173            metrics: true,
174            http_client: false,
175            cli: false,
176            filesystem: false,
177            sockets: false,
178        }
179    }
180}
181
182/// Pre-opened directory for WASM filesystem access
183#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
184#[serde(deny_unknown_fields)]
185pub struct WasmPreopen {
186    /// Host path to mount
187    pub source: String,
188    /// Guest path (visible to WASM module)
189    pub target: String,
190    /// Read-only access (default: false)
191    #[serde(default)]
192    pub readonly: bool,
193}
194
195/// Comprehensive configuration for all WASM service types.
196///
197/// Replaces the previous `WasmHttpConfig` with resource limits, capability
198/// declarations, networking controls, and storage configuration.
199#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
200#[serde(deny_unknown_fields)]
201#[allow(clippy::struct_excessive_bools)]
202pub struct WasmConfig {
203    // --- Instance Management ---
204    /// Minimum number of warm instances to keep ready
205    #[serde(default = "default_min_instances")]
206    pub min_instances: u32,
207    /// Maximum number of instances to scale to
208    #[serde(default = "default_max_instances")]
209    pub max_instances: u32,
210    /// Time before idle instances are terminated
211    #[serde(default = "default_idle_timeout", with = "duration::required")]
212    pub idle_timeout: std::time::Duration,
213    /// Maximum time for a single request
214    #[serde(default = "default_request_timeout", with = "duration::required")]
215    pub request_timeout: std::time::Duration,
216
217    // --- Resource Limits ---
218    /// Maximum linear memory (e.g., "64Mi", "256Mi")
219    #[serde(default, skip_serializing_if = "Option::is_none")]
220    pub max_memory: Option<String>,
221    /// Maximum fuel (instruction count limit, 0 = unlimited)
222    #[serde(default)]
223    pub max_fuel: u64,
224    /// Epoch interval for cooperative preemption
225    #[serde(
226        default,
227        skip_serializing_if = "Option::is_none",
228        with = "duration::option"
229    )]
230    pub epoch_interval: Option<std::time::Duration>,
231
232    // --- Capabilities ---
233    /// Explicit capability grants (overrides world defaults when restricting)
234    #[serde(default, skip_serializing_if = "Option::is_none")]
235    pub capabilities: Option<WasmCapabilities>,
236
237    // --- Networking ---
238    /// Allow outgoing HTTP requests (default: true)
239    #[serde(default = "default_true")]
240    pub allow_http_outgoing: bool,
241    /// Allowed outgoing HTTP hosts (empty = all allowed)
242    #[serde(default, skip_serializing_if = "Vec::is_empty")]
243    pub allowed_hosts: Vec<String>,
244    /// Allow raw TCP sockets (default: false)
245    #[serde(default)]
246    pub allow_tcp: bool,
247    /// Allow raw UDP sockets (default: false)
248    #[serde(default)]
249    pub allow_udp: bool,
250
251    // --- Storage ---
252    /// Pre-opened directories (host path -> guest path)
253    #[serde(default, skip_serializing_if = "Vec::is_empty")]
254    pub preopens: Vec<WasmPreopen>,
255    /// Enable KV store access (default: true)
256    #[serde(default = "default_true")]
257    pub kv_enabled: bool,
258    /// KV store namespace (default: service name)
259    #[serde(default, skip_serializing_if = "Option::is_none")]
260    pub kv_namespace: Option<String>,
261    /// KV store max value size in bytes (default: 1MB)
262    #[serde(default = "default_kv_max_value_size")]
263    pub kv_max_value_size: u64,
264
265    // --- Secrets ---
266    /// Secret names accessible to this WASM module
267    #[serde(default, skip_serializing_if = "Vec::is_empty")]
268    pub secrets: Vec<String>,
269
270    // --- Performance ---
271    /// Pre-compile on deploy to reduce cold start (default: true)
272    #[serde(default = "default_true")]
273    pub precompile: bool,
274}
275
/// Serde default for `kv_max_value_size`: 1 MiB expressed in bytes.
fn default_kv_max_value_size() -> u64 {
    const BYTES_PER_MIB: u64 = 1024 * 1024;
    BYTES_PER_MIB
}
279
280impl Default for WasmConfig {
281    fn default() -> Self {
282        Self {
283            min_instances: default_min_instances(),
284            max_instances: default_max_instances(),
285            idle_timeout: default_idle_timeout(),
286            request_timeout: default_request_timeout(),
287            max_memory: None,
288            max_fuel: 0,
289            epoch_interval: None,
290            capabilities: None,
291            allow_http_outgoing: true,
292            allowed_hosts: Vec::new(),
293            allow_tcp: false,
294            allow_udp: false,
295            preopens: Vec::new(),
296            kv_enabled: true,
297            kv_namespace: None,
298            kv_max_value_size: default_kv_max_value_size(),
299            secrets: Vec::new(),
300            precompile: true,
301        }
302    }
303}
304
305/// Configuration for WASM HTTP services with instance pooling
306#[deprecated(note = "Use WasmConfig instead")]
307#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
308#[serde(deny_unknown_fields)]
309pub struct WasmHttpConfig {
310    /// Minimum number of warm instances to keep ready
311    #[serde(default = "default_min_instances")]
312    pub min_instances: u32,
313    /// Maximum number of instances to scale to
314    #[serde(default = "default_max_instances")]
315    pub max_instances: u32,
316    /// Time before idle instances are terminated
317    #[serde(default = "default_idle_timeout", with = "duration::required")]
318    pub idle_timeout: std::time::Duration,
319    /// Maximum time for a single request
320    #[serde(default = "default_request_timeout", with = "duration::required")]
321    pub request_timeout: std::time::Duration,
322}
323
/// Serde default for `min_instances`: keep no warm instances.
fn default_min_instances() -> u32 {
    const NO_WARM_INSTANCES: u32 = 0;
    NO_WARM_INSTANCES
}
327
/// Serde default for `max_instances`: scale up to ten instances.
fn default_max_instances() -> u32 {
    const INSTANCE_CEILING: u32 = 10;
    INSTANCE_CEILING
}
331
/// Serde default for `idle_timeout`: five minutes.
fn default_idle_timeout() -> std::time::Duration {
    use std::time::Duration;
    Duration::from_secs(5 * 60)
}
335
/// Serde default for `request_timeout`: thirty seconds.
fn default_request_timeout() -> std::time::Duration {
    use std::time::Duration;
    Duration::from_millis(30_000)
}
339
340#[allow(deprecated)]
341impl Default for WasmHttpConfig {
342    fn default() -> Self {
343        Self {
344            min_instances: default_min_instances(),
345            max_instances: default_max_instances(),
346            idle_timeout: default_idle_timeout(),
347            request_timeout: default_request_timeout(),
348        }
349    }
350}
351
352#[allow(deprecated)]
353impl From<WasmHttpConfig> for WasmConfig {
354    fn from(old: WasmHttpConfig) -> Self {
355        Self {
356            min_instances: old.min_instances,
357            max_instances: old.max_instances,
358            idle_timeout: old.idle_timeout,
359            request_timeout: old.request_timeout,
360            ..Default::default()
361        }
362    }
363}
364
365impl ServiceType {
366    /// Returns true if this is any WASM service type
367    #[must_use]
368    pub fn is_wasm(&self) -> bool {
369        matches!(
370            self,
371            ServiceType::WasmHttp
372                | ServiceType::WasmPlugin
373                | ServiceType::WasmTransformer
374                | ServiceType::WasmAuthenticator
375                | ServiceType::WasmRateLimiter
376                | ServiceType::WasmMiddleware
377                | ServiceType::WasmRouter
378        )
379    }
380
381    /// Returns the default capabilities for this WASM service type.
382    /// Returns None for non-WASM types.
383    #[must_use]
384    pub fn default_wasm_capabilities(&self) -> Option<WasmCapabilities> {
385        match self {
386            ServiceType::WasmHttp | ServiceType::WasmRouter => Some(WasmCapabilities {
387                config: true,
388                keyvalue: true,
389                logging: true,
390                secrets: false,
391                metrics: false,
392                http_client: true,
393                cli: false,
394                filesystem: false,
395                sockets: false,
396            }),
397            ServiceType::WasmPlugin => Some(WasmCapabilities {
398                config: true,
399                keyvalue: true,
400                logging: true,
401                secrets: true,
402                metrics: true,
403                http_client: true,
404                cli: true,
405                filesystem: true,
406                sockets: false,
407            }),
408            ServiceType::WasmTransformer => Some(WasmCapabilities {
409                config: false,
410                keyvalue: false,
411                logging: true,
412                secrets: false,
413                metrics: false,
414                http_client: false,
415                cli: true,
416                filesystem: false,
417                sockets: false,
418            }),
419            ServiceType::WasmAuthenticator => Some(WasmCapabilities {
420                config: true,
421                keyvalue: false,
422                logging: true,
423                secrets: true,
424                metrics: false,
425                http_client: true,
426                cli: false,
427                filesystem: false,
428                sockets: false,
429            }),
430            ServiceType::WasmRateLimiter => Some(WasmCapabilities {
431                config: true,
432                keyvalue: true,
433                logging: true,
434                secrets: false,
435                metrics: true,
436                http_client: false,
437                cli: true,
438                filesystem: false,
439                sockets: false,
440            }),
441            ServiceType::WasmMiddleware => Some(WasmCapabilities {
442                config: true,
443                keyvalue: false,
444                logging: true,
445                secrets: false,
446                metrics: false,
447                http_client: true,
448                cli: false,
449                filesystem: false,
450                sockets: false,
451            }),
452            _ => None,
453        }
454    }
455}
456
/// Serde default for `ApiSpec::bind`: every interface on port 3669.
fn default_api_bind() -> String {
    String::from("0.0.0.0:3669")
}
460
461/// API server configuration (embedded in deploy/up flows)
462#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
463pub struct ApiSpec {
464    /// Enable the API server (default: true)
465    #[serde(default = "default_true")]
466    pub enabled: bool,
467    /// Bind address (default: "0.0.0.0:3669")
468    #[serde(default = "default_api_bind")]
469    pub bind: String,
470    /// JWT secret (reads `ZLAYER_JWT_SECRET` env var if not set)
471    #[serde(default)]
472    pub jwt_secret: Option<String>,
473    /// Enable Swagger UI (default: true)
474    #[serde(default = "default_true")]
475    pub swagger: bool,
476}
477
478impl Default for ApiSpec {
479    fn default() -> Self {
480        Self {
481            enabled: true,
482            bind: default_api_bind(),
483            jwt_secret: None,
484            swagger: true,
485        }
486    }
487}
488
489/// Top-level deployment specification
490#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Validate)]
491#[serde(deny_unknown_fields)]
492pub struct DeploymentSpec {
493    /// Spec version (must be "v1")
494    #[validate(custom(function = "crate::validate::validate_version_wrapper"))]
495    pub version: String,
496
497    /// Deployment name (used for overlays, DNS)
498    #[validate(custom(function = "crate::validate::validate_deployment_name_wrapper"))]
499    pub deployment: String,
500
501    /// Service definitions
502    #[serde(default)]
503    #[validate(nested)]
504    pub services: HashMap<String, ServiceSpec>,
505
506    /// Top-level tunnel definitions (not tied to service endpoints)
507    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
508    pub tunnels: HashMap<String, TunnelDefinition>,
509
510    /// API server configuration (enabled by default)
511    #[serde(default)]
512    pub api: ApiSpec,
513}
514
515/// Top-level tunnel definition (not tied to a service endpoint)
516#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
517#[serde(deny_unknown_fields)]
518pub struct TunnelDefinition {
519    /// Source node
520    pub from: String,
521
522    /// Destination node
523    pub to: String,
524
525    /// Local port on source
526    pub local_port: u16,
527
528    /// Remote port on destination
529    pub remote_port: u16,
530
531    /// Protocol (tcp/udp, defaults to tcp)
532    #[serde(default)]
533    pub protocol: TunnelProtocol,
534
535    /// Exposure type (defaults to internal)
536    #[serde(default)]
537    pub expose: ExposeType,
538}
539
540/// Protocol for tunnel connections (tcp or udp only)
541#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
542#[serde(rename_all = "lowercase")]
543pub enum TunnelProtocol {
544    #[default]
545    Tcp,
546    Udp,
547}
548
549/// Per-service specification
550#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Validate)]
551#[serde(deny_unknown_fields)]
552pub struct ServiceSpec {
553    /// Resource type (service, job, cron)
554    #[serde(default = "default_resource_type")]
555    pub rtype: ResourceType,
556
557    /// Cron schedule expression (only for rtype: cron)
558    /// Uses 7-field cron syntax: "sec min hour day-of-month month day-of-week year"
559    /// Examples:
560    ///   - "0 0 0 * * * *" (daily at midnight)
561    ///   - "0 */5 * * * * *" (every 5 minutes)
562    ///   - "0 0 12 * * MON-FRI *" (weekdays at noon)
563    #[serde(default, skip_serializing_if = "Option::is_none")]
564    #[validate(custom(function = "crate::validate::validate_schedule_wrapper"))]
565    pub schedule: Option<String>,
566
567    /// Container image specification
568    #[validate(nested)]
569    pub image: ImageSpec,
570
571    /// Resource limits
572    #[serde(default)]
573    #[validate(nested)]
574    pub resources: ResourcesSpec,
575
576    /// Environment variables for the service
577    ///
578    /// Values can be:
579    /// - Plain strings: `"value"`
580    /// - Host env refs: `$E:VAR_NAME`
581    /// - Secret refs: `$S:secret-name` or `$S:@service/secret-name`
582    #[serde(default)]
583    pub env: HashMap<String, String>,
584
585    /// Command override (entrypoint, args, workdir)
586    #[serde(default)]
587    pub command: CommandSpec,
588
589    /// Network configuration
590    #[serde(default)]
591    pub network: NetworkSpec,
592
593    /// Endpoint definitions (proxy bindings)
594    #[serde(default)]
595    #[validate(nested)]
596    pub endpoints: Vec<EndpointSpec>,
597
598    /// Scaling configuration
599    #[serde(default)]
600    #[validate(custom(function = "crate::validate::validate_scale_spec"))]
601    pub scale: ScaleSpec,
602
603    /// Dependency specifications
604    #[serde(default)]
605    pub depends: Vec<DependsSpec>,
606
607    /// Health check configuration
608    #[serde(default = "default_health")]
609    pub health: HealthSpec,
610
611    /// Init actions (pre-start lifecycle steps)
612    #[serde(default)]
613    pub init: InitSpec,
614
615    /// Error handling policies
616    #[serde(default)]
617    pub errors: ErrorsSpec,
618
619    /// Device passthrough (e.g., /dev/kvm for VMs)
620    #[serde(default)]
621    pub devices: Vec<DeviceSpec>,
622
623    /// Storage mounts for the container
624    #[serde(default, skip_serializing_if = "Vec::is_empty")]
625    pub storage: Vec<StorageSpec>,
626
627    /// Linux capabilities to add (e.g., `SYS_ADMIN`, `NET_ADMIN`)
628    #[serde(default)]
629    pub capabilities: Vec<String>,
630
631    /// Run container in privileged mode (all capabilities + all devices)
632    #[serde(default)]
633    pub privileged: bool,
634
635    /// Node allocation mode (shared, dedicated, exclusive)
636    #[serde(default)]
637    pub node_mode: NodeMode,
638
639    /// Node selection constraints (required/preferred labels)
640    #[serde(default, skip_serializing_if = "Option::is_none")]
641    pub node_selector: Option<NodeSelector>,
642
643    /// Service type (standard, `wasm_http`, `wasm_plugin`, etc.)
644    #[serde(default)]
645    pub service_type: ServiceType,
646
647    /// WASM configuration (used when `service_type` is any Wasm* variant)
648    /// Also accepts the deprecated `wasm_http` key for backward compatibility.
649    #[serde(default, skip_serializing_if = "Option::is_none", alias = "wasm_http")]
650    pub wasm: Option<WasmConfig>,
651
652    /// Use host networking (container shares host network namespace)
653    ///
654    /// When true, the container will NOT get its own network namespace.
655    /// This is set programmatically via the `--host-network` CLI flag, not in YAML specs.
656    #[serde(skip)]
657    pub host_network: bool,
658}
659
660/// Command override specification (Section 5.5)
661#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
662#[serde(deny_unknown_fields)]
663pub struct CommandSpec {
664    /// Override image ENTRYPOINT
665    #[serde(default, skip_serializing_if = "Option::is_none")]
666    pub entrypoint: Option<Vec<String>>,
667
668    /// Override image CMD
669    #[serde(default, skip_serializing_if = "Option::is_none")]
670    pub args: Option<Vec<String>>,
671
672    /// Override working directory
673    #[serde(default, skip_serializing_if = "Option::is_none")]
674    pub workdir: Option<String>,
675}
676
677fn default_resource_type() -> ResourceType {
678    ResourceType::Service
679}
680
681fn default_health() -> HealthSpec {
682    HealthSpec {
683        start_grace: Some(std::time::Duration::from_secs(5)),
684        interval: None,
685        timeout: None,
686        retries: 3,
687        check: HealthCheck::Tcp { port: 0 },
688    }
689}
690
691/// Resource type - determines container lifecycle
692#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
693#[serde(rename_all = "lowercase")]
694pub enum ResourceType {
695    /// Long-running container, receives traffic, load-balanced
696    Service,
697    /// Run-to-completion, triggered by endpoint/CLI/internal system
698    Job,
699    /// Scheduled run-to-completion, time-triggered
700    Cron,
701}
702
703/// Container image specification
704#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Validate)]
705#[serde(deny_unknown_fields)]
706pub struct ImageSpec {
707    /// Image name (e.g., "ghcr.io/org/api:latest")
708    #[validate(custom(function = "crate::validate::validate_image_name_wrapper"))]
709    pub name: String,
710
711    /// When to pull the image
712    #[serde(default = "default_pull_policy")]
713    pub pull_policy: PullPolicy,
714}
715
716fn default_pull_policy() -> PullPolicy {
717    PullPolicy::IfNotPresent
718}
719
720/// Image pull policy
721#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
722#[serde(rename_all = "snake_case")]
723pub enum PullPolicy {
724    /// Always pull the image
725    Always,
726    /// Pull only if not present locally
727    IfNotPresent,
728    /// Never pull, use local image only
729    Never,
730}
731
732/// Device passthrough specification
733#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Validate)]
734#[serde(deny_unknown_fields)]
735pub struct DeviceSpec {
736    /// Host device path (e.g., /dev/kvm, /dev/net/tun)
737    #[validate(length(min = 1, message = "device path cannot be empty"))]
738    pub path: String,
739
740    /// Allow read access
741    #[serde(default = "default_true")]
742    pub read: bool,
743
744    /// Allow write access
745    #[serde(default = "default_true")]
746    pub write: bool,
747
748    /// Allow mknod (create device nodes)
749    #[serde(default)]
750    pub mknod: bool,
751}
752
/// Serde default helper for boolean fields that are on unless the spec says otherwise.
fn default_true() -> bool {
    const ON_BY_DEFAULT: bool = true;
    ON_BY_DEFAULT
}
756
757/// Storage mount specification
758#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
759#[serde(deny_unknown_fields, tag = "type", rename_all = "snake_case")]
760pub enum StorageSpec {
761    /// Bind mount from host path to container
762    Bind {
763        source: String,
764        target: String,
765        #[serde(default)]
766        readonly: bool,
767    },
768    /// Named persistent storage volume
769    Named {
770        name: String,
771        target: String,
772        #[serde(default)]
773        readonly: bool,
774        /// Performance tier (default: local, SQLite-safe)
775        #[serde(default)]
776        tier: StorageTier,
777        /// Optional size limit (e.g., "1Gi", "512Mi")
778        #[serde(default, skip_serializing_if = "Option::is_none")]
779        size: Option<String>,
780    },
781    /// Anonymous storage (auto-named, container lifecycle)
782    Anonymous {
783        target: String,
784        /// Performance tier (default: local)
785        #[serde(default)]
786        tier: StorageTier,
787    },
788    /// Memory-backed tmpfs mount
789    Tmpfs {
790        target: String,
791        #[serde(default)]
792        size: Option<String>,
793        #[serde(default)]
794        mode: Option<u32>,
795    },
796    /// S3-backed FUSE mount
797    S3 {
798        bucket: String,
799        #[serde(default)]
800        prefix: Option<String>,
801        target: String,
802        #[serde(default)]
803        readonly: bool,
804        #[serde(default)]
805        endpoint: Option<String>,
806        #[serde(default)]
807        credentials: Option<String>,
808    },
809}
810
811/// Resource limits (upper bounds, not reservations)
812#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default, Validate)]
813#[serde(deny_unknown_fields)]
814pub struct ResourcesSpec {
815    /// CPU limit (cores, e.g., 0.5, 1, 2)
816    #[serde(default)]
817    #[validate(custom(function = "crate::validate::validate_cpu_option_wrapper"))]
818    pub cpu: Option<f64>,
819
820    /// Memory limit (e.g., "512Mi", "1Gi", "2Gi")
821    #[serde(default)]
822    #[validate(custom(function = "crate::validate::validate_memory_option_wrapper"))]
823    pub memory: Option<String>,
824
825    /// GPU resource request
826    #[serde(default, skip_serializing_if = "Option::is_none")]
827    pub gpu: Option<GpuSpec>,
828}
829
830/// Scheduling policy for GPU workloads
831#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
832#[serde(rename_all = "kebab-case")]
833pub enum SchedulingPolicy {
834    /// Place as many replicas as possible; partial placement is acceptable (default)
835    #[default]
836    BestEffort,
837    /// All replicas must be placed or none are; prevents partial GPU job deployment
838    Gang,
839    /// Spread replicas across nodes to maximize GPU distribution
840    Spread,
841}
842
843/// GPU sharing mode controlling how GPU resources are multiplexed.
844#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
845#[serde(rename_all = "kebab-case")]
846pub enum GpuSharingMode {
847    /// Whole GPU per container (default). No sharing.
848    #[default]
849    Exclusive,
850    /// NVIDIA Multi-Process Service: concurrent GPU compute sharing.
851    /// Multiple containers run GPU kernels simultaneously with hardware isolation.
852    Mps,
853    /// NVIDIA time-slicing: round-robin GPU access across containers.
854    /// Lower overhead than MPS but no concurrent execution.
855    TimeSlice,
856}
857
858/// Configuration for distributed GPU job coordination.
859///
860/// When enabled on a multi-replica GPU service, `ZLayer` injects standard
861/// distributed training environment variables (`MASTER_ADDR`, `MASTER_PORT`,
862/// `WORLD_SIZE`, `RANK`, `LOCAL_RANK`) so frameworks like `PyTorch`, `Horovod`,
863/// and `DeepSpeed` can coordinate automatically.
864#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Validate)]
865#[serde(deny_unknown_fields)]
866pub struct DistributedConfig {
867    /// Communication backend: "nccl" (default), "gloo", or "mpi"
868    #[serde(default = "default_dist_backend")]
869    pub backend: String,
870    /// Port for rank-0 master coordination (default: 29500)
871    #[serde(default = "default_dist_port")]
872    pub master_port: u16,
873}
874
/// Serde default for `DistributedConfig::backend`.
fn default_dist_backend() -> String {
    String::from("nccl")
}
878
/// Serde default for `DistributedConfig::master_port`.
fn default_dist_port() -> u16 {
    const DEFAULT_MASTER_PORT: u16 = 29_500;
    DEFAULT_MASTER_PORT
}
882
883/// GPU resource specification
884///
885/// Supported vendors:
886/// - `nvidia` - NVIDIA GPUs via NVIDIA Container Toolkit (default)
887/// - `amd` - AMD GPUs via `ROCm` (/dev/kfd + /dev/dri/renderD*)
888/// - `intel` - Intel GPUs via VAAPI/i915 (/dev/dri/renderD*)
889/// - `apple` - Apple Silicon GPUs via Metal/MPS (macOS only)
890///
891/// Unknown vendors fall back to DRI render node passthrough.
892///
893/// ## GPU mode (macOS only)
894///
895/// When `vendor` is `"apple"`, the `mode` field controls how GPU access is provided:
896/// - `"native"` -- Seatbelt sandbox with direct Metal/MPS access (lowest overhead)
897/// - `"vm"` -- libkrun micro-VM with GPU forwarding (stronger isolation)
898/// - `None` (default) -- Auto-select based on platform and vendor
899///
900/// On Linux, `mode` is ignored; GPU passthrough always uses device node binding.
901#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Validate)]
902#[serde(deny_unknown_fields)]
903pub struct GpuSpec {
904    /// Number of GPUs to request
905    #[serde(default = "default_gpu_count")]
906    pub count: u32,
907    /// GPU vendor (`nvidia`, `amd`, `intel`, `apple`) - defaults to `nvidia`
908    #[serde(default = "default_gpu_vendor")]
909    pub vendor: String,
910    /// GPU access mode (macOS only): `"native"`, `"vm"`, or `None` for auto-select
911    #[serde(default, skip_serializing_if = "Option::is_none")]
912    pub mode: Option<String>,
913    /// Pin to a specific GPU model (e.g. "A100", "H100").
914    /// Substring match against detected GPU model names.
915    #[serde(default, skip_serializing_if = "Option::is_none")]
916    pub model: Option<String>,
917    /// Scheduling policy for GPU workloads.
918    /// - `best-effort` (default): place what fits
919    /// - `gang`: all-or-nothing for distributed jobs
920    /// - `spread`: distribute across nodes
921    #[serde(default, skip_serializing_if = "Option::is_none")]
922    pub scheduling: Option<SchedulingPolicy>,
923    /// Distributed GPU job coordination.
924    /// When set, injects `MASTER_ADDR`, `WORLD_SIZE`, `RANK`, `LOCAL_RANK` env vars.
925    #[serde(default, skip_serializing_if = "Option::is_none")]
926    pub distributed: Option<DistributedConfig>,
927    /// GPU sharing mode: exclusive (default), mps, or time-slice.
928    #[serde(default, skip_serializing_if = "Option::is_none")]
929    pub sharing: Option<GpuSharingMode>,
930}
931
/// Serde default for `GpuSpec::count`: a single GPU.
fn default_gpu_count() -> u32 {
    const SINGLE_GPU: u32 = 1;
    SINGLE_GPU
}
935
/// Serde default for `GpuSpec::vendor`: `"nvidia"`.
fn default_gpu_vendor() -> String {
    String::from("nvidia")
}
939
940/// Network configuration
941#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
942#[serde(deny_unknown_fields)]
943#[derive(Default)]
944pub struct NetworkSpec {
945    /// Overlay network configuration
946    #[serde(default)]
947    pub overlays: OverlayConfig,
948
949    /// Join policy (who can join this service)
950    #[serde(default)]
951    pub join: JoinPolicy,
952}
953
954/// Overlay network configuration
955#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
956#[serde(deny_unknown_fields)]
957pub struct OverlayConfig {
958    /// Service-scoped overlay (service replicas only)
959    #[serde(default)]
960    pub service: OverlaySettings,
961
962    /// Global overlay (all services in deployment)
963    #[serde(default)]
964    pub global: OverlaySettings,
965}
966
967impl Default for OverlayConfig {
968    fn default() -> Self {
969        Self {
970            service: OverlaySettings {
971                enabled: true,
972                encrypted: true,
973                isolated: true,
974            },
975            global: OverlaySettings {
976                enabled: true,
977                encrypted: true,
978                isolated: false,
979            },
980        }
981    }
982}
983
984/// Overlay network settings
985#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
986#[serde(deny_unknown_fields)]
987pub struct OverlaySettings {
988    /// Enable this overlay
989    #[serde(default = "default_enabled")]
990    pub enabled: bool,
991
992    /// Use encryption
993    #[serde(default = "default_encrypted")]
994    pub encrypted: bool,
995
996    /// Isolate from other services/groups
997    #[serde(default)]
998    pub isolated: bool,
999}
1000
/// Serde default for `OverlaySettings::enabled`.
fn default_enabled() -> bool {
    const ENABLED_WHEN_OMITTED: bool = true;
    ENABLED_WHEN_OMITTED
}
1004
/// Serde default for `OverlaySettings::encrypted`.
fn default_encrypted() -> bool {
    const ENCRYPTED_WHEN_OMITTED: bool = true;
    ENCRYPTED_WHEN_OMITTED
}
1008
1009/// Join policy - controls who can join a service
1010#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
1011#[serde(deny_unknown_fields)]
1012pub struct JoinPolicy {
1013    /// Join mode
1014    #[serde(default = "default_join_mode")]
1015    pub mode: JoinMode,
1016
1017    /// Scope of join
1018    #[serde(default = "default_join_scope")]
1019    pub scope: JoinScope,
1020}
1021
1022impl Default for JoinPolicy {
1023    fn default() -> Self {
1024        Self {
1025            mode: default_join_mode(),
1026            scope: default_join_scope(),
1027        }
1028    }
1029}
1030
1031fn default_join_mode() -> JoinMode {
1032    JoinMode::Token
1033}
1034
1035fn default_join_scope() -> JoinScope {
1036    JoinScope::Service
1037}
1038
1039/// Join mode
1040#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
1041#[serde(rename_all = "snake_case")]
1042pub enum JoinMode {
1043    /// Any trusted node in deployment can self-enroll
1044    Open,
1045    /// Requires a join key (recommended)
1046    Token,
1047    /// Only control-plane/scheduler can place replicas
1048    Closed,
1049}
1050
1051/// Join scope
1052#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
1053#[serde(rename_all = "snake_case")]
1054pub enum JoinScope {
1055    /// Join this specific service
1056    Service,
1057    /// Join all services in deployment
1058    Global,
1059}
1060
1061/// Endpoint specification (proxy binding)
1062#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Validate)]
1063#[serde(deny_unknown_fields)]
1064pub struct EndpointSpec {
1065    /// Endpoint name (for routing)
1066    #[validate(length(min = 1, message = "endpoint name cannot be empty"))]
1067    pub name: String,
1068
1069    /// Protocol
1070    pub protocol: Protocol,
1071
1072    /// Proxy listen port (external-facing port)
1073    #[validate(custom(function = "crate::validate::validate_port_wrapper"))]
1074    pub port: u16,
1075
1076    /// Container port the service actually listens on.
1077    /// Defaults to `port` when not specified.
1078    #[serde(default, skip_serializing_if = "Option::is_none")]
1079    pub target_port: Option<u16>,
1080
1081    /// URL path prefix (for http/https/websocket)
1082    pub path: Option<String>,
1083
1084    /// Host pattern for routing (e.g. "api.example.com" or "*.example.com").
1085    /// `None` means match any host.
1086    #[serde(default, skip_serializing_if = "Option::is_none")]
1087    pub host: Option<String>,
1088
1089    /// Exposure type
1090    #[serde(default = "default_expose")]
1091    pub expose: ExposeType,
1092
1093    /// Optional stream (L4) proxy configuration
1094    /// Only applicable when protocol is tcp or udp
1095    #[serde(default, skip_serializing_if = "Option::is_none")]
1096    pub stream: Option<StreamEndpointConfig>,
1097
1098    /// Optional tunnel configuration for this endpoint
1099    #[serde(default, skip_serializing_if = "Option::is_none")]
1100    pub tunnel: Option<EndpointTunnelConfig>,
1101}
1102
1103impl EndpointSpec {
1104    /// Returns the port the container actually listens on.
1105    /// Falls back to `port` when `target_port` is not specified.
1106    #[must_use]
1107    pub fn target_port(&self) -> u16 {
1108        self.target_port.unwrap_or(self.port)
1109    }
1110}
1111
1112/// Tunnel configuration for an endpoint
1113#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
1114#[serde(deny_unknown_fields)]
1115pub struct EndpointTunnelConfig {
1116    /// Enable tunneling for this endpoint
1117    #[serde(default)]
1118    pub enabled: bool,
1119
1120    /// Source node name (defaults to service's node)
1121    #[serde(default, skip_serializing_if = "Option::is_none")]
1122    pub from: Option<String>,
1123
1124    /// Destination node name (defaults to cluster ingress)
1125    #[serde(default, skip_serializing_if = "Option::is_none")]
1126    pub to: Option<String>,
1127
1128    /// Remote port to expose (0 = auto-assign)
1129    #[serde(default)]
1130    pub remote_port: u16,
1131
1132    /// Override exposure for tunnel (public/internal)
1133    #[serde(default, skip_serializing_if = "Option::is_none")]
1134    pub expose: Option<ExposeType>,
1135
1136    /// On-demand access configuration
1137    #[serde(default, skip_serializing_if = "Option::is_none")]
1138    pub access: Option<TunnelAccessConfig>,
1139}
1140
1141/// On-demand access settings for `zlayer tunnel access`
1142#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
1143#[serde(deny_unknown_fields)]
1144pub struct TunnelAccessConfig {
1145    /// Allow on-demand access via CLI
1146    #[serde(default)]
1147    pub enabled: bool,
1148
1149    /// Maximum session duration (e.g., "4h", "30m")
1150    #[serde(default, skip_serializing_if = "Option::is_none")]
1151    pub max_ttl: Option<String>,
1152
1153    /// Log all access sessions
1154    #[serde(default)]
1155    pub audit: bool,
1156}
1157
1158fn default_expose() -> ExposeType {
1159    ExposeType::Internal
1160}
1161
1162/// Protocol type
1163#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
1164#[serde(rename_all = "lowercase")]
1165pub enum Protocol {
1166    Http,
1167    Https,
1168    Tcp,
1169    Udp,
1170    Websocket,
1171}
1172
1173/// Exposure type
1174#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
1175#[serde(rename_all = "lowercase")]
1176pub enum ExposeType {
1177    Public,
1178    #[default]
1179    Internal,
1180}
1181
1182/// Stream (L4) proxy configuration for TCP/UDP endpoints
1183#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
1184#[serde(deny_unknown_fields)]
1185pub struct StreamEndpointConfig {
1186    /// Enable TLS termination for TCP (auto-provision cert)
1187    #[serde(default)]
1188    pub tls: bool,
1189
1190    /// Enable PROXY protocol for passing client IP
1191    #[serde(default)]
1192    pub proxy_protocol: bool,
1193
1194    /// Custom session timeout for UDP (default: 60s)
1195    /// Format: duration string like "60s", "5m"
1196    #[serde(default, skip_serializing_if = "Option::is_none")]
1197    pub session_timeout: Option<String>,
1198
1199    /// Health check configuration for L4
1200    #[serde(default, skip_serializing_if = "Option::is_none")]
1201    pub health_check: Option<StreamHealthCheck>,
1202}
1203
1204/// Health check types for stream (L4) endpoints
1205#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
1206#[serde(tag = "type", rename_all = "snake_case")]
1207pub enum StreamHealthCheck {
1208    /// TCP connect check - verifies port is accepting connections
1209    TcpConnect,
1210    /// UDP probe - sends request and optionally validates response
1211    UdpProbe {
1212        /// Request payload to send (can use hex escapes like \\xFF)
1213        request: String,
1214        /// Expected response pattern (optional regex)
1215        #[serde(default, skip_serializing_if = "Option::is_none")]
1216        expect: Option<String>,
1217    },
1218}
1219
1220/// Scaling configuration
1221#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
1222#[serde(tag = "mode", rename_all = "lowercase", deny_unknown_fields)]
1223pub enum ScaleSpec {
1224    /// Adaptive scaling with metrics
1225    #[serde(rename = "adaptive")]
1226    Adaptive {
1227        /// Minimum replicas
1228        min: u32,
1229
1230        /// Maximum replicas
1231        max: u32,
1232
1233        /// Cooldown period between scale events
1234        #[serde(default, with = "duration::option")]
1235        cooldown: Option<std::time::Duration>,
1236
1237        /// Target metrics for scaling
1238        #[serde(default)]
1239        targets: ScaleTargets,
1240    },
1241
1242    /// Fixed number of replicas
1243    #[serde(rename = "fixed")]
1244    Fixed { replicas: u32 },
1245
1246    /// Manual scaling (no automatic scaling)
1247    #[serde(rename = "manual")]
1248    Manual,
1249}
1250
1251impl Default for ScaleSpec {
1252    fn default() -> Self {
1253        Self::Adaptive {
1254            min: 1,
1255            max: 10,
1256            cooldown: Some(std::time::Duration::from_secs(30)),
1257            targets: ScaleTargets::default(),
1258        }
1259    }
1260}
1261
1262/// Target metrics for adaptive scaling
1263#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
1264#[serde(deny_unknown_fields)]
1265#[derive(Default)]
1266pub struct ScaleTargets {
1267    /// CPU percentage threshold (0-100)
1268    #[serde(default)]
1269    pub cpu: Option<u8>,
1270
1271    /// Memory percentage threshold (0-100)
1272    #[serde(default)]
1273    pub memory: Option<u8>,
1274
1275    /// Requests per second threshold
1276    #[serde(default)]
1277    pub rps: Option<u32>,
1278}
1279
1280/// Dependency specification
1281#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
1282#[serde(deny_unknown_fields)]
1283pub struct DependsSpec {
1284    /// Service name to depend on
1285    pub service: String,
1286
1287    /// Condition for dependency
1288    #[serde(default = "default_condition")]
1289    pub condition: DependencyCondition,
1290
1291    /// Maximum time to wait
1292    #[serde(default = "default_timeout", with = "duration::option")]
1293    pub timeout: Option<std::time::Duration>,
1294
1295    /// Action on timeout
1296    #[serde(default = "default_on_timeout")]
1297    pub on_timeout: TimeoutAction,
1298}
1299
1300fn default_condition() -> DependencyCondition {
1301    DependencyCondition::Healthy
1302}
1303
/// Serde default for `DependsSpec::timeout`: 300 seconds.
///
/// Returns an `Option` because serde needs the field's own type here.
#[allow(clippy::unnecessary_wraps)]
fn default_timeout() -> Option<std::time::Duration> {
    let five_minutes = std::time::Duration::from_secs(5 * 60);
    Some(five_minutes)
}
1308
1309fn default_on_timeout() -> TimeoutAction {
1310    TimeoutAction::Fail
1311}
1312
1313/// Dependency condition
1314#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
1315#[serde(rename_all = "lowercase")]
1316pub enum DependencyCondition {
1317    /// Container process exists
1318    Started,
1319    /// Health check passes
1320    Healthy,
1321    /// Service is available for routing
1322    Ready,
1323}
1324
1325/// Timeout action
1326#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
1327#[serde(rename_all = "lowercase")]
1328pub enum TimeoutAction {
1329    Fail,
1330    Warn,
1331    Continue,
1332}
1333
1334/// Health check specification
1335#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
1336#[serde(deny_unknown_fields)]
1337pub struct HealthSpec {
1338    /// Grace period before first check
1339    #[serde(default, with = "duration::option")]
1340    pub start_grace: Option<std::time::Duration>,
1341
1342    /// Interval between checks
1343    #[serde(default, with = "duration::option")]
1344    pub interval: Option<std::time::Duration>,
1345
1346    /// Timeout per check
1347    #[serde(default, with = "duration::option")]
1348    pub timeout: Option<std::time::Duration>,
1349
1350    /// Number of retries before marking unhealthy
1351    #[serde(default = "default_retries")]
1352    pub retries: u32,
1353
1354    /// Health check type and parameters
1355    pub check: HealthCheck,
1356}
1357
/// Serde default for `HealthSpec::retries`.
fn default_retries() -> u32 {
    const RETRIES_WHEN_OMITTED: u32 = 3;
    RETRIES_WHEN_OMITTED
}
1361
1362/// Health check type
1363#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
1364#[serde(tag = "type", rename_all = "lowercase")]
1365pub enum HealthCheck {
1366    /// TCP port check
1367    Tcp {
1368        /// Port to check (0 = use first endpoint)
1369        port: u16,
1370    },
1371
1372    /// HTTP check
1373    Http {
1374        /// URL to check
1375        url: String,
1376        /// Expected status code
1377        #[serde(default = "default_expect_status")]
1378        expect_status: u16,
1379    },
1380
1381    /// Command check
1382    Command {
1383        /// Command to run
1384        command: String,
1385    },
1386}
1387
/// Serde default for the HTTP health check's `expect_status`.
fn default_expect_status() -> u16 {
    const STATUS_WHEN_OMITTED: u16 = 200;
    STATUS_WHEN_OMITTED
}
1391
1392/// Init actions specification
1393#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
1394#[serde(deny_unknown_fields)]
1395#[derive(Default)]
1396pub struct InitSpec {
1397    /// Init steps to run before container starts
1398    #[serde(default)]
1399    pub steps: Vec<InitStep>,
1400}
1401
1402/// Init action step
1403#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
1404#[serde(deny_unknown_fields)]
1405pub struct InitStep {
1406    /// Step identifier
1407    pub id: String,
1408
1409    /// Action to perform (e.g., "`init.wait_tcp`")
1410    pub uses: String,
1411
1412    /// Parameters for the action
1413    #[serde(default)]
1414    pub with: InitParams,
1415
1416    /// Number of retries
1417    #[serde(default)]
1418    pub retry: Option<u32>,
1419
1420    /// Maximum time for this step
1421    #[serde(default, with = "duration::option")]
1422    pub timeout: Option<std::time::Duration>,
1423
1424    /// Action on failure
1425    #[serde(default = "default_on_failure")]
1426    pub on_failure: FailureAction,
1427}
1428
1429fn default_on_failure() -> FailureAction {
1430    FailureAction::Fail
1431}
1432
1433/// Init action parameters
1434pub type InitParams = std::collections::HashMap<String, serde_json::Value>;
1435
1436/// Failure action for init steps
1437#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
1438#[serde(rename_all = "lowercase")]
1439pub enum FailureAction {
1440    Fail,
1441    Warn,
1442    Continue,
1443}
1444
1445/// Error handling policies
1446#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
1447#[serde(deny_unknown_fields)]
1448#[derive(Default)]
1449pub struct ErrorsSpec {
1450    /// Init failure policy
1451    #[serde(default)]
1452    pub on_init_failure: InitFailurePolicy,
1453
1454    /// Panic/restart policy
1455    #[serde(default)]
1456    pub on_panic: PanicPolicy,
1457}
1458
1459/// Init failure policy
1460#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
1461#[serde(deny_unknown_fields)]
1462pub struct InitFailurePolicy {
1463    #[serde(default = "default_init_action")]
1464    pub action: InitFailureAction,
1465}
1466
1467impl Default for InitFailurePolicy {
1468    fn default() -> Self {
1469        Self {
1470            action: default_init_action(),
1471        }
1472    }
1473}
1474
1475fn default_init_action() -> InitFailureAction {
1476    InitFailureAction::Fail
1477}
1478
1479/// Init failure action
1480#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
1481#[serde(rename_all = "lowercase")]
1482pub enum InitFailureAction {
1483    Fail,
1484    Restart,
1485    Backoff,
1486}
1487
1488/// Panic policy
1489#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
1490#[serde(deny_unknown_fields)]
1491pub struct PanicPolicy {
1492    #[serde(default = "default_panic_action")]
1493    pub action: PanicAction,
1494}
1495
1496impl Default for PanicPolicy {
1497    fn default() -> Self {
1498        Self {
1499            action: default_panic_action(),
1500        }
1501    }
1502}
1503
1504fn default_panic_action() -> PanicAction {
1505    PanicAction::Restart
1506}
1507
1508/// Panic action
1509#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
1510#[serde(rename_all = "lowercase")]
1511pub enum PanicAction {
1512    Restart,
1513    Shutdown,
1514    Isolate,
1515}
1516
1517#[cfg(test)]
1518mod tests {
1519    use super::*;
1520
1521    #[test]
1522    fn test_parse_simple_spec() {
1523        let yaml = r"
1524version: v1
1525deployment: test
1526services:
1527  hello:
1528    rtype: service
1529    image:
1530      name: hello-world:latest
1531    endpoints:
1532      - name: http
1533        protocol: http
1534        port: 8080
1535        expose: public
1536";
1537
1538        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
1539        assert_eq!(spec.version, "v1");
1540        assert_eq!(spec.deployment, "test");
1541        assert!(spec.services.contains_key("hello"));
1542    }
1543
1544    #[test]
1545    fn test_parse_duration() {
1546        let yaml = r"
1547version: v1
1548deployment: test
1549services:
1550  test:
1551    rtype: service
1552    image:
1553      name: test:latest
1554    health:
1555      timeout: 30s
1556      interval: 1m
1557      start_grace: 5s
1558      check:
1559        type: tcp
1560        port: 8080
1561";
1562
1563        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
1564        let health = &spec.services["test"].health;
1565        assert_eq!(health.timeout, Some(std::time::Duration::from_secs(30)));
1566        assert_eq!(health.interval, Some(std::time::Duration::from_secs(60)));
1567        assert_eq!(health.start_grace, Some(std::time::Duration::from_secs(5)));
1568        match &health.check {
1569            HealthCheck::Tcp { port } => assert_eq!(*port, 8080),
1570            _ => panic!("Expected TCP health check"),
1571        }
1572    }
1573
1574    #[test]
1575    fn test_parse_adaptive_scale() {
1576        let yaml = r"
1577version: v1
1578deployment: test
1579services:
1580  test:
1581    rtype: service
1582    image:
1583      name: test:latest
1584    scale:
1585      mode: adaptive
1586      min: 2
1587      max: 10
1588      cooldown: 15s
1589      targets:
1590        cpu: 70
1591        rps: 800
1592";
1593
1594        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
1595        let scale = &spec.services["test"].scale;
1596        match scale {
1597            ScaleSpec::Adaptive {
1598                min,
1599                max,
1600                cooldown,
1601                targets,
1602            } => {
1603                assert_eq!(*min, 2);
1604                assert_eq!(*max, 10);
1605                assert_eq!(*cooldown, Some(std::time::Duration::from_secs(15)));
1606                assert_eq!(targets.cpu, Some(70));
1607                assert_eq!(targets.rps, Some(800));
1608            }
1609            _ => panic!("Expected Adaptive scale mode"),
1610        }
1611    }
1612
1613    #[test]
1614    fn test_node_mode_default() {
1615        let yaml = r"
1616version: v1
1617deployment: test
1618services:
1619  hello:
1620    rtype: service
1621    image:
1622      name: hello-world:latest
1623";
1624
1625        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
1626        assert_eq!(spec.services["hello"].node_mode, NodeMode::Shared);
1627        assert!(spec.services["hello"].node_selector.is_none());
1628    }
1629
1630    #[test]
1631    fn test_node_mode_dedicated() {
1632        let yaml = r"
1633version: v1
1634deployment: test
1635services:
1636  api:
1637    rtype: service
1638    image:
1639      name: api:latest
1640    node_mode: dedicated
1641";
1642
1643        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
1644        assert_eq!(spec.services["api"].node_mode, NodeMode::Dedicated);
1645    }
1646
1647    #[test]
1648    fn test_node_mode_exclusive() {
1649        let yaml = r"
1650version: v1
1651deployment: test
1652services:
1653  database:
1654    rtype: service
1655    image:
1656      name: postgres:15
1657    node_mode: exclusive
1658";
1659
1660        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
1661        assert_eq!(spec.services["database"].node_mode, NodeMode::Exclusive);
1662    }
1663
1664    #[test]
1665    fn test_node_selector_with_labels() {
1666        let yaml = r#"
1667version: v1
1668deployment: test
1669services:
1670  ml-worker:
1671    rtype: service
1672    image:
1673      name: ml-worker:latest
1674    node_mode: dedicated
1675    node_selector:
1676      labels:
1677        gpu: "true"
1678        zone: us-east
1679      prefer_labels:
1680        storage: ssd
1681"#;
1682
1683        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
1684        let service = &spec.services["ml-worker"];
1685        assert_eq!(service.node_mode, NodeMode::Dedicated);
1686
1687        let selector = service.node_selector.as_ref().unwrap();
1688        assert_eq!(selector.labels.get("gpu"), Some(&"true".to_string()));
1689        assert_eq!(selector.labels.get("zone"), Some(&"us-east".to_string()));
1690        assert_eq!(
1691            selector.prefer_labels.get("storage"),
1692            Some(&"ssd".to_string())
1693        );
1694    }
1695
1696    #[test]
1697    fn test_node_mode_serialization_roundtrip() {
1698        use serde_json;
1699
1700        // Test all variants serialize/deserialize correctly
1701        let modes = [NodeMode::Shared, NodeMode::Dedicated, NodeMode::Exclusive];
1702        let expected_json = ["\"shared\"", "\"dedicated\"", "\"exclusive\""];
1703
1704        for (mode, expected) in modes.iter().zip(expected_json.iter()) {
1705            let json = serde_json::to_string(mode).unwrap();
1706            assert_eq!(&json, *expected, "Serialization failed for {mode:?}");
1707
1708            let deserialized: NodeMode = serde_json::from_str(&json).unwrap();
1709            assert_eq!(deserialized, *mode, "Roundtrip failed for {mode:?}");
1710        }
1711    }
1712
1713    #[test]
1714    fn test_node_selector_empty() {
1715        let yaml = r"
1716version: v1
1717deployment: test
1718services:
1719  api:
1720    rtype: service
1721    image:
1722      name: api:latest
1723    node_selector:
1724      labels: {}
1725";
1726
1727        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
1728        let selector = spec.services["api"].node_selector.as_ref().unwrap();
1729        assert!(selector.labels.is_empty());
1730        assert!(selector.prefer_labels.is_empty());
1731    }
1732
1733    #[test]
1734    fn test_mixed_node_modes_in_deployment() {
1735        let yaml = r"
1736version: v1
1737deployment: test
1738services:
1739  redis:
1740    rtype: service
1741    image:
1742      name: redis:alpine
1743    # Default shared mode
1744  api:
1745    rtype: service
1746    image:
1747      name: api:latest
1748    node_mode: dedicated
1749  database:
1750    rtype: service
1751    image:
1752      name: postgres:15
1753    node_mode: exclusive
1754    node_selector:
1755      labels:
1756        storage: ssd
1757";
1758
1759        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
1760        assert_eq!(spec.services["redis"].node_mode, NodeMode::Shared);
1761        assert_eq!(spec.services["api"].node_mode, NodeMode::Dedicated);
1762        assert_eq!(spec.services["database"].node_mode, NodeMode::Exclusive);
1763
1764        let db_selector = spec.services["database"].node_selector.as_ref().unwrap();
1765        assert_eq!(db_selector.labels.get("storage"), Some(&"ssd".to_string()));
1766    }
1767
1768    #[test]
1769    fn test_storage_bind_mount() {
1770        let yaml = r"
1771version: v1
1772deployment: test
1773services:
1774  app:
1775    image:
1776      name: app:latest
1777    storage:
1778      - type: bind
1779        source: /host/data
1780        target: /app/data
1781        readonly: true
1782";
1783        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
1784        let storage = &spec.services["app"].storage;
1785        assert_eq!(storage.len(), 1);
1786        match &storage[0] {
1787            StorageSpec::Bind {
1788                source,
1789                target,
1790                readonly,
1791            } => {
1792                assert_eq!(source, "/host/data");
1793                assert_eq!(target, "/app/data");
1794                assert!(*readonly);
1795            }
1796            _ => panic!("Expected Bind storage"),
1797        }
1798    }
1799
1800    #[test]
1801    fn test_storage_named_with_tier() {
1802        let yaml = r"
1803version: v1
1804deployment: test
1805services:
1806  app:
1807    image:
1808      name: app:latest
1809    storage:
1810      - type: named
1811        name: my-data
1812        target: /app/data
1813        tier: cached
1814";
1815        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
1816        let storage = &spec.services["app"].storage;
1817        match &storage[0] {
1818            StorageSpec::Named {
1819                name, target, tier, ..
1820            } => {
1821                assert_eq!(name, "my-data");
1822                assert_eq!(target, "/app/data");
1823                assert_eq!(*tier, StorageTier::Cached);
1824            }
1825            _ => panic!("Expected Named storage"),
1826        }
1827    }
1828
1829    #[test]
1830    fn test_storage_anonymous() {
1831        let yaml = r"
1832version: v1
1833deployment: test
1834services:
1835  app:
1836    image:
1837      name: app:latest
1838    storage:
1839      - type: anonymous
1840        target: /app/cache
1841";
1842        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
1843        let storage = &spec.services["app"].storage;
1844        match &storage[0] {
1845            StorageSpec::Anonymous { target, tier } => {
1846                assert_eq!(target, "/app/cache");
1847                assert_eq!(*tier, StorageTier::Local); // default
1848            }
1849            _ => panic!("Expected Anonymous storage"),
1850        }
1851    }
1852
1853    #[test]
1854    fn test_storage_tmpfs() {
1855        let yaml = r"
1856version: v1
1857deployment: test
1858services:
1859  app:
1860    image:
1861      name: app:latest
1862    storage:
1863      - type: tmpfs
1864        target: /app/tmp
1865        size: 256Mi
1866        mode: 1777
1867";
1868        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
1869        let storage = &spec.services["app"].storage;
1870        match &storage[0] {
1871            StorageSpec::Tmpfs { target, size, mode } => {
1872                assert_eq!(target, "/app/tmp");
1873                assert_eq!(size.as_deref(), Some("256Mi"));
1874                assert_eq!(*mode, Some(1777));
1875            }
1876            _ => panic!("Expected Tmpfs storage"),
1877        }
1878    }
1879
1880    #[test]
1881    fn test_storage_s3() {
1882        let yaml = r"
1883version: v1
1884deployment: test
1885services:
1886  app:
1887    image:
1888      name: app:latest
1889    storage:
1890      - type: s3
1891        bucket: my-bucket
1892        prefix: models/
1893        target: /app/models
1894        readonly: true
1895        endpoint: https://s3.us-west-2.amazonaws.com
1896        credentials: aws-creds
1897";
1898        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
1899        let storage = &spec.services["app"].storage;
1900        match &storage[0] {
1901            StorageSpec::S3 {
1902                bucket,
1903                prefix,
1904                target,
1905                readonly,
1906                endpoint,
1907                credentials,
1908            } => {
1909                assert_eq!(bucket, "my-bucket");
1910                assert_eq!(prefix.as_deref(), Some("models/"));
1911                assert_eq!(target, "/app/models");
1912                assert!(*readonly);
1913                assert_eq!(
1914                    endpoint.as_deref(),
1915                    Some("https://s3.us-west-2.amazonaws.com")
1916                );
1917                assert_eq!(credentials.as_deref(), Some("aws-creds"));
1918            }
1919            _ => panic!("Expected S3 storage"),
1920        }
1921    }
1922
1923    #[test]
1924    fn test_storage_multiple_types() {
1925        let yaml = r"
1926version: v1
1927deployment: test
1928services:
1929  app:
1930    image:
1931      name: app:latest
1932    storage:
1933      - type: bind
1934        source: /etc/config
1935        target: /app/config
1936        readonly: true
1937      - type: named
1938        name: app-data
1939        target: /app/data
1940      - type: tmpfs
1941        target: /app/tmp
1942";
1943        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
1944        let storage = &spec.services["app"].storage;
1945        assert_eq!(storage.len(), 3);
1946        assert!(matches!(&storage[0], StorageSpec::Bind { .. }));
1947        assert!(matches!(&storage[1], StorageSpec::Named { .. }));
1948        assert!(matches!(&storage[2], StorageSpec::Tmpfs { .. }));
1949    }
1950
1951    #[test]
1952    fn test_storage_tier_default() {
1953        let yaml = r"
1954version: v1
1955deployment: test
1956services:
1957  app:
1958    image:
1959      name: app:latest
1960    storage:
1961      - type: named
1962        name: data
1963        target: /data
1964";
1965        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
1966        match &spec.services["app"].storage[0] {
1967            StorageSpec::Named { tier, .. } => {
1968                assert_eq!(*tier, StorageTier::Local); // default should be Local
1969            }
1970            _ => panic!("Expected Named storage"),
1971        }
1972    }
1973
1974    // ==========================================================================
1975    // Tunnel configuration tests
1976    // ==========================================================================
1977
1978    #[test]
1979    fn test_endpoint_tunnel_config_basic() {
1980        let yaml = r"
1981version: v1
1982deployment: test
1983services:
1984  api:
1985    image:
1986      name: api:latest
1987    endpoints:
1988      - name: http
1989        protocol: http
1990        port: 8080
1991        tunnel:
1992          enabled: true
1993          remote_port: 8080
1994";
1995        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
1996        let endpoint = &spec.services["api"].endpoints[0];
1997        let tunnel = endpoint.tunnel.as_ref().unwrap();
1998        assert!(tunnel.enabled);
1999        assert_eq!(tunnel.remote_port, 8080);
2000        assert!(tunnel.from.is_none());
2001        assert!(tunnel.to.is_none());
2002    }
2003
2004    #[test]
2005    fn test_endpoint_tunnel_config_full() {
2006        let yaml = r"
2007version: v1
2008deployment: test
2009services:
2010  api:
2011    image:
2012      name: api:latest
2013    endpoints:
2014      - name: http
2015        protocol: http
2016        port: 8080
2017        tunnel:
2018          enabled: true
2019          from: node-1
2020          to: ingress-node
2021          remote_port: 9000
2022          expose: public
2023          access:
2024            enabled: true
2025            max_ttl: 4h
2026            audit: true
2027";
2028        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
2029        let endpoint = &spec.services["api"].endpoints[0];
2030        let tunnel = endpoint.tunnel.as_ref().unwrap();
2031        assert!(tunnel.enabled);
2032        assert_eq!(tunnel.from, Some("node-1".to_string()));
2033        assert_eq!(tunnel.to, Some("ingress-node".to_string()));
2034        assert_eq!(tunnel.remote_port, 9000);
2035        assert_eq!(tunnel.expose, Some(ExposeType::Public));
2036
2037        let access = tunnel.access.as_ref().unwrap();
2038        assert!(access.enabled);
2039        assert_eq!(access.max_ttl, Some("4h".to_string()));
2040        assert!(access.audit);
2041    }
2042
2043    #[test]
2044    fn test_top_level_tunnel_definition() {
2045        let yaml = r"
2046version: v1
2047deployment: test
2048services: {}
2049tunnels:
2050  db-tunnel:
2051    from: app-node
2052    to: db-node
2053    local_port: 5432
2054    remote_port: 5432
2055    protocol: tcp
2056    expose: internal
2057";
2058        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
2059        let tunnel = spec.tunnels.get("db-tunnel").unwrap();
2060        assert_eq!(tunnel.from, "app-node");
2061        assert_eq!(tunnel.to, "db-node");
2062        assert_eq!(tunnel.local_port, 5432);
2063        assert_eq!(tunnel.remote_port, 5432);
2064        assert_eq!(tunnel.protocol, TunnelProtocol::Tcp);
2065        assert_eq!(tunnel.expose, ExposeType::Internal);
2066    }
2067
2068    #[test]
2069    fn test_top_level_tunnel_defaults() {
2070        let yaml = r"
2071version: v1
2072deployment: test
2073services: {}
2074tunnels:
2075  simple-tunnel:
2076    from: node-a
2077    to: node-b
2078    local_port: 3000
2079    remote_port: 3000
2080";
2081        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
2082        let tunnel = spec.tunnels.get("simple-tunnel").unwrap();
2083        assert_eq!(tunnel.protocol, TunnelProtocol::Tcp); // default
2084        assert_eq!(tunnel.expose, ExposeType::Internal); // default
2085    }
2086
2087    #[test]
2088    fn test_tunnel_protocol_udp() {
2089        let yaml = r"
2090version: v1
2091deployment: test
2092services: {}
2093tunnels:
2094  udp-tunnel:
2095    from: node-a
2096    to: node-b
2097    local_port: 5353
2098    remote_port: 5353
2099    protocol: udp
2100";
2101        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
2102        let tunnel = spec.tunnels.get("udp-tunnel").unwrap();
2103        assert_eq!(tunnel.protocol, TunnelProtocol::Udp);
2104    }
2105
2106    #[test]
2107    fn test_endpoint_without_tunnel() {
2108        let yaml = r"
2109version: v1
2110deployment: test
2111services:
2112  api:
2113    image:
2114      name: api:latest
2115    endpoints:
2116      - name: http
2117        protocol: http
2118        port: 8080
2119";
2120        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
2121        let endpoint = &spec.services["api"].endpoints[0];
2122        assert!(endpoint.tunnel.is_none());
2123    }
2124
2125    #[test]
2126    fn test_deployment_without_tunnels() {
2127        let yaml = r"
2128version: v1
2129deployment: test
2130services:
2131  api:
2132    image:
2133      name: api:latest
2134";
2135        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
2136        assert!(spec.tunnels.is_empty());
2137    }
2138
2139    // ==========================================================================
2140    // ApiSpec tests
2141    // ==========================================================================
2142
2143    #[test]
2144    fn test_spec_without_api_block_uses_defaults() {
2145        let yaml = r"
2146version: v1
2147deployment: test
2148services:
2149  hello:
2150    image:
2151      name: hello-world:latest
2152";
2153        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
2154        assert!(spec.api.enabled);
2155        assert_eq!(spec.api.bind, "0.0.0.0:3669");
2156        assert!(spec.api.jwt_secret.is_none());
2157        assert!(spec.api.swagger);
2158    }
2159
2160    #[test]
2161    fn test_spec_with_explicit_api_block() {
2162        let yaml = r#"
2163version: v1
2164deployment: test
2165services:
2166  hello:
2167    image:
2168      name: hello-world:latest
2169api:
2170  enabled: false
2171  bind: "127.0.0.1:9090"
2172  jwt_secret: "my-secret"
2173  swagger: false
2174"#;
2175        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
2176        assert!(!spec.api.enabled);
2177        assert_eq!(spec.api.bind, "127.0.0.1:9090");
2178        assert_eq!(spec.api.jwt_secret, Some("my-secret".to_string()));
2179        assert!(!spec.api.swagger);
2180    }
2181
2182    #[test]
2183    fn test_spec_with_partial_api_block() {
2184        let yaml = r#"
2185version: v1
2186deployment: test
2187services:
2188  hello:
2189    image:
2190      name: hello-world:latest
2191api:
2192  bind: "0.0.0.0:3000"
2193"#;
2194        let spec: DeploymentSpec = serde_yaml::from_str(yaml).unwrap();
2195        assert!(spec.api.enabled); // default true
2196        assert_eq!(spec.api.bind, "0.0.0.0:3000");
2197        assert!(spec.api.jwt_secret.is_none()); // default None
2198        assert!(spec.api.swagger); // default true
2199    }
2200}