ringkernel_core/
config.rs

1//! Unified configuration for RingKernel enterprise features.
2//!
3//! This module provides a comprehensive configuration system that ties together
4//! observability, health monitoring, multi-GPU coordination, and migration features.
5//!
6//! # Example
7//!
8//! ```ignore
9//! use ringkernel_core::config::{RingKernelConfig, ConfigBuilder};
10//!
11//! let config = ConfigBuilder::new()
12//!     .with_observability(|obs| obs
13//!         .enable_tracing(true)
14//!         .enable_metrics(true)
15//!         .metrics_port(9090))
16//!     .with_health(|health| health
//!         .check_interval(Duration::from_secs(5))
18//!         .circuit_breaker_threshold(5))
19//!     .with_multi_gpu(|gpu| gpu
20//!         .load_balancing(LoadBalancingStrategy::LeastLoaded)
21//!         .enable_p2p(true))
22//!     .build()?;
23//!
24//! let runtime = RingKernelRuntime::with_config(config)?;
25//! ```
26//!
27//! # Configuration File Support
28//!
29//! With the `config-file` feature enabled, you can load configurations from TOML or YAML files:
30//!
31//! ```ignore
32//! use ringkernel_core::config::RingKernelConfig;
33//!
34//! // Load from TOML file
35//! let config = RingKernelConfig::from_toml_file("config.toml")?;
36//!
37//! // Load from YAML file
38//! let config = RingKernelConfig::from_yaml_file("config.yaml")?;
39//!
40//! // Load from string
41//! let config = RingKernelConfig::from_toml_str(toml_content)?;
42//! ```
43
44use std::collections::HashMap;
45use std::path::PathBuf;
46use std::time::Duration;
47
48use crate::error::{Result, RingKernelError};
49use crate::health::{BackoffStrategy, CircuitBreakerConfig, LoadSheddingPolicy};
50use crate::multi_gpu::LoadBalancingStrategy;
51use crate::runtime::Backend;
52
53#[cfg(feature = "config-file")]
54use std::path::Path;
55
56// ============================================================================
57// Main Configuration
58// ============================================================================
59
60/// Unified configuration for RingKernel.
61#[derive(Debug, Clone, Default)]
62pub struct RingKernelConfig {
63    /// General settings.
64    pub general: GeneralConfig,
65    /// Observability settings.
66    pub observability: ObservabilityConfig,
67    /// Health monitoring settings.
68    pub health: HealthConfig,
69    /// Multi-GPU settings.
70    pub multi_gpu: MultiGpuConfig,
71    /// Migration settings.
72    pub migration: MigrationConfig,
73    /// Custom settings.
74    pub custom: HashMap<String, String>,
75}
76
77impl RingKernelConfig {
78    /// Create a new configuration with defaults.
79    pub fn new() -> Self {
80        Self::default()
81    }
82
83    /// Create a builder for fluent configuration.
84    pub fn builder() -> ConfigBuilder {
85        ConfigBuilder::new()
86    }
87
88    /// Validate the configuration.
89    pub fn validate(&self) -> Result<()> {
90        self.general.validate()?;
91        self.observability.validate()?;
92        self.health.validate()?;
93        self.multi_gpu.validate()?;
94        self.migration.validate()?;
95        Ok(())
96    }
97
98    /// Get a custom setting by key.
99    pub fn get_custom(&self, key: &str) -> Option<&str> {
100        self.custom.get(key).map(|s| s.as_str())
101    }
102
103    /// Set a custom setting.
104    pub fn set_custom(&mut self, key: impl Into<String>, value: impl Into<String>) {
105        self.custom.insert(key.into(), value.into());
106    }
107}
108
109// ============================================================================
110// General Configuration
111// ============================================================================
112
113/// General runtime settings.
114#[derive(Debug, Clone)]
115pub struct GeneralConfig {
116    /// Preferred backend.
117    pub backend: Backend,
118    /// Application name (for metrics/tracing).
119    pub app_name: String,
120    /// Application version.
121    pub app_version: String,
122    /// Environment (dev, staging, prod).
123    pub environment: Environment,
124    /// Log level.
125    pub log_level: LogLevel,
126    /// Data directory for checkpoints, logs, etc.
127    pub data_dir: Option<PathBuf>,
128}
129
130impl Default for GeneralConfig {
131    fn default() -> Self {
132        Self {
133            backend: Backend::Auto,
134            app_name: "ringkernel".to_string(),
135            app_version: env!("CARGO_PKG_VERSION").to_string(),
136            environment: Environment::Development,
137            log_level: LogLevel::Info,
138            data_dir: None,
139        }
140    }
141}
142
143impl GeneralConfig {
144    /// Validate general configuration.
145    pub fn validate(&self) -> Result<()> {
146        if self.app_name.is_empty() {
147            return Err(RingKernelError::InvalidConfig(
148                "app_name cannot be empty".to_string(),
149            ));
150        }
151        Ok(())
152    }
153}
154
/// Deployment environment the runtime is operating in.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Environment {
    /// Local development (the default).
    #[default]
    Development,
    /// Staging / pre-production testing.
    Staging,
    /// Live production.
    Production,
}

impl Environment {
    /// Reports whether this value is [`Environment::Production`].
    pub fn is_production(&self) -> bool {
        *self == Self::Production
    }

    /// Returns the lowercase name of the environment.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Development => "development",
            Self::Staging => "staging",
            Self::Production => "production",
        }
    }
}
182
/// Logging verbosity, ordered here from most to least verbose.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum LogLevel {
    /// Most verbose level.
    Trace,
    /// Debugging output.
    Debug,
    /// Informational output (the default).
    #[default]
    Info,
    /// Warnings.
    Warn,
    /// Errors only; least verbose level.
    Error,
}

impl LogLevel {
    /// Returns the lowercase name of the level.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Trace => "trace",
            Self::Debug => "debug",
            Self::Info => "info",
            Self::Warn => "warn",
            Self::Error => "error",
        }
    }
}
211
212// ============================================================================
213// Observability Configuration
214// ============================================================================
215
216/// Observability settings.
217#[derive(Debug, Clone)]
218pub struct ObservabilityConfig {
219    /// Enable tracing.
220    pub tracing_enabled: bool,
221    /// Enable metrics.
222    pub metrics_enabled: bool,
223    /// Metrics port for Prometheus scraping.
224    pub metrics_port: u16,
225    /// Metrics path (default: /metrics).
226    pub metrics_path: String,
227    /// Trace sampling rate (0.0 to 1.0).
228    pub trace_sample_rate: f64,
229    /// Enable Grafana dashboard generation.
230    pub grafana_enabled: bool,
231    /// OTLP endpoint for trace export.
232    pub otlp_endpoint: Option<String>,
233    /// Custom metric labels.
234    pub metric_labels: HashMap<String, String>,
235}
236
237impl Default for ObservabilityConfig {
238    fn default() -> Self {
239        Self {
240            tracing_enabled: true,
241            metrics_enabled: true,
242            metrics_port: 9090,
243            metrics_path: "/metrics".to_string(),
244            trace_sample_rate: 1.0,
245            grafana_enabled: false,
246            otlp_endpoint: None,
247            metric_labels: HashMap::new(),
248        }
249    }
250}
251
252impl ObservabilityConfig {
253    /// Validate observability configuration.
254    pub fn validate(&self) -> Result<()> {
255        if self.trace_sample_rate < 0.0 || self.trace_sample_rate > 1.0 {
256            return Err(RingKernelError::InvalidConfig(format!(
257                "trace_sample_rate must be between 0.0 and 1.0, got {}",
258                self.trace_sample_rate
259            )));
260        }
261        if self.metrics_port == 0 {
262            return Err(RingKernelError::InvalidConfig(
263                "metrics_port cannot be 0".to_string(),
264            ));
265        }
266        Ok(())
267    }
268}
269
270// ============================================================================
271// Health Configuration
272// ============================================================================
273
274/// Health monitoring settings.
275#[derive(Debug, Clone)]
276pub struct HealthConfig {
277    /// Enable health checks.
278    pub health_checks_enabled: bool,
279    /// Health check interval.
280    pub check_interval: Duration,
281    /// Heartbeat timeout.
282    pub heartbeat_timeout: Duration,
283    /// Circuit breaker configuration.
284    pub circuit_breaker: CircuitBreakerConfig,
285    /// Retry policy for transient failures.
286    pub retry: RetryConfig,
287    /// Load shedding policy.
288    pub load_shedding: LoadSheddingPolicy,
289    /// Kernel watchdog enabled.
290    pub watchdog_enabled: bool,
291    /// Watchdog failure threshold.
292    pub watchdog_failure_threshold: u32,
293}
294
295impl Default for HealthConfig {
296    fn default() -> Self {
297        Self {
298            health_checks_enabled: true,
299            check_interval: Duration::from_secs(10),
300            heartbeat_timeout: Duration::from_secs(30),
301            circuit_breaker: CircuitBreakerConfig::default(),
302            retry: RetryConfig::default(),
303            load_shedding: LoadSheddingPolicy::default(),
304            watchdog_enabled: true,
305            watchdog_failure_threshold: 3,
306        }
307    }
308}
309
310impl HealthConfig {
311    /// Validate health configuration.
312    pub fn validate(&self) -> Result<()> {
313        if self.check_interval.is_zero() {
314            return Err(RingKernelError::InvalidConfig(
315                "check_interval cannot be zero".to_string(),
316            ));
317        }
318        if self.heartbeat_timeout.is_zero() {
319            return Err(RingKernelError::InvalidConfig(
320                "heartbeat_timeout cannot be zero".to_string(),
321            ));
322        }
323        if self.heartbeat_timeout < self.check_interval {
324            return Err(RingKernelError::InvalidConfig(
325                "heartbeat_timeout should be >= check_interval".to_string(),
326            ));
327        }
328        Ok(())
329    }
330}
331
332/// Retry configuration.
333#[derive(Debug, Clone)]
334pub struct RetryConfig {
335    /// Maximum retry attempts.
336    pub max_attempts: u32,
337    /// Backoff strategy.
338    pub backoff: BackoffStrategy,
339    /// Enable jitter.
340    pub jitter: bool,
341    /// Maximum backoff duration.
342    pub max_backoff: Duration,
343}
344
345impl Default for RetryConfig {
346    fn default() -> Self {
347        Self {
348            max_attempts: 3,
349            backoff: BackoffStrategy::Exponential {
350                initial: Duration::from_millis(100),
351                max: Duration::from_secs(30),
352                multiplier: 2.0,
353            },
354            jitter: true,
355            max_backoff: Duration::from_secs(30),
356        }
357    }
358}
359
360// ============================================================================
361// Multi-GPU Configuration
362// ============================================================================
363
364/// Multi-GPU coordination settings.
365#[derive(Debug, Clone)]
366pub struct MultiGpuConfig {
367    /// Enable multi-GPU support.
368    pub enabled: bool,
369    /// Load balancing strategy.
370    pub load_balancing: LoadBalancingStrategy,
371    /// Enable peer-to-peer transfers.
372    pub p2p_enabled: bool,
373    /// Auto-select devices.
374    pub auto_select_device: bool,
375    /// Maximum kernels per device.
376    pub max_kernels_per_device: usize,
377    /// Preferred device indices.
378    pub preferred_devices: Vec<usize>,
379    /// Enable topology discovery.
380    pub topology_discovery: bool,
381    /// Enable cross-GPU K2K routing.
382    pub cross_gpu_k2k: bool,
383}
384
385impl Default for MultiGpuConfig {
386    fn default() -> Self {
387        Self {
388            enabled: true,
389            load_balancing: LoadBalancingStrategy::LeastLoaded,
390            p2p_enabled: true,
391            auto_select_device: true,
392            max_kernels_per_device: 32,
393            preferred_devices: Vec::new(),
394            topology_discovery: true,
395            cross_gpu_k2k: true,
396        }
397    }
398}
399
400impl MultiGpuConfig {
401    /// Validate multi-GPU configuration.
402    pub fn validate(&self) -> Result<()> {
403        if self.max_kernels_per_device == 0 {
404            return Err(RingKernelError::InvalidConfig(
405                "max_kernels_per_device cannot be 0".to_string(),
406            ));
407        }
408        Ok(())
409    }
410}
411
412// ============================================================================
413// Migration Configuration
414// ============================================================================
415
416/// Kernel migration settings.
417#[derive(Debug, Clone)]
418pub struct MigrationConfig {
419    /// Enable live migration.
420    pub enabled: bool,
421    /// Checkpoint storage type.
422    pub storage: CheckpointStorageType,
423    /// Checkpoint directory (for file storage).
424    pub checkpoint_dir: PathBuf,
425    /// Maximum checkpoint size.
426    pub max_checkpoint_size: usize,
427    /// Enable compression.
428    pub compression_enabled: bool,
429    /// Compression level (1-9).
430    pub compression_level: u32,
431    /// Migration timeout.
432    pub migration_timeout: Duration,
433    /// Enable incremental checkpoints.
434    pub incremental_enabled: bool,
435    /// Cloud storage configuration.
436    pub cloud_config: CloudStorageConfig,
437}
438
/// Cloud (S3-style) storage settings used for persisting checkpoints.
///
/// The derived `Default` yields empty strings, no region or endpoint, and
/// encryption switched off.
#[derive(Debug, Clone, Default)]
pub struct CloudStorageConfig {
    /// Name of the S3 bucket.
    pub s3_bucket: String,
    /// Key prefix inside the bucket (e.g., "checkpoints/").
    pub s3_prefix: String,
    /// AWS region (e.g., "us-east-1"), if set.
    pub s3_region: Option<String>,
    /// Custom S3 endpoint URL (for MinIO, R2, etc.), if set.
    pub s3_endpoint: Option<String>,
    /// Whether server-side encryption is enabled.
    pub s3_encryption: bool,
}
453
454impl Default for MigrationConfig {
455    fn default() -> Self {
456        Self {
457            enabled: true,
458            storage: CheckpointStorageType::Memory,
459            checkpoint_dir: PathBuf::from("/tmp/ringkernel/checkpoints"),
460            max_checkpoint_size: 1024 * 1024 * 1024, // 1 GB
461            compression_enabled: false,
462            compression_level: 3,
463            migration_timeout: Duration::from_secs(60),
464            incremental_enabled: false,
465            cloud_config: CloudStorageConfig::default(),
466        }
467    }
468}
469
470impl MigrationConfig {
471    /// Validate migration configuration.
472    pub fn validate(&self) -> Result<()> {
473        if self.compression_level == 0 || self.compression_level > 9 {
474            return Err(RingKernelError::InvalidConfig(format!(
475                "compression_level must be between 1 and 9, got {}",
476                self.compression_level
477            )));
478        }
479        if self.max_checkpoint_size == 0 {
480            return Err(RingKernelError::InvalidConfig(
481                "max_checkpoint_size cannot be 0".to_string(),
482            ));
483        }
484        Ok(())
485    }
486}
487
/// Backend used to store checkpoints.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum CheckpointStorageType {
    /// In-memory storage: fast, non-persistent (the default).
    #[default]
    Memory,
    /// File-based storage: persistent.
    File,
    /// Cloud storage (S3, GCS).
    Cloud,
}

impl CheckpointStorageType {
    /// Returns the lowercase name of the storage type.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Memory => "memory",
            Self::File => "file",
            Self::Cloud => "cloud",
        }
    }
}
510
511// ============================================================================
512// Configuration Builder
513// ============================================================================
514
515/// Fluent builder for RingKernelConfig.
516#[derive(Debug, Clone, Default)]
517pub struct ConfigBuilder {
518    config: RingKernelConfig,
519}
520
521impl ConfigBuilder {
522    /// Create a new configuration builder.
523    pub fn new() -> Self {
524        Self {
525            config: RingKernelConfig::default(),
526        }
527    }
528
529    /// Configure general settings.
530    pub fn with_general<F>(mut self, f: F) -> Self
531    where
532        F: FnOnce(GeneralConfigBuilder) -> GeneralConfigBuilder,
533    {
534        let builder = f(GeneralConfigBuilder::new());
535        self.config.general = builder.build();
536        self
537    }
538
539    /// Configure observability settings.
540    pub fn with_observability<F>(mut self, f: F) -> Self
541    where
542        F: FnOnce(ObservabilityConfigBuilder) -> ObservabilityConfigBuilder,
543    {
544        let builder = f(ObservabilityConfigBuilder::new());
545        self.config.observability = builder.build();
546        self
547    }
548
549    /// Configure health settings.
550    pub fn with_health<F>(mut self, f: F) -> Self
551    where
552        F: FnOnce(HealthConfigBuilder) -> HealthConfigBuilder,
553    {
554        let builder = f(HealthConfigBuilder::new());
555        self.config.health = builder.build();
556        self
557    }
558
559    /// Configure multi-GPU settings.
560    pub fn with_multi_gpu<F>(mut self, f: F) -> Self
561    where
562        F: FnOnce(MultiGpuConfigBuilder) -> MultiGpuConfigBuilder,
563    {
564        let builder = f(MultiGpuConfigBuilder::new());
565        self.config.multi_gpu = builder.build();
566        self
567    }
568
569    /// Configure migration settings.
570    pub fn with_migration<F>(mut self, f: F) -> Self
571    where
572        F: FnOnce(MigrationConfigBuilder) -> MigrationConfigBuilder,
573    {
574        let builder = f(MigrationConfigBuilder::new());
575        self.config.migration = builder.build();
576        self
577    }
578
579    /// Add a custom setting.
580    pub fn custom(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
581        self.config.custom.insert(key.into(), value.into());
582        self
583    }
584
585    /// Build and validate the configuration.
586    pub fn build(self) -> Result<RingKernelConfig> {
587        self.config.validate()?;
588        Ok(self.config)
589    }
590
591    /// Build without validation.
592    pub fn build_unchecked(self) -> RingKernelConfig {
593        self.config
594    }
595}
596
597// ============================================================================
598// Sub-Builders
599// ============================================================================
600
601/// Builder for GeneralConfig.
602#[derive(Debug, Clone)]
603pub struct GeneralConfigBuilder {
604    config: GeneralConfig,
605}
606
607impl GeneralConfigBuilder {
608    /// Create a new general config builder.
609    pub fn new() -> Self {
610        Self {
611            config: GeneralConfig::default(),
612        }
613    }
614
615    /// Set the backend.
616    pub fn backend(mut self, backend: Backend) -> Self {
617        self.config.backend = backend;
618        self
619    }
620
621    /// Set the application name.
622    pub fn app_name(mut self, name: impl Into<String>) -> Self {
623        self.config.app_name = name.into();
624        self
625    }
626
627    /// Set the application version.
628    pub fn app_version(mut self, version: impl Into<String>) -> Self {
629        self.config.app_version = version.into();
630        self
631    }
632
633    /// Set the environment.
634    pub fn environment(mut self, env: Environment) -> Self {
635        self.config.environment = env;
636        self
637    }
638
639    /// Set the log level.
640    pub fn log_level(mut self, level: LogLevel) -> Self {
641        self.config.log_level = level;
642        self
643    }
644
645    /// Set the data directory.
646    pub fn data_dir(mut self, path: impl Into<PathBuf>) -> Self {
647        self.config.data_dir = Some(path.into());
648        self
649    }
650
651    /// Build the configuration.
652    pub fn build(self) -> GeneralConfig {
653        self.config
654    }
655}
656
657impl Default for GeneralConfigBuilder {
658    fn default() -> Self {
659        Self::new()
660    }
661}
662
663/// Builder for ObservabilityConfig.
664#[derive(Debug, Clone)]
665pub struct ObservabilityConfigBuilder {
666    config: ObservabilityConfig,
667}
668
669impl ObservabilityConfigBuilder {
670    /// Create a new observability config builder.
671    pub fn new() -> Self {
672        Self {
673            config: ObservabilityConfig::default(),
674        }
675    }
676
677    /// Enable or disable tracing.
678    pub fn enable_tracing(mut self, enabled: bool) -> Self {
679        self.config.tracing_enabled = enabled;
680        self
681    }
682
683    /// Enable or disable metrics.
684    pub fn enable_metrics(mut self, enabled: bool) -> Self {
685        self.config.metrics_enabled = enabled;
686        self
687    }
688
689    /// Set the metrics port.
690    pub fn metrics_port(mut self, port: u16) -> Self {
691        self.config.metrics_port = port;
692        self
693    }
694
695    /// Set the metrics path.
696    pub fn metrics_path(mut self, path: impl Into<String>) -> Self {
697        self.config.metrics_path = path.into();
698        self
699    }
700
701    /// Set the trace sample rate.
702    pub fn trace_sample_rate(mut self, rate: f64) -> Self {
703        self.config.trace_sample_rate = rate;
704        self
705    }
706
707    /// Enable Grafana dashboard generation.
708    pub fn enable_grafana(mut self, enabled: bool) -> Self {
709        self.config.grafana_enabled = enabled;
710        self
711    }
712
713    /// Set the OTLP endpoint.
714    pub fn otlp_endpoint(mut self, endpoint: impl Into<String>) -> Self {
715        self.config.otlp_endpoint = Some(endpoint.into());
716        self
717    }
718
719    /// Add a metric label.
720    pub fn metric_label(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
721        self.config.metric_labels.insert(key.into(), value.into());
722        self
723    }
724
725    /// Build the configuration.
726    pub fn build(self) -> ObservabilityConfig {
727        self.config
728    }
729}
730
731impl Default for ObservabilityConfigBuilder {
732    fn default() -> Self {
733        Self::new()
734    }
735}
736
737/// Builder for HealthConfig.
738#[derive(Debug, Clone)]
739pub struct HealthConfigBuilder {
740    config: HealthConfig,
741}
742
743impl HealthConfigBuilder {
744    /// Create a new health config builder.
745    pub fn new() -> Self {
746        Self {
747            config: HealthConfig::default(),
748        }
749    }
750
751    /// Enable or disable health checks.
752    pub fn enable_health_checks(mut self, enabled: bool) -> Self {
753        self.config.health_checks_enabled = enabled;
754        self
755    }
756
757    /// Set the check interval.
758    pub fn check_interval(mut self, interval: Duration) -> Self {
759        self.config.check_interval = interval;
760        self
761    }
762
763    /// Set the heartbeat timeout.
764    pub fn heartbeat_timeout(mut self, timeout: Duration) -> Self {
765        self.config.heartbeat_timeout = timeout;
766        self
767    }
768
769    /// Set circuit breaker failure threshold.
770    pub fn circuit_breaker_threshold(mut self, threshold: u32) -> Self {
771        self.config.circuit_breaker.failure_threshold = threshold;
772        self
773    }
774
775    /// Set circuit breaker recovery timeout.
776    pub fn circuit_breaker_recovery_timeout(mut self, timeout: Duration) -> Self {
777        self.config.circuit_breaker.recovery_timeout = timeout;
778        self
779    }
780
781    /// Set circuit breaker half-open max requests.
782    pub fn circuit_breaker_half_open_max_requests(mut self, requests: u32) -> Self {
783        self.config.circuit_breaker.half_open_max_requests = requests;
784        self
785    }
786
787    /// Configure retry policy.
788    pub fn retry_max_attempts(mut self, attempts: u32) -> Self {
789        self.config.retry.max_attempts = attempts;
790        self
791    }
792
793    /// Enable or disable retry jitter.
794    pub fn retry_jitter(mut self, enabled: bool) -> Self {
795        self.config.retry.jitter = enabled;
796        self
797    }
798
799    /// Set load shedding policy.
800    pub fn load_shedding(mut self, policy: LoadSheddingPolicy) -> Self {
801        self.config.load_shedding = policy;
802        self
803    }
804
805    /// Enable or disable kernel watchdog.
806    pub fn enable_watchdog(mut self, enabled: bool) -> Self {
807        self.config.watchdog_enabled = enabled;
808        self
809    }
810
811    /// Set watchdog failure threshold.
812    pub fn watchdog_failure_threshold(mut self, threshold: u32) -> Self {
813        self.config.watchdog_failure_threshold = threshold;
814        self
815    }
816
817    /// Build the configuration.
818    pub fn build(self) -> HealthConfig {
819        self.config
820    }
821}
822
823impl Default for HealthConfigBuilder {
824    fn default() -> Self {
825        Self::new()
826    }
827}
828
829/// Builder for MultiGpuConfig.
830#[derive(Debug, Clone)]
831pub struct MultiGpuConfigBuilder {
832    config: MultiGpuConfig,
833}
834
835impl MultiGpuConfigBuilder {
836    /// Create a new multi-GPU config builder.
837    pub fn new() -> Self {
838        Self {
839            config: MultiGpuConfig::default(),
840        }
841    }
842
843    /// Enable or disable multi-GPU support.
844    pub fn enable(mut self, enabled: bool) -> Self {
845        self.config.enabled = enabled;
846        self
847    }
848
849    /// Set the load balancing strategy.
850    pub fn load_balancing(mut self, strategy: LoadBalancingStrategy) -> Self {
851        self.config.load_balancing = strategy;
852        self
853    }
854
855    /// Enable or disable P2P transfers.
856    pub fn enable_p2p(mut self, enabled: bool) -> Self {
857        self.config.p2p_enabled = enabled;
858        self
859    }
860
861    /// Enable or disable auto device selection.
862    pub fn auto_select_device(mut self, enabled: bool) -> Self {
863        self.config.auto_select_device = enabled;
864        self
865    }
866
867    /// Set maximum kernels per device.
868    pub fn max_kernels_per_device(mut self, max: usize) -> Self {
869        self.config.max_kernels_per_device = max;
870        self
871    }
872
873    /// Set preferred devices.
874    pub fn preferred_devices(mut self, devices: Vec<usize>) -> Self {
875        self.config.preferred_devices = devices;
876        self
877    }
878
879    /// Enable or disable topology discovery.
880    pub fn topology_discovery(mut self, enabled: bool) -> Self {
881        self.config.topology_discovery = enabled;
882        self
883    }
884
885    /// Enable or disable cross-GPU K2K routing.
886    pub fn cross_gpu_k2k(mut self, enabled: bool) -> Self {
887        self.config.cross_gpu_k2k = enabled;
888        self
889    }
890
891    /// Build the configuration.
892    pub fn build(self) -> MultiGpuConfig {
893        self.config
894    }
895}
896
897impl Default for MultiGpuConfigBuilder {
898    fn default() -> Self {
899        Self::new()
900    }
901}
902
903/// Builder for MigrationConfig.
904#[derive(Debug, Clone)]
905pub struct MigrationConfigBuilder {
906    config: MigrationConfig,
907}
908
909impl MigrationConfigBuilder {
910    /// Create a new migration config builder.
911    pub fn new() -> Self {
912        Self {
913            config: MigrationConfig::default(),
914        }
915    }
916
917    /// Enable or disable migration.
918    pub fn enable(mut self, enabled: bool) -> Self {
919        self.config.enabled = enabled;
920        self
921    }
922
923    /// Set the storage type.
924    pub fn storage(mut self, storage: CheckpointStorageType) -> Self {
925        self.config.storage = storage;
926        self
927    }
928
929    /// Set the checkpoint directory.
930    pub fn checkpoint_dir(mut self, path: impl Into<PathBuf>) -> Self {
931        self.config.checkpoint_dir = path.into();
932        self
933    }
934
935    /// Set maximum checkpoint size.
936    pub fn max_checkpoint_size(mut self, size: usize) -> Self {
937        self.config.max_checkpoint_size = size;
938        self
939    }
940
941    /// Enable or disable compression.
942    pub fn enable_compression(mut self, enabled: bool) -> Self {
943        self.config.compression_enabled = enabled;
944        self
945    }
946
947    /// Set compression level.
948    pub fn compression_level(mut self, level: u32) -> Self {
949        self.config.compression_level = level;
950        self
951    }
952
953    /// Set migration timeout.
954    pub fn migration_timeout(mut self, timeout: Duration) -> Self {
955        self.config.migration_timeout = timeout;
956        self
957    }
958
959    /// Enable or disable incremental checkpoints.
960    pub fn enable_incremental(mut self, enabled: bool) -> Self {
961        self.config.incremental_enabled = enabled;
962        self
963    }
964
965    /// Configure S3 bucket for cloud storage.
966    pub fn s3_bucket(mut self, bucket: impl Into<String>) -> Self {
967        self.config.cloud_config.s3_bucket = bucket.into();
968        self
969    }
970
971    /// Set S3 key prefix.
972    pub fn s3_prefix(mut self, prefix: impl Into<String>) -> Self {
973        self.config.cloud_config.s3_prefix = prefix.into();
974        self
975    }
976
977    /// Set AWS region for S3.
978    pub fn s3_region(mut self, region: impl Into<String>) -> Self {
979        self.config.cloud_config.s3_region = Some(region.into());
980        self
981    }
982
983    /// Set custom S3 endpoint (for MinIO, R2, etc.).
984    pub fn s3_endpoint(mut self, endpoint: impl Into<String>) -> Self {
985        self.config.cloud_config.s3_endpoint = Some(endpoint.into());
986        self
987    }
988
989    /// Enable S3 server-side encryption.
990    pub fn s3_encryption(mut self, enabled: bool) -> Self {
991        self.config.cloud_config.s3_encryption = enabled;
992        self
993    }
994
995    /// Build the configuration.
996    pub fn build(self) -> MigrationConfig {
997        self.config
998    }
999}
1000
1001impl Default for MigrationConfigBuilder {
1002    fn default() -> Self {
1003        Self::new()
1004    }
1005}
1006
1007// ============================================================================
1008// Configuration Presets
1009// ============================================================================
1010
1011impl RingKernelConfig {
1012    /// Create a minimal configuration for development.
1013    pub fn development() -> Self {
1014        ConfigBuilder::new()
1015            .with_general(|g| {
1016                g.environment(Environment::Development)
1017                    .log_level(LogLevel::Debug)
1018            })
1019            .with_observability(|o| o.trace_sample_rate(1.0))
1020            .with_health(|h| h.enable_health_checks(true))
1021            .build_unchecked()
1022    }
1023
1024    /// Create a production-ready configuration.
1025    pub fn production() -> Self {
1026        ConfigBuilder::new()
1027            .with_general(|g| {
1028                g.environment(Environment::Production)
1029                    .log_level(LogLevel::Info)
1030            })
1031            .with_observability(|o| {
1032                o.enable_tracing(true)
1033                    .enable_metrics(true)
1034                    .trace_sample_rate(0.1) // 10% sampling in production
1035                    .enable_grafana(true)
1036            })
1037            .with_health(|h| {
1038                h.enable_health_checks(true)
1039                    .check_interval(Duration::from_secs(5))
1040                    .heartbeat_timeout(Duration::from_secs(15))
1041                    .circuit_breaker_threshold(5)
1042                    .enable_watchdog(true)
1043            })
1044            .with_multi_gpu(|g| {
1045                g.enable(true)
1046                    .load_balancing(LoadBalancingStrategy::LeastLoaded)
1047                    .enable_p2p(true)
1048                    .topology_discovery(true)
1049            })
1050            .with_migration(|m| {
1051                m.enable(true)
1052                    .storage(CheckpointStorageType::File)
1053                    .enable_compression(true)
1054                    .compression_level(3)
1055            })
1056            .build_unchecked()
1057    }
1058
1059    /// Create a high-performance configuration.
1060    pub fn high_performance() -> Self {
1061        ConfigBuilder::new()
1062            .with_general(|g| {
1063                g.environment(Environment::Production)
1064                    .log_level(LogLevel::Warn)
1065            })
1066            .with_observability(|o| {
1067                o.enable_tracing(false) // Disable tracing for max performance
1068                    .enable_metrics(true)
1069                    .trace_sample_rate(0.0)
1070            })
1071            .with_health(|h| {
1072                h.enable_health_checks(true)
1073                    .check_interval(Duration::from_secs(30)) // Less frequent checks
1074                    .watchdog_failure_threshold(5)
1075            })
1076            .with_multi_gpu(|g| {
1077                g.enable(true)
1078                    .load_balancing(LoadBalancingStrategy::LeastLoaded)
1079                    .enable_p2p(true)
1080                    .max_kernels_per_device(64)
1081                    .cross_gpu_k2k(true)
1082            })
1083            .with_migration(|m| {
1084                m.enable(true)
1085                    .storage(CheckpointStorageType::Memory)
1086                    .enable_compression(false) // Skip compression for speed
1087            })
1088            .build_unchecked()
1089    }
1090}
1091
1092// ============================================================================
1093// Configuration File Support
1094// ============================================================================
1095
/// Serialization formats a configuration file can be written in.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConfigFormat {
    /// TOML (`.toml`).
    Toml,
    /// YAML (`.yaml` or `.yml`).
    Yaml,
}

impl ConfigFormat {
    /// Infers the format from the extension of `path`.
    ///
    /// The extension is lower-cased before matching, so `config.TOML` is
    /// recognised. Returns `None` when the path has no extension, when the
    /// extension is not valid UTF-8, or when it is none of `toml`, `yaml`,
    /// `yml`.
    pub fn from_extension(path: &std::path::Path) -> Option<Self> {
        let extension = path.extension()?.to_str()?.to_lowercase();
        match extension.as_str() {
            "toml" => Some(Self::Toml),
            "yaml" | "yml" => Some(Self::Yaml),
            _ => None,
        }
    }
}
1118
1119#[cfg(feature = "config-file")]
1120mod file_config {
1121    use super::*;
1122    use serde::{Deserialize, Serialize};
1123
1124    /// File-format configuration (serialization-friendly).
1125    ///
1126    /// This struct uses primitive types that are easy to serialize/deserialize.
1127    /// It can be converted to/from `RingKernelConfig`.
1128    #[derive(Debug, Clone, Serialize, Deserialize, Default)]
1129    #[serde(default)]
1130    pub struct FileConfig {
1131        /// General settings.
1132        #[serde(default)]
1133        pub general: FileGeneralConfig,
1134        /// Observability settings.
1135        #[serde(default)]
1136        pub observability: FileObservabilityConfig,
1137        /// Health monitoring settings.
1138        #[serde(default)]
1139        pub health: FileHealthConfig,
1140        /// Multi-GPU settings.
1141        #[serde(default)]
1142        pub multi_gpu: FileMultiGpuConfig,
1143        /// Migration settings.
1144        #[serde(default)]
1145        pub migration: FileMigrationConfig,
1146        /// Custom settings.
1147        #[serde(default)]
1148        pub custom: HashMap<String, String>,
1149    }
1150
1151    /// File-format general configuration.
1152    #[derive(Debug, Clone, Serialize, Deserialize)]
1153    #[serde(default)]
1154    pub struct FileGeneralConfig {
1155        /// Backend: "auto", "cpu", "cuda", "wgpu", "metal".
1156        pub backend: String,
1157        /// Application name.
1158        pub app_name: String,
1159        /// Application version.
1160        pub app_version: String,
1161        /// Environment: "development", "staging", "production".
1162        pub environment: String,
1163        /// Log level: "trace", "debug", "info", "warn", "error".
1164        pub log_level: String,
1165        /// Data directory path.
1166        pub data_dir: Option<String>,
1167    }
1168
1169    impl Default for FileGeneralConfig {
1170        fn default() -> Self {
1171            Self {
1172                backend: "auto".to_string(),
1173                app_name: "ringkernel".to_string(),
1174                app_version: env!("CARGO_PKG_VERSION").to_string(),
1175                environment: "development".to_string(),
1176                log_level: "info".to_string(),
1177                data_dir: None,
1178            }
1179        }
1180    }
1181
1182    /// File-format observability configuration.
1183    #[derive(Debug, Clone, Serialize, Deserialize)]
1184    #[serde(default)]
1185    pub struct FileObservabilityConfig {
1186        /// Enable tracing.
1187        pub tracing_enabled: bool,
1188        /// Enable metrics.
1189        pub metrics_enabled: bool,
1190        /// Metrics port.
1191        pub metrics_port: u16,
1192        /// Metrics path.
1193        pub metrics_path: String,
1194        /// Trace sample rate (0.0 to 1.0).
1195        pub trace_sample_rate: f64,
1196        /// Enable Grafana dashboard generation.
1197        pub grafana_enabled: bool,
1198        /// OTLP endpoint.
1199        pub otlp_endpoint: Option<String>,
1200        /// Custom metric labels.
1201        #[serde(default)]
1202        pub metric_labels: HashMap<String, String>,
1203    }
1204
1205    impl Default for FileObservabilityConfig {
1206        fn default() -> Self {
1207            Self {
1208                tracing_enabled: true,
1209                metrics_enabled: true,
1210                metrics_port: 9090,
1211                metrics_path: "/metrics".to_string(),
1212                trace_sample_rate: 1.0,
1213                grafana_enabled: false,
1214                otlp_endpoint: None,
1215                metric_labels: HashMap::new(),
1216            }
1217        }
1218    }
1219
1220    /// File-format health configuration.
1221    #[derive(Debug, Clone, Serialize, Deserialize)]
1222    #[serde(default)]
1223    pub struct FileHealthConfig {
1224        /// Enable health checks.
1225        pub health_checks_enabled: bool,
1226        /// Health check interval in milliseconds.
1227        pub check_interval_ms: u64,
1228        /// Heartbeat timeout in milliseconds.
1229        pub heartbeat_timeout_ms: u64,
1230        /// Circuit breaker failure threshold.
1231        pub circuit_breaker_failure_threshold: u32,
1232        /// Circuit breaker recovery timeout in milliseconds.
1233        pub circuit_breaker_recovery_timeout_ms: u64,
1234        /// Circuit breaker half-open max requests.
1235        pub circuit_breaker_half_open_max_requests: u32,
1236        /// Retry max attempts.
1237        pub retry_max_attempts: u32,
1238        /// Enable retry jitter.
1239        pub retry_jitter: bool,
1240        /// Max backoff in milliseconds.
1241        pub retry_max_backoff_ms: u64,
1242        /// Enable kernel watchdog.
1243        pub watchdog_enabled: bool,
1244        /// Watchdog failure threshold.
1245        pub watchdog_failure_threshold: u32,
1246    }
1247
1248    impl Default for FileHealthConfig {
1249        fn default() -> Self {
1250            Self {
1251                health_checks_enabled: true,
1252                check_interval_ms: 10_000,
1253                heartbeat_timeout_ms: 30_000,
1254                circuit_breaker_failure_threshold: 5,
1255                circuit_breaker_recovery_timeout_ms: 30_000,
1256                circuit_breaker_half_open_max_requests: 3,
1257                retry_max_attempts: 3,
1258                retry_jitter: true,
1259                retry_max_backoff_ms: 30_000,
1260                watchdog_enabled: true,
1261                watchdog_failure_threshold: 3,
1262            }
1263        }
1264    }
1265
1266    /// File-format multi-GPU configuration.
1267    #[derive(Debug, Clone, Serialize, Deserialize)]
1268    #[serde(default)]
1269    pub struct FileMultiGpuConfig {
1270        /// Enable multi-GPU support.
1271        pub enabled: bool,
1272        /// Load balancing: "round_robin", "least_loaded", "random", "preferred".
1273        pub load_balancing: String,
1274        /// Enable P2P transfers.
1275        pub p2p_enabled: bool,
1276        /// Auto-select device.
1277        pub auto_select_device: bool,
1278        /// Maximum kernels per device.
1279        pub max_kernels_per_device: usize,
1280        /// Preferred device indices.
1281        #[serde(default)]
1282        pub preferred_devices: Vec<usize>,
1283        /// Enable topology discovery.
1284        pub topology_discovery: bool,
1285        /// Enable cross-GPU K2K routing.
1286        pub cross_gpu_k2k: bool,
1287    }
1288
1289    impl Default for FileMultiGpuConfig {
1290        fn default() -> Self {
1291            Self {
1292                enabled: true,
1293                load_balancing: "least_loaded".to_string(),
1294                p2p_enabled: true,
1295                auto_select_device: true,
1296                max_kernels_per_device: 32,
1297                preferred_devices: Vec::new(),
1298                topology_discovery: true,
1299                cross_gpu_k2k: true,
1300            }
1301        }
1302    }
1303
1304    /// File-format migration configuration.
1305    #[derive(Debug, Clone, Serialize, Deserialize)]
1306    #[serde(default)]
1307    pub struct FileMigrationConfig {
1308        /// Enable migration.
1309        pub enabled: bool,
1310        /// Storage type: "memory", "file", "cloud".
1311        pub storage: String,
1312        /// Checkpoint directory.
1313        pub checkpoint_dir: String,
1314        /// Maximum checkpoint size in bytes.
1315        pub max_checkpoint_size: usize,
1316        /// Enable compression.
1317        pub compression_enabled: bool,
1318        /// Compression level (1-9).
1319        pub compression_level: u32,
1320        /// Migration timeout in milliseconds.
1321        pub migration_timeout_ms: u64,
1322        /// Enable incremental checkpoints.
1323        pub incremental_enabled: bool,
1324    }
1325
1326    impl Default for FileMigrationConfig {
1327        fn default() -> Self {
1328            Self {
1329                enabled: true,
1330                storage: "memory".to_string(),
1331                checkpoint_dir: "/tmp/ringkernel/checkpoints".to_string(),
1332                max_checkpoint_size: 1024 * 1024 * 1024,
1333                compression_enabled: false,
1334                compression_level: 3,
1335                migration_timeout_ms: 60_000,
1336                incremental_enabled: false,
1337            }
1338        }
1339    }
1340
1341    // ========================================================================
1342    // Conversion Implementations
1343    // ========================================================================
1344
1345    impl From<FileConfig> for RingKernelConfig {
1346        fn from(file: FileConfig) -> Self {
1347            RingKernelConfig {
1348                general: file.general.into(),
1349                observability: file.observability.into(),
1350                health: file.health.into(),
1351                multi_gpu: file.multi_gpu.into(),
1352                migration: file.migration.into(),
1353                custom: file.custom,
1354            }
1355        }
1356    }
1357
1358    impl From<&RingKernelConfig> for FileConfig {
1359        fn from(config: &RingKernelConfig) -> Self {
1360            FileConfig {
1361                general: (&config.general).into(),
1362                observability: (&config.observability).into(),
1363                health: (&config.health).into(),
1364                multi_gpu: (&config.multi_gpu).into(),
1365                migration: (&config.migration).into(),
1366                custom: config.custom.clone(),
1367            }
1368        }
1369    }
1370
1371    impl From<FileGeneralConfig> for GeneralConfig {
1372        fn from(file: FileGeneralConfig) -> Self {
1373            GeneralConfig {
1374                backend: match file.backend.to_lowercase().as_str() {
1375                    "cpu" => Backend::Cpu,
1376                    "cuda" => Backend::Cuda,
1377                    "wgpu" => Backend::Wgpu,
1378                    "metal" => Backend::Metal,
1379                    _ => Backend::Auto,
1380                },
1381                app_name: file.app_name,
1382                app_version: file.app_version,
1383                environment: match file.environment.to_lowercase().as_str() {
1384                    "staging" => Environment::Staging,
1385                    "production" | "prod" => Environment::Production,
1386                    _ => Environment::Development,
1387                },
1388                log_level: match file.log_level.to_lowercase().as_str() {
1389                    "trace" => LogLevel::Trace,
1390                    "debug" => LogLevel::Debug,
1391                    "warn" | "warning" => LogLevel::Warn,
1392                    "error" => LogLevel::Error,
1393                    _ => LogLevel::Info,
1394                },
1395                data_dir: file.data_dir.map(PathBuf::from),
1396            }
1397        }
1398    }
1399
1400    impl From<&GeneralConfig> for FileGeneralConfig {
1401        fn from(config: &GeneralConfig) -> Self {
1402            FileGeneralConfig {
1403                backend: match config.backend {
1404                    Backend::Auto => "auto".to_string(),
1405                    Backend::Cpu => "cpu".to_string(),
1406                    Backend::Cuda => "cuda".to_string(),
1407                    Backend::Wgpu => "wgpu".to_string(),
1408                    Backend::Metal => "metal".to_string(),
1409                },
1410                app_name: config.app_name.clone(),
1411                app_version: config.app_version.clone(),
1412                environment: config.environment.as_str().to_string(),
1413                log_level: config.log_level.as_str().to_string(),
1414                data_dir: config.data_dir.as_ref().map(|p| p.display().to_string()),
1415            }
1416        }
1417    }
1418
1419    impl From<FileObservabilityConfig> for ObservabilityConfig {
1420        fn from(file: FileObservabilityConfig) -> Self {
1421            ObservabilityConfig {
1422                tracing_enabled: file.tracing_enabled,
1423                metrics_enabled: file.metrics_enabled,
1424                metrics_port: file.metrics_port,
1425                metrics_path: file.metrics_path,
1426                trace_sample_rate: file.trace_sample_rate,
1427                grafana_enabled: file.grafana_enabled,
1428                otlp_endpoint: file.otlp_endpoint,
1429                metric_labels: file.metric_labels,
1430            }
1431        }
1432    }
1433
1434    impl From<&ObservabilityConfig> for FileObservabilityConfig {
1435        fn from(config: &ObservabilityConfig) -> Self {
1436            FileObservabilityConfig {
1437                tracing_enabled: config.tracing_enabled,
1438                metrics_enabled: config.metrics_enabled,
1439                metrics_port: config.metrics_port,
1440                metrics_path: config.metrics_path.clone(),
1441                trace_sample_rate: config.trace_sample_rate,
1442                grafana_enabled: config.grafana_enabled,
1443                otlp_endpoint: config.otlp_endpoint.clone(),
1444                metric_labels: config.metric_labels.clone(),
1445            }
1446        }
1447    }
1448
1449    impl From<FileHealthConfig> for HealthConfig {
1450        fn from(file: FileHealthConfig) -> Self {
1451            HealthConfig {
1452                health_checks_enabled: file.health_checks_enabled,
1453                check_interval: Duration::from_millis(file.check_interval_ms),
1454                heartbeat_timeout: Duration::from_millis(file.heartbeat_timeout_ms),
1455                circuit_breaker: CircuitBreakerConfig {
1456                    failure_threshold: file.circuit_breaker_failure_threshold,
1457                    success_threshold: 1, // Default: 1 success to close
1458                    recovery_timeout: Duration::from_millis(
1459                        file.circuit_breaker_recovery_timeout_ms,
1460                    ),
1461                    window_duration: Duration::from_secs(60), // Default: 60 second window
1462                    half_open_max_requests: file.circuit_breaker_half_open_max_requests,
1463                },
1464                retry: RetryConfig {
1465                    max_attempts: file.retry_max_attempts,
1466                    backoff: BackoffStrategy::Exponential {
1467                        initial: Duration::from_millis(100),
1468                        max: Duration::from_millis(file.retry_max_backoff_ms),
1469                        multiplier: 2.0,
1470                    },
1471                    jitter: file.retry_jitter,
1472                    max_backoff: Duration::from_millis(file.retry_max_backoff_ms),
1473                },
1474                load_shedding: LoadSheddingPolicy::default(),
1475                watchdog_enabled: file.watchdog_enabled,
1476                watchdog_failure_threshold: file.watchdog_failure_threshold,
1477            }
1478        }
1479    }
1480
1481    impl From<&HealthConfig> for FileHealthConfig {
1482        fn from(config: &HealthConfig) -> Self {
1483            FileHealthConfig {
1484                health_checks_enabled: config.health_checks_enabled,
1485                check_interval_ms: config.check_interval.as_millis() as u64,
1486                heartbeat_timeout_ms: config.heartbeat_timeout.as_millis() as u64,
1487                circuit_breaker_failure_threshold: config.circuit_breaker.failure_threshold,
1488                circuit_breaker_recovery_timeout_ms: config
1489                    .circuit_breaker
1490                    .recovery_timeout
1491                    .as_millis() as u64,
1492                circuit_breaker_half_open_max_requests: config
1493                    .circuit_breaker
1494                    .half_open_max_requests,
1495                retry_max_attempts: config.retry.max_attempts,
1496                retry_jitter: config.retry.jitter,
1497                retry_max_backoff_ms: config.retry.max_backoff.as_millis() as u64,
1498                watchdog_enabled: config.watchdog_enabled,
1499                watchdog_failure_threshold: config.watchdog_failure_threshold,
1500            }
1501        }
1502    }
1503
1504    impl From<FileMultiGpuConfig> for MultiGpuConfig {
1505        fn from(file: FileMultiGpuConfig) -> Self {
1506            MultiGpuConfig {
1507                enabled: file.enabled,
1508                load_balancing: match file.load_balancing.to_lowercase().as_str() {
1509                    "round_robin" | "roundrobin" => LoadBalancingStrategy::RoundRobin,
1510                    "first_available" | "firstavailable" => LoadBalancingStrategy::FirstAvailable,
1511                    "memory_based" | "memorybased" => LoadBalancingStrategy::MemoryBased,
1512                    "compute_capability" | "computecapability" => {
1513                        LoadBalancingStrategy::ComputeCapability
1514                    }
1515                    "custom" => LoadBalancingStrategy::Custom,
1516                    _ => LoadBalancingStrategy::LeastLoaded,
1517                },
1518                p2p_enabled: file.p2p_enabled,
1519                auto_select_device: file.auto_select_device,
1520                max_kernels_per_device: file.max_kernels_per_device,
1521                preferred_devices: file.preferred_devices,
1522                topology_discovery: file.topology_discovery,
1523                cross_gpu_k2k: file.cross_gpu_k2k,
1524            }
1525        }
1526    }
1527
1528    impl From<&MultiGpuConfig> for FileMultiGpuConfig {
1529        fn from(config: &MultiGpuConfig) -> Self {
1530            FileMultiGpuConfig {
1531                enabled: config.enabled,
1532                load_balancing: match config.load_balancing {
1533                    LoadBalancingStrategy::FirstAvailable => "first_available".to_string(),
1534                    LoadBalancingStrategy::LeastLoaded => "least_loaded".to_string(),
1535                    LoadBalancingStrategy::RoundRobin => "round_robin".to_string(),
1536                    LoadBalancingStrategy::MemoryBased => "memory_based".to_string(),
1537                    LoadBalancingStrategy::ComputeCapability => "compute_capability".to_string(),
1538                    LoadBalancingStrategy::Custom => "custom".to_string(),
1539                },
1540                p2p_enabled: config.p2p_enabled,
1541                auto_select_device: config.auto_select_device,
1542                max_kernels_per_device: config.max_kernels_per_device,
1543                preferred_devices: config.preferred_devices.clone(),
1544                topology_discovery: config.topology_discovery,
1545                cross_gpu_k2k: config.cross_gpu_k2k,
1546            }
1547        }
1548    }
1549
1550    impl From<FileMigrationConfig> for MigrationConfig {
1551        fn from(file: FileMigrationConfig) -> Self {
1552            MigrationConfig {
1553                enabled: file.enabled,
1554                storage: match file.storage.to_lowercase().as_str() {
1555                    "file" => CheckpointStorageType::File,
1556                    "cloud" => CheckpointStorageType::Cloud,
1557                    _ => CheckpointStorageType::Memory,
1558                },
1559                checkpoint_dir: PathBuf::from(file.checkpoint_dir),
1560                max_checkpoint_size: file.max_checkpoint_size,
1561                compression_enabled: file.compression_enabled,
1562                compression_level: file.compression_level,
1563                migration_timeout: Duration::from_millis(file.migration_timeout_ms),
1564                incremental_enabled: file.incremental_enabled,
1565            }
1566        }
1567    }
1568
1569    impl From<&MigrationConfig> for FileMigrationConfig {
1570        fn from(config: &MigrationConfig) -> Self {
1571            FileMigrationConfig {
1572                enabled: config.enabled,
1573                storage: config.storage.as_str().to_string(),
1574                checkpoint_dir: config.checkpoint_dir.display().to_string(),
1575                max_checkpoint_size: config.max_checkpoint_size,
1576                compression_enabled: config.compression_enabled,
1577                compression_level: config.compression_level,
1578                migration_timeout_ms: config.migration_timeout.as_millis() as u64,
1579                incremental_enabled: config.incremental_enabled,
1580            }
1581        }
1582    }
1583}
1584
1585#[cfg(feature = "config-file")]
1586pub use file_config::*;
1587
#[cfg(feature = "config-file")]
impl RingKernelConfig {
    /// Reads the whole file at `path` into a string.
    ///
    /// I/O failures are reported as `RingKernelError::InvalidConfig` and name
    /// the offending path, so the caller can tell which file was unreadable.
    fn read_config_source(path: &Path) -> Result<String> {
        std::fs::read_to_string(path).map_err(|e| {
            RingKernelError::InvalidConfig(format!(
                "Failed to read config file '{}': {}",
                path.display(),
                e
            ))
        })
    }

    /// Converts a parsed `FileConfig` into a runtime configuration and
    /// validates it, so every loader applies the same checks.
    fn validated_from_file_config(file_config: FileConfig) -> Result<Self> {
        let config = RingKernelConfig::from(file_config);
        config.validate()?;
        Ok(config)
    }

    /// Load configuration from a TOML file.
    ///
    /// # Errors
    ///
    /// Returns `InvalidConfig` if the file cannot be read, is not valid TOML
    /// for this schema, or fails validation.
    pub fn from_toml_file<P: AsRef<Path>>(path: P) -> Result<Self> {
        let content = Self::read_config_source(path.as_ref())?;
        Self::from_toml_str(&content)
    }

    /// Load configuration from a TOML string.
    ///
    /// # Errors
    ///
    /// Returns `InvalidConfig` if `content` cannot be parsed or the resulting
    /// configuration fails validation.
    pub fn from_toml_str(content: &str) -> Result<Self> {
        let file_config: FileConfig = toml::from_str(content).map_err(|e| {
            RingKernelError::InvalidConfig(format!("Failed to parse TOML config: {}", e))
        })?;
        Self::validated_from_file_config(file_config)
    }

    /// Load configuration from a YAML file.
    ///
    /// # Errors
    ///
    /// Returns `InvalidConfig` if the file cannot be read, is not valid YAML
    /// for this schema, or fails validation.
    pub fn from_yaml_file<P: AsRef<Path>>(path: P) -> Result<Self> {
        let content = Self::read_config_source(path.as_ref())?;
        Self::from_yaml_str(&content)
    }

    /// Load configuration from a YAML string.
    ///
    /// # Errors
    ///
    /// Returns `InvalidConfig` if `content` cannot be parsed or the resulting
    /// configuration fails validation.
    pub fn from_yaml_str(content: &str) -> Result<Self> {
        let file_config: FileConfig = serde_yaml::from_str(content).map_err(|e| {
            RingKernelError::InvalidConfig(format!("Failed to parse YAML config: {}", e))
        })?;
        Self::validated_from_file_config(file_config)
    }

    /// Load configuration from a file, auto-detecting format from extension.
    ///
    /// # Errors
    ///
    /// Returns `InvalidConfig` if the extension is not recognised by
    /// `ConfigFormat::from_extension`, or if loading in the detected format
    /// fails.
    pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
        let path = path.as_ref();
        let format = ConfigFormat::from_extension(path).ok_or_else(|| {
            RingKernelError::InvalidConfig(format!(
                "Unknown config file extension: {}",
                path.display()
            ))
        })?;

        match format {
            ConfigFormat::Toml => Self::from_toml_file(path),
            ConfigFormat::Yaml => Self::from_yaml_file(path),
        }
    }

    /// Write configuration to a TOML string.
    ///
    /// # Errors
    ///
    /// Returns `InvalidConfig` if serialization fails.
    pub fn to_toml_str(&self) -> Result<String> {
        let file_config = FileConfig::from(self);
        toml::to_string_pretty(&file_config).map_err(|e| {
            RingKernelError::InvalidConfig(format!("Failed to serialize to TOML: {}", e))
        })
    }

    /// Write configuration to a YAML string.
    ///
    /// # Errors
    ///
    /// Returns `InvalidConfig` if serialization fails.
    pub fn to_yaml_str(&self) -> Result<String> {
        let file_config = FileConfig::from(self);
        serde_yaml::to_string(&file_config).map_err(|e| {
            RingKernelError::InvalidConfig(format!("Failed to serialize to YAML: {}", e))
        })
    }

    /// Write configuration to a file, choosing the format from the extension.
    ///
    /// # Errors
    ///
    /// Returns `InvalidConfig` if the extension is not recognised, if
    /// serialization fails, or if the file cannot be written (the message
    /// names the path).
    pub fn to_file<P: AsRef<Path>>(&self, path: P) -> Result<()> {
        let path = path.as_ref();
        let format = ConfigFormat::from_extension(path).ok_or_else(|| {
            RingKernelError::InvalidConfig(format!(
                "Unknown config file extension: {}",
                path.display()
            ))
        })?;

        let content = match format {
            ConfigFormat::Toml => self.to_toml_str()?,
            ConfigFormat::Yaml => self.to_yaml_str()?,
        };

        std::fs::write(path, content).map_err(|e| {
            RingKernelError::InvalidConfig(format!(
                "Failed to write config file '{}': {}",
                path.display(),
                e
            ))
        })
    }
}
1678
1679// ============================================================================
1680// Tests
1681// ============================================================================
1682
1683#[cfg(test)]
1684mod tests {
1685    use super::*;
1686
1687    #[test]
1688    fn test_default_config() {
1689        let config = RingKernelConfig::default();
1690        assert!(config.validate().is_ok());
1691    }
1692
1693    #[test]
1694    fn test_builder_basic() {
1695        let config = ConfigBuilder::new().build().unwrap();
1696
1697        assert_eq!(config.general.environment, Environment::Development);
1698        assert!(config.observability.tracing_enabled);
1699        assert!(config.health.health_checks_enabled);
1700        assert!(config.multi_gpu.enabled);
1701    }
1702
1703    #[test]
1704    fn test_builder_with_general() {
1705        let config = ConfigBuilder::new()
1706            .with_general(|g| {
1707                g.app_name("test_app")
1708                    .environment(Environment::Production)
1709                    .log_level(LogLevel::Warn)
1710            })
1711            .build()
1712            .unwrap();
1713
1714        assert_eq!(config.general.app_name, "test_app");
1715        assert_eq!(config.general.environment, Environment::Production);
1716        assert_eq!(config.general.log_level, LogLevel::Warn);
1717    }
1718
1719    #[test]
1720    fn test_builder_with_observability() {
1721        let config = ConfigBuilder::new()
1722            .with_observability(|o| {
1723                o.enable_tracing(false)
1724                    .metrics_port(8080)
1725                    .trace_sample_rate(0.5)
1726            })
1727            .build()
1728            .unwrap();
1729
1730        assert!(!config.observability.tracing_enabled);
1731        assert_eq!(config.observability.metrics_port, 8080);
1732        assert_eq!(config.observability.trace_sample_rate, 0.5);
1733    }
1734
1735    #[test]
1736    fn test_builder_with_health() {
1737        let config = ConfigBuilder::new()
1738            .with_health(|h| {
1739                h.check_interval(Duration::from_secs(5))
1740                    .heartbeat_timeout(Duration::from_secs(15))
1741                    .circuit_breaker_threshold(10)
1742            })
1743            .build()
1744            .unwrap();
1745
1746        assert_eq!(config.health.check_interval, Duration::from_secs(5));
1747        assert_eq!(config.health.heartbeat_timeout, Duration::from_secs(15));
1748        assert_eq!(config.health.circuit_breaker.failure_threshold, 10);
1749    }
1750
1751    #[test]
1752    fn test_builder_with_multi_gpu() {
1753        let config = ConfigBuilder::new()
1754            .with_multi_gpu(|g| {
1755                g.load_balancing(LoadBalancingStrategy::RoundRobin)
1756                    .enable_p2p(false)
1757                    .max_kernels_per_device(64)
1758            })
1759            .build()
1760            .unwrap();
1761
1762        assert_eq!(
1763            config.multi_gpu.load_balancing,
1764            LoadBalancingStrategy::RoundRobin
1765        );
1766        assert!(!config.multi_gpu.p2p_enabled);
1767        assert_eq!(config.multi_gpu.max_kernels_per_device, 64);
1768    }
1769
/// Migration overrides passed through `with_migration` must show up in the built config.
#[test]
fn test_builder_with_migration() {
    let built = ConfigBuilder::new()
        .with_migration(|migration| {
            migration
                .storage(CheckpointStorageType::File)
                .enable_compression(true)
                .compression_level(5)
        })
        .build()
        .unwrap();

    let migration = &built.migration;
    assert_eq!(migration.storage, CheckpointStorageType::File);
    assert!(migration.compression_enabled);
    assert_eq!(migration.compression_level, 5);
}
1785
/// Building with a trace sample rate of 1.5 must fail.
#[test]
fn test_validation_invalid_sample_rate() {
    assert!(ConfigBuilder::new()
        .with_observability(|obs| obs.trace_sample_rate(1.5))
        .build()
        .is_err());
}
1794
/// Building with a compression level of 10 must fail.
#[test]
fn test_validation_invalid_compression_level() {
    assert!(ConfigBuilder::new()
        .with_migration(|migration| migration.compression_level(10))
        .build()
        .is_err());
}
1803
/// Building with a zero-length health check interval must fail.
#[test]
fn test_validation_invalid_check_interval() {
    assert!(ConfigBuilder::new()
        .with_health(|health| health.check_interval(Duration::ZERO))
        .build()
        .is_err());
}
1812
/// Custom key/value pairs set on the builder are retrievable; unknown keys yield `None`.
#[test]
fn test_custom_settings() {
    let built = ConfigBuilder::new()
        .custom("feature_flag", "enabled")
        .custom("custom_param", "42")
        .build()
        .unwrap();

    // (key, expected lookup result)
    let lookups = [
        ("feature_flag", Some("enabled")),
        ("custom_param", Some("42")),
        ("nonexistent", None),
    ];

    for (key, expected) in lookups {
        assert_eq!(built.get_custom(key), expected);
    }
}
1825
/// Only `Production` reports itself as production, and each variant has a lowercase name.
#[test]
fn test_environment() {
    // (variant, expected `is_production()` result)
    for (environment, expected) in [
        (Environment::Development, false),
        (Environment::Staging, false),
        (Environment::Production, true),
    ] {
        assert_eq!(environment.is_production(), expected);
    }

    // (variant, expected `as_str()` result)
    for (environment, expected) in [
        (Environment::Development, "development"),
        (Environment::Staging, "staging"),
        (Environment::Production, "production"),
    ] {
        assert_eq!(environment.as_str(), expected);
    }
}
1836
/// Each log level maps to its lowercase string name.
#[test]
fn test_log_level() {
    // (variant, expected `as_str()` result)
    let expectations = [
        (LogLevel::Trace, "trace"),
        (LogLevel::Debug, "debug"),
        (LogLevel::Info, "info"),
        (LogLevel::Warn, "warn"),
        (LogLevel::Error, "error"),
    ];

    for (level, expected) in expectations {
        assert_eq!(level.as_str(), expected);
    }
}
1845
/// Each checkpoint storage type maps to its lowercase string name.
#[test]
fn test_storage_type() {
    // (variant, expected `as_str()` result)
    let expectations = [
        (CheckpointStorageType::Memory, "memory"),
        (CheckpointStorageType::File, "file"),
        (CheckpointStorageType::Cloud, "cloud"),
    ];

    for (storage, expected) in expectations {
        assert_eq!(storage.as_str(), expected);
    }
}
1852
/// The development preset selects the development environment with debug logging.
#[test]
fn test_preset_development() {
    let preset = RingKernelConfig::development();
    let general = &preset.general;

    assert_eq!(general.environment, Environment::Development);
    assert_eq!(general.log_level, LogLevel::Debug);
}
1859
/// The production preset selects the production environment and enables Grafana and
/// checkpoint compression.
#[test]
fn test_preset_production() {
    let preset = RingKernelConfig::production();

    assert_eq!(preset.general.environment, Environment::Production);

    let grafana_on = preset.observability.grafana_enabled;
    assert!(grafana_on);

    let compression_on = preset.migration.compression_enabled;
    assert!(compression_on);
}
1867
/// The high-performance preset turns off tracing (sample rate 0.0) and checkpoint compression.
#[test]
fn test_preset_high_performance() {
    let preset = RingKernelConfig::high_performance();
    let observability = &preset.observability;

    assert!(!observability.tracing_enabled);
    assert_eq!(observability.trace_sample_rate, 0.0);
    assert!(!preset.migration.compression_enabled);
}
1875
/// `ConfigFormat::from_extension` recognises `.toml`, `.yaml`, `.yml` and upper-case `.TOML`,
/// and returns `None` for `.json` and for a path without an extension.
#[test]
fn test_config_format_from_extension() {
    use std::path::Path;

    // (file name, expected detection result)
    let cases = [
        ("config.toml", Some(ConfigFormat::Toml)),
        ("config.yaml", Some(ConfigFormat::Yaml)),
        ("config.yml", Some(ConfigFormat::Yaml)),
        ("config.TOML", Some(ConfigFormat::Toml)),
        ("config.json", None),
        ("config", None),
    ];

    for (file_name, expected) in cases {
        let detected = ConfigFormat::from_extension(Path::new(file_name));
        assert_eq!(detected, expected);
    }
}
1899}
1900
1901// ============================================================================
1902// Configuration File Tests (feature-gated)
1903// ============================================================================
1904
#[cfg(all(test, feature = "config-file"))]
mod file_config_tests {
    use super::*;
    use std::time::Duration;

    /// TOML fixture that sets values in every configuration section.
    const SAMPLE_TOML: &str = r#"
[general]
app_name = "test-app"
app_version = "2.0.0"
environment = "production"
log_level = "debug"
backend = "cuda"

[observability]
tracing_enabled = true
metrics_enabled = true
metrics_port = 8080
trace_sample_rate = 0.5

[health]
health_checks_enabled = true
check_interval_ms = 5000
heartbeat_timeout_ms = 15000
circuit_breaker_failure_threshold = 10
watchdog_enabled = true

[multi_gpu]
enabled = true
load_balancing = "round_robin"
p2p_enabled = false
max_kernels_per_device = 64

[migration]
enabled = true
storage = "file"
checkpoint_dir = "/data/checkpoints"
compression_enabled = true
compression_level = 5

[custom]
feature_x = "enabled"
max_retries = "5"
"#;

    /// YAML fixture carrying the same values as the TOML fixture.
    const SAMPLE_YAML: &str = r#"
general:
  app_name: test-app
  app_version: "2.0.0"
  environment: production
  log_level: debug
  backend: cuda

observability:
  tracing_enabled: true
  metrics_enabled: true
  metrics_port: 8080
  trace_sample_rate: 0.5

health:
  health_checks_enabled: true
  check_interval_ms: 5000
  heartbeat_timeout_ms: 15000
  circuit_breaker_failure_threshold: 10
  watchdog_enabled: true

multi_gpu:
  enabled: true
  load_balancing: round_robin
  p2p_enabled: false
  max_kernels_per_device: 64

migration:
  enabled: true
  storage: file
  checkpoint_dir: /data/checkpoints
  compression_enabled: true
  compression_level: 5

custom:
  feature_x: enabled
  max_retries: "5"
"#;

    /// Asserts the values that both sample fixtures are expected to produce once loaded.
    fn assert_sample_values(loaded: &RingKernelConfig) {
        let general = &loaded.general;
        assert_eq!(general.app_name, "test-app");
        assert_eq!(general.app_version, "2.0.0");
        assert_eq!(general.environment, Environment::Production);
        assert_eq!(general.log_level, LogLevel::Debug);
        assert_eq!(general.backend, Backend::Cuda);

        let observability = &loaded.observability;
        assert!(observability.tracing_enabled);
        assert_eq!(observability.metrics_port, 8080);
        assert_eq!(observability.trace_sample_rate, 0.5);

        let health = &loaded.health;
        assert_eq!(health.check_interval, Duration::from_millis(5000));
        assert_eq!(health.heartbeat_timeout, Duration::from_millis(15000));
        assert_eq!(health.circuit_breaker.failure_threshold, 10);

        let multi_gpu = &loaded.multi_gpu;
        assert_eq!(multi_gpu.load_balancing, LoadBalancingStrategy::RoundRobin);
        assert!(!multi_gpu.p2p_enabled);
        assert_eq!(multi_gpu.max_kernels_per_device, 64);

        let migration = &loaded.migration;
        assert_eq!(migration.storage, CheckpointStorageType::File);
        assert!(migration.compression_enabled);
        assert_eq!(migration.compression_level, 5);

        assert_eq!(loaded.get_custom("feature_x"), Some("enabled"));
        assert_eq!(loaded.get_custom("max_retries"), Some("5"));
    }

    /// Asserts the production-preset properties checked after a serialize/parse cycle.
    fn assert_production_preset(reloaded: &RingKernelConfig) {
        assert_eq!(reloaded.general.environment, Environment::Production);
        assert!(reloaded.observability.grafana_enabled);
    }

    /// Builds the customised configuration used by both round-trip tests.
    fn roundtrip_source() -> RingKernelConfig {
        ConfigBuilder::new()
            .with_general(|general| {
                general
                    .app_name("roundtrip-test")
                    .environment(Environment::Staging)
                    .log_level(LogLevel::Warn)
            })
            .with_observability(|obs| obs.metrics_port(9999).trace_sample_rate(0.25))
            .with_multi_gpu(|gpu| gpu.max_kernels_per_device(128))
            .custom("test_key", "test_value")
            .build()
            .unwrap()
    }

    /// Asserts that a reloaded configuration still carries the round-trip source's values.
    fn assert_roundtrip_values(reloaded: &RingKernelConfig) {
        let general = &reloaded.general;
        assert_eq!(general.app_name, "roundtrip-test");
        assert_eq!(general.environment, Environment::Staging);
        assert_eq!(general.log_level, LogLevel::Warn);

        let observability = &reloaded.observability;
        assert_eq!(observability.metrics_port, 9999);
        assert_eq!(observability.trace_sample_rate, 0.25);

        assert_eq!(reloaded.multi_gpu.max_kernels_per_device, 128);
        assert_eq!(reloaded.get_custom("test_key"), Some("test_value"));
    }

    #[test]
    fn test_from_toml_str() {
        let loaded = RingKernelConfig::from_toml_str(SAMPLE_TOML).unwrap();
        assert_sample_values(&loaded);
    }

    #[test]
    fn test_from_yaml_str() {
        let loaded = RingKernelConfig::from_yaml_str(SAMPLE_YAML).unwrap();
        assert_sample_values(&loaded);
    }

    #[test]
    fn test_to_toml_str() {
        let serialized = RingKernelConfig::production().to_toml_str().unwrap();

        // Parse the serialized text back and verify the preset survived.
        let reloaded = RingKernelConfig::from_toml_str(&serialized).unwrap();
        assert_production_preset(&reloaded);
    }

    #[test]
    fn test_to_yaml_str() {
        let serialized = RingKernelConfig::production().to_yaml_str().unwrap();

        // Parse the serialized text back and verify the preset survived.
        let reloaded = RingKernelConfig::from_yaml_str(&serialized).unwrap();
        assert_production_preset(&reloaded);
    }

    #[test]
    fn test_roundtrip_toml() {
        let serialized = roundtrip_source().to_toml_str().unwrap();
        let reloaded = RingKernelConfig::from_toml_str(&serialized).unwrap();

        assert_roundtrip_values(&reloaded);
    }

    #[test]
    fn test_roundtrip_yaml() {
        let serialized = roundtrip_source().to_yaml_str().unwrap();
        let reloaded = RingKernelConfig::from_yaml_str(&serialized).unwrap();

        assert_roundtrip_values(&reloaded);
    }

    #[test]
    fn test_partial_config() {
        // Sections missing from the input are expected to take their default values.
        let minimal_toml = r#"
[general]
app_name = "minimal"
"#;
        let loaded = RingKernelConfig::from_toml_str(minimal_toml).unwrap();

        let general = &loaded.general;
        assert_eq!(general.app_name, "minimal");
        assert_eq!(general.environment, Environment::Development); // default
        assert!(loaded.observability.tracing_enabled); // default
        assert!(loaded.health.health_checks_enabled); // default
    }

    #[test]
    fn test_invalid_toml() {
        let malformed = "this is not valid toml { }";
        assert!(RingKernelConfig::from_toml_str(malformed).is_err());
    }

    #[test]
    fn test_invalid_yaml() {
        let malformed = "{{invalid yaml}}";
        assert!(RingKernelConfig::from_yaml_str(malformed).is_err());
    }

    #[test]
    fn test_validation_on_load() {
        // Invalid: trace_sample_rate > 1.0
        let invalid_toml = r#"
[observability]
trace_sample_rate = 1.5
"#;
        assert!(RingKernelConfig::from_toml_str(invalid_toml).is_err());
    }

    #[test]
    fn test_file_config_defaults() {
        let converted: RingKernelConfig = FileConfig::default().into();

        assert_eq!(converted.general.app_name, "ringkernel");
        assert_eq!(converted.general.environment, Environment::Development);
        assert!(converted.observability.tracing_enabled);
        assert!(converted.health.health_checks_enabled);
        assert!(converted.multi_gpu.enabled);
        assert!(converted.validate().is_ok());
    }

    #[test]
    fn test_environment_aliases() {
        // "prod" is accepted as an alias for the production environment.
        let aliased = r#"
[general]
environment = "prod"
"#;
        let loaded = RingKernelConfig::from_toml_str(aliased).unwrap();
        assert_eq!(loaded.general.environment, Environment::Production);
    }

    #[test]
    fn test_load_balancing_aliases() {
        // "roundrobin" is accepted as an alias for round-robin load balancing.
        let aliased = r#"
[multi_gpu]
load_balancing = "roundrobin"
"#;
        let loaded = RingKernelConfig::from_toml_str(aliased).unwrap();
        assert_eq!(
            loaded.multi_gpu.load_balancing,
            LoadBalancingStrategy::RoundRobin
        );
    }
}