1use sklears_core::{error::Result as SklResult, prelude::SklearsError, traits::Estimator};
7use std::collections::HashMap;
8use std::fs::{self, File};
9use std::io::{Read, Write};
10use std::path::{Path, PathBuf};
11use std::sync::{Arc, Mutex, RwLock};
12use std::thread::{self, JoinHandle};
13use std::time::{Duration, SystemTime};
14
15use crate::distributed::{ResourceRequirements, TaskPriority};
16use crate::scheduling::{RetryConfig, SchedulingStrategy};
17
18pub trait ConfigurationProvider: Send + Sync {
20 fn get_configuration(&self, config_id: &str) -> SklResult<PipelineConfig>;
22
23 fn list_configurations(&self) -> SklResult<Vec<String>>;
25
26 fn has_configuration(&self, config_id: &str) -> bool;
28
29 fn metadata(&self) -> ConfigProviderMetadata;
31
32 fn validate_configuration(&self, config: &PipelineConfig) -> SklResult<ValidationResult>;
34
35 fn save_configuration(&self, config_id: &str, config: &PipelineConfig) -> SklResult<()> {
37 Err(SklearsError::InvalidInput(
38 "Provider does not support saving configurations".to_string(),
39 ))
40 }
41}
42
/// Self-description returned by `ConfigurationProvider::metadata`.
#[derive(Clone, Debug)]
pub struct ConfigProviderMetadata {
    /// Provider name.
    pub name: String,
    /// Provider version string.
    pub version: String,
    /// Capabilities the provider advertises.
    pub features: Vec<ConfigProviderFeature>,
    /// Free-form description.
    pub description: String,
}

/// Capability flags a provider can list in
/// [`ConfigProviderMetadata::features`].
///
/// Nothing in this file interprets the variants; their meaning is conveyed
/// by name only.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ConfigProviderFeature {
    Read,
    Write,
    List,
    Validate,
    Watch,
    Template,
    Inheritance,
}
74
75#[derive(Debug, Clone)]
77pub struct ValidationResult {
78 pub valid: bool,
80 pub errors: Vec<ValidationError>,
82 pub warnings: Vec<ValidationWarning>,
84 pub suggestions: Vec<ValidationSuggestion>,
86}
87
88#[derive(Debug, Clone)]
90pub struct ValidationError {
91 pub message: String,
93 pub path: String,
95 pub code: String,
97 pub severity: ValidationSeverity,
99}
100
101#[derive(Debug, Clone)]
103pub struct ValidationWarning {
104 pub message: String,
106 pub path: String,
108 pub code: String,
110}
111
112#[derive(Debug, Clone)]
114pub struct ValidationSuggestion {
115 pub message: String,
117 pub path: String,
119 pub suggested_value: Option<ConfigValue>,
121}
122
/// Severity attached to a `ValidationError`.
///
/// The derived ordering follows declaration order, so
/// `Info < Warning < Error < Critical`. Do not reorder the variants.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum ValidationSeverity {
    Info,
    Warning,
    Error,
    Critical,
}
135
136#[derive(Debug, Clone)]
138pub struct ConfigurationTemplate {
139 pub name: String,
141 pub version: String,
143 pub base_template: Option<String>,
145 pub parameters: HashMap<String, TemplateParameter>,
147 pub template: PipelineConfig,
149 pub metadata: TemplateMetadata,
151}
152
153#[derive(Debug, Clone)]
155pub struct TemplateParameter {
156 pub name: String,
158 pub parameter_type: TemplateParameterType,
160 pub default_value: Option<ConfigValue>,
162 pub required: bool,
164 pub description: String,
166 pub constraints: Vec<ParameterConstraint>,
168}
169
170#[derive(Debug, Clone, PartialEq, Eq)]
172pub enum TemplateParameterType {
173 String,
175 Integer,
177 Float,
179 Boolean,
181 Array,
183 Object,
185 Reference,
187 Expression,
189}
190
191#[derive(Debug, Clone)]
193pub enum ParameterConstraint {
194 MinValue(f64),
196 MaxValue(f64),
198 MinLength(usize),
200 MaxLength(usize),
202 Pattern(String),
204 OneOf(Vec<ConfigValue>),
206 Custom(String),
208}
209
210#[derive(Debug, Clone)]
212pub struct TemplateMetadata {
213 pub description: String,
215 pub author: String,
217 pub category: String,
219 pub tags: Vec<String>,
221 pub created_at: SystemTime,
223 pub updated_at: SystemTime,
225}
226
/// Describes how a configuration derives from a parent configuration.
///
/// This file only declares the model; no visible code applies it.
#[derive(Clone, Debug)]
pub struct ConfigurationInheritance {
    /// Identifier of the parent configuration.
    pub parent_id: String,
    /// How parent and child values are combined.
    pub strategy: InheritanceStrategy,
    /// Entries the child overrides (presumably configuration paths).
    pub overrides: Vec<String>,
    /// What to do when parent and child disagree.
    pub conflict_resolution: ConflictResolution,
}

/// Ways of combining a child configuration with its parent.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum InheritanceStrategy {
    Replace,
    Merge,
    Append,
    Prepend,
    /// Strategy identified by a caller-defined name.
    Custom(String),
}

/// Policy for resolving conflicting parent/child values.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ConflictResolution {
    UseParent,
    UseChild,
    Merge,
    Error,
    Interactive,
}
269
270#[derive(Debug, Clone)]
272pub struct PipelineConfig {
273 pub metadata: ConfigMetadata,
275 pub pipeline: PipelineDefinition,
277 pub execution: ExecutionConfig,
279 pub resources: ResourceConfig,
281 pub environments: HashMap<String, EnvironmentConfig>,
283 pub features: HashMap<String, bool>,
285 pub custom: HashMap<String, ConfigValue>,
287}
288
289#[derive(Debug, Clone)]
291pub struct ConfigMetadata {
292 pub name: String,
294 pub version: String,
296 pub description: Option<String>,
298 pub author: Option<String>,
300 pub created_at: SystemTime,
302 pub updated_at: SystemTime,
304 pub schema_version: String,
306 pub tags: Vec<String>,
308}
309
310#[derive(Debug, Clone)]
312pub struct PipelineDefinition {
313 pub steps: Vec<StepConfig>,
315 pub estimator: Option<EstimatorConfig>,
317 pub data_sources: Vec<DataSourceConfig>,
319 pub outputs: Vec<OutputConfig>,
321 pub parameters: HashMap<String, ParameterConfig>,
323}
324
325#[derive(Debug, Clone)]
327pub struct StepConfig {
328 pub name: String,
330 pub step_type: String,
332 pub parameters: HashMap<String, ConfigValue>,
334 pub condition: Option<String>,
336 pub depends_on: Vec<String>,
338 pub resources: Option<ResourceRequirements>,
340 pub enabled: bool,
342}
343
344#[derive(Debug, Clone)]
346pub struct EstimatorConfig {
347 pub estimator_type: String,
349 pub parameters: HashMap<String, ConfigValue>,
351 pub hyperparameters: Option<HyperparameterSpace>,
353 pub cross_validation: Option<CrossValidationConfig>,
355}
356
357#[derive(Debug, Clone)]
359pub struct DataSourceConfig {
360 pub name: String,
362 pub source_type: String,
364 pub connection: HashMap<String, ConfigValue>,
366 pub format: Option<String>,
368 pub schema: Option<SchemaConfig>,
370 pub preprocessing: Vec<String>,
372}
373
374#[derive(Debug, Clone)]
376pub struct OutputConfig {
377 pub name: String,
379 pub output_type: String,
381 pub format: String,
383 pub destination: HashMap<String, ConfigValue>,
385 pub postprocessing: Vec<String>,
387}
388
389#[derive(Debug, Clone)]
391pub struct SchemaConfig {
392 pub columns: Vec<ColumnDefinition>,
394 pub validation_rules: Vec<ValidationRule>,
396 pub types: HashMap<String, String>,
398}
399
400#[derive(Debug, Clone)]
402pub struct ColumnDefinition {
403 pub name: String,
405 pub data_type: String,
407 pub required: bool,
409 pub default: Option<ConfigValue>,
411 pub constraints: Vec<String>,
413}
414
415#[derive(Debug, Clone)]
417pub struct ValidationRule {
418 pub name: String,
420 pub rule_type: String,
422 pub parameters: HashMap<String, ConfigValue>,
424 pub error_message: String,
426}
427
428#[derive(Debug, Clone)]
430pub struct ParameterConfig {
431 pub param_type: String,
433 pub default: ConfigValue,
435 pub description: Option<String>,
437 pub constraints: Vec<String>,
439 pub env_overrides: HashMap<String, ConfigValue>,
441}
442
443#[derive(Debug, Clone)]
445pub struct HyperparameterSpace {
446 pub strategy: String,
448 pub budget: Option<u32>,
450 pub parameters: HashMap<String, ParameterSpace>,
452 pub metric: String,
454 pub direction: OptimizationDirection,
456}
457
458#[derive(Debug, Clone)]
460pub struct ParameterSpace {
461 pub space_type: String,
463 pub low: Option<f64>,
465 pub high: Option<f64>,
467 pub choices: Option<Vec<ConfigValue>>,
469 pub distribution: Option<HashMap<String, f64>>,
471}
472
473#[derive(Debug, Clone)]
475pub enum OptimizationDirection {
476 Minimize,
478 Maximize,
480}
481
482#[derive(Debug, Clone)]
484pub struct CrossValidationConfig {
485 pub strategy: String,
487 pub n_folds: Option<u32>,
489 pub test_size: Option<f64>,
491 pub random_state: Option<u64>,
493 pub shuffle: bool,
495}
496
497#[derive(Debug, Clone)]
499pub struct ExecutionConfig {
500 pub mode: String,
502 pub parallelism: ParallelismConfig,
504 pub scheduling: SchedulingConfig,
506 pub retry: RetryConfig,
508 pub timeouts: TimeoutConfig,
510 pub logging: LoggingConfig,
512 pub monitoring: MonitoringConfig,
514}
515
516#[derive(Debug, Clone)]
518pub struct ParallelismConfig {
519 pub max_workers: Option<u32>,
521 pub thread_pool_size: Option<u32>,
523 pub process_pool_size: Option<u32>,
525 pub gpu_enabled: bool,
527 pub batching: BatchConfig,
529}
530
531#[derive(Debug, Clone)]
533pub struct BatchConfig {
534 pub batch_size: u32,
536 pub max_wait_time: Duration,
538 pub dynamic_batching: bool,
540 pub adaptive_sizing: bool,
542}
543
544#[derive(Debug, Clone)]
546pub struct SchedulingConfig {
547 pub strategy: SchedulingStrategy,
549 pub priorities: HashMap<String, TaskPriority>,
551 pub resource_allocation: ResourceAllocationConfig,
553 pub load_balancing: LoadBalancingConfig,
555}
556
557#[derive(Debug, Clone)]
559pub struct ResourceAllocationConfig {
560 pub cpu_strategy: String,
562 pub memory_strategy: String,
564 pub gpu_strategy: String,
566 pub disk_strategy: String,
568}
569
570#[derive(Debug, Clone)]
572pub struct LoadBalancingConfig {
573 pub algorithm: String,
575 pub health_check: HealthCheckConfig,
577 pub failover: FailoverConfig,
579}
580
581#[derive(Debug, Clone)]
583pub struct HealthCheckConfig {
584 pub interval: Duration,
586 pub timeout: Duration,
588 pub failure_threshold: u32,
590 pub recovery_threshold: u32,
592}
593
594#[derive(Debug, Clone)]
596pub struct FailoverConfig {
597 pub enabled: bool,
599 pub strategy: String,
601 pub max_attempts: u32,
603 pub cooldown: Duration,
605}
606
607#[derive(Debug, Clone)]
609pub struct TimeoutConfig {
610 pub default_task: Duration,
612 pub pipeline: Duration,
614 pub network: Duration,
616 pub database: Duration,
618 pub file_io: Duration,
620}
621
622#[derive(Debug, Clone)]
624pub struct LoggingConfig {
625 pub level: String,
627 pub format: String,
629 pub outputs: Vec<LogOutput>,
631 pub structured: bool,
633 pub rotation: Option<LogRotationConfig>,
635}
636
637#[derive(Debug, Clone)]
639pub struct LogOutput {
640 pub output_type: String,
642 pub config: HashMap<String, ConfigValue>,
644}
645
646#[derive(Debug, Clone)]
648pub struct LogRotationConfig {
649 pub max_size: u64,
651 pub max_files: u32,
653 pub interval: Option<Duration>,
655 pub compress: bool,
657}
658
659#[derive(Debug, Clone)]
661pub struct MonitoringConfig {
662 pub enabled: bool,
664 pub metrics: MetricsConfig,
666 pub tracing: TracingConfig,
668 pub alerting: AlertingConfig,
670 pub health_checks: Vec<String>,
672}
673
674#[derive(Debug, Clone)]
676pub struct MetricsConfig {
677 pub collection_interval: Duration,
679 pub export_endpoints: Vec<String>,
681 pub custom_metrics: Vec<String>,
683 pub retention: Duration,
685}
686
687#[derive(Debug, Clone)]
689pub struct TracingConfig {
690 pub enabled: bool,
692 pub sampling_rate: f64,
694 pub export_endpoints: Vec<String>,
696 pub custom_attributes: HashMap<String, String>,
698}
699
700#[derive(Debug, Clone)]
702pub struct AlertingConfig {
703 pub enabled: bool,
705 pub rules: Vec<AlertRule>,
707 pub channels: Vec<NotificationChannel>,
709 pub throttling: AlertThrottlingConfig,
711}
712
713#[derive(Debug, Clone)]
715pub struct AlertRule {
716 pub name: String,
718 pub condition: String,
720 pub severity: String,
722 pub message: String,
724 pub channels: Vec<String>,
726}
727
728#[derive(Debug, Clone)]
730pub struct NotificationChannel {
731 pub name: String,
733 pub channel_type: String,
735 pub config: HashMap<String, ConfigValue>,
737}
738
739#[derive(Debug, Clone)]
741pub struct AlertThrottlingConfig {
742 pub enabled: bool,
744 pub window: Duration,
746 pub max_alerts: u32,
748}
749
750#[derive(Debug, Clone)]
752pub struct ResourceConfig {
753 pub defaults: ResourceRequirements,
755 pub step_overrides: HashMap<String, ResourceRequirements>,
757 pub limits: ResourceLimits,
759 pub monitoring: ResourceMonitoringConfig,
761}
762
763#[derive(Debug, Clone)]
765pub struct ResourceLimits {
766 pub max_cpu: Option<u32>,
768 pub max_memory: Option<u64>,
770 pub max_disk: Option<u64>,
772 pub max_gpu: Option<u32>,
774 pub max_network: Option<u32>,
776}
777
778#[derive(Debug, Clone)]
780pub struct ResourceMonitoringConfig {
781 pub enabled: bool,
783 pub interval: Duration,
785 pub thresholds: ResourceThresholds,
787 pub auto_scaling: Option<AutoScalingConfig>,
789}
790
791#[derive(Debug, Clone)]
793pub struct ResourceThresholds {
794 pub cpu_threshold: f64,
796 pub memory_threshold: f64,
798 pub disk_threshold: f64,
800 pub network_threshold: f64,
802}
803
804#[derive(Debug, Clone)]
806pub struct AutoScalingConfig {
807 pub enabled: bool,
809 pub strategy: String,
811 pub min_instances: u32,
813 pub max_instances: u32,
815 pub scale_up_threshold: f64,
817 pub scale_down_threshold: f64,
819 pub cooldown: Duration,
821}
822
823#[derive(Debug, Clone)]
825pub struct EnvironmentConfig {
826 pub name: String,
828 pub overrides: HashMap<String, ConfigValue>,
830 pub resources: Option<ResourceConfig>,
832 pub execution: Option<ExecutionConfig>,
834}
835
/// Dynamically typed configuration value (a JSON-like tree).
#[derive(Debug, Clone, PartialEq)]
pub enum ConfigValue {
    String(String),
    Integer(i64),
    Float(f64),
    Boolean(bool),
    Array(Vec<ConfigValue>),
    Object(HashMap<String, ConfigValue>),
    Null,
}

impl ConfigValue {
    /// Borrows the text if this is a `String` value, otherwise `None`.
    #[must_use]
    pub fn as_string(&self) -> Option<&String> {
        if let Self::String(text) = self {
            Some(text)
        } else {
            None
        }
    }

    /// Returns the number if this is an `Integer` value, otherwise `None`.
    /// Floats are not converted.
    #[must_use]
    pub fn as_integer(&self) -> Option<i64> {
        if let Self::Integer(number) = self {
            Some(*number)
        } else {
            None
        }
    }

    /// Returns the value as `f64` for `Float` and `Integer` values,
    /// otherwise `None`. Integers are widened with an `as` cast.
    #[must_use]
    pub fn as_float(&self) -> Option<f64> {
        match *self {
            Self::Float(number) => Some(number),
            Self::Integer(number) => Some(number as f64),
            _ => None,
        }
    }

    /// Returns the flag if this is a `Boolean` value, otherwise `None`.
    #[must_use]
    pub fn as_boolean(&self) -> Option<bool> {
        if let Self::Boolean(flag) = self {
            Some(*flag)
        } else {
            None
        }
    }

    /// Borrows the elements if this is an `Array` value, otherwise `None`.
    #[must_use]
    pub fn as_array(&self) -> Option<&Vec<ConfigValue>> {
        if let Self::Array(items) = self {
            Some(items)
        } else {
            None
        }
    }

    /// Borrows the map if this is an `Object` value, otherwise `None`.
    #[must_use]
    pub fn as_object(&self) -> Option<&HashMap<String, ConfigValue>> {
        if let Self::Object(fields) = self {
            Some(fields)
        } else {
            None
        }
    }
}
911
912pub struct TemplateEngine {
914 functions: HashMap<String, Box<dyn Fn(&[ConfigValue]) -> SklResult<ConfigValue> + Send + Sync>>,
916 variables: HashMap<String, ConfigValue>,
918 evaluator: ExpressionEvaluator,
920}
921
922#[derive(Debug)]
924pub struct ExpressionEvaluator {
925 builtin_functions: HashMap<String, fn(&[ConfigValue]) -> SklResult<ConfigValue>>,
927}
928
929#[derive(Debug)]
931pub struct AdvancedValidator {
932 rules: HashMap<String, ValidationRule>,
934 schemas: HashMap<String, ConfigurationSchema>,
936 cross_reference_validator: CrossReferenceValidator,
938}
939
940#[derive(Debug, Clone)]
942pub struct ConfigurationSchema {
943 pub name: String,
945 pub version: String,
947 pub required_fields: Vec<String>,
949 pub field_schemas: HashMap<String, FieldSchema>,
951 pub conditional_validations: Vec<ConditionalValidation>,
953}
954
955#[derive(Debug, Clone, PartialEq)]
957pub struct FieldSchema {
958 pub field_type: FieldType,
960 pub constraints: Vec<FieldConstraint>,
962 pub default_value: Option<ConfigValue>,
964 pub description: String,
966}
967
968#[derive(Debug, Clone, PartialEq)]
970pub enum FieldType {
971 String,
973 Integer,
975 Float,
977 Boolean,
979 Array(Box<FieldType>),
981 Object(HashMap<String, FieldSchema>),
983 Union(Vec<FieldType>),
985 Reference(String),
987}
988
989#[derive(Debug, Clone, PartialEq)]
991pub enum FieldConstraint {
992 MinValue(f64),
994 MaxValue(f64),
996 MinLength(usize),
998 MaxLength(usize),
1000 Pattern(String),
1002 Enum(Vec<ConfigValue>),
1004 Custom(String),
1006 NotEmpty,
1008 Unique,
1010}
1011
1012#[derive(Debug, Clone)]
1014pub struct ConditionalValidation {
1015 pub condition: String,
1017 pub rule: String,
1019 pub error_message: String,
1021}
1022
1023#[derive(Debug)]
1025pub struct CrossReferenceValidator {
1026 references: HashMap<String, Vec<String>>,
1028 dependency_graph: DependencyGraph,
1030}
1031
1032#[derive(Debug)]
1034pub struct DependencyGraph {
1035 graph: HashMap<String, Vec<String>>,
1037 visited: HashMap<String, bool>,
1039 rec_stack: HashMap<String, bool>,
1041}
1042
1043pub struct ConfigManager {
1045 config: Arc<RwLock<PipelineConfig>>,
1047 validation_rules: Vec<ValidationRule>,
1049 current_environment: String,
1051 file_watchers: Arc<Mutex<HashMap<PathBuf, JoinHandle<()>>>>,
1053 hot_reload_enabled: bool,
1055 providers: Arc<Mutex<HashMap<String, Box<dyn ConfigurationProvider>>>>,
1057 templates: Arc<Mutex<HashMap<String, ConfigurationTemplate>>>,
1059 inheritance_cache: Arc<Mutex<HashMap<String, PipelineConfig>>>,
1061 template_engine: Arc<Mutex<TemplateEngine>>,
1063 advanced_validator: Arc<Mutex<AdvancedValidator>>,
1065}
1066
1067impl ConfigManager {
1068 #[must_use]
1070 pub fn new() -> Self {
1071 Self {
1072 config: Arc::new(RwLock::new(Self::default_config())),
1073 validation_rules: Vec::new(),
1074 current_environment: "development".to_string(),
1075 file_watchers: Arc::new(Mutex::new(HashMap::new())),
1076 hot_reload_enabled: false,
1077 providers: Arc::new(Mutex::new(HashMap::new())),
1078 templates: Arc::new(Mutex::new(HashMap::new())),
1079 inheritance_cache: Arc::new(Mutex::new(HashMap::new())),
1080 template_engine: Arc::new(Mutex::new(TemplateEngine::new())),
1081 advanced_validator: Arc::new(Mutex::new(AdvancedValidator::new())),
1082 }
1083 }
1084
1085 fn default_config() -> PipelineConfig {
1087 PipelineConfig {
1089 metadata: ConfigMetadata {
1090 name: "default".to_string(),
1091 version: "1.0.0".to_string(),
1092 description: Some("Default pipeline configuration".to_string()),
1093 author: None,
1094 created_at: SystemTime::now(),
1095 updated_at: SystemTime::now(),
1096 schema_version: "1.0".to_string(),
1097 tags: Vec::new(),
1098 },
1099 pipeline: PipelineDefinition {
1100 steps: Vec::new(),
1101 estimator: None,
1102 data_sources: Vec::new(),
1103 outputs: Vec::new(),
1104 parameters: HashMap::new(),
1105 },
1106 execution: ExecutionConfig {
1107 mode: "local".to_string(),
1108 parallelism: ParallelismConfig {
1109 max_workers: Some(4),
1110 thread_pool_size: Some(4),
1111 process_pool_size: None,
1112 gpu_enabled: false,
1113 batching: BatchConfig {
1114 batch_size: 100,
1115 max_wait_time: Duration::from_millis(100),
1116 dynamic_batching: false,
1117 adaptive_sizing: false,
1118 },
1119 },
1120 scheduling: SchedulingConfig {
1121 strategy: SchedulingStrategy::FIFO,
1122 priorities: HashMap::new(),
1123 resource_allocation: ResourceAllocationConfig {
1124 cpu_strategy: "fair".to_string(),
1125 memory_strategy: "fair".to_string(),
1126 gpu_strategy: "exclusive".to_string(),
1127 disk_strategy: "fair".to_string(),
1128 },
1129 load_balancing: LoadBalancingConfig {
1130 algorithm: "round_robin".to_string(),
1131 health_check: HealthCheckConfig {
1132 interval: Duration::from_secs(30),
1133 timeout: Duration::from_secs(5),
1134 failure_threshold: 3,
1135 recovery_threshold: 2,
1136 },
1137 failover: FailoverConfig {
1138 enabled: true,
1139 strategy: "immediate".to_string(),
1140 max_attempts: 3,
1141 cooldown: Duration::from_secs(60),
1142 },
1143 },
1144 },
1145 retry: RetryConfig::default(),
1146 timeouts: TimeoutConfig {
1147 default_task: Duration::from_secs(300),
1148 pipeline: Duration::from_secs(3600),
1149 network: Duration::from_secs(30),
1150 database: Duration::from_secs(60),
1151 file_io: Duration::from_secs(60),
1152 },
1153 logging: LoggingConfig {
1154 level: "info".to_string(),
1155 format: "json".to_string(),
1156 outputs: vec![LogOutput {
1157 output_type: "console".to_string(),
1158 config: HashMap::new(),
1159 }],
1160 structured: true,
1161 rotation: None,
1162 },
1163 monitoring: MonitoringConfig {
1164 enabled: true,
1165 metrics: MetricsConfig {
1166 collection_interval: Duration::from_secs(60),
1167 export_endpoints: Vec::new(),
1168 custom_metrics: Vec::new(),
1169 retention: Duration::from_secs(86400 * 7), },
1171 tracing: TracingConfig {
1172 enabled: false,
1173 sampling_rate: 0.1,
1174 export_endpoints: Vec::new(),
1175 custom_attributes: HashMap::new(),
1176 },
1177 alerting: AlertingConfig {
1178 enabled: false,
1179 rules: Vec::new(),
1180 channels: Vec::new(),
1181 throttling: AlertThrottlingConfig {
1182 enabled: true,
1183 window: Duration::from_secs(300),
1184 max_alerts: 10,
1185 },
1186 },
1187 health_checks: Vec::new(),
1188 },
1189 },
1190 resources: ResourceConfig {
1191 defaults: ResourceRequirements {
1192 cpu_cores: 1,
1193 memory_mb: 512,
1194 disk_mb: 1024,
1195 gpu_required: false,
1196 estimated_duration: Duration::from_secs(60),
1197 priority: TaskPriority::Normal,
1198 },
1199 step_overrides: HashMap::new(),
1200 limits: ResourceLimits {
1201 max_cpu: None,
1202 max_memory: None,
1203 max_disk: None,
1204 max_gpu: None,
1205 max_network: None,
1206 },
1207 monitoring: ResourceMonitoringConfig {
1208 enabled: true,
1209 interval: Duration::from_secs(30),
1210 thresholds: ResourceThresholds {
1211 cpu_threshold: 0.8,
1212 memory_threshold: 0.8,
1213 disk_threshold: 0.9,
1214 network_threshold: 0.8,
1215 },
1216 auto_scaling: None,
1217 },
1218 },
1219 environments: HashMap::new(),
1220 features: HashMap::new(),
1221 custom: HashMap::new(),
1222 }
1223 }
1224
1225 pub fn load_from_yaml(&mut self, path: &Path) -> SklResult<()> {
1227 let content = fs::read_to_string(path)?;
1228 let config = self.parse_yaml(&content)?;
1229 self.set_config(config)?;
1230
1231 if self.hot_reload_enabled {
1232 self.start_file_watcher(path.to_path_buf())?;
1233 }
1234
1235 Ok(())
1236 }
1237
1238 pub fn load_from_json(&mut self, path: &Path) -> SklResult<()> {
1240 let content = fs::read_to_string(path)?;
1241 let config = self.parse_json(&content)?;
1242 self.set_config(config)?;
1243
1244 if self.hot_reload_enabled {
1245 self.start_file_watcher(path.to_path_buf())?;
1246 }
1247
1248 Ok(())
1249 }
1250
1251 fn parse_yaml(&self, _content: &str) -> SklResult<PipelineConfig> {
1253 Ok(Self::default_config())
1256 }
1257
1258 fn parse_json(&self, _content: &str) -> SklResult<PipelineConfig> {
1260 Ok(Self::default_config())
1263 }
1264
1265 pub fn set_config(&mut self, config: PipelineConfig) -> SklResult<()> {
1267 self.validate_config(&config)?;
1268
1269 let mut current_config = self.config.write().unwrap();
1270 *current_config = config;
1271 Ok(())
1272 }
1273
1274 #[must_use]
1276 pub fn get_config(&self) -> PipelineConfig {
1277 let config = self.config.read().unwrap();
1278 config.clone()
1279 }
1280
1281 #[must_use]
1283 pub fn get_value(&self, path: &str) -> Option<ConfigValue> {
1284 let config = self.config.read().unwrap();
1285 self.get_value_from_path(&config, path)
1286 }
1287
1288 pub fn set_value(&mut self, path: &str, value: ConfigValue) -> SklResult<()> {
1290 let mut config = self.config.write().unwrap();
1291 self.set_value_at_path(&mut config, path, value)?;
1292 Ok(())
1293 }
1294
1295 fn validate_config(&self, config: &PipelineConfig) -> SklResult<()> {
1297 if config.metadata.name.is_empty() {
1299 return Err(SklearsError::InvalidInput(
1300 "Configuration name cannot be empty".to_string(),
1301 ));
1302 }
1303
1304 for step in &config.pipeline.steps {
1306 if step.name.is_empty() {
1307 return Err(SklearsError::InvalidInput(
1308 "Step name cannot be empty".to_string(),
1309 ));
1310 }
1311 if step.step_type.is_empty() {
1312 return Err(SklearsError::InvalidInput(
1313 "Step type cannot be empty".to_string(),
1314 ));
1315 }
1316 }
1317
1318 if config.resources.defaults.cpu_cores == 0 {
1320 return Err(SklearsError::InvalidInput(
1321 "CPU cores must be greater than 0".to_string(),
1322 ));
1323 }
1324
1325 for rule in &self.validation_rules {
1327 self.apply_validation_rule(config, rule)?;
1328 }
1329
1330 Ok(())
1331 }
1332
1333 fn apply_validation_rule(
1335 &self,
1336 _config: &PipelineConfig,
1337 _rule: &ValidationRule,
1338 ) -> SklResult<()> {
1339 Ok(())
1342 }
1343
1344 fn get_value_from_path(&self, config: &PipelineConfig, path: &str) -> Option<ConfigValue> {
1346 let parts: Vec<&str> = path.split('.').collect();
1347
1348 match parts.first() {
1349 Some(&"metadata") => match parts.get(1) {
1350 Some(&"name") => Some(ConfigValue::String(config.metadata.name.clone())),
1351 Some(&"version") => Some(ConfigValue::String(config.metadata.version.clone())),
1352 _ => None,
1353 },
1354 Some(&"execution") => match parts.get(1) {
1355 Some(&"mode") => Some(ConfigValue::String(config.execution.mode.clone())),
1356 _ => None,
1357 },
1358 _ => None,
1359 }
1360 }
1361
1362 fn set_value_at_path(
1364 &self,
1365 _config: &mut PipelineConfig,
1366 _path: &str,
1367 _value: ConfigValue,
1368 ) -> SklResult<()> {
1369 Ok(())
1372 }
1373
1374 fn start_file_watcher(&mut self, path: PathBuf) -> SklResult<()> {
1376 let path_clone = path.clone();
1377
1378 let handle = thread::spawn(move || {
1379 loop {
1382 thread::sleep(Duration::from_secs(1));
1383
1384 if let Ok(metadata) = fs::metadata(&path_clone) {
1386 if let Ok(modified) = metadata.modified() {
1387 }
1391 }
1392 }
1393 });
1394
1395 let mut watchers = self.file_watchers.lock().unwrap();
1396 watchers.insert(path, handle);
1397 Ok(())
1398 }
1399
1400 pub fn enable_hot_reload(&mut self) {
1402 self.hot_reload_enabled = true;
1403 }
1404
1405 pub fn disable_hot_reload(&mut self) {
1407 self.hot_reload_enabled = false;
1408
1409 let mut watchers = self.file_watchers.lock().unwrap();
1411 watchers.clear();
1412 }
1413
1414 pub fn set_environment(&mut self, environment: &str) {
1416 self.current_environment = environment.to_string();
1417 }
1418
1419 #[must_use]
1421 pub fn get_environment(&self) -> &str {
1422 &self.current_environment
1423 }
1424
1425 pub fn apply_environment_overrides(&mut self) -> SklResult<()> {
1427 let config = self.config.read().unwrap();
1428
1429 if let Some(env_config) = config.environments.get(&self.current_environment) {
1430 drop(config);
1432
1433 }
1436
1437 Ok(())
1438 }
1439
1440 pub fn export_to_yaml(&self, path: &Path) -> SklResult<()> {
1442 let config = self.config.read().unwrap();
1443 let yaml_content = self.serialize_to_yaml(&config)?;
1444
1445 let mut file = File::create(path)?;
1446 file.write_all(yaml_content.as_bytes())?;
1447 Ok(())
1448 }
1449
1450 pub fn export_to_json(&self, path: &Path) -> SklResult<()> {
1452 let config = self.config.read().unwrap();
1453 let json_content = self.serialize_to_json(&config)?;
1454
1455 let mut file = File::create(path)?;
1456 file.write_all(json_content.as_bytes())?;
1457 Ok(())
1458 }
1459
1460 fn serialize_to_yaml(&self, _config: &PipelineConfig) -> SklResult<String> {
1462 Ok("# Pipeline Configuration\nmetadata:\n name: example".to_string())
1464 }
1465
1466 fn serialize_to_json(&self, _config: &PipelineConfig) -> SklResult<String> {
1468 Ok(r#"{"metadata": {"name": "example"}}"#.to_string())
1470 }
1471
1472 pub fn add_validation_rule(&mut self, rule: ValidationRule) {
1474 self.validation_rules.push(rule);
1475 }
1476
1477 #[must_use]
1479 pub fn list_validation_rules(&self) -> &[ValidationRule] {
1480 &self.validation_rules
1481 }
1482
1483 pub fn create_template(&self, template_type: &str) -> SklResult<PipelineConfig> {
1485 match template_type {
1486 "basic" => Ok(self.create_basic_template()),
1487 "advanced" => Ok(self.create_advanced_template()),
1488 "distributed" => Ok(self.create_distributed_template()),
1489 _ => Err(SklearsError::InvalidInput(format!(
1490 "Unknown template type: {template_type}"
1491 ))),
1492 }
1493 }
1494
1495 fn create_basic_template(&self) -> PipelineConfig {
1497 let mut config = Self::default_config();
1498 config.metadata.name = "basic_pipeline".to_string();
1499 config.metadata.description = Some("Basic pipeline template".to_string());
1500
1501 config.pipeline.steps.push(StepConfig {
1503 name: "preprocessing".to_string(),
1504 step_type: "StandardScaler".to_string(),
1505 parameters: HashMap::new(),
1506 condition: None,
1507 depends_on: Vec::new(),
1508 resources: None,
1509 enabled: true,
1510 });
1511
1512 config
1513 }
1514
1515 fn create_advanced_template(&self) -> PipelineConfig {
1517 let mut config = self.create_basic_template();
1518 config.metadata.name = "advanced_pipeline".to_string();
1519 config.metadata.description = Some("Advanced pipeline template".to_string());
1520
1521 config.execution.monitoring.enabled = true;
1523 config.execution.monitoring.tracing.enabled = true;
1524 config.execution.monitoring.alerting.enabled = true;
1525
1526 config
1527 }
1528
1529 fn create_distributed_template(&self) -> PipelineConfig {
1531 let mut config = self.create_advanced_template();
1532 config.metadata.name = "distributed_pipeline".to_string();
1533 config.metadata.description = Some("Distributed pipeline template".to_string());
1534
1535 config.execution.mode = "distributed".to_string();
1537 config.execution.parallelism.max_workers = Some(16);
1538
1539 config
1540 }
1541
1542 pub fn register_provider(
1544 &self,
1545 name: String,
1546 provider: Box<dyn ConfigurationProvider>,
1547 ) -> SklResult<()> {
1548 let mut providers = self
1549 .providers
1550 .lock()
1551 .map_err(|_| SklearsError::InvalidData {
1552 reason: "Failed to acquire provider lock".to_string(),
1553 })?;
1554 providers.insert(name, provider);
1555 Ok(())
1556 }
1557
1558 pub fn get_from_provider(
1560 &self,
1561 provider_name: &str,
1562 config_id: &str,
1563 ) -> SklResult<PipelineConfig> {
1564 let providers = self
1565 .providers
1566 .lock()
1567 .map_err(|_| SklearsError::InvalidData {
1568 reason: "Failed to acquire provider lock".to_string(),
1569 })?;
1570
1571 let provider = providers
1572 .get(provider_name)
1573 .ok_or_else(|| SklearsError::InvalidData {
1574 reason: format!("Provider '{provider_name}' not found"),
1575 })?;
1576
1577 provider.get_configuration(config_id)
1578 }
1579
1580 pub fn register_template(&self, template: ConfigurationTemplate) -> SklResult<()> {
1582 let mut templates = self
1583 .templates
1584 .lock()
1585 .map_err(|_| SklearsError::InvalidData {
1586 reason: "Failed to acquire template lock".to_string(),
1587 })?;
1588 templates.insert(template.name.clone(), template);
1589 Ok(())
1590 }
1591
1592 pub fn create_from_template(
1594 &self,
1595 template_name: &str,
1596 parameters: HashMap<String, ConfigValue>,
1597 ) -> SklResult<PipelineConfig> {
1598 let templates = self
1599 .templates
1600 .lock()
1601 .map_err(|_| SklearsError::InvalidData {
1602 reason: "Failed to acquire template lock".to_string(),
1603 })?;
1604
1605 let template = templates
1606 .get(template_name)
1607 .ok_or_else(|| SklearsError::InvalidData {
1608 reason: format!("Template '{template_name}' not found"),
1609 })?;
1610
1611 let mut template_engine =
1612 self.template_engine
1613 .lock()
1614 .map_err(|_| SklearsError::InvalidData {
1615 reason: "Failed to acquire template engine lock".to_string(),
1616 })?;
1617
1618 template_engine.render_template(template, ¶meters)
1619 }
1620
1621 pub fn validate_advanced(&self, config: &PipelineConfig) -> SklResult<ValidationResult> {
1623 let validator = self
1624 .advanced_validator
1625 .lock()
1626 .map_err(|_| SklearsError::InvalidData {
1627 reason: "Failed to acquire validator lock".to_string(),
1628 })?;
1629
1630 validator.validate(config)
1631 }
1632}
1633
1634impl Default for TemplateEngine {
1635 fn default() -> Self {
1636 Self::new()
1637 }
1638}
1639
1640impl TemplateEngine {
1641 #[must_use]
1643 pub fn new() -> Self {
1644 Self {
1645 functions: HashMap::new(),
1646 variables: HashMap::new(),
1647 evaluator: ExpressionEvaluator::new(),
1648 }
1649 }
1650
1651 pub fn render_template(
1653 &mut self,
1654 template: &ConfigurationTemplate,
1655 parameters: &HashMap<String, ConfigValue>,
1656 ) -> SklResult<PipelineConfig> {
1657 for (key, value) in parameters {
1659 self.variables.insert(key.clone(), value.clone());
1660 }
1661
1662 let config = if template.base_template.is_some() {
1664 template.template.clone()
1666 } else {
1667 template.template.clone()
1668 };
1669
1670 Ok(config)
1671 }
1672}
1673
1674impl Default for ExpressionEvaluator {
1675 fn default() -> Self {
1676 Self::new()
1677 }
1678}
1679
1680impl ExpressionEvaluator {
1681 #[must_use]
1683 pub fn new() -> Self {
1684 Self {
1685 builtin_functions: HashMap::new(),
1686 }
1687 }
1688}
1689
1690impl Default for AdvancedValidator {
1691 fn default() -> Self {
1692 Self::new()
1693 }
1694}
1695
1696impl AdvancedValidator {
1697 #[must_use]
1699 pub fn new() -> Self {
1700 Self {
1701 rules: HashMap::new(),
1702 schemas: HashMap::new(),
1703 cross_reference_validator: CrossReferenceValidator::new(),
1704 }
1705 }
1706
1707 pub fn validate(&self, config: &PipelineConfig) -> SklResult<ValidationResult> {
1709 let errors = Vec::new();
1710 let warnings = Vec::new();
1711 let suggestions = Vec::new();
1712
1713 Ok(ValidationResult {
1715 valid: errors.is_empty(),
1716 errors,
1717 warnings,
1718 suggestions,
1719 })
1720 }
1721}
1722
1723impl Default for CrossReferenceValidator {
1724 fn default() -> Self {
1725 Self::new()
1726 }
1727}
1728
1729impl CrossReferenceValidator {
1730 #[must_use]
1732 pub fn new() -> Self {
1733 Self {
1734 references: HashMap::new(),
1735 dependency_graph: DependencyGraph::new(),
1736 }
1737 }
1738}
1739
1740impl Default for DependencyGraph {
1741 fn default() -> Self {
1742 Self::new()
1743 }
1744}
1745
1746impl DependencyGraph {
1747 #[must_use]
1749 pub fn new() -> Self {
1750 Self {
1751 graph: HashMap::new(),
1752 visited: HashMap::new(),
1753 rec_stack: HashMap::new(),
1754 }
1755 }
1756}
1757
1758impl Default for ConfigManager {
1759 fn default() -> Self {
1760 Self::new()
1761 }
1762}
1763
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;

    /// Each `ConfigValue` variant is readable through its matching accessor.
    #[test]
    fn test_config_value_types() {
        let string_val = ConfigValue::String("test".to_string());
        let int_val = ConfigValue::Integer(42);
        // 2.5 is exactly representable, so the exact comparison below is
        // sound, and it does not resemble a well-known constant such as PI.
        let float_val = ConfigValue::Float(2.5);
        let bool_val = ConfigValue::Boolean(true);

        assert_eq!(string_val.as_string(), Some(&"test".to_string()));
        assert_eq!(int_val.as_integer(), Some(42));
        assert_eq!(float_val.as_float(), Some(2.5));
        assert_eq!(bool_val.as_boolean(), Some(true));
    }

    /// A freshly created manager exposes the default configuration.
    #[test]
    fn test_config_manager_creation() {
        let manager = ConfigManager::new();
        let config = manager.get_config();

        assert_eq!(config.metadata.name, "default");
        assert_eq!(config.execution.mode, "local");
    }

    /// The default configuration validates; clearing its name makes it invalid.
    #[test]
    fn test_config_validation() {
        let mut manager = ConfigManager::new();
        let mut config = ConfigManager::default_config();

        // The untouched default configuration must pass validation.
        assert!(manager.validate_config(&config).is_ok());

        // An empty metadata name must be rejected.
        config.metadata.name.clear();
        assert!(manager.validate_config(&config).is_err());
    }

    /// The environment starts as "development" and can be switched.
    #[test]
    fn test_environment_management() {
        let mut manager = ConfigManager::new();

        assert_eq!(manager.get_environment(), "development");

        manager.set_environment("production");
        assert_eq!(manager.get_environment(), "production");
    }

    /// The "basic", "advanced", and "distributed" templates can be created
    /// and carry the expected names and settings.
    #[test]
    fn test_template_creation() {
        let manager = ConfigManager::new();

        let basic_template = manager.create_template("basic").unwrap();
        assert_eq!(basic_template.metadata.name, "basic_pipeline");

        let advanced_template = manager.create_template("advanced").unwrap();
        assert_eq!(advanced_template.metadata.name, "advanced_pipeline");
        assert!(advanced_template.execution.monitoring.enabled);

        let distributed_template = manager.create_template("distributed").unwrap();
        assert_eq!(distributed_template.execution.mode, "distributed");
    }

    /// Adding one validation rule makes exactly one rule listed.
    #[test]
    fn test_validation_rules() {
        let mut manager = ConfigManager::new();

        let rule = ValidationRule {
            name: "test_rule".to_string(),
            rule_type: "range_check".to_string(),
            parameters: HashMap::new(),
            error_message: "Value out of range".to_string(),
        };

        manager.add_validation_rule(rule);
        assert_eq!(manager.list_validation_rules().len(), 1);
    }

    /// A `StepConfig` built field by field keeps the values it was given.
    #[test]
    fn test_step_config() {
        let step = StepConfig {
            name: "test_step".to_string(),
            step_type: "Transformer".to_string(),
            parameters: HashMap::new(),
            condition: Some("data_size > 1000".to_string()),
            depends_on: vec!["previous_step".to_string()],
            resources: None,
            enabled: true,
        };

        assert_eq!(step.name, "test_step");
        assert_eq!(step.depends_on.len(), 1);
        assert!(step.enabled);
    }
}