dataforge/
config.rs

1//! 配置管理模块
2//! 
3//! 提供数据生成配置和数据库配置管理
4
5use crate::error::{DataForgeError, Result};
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::path::Path;
9
10
11/// 事务模式
12#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
13pub enum TransactionMode {
14    /// 自动提交
15    AutoCommit,
16    /// 手动提交
17    Manual,
18    /// 批量提交
19    Batch { batch_size: usize },
20}
21
22impl Default for TransactionMode {
23    fn default() -> Self {
24        Self::Batch { batch_size: 1000 }
25    }
26}
27
28/// 数据库配置
29#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct DatabaseConfig {
31    /// 数据库URL
32    pub url: String,
33    /// 最大连接数
34    pub max_connections: u32,
35    /// 连接超时时间(秒)
36    pub connection_timeout: u64,
37    /// 最大重试次数
38    pub max_retries: u32,
39    /// 批量插入大小
40    pub batch_insert_size: usize,
41    /// 事务模式
42    pub transaction_mode: TransactionMode,
43    /// 连接池配置
44    pub pool_config: PoolConfig,
45    /// SSL配置
46    pub ssl_config: Option<SslConfig>,
47}
48
49/// 连接池配置
50#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct PoolConfig {
52    /// 最小连接数
53    pub min_connections: u32,
54    /// 最大连接数
55    pub max_connections: u32,
56    /// 连接空闲超时时间(秒)
57    pub idle_timeout: u64,
58    /// 连接最大生存时间(秒)
59    pub max_lifetime: u64,
60}
61
62/// SSL配置
63#[derive(Debug, Clone, Serialize, Deserialize)]
64pub struct SslConfig {
65    /// 是否启用SSL
66    pub enabled: bool,
67    /// CA证书路径
68    pub ca_cert_path: Option<String>,
69    /// 客户端证书路径
70    pub client_cert_path: Option<String>,
71    /// 客户端私钥路径
72    pub client_key_path: Option<String>,
73    /// 是否验证服务器证书
74    pub verify_server_cert: bool,
75}
76
77/// 生成器配置
78#[derive(Debug, Clone, Serialize, Deserialize)]
79pub struct GeneratorConfig {
80    /// 生成器名称
81    pub name: String,
82    /// 生成器类型
83    pub generator_type: String,
84    /// 配置参数
85    pub params: HashMap<String, serde_json::Value>,
86    /// 权重(用于随机选择)
87    pub weight: f64,
88    /// 是否启用
89    pub enabled: bool,
90}
91
92/// 数据生成配置
93#[derive(Debug, Clone, Serialize, Deserialize)]
94pub struct DataGenConfig {
95    /// 每批次生成数量
96    pub batch_size: usize,
97    /// 区域设置
98    pub locale: String,
99    /// 空值概率 (0.0-1.0)
100    pub null_probability: f32,
101    /// 并行度 (0表示自动检测)
102    pub parallelism: usize,
103    /// 生成策略
104    pub strategy: String,
105    /// 输出格式
106    pub output_format: OutputFormat,
107    /// 字段配置
108    pub fields: HashMap<String, FieldConfig>,
109    /// 全局生成器配置
110    pub generators: Vec<GeneratorConfig>,
111    /// 内存配置
112    pub memory_config: MemoryConfig,
113}
114
115/// 输出格式
116#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
117pub enum OutputFormat {
118    Json,
119    Csv,
120    Sql,
121    Xml,
122    Yaml,
123    Parquet,
124    Avro,
125}
126
127/// 字段配置
128#[derive(Debug, Clone, Serialize, Deserialize)]
129pub struct FieldConfig {
130    /// 字段名
131    pub name: String,
132    /// 数据类型
133    pub data_type: String,
134    /// 生成器名称
135    pub generator: String,
136    /// 生成器参数
137    pub generator_params: HashMap<String, serde_json::Value>,
138    /// 约束条件
139    pub constraints: ConstraintConfig,
140    /// 是否可为空
141    pub nullable: bool,
142    /// 默认值
143    pub default_value: Option<serde_json::Value>,
144}
145
146/// 约束配置
147#[derive(Debug, Clone, Serialize, Deserialize)]
148pub struct ConstraintConfig {
149    /// 最小值
150    pub min: Option<serde_json::Value>,
151    /// 最大值
152    pub max: Option<serde_json::Value>,
153    /// 正则表达式
154    pub pattern: Option<String>,
155    /// 枚举值
156    pub enum_values: Option<Vec<serde_json::Value>>,
157    /// 长度限制
158    pub length: Option<LengthConstraint>,
159    /// 唯一性约束
160    pub unique: bool,
161}
162
163/// 长度约束
164#[derive(Debug, Clone, Serialize, Deserialize)]
165pub struct LengthConstraint {
166    /// 最小长度
167    pub min: Option<usize>,
168    /// 最大长度
169    pub max: Option<usize>,
170}
171
172/// 内存配置
173#[derive(Debug, Clone, Serialize, Deserialize)]
174pub struct MemoryConfig {
175    /// 内存池初始大小
176    pub pool_initial_size: usize,
177    /// 内存池最大大小
178    pub pool_max_size: usize,
179    /// 字符串池大小
180    pub string_pool_size: usize,
181    /// 清理间隔(秒)
182    pub cleanup_interval: u64,
183    /// 最大空闲时间(秒)
184    pub max_idle_time: u64,
185}
186
187/// 主配置结构
188#[derive(Debug, Clone, Serialize, Deserialize)]
189pub struct Config {
190    /// 应用配置
191    pub app: AppConfig,
192    /// 数据生成配置
193    pub data_generation: DataGenConfig,
194    /// 数据库配置
195    pub databases: HashMap<String, DatabaseConfig>,
196    /// 日志配置
197    pub logging: LoggingConfig,
198    /// 监控配置
199    pub monitoring: MonitoringConfig,
200}
201
202/// 应用配置
203#[derive(Debug, Clone, Serialize, Deserialize)]
204pub struct AppConfig {
205    /// 应用名称
206    pub name: String,
207    /// 版本
208    pub version: String,
209    /// 环境
210    pub environment: String,
211    /// 调试模式
212    pub debug: bool,
213    /// 工作目录
214    pub work_dir: String,
215}
216
217/// 日志配置
218#[derive(Debug, Clone, Serialize, Deserialize)]
219pub struct LoggingConfig {
220    /// 日志级别
221    pub level: String,
222    /// 日志格式
223    pub format: String,
224    /// 输出目标
225    pub targets: Vec<LogTarget>,
226}
227
228/// 日志目标
229#[derive(Debug, Clone, Serialize, Deserialize)]
230pub struct LogTarget {
231    /// 目标类型
232    pub target_type: String,
233    /// 配置参数
234    pub config: HashMap<String, serde_json::Value>,
235}
236
237/// 监控配置
238#[derive(Debug, Clone, Serialize, Deserialize)]
239pub struct MonitoringConfig {
240    /// 是否启用监控
241    pub enabled: bool,
242    /// 指标收集间隔(秒)
243    pub metrics_interval: u64,
244    /// Prometheus配置
245    pub prometheus: Option<PrometheusConfig>,
246    /// 健康检查配置
247    pub health_check: HealthCheckConfig,
248}
249
250/// Prometheus配置
251#[derive(Debug, Clone, Serialize, Deserialize)]
252pub struct PrometheusConfig {
253    /// 监听地址
254    pub listen_address: String,
255    /// 监听端口
256    pub port: u16,
257    /// 指标路径
258    pub metrics_path: String,
259}
260
261/// 健康检查配置
262#[derive(Debug, Clone, Serialize, Deserialize)]
263pub struct HealthCheckConfig {
264    /// 是否启用
265    pub enabled: bool,
266    /// 检查间隔(秒)
267    pub interval: u64,
268    /// 超时时间(秒)
269    pub timeout: u64,
270}
271
272/// 配置管理器
273pub struct ConfigManager {
274    config: Config,
275    config_path: Option<String>,
276}
277
278impl ConfigManager {
279    /// 创建新的配置管理器
280    pub fn new(config: Config) -> Self {
281        Self {
282            config,
283            config_path: None,
284        }
285    }
286
287    /// 从文件加载配置
288    pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
289        let path_str = path.as_ref().to_string_lossy().to_string();
290        let content = std::fs::read_to_string(&path)
291            .map_err(|e| DataForgeError::config(&format!("Failed to read config file: {}", e)))?;
292
293        let config = if path_str.ends_with(".toml") {
294            toml::from_str(&content)
295                .map_err(|e| DataForgeError::config(&format!("Failed to parse TOML config: {}", e)))?
296        } else if path_str.ends_with(".yaml") || path_str.ends_with(".yml") {
297            serde_yaml::from_str(&content)
298                .map_err(|e| DataForgeError::config(&format!("Failed to parse YAML config: {}", e)))?
299        } else if path_str.ends_with(".json") {
300            serde_json::from_str(&content)
301                .map_err(|e| DataForgeError::config(&format!("Failed to parse JSON config: {}", e)))?
302        } else {
303            return Err(DataForgeError::config("Unsupported config file format"));
304        };
305
306        Ok(Self {
307            config,
308            config_path: Some(path_str),
309        })
310    }
311
312    /// 创建默认配置
313    pub fn default() -> Self {
314        Self {
315            config: Config::default(),
316            config_path: None,
317        }
318    }
319
320    /// 获取配置
321    pub fn config(&self) -> &Config {
322        &self.config
323    }
324
325    /// 获取可变配置
326    pub fn config_mut(&mut self) -> &mut Config {
327        &mut self.config
328    }
329
330    /// 保存配置到文件
331    pub fn save(&self) -> Result<()> {
332        if let Some(path) = &self.config_path {
333            let content = if path.ends_with(".toml") {
334                toml::to_string_pretty(&self.config)
335                    .map_err(|e| DataForgeError::config(&format!("Failed to serialize TOML config: {}", e)))?
336            } else if path.ends_with(".yaml") || path.ends_with(".yml") {
337                serde_yaml::to_string(&self.config)
338                    .map_err(|e| DataForgeError::config(&format!("Failed to serialize YAML config: {}", e)))?
339            } else if path.ends_with(".json") {
340                serde_json::to_string_pretty(&self.config)
341                    .map_err(|e| DataForgeError::config(&format!("Failed to serialize JSON config: {}", e)))?
342            } else {
343                return Err(DataForgeError::config("Unsupported config file format"));
344            };
345
346            std::fs::write(path, content)
347                .map_err(|e| DataForgeError::config(&format!("Failed to write config file: {}", e)))?;
348        } else {
349            return Err(DataForgeError::config("No config file path specified"));
350        }
351
352        Ok(())
353    }
354
355    /// 验证配置
356    pub fn validate(&self) -> Result<()> {
357        let config = &self.config;
358
359        // 验证数据生成配置
360        if config.data_generation.batch_size == 0 {
361            return Err(DataForgeError::config("Batch size must be greater than 0"));
362        }
363
364        if config.data_generation.null_probability < 0.0 || config.data_generation.null_probability > 1.0 {
365            return Err(DataForgeError::config("Null probability must be between 0.0 and 1.0"));
366        }
367
368        // 验证数据库配置
369        for (name, db_config) in &config.databases {
370            if db_config.url.is_empty() {
371                return Err(DataForgeError::config(&format!("Database '{}' URL cannot be empty", name)));
372            }
373
374            if db_config.max_connections == 0 {
375                return Err(DataForgeError::config(&format!("Database '{}' max_connections must be greater than 0", name)));
376            }
377
378            if db_config.batch_insert_size == 0 {
379                return Err(DataForgeError::config(&format!("Database '{}' batch_insert_size must be greater than 0", name)));
380            }
381        }
382
383        // 验证内存配置
384        let mem_config = &config.data_generation.memory_config;
385        if mem_config.pool_max_size < mem_config.pool_initial_size {
386            return Err(DataForgeError::config("Memory pool max_size must be >= initial_size"));
387        }
388
389        Ok(())
390    }
391
392    /// 合并配置
393    pub fn merge(&mut self, other: Config) -> Result<()> {
394        // 简单的合并逻辑,实际应用中可能需要更复杂的合并策略
395        self.config.data_generation.batch_size = other.data_generation.batch_size;
396        self.config.data_generation.null_probability = other.data_generation.null_probability;
397        
398        // 合并数据库配置
399        for (name, db_config) in other.databases {
400            self.config.databases.insert(name, db_config);
401        }
402
403        // 合并字段配置
404        for (name, field_config) in other.data_generation.fields {
405            self.config.data_generation.fields.insert(name, field_config);
406        }
407
408        self.validate()
409    }
410
411    /// 获取数据库配置
412    pub fn get_database_config(&self, name: &str) -> Option<&DatabaseConfig> {
413        self.config.databases.get(name)
414    }
415
416    /// 获取字段配置
417    pub fn get_field_config(&self, name: &str) -> Option<&FieldConfig> {
418        self.config.data_generation.fields.get(name)
419    }
420
421    /// 设置环境变量覆盖
422    pub fn apply_env_overrides(&mut self) {
423        // 从环境变量覆盖配置
424        if let Ok(batch_size) = std::env::var("DATAFORGE_BATCH_SIZE") {
425            if let Ok(size) = batch_size.parse::<usize>() {
426                self.config.data_generation.batch_size = size;
427            }
428        }
429
430        if let Ok(null_prob) = std::env::var("DATAFORGE_NULL_PROBABILITY") {
431            if let Ok(prob) = null_prob.parse::<f32>() {
432                self.config.data_generation.null_probability = prob;
433            }
434        }
435
436        if let Ok(parallelism) = std::env::var("DATAFORGE_PARALLELISM") {
437            if let Ok(par) = parallelism.parse::<usize>() {
438                self.config.data_generation.parallelism = par;
439            }
440        }
441    }
442}
443
444// 默认实现
445impl Default for Config {
446    fn default() -> Self {
447        Self {
448            app: AppConfig {
449                name: "DataForge".to_string(),
450                version: "0.1.0".to_string(),
451                environment: "development".to_string(),
452                debug: true,
453                work_dir: ".".to_string(),
454            },
455            data_generation: DataGenConfig {
456                batch_size: 1000,
457                locale: "zh_CN".to_string(),
458                null_probability: 0.05,
459                parallelism: 0,
460                strategy: "random".to_string(),
461                output_format: OutputFormat::Json,
462                fields: HashMap::new(),
463                generators: Vec::new(),
464                memory_config: MemoryConfig {
465                    pool_initial_size: 10,
466                    pool_max_size: 1000,
467                    string_pool_size: 1000,
468                    cleanup_interval: 60,
469                    max_idle_time: 300,
470                },
471            },
472            databases: HashMap::new(),
473            logging: LoggingConfig {
474                level: "info".to_string(),
475                format: "json".to_string(),
476                targets: vec![LogTarget {
477                    target_type: "console".to_string(),
478                    config: HashMap::new(),
479                }],
480            },
481            monitoring: MonitoringConfig {
482                enabled: false,
483                metrics_interval: 30,
484                prometheus: None,
485                health_check: HealthCheckConfig {
486                    enabled: true,
487                    interval: 30,
488                    timeout: 5,
489                },
490            },
491        }
492    }
493}
494
495impl Default for DatabaseConfig {
496    fn default() -> Self {
497        Self {
498            url: "sqlite::memory:".to_string(),
499            max_connections: 10,
500            connection_timeout: 30,
501            max_retries: 3,
502            batch_insert_size: 1000,
503            transaction_mode: TransactionMode::default(),
504            pool_config: PoolConfig {
505                min_connections: 1,
506                max_connections: 10,
507                idle_timeout: 600,
508                max_lifetime: 3600,
509            },
510            ssl_config: None,
511        }
512    }
513}
514
515impl Default for ConstraintConfig {
516    fn default() -> Self {
517        Self {
518            min: None,
519            max: None,
520            pattern: None,
521            enum_values: None,
522            length: None,
523            unique: false,
524        }
525    }
526}
527
528impl Default for MemoryConfig {
529    fn default() -> Self {
530        Self {
531            pool_initial_size: 64,
532            pool_max_size: 1024,
533            string_pool_size: 10000,
534            cleanup_interval: 300,
535            max_idle_time: 600,
536        }
537    }
538}
539
540impl Default for DataGenConfig {
541    fn default() -> Self {
542        Self {
543            batch_size: 1000,
544            locale: "zh_CN".to_string(),
545            null_probability: 0.05,
546            parallelism: 1,
547            strategy: "random".to_string(),
548            output_format: OutputFormat::Json,
549            fields: HashMap::new(),
550            generators: Vec::new(),
551            memory_config: MemoryConfig::default(),
552        }
553    }
554}
555
#[cfg(test)]
mod tests {
    use super::*;

    /// `Config::default()` carries the expected application name and
    /// generation defaults.
    #[test]
    fn test_default_config() {
        let defaults = Config::default();
        let data_gen = &defaults.data_generation;

        assert_eq!(defaults.app.name, "DataForge");
        assert_eq!(data_gen.batch_size, 1000);
        assert_eq!(data_gen.null_probability, 0.05);
    }

    /// The default configuration validates; a zero batch size does not.
    #[test]
    fn test_config_validation() {
        assert!(ConfigManager::default().validate().is_ok());

        let mut broken = Config::default();
        broken.data_generation.batch_size = 0;
        let broken_manager = ConfigManager::new(broken);
        assert!(broken_manager.validate().is_err());
    }

    /// A configuration survives a JSON round trip.
    #[test]
    fn test_config_serialization() {
        let original = Config::default();

        // Serialize to JSON and read it back.
        let encoded = serde_json::to_string_pretty(&original).unwrap();
        let decoded: Config = serde_json::from_str(&encoded).unwrap();

        assert_eq!(original.app.name, decoded.app.name);
    }

    /// Environment variables override the batch size and null probability.
    #[test]
    fn test_env_overrides() {
        std::env::set_var("DATAFORGE_BATCH_SIZE", "2000");
        std::env::set_var("DATAFORGE_NULL_PROBABILITY", "0.1");

        let mut manager = ConfigManager::default();
        manager.apply_env_overrides();

        let data_gen = &manager.config.data_generation;
        assert_eq!(data_gen.batch_size, 2000);
        assert_eq!(data_gen.null_probability, 0.1);

        // Clean up the environment variables.
        std::env::remove_var("DATAFORGE_BATCH_SIZE");
        std::env::remove_var("DATAFORGE_NULL_PROBABILITY");
    }

    /// A registered database configuration can be looked up by name.
    #[test]
    fn test_database_config() {
        let custom_db = DatabaseConfig {
            url: "mysql://user:pass@localhost/test".to_string(),
            max_connections: 20,
            ..Default::default()
        };

        let mut config = Config::default();
        config.databases.insert("test_db".to_string(), custom_db);
        let manager = ConfigManager::new(config);

        let found = manager.get_database_config("test_db").unwrap();
        assert_eq!(found.url, "mysql://user:pass@localhost/test");
        assert_eq!(found.max_connections, 20);
    }
}