1use crate::error::{DataForgeError, Result};
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::path::Path;
9
10
/// How generated rows are committed to a target database.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum TransactionMode {
    /// Each statement is committed on its own (no explicit transaction).
    AutoCommit,
    /// Transactions are opened and committed explicitly by the caller.
    Manual,
    /// Rows are grouped into transactions of `batch_size` inserts each.
    Batch { batch_size: usize },
}
21
22impl Default for TransactionMode {
23 fn default() -> Self {
24 Self::Batch { batch_size: 1000 }
25 }
26}
27
/// Connection settings for a single named database target.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatabaseConfig {
    /// Connection URL (e.g. `sqlite::memory:`, `mysql://user:pass@host/db`).
    /// Must be non-empty (enforced by `ConfigManager::validate`).
    pub url: String,
    /// Upper bound on open connections; must be > 0 (enforced by
    /// `ConfigManager::validate`).
    pub max_connections: u32,
    /// Connection timeout; unit is not established in this file — the
    /// default of 30 suggests seconds, TODO confirm at the pool setup site.
    pub connection_timeout: u64,
    /// Number of retry attempts on failure.
    pub max_retries: u32,
    /// Rows per batched insert; must be > 0 (enforced by
    /// `ConfigManager::validate`).
    pub batch_insert_size: usize,
    /// Commit strategy; see [`TransactionMode`].
    pub transaction_mode: TransactionMode,
    /// Connection-pool sizing and lifetime settings.
    pub pool_config: PoolConfig,
    /// Optional TLS settings; `None` means no SSL configuration at all.
    pub ssl_config: Option<SslConfig>,
}
48
/// Sizing and lifetime parameters for a database connection pool.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PoolConfig {
    /// Minimum number of connections kept open.
    pub min_connections: u32,
    /// Maximum number of connections the pool may open.
    pub max_connections: u32,
    /// How long an idle connection is retained (default 600 — presumably
    /// seconds; confirm against the pool implementation).
    pub idle_timeout: u64,
    /// Maximum lifetime of any single connection (default 3600 — same
    /// unit caveat as `idle_timeout`).
    pub max_lifetime: u64,
}
61
/// TLS settings for a database connection.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SslConfig {
    /// Master switch for TLS on this connection.
    pub enabled: bool,
    /// Path to a CA certificate bundle, when server verification needs one.
    pub ca_cert_path: Option<String>,
    /// Path to the client certificate (mutual TLS).
    pub client_cert_path: Option<String>,
    /// Path to the private key paired with `client_cert_path`.
    pub client_key_path: Option<String>,
    /// Whether to verify the server's certificate chain.
    pub verify_server_cert: bool,
}
76
/// Declarative description of one data-generator instance.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GeneratorConfig {
    /// Name used to reference this generator.
    pub name: String,
    /// Generator kind key (stringly-typed; resolved outside this file).
    pub generator_type: String,
    /// Free-form, generator-specific parameters.
    pub params: HashMap<String, serde_json::Value>,
    /// Relative weight — presumably for weighted selection among
    /// generators; TODO confirm against the generator registry.
    pub weight: f64,
    /// Whether this generator participates in generation.
    pub enabled: bool,
}
91
/// Top-level knobs for the data-generation pipeline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataGenConfig {
    /// Rows produced per batch; must be > 0 (enforced by
    /// `ConfigManager::validate`).
    pub batch_size: usize,
    /// Locale tag for locale-aware generators (e.g. "zh_CN").
    pub locale: String,
    /// Probability in [0.0, 1.0] that a nullable field is emitted as null
    /// (range enforced by `ConfigManager::validate`).
    pub null_probability: f32,
    /// Worker parallelism. NOTE(review): defaults disagree within this
    /// file (0 in `Config::default`, 1 in `DataGenConfig::default`) —
    /// presumably 0 means "auto"; confirm before relying on it.
    pub parallelism: usize,
    /// Generation strategy key (e.g. "random").
    pub strategy: String,
    /// Serialization format for generated output.
    pub output_format: OutputFormat,
    /// Per-field generation settings, keyed by field name.
    pub fields: HashMap<String, FieldConfig>,
    /// Additional generator definitions.
    pub generators: Vec<GeneratorConfig>,
    /// Memory-pool tuning.
    pub memory_config: MemoryConfig,
}
114
/// Serialization formats the generator can emit.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum OutputFormat {
    Json,
    Csv,
    Sql,
    Xml,
    Yaml,
    Parquet,
    Avro,
}
126
/// Generation settings for a single output field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldConfig {
    /// Field name as it appears in the output.
    pub name: String,
    /// Logical data-type key (stringly-typed; resolved outside this file).
    pub data_type: String,
    /// Name of the generator producing this field's values.
    pub generator: String,
    /// Generator-specific parameters.
    pub generator_params: HashMap<String, serde_json::Value>,
    /// Value constraints applied to generated data.
    pub constraints: ConstraintConfig,
    /// Whether the field may be null.
    pub nullable: bool,
    /// Fallback value; its exact semantics are not established in this
    /// file — confirm at the site that consumes it.
    pub default_value: Option<serde_json::Value>,
}
145
/// Optional constraints on generated values; `None`/`false` means
/// unconstrained.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConstraintConfig {
    /// Lower bound (inclusivity not established here — confirm in the
    /// constraint checker).
    pub min: Option<serde_json::Value>,
    /// Upper bound; same inclusivity caveat as `min`.
    pub max: Option<serde_json::Value>,
    /// Pattern the value must match (presumably a regex — confirm).
    pub pattern: Option<String>,
    /// Closed set of allowed values.
    pub enum_values: Option<Vec<serde_json::Value>>,
    /// Length bounds for string-like values.
    pub length: Option<LengthConstraint>,
    /// Whether values must be unique across generated rows.
    pub unique: bool,
}
162
/// Minimum/maximum length bounds; `None` leaves that side unbounded.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LengthConstraint {
    /// Minimum length, if any.
    pub min: Option<usize>,
    /// Maximum length, if any.
    pub max: Option<usize>,
}
171
/// Tuning for internal object/string pools.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryConfig {
    /// Initial pool capacity; must be <= `pool_max_size` (enforced by
    /// `ConfigManager::validate`).
    pub pool_initial_size: usize,
    /// Hard cap on pool size.
    pub pool_max_size: usize,
    /// Capacity of the string reuse pool.
    pub string_pool_size: usize,
    /// Cleanup cadence; unit not established here (defaults suggest
    /// seconds — TODO confirm).
    pub cleanup_interval: u64,
    /// Idle time before a pooled object is reclaimed (same unit caveat).
    pub max_idle_time: u64,
}
186
/// Root configuration aggregating all subsystems.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Config {
    /// Application identity and runtime environment.
    pub app: AppConfig,
    /// Data-generation pipeline settings.
    pub data_generation: DataGenConfig,
    /// Database targets, keyed by a user-chosen name.
    pub databases: HashMap<String, DatabaseConfig>,
    /// Log level, format, and output targets.
    pub logging: LoggingConfig,
    /// Metrics and health-check settings.
    pub monitoring: MonitoringConfig,
}
201
/// Application identity and runtime environment.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AppConfig {
    /// Application name.
    pub name: String,
    /// Application version string.
    pub version: String,
    /// Deployment environment label (e.g. "development").
    pub environment: String,
    /// Debug-mode switch.
    pub debug: bool,
    /// Working directory used for relative paths.
    pub work_dir: String,
}
216
/// Logging configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LoggingConfig {
    /// Log level key (e.g. "info").
    pub level: String,
    /// Log record format key (e.g. "json").
    pub format: String,
    /// Output destinations; each target carries its own settings.
    pub targets: Vec<LogTarget>,
}
227
/// One logging destination with target-specific settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LogTarget {
    /// Target kind key (e.g. "console"; stringly-typed, resolved elsewhere).
    pub target_type: String,
    /// Free-form, target-specific settings.
    pub config: HashMap<String, serde_json::Value>,
}
236
/// Metrics and health-check configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MonitoringConfig {
    /// Master switch for monitoring.
    pub enabled: bool,
    /// Metrics collection interval; unit not established here (defaults
    /// suggest seconds — TODO confirm).
    pub metrics_interval: u64,
    /// Prometheus exporter settings; `None` disables the exporter.
    pub prometheus: Option<PrometheusConfig>,
    /// Health-check settings.
    pub health_check: HealthCheckConfig,
}
249
/// Prometheus metrics exporter settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PrometheusConfig {
    /// Address the exporter binds to.
    pub listen_address: String,
    /// TCP port for the exporter.
    pub port: u16,
    /// HTTP path serving the metrics (e.g. "/metrics").
    pub metrics_path: String,
}
260
/// Periodic health-check settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthCheckConfig {
    /// Master switch for the health check.
    pub enabled: bool,
    /// Interval between checks (unit not established here — TODO confirm).
    pub interval: u64,
    /// Per-check timeout (same unit caveat as `interval`).
    pub timeout: u64,
}
271
/// Owns a [`Config`] plus the optional file path it was loaded from,
/// providing load/save/validate/merge operations.
pub struct ConfigManager {
    /// The managed in-memory configuration tree.
    config: Config,
    /// Path the config was loaded from; `None` for in-memory configs,
    /// in which case `save` returns an error.
    config_path: Option<String>,
}
277
278impl ConfigManager {
279 pub fn new(config: Config) -> Self {
281 Self {
282 config,
283 config_path: None,
284 }
285 }
286
287 pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
289 let path_str = path.as_ref().to_string_lossy().to_string();
290 let content = std::fs::read_to_string(&path)
291 .map_err(|e| DataForgeError::config(&format!("Failed to read config file: {}", e)))?;
292
293 let config = if path_str.ends_with(".toml") {
294 toml::from_str(&content)
295 .map_err(|e| DataForgeError::config(&format!("Failed to parse TOML config: {}", e)))?
296 } else if path_str.ends_with(".yaml") || path_str.ends_with(".yml") {
297 serde_yaml::from_str(&content)
298 .map_err(|e| DataForgeError::config(&format!("Failed to parse YAML config: {}", e)))?
299 } else if path_str.ends_with(".json") {
300 serde_json::from_str(&content)
301 .map_err(|e| DataForgeError::config(&format!("Failed to parse JSON config: {}", e)))?
302 } else {
303 return Err(DataForgeError::config("Unsupported config file format"));
304 };
305
306 Ok(Self {
307 config,
308 config_path: Some(path_str),
309 })
310 }
311
312 pub fn default() -> Self {
314 Self {
315 config: Config::default(),
316 config_path: None,
317 }
318 }
319
320 pub fn config(&self) -> &Config {
322 &self.config
323 }
324
325 pub fn config_mut(&mut self) -> &mut Config {
327 &mut self.config
328 }
329
330 pub fn save(&self) -> Result<()> {
332 if let Some(path) = &self.config_path {
333 let content = if path.ends_with(".toml") {
334 toml::to_string_pretty(&self.config)
335 .map_err(|e| DataForgeError::config(&format!("Failed to serialize TOML config: {}", e)))?
336 } else if path.ends_with(".yaml") || path.ends_with(".yml") {
337 serde_yaml::to_string(&self.config)
338 .map_err(|e| DataForgeError::config(&format!("Failed to serialize YAML config: {}", e)))?
339 } else if path.ends_with(".json") {
340 serde_json::to_string_pretty(&self.config)
341 .map_err(|e| DataForgeError::config(&format!("Failed to serialize JSON config: {}", e)))?
342 } else {
343 return Err(DataForgeError::config("Unsupported config file format"));
344 };
345
346 std::fs::write(path, content)
347 .map_err(|e| DataForgeError::config(&format!("Failed to write config file: {}", e)))?;
348 } else {
349 return Err(DataForgeError::config("No config file path specified"));
350 }
351
352 Ok(())
353 }
354
355 pub fn validate(&self) -> Result<()> {
357 let config = &self.config;
358
359 if config.data_generation.batch_size == 0 {
361 return Err(DataForgeError::config("Batch size must be greater than 0"));
362 }
363
364 if config.data_generation.null_probability < 0.0 || config.data_generation.null_probability > 1.0 {
365 return Err(DataForgeError::config("Null probability must be between 0.0 and 1.0"));
366 }
367
368 for (name, db_config) in &config.databases {
370 if db_config.url.is_empty() {
371 return Err(DataForgeError::config(&format!("Database '{}' URL cannot be empty", name)));
372 }
373
374 if db_config.max_connections == 0 {
375 return Err(DataForgeError::config(&format!("Database '{}' max_connections must be greater than 0", name)));
376 }
377
378 if db_config.batch_insert_size == 0 {
379 return Err(DataForgeError::config(&format!("Database '{}' batch_insert_size must be greater than 0", name)));
380 }
381 }
382
383 let mem_config = &config.data_generation.memory_config;
385 if mem_config.pool_max_size < mem_config.pool_initial_size {
386 return Err(DataForgeError::config("Memory pool max_size must be >= initial_size"));
387 }
388
389 Ok(())
390 }
391
392 pub fn merge(&mut self, other: Config) -> Result<()> {
394 self.config.data_generation.batch_size = other.data_generation.batch_size;
396 self.config.data_generation.null_probability = other.data_generation.null_probability;
397
398 for (name, db_config) in other.databases {
400 self.config.databases.insert(name, db_config);
401 }
402
403 for (name, field_config) in other.data_generation.fields {
405 self.config.data_generation.fields.insert(name, field_config);
406 }
407
408 self.validate()
409 }
410
411 pub fn get_database_config(&self, name: &str) -> Option<&DatabaseConfig> {
413 self.config.databases.get(name)
414 }
415
416 pub fn get_field_config(&self, name: &str) -> Option<&FieldConfig> {
418 self.config.data_generation.fields.get(name)
419 }
420
421 pub fn apply_env_overrides(&mut self) {
423 if let Ok(batch_size) = std::env::var("DATAFORGE_BATCH_SIZE") {
425 if let Ok(size) = batch_size.parse::<usize>() {
426 self.config.data_generation.batch_size = size;
427 }
428 }
429
430 if let Ok(null_prob) = std::env::var("DATAFORGE_NULL_PROBABILITY") {
431 if let Ok(prob) = null_prob.parse::<f32>() {
432 self.config.data_generation.null_probability = prob;
433 }
434 }
435
436 if let Ok(parallelism) = std::env::var("DATAFORGE_PARALLELISM") {
437 if let Ok(par) = parallelism.parse::<usize>() {
438 self.config.data_generation.parallelism = par;
439 }
440 }
441 }
442}
443
444impl Default for Config {
446 fn default() -> Self {
447 Self {
448 app: AppConfig {
449 name: "DataForge".to_string(),
450 version: "0.1.0".to_string(),
451 environment: "development".to_string(),
452 debug: true,
453 work_dir: ".".to_string(),
454 },
455 data_generation: DataGenConfig {
456 batch_size: 1000,
457 locale: "zh_CN".to_string(),
458 null_probability: 0.05,
459 parallelism: 0,
460 strategy: "random".to_string(),
461 output_format: OutputFormat::Json,
462 fields: HashMap::new(),
463 generators: Vec::new(),
464 memory_config: MemoryConfig {
465 pool_initial_size: 10,
466 pool_max_size: 1000,
467 string_pool_size: 1000,
468 cleanup_interval: 60,
469 max_idle_time: 300,
470 },
471 },
472 databases: HashMap::new(),
473 logging: LoggingConfig {
474 level: "info".to_string(),
475 format: "json".to_string(),
476 targets: vec![LogTarget {
477 target_type: "console".to_string(),
478 config: HashMap::new(),
479 }],
480 },
481 monitoring: MonitoringConfig {
482 enabled: false,
483 metrics_interval: 30,
484 prometheus: None,
485 health_check: HealthCheckConfig {
486 enabled: true,
487 interval: 30,
488 timeout: 5,
489 },
490 },
491 }
492 }
493}
494
495impl Default for DatabaseConfig {
496 fn default() -> Self {
497 Self {
498 url: "sqlite::memory:".to_string(),
499 max_connections: 10,
500 connection_timeout: 30,
501 max_retries: 3,
502 batch_insert_size: 1000,
503 transaction_mode: TransactionMode::default(),
504 pool_config: PoolConfig {
505 min_connections: 1,
506 max_connections: 10,
507 idle_timeout: 600,
508 max_lifetime: 3600,
509 },
510 ssl_config: None,
511 }
512 }
513}
514
515impl Default for ConstraintConfig {
516 fn default() -> Self {
517 Self {
518 min: None,
519 max: None,
520 pattern: None,
521 enum_values: None,
522 length: None,
523 unique: false,
524 }
525 }
526}
527
528impl Default for MemoryConfig {
529 fn default() -> Self {
530 Self {
531 pool_initial_size: 64,
532 pool_max_size: 1024,
533 string_pool_size: 10000,
534 cleanup_interval: 300,
535 max_idle_time: 600,
536 }
537 }
538}
539
540impl Default for DataGenConfig {
541 fn default() -> Self {
542 Self {
543 batch_size: 1000,
544 locale: "zh_CN".to_string(),
545 null_probability: 0.05,
546 parallelism: 1,
547 strategy: "random".to_string(),
548 output_format: OutputFormat::Json,
549 fields: HashMap::new(),
550 generators: Vec::new(),
551 memory_config: MemoryConfig::default(),
552 }
553 }
554}
555
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks the hand-written `Config::default` values.
    #[test]
    fn test_default_config() {
        let config = Config::default();
        assert_eq!(config.app.name, "DataForge");
        assert_eq!(config.data_generation.batch_size, 1000);
        // Exact float compare is fine: both sides are the same literal.
        assert_eq!(config.data_generation.null_probability, 0.05);
    }

    /// The default config must validate; a zero batch size must not.
    #[test]
    fn test_config_validation() {
        let manager = ConfigManager::default();
        assert!(manager.validate().is_ok());

        let mut invalid_config = Config::default();
        invalid_config.data_generation.batch_size = 0;
        let invalid_manager = ConfigManager {
            config: invalid_config,
            config_path: None,
        };
        assert!(invalid_manager.validate().is_err());
    }

    /// JSON round-trip: serialize the default config and read it back.
    #[test]
    fn test_config_serialization() {
        let config = Config::default();

        let json_str = serde_json::to_string_pretty(&config).unwrap();
        let deserialized: Config = serde_json::from_str(&json_str).unwrap();
        assert_eq!(config.app.name, deserialized.app.name);
    }

    /// Environment variables override batch size and null probability.
    ///
    /// NOTE(review): `set_var`/`remove_var` mutate process-global state
    /// and Rust runs tests in parallel by default, so another test that
    /// reads these variables could race with this one — consider
    /// serializing if it ever flakes.
    #[test]
    fn test_env_overrides() {
        std::env::set_var("DATAFORGE_BATCH_SIZE", "2000");
        std::env::set_var("DATAFORGE_NULL_PROBABILITY", "0.1");

        let mut manager = ConfigManager::default();
        manager.apply_env_overrides();

        assert_eq!(manager.config.data_generation.batch_size, 2000);
        assert_eq!(manager.config.data_generation.null_probability, 0.1);

        // Clean up so other tests see a pristine environment.
        std::env::remove_var("DATAFORGE_BATCH_SIZE");
        std::env::remove_var("DATAFORGE_NULL_PROBABILITY");
    }

    /// Databases inserted into the config are retrievable by name.
    #[test]
    fn test_database_config() {
        let mut config = Config::default();
        let db_config = DatabaseConfig {
            url: "mysql://user:pass@localhost/test".to_string(),
            max_connections: 20,
            ..Default::default()
        };

        config.databases.insert("test_db".to_string(), db_config);

        let manager = ConfigManager {
            config,
            config_path: None,
        };

        let db_config = manager.get_database_config("test_db").unwrap();
        assert_eq!(db_config.url, "mysql://user:pass@localhost/test");
        assert_eq!(db_config.max_connections, 20);
    }
}