organizational_intelligence_plugin/
config.rs

1//! Configuration Management
2//!
3//! PROD-004: Centralized configuration with file and environment support
4//! Supports YAML files with environment variable overrides
5
6use anyhow::Result;
7use serde::{Deserialize, Serialize};
8use std::path::Path;
9
10/// Main configuration structure
11#[derive(Debug, Clone, Default, Serialize, Deserialize)]
12#[serde(default)]
13pub struct Config {
14    /// Analysis settings
15    pub analysis: AnalysisConfig,
16
17    /// ML model settings
18    pub ml: MlConfig,
19
20    /// Storage settings
21    pub storage: StorageConfig,
22
23    /// GPU/compute settings
24    pub compute: ComputeConfig,
25
26    /// Logging settings
27    pub logging: LoggingConfig,
28}
29
30/// Analysis configuration
31#[derive(Debug, Clone, Serialize, Deserialize)]
32#[serde(default)]
33pub struct AnalysisConfig {
34    /// Maximum commits to analyze per repository
35    pub max_commits: usize,
36
37    /// Number of parallel workers
38    pub workers: usize,
39
40    /// Cache directory for cloned repos
41    pub cache_dir: String,
42
43    /// Include merge commits
44    pub include_merges: bool,
45}
46
47impl Default for AnalysisConfig {
48    fn default() -> Self {
49        Self {
50            max_commits: 1000,
51            workers: num_cpus::get().max(1),
52            cache_dir: ".oip-cache".to_string(),
53            include_merges: false,
54        }
55    }
56}
57
58/// ML model configuration
59#[derive(Debug, Clone, Serialize, Deserialize)]
60#[serde(default)]
61pub struct MlConfig {
62    /// Number of trees for Random Forest
63    pub n_trees: usize,
64
65    /// Maximum tree depth
66    pub max_depth: usize,
67
68    /// Number of clusters for K-means
69    pub k_clusters: usize,
70
71    /// K-means max iterations
72    pub max_iterations: usize,
73
74    /// SMOTE k-neighbors
75    pub smote_k: usize,
76
77    /// Target minority ratio for SMOTE
78    pub smote_ratio: f32,
79}
80
81impl Default for MlConfig {
82    fn default() -> Self {
83        Self {
84            n_trees: 100,
85            max_depth: 10,
86            k_clusters: 5,
87            max_iterations: 100,
88            smote_k: 5,
89            smote_ratio: 0.5,
90        }
91    }
92}
93
94/// Storage configuration
95#[derive(Debug, Clone, Serialize, Deserialize)]
96#[serde(default)]
97pub struct StorageConfig {
98    /// Default output file
99    pub default_output: String,
100
101    /// Enable compression
102    pub compress: bool,
103
104    /// Batch size for bulk operations
105    pub batch_size: usize,
106}
107
108impl Default for StorageConfig {
109    fn default() -> Self {
110        Self {
111            default_output: "oip-gpu.db".to_string(),
112            compress: true,
113            batch_size: 1000,
114        }
115    }
116}
117
118/// Compute/GPU configuration
119#[derive(Debug, Clone, Serialize, Deserialize)]
120#[serde(default)]
121pub struct ComputeConfig {
122    /// Preferred backend: "auto", "gpu", "simd", "cpu"
123    pub backend: String,
124
125    /// GPU workgroup size
126    pub workgroup_size: usize,
127
128    /// Enable GPU if available
129    pub gpu_enabled: bool,
130}
131
132impl Default for ComputeConfig {
133    fn default() -> Self {
134        Self {
135            backend: "auto".to_string(),
136            workgroup_size: 256,
137            gpu_enabled: true,
138        }
139    }
140}
141
142/// Logging configuration
143#[derive(Debug, Clone, Serialize, Deserialize)]
144#[serde(default)]
145pub struct LoggingConfig {
146    /// Log level: "trace", "debug", "info", "warn", "error"
147    pub level: String,
148
149    /// Enable JSON output
150    pub json: bool,
151
152    /// Log file path (optional)
153    pub file: Option<String>,
154}
155
156impl Default for LoggingConfig {
157    fn default() -> Self {
158        Self {
159            level: "info".to_string(),
160            json: false,
161            file: None,
162        }
163    }
164}
165
166impl Config {
167    /// Load configuration from file
168    pub fn from_file(path: &Path) -> Result<Self> {
169        let content = std::fs::read_to_string(path)?;
170        let config: Config = serde_yaml::from_str(&content)?;
171        Ok(config)
172    }
173
174    /// Load configuration with environment overrides
175    pub fn load(path: Option<&Path>) -> Result<Self> {
176        let mut config = if let Some(p) = path {
177            if p.exists() {
178                Self::from_file(p)?
179            } else {
180                Self::default()
181            }
182        } else {
183            // Try default locations
184            let default_paths = [".oip.yaml", ".oip.yml", "oip.yaml", "oip.yml"];
185            let mut found = None;
186            for p in &default_paths {
187                let path = Path::new(p);
188                if path.exists() {
189                    found = Some(Self::from_file(path)?);
190                    break;
191                }
192            }
193            found.unwrap_or_default()
194        };
195
196        // Apply environment overrides
197        config.apply_env_overrides();
198
199        Ok(config)
200    }
201
202    /// Apply environment variable overrides
203    fn apply_env_overrides(&mut self) {
204        // Analysis
205        if let Ok(val) = std::env::var("OIP_MAX_COMMITS") {
206            if let Ok(n) = val.parse() {
207                self.analysis.max_commits = n;
208            }
209        }
210        if let Ok(val) = std::env::var("OIP_WORKERS") {
211            if let Ok(n) = val.parse() {
212                self.analysis.workers = n;
213            }
214        }
215        if let Ok(val) = std::env::var("OIP_CACHE_DIR") {
216            self.analysis.cache_dir = val;
217        }
218
219        // ML
220        if let Ok(val) = std::env::var("OIP_K_CLUSTERS") {
221            if let Ok(n) = val.parse() {
222                self.ml.k_clusters = n;
223            }
224        }
225
226        // Compute
227        if let Ok(val) = std::env::var("OIP_BACKEND") {
228            self.compute.backend = val;
229        }
230        if let Ok(val) = std::env::var("OIP_GPU_ENABLED") {
231            self.compute.gpu_enabled = val == "1" || val.to_lowercase() == "true";
232        }
233
234        // Logging
235        if let Ok(val) = std::env::var("OIP_LOG_LEVEL") {
236            self.logging.level = val;
237        }
238        if let Ok(val) = std::env::var("OIP_LOG_JSON") {
239            self.logging.json = val == "1" || val.to_lowercase() == "true";
240        }
241    }
242
243    /// Save configuration to file
244    pub fn save(&self, path: &Path) -> Result<()> {
245        let content = serde_yaml::to_string(self)?;
246        std::fs::write(path, content)?;
247        Ok(())
248    }
249
250    /// Validate configuration
251    pub fn validate(&self) -> Result<()> {
252        if self.analysis.max_commits == 0 {
253            anyhow::bail!("max_commits must be > 0");
254        }
255        if self.analysis.workers == 0 {
256            anyhow::bail!("workers must be > 0");
257        }
258        if self.ml.k_clusters == 0 {
259            anyhow::bail!("k_clusters must be > 0");
260        }
261        if self.ml.smote_ratio <= 0.0 || self.ml.smote_ratio > 1.0 {
262            anyhow::bail!("smote_ratio must be in (0, 1]");
263        }
264        Ok(())
265    }
266
267    /// Generate example configuration
268    pub fn example_yaml() -> String {
269        let config = Config::default();
270        serde_yaml::to_string(&config).unwrap_or_default()
271    }
272}
273
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Removes each of the given environment variables.
    fn clear_env(keys: &[&str]) {
        for key in keys {
            std::env::remove_var(key);
        }
    }

    /// Sets `key` to `value`, applies the environment overrides to a default
    /// configuration, unsets `key` again and returns the resulting config.
    fn defaults_with_env(key: &str, value: &str) -> Config {
        std::env::remove_var(key);
        std::env::set_var(key, value);

        let mut config = Config::default();
        config.apply_env_overrides();

        std::env::remove_var(key);
        config
    }

    /// Applies `mutate` to a default configuration and returns whether the
    /// result still validates.
    fn validates_after(mutate: impl FnOnce(&mut Config)) -> bool {
        let mut config = Config::default();
        mutate(&mut config);
        config.validate().is_ok()
    }

    #[test]
    fn test_default_config() {
        let defaults = Config::default();
        assert_eq!(defaults.analysis.max_commits, 1000);
        assert_eq!(defaults.ml.k_clusters, 5);
        assert_eq!(defaults.compute.backend, "auto");
    }

    #[test]
    fn test_config_validation() {
        assert!(validates_after(|_| {}));
    }

    #[test]
    fn test_invalid_config() {
        assert!(!validates_after(|c| c.analysis.max_commits = 0));
    }

    #[test]
    fn test_config_save_load() {
        let dir = TempDir::new().unwrap();
        let file = dir.path().join("test-config.yaml");

        let original = Config::default();
        original.save(&file).unwrap();

        let reloaded = Config::from_file(&file).unwrap();
        assert_eq!(reloaded.analysis.max_commits, original.analysis.max_commits);
        assert_eq!(reloaded.ml.k_clusters, original.ml.k_clusters);
    }

    #[test]
    fn test_example_yaml() {
        let rendered = Config::example_yaml();
        for section in ["analysis", "ml", "compute"] {
            assert!(rendered.contains(section));
        }
    }

    #[test]
    #[serial_test::serial]
    fn test_load_missing_file() {
        // Clean up any env vars from other tests
        clear_env(&["OIP_MAX_COMMITS", "OIP_GPU_ENABLED"]);

        // A missing file is not an error: the defaults are returned.
        let loaded = Config::load(Some(Path::new("nonexistent.yaml"))).unwrap();
        assert_eq!(loaded.analysis.max_commits, 1000);
    }

    #[test]
    #[serial_test::serial]
    fn test_load_no_path_no_defaults() {
        // Clean up any env vars from other tests
        clear_env(&["OIP_MAX_COMMITS", "OIP_GPU_ENABLED"]);

        // No explicit path and no default files present: defaults are used.
        let loaded = Config::load(None).unwrap();
        assert_eq!(loaded.analysis.max_commits, 1000);
    }

    #[test]
    #[serial_test::serial]
    fn test_env_overrides_max_commits() {
        let overridden = defaults_with_env("OIP_MAX_COMMITS", "500");
        assert_eq!(overridden.analysis.max_commits, 500);
    }

    #[test]
    #[serial_test::serial]
    fn test_env_overrides_workers() {
        let overridden = defaults_with_env("OIP_WORKERS", "8");
        assert_eq!(overridden.analysis.workers, 8);
    }

    #[test]
    #[serial_test::serial]
    fn test_env_overrides_cache_dir() {
        let overridden = defaults_with_env("OIP_CACHE_DIR", "/tmp/custom-cache");
        assert_eq!(overridden.analysis.cache_dir, "/tmp/custom-cache");
    }

    #[test]
    #[serial_test::serial]
    fn test_env_overrides_k_clusters() {
        let overridden = defaults_with_env("OIP_K_CLUSTERS", "10");
        assert_eq!(overridden.ml.k_clusters, 10);
    }

    #[test]
    #[serial_test::serial]
    fn test_env_overrides_backend() {
        let overridden = defaults_with_env("OIP_BACKEND", "simd");
        assert_eq!(overridden.compute.backend, "simd");
    }

    #[test]
    #[serial_test::serial]
    fn test_env_overrides_gpu_enabled_true() {
        let overridden = defaults_with_env("OIP_GPU_ENABLED", "true");
        assert!(overridden.compute.gpu_enabled);
    }

    #[test]
    #[serial_test::serial]
    fn test_env_overrides_gpu_enabled_1() {
        let overridden = defaults_with_env("OIP_GPU_ENABLED", "1");
        assert!(overridden.compute.gpu_enabled);
    }

    #[test]
    #[serial_test::serial]
    fn test_env_overrides_gpu_enabled_false() {
        // Clean up first to avoid interference from other tests
        std::env::remove_var("OIP_GPU_ENABLED");
        std::env::set_var("OIP_GPU_ENABLED", "false");

        let mut subject = Config::default();
        subject.compute.gpu_enabled = true; // Start with true
        subject.apply_env_overrides();
        assert!(!subject.compute.gpu_enabled);

        std::env::remove_var("OIP_GPU_ENABLED");
    }

    #[test]
    #[serial_test::serial]
    fn test_env_overrides_log_level() {
        let overridden = defaults_with_env("OIP_LOG_LEVEL", "debug");
        assert_eq!(overridden.logging.level, "debug");
    }

    #[test]
    #[serial_test::serial]
    fn test_env_overrides_log_json() {
        let overridden = defaults_with_env("OIP_LOG_JSON", "1");
        assert!(overridden.logging.json);
    }

    #[test]
    fn test_validation_workers_zero() {
        assert!(!validates_after(|c| c.analysis.workers = 0));
    }

    #[test]
    fn test_validation_k_clusters_zero() {
        assert!(!validates_after(|c| c.ml.k_clusters = 0));
    }

    #[test]
    fn test_validation_smote_ratio_zero() {
        assert!(!validates_after(|c| c.ml.smote_ratio = 0.0));
    }

    #[test]
    fn test_validation_smote_ratio_over_one() {
        assert!(!validates_after(|c| c.ml.smote_ratio = 1.5));
    }

    #[test]
    fn test_validation_smote_ratio_exactly_one() {
        // The upper bound of the interval is inclusive.
        assert!(validates_after(|c| c.ml.smote_ratio = 1.0));
    }

    #[test]
    fn test_analysis_config_defaults() {
        let analysis = AnalysisConfig::default();
        assert_eq!(analysis.max_commits, 1000);
        assert!(analysis.workers > 0); // At least 1
        assert_eq!(analysis.cache_dir, ".oip-cache");
        assert!(!analysis.include_merges);
    }

    #[test]
    fn test_ml_config_defaults() {
        let ml = MlConfig::default();
        assert_eq!(ml.n_trees, 100);
        assert_eq!(ml.max_depth, 10);
        assert_eq!(ml.k_clusters, 5);
        assert_eq!(ml.max_iterations, 100);
        assert_eq!(ml.smote_k, 5);
        assert_eq!(ml.smote_ratio, 0.5);
    }

    #[test]
    fn test_storage_config_defaults() {
        let storage = StorageConfig::default();
        assert_eq!(storage.default_output, "oip-gpu.db");
        assert!(storage.compress);
        assert_eq!(storage.batch_size, 1000);
    }

    #[test]
    fn test_compute_config_defaults() {
        let compute = ComputeConfig::default();
        assert_eq!(compute.backend, "auto");
        assert_eq!(compute.workgroup_size, 256);
        assert!(compute.gpu_enabled);
    }

    #[test]
    fn test_logging_config_defaults() {
        let logging = LoggingConfig::default();
        assert_eq!(logging.level, "info");
        assert!(!logging.json);
        assert!(logging.file.is_none());
    }

    #[test]
    fn test_config_serialization() {
        let original = Config::default();
        let yaml = serde_yaml::to_string(&original).unwrap();
        let round_tripped: Config = serde_yaml::from_str(&yaml).unwrap();

        assert_eq!(
            original.analysis.max_commits,
            round_tripped.analysis.max_commits
        );
        assert_eq!(original.ml.k_clusters, round_tripped.ml.k_clusters);
    }

    #[test]
    #[serial_test::serial]
    fn test_invalid_env_value_ignored() {
        // Clean up any env vars from other tests
        std::env::remove_var("OIP_GPU_ENABLED");

        std::env::set_var("OIP_MAX_COMMITS", "not-a-number");
        let loaded = Config::load(None).unwrap();
        assert_eq!(loaded.analysis.max_commits, 1000); // Should use default
        std::env::remove_var("OIP_MAX_COMMITS");
    }
}