scribe_core/
config.rs

1//! Configuration types and management for Scribe.
2//!
3//! Provides comprehensive configuration structures with validation,
4//! serialization, and environment-based overrides.
5
6use std::collections::{HashMap, HashSet};
7use std::path::PathBuf;
8use std::time::Duration;
9use serde::{Deserialize, Serialize};
10use globset::{Glob, GlobSet, GlobSetBuilder};
11
12use crate::error::{Result, ScribeError};
13use crate::types::HeuristicWeights;
14use crate::file::Language;
15
16/// Main configuration structure for Scribe
17#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct Config {
19    /// General settings
20    pub general: GeneralConfig,
21    
22    /// File filtering configuration
23    pub filtering: FilteringConfig,
24    
25    /// Analysis configuration
26    pub analysis: AnalysisConfig,
27    
28    /// Scoring configuration
29    pub scoring: ScoringConfig,
30    
31    /// Performance and resource limits
32    pub performance: PerformanceConfig,
33    
34    /// Git integration settings
35    pub git: GitConfig,
36    
37    /// Feature flags
38    pub features: FeatureFlags,
39    
40    /// Output format configuration
41    pub output: OutputConfig,
42}
43
44impl Default for Config {
45    fn default() -> Self {
46        Self {
47            general: GeneralConfig::default(),
48            filtering: FilteringConfig::default(),
49            analysis: AnalysisConfig::default(),
50            scoring: ScoringConfig::default(),
51            performance: PerformanceConfig::default(),
52            git: GitConfig::default(),
53            features: FeatureFlags::default(),
54            output: OutputConfig::default(),
55        }
56    }
57}
58
59impl Config {
60    /// Load configuration from a file
61    pub fn load_from_file<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
62        let content = std::fs::read_to_string(path.as_ref())
63            .map_err(|e| ScribeError::path_with_source(
64                "Failed to read config file", 
65                path.as_ref(), 
66                e
67            ))?;
68
69        let config: Config = match path.as_ref().extension().and_then(|s| s.to_str()) {
70            Some("json") => serde_json::from_str(&content)?,
71            Some("yaml") | Some("yml") => {
72                return Err(ScribeError::config("YAML support not yet implemented"));
73            }
74            Some("toml") => {
75                return Err(ScribeError::config("TOML support not yet implemented"));
76            }
77            _ => {
78                return Err(ScribeError::config(
79                    "Unsupported config file format. Use .json, .yaml, or .toml"
80                ));
81            }
82        };
83
84        config.validate()?;
85        Ok(config)
86    }
87
88    /// Save configuration to a file
89    pub fn save_to_file<P: AsRef<std::path::Path>>(&self, path: P) -> Result<()> {
90        let content = match path.as_ref().extension().and_then(|s| s.to_str()) {
91            Some("json") => serde_json::to_string_pretty(self)?,
92            Some("yaml") | Some("yml") => {
93                return Err(ScribeError::config("YAML support not yet implemented"));
94            }
95            Some("toml") => {
96                return Err(ScribeError::config("TOML support not yet implemented"));
97            }
98            _ => {
99                return Err(ScribeError::config(
100                    "Unsupported config file format. Use .json, .yaml, or .toml"
101                ));
102            }
103        };
104
105        std::fs::write(path.as_ref(), content)
106            .map_err(|e| ScribeError::path_with_source(
107                "Failed to write config file", 
108                path.as_ref(), 
109                e
110            ))?;
111
112        Ok(())
113    }
114
115    /// Validate the configuration
116    pub fn validate(&self) -> Result<()> {
117        self.general.validate()?;
118        self.filtering.validate()?;
119        self.analysis.validate()?;
120        self.scoring.validate()?;
121        self.performance.validate()?;
122        self.git.validate()?;
123        self.features.validate()?;
124        self.output.validate()?;
125        Ok(())
126    }
127
128    /// Merge with another configuration (other takes priority)
129    pub fn merge_with(mut self, other: Config) -> Self {
130        // Note: This is a simplified merge - in practice you might want
131        // more sophisticated merging logic
132        self.general = other.general;
133        self.filtering = other.filtering;
134        self.analysis = other.analysis;
135        self.scoring = other.scoring;
136        self.performance = other.performance;
137        self.git = other.git;
138        self.features = other.features;
139        self.output = other.output;
140        self
141    }
142
143    /// Create a configuration hash for cache invalidation
144    pub fn compute_hash(&self) -> String {
145        use std::collections::hash_map::DefaultHasher;
146        use std::hash::{Hash, Hasher};
147
148        let serialized = serde_json::to_string(self).unwrap_or_default();
149        let mut hasher = DefaultHasher::new();
150        serialized.hash(&mut hasher);
151        format!("{:x}", hasher.finish())
152    }
153}
154
155/// General application settings
156#[derive(Debug, Clone, Serialize, Deserialize)]
157pub struct GeneralConfig {
158    /// Verbosity level (0-4)
159    pub verbosity: u8,
160    
161    /// Enable progress reporting
162    pub show_progress: bool,
163    
164    /// Enable colored output
165    pub use_colors: bool,
166    
167    /// Maximum number of worker threads (0 = auto-detect)
168    pub max_threads: usize,
169    
170    /// Working directory override
171    pub working_dir: Option<PathBuf>,
172}
173
174impl Default for GeneralConfig {
175    fn default() -> Self {
176        Self {
177            verbosity: 1,
178            show_progress: true,
179            use_colors: true,
180            max_threads: 0, // Auto-detect
181            working_dir: None,
182        }
183    }
184}
185
186impl GeneralConfig {
187    fn validate(&self) -> Result<()> {
188        if self.verbosity > 4 {
189            return Err(ScribeError::config_field(
190                "Verbosity must be between 0 and 4", 
191                "verbosity"
192            ));
193        }
194        Ok(())
195    }
196}
197
198/// File filtering configuration
199#[derive(Debug, Clone, Serialize, Deserialize)]
200pub struct FilteringConfig {
201    /// Include patterns (glob format)
202    pub include_patterns: Vec<String>,
203    
204    /// Exclude patterns (glob format)
205    pub exclude_patterns: Vec<String>,
206    
207    /// Maximum file size in bytes
208    pub max_file_size: u64,
209    
210    /// Minimum file size in bytes
211    pub min_file_size: u64,
212    
213    /// Languages to include (empty = all)
214    pub include_languages: HashSet<Language>,
215    
216    /// Languages to exclude
217    pub exclude_languages: HashSet<Language>,
218    
219    /// Whether to follow symbolic links
220    pub follow_symlinks: bool,
221    
222    /// Whether to include hidden files (starting with .)
223    pub include_hidden: bool,
224    
225    /// Whether to respect .gitignore files
226    pub respect_gitignore: bool,
227    
228    /// Additional ignore files to respect
229    pub ignore_files: Vec<PathBuf>,
230}
231
232impl Default for FilteringConfig {
233    fn default() -> Self {
234        Self {
235            include_patterns: vec![],
236            exclude_patterns: vec![
237                "node_modules/**".to_string(),
238                "target/**".to_string(),
239                ".git/**".to_string(),
240                "build/**".to_string(),
241                "dist/**".to_string(),
242                "__pycache__/**".to_string(),
243                "*.pyc".to_string(),
244                ".DS_Store".to_string(),
245            ],
246            max_file_size: 10 * 1024 * 1024, // 10MB
247            min_file_size: 0,
248            include_languages: HashSet::new(), // Empty = all languages
249            exclude_languages: HashSet::new(),
250            follow_symlinks: false,
251            include_hidden: false,
252            respect_gitignore: true,
253            ignore_files: vec![],
254        }
255    }
256}
257
258impl FilteringConfig {
259    fn validate(&self) -> Result<()> {
260        if self.max_file_size < self.min_file_size {
261            return Err(ScribeError::config(
262                "max_file_size must be >= min_file_size"
263            ));
264        }
265
266        // Validate glob patterns
267        for pattern in &self.include_patterns {
268            Glob::new(pattern).map_err(|e| ScribeError::pattern(
269                format!("Invalid include pattern: {}", e), 
270                pattern
271            ))?;
272        }
273
274        for pattern in &self.exclude_patterns {
275            Glob::new(pattern).map_err(|e| ScribeError::pattern(
276                format!("Invalid exclude pattern: {}", e), 
277                pattern
278            ))?;
279        }
280
281        Ok(())
282    }
283
284    /// Build a GlobSet for include patterns
285    pub fn build_include_set(&self) -> Result<Option<GlobSet>> {
286        if self.include_patterns.is_empty() {
287            return Ok(None);
288        }
289
290        let mut builder = GlobSetBuilder::new();
291        for pattern in &self.include_patterns {
292            builder.add(Glob::new(pattern)?);
293        }
294        Ok(Some(builder.build()?))
295    }
296
297    /// Build a GlobSet for exclude patterns
298    pub fn build_exclude_set(&self) -> Result<GlobSet> {
299        let mut builder = GlobSetBuilder::new();
300        for pattern in &self.exclude_patterns {
301            builder.add(Glob::new(pattern)?);
302        }
303        Ok(builder.build()?)
304    }
305}
306
307/// Analysis configuration
308#[derive(Debug, Clone, Serialize, Deserialize)]
309pub struct AnalysisConfig {
310    /// Whether to analyze file content (not just metadata)
311    pub analyze_content: bool,
312    
313    /// Whether to compute token estimates
314    pub compute_tokens: bool,
315    
316    /// Whether to count lines
317    pub count_lines: bool,
318    
319    /// Whether to detect binary files by content (in addition to extension)
320    pub detect_binary_content: bool,
321    
322    /// Languages that require special handling
323    pub language_overrides: HashMap<String, Language>,
324    
325    /// Custom file type mappings (extension -> language)
326    pub custom_extensions: HashMap<String, Language>,
327    
328    /// Whether to cache analysis results
329    pub enable_caching: bool,
330    
331    /// Cache directory (relative to project root or absolute)
332    pub cache_dir: PathBuf,
333    
334    /// Cache TTL in seconds
335    pub cache_ttl: u64,
336}
337
338impl Default for AnalysisConfig {
339    fn default() -> Self {
340        Self {
341            analyze_content: true,
342            compute_tokens: true,
343            count_lines: true,
344            detect_binary_content: false,
345            language_overrides: HashMap::new(),
346            custom_extensions: HashMap::new(),
347            enable_caching: false,
348            cache_dir: PathBuf::from(".scribe-cache"),
349            cache_ttl: 3600, // 1 hour
350        }
351    }
352}
353
354impl AnalysisConfig {
355    fn validate(&self) -> Result<()> {
356        if self.cache_ttl == 0 {
357            return Err(ScribeError::config_field(
358                "cache_ttl must be > 0", 
359                "cache_ttl"
360            ));
361        }
362        Ok(())
363    }
364}
365
366/// Scoring system configuration
367#[derive(Debug, Clone, Serialize, Deserialize)]
368pub struct ScoringConfig {
369    /// Heuristic weights
370    pub weights: HeuristicWeights,
371    
372    /// Whether to enable advanced scoring features
373    pub enable_advanced: bool,
374    
375    /// Custom scoring rules
376    pub custom_rules: Vec<CustomScoringRule>,
377    
378    /// Minimum score threshold for inclusion
379    pub min_score_threshold: f64,
380    
381    /// Maximum number of files to return (0 = unlimited)
382    pub max_results: usize,
383    
384    /// Whether to normalize scores to 0-1 range
385    pub normalize_scores: bool,
386}
387
388impl Default for ScoringConfig {
389    fn default() -> Self {
390        Self {
391            weights: HeuristicWeights::default(),
392            enable_advanced: false,
393            custom_rules: vec![],
394            min_score_threshold: 0.0,
395            max_results: 0, // Unlimited
396            normalize_scores: true,
397        }
398    }
399}
400
401impl ScoringConfig {
402    fn validate(&self) -> Result<()> {
403        if self.min_score_threshold < 0.0 || self.min_score_threshold > 1.0 {
404            return Err(ScribeError::config_field(
405                "min_score_threshold must be between 0.0 and 1.0", 
406                "min_score_threshold"
407            ));
408        }
409        Ok(())
410    }
411}
412
413/// Custom scoring rule
414#[derive(Debug, Clone, Serialize, Deserialize)]
415pub struct CustomScoringRule {
416    /// Rule name/description
417    pub name: String,
418    
419    /// File pattern to match
420    pub pattern: String,
421    
422    /// Score modifier type
423    pub modifier: ScoreModifier,
424}
425
426/// Score modifier operations
427#[derive(Debug, Clone, Serialize, Deserialize)]
428pub enum ScoreModifier {
429    /// Add a constant value
430    Add(f64),
431    /// Multiply by a factor
432    Multiply(f64),
433    /// Set to a specific value
434    Set(f64),
435    /// Add bonus based on condition
436    ConditionalBonus { condition: String, bonus: f64 },
437}
438
439/// Performance and resource configuration
440#[derive(Debug, Clone, Serialize, Deserialize)]
441pub struct PerformanceConfig {
442    /// Maximum memory usage in MB (0 = unlimited)
443    pub max_memory_mb: usize,
444    
445    /// Analysis timeout per file in seconds
446    pub analysis_timeout: u64,
447    
448    /// Global timeout in seconds
449    pub global_timeout: u64,
450    
451    /// Batch size for parallel processing
452    pub batch_size: usize,
453    
454    /// Whether to use memory mapping for large files
455    pub use_mmap: bool,
456    
457    /// Buffer size for I/O operations
458    pub io_buffer_size: usize,
459}
460
461impl Default for PerformanceConfig {
462    fn default() -> Self {
463        Self {
464            max_memory_mb: 0, // Unlimited
465            analysis_timeout: 30,
466            global_timeout: 300, // 5 minutes
467            batch_size: 100,
468            use_mmap: false,
469            io_buffer_size: 64 * 1024, // 64KB
470        }
471    }
472}
473
474impl PerformanceConfig {
475    fn validate(&self) -> Result<()> {
476        if self.analysis_timeout == 0 {
477            return Err(ScribeError::config_field(
478                "analysis_timeout must be > 0", 
479                "analysis_timeout"
480            ));
481        }
482        if self.global_timeout == 0 {
483            return Err(ScribeError::config_field(
484                "global_timeout must be > 0", 
485                "global_timeout"
486            ));
487        }
488        if self.batch_size == 0 {
489            return Err(ScribeError::config_field(
490                "batch_size must be > 0", 
491                "batch_size"
492            ));
493        }
494        Ok(())
495    }
496
497    /// Get analysis timeout as Duration
498    pub fn analysis_timeout_duration(&self) -> Duration {
499        Duration::from_secs(self.analysis_timeout)
500    }
501
502    /// Get global timeout as Duration
503    pub fn global_timeout_duration(&self) -> Duration {
504        Duration::from_secs(self.global_timeout)
505    }
506}
507
508/// Git integration configuration
509#[derive(Debug, Clone, Serialize, Deserialize)]
510pub struct GitConfig {
511    /// Whether to use git information
512    pub enabled: bool,
513    
514    /// Whether to respect .gitignore
515    pub respect_gitignore: bool,
516    
517    /// Whether to include git status in analysis
518    pub include_status: bool,
519    
520    /// Whether to analyze git history for churn
521    pub analyze_history: bool,
522    
523    /// Number of commits to analyze for churn (0 = all)
524    pub history_depth: usize,
525    
526    /// Whether to include untracked files
527    pub include_untracked: bool,
528    
529    /// Git command timeout in seconds
530    pub git_timeout: u64,
531}
532
533impl Default for GitConfig {
534    fn default() -> Self {
535        Self {
536            enabled: true,
537            respect_gitignore: true,
538            include_status: true,
539            analyze_history: false,
540            history_depth: 100,
541            include_untracked: false,
542            git_timeout: 30,
543        }
544    }
545}
546
547impl GitConfig {
548    fn validate(&self) -> Result<()> {
549        if self.git_timeout == 0 {
550            return Err(ScribeError::config_field(
551                "git_timeout must be > 0", 
552                "git_timeout"
553            ));
554        }
555        Ok(())
556    }
557}
558
559/// Feature flags for experimental features
560#[derive(Debug, Clone, Serialize, Deserialize)]
561pub struct FeatureFlags {
562    /// Enable PageRank centrality computation
563    pub centrality_enabled: bool,
564    
565    /// Enable entrypoint detection
566    pub entrypoint_detection: bool,
567    
568    /// Enable examples/usage analysis
569    pub examples_analysis: bool,
570    
571    /// Enable semantic analysis (if available)
572    pub semantic_analysis: bool,
573    
574    /// Enable machine learning features
575    pub ml_features: bool,
576    
577    /// Enable experimental scoring algorithms
578    pub experimental_scoring: bool,
579}
580
581impl Default for FeatureFlags {
582    fn default() -> Self {
583        Self {
584            centrality_enabled: false,
585            entrypoint_detection: false,
586            examples_analysis: false,
587            semantic_analysis: false,
588            ml_features: false,
589            experimental_scoring: false,
590        }
591    }
592}
593
594impl FeatureFlags {
595    fn validate(&self) -> Result<()> {
596        // Currently no validation needed
597        Ok(())
598    }
599
600    /// Check if any V2 features are enabled
601    pub fn has_v2_features(&self) -> bool {
602        self.centrality_enabled || self.entrypoint_detection || self.examples_analysis
603    }
604
605    /// Get list of enabled feature names
606    pub fn enabled_features(&self) -> Vec<&'static str> {
607        let mut features = Vec::new();
608        
609        if self.centrality_enabled {
610            features.push("centrality");
611        }
612        if self.entrypoint_detection {
613            features.push("entrypoint_detection");
614        }
615        if self.examples_analysis {
616            features.push("examples_analysis");
617        }
618        if self.semantic_analysis {
619            features.push("semantic_analysis");
620        }
621        if self.ml_features {
622            features.push("ml_features");
623        }
624        if self.experimental_scoring {
625            features.push("experimental_scoring");
626        }
627        
628        features
629    }
630}
631
632/// Output format configuration
633#[derive(Debug, Clone, Serialize, Deserialize)]
634pub struct OutputConfig {
635    /// Output format
636    pub format: OutputFormat,
637    
638    /// Whether to include file content in output
639    pub include_content: bool,
640    
641    /// Whether to include detailed scores breakdown
642    pub include_score_breakdown: bool,
643    
644    /// Whether to include repository statistics
645    pub include_repo_stats: bool,
646    
647    /// Whether to sort results by score
648    pub sort_by_score: bool,
649    
650    /// Pretty print JSON output
651    pub pretty_json: bool,
652    
653    /// Custom output fields to include
654    pub custom_fields: Vec<String>,
655}
656
657/// Output format options
658#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
659pub enum OutputFormat {
660    Json,
661    JsonLines,
662    Csv,
663    Table,
664    Summary,
665}
666
667impl Default for OutputConfig {
668    fn default() -> Self {
669        Self {
670            format: OutputFormat::Json,
671            include_content: false,
672            include_score_breakdown: true,
673            include_repo_stats: true,
674            sort_by_score: true,
675            pretty_json: true,
676            custom_fields: vec![],
677        }
678    }
679}
680
681impl OutputConfig {
682    fn validate(&self) -> Result<()> {
683        // Currently no validation needed
684        Ok(())
685    }
686}
687
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::Builder;

    /// Spot-checks a few values of the default configuration.
    #[test]
    fn test_config_defaults() {
        let config = Config::default();
        assert_eq!(config.general.verbosity, 1);
        assert!(config.filtering.respect_gitignore);
        assert!(config.git.enabled);
        assert!(!config.features.centrality_enabled);
    }

    /// The default configuration validates; out-of-range values do not.
    #[test]
    fn test_config_validation() {
        let mut config = Config::default();
        assert!(config.validate().is_ok());

        // Test invalid verbosity
        config.general.verbosity = 10;
        assert!(config.validate().is_err());

        // Reset and test invalid file sizes
        config = Config::default();
        config.filtering.max_file_size = 100;
        config.filtering.min_file_size = 200;
        assert!(config.validate().is_err());
    }

    /// Round-trips the default configuration through a JSON file.
    #[test]
    fn test_config_file_io() {
        let config = Config::default();

        // Create the temporary file with a `.json` suffix directly. Deriving a
        // sibling path with `with_extension("json")` would write to a second
        // file that is not owned by the temp-file guard and is never removed.
        let temp_file = Builder::new().suffix(".json").tempfile().unwrap();
        let json_path = temp_file.path();

        // Test JSON serialization
        config.save_to_file(json_path).unwrap();
        let loaded_config = Config::load_from_file(json_path).unwrap();

        assert_eq!(config.general.verbosity, loaded_config.general.verbosity);
    }

    /// Patterns validate and compile into working glob sets.
    #[test]
    fn test_filtering_patterns() {
        let mut config = FilteringConfig::default();
        config.include_patterns.push("*.rs".to_string());
        config.exclude_patterns.push("target/**".to_string());

        assert!(config.validate().is_ok());

        let include_set = config.build_include_set().unwrap();
        assert!(include_set.is_some());

        let exclude_set = config.build_exclude_set().unwrap();
        assert!(exclude_set.is_match("target/debug/file.o"));
    }

    /// Flags start disabled and are reported by name once enabled.
    #[test]
    fn test_feature_flags() {
        let mut flags = FeatureFlags::default();
        assert!(!flags.has_v2_features());
        assert!(flags.enabled_features().is_empty());

        flags.centrality_enabled = true;
        flags.entrypoint_detection = true;

        assert!(flags.has_v2_features());
        let enabled = flags.enabled_features();
        assert!(enabled.contains(&"centrality"));
        assert!(enabled.contains(&"entrypoint_detection"));
    }

    /// The `Duration` accessors reflect the default timeouts.
    #[test]
    fn test_performance_config_timeouts() {
        let config = PerformanceConfig::default();
        assert_eq!(config.analysis_timeout_duration(), Duration::from_secs(30));
        assert_eq!(config.global_timeout_duration(), Duration::from_secs(300));
    }

    /// Equal configurations hash equally; a changed field changes the hash.
    #[test]
    fn test_config_hash() {
        let config1 = Config::default();
        let config2 = Config::default();

        let hash1 = config1.compute_hash();
        let hash2 = config2.compute_hash();

        assert_eq!(hash1, hash2);

        let mut config3 = Config::default();
        config3.general.verbosity = 2;
        let hash3 = config3.compute_hash();

        assert_ne!(hash1, hash3);
    }
}