1use globset::{Glob, GlobSet, GlobSetBuilder};
7use serde::{Deserialize, Serialize};
8use std::collections::{HashMap, HashSet};
9use std::hash::{Hash, Hasher};
10use std::path::PathBuf;
11use std::time::Duration;
12
13use crate::error::{Result, ScribeError};
14use crate::file::Language;
15use crate::types::HeuristicWeights;
16
/// Root configuration object that groups every settings section.
///
/// Derives `Serialize`/`Deserialize` for file I/O and `Hash` so that
/// `Config::compute_hash` can fingerprint the whole configuration.
#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
pub struct Config {
    /// General settings: verbosity, progress/color toggles, thread limit, working directory.
    pub general: GeneralConfig,

    /// File selection settings: glob patterns, size bounds, languages, ignore handling.
    pub filtering: FilteringConfig,

    /// Content-analysis toggles, language mappings and cache settings.
    pub analysis: AnalysisConfig,

    /// Heuristic weights, custom scoring rules and result limits.
    pub scoring: ScoringConfig,

    /// Memory, timeout, batching and I/O buffer settings.
    pub performance: PerformanceConfig,

    /// Git integration settings.
    pub git: GitConfig,

    /// Toggles for optional features.
    pub features: FeatureFlags,

    /// Output format and content options.
    pub output: OutputConfig,
}
44
45impl Default for Config {
46 fn default() -> Self {
47 Self {
48 general: GeneralConfig::default(),
49 filtering: FilteringConfig::default(),
50 analysis: AnalysisConfig::default(),
51 scoring: ScoringConfig::default(),
52 performance: PerformanceConfig::default(),
53 git: GitConfig::default(),
54 features: FeatureFlags::default(),
55 output: OutputConfig::default(),
56 }
57 }
58}
59
60impl Config {
61 pub fn load_from_file<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
63 let content = std::fs::read_to_string(path.as_ref()).map_err(|e| {
64 ScribeError::path_with_source("Failed to read config file", path.as_ref(), e)
65 })?;
66
67 let config: Config = match path.as_ref().extension().and_then(|s| s.to_str()) {
68 Some("json") => serde_json::from_str(&content)?,
69 Some("yaml") | Some("yml") => {
70 return Err(ScribeError::config("YAML support not yet implemented"));
71 }
72 Some("toml") => {
73 return Err(ScribeError::config("TOML support not yet implemented"));
74 }
75 _ => {
76 return Err(ScribeError::config(
77 "Unsupported config file format. Use .json, .yaml, or .toml",
78 ));
79 }
80 };
81
82 config.validate()?;
83 Ok(config)
84 }
85
86 pub fn save_to_file<P: AsRef<std::path::Path>>(&self, path: P) -> Result<()> {
88 let content = match path.as_ref().extension().and_then(|s| s.to_str()) {
89 Some("json") => serde_json::to_string_pretty(self)?,
90 Some("yaml") | Some("yml") => {
91 return Err(ScribeError::config("YAML support not yet implemented"));
92 }
93 Some("toml") => {
94 return Err(ScribeError::config("TOML support not yet implemented"));
95 }
96 _ => {
97 return Err(ScribeError::config(
98 "Unsupported config file format. Use .json, .yaml, or .toml",
99 ));
100 }
101 };
102
103 std::fs::write(path.as_ref(), content).map_err(|e| {
104 ScribeError::path_with_source("Failed to write config file", path.as_ref(), e)
105 })?;
106
107 Ok(())
108 }
109
110 pub fn validate(&self) -> Result<()> {
112 self.general.validate()?;
113 self.filtering.validate()?;
114 self.analysis.validate()?;
115 self.scoring.validate()?;
116 self.performance.validate()?;
117 self.git.validate()?;
118 self.features.validate()?;
119 self.output.validate()?;
120 Ok(())
121 }
122
123 pub fn merge_with(mut self, other: Config) -> Self {
125 self.general = other.general;
128 self.filtering = other.filtering;
129 self.analysis = other.analysis;
130 self.scoring = other.scoring;
131 self.performance = other.performance;
132 self.git = other.git;
133 self.features = other.features;
134 self.output = other.output;
135 self
136 }
137
138 pub fn compute_hash(&self) -> String {
141 use std::collections::hash_map::DefaultHasher;
142
143 let mut hasher = DefaultHasher::new();
144 self.hash(&mut hasher);
145 format!("{:x}", hasher.finish())
146 }
147}
148
/// General, cross-cutting settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GeneralConfig {
    /// Verbosity level; validation accepts 0 through 4. Defaults to 1.
    pub verbosity: u8,

    /// Progress display toggle. Defaults to `true`.
    pub show_progress: bool,

    /// Colored output toggle. Defaults to `true`.
    pub use_colors: bool,

    /// Thread limit. Defaults to 0.
    /// NOTE(review): 0 presumably means "choose automatically" — confirm with the consumer.
    pub max_threads: usize,

    /// Optional working directory. Defaults to `None`.
    pub working_dir: Option<PathBuf>,
}
167
168impl Hash for GeneralConfig {
170 fn hash<H: Hasher>(&self, state: &mut H) {
171 self.verbosity.hash(state);
172 self.show_progress.hash(state);
173 self.use_colors.hash(state);
174 self.max_threads.hash(state);
175 if let Some(ref path) = self.working_dir {
177 path.to_string_lossy().hash(state);
178 } else {
179 None::<String>.hash(state);
180 }
181 }
182}
183
184impl Default for GeneralConfig {
185 fn default() -> Self {
186 Self {
187 verbosity: 1,
188 show_progress: true,
189 use_colors: true,
190 max_threads: 0, working_dir: None,
192 }
193 }
194}
195
196impl GeneralConfig {
197 fn validate(&self) -> Result<()> {
198 if self.verbosity > 4 {
199 return Err(ScribeError::config_field(
200 "Verbosity must be between 0 and 4",
201 "verbosity",
202 ));
203 }
204 Ok(())
205 }
206}
207
/// Settings that decide which files are considered.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FilteringConfig {
    /// Glob patterns for inclusion. Empty by default, in which case
    /// `build_include_set` returns `None`.
    pub include_patterns: Vec<String>,

    /// Glob patterns for exclusion. The defaults cover common dependency,
    /// build and VCS directories plus `*.pyc` and `.DS_Store`.
    pub exclude_patterns: Vec<String>,

    /// Upper file-size bound; must be >= `min_file_size`. Defaults to
    /// `10 * 1024 * 1024` (presumably bytes — confirm with the consumer).
    pub max_file_size: u64,

    /// Lower file-size bound. Defaults to 0.
    pub min_file_size: u64,

    /// Languages to include. Empty by default.
    /// NOTE(review): an empty set presumably means "no restriction" — confirm.
    pub include_languages: HashSet<Language>,

    /// Languages to exclude. Empty by default.
    pub exclude_languages: HashSet<Language>,

    /// Symlink-following toggle. Defaults to `false`.
    pub follow_symlinks: bool,

    /// Hidden-file toggle. Defaults to `false`.
    pub include_hidden: bool,

    /// `.gitignore` handling toggle. Defaults to `true`.
    pub respect_gitignore: bool,

    /// Additional ignore-file paths. Empty by default.
    pub ignore_files: Vec<PathBuf>,
}
241
242impl Hash for FilteringConfig {
244 fn hash<H: Hasher>(&self, state: &mut H) {
245 self.include_patterns.hash(state);
246 self.exclude_patterns.hash(state);
247 self.max_file_size.hash(state);
248 self.min_file_size.hash(state);
249 let mut include_langs: Vec<_> = self.include_languages.iter().collect();
251 include_langs.sort();
252 include_langs.hash(state);
253
254 let mut exclude_langs: Vec<_> = self.exclude_languages.iter().collect();
255 exclude_langs.sort();
256 exclude_langs.hash(state);
257 self.follow_symlinks.hash(state);
258 self.include_hidden.hash(state);
259 self.respect_gitignore.hash(state);
260 for path in &self.ignore_files {
262 path.to_string_lossy().hash(state);
263 }
264 }
265}
266
267impl Default for FilteringConfig {
268 fn default() -> Self {
269 Self {
270 include_patterns: vec![],
271 exclude_patterns: vec![
272 "node_modules/**".to_string(),
273 "target/**".to_string(),
274 ".git/**".to_string(),
275 "build/**".to_string(),
276 "dist/**".to_string(),
277 "__pycache__/**".to_string(),
278 "*.pyc".to_string(),
279 ".DS_Store".to_string(),
280 ],
281 max_file_size: 10 * 1024 * 1024, min_file_size: 0,
283 include_languages: HashSet::new(), exclude_languages: HashSet::new(),
285 follow_symlinks: false,
286 include_hidden: false,
287 respect_gitignore: true,
288 ignore_files: vec![],
289 }
290 }
291}
292
293impl FilteringConfig {
294 fn validate(&self) -> Result<()> {
295 if self.max_file_size < self.min_file_size {
296 return Err(ScribeError::config(
297 "max_file_size must be >= min_file_size",
298 ));
299 }
300
301 for pattern in &self.include_patterns {
303 Glob::new(pattern).map_err(|e| {
304 ScribeError::pattern(format!("Invalid include pattern: {}", e), pattern)
305 })?;
306 }
307
308 for pattern in &self.exclude_patterns {
309 Glob::new(pattern).map_err(|e| {
310 ScribeError::pattern(format!("Invalid exclude pattern: {}", e), pattern)
311 })?;
312 }
313
314 Ok(())
315 }
316
317 pub fn build_include_set(&self) -> Result<Option<GlobSet>> {
319 if self.include_patterns.is_empty() {
320 return Ok(None);
321 }
322
323 let mut builder = GlobSetBuilder::new();
324 for pattern in &self.include_patterns {
325 builder.add(Glob::new(pattern)?);
326 }
327 Ok(Some(builder.build()?))
328 }
329
330 pub fn build_exclude_set(&self) -> Result<GlobSet> {
332 let mut builder = GlobSetBuilder::new();
333 for pattern in &self.exclude_patterns {
334 builder.add(Glob::new(pattern)?);
335 }
336 Ok(builder.build()?)
337 }
338}
339
/// Settings for content analysis and its cache.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisConfig {
    /// Content-analysis toggle. Defaults to `true`.
    pub analyze_content: bool,

    /// Token-computation toggle. Defaults to `true`.
    pub compute_tokens: bool,

    /// Line-counting toggle. Defaults to `true`.
    pub count_lines: bool,

    /// Binary-content detection toggle. Defaults to `false`.
    pub detect_binary_content: bool,

    /// String-keyed language overrides. Empty by default.
    /// NOTE(review): what the key represents is not visible in this file.
    pub language_overrides: HashMap<String, Language>,

    /// Mapping from custom extensions to languages. Empty by default.
    pub custom_extensions: HashMap<String, Language>,

    /// Caching toggle. Defaults to `false`.
    pub enable_caching: bool,

    /// Cache directory. Defaults to `.scribe-cache`.
    pub cache_dir: PathBuf,

    /// Cache time-to-live; must be > 0. Defaults to 3600
    /// (presumably seconds — confirm with the consumer).
    pub cache_ttl: u64,

    /// Optional token budget. Defaults to `None`.
    pub token_budget: Option<usize>,
}
373
374impl Hash for AnalysisConfig {
376 fn hash<H: Hasher>(&self, state: &mut H) {
377 self.analyze_content.hash(state);
378 self.compute_tokens.hash(state);
379 self.count_lines.hash(state);
380 self.detect_binary_content.hash(state);
381 let mut lang_overrides: Vec<_> = self.language_overrides.iter().collect();
383 lang_overrides.sort_by_key(|(k, _)| *k);
384 lang_overrides.hash(state);
385
386 let mut custom_exts: Vec<_> = self.custom_extensions.iter().collect();
387 custom_exts.sort_by_key(|(k, _)| *k);
388 custom_exts.hash(state);
389 self.enable_caching.hash(state);
390 self.cache_dir.to_string_lossy().hash(state);
392 self.cache_ttl.hash(state);
393 self.token_budget.hash(state);
394 }
395}
396
397impl Default for AnalysisConfig {
398 fn default() -> Self {
399 Self {
400 analyze_content: true,
401 compute_tokens: true,
402 count_lines: true,
403 detect_binary_content: false,
404 language_overrides: HashMap::new(),
405 custom_extensions: HashMap::new(),
406 enable_caching: false,
407 cache_dir: PathBuf::from(".scribe-cache"),
408 cache_ttl: 3600, token_budget: None,
410 }
411 }
412}
413
414impl AnalysisConfig {
415 fn validate(&self) -> Result<()> {
416 if self.cache_ttl == 0 {
417 return Err(ScribeError::config_field(
418 "cache_ttl must be > 0",
419 "cache_ttl",
420 ));
421 }
422 Ok(())
423 }
424}
425
/// Settings that control scoring.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScoringConfig {
    /// Heuristic weights; defaults to `HeuristicWeights::default()`.
    pub weights: HeuristicWeights,

    /// User-defined scoring rules. Empty by default.
    pub custom_rules: Vec<CustomScoringRule>,

    /// Minimum score threshold; validation requires 0.0..=1.0. Defaults to 0.0.
    pub min_score_threshold: f64,

    /// Result limit. Defaults to 0.
    /// NOTE(review): 0 presumably means "unlimited" — confirm with the consumer.
    pub max_results: usize,

    /// Score-normalization toggle. Defaults to `true`.
    pub normalize_scores: bool,
}
444
445impl Hash for ScoringConfig {
447 fn hash<H: Hasher>(&self, state: &mut H) {
448 self.weights.hash(state);
449 self.custom_rules.hash(state);
450 self.min_score_threshold.to_bits().hash(state);
452 self.max_results.hash(state);
453 self.normalize_scores.hash(state);
454 }
455}
456
457impl Default for ScoringConfig {
458 fn default() -> Self {
459 Self {
460 weights: HeuristicWeights::default(),
461 custom_rules: vec![],
462 min_score_threshold: 0.0,
463 max_results: 0, normalize_scores: true,
465 }
466 }
467}
468
469impl ScoringConfig {
470 fn validate(&self) -> Result<()> {
471 if self.min_score_threshold < 0.0 || self.min_score_threshold > 1.0 {
472 return Err(ScribeError::config_field(
473 "min_score_threshold must be between 0.0 and 1.0",
474 "min_score_threshold",
475 ));
476 }
477 Ok(())
478 }
479}
480
/// A user-defined scoring rule: a named pattern paired with a score modifier.
#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
pub struct CustomScoringRule {
    /// Rule name.
    pub name: String,

    /// Pattern the rule applies to.
    /// NOTE(review): the pattern syntax is not validated anywhere in this file — confirm with the consumer.
    pub pattern: String,

    /// Modification carried by the rule.
    pub modifier: ScoreModifier,
}
493
/// Score modification carried by a `CustomScoringRule`.
///
/// NOTE(review): the meanings below are read off the variant names; the code
/// that applies modifiers is not in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ScoreModifier {
    /// Presumably adds the value to the score.
    Add(f64),
    /// Presumably multiplies the score by the value.
    Multiply(f64),
    /// Presumably replaces the score with the value.
    Set(f64),
    /// Presumably adds `bonus` when `condition` holds; the condition syntax is not visible here.
    ConditionalBonus { condition: String, bonus: f64 },
}
506
507impl Hash for ScoreModifier {
509 fn hash<H: Hasher>(&self, state: &mut H) {
510 match self {
511 ScoreModifier::Add(value) => {
512 0u8.hash(state); value.to_bits().hash(state);
514 }
515 ScoreModifier::Multiply(value) => {
516 1u8.hash(state); value.to_bits().hash(state);
518 }
519 ScoreModifier::Set(value) => {
520 2u8.hash(state); value.to_bits().hash(state);
522 }
523 ScoreModifier::ConditionalBonus { condition, bonus } => {
524 3u8.hash(state); condition.hash(state);
526 bonus.to_bits().hash(state);
527 }
528 }
529 }
530}
531
/// Resource limits and tuning knobs.
#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
pub struct PerformanceConfig {
    /// Memory limit in megabytes (per the field name). Defaults to 0.
    /// NOTE(review): 0 presumably means "unlimited" — confirm with the consumer.
    pub max_memory_mb: usize,

    /// Analysis timeout in seconds (see `analysis_timeout_duration`); must be > 0. Defaults to 30.
    pub analysis_timeout: u64,

    /// Global timeout in seconds (see `global_timeout_duration`); must be > 0. Defaults to 300.
    pub global_timeout: u64,

    /// Batch size; must be > 0. Defaults to 100.
    pub batch_size: usize,

    /// Memory-mapped I/O toggle. Defaults to `false`.
    pub use_mmap: bool,

    /// I/O buffer size. Defaults to `64 * 1024` (presumably bytes — confirm).
    pub io_buffer_size: usize,
}
553
554impl Default for PerformanceConfig {
555 fn default() -> Self {
556 Self {
557 max_memory_mb: 0, analysis_timeout: 30,
559 global_timeout: 300, batch_size: 100,
561 use_mmap: false,
562 io_buffer_size: 64 * 1024, }
564 }
565}
566
567impl PerformanceConfig {
568 fn validate(&self) -> Result<()> {
569 if self.analysis_timeout == 0 {
570 return Err(ScribeError::config_field(
571 "analysis_timeout must be > 0",
572 "analysis_timeout",
573 ));
574 }
575 if self.global_timeout == 0 {
576 return Err(ScribeError::config_field(
577 "global_timeout must be > 0",
578 "global_timeout",
579 ));
580 }
581 if self.batch_size == 0 {
582 return Err(ScribeError::config_field(
583 "batch_size must be > 0",
584 "batch_size",
585 ));
586 }
587 Ok(())
588 }
589
590 pub fn analysis_timeout_duration(&self) -> Duration {
592 Duration::from_secs(self.analysis_timeout)
593 }
594
595 pub fn global_timeout_duration(&self) -> Duration {
597 Duration::from_secs(self.global_timeout)
598 }
599}
600
/// Git integration settings.
#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
pub struct GitConfig {
    /// Master toggle for Git integration. Defaults to `true`.
    pub enabled: bool,

    /// `.gitignore` handling toggle. Defaults to `true`.
    pub respect_gitignore: bool,

    /// Git status inclusion toggle. Defaults to `true`.
    pub include_status: bool,

    /// History analysis toggle. Defaults to `false`.
    pub analyze_history: bool,

    /// History depth. Defaults to 100 (presumably a commit count — confirm).
    pub history_depth: usize,

    /// Untracked-file inclusion toggle. Defaults to `false`.
    pub include_untracked: bool,

    /// Git timeout; must be > 0. Defaults to 30 (presumably seconds — confirm).
    pub git_timeout: u64,
}
625
626impl Default for GitConfig {
627 fn default() -> Self {
628 Self {
629 enabled: true,
630 respect_gitignore: true,
631 include_status: true,
632 analyze_history: false,
633 history_depth: 100,
634 include_untracked: false,
635 git_timeout: 30,
636 }
637 }
638}
639
640impl GitConfig {
641 fn validate(&self) -> Result<()> {
642 if self.git_timeout == 0 {
643 return Err(ScribeError::config_field(
644 "git_timeout must be > 0",
645 "git_timeout",
646 ));
647 }
648 Ok(())
649 }
650}
651
/// Toggles for optional features; every flag defaults to `false`.
#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
pub struct FeatureFlags {
    /// Centrality feature toggle; counted by `has_v2_features`.
    pub centrality_enabled: bool,

    /// Entrypoint detection toggle; counted by `has_v2_features`.
    pub entrypoint_detection: bool,

    /// Examples analysis toggle; counted by `has_v2_features`.
    pub examples_analysis: bool,

    /// Semantic analysis toggle.
    pub semantic_analysis: bool,

    /// ML features toggle.
    pub ml_features: bool,

    /// Experimental scoring toggle.
    pub experimental_scoring: bool,

    /// Scaling toggle.
    pub scaling_enabled: bool,

    /// Automatic test-exclusion toggle.
    pub auto_exclude_tests: bool,
}
679
680impl Default for FeatureFlags {
681 fn default() -> Self {
682 Self {
683 centrality_enabled: false,
684 entrypoint_detection: false,
685 examples_analysis: false,
686 semantic_analysis: false,
687 ml_features: false,
688 experimental_scoring: false,
689 scaling_enabled: false,
690 auto_exclude_tests: false,
691 }
692 }
693}
694
695impl FeatureFlags {
696 fn validate(&self) -> Result<()> {
697 Ok(())
699 }
700
701 pub fn has_v2_features(&self) -> bool {
703 self.centrality_enabled || self.entrypoint_detection || self.examples_analysis
704 }
705
706 pub fn enabled_features(&self) -> Vec<&'static str> {
708 let mut features = Vec::new();
709
710 if self.centrality_enabled {
711 features.push("centrality");
712 }
713 if self.entrypoint_detection {
714 features.push("entrypoint_detection");
715 }
716 if self.examples_analysis {
717 features.push("examples_analysis");
718 }
719 if self.semantic_analysis {
720 features.push("semantic_analysis");
721 }
722 if self.ml_features {
723 features.push("ml_features");
724 }
725 if self.experimental_scoring {
726 features.push("experimental_scoring");
727 }
728 if self.scaling_enabled {
729 features.push("scaling");
730 }
731
732 features
733 }
734}
735
/// Output format and content options.
#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
pub struct OutputConfig {
    /// Output format. Defaults to `OutputFormat::Json`.
    pub format: OutputFormat,

    /// File-content inclusion toggle. Defaults to `false`.
    pub include_content: bool,

    /// Score-breakdown inclusion toggle. Defaults to `true`.
    pub include_score_breakdown: bool,

    /// Repository-statistics inclusion toggle. Defaults to `true`.
    pub include_repo_stats: bool,

    /// Sort-by-score toggle. Defaults to `true`.
    pub sort_by_score: bool,

    /// Pretty-printed JSON toggle. Defaults to `true`.
    pub pretty_json: bool,

    /// Names of custom fields. Empty by default.
    /// NOTE(review): how these names are interpreted is not visible in this file.
    pub custom_fields: Vec<String>,

    /// Optional output file path. Defaults to `None`.
    /// NOTE(review): `None` presumably means standard output — confirm.
    pub file_path: Option<String>,
}
763
/// Supported output formats; `Json` is the default used by `OutputConfig`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
pub enum OutputFormat {
    /// JSON output.
    Json,
    /// JSON Lines output (presumably one JSON value per line).
    JsonLines,
    /// CSV output.
    Csv,
    /// Table output.
    Table,
    /// Summary output.
    Summary,
}
773
774impl Default for OutputConfig {
775 fn default() -> Self {
776 Self {
777 format: OutputFormat::Json,
778 include_content: false,
779 include_score_breakdown: true,
780 include_repo_stats: true,
781 sort_by_score: true,
782 pretty_json: true,
783 custom_fields: vec![],
784 file_path: None,
785 }
786 }
787}
788
impl OutputConfig {
    /// Validates the output settings.
    ///
    /// Currently performs no checks and always returns `Ok(())`.
    fn validate(&self) -> Result<()> {
        Ok(())
    }
}
795
#[cfg(test)]
mod tests {
    use super::*;

    /// The top-level defaults expose the documented section defaults.
    #[test]
    fn test_config_defaults() {
        let config = Config::default();
        assert_eq!(config.general.verbosity, 1);
        assert!(config.filtering.respect_gitignore);
        assert!(config.git.enabled);
        assert!(!config.features.centrality_enabled);
    }

    /// Validation accepts the defaults and rejects out-of-range values.
    #[test]
    fn test_config_validation() {
        let mut config = Config::default();
        assert!(config.validate().is_ok());

        // Verbosity above the accepted maximum.
        config.general.verbosity = 10;
        assert!(config.validate().is_err());

        // Inverted size bounds.
        config = Config::default();
        config.filtering.max_file_size = 100;
        config.filtering.min_file_size = 200;
        assert!(config.validate().is_err());
    }

    /// A configuration survives a JSON save/load round trip.
    #[test]
    fn test_config_file_io() {
        let config = Config::default();

        // Write inside a dedicated temporary directory: the directory and the
        // JSON file in it are removed when `dir` is dropped. Deriving a
        // sibling path from a `NamedTempFile` would leave the `.json` file
        // behind, because the guard only deletes the file it created.
        let dir = tempfile::tempdir().unwrap();
        let json_path = dir.path().join("config.json");

        config.save_to_file(&json_path).unwrap();
        let loaded_config = Config::load_from_file(&json_path).unwrap();

        assert_eq!(config.general.verbosity, loaded_config.general.verbosity);
    }

    /// Include/exclude patterns validate and compile into glob sets.
    #[test]
    fn test_filtering_patterns() {
        let mut config = FilteringConfig::default();
        config.include_patterns.push("*.rs".to_string());
        config.exclude_patterns.push("target/**".to_string());

        assert!(config.validate().is_ok());

        let include_set = config.build_include_set().unwrap();
        assert!(include_set.is_some());

        let exclude_set = config.build_exclude_set().unwrap();
        assert!(exclude_set.is_match("target/debug/file.o"));
    }

    /// Feature helpers reflect the flags that are switched on.
    #[test]
    fn test_feature_flags() {
        let mut flags = FeatureFlags::default();
        assert!(!flags.has_v2_features());
        assert!(flags.enabled_features().is_empty());

        flags.centrality_enabled = true;
        flags.entrypoint_detection = true;

        assert!(flags.has_v2_features());
        let enabled = flags.enabled_features();
        assert!(enabled.contains(&"centrality"));
        assert!(enabled.contains(&"entrypoint_detection"));
    }

    /// Timeout accessors convert the second counts into durations.
    #[test]
    fn test_performance_config_timeouts() {
        let config = PerformanceConfig::default();
        assert_eq!(config.analysis_timeout_duration(), Duration::from_secs(30));
        assert_eq!(config.global_timeout_duration(), Duration::from_secs(300));
    }

    /// Equal configurations hash equally; a changed field changes the hash.
    #[test]
    fn test_config_hash() {
        let config1 = Config::default();
        let config2 = Config::default();

        let hash1 = config1.compute_hash();
        let hash2 = config2.compute_hash();

        assert_eq!(hash1, hash2);

        let mut config3 = Config::default();
        config3.general.verbosity = 2;
        let hash3 = config3.compute_hash();

        assert_ne!(hash1, hash3);
    }
}