1use std::collections::{HashMap, HashSet};
7use std::path::PathBuf;
8use std::time::Duration;
9use std::hash::{Hash, Hasher};
10use serde::{Deserialize, Serialize};
11use globset::{Glob, GlobSet, GlobSetBuilder};
12
13use crate::error::{Result, ScribeError};
14use crate::types::HeuristicWeights;
15use crate::file::Language;
16
17#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
19pub struct Config {
20 pub general: GeneralConfig,
22
23 pub filtering: FilteringConfig,
25
26 pub analysis: AnalysisConfig,
28
29 pub scoring: ScoringConfig,
31
32 pub performance: PerformanceConfig,
34
35 pub git: GitConfig,
37
38 pub features: FeatureFlags,
40
41 pub output: OutputConfig,
43}
44
45impl Default for Config {
46 fn default() -> Self {
47 Self {
48 general: GeneralConfig::default(),
49 filtering: FilteringConfig::default(),
50 analysis: AnalysisConfig::default(),
51 scoring: ScoringConfig::default(),
52 performance: PerformanceConfig::default(),
53 git: GitConfig::default(),
54 features: FeatureFlags::default(),
55 output: OutputConfig::default(),
56 }
57 }
58}
59
60impl Config {
61 pub fn load_from_file<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
63 let content = std::fs::read_to_string(path.as_ref())
64 .map_err(|e| ScribeError::path_with_source(
65 "Failed to read config file",
66 path.as_ref(),
67 e
68 ))?;
69
70 let config: Config = match path.as_ref().extension().and_then(|s| s.to_str()) {
71 Some("json") => serde_json::from_str(&content)?,
72 Some("yaml") | Some("yml") => {
73 return Err(ScribeError::config("YAML support not yet implemented"));
74 }
75 Some("toml") => {
76 return Err(ScribeError::config("TOML support not yet implemented"));
77 }
78 _ => {
79 return Err(ScribeError::config(
80 "Unsupported config file format. Use .json, .yaml, or .toml"
81 ));
82 }
83 };
84
85 config.validate()?;
86 Ok(config)
87 }
88
89 pub fn save_to_file<P: AsRef<std::path::Path>>(&self, path: P) -> Result<()> {
91 let content = match path.as_ref().extension().and_then(|s| s.to_str()) {
92 Some("json") => serde_json::to_string_pretty(self)?,
93 Some("yaml") | Some("yml") => {
94 return Err(ScribeError::config("YAML support not yet implemented"));
95 }
96 Some("toml") => {
97 return Err(ScribeError::config("TOML support not yet implemented"));
98 }
99 _ => {
100 return Err(ScribeError::config(
101 "Unsupported config file format. Use .json, .yaml, or .toml"
102 ));
103 }
104 };
105
106 std::fs::write(path.as_ref(), content)
107 .map_err(|e| ScribeError::path_with_source(
108 "Failed to write config file",
109 path.as_ref(),
110 e
111 ))?;
112
113 Ok(())
114 }
115
116 pub fn validate(&self) -> Result<()> {
118 self.general.validate()?;
119 self.filtering.validate()?;
120 self.analysis.validate()?;
121 self.scoring.validate()?;
122 self.performance.validate()?;
123 self.git.validate()?;
124 self.features.validate()?;
125 self.output.validate()?;
126 Ok(())
127 }
128
129 pub fn merge_with(mut self, other: Config) -> Self {
131 self.general = other.general;
134 self.filtering = other.filtering;
135 self.analysis = other.analysis;
136 self.scoring = other.scoring;
137 self.performance = other.performance;
138 self.git = other.git;
139 self.features = other.features;
140 self.output = other.output;
141 self
142 }
143
144 pub fn compute_hash(&self) -> String {
147 use std::collections::hash_map::DefaultHasher;
148
149 let mut hasher = DefaultHasher::new();
150 self.hash(&mut hasher);
151 format!("{:x}", hasher.finish())
152 }
153}
154
155#[derive(Debug, Clone, Serialize, Deserialize)]
157pub struct GeneralConfig {
158 pub verbosity: u8,
160
161 pub show_progress: bool,
163
164 pub use_colors: bool,
166
167 pub max_threads: usize,
169
170 pub working_dir: Option<PathBuf>,
172}
173
174impl Hash for GeneralConfig {
176 fn hash<H: Hasher>(&self, state: &mut H) {
177 self.verbosity.hash(state);
178 self.show_progress.hash(state);
179 self.use_colors.hash(state);
180 self.max_threads.hash(state);
181 if let Some(ref path) = self.working_dir {
183 path.to_string_lossy().hash(state);
184 } else {
185 None::<String>.hash(state);
186 }
187 }
188}
189
190impl Default for GeneralConfig {
191 fn default() -> Self {
192 Self {
193 verbosity: 1,
194 show_progress: true,
195 use_colors: true,
196 max_threads: 0, working_dir: None,
198 }
199 }
200}
201
202impl GeneralConfig {
203 fn validate(&self) -> Result<()> {
204 if self.verbosity > 4 {
205 return Err(ScribeError::config_field(
206 "Verbosity must be between 0 and 4",
207 "verbosity"
208 ));
209 }
210 Ok(())
211 }
212}
213
214#[derive(Debug, Clone, Serialize, Deserialize)]
216pub struct FilteringConfig {
217 pub include_patterns: Vec<String>,
219
220 pub exclude_patterns: Vec<String>,
222
223 pub max_file_size: u64,
225
226 pub min_file_size: u64,
228
229 pub include_languages: HashSet<Language>,
231
232 pub exclude_languages: HashSet<Language>,
234
235 pub follow_symlinks: bool,
237
238 pub include_hidden: bool,
240
241 pub respect_gitignore: bool,
243
244 pub ignore_files: Vec<PathBuf>,
246}
247
248impl Hash for FilteringConfig {
250 fn hash<H: Hasher>(&self, state: &mut H) {
251 self.include_patterns.hash(state);
252 self.exclude_patterns.hash(state);
253 self.max_file_size.hash(state);
254 self.min_file_size.hash(state);
255 let mut include_langs: Vec<_> = self.include_languages.iter().collect();
257 include_langs.sort();
258 include_langs.hash(state);
259
260 let mut exclude_langs: Vec<_> = self.exclude_languages.iter().collect();
261 exclude_langs.sort();
262 exclude_langs.hash(state);
263 self.follow_symlinks.hash(state);
264 self.include_hidden.hash(state);
265 self.respect_gitignore.hash(state);
266 for path in &self.ignore_files {
268 path.to_string_lossy().hash(state);
269 }
270 }
271}
272
273impl Default for FilteringConfig {
274 fn default() -> Self {
275 Self {
276 include_patterns: vec![],
277 exclude_patterns: vec![
278 "node_modules/**".to_string(),
279 "target/**".to_string(),
280 ".git/**".to_string(),
281 "build/**".to_string(),
282 "dist/**".to_string(),
283 "__pycache__/**".to_string(),
284 "*.pyc".to_string(),
285 ".DS_Store".to_string(),
286 ],
287 max_file_size: 10 * 1024 * 1024, min_file_size: 0,
289 include_languages: HashSet::new(), exclude_languages: HashSet::new(),
291 follow_symlinks: false,
292 include_hidden: false,
293 respect_gitignore: true,
294 ignore_files: vec![],
295 }
296 }
297}
298
299impl FilteringConfig {
300 fn validate(&self) -> Result<()> {
301 if self.max_file_size < self.min_file_size {
302 return Err(ScribeError::config(
303 "max_file_size must be >= min_file_size"
304 ));
305 }
306
307 for pattern in &self.include_patterns {
309 Glob::new(pattern).map_err(|e| ScribeError::pattern(
310 format!("Invalid include pattern: {}", e),
311 pattern
312 ))?;
313 }
314
315 for pattern in &self.exclude_patterns {
316 Glob::new(pattern).map_err(|e| ScribeError::pattern(
317 format!("Invalid exclude pattern: {}", e),
318 pattern
319 ))?;
320 }
321
322 Ok(())
323 }
324
325 pub fn build_include_set(&self) -> Result<Option<GlobSet>> {
327 if self.include_patterns.is_empty() {
328 return Ok(None);
329 }
330
331 let mut builder = GlobSetBuilder::new();
332 for pattern in &self.include_patterns {
333 builder.add(Glob::new(pattern)?);
334 }
335 Ok(Some(builder.build()?))
336 }
337
338 pub fn build_exclude_set(&self) -> Result<GlobSet> {
340 let mut builder = GlobSetBuilder::new();
341 for pattern in &self.exclude_patterns {
342 builder.add(Glob::new(pattern)?);
343 }
344 Ok(builder.build()?)
345 }
346}
347
348#[derive(Debug, Clone, Serialize, Deserialize)]
350pub struct AnalysisConfig {
351 pub analyze_content: bool,
353
354 pub compute_tokens: bool,
356
357 pub count_lines: bool,
359
360 pub detect_binary_content: bool,
362
363 pub language_overrides: HashMap<String, Language>,
365
366 pub custom_extensions: HashMap<String, Language>,
368
369 pub enable_caching: bool,
371
372 pub cache_dir: PathBuf,
374
375 pub cache_ttl: u64,
377
378 pub token_budget: Option<usize>,
380}
381
382impl Hash for AnalysisConfig {
384 fn hash<H: Hasher>(&self, state: &mut H) {
385 self.analyze_content.hash(state);
386 self.compute_tokens.hash(state);
387 self.count_lines.hash(state);
388 self.detect_binary_content.hash(state);
389 let mut lang_overrides: Vec<_> = self.language_overrides.iter().collect();
391 lang_overrides.sort_by_key(|(k, _)| *k);
392 lang_overrides.hash(state);
393
394 let mut custom_exts: Vec<_> = self.custom_extensions.iter().collect();
395 custom_exts.sort_by_key(|(k, _)| *k);
396 custom_exts.hash(state);
397 self.enable_caching.hash(state);
398 self.cache_dir.to_string_lossy().hash(state);
400 self.cache_ttl.hash(state);
401 self.token_budget.hash(state);
402 }
403}
404
405impl Default for AnalysisConfig {
406 fn default() -> Self {
407 Self {
408 analyze_content: true,
409 compute_tokens: true,
410 count_lines: true,
411 detect_binary_content: false,
412 language_overrides: HashMap::new(),
413 custom_extensions: HashMap::new(),
414 enable_caching: false,
415 cache_dir: PathBuf::from(".scribe-cache"),
416 cache_ttl: 3600, token_budget: None,
418 }
419 }
420}
421
422impl AnalysisConfig {
423 fn validate(&self) -> Result<()> {
424 if self.cache_ttl == 0 {
425 return Err(ScribeError::config_field(
426 "cache_ttl must be > 0",
427 "cache_ttl"
428 ));
429 }
430 Ok(())
431 }
432}
433
434#[derive(Debug, Clone, Serialize, Deserialize)]
436pub struct ScoringConfig {
437 pub weights: HeuristicWeights,
439
440 pub enable_advanced: bool,
442
443 pub custom_rules: Vec<CustomScoringRule>,
445
446 pub min_score_threshold: f64,
448
449 pub max_results: usize,
451
452 pub normalize_scores: bool,
454}
455
456impl Hash for ScoringConfig {
458 fn hash<H: Hasher>(&self, state: &mut H) {
459 self.weights.hash(state);
460 self.enable_advanced.hash(state);
461 self.custom_rules.hash(state);
462 self.min_score_threshold.to_bits().hash(state);
464 self.max_results.hash(state);
465 self.normalize_scores.hash(state);
466 }
467}
468
469impl Default for ScoringConfig {
470 fn default() -> Self {
471 Self {
472 weights: HeuristicWeights::default(),
473 enable_advanced: false,
474 custom_rules: vec![],
475 min_score_threshold: 0.0,
476 max_results: 0, normalize_scores: true,
478 }
479 }
480}
481
482impl ScoringConfig {
483 fn validate(&self) -> Result<()> {
484 if self.min_score_threshold < 0.0 || self.min_score_threshold > 1.0 {
485 return Err(ScribeError::config_field(
486 "min_score_threshold must be between 0.0 and 1.0",
487 "min_score_threshold"
488 ));
489 }
490 Ok(())
491 }
492}
493
494#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
496pub struct CustomScoringRule {
497 pub name: String,
499
500 pub pattern: String,
502
503 pub modifier: ScoreModifier,
505}
506
507#[derive(Debug, Clone, Serialize, Deserialize)]
509pub enum ScoreModifier {
510 Add(f64),
512 Multiply(f64),
514 Set(f64),
516 ConditionalBonus { condition: String, bonus: f64 },
518}
519
520impl Hash for ScoreModifier {
522 fn hash<H: Hasher>(&self, state: &mut H) {
523 match self {
524 ScoreModifier::Add(value) => {
525 0u8.hash(state); value.to_bits().hash(state);
527 }
528 ScoreModifier::Multiply(value) => {
529 1u8.hash(state); value.to_bits().hash(state);
531 }
532 ScoreModifier::Set(value) => {
533 2u8.hash(state); value.to_bits().hash(state);
535 }
536 ScoreModifier::ConditionalBonus { condition, bonus } => {
537 3u8.hash(state); condition.hash(state);
539 bonus.to_bits().hash(state);
540 }
541 }
542 }
543}
544
545#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
547pub struct PerformanceConfig {
548 pub max_memory_mb: usize,
550
551 pub analysis_timeout: u64,
553
554 pub global_timeout: u64,
556
557 pub batch_size: usize,
559
560 pub use_mmap: bool,
562
563 pub io_buffer_size: usize,
565}
566
567impl Default for PerformanceConfig {
568 fn default() -> Self {
569 Self {
570 max_memory_mb: 0, analysis_timeout: 30,
572 global_timeout: 300, batch_size: 100,
574 use_mmap: false,
575 io_buffer_size: 64 * 1024, }
577 }
578}
579
580impl PerformanceConfig {
581 fn validate(&self) -> Result<()> {
582 if self.analysis_timeout == 0 {
583 return Err(ScribeError::config_field(
584 "analysis_timeout must be > 0",
585 "analysis_timeout"
586 ));
587 }
588 if self.global_timeout == 0 {
589 return Err(ScribeError::config_field(
590 "global_timeout must be > 0",
591 "global_timeout"
592 ));
593 }
594 if self.batch_size == 0 {
595 return Err(ScribeError::config_field(
596 "batch_size must be > 0",
597 "batch_size"
598 ));
599 }
600 Ok(())
601 }
602
603 pub fn analysis_timeout_duration(&self) -> Duration {
605 Duration::from_secs(self.analysis_timeout)
606 }
607
608 pub fn global_timeout_duration(&self) -> Duration {
610 Duration::from_secs(self.global_timeout)
611 }
612}
613
614#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
616pub struct GitConfig {
617 pub enabled: bool,
619
620 pub respect_gitignore: bool,
622
623 pub include_status: bool,
625
626 pub analyze_history: bool,
628
629 pub history_depth: usize,
631
632 pub include_untracked: bool,
634
635 pub git_timeout: u64,
637}
638
639impl Default for GitConfig {
640 fn default() -> Self {
641 Self {
642 enabled: true,
643 respect_gitignore: true,
644 include_status: true,
645 analyze_history: false,
646 history_depth: 100,
647 include_untracked: false,
648 git_timeout: 30,
649 }
650 }
651}
652
653impl GitConfig {
654 fn validate(&self) -> Result<()> {
655 if self.git_timeout == 0 {
656 return Err(ScribeError::config_field(
657 "git_timeout must be > 0",
658 "git_timeout"
659 ));
660 }
661 Ok(())
662 }
663}
664
665#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
667pub struct FeatureFlags {
668 pub centrality_enabled: bool,
670
671 pub entrypoint_detection: bool,
673
674 pub examples_analysis: bool,
676
677 pub semantic_analysis: bool,
679
680 pub ml_features: bool,
682
683 pub experimental_scoring: bool,
685
686 pub scaling_enabled: bool,
688}
689
690impl Default for FeatureFlags {
691 fn default() -> Self {
692 Self {
693 centrality_enabled: false,
694 entrypoint_detection: false,
695 examples_analysis: false,
696 semantic_analysis: false,
697 ml_features: false,
698 experimental_scoring: false,
699 scaling_enabled: false,
700 }
701 }
702}
703
704impl FeatureFlags {
705 fn validate(&self) -> Result<()> {
706 Ok(())
708 }
709
710 pub fn has_v2_features(&self) -> bool {
712 self.centrality_enabled || self.entrypoint_detection || self.examples_analysis
713 }
714
715 pub fn enabled_features(&self) -> Vec<&'static str> {
717 let mut features = Vec::new();
718
719 if self.centrality_enabled {
720 features.push("centrality");
721 }
722 if self.entrypoint_detection {
723 features.push("entrypoint_detection");
724 }
725 if self.examples_analysis {
726 features.push("examples_analysis");
727 }
728 if self.semantic_analysis {
729 features.push("semantic_analysis");
730 }
731 if self.ml_features {
732 features.push("ml_features");
733 }
734 if self.experimental_scoring {
735 features.push("experimental_scoring");
736 }
737 if self.scaling_enabled {
738 features.push("scaling");
739 }
740
741 features
742 }
743}
744
745#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
747pub struct OutputConfig {
748 pub format: OutputFormat,
750
751 pub include_content: bool,
753
754 pub include_score_breakdown: bool,
756
757 pub include_repo_stats: bool,
759
760 pub sort_by_score: bool,
762
763 pub pretty_json: bool,
765
766 pub custom_fields: Vec<String>,
768}
769
770#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
772pub enum OutputFormat {
773 Json,
774 JsonLines,
775 Csv,
776 Table,
777 Summary,
778}
779
780impl Default for OutputConfig {
781 fn default() -> Self {
782 Self {
783 format: OutputFormat::Json,
784 include_content: false,
785 include_score_breakdown: true,
786 include_repo_stats: true,
787 sort_by_score: true,
788 pretty_json: true,
789 custom_fields: vec![],
790 }
791 }
792}
793
794impl OutputConfig {
795 fn validate(&self) -> Result<()> {
796 Ok(())
798 }
799}
800
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Spot-checks a few values of the default configuration.
    #[test]
    fn test_config_defaults() {
        let config = Config::default();
        assert_eq!(config.general.verbosity, 1);
        assert!(config.filtering.respect_gitignore);
        assert!(config.git.enabled);
        assert!(!config.features.centrality_enabled);
    }

    /// The default configuration validates; out-of-range verbosity and
    /// inverted size bounds do not.
    #[test]
    fn test_config_validation() {
        let mut config = Config::default();
        assert!(config.validate().is_ok());

        config.general.verbosity = 10;
        assert!(config.validate().is_err());

        config = Config::default();
        config.filtering.max_file_size = 100;
        config.filtering.min_file_size = 200;
        assert!(config.validate().is_err());
    }

    /// Round-trips the default configuration through a JSON file.
    #[test]
    fn test_config_file_io() {
        let config = Config::default();

        // Write inside a temporary directory so the JSON file is removed when
        // `temp_dir` is dropped. Deriving a sibling path from a
        // `NamedTempFile` would create a second file that nothing cleans up.
        let temp_dir = tempdir().unwrap();
        let json_path = temp_dir.path().join("config.json");

        config.save_to_file(&json_path).unwrap();
        let loaded_config = Config::load_from_file(&json_path).unwrap();

        assert_eq!(config.general.verbosity, loaded_config.general.verbosity);
    }

    /// Valid patterns pass validation and compile into usable glob sets.
    #[test]
    fn test_filtering_patterns() {
        let mut config = FilteringConfig::default();
        config.include_patterns.push("*.rs".to_string());
        config.exclude_patterns.push("target/**".to_string());

        assert!(config.validate().is_ok());

        let include_set = config.build_include_set().unwrap();
        assert!(include_set.is_some());

        let exclude_set = config.build_exclude_set().unwrap();
        assert!(exclude_set.is_match("target/debug/file.o"));
    }

    /// Enabled flags are reflected by `has_v2_features` and
    /// `enabled_features`.
    #[test]
    fn test_feature_flags() {
        let mut flags = FeatureFlags::default();
        assert!(!flags.has_v2_features());
        assert!(flags.enabled_features().is_empty());

        flags.centrality_enabled = true;
        flags.entrypoint_detection = true;

        assert!(flags.has_v2_features());
        let enabled = flags.enabled_features();
        assert!(enabled.contains(&"centrality"));
        assert!(enabled.contains(&"entrypoint_detection"));
    }

    /// Timeout accessors convert the default values to whole seconds.
    #[test]
    fn test_performance_config_timeouts() {
        let config = PerformanceConfig::default();
        assert_eq!(config.analysis_timeout_duration(), Duration::from_secs(30));
        assert_eq!(config.global_timeout_duration(), Duration::from_secs(300));
    }

    /// Equal configurations hash equally; a changed field changes the hash.
    #[test]
    fn test_config_hash() {
        let config1 = Config::default();
        let config2 = Config::default();

        let hash1 = config1.compute_hash();
        let hash2 = config2.compute_hash();

        assert_eq!(hash1, hash2);

        let mut config3 = Config::default();
        config3.general.verbosity = 2;
        let hash3 = config3.compute_hash();

        assert_ne!(hash1, hash3);
    }
}