1use globset::{Glob, GlobSet, GlobSetBuilder};
7use serde::{Deserialize, Serialize};
8use std::collections::{HashMap, HashSet};
9use std::hash::{Hash, Hasher};
10use std::path::PathBuf;
11use std::time::Duration;
12
13use crate::error::{Result, ScribeError};
14use crate::file::Language;
15use crate::types::HeuristicWeights;
16
17#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
19pub struct Config {
20 pub general: GeneralConfig,
22
23 pub filtering: FilteringConfig,
25
26 pub analysis: AnalysisConfig,
28
29 pub scoring: ScoringConfig,
31
32 pub performance: PerformanceConfig,
34
35 pub git: GitConfig,
37
38 pub features: FeatureFlags,
40
41 pub output: OutputConfig,
43}
44
45impl Default for Config {
46 fn default() -> Self {
47 Self {
48 general: GeneralConfig::default(),
49 filtering: FilteringConfig::default(),
50 analysis: AnalysisConfig::default(),
51 scoring: ScoringConfig::default(),
52 performance: PerformanceConfig::default(),
53 git: GitConfig::default(),
54 features: FeatureFlags::default(),
55 output: OutputConfig::default(),
56 }
57 }
58}
59
60impl Config {
61 pub fn load_from_file<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
63 let content = std::fs::read_to_string(path.as_ref()).map_err(|e| {
64 ScribeError::path_with_source("Failed to read config file", path.as_ref(), e)
65 })?;
66
67 let config: Config = match path.as_ref().extension().and_then(|s| s.to_str()) {
68 Some("json") => serde_json::from_str(&content)?,
69 Some("yaml") | Some("yml") => {
70 return Err(ScribeError::config("YAML support not yet implemented"));
71 }
72 Some("toml") => {
73 return Err(ScribeError::config("TOML support not yet implemented"));
74 }
75 _ => {
76 return Err(ScribeError::config(
77 "Unsupported config file format. Use .json, .yaml, or .toml",
78 ));
79 }
80 };
81
82 config.validate()?;
83 Ok(config)
84 }
85
86 pub fn save_to_file<P: AsRef<std::path::Path>>(&self, path: P) -> Result<()> {
88 let content = match path.as_ref().extension().and_then(|s| s.to_str()) {
89 Some("json") => serde_json::to_string_pretty(self)?,
90 Some("yaml") | Some("yml") => {
91 return Err(ScribeError::config("YAML support not yet implemented"));
92 }
93 Some("toml") => {
94 return Err(ScribeError::config("TOML support not yet implemented"));
95 }
96 _ => {
97 return Err(ScribeError::config(
98 "Unsupported config file format. Use .json, .yaml, or .toml",
99 ));
100 }
101 };
102
103 std::fs::write(path.as_ref(), content).map_err(|e| {
104 ScribeError::path_with_source("Failed to write config file", path.as_ref(), e)
105 })?;
106
107 Ok(())
108 }
109
110 pub fn validate(&self) -> Result<()> {
112 self.general.validate()?;
113 self.filtering.validate()?;
114 self.analysis.validate()?;
115 self.scoring.validate()?;
116 self.performance.validate()?;
117 self.git.validate()?;
118 self.features.validate()?;
119 self.output.validate()?;
120 Ok(())
121 }
122
123 pub fn merge_with(mut self, other: Config) -> Self {
125 self.general = other.general;
128 self.filtering = other.filtering;
129 self.analysis = other.analysis;
130 self.scoring = other.scoring;
131 self.performance = other.performance;
132 self.git = other.git;
133 self.features = other.features;
134 self.output = other.output;
135 self
136 }
137
138 pub fn compute_hash(&self) -> String {
141 use std::collections::hash_map::DefaultHasher;
142
143 let mut hasher = DefaultHasher::new();
144 self.hash(&mut hasher);
145 format!("{:x}", hasher.finish())
146 }
147}
148
149#[derive(Debug, Clone, Serialize, Deserialize)]
151pub struct GeneralConfig {
152 pub verbosity: u8,
154
155 pub show_progress: bool,
157
158 pub use_colors: bool,
160
161 pub max_threads: usize,
163
164 pub working_dir: Option<PathBuf>,
166}
167
168impl Hash for GeneralConfig {
170 fn hash<H: Hasher>(&self, state: &mut H) {
171 self.verbosity.hash(state);
172 self.show_progress.hash(state);
173 self.use_colors.hash(state);
174 self.max_threads.hash(state);
175 if let Some(ref path) = self.working_dir {
177 path.to_string_lossy().hash(state);
178 } else {
179 None::<String>.hash(state);
180 }
181 }
182}
183
184impl Default for GeneralConfig {
185 fn default() -> Self {
186 Self {
187 verbosity: 1,
188 show_progress: true,
189 use_colors: true,
190 max_threads: 0, working_dir: None,
192 }
193 }
194}
195
196impl GeneralConfig {
197 fn validate(&self) -> Result<()> {
198 if self.verbosity > 4 {
199 return Err(ScribeError::config_field(
200 "Verbosity must be between 0 and 4",
201 "verbosity",
202 ));
203 }
204 Ok(())
205 }
206}
207
208#[derive(Debug, Clone, Serialize, Deserialize)]
210pub struct FilteringConfig {
211 pub include_patterns: Vec<String>,
213
214 pub exclude_patterns: Vec<String>,
216
217 pub max_file_size: u64,
219
220 pub min_file_size: u64,
222
223 pub include_languages: HashSet<Language>,
225
226 pub exclude_languages: HashSet<Language>,
228
229 pub follow_symlinks: bool,
231
232 pub include_hidden: bool,
234
235 pub respect_gitignore: bool,
237
238 pub ignore_files: Vec<PathBuf>,
240}
241
242impl Hash for FilteringConfig {
244 fn hash<H: Hasher>(&self, state: &mut H) {
245 self.include_patterns.hash(state);
246 self.exclude_patterns.hash(state);
247 self.max_file_size.hash(state);
248 self.min_file_size.hash(state);
249 let mut include_langs: Vec<_> = self.include_languages.iter().collect();
251 include_langs.sort();
252 include_langs.hash(state);
253
254 let mut exclude_langs: Vec<_> = self.exclude_languages.iter().collect();
255 exclude_langs.sort();
256 exclude_langs.hash(state);
257 self.follow_symlinks.hash(state);
258 self.include_hidden.hash(state);
259 self.respect_gitignore.hash(state);
260 for path in &self.ignore_files {
262 path.to_string_lossy().hash(state);
263 }
264 }
265}
266
267impl Default for FilteringConfig {
268 fn default() -> Self {
269 Self {
270 include_patterns: vec![],
271 exclude_patterns: vec![
272 "node_modules/**".to_string(),
273 "target/**".to_string(),
274 ".git/**".to_string(),
275 "build/**".to_string(),
276 "dist/**".to_string(),
277 "__pycache__/**".to_string(),
278 "*.pyc".to_string(),
279 ".DS_Store".to_string(),
280 ],
281 max_file_size: 10 * 1024 * 1024, min_file_size: 0,
283 include_languages: HashSet::new(), exclude_languages: HashSet::new(),
285 follow_symlinks: false,
286 include_hidden: false,
287 respect_gitignore: true,
288 ignore_files: vec![],
289 }
290 }
291}
292
293impl FilteringConfig {
294 fn validate(&self) -> Result<()> {
295 if self.max_file_size < self.min_file_size {
296 return Err(ScribeError::config(
297 "max_file_size must be >= min_file_size",
298 ));
299 }
300
301 for pattern in &self.include_patterns {
303 Glob::new(pattern).map_err(|e| {
304 ScribeError::pattern(format!("Invalid include pattern: {}", e), pattern)
305 })?;
306 }
307
308 for pattern in &self.exclude_patterns {
309 Glob::new(pattern).map_err(|e| {
310 ScribeError::pattern(format!("Invalid exclude pattern: {}", e), pattern)
311 })?;
312 }
313
314 Ok(())
315 }
316
317 pub fn build_include_set(&self) -> Result<Option<GlobSet>> {
319 if self.include_patterns.is_empty() {
320 return Ok(None);
321 }
322
323 let mut builder = GlobSetBuilder::new();
324 for pattern in &self.include_patterns {
325 builder.add(Glob::new(pattern)?);
326 }
327 Ok(Some(builder.build()?))
328 }
329
330 pub fn build_exclude_set(&self) -> Result<GlobSet> {
332 let mut builder = GlobSetBuilder::new();
333 for pattern in &self.exclude_patterns {
334 builder.add(Glob::new(pattern)?);
335 }
336 Ok(builder.build()?)
337 }
338}
339
340#[derive(Debug, Clone, Serialize, Deserialize)]
342pub struct AnalysisConfig {
343 pub analyze_content: bool,
345
346 pub compute_tokens: bool,
348
349 pub count_lines: bool,
351
352 pub detect_binary_content: bool,
354
355 pub language_overrides: HashMap<String, Language>,
357
358 pub custom_extensions: HashMap<String, Language>,
360
361 pub enable_caching: bool,
363
364 pub cache_dir: PathBuf,
366
367 pub cache_ttl: u64,
369
370 pub token_budget: Option<usize>,
372}
373
374impl Hash for AnalysisConfig {
376 fn hash<H: Hasher>(&self, state: &mut H) {
377 self.analyze_content.hash(state);
378 self.compute_tokens.hash(state);
379 self.count_lines.hash(state);
380 self.detect_binary_content.hash(state);
381 let mut lang_overrides: Vec<_> = self.language_overrides.iter().collect();
383 lang_overrides.sort_by_key(|(k, _)| *k);
384 lang_overrides.hash(state);
385
386 let mut custom_exts: Vec<_> = self.custom_extensions.iter().collect();
387 custom_exts.sort_by_key(|(k, _)| *k);
388 custom_exts.hash(state);
389 self.enable_caching.hash(state);
390 self.cache_dir.to_string_lossy().hash(state);
392 self.cache_ttl.hash(state);
393 self.token_budget.hash(state);
394 }
395}
396
397impl Default for AnalysisConfig {
398 fn default() -> Self {
399 Self {
400 analyze_content: true,
401 compute_tokens: true,
402 count_lines: true,
403 detect_binary_content: false,
404 language_overrides: HashMap::new(),
405 custom_extensions: HashMap::new(),
406 enable_caching: false,
407 cache_dir: PathBuf::from(".scribe-cache"),
408 cache_ttl: 3600, token_budget: None,
410 }
411 }
412}
413
414impl AnalysisConfig {
415 fn validate(&self) -> Result<()> {
416 if self.cache_ttl == 0 {
417 return Err(ScribeError::config_field(
418 "cache_ttl must be > 0",
419 "cache_ttl",
420 ));
421 }
422 Ok(())
423 }
424}
425
426#[derive(Debug, Clone, Serialize, Deserialize)]
428pub struct ScoringConfig {
429 pub weights: HeuristicWeights,
431
432 pub enable_advanced: bool,
434
435 pub custom_rules: Vec<CustomScoringRule>,
437
438 pub min_score_threshold: f64,
440
441 pub max_results: usize,
443
444 pub normalize_scores: bool,
446}
447
448impl Hash for ScoringConfig {
450 fn hash<H: Hasher>(&self, state: &mut H) {
451 self.weights.hash(state);
452 self.enable_advanced.hash(state);
453 self.custom_rules.hash(state);
454 self.min_score_threshold.to_bits().hash(state);
456 self.max_results.hash(state);
457 self.normalize_scores.hash(state);
458 }
459}
460
461impl Default for ScoringConfig {
462 fn default() -> Self {
463 Self {
464 weights: HeuristicWeights::default(),
465 enable_advanced: false,
466 custom_rules: vec![],
467 min_score_threshold: 0.0,
468 max_results: 0, normalize_scores: true,
470 }
471 }
472}
473
474impl ScoringConfig {
475 fn validate(&self) -> Result<()> {
476 if self.min_score_threshold < 0.0 || self.min_score_threshold > 1.0 {
477 return Err(ScribeError::config_field(
478 "min_score_threshold must be between 0.0 and 1.0",
479 "min_score_threshold",
480 ));
481 }
482 Ok(())
483 }
484}
485
486#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
488pub struct CustomScoringRule {
489 pub name: String,
491
492 pub pattern: String,
494
495 pub modifier: ScoreModifier,
497}
498
499#[derive(Debug, Clone, Serialize, Deserialize)]
501pub enum ScoreModifier {
502 Add(f64),
504 Multiply(f64),
506 Set(f64),
508 ConditionalBonus { condition: String, bonus: f64 },
510}
511
512impl Hash for ScoreModifier {
514 fn hash<H: Hasher>(&self, state: &mut H) {
515 match self {
516 ScoreModifier::Add(value) => {
517 0u8.hash(state); value.to_bits().hash(state);
519 }
520 ScoreModifier::Multiply(value) => {
521 1u8.hash(state); value.to_bits().hash(state);
523 }
524 ScoreModifier::Set(value) => {
525 2u8.hash(state); value.to_bits().hash(state);
527 }
528 ScoreModifier::ConditionalBonus { condition, bonus } => {
529 3u8.hash(state); condition.hash(state);
531 bonus.to_bits().hash(state);
532 }
533 }
534 }
535}
536
537#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
539pub struct PerformanceConfig {
540 pub max_memory_mb: usize,
542
543 pub analysis_timeout: u64,
545
546 pub global_timeout: u64,
548
549 pub batch_size: usize,
551
552 pub use_mmap: bool,
554
555 pub io_buffer_size: usize,
557}
558
559impl Default for PerformanceConfig {
560 fn default() -> Self {
561 Self {
562 max_memory_mb: 0, analysis_timeout: 30,
564 global_timeout: 300, batch_size: 100,
566 use_mmap: false,
567 io_buffer_size: 64 * 1024, }
569 }
570}
571
572impl PerformanceConfig {
573 fn validate(&self) -> Result<()> {
574 if self.analysis_timeout == 0 {
575 return Err(ScribeError::config_field(
576 "analysis_timeout must be > 0",
577 "analysis_timeout",
578 ));
579 }
580 if self.global_timeout == 0 {
581 return Err(ScribeError::config_field(
582 "global_timeout must be > 0",
583 "global_timeout",
584 ));
585 }
586 if self.batch_size == 0 {
587 return Err(ScribeError::config_field(
588 "batch_size must be > 0",
589 "batch_size",
590 ));
591 }
592 Ok(())
593 }
594
595 pub fn analysis_timeout_duration(&self) -> Duration {
597 Duration::from_secs(self.analysis_timeout)
598 }
599
600 pub fn global_timeout_duration(&self) -> Duration {
602 Duration::from_secs(self.global_timeout)
603 }
604}
605
606#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
608pub struct GitConfig {
609 pub enabled: bool,
611
612 pub respect_gitignore: bool,
614
615 pub include_status: bool,
617
618 pub analyze_history: bool,
620
621 pub history_depth: usize,
623
624 pub include_untracked: bool,
626
627 pub git_timeout: u64,
629}
630
631impl Default for GitConfig {
632 fn default() -> Self {
633 Self {
634 enabled: true,
635 respect_gitignore: true,
636 include_status: true,
637 analyze_history: false,
638 history_depth: 100,
639 include_untracked: false,
640 git_timeout: 30,
641 }
642 }
643}
644
645impl GitConfig {
646 fn validate(&self) -> Result<()> {
647 if self.git_timeout == 0 {
648 return Err(ScribeError::config_field(
649 "git_timeout must be > 0",
650 "git_timeout",
651 ));
652 }
653 Ok(())
654 }
655}
656
657#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
659pub struct FeatureFlags {
660 pub centrality_enabled: bool,
662
663 pub entrypoint_detection: bool,
665
666 pub examples_analysis: bool,
668
669 pub semantic_analysis: bool,
671
672 pub ml_features: bool,
674
675 pub experimental_scoring: bool,
677
678 pub scaling_enabled: bool,
680
681 pub auto_exclude_tests: bool,
683}
684
685impl Default for FeatureFlags {
686 fn default() -> Self {
687 Self {
688 centrality_enabled: false,
689 entrypoint_detection: false,
690 examples_analysis: false,
691 semantic_analysis: false,
692 ml_features: false,
693 experimental_scoring: false,
694 scaling_enabled: false,
695 auto_exclude_tests: false,
696 }
697 }
698}
699
700impl FeatureFlags {
701 fn validate(&self) -> Result<()> {
702 Ok(())
704 }
705
706 pub fn has_v2_features(&self) -> bool {
708 self.centrality_enabled || self.entrypoint_detection || self.examples_analysis
709 }
710
711 pub fn enabled_features(&self) -> Vec<&'static str> {
713 let mut features = Vec::new();
714
715 if self.centrality_enabled {
716 features.push("centrality");
717 }
718 if self.entrypoint_detection {
719 features.push("entrypoint_detection");
720 }
721 if self.examples_analysis {
722 features.push("examples_analysis");
723 }
724 if self.semantic_analysis {
725 features.push("semantic_analysis");
726 }
727 if self.ml_features {
728 features.push("ml_features");
729 }
730 if self.experimental_scoring {
731 features.push("experimental_scoring");
732 }
733 if self.scaling_enabled {
734 features.push("scaling");
735 }
736
737 features
738 }
739}
740
741#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
743pub struct OutputConfig {
744 pub format: OutputFormat,
746
747 pub include_content: bool,
749
750 pub include_score_breakdown: bool,
752
753 pub include_repo_stats: bool,
755
756 pub sort_by_score: bool,
758
759 pub pretty_json: bool,
761
762 pub custom_fields: Vec<String>,
764}
765
766#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
768pub enum OutputFormat {
769 Json,
770 JsonLines,
771 Csv,
772 Table,
773 Summary,
774}
775
776impl Default for OutputConfig {
777 fn default() -> Self {
778 Self {
779 format: OutputFormat::Json,
780 include_content: false,
781 include_score_breakdown: true,
782 include_repo_stats: true,
783 sort_by_score: true,
784 pretty_json: true,
785 custom_fields: vec![],
786 }
787 }
788}
789
790impl OutputConfig {
791 fn validate(&self) -> Result<()> {
792 Ok(())
794 }
795}
796
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks a few default values across sections.
    #[test]
    fn test_config_defaults() {
        let config = Config::default();
        assert_eq!(config.general.verbosity, 1);
        assert!(config.filtering.respect_gitignore);
        assert!(config.git.enabled);
        assert!(!config.features.centrality_enabled);
    }

    /// The default configuration validates; out-of-range verbosity and inverted
    /// size bounds do not.
    #[test]
    fn test_config_validation() {
        let mut config = Config::default();
        assert!(config.validate().is_ok());

        config.general.verbosity = 10;
        assert!(config.validate().is_err());

        config = Config::default();
        config.filtering.max_file_size = 100;
        config.filtering.min_file_size = 200;
        assert!(config.validate().is_err());
    }

    /// Round-trips the default configuration through a JSON file.
    #[test]
    fn test_config_file_io() {
        let config = Config::default();

        // Write inside a temporary directory so the JSON file is removed when the
        // directory guard is dropped, instead of leaving a stray file behind.
        let temp_dir = tempfile::tempdir().unwrap();
        let json_path = temp_dir.path().join("config.json");

        config.save_to_file(&json_path).unwrap();
        let loaded_config = Config::load_from_file(&json_path).unwrap();

        assert_eq!(config.general.verbosity, loaded_config.general.verbosity);
    }

    /// Valid patterns pass validation and compile into working glob sets.
    #[test]
    fn test_filtering_patterns() {
        let mut config = FilteringConfig::default();
        config.include_patterns.push("*.rs".to_string());
        config.exclude_patterns.push("target/**".to_string());

        assert!(config.validate().is_ok());

        let include_set = config.build_include_set().unwrap();
        assert!(include_set.is_some());

        let exclude_set = config.build_exclude_set().unwrap();
        assert!(exclude_set.is_match("target/debug/file.o"));
    }

    /// Flags start disabled and are reported by name once enabled.
    #[test]
    fn test_feature_flags() {
        let mut flags = FeatureFlags::default();
        assert!(!flags.has_v2_features());
        assert!(flags.enabled_features().is_empty());

        flags.centrality_enabled = true;
        flags.entrypoint_detection = true;

        assert!(flags.has_v2_features());
        let enabled = flags.enabled_features();
        assert!(enabled.contains(&"centrality"));
        assert!(enabled.contains(&"entrypoint_detection"));
    }

    /// Timeout accessors interpret the stored values as seconds.
    #[test]
    fn test_performance_config_timeouts() {
        let config = PerformanceConfig::default();
        assert_eq!(config.analysis_timeout_duration(), Duration::from_secs(30));
        assert_eq!(config.global_timeout_duration(), Duration::from_secs(300));
    }

    /// Equal configurations hash equally; a changed field changes the hash.
    #[test]
    fn test_config_hash() {
        let config1 = Config::default();
        let config2 = Config::default();

        let hash1 = config1.compute_hash();
        let hash2 = config2.compute_hash();

        assert_eq!(hash1, hash2);

        let mut config3 = Config::default();
        config3.general.verbosity = 2;
        let hash3 = config3.compute_hash();

        assert_ne!(hash1, hash3);
    }
}