1use std::collections::{HashMap, HashSet};
7use std::path::PathBuf;
8use std::time::Duration;
9use serde::{Deserialize, Serialize};
10use globset::{Glob, GlobSet, GlobSetBuilder};
11
12use crate::error::{Result, ScribeError};
13use crate::types::HeuristicWeights;
14use crate::file::Language;
15
16#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct Config {
19 pub general: GeneralConfig,
21
22 pub filtering: FilteringConfig,
24
25 pub analysis: AnalysisConfig,
27
28 pub scoring: ScoringConfig,
30
31 pub performance: PerformanceConfig,
33
34 pub git: GitConfig,
36
37 pub features: FeatureFlags,
39
40 pub output: OutputConfig,
42}
43
44impl Default for Config {
45 fn default() -> Self {
46 Self {
47 general: GeneralConfig::default(),
48 filtering: FilteringConfig::default(),
49 analysis: AnalysisConfig::default(),
50 scoring: ScoringConfig::default(),
51 performance: PerformanceConfig::default(),
52 git: GitConfig::default(),
53 features: FeatureFlags::default(),
54 output: OutputConfig::default(),
55 }
56 }
57}
58
59impl Config {
60 pub fn load_from_file<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
62 let content = std::fs::read_to_string(path.as_ref())
63 .map_err(|e| ScribeError::path_with_source(
64 "Failed to read config file",
65 path.as_ref(),
66 e
67 ))?;
68
69 let config: Config = match path.as_ref().extension().and_then(|s| s.to_str()) {
70 Some("json") => serde_json::from_str(&content)?,
71 Some("yaml") | Some("yml") => {
72 return Err(ScribeError::config("YAML support not yet implemented"));
73 }
74 Some("toml") => {
75 return Err(ScribeError::config("TOML support not yet implemented"));
76 }
77 _ => {
78 return Err(ScribeError::config(
79 "Unsupported config file format. Use .json, .yaml, or .toml"
80 ));
81 }
82 };
83
84 config.validate()?;
85 Ok(config)
86 }
87
88 pub fn save_to_file<P: AsRef<std::path::Path>>(&self, path: P) -> Result<()> {
90 let content = match path.as_ref().extension().and_then(|s| s.to_str()) {
91 Some("json") => serde_json::to_string_pretty(self)?,
92 Some("yaml") | Some("yml") => {
93 return Err(ScribeError::config("YAML support not yet implemented"));
94 }
95 Some("toml") => {
96 return Err(ScribeError::config("TOML support not yet implemented"));
97 }
98 _ => {
99 return Err(ScribeError::config(
100 "Unsupported config file format. Use .json, .yaml, or .toml"
101 ));
102 }
103 };
104
105 std::fs::write(path.as_ref(), content)
106 .map_err(|e| ScribeError::path_with_source(
107 "Failed to write config file",
108 path.as_ref(),
109 e
110 ))?;
111
112 Ok(())
113 }
114
115 pub fn validate(&self) -> Result<()> {
117 self.general.validate()?;
118 self.filtering.validate()?;
119 self.analysis.validate()?;
120 self.scoring.validate()?;
121 self.performance.validate()?;
122 self.git.validate()?;
123 self.features.validate()?;
124 self.output.validate()?;
125 Ok(())
126 }
127
128 pub fn merge_with(mut self, other: Config) -> Self {
130 self.general = other.general;
133 self.filtering = other.filtering;
134 self.analysis = other.analysis;
135 self.scoring = other.scoring;
136 self.performance = other.performance;
137 self.git = other.git;
138 self.features = other.features;
139 self.output = other.output;
140 self
141 }
142
143 pub fn compute_hash(&self) -> String {
145 use std::collections::hash_map::DefaultHasher;
146 use std::hash::{Hash, Hasher};
147
148 let serialized = serde_json::to_string(self).unwrap_or_default();
149 let mut hasher = DefaultHasher::new();
150 serialized.hash(&mut hasher);
151 format!("{:x}", hasher.finish())
152 }
153}
154
155#[derive(Debug, Clone, Serialize, Deserialize)]
157pub struct GeneralConfig {
158 pub verbosity: u8,
160
161 pub show_progress: bool,
163
164 pub use_colors: bool,
166
167 pub max_threads: usize,
169
170 pub working_dir: Option<PathBuf>,
172}
173
174impl Default for GeneralConfig {
175 fn default() -> Self {
176 Self {
177 verbosity: 1,
178 show_progress: true,
179 use_colors: true,
180 max_threads: 0, working_dir: None,
182 }
183 }
184}
185
186impl GeneralConfig {
187 fn validate(&self) -> Result<()> {
188 if self.verbosity > 4 {
189 return Err(ScribeError::config_field(
190 "Verbosity must be between 0 and 4",
191 "verbosity"
192 ));
193 }
194 Ok(())
195 }
196}
197
198#[derive(Debug, Clone, Serialize, Deserialize)]
200pub struct FilteringConfig {
201 pub include_patterns: Vec<String>,
203
204 pub exclude_patterns: Vec<String>,
206
207 pub max_file_size: u64,
209
210 pub min_file_size: u64,
212
213 pub include_languages: HashSet<Language>,
215
216 pub exclude_languages: HashSet<Language>,
218
219 pub follow_symlinks: bool,
221
222 pub include_hidden: bool,
224
225 pub respect_gitignore: bool,
227
228 pub ignore_files: Vec<PathBuf>,
230}
231
232impl Default for FilteringConfig {
233 fn default() -> Self {
234 Self {
235 include_patterns: vec![],
236 exclude_patterns: vec![
237 "node_modules/**".to_string(),
238 "target/**".to_string(),
239 ".git/**".to_string(),
240 "build/**".to_string(),
241 "dist/**".to_string(),
242 "__pycache__/**".to_string(),
243 "*.pyc".to_string(),
244 ".DS_Store".to_string(),
245 ],
246 max_file_size: 10 * 1024 * 1024, min_file_size: 0,
248 include_languages: HashSet::new(), exclude_languages: HashSet::new(),
250 follow_symlinks: false,
251 include_hidden: false,
252 respect_gitignore: true,
253 ignore_files: vec![],
254 }
255 }
256}
257
258impl FilteringConfig {
259 fn validate(&self) -> Result<()> {
260 if self.max_file_size < self.min_file_size {
261 return Err(ScribeError::config(
262 "max_file_size must be >= min_file_size"
263 ));
264 }
265
266 for pattern in &self.include_patterns {
268 Glob::new(pattern).map_err(|e| ScribeError::pattern(
269 format!("Invalid include pattern: {}", e),
270 pattern
271 ))?;
272 }
273
274 for pattern in &self.exclude_patterns {
275 Glob::new(pattern).map_err(|e| ScribeError::pattern(
276 format!("Invalid exclude pattern: {}", e),
277 pattern
278 ))?;
279 }
280
281 Ok(())
282 }
283
284 pub fn build_include_set(&self) -> Result<Option<GlobSet>> {
286 if self.include_patterns.is_empty() {
287 return Ok(None);
288 }
289
290 let mut builder = GlobSetBuilder::new();
291 for pattern in &self.include_patterns {
292 builder.add(Glob::new(pattern)?);
293 }
294 Ok(Some(builder.build()?))
295 }
296
297 pub fn build_exclude_set(&self) -> Result<GlobSet> {
299 let mut builder = GlobSetBuilder::new();
300 for pattern in &self.exclude_patterns {
301 builder.add(Glob::new(pattern)?);
302 }
303 Ok(builder.build()?)
304 }
305}
306
307#[derive(Debug, Clone, Serialize, Deserialize)]
309pub struct AnalysisConfig {
310 pub analyze_content: bool,
312
313 pub compute_tokens: bool,
315
316 pub count_lines: bool,
318
319 pub detect_binary_content: bool,
321
322 pub language_overrides: HashMap<String, Language>,
324
325 pub custom_extensions: HashMap<String, Language>,
327
328 pub enable_caching: bool,
330
331 pub cache_dir: PathBuf,
333
334 pub cache_ttl: u64,
336}
337
338impl Default for AnalysisConfig {
339 fn default() -> Self {
340 Self {
341 analyze_content: true,
342 compute_tokens: true,
343 count_lines: true,
344 detect_binary_content: false,
345 language_overrides: HashMap::new(),
346 custom_extensions: HashMap::new(),
347 enable_caching: false,
348 cache_dir: PathBuf::from(".scribe-cache"),
349 cache_ttl: 3600, }
351 }
352}
353
354impl AnalysisConfig {
355 fn validate(&self) -> Result<()> {
356 if self.cache_ttl == 0 {
357 return Err(ScribeError::config_field(
358 "cache_ttl must be > 0",
359 "cache_ttl"
360 ));
361 }
362 Ok(())
363 }
364}
365
366#[derive(Debug, Clone, Serialize, Deserialize)]
368pub struct ScoringConfig {
369 pub weights: HeuristicWeights,
371
372 pub enable_advanced: bool,
374
375 pub custom_rules: Vec<CustomScoringRule>,
377
378 pub min_score_threshold: f64,
380
381 pub max_results: usize,
383
384 pub normalize_scores: bool,
386}
387
388impl Default for ScoringConfig {
389 fn default() -> Self {
390 Self {
391 weights: HeuristicWeights::default(),
392 enable_advanced: false,
393 custom_rules: vec![],
394 min_score_threshold: 0.0,
395 max_results: 0, normalize_scores: true,
397 }
398 }
399}
400
401impl ScoringConfig {
402 fn validate(&self) -> Result<()> {
403 if self.min_score_threshold < 0.0 || self.min_score_threshold > 1.0 {
404 return Err(ScribeError::config_field(
405 "min_score_threshold must be between 0.0 and 1.0",
406 "min_score_threshold"
407 ));
408 }
409 Ok(())
410 }
411}
412
413#[derive(Debug, Clone, Serialize, Deserialize)]
415pub struct CustomScoringRule {
416 pub name: String,
418
419 pub pattern: String,
421
422 pub modifier: ScoreModifier,
424}
425
426#[derive(Debug, Clone, Serialize, Deserialize)]
428pub enum ScoreModifier {
429 Add(f64),
431 Multiply(f64),
433 Set(f64),
435 ConditionalBonus { condition: String, bonus: f64 },
437}
438
439#[derive(Debug, Clone, Serialize, Deserialize)]
441pub struct PerformanceConfig {
442 pub max_memory_mb: usize,
444
445 pub analysis_timeout: u64,
447
448 pub global_timeout: u64,
450
451 pub batch_size: usize,
453
454 pub use_mmap: bool,
456
457 pub io_buffer_size: usize,
459}
460
461impl Default for PerformanceConfig {
462 fn default() -> Self {
463 Self {
464 max_memory_mb: 0, analysis_timeout: 30,
466 global_timeout: 300, batch_size: 100,
468 use_mmap: false,
469 io_buffer_size: 64 * 1024, }
471 }
472}
473
474impl PerformanceConfig {
475 fn validate(&self) -> Result<()> {
476 if self.analysis_timeout == 0 {
477 return Err(ScribeError::config_field(
478 "analysis_timeout must be > 0",
479 "analysis_timeout"
480 ));
481 }
482 if self.global_timeout == 0 {
483 return Err(ScribeError::config_field(
484 "global_timeout must be > 0",
485 "global_timeout"
486 ));
487 }
488 if self.batch_size == 0 {
489 return Err(ScribeError::config_field(
490 "batch_size must be > 0",
491 "batch_size"
492 ));
493 }
494 Ok(())
495 }
496
497 pub fn analysis_timeout_duration(&self) -> Duration {
499 Duration::from_secs(self.analysis_timeout)
500 }
501
502 pub fn global_timeout_duration(&self) -> Duration {
504 Duration::from_secs(self.global_timeout)
505 }
506}
507
508#[derive(Debug, Clone, Serialize, Deserialize)]
510pub struct GitConfig {
511 pub enabled: bool,
513
514 pub respect_gitignore: bool,
516
517 pub include_status: bool,
519
520 pub analyze_history: bool,
522
523 pub history_depth: usize,
525
526 pub include_untracked: bool,
528
529 pub git_timeout: u64,
531}
532
533impl Default for GitConfig {
534 fn default() -> Self {
535 Self {
536 enabled: true,
537 respect_gitignore: true,
538 include_status: true,
539 analyze_history: false,
540 history_depth: 100,
541 include_untracked: false,
542 git_timeout: 30,
543 }
544 }
545}
546
547impl GitConfig {
548 fn validate(&self) -> Result<()> {
549 if self.git_timeout == 0 {
550 return Err(ScribeError::config_field(
551 "git_timeout must be > 0",
552 "git_timeout"
553 ));
554 }
555 Ok(())
556 }
557}
558
559#[derive(Debug, Clone, Serialize, Deserialize)]
561pub struct FeatureFlags {
562 pub centrality_enabled: bool,
564
565 pub entrypoint_detection: bool,
567
568 pub examples_analysis: bool,
570
571 pub semantic_analysis: bool,
573
574 pub ml_features: bool,
576
577 pub experimental_scoring: bool,
579}
580
581impl Default for FeatureFlags {
582 fn default() -> Self {
583 Self {
584 centrality_enabled: false,
585 entrypoint_detection: false,
586 examples_analysis: false,
587 semantic_analysis: false,
588 ml_features: false,
589 experimental_scoring: false,
590 }
591 }
592}
593
594impl FeatureFlags {
595 fn validate(&self) -> Result<()> {
596 Ok(())
598 }
599
600 pub fn has_v2_features(&self) -> bool {
602 self.centrality_enabled || self.entrypoint_detection || self.examples_analysis
603 }
604
605 pub fn enabled_features(&self) -> Vec<&'static str> {
607 let mut features = Vec::new();
608
609 if self.centrality_enabled {
610 features.push("centrality");
611 }
612 if self.entrypoint_detection {
613 features.push("entrypoint_detection");
614 }
615 if self.examples_analysis {
616 features.push("examples_analysis");
617 }
618 if self.semantic_analysis {
619 features.push("semantic_analysis");
620 }
621 if self.ml_features {
622 features.push("ml_features");
623 }
624 if self.experimental_scoring {
625 features.push("experimental_scoring");
626 }
627
628 features
629 }
630}
631
632#[derive(Debug, Clone, Serialize, Deserialize)]
634pub struct OutputConfig {
635 pub format: OutputFormat,
637
638 pub include_content: bool,
640
641 pub include_score_breakdown: bool,
643
644 pub include_repo_stats: bool,
646
647 pub sort_by_score: bool,
649
650 pub pretty_json: bool,
652
653 pub custom_fields: Vec<String>,
655}
656
657#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
659pub enum OutputFormat {
660 Json,
661 JsonLines,
662 Csv,
663 Table,
664 Summary,
665}
666
667impl Default for OutputConfig {
668 fn default() -> Self {
669 Self {
670 format: OutputFormat::Json,
671 include_content: false,
672 include_score_breakdown: true,
673 include_repo_stats: true,
674 sort_by_score: true,
675 pretty_json: true,
676 custom_fields: vec![],
677 }
678 }
679}
680
681impl OutputConfig {
682 fn validate(&self) -> Result<()> {
683 Ok(())
685 }
686}
687
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Spot-checks one default from several sections.
    #[test]
    fn test_config_defaults() {
        let config = Config::default();
        assert_eq!(config.general.verbosity, 1);
        assert!(config.filtering.respect_gitignore);
        assert!(config.git.enabled);
        assert!(!config.features.centrality_enabled);
    }

    /// The default config validates; out-of-range values are rejected.
    #[test]
    fn test_config_validation() {
        let mut config = Config::default();
        assert!(config.validate().is_ok());

        // Verbosity above the accepted maximum of 4.
        config.general.verbosity = 10;
        assert!(config.validate().is_err());

        // Size bounds that contradict each other.
        config = Config::default();
        config.filtering.max_file_size = 100;
        config.filtering.min_file_size = 200;
        assert!(config.validate().is_err());
    }

    /// Saving and reloading a config through a JSON file preserves its values.
    #[test]
    fn test_config_file_io() {
        let config = Config::default();

        // Write inside a temporary directory so the JSON file is removed when
        // `dir` is dropped. Deriving `<tempfile>.json` from a `NamedTempFile`
        // would create a sibling file that the guard never cleans up.
        let dir = tempdir().unwrap();
        let json_path = dir.path().join("config.json");

        config.save_to_file(&json_path).unwrap();
        let loaded_config = Config::load_from_file(&json_path).unwrap();

        assert_eq!(config.general.verbosity, loaded_config.general.verbosity);
        assert_eq!(
            config.filtering.exclude_patterns,
            loaded_config.filtering.exclude_patterns
        );
        assert_eq!(
            config.performance.batch_size,
            loaded_config.performance.batch_size
        );
        assert_eq!(config.output.format, loaded_config.output.format);
    }

    /// Valid patterns pass validation and compile into working glob sets.
    #[test]
    fn test_filtering_patterns() {
        let mut config = FilteringConfig::default();
        config.include_patterns.push("*.rs".to_string());
        config.exclude_patterns.push("target/**".to_string());

        assert!(config.validate().is_ok());

        let include_set = config.build_include_set().unwrap();
        assert!(include_set.is_some());

        let exclude_set = config.build_exclude_set().unwrap();
        assert!(exclude_set.is_match("target/debug/file.o"));
    }

    /// Flag queries reflect which flags are switched on.
    #[test]
    fn test_feature_flags() {
        let mut flags = FeatureFlags::default();
        assert!(!flags.has_v2_features());
        assert!(flags.enabled_features().is_empty());

        flags.centrality_enabled = true;
        flags.entrypoint_detection = true;

        assert!(flags.has_v2_features());
        let enabled = flags.enabled_features();
        assert!(enabled.contains(&"centrality"));
        assert!(enabled.contains(&"entrypoint_detection"));
    }

    /// Timeout accessors convert the default second counts into durations.
    #[test]
    fn test_performance_config_timeouts() {
        let config = PerformanceConfig::default();
        assert_eq!(config.analysis_timeout_duration(), Duration::from_secs(30));
        assert_eq!(config.global_timeout_duration(), Duration::from_secs(300));
    }

    /// Equal configs hash equally; a changed field changes the hash.
    #[test]
    fn test_config_hash() {
        let config1 = Config::default();
        let config2 = Config::default();

        let hash1 = config1.compute_hash();
        let hash2 = config2.compute_hash();

        assert_eq!(hash1, hash2);

        let mut config3 = Config::default();
        config3.general.verbosity = 2;
        let hash3 = config3.compute_hash();

        assert_ne!(hash1, hash3);
    }
}