use globset::{Glob, GlobSet, GlobSetBuilder};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::hash::{Hash, Hasher};
use std::path::PathBuf;
use std::time::Duration;
use crate::error::{Result, ScribeError};
use crate::file::Language;
use crate::types::HeuristicWeights;
/// Top-level configuration for a run, grouping every tunable by concern.
///
/// `Hash` is derived so an entire configuration can be fingerprinted via
/// [`Config::compute_hash`] (e.g. for cache invalidation).
#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
pub struct Config {
/// Process-wide settings: verbosity, colors, threading, working directory.
pub general: GeneralConfig,
/// Rules deciding which files are analyzed.
pub filtering: FilteringConfig,
/// Content-analysis and caching options.
pub analysis: AnalysisConfig,
/// Heuristic scoring weights, rules, and thresholds.
pub scoring: ScoringConfig,
/// Resource limits and timeouts.
pub performance: PerformanceConfig,
/// Git integration settings.
pub git: GitConfig,
/// Opt-in feature toggles.
pub features: FeatureFlags,
/// Output format and content options.
pub output: OutputConfig,
}
impl Default for Config {
fn default() -> Self {
Self {
general: GeneralConfig::default(),
filtering: FilteringConfig::default(),
analysis: AnalysisConfig::default(),
scoring: ScoringConfig::default(),
performance: PerformanceConfig::default(),
git: GitConfig::default(),
features: FeatureFlags::default(),
output: OutputConfig::default(),
}
}
}
impl Config {
    /// Loads a configuration from `path`, dispatching on the file extension.
    ///
    /// Only `.json` is implemented today; `.yaml`/`.yml` and `.toml` are
    /// recognized but rejected with explicit "not yet implemented" errors.
    /// The parsed configuration is validated before it is returned.
    pub fn load_from_file<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
        // Borrow once instead of repeating `path.as_ref()` at every use site.
        let path = path.as_ref();
        let content = std::fs::read_to_string(path)
            .map_err(|e| ScribeError::path_with_source("Failed to read config file", path, e))?;
        let config: Config = match path.extension().and_then(|s| s.to_str()) {
            Some("json") => serde_json::from_str(&content)?,
            Some("yaml") | Some("yml") => {
                return Err(ScribeError::config("YAML support not yet implemented"));
            }
            Some("toml") => {
                return Err(ScribeError::config("TOML support not yet implemented"));
            }
            _ => {
                return Err(ScribeError::config(
                    "Unsupported config file format. Use .json, .yaml, or .toml",
                ));
            }
        };
        config.validate()?;
        Ok(config)
    }
    /// Serializes this configuration to `path`; the extension picks the format.
    ///
    /// Mirrors [`Config::load_from_file`]: only `.json` is currently supported.
    pub fn save_to_file<P: AsRef<std::path::Path>>(&self, path: P) -> Result<()> {
        let path = path.as_ref();
        let content = match path.extension().and_then(|s| s.to_str()) {
            Some("json") => serde_json::to_string_pretty(self)?,
            Some("yaml") | Some("yml") => {
                return Err(ScribeError::config("YAML support not yet implemented"));
            }
            Some("toml") => {
                return Err(ScribeError::config("TOML support not yet implemented"));
            }
            _ => {
                return Err(ScribeError::config(
                    "Unsupported config file format. Use .json, .yaml, or .toml",
                ));
            }
        };
        std::fs::write(path, content)
            .map_err(|e| ScribeError::path_with_source("Failed to write config file", path, e))?;
        Ok(())
    }
    /// Runs every section's validation, stopping at the first failure.
    pub fn validate(&self) -> Result<()> {
        self.general.validate()?;
        self.filtering.validate()?;
        self.analysis.validate()?;
        self.scoring.validate()?;
        self.performance.validate()?;
        self.git.validate()?;
        self.features.validate()?;
        self.output.validate()?;
        Ok(())
    }
    /// Combines two configurations, with `other` taking precedence.
    ///
    /// NOTE(review): every section of `self` is wholesale-replaced by
    /// `other`'s, making this equivalent to returning `other` — no field-wise
    /// merging happens. Confirm whether partial merging was intended.
    pub fn merge_with(mut self, other: Config) -> Self {
        self.general = other.general;
        self.filtering = other.filtering;
        self.analysis = other.analysis;
        self.scoring = other.scoring;
        self.performance = other.performance;
        self.git = other.git;
        self.features = other.features;
        self.output = other.output;
        self
    }
    /// Returns a hex fingerprint of the whole configuration.
    ///
    /// Uses `DefaultHasher`, which is deterministic within one build but not
    /// guaranteed stable across Rust releases — suitable for same-binary
    /// cache keys, not for durable identifiers.
    pub fn compute_hash(&self) -> String {
        use std::collections::hash_map::DefaultHasher;
        let mut hasher = DefaultHasher::new();
        self.hash(&mut hasher);
        format!("{:x}", hasher.finish())
    }
}
/// Process-wide settings not tied to any single analysis stage.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GeneralConfig {
/// Log verbosity; `validate` accepts 0 through 4.
pub verbosity: u8,
/// Whether to render a progress indicator.
pub show_progress: bool,
/// Whether terminal output is colorized.
pub use_colors: bool,
/// Worker thread count; defaults to 0 — presumably "auto-detect", confirm at call sites.
pub max_threads: usize,
/// Optional working-directory override; `None` presumably keeps the process cwd.
pub working_dir: Option<PathBuf>,
}
impl Hash for GeneralConfig {
fn hash<H: Hasher>(&self, state: &mut H) {
self.verbosity.hash(state);
self.show_progress.hash(state);
self.use_colors.hash(state);
self.max_threads.hash(state);
if let Some(ref path) = self.working_dir {
path.to_string_lossy().hash(state);
} else {
None::<String>.hash(state);
}
}
}
impl Default for GeneralConfig {
fn default() -> Self {
Self {
verbosity: 1,
show_progress: true,
use_colors: true,
max_threads: 0, working_dir: None,
}
}
}
impl GeneralConfig {
    /// Checks that verbosity lies in the supported 0..=4 range.
    fn validate(&self) -> Result<()> {
        match self.verbosity {
            0..=4 => Ok(()),
            _ => Err(ScribeError::config_field(
                "Verbosity must be between 0 and 4",
                "verbosity",
            )),
        }
    }
}
/// Rules deciding which files enter the analysis set.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FilteringConfig {
/// Glob patterns files must match to be included; when empty,
/// `build_include_set` returns `None` (callers decide what that means).
pub include_patterns: Vec<String>,
/// Glob patterns excluding matching files.
pub exclude_patterns: Vec<String>,
/// Upper file-size bound in bytes; must be >= `min_file_size` (see `validate`).
pub max_file_size: u64,
/// Lower file-size bound in bytes.
pub min_file_size: u64,
/// Languages to include — presumably a restriction when non-empty; confirm at call sites.
pub include_languages: HashSet<Language>,
/// Languages to skip.
pub exclude_languages: HashSet<Language>,
/// Whether symbolic links are followed during traversal.
pub follow_symlinks: bool,
/// Whether hidden (dot) files are considered.
pub include_hidden: bool,
/// Whether `.gitignore` rules are honored.
pub respect_gitignore: bool,
/// Additional ignore files to consult.
pub ignore_files: Vec<PathBuf>,
}
impl Hash for FilteringConfig {
fn hash<H: Hasher>(&self, state: &mut H) {
self.include_patterns.hash(state);
self.exclude_patterns.hash(state);
self.max_file_size.hash(state);
self.min_file_size.hash(state);
let mut include_langs: Vec<_> = self.include_languages.iter().collect();
include_langs.sort();
include_langs.hash(state);
let mut exclude_langs: Vec<_> = self.exclude_languages.iter().collect();
exclude_langs.sort();
exclude_langs.hash(state);
self.follow_symlinks.hash(state);
self.include_hidden.hash(state);
self.respect_gitignore.hash(state);
for path in &self.ignore_files {
path.to_string_lossy().hash(state);
}
}
}
impl Default for FilteringConfig {
fn default() -> Self {
Self {
include_patterns: vec![],
exclude_patterns: vec![
"node_modules/**".to_string(),
"target/**".to_string(),
".git/**".to_string(),
"build/**".to_string(),
"dist/**".to_string(),
"__pycache__/**".to_string(),
"*.pyc".to_string(),
".DS_Store".to_string(),
],
max_file_size: 10 * 1024 * 1024, min_file_size: 0,
include_languages: HashSet::new(), exclude_languages: HashSet::new(),
follow_symlinks: false,
include_hidden: false,
respect_gitignore: true,
ignore_files: vec![],
}
}
}
impl FilteringConfig {
    /// Validates the size bounds and compile-checks every glob pattern.
    fn validate(&self) -> Result<()> {
        if self.max_file_size < self.min_file_size {
            return Err(ScribeError::config(
                "max_file_size must be >= min_file_size",
            ));
        }
        // The include/exclude loops were duplicated verbatim; share one helper.
        Self::validate_patterns(&self.include_patterns, "include")?;
        Self::validate_patterns(&self.exclude_patterns, "exclude")?;
        Ok(())
    }
    /// Compile-checks every glob in `patterns`; `kind` names the list in the
    /// error message ("include" or "exclude"), keeping messages identical to
    /// the previous per-list loops.
    fn validate_patterns(patterns: &[String], kind: &str) -> Result<()> {
        for pattern in patterns {
            Glob::new(pattern).map_err(|e| {
                ScribeError::pattern(format!("Invalid {} pattern: {}", kind, e), pattern)
            })?;
        }
        Ok(())
    }
    /// Builds the compiled include set, or `None` when no include patterns
    /// are configured (so callers can distinguish "no restriction").
    pub fn build_include_set(&self) -> Result<Option<GlobSet>> {
        if self.include_patterns.is_empty() {
            return Ok(None);
        }
        let mut builder = GlobSetBuilder::new();
        for pattern in &self.include_patterns {
            builder.add(Glob::new(pattern)?);
        }
        Ok(Some(builder.build()?))
    }
    /// Builds the compiled exclude set (possibly empty).
    pub fn build_exclude_set(&self) -> Result<GlobSet> {
        let mut builder = GlobSetBuilder::new();
        for pattern in &self.exclude_patterns {
            builder.add(Glob::new(pattern)?);
        }
        Ok(builder.build()?)
    }
}
/// Options controlling per-file content analysis and its cache.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalysisConfig {
/// Whether file contents are analyzed at all.
pub analyze_content: bool,
/// Whether token counts are computed.
pub compute_tokens: bool,
/// Whether line counts are computed.
pub count_lines: bool,
/// Whether file contents are sniffed for binary data.
pub detect_binary_content: bool,
/// Per-key language overrides — key semantics (path? file name?) not visible here; confirm.
pub language_overrides: HashMap<String, Language>,
/// Extra extension-to-language mappings.
pub custom_extensions: HashMap<String, Language>,
/// Whether analysis results are cached.
pub enable_caching: bool,
/// Cache directory (default ".scribe-cache").
pub cache_dir: PathBuf,
/// Cache time-to-live; must be > 0 (default 3600 — presumably seconds).
pub cache_ttl: u64,
/// Optional cap on total tokens.
pub token_budget: Option<usize>,
}
impl Hash for AnalysisConfig {
    /// Manual `Hash`: map entries are hashed in key order so the digest does
    /// not depend on `HashMap` iteration order; the path is hashed via its
    /// lossy UTF-8 form.
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.analyze_content.hash(state);
        self.compute_tokens.hash(state);
        self.count_lines.hash(state);
        self.detect_binary_content.hash(state);
        // Map keys are unique, so an unstable sort yields a fixed order.
        let mut overrides: Vec<_> = self.language_overrides.iter().collect();
        overrides.sort_unstable_by_key(|&(key, _)| key);
        overrides.hash(state);
        let mut extensions: Vec<_> = self.custom_extensions.iter().collect();
        extensions.sort_unstable_by_key(|&(key, _)| key);
        extensions.hash(state);
        self.enable_caching.hash(state);
        self.cache_dir.to_string_lossy().hash(state);
        self.cache_ttl.hash(state);
        self.token_budget.hash(state);
    }
}
impl Default for AnalysisConfig {
fn default() -> Self {
Self {
analyze_content: true,
compute_tokens: true,
count_lines: true,
detect_binary_content: false,
language_overrides: HashMap::new(),
custom_extensions: HashMap::new(),
enable_caching: false,
cache_dir: PathBuf::from(".scribe-cache"),
cache_ttl: 3600, token_budget: None,
}
}
}
impl AnalysisConfig {
    /// Rejects a zero cache TTL (enforced even when caching is disabled).
    fn validate(&self) -> Result<()> {
        match self.cache_ttl {
            0 => Err(ScribeError::config_field(
                "cache_ttl must be > 0",
                "cache_ttl",
            )),
            _ => Ok(()),
        }
    }
}
/// Parameters for the heuristic file-scoring stage.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScoringConfig {
/// Weights applied by the scoring heuristics.
pub weights: HeuristicWeights,
/// User-supplied pattern-based score adjustments.
pub custom_rules: Vec<CustomScoringRule>,
/// Minimum acceptable score; `validate` requires it to lie in [0.0, 1.0].
pub min_score_threshold: f64,
/// Maximum number of results — 0 presumably means "unlimited"; confirm at call sites.
pub max_results: usize,
/// Whether scores are normalized.
pub normalize_scores: bool,
}
impl Hash for ScoringConfig {
fn hash<H: Hasher>(&self, state: &mut H) {
self.weights.hash(state);
self.custom_rules.hash(state);
self.min_score_threshold.to_bits().hash(state);
self.max_results.hash(state);
self.normalize_scores.hash(state);
}
}
impl Default for ScoringConfig {
fn default() -> Self {
Self {
weights: HeuristicWeights::default(),
custom_rules: vec![],
min_score_threshold: 0.0,
max_results: 0, normalize_scores: true,
}
}
}
impl ScoringConfig {
    /// Validates that `min_score_threshold` lies in `[0.0, 1.0]`.
    ///
    /// Uses `RangeInclusive::contains`, which also rejects `NaN`: the
    /// previous `< 0.0 || > 1.0` check let `NaN` slip through because every
    /// comparison against `NaN` is false.
    fn validate(&self) -> Result<()> {
        if !(0.0..=1.0).contains(&self.min_score_threshold) {
            return Err(ScribeError::config_field(
                "min_score_threshold must be between 0.0 and 1.0",
                "min_score_threshold",
            ));
        }
        Ok(())
    }
}
/// A named, pattern-triggered score adjustment.
#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
pub struct CustomScoringRule {
/// Human-readable rule name.
pub name: String,
/// Pattern selecting which files the rule applies to — presumably a glob
/// like the filtering patterns; confirm where rules are evaluated.
pub pattern: String,
/// How a matched file's score is changed.
pub modifier: ScoreModifier,
}
/// The ways a custom rule can alter a file's score.
///
/// `Hash` is implemented manually below because the f64 payloads do not
/// implement `Hash`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ScoreModifier {
/// Add a fixed amount to the score.
Add(f64),
/// Multiply the score by a factor.
Multiply(f64),
/// Replace the score with a fixed value.
Set(f64),
/// Add `bonus` when `condition` holds — condition syntax not visible here; confirm with the evaluator.
ConditionalBonus { condition: String, bonus: f64 },
}
impl Hash for ScoreModifier {
    /// Manual `Hash`: each variant contributes a stable u8 tag, any string
    /// payload, then the f64 payload via its raw bit pattern — the same
    /// byte stream as before, just computed through one match.
    fn hash<H: Hasher>(&self, state: &mut H) {
        let (tag, payload_bits, condition) = match self {
            ScoreModifier::Add(value) => (0u8, value.to_bits(), None),
            ScoreModifier::Multiply(value) => (1u8, value.to_bits(), None),
            ScoreModifier::Set(value) => (2u8, value.to_bits(), None),
            ScoreModifier::ConditionalBonus { condition, bonus } => {
                (3u8, bonus.to_bits(), Some(condition))
            }
        };
        tag.hash(state);
        if let Some(cond) = condition {
            cond.hash(state);
        }
        payload_bits.hash(state);
    }
}
/// Resource limits and timeouts for a run.
#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
pub struct PerformanceConfig {
/// Memory budget in MB — 0 presumably means "no limit"; confirm at call sites.
pub max_memory_mb: usize,
/// Per-analysis timeout in seconds (see `analysis_timeout_duration`); must be > 0.
pub analysis_timeout: u64,
/// Whole-run timeout in seconds (see `global_timeout_duration`); must be > 0.
pub global_timeout: u64,
/// Number of items processed per batch; must be > 0.
pub batch_size: usize,
/// Whether files are memory-mapped instead of read.
pub use_mmap: bool,
/// I/O buffer size in bytes (default 64 KiB).
pub io_buffer_size: usize,
}
impl Default for PerformanceConfig {
fn default() -> Self {
Self {
max_memory_mb: 0, analysis_timeout: 30,
global_timeout: 300, batch_size: 100,
use_mmap: false,
io_buffer_size: 64 * 1024, }
}
}
impl PerformanceConfig {
    /// Rejects any zero timeout or batch size; the first failing field wins.
    fn validate(&self) -> Result<()> {
        // Small local guard shared by all three checks.
        fn require(ok: bool, msg: &'static str, field: &'static str) -> Result<()> {
            if ok {
                Ok(())
            } else {
                Err(ScribeError::config_field(msg, field))
            }
        }
        require(
            self.analysis_timeout != 0,
            "analysis_timeout must be > 0",
            "analysis_timeout",
        )?;
        require(
            self.global_timeout != 0,
            "global_timeout must be > 0",
            "global_timeout",
        )?;
        require(self.batch_size != 0, "batch_size must be > 0", "batch_size")?;
        Ok(())
    }
    /// Per-analysis timeout as a `Duration` (the field stores seconds).
    pub fn analysis_timeout_duration(&self) -> Duration {
        Duration::from_secs(self.analysis_timeout)
    }
    /// Whole-run timeout as a `Duration` (the field stores seconds).
    pub fn global_timeout_duration(&self) -> Duration {
        Duration::from_secs(self.global_timeout)
    }
}
/// Git integration settings.
#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
pub struct GitConfig {
/// Master switch for git-related features.
pub enabled: bool,
/// Whether `.gitignore` rules are honored.
/// NOTE(review): FilteringConfig has a field of the same name — confirm which one wins.
pub respect_gitignore: bool,
/// Whether per-file git status is collected.
pub include_status: bool,
/// Whether commit history is analyzed (opt-in; off by default).
pub analyze_history: bool,
/// How far back history analysis looks — presumably a commit count; confirm.
pub history_depth: usize,
/// Whether untracked files are included.
pub include_untracked: bool,
/// Timeout for git operations; must be > 0 — presumably seconds; confirm.
pub git_timeout: u64,
}
impl Default for GitConfig {
fn default() -> Self {
Self {
enabled: true,
respect_gitignore: true,
include_status: true,
analyze_history: false,
history_depth: 100,
include_untracked: false,
git_timeout: 30,
}
}
}
impl GitConfig {
    /// Rejects a zero git-operation timeout.
    fn validate(&self) -> Result<()> {
        match self.git_timeout {
            0 => Err(ScribeError::config_field(
                "git_timeout must be > 0",
                "git_timeout",
            )),
            _ => Ok(()),
        }
    }
}
/// Opt-in toggles for experimental or optional capabilities.
/// All flags default to `false` (see the `Default` impl).
#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
pub struct FeatureFlags {
/// Enables centrality analysis (a "v2" feature — see `has_v2_features`).
pub centrality_enabled: bool,
/// Enables entrypoint detection (a "v2" feature).
pub entrypoint_detection: bool,
/// Enables examples analysis (a "v2" feature).
pub examples_analysis: bool,
/// Enables semantic analysis.
pub semantic_analysis: bool,
/// Enables ML-based features.
pub ml_features: bool,
/// Enables experimental scoring.
pub experimental_scoring: bool,
/// Enables scaling (reported as "scaling" by `enabled_features`).
pub scaling_enabled: bool,
/// Automatically exclude test files.
/// NOTE(review): this flag is not reported by `enabled_features` — confirm intent.
pub auto_exclude_tests: bool,
}
impl Default for FeatureFlags {
fn default() -> Self {
Self {
centrality_enabled: false,
entrypoint_detection: false,
examples_analysis: false,
semantic_analysis: false,
ml_features: false,
experimental_scoring: false,
scaling_enabled: false,
auto_exclude_tests: false,
}
}
}
impl FeatureFlags {
    /// No invariants to enforce: any combination of flags is legal.
    fn validate(&self) -> Result<()> {
        Ok(())
    }
    /// True when any of the "v2" analysis features is switched on.
    pub fn has_v2_features(&self) -> bool {
        [
            self.centrality_enabled,
            self.entrypoint_detection,
            self.examples_analysis,
        ]
        .iter()
        .any(|&flag| flag)
    }
    /// Lists the names of all enabled features, table-driven.
    ///
    /// NOTE(review): `auto_exclude_tests` has no entry here (matching the
    /// previous behavior) — confirm whether that omission is intentional.
    pub fn enabled_features(&self) -> Vec<&'static str> {
        [
            (self.centrality_enabled, "centrality"),
            (self.entrypoint_detection, "entrypoint_detection"),
            (self.examples_analysis, "examples_analysis"),
            (self.semantic_analysis, "semantic_analysis"),
            (self.ml_features, "ml_features"),
            (self.experimental_scoring, "experimental_scoring"),
            (self.scaling_enabled, "scaling"),
        ]
        .iter()
        .filter(|(enabled, _)| *enabled)
        .map(|&(_, name)| name)
        .collect()
    }
}
/// Controls how results are formatted and what they contain.
#[derive(Debug, Clone, Serialize, Deserialize, Hash)]
pub struct OutputConfig {
/// Selected output format.
pub format: OutputFormat,
/// Whether file contents are embedded in the output.
pub include_content: bool,
/// Whether per-heuristic score breakdowns are included.
pub include_score_breakdown: bool,
/// Whether repository-level statistics are included.
pub include_repo_stats: bool,
/// Whether results are sorted by score.
pub sort_by_score: bool,
/// Whether JSON output is pretty-printed.
pub pretty_json: bool,
/// Extra fields to emit — exact semantics not visible here; confirm with the writer.
pub custom_fields: Vec<String>,
/// Destination file; `None` presumably means stdout — confirm with the writer.
pub file_path: Option<String>,
}
/// Supported report output formats.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
pub enum OutputFormat {
/// A single JSON document.
Json,
/// One JSON object per line (JSONL / NDJSON).
JsonLines,
/// Comma-separated values.
Csv,
/// Human-readable table.
Table,
/// Condensed summary (exact shape not visible here).
Summary,
}
impl Default for OutputConfig {
fn default() -> Self {
Self {
format: OutputFormat::Json,
include_content: false,
include_score_breakdown: true,
include_repo_stats: true,
sort_by_score: true,
pretty_json: true,
custom_fields: vec![],
file_path: None,
}
}
}
impl OutputConfig {
    /// No invariants to enforce yet; kept for symmetry with other sections.
    fn validate(&self) -> Result<()> {
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::NamedTempFile;
    /// Defaults should be sane and self-consistent.
    #[test]
    fn test_config_defaults() {
        let config = Config::default();
        assert_eq!(config.general.verbosity, 1);
        assert!(config.filtering.respect_gitignore);
        assert!(config.git.enabled);
        assert!(!config.features.centrality_enabled);
    }
    #[test]
    fn test_config_validation() {
        let mut config = Config::default();
        assert!(config.validate().is_ok());
        // Verbosity above 4 must be rejected.
        config.general.verbosity = 10;
        assert!(config.validate().is_err());
        // Inverted size bounds must be rejected.
        config = Config::default();
        config.filtering.max_file_size = 100;
        config.filtering.min_file_size = 200;
        assert!(config.validate().is_err());
    }
    #[test]
    fn test_config_file_io() {
        let config = Config::default();
        let temp_file = NamedTempFile::new().unwrap();
        // `with_extension` yields a sibling path that `NamedTempFile` does
        // NOT own, so the file written there must be cleaned up by hand; the
        // previous version leaked a `.json` file into the temp directory on
        // every run.
        let json_path = temp_file.path().with_extension("json");
        config.save_to_file(&json_path).unwrap();
        let load_result = Config::load_from_file(&json_path);
        std::fs::remove_file(&json_path).ok();
        let loaded_config = load_result.unwrap();
        assert_eq!(config.general.verbosity, loaded_config.general.verbosity);
    }
    #[test]
    fn test_filtering_patterns() {
        let mut config = FilteringConfig::default();
        config.include_patterns.push("*.rs".to_string());
        config.exclude_patterns.push("target/**".to_string());
        assert!(config.validate().is_ok());
        // Non-empty include patterns must yield a compiled set.
        let include_set = config.build_include_set().unwrap();
        assert!(include_set.is_some());
        let exclude_set = config.build_exclude_set().unwrap();
        assert!(exclude_set.is_match("target/debug/file.o"));
    }
    #[test]
    fn test_feature_flags() {
        let mut flags = FeatureFlags::default();
        assert!(!flags.has_v2_features());
        assert!(flags.enabled_features().is_empty());
        flags.centrality_enabled = true;
        flags.entrypoint_detection = true;
        assert!(flags.has_v2_features());
        let enabled = flags.enabled_features();
        assert!(enabled.contains(&"centrality"));
        assert!(enabled.contains(&"entrypoint_detection"));
    }
    #[test]
    fn test_performance_config_timeouts() {
        let config = PerformanceConfig::default();
        assert_eq!(config.analysis_timeout_duration(), Duration::from_secs(30));
        assert_eq!(config.global_timeout_duration(), Duration::from_secs(300));
    }
    /// Equal configs must hash equal; a one-field change must change the hash.
    #[test]
    fn test_config_hash() {
        let config1 = Config::default();
        let config2 = Config::default();
        let hash1 = config1.compute_hash();
        let hash2 = config2.compute_hash();
        assert_eq!(hash1, hash2);
        let mut config3 = Config::default();
        config3.general.verbosity = 2;
        let hash3 = config3.compute_hash();
        assert_ne!(hash1, hash3);
    }
}