use crate::{CodememError, GraphConfig, ScoringWeights, VectorConfig};
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
/// Top-level configuration, grouped into one section per subsystem.
///
/// The type is (de)serialized with serde. Because of the container-level
/// `#[serde(default)]`, any section missing from the input is filled in from
/// that section's `Default` implementation.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct CodememConfig {
/// Scoring weights (type declared elsewhere in the crate).
pub scoring: ScoringWeights,
/// Vector settings (type declared elsewhere in the crate).
pub vector: VectorConfig,
/// Graph settings (type declared elsewhere in the crate).
pub graph: GraphConfig,
/// Embedding provider / model settings.
pub embedding: EmbeddingConfig,
/// Storage backend settings.
pub storage: StorageConfig,
/// Chunking settings.
pub chunking: ChunkingConfig,
/// Enrichment thresholds and dead-code settings.
pub enrichment: EnrichmentConfig,
/// SCIP-related settings.
pub scip: ScipConfig,
/// Memory / session settings.
pub memory: MemoryConfig,
}
impl CodememConfig {
    /// Loads a configuration from the TOML file at `path` and validates it.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be read, if its contents do not parse as TOML
    /// matching this schema (reported as `CodememError::Config`), or if
    /// [`Self::validate`] rejects the parsed values.
    pub fn load(path: &Path) -> Result<Self, CodememError> {
        let content = std::fs::read_to_string(path)?;
        let config: Self =
            toml::from_str(&content).map_err(|e| CodememError::Config(e.to_string()))?;
        config.validate()?;
        Ok(config)
    }

    /// Checks the range invariants of the configuration.
    ///
    /// Checks run in a fixed order and the first failure is returned, so the
    /// reported message is deterministic when several values are invalid:
    ///
    /// 1. every scoring weight is finite and non-negative;
    /// 2. embedding dimensions, vector dimensions, embedding cache capacity
    ///    and embedding batch size are non-zero;
    /// 3. `chunking.min_chunk_size` is strictly less than
    ///    `chunking.max_chunk_size`;
    /// 4. `enrichment.dedup_similarity_threshold` and
    ///    `enrichment.insight_confidence` lie in `0.0..=1.0`;
    /// 5. `scip.max_references_per_symbol` is non-zero;
    /// 6. `chunking.min_chunk_score_threshold` and
    ///    `chunking.min_symbol_score_threshold` lie in `0.0..=1.0`.
    ///
    /// # Errors
    ///
    /// Returns `CodememError::Config` describing the first violated invariant.
    pub fn validate(&self) -> Result<(), CodememError> {
        // Turns a boolean invariant into a `Config` error carrying `message`.
        // The message is only allocated on the failure path.
        fn ensure(ok: bool, message: &str) -> Result<(), CodememError> {
            if ok {
                Ok(())
            } else {
                Err(CodememError::Config(message.to_string()))
            }
        }

        // Requires `value` to lie in the closed interval [0.0, 1.0].
        // NaN is rejected because `contains` is false for NaN.
        fn ensure_unit_interval(value: f64, name: &str) -> Result<(), CodememError> {
            if (0.0..=1.0).contains(&value) {
                Ok(())
            } else {
                Err(CodememError::Config(format!(
                    "{name} must be between 0.0 and 1.0"
                )))
            }
        }

        let w = &self.scoring;
        let weights = [
            w.vector_similarity,
            w.graph_strength,
            w.token_overlap,
            w.temporal,
            w.tag_matching,
            w.importance,
            w.confidence,
            w.recency,
        ];
        ensure(
            weights.iter().all(|v| v.is_finite() && *v >= 0.0),
            "All scoring weights must be finite and non-negative",
        )?;

        ensure(
            self.embedding.dimensions != 0,
            "Embedding dimensions must be > 0",
        )?;
        ensure(self.vector.dimensions != 0, "Vector dimensions must be > 0")?;
        ensure(
            self.embedding.cache_capacity != 0,
            "Embedding cache capacity must be > 0",
        )?;
        ensure(
            self.embedding.batch_size != 0,
            "Embedding batch size must be > 0",
        )?;

        ensure(
            self.chunking.min_chunk_size < self.chunking.max_chunk_size,
            "min_chunk_size must be less than max_chunk_size",
        )?;

        ensure_unit_interval(
            self.enrichment.dedup_similarity_threshold,
            "dedup_similarity_threshold",
        )?;
        ensure_unit_interval(self.enrichment.insight_confidence, "insight_confidence")?;

        ensure(
            self.scip.max_references_per_symbol != 0,
            "scip.max_references_per_symbol must be > 0",
        )?;

        ensure_unit_interval(
            self.chunking.min_chunk_score_threshold,
            "min_chunk_score_threshold",
        )?;
        ensure_unit_interval(
            self.chunking.min_symbol_score_threshold,
            "min_symbol_score_threshold",
        )?;

        Ok(())
    }

    /// Validates the configuration and writes it to `path` as pretty-printed TOML.
    ///
    /// The parent directory of `path` is created first if it does not exist.
    /// Validation happens before anything touches the filesystem, so an
    /// invalid configuration never overwrites an existing file.
    ///
    /// # Errors
    ///
    /// Fails if [`Self::validate`] rejects the configuration, if TOML
    /// serialization fails (`CodememError::Config`), or if creating the
    /// directory or writing the file fails.
    pub fn save(&self, path: &Path) -> Result<(), CodememError> {
        self.validate()?;
        let content =
            toml::to_string_pretty(self).map_err(|e| CodememError::Config(e.to_string()))?;
        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent)?;
        }
        std::fs::write(path, content)?;
        Ok(())
    }

    /// Loads the configuration from [`Self::default_path`], never failing.
    ///
    /// Returns the default configuration when the file does not exist. When
    /// the file exists but cannot be loaded (unreadable, unparsable, or
    /// invalid), a warning naming the file is logged and the default
    /// configuration is returned instead.
    pub fn load_or_default() -> Self {
        let path = Self::default_path();
        if !path.exists() {
            return Self::default();
        }
        Self::load(&path).unwrap_or_else(|e| {
            // Name the file so the user knows which config was ignored.
            tracing::warn!(
                "Failed to load config from {}: {e}, using defaults",
                path.display()
            );
            Self::default()
        })
    }

    /// Returns `<home>/.codemem/config.toml`.
    ///
    /// When `dirs::home_dir()` yields no home directory, the path is built
    /// relative to the current directory (`./.codemem/config.toml`).
    pub fn default_path() -> PathBuf {
        dirs::home_dir()
            .unwrap_or_else(|| PathBuf::from("."))
            .join(".codemem")
            .join("config.toml")
    }
}
/// Embedding settings.
///
/// Fields missing from the input are taken from the `Default` impl
/// (container-level `#[serde(default)]`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct EmbeddingConfig {
/// Embedding provider name; the default is `"candle"`.
pub provider: String,
/// Model identifier; the default is `"BAAI/bge-base-en-v1.5"`.
pub model: String,
/// Empty by default. Presumably the endpoint for a remote provider — TODO confirm.
pub url: String,
/// Embedding dimensionality; default 768. `CodememConfig::validate` requires > 0.
pub dimensions: usize,
/// Embedding cache capacity; default 10_000. `CodememConfig::validate` requires > 0.
pub cache_capacity: usize,
/// Embedding batch size; default 16. `CodememConfig::validate` requires > 0.
pub batch_size: usize,
/// Data type name; the default is `"f16"`.
pub dtype: String,
}
impl Default for EmbeddingConfig {
fn default() -> Self {
Self {
provider: "candle".to_string(),
model: "BAAI/bge-base-en-v1.5".to_string(),
url: String::new(),
dimensions: 768,
cache_capacity: 10_000,
batch_size: 16,
dtype: "f16".to_string(),
}
}
}
/// Storage backend settings.
///
/// Fields missing from the input are taken from the `Default` impl
/// (container-level `#[serde(default)]`); `backend` and `url` additionally
/// carry their own field-level defaults.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct StorageConfig {
/// Backend name; falls back to `default_storage_backend()` (`"sqlite"`) when absent.
#[serde(default = "default_storage_backend")]
pub backend: String,
/// Optional URL; `None` when absent. Presumably a connection URL for
/// non-default backends — TODO confirm.
#[serde(default)]
pub url: Option<String>,
/// Cache size in megabytes (per the field name); default 64.
pub cache_size_mb: u32,
/// Busy timeout in seconds (per the field name); default 5.
pub busy_timeout_secs: u64,
}
/// Name of the storage backend used when none is configured (`"sqlite"`).
///
/// Referenced both by the serde field default on `StorageConfig::backend`
/// and by `StorageConfig`'s `Default` impl.
fn default_storage_backend() -> String {
    String::from("sqlite")
}
impl Default for StorageConfig {
fn default() -> Self {
Self {
backend: default_storage_backend(),
url: None,
cache_size_mb: 64,
busy_timeout_secs: 5,
}
}
}
/// Chunking settings.
///
/// Fields missing from the input are taken from the `Default` impl
/// (container-level `#[serde(default)]`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct ChunkingConfig {
/// Default `true`.
pub enabled: bool,
/// Upper chunk size bound; default 1500. Units are not visible here — TODO confirm.
pub max_chunk_size: usize,
/// Lower chunk size bound; default 50. `CodememConfig::validate` requires
/// this to be strictly less than `max_chunk_size`.
pub min_chunk_size: usize,
/// Default `true`.
pub auto_compact: bool,
/// Default 10.
pub max_retained_chunks_per_file: usize,
/// Default 0.2. `CodememConfig::validate` requires a value in `0.0..=1.0`.
pub min_chunk_score_threshold: f64,
/// Default 15.
pub max_retained_symbols_per_file: usize,
/// Default 0.15. `CodememConfig::validate` requires a value in `0.0..=1.0`.
pub min_symbol_score_threshold: f64,
}
impl Default for ChunkingConfig {
fn default() -> Self {
Self {
enabled: true,
max_chunk_size: 1500,
min_chunk_size: 50,
auto_compact: true,
max_retained_chunks_per_file: 10,
min_chunk_score_threshold: 0.2,
max_retained_symbols_per_file: 15,
min_symbol_score_threshold: 0.15,
}
}
}
/// SCIP-related settings (presumably SCIP code-intelligence indexing — TODO confirm).
///
/// Fields missing from the input are taken from the `Default` impl
/// (container-level `#[serde(default)]`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct ScipConfig {
/// Default `true`.
pub enabled: bool,
/// Default `true`.
pub auto_detect_indexers: bool,
/// Default `true`.
pub cache_index: bool,
/// Cache time-to-live in hours (per the field name); default 24.
pub cache_ttl_hours: u64,
/// Default `true`.
pub create_external_nodes: bool,
/// Default 100. `CodememConfig::validate` requires > 0.
pub max_references_per_symbol: usize,
/// Default `true`.
pub store_docs_as_memories: bool,
/// Default `true`.
pub hierarchical_containment: bool,
/// Default `true`.
pub collapse_intra_class_edges: bool,
/// Per-kind fan-out limits; see `FanOutLimits`.
pub fan_out_limits: FanOutLimits,
/// Per-language indexer settings; see `ScipIndexersConfig`.
pub indexers: ScipIndexersConfig,
}
/// Fan-out limits, one per symbol kind.
///
/// Presumably caps the number of outgoing edges per node of each kind —
/// TODO confirm against the consumer. Fields missing from the input are taken
/// from the `Default` impl (container-level `#[serde(default)]`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct FanOutLimits {
/// Limit for modules; default 200.
pub module: usize,
/// Limit for functions; default 30.
pub function: usize,
/// Limit for methods; default 30.
pub method: usize,
/// Limit for classes; default 50.
pub class: usize,
}
impl Default for FanOutLimits {
fn default() -> Self {
Self {
module: 200,
function: 30,
method: 30,
class: 50,
}
}
}
impl Default for ScipConfig {
fn default() -> Self {
Self {
enabled: true,
auto_detect_indexers: true,
cache_index: true,
cache_ttl_hours: 24,
create_external_nodes: true,
max_references_per_symbol: 100,
store_docs_as_memories: true,
hierarchical_containment: true,
collapse_intra_class_edges: true,
fan_out_limits: FanOutLimits::default(),
indexers: ScipIndexersConfig::default(),
}
}
}
/// Per-language indexer settings.
///
/// `Default` is derived, so every field defaults to the empty string.
/// Presumably each value is a command or path override for that language's
/// indexer, with empty meaning "no override" — TODO confirm.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct ScipIndexersConfig {
/// Setting for Rust.
pub rust: String,
/// Setting for TypeScript.
pub typescript: String,
/// Setting for Python.
pub python: String,
/// Setting for Java.
pub java: String,
/// Setting for Go.
pub go: String,
}
/// Enrichment thresholds.
///
/// Fields missing from the input are taken from the `Default` impl
/// (container-level `#[serde(default)]`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct EnrichmentConfig {
/// Default 25.
pub git_min_commit_count: usize,
/// Default 5.
pub git_min_co_change_count: usize,
/// Default 25.
pub perf_min_coupling_degree: usize,
/// Default 30.
pub perf_min_symbol_count: usize,
/// Default 0.5. `CodememConfig::validate` requires a value in `0.0..=1.0`.
pub insight_confidence: f64,
/// Default 0.90. `CodememConfig::validate` requires a value in `0.0..=1.0`.
pub dedup_similarity_threshold: f64,
/// Default 50.
pub max_code_smells: usize,
/// Dead-code settings; see `DeadCodeConfig`.
pub dead_code: DeadCodeConfig,
}
/// Dead-code settings.
///
/// Fields missing from the input are taken from the `Default` impl
/// (container-level `#[serde(default)]`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct DeadCodeConfig {
/// Default `true`.
pub enabled: bool,
/// Decorator / attribute names to exempt. Presumably symbols carrying one
/// of these are not reported as dead code — TODO confirm.
pub exempt_decorators: Vec<String>,
/// Symbol kinds to exempt; the default is `["constructor", "test"]`.
pub exempt_kinds: Vec<String>,
/// Default 10. Presumably a minimum symbol count before the analysis
/// applies — TODO confirm.
pub min_symbols: usize,
}
impl Default for DeadCodeConfig {
fn default() -> Self {
Self {
enabled: true,
exempt_decorators: vec![
"app.route".into(),
"route".into(),
"pytest.fixture".into(),
"fixture".into(),
"click.command".into(),
"celery.task".into(),
"property".into(),
"staticmethod".into(),
"classmethod".into(),
"override".into(),
"abstractmethod".into(),
"test".into(),
"tokio::test".into(),
"async_trait".into(),
],
exempt_kinds: vec!["constructor".into(), "test".into()],
min_symbols: 10,
}
}
}
impl Default for EnrichmentConfig {
fn default() -> Self {
Self {
git_min_commit_count: 25,
git_min_co_change_count: 5,
perf_min_coupling_degree: 25,
perf_min_symbol_count: 30,
insight_confidence: 0.5,
dedup_similarity_threshold: 0.90,
max_code_smells: 50,
dead_code: DeadCodeConfig::default(),
}
}
}
/// Memory / session settings.
///
/// Fields missing from the input are taken from the `Default` impl
/// (container-level `#[serde(default)]`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct MemoryConfig {
/// Session time-to-live in hours (per the field name); default 168, i.e. 7 days.
pub default_session_ttl_hours: u64,
/// Default `true`.
pub expire_enrichments_on_reindex: bool,
}
impl Default for MemoryConfig {
fn default() -> Self {
Self {
default_session_ttl_hours: 168, expire_enrichments_on_reindex: true,
}
}
}
// Unit tests live in a separate file (`tests/config_tests.rs`, resolved via
// the `#[path]` attribute) and are compiled only for test builds.
#[cfg(test)]
#[path = "tests/config_tests.rs"]
mod tests;