1use crate::{CodememError, GraphConfig, ScoringWeights, VectorConfig};
6use serde::{Deserialize, Serialize};
7use std::path::{Path, PathBuf};
8
9#[derive(Debug, Clone, Default, Serialize, Deserialize)]
11#[serde(default)]
12pub struct CodememConfig {
13 pub scoring: ScoringWeights,
14 pub vector: VectorConfig,
15 pub graph: GraphConfig,
16 pub embedding: EmbeddingConfig,
17 pub storage: StorageConfig,
18 pub chunking: ChunkingConfig,
19 pub enrichment: EnrichmentConfig,
20}
21
22impl CodememConfig {
23 pub fn load(path: &Path) -> Result<Self, CodememError> {
25 let content = std::fs::read_to_string(path)?;
26 let config: Self =
27 toml::from_str(&content).map_err(|e| CodememError::Config(e.to_string()))?;
28 config.validate()?;
29 Ok(config)
30 }
31
32 pub fn validate(&self) -> Result<(), CodememError> {
38 let w = &self.scoring;
41 let weights = [
42 w.vector_similarity,
43 w.graph_strength,
44 w.token_overlap,
45 w.temporal,
46 w.tag_matching,
47 w.importance,
48 w.confidence,
49 w.recency,
50 ];
51 if weights.iter().any(|v| !v.is_finite() || *v < 0.0) {
52 return Err(CodememError::Config(
53 "All scoring weights must be finite and non-negative".to_string(),
54 ));
55 }
56
57 if self.embedding.dimensions == 0 {
59 return Err(CodememError::Config(
60 "Embedding dimensions must be > 0".to_string(),
61 ));
62 }
63
64 if self.vector.dimensions == 0 {
66 return Err(CodememError::Config(
67 "Vector dimensions must be > 0".to_string(),
68 ));
69 }
70
71 if self.embedding.cache_capacity == 0 {
73 return Err(CodememError::Config(
74 "Embedding cache capacity must be > 0".to_string(),
75 ));
76 }
77
78 if self.chunking.min_chunk_size >= self.chunking.max_chunk_size {
80 return Err(CodememError::Config(
81 "min_chunk_size must be less than max_chunk_size".to_string(),
82 ));
83 }
84
85 if !(0.0..=1.0).contains(&self.enrichment.dedup_similarity_threshold) {
87 return Err(CodememError::Config(
88 "dedup_similarity_threshold must be between 0.0 and 1.0".to_string(),
89 ));
90 }
91
92 if !(0.0..=1.0).contains(&self.enrichment.insight_confidence) {
94 return Err(CodememError::Config(
95 "insight_confidence must be between 0.0 and 1.0".to_string(),
96 ));
97 }
98
99 let thresholds = [
101 (
102 self.chunking.min_chunk_score_threshold,
103 "min_chunk_score_threshold",
104 ),
105 (
106 self.chunking.min_symbol_score_threshold,
107 "min_symbol_score_threshold",
108 ),
109 ];
110 for (val, name) in &thresholds {
111 if !(0.0..=1.0).contains(val) {
112 return Err(CodememError::Config(format!(
113 "{name} must be between 0.0 and 1.0"
114 )));
115 }
116 }
117
118 Ok(())
119 }
120
121 pub fn save(&self, path: &Path) -> Result<(), CodememError> {
123 self.validate()?;
125 let content =
126 toml::to_string_pretty(self).map_err(|e| CodememError::Config(e.to_string()))?;
127 if let Some(parent) = path.parent() {
128 std::fs::create_dir_all(parent)?;
129 }
130 std::fs::write(path, content)?;
131 Ok(())
132 }
133
134 pub fn load_or_default() -> Self {
136 let path = Self::default_path();
137 if path.exists() {
138 match Self::load(&path) {
139 Ok(config) => config,
140 Err(e) => {
141 tracing::warn!("Failed to load config: {e}, using defaults");
142 CodememConfig::default()
143 }
144 }
145 } else {
146 Self::default()
147 }
148 }
149
150 pub fn default_path() -> PathBuf {
152 dirs::home_dir()
153 .unwrap_or_else(|| PathBuf::from("."))
154 .join(".codemem")
155 .join("config.toml")
156 }
157}
158
159#[derive(Debug, Clone, Serialize, Deserialize)]
161#[serde(default)]
162pub struct EmbeddingConfig {
163 pub provider: String,
165 pub model: String,
167 pub url: String,
169 pub dimensions: usize,
171 pub cache_capacity: usize,
173}
174
175impl Default for EmbeddingConfig {
176 fn default() -> Self {
177 Self {
178 provider: "candle".to_string(),
179 model: "BAAI/bge-base-en-v1.5".to_string(),
180 url: String::new(),
181 dimensions: 768,
182 cache_capacity: 10_000,
183 }
184 }
185}
186
187#[derive(Debug, Clone, Serialize, Deserialize)]
189#[serde(default)]
190pub struct StorageConfig {
191 pub db_path: String,
193 pub cache_size_mb: u32,
195 pub busy_timeout_secs: u64,
197}
198
199impl Default for StorageConfig {
200 fn default() -> Self {
201 Self {
202 db_path: dirs::home_dir()
203 .unwrap_or_else(|| PathBuf::from("."))
204 .join(".codemem")
205 .join("codemem.db")
206 .to_string_lossy()
207 .into_owned(),
208 cache_size_mb: 64,
209 busy_timeout_secs: 5,
210 }
211 }
212}
213
214#[derive(Debug, Clone, Serialize, Deserialize)]
216#[serde(default)]
217pub struct ChunkingConfig {
218 pub enabled: bool,
220 pub max_chunk_size: usize,
222 pub min_chunk_size: usize,
224 pub auto_compact: bool,
226 pub max_retained_chunks_per_file: usize,
228 pub min_chunk_score_threshold: f64,
230 pub max_retained_symbols_per_file: usize,
232 pub min_symbol_score_threshold: f64,
234}
235
236impl Default for ChunkingConfig {
237 fn default() -> Self {
238 Self {
239 enabled: true,
240 max_chunk_size: 1500,
241 min_chunk_size: 50,
242 auto_compact: true,
243 max_retained_chunks_per_file: 10,
244 min_chunk_score_threshold: 0.2,
245 max_retained_symbols_per_file: 15,
246 min_symbol_score_threshold: 0.15,
247 }
248 }
249}
250
251#[derive(Debug, Clone, Serialize, Deserialize)]
253#[serde(default)]
254pub struct EnrichmentConfig {
255 pub git_min_commit_count: usize,
257 pub git_min_co_change_count: usize,
259 pub perf_min_coupling_degree: usize,
261 pub perf_min_symbol_count: usize,
263 pub insight_confidence: f64,
265 pub dedup_similarity_threshold: f64,
267}
268
269impl Default for EnrichmentConfig {
270 fn default() -> Self {
271 Self {
272 git_min_commit_count: 25,
273 git_min_co_change_count: 5,
274 perf_min_coupling_degree: 25,
275 perf_min_symbol_count: 30,
276 insight_confidence: 0.5,
277 dedup_similarity_threshold: 0.90,
278 }
279 }
280}
281
282#[cfg(test)]
283#[path = "tests/config_tests.rs"]
284mod tests;