1use crate::{CodememError, GraphConfig, ScoringWeights, VectorConfig};
6use serde::{Deserialize, Serialize};
7use std::path::{Path, PathBuf};
8
9#[derive(Debug, Clone, Default, Serialize, Deserialize)]
11#[serde(default)]
12pub struct CodememConfig {
13 pub scoring: ScoringWeights,
14 pub vector: VectorConfig,
15 pub graph: GraphConfig,
16 pub embedding: EmbeddingConfig,
17 pub storage: StorageConfig,
18 pub chunking: ChunkingConfig,
19 pub enrichment: EnrichmentConfig,
20}
21
22impl CodememConfig {
23 pub fn load(path: &Path) -> Result<Self, CodememError> {
25 let content = std::fs::read_to_string(path)?;
26 let config: Self =
27 toml::from_str(&content).map_err(|e| CodememError::Config(e.to_string()))?;
28 config.validate()?;
29 Ok(config)
30 }
31
32 pub fn validate(&self) -> Result<(), CodememError> {
38 let w = &self.scoring;
41 let weights = [
42 w.vector_similarity,
43 w.graph_strength,
44 w.token_overlap,
45 w.temporal,
46 w.tag_matching,
47 w.importance,
48 w.confidence,
49 w.recency,
50 ];
51 if weights.iter().any(|v| !v.is_finite() || *v < 0.0) {
52 return Err(CodememError::Config(
53 "All scoring weights must be finite and non-negative".to_string(),
54 ));
55 }
56
57 if self.embedding.dimensions == 0 {
59 return Err(CodememError::Config(
60 "Embedding dimensions must be > 0".to_string(),
61 ));
62 }
63
64 if self.vector.dimensions == 0 {
66 return Err(CodememError::Config(
67 "Vector dimensions must be > 0".to_string(),
68 ));
69 }
70
71 if self.embedding.cache_capacity == 0 {
73 return Err(CodememError::Config(
74 "Embedding cache capacity must be > 0".to_string(),
75 ));
76 }
77
78 if self.embedding.batch_size == 0 {
80 return Err(CodememError::Config(
81 "Embedding batch size must be > 0".to_string(),
82 ));
83 }
84
85 if self.chunking.min_chunk_size >= self.chunking.max_chunk_size {
87 return Err(CodememError::Config(
88 "min_chunk_size must be less than max_chunk_size".to_string(),
89 ));
90 }
91
92 if !(0.0..=1.0).contains(&self.enrichment.dedup_similarity_threshold) {
94 return Err(CodememError::Config(
95 "dedup_similarity_threshold must be between 0.0 and 1.0".to_string(),
96 ));
97 }
98
99 if !(0.0..=1.0).contains(&self.enrichment.insight_confidence) {
101 return Err(CodememError::Config(
102 "insight_confidence must be between 0.0 and 1.0".to_string(),
103 ));
104 }
105
106 let thresholds = [
108 (
109 self.chunking.min_chunk_score_threshold,
110 "min_chunk_score_threshold",
111 ),
112 (
113 self.chunking.min_symbol_score_threshold,
114 "min_symbol_score_threshold",
115 ),
116 ];
117 for (val, name) in &thresholds {
118 if !(0.0..=1.0).contains(val) {
119 return Err(CodememError::Config(format!(
120 "{name} must be between 0.0 and 1.0"
121 )));
122 }
123 }
124
125 Ok(())
126 }
127
128 pub fn save(&self, path: &Path) -> Result<(), CodememError> {
130 self.validate()?;
132 let content =
133 toml::to_string_pretty(self).map_err(|e| CodememError::Config(e.to_string()))?;
134 if let Some(parent) = path.parent() {
135 std::fs::create_dir_all(parent)?;
136 }
137 std::fs::write(path, content)?;
138 Ok(())
139 }
140
141 pub fn load_or_default() -> Self {
143 let path = Self::default_path();
144 if path.exists() {
145 match Self::load(&path) {
146 Ok(config) => config,
147 Err(e) => {
148 tracing::warn!("Failed to load config: {e}, using defaults");
149 CodememConfig::default()
150 }
151 }
152 } else {
153 Self::default()
154 }
155 }
156
157 pub fn default_path() -> PathBuf {
159 dirs::home_dir()
160 .unwrap_or_else(|| PathBuf::from("."))
161 .join(".codemem")
162 .join("config.toml")
163 }
164}
165
166#[derive(Debug, Clone, Serialize, Deserialize)]
168#[serde(default)]
169pub struct EmbeddingConfig {
170 pub provider: String,
172 pub model: String,
174 pub url: String,
176 pub dimensions: usize,
179 pub cache_capacity: usize,
181 pub batch_size: usize,
183 pub dtype: String,
186}
187
188impl Default for EmbeddingConfig {
189 fn default() -> Self {
190 Self {
191 provider: "candle".to_string(),
192 model: "BAAI/bge-base-en-v1.5".to_string(),
193 url: String::new(),
194 dimensions: 768,
195 cache_capacity: 10_000,
196 batch_size: 16,
197 dtype: "f32".to_string(),
198 }
199 }
200}
201
202#[derive(Debug, Clone, Serialize, Deserialize)]
204#[serde(default)]
205pub struct StorageConfig {
206 pub cache_size_mb: u32,
208 pub busy_timeout_secs: u64,
210}
211
212impl Default for StorageConfig {
213 fn default() -> Self {
214 Self {
215 cache_size_mb: 64,
216 busy_timeout_secs: 5,
217 }
218 }
219}
220
221#[derive(Debug, Clone, Serialize, Deserialize)]
223#[serde(default)]
224pub struct ChunkingConfig {
225 pub enabled: bool,
227 pub max_chunk_size: usize,
229 pub min_chunk_size: usize,
231 pub auto_compact: bool,
233 pub max_retained_chunks_per_file: usize,
235 pub min_chunk_score_threshold: f64,
237 pub max_retained_symbols_per_file: usize,
239 pub min_symbol_score_threshold: f64,
241}
242
243impl Default for ChunkingConfig {
244 fn default() -> Self {
245 Self {
246 enabled: true,
247 max_chunk_size: 1500,
248 min_chunk_size: 50,
249 auto_compact: true,
250 max_retained_chunks_per_file: 10,
251 min_chunk_score_threshold: 0.2,
252 max_retained_symbols_per_file: 15,
253 min_symbol_score_threshold: 0.15,
254 }
255 }
256}
257
258#[derive(Debug, Clone, Serialize, Deserialize)]
260#[serde(default)]
261pub struct EnrichmentConfig {
262 pub git_min_commit_count: usize,
264 pub git_min_co_change_count: usize,
266 pub perf_min_coupling_degree: usize,
268 pub perf_min_symbol_count: usize,
270 pub insight_confidence: f64,
272 pub dedup_similarity_threshold: f64,
274}
275
276impl Default for EnrichmentConfig {
277 fn default() -> Self {
278 Self {
279 git_min_commit_count: 25,
280 git_min_co_change_count: 5,
281 perf_min_coupling_degree: 25,
282 perf_min_symbol_count: 30,
283 insight_confidence: 0.5,
284 dedup_similarity_threshold: 0.90,
285 }
286 }
287}
288
289#[cfg(test)]
290#[path = "tests/config_tests.rs"]
291mod tests;