1use crate::{CodememError, GraphConfig, ScoringWeights, VectorConfig};
6use serde::{Deserialize, Serialize};
7use std::path::{Path, PathBuf};
8
9#[derive(Debug, Clone, Default, Serialize, Deserialize)]
11#[serde(default)]
12pub struct CodememConfig {
13 pub scoring: ScoringWeights,
14 pub vector: VectorConfig,
15 pub graph: GraphConfig,
16 pub embedding: EmbeddingConfig,
17 pub storage: StorageConfig,
18 pub chunking: ChunkingConfig,
19 pub enrichment: EnrichmentConfig,
20 pub scip: ScipConfig,
21}
22
23impl CodememConfig {
24 pub fn load(path: &Path) -> Result<Self, CodememError> {
26 let content = std::fs::read_to_string(path)?;
27 let config: Self =
28 toml::from_str(&content).map_err(|e| CodememError::Config(e.to_string()))?;
29 config.validate()?;
30 Ok(config)
31 }
32
33 pub fn validate(&self) -> Result<(), CodememError> {
39 let w = &self.scoring;
42 let weights = [
43 w.vector_similarity,
44 w.graph_strength,
45 w.token_overlap,
46 w.temporal,
47 w.tag_matching,
48 w.importance,
49 w.confidence,
50 w.recency,
51 ];
52 if weights.iter().any(|v| !v.is_finite() || *v < 0.0) {
53 return Err(CodememError::Config(
54 "All scoring weights must be finite and non-negative".to_string(),
55 ));
56 }
57
58 if self.embedding.dimensions == 0 {
60 return Err(CodememError::Config(
61 "Embedding dimensions must be > 0".to_string(),
62 ));
63 }
64
65 if self.vector.dimensions == 0 {
67 return Err(CodememError::Config(
68 "Vector dimensions must be > 0".to_string(),
69 ));
70 }
71
72 if self.embedding.cache_capacity == 0 {
74 return Err(CodememError::Config(
75 "Embedding cache capacity must be > 0".to_string(),
76 ));
77 }
78
79 if self.embedding.batch_size == 0 {
81 return Err(CodememError::Config(
82 "Embedding batch size must be > 0".to_string(),
83 ));
84 }
85
86 if self.chunking.min_chunk_size >= self.chunking.max_chunk_size {
88 return Err(CodememError::Config(
89 "min_chunk_size must be less than max_chunk_size".to_string(),
90 ));
91 }
92
93 if !(0.0..=1.0).contains(&self.enrichment.dedup_similarity_threshold) {
95 return Err(CodememError::Config(
96 "dedup_similarity_threshold must be between 0.0 and 1.0".to_string(),
97 ));
98 }
99
100 if !(0.0..=1.0).contains(&self.enrichment.insight_confidence) {
102 return Err(CodememError::Config(
103 "insight_confidence must be between 0.0 and 1.0".to_string(),
104 ));
105 }
106
107 if self.scip.max_references_per_symbol == 0 {
109 return Err(CodememError::Config(
110 "scip.max_references_per_symbol must be > 0".to_string(),
111 ));
112 }
113
114 let thresholds = [
116 (
117 self.chunking.min_chunk_score_threshold,
118 "min_chunk_score_threshold",
119 ),
120 (
121 self.chunking.min_symbol_score_threshold,
122 "min_symbol_score_threshold",
123 ),
124 ];
125 for (val, name) in &thresholds {
126 if !(0.0..=1.0).contains(val) {
127 return Err(CodememError::Config(format!(
128 "{name} must be between 0.0 and 1.0"
129 )));
130 }
131 }
132
133 Ok(())
134 }
135
136 pub fn save(&self, path: &Path) -> Result<(), CodememError> {
138 self.validate()?;
140 let content =
141 toml::to_string_pretty(self).map_err(|e| CodememError::Config(e.to_string()))?;
142 if let Some(parent) = path.parent() {
143 std::fs::create_dir_all(parent)?;
144 }
145 std::fs::write(path, content)?;
146 Ok(())
147 }
148
149 pub fn load_or_default() -> Self {
151 let path = Self::default_path();
152 if path.exists() {
153 match Self::load(&path) {
154 Ok(config) => config,
155 Err(e) => {
156 tracing::warn!("Failed to load config: {e}, using defaults");
157 CodememConfig::default()
158 }
159 }
160 } else {
161 Self::default()
162 }
163 }
164
165 pub fn default_path() -> PathBuf {
167 dirs::home_dir()
168 .unwrap_or_else(|| PathBuf::from("."))
169 .join(".codemem")
170 .join("config.toml")
171 }
172}
173
174#[derive(Debug, Clone, Serialize, Deserialize)]
176#[serde(default)]
177pub struct EmbeddingConfig {
178 pub provider: String,
180 pub model: String,
182 pub url: String,
184 pub dimensions: usize,
187 pub cache_capacity: usize,
189 pub batch_size: usize,
191 pub dtype: String,
194}
195
196impl Default for EmbeddingConfig {
197 fn default() -> Self {
198 Self {
199 provider: "candle".to_string(),
200 model: "BAAI/bge-base-en-v1.5".to_string(),
201 url: String::new(),
202 dimensions: 768,
203 cache_capacity: 10_000,
204 batch_size: 16,
205 dtype: "f32".to_string(),
206 }
207 }
208}
209
210#[derive(Debug, Clone, Serialize, Deserialize)]
212#[serde(default)]
213pub struct StorageConfig {
214 pub cache_size_mb: u32,
216 pub busy_timeout_secs: u64,
218}
219
220impl Default for StorageConfig {
221 fn default() -> Self {
222 Self {
223 cache_size_mb: 64,
224 busy_timeout_secs: 5,
225 }
226 }
227}
228
229#[derive(Debug, Clone, Serialize, Deserialize)]
231#[serde(default)]
232pub struct ChunkingConfig {
233 pub enabled: bool,
235 pub max_chunk_size: usize,
237 pub min_chunk_size: usize,
239 pub auto_compact: bool,
241 pub max_retained_chunks_per_file: usize,
243 pub min_chunk_score_threshold: f64,
245 pub max_retained_symbols_per_file: usize,
247 pub min_symbol_score_threshold: f64,
249}
250
251impl Default for ChunkingConfig {
252 fn default() -> Self {
253 Self {
254 enabled: true,
255 max_chunk_size: 1500,
256 min_chunk_size: 50,
257 auto_compact: true,
258 max_retained_chunks_per_file: 10,
259 min_chunk_score_threshold: 0.2,
260 max_retained_symbols_per_file: 15,
261 min_symbol_score_threshold: 0.15,
262 }
263 }
264}
265
266#[derive(Debug, Clone, Serialize, Deserialize)]
268#[serde(default)]
269pub struct ScipConfig {
270 pub enabled: bool,
272 pub auto_detect_indexers: bool,
274 pub cache_index: bool,
276 pub cache_ttl_hours: u64,
278 pub create_external_nodes: bool,
280 pub max_references_per_symbol: usize,
282 pub store_docs_as_memories: bool,
284 pub hierarchical_containment: bool,
287 pub collapse_intra_class_edges: bool,
289 pub fan_out_limits: FanOutLimits,
291 pub indexers: ScipIndexersConfig,
293}
294
295#[derive(Debug, Clone, Serialize, Deserialize)]
297#[serde(default)]
298pub struct FanOutLimits {
299 pub module: usize,
300 pub function: usize,
301 pub method: usize,
302 pub class: usize,
303}
304
305impl Default for FanOutLimits {
306 fn default() -> Self {
307 Self {
308 module: 200,
309 function: 30,
310 method: 30,
311 class: 50,
312 }
313 }
314}
315
316impl Default for ScipConfig {
317 fn default() -> Self {
318 Self {
319 enabled: true,
320 auto_detect_indexers: true,
321 cache_index: true,
322 cache_ttl_hours: 24,
323 create_external_nodes: true,
324 max_references_per_symbol: 100,
325 store_docs_as_memories: true,
326 hierarchical_containment: true,
327 collapse_intra_class_edges: true,
328 fan_out_limits: FanOutLimits::default(),
329 indexers: ScipIndexersConfig::default(),
330 }
331 }
332}
333
334#[derive(Debug, Clone, Default, Serialize, Deserialize)]
339#[serde(default)]
340pub struct ScipIndexersConfig {
341 pub rust: String,
342 pub typescript: String,
343 pub python: String,
344 pub java: String,
345 pub go: String,
346}
347
348#[derive(Debug, Clone, Serialize, Deserialize)]
350#[serde(default)]
351pub struct EnrichmentConfig {
352 pub git_min_commit_count: usize,
354 pub git_min_co_change_count: usize,
356 pub perf_min_coupling_degree: usize,
358 pub perf_min_symbol_count: usize,
360 pub insight_confidence: f64,
362 pub dedup_similarity_threshold: f64,
364}
365
366impl Default for EnrichmentConfig {
367 fn default() -> Self {
368 Self {
369 git_min_commit_count: 25,
370 git_min_co_change_count: 5,
371 perf_min_coupling_degree: 25,
372 perf_min_symbol_count: 30,
373 insight_confidence: 0.5,
374 dedup_similarity_threshold: 0.90,
375 }
376 }
377}
378
379#[cfg(test)]
380#[path = "tests/config_tests.rs"]
381mod tests;