1use crate::{CodememError, GraphConfig, ScoringWeights, VectorConfig};
6use serde::{Deserialize, Serialize};
7use std::path::{Path, PathBuf};
8
9#[derive(Debug, Clone, Default, Serialize, Deserialize)]
11#[serde(default)]
12pub struct CodememConfig {
13 pub scoring: ScoringWeights,
14 pub vector: VectorConfig,
15 pub graph: GraphConfig,
16 pub embedding: EmbeddingConfig,
17 pub storage: StorageConfig,
18 pub chunking: ChunkingConfig,
19 pub enrichment: EnrichmentConfig,
20 pub scip: ScipConfig,
21 pub memory: MemoryConfig,
22}
23
24impl CodememConfig {
25 pub fn load(path: &Path) -> Result<Self, CodememError> {
27 let content = std::fs::read_to_string(path)?;
28 let config: Self =
29 toml::from_str(&content).map_err(|e| CodememError::Config(e.to_string()))?;
30 config.validate()?;
31 Ok(config)
32 }
33
34 pub fn validate(&self) -> Result<(), CodememError> {
40 let w = &self.scoring;
43 let weights = [
44 w.vector_similarity,
45 w.graph_strength,
46 w.token_overlap,
47 w.temporal,
48 w.tag_matching,
49 w.importance,
50 w.confidence,
51 w.recency,
52 ];
53 if weights.iter().any(|v| !v.is_finite() || *v < 0.0) {
54 return Err(CodememError::Config(
55 "All scoring weights must be finite and non-negative".to_string(),
56 ));
57 }
58
59 if self.embedding.dimensions == 0 {
61 return Err(CodememError::Config(
62 "Embedding dimensions must be > 0".to_string(),
63 ));
64 }
65
66 if self.vector.dimensions == 0 {
68 return Err(CodememError::Config(
69 "Vector dimensions must be > 0".to_string(),
70 ));
71 }
72
73 if self.embedding.cache_capacity == 0 {
75 return Err(CodememError::Config(
76 "Embedding cache capacity must be > 0".to_string(),
77 ));
78 }
79
80 if self.embedding.batch_size == 0 {
82 return Err(CodememError::Config(
83 "Embedding batch size must be > 0".to_string(),
84 ));
85 }
86
87 if self.chunking.min_chunk_size >= self.chunking.max_chunk_size {
89 return Err(CodememError::Config(
90 "min_chunk_size must be less than max_chunk_size".to_string(),
91 ));
92 }
93
94 if !(0.0..=1.0).contains(&self.enrichment.dedup_similarity_threshold) {
96 return Err(CodememError::Config(
97 "dedup_similarity_threshold must be between 0.0 and 1.0".to_string(),
98 ));
99 }
100
101 if !(0.0..=1.0).contains(&self.enrichment.insight_confidence) {
103 return Err(CodememError::Config(
104 "insight_confidence must be between 0.0 and 1.0".to_string(),
105 ));
106 }
107
108 if self.scip.max_references_per_symbol == 0 {
110 return Err(CodememError::Config(
111 "scip.max_references_per_symbol must be > 0".to_string(),
112 ));
113 }
114
115 let thresholds = [
117 (
118 self.chunking.min_chunk_score_threshold,
119 "min_chunk_score_threshold",
120 ),
121 (
122 self.chunking.min_symbol_score_threshold,
123 "min_symbol_score_threshold",
124 ),
125 ];
126 for (val, name) in &thresholds {
127 if !(0.0..=1.0).contains(val) {
128 return Err(CodememError::Config(format!(
129 "{name} must be between 0.0 and 1.0"
130 )));
131 }
132 }
133
134 Ok(())
135 }
136
137 pub fn save(&self, path: &Path) -> Result<(), CodememError> {
139 self.validate()?;
141 let content =
142 toml::to_string_pretty(self).map_err(|e| CodememError::Config(e.to_string()))?;
143 if let Some(parent) = path.parent() {
144 std::fs::create_dir_all(parent)?;
145 }
146 std::fs::write(path, content)?;
147 Ok(())
148 }
149
150 pub fn load_or_default() -> Self {
152 let path = Self::default_path();
153 if path.exists() {
154 match Self::load(&path) {
155 Ok(config) => config,
156 Err(e) => {
157 tracing::warn!("Failed to load config: {e}, using defaults");
158 CodememConfig::default()
159 }
160 }
161 } else {
162 Self::default()
163 }
164 }
165
166 pub fn default_path() -> PathBuf {
168 dirs::home_dir()
169 .unwrap_or_else(|| PathBuf::from("."))
170 .join(".codemem")
171 .join("config.toml")
172 }
173}
174
175#[derive(Debug, Clone, Serialize, Deserialize)]
177#[serde(default)]
178pub struct EmbeddingConfig {
179 pub provider: String,
181 pub model: String,
183 pub url: String,
185 pub dimensions: usize,
188 pub cache_capacity: usize,
190 pub batch_size: usize,
192 pub dtype: String,
195}
196
197impl Default for EmbeddingConfig {
198 fn default() -> Self {
199 Self {
200 provider: "candle".to_string(),
201 model: "BAAI/bge-base-en-v1.5".to_string(),
202 url: String::new(),
203 dimensions: 768,
204 cache_capacity: 10_000,
205 batch_size: 16,
206 dtype: "f32".to_string(),
207 }
208 }
209}
210
211#[derive(Debug, Clone, Serialize, Deserialize)]
213#[serde(default)]
214pub struct StorageConfig {
215 #[serde(default = "default_storage_backend")]
217 pub backend: String,
218 #[serde(default)]
220 pub url: Option<String>,
221 pub cache_size_mb: u32,
223 pub busy_timeout_secs: u64,
225}
226
/// Returns the storage backend name used when none is configured: `"sqlite"`.
fn default_storage_backend() -> String {
    String::from("sqlite")
}
230
231impl Default for StorageConfig {
232 fn default() -> Self {
233 Self {
234 backend: default_storage_backend(),
235 url: None,
236 cache_size_mb: 64,
237 busy_timeout_secs: 5,
238 }
239 }
240}
241
242#[derive(Debug, Clone, Serialize, Deserialize)]
244#[serde(default)]
245pub struct ChunkingConfig {
246 pub enabled: bool,
248 pub max_chunk_size: usize,
250 pub min_chunk_size: usize,
252 pub auto_compact: bool,
254 pub max_retained_chunks_per_file: usize,
256 pub min_chunk_score_threshold: f64,
258 pub max_retained_symbols_per_file: usize,
260 pub min_symbol_score_threshold: f64,
262}
263
264impl Default for ChunkingConfig {
265 fn default() -> Self {
266 Self {
267 enabled: true,
268 max_chunk_size: 1500,
269 min_chunk_size: 50,
270 auto_compact: true,
271 max_retained_chunks_per_file: 10,
272 min_chunk_score_threshold: 0.2,
273 max_retained_symbols_per_file: 15,
274 min_symbol_score_threshold: 0.15,
275 }
276 }
277}
278
279#[derive(Debug, Clone, Serialize, Deserialize)]
281#[serde(default)]
282pub struct ScipConfig {
283 pub enabled: bool,
285 pub auto_detect_indexers: bool,
287 pub cache_index: bool,
289 pub cache_ttl_hours: u64,
291 pub create_external_nodes: bool,
293 pub max_references_per_symbol: usize,
295 pub store_docs_as_memories: bool,
297 pub hierarchical_containment: bool,
300 pub collapse_intra_class_edges: bool,
302 pub fan_out_limits: FanOutLimits,
304 pub indexers: ScipIndexersConfig,
306}
307
308#[derive(Debug, Clone, Serialize, Deserialize)]
310#[serde(default)]
311pub struct FanOutLimits {
312 pub module: usize,
313 pub function: usize,
314 pub method: usize,
315 pub class: usize,
316}
317
318impl Default for FanOutLimits {
319 fn default() -> Self {
320 Self {
321 module: 200,
322 function: 30,
323 method: 30,
324 class: 50,
325 }
326 }
327}
328
329impl Default for ScipConfig {
330 fn default() -> Self {
331 Self {
332 enabled: true,
333 auto_detect_indexers: true,
334 cache_index: true,
335 cache_ttl_hours: 24,
336 create_external_nodes: true,
337 max_references_per_symbol: 100,
338 store_docs_as_memories: true,
339 hierarchical_containment: true,
340 collapse_intra_class_edges: true,
341 fan_out_limits: FanOutLimits::default(),
342 indexers: ScipIndexersConfig::default(),
343 }
344 }
345}
346
347#[derive(Debug, Clone, Default, Serialize, Deserialize)]
352#[serde(default)]
353pub struct ScipIndexersConfig {
354 pub rust: String,
355 pub typescript: String,
356 pub python: String,
357 pub java: String,
358 pub go: String,
359}
360
361#[derive(Debug, Clone, Serialize, Deserialize)]
363#[serde(default)]
364pub struct EnrichmentConfig {
365 pub git_min_commit_count: usize,
367 pub git_min_co_change_count: usize,
369 pub perf_min_coupling_degree: usize,
371 pub perf_min_symbol_count: usize,
373 pub insight_confidence: f64,
375 pub dedup_similarity_threshold: f64,
377 pub dead_code: DeadCodeConfig,
379}
380
381#[derive(Debug, Clone, Serialize, Deserialize)]
383#[serde(default)]
384pub struct DeadCodeConfig {
385 pub enabled: bool,
387 pub exempt_decorators: Vec<String>,
393 pub exempt_kinds: Vec<String>,
395 pub min_symbols: usize,
398}
399
400impl Default for DeadCodeConfig {
401 fn default() -> Self {
402 Self {
403 enabled: true,
404 exempt_decorators: vec![
405 "app.route".into(),
406 "route".into(),
407 "pytest.fixture".into(),
408 "fixture".into(),
409 "click.command".into(),
410 "celery.task".into(),
411 "property".into(),
412 "staticmethod".into(),
413 "classmethod".into(),
414 "override".into(),
415 "abstractmethod".into(),
416 "test".into(),
417 "tokio::test".into(),
418 "async_trait".into(),
419 ],
420 exempt_kinds: vec!["constructor".into(), "test".into()],
421 min_symbols: 10,
422 }
423 }
424}
425
426impl Default for EnrichmentConfig {
427 fn default() -> Self {
428 Self {
429 git_min_commit_count: 25,
430 git_min_co_change_count: 5,
431 perf_min_coupling_degree: 25,
432 perf_min_symbol_count: 30,
433 insight_confidence: 0.5,
434 dedup_similarity_threshold: 0.90,
435 dead_code: DeadCodeConfig::default(),
436 }
437 }
438}
439
440#[derive(Debug, Clone, Serialize, Deserialize)]
442#[serde(default)]
443pub struct MemoryConfig {
444 pub default_session_ttl_hours: u64,
447 pub expire_enrichments_on_reindex: bool,
450}
451
452impl Default for MemoryConfig {
453 fn default() -> Self {
454 Self {
455 default_session_ttl_hours: 168, expire_enrichments_on_reindex: true,
457 }
458 }
459}
460
461#[cfg(test)]
462#[path = "tests/config_tests.rs"]
463mod tests;