1use crate::{CodememError, GraphConfig, ScoringWeights, VectorConfig};
6use serde::{Deserialize, Serialize};
7use std::path::{Path, PathBuf};
8
9#[derive(Debug, Clone, Default, Serialize, Deserialize)]
11#[serde(default)]
12pub struct CodememConfig {
13 pub scoring: ScoringWeights,
14 pub vector: VectorConfig,
15 pub graph: GraphConfig,
16 pub embedding: EmbeddingConfig,
17 pub storage: StorageConfig,
18 pub chunking: ChunkingConfig,
19 pub enrichment: EnrichmentConfig,
20}
21
22impl CodememConfig {
23 pub fn load(path: &Path) -> Result<Self, CodememError> {
25 let content = std::fs::read_to_string(path)?;
26 let config: Self =
27 toml::from_str(&content).map_err(|e| CodememError::Config(e.to_string()))?;
28 config.validate()?;
29 Ok(config)
30 }
31
32 pub fn validate(&self) -> Result<(), CodememError> {
38 let w = &self.scoring;
41 let weights = [
42 w.vector_similarity,
43 w.graph_strength,
44 w.token_overlap,
45 w.temporal,
46 w.tag_matching,
47 w.importance,
48 w.confidence,
49 w.recency,
50 ];
51 if weights.iter().any(|v| !v.is_finite() || *v < 0.0) {
52 return Err(CodememError::Config(
53 "All scoring weights must be finite and non-negative".to_string(),
54 ));
55 }
56
57 if self.embedding.dimensions == 0 {
59 return Err(CodememError::Config(
60 "Embedding dimensions must be > 0".to_string(),
61 ));
62 }
63
64 if self.vector.dimensions == 0 {
66 return Err(CodememError::Config(
67 "Vector dimensions must be > 0".to_string(),
68 ));
69 }
70
71 if self.embedding.cache_capacity == 0 {
73 return Err(CodememError::Config(
74 "Embedding cache capacity must be > 0".to_string(),
75 ));
76 }
77
78 if self.embedding.batch_size == 0 {
80 return Err(CodememError::Config(
81 "Embedding batch size must be > 0".to_string(),
82 ));
83 }
84
85 if self.chunking.min_chunk_size >= self.chunking.max_chunk_size {
87 return Err(CodememError::Config(
88 "min_chunk_size must be less than max_chunk_size".to_string(),
89 ));
90 }
91
92 if !(0.0..=1.0).contains(&self.enrichment.dedup_similarity_threshold) {
94 return Err(CodememError::Config(
95 "dedup_similarity_threshold must be between 0.0 and 1.0".to_string(),
96 ));
97 }
98
99 if !(0.0..=1.0).contains(&self.enrichment.insight_confidence) {
101 return Err(CodememError::Config(
102 "insight_confidence must be between 0.0 and 1.0".to_string(),
103 ));
104 }
105
106 let thresholds = [
108 (
109 self.chunking.min_chunk_score_threshold,
110 "min_chunk_score_threshold",
111 ),
112 (
113 self.chunking.min_symbol_score_threshold,
114 "min_symbol_score_threshold",
115 ),
116 ];
117 for (val, name) in &thresholds {
118 if !(0.0..=1.0).contains(val) {
119 return Err(CodememError::Config(format!(
120 "{name} must be between 0.0 and 1.0"
121 )));
122 }
123 }
124
125 Ok(())
126 }
127
128 pub fn save(&self, path: &Path) -> Result<(), CodememError> {
130 self.validate()?;
132 let content =
133 toml::to_string_pretty(self).map_err(|e| CodememError::Config(e.to_string()))?;
134 if let Some(parent) = path.parent() {
135 std::fs::create_dir_all(parent)?;
136 }
137 std::fs::write(path, content)?;
138 Ok(())
139 }
140
141 pub fn load_or_default() -> Self {
143 let path = Self::default_path();
144 if path.exists() {
145 match Self::load(&path) {
146 Ok(config) => config,
147 Err(e) => {
148 tracing::warn!("Failed to load config: {e}, using defaults");
149 CodememConfig::default()
150 }
151 }
152 } else {
153 Self::default()
154 }
155 }
156
157 pub fn default_path() -> PathBuf {
159 dirs::home_dir()
160 .unwrap_or_else(|| PathBuf::from("."))
161 .join(".codemem")
162 .join("config.toml")
163 }
164}
165
166#[derive(Debug, Clone, Serialize, Deserialize)]
168#[serde(default)]
169pub struct EmbeddingConfig {
170 pub provider: String,
172 pub model: String,
174 pub url: String,
176 pub dimensions: usize,
178 pub cache_capacity: usize,
180 pub batch_size: usize,
182}
183
184impl Default for EmbeddingConfig {
185 fn default() -> Self {
186 Self {
187 provider: "candle".to_string(),
188 model: "BAAI/bge-base-en-v1.5".to_string(),
189 url: String::new(),
190 dimensions: 768,
191 cache_capacity: 10_000,
192 batch_size: 16,
193 }
194 }
195}
196
197#[derive(Debug, Clone, Serialize, Deserialize)]
199#[serde(default)]
200pub struct StorageConfig {
201 pub db_path: String,
203 pub cache_size_mb: u32,
205 pub busy_timeout_secs: u64,
207}
208
209impl Default for StorageConfig {
210 fn default() -> Self {
211 Self {
212 db_path: dirs::home_dir()
213 .unwrap_or_else(|| PathBuf::from("."))
214 .join(".codemem")
215 .join("codemem.db")
216 .to_string_lossy()
217 .into_owned(),
218 cache_size_mb: 64,
219 busy_timeout_secs: 5,
220 }
221 }
222}
223
224#[derive(Debug, Clone, Serialize, Deserialize)]
226#[serde(default)]
227pub struct ChunkingConfig {
228 pub enabled: bool,
230 pub max_chunk_size: usize,
232 pub min_chunk_size: usize,
234 pub auto_compact: bool,
236 pub max_retained_chunks_per_file: usize,
238 pub min_chunk_score_threshold: f64,
240 pub max_retained_symbols_per_file: usize,
242 pub min_symbol_score_threshold: f64,
244}
245
246impl Default for ChunkingConfig {
247 fn default() -> Self {
248 Self {
249 enabled: true,
250 max_chunk_size: 1500,
251 min_chunk_size: 50,
252 auto_compact: true,
253 max_retained_chunks_per_file: 10,
254 min_chunk_score_threshold: 0.2,
255 max_retained_symbols_per_file: 15,
256 min_symbol_score_threshold: 0.15,
257 }
258 }
259}
260
261#[derive(Debug, Clone, Serialize, Deserialize)]
263#[serde(default)]
264pub struct EnrichmentConfig {
265 pub git_min_commit_count: usize,
267 pub git_min_co_change_count: usize,
269 pub perf_min_coupling_degree: usize,
271 pub perf_min_symbol_count: usize,
273 pub insight_confidence: f64,
275 pub dedup_similarity_threshold: f64,
277}
278
279impl Default for EnrichmentConfig {
280 fn default() -> Self {
281 Self {
282 git_min_commit_count: 25,
283 git_min_co_change_count: 5,
284 perf_min_coupling_degree: 25,
285 perf_min_symbol_count: 30,
286 insight_confidence: 0.5,
287 dedup_similarity_threshold: 0.90,
288 }
289 }
290}
291
292#[cfg(test)]
293#[path = "tests/config_tests.rs"]
294mod tests;