//! Persistent configuration for Codemem.
//!
//! Loads/saves a TOML config at `~/.codemem/config.toml`.

use crate::{CodememError, GraphConfig, ScoringWeights, VectorConfig};
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
/// Top-level Codemem configuration.
///
/// Serialized to/from TOML at the path returned by
/// `CodememConfig::default_path` (`~/.codemem/config.toml`).
/// `#[serde(default)]` makes every section optional in the file:
/// any section missing from the TOML falls back to its `Default` value,
/// so partial config files remain loadable.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct CodememConfig {
    /// Scoring weights; `validate` requires each to be finite and non-negative.
    pub scoring: ScoringWeights,
    /// Vector index settings; `validate` requires `dimensions > 0`.
    pub vector: VectorConfig,
    /// Graph settings (declared in the crate root; not validated here).
    pub graph: GraphConfig,
    /// Embedding provider settings (dimensions, cache, batch size).
    pub embedding: EmbeddingConfig,
    /// SQLite storage tuning.
    pub storage: StorageConfig,
    /// CST-aware code chunking and compaction settings.
    pub chunking: ChunkingConfig,
    /// Insight-generation thresholds for the enrichment pipeline.
    pub enrichment: EnrichmentConfig,
}
21
22impl CodememConfig {
23    /// Load configuration from the given path. Validates after loading.
24    pub fn load(path: &Path) -> Result<Self, CodememError> {
25        let content = std::fs::read_to_string(path)?;
26        let config: Self =
27            toml::from_str(&content).map_err(|e| CodememError::Config(e.to_string()))?;
28        config.validate()?;
29        Ok(config)
30    }
31
32    /// Validate configuration values.
33    ///
34    /// Checks that scoring weights are non-negative, dimensions and cache sizes
35    /// are positive, chunk size bounds are consistent, and dedup threshold is
36    /// in the valid range.
37    pub fn validate(&self) -> Result<(), CodememError> {
38        // M5: Scoring weights must be finite and non-negative.
39        // Check is_finite() first to reject NaN/Inf, then < 0.0 for negatives.
40        let w = &self.scoring;
41        let weights = [
42            w.vector_similarity,
43            w.graph_strength,
44            w.token_overlap,
45            w.temporal,
46            w.tag_matching,
47            w.importance,
48            w.confidence,
49            w.recency,
50        ];
51        if weights.iter().any(|v| !v.is_finite() || *v < 0.0) {
52            return Err(CodememError::Config(
53                "All scoring weights must be finite and non-negative".to_string(),
54            ));
55        }
56
57        // Embedding dimensions must be positive
58        if self.embedding.dimensions == 0 {
59            return Err(CodememError::Config(
60                "Embedding dimensions must be > 0".to_string(),
61            ));
62        }
63
64        // Vector dimensions must be positive
65        if self.vector.dimensions == 0 {
66            return Err(CodememError::Config(
67                "Vector dimensions must be > 0".to_string(),
68            ));
69        }
70
71        // Cache capacity must be positive
72        if self.embedding.cache_capacity == 0 {
73            return Err(CodememError::Config(
74                "Embedding cache capacity must be > 0".to_string(),
75            ));
76        }
77
78        // Batch size must be positive
79        if self.embedding.batch_size == 0 {
80            return Err(CodememError::Config(
81                "Embedding batch size must be > 0".to_string(),
82            ));
83        }
84
85        // Chunk size bounds
86        if self.chunking.min_chunk_size >= self.chunking.max_chunk_size {
87            return Err(CodememError::Config(
88                "min_chunk_size must be less than max_chunk_size".to_string(),
89            ));
90        }
91
92        // Dedup threshold in [0.0, 1.0] (also rejects NaN via range check)
93        if !(0.0..=1.0).contains(&self.enrichment.dedup_similarity_threshold) {
94            return Err(CodememError::Config(
95                "dedup_similarity_threshold must be between 0.0 and 1.0".to_string(),
96            ));
97        }
98
99        // Enrichment confidence in [0.0, 1.0]
100        if !(0.0..=1.0).contains(&self.enrichment.insight_confidence) {
101            return Err(CodememError::Config(
102                "insight_confidence must be between 0.0 and 1.0".to_string(),
103            ));
104        }
105
106        // Chunking score thresholds in [0.0, 1.0]
107        let thresholds = [
108            (
109                self.chunking.min_chunk_score_threshold,
110                "min_chunk_score_threshold",
111            ),
112            (
113                self.chunking.min_symbol_score_threshold,
114                "min_symbol_score_threshold",
115            ),
116        ];
117        for (val, name) in &thresholds {
118            if !(0.0..=1.0).contains(val) {
119                return Err(CodememError::Config(format!(
120                    "{name} must be between 0.0 and 1.0"
121                )));
122            }
123        }
124
125        Ok(())
126    }
127
128    /// Save configuration to the given path. Validates before saving.
129    pub fn save(&self, path: &Path) -> Result<(), CodememError> {
130        // M5: Validate before saving to prevent persisting invalid config.
131        self.validate()?;
132        let content =
133            toml::to_string_pretty(self).map_err(|e| CodememError::Config(e.to_string()))?;
134        if let Some(parent) = path.parent() {
135            std::fs::create_dir_all(parent)?;
136        }
137        std::fs::write(path, content)?;
138        Ok(())
139    }
140
141    /// Load from the default path, or return defaults if the file doesn't exist.
142    pub fn load_or_default() -> Self {
143        let path = Self::default_path();
144        if path.exists() {
145            match Self::load(&path) {
146                Ok(config) => config,
147                Err(e) => {
148                    tracing::warn!("Failed to load config: {e}, using defaults");
149                    CodememConfig::default()
150                }
151            }
152        } else {
153            Self::default()
154        }
155    }
156
157    /// Default config path: `~/.codemem/config.toml`.
158    pub fn default_path() -> PathBuf {
159        dirs::home_dir()
160            .unwrap_or_else(|| PathBuf::from("."))
161            .join(".codemem")
162            .join("config.toml")
163    }
164}
165
/// Embedding provider configuration.
///
/// All fields have defaults (see this type's `Default` impl), and
/// `#[serde(default)]` lets a TOML file override only the fields it needs.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct EmbeddingConfig {
    /// Provider name: "candle" (default), "ollama", or "openai".
    pub provider: String,
    /// Model name (provider-specific). Default: "BAAI/bge-base-en-v1.5".
    pub model: String,
    /// API URL for remote providers. Empty by default.
    pub url: String,
    /// Embedding dimensions. Must be > 0 (enforced by
    /// `CodememConfig::validate`). Default: 768.
    pub dimensions: usize,
    /// LRU cache capacity. Must be > 0. Default: 10_000.
    pub cache_capacity: usize,
    /// Batch size for embedding forward passes (GPU memory trade-off).
    /// Must be > 0. Default: 16.
    pub batch_size: usize,
}
183
184impl Default for EmbeddingConfig {
185    fn default() -> Self {
186        Self {
187            provider: "candle".to_string(),
188            model: "BAAI/bge-base-en-v1.5".to_string(),
189            url: String::new(),
190            dimensions: 768,
191            cache_capacity: 10_000,
192            batch_size: 16,
193        }
194    }
195}
196
/// Storage configuration.
///
/// Tuning knobs for the underlying SQLite database.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct StorageConfig {
    /// SQLite cache size in MB. Default: 64.
    pub cache_size_mb: u32,
    /// SQLite busy timeout in seconds. Default: 5.
    pub busy_timeout_secs: u64,
}
206
207impl Default for StorageConfig {
208    fn default() -> Self {
209        Self {
210            cache_size_mb: 64,
211            busy_timeout_secs: 5,
212        }
213    }
214}
215
/// CST-aware code chunking configuration.
///
/// Covers both chunk extraction during indexing and the optional
/// auto-compaction pass that prunes low-scoring chunk/symbol graph nodes.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct ChunkingConfig {
    /// Whether chunking is enabled during indexing. Default: true.
    pub enabled: bool,
    /// Maximum chunk size in non-whitespace characters. Default: 1500.
    /// Must exceed `min_chunk_size` (enforced by `CodememConfig::validate`).
    pub max_chunk_size: usize,
    /// Minimum chunk size in non-whitespace characters. Default: 50.
    pub min_chunk_size: usize,
    /// Whether to auto-compact the graph after indexing. Default: true.
    pub auto_compact: bool,
    /// Maximum number of retained chunk graph-nodes per file after
    /// compaction. Default: 10.
    pub max_retained_chunks_per_file: usize,
    /// Minimum chunk score (0.0–1.0) to survive compaction. Default: 0.2.
    pub min_chunk_score_threshold: f64,
    /// Maximum number of retained symbol graph-nodes per file after
    /// compaction. Default: 15.
    pub max_retained_symbols_per_file: usize,
    /// Minimum symbol score (0.0–1.0) to survive compaction. Default: 0.15.
    pub min_symbol_score_threshold: f64,
}
237
238impl Default for ChunkingConfig {
239    fn default() -> Self {
240        Self {
241            enabled: true,
242            max_chunk_size: 1500,
243            min_chunk_size: 50,
244            auto_compact: true,
245            max_retained_chunks_per_file: 10,
246            min_chunk_score_threshold: 0.2,
247            max_retained_symbols_per_file: 15,
248            min_symbol_score_threshold: 0.15,
249        }
250    }
251}
252
/// Enrichment pipeline configuration for controlling insight generation thresholds.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct EnrichmentConfig {
    /// Minimum commit count for a file to generate a high-activity insight.
    /// Default: 25.
    pub git_min_commit_count: usize,
    /// Minimum co-change count for a file pair to generate a coupling
    /// insight. Default: 5.
    pub git_min_co_change_count: usize,
    /// Minimum coupling degree for a node to generate a high-coupling
    /// insight. Default: 25.
    pub perf_min_coupling_degree: usize,
    /// Minimum symbol count for a file to generate a complexity insight.
    /// Default: 30.
    pub perf_min_symbol_count: usize,
    /// Default confidence for auto-generated insights. Must lie in
    /// [0.0, 1.0] (enforced by `CodememConfig::validate`). Default: 0.5.
    pub insight_confidence: f64,
    /// Cosine similarity threshold for deduplicating insights. Must lie in
    /// [0.0, 1.0]. Default: 0.90.
    pub dedup_similarity_threshold: f64,
}
270
271impl Default for EnrichmentConfig {
272    fn default() -> Self {
273        Self {
274            git_min_commit_count: 25,
275            git_min_co_change_count: 5,
276            perf_min_coupling_degree: 25,
277            perf_min_symbol_count: 30,
278            insight_confidence: 0.5,
279            dedup_similarity_threshold: 0.90,
280        }
281    }
282}
283
#[cfg(test)]
#[path = "tests/config_tests.rs"]
mod tests;