Skip to main content

seekr_code/
config.rs

1//! Configuration management for Seekr.
2//!
3//! Loads configuration from `~/.seekr/config.toml` with sensible defaults.
4//! CLI arguments can override config file values.
5
6use crate::error::ConfigError;
7use serde::{Deserialize, Serialize};
8use std::path::{Path, PathBuf};
9
/// TCP port the HTTP API server listens on unless overridden.
const DEFAULT_PORT: u16 = 7720;

/// Number of items embedded per batch unless overridden.
const DEFAULT_BATCH_SIZE: usize = 32;

/// Value of the `k` parameter used for RRF fusion unless overridden.
const DEFAULT_RRF_K: u32 = 60;

/// Largest file, in bytes, that is indexed unless overridden (10 MiB).
const DEFAULT_MAX_FILE_SIZE: u64 = 10 << 20;

/// Context lines attached to each text search result unless overridden.
const DEFAULT_CONTEXT_LINES: usize = 2;

/// Number of results (top-k) returned by semantic search unless overridden.
const DEFAULT_TOP_K: usize = 20;
27
28/// Main configuration structure for Seekr.
29#[derive(Debug, Clone, Serialize, Deserialize)]
30#[serde(default)]
31pub struct SeekrConfig {
32    /// Directory for storing index files.
33    /// Default: `~/.seekr/indexes/`
34    pub index_dir: PathBuf,
35
36    /// Directory for storing downloaded ONNX models.
37    /// Default: `~/.seekr/models/`
38    pub model_dir: PathBuf,
39
40    /// Embedding model name.
41    /// Default: "all-MiniLM-L6-v2"
42    pub embed_model: String,
43
44    /// File glob patterns to exclude from scanning.
45    pub exclude_patterns: Vec<String>,
46
47    /// Maximum file size (in bytes) to index.
48    pub max_file_size: u64,
49
50    /// Server configuration.
51    pub server: ServerConfig,
52
53    /// Search configuration.
54    pub search: SearchConfig,
55
56    /// Embedding configuration.
57    pub embedding: EmbeddingConfig,
58}
59
60/// Server-specific configuration.
61#[derive(Debug, Clone, Serialize, Deserialize)]
62#[serde(default)]
63pub struct ServerConfig {
64    /// Host address to bind. Default: "127.0.0.1"
65    pub host: String,
66
67    /// Port number. Default: 7720
68    pub port: u16,
69}
70
71/// Search-specific configuration.
72#[derive(Debug, Clone, Serialize, Deserialize)]
73#[serde(default)]
74pub struct SearchConfig {
75    /// Number of context lines for text search results.
76    pub context_lines: usize,
77
78    /// Top-k results for semantic search.
79    pub top_k: usize,
80
81    /// RRF fusion parameter k.
82    pub rrf_k: u32,
83
84    /// Minimum score threshold for semantic search results (0.0 - 1.0).
85    pub score_threshold: f32,
86}
87
88/// Embedding-specific configuration.
89#[derive(Debug, Clone, Serialize, Deserialize)]
90#[serde(default)]
91pub struct EmbeddingConfig {
92    /// Batch size for embedding computation.
93    pub batch_size: usize,
94}
95
96impl Default for SeekrConfig {
97    fn default() -> Self {
98        let seekr_dir = default_seekr_dir();
99        Self {
100            index_dir: seekr_dir.join("indexes"),
101            model_dir: seekr_dir.join("models"),
102            embed_model: "all-MiniLM-L6-v2".to_string(),
103            exclude_patterns: vec![
104                "*.min.js".to_string(),
105                "*.min.css".to_string(),
106                "*.lock".to_string(),
107                "package-lock.json".to_string(),
108                "yarn.lock".to_string(),
109            ],
110            max_file_size: DEFAULT_MAX_FILE_SIZE,
111            server: ServerConfig::default(),
112            search: SearchConfig::default(),
113            embedding: EmbeddingConfig::default(),
114        }
115    }
116}
117
118impl Default for ServerConfig {
119    fn default() -> Self {
120        Self {
121            host: "127.0.0.1".to_string(),
122            port: DEFAULT_PORT,
123        }
124    }
125}
126
127impl Default for SearchConfig {
128    fn default() -> Self {
129        Self {
130            context_lines: DEFAULT_CONTEXT_LINES,
131            top_k: DEFAULT_TOP_K,
132            rrf_k: DEFAULT_RRF_K,
133            score_threshold: 0.0,
134        }
135    }
136}
137
138impl Default for EmbeddingConfig {
139    fn default() -> Self {
140        Self {
141            batch_size: DEFAULT_BATCH_SIZE,
142        }
143    }
144}
145
146impl SeekrConfig {
147    /// Load configuration from the default config file path.
148    ///
149    /// If the config file does not exist, returns default configuration
150    /// and creates the default config file.
151    pub fn load() -> std::result::Result<Self, ConfigError> {
152        let config_path = default_config_path();
153        Self::load_from(&config_path)
154    }
155
156    /// Load configuration from a specific file path.
157    pub fn load_from(path: &Path) -> std::result::Result<Self, ConfigError> {
158        if !path.exists() {
159            let config = Self::default();
160            // Attempt to create default config file, but don't fail if we can't
161            if let Err(e) = config.save_to(path) {
162                tracing::warn!(
163                    "Could not write default config to {}: {}",
164                    path.display(),
165                    e
166                );
167            }
168            return Ok(config);
169        }
170
171        let content = std::fs::read_to_string(path)?;
172        let config: SeekrConfig =
173            toml::from_str(&content).map_err(|e| ConfigError::ParseError(e.to_string()))?;
174        Ok(config)
175    }
176
177    /// Save configuration to a specific file path.
178    pub fn save_to(&self, path: &Path) -> std::result::Result<(), ConfigError> {
179        if let Some(parent) = path.parent() {
180            std::fs::create_dir_all(parent)?;
181        }
182        let content =
183            toml::to_string_pretty(self).map_err(|e| ConfigError::ParseError(e.to_string()))?;
184        std::fs::write(path, content)?;
185        Ok(())
186    }
187
188    /// Get the index directory for a specific project path.
189    ///
190    /// Each project gets its own isolated index directory based on
191    /// a blake3 hash of the canonical project path.
192    pub fn project_index_dir(&self, project_path: &Path) -> PathBuf {
193        let canonical = project_path
194            .canonicalize()
195            .unwrap_or_else(|_| project_path.to_path_buf());
196        let hash = blake3::hash(canonical.to_string_lossy().as_bytes());
197        // Use first 16 hex chars for readability
198        let hex = hash.to_hex();
199        let short_hash = &hex.as_str()[..16];
200        self.index_dir.join(short_hash)
201    }
202}
203
204/// Get the default Seekr data directory (`~/.seekr/`).
205fn default_seekr_dir() -> PathBuf {
206    dirs::home_dir()
207        .unwrap_or_else(|| PathBuf::from("."))
208        .join(".seekr")
209}
210
211/// Get the default config file path (`~/.seekr/config.toml`).
212pub fn default_config_path() -> PathBuf {
213    default_seekr_dir().join("config.toml")
214}
215
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration carries the documented default values.
    #[test]
    fn test_default_config() {
        let config = SeekrConfig::default();
        assert_eq!(config.server.port, 7720);
        assert_eq!(config.embed_model, "all-MiniLM-L6-v2");
        assert_eq!(config.embedding.batch_size, 32);
        assert_eq!(config.search.rrf_k, 60);
    }

    /// Two distinct project paths must map to distinct index directories.
    #[test]
    fn test_project_index_dir_isolation() {
        let config = SeekrConfig::default();
        let dir_a = config.project_index_dir(Path::new("/home/user/project-a"));
        let dir_b = config.project_index_dir(Path::new("/home/user/project-b"));
        assert_ne!(
            dir_a, dir_b,
            "Different projects should have different index dirs"
        );
    }

    /// Loading from a missing file yields the defaults.
    ///
    /// `load_from` also tries to create the missing file, so the test works
    /// inside a per-process scratch directory under the system temp dir
    /// rather than at a fixed path near the filesystem root, and removes it
    /// before and after the run.
    #[test]
    fn test_load_nonexistent_returns_default() {
        let scratch_dir =
            std::env::temp_dir().join(format!("seekr-config-test-{}", std::process::id()));
        let config_path = scratch_dir.join("config.toml");
        // Make sure a leftover from an aborted run cannot mask the
        // "file is missing" path.
        let _ = std::fs::remove_dir_all(&scratch_dir);

        let config = SeekrConfig::load_from(&config_path).unwrap();
        assert_eq!(config.server.port, DEFAULT_PORT);

        // Best-effort cleanup of whatever `load_from` created.
        let _ = std::fs::remove_dir_all(&scratch_dir);
    }
}