Skip to main content

velesdb_core/
config.rs

1//! `VelesDB` Configuration Module
2//!
3//! Provides configuration file support via `velesdb.toml`, environment variables,
4//! and runtime overrides.
5//!
6//! # Priority (highest to lowest)
7//!
8//! 1. Runtime overrides (API, REPL)
9//! 2. Environment variables (`VELESDB_*`)
10//! 3. Configuration file (`velesdb.toml`)
11//! 4. Default values
12
13use figment::{
14    providers::{Env, Format, Serialized, Toml},
15    Figment,
16};
17use serde::{Deserialize, Serialize};
18use std::path::Path;
19use thiserror::Error;
20
/// Errors produced while loading, parsing, serializing, or validating
/// configuration.
///
/// The user-facing `Display` text of each variant is defined by its
/// `#[error(...)]` attribute.
#[derive(Error, Debug)]
pub enum ConfigError {
    /// The configuration could not be parsed or extracted.
    ///
    /// Carries the underlying error rendered as a string. In this module it is
    /// also used to report TOML serialization failures.
    #[error("Failed to parse configuration: {0}")]
    ParseError(String),

    /// A configuration value failed validation.
    #[error("Invalid configuration value for '{key}': {message}")]
    InvalidValue {
        /// Dotted path of the offending key (for example `search.ef_search`).
        key: String,
        /// Human-readable description of why the value was rejected.
        message: String,
    },

    /// The configuration file could not be found.
    ///
    /// The payload is interpolated into the message; presumably it is the
    /// path that was looked up.
    #[error("Configuration file not found: {0}")]
    FileNotFound(String),

    /// An I/O error occurred.
    ///
    /// `#[from]` lets `?` convert a `std::io::Error` into this variant.
    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),
}
45
46/// Search mode presets.
47#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
48#[serde(rename_all = "snake_case")]
49pub enum SearchMode {
50    /// Fast search with `ef_search=64`, ~90% recall.
51    Fast,
52    /// Balanced search with `ef_search=128`, ~98% recall (default).
53    #[default]
54    Balanced,
55    /// Accurate search with `ef_search=256`, ~100% recall.
56    Accurate,
57    /// Perfect recall with bruteforce, 100% guaranteed.
58    Perfect,
59}
60
61impl SearchMode {
62    /// Returns the `ef_search` value for this mode.
63    #[must_use]
64    pub fn ef_search(&self) -> usize {
65        match self {
66            Self::Fast => 64,
67            Self::Balanced => 128,
68            Self::Accurate => 256,
69            Self::Perfect => usize::MAX, // Signals bruteforce
70        }
71    }
72}
73
74/// Search configuration section.
75#[derive(Debug, Clone, Serialize, Deserialize)]
76#[serde(default)]
77pub struct SearchConfig {
78    /// Default search mode.
79    pub default_mode: SearchMode,
80    /// Override `ef_search` (if set, overrides mode).
81    pub ef_search: Option<usize>,
82    /// Maximum results per query.
83    pub max_results: usize,
84    /// Query timeout in milliseconds.
85    pub query_timeout_ms: u64,
86}
87
88impl Default for SearchConfig {
89    fn default() -> Self {
90        Self {
91            default_mode: SearchMode::Balanced,
92            ef_search: None,
93            max_results: 1000,
94            query_timeout_ms: 30000,
95        }
96    }
97}
98
/// `[hnsw]` section: HNSW index construction parameters.
///
/// `Default` is derived, so every field starts out as "auto"
/// (`None` / `0`). Fields missing from the input keep those defaults.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct HnswConfig {
    /// Number of connections per node (M parameter).
    /// `None` = auto based on dimension.
    /// When set, `VelesConfig::validate` requires a value in `[4, 128]`.
    pub m: Option<usize>,
    /// Size of the candidate pool during construction.
    /// `None` = auto based on dimension.
    /// When set, `VelesConfig::validate` requires a value in `[100, 2000]`.
    pub ef_construction: Option<usize>,
    /// Maximum number of layers (0 = auto).
    pub max_layers: usize,
}
112
113/// Storage configuration section.
114#[derive(Debug, Clone, Serialize, Deserialize)]
115#[serde(default)]
116pub struct StorageConfig {
117    /// Data directory path.
118    pub data_dir: String,
119    /// Storage mode: "mmap" or "memory".
120    pub storage_mode: String,
121    /// Mmap cache size in megabytes.
122    pub mmap_cache_mb: usize,
123    /// Vector alignment in bytes.
124    pub vector_alignment: usize,
125}
126
127impl Default for StorageConfig {
128    fn default() -> Self {
129        Self {
130            data_dir: "./velesdb_data".to_string(),
131            storage_mode: "mmap".to_string(),
132            mmap_cache_mb: 1024,
133            vector_alignment: 64,
134        }
135    }
136}
137
138/// Limits configuration section.
139#[derive(Debug, Clone, Serialize, Deserialize)]
140#[serde(default)]
141pub struct LimitsConfig {
142    /// Maximum vector dimensions.
143    pub max_dimensions: usize,
144    /// Maximum vectors per collection.
145    pub max_vectors_per_collection: usize,
146    /// Maximum number of collections.
147    pub max_collections: usize,
148    /// Maximum payload size in bytes.
149    pub max_payload_size: usize,
150    /// Maximum vectors for perfect mode (bruteforce).
151    pub max_perfect_mode_vectors: usize,
152}
153
154impl Default for LimitsConfig {
155    fn default() -> Self {
156        Self {
157            max_dimensions: 4096,
158            max_vectors_per_collection: 100_000_000,
159            max_collections: 1000,
160            max_payload_size: 1_048_576, // 1 MB
161            max_perfect_mode_vectors: 500_000,
162        }
163    }
164}
165
166/// Server configuration section.
167#[derive(Debug, Clone, Serialize, Deserialize)]
168#[serde(default)]
169pub struct ServerConfig {
170    /// Host address.
171    pub host: String,
172    /// Port number.
173    pub port: u16,
174    /// Number of worker threads (0 = auto).
175    pub workers: usize,
176    /// Maximum HTTP body size in bytes.
177    pub max_body_size: usize,
178    /// Enable CORS.
179    pub cors_enabled: bool,
180    /// CORS allowed origins.
181    pub cors_origins: Vec<String>,
182}
183
184impl Default for ServerConfig {
185    fn default() -> Self {
186        Self {
187            host: "127.0.0.1".to_string(),
188            port: 8080,
189            workers: 0,                 // Auto
190            max_body_size: 104_857_600, // 100 MB
191            cors_enabled: false,
192            cors_origins: vec!["*".to_string()],
193        }
194    }
195}
196
197/// Logging configuration section.
198#[derive(Debug, Clone, Serialize, Deserialize)]
199#[serde(default)]
200pub struct LoggingConfig {
201    /// Log level: error, warn, info, debug, trace.
202    pub level: String,
203    /// Log format: text or json.
204    pub format: String,
205    /// Log file path (empty = stdout).
206    pub file: String,
207}
208
209impl Default for LoggingConfig {
210    fn default() -> Self {
211        Self {
212            level: "info".to_string(),
213            format: "text".to_string(),
214            file: String::new(),
215        }
216    }
217}
218
219/// Quantization configuration section (EPIC-073/US-005).
220#[derive(Debug, Clone, Serialize, Deserialize)]
221#[serde(default)]
222pub struct QuantizationConfig {
223    /// Default quantization type: none, sq8, binary.
224    pub default_type: String,
225    /// Enable reranking after quantized search.
226    pub rerank_enabled: bool,
227    /// Reranking multiplier for candidates.
228    pub rerank_multiplier: usize,
229    /// Auto-enable quantization for large collections (EPIC-073/US-005).
230    pub auto_quantization: bool,
231    /// Threshold for auto-quantization (number of vectors).
232    pub auto_quantization_threshold: usize,
233}
234
235impl Default for QuantizationConfig {
236    fn default() -> Self {
237        Self {
238            default_type: "none".to_string(),
239            rerank_enabled: true,
240            rerank_multiplier: 2,
241            auto_quantization: true,
242            auto_quantization_threshold: 10_000,
243        }
244    }
245}
246
247impl QuantizationConfig {
248    /// Returns whether quantization should be used based on vector count (EPIC-073/US-005).
249    #[must_use]
250    pub fn should_quantize(&self, vector_count: usize) -> bool {
251        self.auto_quantization && vector_count >= self.auto_quantization_threshold
252    }
253}
254
/// Main `VelesDB` configuration structure.
///
/// Each field corresponds to one section of the configuration (for example
/// `[search]` or `[server]` in TOML). `Default` is derived, so it is built
/// from the `Default` of every section, and `#[serde(default)]` makes any
/// section missing from the input fall back to that default.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(default)]
pub struct VelesConfig {
    /// Search configuration (`search` section).
    pub search: SearchConfig,
    /// HNSW index configuration (`hnsw` section).
    pub hnsw: HnswConfig,
    /// Storage configuration (`storage` section).
    pub storage: StorageConfig,
    /// Limits configuration (`limits` section).
    pub limits: LimitsConfig,
    /// Server configuration (`server` section).
    pub server: ServerConfig,
    /// Logging configuration (`logging` section).
    pub logging: LoggingConfig,
    /// Quantization configuration (`quantization` section).
    pub quantization: QuantizationConfig,
}
274
275impl VelesConfig {
276    /// Loads configuration from default sources.
277    ///
278    /// Priority: defaults < file < environment variables.
279    ///
280    /// # Errors
281    ///
282    /// Returns an error if configuration parsing fails.
283    pub fn load() -> Result<Self, ConfigError> {
284        Self::load_from_path("velesdb.toml")
285    }
286
287    /// Loads configuration from a specific file path.
288    ///
289    /// # Arguments
290    ///
291    /// * `path` - Path to the configuration file.
292    ///
293    /// # Errors
294    ///
295    /// Returns an error if configuration parsing fails.
296    pub fn load_from_path<P: AsRef<Path>>(path: P) -> Result<Self, ConfigError> {
297        let figment = Figment::new()
298            .merge(Serialized::defaults(Self::default()))
299            .merge(Toml::file(path.as_ref()))
300            .merge(Env::prefixed("VELESDB_").split("_").lowercase(false));
301
302        figment
303            .extract()
304            .map_err(|e| ConfigError::ParseError(e.to_string()))
305    }
306
307    /// Creates a configuration from a TOML string.
308    ///
309    /// # Arguments
310    ///
311    /// * `toml_str` - TOML configuration string.
312    ///
313    /// # Errors
314    ///
315    /// Returns an error if parsing fails.
316    pub fn from_toml(toml_str: &str) -> Result<Self, ConfigError> {
317        let figment = Figment::new()
318            .merge(Serialized::defaults(Self::default()))
319            .merge(Toml::string(toml_str));
320
321        figment
322            .extract()
323            .map_err(|e| ConfigError::ParseError(e.to_string()))
324    }
325
326    /// Validates the configuration.
327    ///
328    /// # Errors
329    ///
330    /// Returns an error if any configuration value is invalid.
331    pub fn validate(&self) -> Result<(), ConfigError> {
332        // Validate search config
333        if let Some(ef) = self.search.ef_search {
334            if !(16..=4096).contains(&ef) {
335                return Err(ConfigError::InvalidValue {
336                    key: "search.ef_search".to_string(),
337                    message: format!("value {ef} is out of range [16, 4096]"),
338                });
339            }
340        }
341
342        if self.search.max_results == 0 || self.search.max_results > 10000 {
343            return Err(ConfigError::InvalidValue {
344                key: "search.max_results".to_string(),
345                message: format!(
346                    "value {} is out of range [1, 10000]",
347                    self.search.max_results
348                ),
349            });
350        }
351
352        // Validate HNSW config
353        if let Some(m) = self.hnsw.m {
354            if !(4..=128).contains(&m) {
355                return Err(ConfigError::InvalidValue {
356                    key: "hnsw.m".to_string(),
357                    message: format!("value {m} is out of range [4, 128]"),
358                });
359            }
360        }
361
362        if let Some(ef) = self.hnsw.ef_construction {
363            if !(100..=2000).contains(&ef) {
364                return Err(ConfigError::InvalidValue {
365                    key: "hnsw.ef_construction".to_string(),
366                    message: format!("value {ef} is out of range [100, 2000]"),
367                });
368            }
369        }
370
371        // Validate limits
372        if self.limits.max_dimensions == 0 || self.limits.max_dimensions > 65536 {
373            return Err(ConfigError::InvalidValue {
374                key: "limits.max_dimensions".to_string(),
375                message: format!(
376                    "value {} is out of range [1, 65536]",
377                    self.limits.max_dimensions
378                ),
379            });
380        }
381
382        // Validate server config
383        if self.server.port < 1024 {
384            return Err(ConfigError::InvalidValue {
385                key: "server.port".to_string(),
386                message: format!("value {} must be >= 1024", self.server.port),
387            });
388        }
389
390        // Validate storage mode
391        let valid_modes = ["mmap", "memory"];
392        if !valid_modes.contains(&self.storage.storage_mode.as_str()) {
393            return Err(ConfigError::InvalidValue {
394                key: "storage.storage_mode".to_string(),
395                message: format!(
396                    "value '{}' is invalid, expected one of: {:?}",
397                    self.storage.storage_mode, valid_modes
398                ),
399            });
400        }
401
402        // Validate logging level
403        let valid_levels = ["error", "warn", "info", "debug", "trace"];
404        if !valid_levels.contains(&self.logging.level.as_str()) {
405            return Err(ConfigError::InvalidValue {
406                key: "logging.level".to_string(),
407                message: format!(
408                    "value '{}' is invalid, expected one of: {:?}",
409                    self.logging.level, valid_levels
410                ),
411            });
412        }
413
414        Ok(())
415    }
416
417    /// Returns the effective `ef_search` value.
418    ///
419    /// Uses explicit `ef_search` if set, otherwise derives from search mode.
420    #[must_use]
421    pub fn effective_ef_search(&self) -> usize {
422        self.search
423            .ef_search
424            .unwrap_or_else(|| self.search.default_mode.ef_search())
425    }
426
427    /// Serializes the configuration to TOML.
428    ///
429    /// # Errors
430    ///
431    /// Returns an error if serialization fails.
432    pub fn to_toml(&self) -> Result<String, ConfigError> {
433        toml::to_string_pretty(self).map_err(|e| ConfigError::ParseError(e.to_string()))
434    }
435}