velesdb_core/
config.rs

1//! `VelesDB` Configuration Module
2//!
3//! Provides configuration file support via `velesdb.toml`, environment variables,
4//! and runtime overrides.
5//!
6//! # Priority (highest to lowest)
7//!
8//! 1. Runtime overrides (API, REPL)
9//! 2. Environment variables (`VELESDB_*`)
10//! 3. Configuration file (`velesdb.toml`)
11//! 4. Default values
12
13use figment::{
14    providers::{Env, Format, Serialized, Toml},
15    Figment,
16};
17use serde::{Deserialize, Serialize};
18use std::path::Path;
19use thiserror::Error;
20
21/// Configuration errors.
22#[derive(Error, Debug)]
23pub enum ConfigError {
24    /// Failed to parse configuration file.
25    #[error("Failed to parse configuration: {0}")]
26    ParseError(String),
27
28    /// Invalid configuration value.
29    #[error("Invalid configuration value for '{key}': {message}")]
30    InvalidValue {
31        /// Configuration key that failed validation.
32        key: String,
33        /// Validation error message.
34        message: String,
35    },
36
37    /// Configuration file not found.
38    #[error("Configuration file not found: {0}")]
39    FileNotFound(String),
40
41    /// IO error.
42    #[error("IO error: {0}")]
43    IoError(#[from] std::io::Error),
44}
45
46/// Search mode presets.
47#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
48#[serde(rename_all = "snake_case")]
49pub enum SearchMode {
50    /// Fast search with `ef_search=64`, ~90% recall.
51    Fast,
52    /// Balanced search with `ef_search=128`, ~98% recall (default).
53    #[default]
54    Balanced,
55    /// Accurate search with `ef_search=256`, ~100% recall.
56    Accurate,
57    /// Perfect recall with bruteforce, 100% guaranteed.
58    Perfect,
59}
60
61impl SearchMode {
62    /// Returns the `ef_search` value for this mode.
63    #[must_use]
64    pub fn ef_search(&self) -> usize {
65        match self {
66            Self::Fast => 64,
67            Self::Balanced => 128,
68            Self::Accurate => 256,
69            Self::Perfect => usize::MAX, // Signals bruteforce
70        }
71    }
72}
73
74/// Search configuration section.
75#[derive(Debug, Clone, Serialize, Deserialize)]
76#[serde(default)]
77pub struct SearchConfig {
78    /// Default search mode.
79    pub default_mode: SearchMode,
80    /// Override `ef_search` (if set, overrides mode).
81    pub ef_search: Option<usize>,
82    /// Maximum results per query.
83    pub max_results: usize,
84    /// Query timeout in milliseconds.
85    pub query_timeout_ms: u64,
86}
87
88impl Default for SearchConfig {
89    fn default() -> Self {
90        Self {
91            default_mode: SearchMode::Balanced,
92            ef_search: None,
93            max_results: 1000,
94            query_timeout_ms: 30000,
95        }
96    }
97}
98
99/// HNSW index configuration section.
100#[derive(Debug, Clone, Default, Serialize, Deserialize)]
101#[serde(default)]
102pub struct HnswConfig {
103    /// Number of connections per node (M parameter).
104    /// `None` = auto based on dimension.
105    pub m: Option<usize>,
106    /// Size of the candidate pool during construction.
107    /// `None` = auto based on dimension.
108    pub ef_construction: Option<usize>,
109    /// Maximum number of layers (0 = auto).
110    pub max_layers: usize,
111}
112
113/// Storage configuration section.
114#[derive(Debug, Clone, Serialize, Deserialize)]
115#[serde(default)]
116pub struct StorageConfig {
117    /// Data directory path.
118    pub data_dir: String,
119    /// Storage mode: "mmap" or "memory".
120    pub storage_mode: String,
121    /// Mmap cache size in megabytes.
122    pub mmap_cache_mb: usize,
123    /// Vector alignment in bytes.
124    pub vector_alignment: usize,
125}
126
127impl Default for StorageConfig {
128    fn default() -> Self {
129        Self {
130            data_dir: "./velesdb_data".to_string(),
131            storage_mode: "mmap".to_string(),
132            mmap_cache_mb: 1024,
133            vector_alignment: 64,
134        }
135    }
136}
137
138/// Limits configuration section.
139#[derive(Debug, Clone, Serialize, Deserialize)]
140#[serde(default)]
141pub struct LimitsConfig {
142    /// Maximum vector dimensions.
143    pub max_dimensions: usize,
144    /// Maximum vectors per collection.
145    pub max_vectors_per_collection: usize,
146    /// Maximum number of collections.
147    pub max_collections: usize,
148    /// Maximum payload size in bytes.
149    pub max_payload_size: usize,
150    /// Maximum vectors for perfect mode (bruteforce).
151    pub max_perfect_mode_vectors: usize,
152}
153
154impl Default for LimitsConfig {
155    fn default() -> Self {
156        Self {
157            max_dimensions: 4096,
158            max_vectors_per_collection: 100_000_000,
159            max_collections: 1000,
160            max_payload_size: 1_048_576, // 1 MB
161            max_perfect_mode_vectors: 500_000,
162        }
163    }
164}
165
166/// Server configuration section.
167#[derive(Debug, Clone, Serialize, Deserialize)]
168#[serde(default)]
169pub struct ServerConfig {
170    /// Host address.
171    pub host: String,
172    /// Port number.
173    pub port: u16,
174    /// Number of worker threads (0 = auto).
175    pub workers: usize,
176    /// Maximum HTTP body size in bytes.
177    pub max_body_size: usize,
178    /// Enable CORS.
179    pub cors_enabled: bool,
180    /// CORS allowed origins.
181    pub cors_origins: Vec<String>,
182}
183
184impl Default for ServerConfig {
185    fn default() -> Self {
186        Self {
187            host: "127.0.0.1".to_string(),
188            port: 8080,
189            workers: 0,                 // Auto
190            max_body_size: 104_857_600, // 100 MB
191            cors_enabled: false,
192            cors_origins: vec!["*".to_string()],
193        }
194    }
195}
196
197/// Logging configuration section.
198#[derive(Debug, Clone, Serialize, Deserialize)]
199#[serde(default)]
200pub struct LoggingConfig {
201    /// Log level: error, warn, info, debug, trace.
202    pub level: String,
203    /// Log format: text or json.
204    pub format: String,
205    /// Log file path (empty = stdout).
206    pub file: String,
207}
208
209impl Default for LoggingConfig {
210    fn default() -> Self {
211        Self {
212            level: "info".to_string(),
213            format: "text".to_string(),
214            file: String::new(),
215        }
216    }
217}
218
219/// Quantization configuration section.
220#[derive(Debug, Clone, Serialize, Deserialize)]
221#[serde(default)]
222pub struct QuantizationConfig {
223    /// Default quantization type: none, sq8, binary.
224    pub default_type: String,
225    /// Enable reranking after quantized search.
226    pub rerank_enabled: bool,
227    /// Reranking multiplier for candidates.
228    pub rerank_multiplier: usize,
229}
230
231impl Default for QuantizationConfig {
232    fn default() -> Self {
233        Self {
234            default_type: "none".to_string(),
235            rerank_enabled: true,
236            rerank_multiplier: 2,
237        }
238    }
239}
240
241/// Main `VelesDB` configuration structure.
242#[derive(Debug, Clone, Serialize, Deserialize, Default)]
243#[serde(default)]
244pub struct VelesConfig {
245    /// Search configuration.
246    pub search: SearchConfig,
247    /// HNSW index configuration.
248    pub hnsw: HnswConfig,
249    /// Storage configuration.
250    pub storage: StorageConfig,
251    /// Limits configuration.
252    pub limits: LimitsConfig,
253    /// Server configuration.
254    pub server: ServerConfig,
255    /// Logging configuration.
256    pub logging: LoggingConfig,
257    /// Quantization configuration.
258    pub quantization: QuantizationConfig,
259}
260
261impl VelesConfig {
262    /// Loads configuration from default sources.
263    ///
264    /// Priority: defaults < file < environment variables.
265    ///
266    /// # Errors
267    ///
268    /// Returns an error if configuration parsing fails.
269    pub fn load() -> Result<Self, ConfigError> {
270        Self::load_from_path("velesdb.toml")
271    }
272
273    /// Loads configuration from a specific file path.
274    ///
275    /// # Arguments
276    ///
277    /// * `path` - Path to the configuration file.
278    ///
279    /// # Errors
280    ///
281    /// Returns an error if configuration parsing fails.
282    pub fn load_from_path<P: AsRef<Path>>(path: P) -> Result<Self, ConfigError> {
283        let figment = Figment::new()
284            .merge(Serialized::defaults(Self::default()))
285            .merge(Toml::file(path.as_ref()))
286            .merge(Env::prefixed("VELESDB_").split("_").lowercase(false));
287
288        figment
289            .extract()
290            .map_err(|e| ConfigError::ParseError(e.to_string()))
291    }
292
293    /// Creates a configuration from a TOML string.
294    ///
295    /// # Arguments
296    ///
297    /// * `toml_str` - TOML configuration string.
298    ///
299    /// # Errors
300    ///
301    /// Returns an error if parsing fails.
302    pub fn from_toml(toml_str: &str) -> Result<Self, ConfigError> {
303        let figment = Figment::new()
304            .merge(Serialized::defaults(Self::default()))
305            .merge(Toml::string(toml_str));
306
307        figment
308            .extract()
309            .map_err(|e| ConfigError::ParseError(e.to_string()))
310    }
311
312    /// Validates the configuration.
313    ///
314    /// # Errors
315    ///
316    /// Returns an error if any configuration value is invalid.
317    pub fn validate(&self) -> Result<(), ConfigError> {
318        // Validate search config
319        if let Some(ef) = self.search.ef_search {
320            if !(16..=4096).contains(&ef) {
321                return Err(ConfigError::InvalidValue {
322                    key: "search.ef_search".to_string(),
323                    message: format!("value {ef} is out of range [16, 4096]"),
324                });
325            }
326        }
327
328        if self.search.max_results == 0 || self.search.max_results > 10000 {
329            return Err(ConfigError::InvalidValue {
330                key: "search.max_results".to_string(),
331                message: format!(
332                    "value {} is out of range [1, 10000]",
333                    self.search.max_results
334                ),
335            });
336        }
337
338        // Validate HNSW config
339        if let Some(m) = self.hnsw.m {
340            if !(4..=128).contains(&m) {
341                return Err(ConfigError::InvalidValue {
342                    key: "hnsw.m".to_string(),
343                    message: format!("value {m} is out of range [4, 128]"),
344                });
345            }
346        }
347
348        if let Some(ef) = self.hnsw.ef_construction {
349            if !(100..=2000).contains(&ef) {
350                return Err(ConfigError::InvalidValue {
351                    key: "hnsw.ef_construction".to_string(),
352                    message: format!("value {ef} is out of range [100, 2000]"),
353                });
354            }
355        }
356
357        // Validate limits
358        if self.limits.max_dimensions == 0 || self.limits.max_dimensions > 65536 {
359            return Err(ConfigError::InvalidValue {
360                key: "limits.max_dimensions".to_string(),
361                message: format!(
362                    "value {} is out of range [1, 65536]",
363                    self.limits.max_dimensions
364                ),
365            });
366        }
367
368        // Validate server config
369        if self.server.port < 1024 {
370            return Err(ConfigError::InvalidValue {
371                key: "server.port".to_string(),
372                message: format!("value {} must be >= 1024", self.server.port),
373            });
374        }
375
376        // Validate storage mode
377        let valid_modes = ["mmap", "memory"];
378        if !valid_modes.contains(&self.storage.storage_mode.as_str()) {
379            return Err(ConfigError::InvalidValue {
380                key: "storage.storage_mode".to_string(),
381                message: format!(
382                    "value '{}' is invalid, expected one of: {:?}",
383                    self.storage.storage_mode, valid_modes
384                ),
385            });
386        }
387
388        // Validate logging level
389        let valid_levels = ["error", "warn", "info", "debug", "trace"];
390        if !valid_levels.contains(&self.logging.level.as_str()) {
391            return Err(ConfigError::InvalidValue {
392                key: "logging.level".to_string(),
393                message: format!(
394                    "value '{}' is invalid, expected one of: {:?}",
395                    self.logging.level, valid_levels
396                ),
397            });
398        }
399
400        Ok(())
401    }
402
403    /// Returns the effective `ef_search` value.
404    ///
405    /// Uses explicit `ef_search` if set, otherwise derives from search mode.
406    #[must_use]
407    pub fn effective_ef_search(&self) -> usize {
408        self.search
409            .ef_search
410            .unwrap_or_else(|| self.search.default_mode.ef_search())
411    }
412
413    /// Serializes the configuration to TOML.
414    ///
415    /// # Errors
416    ///
417    /// Returns an error if serialization fails.
418    pub fn to_toml(&self) -> Result<String, ConfigError> {
419        toml::to_string_pretty(self).map_err(|e| ConfigError::ParseError(e.to_string()))
420    }
421}