Skip to main content

velesdb_core/
config.rs

1//! `VelesDB` Configuration Module
2//!
3//! Provides configuration file support via `velesdb.toml`, environment variables,
4//! and runtime overrides.
5//!
6//! # Priority (highest to lowest)
7//!
8//! 1. Runtime overrides (API, REPL)
9//! 2. Environment variables (`VELESDB_*`)
10//! 3. Configuration file (`velesdb.toml`)
11//! 4. Default values
12
13use figment::{
14    providers::{Env, Format, Serialized, Toml},
15    Figment,
16};
17use serde::{Deserialize, Serialize};
18use std::path::Path;
19use thiserror::Error;
20
21// Re-export quantization types so existing `crate::config::Quantization*` paths work.
22pub use crate::config_quantization::{QuantizationConfig, QuantizationType};
23
24/// Configuration errors.
25#[derive(Error, Debug)]
26#[non_exhaustive]
27pub enum ConfigError {
28    /// Failed to parse configuration file.
29    #[error("Failed to parse configuration: {0}")]
30    ParseError(String),
31
32    /// Invalid configuration value.
33    #[error("Invalid configuration value for '{key}': {message}")]
34    InvalidValue {
35        /// Configuration key that failed validation.
36        key: String,
37        /// Validation error message.
38        message: String,
39    },
40
41    /// Configuration file not found.
42    #[error("Configuration file not found: {0}")]
43    FileNotFound(String),
44
45    /// IO error.
46    #[error("IO error: {0}")]
47    IoError(#[from] std::io::Error),
48}
49
50/// Search mode presets.
51#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
52#[serde(rename_all = "snake_case")]
53#[non_exhaustive]
54pub enum SearchMode {
55    /// Fast search with `ef_search=96`, ~95% recall.
56    Fast,
57    /// Balanced search with `ef_search=160`, ~99.5% recall (default).
58    #[default]
59    Balanced,
60    /// Accurate search with `ef_search=512`, ~100% recall.
61    Accurate,
62    /// Perfect recall with bruteforce, 100% guaranteed.
63    Perfect,
64}
65
66impl SearchMode {
67    /// Returns the `ef_search` value for this mode.
68    #[must_use]
69    pub fn ef_search(&self) -> usize {
70        match self {
71            Self::Fast => 96,
72            Self::Balanced => 160,
73            Self::Accurate => 512,
74            Self::Perfect => usize::MAX, // Signals bruteforce
75        }
76    }
77}
78
79/// Search configuration section.
80#[derive(Debug, Clone, Serialize, Deserialize)]
81#[serde(default)]
82pub struct SearchConfig {
83    /// Default search mode.
84    pub default_mode: SearchMode,
85    /// Override `ef_search` (if set, overrides mode).
86    pub ef_search: Option<usize>,
87    /// Maximum results per query.
88    pub max_results: usize,
89    /// Query timeout in milliseconds.
90    pub query_timeout_ms: u64,
91}
92
93impl Default for SearchConfig {
94    fn default() -> Self {
95        Self {
96            default_mode: SearchMode::Balanced,
97            ef_search: None,
98            max_results: 1000,
99            query_timeout_ms: 30000,
100        }
101    }
102}
103
104/// HNSW index configuration section.
105#[derive(Debug, Clone, Default, Serialize, Deserialize)]
106#[serde(default)]
107pub struct HnswConfig {
108    /// Number of connections per node (M parameter).
109    /// `None` = auto based on dimension.
110    pub m: Option<usize>,
111    /// Size of the candidate pool during construction.
112    /// `None` = auto based on dimension.
113    pub ef_construction: Option<usize>,
114    /// Maximum number of layers (0 = auto).
115    pub max_layers: usize,
116}
117
118/// Server-layer configuration types (HTTP transport, logging, storage paths).
119///
120/// These types are intentionally separated from the core engine configuration
121/// (`SearchConfig`, `HnswConfig`, `LimitsConfig`) to enforce layer boundaries.
122/// Import via `config::server::ServerConfig` or use the crate-root re-exports.
123pub mod server {
124    use serde::{Deserialize, Serialize};
125
126    /// Storage configuration section.
127    #[derive(Debug, Clone, Serialize, Deserialize)]
128    #[serde(default)]
129    pub struct StorageConfig {
130        /// Data directory path.
131        pub data_dir: String,
132        /// Storage mode: `"mmap"` or `"memory"`.
133        pub storage_mode: String,
134        /// Mmap cache size in megabytes.
135        pub mmap_cache_mb: usize,
136        /// Vector alignment in bytes.
137        pub vector_alignment: usize,
138    }
139
140    impl Default for StorageConfig {
141        fn default() -> Self {
142            Self {
143                data_dir: "./velesdb_data".to_string(),
144                storage_mode: "mmap".to_string(),
145                mmap_cache_mb: 1024,
146                vector_alignment: 64,
147            }
148        }
149    }
150
151    /// Server configuration section.
152    #[derive(Debug, Clone, Serialize, Deserialize)]
153    #[serde(default)]
154    pub struct ServerConfig {
155        /// Host address.
156        pub host: String,
157        /// Port number.
158        pub port: u16,
159        /// Number of worker threads (0 = auto).
160        pub workers: usize,
161        /// Maximum HTTP body size in bytes.
162        pub max_body_size: usize,
163        /// Enable CORS.
164        pub cors_enabled: bool,
165        /// CORS allowed origins.
166        pub cors_origins: Vec<String>,
167    }
168
169    impl Default for ServerConfig {
170        fn default() -> Self {
171            Self {
172                host: "127.0.0.1".to_string(),
173                port: 8080,
174                workers: 0,
175                max_body_size: 104_857_600,
176                cors_enabled: false,
177                cors_origins: vec!["*".to_string()],
178            }
179        }
180    }
181
182    /// Logging configuration section.
183    #[derive(Debug, Clone, Serialize, Deserialize)]
184    #[serde(default)]
185    pub struct LoggingConfig {
186        /// Log level: `error`, `warn`, `info`, `debug`, `trace`.
187        pub level: String,
188        /// Log format: `text` or `json`.
189        pub format: String,
190        /// Log file path (empty = stdout).
191        pub file: String,
192    }
193
194    impl Default for LoggingConfig {
195        fn default() -> Self {
196            Self {
197                level: "info".to_string(),
198                format: "text".to_string(),
199                file: String::new(),
200            }
201        }
202    }
203}
204
205// Backward-compatible re-exports at module level.
206pub use server::{LoggingConfig, ServerConfig, StorageConfig};
207
208/// Limits configuration section.
209#[derive(Debug, Clone, Serialize, Deserialize)]
210#[serde(default)]
211pub struct LimitsConfig {
212    /// Maximum vector dimensions.
213    pub max_dimensions: usize,
214    /// Maximum vectors per collection.
215    pub max_vectors_per_collection: usize,
216    /// Maximum number of collections.
217    pub max_collections: usize,
218    /// Maximum payload size in bytes.
219    pub max_payload_size: usize,
220    /// Maximum vectors for perfect mode (bruteforce).
221    pub max_perfect_mode_vectors: usize,
222}
223
224impl Default for LimitsConfig {
225    fn default() -> Self {
226        Self {
227            max_dimensions: 4096,
228            max_vectors_per_collection: 100_000_000,
229            max_collections: 1000,
230            max_payload_size: 1_048_576, // 1 MB
231            max_perfect_mode_vectors: 500_000,
232        }
233    }
234}
235
236// ---------------------------------------------------------------------------
237// WAL batch commit configuration
238// ---------------------------------------------------------------------------
239
/// Serde default for `WalBatchConfig::commit_delay_us`: the WAL group
/// commit delay, in microseconds.
const fn default_commit_delay_us() -> u64 {
    const COMMIT_DELAY_US: u64 = 100;
    COMMIT_DELAY_US
}
244
/// Serde default for `WalBatchConfig::max_batch_size`: the maximum number
/// of entries in one WAL batch.
const fn default_max_batch_size() -> usize {
    const MAX_BATCH_ENTRIES: usize = 128;
    MAX_BATCH_ENTRIES
}
249
250/// Configuration for WAL group commit batching.
251///
252/// When enabled, multiple concurrent writes are batched into a single
253/// `sync_all()` call, amortizing the fsync cost across the batch.
254///
255/// # Example (TOML)
256///
257/// ```toml
258/// [wal_batch]
259/// enabled = true
260/// commit_delay_us = 200
261/// max_batch_size = 256
262/// ```
263#[derive(Debug, Clone, Serialize, Deserialize)]
264pub struct WalBatchConfig {
265    /// Whether group commit is enabled. Default: `false`.
266    #[serde(default)]
267    pub enabled: bool,
268    /// Maximum delay in microseconds before flushing a batch. Default: `100`.
269    #[serde(default = "default_commit_delay_us")]
270    pub commit_delay_us: u64,
271    /// Maximum number of entries per batch. Default: `128`.
272    #[serde(default = "default_max_batch_size")]
273    pub max_batch_size: usize,
274}
275
276impl Default for WalBatchConfig {
277    fn default() -> Self {
278        Self {
279            enabled: false,
280            commit_delay_us: 100,
281            max_batch_size: 128,
282        }
283    }
284}
285
286/// Main `VelesDB` configuration structure.
287#[derive(Debug, Clone, Serialize, Deserialize, Default)]
288#[serde(default)]
289pub struct VelesConfig {
290    /// Search configuration.
291    pub search: SearchConfig,
292    /// HNSW index configuration.
293    pub hnsw: HnswConfig,
294    /// Storage configuration.
295    pub storage: StorageConfig,
296    /// Limits configuration.
297    pub limits: LimitsConfig,
298    /// Server configuration.
299    pub server: ServerConfig,
300    /// Logging configuration.
301    pub logging: LoggingConfig,
302    /// Quantization configuration.
303    pub quantization: QuantizationConfig,
304    /// WAL group commit batching configuration.
305    pub wal_batch: WalBatchConfig,
306}
307
308impl VelesConfig {
309    /// Loads configuration from default sources.
310    ///
311    /// Priority: defaults < file < environment variables.
312    ///
313    /// # Errors
314    ///
315    /// Returns `ConfigError` if the configuration file is malformed or
316    /// environment variables contain invalid values.
317    pub fn load() -> Result<Self, ConfigError> {
318        Self::load_from_path("velesdb.toml")
319    }
320
321    /// Loads configuration from a specific file path.
322    ///
323    /// # Arguments
324    ///
325    /// * `path` - Path to the configuration file.
326    ///
327    /// # Errors
328    ///
329    /// Returns an error if configuration parsing fails.
330    pub fn load_from_path<P: AsRef<Path>>(path: P) -> Result<Self, ConfigError> {
331        let figment = Figment::new()
332            .merge(Serialized::defaults(Self::default()))
333            .merge(Toml::file(path.as_ref()))
334            .merge(Env::prefixed("VELESDB_").split("_").lowercase(false));
335
336        figment
337            .extract()
338            .map_err(|e| ConfigError::ParseError(e.to_string()))
339    }
340
341    /// Creates a configuration from a TOML string.
342    ///
343    /// # Arguments
344    ///
345    /// * `toml_str` - TOML configuration string.
346    ///
347    /// # Errors
348    ///
349    /// Returns an error if parsing fails.
350    pub fn from_toml(toml_str: &str) -> Result<Self, ConfigError> {
351        let figment = Figment::new()
352            .merge(Serialized::defaults(Self::default()))
353            .merge(Toml::string(toml_str));
354
355        figment
356            .extract()
357            .map_err(|e| ConfigError::ParseError(e.to_string()))
358    }
359
360    // Validation is in config_validation.rs
361
362    /// Returns the effective `ef_search` value.
363    #[must_use]
364    pub fn effective_ef_search(&self) -> usize {
365        self.search
366            .ef_search
367            .unwrap_or_else(|| self.search.default_mode.ef_search())
368    }
369
370    /// Serializes the configuration to TOML.
371    ///
372    /// # Errors
373    ///
374    /// Returns an error if serialization fails.
375    pub fn to_toml(&self) -> Result<String, ConfigError> {
376        toml::to_string_pretty(self).map_err(|e| ConfigError::ParseError(e.to_string()))
377    }
378}