velesdb_core/config.rs
1//! `VelesDB` Configuration Module
2//!
3//! Provides configuration file support via `velesdb.toml`, environment variables,
4//! and runtime overrides.
5//!
6//! # Priority (highest to lowest)
7//!
8//! 1. Runtime overrides (API, REPL)
9//! 2. Environment variables (`VELESDB_*`)
10//! 3. Configuration file (`velesdb.toml`)
11//! 4. Default values
12
13use figment::{
14 providers::{Env, Format, Serialized, Toml},
15 Figment,
16};
17use serde::{Deserialize, Serialize};
18use std::path::Path;
19use thiserror::Error;
20
21// Re-export quantization types so existing `crate::config::Quantization*` paths work.
22pub use crate::config_quantization::{QuantizationConfig, QuantizationType};
23
24/// Configuration errors.
25#[derive(Error, Debug)]
26#[non_exhaustive]
27pub enum ConfigError {
28 /// Failed to parse configuration file.
29 #[error("Failed to parse configuration: {0}")]
30 ParseError(String),
31
32 /// Invalid configuration value.
33 #[error("Invalid configuration value for '{key}': {message}")]
34 InvalidValue {
35 /// Configuration key that failed validation.
36 key: String,
37 /// Validation error message.
38 message: String,
39 },
40
41 /// Configuration file not found.
42 #[error("Configuration file not found: {0}")]
43 FileNotFound(String),
44
45 /// IO error.
46 #[error("IO error: {0}")]
47 IoError(#[from] std::io::Error),
48}
49
50/// Search mode presets.
51#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
52#[serde(rename_all = "snake_case")]
53#[non_exhaustive]
54pub enum SearchMode {
55 /// Fast search with `ef_search=96`, ~95% recall.
56 Fast,
57 /// Balanced search with `ef_search=160`, ~99.5% recall (default).
58 #[default]
59 Balanced,
60 /// Accurate search with `ef_search=512`, ~100% recall.
61 Accurate,
62 /// Perfect recall with bruteforce, 100% guaranteed.
63 Perfect,
64}
65
66impl SearchMode {
67 /// Returns the `ef_search` value for this mode.
68 #[must_use]
69 pub fn ef_search(&self) -> usize {
70 match self {
71 Self::Fast => 96,
72 Self::Balanced => 160,
73 Self::Accurate => 512,
74 Self::Perfect => usize::MAX, // Signals bruteforce
75 }
76 }
77}
78
79/// Search configuration section.
80#[derive(Debug, Clone, Serialize, Deserialize)]
81#[serde(default)]
82pub struct SearchConfig {
83 /// Default search mode.
84 pub default_mode: SearchMode,
85 /// Override `ef_search` (if set, overrides mode).
86 pub ef_search: Option<usize>,
87 /// Maximum results per query.
88 pub max_results: usize,
89 /// Query timeout in milliseconds.
90 pub query_timeout_ms: u64,
91}
92
93impl Default for SearchConfig {
94 fn default() -> Self {
95 Self {
96 default_mode: SearchMode::Balanced,
97 ef_search: None,
98 max_results: 1000,
99 query_timeout_ms: 30000,
100 }
101 }
102}
103
104/// HNSW index configuration section.
105#[derive(Debug, Clone, Default, Serialize, Deserialize)]
106#[serde(default)]
107pub struct HnswConfig {
108 /// Number of connections per node (M parameter).
109 /// `None` = auto based on dimension.
110 pub m: Option<usize>,
111 /// Size of the candidate pool during construction.
112 /// `None` = auto based on dimension.
113 pub ef_construction: Option<usize>,
114 /// Maximum number of layers (0 = auto).
115 pub max_layers: usize,
116}
117
118/// Server-layer configuration types (HTTP transport, logging, storage paths).
119///
120/// These types are intentionally separated from the core engine configuration
121/// (`SearchConfig`, `HnswConfig`, `LimitsConfig`) to enforce layer boundaries.
122/// Import via `config::server::ServerConfig` or use the crate-root re-exports.
123pub mod server {
124 use serde::{Deserialize, Serialize};
125
126 /// Storage configuration section.
127 #[derive(Debug, Clone, Serialize, Deserialize)]
128 #[serde(default)]
129 pub struct StorageConfig {
130 /// Data directory path.
131 pub data_dir: String,
132 /// Storage mode: `"mmap"` or `"memory"`.
133 pub storage_mode: String,
134 /// Mmap cache size in megabytes.
135 pub mmap_cache_mb: usize,
136 /// Vector alignment in bytes.
137 pub vector_alignment: usize,
138 }
139
140 impl Default for StorageConfig {
141 fn default() -> Self {
142 Self {
143 data_dir: "./velesdb_data".to_string(),
144 storage_mode: "mmap".to_string(),
145 mmap_cache_mb: 1024,
146 vector_alignment: 64,
147 }
148 }
149 }
150
151 /// Server configuration section.
152 #[derive(Debug, Clone, Serialize, Deserialize)]
153 #[serde(default)]
154 pub struct ServerConfig {
155 /// Host address.
156 pub host: String,
157 /// Port number.
158 pub port: u16,
159 /// Number of worker threads (0 = auto).
160 pub workers: usize,
161 /// Maximum HTTP body size in bytes.
162 pub max_body_size: usize,
163 /// Enable CORS.
164 pub cors_enabled: bool,
165 /// CORS allowed origins.
166 pub cors_origins: Vec<String>,
167 }
168
169 impl Default for ServerConfig {
170 fn default() -> Self {
171 Self {
172 host: "127.0.0.1".to_string(),
173 port: 8080,
174 workers: 0,
175 max_body_size: 104_857_600,
176 cors_enabled: false,
177 cors_origins: vec!["*".to_string()],
178 }
179 }
180 }
181
182 /// Logging configuration section.
183 #[derive(Debug, Clone, Serialize, Deserialize)]
184 #[serde(default)]
185 pub struct LoggingConfig {
186 /// Log level: `error`, `warn`, `info`, `debug`, `trace`.
187 pub level: String,
188 /// Log format: `text` or `json`.
189 pub format: String,
190 /// Log file path (empty = stdout).
191 pub file: String,
192 }
193
194 impl Default for LoggingConfig {
195 fn default() -> Self {
196 Self {
197 level: "info".to_string(),
198 format: "text".to_string(),
199 file: String::new(),
200 }
201 }
202 }
203}
204
205// Backward-compatible re-exports at module level.
206pub use server::{LoggingConfig, ServerConfig, StorageConfig};
207
208/// Limits configuration section.
209#[derive(Debug, Clone, Serialize, Deserialize)]
210#[serde(default)]
211pub struct LimitsConfig {
212 /// Maximum vector dimensions.
213 pub max_dimensions: usize,
214 /// Maximum vectors per collection.
215 pub max_vectors_per_collection: usize,
216 /// Maximum number of collections.
217 pub max_collections: usize,
218 /// Maximum payload size in bytes.
219 pub max_payload_size: usize,
220 /// Maximum vectors for perfect mode (bruteforce).
221 pub max_perfect_mode_vectors: usize,
222}
223
224impl Default for LimitsConfig {
225 fn default() -> Self {
226 Self {
227 max_dimensions: 4096,
228 max_vectors_per_collection: 100_000_000,
229 max_collections: 1000,
230 max_payload_size: 1_048_576, // 1 MB
231 max_perfect_mode_vectors: 500_000,
232 }
233 }
234}
235
236// ---------------------------------------------------------------------------
237// WAL batch commit configuration
238// ---------------------------------------------------------------------------
239
/// Commit delay, in microseconds, used for WAL group commit when none is
/// configured.
const fn default_commit_delay_us() -> u64 {
    const MICROS: u64 = 100;
    MICROS
}
244
/// Maximum number of entries per WAL batch when none is configured.
const fn default_max_batch_size() -> usize {
    const ENTRIES: usize = 128;
    ENTRIES
}
249
250/// Configuration for WAL group commit batching.
251///
252/// When enabled, multiple concurrent writes are batched into a single
253/// `sync_all()` call, amortizing the fsync cost across the batch.
254///
255/// # Example (TOML)
256///
257/// ```toml
258/// [wal_batch]
259/// enabled = true
260/// commit_delay_us = 200
261/// max_batch_size = 256
262/// ```
263#[derive(Debug, Clone, Serialize, Deserialize)]
264pub struct WalBatchConfig {
265 /// Whether group commit is enabled. Default: `false`.
266 #[serde(default)]
267 pub enabled: bool,
268 /// Maximum delay in microseconds before flushing a batch. Default: `100`.
269 #[serde(default = "default_commit_delay_us")]
270 pub commit_delay_us: u64,
271 /// Maximum number of entries per batch. Default: `128`.
272 #[serde(default = "default_max_batch_size")]
273 pub max_batch_size: usize,
274}
275
276impl Default for WalBatchConfig {
277 fn default() -> Self {
278 Self {
279 enabled: false,
280 commit_delay_us: 100,
281 max_batch_size: 128,
282 }
283 }
284}
285
286/// Main `VelesDB` configuration structure.
287#[derive(Debug, Clone, Serialize, Deserialize, Default)]
288#[serde(default)]
289pub struct VelesConfig {
290 /// Search configuration.
291 pub search: SearchConfig,
292 /// HNSW index configuration.
293 pub hnsw: HnswConfig,
294 /// Storage configuration.
295 pub storage: StorageConfig,
296 /// Limits configuration.
297 pub limits: LimitsConfig,
298 /// Server configuration.
299 pub server: ServerConfig,
300 /// Logging configuration.
301 pub logging: LoggingConfig,
302 /// Quantization configuration.
303 pub quantization: QuantizationConfig,
304 /// WAL group commit batching configuration.
305 pub wal_batch: WalBatchConfig,
306}
307
308impl VelesConfig {
309 /// Loads configuration from default sources.
310 ///
311 /// Priority: defaults < file < environment variables.
312 ///
313 /// # Errors
314 ///
315 /// Returns `ConfigError` if the configuration file is malformed or
316 /// environment variables contain invalid values.
317 pub fn load() -> Result<Self, ConfigError> {
318 Self::load_from_path("velesdb.toml")
319 }
320
321 /// Loads configuration from a specific file path.
322 ///
323 /// # Arguments
324 ///
325 /// * `path` - Path to the configuration file.
326 ///
327 /// # Errors
328 ///
329 /// Returns an error if configuration parsing fails.
330 pub fn load_from_path<P: AsRef<Path>>(path: P) -> Result<Self, ConfigError> {
331 let figment = Figment::new()
332 .merge(Serialized::defaults(Self::default()))
333 .merge(Toml::file(path.as_ref()))
334 .merge(Env::prefixed("VELESDB_").split("_").lowercase(false));
335
336 figment
337 .extract()
338 .map_err(|e| ConfigError::ParseError(e.to_string()))
339 }
340
341 /// Creates a configuration from a TOML string.
342 ///
343 /// # Arguments
344 ///
345 /// * `toml_str` - TOML configuration string.
346 ///
347 /// # Errors
348 ///
349 /// Returns an error if parsing fails.
350 pub fn from_toml(toml_str: &str) -> Result<Self, ConfigError> {
351 let figment = Figment::new()
352 .merge(Serialized::defaults(Self::default()))
353 .merge(Toml::string(toml_str));
354
355 figment
356 .extract()
357 .map_err(|e| ConfigError::ParseError(e.to_string()))
358 }
359
360 // Validation is in config_validation.rs
361
362 /// Returns the effective `ef_search` value.
363 #[must_use]
364 pub fn effective_ef_search(&self) -> usize {
365 self.search
366 .ef_search
367 .unwrap_or_else(|| self.search.default_mode.ef_search())
368 }
369
370 /// Serializes the configuration to TOML.
371 ///
372 /// # Errors
373 ///
374 /// Returns an error if serialization fails.
375 pub fn to_toml(&self) -> Result<String, ConfigError> {
376 toml::to_string_pretty(self).map_err(|e| ConfigError::ParseError(e.to_string()))
377 }
378}