velesdb_core/
config.rs

1//! `VelesDB` Configuration Module
2//!
3//! Provides configuration file support via `velesdb.toml`, environment variables,
4//! and runtime overrides.
5//!
6//! # Priority (highest to lowest)
7//!
8//! 1. Runtime overrides (API, REPL)
9//! 2. Environment variables (`VELESDB_*`)
10//! 3. Configuration file (`velesdb.toml`)
11//! 4. Default values
12
13use figment::{
14    providers::{Env, Format, Serialized, Toml},
15    Figment,
16};
17use serde::{Deserialize, Serialize};
18use std::path::Path;
19use thiserror::Error;
20
/// Configuration errors.
///
/// Error type returned by the loading, validation and serialization
/// methods of `VelesConfig`. The `Display` text of each variant comes from
/// its `#[error(...)]` attribute.
#[derive(Error, Debug)]
pub enum ConfigError {
    /// Failed to parse configuration file.
    ///
    /// Carries the stringified underlying error. In this file it is produced
    /// by `VelesConfig::load_from_path`, `VelesConfig::from_toml` and also by
    /// `VelesConfig::to_toml` (i.e. for serialization failures as well).
    #[error("Failed to parse configuration: {0}")]
    ParseError(String),

    /// Invalid configuration value.
    ///
    /// Returned by `VelesConfig::validate` for the first check that fails.
    #[error("Invalid configuration value for '{key}': {message}")]
    InvalidValue {
        /// Configuration key that failed validation.
        /// Dotted `section.field` path, e.g. `search.ef_search`.
        key: String,
        /// Validation error message.
        message: String,
    },

    /// Configuration file not found.
    ///
    /// NOTE(review): no code in the visible part of this file constructs this
    /// variant; confirm whether callers elsewhere rely on it.
    #[error("Configuration file not found: {0}")]
    FileNotFound(String),

    /// IO error.
    ///
    /// `#[from]` generates `From<std::io::Error>`, so `?` on an I/O result
    /// converts into this variant automatically.
    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),
}
45
/// Search mode presets.
///
/// Each preset maps to an `ef_search` value via `SearchMode::ef_search`.
/// With `rename_all = "snake_case"` the variants are (de)serialized as
/// `fast`, `balanced`, `accurate`, `high_recall` and `perfect`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SearchMode {
    /// Fast search with `ef_search=64`, ~90% recall.
    Fast,
    /// Balanced search with `ef_search=128`, ~98% recall (default).
    ///
    /// Marked `#[default]`, so `SearchMode::default()` returns this variant.
    #[default]
    Balanced,
    /// Accurate search with `ef_search=256`, ~99% recall.
    Accurate,
    /// High recall search with `ef_search=1024`, ~99.7% recall.
    HighRecall,
    /// Perfect recall with bruteforce, 100% guaranteed.
    ///
    /// Represented by `usize::MAX` in `SearchMode::ef_search`.
    Perfect,
}
62
63impl SearchMode {
64    /// Returns the `ef_search` value for this mode.
65    #[must_use]
66    pub fn ef_search(&self) -> usize {
67        match self {
68            Self::Fast => 64,
69            Self::Balanced => 128,
70            Self::Accurate => 256,
71            Self::HighRecall => 1024,
72            Self::Perfect => usize::MAX, // Signals bruteforce
73        }
74    }
75}
76
/// Search configuration section.
///
/// Container-level `#[serde(default)]` means any field missing from the
/// input is filled from this type's `Default` implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct SearchConfig {
    /// Default search mode.
    pub default_mode: SearchMode,
    /// Override `ef_search` (if set, overrides mode).
    ///
    /// `VelesConfig::validate` accepts only values in `[16, 4096]`.
    pub ef_search: Option<usize>,
    /// Maximum results per query.
    ///
    /// `VelesConfig::validate` accepts only values in `[1, 10000]`.
    pub max_results: usize,
    /// Query timeout in milliseconds.
    pub query_timeout_ms: u64,
}
90
91impl Default for SearchConfig {
92    fn default() -> Self {
93        Self {
94            default_mode: SearchMode::Balanced,
95            ef_search: None,
96            max_results: 1000,
97            query_timeout_ms: 30000,
98        }
99    }
100}
101
/// HNSW index configuration section.
///
/// `Default` is derived, so both optional fields default to `None` and
/// `max_layers` defaults to `0`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct HnswConfig {
    /// Number of connections per node (M parameter).
    /// `None` = auto based on dimension.
    ///
    /// When set, `VelesConfig::validate` accepts only values in `[4, 128]`.
    pub m: Option<usize>,
    /// Size of the candidate pool during construction.
    /// `None` = auto based on dimension.
    ///
    /// When set, `VelesConfig::validate` accepts only values in `[100, 2000]`.
    pub ef_construction: Option<usize>,
    /// Maximum number of layers (0 = auto).
    pub max_layers: usize,
}
115
/// Storage configuration section.
///
/// Container-level `#[serde(default)]` means any field missing from the
/// input is filled from this type's `Default` implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct StorageConfig {
    /// Data directory path.
    pub data_dir: String,
    /// Storage mode: "mmap" or "memory".
    ///
    /// Any other string is rejected by `VelesConfig::validate`.
    pub storage_mode: String,
    /// Mmap cache size in megabytes.
    pub mmap_cache_mb: usize,
    /// Vector alignment in bytes.
    pub vector_alignment: usize,
}
129
130impl Default for StorageConfig {
131    fn default() -> Self {
132        Self {
133            data_dir: "./velesdb_data".to_string(),
134            storage_mode: "mmap".to_string(),
135            mmap_cache_mb: 1024,
136            vector_alignment: 64,
137        }
138    }
139}
140
/// Limits configuration section.
///
/// Container-level `#[serde(default)]` means any field missing from the
/// input is filled from this type's `Default` implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct LimitsConfig {
    /// Maximum vector dimensions.
    ///
    /// `VelesConfig::validate` accepts only values in `[1, 65536]`.
    pub max_dimensions: usize,
    /// Maximum vectors per collection.
    pub max_vectors_per_collection: usize,
    /// Maximum number of collections.
    pub max_collections: usize,
    /// Maximum payload size in bytes.
    pub max_payload_size: usize,
    /// Maximum vectors for perfect mode (bruteforce).
    pub max_perfect_mode_vectors: usize,
}
156
157impl Default for LimitsConfig {
158    fn default() -> Self {
159        Self {
160            max_dimensions: 4096,
161            max_vectors_per_collection: 100_000_000,
162            max_collections: 1000,
163            max_payload_size: 1_048_576, // 1 MB
164            max_perfect_mode_vectors: 500_000,
165        }
166    }
167}
168
/// Server configuration section.
///
/// Container-level `#[serde(default)]` means any field missing from the
/// input is filled from this type's `Default` implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct ServerConfig {
    /// Host address.
    pub host: String,
    /// Port number.
    ///
    /// `VelesConfig::validate` rejects ports below 1024.
    pub port: u16,
    /// Number of worker threads (0 = auto).
    pub workers: usize,
    /// Maximum HTTP body size in bytes.
    pub max_body_size: usize,
    /// Enable CORS.
    pub cors_enabled: bool,
    /// CORS allowed origins.
    pub cors_origins: Vec<String>,
}
186
187impl Default for ServerConfig {
188    fn default() -> Self {
189        Self {
190            host: "127.0.0.1".to_string(),
191            port: 8080,
192            workers: 0,                 // Auto
193            max_body_size: 104_857_600, // 100 MB
194            cors_enabled: false,
195            cors_origins: vec!["*".to_string()],
196        }
197    }
198}
199
/// Logging configuration section.
///
/// Container-level `#[serde(default)]` means any field missing from the
/// input is filled from this type's `Default` implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct LoggingConfig {
    /// Log level: error, warn, info, debug, trace.
    ///
    /// Any other string is rejected by `VelesConfig::validate`.
    pub level: String,
    /// Log format: text or json.
    ///
    /// Not checked by `VelesConfig::validate`.
    pub format: String,
    /// Log file path (empty = stdout).
    pub file: String,
}
211
212impl Default for LoggingConfig {
213    fn default() -> Self {
214        Self {
215            level: "info".to_string(),
216            format: "text".to_string(),
217            file: String::new(),
218        }
219    }
220}
221
/// Quantization configuration section.
///
/// Container-level `#[serde(default)]` means any field missing from the
/// input is filled from this type's `Default` implementation. None of these
/// fields are checked by `VelesConfig::validate`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct QuantizationConfig {
    /// Default quantization type: none, sq8, binary.
    pub default_type: String,
    /// Enable reranking after quantized search.
    pub rerank_enabled: bool,
    /// Reranking multiplier for candidates.
    pub rerank_multiplier: usize,
}
233
234impl Default for QuantizationConfig {
235    fn default() -> Self {
236        Self {
237            default_type: "none".to_string(),
238            rerank_enabled: true,
239            rerank_multiplier: 2,
240        }
241    }
242}
243
/// Main `VelesDB` configuration structure.
///
/// Each field is one configuration section and corresponds to a TOML table
/// of the same name (`[search]`, `[hnsw]`, `[storage]`, ...). `Default` is
/// derived, so the default configuration is the composition of every
/// section's own default. Container-level `#[serde(default)]` lets input
/// omit whole sections.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(default)]
pub struct VelesConfig {
    /// Search configuration.
    pub search: SearchConfig,
    /// HNSW index configuration.
    pub hnsw: HnswConfig,
    /// Storage configuration.
    pub storage: StorageConfig,
    /// Limits configuration.
    pub limits: LimitsConfig,
    /// Server configuration.
    pub server: ServerConfig,
    /// Logging configuration.
    pub logging: LoggingConfig,
    /// Quantization configuration.
    pub quantization: QuantizationConfig,
}
263
264impl VelesConfig {
265    /// Loads configuration from default sources.
266    ///
267    /// Priority: defaults < file < environment variables.
268    ///
269    /// # Errors
270    ///
271    /// Returns an error if configuration parsing fails.
272    pub fn load() -> Result<Self, ConfigError> {
273        Self::load_from_path("velesdb.toml")
274    }
275
276    /// Loads configuration from a specific file path.
277    ///
278    /// # Arguments
279    ///
280    /// * `path` - Path to the configuration file.
281    ///
282    /// # Errors
283    ///
284    /// Returns an error if configuration parsing fails.
285    pub fn load_from_path<P: AsRef<Path>>(path: P) -> Result<Self, ConfigError> {
286        let figment = Figment::new()
287            .merge(Serialized::defaults(Self::default()))
288            .merge(Toml::file(path.as_ref()))
289            .merge(Env::prefixed("VELESDB_").split("_").lowercase(false));
290
291        figment
292            .extract()
293            .map_err(|e| ConfigError::ParseError(e.to_string()))
294    }
295
296    /// Creates a configuration from a TOML string.
297    ///
298    /// # Arguments
299    ///
300    /// * `toml_str` - TOML configuration string.
301    ///
302    /// # Errors
303    ///
304    /// Returns an error if parsing fails.
305    pub fn from_toml(toml_str: &str) -> Result<Self, ConfigError> {
306        let figment = Figment::new()
307            .merge(Serialized::defaults(Self::default()))
308            .merge(Toml::string(toml_str));
309
310        figment
311            .extract()
312            .map_err(|e| ConfigError::ParseError(e.to_string()))
313    }
314
315    /// Validates the configuration.
316    ///
317    /// # Errors
318    ///
319    /// Returns an error if any configuration value is invalid.
320    pub fn validate(&self) -> Result<(), ConfigError> {
321        // Validate search config
322        if let Some(ef) = self.search.ef_search {
323            if !(16..=4096).contains(&ef) {
324                return Err(ConfigError::InvalidValue {
325                    key: "search.ef_search".to_string(),
326                    message: format!("value {ef} is out of range [16, 4096]"),
327                });
328            }
329        }
330
331        if self.search.max_results == 0 || self.search.max_results > 10000 {
332            return Err(ConfigError::InvalidValue {
333                key: "search.max_results".to_string(),
334                message: format!(
335                    "value {} is out of range [1, 10000]",
336                    self.search.max_results
337                ),
338            });
339        }
340
341        // Validate HNSW config
342        if let Some(m) = self.hnsw.m {
343            if !(4..=128).contains(&m) {
344                return Err(ConfigError::InvalidValue {
345                    key: "hnsw.m".to_string(),
346                    message: format!("value {m} is out of range [4, 128]"),
347                });
348            }
349        }
350
351        if let Some(ef) = self.hnsw.ef_construction {
352            if !(100..=2000).contains(&ef) {
353                return Err(ConfigError::InvalidValue {
354                    key: "hnsw.ef_construction".to_string(),
355                    message: format!("value {ef} is out of range [100, 2000]"),
356                });
357            }
358        }
359
360        // Validate limits
361        if self.limits.max_dimensions == 0 || self.limits.max_dimensions > 65536 {
362            return Err(ConfigError::InvalidValue {
363                key: "limits.max_dimensions".to_string(),
364                message: format!(
365                    "value {} is out of range [1, 65536]",
366                    self.limits.max_dimensions
367                ),
368            });
369        }
370
371        // Validate server config
372        if self.server.port < 1024 {
373            return Err(ConfigError::InvalidValue {
374                key: "server.port".to_string(),
375                message: format!("value {} must be >= 1024", self.server.port),
376            });
377        }
378
379        // Validate storage mode
380        let valid_modes = ["mmap", "memory"];
381        if !valid_modes.contains(&self.storage.storage_mode.as_str()) {
382            return Err(ConfigError::InvalidValue {
383                key: "storage.storage_mode".to_string(),
384                message: format!(
385                    "value '{}' is invalid, expected one of: {:?}",
386                    self.storage.storage_mode, valid_modes
387                ),
388            });
389        }
390
391        // Validate logging level
392        let valid_levels = ["error", "warn", "info", "debug", "trace"];
393        if !valid_levels.contains(&self.logging.level.as_str()) {
394            return Err(ConfigError::InvalidValue {
395                key: "logging.level".to_string(),
396                message: format!(
397                    "value '{}' is invalid, expected one of: {:?}",
398                    self.logging.level, valid_levels
399                ),
400            });
401        }
402
403        Ok(())
404    }
405
406    /// Returns the effective `ef_search` value.
407    ///
408    /// Uses explicit `ef_search` if set, otherwise derives from search mode.
409    #[must_use]
410    pub fn effective_ef_search(&self) -> usize {
411        self.search
412            .ef_search
413            .unwrap_or_else(|| self.search.default_mode.ef_search())
414    }
415
416    /// Serializes the configuration to TOML.
417    ///
418    /// # Errors
419    ///
420    /// Returns an error if serialization fails.
421    pub fn to_toml(&self) -> Result<String, ConfigError> {
422        toml::to_string_pretty(self).map_err(|e| ConfigError::ParseError(e.to_string()))
423    }
424}
425
#[cfg(test)]
mod tests {
    use super::*;

    /// Applies `tweak` to a default configuration and returns what
    /// `validate` reports for the modified value.
    fn validate_after(tweak: impl FnOnce(&mut VelesConfig)) -> Result<(), ConfigError> {
        let mut config = VelesConfig::default();
        tweak(&mut config);
        config.validate()
    }

    // ------------------------------------------------------------------
    // SearchMode
    // ------------------------------------------------------------------

    #[test]
    fn test_search_mode_ef_search_values() {
        // One row per preset: (mode, expected ef_search).
        let expectations = [
            (SearchMode::Fast, 64),
            (SearchMode::Balanced, 128),
            (SearchMode::Accurate, 256),
            (SearchMode::HighRecall, 1024),
            (SearchMode::Perfect, usize::MAX),
        ];

        for (mode, expected) in expectations {
            assert_eq!(mode.ef_search(), expected);
        }
    }

    #[test]
    fn test_search_mode_default_is_balanced() {
        assert_eq!(SearchMode::default(), SearchMode::Balanced);
    }

    #[test]
    fn test_search_mode_serialization() {
        let original = SearchMode::HighRecall;

        let encoded = serde_json::to_string(&original).expect("serialize");
        let decoded: SearchMode = serde_json::from_str(&encoded).expect("deserialize");

        // Variant names are emitted in snake_case and survive a round trip.
        assert_eq!(encoded, "\"high_recall\"");
        assert_eq!(decoded, original);
    }

    // ------------------------------------------------------------------
    // VelesConfig defaults
    // ------------------------------------------------------------------

    #[test]
    fn test_config_default_values() {
        let VelesConfig {
            search,
            server,
            storage,
            logging,
            ..
        } = VelesConfig::default();

        assert_eq!(search.default_mode, SearchMode::Balanced);
        assert_eq!(search.max_results, 1000);
        assert_eq!(search.query_timeout_ms, 30000);
        assert!(search.ef_search.is_none());
        assert_eq!(server.port, 8080);
        assert_eq!(storage.storage_mode, "mmap");
        assert_eq!(logging.level, "info");
    }

    #[test]
    fn test_config_effective_ef_search_from_mode() {
        // Without an override the balanced preset decides the value.
        assert_eq!(VelesConfig::default().effective_ef_search(), 128);
    }

    #[test]
    fn test_config_effective_ef_search_override() {
        let config = VelesConfig {
            search: SearchConfig {
                ef_search: Some(512),
                ..SearchConfig::default()
            },
            ..VelesConfig::default()
        };

        assert_eq!(config.effective_ef_search(), 512);
    }

    // ------------------------------------------------------------------
    // TOML parsing
    // ------------------------------------------------------------------

    #[test]
    fn test_config_from_toml_minimal() {
        let source = r#"
[search]
default_mode = "fast"
"#;

        let parsed = VelesConfig::from_toml(source).expect("parse");

        assert_eq!(parsed.search.default_mode, SearchMode::Fast);
        // Everything not mentioned in the input keeps its default.
        assert_eq!(parsed.server.port, 8080);
    }

    #[test]
    fn test_config_from_toml_full() {
        let source = r#"
[search]
default_mode = "high_recall"
ef_search = 512
max_results = 500
query_timeout_ms = 60000

[hnsw]
m = 48
ef_construction = 600

[storage]
data_dir = "/var/lib/velesdb"
storage_mode = "mmap"
mmap_cache_mb = 2048

[limits]
max_dimensions = 2048
max_perfect_mode_vectors = 100000

[server]
host = "0.0.0.0"
port = 9090
workers = 8

[logging]
level = "debug"
format = "json"
"#;

        let parsed = VelesConfig::from_toml(source).expect("parse");

        // [search]
        assert_eq!(parsed.search.default_mode, SearchMode::HighRecall);
        assert_eq!(parsed.search.ef_search, Some(512));
        assert_eq!(parsed.search.max_results, 500);
        // [hnsw]
        assert_eq!(parsed.hnsw.m, Some(48));
        assert_eq!(parsed.hnsw.ef_construction, Some(600));
        // [storage]
        assert_eq!(parsed.storage.data_dir, "/var/lib/velesdb");
        assert_eq!(parsed.storage.mmap_cache_mb, 2048);
        // [limits]
        assert_eq!(parsed.limits.max_dimensions, 2048);
        // [server]
        assert_eq!(parsed.server.host, "0.0.0.0");
        assert_eq!(parsed.server.port, 9090);
        assert_eq!(parsed.server.workers, 8);
        // [logging]
        assert_eq!(parsed.logging.level, "debug");
        assert_eq!(parsed.logging.format, "json");
    }

    #[test]
    fn test_config_from_toml_invalid_mode() {
        let source = r#"
[search]
default_mode = "ultra_fast"
"#;

        // An unknown preset name must not deserialize.
        assert!(VelesConfig::from_toml(source).is_err());
    }

    // ------------------------------------------------------------------
    // Validation
    // ------------------------------------------------------------------

    #[test]
    fn test_config_validate_success() {
        assert!(validate_after(|_| {}).is_ok());
    }

    #[test]
    fn test_config_validate_ef_search_too_low() {
        let err = validate_after(|c| c.search.ef_search = Some(10))
            .expect_err("ef_search of 10 should be rejected");

        assert!(err.to_string().contains("search.ef_search"));
    }

    #[test]
    fn test_config_validate_ef_search_too_high() {
        assert!(validate_after(|c| c.search.ef_search = Some(5000)).is_err());
    }

    #[test]
    fn test_config_validate_invalid_storage_mode() {
        let err = validate_after(|c| c.storage.storage_mode = "disk".to_string())
            .expect_err("storage mode 'disk' should be rejected");

        assert!(err.to_string().contains("storage.storage_mode"));
    }

    #[test]
    fn test_config_validate_invalid_log_level() {
        let err = validate_after(|c| c.logging.level = "verbose".to_string())
            .expect_err("log level 'verbose' should be rejected");

        assert!(err.to_string().contains("logging.level"));
    }

    #[test]
    fn test_config_validate_port_too_low() {
        let err = validate_after(|c| c.server.port = 80)
            .expect_err("port 80 should be rejected");

        assert!(err.to_string().contains("server.port"));
    }

    #[test]
    fn test_config_validate_hnsw_m_out_of_range() {
        assert!(validate_after(|c| c.hnsw.m = Some(2)).is_err());
    }

    // ------------------------------------------------------------------
    // Serialization
    // ------------------------------------------------------------------

    #[test]
    fn test_config_to_toml() {
        let rendered = VelesConfig::default().to_toml().expect("serialize");

        let expected_fragments = ["[search]", "default_mode", "[server]", "port = 8080"];
        for fragment in expected_fragments {
            assert!(rendered.contains(fragment));
        }
    }

    #[test]
    fn test_config_roundtrip() {
        let mut original = VelesConfig::default();
        original.search.default_mode = SearchMode::Accurate;
        original.search.ef_search = Some(300);
        original.server.port = 9000;

        let rendered = original.to_toml().expect("serialize");
        let reparsed = VelesConfig::from_toml(&rendered).expect("parse");

        // The customised values must survive serialize -> parse unchanged.
        assert_eq!(reparsed.search.default_mode, SearchMode::Accurate);
        assert_eq!(reparsed.search.ef_search, Some(300));
        assert_eq!(reparsed.server.port, 9000);
    }
}