oxirs_tdb/store/
store_params.rs

1//! Store parameters and configuration builder
2//!
3//! Provides comprehensive configuration management inspired by Apache Jena TDB2's StoreParams.
4//! Supports:
5//! - Builder pattern for flexible configuration
6//! - Parameter validation and constraints
7//! - Serialization/deserialization for configuration files
8//! - Default presets (development, production, performance)
9//! - Parameter inheritance and overlays
10
11use crate::error::{Result, TdbError};
12use serde::{Deserialize, Serialize};
13use std::path::{Path, PathBuf};
14
15/// Store parameters containing all configuration options
16#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct StoreParams {
18    // Storage configuration
19    /// Directory for storing RDF data
20    pub data_dir: PathBuf,
21    /// Page size in bytes (must be power of 2)
22    pub page_size: usize,
23    /// Buffer pool size (number of pages to cache)
24    pub buffer_pool_size: usize,
25
26    // Index configuration
27    /// Enable Subject-Predicate-Object index
28    pub enable_spo_index: bool,
29    /// Enable Predicate-Object-Subject index
30    pub enable_pos_index: bool,
31    /// Enable Object-Subject-Predicate index
32    pub enable_osp_index: bool,
33    /// Enable quad (named graph) indexes
34    pub enable_quad_indexes: bool,
35
36    // Dictionary configuration
37    /// Enable inline storage of small values
38    pub enable_inline_values: bool,
39    /// Enable prefix compression for URIs
40    pub enable_prefix_compression: bool,
41    /// Dictionary cache size (number of entries)
42    pub dictionary_cache_size: usize,
43
44    // Transaction configuration
45    /// Enable write-ahead logging for durability
46    pub enable_wal: bool,
47    /// Write-ahead log buffer size in bytes
48    pub wal_buffer_size: usize,
49    /// Maximum transaction size (number of triples)
50    pub max_transaction_size: usize,
51    /// Transaction timeout in seconds
52    pub transaction_timeout_secs: u64,
53
54    // Compression configuration
55    /// Enable data compression
56    pub enable_compression: bool,
57    /// Compression algorithm to use
58    pub compression_algorithm: CompressionAlgorithm,
59    /// Compression level (algorithm-specific)
60    pub compression_level: u32,
61
62    // Bloom filter configuration
63    /// Enable bloom filters for indexes
64    pub enable_bloom_filters: bool,
65    /// Bloom filter false positive rate (0.0 to 1.0)
66    pub bloom_filter_fpr: f64,
67    /// Bloom filter size per index
68    pub bloom_filter_size_per_index: usize,
69
70    // Query optimization
71    /// Enable query result caching
72    pub enable_query_cache: bool,
73    /// Query cache size (number of cached queries)
74    pub query_cache_size: usize,
75    /// Enable statistics collection
76    pub enable_statistics: bool,
77    /// Statistics sampling rate (0.0 to 1.0)
78    pub statistics_sample_rate: f64,
79
80    // Query monitoring
81    /// Enable query performance monitoring
82    pub enable_query_monitoring: bool,
83    /// Slow query threshold in milliseconds
84    pub slow_query_threshold_ms: u64,
85    /// Query timeout in milliseconds
86    pub query_timeout_ms: u64,
87
88    // Spatial indexing
89    /// Enable spatial indexing for geospatial queries
90    pub enable_spatial_indexing: bool,
91    /// Maximum entries per spatial index node
92    pub spatial_index_max_entries: usize,
93
94    // Production features
95    /// Enable diagnostic logging and error reporting
96    pub enable_diagnostics: bool,
97    /// Enable metrics collection and export
98    pub enable_metrics: bool,
99    /// Enable performance profiling
100    pub enable_profiling: bool,
101
102    // Connection pooling
103    /// Minimum number of connections to maintain
104    pub min_connections: usize,
105    /// Maximum number of concurrent connections
106    pub max_connections: usize,
107    /// Connection timeout in seconds
108    pub connection_timeout_secs: u64,
109
110    // Backup configuration
111    /// Enable online (hot) backups
112    pub enable_online_backup: bool,
113    /// Number of days to retain backups
114    pub backup_retention_days: u32,
115    /// Enable backup encryption
116    pub enable_backup_encryption: bool,
117
118    // Performance tuning
119    /// Enable direct I/O bypassing OS cache
120    pub enable_direct_io: bool,
121    /// Enable asynchronous I/O operations
122    pub enable_async_io: bool,
123    /// Enable NUMA (Non-Uniform Memory Access) awareness
124    pub enable_numa_awareness: bool,
125    /// Enable GPU acceleration for computations
126    pub enable_gpu_acceleration: bool,
127
128    // Distributed systems
129    /// Enable data replication
130    pub enable_replication: bool,
131    /// Replication mode (master-slave, master-master, etc.)
132    pub replication_mode: ReplicationMode,
133}
134
135/// Compression algorithm selection
136#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
137pub enum CompressionAlgorithm {
138    /// No compression
139    None,
140    /// LZ4 - fast compression
141    Lz4,
142    /// Zstandard - balanced compression
143    Zstd,
144    /// Brotli - high compression
145    Brotli,
146    /// Snappy - ultra-fast compression
147    Snappy,
148}
149
150/// Replication mode
151#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
152pub enum ReplicationMode {
153    /// No replication
154    None,
155    /// Master-slave replication
156    MasterSlave,
157    /// Master-master replication
158    MasterMaster,
159}
160
161impl StoreParams {
162    /// Create new store parameters with minimal configuration
163    pub fn new<P: AsRef<Path>>(data_dir: P) -> Self {
164        Self {
165            // Storage configuration
166            data_dir: data_dir.as_ref().to_path_buf(),
167            page_size: 4096,
168            buffer_pool_size: 1000,
169
170            // Index configuration
171            enable_spo_index: true,
172            enable_pos_index: true,
173            enable_osp_index: true,
174            enable_quad_indexes: false,
175
176            // Dictionary configuration
177            enable_inline_values: true,
178            enable_prefix_compression: true,
179            dictionary_cache_size: 10000,
180
181            // Transaction configuration
182            enable_wal: true,
183            wal_buffer_size: 1024 * 1024, // 1MB
184            max_transaction_size: 1_000_000,
185            transaction_timeout_secs: 60,
186
187            // Compression configuration
188            enable_compression: true,
189            compression_algorithm: CompressionAlgorithm::Lz4,
190            compression_level: 3,
191
192            // Bloom filter configuration
193            enable_bloom_filters: true,
194            bloom_filter_fpr: 0.01,
195            bloom_filter_size_per_index: 1_000_000,
196
197            // Query optimization
198            enable_query_cache: true,
199            query_cache_size: 1000,
200            enable_statistics: true,
201            statistics_sample_rate: 1.0,
202
203            // Query monitoring
204            enable_query_monitoring: true,
205            slow_query_threshold_ms: 1000,
206            query_timeout_ms: 30_000,
207
208            // Spatial indexing
209            enable_spatial_indexing: true,
210            spatial_index_max_entries: 1000,
211
212            // Production features
213            enable_diagnostics: true,
214            enable_metrics: true,
215            enable_profiling: false,
216
217            // Connection pooling
218            min_connections: 2,
219            max_connections: 10,
220            connection_timeout_secs: 30,
221
222            // Backup configuration
223            enable_online_backup: true,
224            backup_retention_days: 7,
225            enable_backup_encryption: false,
226
227            // Performance tuning
228            enable_direct_io: false,
229            enable_async_io: false,
230            enable_numa_awareness: false,
231            enable_gpu_acceleration: false,
232
233            // Distributed systems
234            enable_replication: false,
235            replication_mode: ReplicationMode::None,
236        }
237    }
238
239    /// Validate parameters and return errors if invalid
240    pub fn validate(&self) -> Result<()> {
241        // Page size must be power of 2 and >= 512
242        if !self.page_size.is_power_of_two() || self.page_size < 512 {
243            return Err(TdbError::InvalidConfiguration(format!(
244                "Page size must be power of 2 and >= 512, got {}",
245                self.page_size
246            )));
247        }
248
249        // Buffer pool size must be > 0
250        if self.buffer_pool_size == 0 {
251            return Err(TdbError::InvalidConfiguration(
252                "Buffer pool size must be > 0".to_string(),
253            ));
254        }
255
256        // Bloom filter FPR must be between 0 and 1
257        if self.bloom_filter_fpr <= 0.0 || self.bloom_filter_fpr >= 1.0 {
258            return Err(TdbError::InvalidConfiguration(format!(
259                "Bloom filter FPR must be between 0 and 1, got {}",
260                self.bloom_filter_fpr
261            )));
262        }
263
264        // Statistics sample rate must be between 0 and 1
265        if self.statistics_sample_rate < 0.0 || self.statistics_sample_rate > 1.0 {
266            return Err(TdbError::InvalidConfiguration(format!(
267                "Statistics sample rate must be between 0 and 1, got {}",
268                self.statistics_sample_rate
269            )));
270        }
271
272        // Connection pool constraints
273        if self.min_connections > self.max_connections {
274            return Err(TdbError::InvalidConfiguration(format!(
275                "Min connections ({}) > max connections ({})",
276                self.min_connections, self.max_connections
277            )));
278        }
279
280        Ok(())
281    }
282
283    /// Save parameters to JSON file
284    pub fn save_to_file<P: AsRef<Path>>(&self, path: P) -> Result<()> {
285        let json = serde_json::to_string_pretty(self)
286            .map_err(|e| TdbError::Serialization(format!("Failed to serialize config: {}", e)))?;
287        std::fs::write(path, json)?;
288        Ok(())
289    }
290
291    /// Load parameters from JSON file
292    pub fn load_from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
293        let json = std::fs::read_to_string(path)?;
294        let params: StoreParams = serde_json::from_str(&json)
295            .map_err(|e| TdbError::Deserialization(format!("Failed to parse config: {}", e)))?;
296        params.validate()?;
297        Ok(params)
298    }
299}
300
301/// Builder for store parameters with fluent API
302pub struct StoreParamsBuilder {
303    params: StoreParams,
304}
305
306impl StoreParamsBuilder {
307    /// Create new builder with default parameters
308    pub fn new<P: AsRef<Path>>(data_dir: P) -> Self {
309        Self {
310            params: StoreParams::new(data_dir),
311        }
312    }
313
314    /// Create builder from existing parameters
315    pub fn from_params(params: StoreParams) -> Self {
316        Self { params }
317    }
318
319    /// Set page size (must be power of 2)
320    pub fn page_size(mut self, size: usize) -> Self {
321        self.params.page_size = size;
322        self
323    }
324
325    /// Set buffer pool size
326    pub fn buffer_pool_size(mut self, size: usize) -> Self {
327        self.params.buffer_pool_size = size;
328        self
329    }
330
331    /// Enable/disable triple indexes
332    pub fn with_triple_indexes(mut self, spo: bool, pos: bool, osp: bool) -> Self {
333        self.params.enable_spo_index = spo;
334        self.params.enable_pos_index = pos;
335        self.params.enable_osp_index = osp;
336        self
337    }
338
339    /// Enable/disable quad indexes
340    pub fn with_quad_indexes(mut self, enable: bool) -> Self {
341        self.params.enable_quad_indexes = enable;
342        self
343    }
344
345    /// Enable/disable inline values optimization
346    pub fn with_inline_values(mut self, enable: bool) -> Self {
347        self.params.enable_inline_values = enable;
348        self
349    }
350
351    /// Enable/disable prefix compression
352    pub fn with_prefix_compression(mut self, enable: bool) -> Self {
353        self.params.enable_prefix_compression = enable;
354        self
355    }
356
357    /// Set dictionary cache size
358    pub fn dictionary_cache_size(mut self, size: usize) -> Self {
359        self.params.dictionary_cache_size = size;
360        self
361    }
362
363    /// Enable/disable write-ahead logging
364    pub fn with_wal(mut self, enable: bool) -> Self {
365        self.params.enable_wal = enable;
366        self
367    }
368
369    /// Set WAL buffer size
370    pub fn wal_buffer_size(mut self, size: usize) -> Self {
371        self.params.wal_buffer_size = size;
372        self
373    }
374
375    /// Set maximum transaction size
376    pub fn max_transaction_size(mut self, size: usize) -> Self {
377        self.params.max_transaction_size = size;
378        self
379    }
380
381    /// Set transaction timeout
382    pub fn transaction_timeout(mut self, seconds: u64) -> Self {
383        self.params.transaction_timeout_secs = seconds;
384        self
385    }
386
387    /// Configure compression
388    pub fn with_compression(mut self, algorithm: CompressionAlgorithm, level: u32) -> Self {
389        self.params.enable_compression = algorithm != CompressionAlgorithm::None;
390        self.params.compression_algorithm = algorithm;
391        self.params.compression_level = level;
392        self
393    }
394
395    /// Configure bloom filters
396    pub fn with_bloom_filters(mut self, enable: bool, fpr: f64, size: usize) -> Self {
397        self.params.enable_bloom_filters = enable;
398        self.params.bloom_filter_fpr = fpr;
399        self.params.bloom_filter_size_per_index = size;
400        self
401    }
402
403    /// Configure query cache
404    pub fn with_query_cache(mut self, enable: bool, size: usize) -> Self {
405        self.params.enable_query_cache = enable;
406        self.params.query_cache_size = size;
407        self
408    }
409
410    /// Configure statistics
411    pub fn with_statistics(mut self, enable: bool, sample_rate: f64) -> Self {
412        self.params.enable_statistics = enable;
413        self.params.statistics_sample_rate = sample_rate;
414        self
415    }
416
417    /// Configure query monitoring
418    pub fn with_query_monitoring(
419        mut self,
420        enable: bool,
421        slow_threshold_ms: u64,
422        timeout_ms: u64,
423    ) -> Self {
424        self.params.enable_query_monitoring = enable;
425        self.params.slow_query_threshold_ms = slow_threshold_ms;
426        self.params.query_timeout_ms = timeout_ms;
427        self
428    }
429
430    /// Configure spatial indexing
431    pub fn with_spatial_indexing(mut self, enable: bool, max_entries: usize) -> Self {
432        self.params.enable_spatial_indexing = enable;
433        self.params.spatial_index_max_entries = max_entries;
434        self
435    }
436
437    /// Configure production features
438    pub fn with_production_features(
439        mut self,
440        diagnostics: bool,
441        metrics: bool,
442        profiling: bool,
443    ) -> Self {
444        self.params.enable_diagnostics = diagnostics;
445        self.params.enable_metrics = metrics;
446        self.params.enable_profiling = profiling;
447        self
448    }
449
450    /// Configure connection pooling
451    pub fn with_connection_pool(mut self, min: usize, max: usize, timeout_secs: u64) -> Self {
452        self.params.min_connections = min;
453        self.params.max_connections = max;
454        self.params.connection_timeout_secs = timeout_secs;
455        self
456    }
457
458    /// Configure backup
459    pub fn with_backup(mut self, enable_online: bool, retention_days: u32, encrypt: bool) -> Self {
460        self.params.enable_online_backup = enable_online;
461        self.params.backup_retention_days = retention_days;
462        self.params.enable_backup_encryption = encrypt;
463        self
464    }
465
466    /// Configure performance features
467    pub fn with_performance_features(
468        mut self,
469        direct_io: bool,
470        async_io: bool,
471        numa: bool,
472        gpu: bool,
473    ) -> Self {
474        self.params.enable_direct_io = direct_io;
475        self.params.enable_async_io = async_io;
476        self.params.enable_numa_awareness = numa;
477        self.params.enable_gpu_acceleration = gpu;
478        self
479    }
480
481    /// Configure replication
482    pub fn with_replication(mut self, mode: ReplicationMode) -> Self {
483        self.params.enable_replication = mode != ReplicationMode::None;
484        self.params.replication_mode = mode;
485        self
486    }
487
488    /// Build and validate parameters
489    pub fn build(self) -> Result<StoreParams> {
490        self.params.validate()?;
491        Ok(self.params)
492    }
493
494    /// Build without validation (use with caution)
495    pub fn build_unchecked(self) -> StoreParams {
496        self.params
497    }
498}
499
500/// Preset configurations for common use cases
501pub struct StorePresets;
502
503impl StorePresets {
504    /// Development preset - optimized for fast iteration
505    pub fn development<P: AsRef<Path>>(data_dir: P) -> StoreParamsBuilder {
506        StoreParamsBuilder::new(data_dir)
507            .buffer_pool_size(100)
508            .with_compression(CompressionAlgorithm::None, 0)
509            .with_statistics(false, 0.0)
510            .with_production_features(false, false, false)
511            .with_performance_features(false, false, false, false)
512    }
513
514    /// Production preset - balanced for production workloads
515    pub fn production<P: AsRef<Path>>(data_dir: P) -> StoreParamsBuilder {
516        StoreParamsBuilder::new(data_dir)
517            .buffer_pool_size(10000)
518            .with_compression(CompressionAlgorithm::Lz4, 3)
519            .with_statistics(true, 1.0)
520            .with_production_features(true, true, false)
521            .with_backup(true, 30, true)
522            .with_connection_pool(5, 50, 60)
523    }
524
525    /// Performance preset - optimized for maximum throughput
526    pub fn performance<P: AsRef<Path>>(data_dir: P) -> StoreParamsBuilder {
527        StoreParamsBuilder::new(data_dir)
528            .buffer_pool_size(50000)
529            .with_compression(CompressionAlgorithm::Snappy, 1)
530            .with_bloom_filters(true, 0.001, 10_000_000)
531            .with_performance_features(true, true, true, true)
532            .with_query_cache(true, 10000)
533    }
534
535    /// Minimal preset - bare minimum configuration
536    pub fn minimal<P: AsRef<Path>>(data_dir: P) -> StoreParamsBuilder {
537        StoreParamsBuilder::new(data_dir)
538            .buffer_pool_size(10)
539            .with_compression(CompressionAlgorithm::None, 0)
540            .with_bloom_filters(false, 0.01, 0)
541            .with_query_cache(false, 0)
542            .with_statistics(false, 0.0)
543            .with_production_features(false, false, false)
544    }
545}
546
#[cfg(test)]
mod tests {
    use super::*;
    use std::env;

    /// Defaults produced by `StoreParams::new` match the documented values.
    #[test]
    fn test_store_params_new() {
        let params = StoreParams::new("/tmp/test");
        assert_eq!(params.page_size, 4096);
        assert_eq!(params.buffer_pool_size, 1000);
        assert!(params.enable_spo_index);
    }

    /// Defaults validate; individual constraint violations are rejected.
    #[test]
    fn test_store_params_validation() {
        let params = StoreParams::new("/tmp/test");
        assert!(params.validate().is_ok());

        let mut invalid_params = params.clone();
        invalid_params.page_size = 1000; // Not power of 2
        assert!(invalid_params.validate().is_err());

        let mut invalid_params2 = params.clone();
        invalid_params2.bloom_filter_fpr = 1.5; // Out of range
        assert!(invalid_params2.validate().is_err());

        let mut invalid_params3 = params.clone();
        invalid_params3.buffer_pool_size = 0; // Empty buffer pool
        assert!(invalid_params3.validate().is_err());

        let mut invalid_params4 = params.clone();
        invalid_params4.statistics_sample_rate = 1.5; // Out of range
        assert!(invalid_params4.validate().is_err());
    }

    /// Simple setters are reflected in the built parameters.
    #[test]
    fn test_builder_basic() {
        let params = StoreParamsBuilder::new("/tmp/test")
            .page_size(8192)
            .buffer_pool_size(2000)
            .build()
            .unwrap();

        assert_eq!(params.page_size, 8192);
        assert_eq!(params.buffer_pool_size, 2000);
    }

    /// Choosing a real algorithm also switches compression on.
    #[test]
    fn test_builder_compression() {
        let params = StoreParamsBuilder::new("/tmp/test")
            .with_compression(CompressionAlgorithm::Zstd, 5)
            .build()
            .unwrap();

        assert!(params.enable_compression);
        assert_eq!(params.compression_algorithm, CompressionAlgorithm::Zstd);
        assert_eq!(params.compression_level, 5);
    }

    /// Bloom filter settings are stored verbatim.
    #[test]
    fn test_builder_bloom_filters() {
        let params = StoreParamsBuilder::new("/tmp/test")
            .with_bloom_filters(true, 0.001, 5_000_000)
            .build()
            .unwrap();

        assert!(params.enable_bloom_filters);
        assert_eq!(params.bloom_filter_fpr, 0.001);
        assert_eq!(params.bloom_filter_size_per_index, 5_000_000);
    }

    /// The development preset builds and carries its distinguishing values.
    #[test]
    fn test_presets_development() {
        let params = StorePresets::development("/tmp/test").build().unwrap();
        assert_eq!(params.buffer_pool_size, 100);
        assert_eq!(params.compression_algorithm, CompressionAlgorithm::None);
        assert!(!params.enable_statistics);
    }

    /// The production preset builds and carries its distinguishing values.
    #[test]
    fn test_presets_production() {
        let params = StorePresets::production("/tmp/test").build().unwrap();
        assert_eq!(params.buffer_pool_size, 10000);
        assert!(params.enable_diagnostics);
        assert!(params.enable_backup_encryption);
    }

    /// The performance preset builds and carries its distinguishing values.
    #[test]
    fn test_presets_performance() {
        let params = StorePresets::performance("/tmp/test").build().unwrap();
        assert_eq!(params.buffer_pool_size, 50000);
        assert!(params.enable_direct_io);
        assert!(params.enable_gpu_acceleration);
    }

    /// Parameters survive a save/load round trip through a JSON file.
    #[test]
    fn test_save_load_params() {
        let temp_dir = env::temp_dir();
        // Include the process id so concurrent test runs sharing the system
        // temp directory do not overwrite each other's file.
        let config_file =
            temp_dir.join(format!("test_store_params_{}.json", std::process::id()));

        let params = StoreParamsBuilder::new(&temp_dir)
            .page_size(8192)
            .buffer_pool_size(5000)
            .build()
            .unwrap();

        params.save_to_file(&config_file).unwrap();
        let loaded = StoreParams::load_from_file(&config_file);
        // Remove the file before unwrapping so it is cleaned up even when
        // loading failed; a failed removal is not what this test checks.
        let _ = std::fs::remove_file(&config_file);
        let loaded_params = loaded.unwrap();

        assert_eq!(params.page_size, loaded_params.page_size);
        assert_eq!(params.buffer_pool_size, loaded_params.buffer_pool_size);
    }

    /// Choosing a real replication mode also switches replication on.
    #[test]
    fn test_replication_config() {
        let params = StoreParamsBuilder::new("/tmp/test")
            .with_replication(ReplicationMode::MasterSlave)
            .build()
            .unwrap();

        assert!(params.enable_replication);
        assert_eq!(params.replication_mode, ReplicationMode::MasterSlave);
    }

    /// A pool whose minimum exceeds its maximum fails validation.
    #[test]
    fn test_connection_pool_validation() {
        let result = StoreParamsBuilder::new("/tmp/test")
            .with_connection_pool(10, 5, 30) // min > max (invalid)
            .build();

        assert!(result.is_err());
    }
}