Skip to main content

grafeo_engine/
config.rs

1//! Database configuration.
2
3use std::fmt;
4use std::path::PathBuf;
5
/// Which graph data model a database stores.
///
/// A database is created with exactly one model and keeps it for its whole
/// lifetime; the engine only initializes the store for that model, which
/// saves memory.
///
/// Server-level schema flavours (OWL, RDFS, JSON Schema) are not represented
/// here: as far as the engine is concerned they map to either `Lpg` or `Rdf`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum GraphModel {
    /// Labeled Property Graph, the default model. Supports GQL, Cypher,
    /// Gremlin and GraphQL.
    #[default]
    Lpg,
    /// RDF triple store. Supports SPARQL.
    Rdf,
}
21
22impl fmt::Display for GraphModel {
23    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
24        match self {
25            Self::Lpg => write!(f, "LPG"),
26            Self::Rdf => write!(f, "RDF"),
27        }
28    }
29}
30
/// How aggressively the WAL is synced: the safety-versus-speed trade-off.
///
/// The enum is defined in the config module so that `Config` can carry the
/// requested durability whether or not the `wal` feature is compiled in.
/// With WAL enabled, the engine maps it to the adapter-level durability mode.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DurabilityMode {
    /// Fsync after every commit: the safest and the slowest option.
    Sync,
    /// Fsync periodically in batches: a balance of performance and durability.
    Batch {
        /// Upper bound on the time between syncs, in milliseconds.
        max_delay_ms: u64,
        /// Upper bound on the number of records between syncs.
        max_records: u64,
    },
    /// Adaptive sync driven by a background flusher thread.
    Adaptive {
        /// Target interval between flushes, in milliseconds.
        target_interval_ms: u64,
    },
    /// Never sync explicitly and rely on OS buffer flushing: the fastest
    /// option, but recent data may be lost.
    NoSync,
}
55
56impl Default for DurabilityMode {
57    fn default() -> Self {
58        Self::Batch {
59            max_delay_ms: 100,
60            max_records: 1000,
61        }
62    }
63}
64
/// Reasons why [`Config::validate()`] rejects a configuration.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ConfigError {
    /// The memory limit was set to zero; it must be greater than zero.
    ZeroMemoryLimit,
    /// The thread count was zero; it must be greater than zero.
    ZeroThreads,
    /// The WAL flush interval was zero; it must be greater than zero.
    ZeroWalFlushInterval,
    /// The RDF graph model was requested without the `rdf` feature flag.
    RdfFeatureRequired,
}
77
78impl fmt::Display for ConfigError {
79    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
80        match self {
81            Self::ZeroMemoryLimit => write!(f, "memory_limit must be greater than zero"),
82            Self::ZeroThreads => write!(f, "threads must be greater than zero"),
83            Self::ZeroWalFlushInterval => {
84                write!(f, "wal_flush_interval_ms must be greater than zero")
85            }
86            Self::RdfFeatureRequired => {
87                write!(
88                    f,
89                    "RDF graph model requires the `rdf` feature flag to be enabled"
90                )
91            }
92        }
93    }
94}
95
96impl std::error::Error for ConfigError {}
97
98/// Database configuration.
99#[derive(Debug, Clone)]
100#[allow(clippy::struct_excessive_bools)] // Config structs naturally have many boolean flags
101pub struct Config {
102    /// Graph data model (LPG or RDF). Immutable after database creation.
103    pub graph_model: GraphModel,
104    /// Path to the database directory (None for in-memory only).
105    pub path: Option<PathBuf>,
106
107    /// Memory limit in bytes (None for unlimited).
108    pub memory_limit: Option<usize>,
109
110    /// Path for spilling data to disk under memory pressure.
111    pub spill_path: Option<PathBuf>,
112
113    /// Number of worker threads for query execution.
114    pub threads: usize,
115
116    /// Whether to enable WAL for durability.
117    pub wal_enabled: bool,
118
119    /// WAL flush interval in milliseconds.
120    pub wal_flush_interval_ms: u64,
121
122    /// Whether to maintain backward edges.
123    pub backward_edges: bool,
124
125    /// Whether to enable query logging.
126    pub query_logging: bool,
127
128    /// Adaptive execution configuration.
129    pub adaptive: AdaptiveConfig,
130
131    /// Whether to use factorized execution for multi-hop queries.
132    ///
133    /// When enabled, consecutive MATCH expansions are executed using factorized
134    /// representation which avoids Cartesian product materialization. This provides
135    /// 5-100x speedup for multi-hop queries with high fan-out.
136    ///
137    /// Enabled by default.
138    pub factorized_execution: bool,
139
140    /// WAL durability mode. Only used when `wal_enabled` is true.
141    pub wal_durability: DurabilityMode,
142
143    /// Whether to enable catalog schema constraint enforcement.
144    ///
145    /// When true, the catalog enforces label, edge type, and property constraints
146    /// (e.g. required properties, uniqueness). The server sets this for JSON
147    /// Schema databases and populates constraints after creation.
148    pub schema_constraints: bool,
149}
150
/// Settings for adaptive query execution.
///
/// Adaptive execution watches the actual row counts while a query runs and
/// can trigger re-optimization when the estimates turn out significantly
/// wrong.
#[derive(Debug, Clone)]
pub struct AdaptiveConfig {
    /// Turns adaptive execution on or off.
    pub enabled: bool,

    /// How far actual cardinality may deviate before re-optimizing.
    ///
    /// With a value of 3.0, re-optimization is triggered when the actual
    /// cardinality is more than 3x or less than 1/3x the estimate.
    pub threshold: f64,

    /// Row count below which re-optimization is not considered.
    ///
    /// Helps avoid thrashing on small result sets.
    pub min_rows: u64,

    /// Cap on the number of re-optimizations per query.
    pub max_reoptimizations: usize,
}
174
175impl Default for AdaptiveConfig {
176    fn default() -> Self {
177        Self {
178            enabled: true,
179            threshold: 3.0,
180            min_rows: 1000,
181            max_reoptimizations: 3,
182        }
183    }
184}
185
186impl AdaptiveConfig {
187    /// Creates a disabled adaptive config.
188    #[must_use]
189    pub fn disabled() -> Self {
190        Self {
191            enabled: false,
192            ..Default::default()
193        }
194    }
195
196    /// Sets the deviation threshold.
197    #[must_use]
198    pub fn with_threshold(mut self, threshold: f64) -> Self {
199        self.threshold = threshold;
200        self
201    }
202
203    /// Sets the minimum rows before re-optimization.
204    #[must_use]
205    pub fn with_min_rows(mut self, min_rows: u64) -> Self {
206        self.min_rows = min_rows;
207        self
208    }
209
210    /// Sets the maximum number of re-optimizations.
211    #[must_use]
212    pub fn with_max_reoptimizations(mut self, max: usize) -> Self {
213        self.max_reoptimizations = max;
214        self
215    }
216}
217
218impl Default for Config {
219    fn default() -> Self {
220        Self {
221            graph_model: GraphModel::default(),
222            path: None,
223            memory_limit: None,
224            spill_path: None,
225            threads: num_cpus::get(),
226            wal_enabled: true,
227            wal_flush_interval_ms: 100,
228            backward_edges: true,
229            query_logging: false,
230            adaptive: AdaptiveConfig::default(),
231            factorized_execution: true,
232            wal_durability: DurabilityMode::default(),
233            schema_constraints: false,
234        }
235    }
236}
237
238impl Config {
239    /// Creates a new configuration for an in-memory database.
240    #[must_use]
241    pub fn in_memory() -> Self {
242        Self {
243            path: None,
244            wal_enabled: false,
245            ..Default::default()
246        }
247    }
248
249    /// Creates a new configuration for a persistent database.
250    #[must_use]
251    pub fn persistent(path: impl Into<PathBuf>) -> Self {
252        Self {
253            path: Some(path.into()),
254            wal_enabled: true,
255            ..Default::default()
256        }
257    }
258
259    /// Sets the memory limit.
260    #[must_use]
261    pub fn with_memory_limit(mut self, limit: usize) -> Self {
262        self.memory_limit = Some(limit);
263        self
264    }
265
266    /// Sets the number of worker threads.
267    #[must_use]
268    pub fn with_threads(mut self, threads: usize) -> Self {
269        self.threads = threads;
270        self
271    }
272
273    /// Disables backward edges.
274    #[must_use]
275    pub fn without_backward_edges(mut self) -> Self {
276        self.backward_edges = false;
277        self
278    }
279
280    /// Enables query logging.
281    #[must_use]
282    pub fn with_query_logging(mut self) -> Self {
283        self.query_logging = true;
284        self
285    }
286
287    /// Sets the memory budget as a fraction of system RAM.
288    #[must_use]
289    pub fn with_memory_fraction(mut self, fraction: f64) -> Self {
290        use grafeo_common::memory::buffer::BufferManagerConfig;
291        let system_memory = BufferManagerConfig::detect_system_memory();
292        self.memory_limit = Some((system_memory as f64 * fraction) as usize);
293        self
294    }
295
296    /// Sets the spill directory for out-of-core processing.
297    #[must_use]
298    pub fn with_spill_path(mut self, path: impl Into<PathBuf>) -> Self {
299        self.spill_path = Some(path.into());
300        self
301    }
302
303    /// Sets the adaptive execution configuration.
304    #[must_use]
305    pub fn with_adaptive(mut self, adaptive: AdaptiveConfig) -> Self {
306        self.adaptive = adaptive;
307        self
308    }
309
310    /// Disables adaptive execution.
311    #[must_use]
312    pub fn without_adaptive(mut self) -> Self {
313        self.adaptive.enabled = false;
314        self
315    }
316
317    /// Disables factorized execution for multi-hop queries.
318    ///
319    /// This reverts to the traditional flat execution model where each expansion
320    /// creates a full Cartesian product. Only use this if you encounter issues
321    /// with factorized execution.
322    #[must_use]
323    pub fn without_factorized_execution(mut self) -> Self {
324        self.factorized_execution = false;
325        self
326    }
327
328    /// Sets the graph data model.
329    #[must_use]
330    pub fn with_graph_model(mut self, model: GraphModel) -> Self {
331        self.graph_model = model;
332        self
333    }
334
335    /// Sets the WAL durability mode.
336    #[must_use]
337    pub fn with_wal_durability(mut self, mode: DurabilityMode) -> Self {
338        self.wal_durability = mode;
339        self
340    }
341
342    /// Enables catalog schema constraint enforcement.
343    #[must_use]
344    pub fn with_schema_constraints(mut self) -> Self {
345        self.schema_constraints = true;
346        self
347    }
348
349    /// Validates the configuration, returning an error for invalid combinations.
350    ///
351    /// Called automatically by [`GrafeoDB::with_config()`](crate::GrafeoDB::with_config).
352    ///
353    /// # Errors
354    ///
355    /// Returns [`ConfigError`] if any setting is invalid.
356    pub fn validate(&self) -> std::result::Result<(), ConfigError> {
357        if let Some(limit) = self.memory_limit
358            && limit == 0
359        {
360            return Err(ConfigError::ZeroMemoryLimit);
361        }
362
363        if self.threads == 0 {
364            return Err(ConfigError::ZeroThreads);
365        }
366
367        if self.wal_flush_interval_ms == 0 {
368            return Err(ConfigError::ZeroWalFlushInterval);
369        }
370
371        #[cfg(not(feature = "rdf"))]
372        if self.graph_model == GraphModel::Rdf {
373            return Err(ConfigError::RdfFeatureRequired);
374        }
375
376        Ok(())
377    }
378}
379
/// Small stand-in for a CPU-count helper.
mod num_cpus {
    /// Returns the parallelism reported by the standard library, or 4 when it
    /// cannot be determined.
    #[cfg(not(target_arch = "wasm32"))]
    pub fn get() -> usize {
        match std::thread::available_parallelism() {
            Ok(count) => count.get(),
            Err(_) => 4,
        }
    }

    /// Always returns 1 on `wasm32` targets.
    #[cfg(target_arch = "wasm32")]
    pub fn get() -> usize {
        1
    }
}
394
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Memory limit used by the builder-chaining tests (512 MiB).
    const HALF_GIB: usize = 512 * 1024 * 1024;

    /// True when the two floats differ by less than `f64::EPSILON`.
    fn nearly_equal(left: f64, right: f64) -> bool {
        (left - right).abs() < f64::EPSILON
    }

    // --- Config constructors and builders ---

    #[test]
    fn test_config_default() {
        let defaults = Config::default();

        assert_eq!(defaults.graph_model, GraphModel::Lpg);
        assert!(defaults.path.is_none());
        assert!(defaults.memory_limit.is_none());
        assert!(defaults.spill_path.is_none());
        assert!(defaults.threads > 0);
        assert!(defaults.wal_enabled);
        assert_eq!(defaults.wal_flush_interval_ms, 100);
        assert!(defaults.backward_edges);
        assert!(!defaults.query_logging);
        assert!(defaults.factorized_execution);
        assert_eq!(defaults.wal_durability, DurabilityMode::default());
        assert!(!defaults.schema_constraints);
    }

    #[test]
    fn test_config_in_memory() {
        let cfg = Config::in_memory();

        assert!(cfg.path.is_none());
        assert!(!cfg.wal_enabled);
        assert!(cfg.backward_edges);
    }

    #[test]
    fn test_config_persistent() {
        let cfg = Config::persistent("/tmp/test_db");

        assert_eq!(cfg.path.as_deref(), Some(Path::new("/tmp/test_db")));
        assert!(cfg.wal_enabled);
    }

    #[test]
    fn test_config_with_memory_limit() {
        let one_mib = 1024 * 1024;
        let cfg = Config::in_memory().with_memory_limit(one_mib);

        assert_eq!(cfg.memory_limit, Some(one_mib));
    }

    #[test]
    fn test_config_with_threads() {
        assert_eq!(Config::in_memory().with_threads(8).threads, 8);
    }

    #[test]
    fn test_config_without_backward_edges() {
        assert!(!Config::in_memory().without_backward_edges().backward_edges);
    }

    #[test]
    fn test_config_with_query_logging() {
        assert!(Config::in_memory().with_query_logging().query_logging);
    }

    #[test]
    fn test_config_with_spill_path() {
        let cfg = Config::in_memory().with_spill_path("/tmp/spill");

        assert_eq!(cfg.spill_path.as_deref(), Some(Path::new("/tmp/spill")));
    }

    #[test]
    fn test_config_with_memory_fraction() {
        let cfg = Config::in_memory().with_memory_fraction(0.5);

        assert!(cfg.memory_limit.is_some());
        assert!(cfg.memory_limit.unwrap() > 0);
    }

    #[test]
    fn test_config_with_adaptive() {
        let custom = AdaptiveConfig::default().with_threshold(5.0);
        let cfg = Config::in_memory().with_adaptive(custom);

        assert!(nearly_equal(cfg.adaptive.threshold, 5.0));
    }

    #[test]
    fn test_config_without_adaptive() {
        assert!(!Config::in_memory().without_adaptive().adaptive.enabled);
    }

    #[test]
    fn test_config_without_factorized_execution() {
        let cfg = Config::in_memory().without_factorized_execution();

        assert!(!cfg.factorized_execution);
    }

    #[test]
    fn test_config_builder_chaining() {
        let cfg = Config::persistent("/tmp/db")
            .with_memory_limit(HALF_GIB)
            .with_threads(4)
            .with_query_logging()
            .without_backward_edges()
            .with_spill_path("/tmp/spill");

        assert!(cfg.path.is_some());
        assert_eq!(cfg.memory_limit, Some(HALF_GIB));
        assert_eq!(cfg.threads, 4);
        assert!(cfg.query_logging);
        assert!(!cfg.backward_edges);
        assert!(cfg.spill_path.is_some());
    }

    // --- AdaptiveConfig tests ---

    #[test]
    fn test_adaptive_config_default() {
        let adaptive = AdaptiveConfig::default();

        assert!(adaptive.enabled);
        assert!(nearly_equal(adaptive.threshold, 3.0));
        assert_eq!(adaptive.min_rows, 1000);
        assert_eq!(adaptive.max_reoptimizations, 3);
    }

    #[test]
    fn test_adaptive_config_disabled() {
        assert!(!AdaptiveConfig::disabled().enabled);
    }

    #[test]
    fn test_adaptive_config_with_threshold() {
        let adaptive = AdaptiveConfig::default().with_threshold(10.0);

        assert!(nearly_equal(adaptive.threshold, 10.0));
    }

    #[test]
    fn test_adaptive_config_with_min_rows() {
        assert_eq!(AdaptiveConfig::default().with_min_rows(500).min_rows, 500);
    }

    #[test]
    fn test_adaptive_config_with_max_reoptimizations() {
        let adaptive = AdaptiveConfig::default().with_max_reoptimizations(5);

        assert_eq!(adaptive.max_reoptimizations, 5);
    }

    #[test]
    fn test_adaptive_config_builder_chaining() {
        let adaptive = AdaptiveConfig::default()
            .with_threshold(2.0)
            .with_min_rows(100)
            .with_max_reoptimizations(10);

        assert!(nearly_equal(adaptive.threshold, 2.0));
        assert_eq!(adaptive.min_rows, 100);
        assert_eq!(adaptive.max_reoptimizations, 10);
    }

    // --- GraphModel tests ---

    #[test]
    fn test_graph_model_default_is_lpg() {
        let model = GraphModel::default();

        assert_eq!(model, GraphModel::Lpg);
    }

    #[test]
    fn test_graph_model_display() {
        let rendered = [
            (GraphModel::Lpg, "LPG"),
            (GraphModel::Rdf, "RDF"),
        ];

        for (model, text) in rendered {
            assert_eq!(model.to_string(), text);
        }
    }

    #[test]
    fn test_config_with_graph_model() {
        let cfg = Config::in_memory().with_graph_model(GraphModel::Rdf);

        assert_eq!(cfg.graph_model, GraphModel::Rdf);
    }

    // --- DurabilityMode tests ---

    #[test]
    fn test_durability_mode_default_is_batch() {
        let expected = DurabilityMode::Batch {
            max_delay_ms: 100,
            max_records: 1000,
        };

        assert_eq!(DurabilityMode::default(), expected);
    }

    #[test]
    fn test_config_with_wal_durability() {
        let mode = DurabilityMode::Sync;
        let cfg = Config::persistent("/tmp/db").with_wal_durability(mode);

        assert_eq!(cfg.wal_durability, DurabilityMode::Sync);
    }

    #[test]
    fn test_config_with_wal_durability_nosync() {
        let mode = DurabilityMode::NoSync;
        let cfg = Config::persistent("/tmp/db").with_wal_durability(mode);

        assert_eq!(cfg.wal_durability, DurabilityMode::NoSync);
    }

    #[test]
    fn test_config_with_wal_durability_adaptive() {
        let mode = DurabilityMode::Adaptive {
            target_interval_ms: 50,
        };
        let cfg = Config::persistent("/tmp/db").with_wal_durability(mode);

        assert_eq!(
            cfg.wal_durability,
            DurabilityMode::Adaptive {
                target_interval_ms: 50
            }
        );
    }

    // --- schema_constraints tests ---

    #[test]
    fn test_config_with_schema_constraints() {
        assert!(Config::in_memory().with_schema_constraints().schema_constraints);
    }

    // --- validate() tests ---

    #[test]
    fn test_validate_default_config() {
        let outcome = Config::default().validate();

        assert!(outcome.is_ok());
    }

    #[test]
    fn test_validate_in_memory_config() {
        let outcome = Config::in_memory().validate();

        assert!(outcome.is_ok());
    }

    #[test]
    fn test_validate_rejects_zero_memory_limit() {
        let outcome = Config::in_memory().with_memory_limit(0).validate();

        assert_eq!(outcome, Err(ConfigError::ZeroMemoryLimit));
    }

    #[test]
    fn test_validate_rejects_zero_threads() {
        let outcome = Config::in_memory().with_threads(0).validate();

        assert_eq!(outcome, Err(ConfigError::ZeroThreads));
    }

    #[test]
    fn test_validate_rejects_zero_wal_flush_interval() {
        // There is no builder for this field, so set it directly.
        let cfg = Config {
            wal_flush_interval_ms: 0,
            ..Config::in_memory()
        };

        assert_eq!(cfg.validate(), Err(ConfigError::ZeroWalFlushInterval));
    }

    #[cfg(not(feature = "rdf"))]
    #[test]
    fn test_validate_rejects_rdf_without_feature() {
        let outcome = Config::in_memory()
            .with_graph_model(GraphModel::Rdf)
            .validate();

        assert_eq!(outcome, Err(ConfigError::RdfFeatureRequired));
    }

    #[test]
    fn test_config_error_display() {
        let expected = [
            (
                ConfigError::ZeroMemoryLimit,
                "memory_limit must be greater than zero",
            ),
            (
                ConfigError::ZeroThreads,
                "threads must be greater than zero",
            ),
            (
                ConfigError::ZeroWalFlushInterval,
                "wal_flush_interval_ms must be greater than zero",
            ),
            (
                ConfigError::RdfFeatureRequired,
                "RDF graph model requires the `rdf` feature flag to be enabled",
            ),
        ];

        for (error, message) in expected {
            assert_eq!(error.to_string(), message);
        }
    }

    // --- Builder chaining with new fields ---

    #[test]
    fn test_config_full_builder_chaining() {
        let cfg = Config::persistent("/tmp/db")
            .with_graph_model(GraphModel::Lpg)
            .with_memory_limit(HALF_GIB)
            .with_threads(4)
            .with_query_logging()
            .with_wal_durability(DurabilityMode::Sync)
            .with_schema_constraints()
            .without_backward_edges()
            .with_spill_path("/tmp/spill");

        assert_eq!(cfg.graph_model, GraphModel::Lpg);
        assert!(cfg.path.is_some());
        assert_eq!(cfg.memory_limit, Some(HALF_GIB));
        assert_eq!(cfg.threads, 4);
        assert!(cfg.query_logging);
        assert_eq!(cfg.wal_durability, DurabilityMode::Sync);
        assert!(cfg.schema_constraints);
        assert!(!cfg.backward_edges);
        assert!(cfg.spill_path.is_some());
        assert!(cfg.validate().is_ok());
    }
}