Skip to main content

grafeo_engine/
config.rs

1//! Database configuration.
2
3use std::fmt;
4use std::path::PathBuf;
5use std::time::Duration;
6
/// Which graph data model a database stores.
///
/// The model is picked when the database is created and cannot be changed
/// afterwards. The engine only initializes the store that belongs to the
/// selected model, which saves memory.
///
/// Schema flavours (OWL, RDFS, JSON Schema) are handled at the server level;
/// as far as the engine is concerned they resolve to either `Lpg` or `Rdf`.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
pub enum GraphModel {
    /// Labeled Property Graph, the default model. Queried with GQL, Cypher,
    /// Gremlin or GraphQL.
    #[default]
    Lpg,
    /// RDF triple store. Queried with SPARQL.
    Rdf,
}
22
23impl fmt::Display for GraphModel {
24    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
25        match self {
26            Self::Lpg => write!(f, "LPG"),
27            Self::Rdf => write!(f, "RDF"),
28        }
29    }
30}
31
/// How aggressively the WAL is synced, trading safety against speed.
///
/// The enum is defined in the config module so that `Config` can carry the
/// requested durability whether or not the `wal` feature is compiled in. When
/// WAL support is enabled, the engine translates this value into the
/// adapter-level durability mode.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DurabilityMode {
    /// Fsync on every commit. The safest option and the slowest.
    Sync,
    /// Fsync periodically in batches. Balances performance and durability.
    Batch {
        /// Upper bound, in milliseconds, on the time between two syncs.
        max_delay_ms: u64,
        /// Upper bound on the number of records between two syncs.
        max_records: u64,
    },
    /// Adaptive syncing driven by a background flusher thread.
    Adaptive {
        /// Desired interval between flushes, in milliseconds.
        target_interval_ms: u64,
    },
    /// Never sync explicitly and leave flushing to the OS buffers. The
    /// fastest option, but recent data may be lost.
    NoSync,
}
56
57impl Default for DurabilityMode {
58    fn default() -> Self {
59        Self::Batch {
60            max_delay_ms: 100,
61            max_records: 1000,
62        }
63    }
64}
65
/// Reasons why [`Config::validate()`] rejects a configuration.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ConfigError {
    /// A memory limit was set, but to zero.
    ZeroMemoryLimit,
    /// The worker thread count is zero.
    ZeroThreads,
    /// The WAL flush interval is zero.
    ZeroWalFlushInterval,
    /// The RDF graph model was requested without the `rdf` feature flag.
    RdfFeatureRequired,
}
78
79impl fmt::Display for ConfigError {
80    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
81        match self {
82            Self::ZeroMemoryLimit => write!(f, "memory_limit must be greater than zero"),
83            Self::ZeroThreads => write!(f, "threads must be greater than zero"),
84            Self::ZeroWalFlushInterval => {
85                write!(f, "wal_flush_interval_ms must be greater than zero")
86            }
87            Self::RdfFeatureRequired => {
88                write!(
89                    f,
90                    "RDF graph model requires the `rdf` feature flag to be enabled"
91                )
92            }
93        }
94    }
95}
96
// Marker impl with no overridden methods: lets `ConfigError` be used wherever
// a `std::error::Error` is expected.
impl std::error::Error for ConfigError {}
98
99/// Database configuration.
100#[derive(Debug, Clone)]
101#[allow(clippy::struct_excessive_bools)] // Config structs naturally have many boolean flags
102pub struct Config {
103    /// Graph data model (LPG or RDF). Immutable after database creation.
104    pub graph_model: GraphModel,
105    /// Path to the database directory (None for in-memory only).
106    pub path: Option<PathBuf>,
107
108    /// Memory limit in bytes (None for unlimited).
109    pub memory_limit: Option<usize>,
110
111    /// Path for spilling data to disk under memory pressure.
112    pub spill_path: Option<PathBuf>,
113
114    /// Number of worker threads for query execution.
115    pub threads: usize,
116
117    /// Whether to enable WAL for durability.
118    pub wal_enabled: bool,
119
120    /// WAL flush interval in milliseconds.
121    pub wal_flush_interval_ms: u64,
122
123    /// Whether to maintain backward edges.
124    pub backward_edges: bool,
125
126    /// Whether to enable query logging.
127    pub query_logging: bool,
128
129    /// Adaptive execution configuration.
130    pub adaptive: AdaptiveConfig,
131
132    /// Whether to use factorized execution for multi-hop queries.
133    ///
134    /// When enabled, consecutive MATCH expansions are executed using factorized
135    /// representation which avoids Cartesian product materialization. This provides
136    /// 5-100x speedup for multi-hop queries with high fan-out.
137    ///
138    /// Enabled by default.
139    pub factorized_execution: bool,
140
141    /// WAL durability mode. Only used when `wal_enabled` is true.
142    pub wal_durability: DurabilityMode,
143
144    /// Whether to enable catalog schema constraint enforcement.
145    ///
146    /// When true, the catalog enforces label, edge type, and property constraints
147    /// (e.g. required properties, uniqueness). The server sets this for JSON
148    /// Schema databases and populates constraints after creation.
149    pub schema_constraints: bool,
150
151    /// Maximum time a single query may run before being cancelled.
152    ///
153    /// When set, the executor checks the deadline between operator batches and
154    /// returns `QueryError::timeout()` if the wall-clock limit is exceeded.
155    /// `None` means no timeout (queries may run indefinitely).
156    pub query_timeout: Option<Duration>,
157
158    /// Run MVCC version garbage collection every N commits.
159    ///
160    /// Old versions that are no longer visible to any active transaction are
161    /// pruned to reclaim memory. Set to 0 to disable automatic GC.
162    pub gc_interval: usize,
163}
164
/// Settings for adaptive query execution.
///
/// Adaptive execution watches the real row counts while a query runs and can
/// trigger re-optimization when the estimates turn out to be badly off.
#[derive(Clone, Debug)]
pub struct AdaptiveConfig {
    /// Turns adaptive execution on or off.
    pub enabled: bool,

    /// Deviation factor at which re-optimization is triggered.
    ///
    /// With a value of 3.0, re-optimization is triggered once the actual
    /// cardinality is more than 3x or less than 1/3x the estimate.
    pub threshold: f64,

    /// Row count that must be reached before re-optimization is considered.
    ///
    /// Guards against thrashing on small result sets.
    pub min_rows: u64,

    /// Upper bound on re-optimizations for a single query.
    pub max_reoptimizations: usize,
}
188
189impl Default for AdaptiveConfig {
190    fn default() -> Self {
191        Self {
192            enabled: true,
193            threshold: 3.0,
194            min_rows: 1000,
195            max_reoptimizations: 3,
196        }
197    }
198}
199
200impl AdaptiveConfig {
201    /// Creates a disabled adaptive config.
202    #[must_use]
203    pub fn disabled() -> Self {
204        Self {
205            enabled: false,
206            ..Default::default()
207        }
208    }
209
210    /// Sets the deviation threshold.
211    #[must_use]
212    pub fn with_threshold(mut self, threshold: f64) -> Self {
213        self.threshold = threshold;
214        self
215    }
216
217    /// Sets the minimum rows before re-optimization.
218    #[must_use]
219    pub fn with_min_rows(mut self, min_rows: u64) -> Self {
220        self.min_rows = min_rows;
221        self
222    }
223
224    /// Sets the maximum number of re-optimizations.
225    #[must_use]
226    pub fn with_max_reoptimizations(mut self, max: usize) -> Self {
227        self.max_reoptimizations = max;
228        self
229    }
230}
231
232impl Default for Config {
233    fn default() -> Self {
234        Self {
235            graph_model: GraphModel::default(),
236            path: None,
237            memory_limit: None,
238            spill_path: None,
239            threads: num_cpus::get(),
240            wal_enabled: true,
241            wal_flush_interval_ms: 100,
242            backward_edges: true,
243            query_logging: false,
244            adaptive: AdaptiveConfig::default(),
245            factorized_execution: true,
246            wal_durability: DurabilityMode::default(),
247            schema_constraints: false,
248            query_timeout: None,
249            gc_interval: 100,
250        }
251    }
252}
253
254impl Config {
255    /// Creates a new configuration for an in-memory database.
256    #[must_use]
257    pub fn in_memory() -> Self {
258        Self {
259            path: None,
260            wal_enabled: false,
261            ..Default::default()
262        }
263    }
264
265    /// Creates a new configuration for a persistent database.
266    #[must_use]
267    pub fn persistent(path: impl Into<PathBuf>) -> Self {
268        Self {
269            path: Some(path.into()),
270            wal_enabled: true,
271            ..Default::default()
272        }
273    }
274
275    /// Sets the memory limit.
276    #[must_use]
277    pub fn with_memory_limit(mut self, limit: usize) -> Self {
278        self.memory_limit = Some(limit);
279        self
280    }
281
282    /// Sets the number of worker threads.
283    #[must_use]
284    pub fn with_threads(mut self, threads: usize) -> Self {
285        self.threads = threads;
286        self
287    }
288
289    /// Disables backward edges.
290    #[must_use]
291    pub fn without_backward_edges(mut self) -> Self {
292        self.backward_edges = false;
293        self
294    }
295
296    /// Enables query logging.
297    #[must_use]
298    pub fn with_query_logging(mut self) -> Self {
299        self.query_logging = true;
300        self
301    }
302
303    /// Sets the memory budget as a fraction of system RAM.
304    #[must_use]
305    pub fn with_memory_fraction(mut self, fraction: f64) -> Self {
306        use grafeo_common::memory::buffer::BufferManagerConfig;
307        let system_memory = BufferManagerConfig::detect_system_memory();
308        self.memory_limit = Some((system_memory as f64 * fraction) as usize);
309        self
310    }
311
312    /// Sets the spill directory for out-of-core processing.
313    #[must_use]
314    pub fn with_spill_path(mut self, path: impl Into<PathBuf>) -> Self {
315        self.spill_path = Some(path.into());
316        self
317    }
318
319    /// Sets the adaptive execution configuration.
320    #[must_use]
321    pub fn with_adaptive(mut self, adaptive: AdaptiveConfig) -> Self {
322        self.adaptive = adaptive;
323        self
324    }
325
326    /// Disables adaptive execution.
327    #[must_use]
328    pub fn without_adaptive(mut self) -> Self {
329        self.adaptive.enabled = false;
330        self
331    }
332
333    /// Disables factorized execution for multi-hop queries.
334    ///
335    /// This reverts to the traditional flat execution model where each expansion
336    /// creates a full Cartesian product. Only use this if you encounter issues
337    /// with factorized execution.
338    #[must_use]
339    pub fn without_factorized_execution(mut self) -> Self {
340        self.factorized_execution = false;
341        self
342    }
343
344    /// Sets the graph data model.
345    #[must_use]
346    pub fn with_graph_model(mut self, model: GraphModel) -> Self {
347        self.graph_model = model;
348        self
349    }
350
351    /// Sets the WAL durability mode.
352    #[must_use]
353    pub fn with_wal_durability(mut self, mode: DurabilityMode) -> Self {
354        self.wal_durability = mode;
355        self
356    }
357
358    /// Enables catalog schema constraint enforcement.
359    #[must_use]
360    pub fn with_schema_constraints(mut self) -> Self {
361        self.schema_constraints = true;
362        self
363    }
364
365    /// Sets the maximum time a query may run before being cancelled.
366    #[must_use]
367    pub fn with_query_timeout(mut self, timeout: Duration) -> Self {
368        self.query_timeout = Some(timeout);
369        self
370    }
371
372    /// Sets the MVCC garbage collection interval (every N commits).
373    ///
374    /// Set to 0 to disable automatic GC.
375    #[must_use]
376    pub fn with_gc_interval(mut self, interval: usize) -> Self {
377        self.gc_interval = interval;
378        self
379    }
380
381    /// Validates the configuration, returning an error for invalid combinations.
382    ///
383    /// Called automatically by [`GrafeoDB::with_config()`](crate::GrafeoDB::with_config).
384    ///
385    /// # Errors
386    ///
387    /// Returns [`ConfigError`] if any setting is invalid.
388    pub fn validate(&self) -> std::result::Result<(), ConfigError> {
389        if let Some(limit) = self.memory_limit
390            && limit == 0
391        {
392            return Err(ConfigError::ZeroMemoryLimit);
393        }
394
395        if self.threads == 0 {
396            return Err(ConfigError::ZeroThreads);
397        }
398
399        if self.wal_flush_interval_ms == 0 {
400            return Err(ConfigError::ZeroWalFlushInterval);
401        }
402
403        #[cfg(not(feature = "rdf"))]
404        if self.graph_model == GraphModel::Rdf {
405            return Err(ConfigError::RdfFeatureRequired);
406        }
407
408        Ok(())
409    }
410}
411
/// Local helper module for determining the CPU count (fallback implementation).
mod num_cpus {
    /// Returns the parallelism reported by the standard library, or 4 when it
    /// cannot be determined.
    #[cfg(not(target_arch = "wasm32"))]
    pub fn get() -> usize {
        match std::thread::available_parallelism() {
            Ok(count) => count.get(),
            Err(_) => 4,
        }
    }

    /// Always reports a single thread on `wasm32` targets.
    #[cfg(target_arch = "wasm32")]
    pub fn get() -> usize {
        1
    }
}
426
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns true when two floats differ by less than `f64::EPSILON`.
    fn approx_eq(left: f64, right: f64) -> bool {
        (left - right).abs() < f64::EPSILON
    }

    // --- Config constructors and builders ---

    #[test]
    fn test_config_default() {
        let cfg = Config::default();
        assert_eq!(cfg.graph_model, GraphModel::Lpg);
        assert_eq!(cfg.path, None);
        assert_eq!(cfg.memory_limit, None);
        assert_eq!(cfg.spill_path, None);
        assert!(cfg.threads > 0);
        assert!(cfg.wal_enabled);
        assert_eq!(cfg.wal_flush_interval_ms, 100);
        assert!(cfg.backward_edges);
        assert!(!cfg.query_logging);
        assert!(cfg.factorized_execution);
        assert_eq!(cfg.wal_durability, DurabilityMode::default());
        assert!(!cfg.schema_constraints);
        assert_eq!(cfg.query_timeout, None);
        assert_eq!(cfg.gc_interval, 100);
    }

    #[test]
    fn test_config_in_memory() {
        let cfg = Config::in_memory();
        assert_eq!(cfg.path, None);
        assert!(!cfg.wal_enabled);
        assert!(cfg.backward_edges);
    }

    #[test]
    fn test_config_persistent() {
        let cfg = Config::persistent("/tmp/test_db");
        assert_eq!(cfg.path, Some(PathBuf::from("/tmp/test_db")));
        assert!(cfg.wal_enabled);
    }

    #[test]
    fn test_config_with_memory_limit() {
        let one_mib = 1024 * 1024;
        let cfg = Config::in_memory().with_memory_limit(one_mib);
        assert_eq!(cfg.memory_limit, Some(one_mib));
    }

    #[test]
    fn test_config_with_threads() {
        let cfg = Config::in_memory().with_threads(8);
        assert_eq!(cfg.threads, 8);
    }

    #[test]
    fn test_config_without_backward_edges() {
        let cfg = Config::in_memory().without_backward_edges();
        assert!(!cfg.backward_edges);
    }

    #[test]
    fn test_config_with_query_logging() {
        let cfg = Config::in_memory().with_query_logging();
        assert!(cfg.query_logging);
    }

    #[test]
    fn test_config_with_spill_path() {
        let cfg = Config::in_memory().with_spill_path("/tmp/spill");
        assert_eq!(cfg.spill_path, Some(PathBuf::from("/tmp/spill")));
    }

    #[test]
    fn test_config_with_memory_fraction() {
        let cfg = Config::in_memory().with_memory_fraction(0.5);
        let limit = cfg
            .memory_limit
            .expect("with_memory_fraction should set a memory limit");
        assert!(limit > 0);
    }

    #[test]
    fn test_config_with_adaptive() {
        let tuned = AdaptiveConfig::default().with_threshold(5.0);
        let cfg = Config::in_memory().with_adaptive(tuned);
        assert!(approx_eq(cfg.adaptive.threshold, 5.0));
    }

    #[test]
    fn test_config_without_adaptive() {
        let cfg = Config::in_memory().without_adaptive();
        assert!(!cfg.adaptive.enabled);
    }

    #[test]
    fn test_config_without_factorized_execution() {
        let cfg = Config::in_memory().without_factorized_execution();
        assert!(!cfg.factorized_execution);
    }

    #[test]
    fn test_config_builder_chaining() {
        let half_gib = 512 * 1024 * 1024;
        let cfg = Config::persistent("/tmp/db")
            .with_memory_limit(half_gib)
            .with_threads(4)
            .with_query_logging()
            .without_backward_edges()
            .with_spill_path("/tmp/spill");

        assert!(cfg.path.is_some());
        assert_eq!(cfg.memory_limit, Some(half_gib));
        assert_eq!(cfg.threads, 4);
        assert!(cfg.query_logging);
        assert!(!cfg.backward_edges);
        assert!(cfg.spill_path.is_some());
    }

    // --- AdaptiveConfig tests ---

    #[test]
    fn test_adaptive_config_default() {
        let adaptive = AdaptiveConfig::default();
        assert!(adaptive.enabled);
        assert!(approx_eq(adaptive.threshold, 3.0));
        assert_eq!(adaptive.min_rows, 1000);
        assert_eq!(adaptive.max_reoptimizations, 3);
    }

    #[test]
    fn test_adaptive_config_disabled() {
        let adaptive = AdaptiveConfig::disabled();
        assert!(!adaptive.enabled);
    }

    #[test]
    fn test_adaptive_config_with_threshold() {
        let adaptive = AdaptiveConfig::default().with_threshold(10.0);
        assert!(approx_eq(adaptive.threshold, 10.0));
    }

    #[test]
    fn test_adaptive_config_with_min_rows() {
        let adaptive = AdaptiveConfig::default().with_min_rows(500);
        assert_eq!(adaptive.min_rows, 500);
    }

    #[test]
    fn test_adaptive_config_with_max_reoptimizations() {
        let adaptive = AdaptiveConfig::default().with_max_reoptimizations(5);
        assert_eq!(adaptive.max_reoptimizations, 5);
    }

    #[test]
    fn test_adaptive_config_builder_chaining() {
        let adaptive = AdaptiveConfig::default()
            .with_threshold(2.0)
            .with_min_rows(100)
            .with_max_reoptimizations(10);
        assert!(approx_eq(adaptive.threshold, 2.0));
        assert_eq!(adaptive.min_rows, 100);
        assert_eq!(adaptive.max_reoptimizations, 10);
    }

    // --- GraphModel tests ---

    #[test]
    fn test_graph_model_default_is_lpg() {
        assert_eq!(GraphModel::default(), GraphModel::Lpg);
    }

    #[test]
    fn test_graph_model_display() {
        assert_eq!(format!("{}", GraphModel::Lpg), "LPG");
        assert_eq!(format!("{}", GraphModel::Rdf), "RDF");
    }

    #[test]
    fn test_config_with_graph_model() {
        let cfg = Config::in_memory().with_graph_model(GraphModel::Rdf);
        assert_eq!(cfg.graph_model, GraphModel::Rdf);
    }

    // --- DurabilityMode tests ---

    #[test]
    fn test_durability_mode_default_is_batch() {
        let expected = DurabilityMode::Batch {
            max_delay_ms: 100,
            max_records: 1000,
        };
        assert_eq!(DurabilityMode::default(), expected);
    }

    #[test]
    fn test_config_with_wal_durability() {
        let cfg = Config::persistent("/tmp/db").with_wal_durability(DurabilityMode::Sync);
        assert_eq!(cfg.wal_durability, DurabilityMode::Sync);
    }

    #[test]
    fn test_config_with_wal_durability_nosync() {
        let cfg = Config::persistent("/tmp/db").with_wal_durability(DurabilityMode::NoSync);
        assert_eq!(cfg.wal_durability, DurabilityMode::NoSync);
    }

    #[test]
    fn test_config_with_wal_durability_adaptive() {
        let mode = DurabilityMode::Adaptive {
            target_interval_ms: 50,
        };
        let cfg = Config::persistent("/tmp/db").with_wal_durability(mode);
        assert_eq!(
            cfg.wal_durability,
            DurabilityMode::Adaptive {
                target_interval_ms: 50
            }
        );
    }

    // --- schema_constraints tests ---

    #[test]
    fn test_config_with_schema_constraints() {
        let cfg = Config::in_memory().with_schema_constraints();
        assert!(cfg.schema_constraints);
    }

    // --- query_timeout tests ---

    #[test]
    fn test_config_with_query_timeout() {
        let thirty_seconds = Duration::from_secs(30);
        let cfg = Config::in_memory().with_query_timeout(thirty_seconds);
        assert_eq!(cfg.query_timeout, Some(Duration::from_secs(30)));
    }

    // --- gc_interval tests ---

    #[test]
    fn test_config_with_gc_interval() {
        let cfg = Config::in_memory().with_gc_interval(50);
        assert_eq!(cfg.gc_interval, 50);
    }

    #[test]
    fn test_config_gc_disabled() {
        let cfg = Config::in_memory().with_gc_interval(0);
        assert_eq!(cfg.gc_interval, 0);
    }

    // --- validate() tests ---

    #[test]
    fn test_validate_default_config() {
        assert_eq!(Config::default().validate(), Ok(()));
    }

    #[test]
    fn test_validate_in_memory_config() {
        assert_eq!(Config::in_memory().validate(), Ok(()));
    }

    #[test]
    fn test_validate_rejects_zero_memory_limit() {
        let outcome = Config::in_memory().with_memory_limit(0).validate();
        assert_eq!(outcome, Err(ConfigError::ZeroMemoryLimit));
    }

    #[test]
    fn test_validate_rejects_zero_threads() {
        let outcome = Config::in_memory().with_threads(0).validate();
        assert_eq!(outcome, Err(ConfigError::ZeroThreads));
    }

    #[test]
    fn test_validate_rejects_zero_wal_flush_interval() {
        // There is no builder for this field, so it is set directly.
        let cfg = Config {
            wal_flush_interval_ms: 0,
            ..Config::in_memory()
        };
        assert_eq!(cfg.validate(), Err(ConfigError::ZeroWalFlushInterval));
    }

    #[cfg(not(feature = "rdf"))]
    #[test]
    fn test_validate_rejects_rdf_without_feature() {
        let outcome = Config::in_memory()
            .with_graph_model(GraphModel::Rdf)
            .validate();
        assert_eq!(outcome, Err(ConfigError::RdfFeatureRequired));
    }

    #[test]
    fn test_config_error_display() {
        let expectations = [
            (
                ConfigError::ZeroMemoryLimit,
                "memory_limit must be greater than zero",
            ),
            (
                ConfigError::ZeroThreads,
                "threads must be greater than zero",
            ),
            (
                ConfigError::ZeroWalFlushInterval,
                "wal_flush_interval_ms must be greater than zero",
            ),
            (
                ConfigError::RdfFeatureRequired,
                "RDF graph model requires the `rdf` feature flag to be enabled",
            ),
        ];

        for (error, expected) in expectations {
            assert_eq!(error.to_string(), expected);
        }
    }

    // --- Builder chaining with new fields ---

    #[test]
    fn test_config_full_builder_chaining() {
        let half_gib = 512 * 1024 * 1024;
        let cfg = Config::persistent("/tmp/db")
            .with_graph_model(GraphModel::Lpg)
            .with_memory_limit(half_gib)
            .with_threads(4)
            .with_query_logging()
            .with_wal_durability(DurabilityMode::Sync)
            .with_schema_constraints()
            .without_backward_edges()
            .with_spill_path("/tmp/spill")
            .with_query_timeout(Duration::from_secs(60));

        assert_eq!(cfg.graph_model, GraphModel::Lpg);
        assert!(cfg.path.is_some());
        assert_eq!(cfg.memory_limit, Some(half_gib));
        assert_eq!(cfg.threads, 4);
        assert!(cfg.query_logging);
        assert_eq!(cfg.wal_durability, DurabilityMode::Sync);
        assert!(cfg.schema_constraints);
        assert!(!cfg.backward_edges);
        assert!(cfg.spill_path.is_some());
        assert_eq!(cfg.query_timeout, Some(Duration::from_secs(60)));
        assert_eq!(cfg.validate(), Ok(()));
    }
}
761}