// grafeo_engine/config.rs

//! Database configuration.

use std::path::PathBuf;

5/// Database configuration.
6#[derive(Debug, Clone)]
7#[allow(clippy::struct_excessive_bools)] // Config structs naturally have many boolean flags
8pub struct Config {
9    /// Path to the database directory (None for in-memory only).
10    pub path: Option<PathBuf>,
11
12    /// Memory limit in bytes (None for unlimited).
13    pub memory_limit: Option<usize>,
14
15    /// Path for spilling data to disk under memory pressure.
16    pub spill_path: Option<PathBuf>,
17
18    /// Number of worker threads for query execution.
19    pub threads: usize,
20
21    /// Whether to enable WAL for durability.
22    pub wal_enabled: bool,
23
24    /// WAL flush interval in milliseconds.
25    pub wal_flush_interval_ms: u64,
26
27    /// Whether to maintain backward edges.
28    pub backward_edges: bool,
29
30    /// Whether to enable query logging.
31    pub query_logging: bool,
32
33    /// Adaptive execution configuration.
34    pub adaptive: AdaptiveConfig,
35
36    /// Whether to use factorized execution for multi-hop queries.
37    ///
38    /// When enabled, consecutive MATCH expansions are executed using factorized
39    /// representation which avoids Cartesian product materialization. This provides
40    /// 5-100x speedup for multi-hop queries with high fan-out.
41    ///
42    /// Enabled by default.
43    pub factorized_execution: bool,
44}
/// Tuning knobs for adaptive query execution.
///
/// During query processing, adaptive execution monitors the row counts that
/// are actually observed and can trigger re-optimization when the estimates
/// turn out to be significantly wrong.
#[derive(Debug, Clone)]
pub struct AdaptiveConfig {
    /// Master switch for adaptive execution.
    pub enabled: bool,

    /// How far the actual cardinality may deviate from the estimate before
    /// re-optimization is triggered.
    ///
    /// With a value of 3.0, re-optimization is triggered once the actual
    /// cardinality is more than 3x, or less than 1/3x, the estimated value.
    pub threshold: f64,

    /// Row count that must be reached before re-optimization is considered.
    ///
    /// Helps avoid thrashing on small result sets.
    pub min_rows: u64,

    /// Cap on the number of re-optimizations allowed per query.
    pub max_reoptimizations: usize,
}
70impl Default for AdaptiveConfig {
71    fn default() -> Self {
72        Self {
73            enabled: true,
74            threshold: 3.0,
75            min_rows: 1000,
76            max_reoptimizations: 3,
77        }
78    }
79}
81impl AdaptiveConfig {
82    /// Creates a disabled adaptive config.
83    #[must_use]
84    pub fn disabled() -> Self {
85        Self {
86            enabled: false,
87            ..Default::default()
88        }
89    }
90
91    /// Sets the deviation threshold.
92    #[must_use]
93    pub fn with_threshold(mut self, threshold: f64) -> Self {
94        self.threshold = threshold;
95        self
96    }
97
98    /// Sets the minimum rows before re-optimization.
99    #[must_use]
100    pub fn with_min_rows(mut self, min_rows: u64) -> Self {
101        self.min_rows = min_rows;
102        self
103    }
104
105    /// Sets the maximum number of re-optimizations.
106    #[must_use]
107    pub fn with_max_reoptimizations(mut self, max: usize) -> Self {
108        self.max_reoptimizations = max;
109        self
110    }
111}
113impl Default for Config {
114    fn default() -> Self {
115        Self {
116            path: None,
117            memory_limit: None,
118            spill_path: None,
119            threads: num_cpus::get(),
120            wal_enabled: true,
121            wal_flush_interval_ms: 100,
122            backward_edges: true,
123            query_logging: false,
124            adaptive: AdaptiveConfig::default(),
125            factorized_execution: true,
126        }
127    }
128}
130impl Config {
131    /// Creates a new configuration for an in-memory database.
132    #[must_use]
133    pub fn in_memory() -> Self {
134        Self {
135            path: None,
136            wal_enabled: false,
137            ..Default::default()
138        }
139    }
140
141    /// Creates a new configuration for a persistent database.
142    #[must_use]
143    pub fn persistent(path: impl Into<PathBuf>) -> Self {
144        Self {
145            path: Some(path.into()),
146            wal_enabled: true,
147            ..Default::default()
148        }
149    }
150
151    /// Sets the memory limit.
152    #[must_use]
153    pub fn with_memory_limit(mut self, limit: usize) -> Self {
154        self.memory_limit = Some(limit);
155        self
156    }
157
158    /// Sets the number of worker threads.
159    #[must_use]
160    pub fn with_threads(mut self, threads: usize) -> Self {
161        self.threads = threads;
162        self
163    }
164
165    /// Disables backward edges.
166    #[must_use]
167    pub fn without_backward_edges(mut self) -> Self {
168        self.backward_edges = false;
169        self
170    }
171
172    /// Enables query logging.
173    #[must_use]
174    pub fn with_query_logging(mut self) -> Self {
175        self.query_logging = true;
176        self
177    }
178
179    /// Sets the memory budget as a fraction of system RAM.
180    #[must_use]
181    pub fn with_memory_fraction(mut self, fraction: f64) -> Self {
182        use grafeo_common::memory::buffer::BufferManagerConfig;
183        let system_memory = BufferManagerConfig::detect_system_memory();
184        self.memory_limit = Some((system_memory as f64 * fraction) as usize);
185        self
186    }
187
188    /// Sets the spill directory for out-of-core processing.
189    #[must_use]
190    pub fn with_spill_path(mut self, path: impl Into<PathBuf>) -> Self {
191        self.spill_path = Some(path.into());
192        self
193    }
194
195    /// Sets the adaptive execution configuration.
196    #[must_use]
197    pub fn with_adaptive(mut self, adaptive: AdaptiveConfig) -> Self {
198        self.adaptive = adaptive;
199        self
200    }
201
202    /// Disables adaptive execution.
203    #[must_use]
204    pub fn without_adaptive(mut self) -> Self {
205        self.adaptive.enabled = false;
206        self
207    }
208
209    /// Disables factorized execution for multi-hop queries.
210    ///
211    /// This reverts to the traditional flat execution model where each expansion
212    /// creates a full Cartesian product. Only use this if you encounter issues
213    /// with factorized execution.
214    #[must_use]
215    pub fn without_factorized_execution(mut self) -> Self {
216        self.factorized_execution = false;
217        self
218    }
219}
/// Helper for obtaining the CPU count (fallback implementation).
mod num_cpus {
    /// Value reported when the available parallelism cannot be queried.
    const FALLBACK_THREADS: usize = 4;

    /// Returns the value of `std::thread::available_parallelism()`, or 4 when
    /// that query returns an error.
    pub fn get() -> usize {
        match std::thread::available_parallelism() {
            Ok(parallelism) => parallelism.get(),
            Err(_) => FALLBACK_THREADS,
        }
    }
}