vibesql_executor/select/morsel/config.rs
1//! Morsel configuration and adaptive sizing.
2//!
3//! This module provides configuration for morsel-driven execution, including
4//! per-operation morsel sizes and adaptive sizing based on row width and selectivity.
5
6use vibesql_types::DataType;
7
8use super::morsel_debug_enabled;
9
10/// Configuration for morsel-driven execution.
11///
12/// Supports per-operation morsel sizes based on benchmark data showing that
13/// different operations perform optimally with different morsel sizes:
14///
15/// - **Filter/Aggregate**: Smaller sizes (2K) improve cache locality
16/// - **GROUP BY**: Smaller sizes (2K) benefit from faster hash table merges
17/// - **Join**: Medium sizes (4K) balance hash table operations
18/// - **Sort**: Larger sizes (8K) improve merge phase efficiency
19/// - **Scan**: Larger sizes (8K) optimize sequential I/O
20///
21/// DuckDB uses 2048 for SIMD vectorization alignment (32 x 64-byte AVX-512 elements).
22#[derive(Debug, Clone)]
23pub struct MorselConfig {
24 /// Default morsel size (used when no operation-specific size applies)
25 pub morsel_size: usize,
26 /// Morsel size for filter operations (default: 2048)
27 pub filter_size: usize,
28 /// Morsel size for GROUP BY operations (default: 2048)
29 pub group_by_size: usize,
30 /// Morsel size for hash join build phase (default: 4096)
31 pub join_build_size: usize,
32 /// Morsel size for hash join probe phase (default: 4096)
33 pub join_probe_size: usize,
34 /// Morsel size for sort operations (default: 8192)
35 pub sort_size: usize,
36 /// Morsel size for scan/materialize operations (default: 8192)
37 pub scan_size: usize,
38 /// Morsel size for aggregate operations (default: 2048)
39 pub aggregate_size: usize,
40}
41
42/// Target cache size for morsel data (2MB = typical L3 cache slice)
43pub(super) const TARGET_CACHE_BYTES: usize = 2 * 1024 * 1024;
44
45/// Minimum morsel size to amortize work-stealing overhead
46pub(super) const MIN_MORSEL_SIZE: usize = 10_000;
47
48/// Maximum morsel size to ensure enough morsels for load balancing
49pub(super) const MAX_MORSEL_SIZE: usize = 100_000;
50
51/// Default morsel size when no hints are available
52pub(super) const DEFAULT_MORSEL_SIZE: usize = 50_000;
53
54// Per-operation optimal morsel sizes based on benchmark data (TPC-H SF 0.1, 8 threads)
55// See issue #4282 and docs/performance/MORSEL_SIZE_INVESTIGATION.md
56
57/// Default filter morsel size - smaller for better cache locality
58/// Benchmark: Q1 aggregation 46.7ms (1K) vs 50.3ms (50K) = ~7% improvement
59pub(super) const DEFAULT_FILTER_SIZE: usize = 2048;
60
61/// Default GROUP BY morsel size - smaller reduces hash table merge overhead
62/// Benefits from same cache locality as filter operations
63pub(super) const DEFAULT_GROUP_BY_SIZE: usize = 2048;
64
65/// Default hash join build morsel size - medium size for hash table insertion
66/// Benchmark: Q5 join 281ms (2K) vs 286ms (50K) = ~2% improvement
67pub(super) const DEFAULT_JOIN_BUILD_SIZE: usize = 4096;
68
69/// Default hash join probe morsel size - medium size for hash table lookups
70pub(super) const DEFAULT_JOIN_PROBE_SIZE: usize = 4096;
71
72/// Default sort morsel size - larger for efficient merge phases
73/// Sort shows good parallel scaling, larger morsels reduce merge depth
74pub(super) const DEFAULT_SORT_SIZE: usize = 8192;
75
76/// Default scan morsel size - larger for sequential I/O efficiency
77/// Scan is memory-bandwidth limited, larger morsels amortize overhead
78pub(super) const DEFAULT_SCAN_SIZE: usize = 8192;
79
80/// Default aggregate morsel size - smaller for cache locality
81/// Similar characteristics to filter operations
82pub(super) const DEFAULT_AGGREGATE_SIZE: usize = 2048;
83
84impl MorselConfig {
85 /// Create a new configuration with the given morsel size for all operations.
86 ///
87 /// This uses the same size for all operations, which is useful for testing
88 /// or when you want uniform behavior. For production use, prefer `optimal()`
89 /// which uses per-operation sizes based on benchmark data.
90 pub fn new(morsel_size: usize) -> Self {
91 Self {
92 morsel_size,
93 filter_size: morsel_size,
94 group_by_size: morsel_size,
95 join_build_size: morsel_size,
96 join_probe_size: morsel_size,
97 sort_size: morsel_size,
98 scan_size: morsel_size,
99 aggregate_size: morsel_size,
100 }
101 }
102
103 /// Create a new configuration with per-operation optimal sizes.
104 ///
105 /// Uses different morsel sizes for each operation based on benchmark data:
106 /// - Filter/Aggregate: 2048 (cache locality)
107 /// - GROUP BY: 2048 (hash table merge efficiency)
108 /// - Join: 4096 (hash table operations)
109 /// - Sort: 8192 (merge phase efficiency)
110 /// - Scan: 8192 (sequential I/O)
111 pub fn with_per_operation_sizes() -> Self {
112 Self {
113 morsel_size: DEFAULT_MORSEL_SIZE,
114 filter_size: DEFAULT_FILTER_SIZE,
115 group_by_size: DEFAULT_GROUP_BY_SIZE,
116 join_build_size: DEFAULT_JOIN_BUILD_SIZE,
117 join_probe_size: DEFAULT_JOIN_PROBE_SIZE,
118 sort_size: DEFAULT_SORT_SIZE,
119 scan_size: DEFAULT_SCAN_SIZE,
120 aggregate_size: DEFAULT_AGGREGATE_SIZE,
121 }
122 }
123
124 /// Calculate optimal morsel size based on hardware characteristics.
125 ///
126 /// Uses per-operation morsel sizes based on benchmark data (see issue #4282).
127 /// Supports `MORSEL_SIZE` environment variable override for all operations.
128 ///
129 /// When `MORSEL_SIZE` is set, that value is used for all operations (uniform mode).
130 /// Otherwise, per-operation optimal sizes are used.
131 pub fn optimal() -> Self {
132 // Check for user override - if set, use uniform size for all operations
133 if let Ok(size_str) = std::env::var("MORSEL_SIZE") {
134 let morsel_size =
135 size_str.parse::<usize>().unwrap_or(DEFAULT_MORSEL_SIZE).clamp(1000, 500_000);
136 return Self::new(morsel_size);
137 }
138
139 // Use per-operation optimal sizes
140 Self::with_per_operation_sizes()
141 }
142
143 /// Create an adaptive configuration based on estimated row width in bytes.
144 ///
145 /// Adjusts morsel size to maintain consistent L3 cache occupancy regardless
146 /// of row width. Wide rows get smaller morsels, narrow rows get larger morsels.
147 ///
148 /// # Arguments
149 ///
150 /// * `avg_row_bytes` - Estimated average size of each row in bytes
151 ///
152 /// # Example
153 ///
154 /// ```
155 /// use vibesql_executor::select::morsel::MorselConfig;
156 ///
157 /// // For wide rows (~500 bytes each), use smaller morsels
158 /// let wide_config = MorselConfig::for_row_width(500);
159 ///
160 /// // For narrow rows (~20 bytes each), use larger morsels
161 /// let narrow_config = MorselConfig::for_row_width(20);
162 ///
163 /// // Narrow rows get larger morsels than wide rows
164 /// assert!(narrow_config.morsel_size > wide_config.morsel_size);
165 /// ```
166 pub fn for_row_width(avg_row_bytes: usize) -> Self {
167 // Avoid division by zero, use minimum of 1 byte per row
168 let row_bytes = avg_row_bytes.max(1);
169
170 // Calculate morsel size to fit TARGET_CACHE_BYTES
171 let morsel_size = (TARGET_CACHE_BYTES / row_bytes).clamp(MIN_MORSEL_SIZE, MAX_MORSEL_SIZE);
172
173 if morsel_debug_enabled() {
174 eprintln!(
175 "[MORSEL] Adaptive sizing: {} bytes/row -> {} rows/morsel",
176 row_bytes, morsel_size
177 );
178 }
179
180 // Use uniform size for row-width-adaptive configs
181 Self::new(morsel_size)
182 }
183
184 /// Create an adaptive configuration based on a schema (list of column types).
185 ///
186 /// Estimates row width from the schema and adjusts morsel size accordingly.
187 /// This is the recommended method when schema information is available.
188 ///
189 /// # Arguments
190 ///
191 /// * `schema` - Slice of column data types in the row
192 ///
193 /// # Example
194 ///
195 /// ```
196 /// use vibesql_executor::select::morsel::MorselConfig;
197 /// use vibesql_types::DataType;
198 ///
199 /// let schema = [
200 /// DataType::Integer,
201 /// DataType::Varchar { max_length: Some(100) },
202 /// DataType::Date,
203 /// ];
204 /// let config = MorselConfig::for_schema(&schema);
205 /// assert!(config.morsel_size > 0);
206 /// ```
207 pub fn for_schema(schema: &[DataType]) -> Self {
208 if schema.is_empty() {
209 return Self::optimal();
210 }
211
212 // Sum estimated sizes for all columns, plus Row struct overhead
213 const ROW_OVERHEAD: usize = 24; // Vec header for values
214 let row_bytes: usize =
215 ROW_OVERHEAD + schema.iter().map(|dt| dt.estimated_size_bytes()).sum::<usize>();
216
217 Self::for_row_width(row_bytes)
218 }
219
220 /// Create an adaptive configuration based on estimated filter selectivity.
221 ///
222 /// For filter operations with known selectivity, adjusts morsel size:
223 /// - Low selectivity (few rows pass) -> larger morsels to reduce overhead
224 /// - High selectivity (many rows pass) -> smaller morsels for better balancing
225 ///
226 /// # Arguments
227 ///
228 /// * `selectivity` - Fraction of rows expected to pass the filter (0.0 to 1.0)
229 ///
230 /// # Example
231 ///
232 /// ```
233 /// use vibesql_executor::select::morsel::MorselConfig;
234 ///
235 /// // For a highly selective filter (1% pass rate), use larger morsels
236 /// let selective = MorselConfig::for_selectivity(0.01);
237 ///
238 /// // For a low selectivity filter (90% pass rate), use smaller morsels
239 /// let unselective = MorselConfig::for_selectivity(0.90);
240 ///
241 /// // More selective filters get larger morsels
242 /// assert!(selective.morsel_size > unselective.morsel_size);
243 /// ```
244 pub fn for_selectivity(selectivity: f64) -> Self {
245 // Clamp selectivity to valid range
246 let sel = selectivity.clamp(0.001, 1.0);
247
248 // For very low selectivity, increase morsel size to reduce overhead
249 // The idea: if only 1% of rows pass, we need larger input morsels
250 // to get meaningful output morsels
251 let adjusted = if sel < 0.1 {
252 // Scale inversely with selectivity, but cap at 2x default
253 ((DEFAULT_MORSEL_SIZE as f64) / sel).min((MAX_MORSEL_SIZE * 2) as f64) as usize
254 } else {
255 DEFAULT_MORSEL_SIZE
256 };
257
258 let morsel_size = adjusted.clamp(MIN_MORSEL_SIZE, MAX_MORSEL_SIZE * 2);
259
260 if morsel_debug_enabled() {
261 eprintln!(
262 "[MORSEL] Selectivity-based sizing: {:.1}% selectivity -> {} rows/morsel",
263 sel * 100.0,
264 morsel_size
265 );
266 }
267
268 // Use uniform size for selectivity-adaptive configs
269 Self::new(morsel_size)
270 }
271
272 /// Create an adaptive configuration combining row width and selectivity hints.
273 ///
274 /// This is the most accurate method when both schema and selectivity estimates
275 /// are available (e.g., from query optimizer statistics).
276 ///
277 /// # Arguments
278 ///
279 /// * `schema` - Slice of column data types
280 /// * `selectivity` - Optional filter selectivity (0.0 to 1.0)
281 ///
282 /// # Example
283 ///
284 /// ```
285 /// use vibesql_executor::select::morsel::MorselConfig;
286 /// use vibesql_types::DataType;
287 ///
288 /// let schema = [DataType::Integer, DataType::Bigint];
289 /// let config = MorselConfig::adaptive(&schema, Some(0.05));
290 /// assert!(config.morsel_size > 0);
291 /// ```
292 pub fn adaptive(schema: &[DataType], selectivity: Option<f64>) -> Self {
293 // Start with schema-based sizing
294 let base_config = Self::for_schema(schema);
295
296 // Adjust for selectivity if provided
297 match selectivity {
298 Some(sel) if sel < 0.1 => {
299 // For low selectivity, scale up from the schema-based size
300 let adjusted = ((base_config.morsel_size as f64) / sel.clamp(0.001, 1.0))
301 .min((MAX_MORSEL_SIZE * 2) as f64) as usize;
302 let morsel_size = adjusted.clamp(MIN_MORSEL_SIZE, MAX_MORSEL_SIZE * 2);
303
304 if morsel_debug_enabled() {
305 eprintln!(
306 "[MORSEL] Adaptive sizing: schema={} bytes, selectivity={:.1}% -> {} rows/morsel",
307 schema.iter().map(|dt| dt.estimated_size_bytes()).sum::<usize>(),
308 sel * 100.0,
309 morsel_size
310 );
311 }
312
313 // Use uniform size for adaptive configs
314 Self::new(morsel_size)
315 }
316 _ => base_config,
317 }
318 }
319}
320
321impl Default for MorselConfig {
322 fn default() -> Self {
323 Self::optimal()
324 }
325}
326
327/// Global morsel configuration, initialized once on first access.
328static GLOBAL_CONFIG: std::sync::OnceLock<MorselConfig> = std::sync::OnceLock::new();
329
330/// Get the global morsel configuration.
331pub fn global_config() -> &'static MorselConfig {
332 GLOBAL_CONFIG.get_or_init(MorselConfig::optimal)
333}