vibesql_executor/select/morsel/
config.rs

1//! Morsel configuration and adaptive sizing.
2//!
3//! This module provides configuration for morsel-driven execution, including
4//! per-operation morsel sizes and adaptive sizing based on row width and selectivity.
5
6use vibesql_types::DataType;
7
8use super::morsel_debug_enabled;
9
10/// Configuration for morsel-driven execution.
11///
12/// Supports per-operation morsel sizes based on benchmark data showing that
13/// different operations perform optimally with different morsel sizes:
14///
15/// - **Filter/Aggregate**: Smaller sizes (2K) improve cache locality
16/// - **GROUP BY**: Smaller sizes (2K) benefit from faster hash table merges
17/// - **Join**: Medium sizes (4K) balance hash table operations
18/// - **Sort**: Larger sizes (8K) improve merge phase efficiency
19/// - **Scan**: Larger sizes (8K) optimize sequential I/O
20///
21/// DuckDB uses 2048 for SIMD vectorization alignment (32 x 64-byte AVX-512 elements).
22#[derive(Debug, Clone)]
23pub struct MorselConfig {
24    /// Default morsel size (used when no operation-specific size applies)
25    pub morsel_size: usize,
26    /// Morsel size for filter operations (default: 2048)
27    pub filter_size: usize,
28    /// Morsel size for GROUP BY operations (default: 2048)
29    pub group_by_size: usize,
30    /// Morsel size for hash join build phase (default: 4096)
31    pub join_build_size: usize,
32    /// Morsel size for hash join probe phase (default: 4096)
33    pub join_probe_size: usize,
34    /// Morsel size for sort operations (default: 8192)
35    pub sort_size: usize,
36    /// Morsel size for scan/materialize operations (default: 8192)
37    pub scan_size: usize,
38    /// Morsel size for aggregate operations (default: 2048)
39    pub aggregate_size: usize,
40}
41
42/// Target cache size for morsel data (2MB = typical L3 cache slice)
43pub(super) const TARGET_CACHE_BYTES: usize = 2 * 1024 * 1024;
44
45/// Minimum morsel size to amortize work-stealing overhead
46pub(super) const MIN_MORSEL_SIZE: usize = 10_000;
47
48/// Maximum morsel size to ensure enough morsels for load balancing
49pub(super) const MAX_MORSEL_SIZE: usize = 100_000;
50
51/// Default morsel size when no hints are available
52pub(super) const DEFAULT_MORSEL_SIZE: usize = 50_000;
53
54// Per-operation optimal morsel sizes based on benchmark data (TPC-H SF 0.1, 8 threads)
55// See issue #4282 and docs/performance/MORSEL_SIZE_INVESTIGATION.md
56
57/// Default filter morsel size - smaller for better cache locality
58/// Benchmark: Q1 aggregation 46.7ms (1K) vs 50.3ms (50K) = ~7% improvement
59pub(super) const DEFAULT_FILTER_SIZE: usize = 2048;
60
61/// Default GROUP BY morsel size - smaller reduces hash table merge overhead
62/// Benefits from same cache locality as filter operations
63pub(super) const DEFAULT_GROUP_BY_SIZE: usize = 2048;
64
65/// Default hash join build morsel size - medium size for hash table insertion
66/// Benchmark: Q5 join 281ms (2K) vs 286ms (50K) = ~2% improvement
67pub(super) const DEFAULT_JOIN_BUILD_SIZE: usize = 4096;
68
69/// Default hash join probe morsel size - medium size for hash table lookups
70pub(super) const DEFAULT_JOIN_PROBE_SIZE: usize = 4096;
71
72/// Default sort morsel size - larger for efficient merge phases
73/// Sort shows good parallel scaling, larger morsels reduce merge depth
74pub(super) const DEFAULT_SORT_SIZE: usize = 8192;
75
76/// Default scan morsel size - larger for sequential I/O efficiency
77/// Scan is memory-bandwidth limited, larger morsels amortize overhead
78pub(super) const DEFAULT_SCAN_SIZE: usize = 8192;
79
80/// Default aggregate morsel size - smaller for cache locality
81/// Similar characteristics to filter operations
82pub(super) const DEFAULT_AGGREGATE_SIZE: usize = 2048;
83
84impl MorselConfig {
85    /// Create a new configuration with the given morsel size for all operations.
86    ///
87    /// This uses the same size for all operations, which is useful for testing
88    /// or when you want uniform behavior. For production use, prefer `optimal()`
89    /// which uses per-operation sizes based on benchmark data.
90    pub fn new(morsel_size: usize) -> Self {
91        Self {
92            morsel_size,
93            filter_size: morsel_size,
94            group_by_size: morsel_size,
95            join_build_size: morsel_size,
96            join_probe_size: morsel_size,
97            sort_size: morsel_size,
98            scan_size: morsel_size,
99            aggregate_size: morsel_size,
100        }
101    }
102
103    /// Create a new configuration with per-operation optimal sizes.
104    ///
105    /// Uses different morsel sizes for each operation based on benchmark data:
106    /// - Filter/Aggregate: 2048 (cache locality)
107    /// - GROUP BY: 2048 (hash table merge efficiency)
108    /// - Join: 4096 (hash table operations)
109    /// - Sort: 8192 (merge phase efficiency)
110    /// - Scan: 8192 (sequential I/O)
111    pub fn with_per_operation_sizes() -> Self {
112        Self {
113            morsel_size: DEFAULT_MORSEL_SIZE,
114            filter_size: DEFAULT_FILTER_SIZE,
115            group_by_size: DEFAULT_GROUP_BY_SIZE,
116            join_build_size: DEFAULT_JOIN_BUILD_SIZE,
117            join_probe_size: DEFAULT_JOIN_PROBE_SIZE,
118            sort_size: DEFAULT_SORT_SIZE,
119            scan_size: DEFAULT_SCAN_SIZE,
120            aggregate_size: DEFAULT_AGGREGATE_SIZE,
121        }
122    }
123
124    /// Calculate optimal morsel size based on hardware characteristics.
125    ///
126    /// Uses per-operation morsel sizes based on benchmark data (see issue #4282).
127    /// Supports `MORSEL_SIZE` environment variable override for all operations.
128    ///
129    /// When `MORSEL_SIZE` is set, that value is used for all operations (uniform mode).
130    /// Otherwise, per-operation optimal sizes are used.
131    pub fn optimal() -> Self {
132        // Check for user override - if set, use uniform size for all operations
133        if let Ok(size_str) = std::env::var("MORSEL_SIZE") {
134            let morsel_size =
135                size_str.parse::<usize>().unwrap_or(DEFAULT_MORSEL_SIZE).clamp(1000, 500_000);
136            return Self::new(morsel_size);
137        }
138
139        // Use per-operation optimal sizes
140        Self::with_per_operation_sizes()
141    }
142
143    /// Create an adaptive configuration based on estimated row width in bytes.
144    ///
145    /// Adjusts morsel size to maintain consistent L3 cache occupancy regardless
146    /// of row width. Wide rows get smaller morsels, narrow rows get larger morsels.
147    ///
148    /// # Arguments
149    ///
150    /// * `avg_row_bytes` - Estimated average size of each row in bytes
151    ///
152    /// # Example
153    ///
154    /// ```
155    /// use vibesql_executor::select::morsel::MorselConfig;
156    ///
157    /// // For wide rows (~500 bytes each), use smaller morsels
158    /// let wide_config = MorselConfig::for_row_width(500);
159    ///
160    /// // For narrow rows (~20 bytes each), use larger morsels
161    /// let narrow_config = MorselConfig::for_row_width(20);
162    ///
163    /// // Narrow rows get larger morsels than wide rows
164    /// assert!(narrow_config.morsel_size > wide_config.morsel_size);
165    /// ```
166    pub fn for_row_width(avg_row_bytes: usize) -> Self {
167        // Avoid division by zero, use minimum of 1 byte per row
168        let row_bytes = avg_row_bytes.max(1);
169
170        // Calculate morsel size to fit TARGET_CACHE_BYTES
171        let morsel_size = (TARGET_CACHE_BYTES / row_bytes).clamp(MIN_MORSEL_SIZE, MAX_MORSEL_SIZE);
172
173        if morsel_debug_enabled() {
174            eprintln!(
175                "[MORSEL] Adaptive sizing: {} bytes/row -> {} rows/morsel",
176                row_bytes, morsel_size
177            );
178        }
179
180        // Use uniform size for row-width-adaptive configs
181        Self::new(morsel_size)
182    }
183
184    /// Create an adaptive configuration based on a schema (list of column types).
185    ///
186    /// Estimates row width from the schema and adjusts morsel size accordingly.
187    /// This is the recommended method when schema information is available.
188    ///
189    /// # Arguments
190    ///
191    /// * `schema` - Slice of column data types in the row
192    ///
193    /// # Example
194    ///
195    /// ```
196    /// use vibesql_executor::select::morsel::MorselConfig;
197    /// use vibesql_types::DataType;
198    ///
199    /// let schema = [
200    ///     DataType::Integer,
201    ///     DataType::Varchar { max_length: Some(100) },
202    ///     DataType::Date,
203    /// ];
204    /// let config = MorselConfig::for_schema(&schema);
205    /// assert!(config.morsel_size > 0);
206    /// ```
207    pub fn for_schema(schema: &[DataType]) -> Self {
208        if schema.is_empty() {
209            return Self::optimal();
210        }
211
212        // Sum estimated sizes for all columns, plus Row struct overhead
213        const ROW_OVERHEAD: usize = 24; // Vec header for values
214        let row_bytes: usize =
215            ROW_OVERHEAD + schema.iter().map(|dt| dt.estimated_size_bytes()).sum::<usize>();
216
217        Self::for_row_width(row_bytes)
218    }
219
220    /// Create an adaptive configuration based on estimated filter selectivity.
221    ///
222    /// For filter operations with known selectivity, adjusts morsel size:
223    /// - Low selectivity (few rows pass) -> larger morsels to reduce overhead
224    /// - High selectivity (many rows pass) -> smaller morsels for better balancing
225    ///
226    /// # Arguments
227    ///
228    /// * `selectivity` - Fraction of rows expected to pass the filter (0.0 to 1.0)
229    ///
230    /// # Example
231    ///
232    /// ```
233    /// use vibesql_executor::select::morsel::MorselConfig;
234    ///
235    /// // For a highly selective filter (1% pass rate), use larger morsels
236    /// let selective = MorselConfig::for_selectivity(0.01);
237    ///
238    /// // For a low selectivity filter (90% pass rate), use smaller morsels
239    /// let unselective = MorselConfig::for_selectivity(0.90);
240    ///
241    /// // More selective filters get larger morsels
242    /// assert!(selective.morsel_size > unselective.morsel_size);
243    /// ```
244    pub fn for_selectivity(selectivity: f64) -> Self {
245        // Clamp selectivity to valid range
246        let sel = selectivity.clamp(0.001, 1.0);
247
248        // For very low selectivity, increase morsel size to reduce overhead
249        // The idea: if only 1% of rows pass, we need larger input morsels
250        // to get meaningful output morsels
251        let adjusted = if sel < 0.1 {
252            // Scale inversely with selectivity, but cap at 2x default
253            ((DEFAULT_MORSEL_SIZE as f64) / sel).min((MAX_MORSEL_SIZE * 2) as f64) as usize
254        } else {
255            DEFAULT_MORSEL_SIZE
256        };
257
258        let morsel_size = adjusted.clamp(MIN_MORSEL_SIZE, MAX_MORSEL_SIZE * 2);
259
260        if morsel_debug_enabled() {
261            eprintln!(
262                "[MORSEL] Selectivity-based sizing: {:.1}% selectivity -> {} rows/morsel",
263                sel * 100.0,
264                morsel_size
265            );
266        }
267
268        // Use uniform size for selectivity-adaptive configs
269        Self::new(morsel_size)
270    }
271
272    /// Create an adaptive configuration combining row width and selectivity hints.
273    ///
274    /// This is the most accurate method when both schema and selectivity estimates
275    /// are available (e.g., from query optimizer statistics).
276    ///
277    /// # Arguments
278    ///
279    /// * `schema` - Slice of column data types
280    /// * `selectivity` - Optional filter selectivity (0.0 to 1.0)
281    ///
282    /// # Example
283    ///
284    /// ```
285    /// use vibesql_executor::select::morsel::MorselConfig;
286    /// use vibesql_types::DataType;
287    ///
288    /// let schema = [DataType::Integer, DataType::Bigint];
289    /// let config = MorselConfig::adaptive(&schema, Some(0.05));
290    /// assert!(config.morsel_size > 0);
291    /// ```
292    pub fn adaptive(schema: &[DataType], selectivity: Option<f64>) -> Self {
293        // Start with schema-based sizing
294        let base_config = Self::for_schema(schema);
295
296        // Adjust for selectivity if provided
297        match selectivity {
298            Some(sel) if sel < 0.1 => {
299                // For low selectivity, scale up from the schema-based size
300                let adjusted = ((base_config.morsel_size as f64) / sel.clamp(0.001, 1.0))
301                    .min((MAX_MORSEL_SIZE * 2) as f64) as usize;
302                let morsel_size = adjusted.clamp(MIN_MORSEL_SIZE, MAX_MORSEL_SIZE * 2);
303
304                if morsel_debug_enabled() {
305                    eprintln!(
306                        "[MORSEL] Adaptive sizing: schema={} bytes, selectivity={:.1}% -> {} rows/morsel",
307                        schema.iter().map(|dt| dt.estimated_size_bytes()).sum::<usize>(),
308                        sel * 100.0,
309                        morsel_size
310                    );
311                }
312
313                // Use uniform size for adaptive configs
314                Self::new(morsel_size)
315            }
316            _ => base_config,
317        }
318    }
319}
320
321impl Default for MorselConfig {
322    fn default() -> Self {
323        Self::optimal()
324    }
325}
326
327/// Global morsel configuration, initialized once on first access.
328static GLOBAL_CONFIG: std::sync::OnceLock<MorselConfig> = std::sync::OnceLock::new();
329
330/// Get the global morsel configuration.
331pub fn global_config() -> &'static MorselConfig {
332    GLOBAL_CONFIG.get_or_init(MorselConfig::optimal)
333}