vibesql_executor/evaluator/
combined_core.rs

1//! Combined schema expression evaluator
2//!
3//! This module provides the CombinedExpressionEvaluator for evaluating expressions
4//! in the context of combined schemas (e.g., JOINs with multiple tables).
5
6use std::{
7    cell::RefCell,
8    collections::HashMap,
9    hash::{Hash, Hasher},
10    rc::Rc,
11};
12
13use ahash::AHasher;
14use lru::LruCache;
15
16use crate::{errors::ExecutorError, schema::CombinedSchema, select::WindowFunctionKey};
17
18/// Evaluates expressions with combined schema (for JOINs)
19pub struct CombinedExpressionEvaluator<'a> {
20    pub(super) schema: &'a CombinedSchema,
21    pub(super) database: Option<&'a vibesql_storage::Database>,
22    pub(super) outer_row: Option<&'a vibesql_storage::Row>,
23    pub(super) outer_schema: Option<&'a CombinedSchema>,
24    /// All outer rows for outer-correlated aggregates (issue #4930)
25    /// When an aggregate in a scalar subquery references only outer columns,
26    /// it should aggregate over ALL outer rows, not just the current one.
27    pub(super) outer_rows: Option<&'a [vibesql_storage::Row]>,
28    /// Outer context for chained column resolution (SQLite-style context chaining)
29    /// This enables proper scope shadowing for deeply nested subqueries (issue #4493)
30    pub(super) outer_context: Option<&'a CombinedExpressionEvaluator<'a>>,
31    pub(super) window_mapping: Option<&'a HashMap<WindowFunctionKey, usize>>,
32    /// Procedural context for stored procedure/function variable resolution
33    pub(super) procedural_context: Option<&'a crate::procedural::ExecutionContext>,
34    /// CTE (Common Table Expression) context for accessing WITH clause results
35    pub(super) cte_context: Option<&'a HashMap<String, crate::select::cte::CteResult>>,
36    /// Cache for column lookups to avoid repeated schema traversals
37    /// Uses pre-computed hash of (table, column) as key to avoid string allocations
38    column_cache: RefCell<HashMap<u64, usize>>,
39    /// Cache for non-correlated subquery results with LRU eviction (key = subquery hash, value =
40    /// result rows) Shared via Rc across child evaluators within a single statement execution.
41    /// Cache lifetime is tied to the evaluator instance - each new evaluator gets a fresh cache.
42    pub(super) subquery_cache: Rc<RefCell<LruCache<u64, Vec<vibesql_storage::Row>>>>,
43    /// Current depth in expression tree (for preventing stack overflow)
44    pub(super) depth: usize,
45    /// CSE cache for common sub-expression elimination with LRU eviction (shared via Rc across
46    /// depth levels)
47    pub(super) cse_cache: Rc<RefCell<LruCache<u64, vibesql_types::SqlValue>>>,
48    /// Whether CSE is enabled (can be disabled for debugging)
49    pub(super) enable_cse: bool,
50    /// Cache for subquery correlation analysis (key = subquery pointer address, value =
51    /// is_correlated) Avoids expensive AST traversal for every row evaluation (issue #4142)
52    pub(super) correlation_cache: Rc<RefCell<HashMap<usize, bool>>>,
53    /// Cache for subquery hash values (key = subquery pointer address, value = hash)
54    /// Avoids expensive Debug format + hash computation for every row evaluation (issue #4142)
55    pub(super) subquery_hash_cache: Rc<RefCell<HashMap<usize, u64>>>,
56}
57
58impl<'a> CombinedExpressionEvaluator<'a> {
59    /// Create a new combined expression evaluator
60    /// Note: Currently unused as all callers use with_database(), but kept for API completeness
61    #[allow(dead_code)]
62    pub(crate) fn new(schema: &'a CombinedSchema) -> Self {
63        CombinedExpressionEvaluator {
64            schema,
65            database: None,
66            outer_row: None,
67            outer_schema: None,
68            outer_rows: None,
69            outer_context: None,
70            window_mapping: None,
71            procedural_context: None,
72            cte_context: None,
73            column_cache: RefCell::new(HashMap::new()),
74            subquery_cache: Rc::new(RefCell::new(super::caching::create_subquery_cache())),
75            depth: 0,
76            cse_cache: Rc::new(RefCell::new(super::caching::create_cse_cache())),
77            enable_cse: super::caching::is_cse_enabled(),
78            correlation_cache: Rc::new(RefCell::new(HashMap::new())),
79            subquery_hash_cache: Rc::new(RefCell::new(HashMap::new())),
80        }
81    }
82
83    /// Create a new combined expression evaluator with database reference
84    pub(crate) fn with_database(
85        schema: &'a CombinedSchema,
86        database: &'a vibesql_storage::Database,
87    ) -> Self {
88        CombinedExpressionEvaluator {
89            schema,
90            database: Some(database),
91            outer_row: None,
92            outer_schema: None,
93            outer_rows: None,
94            outer_context: None,
95            window_mapping: None,
96            procedural_context: None,
97            cte_context: None,
98            column_cache: RefCell::new(HashMap::new()),
99            subquery_cache: Rc::new(RefCell::new(super::caching::create_subquery_cache())),
100            depth: 0,
101            cse_cache: Rc::new(RefCell::new(super::caching::create_cse_cache())),
102            enable_cse: super::caching::is_cse_enabled(),
103            correlation_cache: Rc::new(RefCell::new(HashMap::new())),
104            subquery_hash_cache: Rc::new(RefCell::new(HashMap::new())),
105        }
106    }
107
108    /// Create a new combined expression evaluator with database and outer context for correlated
109    /// subqueries
110    pub(crate) fn with_database_and_outer_context(
111        schema: &'a CombinedSchema,
112        database: &'a vibesql_storage::Database,
113        outer_row: &'a vibesql_storage::Row,
114        outer_schema: &'a CombinedSchema,
115    ) -> Self {
116        CombinedExpressionEvaluator {
117            schema,
118            database: Some(database),
119            outer_row: Some(outer_row),
120            outer_schema: Some(outer_schema),
121            outer_rows: None,
122            outer_context: None,
123            window_mapping: None,
124            procedural_context: None,
125            cte_context: None,
126            column_cache: RefCell::new(HashMap::new()),
127            subquery_cache: Rc::new(RefCell::new(super::caching::create_subquery_cache())),
128            depth: 0,
129            cse_cache: Rc::new(RefCell::new(super::caching::create_cse_cache())),
130            enable_cse: super::caching::is_cse_enabled(),
131            correlation_cache: Rc::new(RefCell::new(HashMap::new())),
132            subquery_hash_cache: Rc::new(RefCell::new(HashMap::new())),
133        }
134    }
135
136    /// Create a new combined expression evaluator with database and window mapping
137    pub(crate) fn with_database_and_windows(
138        schema: &'a CombinedSchema,
139        database: &'a vibesql_storage::Database,
140        window_mapping: &'a HashMap<WindowFunctionKey, usize>,
141    ) -> Self {
142        CombinedExpressionEvaluator {
143            schema,
144            database: Some(database),
145            outer_row: None,
146            outer_schema: None,
147            outer_rows: None,
148            outer_context: None,
149            window_mapping: Some(window_mapping),
150            procedural_context: None,
151            cte_context: None,
152            column_cache: RefCell::new(HashMap::new()),
153            subquery_cache: Rc::new(RefCell::new(super::caching::create_subquery_cache())),
154            depth: 0,
155            cse_cache: Rc::new(RefCell::new(super::caching::create_cse_cache())),
156            enable_cse: super::caching::is_cse_enabled(),
157            correlation_cache: Rc::new(RefCell::new(HashMap::new())),
158            subquery_hash_cache: Rc::new(RefCell::new(HashMap::new())),
159        }
160    }
161
162    /// Create a new combined expression evaluator with database, outer context, and window mapping
163    /// Used for window functions in correlated subqueries without FROM clause
164    pub(crate) fn with_database_outer_and_windows(
165        schema: &'a CombinedSchema,
166        database: &'a vibesql_storage::Database,
167        outer_row: &'a vibesql_storage::Row,
168        outer_schema: &'a CombinedSchema,
169        window_mapping: &'a HashMap<WindowFunctionKey, usize>,
170    ) -> Self {
171        CombinedExpressionEvaluator {
172            schema,
173            database: Some(database),
174            outer_row: Some(outer_row),
175            outer_schema: Some(outer_schema),
176            outer_rows: None,
177            outer_context: None,
178            window_mapping: Some(window_mapping),
179            procedural_context: None,
180            cte_context: None,
181            column_cache: RefCell::new(HashMap::new()),
182            subquery_cache: Rc::new(RefCell::new(super::caching::create_subquery_cache())),
183            depth: 0,
184            cse_cache: Rc::new(RefCell::new(super::caching::create_cse_cache())),
185            enable_cse: super::caching::is_cse_enabled(),
186            correlation_cache: Rc::new(RefCell::new(HashMap::new())),
187            subquery_hash_cache: Rc::new(RefCell::new(HashMap::new())),
188        }
189    }
190
191    /// Create a new combined expression evaluator with database, window mapping, and CTE context
192    pub(crate) fn with_database_and_windows_and_cte(
193        schema: &'a CombinedSchema,
194        database: &'a vibesql_storage::Database,
195        window_mapping: &'a HashMap<WindowFunctionKey, usize>,
196        cte_context: &'a HashMap<String, crate::select::cte::CteResult>,
197    ) -> Self {
198        CombinedExpressionEvaluator {
199            schema,
200            database: Some(database),
201            outer_row: None,
202            outer_schema: None,
203            outer_rows: None,
204            outer_context: None,
205            window_mapping: Some(window_mapping),
206            procedural_context: None,
207            cte_context: Some(cte_context),
208            column_cache: RefCell::new(HashMap::new()),
209            subquery_cache: Rc::new(RefCell::new(super::caching::create_subquery_cache())),
210            depth: 0,
211            cse_cache: Rc::new(RefCell::new(super::caching::create_cse_cache())),
212            enable_cse: super::caching::is_cse_enabled(),
213            correlation_cache: Rc::new(RefCell::new(HashMap::new())),
214            subquery_hash_cache: Rc::new(RefCell::new(HashMap::new())),
215        }
216    }
217
218    /// Create a new combined expression evaluator with database and procedural context
219    pub(crate) fn with_database_and_procedural_context(
220        schema: &'a CombinedSchema,
221        database: &'a vibesql_storage::Database,
222        procedural_context: &'a crate::procedural::ExecutionContext,
223    ) -> Self {
224        CombinedExpressionEvaluator {
225            schema,
226            database: Some(database),
227            outer_row: None,
228            outer_schema: None,
229            outer_rows: None,
230            outer_context: None,
231            window_mapping: None,
232            procedural_context: Some(procedural_context),
233            cte_context: None,
234            column_cache: RefCell::new(HashMap::new()),
235            subquery_cache: Rc::new(RefCell::new(super::caching::create_subquery_cache())),
236            depth: 0,
237            cse_cache: Rc::new(RefCell::new(super::caching::create_cse_cache())),
238            enable_cse: super::caching::is_cse_enabled(),
239            correlation_cache: Rc::new(RefCell::new(HashMap::new())),
240            subquery_hash_cache: Rc::new(RefCell::new(HashMap::new())),
241        }
242    }
243
244    /// Create a new combined expression evaluator with database and CTE context
245    pub(crate) fn with_database_and_cte(
246        schema: &'a CombinedSchema,
247        database: &'a vibesql_storage::Database,
248        cte_context: &'a HashMap<String, crate::select::cte::CteResult>,
249    ) -> Self {
250        CombinedExpressionEvaluator {
251            schema,
252            database: Some(database),
253            outer_row: None,
254            outer_schema: None,
255            outer_rows: None,
256            outer_context: None,
257            window_mapping: None,
258            procedural_context: None,
259            cte_context: Some(cte_context),
260            column_cache: RefCell::new(HashMap::new()),
261            subquery_cache: Rc::new(RefCell::new(super::caching::create_subquery_cache())),
262            depth: 0,
263            cse_cache: Rc::new(RefCell::new(super::caching::create_cse_cache())),
264            enable_cse: super::caching::is_cse_enabled(),
265            correlation_cache: Rc::new(RefCell::new(HashMap::new())),
266            subquery_hash_cache: Rc::new(RefCell::new(HashMap::new())),
267        }
268    }
269
270    /// Create a new combined expression evaluator with database, outer context, and CTE context
271    pub(crate) fn with_database_and_outer_context_and_cte(
272        schema: &'a CombinedSchema,
273        database: &'a vibesql_storage::Database,
274        outer_row: &'a vibesql_storage::Row,
275        outer_schema: &'a CombinedSchema,
276        cte_context: &'a HashMap<String, crate::select::cte::CteResult>,
277    ) -> Self {
278        CombinedExpressionEvaluator {
279            schema,
280            database: Some(database),
281            outer_row: Some(outer_row),
282            outer_schema: Some(outer_schema),
283            outer_rows: None,
284            outer_context: None,
285            window_mapping: None,
286            procedural_context: None,
287            cte_context: Some(cte_context),
288            column_cache: RefCell::new(HashMap::new()),
289            subquery_cache: Rc::new(RefCell::new(super::caching::create_subquery_cache())),
290            depth: 0,
291            cse_cache: Rc::new(RefCell::new(super::caching::create_cse_cache())),
292            enable_cse: super::caching::is_cse_enabled(),
293            correlation_cache: Rc::new(RefCell::new(HashMap::new())),
294            subquery_hash_cache: Rc::new(RefCell::new(HashMap::new())),
295        }
296    }
297
298    /// Create a new combined expression evaluator with database, procedural context, and CTE
299    /// context
300    pub(crate) fn with_database_and_procedural_context_and_cte(
301        schema: &'a CombinedSchema,
302        database: &'a vibesql_storage::Database,
303        procedural_context: &'a crate::procedural::ExecutionContext,
304        cte_context: &'a HashMap<String, crate::select::cte::CteResult>,
305    ) -> Self {
306        CombinedExpressionEvaluator {
307            schema,
308            database: Some(database),
309            outer_row: None,
310            outer_schema: None,
311            outer_rows: None,
312            outer_context: None,
313            window_mapping: None,
314            procedural_context: Some(procedural_context),
315            cte_context: Some(cte_context),
316            column_cache: RefCell::new(HashMap::new()),
317            subquery_cache: Rc::new(RefCell::new(super::caching::create_subquery_cache())),
318            depth: 0,
319            cse_cache: Rc::new(RefCell::new(super::caching::create_cse_cache())),
320            enable_cse: super::caching::is_cse_enabled(),
321            correlation_cache: Rc::new(RefCell::new(HashMap::new())),
322            subquery_hash_cache: Rc::new(RefCell::new(HashMap::new())),
323        }
324    }
325
326    /// Clear the CSE cache
327    /// Should be called before evaluating expressions for a new row in multi-row contexts
328    pub(crate) fn clear_cse_cache(&self) {
329        self.cse_cache.borrow_mut().clear();
330    }
331
332    /// Set all outer rows for outer-correlated aggregates (issue #4930).
333    ///
334    /// When an aggregate function in a scalar subquery references only outer columns,
335    /// it should aggregate over ALL outer rows, not just the current one.
336    pub fn set_outer_rows(&mut self, outer_rows: &'a [vibesql_storage::Row]) {
337        self.outer_rows = Some(outer_rows);
338    }
339
340    /// Get the outer rows if set
341    pub(crate) fn get_outer_rows(&self) -> Option<&'a [vibesql_storage::Row]> {
342        self.outer_rows
343    }
344
345    /// Get the outer schema if set
346    pub(crate) fn get_outer_schema(&self) -> Option<&'a CombinedSchema> {
347        self.outer_schema
348    }
349
350    /// Get the inner schema
351    pub(crate) fn get_schema(&self) -> &'a CombinedSchema {
352        self.schema
353    }
354
355    /// Compute hash key for column cache without allocating strings
356    #[inline]
357    fn column_cache_key(table: Option<&str>, column: &str) -> u64 {
358        let mut hasher = AHasher::default();
359        table.hash(&mut hasher);
360        column.hash(&mut hasher);
361        hasher.finish()
362    }
363
364    /// Get column index with caching to avoid repeated schema lookups
365    pub(crate) fn get_column_index_cached(
366        &self,
367        table: Option<&str>,
368        column: &str,
369    ) -> Option<usize> {
370        let key = Self::column_cache_key(table, column);
371
372        // Check cache first
373        if let Some(&idx) = self.column_cache.borrow().get(&key) {
374            return Some(idx);
375        }
376
377        // Cache miss: lookup and store
378        if let Some(idx) = self.schema.get_column_index(table, column) {
379            self.column_cache.borrow_mut().insert(key, idx);
380            Some(idx)
381        } else {
382            None
383        }
384    }
385
386    /// Helper to execute a closure with incremented depth
387    pub(super) fn with_incremented_depth<F, T>(&self, f: F) -> Result<T, ExecutorError>
388    where
389        F: FnOnce(&Self) -> Result<T, ExecutorError>,
390    {
391        // Create a new evaluator with incremented depth
392        // Share caches between parent and child evaluators
393        let evaluator = CombinedExpressionEvaluator {
394            schema: self.schema,
395            database: self.database,
396            outer_row: self.outer_row,
397            outer_schema: self.outer_schema,
398            outer_rows: self.outer_rows,
399            outer_context: self.outer_context,
400            window_mapping: self.window_mapping,
401            procedural_context: self.procedural_context,
402            cte_context: self.cte_context,
403            // Share the column cache between parent and child evaluators
404            column_cache: RefCell::new(self.column_cache.borrow().clone()),
405            // Share the subquery cache - subqueries can be reused across depths
406            subquery_cache: self.subquery_cache.clone(),
407            depth: self.depth + 1,
408            cse_cache: self.cse_cache.clone(),
409            enable_cse: self.enable_cse,
410            // Share correlation and hash caches - they're keyed by AST pointer address
411            correlation_cache: self.correlation_cache.clone(),
412            subquery_hash_cache: self.subquery_hash_cache.clone(),
413        };
414        f(&evaluator)
415    }
416
417    /// Clone the evaluator for evaluating a different expression
418    ///
419    /// Shares the subquery cache (safe because non-correlated subqueries produce
420    /// the same results regardless of the current row) but creates a fresh CSE cache
421    /// (necessary because CSE results depend on row values).
422    /// Also shares correlation and hash caches (keyed by AST pointer, not row-dependent).
423    pub fn clone_for_new_expression(&self) -> Self {
424        CombinedExpressionEvaluator {
425            schema: self.schema,
426            database: self.database,
427            outer_row: self.outer_row,
428            outer_schema: self.outer_schema,
429            outer_rows: self.outer_rows,
430            outer_context: self.outer_context,
431            window_mapping: self.window_mapping,
432            procedural_context: self.procedural_context,
433            cte_context: self.cte_context,
434            column_cache: RefCell::new(HashMap::new()),
435            subquery_cache: self.subquery_cache.clone(),
436            depth: self.depth,
437            cse_cache: Rc::new(RefCell::new(super::caching::create_cse_cache())),
438            enable_cse: self.enable_cse,
439            // Share correlation and hash caches - they're keyed by AST pointer address
440            correlation_cache: self.correlation_cache.clone(),
441            subquery_hash_cache: self.subquery_hash_cache.clone(),
442        }
443    }
444
445    /// Get the combined schema for this evaluator
446    pub(crate) fn schema(&self) -> &'a CombinedSchema {
447        self.schema
448    }
449
450    /// Get the database reference (if available)
451    pub(crate) fn database(&self) -> Option<&'a vibesql_storage::Database> {
452        self.database
453    }
454
455    /// Check if a subquery is correlated, with caching by AST pointer address.
456    ///
457    /// This avoids expensive AST traversal for every row evaluation.
458    /// The correlation status of a subquery is determined by its AST structure,
459    /// which doesn't change during query execution.
460    #[inline]
461    pub(crate) fn is_correlated_cached(&self, subquery: &vibesql_ast::SelectStmt) -> bool {
462        let ptr = subquery as *const _ as usize;
463
464        // Check cache first
465        if let Some(&is_correlated) = self.correlation_cache.borrow().get(&ptr) {
466            return is_correlated;
467        }
468
469        // Cache miss: compute and store
470        let is_correlated =
471            crate::optimizer::subquery_rewrite::correlation::is_correlated(subquery);
472        self.correlation_cache.borrow_mut().insert(ptr, is_correlated);
473        is_correlated
474    }
475
476    /// Compute subquery hash with caching by AST pointer address.
477    ///
478    /// This avoids expensive Debug format + hash computation for every row evaluation.
479    /// The hash of a subquery is determined by its AST structure,
480    /// which doesn't change during query execution.
481    #[inline]
482    pub(crate) fn compute_subquery_hash_cached(&self, subquery: &vibesql_ast::SelectStmt) -> u64 {
483        let ptr = subquery as *const _ as usize;
484
485        // Check cache first
486        if let Some(&hash) = self.subquery_hash_cache.borrow().get(&ptr) {
487            return hash;
488        }
489
490        // Cache miss: compute and store
491        let hash = super::caching::compute_subquery_hash(subquery);
492        self.subquery_hash_cache.borrow_mut().insert(ptr, hash);
493        hash
494    }
495
496    /// Get evaluator components for parallel execution
497    /// Returns (schema, database, outer_row, outer_schema, window_mapping, cte_context, enable_cse)
498    ///
499    /// Issue #3562: Now includes cte_context to enable IN subqueries referencing CTEs
500    /// during parallel predicate evaluation.
501    #[cfg(feature = "parallel")]
502    pub(crate) fn get_parallel_components(&self) -> super::parallel::ParallelComponents<'a> {
503        (
504            self.schema,
505            self.database,
506            self.outer_row,
507            self.outer_schema,
508            self.window_mapping,
509            self.cte_context,
510            self.enable_cse,
511        )
512    }
513
514    /// Create evaluator from parallel components
515    /// Creates a fresh evaluator with independent caches for thread-safe parallel execution
516    ///
517    /// Issue #3562: Now accepts cte_context to enable IN subqueries referencing CTEs
518    /// during parallel predicate evaluation.
519    #[cfg(feature = "parallel")]
520    pub(crate) fn from_parallel_components(
521        schema: &'a CombinedSchema,
522        database: Option<&'a vibesql_storage::Database>,
523        outer_row: Option<&'a vibesql_storage::Row>,
524        outer_schema: Option<&'a CombinedSchema>,
525        window_mapping: Option<&'a HashMap<WindowFunctionKey, usize>>,
526        cte_context: Option<
527            &'a std::collections::HashMap<String, super::super::select::cte::CteResult>,
528        >,
529        enable_cse: bool,
530    ) -> Self {
531        CombinedExpressionEvaluator {
532            schema,
533            database,
534            outer_row,
535            outer_schema,
536            outer_rows: None,
537            outer_context: None,
538            window_mapping,
539            procedural_context: None,
540            cte_context,
541            column_cache: RefCell::new(HashMap::new()),
542            subquery_cache: Rc::new(RefCell::new(super::caching::create_subquery_cache())),
543            depth: 0,
544            cse_cache: Rc::new(RefCell::new(super::caching::create_cse_cache())),
545            correlation_cache: Rc::new(RefCell::new(HashMap::new())),
546            subquery_hash_cache: Rc::new(RefCell::new(HashMap::new())),
547            enable_cse,
548        }
549    }
550}