stoolap/storage/expression/mod.rs
1// Copyright 2025 Stoolap Contributors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Expression system for Stoolap
16//!
17//! This module provides boolean expressions used for filtering rows in queries.
18//!
19//! # Expression Types
20//!
21//! - [`ComparisonExpr`] - Simple comparison (column op value)
22//! - [`AndExpr`], [`OrExpr`], [`NotExpr`] - Logical operators
23//! - [`BetweenExpr`] - Range check (column BETWEEN low AND high)
24//! - [`InListExpr`] - List membership (column IN (v1, v2, ...))
25//! - [`NullCheckExpr`] - NULL check (column IS NULL / IS NOT NULL)
26//! - [`RangeExpr`] - Optimized range check with custom inclusivity
27//! - [`CastExpr`] - Type cast expression
28//! - [`LikeExpr`] - Pattern matching (LIKE/ILIKE with % and _ wildcards)
29//! - [`FunctionExpr`] - Scalar function evaluation (e.g., UPPER(col) = 'X')
30
31pub mod between;
32pub mod cast;
33pub mod comparison;
34pub mod compiled;
35pub mod function;
36pub mod in_list;
37pub mod like;
38pub mod logical;
39pub mod null_check;
40pub mod range;
41
42use std::any::Any;
43use std::fmt::Debug;
44
45use rustc_hash::FxHashMap;
46
47use crate::core::{Operator, Result, Row, Schema, Value};
48
49// Re-export expression types
50pub use between::BetweenExpr;
51pub use cast::{CastExpr, CompoundExpr};
52pub use comparison::ComparisonExpr;
53pub use compiled::{clear_regex_cache, CompiledFilter, CompiledPattern};
54pub use function::{EvalExpr, FunctionArg, FunctionExpr};
55pub use in_list::InListExpr;
56pub use like::{clear_like_regex_cache, LikeExpr};
57pub use logical::{AndExpr, ConstBoolExpr, NotExpr, OrExpr};
58pub use null_check::NullCheckExpr;
59pub use range::RangeExpr;
60
61/// Expression trait for boolean expressions used in WHERE clauses
62///
63/// All expressions evaluate a row and return true/false to indicate
64/// whether the row matches the condition.
65pub trait Expression: Send + Sync + Debug {
66 /// Evaluate the expression against a row
67 ///
68 /// Returns `Ok(true)` if the row matches, `Ok(false)` if it doesn't,
69 /// or an error if evaluation fails.
70 fn evaluate(&self, row: &Row) -> Result<bool>;
71
72 /// Fast evaluation without detailed error handling
73 ///
74 /// This is optimized for the hot path in query processing.
75 /// Returns `false` on any error condition.
76 fn evaluate_fast(&self, row: &Row) -> bool;
77
78 /// Create a copy of this expression with column aliases resolved
79 ///
80 /// The aliases map maps alias names to original column names.
81 /// If a column in the expression matches an alias, it will be
82 /// replaced with the original name in the returned expression.
83 fn with_aliases(&self, aliases: &FxHashMap<String, String>) -> Box<dyn Expression>;
84
85 /// Prepare the expression for a specific schema
86 ///
87 /// This pre-computes column indices for fast row access during evaluation.
88 /// Should be called before evaluating many rows with the same schema.
89 fn prepare_for_schema(&mut self, schema: &Schema);
90
91 /// Check if this expression has been prepared for a schema
92 fn is_prepared(&self) -> bool;
93
94 /// Get the column name this expression operates on (if single column)
95 fn get_column_name(&self) -> Option<&str> {
96 None
97 }
98
99 /// Check if this expression can potentially use an index
100 fn can_use_index(&self) -> bool {
101 false
102 }
103
104 /// Extract equality comparison info for index lookups
105 ///
106 /// Returns (column_name, operator, value) if this is a simple comparison expression.
107 /// This is used for primary key lookups and index lookups without requiring downcasting.
108 fn get_comparison_info(&self) -> Option<(&str, Operator, &Value)> {
109 None
110 }
111
112 /// Get child expressions if this is an AND expression
113 ///
114 /// Returns Some with a slice of child expressions for AND expressions,
115 /// None for other expression types. Used for expression pushdown optimization.
116 fn get_and_operands(&self) -> Option<&[Box<dyn Expression>]> {
117 None
118 }
119
120 /// Get child expressions if this is an OR expression
121 ///
122 /// Returns Some with a slice of child expressions for OR expressions,
123 /// None for other expression types. Used for OR index union optimization.
124 fn get_or_operands(&self) -> Option<&[Box<dyn Expression>]> {
125 None
126 }
127
128 /// Get LIKE prefix info for index range scanning
129 ///
130 /// For LIKE expressions with prefix patterns (e.g., 'John%'), returns:
131 /// - column_name: The column being matched
132 /// - prefix: The prefix before the first wildcard (e.g., "John")
133 /// - negated: Whether this is NOT LIKE
134 ///
135 /// Returns None for patterns with leading wildcards or non-LIKE expressions.
136 fn get_like_prefix_info(&self) -> Option<(&str, String, bool)> {
137 None
138 }
139
140 /// Collect all simple comparisons from this expression tree
141 ///
142 /// For AND expressions, recursively collects comparisons from all branches.
143 /// For comparison expressions, returns itself.
144 /// Used for index pushdown optimization.
145 fn collect_comparisons(&self) -> Vec<(&str, Operator, &Value)> {
146 if let Some(info) = self.get_comparison_info() {
147 vec![info]
148 } else if let Some(children) = self.get_and_operands() {
149 let mut result = Vec::new();
150 for child in children {
151 result.extend(child.collect_comparisons());
152 }
153 result
154 } else {
155 vec![]
156 }
157 }
158
159 /// Clone the expression into a boxed trait object
160 fn clone_box(&self) -> Box<dyn Expression>;
161
162 /// Check if the expression result would be UNKNOWN (NULL) for this row
163 ///
164 /// In SQL's three-valued logic, comparisons with NULL return UNKNOWN.
165 /// For filtering purposes, UNKNOWN is treated as false.
166 /// However, NOT(UNKNOWN) should remain UNKNOWN, not become true.
167 ///
168 /// This method helps detect when a false result is actually UNKNOWN due to NULL,
169 /// so that NOT expressions can handle three-valued logic correctly.
170 ///
171 /// Default implementation returns false (expression is never unknown due to NULL).
172 fn is_unknown_due_to_null(&self, _row: &Row) -> bool {
173 false
174 }
175
176 /// Get a reference to the expression as Any for downcasting
177 fn as_any(&self) -> &dyn Any {
178 // Default implementation that returns self
179 // Implementations should override if they need to be downcast
180 panic!("as_any not implemented for this expression type")
181 }
182}
183
184impl Clone for Box<dyn Expression> {
185 fn clone(&self) -> Self {
186 self.clone_box()
187 }
188}
189
190/// Helper to find column index in schema
191/// OPTIMIZATION: Uses Schema's cached column index map for O(1) lookup
192pub(crate) fn find_column_index(schema: &Schema, column: &str) -> Option<usize> {
193 // Use cached lowercase column index map from Schema for O(1) lookup
194 // The map uses lowercase keys, so we need to lowercase the input
195 schema
196 .column_index_map()
197 .get(&column.to_lowercase())
198 .copied()
199}
200
201/// Helper to resolve column name through aliases
202pub(crate) fn resolve_alias<'a>(
203 column: &'a str,
204 aliases: &'a FxHashMap<String, String>,
205) -> &'a str {
206 aliases.get(column).map(|s| s.as_str()).unwrap_or(column)
207}
208
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::{DataType, SchemaBuilder};

    /// Fixture schema with columns `id`, `name`, `age`, `email`, added in that order.
    fn test_schema() -> Schema {
        SchemaBuilder::new("test")
            .add_primary_key("id", DataType::Integer)
            .add("name", DataType::Text)
            .add("age", DataType::Integer)
            .add_nullable("email", DataType::Text)
            .build()
    }

    #[test]
    fn test_find_column_index() {
        let schema = test_schema();

        // (requested name, expected index)
        let expectations: [(&str, Option<usize>); 7] = [
            ("id", Some(0)),
            ("name", Some(1)),
            ("age", Some(2)),
            ("email", Some(3)),
            ("nonexistent", None),
            // Case insensitive
            ("ID", Some(0)),
            ("NAME", Some(1)),
        ];

        for (column, expected) in expectations.iter() {
            assert_eq!(
                find_column_index(&schema, column),
                *expected,
                "column {:?}",
                column
            );
        }
    }

    #[test]
    fn test_resolve_alias() {
        let aliases: FxHashMap<String, String> = [("n", "name"), ("a", "age")]
            .iter()
            .map(|&(alias, original)| (alias.to_string(), original.to_string()))
            .collect();

        // (input, expected resolution); "id" is not an alias and passes through.
        let expectations = [("n", "name"), ("a", "age"), ("id", "id")];

        for &(input, expected) in expectations.iter() {
            assert_eq!(resolve_alias(input, &aliases), expected, "input {:?}", input);
        }
    }
}
248}