Skip to main content

stoolap/storage/expression/
mod.rs

1// Copyright 2025 Stoolap Contributors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Expression system for Stoolap
16//!
17//! This module provides boolean expressions used for filtering rows in queries.
18//!
19//! # Expression Types
20//!
21//! - [`ComparisonExpr`] - Simple comparison (column op value)
22//! - [`AndExpr`], [`OrExpr`], [`NotExpr`] - Logical operators
23//! - [`BetweenExpr`] - Range check (column BETWEEN low AND high)
24//! - [`InListExpr`] - List membership (column IN (v1, v2, ...))
25//! - [`NullCheckExpr`] - NULL check (column IS NULL / IS NOT NULL)
26//! - [`RangeExpr`] - Optimized range check with custom inclusivity
27//! - [`CastExpr`] - Type cast expression
28//! - [`LikeExpr`] - Pattern matching (LIKE/ILIKE with % and _ wildcards)
29//! - [`FunctionExpr`] - Scalar function evaluation (e.g., UPPER(col) = 'X')
30
31pub mod between;
32pub mod cast;
33pub mod comparison;
34pub mod compiled;
35pub mod function;
36pub mod in_list;
37pub mod like;
38pub mod logical;
39pub mod null_check;
40pub mod range;
41
42use std::any::Any;
43use std::fmt::Debug;
44
45use rustc_hash::FxHashMap;
46
47use crate::core::{Operator, Result, Row, Schema, Value};
48
49// Re-export expression types
50pub use between::BetweenExpr;
51pub use cast::{CastExpr, CompoundExpr};
52pub use comparison::ComparisonExpr;
53pub use compiled::{clear_regex_cache, CompiledFilter, CompiledPattern};
54pub use function::{EvalExpr, FunctionArg, FunctionExpr};
55pub use in_list::InListExpr;
56pub use like::{clear_like_regex_cache, LikeExpr};
57pub use logical::{AndExpr, ConstBoolExpr, NotExpr, OrExpr};
58pub use null_check::NullCheckExpr;
59pub use range::RangeExpr;
60
61/// Expression trait for boolean expressions used in WHERE clauses
62///
63/// All expressions evaluate a row and return true/false to indicate
64/// whether the row matches the condition.
65pub trait Expression: Send + Sync + Debug {
66    /// Evaluate the expression against a row
67    ///
68    /// Returns `Ok(true)` if the row matches, `Ok(false)` if it doesn't,
69    /// or an error if evaluation fails.
70    fn evaluate(&self, row: &Row) -> Result<bool>;
71
72    /// Fast evaluation without detailed error handling
73    ///
74    /// This is optimized for the hot path in query processing.
75    /// Returns `false` on any error condition.
76    fn evaluate_fast(&self, row: &Row) -> bool;
77
78    /// Create a copy of this expression with column aliases resolved
79    ///
80    /// The aliases map maps alias names to original column names.
81    /// If a column in the expression matches an alias, it will be
82    /// replaced with the original name in the returned expression.
83    fn with_aliases(&self, aliases: &FxHashMap<String, String>) -> Box<dyn Expression>;
84
85    /// Prepare the expression for a specific schema
86    ///
87    /// This pre-computes column indices for fast row access during evaluation.
88    /// Should be called before evaluating many rows with the same schema.
89    fn prepare_for_schema(&mut self, schema: &Schema);
90
91    /// Check if this expression has been prepared for a schema
92    fn is_prepared(&self) -> bool;
93
94    /// Get the column name this expression operates on (if single column)
95    fn get_column_name(&self) -> Option<&str> {
96        None
97    }
98
99    /// Check if this expression can potentially use an index
100    fn can_use_index(&self) -> bool {
101        false
102    }
103
104    /// Extract equality comparison info for index lookups
105    ///
106    /// Returns (column_name, operator, value) if this is a simple comparison expression.
107    /// This is used for primary key lookups and index lookups without requiring downcasting.
108    fn get_comparison_info(&self) -> Option<(&str, Operator, &Value)> {
109        None
110    }
111
112    /// Get child expressions if this is an AND expression
113    ///
114    /// Returns Some with a slice of child expressions for AND expressions,
115    /// None for other expression types. Used for expression pushdown optimization.
116    fn get_and_operands(&self) -> Option<&[Box<dyn Expression>]> {
117        None
118    }
119
120    /// Get child expressions if this is an OR expression
121    ///
122    /// Returns Some with a slice of child expressions for OR expressions,
123    /// None for other expression types. Used for OR index union optimization.
124    fn get_or_operands(&self) -> Option<&[Box<dyn Expression>]> {
125        None
126    }
127
128    /// Get LIKE prefix info for index range scanning
129    ///
130    /// For LIKE expressions with prefix patterns (e.g., 'John%'), returns:
131    /// - column_name: The column being matched
132    /// - prefix: The prefix before the first wildcard (e.g., "John")
133    /// - negated: Whether this is NOT LIKE
134    ///
135    /// Returns None for patterns with leading wildcards or non-LIKE expressions.
136    fn get_like_prefix_info(&self) -> Option<(&str, String, bool)> {
137        None
138    }
139
140    /// Collect all simple comparisons from this expression tree
141    ///
142    /// For AND expressions, recursively collects comparisons from all branches.
143    /// For comparison expressions, returns itself.
144    /// Used for index pushdown optimization.
145    fn collect_comparisons(&self) -> Vec<(&str, Operator, &Value)> {
146        if let Some(info) = self.get_comparison_info() {
147            vec![info]
148        } else if let Some(children) = self.get_and_operands() {
149            let mut result = Vec::new();
150            for child in children {
151                result.extend(child.collect_comparisons());
152            }
153            result
154        } else {
155            vec![]
156        }
157    }
158
159    /// Clone the expression into a boxed trait object
160    fn clone_box(&self) -> Box<dyn Expression>;
161
162    /// Check if the expression result would be UNKNOWN (NULL) for this row
163    ///
164    /// In SQL's three-valued logic, comparisons with NULL return UNKNOWN.
165    /// For filtering purposes, UNKNOWN is treated as false.
166    /// However, NOT(UNKNOWN) should remain UNKNOWN, not become true.
167    ///
168    /// This method helps detect when a false result is actually UNKNOWN due to NULL,
169    /// so that NOT expressions can handle three-valued logic correctly.
170    ///
171    /// Default implementation returns false (expression is never unknown due to NULL).
172    fn is_unknown_due_to_null(&self, _row: &Row) -> bool {
173        false
174    }
175
176    /// Get a reference to the expression as Any for downcasting
177    fn as_any(&self) -> &dyn Any {
178        // Default implementation that returns self
179        // Implementations should override if they need to be downcast
180        panic!("as_any not implemented for this expression type")
181    }
182}
183
184impl Clone for Box<dyn Expression> {
185    fn clone(&self) -> Self {
186        self.clone_box()
187    }
188}
189
190/// Helper to find column index in schema
191/// OPTIMIZATION: Uses Schema's cached column index map for O(1) lookup
192pub(crate) fn find_column_index(schema: &Schema, column: &str) -> Option<usize> {
193    // Use cached lowercase column index map from Schema for O(1) lookup
194    // The map uses lowercase keys, so we need to lowercase the input
195    schema
196        .column_index_map()
197        .get(&column.to_lowercase())
198        .copied()
199}
200
/// Translates `column` through the alias table.
///
/// When `column` is a key of `aliases`, the mapped (original) column name is
/// returned; otherwise `column` is handed back unchanged. The match on the
/// alias key is exact, i.e. case-sensitive.
pub(crate) fn resolve_alias<'a>(
    column: &'a str,
    aliases: &'a FxHashMap<String, String>,
) -> &'a str {
    aliases.get(column).map_or(column, String::as_str)
}
208
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::{DataType, SchemaBuilder};

    /// Fixture schema `test` with columns `id` (primary key), `name`, `age`
    /// and a nullable `email`, declared in that order.
    fn test_schema() -> Schema {
        let builder = SchemaBuilder::new("test")
            .add_primary_key("id", DataType::Integer)
            .add("name", DataType::Text)
            .add("age", DataType::Integer)
            .add_nullable("email", DataType::Text);
        builder.build()
    }

    #[test]
    fn test_find_column_index() {
        let schema = test_schema();

        // (requested name, expected position)
        let cases: [(&str, Option<usize>); 7] = [
            ("id", Some(0)),
            ("name", Some(1)),
            ("age", Some(2)),
            ("email", Some(3)),
            ("nonexistent", None),
            // Case insensitive
            ("ID", Some(0)),
            ("NAME", Some(1)),
        ];

        for (column, expected) in cases.iter() {
            assert_eq!(find_column_index(&schema, column), *expected);
        }
    }

    #[test]
    fn test_resolve_alias() {
        let mut aliases = FxHashMap::default();
        for (alias, original) in [("n", "name"), ("a", "age")].iter() {
            aliases.insert(alias.to_string(), original.to_string());
        }

        // (input, expected resolution); "id" is not an alias and passes through.
        let cases = [("n", "name"), ("a", "age"), ("id", "id")];
        for (input, expected) in cases.iter() {
            assert_eq!(resolve_alias(input, &aliases), *expected);
        }
    }
}