Skip to main content

trueno_db/query/
mod.rs

1//! Query parsing and execution
2//!
3//! Toyota Way Principles:
4//! - Kaizen: JIT compilation eliminates interpreter overhead (Neumann 2011)
5//! - Muda elimination: Kernel fusion (Wu et al. 2012)
6//!
7//! ## Phase 1 SQL Subset
8//!
9//! Supports analytics workload (OLAP):
10//! - SELECT with column list or *
11//! - FROM single table (no JOINs in Phase 1)
12//! - WHERE with simple predicates (>, <, =, >=, <=, !=)
13//! - GROUP BY with aggregations (SUM, AVG, COUNT, MIN, MAX)
14//! - ORDER BY (ASC/DESC)
15//! - LIMIT
16//!
17//! References:
18//! - sqlparser-rs: <https://docs.rs/sqlparser>
19//! - TPC-H queries: Analytics benchmark patterns
20
21pub mod executor;
22
23pub use executor::QueryExecutor;
24
25use sqlparser::ast::{Expr, Query, Select, SelectItem, SetExpr, Statement};
26use sqlparser::dialect::GenericDialect;
27use sqlparser::parser::Parser;
28
/// One aggregate call from the SELECT list, as a `(function, column, alias)` tuple.
///
/// - `.0`: the aggregate function being applied
/// - `.1`: the first argument of the call rendered as SQL text, or `"*"` when
///   the call has no argument list entries
/// - `.2`: the `AS` alias, when the select item was written with one
pub type Aggregation = (AggregateFunction, String, Option<String>);
31
/// Parsed SQL query with extracted components
///
/// Every clause is stored as plain strings / simple enums rather than AST
/// nodes, so the plan can be compared, cloned and inspected without
/// depending on the SQL parser's types.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct QueryPlan {
    /// Selected non-aggregate columns; `"*"` stands for a wildcard.
    /// Aliased expressions are recorded under their alias.
    pub columns: Vec<String>,
    /// Table name (empty when the query has no FROM clause)
    pub table: String,
    /// WHERE clause expression rendered back to SQL text (optional)
    pub filter: Option<String>,
    /// GROUP BY expressions rendered as SQL text (empty when absent)
    pub group_by: Vec<String>,
    /// Aggregation functions: (function, column, alias)
    pub aggregations: Vec<Aggregation>,
    /// ORDER BY clauses as (expression text, direction); direction is
    /// ascending unless DESC was written
    pub order_by: Vec<(String, OrderDirection)>,
    /// LIMIT count (optional); only set when LIMIT is a numeric literal
    /// that fits in `usize`
    pub limit: Option<usize>,
}
50
/// Supported aggregation functions
///
/// Derives `Hash` in addition to the comparison traits so values can be used
/// as `HashMap` / `HashSet` keys (e.g. to group work per aggregate kind).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AggregateFunction {
    /// Sum of values
    Sum,
    /// Average of values
    Avg,
    /// Count of rows
    Count,
    /// Minimum value
    Min,
    /// Maximum value
    Max,
}
65
/// Sort order direction
///
/// Derives `Hash` in addition to the comparison traits so values can be used
/// as `HashMap` / `HashSet` keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OrderDirection {
    /// Ascending order (smallest first)
    Asc,
    /// Descending order (largest first)
    Desc,
}
74
75/// Query parser and executor
76pub struct QueryEngine {
77    dialect: GenericDialect,
78}
79
impl Default for QueryEngine {
    /// Same as [`QueryEngine::new`].
    fn default() -> Self {
        Self::new()
    }
}
85
86impl QueryEngine {
87    /// Create a new query engine
88    #[must_use]
89    pub const fn new() -> Self {
90        Self { dialect: GenericDialect {} }
91    }
92
93    /// Parse SQL query into query plan
94    ///
95    /// # Errors
96    /// Returns error if:
97    /// - SQL syntax is invalid
98    /// - Query uses unsupported features (JOINs, subqueries, etc.)
99    /// - Multiple statements provided
100    ///
101    /// # Example
102    /// ```
103    /// use trueno_db::query::QueryEngine;
104    ///
105    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
106    /// let engine = QueryEngine::new();
107    /// let plan = engine.parse("SELECT id, name FROM users WHERE age > 18")?;
108    /// assert_eq!(plan.table, "users");
109    /// # Ok(())
110    /// # }
111    pub fn parse(&self, sql: &str) -> crate::Result<QueryPlan> {
112        // Handle empty query
113        if sql.trim().is_empty() {
114            return Ok(QueryPlan {
115                columns: vec!["*".to_string()],
116                table: String::new(),
117                filter: None,
118                group_by: Vec::new(),
119                aggregations: Vec::new(),
120                order_by: Vec::new(),
121                limit: None,
122            });
123        }
124
125        // Parse SQL
126        let statements = Parser::parse_sql(&self.dialect, sql)
127            .map_err(|e| crate::Error::ParseError(format!("SQL parse error: {e}")))?;
128
129        // Validate single statement
130        if statements.len() != 1 {
131            return Err(crate::Error::ParseError("Only single statements supported".to_string()));
132        }
133
134        // Extract SELECT statement
135        let stmt = &statements[0];
136        let Statement::Query(query) = stmt else {
137            return Err(crate::Error::ParseError("Only SELECT queries supported".to_string()));
138        };
139
140        Self::parse_select_query(query)
141    }
142
143    fn parse_select_query(query: &Query) -> crate::Result<QueryPlan> {
144        // Extract SELECT body
145        let SetExpr::Select(select) = query.body.as_ref() else {
146            return Err(crate::Error::ParseError("Only SELECT queries supported".to_string()));
147        };
148
149        // Extract table name (FROM clause)
150        let table = Self::extract_table_name(select)?;
151
152        // Extract columns and aggregations
153        let (columns, aggregations) = Self::extract_columns(&select.projection)?;
154
155        // Extract WHERE clause
156        let filter = select.selection.as_ref().map(ToString::to_string);
157
158        // Extract GROUP BY
159        let group_by = Self::extract_group_by(&select.group_by);
160
161        // Extract ORDER BY
162        let order_by = Self::extract_order_by(query.order_by.as_ref());
163
164        // Extract LIMIT
165        let limit = Self::extract_limit(query.limit.as_ref());
166
167        Ok(QueryPlan { columns, table, filter, group_by, aggregations, order_by, limit })
168    }
169
170    fn extract_table_name(select: &Select) -> crate::Result<String> {
171        if select.from.is_empty() {
172            return Ok(String::new());
173        }
174
175        if select.from.len() > 1 {
176            return Err(crate::Error::ParseError(
177                "Multiple tables not supported in Phase 1".to_string(),
178            ));
179        }
180
181        let table_with_joins = &select.from[0];
182        if !table_with_joins.joins.is_empty() {
183            return Err(crate::Error::ParseError("JOINs not supported in Phase 1".to_string()));
184        }
185
186        Ok(table_with_joins.relation.to_string())
187    }
188
189    fn extract_columns(
190        projection: &[SelectItem],
191    ) -> crate::Result<(Vec<String>, Vec<Aggregation>)> {
192        let mut columns = Vec::new();
193        let mut aggregations = Vec::new();
194
195        for item in projection {
196            match item {
197                SelectItem::Wildcard(_) => {
198                    columns.push("*".to_string());
199                }
200                SelectItem::UnnamedExpr(expr) => {
201                    if let Some((func, col)) = Self::extract_aggregate(expr) {
202                        aggregations.push((func, col, None));
203                    } else {
204                        columns.push(expr.to_string());
205                    }
206                }
207                SelectItem::ExprWithAlias { expr, alias } => {
208                    if let Some((func, col)) = Self::extract_aggregate(expr) {
209                        aggregations.push((func, col, Some(alias.value.clone())));
210                    } else {
211                        columns.push(alias.value.clone());
212                    }
213                }
214                SelectItem::QualifiedWildcard(..) => {
215                    return Err(crate::Error::ParseError(
216                        "Qualified wildcards not supported".to_string(),
217                    ))
218                }
219            }
220        }
221
222        Ok((columns, aggregations))
223    }
224
225    fn extract_aggregate(expr: &Expr) -> Option<(AggregateFunction, String)> {
226        if let Expr::Function(func) = expr {
227            let func_name = func.name.to_string().to_uppercase();
228            let agg_func = match func_name.as_str() {
229                "SUM" => AggregateFunction::Sum,
230                "AVG" => AggregateFunction::Avg,
231                "COUNT" => AggregateFunction::Count,
232                "MIN" => AggregateFunction::Min,
233                "MAX" => AggregateFunction::Max,
234                _ => return None,
235            };
236
237            // Extract column name from arguments
238            let col = match &func.args {
239                sqlparser::ast::FunctionArguments::List(func_arg_list) => {
240                    func_arg_list.args.first().map_or_else(|| "*".to_string(), ToString::to_string)
241                }
242                _ => "*".to_string(),
243            };
244            return Some((agg_func, col));
245        }
246        None
247    }
248
249    fn extract_group_by(group_by: &sqlparser::ast::GroupByExpr) -> Vec<String> {
250        match group_by {
251            sqlparser::ast::GroupByExpr::All(_) => Vec::new(),
252            sqlparser::ast::GroupByExpr::Expressions(exprs, _) => {
253                exprs.iter().map(ToString::to_string).collect()
254            }
255        }
256    }
257
258    fn extract_order_by(
259        order_by: Option<&sqlparser::ast::OrderBy>,
260    ) -> Vec<(String, OrderDirection)> {
261        order_by
262            .map(|ob| {
263                ob.exprs
264                    .iter()
265                    .map(|o| {
266                        let col = o.expr.to_string();
267                        let dir = if o.asc.unwrap_or(true) {
268                            OrderDirection::Asc
269                        } else {
270                            OrderDirection::Desc
271                        };
272                        (col, dir)
273                    })
274                    .collect()
275            })
276            .unwrap_or_default()
277    }
278
279    fn extract_limit(limit: Option<&Expr>) -> Option<usize> {
280        limit.and_then(|expr| {
281            if let Expr::Value(sqlparser::ast::Value::Number(n, _)) = expr {
282                n.parse().ok()
283            } else {
284                None
285            }
286        })
287    }
288}