Skip to main content

sqlrite/sql/parser/
select.rs

1use sqlparser::ast::{
2    DuplicateTreatment, Expr, FunctionArg, FunctionArgExpr, FunctionArguments, JoinConstraint,
3    JoinOperator, LimitClause, ObjectName, ObjectNamePart, OrderByKind, Query, Select, SelectItem,
4    SetExpr, Statement, TableFactor, TableWithJoins, Value,
5};
6
7use crate::error::{Result, SQLRiteError};
8
/// The aggregate functions recognized by the SELECT parser in v1:
/// `COUNT`, `SUM`, `AVG`, `MIN` and `MAX`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AggregateFn {
    Count,
    Sum,
    Avg,
    Min,
    Max,
}

impl AggregateFn {
    /// Canonical upper-case SQL spelling of this aggregate.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Count => "COUNT",
            Self::Sum => "SUM",
            Self::Avg => "AVG",
            Self::Min => "MIN",
            Self::Max => "MAX",
        }
    }

    /// Look an aggregate up by name, ignoring ASCII case.
    ///
    /// Returns `None` for anything that is not exactly one of the five
    /// supported names (no trimming, no prefixes).
    pub(crate) fn from_name(name: &str) -> Option<Self> {
        // Matching against the canonical spellings keeps `from_name` and
        // `as_str` in lock-step.
        const ALL: [AggregateFn; 5] = [
            AggregateFn::Count,
            AggregateFn::Sum,
            AggregateFn::Avg,
            AggregateFn::Min,
            AggregateFn::Max,
        ];
        ALL.iter()
            .copied()
            .find(|candidate| name.eq_ignore_ascii_case(candidate.as_str()))
    }
}
41
/// What the aggregate is fed: `*` (only valid for COUNT) or a bare column.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AggregateArg {
    /// The `*` wildcard, as in `COUNT(*)`.
    Star,
    /// A column name. `parse_aggregate_call` keeps only the final
    /// segment of a qualified reference, so `t.col` is stored as `col`.
    Column(String),
}
48
49/// A parsed aggregate call like `COUNT(*)`, `SUM(salary)`, `COUNT(DISTINCT dept)`.
50#[derive(Debug, Clone, PartialEq, Eq)]
51pub struct AggregateCall {
52    pub func: AggregateFn,
53    pub arg: AggregateArg,
54    /// `DISTINCT` inside the parens. v1 only allows it on COUNT.
55    pub distinct: bool,
56}
57
58impl AggregateCall {
59    /// Canonical display form used to match ORDER BY expressions against
60    /// aggregate output columns when the user didn't supply an alias.
61    /// Mirrors the output-header convention.
62    pub fn display_name(&self) -> String {
63        let inner = match &self.arg {
64            AggregateArg::Star => "*".to_string(),
65            AggregateArg::Column(c) => {
66                if self.distinct {
67                    format!("DISTINCT {c}")
68                } else {
69                    c.clone()
70                }
71            }
72        };
73        format!("{}({inner})", self.func.as_str())
74    }
75}
76
77/// One entry in the projection list.
78#[derive(Debug, Clone)]
79pub struct ProjectionItem {
80    pub kind: ProjectionKind,
81    /// `AS alias` if explicitly supplied.
82    pub alias: Option<String>,
83}
84
85impl ProjectionItem {
86    /// Resolve the user-visible column header for this projection item.
87    /// Alias if supplied, else the bare column name or aggregate display.
88    /// For qualified `t.col` shapes the header is just `col` — this
89    /// matches SQLite, where qualifiers don't propagate to output
90    /// column names.
91    pub fn output_name(&self) -> String {
92        if let Some(a) = &self.alias {
93            return a.clone();
94        }
95        match &self.kind {
96            ProjectionKind::Column { name, .. } => name.clone(),
97            ProjectionKind::Aggregate(a) => a.display_name(),
98        }
99    }
100}
101
/// What an individual projection item produces.
#[derive(Debug, Clone)]
pub enum ProjectionKind {
    /// Column reference. `qualifier` is `Some` for `t.col` shapes
    /// (SQLR-5 — needed so JOIN execution can disambiguate
    /// same-named columns across tables); `None` for bare `col`.
    /// The single-table path ignores the qualifier and looks up the
    /// name directly, preserving legacy behavior.
    Column {
        /// The part written before the dot in `q.col`, if any.
        /// References with three or more dotted parts are rejected by
        /// `parse_projection_expr`, so this is never a multi-part path.
        qualifier: Option<String>,
        /// The column name itself, without any qualifier.
        name: String,
    },
    /// Aggregate function call: `COUNT(*)`, `SUM(col)`, etc.
    Aggregate(AggregateCall),
}
117
/// What columns to project from a SELECT.
///
/// `parse_projection` yields `All` only when `*` is the sole select
/// item; a wildcard appearing alongside other items is rejected there.
#[derive(Debug, Clone)]
pub enum Projection {
    /// `SELECT *` — every column in the table, in declaration order.
    All,
    /// Explicit, ordered projection list — possibly mixing bare columns
    /// with aggregate calls (`SELECT dept, COUNT(*) FROM t`).
    Items(Vec<ProjectionItem>),
}
127
/// A parsed `ORDER BY` clause: a single sort key (expression), ascending
/// by default. Phase 7b widened this from "bare column name" to
/// "arbitrary expression" so KNN queries of the form
/// `ORDER BY vec_distance_l2(col, [...]) LIMIT k` work end-to-end. The
/// expression is evaluated per-row at execution time via `eval_expr`;
/// the simple `ORDER BY col` form still works because that's just an
/// `Expr::Identifier` taking the same path.
#[derive(Debug, Clone)]
pub struct OrderByClause {
    /// The sort-key expression, cloned unchanged from the parsed
    /// `ORDER BY` entry.
    pub expr: Expr,
    /// `true` for `ASC` or when no direction was written; `false` for
    /// `DESC`.
    pub ascending: bool,
}
140
/// SQLR-5 — the join flavors a parsed query can carry: INNER plus the
/// three OUTER variants. Per the project's design notes (see
/// `docs/design-decisions.md`), all four run on one nested-loop driver
/// and differ only in the NULL-padding policy for unmatched left/right
/// rows.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JoinType {
    Inner,
    LeftOuter,
    RightOuter,
    FullOuter,
}

impl JoinType {
    /// Upper-case SQL keyword(s) naming this join flavor.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Inner => "INNER",
            Self::LeftOuter => "LEFT OUTER",
            Self::RightOuter => "RIGHT OUTER",
            Self::FullOuter => "FULL OUTER",
        }
    }
}
164
/// How a JOIN matches rows. SQLR-5 originally shipped `ON` only; the
/// USING / NATURAL increment adds the two name-based constraints.
/// `ON` carries its predicate straight from the parser. `USING` and
/// `NATURAL` defer their equality synthesis to the executor because
/// they need table schemas (which column names exist, and — for
/// `NATURAL` — which are shared) that the parser doesn't have. The
/// executor turns both into the same `left.col = right.col [AND …]`
/// predicate the `ON` path already evaluates. `CROSS JOIN` is rewritten
/// to `ON true` at parse time (no schema needed) and so reuses the
/// `On` variant directly.
#[derive(Debug, Clone)]
pub enum JoinConstraintKind {
    /// `ON <expr>` (and the parse-time rewrite of `CROSS JOIN` to
    /// `ON true`). Evaluated per-row over the multi-table scope. Boxed
    /// to keep this enum small — `Expr` dwarfs the other variants.
    On(Box<Expr>),
    /// `USING (col[, col…])` — equality on each named column, plus the
    /// SQLite convention that each named column appears once in
    /// `SELECT *`. Columns are validated and the predicate is
    /// synthesized at execution time. Each entry is a simple,
    /// single-part identifier: `extract_using_column` rejects qualified
    /// or multi-part names at parse time.
    Using(Vec<String>),
    /// `NATURAL` — the shared column names of the two sides are
    /// discovered at execution time, then treated exactly like
    /// `USING (<shared cols>)`. No shared columns ⇒ a cross product.
    Natural,
}
191
/// One JOIN clause from the FROM list. Multi-join queries
/// (`A JOIN B ... JOIN C ...`) become a `Vec<JoinClause>` evaluated
/// left-to-right against the accumulator. The match condition is one
/// of `ON` / `USING` / `NATURAL` (see [`JoinConstraintKind`]);
/// `CROSS JOIN` arrives here already rewritten to `ON true`.
#[derive(Debug, Clone)]
pub struct JoinClause {
    /// Join flavor. `CROSS JOIN` and a bare `JOIN` are both recorded as
    /// [`JoinType::Inner`].
    pub join_type: JoinType,
    /// Name of the table on the right-hand side of the join, as
    /// rendered by the parsed object name's `to_string()`.
    pub right_table: String,
    /// `AS alias` if the right table introduced one. Stored separately
    /// from `right_table` so the executor can normalize on
    /// `alias.unwrap_or(right_table)` for qualifier matching.
    pub right_alias: Option<String>,
    /// What the join matches on. See [`JoinConstraintKind`].
    pub constraint: JoinConstraintKind,
}
208
/// A parsed, simplified SELECT query.
///
/// Built by [`SelectQuery::new`], which rejects every SQL shape that
/// the fields below cannot represent.
#[derive(Debug, Clone)]
pub struct SelectQuery {
    /// Name of the leading FROM table, as rendered by the parsed object
    /// name's `to_string()`.
    pub table_name: String,
    /// Optional `AS alias` on the leading FROM table. The executor's
    /// scope resolver treats `alias.unwrap_or(table_name)` as the
    /// qualifier name.
    pub table_alias: Option<String>,
    /// SQLR-5 — JOIN clauses in source order. Empty = single-table
    /// SELECT, the existing fast path.
    pub joins: Vec<JoinClause>,
    /// The select list: `*` or an explicit list of columns / aggregates.
    pub projection: Projection,
    /// Raw sqlparser WHERE expression, evaluated by the executor at run time.
    pub selection: Option<Expr>,
    /// The single ORDER BY sort key, if any. Multi-key ORDER BY is
    /// rejected by the parser.
    pub order_by: Option<OrderByClause>,
    /// Row cap from a literal `LIMIT n`; `None` when no LIMIT was
    /// given. OFFSET forms are rejected by the parser.
    pub limit: Option<usize>,
    /// `SELECT DISTINCT`.
    pub distinct: bool,
    /// `GROUP BY a, b` — bare column names. Empty = no GROUP BY.
    pub group_by: Vec<String>,
    /// SQLR-52 — raw sqlparser HAVING expression, evaluated by the
    /// executor against each group's output row after aggregation.
    /// Parser-level invariant: `Some` implies `group_by` is non-empty
    /// (HAVING without GROUP BY is rejected in v0).
    pub having: Option<Expr>,
}
235
236impl SelectQuery {
237    pub fn new(statement: &Statement) -> Result<Self> {
238        let Statement::Query(query) = statement else {
239            return Err(SQLRiteError::Internal(
240                "Error parsing SELECT: expected a Query statement".to_string(),
241            ));
242        };
243
244        let Query {
245            body,
246            order_by,
247            limit_clause,
248            ..
249        } = query.as_ref();
250
251        let SetExpr::Select(select) = body.as_ref() else {
252            return Err(SQLRiteError::NotImplemented(
253                "Only simple SELECT queries are supported (no UNION / VALUES / CTEs yet)"
254                    .to_string(),
255            ));
256        };
257        let Select {
258            projection,
259            from,
260            selection,
261            distinct,
262            group_by,
263            having,
264            ..
265        } = select.as_ref();
266
267        // SQLR-3: read DISTINCT instead of rejecting it. Postgres's
268        // `DISTINCT ON (...)` stays unsupported — it's a per-group
269        // tie-breaker that isn't part of the SQLite surface we mirror.
270        let distinct_flag = match distinct {
271            None => false,
272            Some(sqlparser::ast::Distinct::Distinct) => true,
273            Some(sqlparser::ast::Distinct::All) => false,
274            Some(sqlparser::ast::Distinct::On(_)) => {
275                return Err(SQLRiteError::NotImplemented(
276                    "SELECT DISTINCT ON (...) is not supported".to_string(),
277                ));
278            }
279        };
280        // SQLR-3: parse GROUP BY into a list of bare column names.
281        // GroupByExpr::Expressions(v, _) with an empty v is the "no
282        // GROUP BY" shape; non-empty means we've got grouping. Reject
283        // GROUP BY ALL and GROUP BY on non-bare expressions for v1.
284        let group_by_cols: Vec<String> = match group_by {
285            sqlparser::ast::GroupByExpr::Expressions(exprs, _) => {
286                let mut out = Vec::with_capacity(exprs.len());
287                for e in exprs {
288                    let col = match e {
289                        Expr::Identifier(ident) => ident.value.clone(),
290                        Expr::CompoundIdentifier(parts) => {
291                            parts.last().map(|p| p.value.clone()).ok_or_else(|| {
292                                SQLRiteError::Internal("empty compound identifier".to_string())
293                            })?
294                        }
295                        other => {
296                            return Err(SQLRiteError::NotImplemented(format!(
297                                "GROUP BY only supports bare column references for now, got {other:?}"
298                            )));
299                        }
300                    };
301                    out.push(col);
302                }
303                out
304            }
305            _ => {
306                return Err(SQLRiteError::NotImplemented(
307                    "GROUP BY ALL is not supported".to_string(),
308                ));
309            }
310        };
311
312        // SQLR-52 — HAVING is the post-aggregation filter, so it only
313        // makes sense against grouped output. SQLite allows the
314        // degenerate no-GROUP-BY single-group form, but the Phase 9e
315        // executor's grouping pipeline assumes an explicit GROUP BY;
316        // reject the degenerate shape rather than special-casing it.
317        if having.is_some() && group_by_cols.is_empty() {
318            return Err(SQLRiteError::NotImplemented(
319                "HAVING without GROUP BY is not supported in v0; use WHERE for row-level \
320                 filters or restructure with a subquery"
321                    .to_string(),
322            ));
323        }
324
325        let (table_name, table_alias, joins) = extract_from_clause(from)?;
326        let projection = parse_projection(projection)?;
327        let order_by = parse_order_by(order_by.as_ref())?;
328        let limit = parse_limit(limit_clause.as_ref())?;
329
330        // SQLR-3 validation: when GROUP BY is present, every bare-column
331        // entry in the projection must appear in the GROUP BY list. Bare
332        // columns in the SELECT are otherwise undefined per group.
333        if !group_by_cols.is_empty()
334            && let Projection::Items(items) = &projection
335        {
336            for item in items {
337                if let ProjectionKind::Column { name: c, .. } = &item.kind
338                    && !group_by_cols.contains(c)
339                {
340                    return Err(SQLRiteError::Internal(format!(
341                        "column '{c}' must appear in GROUP BY or be used in an aggregate function"
342                    )));
343                }
344            }
345        }
346
347        // SQLR-5 — aggregations across joined results aren't covered
348        // by the current single-table grouping pipeline. Reject GROUP
349        // BY / aggregates over a join up front so the user gets a clear
350        // message rather than wrong results.
351        if !joins.is_empty() {
352            let has_agg = matches!(
353                &projection,
354                Projection::Items(items)
355                    if items.iter().any(|i| matches!(i.kind, ProjectionKind::Aggregate(_)))
356            );
357            if has_agg || !group_by_cols.is_empty() {
358                return Err(SQLRiteError::NotImplemented(
359                    "GROUP BY / aggregate functions over JOIN results are not supported yet"
360                        .to_string(),
361                ));
362            }
363            if distinct_flag {
364                return Err(SQLRiteError::NotImplemented(
365                    "SELECT DISTINCT over JOIN results is not supported yet".to_string(),
366                ));
367            }
368        }
369
370        Ok(SelectQuery {
371            table_name,
372            table_alias,
373            joins,
374            projection,
375            selection: selection.clone(),
376            order_by,
377            limit,
378            distinct: distinct_flag,
379            group_by: group_by_cols,
380            having: having.clone(),
381        })
382    }
383}
384
385/// Pull the leading FROM table (with optional alias) and any JOIN
386/// clauses out of the parsed FROM list. Supports a single base table
387/// plus zero or more INNER / LEFT / RIGHT / FULL OUTER joins with an
388/// `ON`, `USING (...)`, or `NATURAL` constraint, and `CROSS JOIN`
389/// (rewritten to `INNER ... ON true`). Comma-separated FROM lists and
390/// SEMI / ANTI / ASOF / APPLY joins surface as `NotImplemented`.
391fn extract_from_clause(
392    from: &[TableWithJoins],
393) -> Result<(String, Option<String>, Vec<JoinClause>)> {
394    if from.is_empty() {
395        return Err(SQLRiteError::Internal(
396            "SELECT requires a FROM clause".to_string(),
397        ));
398    }
399    if from.len() != 1 {
400        return Err(SQLRiteError::NotImplemented(
401            "comma-separated FROM lists are not supported — use explicit JOIN syntax".to_string(),
402        ));
403    }
404    let twj = &from[0];
405    let (table_name, table_alias) = extract_table_factor(&twj.relation)?;
406
407    let mut joins = Vec::with_capacity(twj.joins.len());
408    for j in &twj.joins {
409        let (right_table, right_alias) = extract_table_factor(&j.relation)?;
410        let (join_type, constraint) = match &j.join_operator {
411            // Bare `JOIN` defaults to INNER per SQL standard.
412            JoinOperator::Join(c) | JoinOperator::Inner(c) => {
413                (JoinType::Inner, convert_constraint(c)?)
414            }
415            JoinOperator::Left(c) | JoinOperator::LeftOuter(c) => {
416                (JoinType::LeftOuter, convert_constraint(c)?)
417            }
418            JoinOperator::Right(c) | JoinOperator::RightOuter(c) => {
419                (JoinType::RightOuter, convert_constraint(c)?)
420            }
421            JoinOperator::FullOuter(c) => (JoinType::FullOuter, convert_constraint(c)?),
422            // `CROSS JOIN` is the cross product: INNER with an always-true
423            // ON. A constraint on a CROSS JOIN is non-standard, but if the
424            // parser handed us `USING` / `NATURAL` / `ON` we honor it
425            // rather than silently dropping it.
426            JoinOperator::CrossJoin(c) => (JoinType::Inner, convert_cross_constraint(c)?),
427            other => {
428                return Err(SQLRiteError::NotImplemented(format!(
429                    "join flavor {other:?} is not supported \
430                     (only INNER / LEFT OUTER / RIGHT OUTER / FULL OUTER / CROSS, \
431                     with ON / USING / NATURAL)"
432                )));
433            }
434        };
435        joins.push(JoinClause {
436            join_type,
437            right_table,
438            right_alias,
439            constraint,
440        });
441    }
442
443    Ok((table_name, table_alias, joins))
444}
445
446fn extract_table_factor(tf: &TableFactor) -> Result<(String, Option<String>)> {
447    match tf {
448        TableFactor::Table { name, alias, .. } => {
449            let table_name = name.to_string();
450            let alias_name = alias.as_ref().map(|a| a.name.value.clone());
451            // We don't yet support alias column lists like `(c1, c2)` —
452            // they only matter for table-valued functions / derived
453            // tables, which we don't have either.
454            if let Some(a) = alias.as_ref()
455                && !a.columns.is_empty()
456            {
457                return Err(SQLRiteError::NotImplemented(
458                    "table alias column lists are not supported".to_string(),
459                ));
460            }
461            Ok((table_name, alias_name))
462        }
463        _ => Err(SQLRiteError::NotImplemented(
464            "only plain table references are supported in FROM / JOIN".to_string(),
465        )),
466    }
467}
468
469/// Lower a `sqlparser` join constraint into our [`JoinConstraintKind`].
470/// `ON` passes through; `USING` is narrowed to a list of bare column
471/// names; `NATURAL` defers to the executor. A constraint-less join
472/// (`A JOIN B` with no `ON` / `USING`) is rejected — `CROSS JOIN` is
473/// the supported way to ask for a cross product and is handled by
474/// [`convert_cross_constraint`].
475fn convert_constraint(constraint: &JoinConstraint) -> Result<JoinConstraintKind> {
476    match constraint {
477        JoinConstraint::On(expr) => Ok(JoinConstraintKind::On(Box::new(expr.clone()))),
478        JoinConstraint::Using(cols) => {
479            let names = cols
480                .iter()
481                .map(extract_using_column)
482                .collect::<Result<Vec<String>>>()?;
483            Ok(JoinConstraintKind::Using(names))
484        }
485        JoinConstraint::Natural => Ok(JoinConstraintKind::Natural),
486        JoinConstraint::None => Err(SQLRiteError::NotImplemented(
487            "JOIN without an ON / USING / NATURAL condition is not supported \
488             (use `... ON ...`, `... USING (...)`, `NATURAL JOIN`, or `CROSS JOIN`)"
489                .to_string(),
490        )),
491    }
492}
493
494/// Constraint handling for `CROSS JOIN`. The standard form carries no
495/// constraint and means "cross product", which we express as `ON true`
496/// so it flows through the same executor path as any other join.
497fn convert_cross_constraint(constraint: &JoinConstraint) -> Result<JoinConstraintKind> {
498    match constraint {
499        JoinConstraint::None => Ok(JoinConstraintKind::On(Box::new(true_literal()))),
500        // Non-standard, but if a constraint was attached to a CROSS JOIN,
501        // honor it instead of dropping it on the floor.
502        other => convert_constraint(other),
503    }
504}
505
506/// Pull a bare column name out of a `USING (...)` entry. `USING`
507/// columns are always simple identifiers; anything qualified or
508/// multi-part is rejected.
509fn extract_using_column(name: &ObjectName) -> Result<String> {
510    match name.0.as_slice() {
511        [ObjectNamePart::Identifier(ident)] => Ok(ident.value.clone()),
512        _ => Err(SQLRiteError::NotImplemented(format!(
513            "USING column must be a simple column name, got {name}"
514        ))),
515    }
516}
517
518/// An always-true boolean literal expression, used to rewrite
519/// `CROSS JOIN` into `INNER JOIN ... ON true`.
520fn true_literal() -> Expr {
521    Expr::Value(Value::Boolean(true).with_empty_span())
522}
523
524fn parse_projection(items: &[SelectItem]) -> Result<Projection> {
525    // Special-case `SELECT *`.
526    if items.len() == 1
527        && let SelectItem::Wildcard(_) = &items[0]
528    {
529        return Ok(Projection::All);
530    }
531    let mut out = Vec::with_capacity(items.len());
532    for item in items {
533        out.push(parse_select_item(item)?);
534    }
535    Ok(Projection::Items(out))
536}
537
538fn parse_select_item(item: &SelectItem) -> Result<ProjectionItem> {
539    match item {
540        SelectItem::UnnamedExpr(expr) => parse_projection_expr(expr, None),
541        SelectItem::ExprWithAlias { expr, alias } => {
542            parse_projection_expr(expr, Some(alias.value.clone()))
543        }
544        SelectItem::Wildcard(_) | SelectItem::QualifiedWildcard(_, _) => {
545            Err(SQLRiteError::NotImplemented(
546                "Wildcard mixed with other columns is not supported".to_string(),
547            ))
548        }
549    }
550}
551
552fn parse_projection_expr(expr: &Expr, alias: Option<String>) -> Result<ProjectionItem> {
553    match expr {
554        Expr::Identifier(ident) => Ok(ProjectionItem {
555            kind: ProjectionKind::Column {
556                qualifier: None,
557                name: ident.value.clone(),
558            },
559            alias,
560        }),
561        Expr::CompoundIdentifier(parts) => match parts.as_slice() {
562            [only] => Ok(ProjectionItem {
563                kind: ProjectionKind::Column {
564                    qualifier: None,
565                    name: only.value.clone(),
566                },
567                alias,
568            }),
569            [q, c] => Ok(ProjectionItem {
570                kind: ProjectionKind::Column {
571                    qualifier: Some(q.value.clone()),
572                    name: c.value.clone(),
573                },
574                alias,
575            }),
576            _ => Err(SQLRiteError::NotImplemented(format!(
577                "compound identifier with {} parts is not supported in projection",
578                parts.len()
579            ))),
580        },
581        Expr::Function(func) => {
582            let call = parse_aggregate_call(func)?;
583            Ok(ProjectionItem {
584                kind: ProjectionKind::Aggregate(call),
585                alias,
586            })
587        }
588        other => Err(SQLRiteError::NotImplemented(format!(
589            "Only bare column references and aggregate functions are supported in the projection list (got {other:?})"
590        ))),
591    }
592}
593
594pub(crate) fn parse_aggregate_call(func: &sqlparser::ast::Function) -> Result<AggregateCall> {
595    // Function name: only unqualified names like COUNT(...). Qualified
596    // names like `pkg.fn(...)` are out of scope.
597    let name = match func.name.0.as_slice() {
598        [sqlparser::ast::ObjectNamePart::Identifier(ident)] => ident.value.clone(),
599        _ => {
600            return Err(SQLRiteError::NotImplemented(format!(
601                "qualified function names not supported: {:?}",
602                func.name
603            )));
604        }
605    };
606    let agg_fn = AggregateFn::from_name(&name).ok_or_else(|| {
607        SQLRiteError::NotImplemented(format!(
608            "function '{name}' is not supported in the projection list (only aggregate functions are: COUNT, SUM, AVG, MIN, MAX)"
609        ))
610    })?;
611
612    // Aggregates only accept the basic List form. None / Subquery forms
613    // (CURRENT_TIMESTAMP, scalar subqueries) don't apply here.
614    let arg_list = match &func.args {
615        FunctionArguments::List(l) => l,
616        _ => {
617            return Err(SQLRiteError::NotImplemented(format!(
618                "{name}(...) — unsupported argument shape"
619            )));
620        }
621    };
622
623    let distinct = matches!(
624        arg_list.duplicate_treatment,
625        Some(DuplicateTreatment::Distinct)
626    );
627
628    if !arg_list.clauses.is_empty() {
629        return Err(SQLRiteError::NotImplemented(format!(
630            "{name}(...) — extra argument clauses (ORDER BY / LIMIT inside the call) are not supported"
631        )));
632    }
633    if func.over.is_some() {
634        return Err(SQLRiteError::NotImplemented(
635            "window functions (OVER (...)) are not supported".to_string(),
636        ));
637    }
638    if func.filter.is_some() {
639        return Err(SQLRiteError::NotImplemented(
640            "FILTER (WHERE ...) on aggregates is not supported".to_string(),
641        ));
642    }
643    if !func.within_group.is_empty() {
644        return Err(SQLRiteError::NotImplemented(
645            "WITHIN GROUP on aggregates is not supported".to_string(),
646        ));
647    }
648
649    if arg_list.args.len() != 1 {
650        return Err(SQLRiteError::NotImplemented(format!(
651            "{name}(...) expects exactly one argument, got {}",
652            arg_list.args.len()
653        )));
654    }
655
656    let arg = match &arg_list.args[0] {
657        FunctionArg::Unnamed(FunctionArgExpr::Wildcard) => AggregateArg::Star,
658        FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(ident))) => {
659            AggregateArg::Column(ident.value.clone())
660        }
661        FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::CompoundIdentifier(parts))) => {
662            let c = parts
663                .last()
664                .map(|p| p.value.clone())
665                .ok_or_else(|| SQLRiteError::Internal("empty compound identifier".to_string()))?;
666            AggregateArg::Column(c)
667        }
668        other => {
669            return Err(SQLRiteError::NotImplemented(format!(
670                "{name}(...) — argument must be `*` or a bare column reference (got {other:?})"
671            )));
672        }
673    };
674
675    // v1: only COUNT(DISTINCT col) is supported. SUM/AVG/MIN/MAX with
676    // DISTINCT are valid SQL but uncommon and add accumulator complexity
677    // we don't yet need.
678    if distinct && agg_fn != AggregateFn::Count {
679        return Err(SQLRiteError::NotImplemented(format!(
680            "DISTINCT is only supported on COUNT(...) for now, not {}",
681            agg_fn.as_str()
682        )));
683    }
684    if matches!(arg, AggregateArg::Star) && agg_fn != AggregateFn::Count {
685        return Err(SQLRiteError::NotImplemented(format!(
686            "{}(*) is not supported; use {}(<column>)",
687            agg_fn.as_str(),
688            agg_fn.as_str()
689        )));
690    }
691
692    Ok(AggregateCall {
693        func: agg_fn,
694        arg,
695        distinct,
696    })
697}
698
699fn parse_order_by(order_by: Option<&sqlparser::ast::OrderBy>) -> Result<Option<OrderByClause>> {
700    let Some(ob) = order_by else {
701        return Ok(None);
702    };
703    let exprs = match &ob.kind {
704        OrderByKind::Expressions(v) => v,
705        OrderByKind::All(_) => {
706            return Err(SQLRiteError::NotImplemented(
707                "ORDER BY ALL is not supported".to_string(),
708            ));
709        }
710    };
711    if exprs.len() != 1 {
712        return Err(SQLRiteError::NotImplemented(
713            "ORDER BY must have exactly one column for now".to_string(),
714        ));
715    }
716    let obe = &exprs[0];
717    // Phase 7b: accept arbitrary expressions, not just bare column refs.
718    // The executor's `sort_rowids` evaluates this expression per row via
719    // `eval_expr`, which handles Identifier (column lookup), Function
720    // (vec_distance_*), arithmetic, etc. uniformly. The previous
721    // column-name-only restriction has been lifted.
722    let expr = obe.expr.clone();
723    // `asc == None` is the dialect default (ASC).
724    let ascending = obe.options.asc.unwrap_or(true);
725    Ok(Some(OrderByClause { expr, ascending }))
726}
727
728fn parse_limit(limit: Option<&LimitClause>) -> Result<Option<usize>> {
729    let Some(lc) = limit else {
730        return Ok(None);
731    };
732    let limit_expr = match lc {
733        LimitClause::LimitOffset { limit, offset, .. } => {
734            if offset.is_some() {
735                return Err(SQLRiteError::NotImplemented(
736                    "OFFSET is not supported yet".to_string(),
737                ));
738            }
739            limit.as_ref()
740        }
741        LimitClause::OffsetCommaLimit { .. } => {
742            return Err(SQLRiteError::NotImplemented(
743                "`LIMIT <offset>, <limit>` syntax is not supported yet".to_string(),
744            ));
745        }
746    };
747    let Some(expr) = limit_expr else {
748        return Ok(None);
749    };
750    let n = eval_const_usize(expr)?;
751    Ok(Some(n))
752}
753
754fn eval_const_usize(expr: &Expr) -> Result<usize> {
755    match expr {
756        Expr::Value(v) => match &v.value {
757            sqlparser::ast::Value::Number(n, _) => n.parse::<usize>().map_err(|e| {
758                SQLRiteError::Internal(format!("LIMIT must be a non-negative integer: {e}"))
759            }),
760            _ => Err(SQLRiteError::Internal(
761                "LIMIT must be an integer literal".to_string(),
762            )),
763        },
764        _ => Err(SQLRiteError::NotImplemented(
765            "LIMIT expression must be a literal number".to_string(),
766        )),
767    }
768}