Skip to main content

sqlrite/sql/parser/
select.rs

1use sqlparser::ast::{
2    DuplicateTreatment, Expr, FunctionArg, FunctionArgExpr, FunctionArguments, JoinConstraint,
3    JoinOperator, LimitClause, ObjectName, ObjectNamePart, OrderByKind, Query, Select, SelectItem,
4    SetExpr, Statement, TableFactor, TableWithJoins, Value,
5};
6
7use crate::error::{Result, SQLRiteError};
8
/// The aggregate functions understood by the parser. v1 covers the
/// SQLite-classic five.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AggregateFn {
    Count,
    Sum,
    Avg,
    Min,
    Max,
}

impl AggregateFn {
    /// Every variant in declaration order; drives the name lookup in
    /// [`AggregateFn::from_name`].
    const ALL: [AggregateFn; 5] = [
        AggregateFn::Count,
        AggregateFn::Sum,
        AggregateFn::Avg,
        AggregateFn::Min,
        AggregateFn::Max,
    ];

    /// Upper-case SQL spelling of the function (`"COUNT"`, `"SUM"`, …).
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Count => "COUNT",
            Self::Sum => "SUM",
            Self::Avg => "AVG",
            Self::Min => "MIN",
            Self::Max => "MAX",
        }
    }

    /// Resolve a function name to its variant, ignoring ASCII case.
    /// Returns `None` for anything that is not one of the five
    /// supported aggregates.
    pub(crate) fn from_name(name: &str) -> Option<Self> {
        Self::ALL
            .iter()
            .copied()
            .find(|candidate| candidate.as_str().eq_ignore_ascii_case(name))
    }
}
41
/// The argument handed to an aggregate call.
///
/// SQLR-6 — a column argument keeps its optional `t.` qualifier so
/// that joined aggregation (`SUM(orders.amount)`) can tell same-named
/// columns on different in-scope tables apart. On the single-table
/// path the qualifier is validated against the FROM table/alias
/// (SQLR-14), exactly like projection qualifiers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AggregateArg {
    /// `*` — only valid for COUNT.
    Star,
    /// A column reference, `col` or `t.col`.
    Column {
        /// The `t` of `t.col`; `None` for a bare column.
        qualifier: Option<String>,
        /// The column name itself.
        name: String,
    },
}
56
57/// A parsed aggregate call like `COUNT(*)`, `SUM(salary)`, `COUNT(DISTINCT dept)`.
58#[derive(Debug, Clone, PartialEq, Eq)]
59pub struct AggregateCall {
60    pub func: AggregateFn,
61    pub arg: AggregateArg,
62    /// `DISTINCT` inside the parens. v1 only allows it on COUNT.
63    pub distinct: bool,
64}
65
66impl AggregateCall {
67    /// Canonical display form used to match ORDER BY expressions against
68    /// aggregate output columns when the user didn't supply an alias.
69    /// Mirrors the output-header convention.
70    pub fn display_name(&self) -> String {
71        self.display_name_impl(true)
72    }
73
74    /// Display form with the argument's `t.` qualifier stripped. Used
75    /// as a fallback when matching ORDER BY function calls against
76    /// projection slots, so `ORDER BY SUM(amount)` still finds a
77    /// `SELECT SUM(o.amount)` slot (and vice versa).
78    pub(crate) fn display_name_unqualified(&self) -> String {
79        self.display_name_impl(false)
80    }
81
82    fn display_name_impl(&self, qualified: bool) -> String {
83        let inner = match &self.arg {
84            AggregateArg::Star => "*".to_string(),
85            AggregateArg::Column { qualifier, name } => {
86                let col = match qualifier {
87                    Some(q) if qualified => format!("{q}.{name}"),
88                    _ => name.clone(),
89                };
90                if self.distinct {
91                    format!("DISTINCT {col}")
92                } else {
93                    col
94                }
95            }
96        };
97        format!("{}({inner})", self.func.as_str())
98    }
99}
100
101/// One entry in the projection list.
102#[derive(Debug, Clone)]
103pub struct ProjectionItem {
104    pub kind: ProjectionKind,
105    /// `AS alias` if explicitly supplied.
106    pub alias: Option<String>,
107}
108
109impl ProjectionItem {
110    /// Resolve the user-visible column header for this projection item.
111    /// Alias if supplied, else the bare column name or aggregate display.
112    /// For qualified `t.col` shapes the header is just `col` — this
113    /// matches SQLite, where qualifiers don't propagate to output
114    /// column names.
115    pub fn output_name(&self) -> String {
116        if let Some(a) = &self.alias {
117            return a.clone();
118        }
119        match &self.kind {
120            ProjectionKind::Column { name, .. } => name.clone(),
121            ProjectionKind::Aggregate(a) => a.display_name(),
122        }
123    }
124}
125
126/// What an individual projection item produces.
127#[derive(Debug, Clone)]
128pub enum ProjectionKind {
129    /// Column reference. `qualifier` is `Some` for `t.col` shapes
130    /// (SQLR-5 — needed so JOIN execution can disambiguate
131    /// same-named columns across tables); `None` for bare `col`.
132    /// The single-table path validates the qualifier against the FROM
133    /// table name / alias (SQLR-14) and then looks up the bare name.
134    Column {
135        qualifier: Option<String>,
136        name: String,
137    },
138    /// Aggregate function call: `COUNT(*)`, `SUM(col)`, etc.
139    Aggregate(AggregateCall),
140}
141
142/// What columns to project from a SELECT.
143#[derive(Debug, Clone)]
144pub enum Projection {
145    /// `SELECT *` — every column in the table, in declaration order.
146    All,
147    /// Explicit, ordered projection list — possibly mixing bare columns
148    /// with aggregate calls (`SELECT dept, COUNT(*) FROM t`).
149    Items(Vec<ProjectionItem>),
150}
151
/// SQLR-6 — a single GROUP BY key, i.e. a column reference with an
/// optional qualifier (`dept` or `t.dept`).
///
/// The qualifier is significant for joined SELECTs, where several
/// in-scope tables may share a column name. On the single-table path
/// it is validated against the FROM table/alias (SQLR-14), mirroring
/// projection qualifiers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GroupByKey {
    /// The `t` of `t.dept`; `None` for a bare key.
    pub qualifier: Option<String>,
    /// The column name.
    pub name: String,
}

impl GroupByKey {
    /// Whether a (possibly qualified) column reference refers to this
    /// key.
    ///
    /// The column names are compared exactly. Qualifiers are compared
    /// ASCII case-insensitively, and only when both sides have one: a
    /// bare reference matches a qualified key and the other way round.
    /// Callers needing strict table-resolution equality (the joined
    /// executor) add that check themselves.
    pub(crate) fn matches_column(&self, qualifier: Option<&str>, name: &str) -> bool {
        if self.name != name {
            return false;
        }
        if let (Some(own), Some(other)) = (self.qualifier.as_deref(), qualifier) {
            return own.eq_ignore_ascii_case(other);
        }
        // At least one side is unqualified: the name match suffices.
        true
    }
}
178
179/// A parsed `ORDER BY` clause: a single sort key (expression), ascending
180/// by default. Phase 7b widened this from "bare column name" to
181/// "arbitrary expression" so KNN queries of the form
182/// `ORDER BY vec_distance_l2(col, [...]) LIMIT k` work end-to-end. The
183/// expression is evaluated per-row at execution time via `eval_expr`;
184/// the simple `ORDER BY col` form still works because that's just an
185/// `Expr::Identifier` taking the same path.
186#[derive(Debug, Clone)]
187pub struct OrderByClause {
188    pub expr: Expr,
189    pub ascending: bool,
190}
191
/// SQLR-5 — the flavor of a join.
///
/// All four flavors run on a single nested-loop driver, since they
/// differ only in small ways (the NULL-padding policy for unmatched
/// left/right rows). The rationale for shipping RIGHT OUTER and FULL
/// OUTER is recorded in `docs/design-decisions.md`.
///
/// NOTE(review): the original comment here said SQLite ships only
/// INNER and LEFT OUTER. SQLite 3.39.0 and later appear to support
/// RIGHT and FULL OUTER JOIN as well — confirm and update the design
/// doc if so.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JoinType {
    Inner,
    LeftOuter,
    RightOuter,
    FullOuter,
}

impl JoinType {
    /// Upper-case SQL spelling of the join flavor.
    pub fn as_str(self) -> &'static str {
        use JoinType::{FullOuter, Inner, LeftOuter, RightOuter};
        match self {
            Inner => "INNER",
            LeftOuter => "LEFT OUTER",
            RightOuter => "RIGHT OUTER",
            FullOuter => "FULL OUTER",
        }
    }
}
215
216/// How a JOIN matches rows. SQLR-5 originally shipped `ON` only; the
217/// USING / NATURAL increment adds the two name-based constraints.
218/// `ON` carries its predicate straight from the parser. `USING` and
219/// `NATURAL` defer their equality synthesis to the executor because
220/// they need table schemas (which column names exist, and — for
221/// `NATURAL` — which are shared) that the parser doesn't have. The
222/// executor turns both into the same `left.col = right.col [AND …]`
223/// predicate the `ON` path already evaluates. `CROSS JOIN` is rewritten
224/// to `ON true` at parse time (no schema needed) and so reuses the
225/// `On` variant directly.
226#[derive(Debug, Clone)]
227pub enum JoinConstraintKind {
228    /// `ON <expr>` (and the parse-time rewrite of `CROSS JOIN` to
229    /// `ON true`). Evaluated per-row over the multi-table scope. Boxed
230    /// to keep this enum small — `Expr` dwarfs the other variants.
231    On(Box<Expr>),
232    /// `USING (col[, col…])` — equality on each named column, plus the
233    /// SQLite convention that each named column appears once in
234    /// `SELECT *`. Columns are validated and the predicate is
235    /// synthesized at execution time.
236    Using(Vec<String>),
237    /// `NATURAL` — the shared column names of the two sides are
238    /// discovered at execution time, then treated exactly like
239    /// `USING (<shared cols>)`. No shared columns ⇒ a cross product.
240    Natural,
241}
242
243/// One JOIN clause from the FROM list. Multi-join queries
244/// (`A JOIN B ... JOIN C ...`) become a `Vec<JoinClause>` evaluated
245/// left-to-right against the accumulator. The match condition is one
246/// of `ON` / `USING` / `NATURAL` (see [`JoinConstraintKind`]);
247/// `CROSS JOIN` arrives here already rewritten to `ON true`.
248#[derive(Debug, Clone)]
249pub struct JoinClause {
250    pub join_type: JoinType,
251    pub right_table: String,
252    /// `AS alias` if the right table introduced one. Stored separately
253    /// from `right_table` so the executor can normalize on
254    /// `alias.unwrap_or(right_table)` for qualifier matching.
255    pub right_alias: Option<String>,
256    /// What the join matches on. See [`JoinConstraintKind`].
257    pub constraint: JoinConstraintKind,
258}
259
260/// A parsed, simplified SELECT query.
261#[derive(Debug, Clone)]
262pub struct SelectQuery {
263    pub table_name: String,
264    /// Optional `AS alias` on the leading FROM table. The executor's
265    /// scope resolver treats `alias.unwrap_or(table_name)` as the
266    /// qualifier name.
267    pub table_alias: Option<String>,
268    /// SQLR-5 — JOIN clauses in source order. Empty = single-table
269    /// SELECT, the existing fast path.
270    pub joins: Vec<JoinClause>,
271    pub projection: Projection,
272    /// Raw sqlparser WHERE expression, evaluated by the executor at run time.
273    pub selection: Option<Expr>,
274    pub order_by: Option<OrderByClause>,
275    pub limit: Option<usize>,
276    /// `SELECT DISTINCT`.
277    pub distinct: bool,
278    /// `GROUP BY a, t.b` — optionally-qualified column references in
279    /// source order. Empty = no GROUP BY.
280    pub group_by: Vec<GroupByKey>,
281    /// SQLR-52 — raw sqlparser HAVING expression, evaluated by the
282    /// executor against each group's output row after aggregation.
283    /// Parser-level invariant: `Some` implies `group_by` is non-empty
284    /// (HAVING without GROUP BY is rejected in v0).
285    pub having: Option<Expr>,
286}
287
288impl SelectQuery {
289    pub fn new(statement: &Statement) -> Result<Self> {
290        let Statement::Query(query) = statement else {
291            return Err(SQLRiteError::Internal(
292                "Error parsing SELECT: expected a Query statement".to_string(),
293            ));
294        };
295
296        let Query {
297            body,
298            order_by,
299            limit_clause,
300            ..
301        } = query.as_ref();
302
303        let SetExpr::Select(select) = body.as_ref() else {
304            return Err(SQLRiteError::NotImplemented(
305                "Only simple SELECT queries are supported (no UNION / VALUES / CTEs yet)"
306                    .to_string(),
307            ));
308        };
309        let Select {
310            projection,
311            from,
312            selection,
313            distinct,
314            group_by,
315            having,
316            ..
317        } = select.as_ref();
318
319        // SQLR-3: read DISTINCT instead of rejecting it. Postgres's
320        // `DISTINCT ON (...)` stays unsupported — it's a per-group
321        // tie-breaker that isn't part of the SQLite surface we mirror.
322        let distinct_flag = match distinct {
323            None => false,
324            Some(sqlparser::ast::Distinct::Distinct) => true,
325            Some(sqlparser::ast::Distinct::All) => false,
326            Some(sqlparser::ast::Distinct::On(_)) => {
327                return Err(SQLRiteError::NotImplemented(
328                    "SELECT DISTINCT ON (...) is not supported".to_string(),
329                ));
330            }
331        };
332        // SQLR-3: parse GROUP BY into a list of column references.
333        // GroupByExpr::Expressions(v, _) with an empty v is the "no
334        // GROUP BY" shape; non-empty means we've got grouping. Reject
335        // GROUP BY ALL and GROUP BY on non-column expressions for v1.
336        // SQLR-6 — keys keep their `t.` qualifier so joined grouping
337        // (`GROUP BY customers.name`) can disambiguate.
338        let group_by_cols: Vec<GroupByKey> = match group_by {
339            sqlparser::ast::GroupByExpr::Expressions(exprs, _) => {
340                let mut out = Vec::with_capacity(exprs.len());
341                for e in exprs {
342                    let key = match e {
343                        Expr::Identifier(ident) => GroupByKey {
344                            qualifier: None,
345                            name: ident.value.clone(),
346                        },
347                        Expr::CompoundIdentifier(parts) => match parts.as_slice() {
348                            [only] => GroupByKey {
349                                qualifier: None,
350                                name: only.value.clone(),
351                            },
352                            [q, c] => GroupByKey {
353                                qualifier: Some(q.value.clone()),
354                                name: c.value.clone(),
355                            },
356                            _ => {
357                                return Err(SQLRiteError::NotImplemented(format!(
358                                    "GROUP BY identifier with {} parts is not supported",
359                                    parts.len()
360                                )));
361                            }
362                        },
363                        other => {
364                            return Err(SQLRiteError::NotImplemented(format!(
365                                "GROUP BY only supports bare column references for now, got {other:?}"
366                            )));
367                        }
368                    };
369                    out.push(key);
370                }
371                out
372            }
373            _ => {
374                return Err(SQLRiteError::NotImplemented(
375                    "GROUP BY ALL is not supported".to_string(),
376                ));
377            }
378        };
379
380        // SQLR-52 — HAVING is the post-aggregation filter, so it only
381        // makes sense against grouped output. SQLite allows the
382        // degenerate no-GROUP-BY single-group form, but the Phase 9e
383        // executor's grouping pipeline assumes an explicit GROUP BY;
384        // reject the degenerate shape rather than special-casing it.
385        if having.is_some() && group_by_cols.is_empty() {
386            return Err(SQLRiteError::NotImplemented(
387                "HAVING without GROUP BY is not supported in v0; use WHERE for row-level \
388                 filters or restructure with a subquery"
389                    .to_string(),
390            ));
391        }
392
393        let (table_name, table_alias, joins) = extract_from_clause(from)?;
394        let projection = parse_projection(projection)?;
395        let order_by = parse_order_by(order_by.as_ref())?;
396        let limit = parse_limit(limit_clause.as_ref())?;
397
398        // SQLR-3 validation: when GROUP BY is present, every bare-column
399        // entry in the projection must appear in the GROUP BY list. Bare
400        // columns in the SELECT are otherwise undefined per group.
401        // SQLR-6 — only the single-table case validates here; the joined
402        // case needs the table schemas to resolve qualifiers, so the
403        // joined executor performs the equivalent check against the
404        // full in-scope table list.
405        if joins.is_empty()
406            && !group_by_cols.is_empty()
407            && let Projection::Items(items) = &projection
408        {
409            for item in items {
410                if let ProjectionKind::Column { qualifier, name: c } = &item.kind
411                    && !group_by_cols
412                        .iter()
413                        .any(|g| g.matches_column(qualifier.as_deref(), c))
414                {
415                    return Err(SQLRiteError::Internal(format!(
416                        "column '{c}' must appear in GROUP BY or be used in an aggregate function"
417                    )));
418                }
419            }
420        }
421
422        Ok(SelectQuery {
423            table_name,
424            table_alias,
425            joins,
426            projection,
427            selection: selection.clone(),
428            order_by,
429            limit,
430            distinct: distinct_flag,
431            group_by: group_by_cols,
432            having: having.clone(),
433        })
434    }
435}
436
437/// Pull the leading FROM table (with optional alias) and any JOIN
438/// clauses out of the parsed FROM list. Supports a single base table
439/// plus zero or more INNER / LEFT / RIGHT / FULL OUTER joins with an
440/// `ON`, `USING (...)`, or `NATURAL` constraint, and `CROSS JOIN`
441/// (rewritten to `INNER ... ON true`). Comma-separated FROM lists and
442/// SEMI / ANTI / ASOF / APPLY joins surface as `NotImplemented`.
443fn extract_from_clause(
444    from: &[TableWithJoins],
445) -> Result<(String, Option<String>, Vec<JoinClause>)> {
446    if from.is_empty() {
447        return Err(SQLRiteError::Internal(
448            "SELECT requires a FROM clause".to_string(),
449        ));
450    }
451    if from.len() != 1 {
452        return Err(SQLRiteError::NotImplemented(
453            "comma-separated FROM lists are not supported — use explicit JOIN syntax".to_string(),
454        ));
455    }
456    let twj = &from[0];
457    let (table_name, table_alias) = extract_table_factor(&twj.relation)?;
458
459    let mut joins = Vec::with_capacity(twj.joins.len());
460    for j in &twj.joins {
461        let (right_table, right_alias) = extract_table_factor(&j.relation)?;
462        let (join_type, constraint) = match &j.join_operator {
463            // Bare `JOIN` defaults to INNER per SQL standard.
464            JoinOperator::Join(c) | JoinOperator::Inner(c) => {
465                (JoinType::Inner, convert_constraint(c)?)
466            }
467            JoinOperator::Left(c) | JoinOperator::LeftOuter(c) => {
468                (JoinType::LeftOuter, convert_constraint(c)?)
469            }
470            JoinOperator::Right(c) | JoinOperator::RightOuter(c) => {
471                (JoinType::RightOuter, convert_constraint(c)?)
472            }
473            JoinOperator::FullOuter(c) => (JoinType::FullOuter, convert_constraint(c)?),
474            // `CROSS JOIN` is the cross product: INNER with an always-true
475            // ON. A constraint on a CROSS JOIN is non-standard, but if the
476            // parser handed us `USING` / `NATURAL` / `ON` we honor it
477            // rather than silently dropping it.
478            JoinOperator::CrossJoin(c) => (JoinType::Inner, convert_cross_constraint(c)?),
479            other => {
480                return Err(SQLRiteError::NotImplemented(format!(
481                    "join flavor {other:?} is not supported \
482                     (only INNER / LEFT OUTER / RIGHT OUTER / FULL OUTER / CROSS, \
483                     with ON / USING / NATURAL)"
484                )));
485            }
486        };
487        joins.push(JoinClause {
488            join_type,
489            right_table,
490            right_alias,
491            constraint,
492        });
493    }
494
495    Ok((table_name, table_alias, joins))
496}
497
498fn extract_table_factor(tf: &TableFactor) -> Result<(String, Option<String>)> {
499    match tf {
500        TableFactor::Table { name, alias, .. } => {
501            let table_name = name.to_string();
502            let alias_name = alias.as_ref().map(|a| a.name.value.clone());
503            // We don't yet support alias column lists like `(c1, c2)` —
504            // they only matter for table-valued functions / derived
505            // tables, which we don't have either.
506            if let Some(a) = alias.as_ref()
507                && !a.columns.is_empty()
508            {
509                return Err(SQLRiteError::NotImplemented(
510                    "table alias column lists are not supported".to_string(),
511                ));
512            }
513            Ok((table_name, alias_name))
514        }
515        _ => Err(SQLRiteError::NotImplemented(
516            "only plain table references are supported in FROM / JOIN".to_string(),
517        )),
518    }
519}
520
521/// Lower a `sqlparser` join constraint into our [`JoinConstraintKind`].
522/// `ON` passes through; `USING` is narrowed to a list of bare column
523/// names; `NATURAL` defers to the executor. A constraint-less join
524/// (`A JOIN B` with no `ON` / `USING`) is rejected — `CROSS JOIN` is
525/// the supported way to ask for a cross product and is handled by
526/// [`convert_cross_constraint`].
527fn convert_constraint(constraint: &JoinConstraint) -> Result<JoinConstraintKind> {
528    match constraint {
529        JoinConstraint::On(expr) => Ok(JoinConstraintKind::On(Box::new(expr.clone()))),
530        JoinConstraint::Using(cols) => {
531            let names = cols
532                .iter()
533                .map(extract_using_column)
534                .collect::<Result<Vec<String>>>()?;
535            Ok(JoinConstraintKind::Using(names))
536        }
537        JoinConstraint::Natural => Ok(JoinConstraintKind::Natural),
538        JoinConstraint::None => Err(SQLRiteError::NotImplemented(
539            "JOIN without an ON / USING / NATURAL condition is not supported \
540             (use `... ON ...`, `... USING (...)`, `NATURAL JOIN`, or `CROSS JOIN`)"
541                .to_string(),
542        )),
543    }
544}
545
546/// Constraint handling for `CROSS JOIN`. The standard form carries no
547/// constraint and means "cross product", which we express as `ON true`
548/// so it flows through the same executor path as any other join.
549fn convert_cross_constraint(constraint: &JoinConstraint) -> Result<JoinConstraintKind> {
550    match constraint {
551        JoinConstraint::None => Ok(JoinConstraintKind::On(Box::new(true_literal()))),
552        // Non-standard, but if a constraint was attached to a CROSS JOIN,
553        // honor it instead of dropping it on the floor.
554        other => convert_constraint(other),
555    }
556}
557
558/// Pull a bare column name out of a `USING (...)` entry. `USING`
559/// columns are always simple identifiers; anything qualified or
560/// multi-part is rejected.
561fn extract_using_column(name: &ObjectName) -> Result<String> {
562    match name.0.as_slice() {
563        [ObjectNamePart::Identifier(ident)] => Ok(ident.value.clone()),
564        _ => Err(SQLRiteError::NotImplemented(format!(
565            "USING column must be a simple column name, got {name}"
566        ))),
567    }
568}
569
570/// An always-true boolean literal expression, used to rewrite
571/// `CROSS JOIN` into `INNER JOIN ... ON true`.
572fn true_literal() -> Expr {
573    Expr::Value(Value::Boolean(true).with_empty_span())
574}
575
576fn parse_projection(items: &[SelectItem]) -> Result<Projection> {
577    // Special-case `SELECT *`.
578    if items.len() == 1
579        && let SelectItem::Wildcard(_) = &items[0]
580    {
581        return Ok(Projection::All);
582    }
583    let mut out = Vec::with_capacity(items.len());
584    for item in items {
585        out.push(parse_select_item(item)?);
586    }
587    Ok(Projection::Items(out))
588}
589
590fn parse_select_item(item: &SelectItem) -> Result<ProjectionItem> {
591    match item {
592        SelectItem::UnnamedExpr(expr) => parse_projection_expr(expr, None),
593        SelectItem::ExprWithAlias { expr, alias } => {
594            parse_projection_expr(expr, Some(alias.value.clone()))
595        }
596        SelectItem::Wildcard(_) | SelectItem::QualifiedWildcard(_, _) => {
597            Err(SQLRiteError::NotImplemented(
598                "Wildcard mixed with other columns is not supported".to_string(),
599            ))
600        }
601    }
602}
603
604fn parse_projection_expr(expr: &Expr, alias: Option<String>) -> Result<ProjectionItem> {
605    match expr {
606        Expr::Identifier(ident) => Ok(ProjectionItem {
607            kind: ProjectionKind::Column {
608                qualifier: None,
609                name: ident.value.clone(),
610            },
611            alias,
612        }),
613        Expr::CompoundIdentifier(parts) => match parts.as_slice() {
614            [only] => Ok(ProjectionItem {
615                kind: ProjectionKind::Column {
616                    qualifier: None,
617                    name: only.value.clone(),
618                },
619                alias,
620            }),
621            [q, c] => Ok(ProjectionItem {
622                kind: ProjectionKind::Column {
623                    qualifier: Some(q.value.clone()),
624                    name: c.value.clone(),
625                },
626                alias,
627            }),
628            _ => Err(SQLRiteError::NotImplemented(format!(
629                "compound identifier with {} parts is not supported in projection",
630                parts.len()
631            ))),
632        },
633        Expr::Function(func) => {
634            let call = parse_aggregate_call(func)?;
635            Ok(ProjectionItem {
636                kind: ProjectionKind::Aggregate(call),
637                alias,
638            })
639        }
640        other => Err(SQLRiteError::NotImplemented(format!(
641            "Only bare column references and aggregate functions are supported in the projection list (got {other:?})"
642        ))),
643    }
644}
645
646pub(crate) fn parse_aggregate_call(func: &sqlparser::ast::Function) -> Result<AggregateCall> {
647    // Function name: only unqualified names like COUNT(...). Qualified
648    // names like `pkg.fn(...)` are out of scope.
649    let name = match func.name.0.as_slice() {
650        [sqlparser::ast::ObjectNamePart::Identifier(ident)] => ident.value.clone(),
651        _ => {
652            return Err(SQLRiteError::NotImplemented(format!(
653                "qualified function names not supported: {:?}",
654                func.name
655            )));
656        }
657    };
658    let agg_fn = AggregateFn::from_name(&name).ok_or_else(|| {
659        SQLRiteError::NotImplemented(format!(
660            "function '{name}' is not supported in the projection list (only aggregate functions are: COUNT, SUM, AVG, MIN, MAX)"
661        ))
662    })?;
663
664    // Aggregates only accept the basic List form. None / Subquery forms
665    // (CURRENT_TIMESTAMP, scalar subqueries) don't apply here.
666    let arg_list = match &func.args {
667        FunctionArguments::List(l) => l,
668        _ => {
669            return Err(SQLRiteError::NotImplemented(format!(
670                "{name}(...) — unsupported argument shape"
671            )));
672        }
673    };
674
675    let distinct = matches!(
676        arg_list.duplicate_treatment,
677        Some(DuplicateTreatment::Distinct)
678    );
679
680    if !arg_list.clauses.is_empty() {
681        return Err(SQLRiteError::NotImplemented(format!(
682            "{name}(...) — extra argument clauses (ORDER BY / LIMIT inside the call) are not supported"
683        )));
684    }
685    if func.over.is_some() {
686        return Err(SQLRiteError::NotImplemented(
687            "window functions (OVER (...)) are not supported".to_string(),
688        ));
689    }
690    if func.filter.is_some() {
691        return Err(SQLRiteError::NotImplemented(
692            "FILTER (WHERE ...) on aggregates is not supported".to_string(),
693        ));
694    }
695    if !func.within_group.is_empty() {
696        return Err(SQLRiteError::NotImplemented(
697            "WITHIN GROUP on aggregates is not supported".to_string(),
698        ));
699    }
700
701    if arg_list.args.len() != 1 {
702        return Err(SQLRiteError::NotImplemented(format!(
703            "{name}(...) expects exactly one argument, got {}",
704            arg_list.args.len()
705        )));
706    }
707
708    let arg = match &arg_list.args[0] {
709        FunctionArg::Unnamed(FunctionArgExpr::Wildcard) => AggregateArg::Star,
710        FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(ident))) => {
711            AggregateArg::Column {
712                qualifier: None,
713                name: ident.value.clone(),
714            }
715        }
716        FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::CompoundIdentifier(parts))) => {
717            match parts.as_slice() {
718                [only] => AggregateArg::Column {
719                    qualifier: None,
720                    name: only.value.clone(),
721                },
722                [q, c] => AggregateArg::Column {
723                    qualifier: Some(q.value.clone()),
724                    name: c.value.clone(),
725                },
726                _ => {
727                    return Err(SQLRiteError::NotImplemented(format!(
728                        "{name}(...) — argument identifier with {} parts is not supported",
729                        parts.len()
730                    )));
731                }
732            }
733        }
734        other => {
735            return Err(SQLRiteError::NotImplemented(format!(
736                "{name}(...) — argument must be `*` or a bare column reference (got {other:?})"
737            )));
738        }
739    };
740
741    // v1: only COUNT(DISTINCT col) is supported. SUM/AVG/MIN/MAX with
742    // DISTINCT are valid SQL but uncommon and add accumulator complexity
743    // we don't yet need.
744    if distinct && agg_fn != AggregateFn::Count {
745        return Err(SQLRiteError::NotImplemented(format!(
746            "DISTINCT is only supported on COUNT(...) for now, not {}",
747            agg_fn.as_str()
748        )));
749    }
750    if matches!(arg, AggregateArg::Star) && agg_fn != AggregateFn::Count {
751        return Err(SQLRiteError::NotImplemented(format!(
752            "{}(*) is not supported; use {}(<column>)",
753            agg_fn.as_str(),
754            agg_fn.as_str()
755        )));
756    }
757
758    Ok(AggregateCall {
759        func: agg_fn,
760        arg,
761        distinct,
762    })
763}
764
765fn parse_order_by(order_by: Option<&sqlparser::ast::OrderBy>) -> Result<Option<OrderByClause>> {
766    let Some(ob) = order_by else {
767        return Ok(None);
768    };
769    let exprs = match &ob.kind {
770        OrderByKind::Expressions(v) => v,
771        OrderByKind::All(_) => {
772            return Err(SQLRiteError::NotImplemented(
773                "ORDER BY ALL is not supported".to_string(),
774            ));
775        }
776    };
777    if exprs.len() != 1 {
778        return Err(SQLRiteError::NotImplemented(
779            "ORDER BY must have exactly one column for now".to_string(),
780        ));
781    }
782    let obe = &exprs[0];
783    // Phase 7b: accept arbitrary expressions, not just bare column refs.
784    // The executor's `sort_rowids` evaluates this expression per row via
785    // `eval_expr`, which handles Identifier (column lookup), Function
786    // (vec_distance_*), arithmetic, etc. uniformly. The previous
787    // column-name-only restriction has been lifted.
788    let expr = obe.expr.clone();
789    // `asc == None` is the dialect default (ASC).
790    let ascending = obe.options.asc.unwrap_or(true);
791    Ok(Some(OrderByClause { expr, ascending }))
792}
793
794fn parse_limit(limit: Option<&LimitClause>) -> Result<Option<usize>> {
795    let Some(lc) = limit else {
796        return Ok(None);
797    };
798    let limit_expr = match lc {
799        LimitClause::LimitOffset { limit, offset, .. } => {
800            if offset.is_some() {
801                return Err(SQLRiteError::NotImplemented(
802                    "OFFSET is not supported yet".to_string(),
803                ));
804            }
805            limit.as_ref()
806        }
807        LimitClause::OffsetCommaLimit { .. } => {
808            return Err(SQLRiteError::NotImplemented(
809                "`LIMIT <offset>, <limit>` syntax is not supported yet".to_string(),
810            ));
811        }
812    };
813    let Some(expr) = limit_expr else {
814        return Ok(None);
815    };
816    let n = eval_const_usize(expr)?;
817    Ok(Some(n))
818}
819
820fn eval_const_usize(expr: &Expr) -> Result<usize> {
821    match expr {
822        Expr::Value(v) => match &v.value {
823            sqlparser::ast::Value::Number(n, _) => n.parse::<usize>().map_err(|e| {
824                SQLRiteError::Internal(format!("LIMIT must be a non-negative integer: {e}"))
825            }),
826            _ => Err(SQLRiteError::Internal(
827                "LIMIT must be an integer literal".to_string(),
828            )),
829        },
830        _ => Err(SQLRiteError::NotImplemented(
831            "LIMIT expression must be a literal number".to_string(),
832        )),
833    }
834}