// fsqlite_parser/semantic.rs

//! Semantic analysis: name resolution, type checking, and scope validation.
//!
//! Validates AST nodes against a schema to ensure:
//! - Column references resolve to known tables/columns
//! - Table aliases are unique within a query scope
//! - Function arity matches known functions
//! - CTE names are visible in the correct scope
//! - Type affinity is tracked for expression results
//!
//! # Usage
//!
//! ```ignore
//! let mut schema = Schema::new();
//! schema.add_table(TableDef { name: "users", columns: vec![...] });
//! let mut resolver = Resolver::new(&schema);
//! let errors = resolver.resolve_statement(&stmt);
//! ```
18
19use std::collections::{HashMap, HashSet};
20use std::sync::atomic::{AtomicU64, Ordering};
21
22use fsqlite_ast::{
23    ColumnRef, Expr, FromClause, FunctionArgs, InSet, JoinClause, JoinConstraint, ResultColumn,
24    SelectCore, SelectStatement, Statement, TableOrSubquery, WithClause,
25};
26use fsqlite_types::TypeAffinity;
27
28// ---------------------------------------------------------------------------
29// Metrics
30// ---------------------------------------------------------------------------
31
/// Process-wide running total of semantic errors reported by the resolver.
///
/// Only ever incremented during analysis; `reset_semantic_metrics` puts it
/// back to zero.
static FSQLITE_SEMANTIC_ERRORS_TOTAL: AtomicU64 = AtomicU64::new(0);

/// Copy of the semantic-analysis counters as they stood at one moment.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct SemanticMetricsSnapshot {
    /// Value of the semantic error counter when the snapshot was taken.
    pub fsqlite_semantic_errors_total: u64,
}

/// Capture the current values of the semantic metrics.
#[must_use]
pub fn semantic_metrics_snapshot() -> SemanticMetricsSnapshot {
    let fsqlite_semantic_errors_total = FSQLITE_SEMANTIC_ERRORS_TOTAL.load(Ordering::Relaxed);
    SemanticMetricsSnapshot {
        fsqlite_semantic_errors_total,
    }
}

/// Set every semantic metric counter back to zero.
pub fn reset_semantic_metrics() {
    FSQLITE_SEMANTIC_ERRORS_TOTAL.store(0, Ordering::Relaxed);
}
53
54// ---------------------------------------------------------------------------
55// Schema types
56// ---------------------------------------------------------------------------
57
/// A column definition in the schema.
///
/// Plain data record describing one column of a [`TableDef`]. Name lookups
/// against it (see [`TableDef::find_column`]) are ASCII case-insensitive even
/// though the name itself keeps its original spelling.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ColumnDef {
    /// Column name (stored in original case).
    pub name: String,
    /// Type affinity determined from the DDL type name.
    pub affinity: TypeAffinity,
    /// Whether this column is an INTEGER PRIMARY KEY (rowid alias).
    ///
    /// [`TableDef::is_rowid_alias`] returns this flag when asked about a
    /// declared column of a rowid table.
    pub is_ipk: bool,
    /// Whether this column has a NOT NULL constraint.
    pub not_null: bool,
}
70
71/// A table definition in the schema.
72#[derive(Debug, Clone)]
73pub struct TableDef {
74    /// Table name.
75    pub name: String,
76    /// Column definitions in declaration order.
77    pub columns: Vec<ColumnDef>,
78    /// Whether this is a WITHOUT ROWID table.
79    pub without_rowid: bool,
80    /// Whether this is a STRICT table.
81    pub strict: bool,
82}
83
84impl TableDef {
85    /// Find a column by name (case-insensitive).
86    #[must_use]
87    pub fn find_column(&self, name: &str) -> Option<&ColumnDef> {
88        self.columns
89            .iter()
90            .find(|c| c.name.eq_ignore_ascii_case(name))
91    }
92
93    /// Check if this table has a column with the given name (case-insensitive).
94    #[must_use]
95    pub fn has_column(&self, name: &str) -> bool {
96        self.find_column(name).is_some()
97    }
98
99    /// Check if a name is a rowid alias for this table.
100    #[must_use]
101    pub fn is_rowid_alias(&self, name: &str) -> bool {
102        if self.without_rowid {
103            return false;
104        }
105        if let Some(column) = self.find_column(name) {
106            return column.is_ipk;
107        }
108        is_hidden_rowid_alias_name(name)
109    }
110}
111
/// Report whether `name` is one of the implicit rowid spellings
/// (`rowid`, `_rowid_`, `oid`), compared ASCII case-insensitively.
///
/// Compares in place instead of building a lowercased copy, so the check
/// performs no heap allocation; it is reached once per candidate alias
/// during column resolution.
fn is_hidden_rowid_alias_name(name: &str) -> bool {
    ["rowid", "_rowid_", "oid"]
        .iter()
        .any(|&hidden| name.eq_ignore_ascii_case(hidden))
}
118
119/// The database schema: a collection of table definitions.
120#[derive(Debug, Clone, Default)]
121pub struct Schema {
122    /// Tables by lowercase name.
123    tables: HashMap<String, TableDef>,
124}
125
126impl Schema {
127    /// Create an empty schema.
128    #[must_use]
129    pub fn new() -> Self {
130        Self::default()
131    }
132
133    /// Add a table definition.
134    pub fn add_table(&mut self, table: TableDef) {
135        self.tables.insert(table.name.to_ascii_lowercase(), table);
136    }
137
138    /// Look up a table by name (case-insensitive).
139    #[must_use]
140    pub fn find_table(&self, name: &str) -> Option<&TableDef> {
141        self.tables.get(&name.to_ascii_lowercase())
142    }
143
144    /// Number of tables in the schema.
145    #[must_use]
146    pub fn table_count(&self) -> usize {
147        self.tables.len()
148    }
149}
150
151// ---------------------------------------------------------------------------
152// Scope tracking
153// ---------------------------------------------------------------------------
154
155/// A name scope for query resolution. Scopes nest for subqueries and CTEs.
156#[derive(Debug, Clone)]
157pub struct Scope {
158    /// Table aliases visible in this scope: alias → table name.
159    aliases: HashMap<String, String>,
160    /// Columns visible from each alias: alias → set of column names.
161    /// None means the columns are unknown (CTE or subquery), so any column reference is optimistically accepted.
162    columns: HashMap<String, Option<HashSet<String>>>,
163    /// Columns that were joined via `USING` and are therefore unambiguous.
164    pub using_columns: HashSet<String>,
165    /// CTE names visible in this scope.
166    ctes: HashSet<String>,
167    /// Aliases that can only be referenced by qualified names (e.g. UPSERT's "excluded").
168    qualified_only: HashSet<String>,
169    /// Parent scope (for subquery nesting).
170    parent: Option<Box<Self>>,
171}
172
173impl Scope {
174    /// Create a root scope.
175    #[must_use]
176    pub fn root() -> Self {
177        Self {
178            aliases: HashMap::new(),
179            columns: HashMap::new(),
180            using_columns: HashSet::new(),
181            ctes: HashSet::new(),
182            qualified_only: HashSet::new(),
183            parent: None,
184        }
185    }
186
187    /// Create a child scope (for subqueries).
188    #[must_use]
189    pub fn child(parent: Self) -> Self {
190        Self {
191            aliases: HashMap::new(),
192            columns: HashMap::new(),
193            using_columns: HashSet::new(),
194            ctes: HashSet::new(),
195            qualified_only: HashSet::new(),
196            parent: Some(Box::new(parent)),
197        }
198    }
199
200    /// Register a table alias with its columns.
201    pub fn add_alias(&mut self, alias: &str, table_name: &str, columns: Option<HashSet<String>>) {
202        let key = alias.to_ascii_lowercase();
203        if self.aliases.contains_key(&key) {
204            self.aliases.insert(key.clone(), "<AMBIGUOUS>".to_owned());
205            self.columns.insert(key, None);
206        } else {
207            self.aliases.insert(key.clone(), table_name.to_owned());
208            self.columns.insert(key, columns);
209        }
210    }
211
212    /// Register an alias that does not participate in unqualified column resolution.
213    pub fn add_qualified_only_alias(
214        &mut self,
215        alias: &str,
216        table_name: &str,
217        columns: Option<HashSet<String>>,
218    ) {
219        self.add_alias(alias, table_name, columns);
220        self.qualified_only.insert(alias.to_ascii_lowercase());
221    }
222
223    /// Register a CTE name.
224    pub fn add_cte(&mut self, name: &str) {
225        self.ctes.insert(name.to_ascii_lowercase());
226    }
227
228    /// Check if a CTE is visible in this scope (or parent scopes).
229    #[must_use]
230    pub fn has_cte(&self, name: &str) -> bool {
231        let key = name.to_ascii_lowercase();
232        if self.ctes.contains(&key) {
233            return true;
234        }
235        self.parent.as_ref().is_some_and(|p| p.has_cte(name))
236    }
237
238    /// Check if an alias is visible in this scope (or parent scopes).
239    #[must_use]
240    pub fn has_alias(&self, alias: &str) -> bool {
241        let key = alias.to_ascii_lowercase();
242        if self.aliases.contains_key(&key) {
243            return true;
244        }
245        self.parent.as_ref().is_some_and(|p| p.has_alias(alias))
246    }
247
248    /// Check if an alias is defined locally in this scope.
249    #[must_use]
250    pub fn has_alias_local(&self, alias: &str) -> bool {
251        let key = alias.to_ascii_lowercase();
252        self.aliases.contains_key(&key)
253    }
254
255    /// Resolve a column reference: find which alias provides it.
256    ///
257    /// If `table_qualifier` is Some, checks only that alias.
258    /// If None, searches all visible aliases for the column name.
259    /// Returns the resolved (alias, column_name) or None.
260    #[must_use]
261    pub fn resolve_column(
262        &self,
263        schema: &Schema,
264        table_qualifier: Option<&str>,
265        column_name: &str,
266    ) -> ResolveResult {
267        let col_lower = column_name.to_ascii_lowercase();
268
269        if let Some(qualifier) = table_qualifier {
270            let key = qualifier.to_ascii_lowercase();
271            if self.aliases.get(&key).map(String::as_str) == Some("<AMBIGUOUS>") {
272                return ResolveResult::Ambiguous(vec![key]);
273            }
274            if let Some(cols) = self.columns.get(&key) {
275                if cols.as_ref().is_none_or(|c| c.contains(&col_lower)) {
276                    return ResolveResult::Resolved(key);
277                }
278                if let Some(table_name) = self.aliases.get(&key) {
279                    if let Some(table_def) = schema.find_table(table_name) {
280                        if table_def.is_rowid_alias(&col_lower) {
281                            return ResolveResult::Resolved(key);
282                        }
283                    }
284                }
285                return ResolveResult::ColumnNotFound;
286            }
287            // Check parent scope.
288            if let Some(ref parent) = self.parent {
289                return parent.resolve_column(schema, table_qualifier, column_name);
290            }
291            return ResolveResult::TableNotFound;
292        }
293
294        // Unqualified: search all aliases in this scope.
295        let mut known_matches = Vec::new();
296        let mut unknown_matches = Vec::new();
297
298        for (alias, cols) in &self.columns {
299            if self.qualified_only.contains(alias) {
300                continue;
301            }
302            if self.aliases.get(alias).map(String::as_str) == Some("<AMBIGUOUS>") {
303                continue; // Do not resolve unqualified columns from ambiguous aliases
304            }
305            let is_match = match cols {
306                Some(c) => {
307                    c.contains(&col_lower) || {
308                        self.aliases
309                            .get(alias)
310                            .and_then(|t| schema.find_table(t))
311                            .is_some_and(|td| td.is_rowid_alias(&col_lower))
312                    }
313                }
314                None => true,
315            };
316            if is_match {
317                if cols.is_some() {
318                    known_matches.push(alias.clone());
319                } else {
320                    unknown_matches.push(alias.clone());
321                }
322            }
323        }
324
325        match (known_matches.len(), unknown_matches.len()) {
326            (0, 0) => {
327                // Check parent scope.
328                if let Some(ref parent) = self.parent {
329                    return parent.resolve_column(schema, None, column_name);
330                }
331                ResolveResult::ColumnNotFound
332            }
333            (1, 0) => ResolveResult::Resolved(known_matches.into_iter().next().unwrap_or_default()),
334            (0, 1) => {
335                ResolveResult::Resolved(unknown_matches.into_iter().next().unwrap_or_default())
336            }
337            _ => {
338                let mut all_matches = known_matches;
339                all_matches.extend(unknown_matches);
340                all_matches.sort();
341                if self.using_columns.contains(&col_lower) {
342                    // For USING columns, just pick the first one (they are equivalent).
343                    ResolveResult::Resolved(all_matches.into_iter().next().unwrap_or_default())
344                } else if all_matches.contains(&"<output>".to_owned()) {
345                    ResolveResult::Resolved("<output>".to_owned())
346                } else {
347                    ResolveResult::Ambiguous(all_matches)
348                }
349            }
350        }
351    }
352
353    /// Number of aliases registered in this scope (not counting parents).
354    #[must_use]
355    pub fn alias_count(&self) -> usize {
356        self.aliases.len()
357    }
358
359    /// Return known column sets from all local aliases (for NATURAL JOIN).
360    /// Aliases with unknown columns (`None`) are omitted.
361    #[must_use]
362    pub fn known_local_column_sets(&self) -> Vec<&HashSet<String>> {
363        self.columns
364            .values()
365            .filter_map(|opt| opt.as_ref())
366            .collect()
367    }
368
369    /// Return the column set for a specific alias (lowercased lookup).
370    #[must_use]
371    pub fn columns_for_alias(&self, alias: &str) -> Option<&HashSet<String>> {
372        self.columns
373            .get(&alias.to_ascii_lowercase())
374            .and_then(|opt| opt.as_ref())
375    }
376}
377
/// Result of resolving a column reference.
///
/// Produced by [`Scope::resolve_column`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ResolveResult {
    /// Column resolved to the given alias.
    ///
    /// The alias is the lowercased key under which it was registered.
    Resolved(String),
    /// The table qualifier was not found.
    TableNotFound,
    /// The column was not found in the specified table.
    ///
    /// Also returned for an unqualified name that no visible alias provides.
    ColumnNotFound,
    /// The column was found in multiple tables (ambiguous).
    ///
    /// Carries the candidate aliases; for a qualifier that was itself
    /// registered twice, the list holds just that alias.
    Ambiguous(Vec<String>),
}
390
391// ---------------------------------------------------------------------------
392// Semantic errors
393// ---------------------------------------------------------------------------
394
/// A semantic analysis error.
///
/// Pairs a machine-readable [`SemanticErrorKind`] with the text that the
/// `Display` implementation prints.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SemanticError {
    /// Error kind.
    pub kind: SemanticErrorKind,
    /// Human-readable message.
    pub message: String,
}
403
/// Kinds of semantic errors.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SemanticErrorKind {
    /// Column reference could not be resolved.
    UnresolvedColumn {
        /// Table qualifier written on the reference, if any.
        table: Option<String>,
        /// Column name as referenced.
        column: String,
    },
    /// Column reference is ambiguous (exists in multiple tables).
    AmbiguousColumn {
        /// Column name as referenced.
        column: String,
        /// Aliases that could each supply the column.
        candidates: Vec<String>,
    },
    /// Table or alias not found.
    UnresolvedTable { name: String },
    /// Duplicate alias in the same scope.
    DuplicateAlias { alias: String },
    /// Function called with wrong number of arguments.
    FunctionArityMismatch {
        /// Name of the function that was called.
        function: String,
        /// Argument counts the function accepts.
        expected: FunctionArity,
        /// Number of arguments actually supplied.
        actual: usize,
    },
    /// SELECT * used without any tables in scope.
    NoTablesSpecifiedForStar,
    /// Type coercion warning (not fatal).
    ImplicitTypeCoercion {
        /// Affinity of the value being coerced.
        from: TypeAffinity,
        /// Affinity it is coerced to.
        to: TypeAffinity,
        /// Where the coercion happens (free-form text).
        context: String,
    },
}
436
/// Expected function arity.
///
/// Reported in [`SemanticErrorKind::FunctionArityMismatch`] next to the
/// actual argument count.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FunctionArity {
    /// Exact number of arguments.
    Exact(usize),
    /// Range of acceptable argument counts.
    // NOTE(review): presumably (min, max) with both ends inclusive — confirm
    // against the arity check that consumes this.
    Range(usize, usize),
    /// Any number of arguments.
    Variadic,
    /// Minimum number of arguments.
    VariadicMin(usize),
}
449
450impl std::fmt::Display for SemanticError {
451    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
452        write!(f, "{}", self.message)
453    }
454}
455
456// ---------------------------------------------------------------------------
457// Resolver
458// ---------------------------------------------------------------------------
459
/// The semantic analyzer / name resolver.
///
/// Given a `Schema` and an AST, validates all name references and collects
/// errors. Uses scope tracking for nested queries and CTEs.
pub struct Resolver<'a> {
    /// Schema that table and column names are checked against.
    schema: &'a Schema,
    /// Errors collected for the statement currently being resolved; cleared
    /// at the start of every `resolve_statement` call.
    errors: Vec<SemanticError>,
    /// Per-statement counter, reset by `resolve_statement` and recorded on
    /// its tracing span as `tables_resolved`.
    tables_resolved: u64,
    /// Per-statement counter, reset by `resolve_statement` and recorded on
    /// its tracing span as `columns_bound`.
    columns_bound: u64,
}
470
471impl<'a> Resolver<'a> {
472    /// Create a new resolver for the given schema.
473    #[must_use]
474    pub fn new(schema: &'a Schema) -> Self {
475        Self {
476            schema,
477            errors: Vec::new(),
478            tables_resolved: 0,
479            columns_bound: 0,
480        }
481    }
482
483    /// Resolve all name references in a statement.
484    ///
485    /// Returns the list of semantic errors found.
486    pub fn resolve_statement(&mut self, stmt: &Statement) -> Vec<SemanticError> {
487        let span = tracing::debug_span!(
488            target: "fsqlite.parse",
489            "semantic_analysis",
490            tables_resolved = tracing::field::Empty,
491            columns_bound = tracing::field::Empty,
492            errors = tracing::field::Empty,
493        );
494        let _guard = span.enter();
495
496        self.errors.clear();
497        self.tables_resolved = 0;
498        self.columns_bound = 0;
499
500        let mut scope = Scope::root();
501        self.resolve_stmt_inner(stmt, &mut scope);
502
503        span.record("tables_resolved", self.tables_resolved);
504        span.record("columns_bound", self.columns_bound);
505        span.record("errors", self.errors.len() as u64);
506
507        // Record error metrics.
508        if !self.errors.is_empty() {
509            FSQLITE_SEMANTIC_ERRORS_TOTAL.fetch_add(self.errors.len() as u64, Ordering::Relaxed);
510        }
511
512        self.errors.clone()
513    }
514
    /// Resolve the names in one statement, dispatching on its kind.
    ///
    /// SELECT, INSERT, UPDATE and DELETE are handled; every other statement
    /// kind is ignored. `scope` is mutated as CTEs and target tables are
    /// bound, and several snapshots of it are taken along the way so that
    /// each clause only sees the names it is allowed to see. The order of the
    /// bind/clone/resolve steps below therefore matters.
    fn resolve_stmt_inner(&mut self, stmt: &Statement, scope: &mut Scope) {
        match stmt {
            Statement::Select(select) => self.resolve_select(select, scope),
            Statement::Insert(insert) => {
                // Process WITH clause CTEs if present.
                if let Some(ref with) = insert.with {
                    self.resolve_with_clause(with, scope);
                }

                // Resolve the data source (VALUES or SELECT).
                // The target table is NOT visible to the body.
                match &insert.source {
                    fsqlite_ast::InsertSource::Values(rows) => {
                        for row in rows {
                            for expr in row {
                                self.resolve_expr(expr, scope);
                            }
                        }
                    }
                    fsqlite_ast::InsertSource::Select(select) => {
                        // Resolve on a copy so aliases from the source SELECT
                        // do not leak into the statement scope.
                        let mut source_scope = scope.clone();
                        self.resolve_select(select, &mut source_scope);
                    }
                    fsqlite_ast::InsertSource::DefaultValues => {}
                }

                // Bind the target table so RETURNING or UPSERT can reference it.
                self.bind_table_to_scope(&insert.table.name, None, scope);

                // Scope strictly for target column checks
                // A target that names a CTE gets an unknown column list; a
                // target found in the schema gets its declared columns. If it
                // is neither, `target_scope` stays empty.
                let mut target_scope = Scope::root();
                if scope.has_cte(&insert.table.name) {
                    target_scope.add_alias(&insert.table.name, &insert.table.name, None);
                } else if let Some(table_def) = self.schema.find_table(&insert.table.name) {
                    let col_set: HashSet<String> = table_def
                        .columns
                        .iter()
                        .map(|c| c.name.to_ascii_lowercase())
                        .collect();
                    target_scope.add_alias(&insert.table.name, &insert.table.name, Some(col_set));
                }

                for col in &insert.columns {
                    self.resolve_unqualified_column(col, &target_scope, false);
                }

                // Resolve UPSERT.
                for upsert in &insert.upsert {
                    // Conflict-target expressions see the statement scope.
                    if let Some(target) = &upsert.target {
                        for col in &target.columns {
                            self.resolve_expr(&col.expr, scope);
                        }
                        if let Some(where_clause) = &target.where_clause {
                            self.resolve_expr(where_clause, scope);
                        }
                    }
                    match &upsert.action {
                        fsqlite_ast::UpsertAction::Update {
                            assignments,
                            where_clause,
                        } => {
                            // DO UPDATE gets a nested scope holding the
                            // qualified-only "excluded" pseudo-table plus the
                            // target under its alias (or its own name).
                            let mut upsert_scope = Scope::child(scope.clone());
                            let alias_name = insert.alias.as_deref().unwrap_or(&insert.table.name);
                            if let Some(table_def) = self.schema.find_table(&insert.table.name) {
                                let col_set: HashSet<String> = table_def
                                    .columns
                                    .iter()
                                    .map(|c| c.name.to_ascii_lowercase())
                                    .collect();
                                upsert_scope.add_qualified_only_alias(
                                    "excluded",
                                    &insert.table.name,
                                    Some(col_set.clone()),
                                );
                                upsert_scope.add_alias(
                                    alias_name,
                                    &insert.table.name,
                                    Some(col_set),
                                );
                            } else {
                                // Target not in the schema: register both
                                // names with unknown columns.
                                upsert_scope.add_qualified_only_alias("excluded", "<pseudo>", None);
                                upsert_scope.add_alias(alias_name, "<pseudo>", None);
                            }

                            for assignment in assignments {
                                // Assigned columns are checked against the
                                // target table only; the value expression
                                // sees the full upsert scope.
                                match &assignment.target {
                                    fsqlite_ast::AssignmentTarget::Column(col) => {
                                        self.resolve_unqualified_column(col, &target_scope, false);
                                    }
                                    fsqlite_ast::AssignmentTarget::ColumnList(cols) => {
                                        for col in cols {
                                            self.resolve_unqualified_column(
                                                col,
                                                &target_scope,
                                                false,
                                            );
                                        }
                                    }
                                }
                                self.resolve_expr(&assignment.value, &upsert_scope);
                            }
                            if let Some(w) = where_clause {
                                self.resolve_expr(w, &upsert_scope);
                            }
                        }
                        fsqlite_ast::UpsertAction::Nothing => {}
                    }
                }
                for ret in &insert.returning {
                    self.resolve_result_column(ret, scope);
                }
            }
            Statement::Update(update) => {
                // Process WITH clause CTEs if present.
                if let Some(ref with) = update.with {
                    self.resolve_with_clause(with, scope);
                }

                // LIMIT and OFFSET cannot reference target or FROM tables.
                // Snapshot taken before any table is bound.
                let limit_scope = scope.clone();

                self.bind_table_to_scope(
                    &update.table.name.name,
                    update.table.alias.as_deref(),
                    scope,
                );

                // Scope strictly for target column checks
                let mut target_scope = Scope::root();
                self.bind_table_to_scope(
                    &update.table.name.name,
                    update.table.alias.as_deref(),
                    &mut target_scope,
                );

                // The RETURNING clause can ONLY see the target table (and outer scopes/CTEs).
                // It CANNOT see tables from the FROM clause.
                // Snapshot taken after the target is bound but before FROM is resolved.
                let returning_scope = scope.clone();

                for assignment in &update.assignments {
                    match &assignment.target {
                        fsqlite_ast::AssignmentTarget::Column(col) => {
                            self.resolve_unqualified_column(col, &target_scope, false);
                        }
                        fsqlite_ast::AssignmentTarget::ColumnList(cols) => {
                            for col in cols {
                                self.resolve_unqualified_column(col, &target_scope, false);
                            }
                        }
                    }
                }
                if let Some(from) = &update.from {
                    self.resolve_from(from, scope);
                }
                // Assigned values, WHERE and ORDER BY see target + FROM tables.
                for assignment in &update.assignments {
                    self.resolve_expr(&assignment.value, scope);
                }
                if let Some(where_clause) = &update.where_clause {
                    self.resolve_expr(where_clause, scope);
                }
                for ret in &update.returning {
                    self.resolve_result_column(ret, &returning_scope);
                }
                for term in &update.order_by {
                    self.resolve_expr(&term.expr, scope);
                }
                if let Some(limit) = &update.limit {
                    self.resolve_expr(&limit.limit, &limit_scope);
                    if let Some(offset) = &limit.offset {
                        self.resolve_expr(offset, &limit_scope);
                    }
                }
            }
            Statement::Delete(delete) => {
                // Process WITH clause CTEs if present.
                if let Some(ref with) = delete.with {
                    self.resolve_with_clause(with, scope);
                }

                // LIMIT and OFFSET cannot reference the target table.
                // Snapshot taken before the target is bound.
                let limit_scope = scope.clone();

                self.bind_table_to_scope(
                    &delete.table.name.name,
                    delete.table.alias.as_deref(),
                    scope,
                );
                if let Some(where_clause) = &delete.where_clause {
                    self.resolve_expr(where_clause, scope);
                }
                for ret in &delete.returning {
                    self.resolve_result_column(ret, scope);
                }
                for term in &delete.order_by {
                    self.resolve_expr(&term.expr, scope);
                }
                if let Some(limit) = &delete.limit {
                    self.resolve_expr(&limit.limit, &limit_scope);
                    if let Some(offset) = &limit.offset {
                        self.resolve_expr(offset, &limit_scope);
                    }
                }
            }
            // DDL and control statements don't need name resolution.
            _ => {}
        }
    }
722
723    fn resolve_with_clause(&mut self, with: &WithClause, scope: &mut Scope) {
724        if with.recursive {
725            // In WITH RECURSIVE, all CTE names are visible to all CTE bodies.
726            for cte in &with.ctes {
727                scope.add_cte(&cte.name);
728            }
729            for cte in &with.ctes {
730                let mut cte_scope = scope.clone();
731                self.resolve_select(&cte.query, &mut cte_scope);
732            }
733        } else {
734            // In plain WITH, a CTE body can only see previously defined CTEs.
735            for cte in &with.ctes {
736                let mut cte_scope = scope.clone();
737                self.resolve_select(&cte.query, &mut cte_scope);
738                // Add *after* resolving the query so it can't see itself or subsequent CTEs.
739                scope.add_cte(&cte.name);
740            }
741        }
742    }
743
744    // SQLite compound SELECTs allow ORDER BY terms to reuse a projected
745    // expression verbatim, even though underlying table aliases are no longer
746    // in scope at the compound boundary.
747    fn compound_order_by_matches_output_expr(select: &SelectStatement, order_expr: &Expr) -> bool {
748        if select.body.compounds.is_empty() {
749            return false;
750        }
751
752        std::iter::once(&select.body.select)
753            .chain(select.body.compounds.iter().map(|(_, core)| core))
754            .filter_map(|core| match core {
755                SelectCore::Select { columns, .. } => Some(columns.iter()),
756                _ => None,
757            })
758            .flatten()
759            .any(|column| match column {
760                ResultColumn::Expr { expr, .. } => expr == order_expr,
761                _ => false,
762            })
763    }
764
765    fn resolve_select(&mut self, select: &SelectStatement, scope: &mut Scope) {
766        // Register CTEs if present.
767        if let Some(ref with) = select.with {
768            self.resolve_with_clause(with, scope);
769        }
770
771        // Resolve the primary select core in an isolated scope.
772        let mut first_core_scope = scope.clone();
773        self.resolve_select_core(&select.body.select, &mut first_core_scope);
774
775        // Resolve any compound queries (UNION, INTERSECT, EXCEPT) in isolated scopes.
776        for (_op, core) in &select.body.compounds {
777            let mut comp_scope = scope.clone();
778            self.resolve_select_core(core, &mut comp_scope);
779        }
780
781        // Resolve ORDER BY against the appropriate scope.
782        let mut order_by_scope = if select.body.compounds.is_empty() {
783            first_core_scope.clone()
784        } else {
785            scope.clone() // Compounds can only see outer scope + result columns
786        };
787
788        let mut output_cols = HashSet::new();
789        for core in std::iter::once(&select.body.select)
790            .chain(select.body.compounds.iter().map(|(_, core)| core))
791        {
792            if let SelectCore::Select { columns, .. } = core {
793                for col in columns {
794                    match col {
795                        ResultColumn::Expr {
796                            alias: Some(alias_id),
797                            ..
798                        } => {
799                            output_cols.insert(alias_id.to_ascii_lowercase());
800                        }
801                        ResultColumn::Expr {
802                            expr: Expr::Column(col_ref, _),
803                            ..
804                        } => {
805                            output_cols.insert(col_ref.column.to_ascii_lowercase());
806                        }
807                        _ => {}
808                    }
809                }
810            }
811        }
812        if !output_cols.is_empty() {
813            // Add the output columns as a pseudo-table so ORDER BY can reference them.
814            order_by_scope.add_alias("<output>", "<output>", Some(output_cols));
815        }
816
817        for term in &select.order_by {
818            if Self::compound_order_by_matches_output_expr(select, &term.expr) {
819                continue;
820            }
821            self.resolve_expr(&term.expr, &order_by_scope);
822        }
823
824        // Resolve LIMIT against the base scope (no FROM aliases).
825        if let Some(limit) = &select.limit {
826            self.resolve_expr(&limit.limit, scope);
827            if let Some(offset) = &limit.offset {
828                self.resolve_expr(offset, scope);
829            }
830        }
831    }
832
833    fn resolve_select_core(&mut self, core: &SelectCore, scope: &mut Scope) {
834        match core {
835            SelectCore::Select {
836                columns,
837                from,
838                where_clause,
839                group_by,
840                having,
841                windows,
842                ..
843            } => {
844                // Resolve FROM clause first (registers table aliases).
845                if let Some(from) = from {
846                    self.resolve_from(from, scope);
847                }
848
849                // Resolve column references in SELECT list.
850                for col in columns {
851                    self.resolve_result_column(col, scope);
852                }
853
854                // Resolve WHERE clause.
855                if let Some(where_expr) = where_clause {
856                    self.resolve_expr(where_expr, scope);
857                }
858
859                // Create a scope for GROUP BY, HAVING, and WINDOW that includes output columns.
860                let mut post_select_scope = scope.clone();
861                let mut output_cols = HashSet::new();
862                for col in columns {
863                    if let ResultColumn::Expr {
864                        alias: Some(alias_id),
865                        ..
866                    } = col
867                    {
868                        output_cols.insert(alias_id.to_ascii_lowercase());
869                    } else if let ResultColumn::Expr {
870                        expr: Expr::Column(col_ref, _),
871                        ..
872                    } = col
873                    {
874                        output_cols.insert(col_ref.column.to_ascii_lowercase());
875                    }
876                }
877                if !output_cols.is_empty() {
878                    post_select_scope.add_alias("<output>", "<output>", Some(output_cols));
879                } else {
880                    post_select_scope.add_alias("<output>", "<output>", None);
881                }
882
883                for expr in group_by {
884                    self.resolve_expr(expr, &post_select_scope);
885                }
886                if let Some(having) = having {
887                    self.resolve_expr(having, &post_select_scope);
888                }
889                for window in windows {
890                    for part in &window.spec.partition_by {
891                        self.resolve_expr(part, &post_select_scope);
892                    }
893                    for order in &window.spec.order_by {
894                        self.resolve_expr(&order.expr, &post_select_scope);
895                    }
896                }
897            }
898            SelectCore::Values(rows) => {
899                for row in rows {
900                    for expr in row {
901                        self.resolve_expr(expr, scope);
902                    }
903                }
904            }
905        }
906    }
907
908    fn resolve_from(&mut self, from: &FromClause, scope: &mut Scope) {
909        self.resolve_table_or_subquery(&from.source, scope);
910
911        for join in &from.joins {
912            self.resolve_join(join, scope);
913        }
914    }
915
916    fn resolve_table_or_subquery(&mut self, tos: &TableOrSubquery, scope: &mut Scope) {
917        match tos {
918            TableOrSubquery::Table { name, alias, .. } => {
919                let table_name = &name.name;
920                let alias_name = alias.as_deref().unwrap_or(table_name);
921
922                // Check for duplicate alias in the CURRENT scope only.
923                if scope.has_alias_local(alias_name) {
924                    self.push_error(SemanticErrorKind::DuplicateAlias {
925                        alias: alias_name.to_owned(),
926                    });
927                }
928
929                // Resolve table name against schema or CTEs.
930                if scope.has_cte(table_name) {
931                    // CTE reference — columns are unknown at this stage.
932                    scope.add_alias(alias_name, table_name, None);
933                    self.tables_resolved += 1;
934                } else if let Some(table_def) = self.schema.find_table(table_name) {
935                    let col_set: HashSet<String> = table_def
936                        .columns
937                        .iter()
938                        .map(|c| c.name.to_ascii_lowercase())
939                        .collect();
940                    scope.add_alias(alias_name, table_name, Some(col_set));
941                    self.tables_resolved += 1;
942                } else {
943                    self.push_error(SemanticErrorKind::UnresolvedTable {
944                        name: table_name.clone(),
945                    });
946                }
947            }
948            TableOrSubquery::Subquery { query, alias, .. } => {
949                // Resolve subquery in a child scope.
950                let mut child = Scope::child(scope.clone());
951                self.resolve_select(query, &mut child);
952
953                let alias_name = if let Some(a) = alias {
954                    a.clone()
955                } else {
956                    format!("<subquery_{}>", self.tables_resolved)
957                };
958
959                if !alias_name.starts_with("<subquery_") && scope.has_alias_local(&alias_name) {
960                    self.push_error(SemanticErrorKind::DuplicateAlias {
961                        alias: alias_name.clone(),
962                    });
963                }
964
965                let mut output_cols = HashSet::new();
966                let mut is_complete = true;
967                if let SelectCore::Select { columns, .. } = &query.body.select {
968                    for col in columns {
969                        match col {
970                            ResultColumn::Expr {
971                                alias: Some(alias_id),
972                                ..
973                            } => {
974                                output_cols.insert(alias_id.to_ascii_lowercase());
975                            }
976                            ResultColumn::Expr {
977                                expr: Expr::Column(col_ref, _),
978                                ..
979                            } => {
980                                output_cols.insert(col_ref.column.to_ascii_lowercase());
981                            }
982                            ResultColumn::Star | ResultColumn::TableStar(_) => {
983                                is_complete = false;
984                            }
985                            _ => {}
986                        }
987                    }
988                } else {
989                    is_complete = false;
990                }
991
992                if is_complete {
993                    scope.add_alias(&alias_name, "<subquery>", Some(output_cols));
994                } else {
995                    scope.add_alias(&alias_name, "<subquery>", None);
996                }
997
998                self.tables_resolved += 1;
999            }
1000            TableOrSubquery::TableFunction {
1001                name, args, alias, ..
1002            } => {
1003                for arg in args {
1004                    self.resolve_expr(arg, scope);
1005                }
1006
1007                let alias_name = alias.as_deref().unwrap_or(name);
1008
1009                if scope.has_alias_local(alias_name) {
1010                    self.push_error(SemanticErrorKind::DuplicateAlias {
1011                        alias: alias_name.to_owned(),
1012                    });
1013                }
1014
1015                scope.add_alias(alias_name, name, None);
1016                self.tables_resolved += 1;
1017            }
1018            TableOrSubquery::ParenJoin(inner_from) => {
1019                self.resolve_from(inner_from, scope);
1020            }
1021        }
1022    }
1023
1024    fn resolve_join(&mut self, join: &JoinClause, scope: &mut Scope) {
1025        // Snapshot column names from existing aliases BEFORE adding the new
1026        // table, so we can compute shared columns for NATURAL JOIN and USING.
1027        let pre_join_columns: Vec<HashSet<String>> = scope
1028            .known_local_column_sets()
1029            .into_iter()
1030            .cloned()
1031            .collect();
1032        let pre_join_aliases: HashSet<String> = scope.aliases.keys().cloned().collect();
1033
1034        self.resolve_table_or_subquery(&join.table, scope);
1035
1036        if join.join_type.natural && join.constraint.is_none() {
1037            // NATURAL JOIN: implicitly equate all columns with matching names
1038            // between the pre-existing tables and the newly joined table(s).
1039            let mut to_insert = Vec::new();
1040            for (alias, cols_opt) in &scope.columns {
1041                if !pre_join_aliases.contains(alias) {
1042                    if let Some(new_cols) = cols_opt {
1043                        for col_name in new_cols {
1044                            if pre_join_columns.iter().any(|cs| cs.contains(col_name)) {
1045                                to_insert.push(col_name.clone());
1046                            }
1047                        }
1048                    }
1049                }
1050            }
1051            for col_name in to_insert {
1052                scope.using_columns.insert(col_name);
1053            }
1054        }
1055
1056        if let Some(ref constraint) = join.constraint {
1057            match constraint {
1058                JoinConstraint::On(expr) => self.resolve_expr(expr, scope),
1059                JoinConstraint::Using(cols) => {
1060                    for col in cols {
1061                        let col_lower = col.to_ascii_lowercase();
1062                        scope.using_columns.insert(col_lower.clone());
1063
1064                        // Validate that column exists on the left side
1065                        let in_left = pre_join_columns.iter().any(|cs| cs.contains(&col_lower));
1066                        // Validate that column exists on the right side
1067                        let mut in_right = false;
1068                        for (alias, cols_opt) in &scope.columns {
1069                            if !pre_join_aliases.contains(alias) {
1070                                if let Some(new_cols) = cols_opt {
1071                                    if new_cols.contains(&col_lower) {
1072                                        in_right = true;
1073                                        break;
1074                                    }
1075                                } else {
1076                                    // If right side columns are unknown (e.g. subquery), assume it exists
1077                                    in_right = true;
1078                                    break;
1079                                }
1080                            }
1081                        }
1082
1083                        // If left side has unknown columns, we might not find it in `pre_join_columns`
1084                        let left_has_unknown = scope.columns.iter().any(|(alias, cols_opt)| {
1085                            pre_join_aliases.contains(alias) && cols_opt.is_none()
1086                        });
1087
1088                        if (!in_left && !left_has_unknown) || !in_right {
1089                            self.push_error(SemanticErrorKind::UnresolvedColumn {
1090                                table: None,
1091                                column: col.clone(),
1092                            });
1093                        }
1094
1095                        self.resolve_unqualified_column(col, scope, true);
1096                    }
1097                }
1098            }
1099        }
1100    }
1101
1102    fn resolve_result_column(&mut self, col: &ResultColumn, scope: &Scope) {
1103        match col {
1104            ResultColumn::Star => {
1105                // SELECT * is valid if there's at least one table in scope.
1106                // Suppress this error if we already reported an UnresolvedTable
1107                // error — the missing star target is a cascading consequence.
1108                if scope.alias_count() == 0
1109                    && !self
1110                        .errors
1111                        .iter()
1112                        .any(|e| matches!(e.kind, SemanticErrorKind::UnresolvedTable { .. }))
1113                {
1114                    self.push_error(SemanticErrorKind::NoTablesSpecifiedForStar);
1115                }
1116            }
1117            ResultColumn::TableStar(table_name) => {
1118                if !scope.has_alias(table_name) {
1119                    self.push_error(SemanticErrorKind::UnresolvedTable {
1120                        name: table_name.clone(),
1121                    });
1122                }
1123            }
1124            ResultColumn::Expr { expr, .. } => {
1125                self.resolve_expr(expr, scope);
1126            }
1127        }
1128    }
1129
1130    #[allow(clippy::too_many_lines)]
1131    fn resolve_expr(&mut self, expr: &Expr, scope: &Scope) {
1132        match expr {
1133            Expr::Column(col_ref, _span) => {
1134                self.resolve_column_ref(col_ref, scope);
1135            }
1136            Expr::BinaryOp { left, right, .. } => {
1137                self.resolve_expr(left, scope);
1138                self.resolve_expr(right, scope);
1139            }
1140            Expr::UnaryOp { expr: inner, .. }
1141            | Expr::Cast { expr: inner, .. }
1142            | Expr::Collate { expr: inner, .. }
1143            | Expr::IsNull { expr: inner, .. } => {
1144                self.resolve_expr(inner, scope);
1145            }
1146            Expr::Between {
1147                expr: inner,
1148                low,
1149                high,
1150                ..
1151            } => {
1152                self.resolve_expr(inner, scope);
1153                self.resolve_expr(low, scope);
1154                self.resolve_expr(high, scope);
1155            }
1156            Expr::In {
1157                expr: inner, set, ..
1158            } => {
1159                self.resolve_expr(inner, scope);
1160                match set {
1161                    InSet::List(items) => {
1162                        for item in items {
1163                            self.resolve_expr(item, scope);
1164                        }
1165                    }
1166                    InSet::Subquery(select) => {
1167                        let mut child = Scope::child(scope.clone());
1168                        self.resolve_select(select, &mut child);
1169                    }
1170                    InSet::Table(name) => {
1171                        self.resolve_table_name(&name.name, scope);
1172                    }
1173                }
1174            }
1175            Expr::Like {
1176                expr: inner,
1177                pattern,
1178                escape,
1179                op,
1180                ..
1181            } => {
1182                self.resolve_expr(inner, scope);
1183                self.resolve_expr(pattern, scope);
1184                if let Some(esc) = escape {
1185                    if *op != fsqlite_ast::LikeOp::Like {
1186                        // SQLite only supports ESCAPE with LIKE. For GLOB, MATCH, REGEXP it throws "wrong number of arguments to function X()"
1187                        self.push_error(SemanticErrorKind::FunctionArityMismatch {
1188                            function: match op {
1189                                fsqlite_ast::LikeOp::Like => "LIKE",
1190                                fsqlite_ast::LikeOp::Glob => "GLOB",
1191                                fsqlite_ast::LikeOp::Match => "MATCH",
1192                                fsqlite_ast::LikeOp::Regexp => "REGEXP",
1193                            }
1194                            .to_owned(),
1195                            expected: FunctionArity::Exact(2),
1196                            actual: 3,
1197                        });
1198                    }
1199                    self.resolve_expr(esc, scope);
1200                }
1201            }
1202            Expr::Subquery(select, _)
1203            | Expr::Exists {
1204                subquery: select, ..
1205            } => {
1206                let mut child = Scope::child(scope.clone());
1207                self.resolve_select(select, &mut child);
1208            }
1209            Expr::FunctionCall {
1210                name,
1211                args,
1212                filter,
1213                over,
1214                ..
1215            } => {
1216                self.resolve_function(name, args, scope);
1217                if let Some(filter) = filter {
1218                    self.resolve_expr(filter, scope);
1219                }
1220                if let Some(window_spec) = over {
1221                    for expr in &window_spec.partition_by {
1222                        self.resolve_expr(expr, scope);
1223                    }
1224                    for term in &window_spec.order_by {
1225                        self.resolve_expr(&term.expr, scope);
1226                    }
1227                    if let Some(frame) = &window_spec.frame {
1228                        match &frame.start {
1229                            fsqlite_ast::FrameBound::Preceding(expr)
1230                            | fsqlite_ast::FrameBound::Following(expr) => {
1231                                self.resolve_expr(expr, scope);
1232                            }
1233                            _ => {}
1234                        }
1235                        if let Some(
1236                            fsqlite_ast::FrameBound::Preceding(expr)
1237                            | fsqlite_ast::FrameBound::Following(expr),
1238                        ) = &frame.end
1239                        {
1240                            self.resolve_expr(expr, scope);
1241                        }
1242                    }
1243                }
1244            }
1245            Expr::Case {
1246                operand,
1247                whens,
1248                else_expr,
1249                ..
1250            } => {
1251                if let Some(op) = operand {
1252                    self.resolve_expr(op, scope);
1253                }
1254                for (when_expr, then_expr) in whens {
1255                    self.resolve_expr(when_expr, scope);
1256                    self.resolve_expr(then_expr, scope);
1257                }
1258                if let Some(else_e) = else_expr {
1259                    self.resolve_expr(else_e, scope);
1260                }
1261            }
1262            Expr::JsonAccess {
1263                expr: inner, path, ..
1264            } => {
1265                self.resolve_expr(inner, scope);
1266                self.resolve_expr(path, scope);
1267            }
1268            Expr::RowValue(exprs, _) => {
1269                for e in exprs {
1270                    self.resolve_expr(e, scope);
1271                }
1272            }
1273            // Literals, placeholders, and RAISE don't need resolution.
1274            Expr::Literal(_, _) | Expr::Placeholder(_, _) | Expr::Raise { .. } => {}
1275        }
1276    }
1277
1278    fn resolve_column_ref(&mut self, col_ref: &ColumnRef, scope: &Scope) {
1279        let result = scope.resolve_column(self.schema, col_ref.table.as_deref(), &col_ref.column);
1280        match result {
1281            ResolveResult::Resolved(_) => {
1282                self.columns_bound += 1;
1283            }
1284            ResolveResult::TableNotFound => {
1285                tracing::error!(
1286                    target: "fsqlite.parse",
1287                    table = ?col_ref.table,
1288                    column = %col_ref.column,
1289                    "unresolvable table reference"
1290                );
1291                self.push_error(SemanticErrorKind::UnresolvedColumn {
1292                    table: col_ref.table.clone(),
1293                    column: col_ref.column.clone(),
1294                });
1295            }
1296            ResolveResult::ColumnNotFound => {
1297                tracing::error!(
1298                    target: "fsqlite.parse",
1299                    table = ?col_ref.table,
1300                    column = %col_ref.column,
1301                    "unresolvable column reference"
1302                );
1303                self.push_error(SemanticErrorKind::UnresolvedColumn {
1304                    table: col_ref.table.clone(),
1305                    column: col_ref.column.clone(),
1306                });
1307            }
1308            ResolveResult::Ambiguous(candidates) => {
1309                tracing::error!(
1310                    target: "fsqlite.parse",
1311                    column = %col_ref.column,
1312                    candidates = ?candidates,
1313                    "ambiguous column reference"
1314                );
1315                self.push_error(SemanticErrorKind::AmbiguousColumn {
1316                    column: col_ref.column.clone(),
1317                    candidates,
1318                });
1319            }
1320        }
1321    }
1322
1323    fn resolve_unqualified_column(&mut self, name: &str, scope: &Scope, is_using_clause: bool) {
1324        let result = scope.resolve_column(self.schema, None, name);
1325        match result {
1326            ResolveResult::Resolved(_) => {
1327                self.columns_bound += 1;
1328            }
1329            ResolveResult::Ambiguous(candidates) => {
1330                if is_using_clause {
1331                    self.columns_bound += 1;
1332                } else {
1333                    self.push_error(SemanticErrorKind::AmbiguousColumn {
1334                        column: name.to_owned(),
1335                        candidates,
1336                    });
1337                }
1338            }
1339            ResolveResult::ColumnNotFound | ResolveResult::TableNotFound => {
1340                self.push_error(SemanticErrorKind::UnresolvedColumn {
1341                    table: None,
1342                    column: name.to_owned(),
1343                });
1344            }
1345        }
1346    }
1347
1348    fn bind_table_to_scope(&mut self, name: &str, alias: Option<&str>, scope: &mut Scope) {
1349        let alias_name = alias.unwrap_or(name);
1350        if scope.has_cte(name) {
1351            scope.add_alias(alias_name, name, None);
1352            self.tables_resolved += 1;
1353        } else if let Some(table_def) = self.schema.find_table(name) {
1354            let col_set: HashSet<String> = table_def
1355                .columns
1356                .iter()
1357                .map(|c| c.name.to_ascii_lowercase())
1358                .collect();
1359            scope.add_alias(alias_name, name, Some(col_set));
1360            self.tables_resolved += 1;
1361        } else {
1362            self.push_error(SemanticErrorKind::UnresolvedTable {
1363                name: name.to_owned(),
1364            });
1365        }
1366    }
1367
1368    fn resolve_table_name(&mut self, name: &str, _scope: &Scope) {
1369        if self.schema.find_table(name).is_some() {
1370            self.tables_resolved += 1;
1371        } else {
1372            self.push_error(SemanticErrorKind::UnresolvedTable {
1373                name: name.to_owned(),
1374            });
1375        }
1376    }
1377
1378    fn resolve_function(&mut self, name: &str, args: &FunctionArgs, scope: &Scope) {
1379        // Resolve argument expressions.
1380        let actual = match args {
1381            FunctionArgs::Star => {
1382                if !name.eq_ignore_ascii_case("count") {
1383                    let expected = known_function_arity(name).unwrap_or(FunctionArity::Range(0, 1));
1384                    self.push_error(SemanticErrorKind::FunctionArityMismatch {
1385                        function: name.to_owned(),
1386                        expected,
1387                        actual: 1,
1388                    });
1389                }
1390                1 // `*` counts as 1 argument for arity purposes (e.g. count(*))
1391            }
1392            FunctionArgs::List(list) => {
1393                for arg in list {
1394                    self.resolve_expr(arg, scope);
1395                }
1396                list.len()
1397            }
1398        };
1399
1400        // Validate known function arity.
1401        if let Some(expected) = known_function_arity(name) {
1402            let valid = match &expected {
1403                FunctionArity::Exact(n) => actual == *n,
1404                FunctionArity::Range(lo, hi) => actual >= *lo && actual <= *hi,
1405                FunctionArity::Variadic => true,
1406                FunctionArity::VariadicMin(min) => actual >= *min,
1407            };
1408            if !valid {
1409                self.push_error(SemanticErrorKind::FunctionArityMismatch {
1410                    function: name.to_owned(),
1411                    expected,
1412                    actual,
1413                });
1414            }
1415        }
1416    }
1417
1418    fn push_error(&mut self, kind: SemanticErrorKind) {
1419        let message = match &kind {
1420            SemanticErrorKind::UnresolvedColumn { table, column } => {
1421                if let Some(t) = table {
1422                    format!("no such column: {t}.{column}")
1423                } else {
1424                    format!("no such column: {column}")
1425                }
1426            }
1427            SemanticErrorKind::AmbiguousColumn {
1428                column, candidates, ..
1429            } => {
1430                format!(
1431                    "ambiguous column name: {column} (candidates: {})",
1432                    candidates.join(", ")
1433                )
1434            }
1435            SemanticErrorKind::UnresolvedTable { name } => {
1436                format!("no such table: {name}")
1437            }
1438            SemanticErrorKind::DuplicateAlias { alias } => {
1439                format!("duplicate alias: {alias}")
1440            }
1441            SemanticErrorKind::FunctionArityMismatch {
1442                function,
1443                expected,
1444                actual,
1445            } => {
1446                format!(
1447                    "wrong number of arguments to function {function}: expected {expected:?}, got {actual}"
1448                )
1449            }
1450            SemanticErrorKind::NoTablesSpecifiedForStar => "no tables specified".to_string(),
1451            SemanticErrorKind::ImplicitTypeCoercion {
1452                from, to, context, ..
1453            } => {
1454                format!("implicit type coercion from {from:?} to {to:?} in {context}")
1455            }
1456        };
1457
1458        self.errors.push(SemanticError { kind, message });
1459    }
1460}
1461
1462// ---------------------------------------------------------------------------
1463// Known function arity table
1464// ---------------------------------------------------------------------------
1465
1466/// Returns the expected arity for a known SQLite function, if recognized.
1467#[must_use]
1468fn known_function_arity(name: &str) -> Option<FunctionArity> {
1469    match name.to_ascii_lowercase().as_str() {
1470        "random" | "changes" | "last_insert_rowid" | "total_changes" => {
1471            Some(FunctionArity::Exact(0))
1472        }
1473        // Aggregate (1-arg) and scalar (1-arg) functions
1474        "sum" | "total" | "avg" | "abs" | "hex" | "length" | "lower" | "upper" | "typeof"
1475        | "unicode" | "quote" | "zeroblob" | "soundex" | "likely" | "unlikely" | "randomblob" => {
1476            Some(FunctionArity::Exact(1))
1477        }
1478        "ifnull" | "nullif" | "instr" | "glob" | "likelihood" => Some(FunctionArity::Exact(2)),
1479        "iif" | "replace" => Some(FunctionArity::Exact(3)),
1480        "count" => Some(FunctionArity::Range(0, 1)),
1481        "group_concat" | "trim" | "ltrim" | "rtrim" | "round" => Some(FunctionArity::Range(1, 2)),
1482        "substr" | "substring" | "like" => Some(FunctionArity::Range(2, 3)),
1483        "coalesce" | "json_extract" => Some(FunctionArity::VariadicMin(2)),
1484        "json_remove" => Some(FunctionArity::VariadicMin(1)),
1485        "json_insert" | "json_replace" | "json_set" => Some(FunctionArity::VariadicMin(3)),
1486        // Variadic: aggregates, scalars, date/time, and JSON functions
1487        "min" | "max" | "printf" | "format" | "strftime" | "json" | "json_type" | "json_valid" => {
1488            Some(FunctionArity::VariadicMin(1))
1489        }
1490        "date" | "time" | "datetime" | "julianday" | "unixepoch" => {
1491            Some(FunctionArity::VariadicMin(0))
1492        }
1493        "char" | "json_array" | "json_object" => Some(FunctionArity::Variadic),
1494
1495        _ => None, // Unknown function — skip arity check.
1496    }
1497}
1498
1499// ---------------------------------------------------------------------------
1500// Tests
1501// ---------------------------------------------------------------------------
1502
1503#[cfg(test)]
1504#[path = "semantic_test.rs"]
1505mod semantic_test;
1506
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::Parser;

    // ── Fixtures ──

    /// Builds a `ColumnDef` fixture from its four fields.
    fn column(name: &str, affinity: TypeAffinity, is_ipk: bool, not_null: bool) -> ColumnDef {
        ColumnDef {
            name: name.to_owned(),
            affinity,
            is_ipk,
            not_null,
        }
    }

    /// Builds a `TableDef` fixture with `without_rowid` and `strict` both off.
    ///
    /// Tests that need other flags override them with struct-update syntax.
    fn table(name: &str, columns: Vec<ColumnDef>) -> TableDef {
        TableDef {
            name: name.to_owned(),
            columns,
            without_rowid: false,
            strict: false,
        }
    }

    /// Collects column names into the owned set that `Scope::add_alias` takes.
    fn column_set(names: &[&str]) -> HashSet<String> {
        names.iter().map(|name| (*name).to_owned()).collect()
    }

    /// Schema shared by most tests:
    /// `users(id IPK, name, email)` and `orders(id IPK, user_id, amount)`.
    fn make_schema() -> Schema {
        let mut schema = Schema::new();
        schema.add_table(table(
            "users",
            vec![
                column("id", TypeAffinity::Integer, true, true),
                column("name", TypeAffinity::Text, false, true),
                column("email", TypeAffinity::Text, false, false),
            ],
        ));
        schema.add_table(table(
            "orders",
            vec![
                column("id", TypeAffinity::Integer, true, true),
                column("user_id", TypeAffinity::Integer, false, true),
                column("amount", TypeAffinity::Real, false, false),
            ],
        ));
        schema
    }

    /// Parses `sql`, asserting that it parses cleanly into exactly one statement.
    fn parse_one(sql: &str) -> Statement {
        let mut p = Parser::from_sql(sql);
        let (stmts, errs) = p.parse_all();
        assert!(errs.is_empty(), "parse errors: {errs:?}");
        assert_eq!(stmts.len(), 1);
        stmts
            .into_iter()
            .next()
            .expect("length was asserted to be 1 above")
    }

    // ── Schema tests ──

    #[test]
    fn test_schema_find_table_case_insensitive() {
        let schema = make_schema();
        assert!(schema.find_table("users").is_some());
        assert!(schema.find_table("USERS").is_some());
        assert!(schema.find_table("Users").is_some());
        assert!(schema.find_table("nonexistent").is_none());
    }

    #[test]
    fn test_table_find_column() {
        let schema = make_schema();
        let users = schema.find_table("users").unwrap();
        assert!(users.has_column("id"));
        assert!(users.has_column("ID"));
        assert!(!users.has_column("nonexistent"));
    }

    #[test]
    fn test_table_rowid_alias() {
        let schema = make_schema();
        let users = schema.find_table("users").unwrap();
        assert!(users.is_rowid_alias("rowid"));
        assert!(users.is_rowid_alias("_rowid_"));
        assert!(users.is_rowid_alias("oid"));
        assert!(users.is_rowid_alias("id")); // IPK
        assert!(!users.is_rowid_alias("name"));
    }

    #[test]
    fn test_table_rowid_alias_respects_shadowing() {
        // Real columns named `rowid` and `_rowid_` shadow the implicit aliases;
        // `oid` is not declared as a column, so it remains an alias.
        let mut schema = Schema::new();
        schema.add_table(table(
            "shadowed",
            vec![
                column("rowid", TypeAffinity::Text, false, false),
                column("_rowid_", TypeAffinity::Text, false, false),
                column("id", TypeAffinity::Integer, true, false),
            ],
        ));

        let shadowed = schema.find_table("shadowed").unwrap();
        assert!(!shadowed.is_rowid_alias("rowid"));
        assert!(!shadowed.is_rowid_alias("_rowid_"));
        assert!(shadowed.is_rowid_alias("oid"));
        assert!(shadowed.is_rowid_alias("id"));
    }

    #[test]
    fn test_table_rowid_alias_disabled_for_without_rowid_tables() {
        let mut schema = Schema::new();
        schema.add_table(TableDef {
            without_rowid: true,
            ..table(
                "wr",
                vec![
                    column("id", TypeAffinity::Integer, true, true),
                    column("payload", TypeAffinity::Text, false, false),
                ],
            )
        });

        let wr = schema.find_table("wr").unwrap();
        assert!(!wr.is_rowid_alias("rowid"));
        assert!(!wr.is_rowid_alias("_rowid_"));
        assert!(!wr.is_rowid_alias("oid"));
        // `id` is still an ordinary column, just not a rowid alias.
        assert!(!wr.is_rowid_alias("id"));
        assert!(wr.has_column("id"));
    }

    // ── Scope tests ──

    #[test]
    fn test_scope_resolve_qualified_column() {
        let schema = make_schema();
        let mut scope = Scope::root();
        scope.add_alias("u", "users", Some(column_set(&["id", "name", "email"])));

        assert_eq!(
            scope.resolve_column(&schema, Some("u"), "id"),
            ResolveResult::Resolved("u".to_owned())
        );
        assert_eq!(
            scope.resolve_column(&schema, Some("u"), "nonexistent"),
            ResolveResult::ColumnNotFound
        );
        assert_eq!(
            scope.resolve_column(&schema, Some("x"), "id"),
            ResolveResult::TableNotFound
        );
    }

    #[test]
    fn test_scope_resolve_unqualified_column() {
        let schema = make_schema();
        let mut scope = Scope::root();
        scope.add_alias("u", "users", Some(column_set(&["id", "name"])));
        scope.add_alias("o", "orders", Some(column_set(&["id", "user_id"])));

        // "name" is unique → resolved to "u"
        assert_eq!(
            scope.resolve_column(&schema, None, "name"),
            ResolveResult::Resolved("u".to_owned())
        );

        // "user_id" is unique → resolved to "o"
        assert_eq!(
            scope.resolve_column(&schema, None, "user_id"),
            ResolveResult::Resolved("o".to_owned())
        );

        // "id" exists under both aliases → ambiguous
        match scope.resolve_column(&schema, None, "id") {
            ResolveResult::Ambiguous(candidates) => assert_eq!(candidates.len(), 2),
            other => panic!("expected Ambiguous, got {other:?}"),
        }

        // "nonexistent" is in neither alias
        assert_eq!(
            scope.resolve_column(&schema, None, "nonexistent"),
            ResolveResult::ColumnNotFound
        );
    }

    #[test]
    fn test_scope_child_inherits_parent() {
        let schema = make_schema();
        let mut parent = Scope::root();
        parent.add_alias("u", "users", Some(column_set(&["id", "name"])));
        let child = Scope::child(parent);

        // Child can see parent's columns.
        assert_eq!(
            child.resolve_column(&schema, Some("u"), "id"),
            ResolveResult::Resolved("u".to_owned())
        );
    }

    // ── Resolver tests ──

    #[test]
    fn test_resolve_simple_select() {
        let schema = make_schema();
        let stmt = parse_one("SELECT id, name FROM users");
        let mut resolver = Resolver::new(&schema);
        let errors = resolver.resolve_statement(&stmt);
        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
        assert_eq!(resolver.tables_resolved, 1);
        assert_eq!(resolver.columns_bound, 2);
    }

    #[test]
    fn test_resolve_qualified_column() {
        let schema = make_schema();
        let stmt = parse_one("SELECT u.id, u.name FROM users u");
        let mut resolver = Resolver::new(&schema);
        let errors = resolver.resolve_statement(&stmt);
        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
        assert_eq!(resolver.tables_resolved, 1);
        assert_eq!(resolver.columns_bound, 2);
    }

    #[test]
    fn test_resolve_join() {
        let schema = make_schema();
        let stmt =
            parse_one("SELECT u.name, o.amount FROM users u JOIN orders o ON u.id = o.user_id");
        let mut resolver = Resolver::new(&schema);
        let errors = resolver.resolve_statement(&stmt);
        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
        assert_eq!(resolver.tables_resolved, 2);
        assert_eq!(resolver.columns_bound, 4); // u.name, o.amount, u.id, o.user_id
    }

    #[test]
    fn test_resolve_join_using() {
        let schema = make_schema();
        let stmt = parse_one("SELECT u.name, o.amount FROM users u JOIN orders o USING (id)");
        let mut resolver = Resolver::new(&schema);
        let errors = resolver.resolve_statement(&stmt);
        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
        assert_eq!(resolver.tables_resolved, 2);
        // u.name, o.amount, and the USING column `id`, which is counted once.
        assert_eq!(resolver.columns_bound, 3);
    }

    #[test]
    fn test_resolve_unresolved_table() {
        let schema = make_schema();
        let stmt = parse_one("SELECT * FROM nonexistent");
        let mut resolver = Resolver::new(&schema);
        let errors = resolver.resolve_statement(&stmt);
        assert_eq!(errors.len(), 1);
        assert!(matches!(
            errors[0].kind,
            SemanticErrorKind::UnresolvedTable { .. }
        ));
    }

    #[test]
    fn test_resolve_unresolved_column() {
        let schema = make_schema();
        let stmt = parse_one("SELECT nonexistent FROM users");
        let mut resolver = Resolver::new(&schema);
        let errors = resolver.resolve_statement(&stmt);
        assert_eq!(errors.len(), 1);
        assert!(matches!(
            errors[0].kind,
            SemanticErrorKind::UnresolvedColumn { .. }
        ));
    }

    #[test]
    fn test_unaliased_subqueries() {
        let schema = make_schema();
        // Neither unaliased subquery exposes a column named `a`, so `a` must be
        // reported as unresolved.
        let stmt = parse_one("SELECT a FROM (SELECT 1), (SELECT 2)");
        let mut resolver = Resolver::new(&schema);
        let errors = resolver.resolve_statement(&stmt);
        assert_eq!(errors.len(), 1, "Expected unresolved column error!");
        assert!(matches!(
            errors[0].kind,
            SemanticErrorKind::UnresolvedColumn { .. }
        ));
    }

    #[test]
    fn test_resolve_ambiguous_column() {
        let schema = make_schema();
        let stmt = parse_one("SELECT id FROM users, orders");
        let mut resolver = Resolver::new(&schema);
        let errors = resolver.resolve_statement(&stmt);
        assert_eq!(errors.len(), 1);
        assert!(matches!(
            errors[0].kind,
            SemanticErrorKind::AmbiguousColumn { .. }
        ));
    }

    #[test]
    fn test_resolve_where_clause() {
        let schema = make_schema();
        let stmt = parse_one("SELECT name FROM users WHERE id > 10");
        let mut resolver = Resolver::new(&schema);
        let errors = resolver.resolve_statement(&stmt);
        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
        assert_eq!(resolver.columns_bound, 2); // name, id
    }

    #[test]
    fn test_resolve_star_select() {
        let schema = make_schema();
        let stmt = parse_one("SELECT * FROM users");
        let mut resolver = Resolver::new(&schema);
        let errors = resolver.resolve_statement(&stmt);
        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
        assert_eq!(resolver.tables_resolved, 1);
    }

    #[test]
    fn test_resolve_star_in_subquery_without_tables() {
        let schema = make_schema();
        let stmt = parse_one("SELECT (SELECT *) FROM users");
        let mut resolver = Resolver::new(&schema);
        let errors = resolver.resolve_statement(&stmt);
        assert_eq!(errors.len(), 1);
        assert!(matches!(
            errors[0].kind,
            SemanticErrorKind::NoTablesSpecifiedForStar
        ));
    }

    #[test]
    fn test_resolve_insert_checks_table() {
        let schema = make_schema();
        let stmt = parse_one("INSERT INTO nonexistent VALUES (1)");
        let mut resolver = Resolver::new(&schema);
        let errors = resolver.resolve_statement(&stmt);
        assert_eq!(errors.len(), 1);
        assert!(matches!(
            errors[0].kind,
            SemanticErrorKind::UnresolvedTable { .. }
        ));
    }

    #[test]
    fn test_resolve_rowid_column() {
        let schema = make_schema();
        let stmt = parse_one("SELECT rowid, _rowid_, oid FROM users");
        let mut resolver = Resolver::new(&schema);
        let errors = resolver.resolve_statement(&stmt);
        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
    }

    #[test]
    fn test_order_by_select_alias_shadowing() {
        let mut schema = Schema::new();
        schema.add_table(table(
            "tbl",
            vec![column("a", TypeAffinity::Integer, false, false)],
        ));

        // "a" is both an alias and a column in the table.
        let stmt = parse_one("SELECT 1 AS a FROM tbl ORDER BY a");
        let mut resolver = Resolver::new(&schema);
        let errors = resolver.resolve_statement(&stmt);

        // SQLite permits ORDER BY to resolve the SELECT-list alias here rather
        // than treating the alias/column name overlap as ambiguous.
        assert!(errors.is_empty(), "Expected no errors, but got: {errors:?}");
    }

    #[test]
    fn test_compound_order_by_can_resolve_alias_from_later_arm() {
        let schema = make_schema();
        let stmt = parse_one("SELECT 1 AS a UNION SELECT 2 AS b ORDER BY b");
        let mut resolver = Resolver::new(&schema);
        let errors = resolver.resolve_statement(&stmt);
        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
    }

    #[test]
    fn test_compound_order_by_can_match_output_expression_from_later_arm() {
        let mut schema = Schema::new();
        schema.add_table(table(
            "tbl",
            vec![
                column("a", TypeAffinity::Integer, false, false),
                column("b", TypeAffinity::Integer, false, false),
            ],
        ));

        let stmt = parse_one("SELECT a + 1 FROM tbl UNION SELECT b + 1 FROM tbl ORDER BY b + 1");
        let mut resolver = Resolver::new(&schema);
        let errors = resolver.resolve_statement(&stmt);
        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
    }

    // ── Metrics tests ──

    #[test]
    fn test_semantic_metrics() {
        // Delta-based assertion: never call reset_semantic_metrics() in tests
        // as it races with parallel tests.
        let before = semantic_metrics_snapshot();
        let schema = make_schema();

        // Trigger an error.
        let stmt = parse_one("SELECT nonexistent FROM users");
        let mut resolver = Resolver::new(&schema);
        let _ = resolver.resolve_statement(&stmt);

        let after = semantic_metrics_snapshot();
        assert!(
            after.fsqlite_semantic_errors_total > before.fsqlite_semantic_errors_total,
            "expected at least 1 new semantic error, before={}, after={}",
            before.fsqlite_semantic_errors_total,
            after.fsqlite_semantic_errors_total,
        );
    }

    #[test]
    fn test_resolve_function_arity() {
        let schema = make_schema();
        let stmt = parse_one("SELECT sum(1, 2)");
        let mut resolver = Resolver::new(&schema);
        let errors = resolver.resolve_statement(&stmt);
        assert_eq!(errors.len(), 1);
        assert!(matches!(
            errors[0].kind,
            SemanticErrorKind::FunctionArityMismatch { .. }
        ));
    }

    #[test]
    fn test_resolve_group_by_alias() {
        let schema = make_schema();
        let stmt = parse_one("SELECT id AS x FROM users GROUP BY x");
        let mut resolver = Resolver::new(&schema);
        let errors = resolver.resolve_statement(&stmt);
        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
    }

    #[test]
    fn test_resolve_escape_on_non_like() {
        let schema = make_schema();

        // LIKE with ESCAPE is valid.
        let stmt_like = parse_one("SELECT 1 LIKE 2 ESCAPE 3");
        let mut resolver_like = Resolver::new(&schema);
        let errors_like = resolver_like.resolve_statement(&stmt_like);
        assert!(errors_like.is_empty(), "LIKE ESCAPE should be valid");

        // GLOB with ESCAPE is invalid; it is reported as an arity mismatch.
        let stmt_glob = parse_one("SELECT 1 GLOB 2 ESCAPE 3");
        let mut resolver_glob = Resolver::new(&schema);
        let errors_glob = resolver_glob.resolve_statement(&stmt_glob);
        assert_eq!(errors_glob.len(), 1);
        assert!(matches!(
            errors_glob[0].kind,
            SemanticErrorKind::FunctionArityMismatch { .. }
        ));
    }

    #[test]
    fn test_update_assignment_target_strict() {
        let schema = make_schema();
        // The CTE `cte` exposes a column named `amount` and is brought into
        // scope through `FROM cte`. The UPDATE target, `users`, has no `amount`
        // column. If the assignment target were resolved against every table in
        // scope it would bind to `cte.amount` and no error would be emitted.
        // The target must be resolved against `users` only, so exactly one
        // error is expected.
        let stmt =
            parse_one("WITH cte(amount) AS (SELECT 1) UPDATE users SET amount = 1 FROM cte");
        let mut resolver = Resolver::new(&schema);
        let errors = resolver.resolve_statement(&stmt);
        assert_eq!(
            errors.len(),
            1,
            "Should report amount as unresolved for users table, instead got: {errors:?}"
        );
    }

    #[test]
    fn test_rowid_resolution() {
        let schema = make_schema();
        // Goes through `parse_one` so that a parse failure is reported as such
        // instead of being silently discarded.
        let stmt = parse_one("SELECT rowid FROM users");
        let mut resolver = Resolver::new(&schema);
        let errors = resolver.resolve_statement(&stmt);
        assert!(errors.is_empty(), "errors: {errors:?}");
    }
}