Skip to main content

fsqlite_parser/
semantic.rs

1//! Semantic analysis: name resolution, type checking, and scope validation.
2//!
3//! Validates AST nodes against a schema to ensure:
4//! - Column references resolve to known tables/columns
5//! - Table aliases are unique within a query scope
6//! - Function arity matches known functions
7//! - CTE names are visible in the correct scope
8//! - Type affinity is tracked for expression results
9//!
10//! # Usage
11//!
12//! ```ignore
//! let mut schema = Schema::new();
14//! schema.add_table(TableDef { name: "users", columns: vec![...] });
15//! let mut resolver = Resolver::new(&schema);
16//! let errors = resolver.resolve_statement(&stmt);
17//! ```
18
19use std::collections::{HashMap, HashSet};
20use std::sync::atomic::{AtomicU64, Ordering};
21
22use fsqlite_ast::{
23    ColumnRef, Expr, FromClause, FunctionArgs, InSet, JoinClause, JoinConstraint, ResultColumn,
24    SelectCore, SelectStatement, Statement, TableOrSubquery,
25};
26use fsqlite_types::TypeAffinity;
27
28// ---------------------------------------------------------------------------
29// Metrics
30// ---------------------------------------------------------------------------
31
/// Process-wide running total of semantic errors.
///
/// `Resolver::resolve_statement` adds the number of errors it found to this
/// counter; `reset_semantic_metrics` stores zero back into it.
static FSQLITE_SEMANTIC_ERRORS_TOTAL: AtomicU64 = AtomicU64::new(0);
34
/// Point-in-time snapshot of semantic analysis metrics.
///
/// Plain `Copy` data; it holds the values read at the moment
/// `semantic_metrics_snapshot` was called and does not update afterwards.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct SemanticMetricsSnapshot {
    /// Value of the global semantic-error counter when the snapshot was taken.
    pub fsqlite_semantic_errors_total: u64,
}
40
41/// Take a point-in-time snapshot of semantic metrics.
42#[must_use]
43pub fn semantic_metrics_snapshot() -> SemanticMetricsSnapshot {
44    SemanticMetricsSnapshot {
45        fsqlite_semantic_errors_total: FSQLITE_SEMANTIC_ERRORS_TOTAL.load(Ordering::Relaxed),
46    }
47}
48
49/// Reset semantic metrics.
50pub fn reset_semantic_metrics() {
51    FSQLITE_SEMANTIC_ERRORS_TOTAL.store(0, Ordering::Relaxed);
52}
53
54// ---------------------------------------------------------------------------
55// Schema types
56// ---------------------------------------------------------------------------
57
/// A column definition in the schema.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ColumnDef {
    /// Column name (stored in original case; lookups in this module compare
    /// it ASCII-case-insensitively).
    pub name: String,
    /// Type affinity determined from the DDL type name.
    pub affinity: TypeAffinity,
    /// Whether this column is an INTEGER PRIMARY KEY (rowid alias).
    pub is_ipk: bool,
    /// Whether this column has a NOT NULL constraint.
    pub not_null: bool,
}
70
/// A table definition in the schema.
#[derive(Debug, Clone)]
pub struct TableDef {
    /// Table name (original case; `Schema` keys tables by the lowercased name).
    pub name: String,
    /// Column definitions in declaration order.
    pub columns: Vec<ColumnDef>,
    /// Whether this is a WITHOUT ROWID table. When set, `is_rowid_alias`
    /// reports no rowid aliases for the table.
    pub without_rowid: bool,
    /// Whether this is a STRICT table.
    pub strict: bool,
}
83
84impl TableDef {
85    /// Find a column by name (case-insensitive).
86    #[must_use]
87    pub fn find_column(&self, name: &str) -> Option<&ColumnDef> {
88        self.columns
89            .iter()
90            .find(|c| c.name.eq_ignore_ascii_case(name))
91    }
92
93    /// Check if this table has a column with the given name (case-insensitive).
94    #[must_use]
95    pub fn has_column(&self, name: &str) -> bool {
96        self.find_column(name).is_some()
97    }
98
99    /// Check if a name is a rowid alias for this table.
100    #[must_use]
101    pub fn is_rowid_alias(&self, name: &str) -> bool {
102        if self.without_rowid {
103            return false;
104        }
105        let lower = name.to_ascii_lowercase();
106        matches!(lower.as_str(), "rowid" | "_rowid_" | "oid")
107            || self
108                .columns
109                .iter()
110                .any(|c| c.is_ipk && c.name.eq_ignore_ascii_case(name))
111    }
112}
113
/// The database schema: a collection of table definitions.
#[derive(Debug, Clone, Default)]
pub struct Schema {
    /// Tables keyed by their ASCII-lowercased name, which is what makes
    /// `find_table` case-insensitive.
    tables: HashMap<String, TableDef>,
}
120
121impl Schema {
122    /// Create an empty schema.
123    #[must_use]
124    pub fn new() -> Self {
125        Self::default()
126    }
127
128    /// Add a table definition.
129    pub fn add_table(&mut self, table: TableDef) {
130        self.tables.insert(table.name.to_ascii_lowercase(), table);
131    }
132
133    /// Look up a table by name (case-insensitive).
134    #[must_use]
135    pub fn find_table(&self, name: &str) -> Option<&TableDef> {
136        self.tables.get(&name.to_ascii_lowercase())
137    }
138
139    /// Number of tables in the schema.
140    #[must_use]
141    pub fn table_count(&self) -> usize {
142        self.tables.len()
143    }
144}
145
146// ---------------------------------------------------------------------------
147// Scope tracking
148// ---------------------------------------------------------------------------
149
/// A name scope for query resolution. Scopes nest for subqueries and CTEs.
#[derive(Debug, Clone)]
pub struct Scope {
    /// Table aliases registered directly in this scope: lowercase alias → table name.
    aliases: HashMap<String, String>,
    /// Columns visible from each alias: lowercase alias → set of column names.
    ///
    /// Column names must be stored lowercased, because lookups lowercase the
    /// requested name before testing membership. `None` means the columns are
    /// unknown (CTE, subquery, table function), so column references against
    /// that alias are optimistically accepted.
    columns: HashMap<String, Option<HashSet<String>>>,
    /// Lowercase CTE names registered in this scope.
    ctes: HashSet<String>,
    /// Enclosing scope (for subquery nesting), if any.
    parent: Option<Box<Self>>,
}

impl Scope {
    /// Create a root scope.
    #[must_use]
    pub fn root() -> Self {
        Self {
            aliases: HashMap::new(),
            columns: HashMap::new(),
            ctes: HashSet::new(),
            parent: None,
        }
    }

    /// Create a child scope (for subqueries) that falls back to `parent` for
    /// names it cannot resolve itself.
    #[must_use]
    pub fn child(parent: Self) -> Self {
        Self {
            aliases: HashMap::new(),
            columns: HashMap::new(),
            ctes: HashSet::new(),
            parent: Some(Box::new(parent)),
        }
    }

    /// Register a table alias with its columns.
    ///
    /// The alias is stored lowercased. `columns` should contain lowercased
    /// column names, or be `None` when the column list is unknown. Registering
    /// the same alias twice overwrites the earlier entry.
    pub fn add_alias(&mut self, alias: &str, table_name: &str, columns: Option<HashSet<String>>) {
        let key = alias.to_ascii_lowercase();
        self.aliases.insert(key.clone(), table_name.to_owned());
        self.columns.insert(key, columns);
    }

    /// Register a CTE name (stored lowercased).
    pub fn add_cte(&mut self, name: &str) {
        self.ctes.insert(name.to_ascii_lowercase());
    }

    /// Check if a name is visible as a table alias or CTE in this scope or any
    /// enclosing scope (ASCII-case-insensitive).
    #[must_use]
    pub fn has_alias(&self, alias: &str) -> bool {
        let key = alias.to_ascii_lowercase();
        if self.aliases.contains_key(&key) || self.ctes.contains(&key) {
            return true;
        }
        self.parent.as_ref().is_some_and(|p| p.has_alias(alias))
    }

    /// Resolve a column reference: find which alias provides it.
    ///
    /// With a `table_qualifier`, only that alias is consulted; the nearest
    /// scope that defines the alias decides the outcome (inner aliases shadow
    /// outer ones), and `TableNotFound` is returned when no scope defines it.
    ///
    /// Without a qualifier, the aliases of this scope are searched:
    /// * aliases whose column list is known and contains the name are
    ///   definite matches — exactly one resolves, several are `Ambiguous`
    ///   (candidates sorted so diagnostics are deterministic);
    /// * if there is no definite match but some alias has an unknown column
    ///   list, the reference is optimistically resolved to such an alias
    ///   (the alphabetically first one) instead of being reported as an error;
    /// * otherwise the enclosing scope is tried, and `ColumnNotFound` is
    ///   returned once the chain is exhausted.
    #[must_use]
    pub fn resolve_column(
        &self,
        table_qualifier: Option<&str>,
        column_name: &str,
    ) -> ResolveResult {
        let col_lower = column_name.to_ascii_lowercase();

        if let Some(qualifier) = table_qualifier {
            let key = qualifier.to_ascii_lowercase();
            if let Some(cols) = self.columns.get(&key) {
                // Unknown column lists (`None`) accept any column name.
                if cols.as_ref().is_none_or(|c| c.contains(&col_lower)) {
                    return ResolveResult::Resolved(key);
                }
                return ResolveResult::ColumnNotFound;
            }
            return match &self.parent {
                Some(parent) => parent.resolve_column(table_qualifier, column_name),
                None => ResolveResult::TableNotFound,
            };
        }

        // Unqualified: split this scope's aliases into definite matches and
        // aliases whose columns we cannot inspect.
        let mut known: Vec<String> = Vec::new();
        let mut unknown: Vec<String> = Vec::new();
        for (alias, cols) in &self.columns {
            match cols {
                Some(names) => {
                    if names.contains(&col_lower) {
                        known.push(alias.clone());
                    }
                }
                None => unknown.push(alias.clone()),
            }
        }
        // HashMap iteration order is arbitrary; sort so that the candidate
        // list (and the error message built from it) is stable across runs.
        known.sort_unstable();

        if known.len() > 1 {
            return ResolveResult::Ambiguous(known);
        }
        if let Some(alias) = known.pop() {
            return ResolveResult::Resolved(alias);
        }
        // No definite provider: accept the reference if any alias could
        // plausibly supply it rather than rejecting possibly-valid SQL.
        if let Some(alias) = unknown.into_iter().min() {
            return ResolveResult::Resolved(alias);
        }

        match &self.parent {
            Some(parent) => parent.resolve_column(None, column_name),
            None => ResolveResult::ColumnNotFound,
        }
    }

    /// Number of aliases registered in this scope (not counting parents).
    #[must_use]
    pub fn alias_count(&self) -> usize {
        self.aliases.len()
    }
}

/// Result of resolving a column reference.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ResolveResult {
    /// Column resolved to the given (lowercased) alias.
    Resolved(String),
    /// The table qualifier was not found in any visible scope.
    TableNotFound,
    /// The column was not found in the specified table, or in any visible
    /// table when unqualified.
    ColumnNotFound,
    /// The column was found in multiple tables (ambiguous); candidates are
    /// the lowercased aliases in sorted order.
    Ambiguous(Vec<String>),
}
277
278// ---------------------------------------------------------------------------
279// Semantic errors
280// ---------------------------------------------------------------------------
281
/// A semantic analysis error.
///
/// `Display` prints `message` verbatim.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SemanticError {
    /// Structured error kind, carrying the names involved.
    pub kind: SemanticErrorKind,
    /// Human-readable message derived from `kind` when the error is recorded.
    pub message: String,
}
290
/// Kinds of semantic errors.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SemanticErrorKind {
    /// Column reference could not be resolved. Also reported when the table
    /// qualifier of a qualified reference is unknown.
    UnresolvedColumn {
        /// Table qualifier as written in the query, if any.
        table: Option<String>,
        /// Column name as written in the query.
        column: String,
    },
    /// Column reference is ambiguous (exists in multiple tables).
    AmbiguousColumn {
        /// Column name as written in the query.
        column: String,
        /// Aliases that could each supply the column.
        candidates: Vec<String>,
    },
    /// Table or alias not found.
    UnresolvedTable { name: String },
    /// Duplicate alias in the same scope.
    DuplicateAlias { alias: String },
    /// Function called with wrong number of arguments.
    FunctionArityMismatch {
        /// Function name as written in the query.
        function: String,
        /// Arity the function is known to accept.
        expected: FunctionArity,
        /// Number of arguments actually supplied.
        actual: usize,
    },
    /// Type coercion warning (not fatal).
    // NOTE(review): no resolver code visible in this part of the file
    // constructs this variant — confirm where it is emitted.
    ImplicitTypeCoercion {
        from: TypeAffinity,
        to: TypeAffinity,
        context: String,
    },
}
321
/// Expected function arity.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FunctionArity {
    /// Exact number of arguments.
    Exact(usize),
    /// Range of acceptable argument counts; both bounds are inclusive.
    Range(usize, usize),
    /// Any number of arguments (the arity check always passes).
    Variadic,
}
332
333impl std::fmt::Display for SemanticError {
334    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
335        write!(f, "{}", self.message)
336    }
337}
338
339// ---------------------------------------------------------------------------
340// Resolver
341// ---------------------------------------------------------------------------
342
/// The semantic analyzer / name resolver.
///
/// Given a `Schema` and an AST, validates all name references and collects
/// errors. Uses scope tracking for nested queries and CTEs.
pub struct Resolver<'a> {
    /// Schema that table and column names are checked against.
    schema: &'a Schema,
    /// Errors collected for the statement currently (or most recently) resolved.
    errors: Vec<SemanticError>,
    /// Count of table references resolved for the current statement.
    tables_resolved: u64,
    /// Count of column references bound for the current statement.
    columns_bound: u64,
}
353
354impl<'a> Resolver<'a> {
355    /// Create a new resolver for the given schema.
356    #[must_use]
357    pub fn new(schema: &'a Schema) -> Self {
358        Self {
359            schema,
360            errors: Vec::new(),
361            tables_resolved: 0,
362            columns_bound: 0,
363        }
364    }
365
366    /// Resolve all name references in a statement.
367    ///
368    /// Returns the list of semantic errors found.
369    pub fn resolve_statement(&mut self, stmt: &Statement) -> Vec<SemanticError> {
370        let span = tracing::debug_span!(
371            target: "fsqlite.parse",
372            "semantic_analysis",
373            tables_resolved = tracing::field::Empty,
374            columns_bound = tracing::field::Empty,
375            errors = tracing::field::Empty,
376        );
377        let _guard = span.enter();
378
379        self.errors.clear();
380        self.tables_resolved = 0;
381        self.columns_bound = 0;
382
383        let mut scope = Scope::root();
384        self.resolve_stmt_inner(stmt, &mut scope);
385
386        span.record("tables_resolved", self.tables_resolved);
387        span.record("columns_bound", self.columns_bound);
388        span.record("errors", self.errors.len() as u64);
389
390        // Record error metrics.
391        if !self.errors.is_empty() {
392            FSQLITE_SEMANTIC_ERRORS_TOTAL.fetch_add(self.errors.len() as u64, Ordering::Relaxed);
393        }
394
395        self.errors.clone()
396    }
397
398    fn resolve_stmt_inner(&mut self, stmt: &Statement, scope: &mut Scope) {
399        match stmt {
400            Statement::Select(select) => self.resolve_select(select, scope),
401            Statement::Insert(insert) => {
402                self.resolve_table_name(&insert.table.name, scope);
403            }
404            Statement::Update(update) => {
405                self.resolve_table_name(&update.table.name.name, scope);
406            }
407            Statement::Delete(delete) => {
408                self.resolve_table_name(&delete.table.name.name, scope);
409            }
410            // DDL and control statements don't need name resolution.
411            _ => {}
412        }
413    }
414
415    fn resolve_select(&mut self, select: &SelectStatement, scope: &mut Scope) {
416        // Register CTEs first (they are visible in the entire WITH scope).
417        if let Some(ref with) = select.with {
418            for cte in &with.ctes {
419                scope.add_cte(&cte.name);
420            }
421        }
422
423        // Resolve the primary select core.
424        self.resolve_select_core(&select.body.select, scope);
425
426        // Resolve any compound queries (UNION, INTERSECT, EXCEPT).
427        for (_op, core) in &select.body.compounds {
428            self.resolve_select_core(core, scope);
429        }
430    }
431
432    fn resolve_select_core(&mut self, core: &SelectCore, scope: &mut Scope) {
433        match core {
434            SelectCore::Select {
435                columns,
436                from,
437                where_clause,
438                group_by,
439                having,
440                ..
441            } => {
442                // Resolve FROM clause first (registers table aliases).
443                if let Some(from) = from {
444                    self.resolve_from(from, scope);
445                }
446
447                // Resolve column references in SELECT list.
448                for col in columns {
449                    self.resolve_result_column(col, scope);
450                }
451
452                // Resolve WHERE clause.
453                if let Some(where_expr) = where_clause {
454                    self.resolve_expr(where_expr, scope);
455                }
456
457                // Resolve GROUP BY.
458                for expr in group_by {
459                    self.resolve_expr(expr, scope);
460                }
461
462                // Resolve HAVING.
463                if let Some(having_expr) = having {
464                    self.resolve_expr(having_expr, scope);
465                }
466            }
467            SelectCore::Values(_) => {
468                // VALUES doesn't reference columns.
469            }
470        }
471    }
472
473    fn resolve_from(&mut self, from: &FromClause, scope: &mut Scope) {
474        self.resolve_table_or_subquery(&from.source, scope);
475
476        for join in &from.joins {
477            self.resolve_join(join, scope);
478        }
479    }
480
481    fn resolve_table_or_subquery(&mut self, tos: &TableOrSubquery, scope: &mut Scope) {
482        match tos {
483            TableOrSubquery::Table { name, alias, .. } => {
484                let table_name = &name.name;
485                let alias_name = alias.as_deref().unwrap_or(table_name);
486
487                // Check for duplicate alias.
488                if scope.has_alias(alias_name) {
489                    self.push_error(SemanticErrorKind::DuplicateAlias {
490                        alias: alias_name.to_owned(),
491                    });
492                }
493
494                // Resolve table name against schema or CTEs.
495                if scope.ctes.contains(&table_name.to_ascii_lowercase()) {
496                    // CTE reference — columns are unknown at this stage.
497                    scope.add_alias(alias_name, table_name, None);
498                    self.tables_resolved += 1;
499                } else if let Some(table_def) = self.schema.find_table(table_name) {
500                    let col_set: HashSet<String> = table_def
501                        .columns
502                        .iter()
503                        .map(|c| c.name.to_ascii_lowercase())
504                        .collect();
505                    scope.add_alias(alias_name, table_name, Some(col_set));
506                    self.tables_resolved += 1;
507                } else {
508                    self.push_error(SemanticErrorKind::UnresolvedTable {
509                        name: table_name.clone(),
510                    });
511                }
512            }
513            TableOrSubquery::Subquery { query, alias, .. } => {
514                // Resolve subquery in a child scope.
515                let mut child = Scope::child(scope.clone());
516                self.resolve_select(query, &mut child);
517
518                // Register the subquery alias with empty columns (we don't
519                // track subquery output columns at this stage).
520                if let Some(alias) = alias {
521                    scope.add_alias(alias, "<subquery>", None);
522                }
523            }
524            TableOrSubquery::TableFunction { name, alias, .. } => {
525                let alias_name = alias.as_deref().unwrap_or(name);
526                scope.add_alias(alias_name, name, None);
527                self.tables_resolved += 1;
528            }
529            TableOrSubquery::ParenJoin(inner_from) => {
530                self.resolve_from(inner_from, scope);
531            }
532        }
533    }
534
535    fn resolve_join(&mut self, join: &JoinClause, scope: &mut Scope) {
536        self.resolve_table_or_subquery(&join.table, scope);
537        if let Some(ref constraint) = join.constraint {
538            match constraint {
539                JoinConstraint::On(expr) => self.resolve_expr(expr, scope),
540                JoinConstraint::Using(cols) => {
541                    for col in cols {
542                        self.resolve_unqualified_column(col, scope);
543                    }
544                }
545            }
546        }
547    }
548
549    fn resolve_result_column(&mut self, col: &ResultColumn, scope: &Scope) {
550        match col {
551            ResultColumn::Star => {
552                // SELECT * is valid if there's at least one table in scope.
553                if scope.alias_count() == 0 && scope.parent.is_none() {
554                    tracing::warn!(
555                        target: "fsqlite.parse",
556                        "SELECT * with no tables in scope"
557                    );
558                }
559            }
560            ResultColumn::TableStar(table_name) => {
561                if !scope.has_alias(table_name) {
562                    self.push_error(SemanticErrorKind::UnresolvedTable {
563                        name: table_name.clone(),
564                    });
565                }
566            }
567            ResultColumn::Expr { expr, .. } => {
568                self.resolve_expr(expr, scope);
569            }
570        }
571    }
572
573    #[allow(clippy::too_many_lines)]
574    fn resolve_expr(&mut self, expr: &Expr, scope: &Scope) {
575        match expr {
576            Expr::Column(col_ref, _span) => {
577                self.resolve_column_ref(col_ref, scope);
578            }
579            Expr::BinaryOp { left, right, .. } => {
580                self.resolve_expr(left, scope);
581                self.resolve_expr(right, scope);
582            }
583            Expr::UnaryOp { expr: inner, .. }
584            | Expr::Cast { expr: inner, .. }
585            | Expr::Collate { expr: inner, .. }
586            | Expr::IsNull { expr: inner, .. } => {
587                self.resolve_expr(inner, scope);
588            }
589            Expr::Between {
590                expr: inner,
591                low,
592                high,
593                ..
594            } => {
595                self.resolve_expr(inner, scope);
596                self.resolve_expr(low, scope);
597                self.resolve_expr(high, scope);
598            }
599            Expr::In {
600                expr: inner, set, ..
601            } => {
602                self.resolve_expr(inner, scope);
603                match set {
604                    InSet::List(items) => {
605                        for item in items {
606                            self.resolve_expr(item, scope);
607                        }
608                    }
609                    InSet::Subquery(select) => {
610                        let mut child = Scope::child(scope.clone());
611                        self.resolve_select(select, &mut child);
612                    }
613                    InSet::Table(name) => {
614                        self.resolve_table_name(&name.name, scope);
615                    }
616                }
617            }
618            Expr::Like {
619                expr: inner,
620                pattern,
621                escape,
622                ..
623            } => {
624                self.resolve_expr(inner, scope);
625                self.resolve_expr(pattern, scope);
626                if let Some(esc) = escape {
627                    self.resolve_expr(esc, scope);
628                }
629            }
630            Expr::Subquery(select, _)
631            | Expr::Exists {
632                subquery: select, ..
633            } => {
634                let mut child = Scope::child(scope.clone());
635                self.resolve_select(select, &mut child);
636            }
637            Expr::FunctionCall {
638                name, args, filter, ..
639            } => {
640                let arg_slice: &[Expr] = match args {
641                    FunctionArgs::Star => &[],
642                    FunctionArgs::List(list) => list,
643                };
644                self.resolve_function(name, arg_slice, scope);
645                if let Some(filter) = filter {
646                    self.resolve_expr(filter, scope);
647                }
648            }
649            Expr::Case {
650                operand,
651                whens,
652                else_expr,
653                ..
654            } => {
655                if let Some(op) = operand {
656                    self.resolve_expr(op, scope);
657                }
658                for (when_expr, then_expr) in whens {
659                    self.resolve_expr(when_expr, scope);
660                    self.resolve_expr(then_expr, scope);
661                }
662                if let Some(else_e) = else_expr {
663                    self.resolve_expr(else_e, scope);
664                }
665            }
666            Expr::JsonAccess {
667                expr: inner, path, ..
668            } => {
669                self.resolve_expr(inner, scope);
670                self.resolve_expr(path, scope);
671            }
672            Expr::RowValue(exprs, _) => {
673                for e in exprs {
674                    self.resolve_expr(e, scope);
675                }
676            }
677            // Literals, placeholders, and RAISE don't need resolution.
678            Expr::Literal(_, _) | Expr::Placeholder(_, _) | Expr::Raise { .. } => {}
679        }
680    }
681
682    fn resolve_column_ref(&mut self, col_ref: &ColumnRef, scope: &Scope) {
683        let result = scope.resolve_column(col_ref.table.as_deref(), &col_ref.column);
684        match result {
685            ResolveResult::Resolved(_) => {
686                self.columns_bound += 1;
687            }
688            ResolveResult::TableNotFound => {
689                tracing::error!(
690                    target: "fsqlite.parse",
691                    table = ?col_ref.table,
692                    column = %col_ref.column,
693                    "unresolvable table reference"
694                );
695                self.push_error(SemanticErrorKind::UnresolvedColumn {
696                    table: col_ref.table.clone(),
697                    column: col_ref.column.clone(),
698                });
699            }
700            ResolveResult::ColumnNotFound => {
701                tracing::error!(
702                    target: "fsqlite.parse",
703                    table = ?col_ref.table,
704                    column = %col_ref.column,
705                    "unresolvable column reference"
706                );
707                self.push_error(SemanticErrorKind::UnresolvedColumn {
708                    table: col_ref.table.clone(),
709                    column: col_ref.column.clone(),
710                });
711            }
712            ResolveResult::Ambiguous(candidates) => {
713                tracing::error!(
714                    target: "fsqlite.parse",
715                    column = %col_ref.column,
716                    candidates = ?candidates,
717                    "ambiguous column reference"
718                );
719                self.push_error(SemanticErrorKind::AmbiguousColumn {
720                    column: col_ref.column.clone(),
721                    candidates,
722                });
723            }
724        }
725    }
726
727    fn resolve_unqualified_column(&mut self, name: &str, scope: &Scope) {
728        let result = scope.resolve_column(None, name);
729        match result {
730            ResolveResult::Resolved(_) => {
731                self.columns_bound += 1;
732            }
733            ResolveResult::ColumnNotFound | ResolveResult::TableNotFound => {
734                self.push_error(SemanticErrorKind::UnresolvedColumn {
735                    table: None,
736                    column: name.to_owned(),
737                });
738            }
739            ResolveResult::Ambiguous(candidates) => {
740                self.push_error(SemanticErrorKind::AmbiguousColumn {
741                    column: name.to_owned(),
742                    candidates,
743                });
744            }
745        }
746    }
747
748    fn resolve_table_name(&mut self, name: &str, scope: &Scope) {
749        if scope.ctes.contains(&name.to_ascii_lowercase()) || self.schema.find_table(name).is_some()
750        {
751            self.tables_resolved += 1;
752        } else {
753            self.push_error(SemanticErrorKind::UnresolvedTable {
754                name: name.to_owned(),
755            });
756        }
757    }
758
759    fn resolve_function(&mut self, name: &str, args: &[Expr], scope: &Scope) {
760        // Resolve argument expressions.
761        for arg in args {
762            self.resolve_expr(arg, scope);
763        }
764
765        // Validate known function arity.
766        if let Some(expected) = known_function_arity(name) {
767            let actual = args.len();
768            let valid = match &expected {
769                FunctionArity::Exact(n) => actual == *n,
770                FunctionArity::Range(lo, hi) => actual >= *lo && actual <= *hi,
771                FunctionArity::Variadic => true,
772            };
773            if !valid {
774                self.push_error(SemanticErrorKind::FunctionArityMismatch {
775                    function: name.to_owned(),
776                    expected,
777                    actual,
778                });
779            }
780        }
781    }
782
783    fn push_error(&mut self, kind: SemanticErrorKind) {
784        let message = match &kind {
785            SemanticErrorKind::UnresolvedColumn { table, column } => {
786                if let Some(t) = table {
787                    format!("no such column: {t}.{column}")
788                } else {
789                    format!("no such column: {column}")
790                }
791            }
792            SemanticErrorKind::AmbiguousColumn {
793                column, candidates, ..
794            } => {
795                format!(
796                    "ambiguous column name: {column} (candidates: {})",
797                    candidates.join(", ")
798                )
799            }
800            SemanticErrorKind::UnresolvedTable { name } => {
801                format!("no such table: {name}")
802            }
803            SemanticErrorKind::DuplicateAlias { alias } => {
804                format!("duplicate alias: {alias}")
805            }
806            SemanticErrorKind::FunctionArityMismatch {
807                function,
808                expected,
809                actual,
810            } => {
811                format!(
812                    "wrong number of arguments to function {function}: expected {expected:?}, got {actual}"
813                )
814            }
815            SemanticErrorKind::ImplicitTypeCoercion {
816                from, to, context, ..
817            } => {
818                format!("implicit type coercion from {from:?} to {to:?} in {context}")
819            }
820        };
821
822        self.errors.push(SemanticError { kind, message });
823    }
824}
825
826// ---------------------------------------------------------------------------
827// Known function arity table
828// ---------------------------------------------------------------------------
829
830/// Returns the expected arity for a known SQLite function, if recognized.
831#[must_use]
832fn known_function_arity(name: &str) -> Option<FunctionArity> {
833    match name.to_ascii_lowercase().as_str() {
834        "random" | "changes" | "last_insert_rowid" | "total_changes" => {
835            Some(FunctionArity::Exact(0))
836        }
837        // Aggregate (1-arg) and scalar (1-arg) functions
838        "sum" | "total" | "avg" | "abs" | "hex" | "length" | "lower" | "upper" | "typeof"
839        | "unicode" | "quote" | "zeroblob" | "soundex" | "likelihood" | "randomblob" => {
840            Some(FunctionArity::Exact(1))
841        }
842        "ifnull" | "nullif" | "instr" | "glob" => Some(FunctionArity::Exact(2)),
843        "iif" | "replace" => Some(FunctionArity::Exact(3)),
844        "count" => Some(FunctionArity::Range(0, 1)),
845        "group_concat" | "trim" | "ltrim" | "rtrim" => Some(FunctionArity::Range(1, 2)),
846        "substr" | "substring" | "like" => Some(FunctionArity::Range(2, 3)),
847        // Variadic: aggregates, scalars, date/time, and JSON functions
848        "min" | "max" | "coalesce" | "printf" | "format" | "char" | "date" | "time"
849        | "datetime" | "julianday" | "strftime" | "unixepoch" | "json" | "json_array"
850        | "json_object" | "json_type" | "json_valid" | "json_extract" | "json_insert"
851        | "json_replace" | "json_set" | "json_remove" => Some(FunctionArity::Variadic),
852
853        _ => None, // Unknown function — skip arity check.
854    }
855}
856
857// ---------------------------------------------------------------------------
858// Tests
859// ---------------------------------------------------------------------------
860
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::Parser;

    // ── Fixtures ──

    /// Builds a column definition from its four attributes.
    fn column(name: &str, affinity: TypeAffinity, is_ipk: bool, not_null: bool) -> ColumnDef {
        ColumnDef {
            name: String::from(name),
            affinity,
            is_ipk,
            not_null,
        }
    }

    /// Builds a table definition with `without_rowid` and `strict` both off.
    fn rowid_table(name: &str, columns: Vec<ColumnDef>) -> TableDef {
        TableDef {
            name: String::from(name),
            columns,
            without_rowid: false,
            strict: false,
        }
    }

    /// Collects borrowed column names into an owned set for `Scope::add_alias`.
    fn column_set(names: &[&str]) -> HashSet<String> {
        names.iter().map(|name| (*name).to_owned()).collect()
    }

    /// Two-table fixture: `users(id, name, email)` and
    /// `orders(id, user_id, amount)`, each with `id` as the integer primary key.
    fn make_schema() -> Schema {
        let users = rowid_table(
            "users",
            vec![
                column("id", TypeAffinity::Integer, true, true),
                column("name", TypeAffinity::Text, false, true),
                column("email", TypeAffinity::Text, false, false),
            ],
        );
        let orders = rowid_table(
            "orders",
            vec![
                column("id", TypeAffinity::Integer, true, true),
                column("user_id", TypeAffinity::Integer, false, true),
                column("amount", TypeAffinity::Real, false, false),
            ],
        );

        let mut catalog = Schema::new();
        catalog.add_table(users);
        catalog.add_table(orders);
        catalog
    }

    /// Parses `sql`, requiring it to yield exactly one statement and no
    /// parse errors.
    fn parse_one(sql: &str) -> Statement {
        let mut parser = Parser::from_sql(sql);
        let (stmts, errs) = parser.parse_all();
        assert!(errs.is_empty(), "parse errors: {errs:?}");
        assert_eq!(stmts.len(), 1);
        let mut parsed = stmts.into_iter();
        parsed.next().unwrap()
    }

    // ── Schema tests ──

    #[test]
    fn test_schema_find_table_case_insensitive() {
        let catalog = make_schema();
        for spelling in ["users", "USERS", "Users"] {
            assert!(catalog.find_table(spelling).is_some());
        }
        assert!(catalog.find_table("nonexistent").is_none());
    }

    #[test]
    fn test_table_find_column() {
        let catalog = make_schema();
        let users = catalog.find_table("users").unwrap();
        for spelling in ["id", "ID"] {
            assert!(users.has_column(spelling));
        }
        assert!(!users.has_column("nonexistent"));
    }

    #[test]
    fn test_table_rowid_alias() {
        let catalog = make_schema();
        let users = catalog.find_table("users").unwrap();
        // The three built-in rowid spellings, plus the IPK column itself.
        for alias in ["rowid", "_rowid_", "oid", "id"] {
            assert!(users.is_rowid_alias(alias));
        }
        assert!(!users.is_rowid_alias("name"));
    }

    // ── Scope tests ──

    #[test]
    fn test_scope_resolve_qualified_column() {
        let mut scope = Scope::root();
        scope.add_alias("u", "users", Some(column_set(&["id", "name", "email"])));

        let known = scope.resolve_column(Some("u"), "id");
        assert_eq!(known, ResolveResult::Resolved("u".to_string()));

        let missing_column = scope.resolve_column(Some("u"), "nonexistent");
        assert_eq!(missing_column, ResolveResult::ColumnNotFound);

        let missing_table = scope.resolve_column(Some("x"), "id");
        assert_eq!(missing_table, ResolveResult::TableNotFound);
    }

    #[test]
    fn test_scope_resolve_unqualified_column() {
        let mut scope = Scope::root();
        scope.add_alias("u", "users", Some(column_set(&["id", "name"])));
        scope.add_alias("o", "orders", Some(column_set(&["id", "user_id"])));

        // "name" only exists under alias "u".
        let name_owner = scope.resolve_column(None, "name");
        assert_eq!(name_owner, ResolveResult::Resolved("u".to_string()));

        // "user_id" only exists under alias "o".
        let user_id_owner = scope.resolve_column(None, "user_id");
        assert_eq!(user_id_owner, ResolveResult::Resolved("o".to_string()));

        // "id" exists under both aliases, so it must be reported as ambiguous.
        let id_owner = scope.resolve_column(None, "id");
        match id_owner {
            ResolveResult::Ambiguous(candidates) => assert_eq!(candidates.len(), 2),
            other => panic!("expected Ambiguous, got {other:?}"),
        }

        // "nonexistent" is in neither alias.
        let nobody = scope.resolve_column(None, "nonexistent");
        assert_eq!(nobody, ResolveResult::ColumnNotFound);
    }

    #[test]
    fn test_scope_child_inherits_parent() {
        let mut outer = Scope::root();
        outer.add_alias("u", "users", Some(column_set(&["id", "name"])));
        let inner = Scope::child(outer);

        // An alias registered on the parent is resolvable through the child.
        let seen_from_child = inner.resolve_column(Some("u"), "id");
        assert_eq!(seen_from_child, ResolveResult::Resolved("u".to_string()));
    }

    // ── Resolver tests ──

    #[test]
    fn test_resolve_simple_select() {
        let ast = parse_one("SELECT id, name FROM users");
        let catalog = make_schema();
        let mut analyzer = Resolver::new(&catalog);
        let errors = analyzer.resolve_statement(&ast);
        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
        assert_eq!(
            (analyzer.tables_resolved, analyzer.columns_bound),
            (1, 2)
        );
    }

    #[test]
    fn test_resolve_qualified_column() {
        let ast = parse_one("SELECT u.id, u.name FROM users u");
        let catalog = make_schema();
        let mut analyzer = Resolver::new(&catalog);
        let errors = analyzer.resolve_statement(&ast);
        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
        assert_eq!(
            (analyzer.tables_resolved, analyzer.columns_bound),
            (1, 2)
        );
    }

    #[test]
    fn test_resolve_join() {
        let ast =
            parse_one("SELECT u.name, o.amount FROM users u JOIN orders o ON u.id = o.user_id");
        let catalog = make_schema();
        let mut analyzer = Resolver::new(&catalog);
        let errors = analyzer.resolve_statement(&ast);
        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
        // Columns bound: u.name, o.amount, u.id, o.user_id.
        assert_eq!(
            (analyzer.tables_resolved, analyzer.columns_bound),
            (2, 4)
        );
    }

    #[test]
    fn test_resolve_unresolved_table() {
        let ast = parse_one("SELECT * FROM nonexistent");
        let catalog = make_schema();
        let mut analyzer = Resolver::new(&catalog);
        let errors = analyzer.resolve_statement(&ast);
        assert_eq!(errors.len(), 1);
        let kind = &errors[0].kind;
        assert!(matches!(kind, SemanticErrorKind::UnresolvedTable { .. }));
    }

    #[test]
    fn test_resolve_unresolved_column() {
        let ast = parse_one("SELECT nonexistent FROM users");
        let catalog = make_schema();
        let mut analyzer = Resolver::new(&catalog);
        let errors = analyzer.resolve_statement(&ast);
        assert_eq!(errors.len(), 1);
        let kind = &errors[0].kind;
        assert!(matches!(kind, SemanticErrorKind::UnresolvedColumn { .. }));
    }

    #[test]
    fn test_resolve_ambiguous_column() {
        let ast = parse_one("SELECT id FROM users, orders");
        let catalog = make_schema();
        let mut analyzer = Resolver::new(&catalog);
        let errors = analyzer.resolve_statement(&ast);
        assert_eq!(errors.len(), 1);
        let kind = &errors[0].kind;
        assert!(matches!(kind, SemanticErrorKind::AmbiguousColumn { .. }));
    }

    #[test]
    fn test_resolve_where_clause() {
        let ast = parse_one("SELECT name FROM users WHERE id > 10");
        let catalog = make_schema();
        let mut analyzer = Resolver::new(&catalog);
        let errors = analyzer.resolve_statement(&ast);
        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
        // Columns bound: name (result column) and id (WHERE clause).
        assert_eq!(analyzer.columns_bound, 2);
    }

    #[test]
    fn test_resolve_star_select() {
        let ast = parse_one("SELECT * FROM users");
        let catalog = make_schema();
        let mut analyzer = Resolver::new(&catalog);
        let errors = analyzer.resolve_statement(&ast);
        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
        assert_eq!(analyzer.tables_resolved, 1);
    }

    #[test]
    fn test_resolve_insert_checks_table() {
        let ast = parse_one("INSERT INTO nonexistent VALUES (1)");
        let catalog = make_schema();
        let mut analyzer = Resolver::new(&catalog);
        let errors = analyzer.resolve_statement(&ast);
        assert_eq!(errors.len(), 1);
        let kind = &errors[0].kind;
        assert!(matches!(kind, SemanticErrorKind::UnresolvedTable { .. }));
    }

    // ── Metrics tests ──

    #[test]
    fn test_semantic_metrics() {
        reset_semantic_metrics();
        let catalog = make_schema();

        // Resolving an unknown column is expected to record at least one error.
        let ast = parse_one("SELECT nonexistent FROM users");
        let mut analyzer = Resolver::new(&catalog);
        let _ = analyzer.resolve_statement(&ast);

        let SemanticMetricsSnapshot {
            fsqlite_semantic_errors_total: recorded,
        } = semantic_metrics_snapshot();
        assert!(recorded >= 1);
    }
}