Skip to main content

flowscope_core/linter/rules/
am_006.rs

1//! LINT_AM_006: Ambiguous column references.
2//!
3//! SQLFluff AM06 parity: enforce consistent `GROUP BY` / `ORDER BY` reference
4//! styles (implicit numeric position vs explicit expressions/identifiers).
5
6use crate::linter::config::LintConfig;
7use crate::linter::rule::{LintContext, LintRule};
8use crate::types::{issue_codes, Issue};
9use sqlparser::ast::{
10    CreateView, Expr, FunctionArg, FunctionArgExpr, FunctionArguments, GroupByExpr,
11    GroupByWithModifier, OrderByKind, Query, Select, SelectItem, SetExpr, Statement, TableFactor,
12    Value, WindowType,
13};
14
15use super::semantic_helpers::join_on_expr;
16
17pub struct AmbiguousColumnRefs {
18    style_policy: GroupByAndOrderByStylePolicy,
19}
20
21#[derive(Clone, Copy, PartialEq, Eq)]
22enum ReferenceStyle {
23    Explicit,
24    Implicit,
25}
26
27#[derive(Clone, Copy, PartialEq, Eq)]
28enum GroupByAndOrderByStylePolicy {
29    Consistent,
30    Explicit,
31    Implicit,
32}
33
34impl GroupByAndOrderByStylePolicy {
35    fn from_config(config: &LintConfig) -> Self {
36        match config
37            .rule_option_str(issue_codes::LINT_AM_006, "group_by_and_order_by_style")
38            .unwrap_or("consistent")
39            .to_ascii_lowercase()
40            .as_str()
41        {
42            "explicit" => Self::Explicit,
43            "implicit" => Self::Implicit,
44            _ => Self::Consistent,
45        }
46    }
47}
48
49impl AmbiguousColumnRefs {
50    pub fn from_config(config: &LintConfig) -> Self {
51        Self {
52            style_policy: GroupByAndOrderByStylePolicy::from_config(config),
53        }
54    }
55}
56
57impl Default for AmbiguousColumnRefs {
58    fn default() -> Self {
59        Self {
60            style_policy: GroupByAndOrderByStylePolicy::Consistent,
61        }
62    }
63}
64
65impl LintRule for AmbiguousColumnRefs {
66    fn code(&self) -> &'static str {
67        issue_codes::LINT_AM_006
68    }
69
70    fn name(&self) -> &'static str {
71        "Ambiguous column references"
72    }
73
74    fn description(&self) -> &'static str {
75        "Inconsistent column references in 'GROUP BY/ORDER BY' clauses."
76    }
77
78    fn check(&self, statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
79        let mut issues = Vec::new();
80        let mut prior_style = None;
81        check_statement(
82            statement,
83            ctx.statement_index,
84            self.style_policy,
85            &mut prior_style,
86            &mut issues,
87        );
88        issues
89    }
90}
91
92fn check_statement(
93    statement: &Statement,
94    statement_index: usize,
95    style_policy: GroupByAndOrderByStylePolicy,
96    prior_style: &mut Option<ReferenceStyle>,
97    issues: &mut Vec<Issue>,
98) {
99    match statement {
100        Statement::Query(query) => {
101            check_query(query, statement_index, style_policy, prior_style, issues)
102        }
103        Statement::Insert(insert) => {
104            if let Some(source) = &insert.source {
105                check_query(source, statement_index, style_policy, prior_style, issues);
106            }
107        }
108        Statement::CreateView(CreateView { query, .. }) => {
109            check_query(query, statement_index, style_policy, prior_style, issues);
110        }
111        Statement::CreateTable(create) => {
112            if let Some(query) = &create.query {
113                check_query(query, statement_index, style_policy, prior_style, issues);
114            }
115        }
116        _ => {}
117    }
118}
119
120fn check_query(
121    query: &Query,
122    statement_index: usize,
123    style_policy: GroupByAndOrderByStylePolicy,
124    prior_style: &mut Option<ReferenceStyle>,
125    issues: &mut Vec<Issue>,
126) {
127    if let Some(with) = &query.with {
128        for cte in &with.cte_tables {
129            check_query(
130                &cte.query,
131                statement_index,
132                style_policy,
133                prior_style,
134                issues,
135            );
136        }
137    }
138
139    check_set_expr(
140        &query.body,
141        statement_index,
142        style_policy,
143        prior_style,
144        issues,
145    );
146
147    if let Some(order_by) = &query.order_by {
148        let mut has_explicit = false;
149        let mut has_implicit = false;
150
151        if let OrderByKind::Expressions(order_exprs) = &order_by.kind {
152            for order_expr in order_exprs {
153                classify_reference_style_in_expr(
154                    &order_expr.expr,
155                    &mut has_explicit,
156                    &mut has_implicit,
157                );
158            }
159        }
160
161        apply_consistent_style_policy(
162            has_explicit,
163            has_implicit,
164            statement_index,
165            style_policy,
166            prior_style,
167            issues,
168        );
169    }
170}
171
172fn check_set_expr(
173    set_expr: &SetExpr,
174    statement_index: usize,
175    style_policy: GroupByAndOrderByStylePolicy,
176    prior_style: &mut Option<ReferenceStyle>,
177    issues: &mut Vec<Issue>,
178) {
179    match set_expr {
180        SetExpr::Select(select) => {
181            check_select(select, statement_index, style_policy, prior_style, issues)
182        }
183        SetExpr::Query(query) => {
184            check_query(query, statement_index, style_policy, prior_style, issues)
185        }
186        SetExpr::SetOperation { left, right, .. } => {
187            check_set_expr(left, statement_index, style_policy, prior_style, issues);
188            check_set_expr(right, statement_index, style_policy, prior_style, issues);
189        }
190        SetExpr::Insert(statement)
191        | SetExpr::Update(statement)
192        | SetExpr::Delete(statement)
193        | SetExpr::Merge(statement) => {
194            check_statement(
195                statement,
196                statement_index,
197                style_policy,
198                prior_style,
199                issues,
200            );
201        }
202        _ => {}
203    }
204}
205
206fn check_select(
207    select: &Select,
208    statement_index: usize,
209    style_policy: GroupByAndOrderByStylePolicy,
210    prior_style: &mut Option<ReferenceStyle>,
211    issues: &mut Vec<Issue>,
212) {
213    // Traverse subqueries in approximate lexical order before evaluating this
214    // SELECT's GROUP BY clause, matching SQLFluff's clause-order precedence.
215    for item in &select.projection {
216        if let SelectItem::UnnamedExpr(expr) | SelectItem::ExprWithAlias { expr, .. } = item {
217            visit_subqueries_in_expr(expr, statement_index, style_policy, prior_style, issues);
218        }
219    }
220
221    if let Some(prewhere) = &select.prewhere {
222        visit_subqueries_in_expr(prewhere, statement_index, style_policy, prior_style, issues);
223    }
224
225    for table in &select.from {
226        check_table_factor(
227            &table.relation,
228            statement_index,
229            style_policy,
230            prior_style,
231            issues,
232        );
233        for join in &table.joins {
234            check_table_factor(
235                &join.relation,
236                statement_index,
237                style_policy,
238                prior_style,
239                issues,
240            );
241            if let Some(on_expr) = join_on_expr(&join.join_operator) {
242                visit_subqueries_in_expr(
243                    on_expr,
244                    statement_index,
245                    style_policy,
246                    prior_style,
247                    issues,
248                );
249            }
250        }
251    }
252
253    if let Some(selection) = &select.selection {
254        visit_subqueries_in_expr(
255            selection,
256            statement_index,
257            style_policy,
258            prior_style,
259            issues,
260        );
261    }
262
263    let mut has_explicit = false;
264    let mut has_implicit = false;
265    if let GroupByExpr::Expressions(exprs, modifiers) = &select.group_by {
266        for expr in exprs {
267            classify_reference_style_in_expr(expr, &mut has_explicit, &mut has_implicit);
268        }
269
270        for modifier in modifiers {
271            classify_reference_style_in_group_modifier(
272                modifier,
273                &mut has_explicit,
274                &mut has_implicit,
275            );
276        }
277    }
278    apply_consistent_style_policy(
279        has_explicit,
280        has_implicit,
281        statement_index,
282        style_policy,
283        prior_style,
284        issues,
285    );
286
287    if let Some(having) = &select.having {
288        visit_subqueries_in_expr(having, statement_index, style_policy, prior_style, issues);
289    }
290    if let Some(qualify) = &select.qualify {
291        visit_subqueries_in_expr(qualify, statement_index, style_policy, prior_style, issues);
292    }
293
294    for sort_expr in &select.sort_by {
295        visit_subqueries_in_expr(
296            &sort_expr.expr,
297            statement_index,
298            style_policy,
299            prior_style,
300            issues,
301        );
302    }
303}
304
305fn check_table_factor(
306    table_factor: &TableFactor,
307    statement_index: usize,
308    style_policy: GroupByAndOrderByStylePolicy,
309    prior_style: &mut Option<ReferenceStyle>,
310    issues: &mut Vec<Issue>,
311) {
312    match table_factor {
313        TableFactor::Derived { subquery, .. } => {
314            check_query(subquery, statement_index, style_policy, prior_style, issues);
315        }
316        TableFactor::NestedJoin {
317            table_with_joins, ..
318        } => {
319            check_table_factor(
320                &table_with_joins.relation,
321                statement_index,
322                style_policy,
323                prior_style,
324                issues,
325            );
326            for join in &table_with_joins.joins {
327                check_table_factor(
328                    &join.relation,
329                    statement_index,
330                    style_policy,
331                    prior_style,
332                    issues,
333                );
334                if let Some(on_expr) = join_on_expr(&join.join_operator) {
335                    visit_subqueries_in_expr(
336                        on_expr,
337                        statement_index,
338                        style_policy,
339                        prior_style,
340                        issues,
341                    );
342                }
343            }
344        }
345        TableFactor::Pivot { table, .. }
346        | TableFactor::Unpivot { table, .. }
347        | TableFactor::MatchRecognize { table, .. } => {
348            check_table_factor(table, statement_index, style_policy, prior_style, issues)
349        }
350        _ => {}
351    }
352}
353
354fn visit_subqueries_in_expr(
355    expr: &Expr,
356    statement_index: usize,
357    style_policy: GroupByAndOrderByStylePolicy,
358    prior_style: &mut Option<ReferenceStyle>,
359    issues: &mut Vec<Issue>,
360) {
361    match expr {
362        Expr::Subquery(query)
363        | Expr::Exists {
364            subquery: query, ..
365        } => check_query(query, statement_index, style_policy, prior_style, issues),
366        Expr::InSubquery {
367            expr: inner,
368            subquery,
369            ..
370        } => {
371            visit_subqueries_in_expr(inner, statement_index, style_policy, prior_style, issues);
372            check_query(subquery, statement_index, style_policy, prior_style, issues);
373        }
374        Expr::BinaryOp { left, right, .. }
375        | Expr::AnyOp { left, right, .. }
376        | Expr::AllOp { left, right, .. } => {
377            visit_subqueries_in_expr(left, statement_index, style_policy, prior_style, issues);
378            visit_subqueries_in_expr(right, statement_index, style_policy, prior_style, issues);
379        }
380        Expr::UnaryOp { expr: inner, .. }
381        | Expr::Nested(inner)
382        | Expr::IsNull(inner)
383        | Expr::IsNotNull(inner)
384        | Expr::Cast { expr: inner, .. } => {
385            visit_subqueries_in_expr(inner, statement_index, style_policy, prior_style, issues);
386        }
387        Expr::InList { expr, list, .. } => {
388            visit_subqueries_in_expr(expr, statement_index, style_policy, prior_style, issues);
389            for item in list {
390                visit_subqueries_in_expr(item, statement_index, style_policy, prior_style, issues);
391            }
392        }
393        Expr::Between {
394            expr, low, high, ..
395        } => {
396            visit_subqueries_in_expr(expr, statement_index, style_policy, prior_style, issues);
397            visit_subqueries_in_expr(low, statement_index, style_policy, prior_style, issues);
398            visit_subqueries_in_expr(high, statement_index, style_policy, prior_style, issues);
399        }
400        Expr::Case {
401            operand,
402            conditions,
403            else_result,
404            ..
405        } => {
406            if let Some(operand) = operand {
407                visit_subqueries_in_expr(
408                    operand,
409                    statement_index,
410                    style_policy,
411                    prior_style,
412                    issues,
413                );
414            }
415            for when in conditions {
416                visit_subqueries_in_expr(
417                    &when.condition,
418                    statement_index,
419                    style_policy,
420                    prior_style,
421                    issues,
422                );
423                visit_subqueries_in_expr(
424                    &when.result,
425                    statement_index,
426                    style_policy,
427                    prior_style,
428                    issues,
429                );
430            }
431            if let Some(otherwise) = else_result {
432                visit_subqueries_in_expr(
433                    otherwise,
434                    statement_index,
435                    style_policy,
436                    prior_style,
437                    issues,
438                );
439            }
440        }
441        Expr::Function(function) => {
442            if let FunctionArguments::List(arguments) = &function.args {
443                for arg in &arguments.args {
444                    match arg {
445                        FunctionArg::Unnamed(FunctionArgExpr::Expr(expr))
446                        | FunctionArg::Named {
447                            arg: FunctionArgExpr::Expr(expr),
448                            ..
449                        } => visit_subqueries_in_expr(
450                            expr,
451                            statement_index,
452                            style_policy,
453                            prior_style,
454                            issues,
455                        ),
456                        _ => {}
457                    }
458                }
459            }
460
461            if let Some(filter) = &function.filter {
462                visit_subqueries_in_expr(
463                    filter,
464                    statement_index,
465                    style_policy,
466                    prior_style,
467                    issues,
468                );
469            }
470
471            for order_expr in &function.within_group {
472                visit_subqueries_in_expr(
473                    &order_expr.expr,
474                    statement_index,
475                    style_policy,
476                    prior_style,
477                    issues,
478                );
479            }
480
481            if let Some(WindowType::WindowSpec(spec)) = &function.over {
482                for expr in &spec.partition_by {
483                    visit_subqueries_in_expr(
484                        expr,
485                        statement_index,
486                        style_policy,
487                        prior_style,
488                        issues,
489                    );
490                }
491                for order_expr in &spec.order_by {
492                    visit_subqueries_in_expr(
493                        &order_expr.expr,
494                        statement_index,
495                        style_policy,
496                        prior_style,
497                        issues,
498                    );
499                }
500            }
501        }
502        _ => {}
503    }
504}
505
506fn classify_reference_style_in_group_modifier(
507    modifier: &GroupByWithModifier,
508    has_explicit: &mut bool,
509    has_implicit: &mut bool,
510) {
511    if let GroupByWithModifier::GroupingSets(expr) = modifier {
512        classify_reference_style_in_expr(expr, has_explicit, has_implicit);
513    }
514}
515
516fn classify_reference_style_in_expr(expr: &Expr, has_explicit: &mut bool, has_implicit: &mut bool) {
517    match expr {
518        Expr::Value(value) if matches!(value.value, Value::Number(_, _)) => *has_implicit = true,
519        Expr::Rollup(sets) | Expr::Cube(sets) | Expr::GroupingSets(sets) => {
520            for set in sets {
521                for item in set {
522                    classify_reference_style_in_expr(item, has_explicit, has_implicit);
523                }
524            }
525        }
526        _ => *has_explicit = true,
527    }
528}
529
530fn apply_consistent_style_policy(
531    has_explicit: bool,
532    has_implicit: bool,
533    statement_index: usize,
534    style_policy: GroupByAndOrderByStylePolicy,
535    prior_style: &mut Option<ReferenceStyle>,
536    issues: &mut Vec<Issue>,
537) {
538    if !has_explicit && !has_implicit {
539        return;
540    }
541
542    if has_explicit && has_implicit {
543        issues.push(
544            Issue::warning(
545                issue_codes::LINT_AM_006,
546                "Inconsistent column references in 'GROUP BY/ORDER BY' clauses.",
547            )
548            .with_statement(statement_index),
549        );
550        return;
551    }
552
553    if matches!(style_policy, GroupByAndOrderByStylePolicy::Explicit) && has_implicit {
554        issues.push(
555            Issue::warning(
556                issue_codes::LINT_AM_006,
557                "Inconsistent column references in 'GROUP BY/ORDER BY' clauses.",
558            )
559            .with_statement(statement_index),
560        );
561        return;
562    }
563
564    if matches!(style_policy, GroupByAndOrderByStylePolicy::Implicit) && has_explicit {
565        issues.push(
566            Issue::warning(
567                issue_codes::LINT_AM_006,
568                "Inconsistent column references in 'GROUP BY/ORDER BY' clauses.",
569            )
570            .with_statement(statement_index),
571        );
572        return;
573    }
574
575    if !matches!(style_policy, GroupByAndOrderByStylePolicy::Consistent) {
576        return;
577    }
578
579    let current_style = if has_implicit {
580        ReferenceStyle::Implicit
581    } else {
582        ReferenceStyle::Explicit
583    };
584
585    if let Some(previous_style) = prior_style {
586        if *previous_style != current_style {
587            issues.push(
588                Issue::warning(
589                    issue_codes::LINT_AM_006,
590                    "Inconsistent column references in 'GROUP BY/ORDER BY' clauses.",
591                )
592                .with_statement(statement_index),
593            );
594            return;
595        }
596    }
597
598    *prior_style = Some(current_style);
599}
600
601#[cfg(test)]
602mod tests {
603    use super::*;
604    use crate::linter::config::LintConfig;
605    use crate::parser::parse_sql;
606
607    fn run(sql: &str) -> Vec<Issue> {
608        let statements = parse_sql(sql).expect("parse");
609        let rule = AmbiguousColumnRefs::default();
610        statements
611            .iter()
612            .enumerate()
613            .flat_map(|(index, statement)| {
614                rule.check(
615                    statement,
616                    &LintContext {
617                        sql,
618                        statement_range: 0..sql.len(),
619                        statement_index: index,
620                    },
621                )
622            })
623            .collect()
624    }
625
626    // --- Edge cases adopted from sqlfluff AM06 ---
627
628    #[test]
629    fn passes_explicit_group_by_default() {
630        let issues =
631            run("SELECT foo, bar, sum(baz) AS sum_value FROM fake_table GROUP BY foo, bar");
632        assert!(issues.is_empty());
633    }
634
635    #[test]
636    fn passes_implicit_group_by_default() {
637        let issues = run("SELECT foo, bar, sum(baz) AS sum_value FROM fake_table GROUP BY 1, 2");
638        assert!(issues.is_empty());
639    }
640
641    #[test]
642    fn flags_mixed_group_by_references() {
643        let issues = run("SELECT foo, bar, sum(baz) AS sum_value FROM fake_table GROUP BY 1, bar");
644        assert_eq!(issues.len(), 1);
645        assert_eq!(issues[0].code, issue_codes::LINT_AM_006);
646    }
647
648    #[test]
649    fn flags_mixed_order_by_references() {
650        let issues =
651            run("SELECT foo, bar, sum(baz) AS sum_value FROM fake_table ORDER BY 1, power(bar, 2)");
652        assert_eq!(issues.len(), 1);
653        assert_eq!(issues[0].code, issue_codes::LINT_AM_006);
654    }
655
656    #[test]
657    fn flags_across_clause_style_switch() {
658        let issues = run(
659            "SELECT foo, bar, sum(baz) AS sum_value FROM fake_table GROUP BY 1, 2 ORDER BY foo, bar",
660        );
661        assert_eq!(issues.len(), 1);
662    }
663
664    #[test]
665    fn passes_consistent_explicit_group_and_order() {
666        let issues = run(
667            "SELECT foo, bar, sum(baz) AS sum_value FROM fake_table GROUP BY foo, bar ORDER BY foo, bar",
668        );
669        assert!(issues.is_empty());
670    }
671
672    #[test]
673    fn passes_consistent_implicit_group_and_order() {
674        let issues = run(
675            "SELECT foo, bar, sum(baz) AS sum_value FROM fake_table GROUP BY 1, 2 ORDER BY 1, 2",
676        );
677        assert!(issues.is_empty());
678    }
679
680    #[test]
681    fn ignores_window_order_by() {
682        let issues = run(
683            "SELECT field_1, SUM(field_3) OVER (ORDER BY field_1) FROM table1 GROUP BY 1 ORDER BY 1",
684        );
685        assert!(issues.is_empty());
686    }
687
688    #[test]
689    fn ignores_within_group_order_by() {
690        let issues = run(
691            "SELECT LISTAGG(x) WITHIN GROUP (ORDER BY list_order) AS my_list FROM main GROUP BY 1",
692        );
693        assert!(issues.is_empty());
694    }
695
696    #[test]
697    fn flags_two_violations_when_subquery_sets_explicit_precedent() {
698        let issues = run(
699            "SELECT foo, bar, sum(baz) AS sum_value FROM (SELECT foo, bar, sum(baz) AS baz FROM fake_table GROUP BY foo, bar) q GROUP BY 1, 2 ORDER BY 1, 2",
700        );
701        assert_eq!(issues.len(), 2);
702        assert!(issues
703            .iter()
704            .all(|issue| issue.code == issue_codes::LINT_AM_006));
705    }
706
707    #[test]
708    fn passes_rollup_when_consistent_with_prior_implicit_style() {
709        let issues = run(
710            "SELECT column1, column2 FROM table_name GROUP BY 1, 2 UNION ALL SELECT column1, column2 FROM table_name2 GROUP BY ROLLUP(1, 2)",
711        );
712        assert!(issues.is_empty());
713    }
714
715    #[test]
716    fn flags_rollup_when_inconsistent_with_prior_explicit_style() {
717        let issues = run(
718            "SELECT column1, column2 FROM table_name GROUP BY column1, column2 UNION ALL SELECT column1, column2 FROM table_name2 GROUP BY ROLLUP(1, 2)",
719        );
720        assert_eq!(issues.len(), 1);
721    }
722
723    #[test]
724    fn ignores_statements_without_group_or_order_references() {
725        let issues = run("SELECT a.id, name FROM a");
726        assert!(issues.is_empty());
727    }
728
729    #[test]
730    fn explicit_policy_flags_implicit_group_by_position() {
731        let config = LintConfig {
732            enabled: true,
733            disabled_rules: vec![],
734            rule_configs: std::collections::BTreeMap::from([(
735                "ambiguous.column_references".to_string(),
736                serde_json::json!({"group_by_and_order_by_style": "explicit"}),
737            )]),
738        };
739        let rule = AmbiguousColumnRefs::from_config(&config);
740        let sql = "SELECT foo, bar FROM fake_table GROUP BY 1, 2";
741        let statements = parse_sql(sql).expect("parse");
742        let issues = rule.check(
743            &statements[0],
744            &LintContext {
745                sql,
746                statement_range: 0..sql.len(),
747                statement_index: 0,
748            },
749        );
750        assert_eq!(issues.len(), 1);
751    }
752
753    #[test]
754    fn implicit_policy_flags_explicit_group_by_reference() {
755        let config = LintConfig {
756            enabled: true,
757            disabled_rules: vec![],
758            rule_configs: std::collections::BTreeMap::from([(
759                "LINT_AM_006".to_string(),
760                serde_json::json!({"group_by_and_order_by_style": "implicit"}),
761            )]),
762        };
763        let rule = AmbiguousColumnRefs::from_config(&config);
764        let sql = "SELECT foo, bar FROM fake_table GROUP BY foo, bar";
765        let statements = parse_sql(sql).expect("parse");
766        let issues = rule.check(
767            &statements[0],
768            &LintContext {
769                sql,
770                statement_range: 0..sql.len(),
771                statement_index: 0,
772            },
773        );
774        assert_eq!(issues.len(), 1);
775    }
776}