Skip to main content

flowscope_core/linter/rules/
am_006.rs

1//! LINT_AM_006: Ambiguous column references.
2//!
3//! SQLFluff AM06 parity: enforce consistent `GROUP BY` / `ORDER BY` reference
4//! styles (implicit numeric position vs explicit expressions/identifiers).
5
6use crate::linter::config::LintConfig;
7use crate::linter::rule::{LintContext, LintRule};
8use crate::types::{issue_codes, Issue};
9use sqlparser::ast::{
10    Expr, FunctionArg, FunctionArgExpr, FunctionArguments, GroupByExpr, GroupByWithModifier,
11    OrderByKind, Query, Select, SelectItem, SetExpr, Statement, TableFactor, Value, WindowType,
12};
13
14use super::semantic_helpers::join_on_expr;
15
16pub struct AmbiguousColumnRefs {
17    style_policy: GroupByAndOrderByStylePolicy,
18}
19
20#[derive(Clone, Copy, PartialEq, Eq)]
21enum ReferenceStyle {
22    Explicit,
23    Implicit,
24}
25
26#[derive(Clone, Copy, PartialEq, Eq)]
27enum GroupByAndOrderByStylePolicy {
28    Consistent,
29    Explicit,
30    Implicit,
31}
32
33impl GroupByAndOrderByStylePolicy {
34    fn from_config(config: &LintConfig) -> Self {
35        match config
36            .rule_option_str(issue_codes::LINT_AM_006, "group_by_and_order_by_style")
37            .unwrap_or("consistent")
38            .to_ascii_lowercase()
39            .as_str()
40        {
41            "explicit" => Self::Explicit,
42            "implicit" => Self::Implicit,
43            _ => Self::Consistent,
44        }
45    }
46}
47
48impl AmbiguousColumnRefs {
49    pub fn from_config(config: &LintConfig) -> Self {
50        Self {
51            style_policy: GroupByAndOrderByStylePolicy::from_config(config),
52        }
53    }
54}
55
56impl Default for AmbiguousColumnRefs {
57    fn default() -> Self {
58        Self {
59            style_policy: GroupByAndOrderByStylePolicy::Consistent,
60        }
61    }
62}
63
64impl LintRule for AmbiguousColumnRefs {
65    fn code(&self) -> &'static str {
66        issue_codes::LINT_AM_006
67    }
68
69    fn name(&self) -> &'static str {
70        "Ambiguous column references"
71    }
72
73    fn description(&self) -> &'static str {
74        "Inconsistent column references in 'GROUP BY/ORDER BY' clauses."
75    }
76
77    fn check(&self, statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
78        let mut issues = Vec::new();
79        let mut prior_style = None;
80        check_statement(
81            statement,
82            ctx.statement_index,
83            self.style_policy,
84            &mut prior_style,
85            &mut issues,
86        );
87        issues
88    }
89}
90
91fn check_statement(
92    statement: &Statement,
93    statement_index: usize,
94    style_policy: GroupByAndOrderByStylePolicy,
95    prior_style: &mut Option<ReferenceStyle>,
96    issues: &mut Vec<Issue>,
97) {
98    match statement {
99        Statement::Query(query) => {
100            check_query(query, statement_index, style_policy, prior_style, issues)
101        }
102        Statement::Insert(insert) => {
103            if let Some(source) = &insert.source {
104                check_query(source, statement_index, style_policy, prior_style, issues);
105            }
106        }
107        Statement::CreateView { query, .. } => {
108            check_query(query, statement_index, style_policy, prior_style, issues);
109        }
110        Statement::CreateTable(create) => {
111            if let Some(query) = &create.query {
112                check_query(query, statement_index, style_policy, prior_style, issues);
113            }
114        }
115        _ => {}
116    }
117}
118
119fn check_query(
120    query: &Query,
121    statement_index: usize,
122    style_policy: GroupByAndOrderByStylePolicy,
123    prior_style: &mut Option<ReferenceStyle>,
124    issues: &mut Vec<Issue>,
125) {
126    if let Some(with) = &query.with {
127        for cte in &with.cte_tables {
128            check_query(
129                &cte.query,
130                statement_index,
131                style_policy,
132                prior_style,
133                issues,
134            );
135        }
136    }
137
138    check_set_expr(
139        &query.body,
140        statement_index,
141        style_policy,
142        prior_style,
143        issues,
144    );
145
146    if let Some(order_by) = &query.order_by {
147        let mut has_explicit = false;
148        let mut has_implicit = false;
149
150        if let OrderByKind::Expressions(order_exprs) = &order_by.kind {
151            for order_expr in order_exprs {
152                classify_reference_style_in_expr(
153                    &order_expr.expr,
154                    &mut has_explicit,
155                    &mut has_implicit,
156                );
157            }
158        }
159
160        apply_consistent_style_policy(
161            has_explicit,
162            has_implicit,
163            statement_index,
164            style_policy,
165            prior_style,
166            issues,
167        );
168    }
169}
170
171fn check_set_expr(
172    set_expr: &SetExpr,
173    statement_index: usize,
174    style_policy: GroupByAndOrderByStylePolicy,
175    prior_style: &mut Option<ReferenceStyle>,
176    issues: &mut Vec<Issue>,
177) {
178    match set_expr {
179        SetExpr::Select(select) => {
180            check_select(select, statement_index, style_policy, prior_style, issues)
181        }
182        SetExpr::Query(query) => {
183            check_query(query, statement_index, style_policy, prior_style, issues)
184        }
185        SetExpr::SetOperation { left, right, .. } => {
186            check_set_expr(left, statement_index, style_policy, prior_style, issues);
187            check_set_expr(right, statement_index, style_policy, prior_style, issues);
188        }
189        SetExpr::Insert(statement)
190        | SetExpr::Update(statement)
191        | SetExpr::Delete(statement)
192        | SetExpr::Merge(statement) => {
193            check_statement(
194                statement,
195                statement_index,
196                style_policy,
197                prior_style,
198                issues,
199            );
200        }
201        _ => {}
202    }
203}
204
205fn check_select(
206    select: &Select,
207    statement_index: usize,
208    style_policy: GroupByAndOrderByStylePolicy,
209    prior_style: &mut Option<ReferenceStyle>,
210    issues: &mut Vec<Issue>,
211) {
212    // Traverse subqueries in approximate lexical order before evaluating this
213    // SELECT's GROUP BY clause, matching SQLFluff's clause-order precedence.
214    for item in &select.projection {
215        if let SelectItem::UnnamedExpr(expr) | SelectItem::ExprWithAlias { expr, .. } = item {
216            visit_subqueries_in_expr(expr, statement_index, style_policy, prior_style, issues);
217        }
218    }
219
220    if let Some(prewhere) = &select.prewhere {
221        visit_subqueries_in_expr(prewhere, statement_index, style_policy, prior_style, issues);
222    }
223
224    for table in &select.from {
225        check_table_factor(
226            &table.relation,
227            statement_index,
228            style_policy,
229            prior_style,
230            issues,
231        );
232        for join in &table.joins {
233            check_table_factor(
234                &join.relation,
235                statement_index,
236                style_policy,
237                prior_style,
238                issues,
239            );
240            if let Some(on_expr) = join_on_expr(&join.join_operator) {
241                visit_subqueries_in_expr(
242                    on_expr,
243                    statement_index,
244                    style_policy,
245                    prior_style,
246                    issues,
247                );
248            }
249        }
250    }
251
252    if let Some(selection) = &select.selection {
253        visit_subqueries_in_expr(
254            selection,
255            statement_index,
256            style_policy,
257            prior_style,
258            issues,
259        );
260    }
261
262    let mut has_explicit = false;
263    let mut has_implicit = false;
264    if let GroupByExpr::Expressions(exprs, modifiers) = &select.group_by {
265        for expr in exprs {
266            classify_reference_style_in_expr(expr, &mut has_explicit, &mut has_implicit);
267        }
268
269        for modifier in modifiers {
270            classify_reference_style_in_group_modifier(
271                modifier,
272                &mut has_explicit,
273                &mut has_implicit,
274            );
275        }
276    }
277    apply_consistent_style_policy(
278        has_explicit,
279        has_implicit,
280        statement_index,
281        style_policy,
282        prior_style,
283        issues,
284    );
285
286    if let Some(having) = &select.having {
287        visit_subqueries_in_expr(having, statement_index, style_policy, prior_style, issues);
288    }
289    if let Some(qualify) = &select.qualify {
290        visit_subqueries_in_expr(qualify, statement_index, style_policy, prior_style, issues);
291    }
292
293    for sort_expr in &select.sort_by {
294        visit_subqueries_in_expr(
295            &sort_expr.expr,
296            statement_index,
297            style_policy,
298            prior_style,
299            issues,
300        );
301    }
302}
303
304fn check_table_factor(
305    table_factor: &TableFactor,
306    statement_index: usize,
307    style_policy: GroupByAndOrderByStylePolicy,
308    prior_style: &mut Option<ReferenceStyle>,
309    issues: &mut Vec<Issue>,
310) {
311    match table_factor {
312        TableFactor::Derived { subquery, .. } => {
313            check_query(subquery, statement_index, style_policy, prior_style, issues);
314        }
315        TableFactor::NestedJoin {
316            table_with_joins, ..
317        } => {
318            check_table_factor(
319                &table_with_joins.relation,
320                statement_index,
321                style_policy,
322                prior_style,
323                issues,
324            );
325            for join in &table_with_joins.joins {
326                check_table_factor(
327                    &join.relation,
328                    statement_index,
329                    style_policy,
330                    prior_style,
331                    issues,
332                );
333                if let Some(on_expr) = join_on_expr(&join.join_operator) {
334                    visit_subqueries_in_expr(
335                        on_expr,
336                        statement_index,
337                        style_policy,
338                        prior_style,
339                        issues,
340                    );
341                }
342            }
343        }
344        TableFactor::Pivot { table, .. }
345        | TableFactor::Unpivot { table, .. }
346        | TableFactor::MatchRecognize { table, .. } => {
347            check_table_factor(table, statement_index, style_policy, prior_style, issues)
348        }
349        _ => {}
350    }
351}
352
353fn visit_subqueries_in_expr(
354    expr: &Expr,
355    statement_index: usize,
356    style_policy: GroupByAndOrderByStylePolicy,
357    prior_style: &mut Option<ReferenceStyle>,
358    issues: &mut Vec<Issue>,
359) {
360    match expr {
361        Expr::Subquery(query)
362        | Expr::Exists {
363            subquery: query, ..
364        } => check_query(query, statement_index, style_policy, prior_style, issues),
365        Expr::InSubquery {
366            expr: inner,
367            subquery,
368            ..
369        } => {
370            visit_subqueries_in_expr(inner, statement_index, style_policy, prior_style, issues);
371            check_query(subquery, statement_index, style_policy, prior_style, issues);
372        }
373        Expr::BinaryOp { left, right, .. }
374        | Expr::AnyOp { left, right, .. }
375        | Expr::AllOp { left, right, .. } => {
376            visit_subqueries_in_expr(left, statement_index, style_policy, prior_style, issues);
377            visit_subqueries_in_expr(right, statement_index, style_policy, prior_style, issues);
378        }
379        Expr::UnaryOp { expr: inner, .. }
380        | Expr::Nested(inner)
381        | Expr::IsNull(inner)
382        | Expr::IsNotNull(inner)
383        | Expr::Cast { expr: inner, .. } => {
384            visit_subqueries_in_expr(inner, statement_index, style_policy, prior_style, issues);
385        }
386        Expr::InList { expr, list, .. } => {
387            visit_subqueries_in_expr(expr, statement_index, style_policy, prior_style, issues);
388            for item in list {
389                visit_subqueries_in_expr(item, statement_index, style_policy, prior_style, issues);
390            }
391        }
392        Expr::Between {
393            expr, low, high, ..
394        } => {
395            visit_subqueries_in_expr(expr, statement_index, style_policy, prior_style, issues);
396            visit_subqueries_in_expr(low, statement_index, style_policy, prior_style, issues);
397            visit_subqueries_in_expr(high, statement_index, style_policy, prior_style, issues);
398        }
399        Expr::Case {
400            operand,
401            conditions,
402            else_result,
403            ..
404        } => {
405            if let Some(operand) = operand {
406                visit_subqueries_in_expr(
407                    operand,
408                    statement_index,
409                    style_policy,
410                    prior_style,
411                    issues,
412                );
413            }
414            for when in conditions {
415                visit_subqueries_in_expr(
416                    &when.condition,
417                    statement_index,
418                    style_policy,
419                    prior_style,
420                    issues,
421                );
422                visit_subqueries_in_expr(
423                    &when.result,
424                    statement_index,
425                    style_policy,
426                    prior_style,
427                    issues,
428                );
429            }
430            if let Some(otherwise) = else_result {
431                visit_subqueries_in_expr(
432                    otherwise,
433                    statement_index,
434                    style_policy,
435                    prior_style,
436                    issues,
437                );
438            }
439        }
440        Expr::Function(function) => {
441            if let FunctionArguments::List(arguments) = &function.args {
442                for arg in &arguments.args {
443                    match arg {
444                        FunctionArg::Unnamed(FunctionArgExpr::Expr(expr))
445                        | FunctionArg::Named {
446                            arg: FunctionArgExpr::Expr(expr),
447                            ..
448                        } => visit_subqueries_in_expr(
449                            expr,
450                            statement_index,
451                            style_policy,
452                            prior_style,
453                            issues,
454                        ),
455                        _ => {}
456                    }
457                }
458            }
459
460            if let Some(filter) = &function.filter {
461                visit_subqueries_in_expr(
462                    filter,
463                    statement_index,
464                    style_policy,
465                    prior_style,
466                    issues,
467                );
468            }
469
470            for order_expr in &function.within_group {
471                visit_subqueries_in_expr(
472                    &order_expr.expr,
473                    statement_index,
474                    style_policy,
475                    prior_style,
476                    issues,
477                );
478            }
479
480            if let Some(WindowType::WindowSpec(spec)) = &function.over {
481                for expr in &spec.partition_by {
482                    visit_subqueries_in_expr(
483                        expr,
484                        statement_index,
485                        style_policy,
486                        prior_style,
487                        issues,
488                    );
489                }
490                for order_expr in &spec.order_by {
491                    visit_subqueries_in_expr(
492                        &order_expr.expr,
493                        statement_index,
494                        style_policy,
495                        prior_style,
496                        issues,
497                    );
498                }
499            }
500        }
501        _ => {}
502    }
503}
504
505fn classify_reference_style_in_group_modifier(
506    modifier: &GroupByWithModifier,
507    has_explicit: &mut bool,
508    has_implicit: &mut bool,
509) {
510    if let GroupByWithModifier::GroupingSets(expr) = modifier {
511        classify_reference_style_in_expr(expr, has_explicit, has_implicit);
512    }
513}
514
515fn classify_reference_style_in_expr(expr: &Expr, has_explicit: &mut bool, has_implicit: &mut bool) {
516    match expr {
517        Expr::Value(value) if matches!(value.value, Value::Number(_, _)) => *has_implicit = true,
518        Expr::Rollup(sets) | Expr::Cube(sets) | Expr::GroupingSets(sets) => {
519            for set in sets {
520                for item in set {
521                    classify_reference_style_in_expr(item, has_explicit, has_implicit);
522                }
523            }
524        }
525        _ => *has_explicit = true,
526    }
527}
528
529fn apply_consistent_style_policy(
530    has_explicit: bool,
531    has_implicit: bool,
532    statement_index: usize,
533    style_policy: GroupByAndOrderByStylePolicy,
534    prior_style: &mut Option<ReferenceStyle>,
535    issues: &mut Vec<Issue>,
536) {
537    if !has_explicit && !has_implicit {
538        return;
539    }
540
541    if has_explicit && has_implicit {
542        issues.push(
543            Issue::warning(
544                issue_codes::LINT_AM_006,
545                "Inconsistent column references in 'GROUP BY/ORDER BY' clauses.",
546            )
547            .with_statement(statement_index),
548        );
549        return;
550    }
551
552    if matches!(style_policy, GroupByAndOrderByStylePolicy::Explicit) && has_implicit {
553        issues.push(
554            Issue::warning(
555                issue_codes::LINT_AM_006,
556                "Inconsistent column references in 'GROUP BY/ORDER BY' clauses.",
557            )
558            .with_statement(statement_index),
559        );
560        return;
561    }
562
563    if matches!(style_policy, GroupByAndOrderByStylePolicy::Implicit) && has_explicit {
564        issues.push(
565            Issue::warning(
566                issue_codes::LINT_AM_006,
567                "Inconsistent column references in 'GROUP BY/ORDER BY' clauses.",
568            )
569            .with_statement(statement_index),
570        );
571        return;
572    }
573
574    if !matches!(style_policy, GroupByAndOrderByStylePolicy::Consistent) {
575        return;
576    }
577
578    let current_style = if has_implicit {
579        ReferenceStyle::Implicit
580    } else {
581        ReferenceStyle::Explicit
582    };
583
584    if let Some(previous_style) = prior_style {
585        if *previous_style != current_style {
586            issues.push(
587                Issue::warning(
588                    issue_codes::LINT_AM_006,
589                    "Inconsistent column references in 'GROUP BY/ORDER BY' clauses.",
590                )
591                .with_statement(statement_index),
592            );
593            return;
594        }
595    }
596
597    *prior_style = Some(current_style);
598}
599
600#[cfg(test)]
601mod tests {
602    use super::*;
603    use crate::linter::config::LintConfig;
604    use crate::parser::parse_sql;
605
606    fn run(sql: &str) -> Vec<Issue> {
607        let statements = parse_sql(sql).expect("parse");
608        let rule = AmbiguousColumnRefs::default();
609        statements
610            .iter()
611            .enumerate()
612            .flat_map(|(index, statement)| {
613                rule.check(
614                    statement,
615                    &LintContext {
616                        sql,
617                        statement_range: 0..sql.len(),
618                        statement_index: index,
619                    },
620                )
621            })
622            .collect()
623    }
624
625    // --- Edge cases adopted from sqlfluff AM06 ---
626
627    #[test]
628    fn passes_explicit_group_by_default() {
629        let issues =
630            run("SELECT foo, bar, sum(baz) AS sum_value FROM fake_table GROUP BY foo, bar");
631        assert!(issues.is_empty());
632    }
633
634    #[test]
635    fn passes_implicit_group_by_default() {
636        let issues = run("SELECT foo, bar, sum(baz) AS sum_value FROM fake_table GROUP BY 1, 2");
637        assert!(issues.is_empty());
638    }
639
640    #[test]
641    fn flags_mixed_group_by_references() {
642        let issues = run("SELECT foo, bar, sum(baz) AS sum_value FROM fake_table GROUP BY 1, bar");
643        assert_eq!(issues.len(), 1);
644        assert_eq!(issues[0].code, issue_codes::LINT_AM_006);
645    }
646
647    #[test]
648    fn flags_mixed_order_by_references() {
649        let issues =
650            run("SELECT foo, bar, sum(baz) AS sum_value FROM fake_table ORDER BY 1, power(bar, 2)");
651        assert_eq!(issues.len(), 1);
652        assert_eq!(issues[0].code, issue_codes::LINT_AM_006);
653    }
654
655    #[test]
656    fn flags_across_clause_style_switch() {
657        let issues = run(
658            "SELECT foo, bar, sum(baz) AS sum_value FROM fake_table GROUP BY 1, 2 ORDER BY foo, bar",
659        );
660        assert_eq!(issues.len(), 1);
661    }
662
663    #[test]
664    fn passes_consistent_explicit_group_and_order() {
665        let issues = run(
666            "SELECT foo, bar, sum(baz) AS sum_value FROM fake_table GROUP BY foo, bar ORDER BY foo, bar",
667        );
668        assert!(issues.is_empty());
669    }
670
671    #[test]
672    fn passes_consistent_implicit_group_and_order() {
673        let issues = run(
674            "SELECT foo, bar, sum(baz) AS sum_value FROM fake_table GROUP BY 1, 2 ORDER BY 1, 2",
675        );
676        assert!(issues.is_empty());
677    }
678
679    #[test]
680    fn ignores_window_order_by() {
681        let issues = run(
682            "SELECT field_1, SUM(field_3) OVER (ORDER BY field_1) FROM table1 GROUP BY 1 ORDER BY 1",
683        );
684        assert!(issues.is_empty());
685    }
686
687    #[test]
688    fn ignores_within_group_order_by() {
689        let issues = run(
690            "SELECT LISTAGG(x) WITHIN GROUP (ORDER BY list_order) AS my_list FROM main GROUP BY 1",
691        );
692        assert!(issues.is_empty());
693    }
694
695    #[test]
696    fn flags_two_violations_when_subquery_sets_explicit_precedent() {
697        let issues = run(
698            "SELECT foo, bar, sum(baz) AS sum_value FROM (SELECT foo, bar, sum(baz) AS baz FROM fake_table GROUP BY foo, bar) q GROUP BY 1, 2 ORDER BY 1, 2",
699        );
700        assert_eq!(issues.len(), 2);
701        assert!(issues
702            .iter()
703            .all(|issue| issue.code == issue_codes::LINT_AM_006));
704    }
705
706    #[test]
707    fn passes_rollup_when_consistent_with_prior_implicit_style() {
708        let issues = run(
709            "SELECT column1, column2 FROM table_name GROUP BY 1, 2 UNION ALL SELECT column1, column2 FROM table_name2 GROUP BY ROLLUP(1, 2)",
710        );
711        assert!(issues.is_empty());
712    }
713
714    #[test]
715    fn flags_rollup_when_inconsistent_with_prior_explicit_style() {
716        let issues = run(
717            "SELECT column1, column2 FROM table_name GROUP BY column1, column2 UNION ALL SELECT column1, column2 FROM table_name2 GROUP BY ROLLUP(1, 2)",
718        );
719        assert_eq!(issues.len(), 1);
720    }
721
722    #[test]
723    fn ignores_statements_without_group_or_order_references() {
724        let issues = run("SELECT a.id, name FROM a");
725        assert!(issues.is_empty());
726    }
727
728    #[test]
729    fn explicit_policy_flags_implicit_group_by_position() {
730        let config = LintConfig {
731            enabled: true,
732            disabled_rules: vec![],
733            rule_configs: std::collections::BTreeMap::from([(
734                "ambiguous.column_references".to_string(),
735                serde_json::json!({"group_by_and_order_by_style": "explicit"}),
736            )]),
737        };
738        let rule = AmbiguousColumnRefs::from_config(&config);
739        let sql = "SELECT foo, bar FROM fake_table GROUP BY 1, 2";
740        let statements = parse_sql(sql).expect("parse");
741        let issues = rule.check(
742            &statements[0],
743            &LintContext {
744                sql,
745                statement_range: 0..sql.len(),
746                statement_index: 0,
747            },
748        );
749        assert_eq!(issues.len(), 1);
750    }
751
752    #[test]
753    fn implicit_policy_flags_explicit_group_by_reference() {
754        let config = LintConfig {
755            enabled: true,
756            disabled_rules: vec![],
757            rule_configs: std::collections::BTreeMap::from([(
758                "LINT_AM_006".to_string(),
759                serde_json::json!({"group_by_and_order_by_style": "implicit"}),
760            )]),
761        };
762        let rule = AmbiguousColumnRefs::from_config(&config);
763        let sql = "SELECT foo, bar FROM fake_table GROUP BY foo, bar";
764        let statements = parse_sql(sql).expect("parse");
765        let issues = rule.check(
766            &statements[0],
767            &LintContext {
768                sql,
769                statement_range: 0..sql.len(),
770                statement_index: 0,
771            },
772        );
773        assert_eq!(issues.len(), 1);
774    }
775}