Skip to main content

flowscope_core/linter/rules/
am_004.rs

1//! LINT_AM_004: Ambiguous column count (SQLFluff AM04 parity).
2//!
3//! Flags queries whose output width is not deterministically known, usually due
4//! to unresolved wildcard projections (`*` / `alias.*`).
5
6use crate::linter::rule::{LintContext, LintRule};
7use crate::types::{issue_codes, Issue};
8use sqlparser::ast::{Expr, SelectItem, SetExpr, Statement, Value};
9use std::collections::HashMap;
10
11use super::column_count_helpers::{resolve_query_output_columns_strict, CteColumnCounts};
12
13pub struct AmbiguousColumnCount;
14
15impl LintRule for AmbiguousColumnCount {
16    fn code(&self) -> &'static str {
17        issue_codes::LINT_AM_004
18    }
19
20    fn name(&self) -> &'static str {
21        "Ambiguous column count"
22    }
23
24    fn description(&self) -> &'static str {
25        "Query produces an unknown number of result columns."
26    }
27
28    fn check(&self, stmt: &Statement, ctx: &LintContext) -> Vec<Issue> {
29        let ast_unknown = statement_has_unknown_result_columns(stmt, &HashMap::new());
30        let fallback_unknown = !ast_unknown
31            && statement_has_unknown_result_columns_fallback(stmt, ctx.statement_sql());
32
33        if ast_unknown || fallback_unknown {
34            vec![Issue::warning(
35                issue_codes::LINT_AM_004,
36                "Query produces an unknown number of result columns.",
37            )
38            .with_statement(ctx.statement_index)]
39        } else {
40            Vec::new()
41        }
42    }
43}
44
45fn statement_has_unknown_result_columns(stmt: &Statement, outer_ctes: &CteColumnCounts) -> bool {
46    match stmt {
47        Statement::Query(query) => {
48            query_outputs_result_set(query)
49                && resolve_query_output_columns_strict(query, outer_ctes).is_none()
50        }
51        Statement::Insert(insert) => insert.source.as_ref().is_some_and(|source| {
52            resolve_query_output_columns_strict(source, outer_ctes).is_none()
53        }),
54        Statement::CreateView { query, .. } => {
55            query_outputs_result_set(query)
56                && resolve_query_output_columns_strict(query, outer_ctes).is_none()
57        }
58        Statement::CreateTable(create) => create.query.as_ref().is_some_and(|query| {
59            query_outputs_result_set(query)
60                && resolve_query_output_columns_strict(query, outer_ctes).is_none()
61        }),
62        _ => false,
63    }
64}
65
66fn query_outputs_result_set(query: &sqlparser::ast::Query) -> bool {
67    matches!(
68        query.body.as_ref(),
69        SetExpr::Select(_) | SetExpr::Query(_) | SetExpr::Values(_) | SetExpr::SetOperation { .. }
70    )
71}
72
73fn statement_has_unknown_result_columns_fallback(stmt: &Statement, sql: &str) -> bool {
74    if !is_synthetic_select_one_statement(stmt, sql) {
75        return false;
76    }
77
78    // Targeted parser-fallback heuristic for AM04 issue #930-style CTE chains:
79    // WITH a AS (SELECT * FROM ...), b AS (SELECT * FROM a) SELECT * FROM b
80    // should be treated as unknown-width output.
81    let compact = sql
82        .split_whitespace()
83        .collect::<Vec<_>>()
84        .join(" ")
85        .to_ascii_lowercase();
86
87    if !compact.starts_with("with ") {
88        return false;
89    }
90
91    let wildcard_count = compact.match_indices("select * from").count();
92    wildcard_count >= 3 && compact.contains("),") && compact.contains(" as (")
93}
94
95fn is_synthetic_select_one_statement(stmt: &Statement, statement_sql: &str) -> bool {
96    if statement_sql.trim().eq_ignore_ascii_case("select 1") {
97        return false;
98    }
99
100    let Statement::Query(query) = stmt else {
101        return false;
102    };
103    let SetExpr::Select(select) = query.body.as_ref() else {
104        return false;
105    };
106    let group_by_empty = match &select.group_by {
107        sqlparser::ast::GroupByExpr::Expressions(exprs, _) => exprs.is_empty(),
108        _ => false,
109    };
110    if !select.from.is_empty()
111        || select.selection.is_some()
112        || !group_by_empty
113        || select.having.is_some()
114    {
115        return false;
116    }
117    if select.projection.len() != 1 {
118        return false;
119    }
120
121    matches!(
122        &select.projection[0],
123        SelectItem::UnnamedExpr(Expr::Value(v))
124            if matches!(&v.value, Value::Number(num, false) if num == "1")
125    )
126}
127
128#[cfg(test)]
129mod tests {
130    use super::*;
131    use crate::parser::parse_sql;
132
133    fn run(sql: &str) -> Vec<Issue> {
134        let statements = parse_sql(sql).expect("parse");
135        let rule = AmbiguousColumnCount;
136        statements
137            .iter()
138            .enumerate()
139            .flat_map(|(index, statement)| {
140                rule.check(
141                    statement,
142                    &LintContext {
143                        sql,
144                        statement_range: 0..sql.len(),
145                        statement_index: index,
146                    },
147                )
148            })
149            .collect()
150    }
151
152    // --- Edge cases adopted from sqlfluff AM04 ---
153
154    #[test]
155    fn flags_unknown_result_columns_for_select_star_from_table() {
156        let issues = run("select * from t");
157        assert_eq!(issues.len(), 1);
158        assert_eq!(issues[0].code, issue_codes::LINT_AM_004);
159    }
160
161    #[test]
162    fn allows_known_result_columns_for_explicit_projection() {
163        let issues = run("select a, b from t");
164        assert!(issues.is_empty());
165    }
166
167    #[test]
168    fn allows_select_star_from_known_cte_columns() {
169        let issues = run("with cte as (select a, b from t) select * from cte");
170        assert!(issues.is_empty());
171    }
172
173    #[test]
174    fn allows_select_star_from_declared_cte_columns_even_if_query_uses_wildcard() {
175        let issues = run("with cte(a, b) as (select * from t) select * from cte");
176        assert!(issues.is_empty());
177    }
178
179    #[test]
180    fn flags_select_star_from_unknown_cte_columns() {
181        let issues = run("with cte as (select * from t) select * from cte");
182        assert_eq!(issues.len(), 1);
183    }
184
185    #[test]
186    fn allows_explicit_projection_even_if_cte_uses_wildcard() {
187        let issues = run("with cte as (select * from t) select a, b from cte");
188        assert!(issues.is_empty());
189    }
190
191    #[test]
192    fn flags_qualified_wildcard_from_external_source() {
193        let issues = run("select t.* from t");
194        assert_eq!(issues.len(), 1);
195    }
196
197    #[test]
198    fn allows_qualified_wildcard_from_known_derived_alias() {
199        let issues = run("select t_alias.* from (select a from t) as t_alias");
200        assert!(issues.is_empty());
201    }
202
203    #[test]
204    fn allows_qualified_wildcard_from_declared_derived_alias_columns() {
205        let issues = run("select t_alias.* from (select * from t) as t_alias(a, b)");
206        assert!(issues.is_empty());
207    }
208
209    #[test]
210    fn allows_qualified_wildcard_from_known_nested_join_alias() {
211        let issues = run(
212            "select j.* from ((select a from t1) as a1 join (select b from t2) as b1 on a1.a = b1.b) as j",
213        );
214        assert!(issues.is_empty());
215    }
216
217    #[test]
218    fn flags_qualified_wildcard_from_unknown_derived_alias() {
219        let issues = run("select t_alias.* from (select * from t) as t_alias");
220        assert_eq!(issues.len(), 1);
221    }
222
223    #[test]
224    fn allows_nested_join_wildcard_when_using_width_is_resolved() {
225        let issues =
226            run("select j.* from ((select a from t1) as a1 join (select a from t2) as b1 using (a)) as j");
227        assert!(issues.is_empty());
228    }
229
230    #[test]
231    fn allows_nested_join_wildcard_when_natural_join_width_is_resolved() {
232        let issues = run(
233            "select j.* from ((select a from t1) as a1 natural join (select a from t2) as b1) as j",
234        );
235        assert!(issues.is_empty());
236    }
237
238    #[test]
239    fn flags_nested_join_wildcard_when_natural_join_width_is_unknown() {
240        let issues = run(
241            "select j.* from ((select * from t1) as a1 natural join (select a from t2) as b1) as j",
242        );
243        assert_eq!(issues.len(), 1);
244    }
245
246    #[test]
247    fn flags_any_unknown_wildcard_in_projection() {
248        let issues = run("select *, t.*, t.a, b from t");
249        assert_eq!(issues.len(), 1);
250    }
251
252    #[test]
253    fn flags_set_operation_with_unknown_wildcard_branch() {
254        let issues = run("select a from t1 union all select * from t2");
255        assert_eq!(issues.len(), 1);
256    }
257
258    #[test]
259    fn allows_set_operation_with_known_columns() {
260        let issues = run("select a from t1 union all select b from t2");
261        assert!(issues.is_empty());
262    }
263
264    #[test]
265    fn flags_nested_cte_unknown_column_chain() {
266        let issues = run("with a as (with b as (select * from c) select * from b) select * from a");
267        assert_eq!(issues.len(), 1);
268    }
269
270    #[test]
271    fn allows_non_select_statement_without_query_body() {
272        let issues = run("create table my_table (id integer)");
273        assert!(issues.is_empty());
274    }
275
276    #[test]
277    fn flags_select_star_without_from_source() {
278        let issues = run("select *");
279        assert_eq!(issues.len(), 1);
280    }
281
282    #[test]
283    fn allows_unresolved_qualified_wildcard_for_non_am04_concerns() {
284        let sql = "with cte as (\n    select\n        a, b\n    from\n        t\n)\nselect\n    cte.*,\n    t_alias.a\nfrom cte1\njoin (select * from t) as t_alias\nusing (a)\n";
285        let issues = run(sql);
286        assert!(issues.is_empty());
287    }
288
289    #[test]
290    fn allows_with_update_statement_without_select_output() {
291        let sql = "WITH mycte AS ( SELECT foo, bar FROM mytable1 )\nUPDATE sometable SET sometable.baz = mycte.bar FROM mycte;";
292        let issues = run(sql);
293        assert!(issues.is_empty());
294    }
295
296    #[test]
297    fn statementless_fallback_flags_unknown_cte_wildcard_chain() {
298        let sql = "with\nhubspot__contacts as (\n  select * from ANALYTICS.PUBLIC_intermediate.hubspot__contacts\n),\nfinal as (\n  select *\n  from\n    hubspot__contacts\n    where not coalesce(_fivetran_deleted, false)\n)\nselect * from final\n";
299        let synthetic = parse_sql("SELECT 1").expect("parse");
300        let rule = AmbiguousColumnCount;
301        let issues = rule.check(
302            &synthetic[0],
303            &LintContext {
304                sql,
305                statement_range: 0..sql.len(),
306                statement_index: 0,
307            },
308        );
309        assert_eq!(issues.len(), 1);
310        assert_eq!(issues[0].code, issue_codes::LINT_AM_004);
311    }
312}