flowscope_core/linter/rules/
am_004.rs1use crate::linter::rule::{LintContext, LintRule};
7use crate::types::{issue_codes, Issue};
8use sqlparser::ast::{Expr, SelectItem, SetExpr, Statement, Value};
9use std::collections::HashMap;
10
11use super::column_count_helpers::{resolve_query_output_columns_strict, CteColumnCounts};
12
13pub struct AmbiguousColumnCount;
14
15impl LintRule for AmbiguousColumnCount {
16 fn code(&self) -> &'static str {
17 issue_codes::LINT_AM_004
18 }
19
20 fn name(&self) -> &'static str {
21 "Ambiguous column count"
22 }
23
24 fn description(&self) -> &'static str {
25 "Query produces an unknown number of result columns."
26 }
27
28 fn check(&self, stmt: &Statement, ctx: &LintContext) -> Vec<Issue> {
29 let ast_unknown = statement_has_unknown_result_columns(stmt, &HashMap::new());
30 let fallback_unknown = !ast_unknown
31 && statement_has_unknown_result_columns_fallback(stmt, ctx.statement_sql());
32
33 if ast_unknown || fallback_unknown {
34 vec![Issue::warning(
35 issue_codes::LINT_AM_004,
36 "Query produces an unknown number of result columns.",
37 )
38 .with_statement(ctx.statement_index)]
39 } else {
40 Vec::new()
41 }
42 }
43}
44
45fn statement_has_unknown_result_columns(stmt: &Statement, outer_ctes: &CteColumnCounts) -> bool {
46 match stmt {
47 Statement::Query(query) => {
48 query_outputs_result_set(query)
49 && resolve_query_output_columns_strict(query, outer_ctes).is_none()
50 }
51 Statement::Insert(insert) => insert.source.as_ref().is_some_and(|source| {
52 resolve_query_output_columns_strict(source, outer_ctes).is_none()
53 }),
54 Statement::CreateView { query, .. } => {
55 query_outputs_result_set(query)
56 && resolve_query_output_columns_strict(query, outer_ctes).is_none()
57 }
58 Statement::CreateTable(create) => create.query.as_ref().is_some_and(|query| {
59 query_outputs_result_set(query)
60 && resolve_query_output_columns_strict(query, outer_ctes).is_none()
61 }),
62 _ => false,
63 }
64}
65
66fn query_outputs_result_set(query: &sqlparser::ast::Query) -> bool {
67 matches!(
68 query.body.as_ref(),
69 SetExpr::Select(_) | SetExpr::Query(_) | SetExpr::Values(_) | SetExpr::SetOperation { .. }
70 )
71}
72
73fn statement_has_unknown_result_columns_fallback(stmt: &Statement, sql: &str) -> bool {
74 if !is_synthetic_select_one_statement(stmt, sql) {
75 return false;
76 }
77
78 let compact = sql
82 .split_whitespace()
83 .collect::<Vec<_>>()
84 .join(" ")
85 .to_ascii_lowercase();
86
87 if !compact.starts_with("with ") {
88 return false;
89 }
90
91 let wildcard_count = compact.match_indices("select * from").count();
92 wildcard_count >= 3 && compact.contains("),") && compact.contains(" as (")
93}
94
95fn is_synthetic_select_one_statement(stmt: &Statement, statement_sql: &str) -> bool {
96 if statement_sql.trim().eq_ignore_ascii_case("select 1") {
97 return false;
98 }
99
100 let Statement::Query(query) = stmt else {
101 return false;
102 };
103 let SetExpr::Select(select) = query.body.as_ref() else {
104 return false;
105 };
106 let group_by_empty = match &select.group_by {
107 sqlparser::ast::GroupByExpr::Expressions(exprs, _) => exprs.is_empty(),
108 _ => false,
109 };
110 if !select.from.is_empty()
111 || select.selection.is_some()
112 || !group_by_empty
113 || select.having.is_some()
114 {
115 return false;
116 }
117 if select.projection.len() != 1 {
118 return false;
119 }
120
121 matches!(
122 &select.projection[0],
123 SelectItem::UnnamedExpr(Expr::Value(v))
124 if matches!(&v.value, Value::Number(num, false) if num == "1")
125 )
126}
127
128#[cfg(test)]
129mod tests {
130 use super::*;
131 use crate::parser::parse_sql;
132
133 fn run(sql: &str) -> Vec<Issue> {
134 let statements = parse_sql(sql).expect("parse");
135 let rule = AmbiguousColumnCount;
136 statements
137 .iter()
138 .enumerate()
139 .flat_map(|(index, statement)| {
140 rule.check(
141 statement,
142 &LintContext {
143 sql,
144 statement_range: 0..sql.len(),
145 statement_index: index,
146 },
147 )
148 })
149 .collect()
150 }
151
152 #[test]
155 fn flags_unknown_result_columns_for_select_star_from_table() {
156 let issues = run("select * from t");
157 assert_eq!(issues.len(), 1);
158 assert_eq!(issues[0].code, issue_codes::LINT_AM_004);
159 }
160
161 #[test]
162 fn allows_known_result_columns_for_explicit_projection() {
163 let issues = run("select a, b from t");
164 assert!(issues.is_empty());
165 }
166
167 #[test]
168 fn allows_select_star_from_known_cte_columns() {
169 let issues = run("with cte as (select a, b from t) select * from cte");
170 assert!(issues.is_empty());
171 }
172
173 #[test]
174 fn allows_select_star_from_declared_cte_columns_even_if_query_uses_wildcard() {
175 let issues = run("with cte(a, b) as (select * from t) select * from cte");
176 assert!(issues.is_empty());
177 }
178
179 #[test]
180 fn flags_select_star_from_unknown_cte_columns() {
181 let issues = run("with cte as (select * from t) select * from cte");
182 assert_eq!(issues.len(), 1);
183 }
184
185 #[test]
186 fn allows_explicit_projection_even_if_cte_uses_wildcard() {
187 let issues = run("with cte as (select * from t) select a, b from cte");
188 assert!(issues.is_empty());
189 }
190
191 #[test]
192 fn flags_qualified_wildcard_from_external_source() {
193 let issues = run("select t.* from t");
194 assert_eq!(issues.len(), 1);
195 }
196
197 #[test]
198 fn allows_qualified_wildcard_from_known_derived_alias() {
199 let issues = run("select t_alias.* from (select a from t) as t_alias");
200 assert!(issues.is_empty());
201 }
202
203 #[test]
204 fn allows_qualified_wildcard_from_declared_derived_alias_columns() {
205 let issues = run("select t_alias.* from (select * from t) as t_alias(a, b)");
206 assert!(issues.is_empty());
207 }
208
209 #[test]
210 fn allows_qualified_wildcard_from_known_nested_join_alias() {
211 let issues = run(
212 "select j.* from ((select a from t1) as a1 join (select b from t2) as b1 on a1.a = b1.b) as j",
213 );
214 assert!(issues.is_empty());
215 }
216
217 #[test]
218 fn flags_qualified_wildcard_from_unknown_derived_alias() {
219 let issues = run("select t_alias.* from (select * from t) as t_alias");
220 assert_eq!(issues.len(), 1);
221 }
222
223 #[test]
224 fn allows_nested_join_wildcard_when_using_width_is_resolved() {
225 let issues =
226 run("select j.* from ((select a from t1) as a1 join (select a from t2) as b1 using (a)) as j");
227 assert!(issues.is_empty());
228 }
229
230 #[test]
231 fn allows_nested_join_wildcard_when_natural_join_width_is_resolved() {
232 let issues = run(
233 "select j.* from ((select a from t1) as a1 natural join (select a from t2) as b1) as j",
234 );
235 assert!(issues.is_empty());
236 }
237
238 #[test]
239 fn flags_nested_join_wildcard_when_natural_join_width_is_unknown() {
240 let issues = run(
241 "select j.* from ((select * from t1) as a1 natural join (select a from t2) as b1) as j",
242 );
243 assert_eq!(issues.len(), 1);
244 }
245
246 #[test]
247 fn flags_any_unknown_wildcard_in_projection() {
248 let issues = run("select *, t.*, t.a, b from t");
249 assert_eq!(issues.len(), 1);
250 }
251
252 #[test]
253 fn flags_set_operation_with_unknown_wildcard_branch() {
254 let issues = run("select a from t1 union all select * from t2");
255 assert_eq!(issues.len(), 1);
256 }
257
258 #[test]
259 fn allows_set_operation_with_known_columns() {
260 let issues = run("select a from t1 union all select b from t2");
261 assert!(issues.is_empty());
262 }
263
264 #[test]
265 fn flags_nested_cte_unknown_column_chain() {
266 let issues = run("with a as (with b as (select * from c) select * from b) select * from a");
267 assert_eq!(issues.len(), 1);
268 }
269
270 #[test]
271 fn allows_non_select_statement_without_query_body() {
272 let issues = run("create table my_table (id integer)");
273 assert!(issues.is_empty());
274 }
275
276 #[test]
277 fn flags_select_star_without_from_source() {
278 let issues = run("select *");
279 assert_eq!(issues.len(), 1);
280 }
281
282 #[test]
283 fn allows_unresolved_qualified_wildcard_for_non_am04_concerns() {
284 let sql = "with cte as (\n select\n a, b\n from\n t\n)\nselect\n cte.*,\n t_alias.a\nfrom cte1\njoin (select * from t) as t_alias\nusing (a)\n";
285 let issues = run(sql);
286 assert!(issues.is_empty());
287 }
288
289 #[test]
290 fn allows_with_update_statement_without_select_output() {
291 let sql = "WITH mycte AS ( SELECT foo, bar FROM mytable1 )\nUPDATE sometable SET sometable.baz = mycte.bar FROM mycte;";
292 let issues = run(sql);
293 assert!(issues.is_empty());
294 }
295
296 #[test]
297 fn statementless_fallback_flags_unknown_cte_wildcard_chain() {
298 let sql = "with\nhubspot__contacts as (\n select * from ANALYTICS.PUBLIC_intermediate.hubspot__contacts\n),\nfinal as (\n select *\n from\n hubspot__contacts\n where not coalesce(_fivetran_deleted, false)\n)\nselect * from final\n";
299 let synthetic = parse_sql("SELECT 1").expect("parse");
300 let rule = AmbiguousColumnCount;
301 let issues = rule.check(
302 &synthetic[0],
303 &LintContext {
304 sql,
305 statement_range: 0..sql.len(),
306 statement_index: 0,
307 },
308 );
309 assert_eq!(issues.len(), 1);
310 assert_eq!(issues[0].code, issue_codes::LINT_AM_004);
311 }
312}