Skip to main content

flowscope_core/linter/rules/
cv_003.rs

1//! LINT_CV_003: Select trailing comma.
2//!
3//! Avoid trailing comma before FROM.
4
5use crate::linter::config::LintConfig;
6use crate::linter::rule::{LintContext, LintRule};
7use crate::linter::rules::semantic_helpers::visit_selects_in_statement;
8use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit};
9use sqlparser::ast::{GroupByExpr, Select, SelectItem, Spanned, Statement};
10use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer, Whitespace};
11
12#[derive(Clone, Copy, Debug, Eq, PartialEq)]
13enum SelectClauseTrailingCommaPolicy {
14    Forbid,
15    Require,
16}
17
18impl SelectClauseTrailingCommaPolicy {
19    fn from_config(config: &LintConfig) -> Self {
20        match config
21            .rule_option_str(issue_codes::LINT_CV_003, "select_clause_trailing_comma")
22            .unwrap_or("forbid")
23            .to_ascii_lowercase()
24            .as_str()
25        {
26            "require" => Self::Require,
27            _ => Self::Forbid,
28        }
29    }
30
31    fn violated(self, trailing_comma_present: bool) -> bool {
32        match self {
33            Self::Forbid => trailing_comma_present,
34            Self::Require => !trailing_comma_present,
35        }
36    }
37
38    fn message(self) -> &'static str {
39        match self {
40            Self::Forbid => "Avoid trailing comma before FROM in SELECT clause.",
41            Self::Require => "Use trailing comma before FROM in SELECT clause.",
42        }
43    }
44}
45
46pub struct ConventionSelectTrailingComma {
47    policy: SelectClauseTrailingCommaPolicy,
48}
49
50impl ConventionSelectTrailingComma {
51    pub fn from_config(config: &LintConfig) -> Self {
52        Self {
53            policy: SelectClauseTrailingCommaPolicy::from_config(config),
54        }
55    }
56}
57
58impl Default for ConventionSelectTrailingComma {
59    fn default() -> Self {
60        Self {
61            policy: SelectClauseTrailingCommaPolicy::Forbid,
62        }
63    }
64}
65
66impl LintRule for ConventionSelectTrailingComma {
67    fn code(&self) -> &'static str {
68        issue_codes::LINT_CV_003
69    }
70
71    fn name(&self) -> &'static str {
72        "Select trailing comma"
73    }
74
75    fn description(&self) -> &'static str {
76        "Trailing commas within select clause."
77    }
78
79    fn check(&self, statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
80        let tokens =
81            tokenized_for_context(ctx).or_else(|| tokenized(ctx.statement_sql(), ctx.dialect()));
82        let violations = select_clause_policy_violations(
83            statement,
84            ctx.statement_sql(),
85            self.policy,
86            tokens.as_deref(),
87        );
88        let Some(first_violation) = violations.first().copied() else {
89            return Vec::new();
90        };
91
92        let mut issue = Issue::warning(issue_codes::LINT_CV_003, self.policy.message())
93            .with_statement(ctx.statement_index)
94            .with_span(ctx.span_from_statement_offset(
95                first_violation.issue_start,
96                first_violation.issue_end,
97            ));
98
99        let edits: Vec<IssuePatchEdit> = violations
100            .iter()
101            .filter_map(|violation| violation.fix)
102            .map(|edit| {
103                IssuePatchEdit::new(
104                    ctx.span_from_statement_offset(edit.start, edit.end),
105                    edit.replacement,
106                )
107            })
108            .collect();
109        if !edits.is_empty() {
110            issue = issue.with_autofix_edits(IssueAutofixApplicability::Safe, edits);
111        }
112
113        vec![issue]
114    }
115}
116
117#[derive(Clone, Copy, Debug, Eq, PartialEq)]
118struct SelectClauseEdit {
119    start: usize,
120    end: usize,
121    replacement: &'static str,
122}
123
124#[derive(Clone, Copy, Debug, Eq, PartialEq)]
125struct SelectClauseViolation {
126    issue_start: usize,
127    issue_end: usize,
128    fix: Option<SelectClauseEdit>,
129}
130
131fn select_clause_policy_violations(
132    statement: &Statement,
133    sql: &str,
134    policy: SelectClauseTrailingCommaPolicy,
135    tokens: Option<&[LocatedToken]>,
136) -> Vec<SelectClauseViolation> {
137    let mut violations = Vec::new();
138    visit_selects_in_statement(statement, &mut |select| {
139        if let Some(violation) = select_clause_violation(select, sql, policy, tokens) {
140            violations.push(violation);
141        }
142    });
143    violations
144}
145
146fn select_clause_violation(
147    select: &Select,
148    sql: &str,
149    policy: SelectClauseTrailingCommaPolicy,
150    tokens: Option<&[LocatedToken]>,
151) -> Option<SelectClauseViolation> {
152    let last_projection_end = select_last_projection_end(select)?;
153    let last_projection_end_offset = line_col_to_offset(
154        sql,
155        last_projection_end.0 as usize,
156        last_projection_end.1 as usize,
157    )?;
158
159    let boundary = select_clause_boundary(select).or_else(|| span_end(select.span()))?;
160    let boundary_offset = line_col_to_offset(sql, boundary.0 as usize, boundary.1 as usize)?;
161    if boundary_offset < last_projection_end_offset {
162        return None;
163    }
164
165    let significant_token =
166        first_significant_token_between(tokens, last_projection_end_offset, boundary_offset);
167    let has_trailing_comma = significant_token
168        .as_ref()
169        .is_some_and(|token| matches!(token.token, Token::Comma));
170    if !policy.violated(has_trailing_comma) {
171        return None;
172    }
173
174    match policy {
175        SelectClauseTrailingCommaPolicy::Forbid => {
176            let comma = significant_token
177                .as_ref()
178                .copied()
179                .filter(|token| matches!(token.token, Token::Comma))?;
180            Some(SelectClauseViolation {
181                issue_start: comma.start,
182                issue_end: comma.end,
183                fix: Some(SelectClauseEdit {
184                    start: comma.start,
185                    end: comma.end,
186                    replacement: "",
187                }),
188            })
189        }
190        SelectClauseTrailingCommaPolicy::Require => {
191            // Without a following clause boundary, adding a terminal comma may
192            // produce invalid SQL (`SELECT 1,`), so report-only in this shape.
193            if boundary_offset == last_projection_end_offset {
194                return Some(SelectClauseViolation {
195                    issue_start: last_projection_end_offset,
196                    issue_end: last_projection_end_offset,
197                    fix: None,
198                });
199            }
200
201            Some(SelectClauseViolation {
202                issue_start: last_projection_end_offset,
203                issue_end: last_projection_end_offset,
204                fix: Some(SelectClauseEdit {
205                    start: last_projection_end_offset,
206                    end: last_projection_end_offset,
207                    replacement: ",",
208                }),
209            })
210        }
211    }
212}
213
214fn select_last_projection_end(select: &Select) -> Option<(u64, u64)> {
215    let item = select.projection.last()?;
216    match item {
217        SelectItem::ExprWithAlias { alias, .. } => span_end(alias.span),
218        SelectItem::UnnamedExpr(expr) => span_end(expr.span()),
219        SelectItem::Wildcard(_) | SelectItem::QualifiedWildcard(_, _) => span_end(item.span()),
220    }
221}
222
223fn select_clause_boundary(select: &Select) -> Option<(u64, u64)> {
224    let mut candidates = Vec::new();
225
226    if let Some(first_from) = select.from.first() {
227        if let Some(start) = span_start(first_from.relation.span()) {
228            candidates.push(start);
229        }
230    }
231
232    if let Some(into) = &select.into {
233        if let Some(start) = span_start(into.span()) {
234            candidates.push(start);
235        }
236    }
237
238    if let Some(expr) = &select.prewhere {
239        if let Some(start) = span_start(expr.span()) {
240            candidates.push(start);
241        }
242    }
243    if let Some(expr) = &select.selection {
244        if let Some(start) = span_start(expr.span()) {
245            candidates.push(start);
246        }
247    }
248    if let Some(expr) = &select.having {
249        if let Some(start) = span_start(expr.span()) {
250            candidates.push(start);
251        }
252    }
253    if let Some(expr) = &select.qualify {
254        if let Some(start) = span_start(expr.span()) {
255            candidates.push(start);
256        }
257    }
258    if let Some(start) = select
259        .connect_by
260        .first()
261        .and_then(|cb| span_start(cb.span()))
262    {
263        candidates.push(start);
264    }
265
266    if let GroupByExpr::Expressions(exprs, _) = &select.group_by {
267        if let Some(start) = exprs.first().and_then(|expr| span_start(expr.span())) {
268            candidates.push(start);
269        }
270    }
271    if let Some(start) = select
272        .cluster_by
273        .first()
274        .and_then(|expr| span_start(expr.span()))
275    {
276        candidates.push(start);
277    }
278    if let Some(start) = select
279        .distribute_by
280        .first()
281        .and_then(|expr| span_start(expr.span()))
282    {
283        candidates.push(start);
284    }
285    if let Some(start) = select
286        .sort_by
287        .first()
288        .and_then(|expr| span_start(expr.expr.span()))
289    {
290        candidates.push(start);
291    }
292    if let Some(start) = select
293        .named_window
294        .first()
295        .and_then(|window| span_start(window.span()))
296    {
297        candidates.push(start);
298    }
299
300    candidates.into_iter().min()
301}
302
303fn span_start(span: sqlparser::tokenizer::Span) -> Option<(u64, u64)> {
304    if span.start.line == 0 || span.start.column == 0 {
305        None
306    } else {
307        Some((span.start.line, span.start.column))
308    }
309}
310
311fn span_end(span: sqlparser::tokenizer::Span) -> Option<(u64, u64)> {
312    if span.end.line == 0 || span.end.column == 0 {
313        None
314    } else {
315        Some((span.end.line, span.end.column))
316    }
317}
318
319fn first_significant_token_between(
320    tokens: Option<&[LocatedToken]>,
321    start: usize,
322    end: usize,
323) -> Option<&LocatedToken> {
324    let tokens = tokens?;
325
326    for token in tokens {
327        if token.end <= start || token.start >= end {
328            continue;
329        }
330        if is_trivia_token(&token.token) {
331            continue;
332        }
333        return Some(token);
334    }
335    None
336}
337
338#[derive(Clone)]
339struct LocatedToken {
340    token: Token,
341    start: usize,
342    end: usize,
343}
344
345fn tokenized(sql: &str, dialect: Dialect) -> Option<Vec<LocatedToken>> {
346    let dialect = dialect.to_sqlparser_dialect();
347    let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
348    let tokens = tokenizer.tokenize_with_location().ok()?;
349
350    let mut out = Vec::with_capacity(tokens.len());
351    for token in tokens {
352        let Some((start, end)) = token_with_span_offsets(sql, &token) else {
353            continue;
354        };
355        out.push(LocatedToken {
356            token: token.token,
357            start,
358            end,
359        });
360    }
361    Some(out)
362}
363
364fn tokenized_for_context(ctx: &LintContext) -> Option<Vec<LocatedToken>> {
365    let statement_start = ctx.statement_range.start;
366    let from_document = ctx.with_document_tokens(|tokens| {
367        if tokens.is_empty() {
368            return None;
369        }
370
371        Some(
372            tokens
373                .iter()
374                .filter_map(|token| {
375                    let (start, end) = token_with_span_offsets(ctx.sql, token)?;
376                    if start < ctx.statement_range.start || end > ctx.statement_range.end {
377                        return None;
378                    }
379
380                    Some(LocatedToken {
381                        token: token.token.clone(),
382                        start: start - statement_start,
383                        end: end - statement_start,
384                    })
385                })
386                .collect::<Vec<_>>(),
387        )
388    });
389
390    if let Some(tokens) = from_document {
391        return Some(tokens);
392    }
393
394    tokenized(ctx.statement_sql(), ctx.dialect())
395}
396
397fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
398    let start = line_col_to_offset(
399        sql,
400        token.span.start.line as usize,
401        token.span.start.column as usize,
402    )?;
403    let end = line_col_to_offset(
404        sql,
405        token.span.end.line as usize,
406        token.span.end.column as usize,
407    )?;
408    Some((start, end))
409}
410
411fn is_trivia_token(token: &Token) -> bool {
412    matches!(
413        token,
414        Token::Whitespace(Whitespace::Space | Whitespace::Tab | Whitespace::Newline)
415            | Token::Whitespace(Whitespace::SingleLineComment { .. })
416            | Token::Whitespace(Whitespace::MultiLineComment(_))
417    )
418}
419
420fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
421    if line == 0 || column == 0 {
422        return None;
423    }
424    let mut current_line = 1usize;
425    let mut current_col = 1usize;
426    for (idx, ch) in sql.char_indices() {
427        if current_line == line && current_col == column {
428            return Some(idx);
429        }
430        if ch == '\n' {
431            current_line += 1;
432            current_col = 1;
433        } else {
434            current_col += 1;
435        }
436    }
437    if current_line == line && current_col == column {
438        return Some(sql.len());
439    }
440    None
441}
442
443#[cfg(test)]
444mod tests {
445    use super::*;
446    use crate::linter::config::LintConfig;
447    use crate::parser::parse_sql;
448    use crate::types::IssueAutofixApplicability;
449
450    fn run(sql: &str) -> Vec<Issue> {
451        run_with_rule(sql, ConventionSelectTrailingComma::default())
452    }
453
454    fn run_with_rule(sql: &str, rule: ConventionSelectTrailingComma) -> Vec<Issue> {
455        let stmts = parse_sql(sql).expect("parse");
456        stmts
457            .iter()
458            .enumerate()
459            .flat_map(|(index, stmt)| {
460                rule.check(
461                    stmt,
462                    &LintContext {
463                        sql,
464                        statement_range: 0..sql.len(),
465                        statement_index: index,
466                    },
467                )
468            })
469            .collect()
470    }
471
472    fn apply_issue_autofix(sql: &str, issue: &Issue) -> Option<String> {
473        let autofix = issue.autofix.as_ref()?;
474        let mut out = sql.to_string();
475        let mut edits = autofix.edits.clone();
476        edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
477        for edit in edits.iter().rev() {
478            out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
479        }
480        Some(out)
481    }
482
483    #[test]
484    fn flags_trailing_comma_before_from() {
485        let issues = run("select a, from t");
486        assert_eq!(issues.len(), 1);
487        assert_eq!(issues[0].code, issue_codes::LINT_CV_003);
488        let autofix = issues[0].autofix.as_ref().expect("autofix metadata");
489        assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
490        assert_eq!(autofix.edits.len(), 1);
491        assert_eq!(autofix.edits[0].replacement, "");
492        let fixed = apply_issue_autofix("select a, from t", &issues[0]).expect("apply autofix");
493        assert_eq!(fixed, "select a from t");
494    }
495
496    #[test]
497    fn wildcard_select_without_trailing_comma_is_allowed() {
498        let issues = run("SELECT * FROM t");
499        assert!(issues.is_empty());
500    }
501
502    #[test]
503    fn wildcard_select_with_trailing_comma_is_flagged() {
504        let issues = run("SELECT *, FROM t");
505        assert_eq!(issues.len(), 1);
506        assert_eq!(issues[0].code, issue_codes::LINT_CV_003);
507    }
508
509    #[test]
510    fn allows_no_trailing_comma() {
511        let issues = run("select a from t");
512        assert!(issues.is_empty());
513    }
514
515    #[test]
516    fn flags_nested_select_trailing_comma() {
517        let issues = run("SELECT (SELECT a, FROM t) AS x FROM outer_t");
518        assert_eq!(issues.len(), 1);
519        assert_eq!(issues[0].code, issue_codes::LINT_CV_003);
520    }
521
522    #[test]
523    fn does_not_flag_comma_in_string_or_comment() {
524        let issues = run("SELECT 'a, from t' AS txt -- select a, from t\nFROM t");
525        assert!(issues.is_empty());
526    }
527
528    #[test]
529    fn require_policy_flags_missing_trailing_comma() {
530        let config = LintConfig {
531            enabled: true,
532            disabled_rules: vec![],
533            rule_configs: std::collections::BTreeMap::from([(
534                "convention.select_trailing_comma".to_string(),
535                serde_json::json!({"select_clause_trailing_comma": "require"}),
536            )]),
537        };
538        let rule = ConventionSelectTrailingComma::from_config(&config);
539        let issues = run_with_rule("SELECT a FROM t", rule);
540        assert_eq!(issues.len(), 1);
541        let autofix = issues[0].autofix.as_ref().expect("autofix metadata");
542        assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
543        assert_eq!(autofix.edits.len(), 1);
544        assert_eq!(autofix.edits[0].replacement, ",");
545        let fixed = apply_issue_autofix("SELECT a FROM t", &issues[0]).expect("apply autofix");
546        assert_eq!(fixed, "SELECT a, FROM t");
547    }
548
549    #[test]
550    fn require_policy_allows_trailing_comma() {
551        let config = LintConfig {
552            enabled: true,
553            disabled_rules: vec![],
554            rule_configs: std::collections::BTreeMap::from([(
555                "LINT_CV_003".to_string(),
556                serde_json::json!({"select_clause_trailing_comma": "require"}),
557            )]),
558        };
559        let rule = ConventionSelectTrailingComma::from_config(&config);
560        let issues = run_with_rule("SELECT a, FROM t", rule);
561        assert!(issues.is_empty());
562    }
563
564    #[test]
565    fn require_policy_flags_select_without_from() {
566        let config = LintConfig {
567            enabled: true,
568            disabled_rules: vec![],
569            rule_configs: std::collections::BTreeMap::from([(
570                "convention.select_trailing_comma".to_string(),
571                serde_json::json!({"select_clause_trailing_comma": "require"}),
572            )]),
573        };
574        let rule = ConventionSelectTrailingComma::from_config(&config);
575        let issues = run_with_rule("SELECT 1", rule);
576        assert_eq!(issues.len(), 1);
577        assert!(
578            issues[0].autofix.is_none(),
579            "require-mode SELECT without clause boundary should remain report-only"
580        );
581    }
582}