Skip to main content

flowscope_core/linter/rules/
cv_003.rs

1//! LINT_CV_003: Select trailing comma.
2//!
3//! Avoid trailing comma before FROM.
4
5use crate::linter::config::LintConfig;
6use crate::linter::rule::{LintContext, LintRule};
7use crate::linter::rules::semantic_helpers::visit_selects_in_statement;
8use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit};
9use sqlparser::ast::{GroupByExpr, Select, SelectItem, Spanned, Statement};
10use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer, Whitespace};
11
12#[derive(Clone, Copy, Debug, Eq, PartialEq)]
13enum SelectClauseTrailingCommaPolicy {
14    Forbid,
15    Require,
16}
17
18impl SelectClauseTrailingCommaPolicy {
19    fn from_config(config: &LintConfig) -> Self {
20        match config
21            .rule_option_str(issue_codes::LINT_CV_003, "select_clause_trailing_comma")
22            .unwrap_or("forbid")
23            .to_ascii_lowercase()
24            .as_str()
25        {
26            "require" => Self::Require,
27            _ => Self::Forbid,
28        }
29    }
30
31    fn violated(self, trailing_comma_present: bool) -> bool {
32        match self {
33            Self::Forbid => trailing_comma_present,
34            Self::Require => !trailing_comma_present,
35        }
36    }
37
38    fn message(self) -> &'static str {
39        match self {
40            Self::Forbid => "Avoid trailing comma before FROM in SELECT clause.",
41            Self::Require => "Use trailing comma before FROM in SELECT clause.",
42        }
43    }
44}
45
46pub struct ConventionSelectTrailingComma {
47    policy: SelectClauseTrailingCommaPolicy,
48}
49
50impl ConventionSelectTrailingComma {
51    pub fn from_config(config: &LintConfig) -> Self {
52        Self {
53            policy: SelectClauseTrailingCommaPolicy::from_config(config),
54        }
55    }
56}
57
58impl Default for ConventionSelectTrailingComma {
59    fn default() -> Self {
60        Self {
61            policy: SelectClauseTrailingCommaPolicy::Forbid,
62        }
63    }
64}
65
66impl LintRule for ConventionSelectTrailingComma {
67    fn code(&self) -> &'static str {
68        issue_codes::LINT_CV_003
69    }
70
71    fn name(&self) -> &'static str {
72        "Select trailing comma"
73    }
74
75    fn description(&self) -> &'static str {
76        "Trailing commas within select clause."
77    }
78
79    fn check(&self, statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
80        let tokens =
81            tokenized_for_context(ctx).or_else(|| tokenized(ctx.statement_sql(), ctx.dialect()));
82        let violations = select_clause_policy_violations(
83            statement,
84            ctx.statement_sql(),
85            self.policy,
86            tokens.as_deref(),
87        );
88        let Some(first_violation) = violations.first().copied() else {
89            return Vec::new();
90        };
91
92        let mut issue = Issue::warning(issue_codes::LINT_CV_003, self.policy.message())
93            .with_statement(ctx.statement_index)
94            .with_span(ctx.span_from_statement_offset(
95                first_violation.issue_start,
96                first_violation.issue_end,
97            ));
98
99        let edits: Vec<IssuePatchEdit> = violations
100            .iter()
101            .filter_map(|violation| violation.fix)
102            .map(|edit| {
103                IssuePatchEdit::new(
104                    ctx.span_from_statement_offset(edit.start, edit.end),
105                    edit.replacement,
106                )
107            })
108            .collect();
109        if !edits.is_empty() {
110            issue = issue.with_autofix_edits(IssueAutofixApplicability::Safe, edits);
111        }
112
113        vec![issue]
114    }
115}
116
117#[derive(Clone, Copy, Debug, Eq, PartialEq)]
118struct SelectClauseEdit {
119    start: usize,
120    end: usize,
121    replacement: &'static str,
122}
123
124#[derive(Clone, Copy, Debug, Eq, PartialEq)]
125struct SelectClauseViolation {
126    issue_start: usize,
127    issue_end: usize,
128    fix: Option<SelectClauseEdit>,
129}
130
131fn select_clause_policy_violations(
132    statement: &Statement,
133    sql: &str,
134    policy: SelectClauseTrailingCommaPolicy,
135    tokens: Option<&[LocatedToken]>,
136) -> Vec<SelectClauseViolation> {
137    let mut violations = Vec::new();
138    visit_selects_in_statement(statement, &mut |select| {
139        if let Some(violation) = select_clause_violation(select, sql, policy, tokens) {
140            violations.push(violation);
141        }
142    });
143    violations
144}
145
146fn select_clause_violation(
147    select: &Select,
148    sql: &str,
149    policy: SelectClauseTrailingCommaPolicy,
150    tokens: Option<&[LocatedToken]>,
151) -> Option<SelectClauseViolation> {
152    let last_projection_end = select_last_projection_end(select)?;
153    let last_projection_end_offset = line_col_to_offset(
154        sql,
155        last_projection_end.0 as usize,
156        last_projection_end.1 as usize,
157    )?;
158
159    let boundary = select_clause_boundary(select).or_else(|| span_end(select.span()))?;
160    let boundary_offset = line_col_to_offset(sql, boundary.0 as usize, boundary.1 as usize)?;
161    if boundary_offset < last_projection_end_offset {
162        return None;
163    }
164
165    let significant_token =
166        first_significant_token_between(tokens, last_projection_end_offset, boundary_offset);
167    let has_trailing_comma = significant_token
168        .as_ref()
169        .is_some_and(|token| matches!(token.token, Token::Comma));
170    if !policy.violated(has_trailing_comma) {
171        return None;
172    }
173
174    match policy {
175        SelectClauseTrailingCommaPolicy::Forbid => {
176            let comma = significant_token
177                .as_ref()
178                .copied()
179                .filter(|token| matches!(token.token, Token::Comma))?;
180            Some(SelectClauseViolation {
181                issue_start: comma.start,
182                issue_end: comma.end,
183                fix: Some(SelectClauseEdit {
184                    start: comma.start,
185                    end: comma.end,
186                    replacement: "",
187                }),
188            })
189        }
190        SelectClauseTrailingCommaPolicy::Require => {
191            // Without a following clause boundary, adding a terminal comma may
192            // produce invalid SQL (`SELECT 1,`), so report-only in this shape.
193            if boundary_offset == last_projection_end_offset {
194                return Some(SelectClauseViolation {
195                    issue_start: last_projection_end_offset,
196                    issue_end: last_projection_end_offset,
197                    fix: None,
198                });
199            }
200
201            Some(SelectClauseViolation {
202                issue_start: last_projection_end_offset,
203                issue_end: last_projection_end_offset,
204                fix: Some(SelectClauseEdit {
205                    start: last_projection_end_offset,
206                    end: last_projection_end_offset,
207                    replacement: ",",
208                }),
209            })
210        }
211    }
212}
213
214fn select_last_projection_end(select: &Select) -> Option<(u64, u64)> {
215    let item = select.projection.last()?;
216    match item {
217        SelectItem::ExprWithAlias { alias, .. } => span_end(alias.span),
218        SelectItem::UnnamedExpr(expr) => span_end(expr.span()),
219        SelectItem::Wildcard(_) | SelectItem::QualifiedWildcard(_, _) => span_end(item.span()),
220    }
221}
222
223fn select_clause_boundary(select: &Select) -> Option<(u64, u64)> {
224    let mut candidates = Vec::new();
225
226    if let Some(first_from) = select.from.first() {
227        if let Some(start) = span_start(first_from.relation.span()) {
228            candidates.push(start);
229        }
230    }
231
232    if let Some(into) = &select.into {
233        if let Some(start) = span_start(into.span()) {
234            candidates.push(start);
235        }
236    }
237
238    if let Some(expr) = &select.prewhere {
239        if let Some(start) = span_start(expr.span()) {
240            candidates.push(start);
241        }
242    }
243    if let Some(expr) = &select.selection {
244        if let Some(start) = span_start(expr.span()) {
245            candidates.push(start);
246        }
247    }
248    if let Some(expr) = &select.having {
249        if let Some(start) = span_start(expr.span()) {
250            candidates.push(start);
251        }
252    }
253    if let Some(expr) = &select.qualify {
254        if let Some(start) = span_start(expr.span()) {
255            candidates.push(start);
256        }
257    }
258    if let Some(connect_by) = &select.connect_by {
259        if let Some(start) = span_start(connect_by.span()) {
260            candidates.push(start);
261        }
262    }
263
264    if let GroupByExpr::Expressions(exprs, _) = &select.group_by {
265        if let Some(start) = exprs.first().and_then(|expr| span_start(expr.span())) {
266            candidates.push(start);
267        }
268    }
269    if let Some(start) = select
270        .cluster_by
271        .first()
272        .and_then(|expr| span_start(expr.span()))
273    {
274        candidates.push(start);
275    }
276    if let Some(start) = select
277        .distribute_by
278        .first()
279        .and_then(|expr| span_start(expr.span()))
280    {
281        candidates.push(start);
282    }
283    if let Some(start) = select
284        .sort_by
285        .first()
286        .and_then(|expr| span_start(expr.expr.span()))
287    {
288        candidates.push(start);
289    }
290    if let Some(start) = select
291        .named_window
292        .first()
293        .and_then(|window| span_start(window.span()))
294    {
295        candidates.push(start);
296    }
297
298    candidates.into_iter().min()
299}
300
301fn span_start(span: sqlparser::tokenizer::Span) -> Option<(u64, u64)> {
302    if span.start.line == 0 || span.start.column == 0 {
303        None
304    } else {
305        Some((span.start.line, span.start.column))
306    }
307}
308
309fn span_end(span: sqlparser::tokenizer::Span) -> Option<(u64, u64)> {
310    if span.end.line == 0 || span.end.column == 0 {
311        None
312    } else {
313        Some((span.end.line, span.end.column))
314    }
315}
316
317fn first_significant_token_between(
318    tokens: Option<&[LocatedToken]>,
319    start: usize,
320    end: usize,
321) -> Option<&LocatedToken> {
322    let tokens = tokens?;
323
324    for token in tokens {
325        if token.end <= start || token.start >= end {
326            continue;
327        }
328        if is_trivia_token(&token.token) {
329            continue;
330        }
331        return Some(token);
332    }
333    None
334}
335
336#[derive(Clone)]
337struct LocatedToken {
338    token: Token,
339    start: usize,
340    end: usize,
341}
342
343fn tokenized(sql: &str, dialect: Dialect) -> Option<Vec<LocatedToken>> {
344    let dialect = dialect.to_sqlparser_dialect();
345    let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
346    let tokens = tokenizer.tokenize_with_location().ok()?;
347
348    let mut out = Vec::with_capacity(tokens.len());
349    for token in tokens {
350        let Some((start, end)) = token_with_span_offsets(sql, &token) else {
351            continue;
352        };
353        out.push(LocatedToken {
354            token: token.token,
355            start,
356            end,
357        });
358    }
359    Some(out)
360}
361
362fn tokenized_for_context(ctx: &LintContext) -> Option<Vec<LocatedToken>> {
363    let statement_start = ctx.statement_range.start;
364    let from_document = ctx.with_document_tokens(|tokens| {
365        if tokens.is_empty() {
366            return None;
367        }
368
369        Some(
370            tokens
371                .iter()
372                .filter_map(|token| {
373                    let (start, end) = token_with_span_offsets(ctx.sql, token)?;
374                    if start < ctx.statement_range.start || end > ctx.statement_range.end {
375                        return None;
376                    }
377
378                    Some(LocatedToken {
379                        token: token.token.clone(),
380                        start: start - statement_start,
381                        end: end - statement_start,
382                    })
383                })
384                .collect::<Vec<_>>(),
385        )
386    });
387
388    if let Some(tokens) = from_document {
389        return Some(tokens);
390    }
391
392    tokenized(ctx.statement_sql(), ctx.dialect())
393}
394
395fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
396    let start = line_col_to_offset(
397        sql,
398        token.span.start.line as usize,
399        token.span.start.column as usize,
400    )?;
401    let end = line_col_to_offset(
402        sql,
403        token.span.end.line as usize,
404        token.span.end.column as usize,
405    )?;
406    Some((start, end))
407}
408
409fn is_trivia_token(token: &Token) -> bool {
410    matches!(
411        token,
412        Token::Whitespace(Whitespace::Space | Whitespace::Tab | Whitespace::Newline)
413            | Token::Whitespace(Whitespace::SingleLineComment { .. })
414            | Token::Whitespace(Whitespace::MultiLineComment(_))
415    )
416}
417
418fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
419    if line == 0 || column == 0 {
420        return None;
421    }
422    let mut current_line = 1usize;
423    let mut current_col = 1usize;
424    for (idx, ch) in sql.char_indices() {
425        if current_line == line && current_col == column {
426            return Some(idx);
427        }
428        if ch == '\n' {
429            current_line += 1;
430            current_col = 1;
431        } else {
432            current_col += 1;
433        }
434    }
435    if current_line == line && current_col == column {
436        return Some(sql.len());
437    }
438    None
439}
440
441#[cfg(test)]
442mod tests {
443    use super::*;
444    use crate::linter::config::LintConfig;
445    use crate::parser::parse_sql;
446    use crate::types::IssueAutofixApplicability;
447
448    fn run(sql: &str) -> Vec<Issue> {
449        run_with_rule(sql, ConventionSelectTrailingComma::default())
450    }
451
452    fn run_with_rule(sql: &str, rule: ConventionSelectTrailingComma) -> Vec<Issue> {
453        let stmts = parse_sql(sql).expect("parse");
454        stmts
455            .iter()
456            .enumerate()
457            .flat_map(|(index, stmt)| {
458                rule.check(
459                    stmt,
460                    &LintContext {
461                        sql,
462                        statement_range: 0..sql.len(),
463                        statement_index: index,
464                    },
465                )
466            })
467            .collect()
468    }
469
470    fn apply_issue_autofix(sql: &str, issue: &Issue) -> Option<String> {
471        let autofix = issue.autofix.as_ref()?;
472        let mut out = sql.to_string();
473        let mut edits = autofix.edits.clone();
474        edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
475        for edit in edits.iter().rev() {
476            out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
477        }
478        Some(out)
479    }
480
481    #[test]
482    fn flags_trailing_comma_before_from() {
483        let issues = run("select a, from t");
484        assert_eq!(issues.len(), 1);
485        assert_eq!(issues[0].code, issue_codes::LINT_CV_003);
486        let autofix = issues[0].autofix.as_ref().expect("autofix metadata");
487        assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
488        assert_eq!(autofix.edits.len(), 1);
489        assert_eq!(autofix.edits[0].replacement, "");
490        let fixed = apply_issue_autofix("select a, from t", &issues[0]).expect("apply autofix");
491        assert_eq!(fixed, "select a from t");
492    }
493
494    #[test]
495    fn wildcard_select_without_trailing_comma_is_allowed() {
496        let issues = run("SELECT * FROM t");
497        assert!(issues.is_empty());
498    }
499
500    #[test]
501    fn wildcard_select_with_trailing_comma_is_flagged() {
502        let issues = run("SELECT *, FROM t");
503        assert_eq!(issues.len(), 1);
504        assert_eq!(issues[0].code, issue_codes::LINT_CV_003);
505    }
506
507    #[test]
508    fn allows_no_trailing_comma() {
509        let issues = run("select a from t");
510        assert!(issues.is_empty());
511    }
512
513    #[test]
514    fn flags_nested_select_trailing_comma() {
515        let issues = run("SELECT (SELECT a, FROM t) AS x FROM outer_t");
516        assert_eq!(issues.len(), 1);
517        assert_eq!(issues[0].code, issue_codes::LINT_CV_003);
518    }
519
520    #[test]
521    fn does_not_flag_comma_in_string_or_comment() {
522        let issues = run("SELECT 'a, from t' AS txt -- select a, from t\nFROM t");
523        assert!(issues.is_empty());
524    }
525
526    #[test]
527    fn require_policy_flags_missing_trailing_comma() {
528        let config = LintConfig {
529            enabled: true,
530            disabled_rules: vec![],
531            rule_configs: std::collections::BTreeMap::from([(
532                "convention.select_trailing_comma".to_string(),
533                serde_json::json!({"select_clause_trailing_comma": "require"}),
534            )]),
535        };
536        let rule = ConventionSelectTrailingComma::from_config(&config);
537        let issues = run_with_rule("SELECT a FROM t", rule);
538        assert_eq!(issues.len(), 1);
539        let autofix = issues[0].autofix.as_ref().expect("autofix metadata");
540        assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
541        assert_eq!(autofix.edits.len(), 1);
542        assert_eq!(autofix.edits[0].replacement, ",");
543        let fixed = apply_issue_autofix("SELECT a FROM t", &issues[0]).expect("apply autofix");
544        assert_eq!(fixed, "SELECT a, FROM t");
545    }
546
547    #[test]
548    fn require_policy_allows_trailing_comma() {
549        let config = LintConfig {
550            enabled: true,
551            disabled_rules: vec![],
552            rule_configs: std::collections::BTreeMap::from([(
553                "LINT_CV_003".to_string(),
554                serde_json::json!({"select_clause_trailing_comma": "require"}),
555            )]),
556        };
557        let rule = ConventionSelectTrailingComma::from_config(&config);
558        let issues = run_with_rule("SELECT a, FROM t", rule);
559        assert!(issues.is_empty());
560    }
561
562    #[test]
563    fn require_policy_flags_select_without_from() {
564        let config = LintConfig {
565            enabled: true,
566            disabled_rules: vec![],
567            rule_configs: std::collections::BTreeMap::from([(
568                "convention.select_trailing_comma".to_string(),
569                serde_json::json!({"select_clause_trailing_comma": "require"}),
570            )]),
571        };
572        let rule = ConventionSelectTrailingComma::from_config(&config);
573        let issues = run_with_rule("SELECT 1", rule);
574        assert_eq!(issues.len(), 1);
575        assert!(
576            issues[0].autofix.is_none(),
577            "require-mode SELECT without clause boundary should remain report-only"
578        );
579    }
580}