Skip to main content

flowscope_core/linter/rules/
lt_010.rs

//! LINT_LT_010: Layout select modifiers.
//!
//! SQLFluff LT10 parity (current scope): detect a SELECT modifier
//! (`DISTINCT`/`ALL`) that starts on a later line than its `SELECT` keyword.
5
6use crate::linter::rule::{LintContext, LintRule};
7use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit};
8use sqlparser::ast::Statement;
9use sqlparser::keywords::Keyword;
10use sqlparser::tokenizer::{
11    Location, Span as TokenSpan, Token, TokenWithSpan, Tokenizer, Whitespace,
12};
13
/// Lint rule LT010: a `SELECT` modifier (`DISTINCT`/`ALL`) must sit on the
/// same line as its `SELECT` keyword.
///
/// The rule is stateless, so it is a unit struct that is cheap to copy.
#[derive(Debug, Clone, Copy)]
pub struct LayoutSelectModifiers;

/// `(start, end)` byte ranges, relative to the statement text, that are
/// replaced with a single space.
type SimpleCollapseSpans = Vec<(usize, usize)>;
/// `(start, end, replacement)` triples, relative to the statement text, used
/// when a comment sits between `SELECT` and its modifier.
type CommentAwareEdits = Vec<(usize, usize, String)>;
/// `(has_violation, simple_collapse_spans, comment_aware_edits)`.
type Lt010ViolationResult = (bool, SimpleCollapseSpans, CommentAwareEdits);
19
20impl LintRule for LayoutSelectModifiers {
21    fn code(&self) -> &'static str {
22        issue_codes::LINT_LT_010
23    }
24
25    fn name(&self) -> &'static str {
26        "Layout select modifiers"
27    }
28
29    fn description(&self) -> &'static str {
30        "'SELECT' modifiers (e.g. 'DISTINCT') must be on the same line as 'SELECT'."
31    }
32
33    fn check(&self, _statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
34        let (has_violation, fixable_spans, comment_aware_edits) =
35            select_modifier_violations_and_fixable_spans(ctx);
36        if has_violation {
37            let mut issue = Issue::info(
38                issue_codes::LINT_LT_010,
39                "SELECT modifiers (DISTINCT/ALL) should be consistently formatted.",
40            )
41            .with_statement(ctx.statement_index);
42
43            if !comment_aware_edits.is_empty() {
44                // Comment-aware edits: use the first edit's start for the span.
45                let (start, end, _) = &comment_aware_edits[0];
46                issue = issue.with_span(ctx.span_from_statement_offset(*start, *end));
47                let edits = comment_aware_edits
48                    .into_iter()
49                    .map(|(edit_start, edit_end, replacement)| {
50                        IssuePatchEdit::new(
51                            ctx.span_from_statement_offset(edit_start, edit_end),
52                            replacement,
53                        )
54                    })
55                    .collect();
56                issue = issue.with_autofix_edits(IssueAutofixApplicability::Safe, edits);
57            } else if let Some((start, end)) = fixable_spans.first().copied() {
58                issue = issue.with_span(ctx.span_from_statement_offset(start, end));
59                let edits = fixable_spans
60                    .into_iter()
61                    .map(|(edit_start, edit_end)| {
62                        IssuePatchEdit::new(
63                            ctx.span_from_statement_offset(edit_start, edit_end),
64                            " ",
65                        )
66                    })
67                    .collect();
68                issue = issue.with_autofix_edits(IssueAutofixApplicability::Safe, edits);
69            }
70
71            vec![issue]
72        } else {
73            Vec::new()
74        }
75    }
76}
77
78/// Returns (has_violation, simple_collapse_spans, comment_aware_edits).
79/// `simple_collapse_spans` are (start, end) ranges to replace with " ".
80/// `comment_aware_edits` are (start, end, replacement) triples for surgical edits.
81fn select_modifier_violations_and_fixable_spans(ctx: &LintContext) -> Lt010ViolationResult {
82    let tokens =
83        tokenized_for_context(ctx).or_else(|| tokenized(ctx.statement_sql(), ctx.dialect()));
84    let Some(tokens) = tokens else {
85        return (false, Vec::new(), Vec::new());
86    };
87
88    let mut has_violation = false;
89    let mut fixable_spans = Vec::new();
90    let mut comment_aware_edits = Vec::new();
91    let sql = ctx.statement_sql();
92
93    for (index, token) in tokens.iter().enumerate() {
94        let Token::Word(word) = &token.token else {
95            continue;
96        };
97
98        if word.keyword != Keyword::SELECT {
99            continue;
100        }
101
102        let Some(next_index) = next_non_trivia_index(&tokens, index + 1) else {
103            continue;
104        };
105        let Token::Word(next_word) = &tokens[next_index].token else {
106            continue;
107        };
108
109        if !matches!(next_word.keyword, Keyword::DISTINCT | Keyword::ALL) {
110            continue;
111        }
112
113        if tokens[next_index].span.start.line > token.span.end.line {
114            has_violation = true;
115
116            let Some(select_end) = line_col_to_offset(
117                sql,
118                token.span.end.line as usize,
119                token.span.end.column as usize,
120            ) else {
121                continue;
122            };
123            let Some(modifier_start) = line_col_to_offset(
124                sql,
125                tokens[next_index].span.start.line as usize,
126                tokens[next_index].span.start.column as usize,
127            ) else {
128                continue;
129            };
130            let Some(modifier_end) = line_col_to_offset(
131                sql,
132                tokens[next_index].span.end.line as usize,
133                tokens[next_index].span.end.column as usize,
134            ) else {
135                continue;
136            };
137
138            if trivia_between_is_whitespace_only(&tokens, index, next_index) {
139                // Simple case: no comments — collapse whitespace between SELECT
140                // and modifier to a single space.
141                if select_end < modifier_start {
142                    fixable_spans.push((select_end, modifier_start));
143                }
144            } else {
145                // Comment-aware case: place modifier after SELECT and remove
146                // it from its original position. Uses surgical edits around
147                // the comment's protected range.
148                let modifier_text = &sql[modifier_start..modifier_end];
149
150                // Find first comment token to determine where the gap before
151                // it ends.
152                let first_comment_start = (index + 1..next_index)
153                    .filter(|&i| is_comment_token(&tokens[i].token))
154                    .find_map(|i| {
155                        line_col_to_offset(
156                            sql,
157                            tokens[i].span.start.line as usize,
158                            tokens[i].span.start.column as usize,
159                        )
160                    });
161
162                if let Some(comment_start) = first_comment_start {
163                    // Determine the indent of the modifier's line for
164                    // preserving alignment.
165                    let indent = detect_indent(sql, modifier_start);
166                    // Edit 1: Replace gap between SELECT and first comment
167                    // with " MODIFIER\n indent".
168                    comment_aware_edits.push((
169                        select_end,
170                        comment_start,
171                        format!(" {modifier_text}\n{indent}"),
172                    ));
173                    // Edit 2: Remove the modifier + trailing space from its
174                    // original line.
175                    let remove_end = skip_trailing_space(sql, modifier_end);
176                    comment_aware_edits.push((modifier_start, remove_end, String::new()));
177                }
178            }
179        }
180    }
181
182    fixable_spans.sort_unstable();
183    fixable_spans.dedup();
184    (has_violation, fixable_spans, comment_aware_edits)
185}
186
187fn tokenized(sql: &str, dialect: Dialect) -> Option<Vec<TokenWithSpan>> {
188    let dialect = dialect.to_sqlparser_dialect();
189    let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
190    tokenizer.tokenize_with_location().ok()
191}
192
193fn tokenized_for_context(ctx: &LintContext) -> Option<Vec<TokenWithSpan>> {
194    let (statement_start_line, statement_start_column) =
195        offset_to_line_col(ctx.sql, ctx.statement_range.start)?;
196
197    ctx.with_document_tokens(|tokens| {
198        if tokens.is_empty() {
199            return None;
200        }
201
202        let mut out = Vec::new();
203        for token in tokens {
204            let Some((start, end)) = token_with_span_offsets(ctx.sql, token) else {
205                continue;
206            };
207            if start < ctx.statement_range.start || end > ctx.statement_range.end {
208                continue;
209            }
210
211            let Some(start_loc) = relative_location(
212                token.span.start,
213                statement_start_line,
214                statement_start_column,
215            ) else {
216                continue;
217            };
218            let Some(end_loc) =
219                relative_location(token.span.end, statement_start_line, statement_start_column)
220            else {
221                continue;
222            };
223
224            out.push(TokenWithSpan::new(
225                token.token.clone(),
226                TokenSpan::new(start_loc, end_loc),
227            ));
228        }
229
230        if out.is_empty() {
231            None
232        } else {
233            Some(out)
234        }
235    })
236}
237
238fn next_non_trivia_index(
239    tokens: &[sqlparser::tokenizer::TokenWithSpan],
240    mut index: usize,
241) -> Option<usize> {
242    while index < tokens.len() {
243        if !is_trivia_token(&tokens[index].token) {
244            return Some(index);
245        }
246        index += 1;
247    }
248    None
249}
250
251fn is_trivia_token(token: &Token) -> bool {
252    matches!(
253        token,
254        Token::Whitespace(Whitespace::Space | Whitespace::Newline | Whitespace::Tab)
255            | Token::Whitespace(Whitespace::SingleLineComment { .. })
256            | Token::Whitespace(Whitespace::MultiLineComment(_))
257    )
258}
259
260fn is_comment_token(token: &Token) -> bool {
261    matches!(
262        token,
263        Token::Whitespace(Whitespace::SingleLineComment { .. })
264            | Token::Whitespace(Whitespace::MultiLineComment(_))
265    )
266}
267
/// Returns the leading whitespace of the line that contains byte `offset`.
///
/// The line starts just after the last `'\n'` before `offset` (or at the
/// start of `sql`); the indent ends at the first non-whitespace character or
/// at the next newline.
fn detect_indent(sql: &str, offset: usize) -> String {
    let line_start = match sql[..offset].rfind('\n') {
        Some(newline_at) => newline_at + 1,
        None => 0,
    };
    let line = &sql[line_start..];
    let indent_len = line
        .find(|ch: char| !ch.is_whitespace() || ch == '\n')
        .unwrap_or(line.len());
    line[..indent_len].to_string()
}
276
/// Returns the offset just past the run of `' '` characters that starts at
/// `offset`. Tabs, newlines and any other character end the run.
fn skip_trailing_space(sql: &str, offset: usize) -> usize {
    // A space is a single byte in UTF-8, so counting bytes equals counting chars.
    let space_run = sql[offset..].bytes().take_while(|&byte| byte == b' ').count();
    offset + space_run
}
289
290fn trivia_between_is_whitespace_only(tokens: &[TokenWithSpan], left: usize, right: usize) -> bool {
291    if right <= left + 1 {
292        return true;
293    }
294
295    tokens[left + 1..right].iter().all(|token| {
296        matches!(
297            token.token,
298            Token::Whitespace(Whitespace::Space | Whitespace::Newline | Whitespace::Tab)
299        )
300    })
301}
302
/// Converts a 1-based `(line, column)` position into a byte offset in `sql`.
///
/// Columns count characters, and only `'\n'` starts a new line. The position
/// one past the last character maps to `sql.len()`. Returns `None` for a zero
/// line/column or for a position that does not exist in `sql`.
fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
    if line == 0 || column == 0 {
        return None;
    }

    let target = (line, column);
    let mut cursor = (1usize, 1usize);

    for (byte_index, ch) in sql.char_indices() {
        if cursor == target {
            return Some(byte_index);
        }
        cursor = match ch {
            '\n' => (cursor.0 + 1, 1),
            _ => (cursor.0, cursor.1 + 1),
        };
    }

    // The cursor now sits one past the final character.
    (cursor == target).then_some(sql.len())
}
330
331fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
332    let start = line_col_to_offset(
333        sql,
334        token.span.start.line as usize,
335        token.span.start.column as usize,
336    )?;
337    let end = line_col_to_offset(
338        sql,
339        token.span.end.line as usize,
340        token.span.end.column as usize,
341    )?;
342    Some((start, end))
343}
344
/// Converts a byte offset in `sql` into a 1-based `(line, column)` position.
///
/// Columns count characters, and only `'\n'` starts a new line. `sql.len()`
/// is accepted and yields the position one past the last character. Returns
/// `None` when `offset` is past the end or falls inside a multi-byte
/// character.
fn offset_to_line_col(sql: &str, offset: usize) -> Option<(usize, usize)> {
    // `is_char_boundary` is false beyond the end and true exactly at the end.
    if !sql.is_char_boundary(offset) {
        return None;
    }

    let position = sql[..offset]
        .chars()
        .fold((1usize, 1usize), |(line, column), ch| {
            if ch == '\n' {
                (line + 1, 1)
            } else {
                (line, column + 1)
            }
        });
    Some(position)
}
379
380fn relative_location(
381    location: Location,
382    statement_start_line: usize,
383    statement_start_column: usize,
384) -> Option<Location> {
385    let line = location.line as usize;
386    let column = location.column as usize;
387    if line < statement_start_line {
388        return None;
389    }
390
391    if line == statement_start_line {
392        if column < statement_start_column {
393            return None;
394        }
395        return Some(Location::new(
396            1,
397            (column - statement_start_column + 1) as u64,
398        ));
399    }
400
401    Some(Location::new(
402        (line - statement_start_line + 1) as u64,
403        column as u64,
404    ))
405}
406
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::parse_sql;
    use crate::types::IssueAutofixApplicability;

    /// Runs the rule over every parsed statement of `sql`, using the whole
    /// input as each statement's range.
    fn run(sql: &str) -> Vec<Issue> {
        let statements = parse_sql(sql).expect("parse");
        let rule = LayoutSelectModifiers;
        let mut issues = Vec::new();
        for (index, statement) in statements.iter().enumerate() {
            let ctx = LintContext {
                sql,
                statement_range: 0..sql.len(),
                statement_index: index,
            };
            issues.extend(rule.check(statement, &ctx));
        }
        issues
    }

    /// Applies the issue's autofix edits to `sql`, last edit first so earlier
    /// offsets stay valid. Returns `None` when the issue has no autofix.
    fn apply_issue_autofix(sql: &str, issue: &Issue) -> Option<String> {
        let mut edits = issue.autofix.as_ref()?.edits.clone();
        edits.sort_by_key(|edit| (edit.span.start, edit.span.end));

        let mut patched = sql.to_string();
        for edit in edits.iter().rev() {
            patched.replace_range(edit.span.start..edit.span.end, &edit.replacement);
        }
        Some(patched)
    }

    #[test]
    fn flags_distinct_on_next_line() {
        let sql = "SELECT\nDISTINCT a\nFROM t";
        let issues = run(sql);
        assert_eq!(issues.len(), 1);

        let issue = &issues[0];
        assert_eq!(issue.code, issue_codes::LINT_LT_010);
        let autofix = issue.autofix.as_ref().expect("autofix metadata");
        assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
        let fixed = apply_issue_autofix(sql, issue).expect("apply autofix");
        assert_eq!(fixed, "SELECT DISTINCT a\nFROM t");
    }

    #[test]
    fn does_not_flag_single_line_modifier() {
        let issues = run("SELECT DISTINCT a FROM t");
        assert!(issues.is_empty());
    }

    #[test]
    fn does_not_flag_modifier_text_in_string() {
        let issues = run("SELECT 'SELECT\nDISTINCT a' AS txt");
        assert!(issues.is_empty());
    }

    #[test]
    fn comment_between_select_and_modifier_has_autofix() {
        let sql = "SELECT\n-- keep\nDISTINCT a\nFROM t";
        let issues = run(sql);
        assert_eq!(issues.len(), 1);

        let issue = &issues[0];
        assert_eq!(issue.code, issue_codes::LINT_LT_010);
        let fixed = apply_issue_autofix(sql, issue).expect("apply autofix");
        assert_eq!(fixed, "SELECT DISTINCT\n-- keep\na\nFROM t");
    }

    #[test]
    fn comment_between_select_and_distinct_with_indent() {
        let sql = "SELECT\n    -- The table contains duplicates, so we use DISTINCT.\n    DISTINCT user_id\nFROM\n    safe_user";
        let issues = run(sql);
        assert_eq!(issues.len(), 1);

        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        let expected = "SELECT DISTINCT\n    -- The table contains duplicates, so we use DISTINCT.\n    user_id\nFROM\n    safe_user";
        assert_eq!(fixed, expected);
    }
}