use crate::linter::rule::{LintContext, LintRule};
use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit};
use sqlparser::ast::Statement;
use sqlparser::keywords::Keyword;
use sqlparser::tokenizer::{
Location, Span as TokenSpan, Token, TokenWithSpan, Tokenizer, Whitespace,
};
pub struct LayoutSelectModifiers;
type SimpleCollapseSpans = Vec<(usize, usize)>;
type CommentAwareEdits = Vec<(usize, usize, String)>;
type Lt010ViolationResult = (bool, SimpleCollapseSpans, CommentAwareEdits);
impl LintRule for LayoutSelectModifiers {
fn code(&self) -> &'static str {
issue_codes::LINT_LT_010
}
fn name(&self) -> &'static str {
"Layout select modifiers"
}
fn description(&self) -> &'static str {
"'SELECT' modifiers (e.g. 'DISTINCT') must be on the same line as 'SELECT'."
}
fn check(&self, _statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
let (has_violation, fixable_spans, comment_aware_edits) =
select_modifier_violations_and_fixable_spans(ctx);
if has_violation {
let mut issue = Issue::info(
issue_codes::LINT_LT_010,
"SELECT modifiers (DISTINCT/ALL) should be consistently formatted.",
)
.with_statement(ctx.statement_index);
if !comment_aware_edits.is_empty() {
let (start, end, _) = &comment_aware_edits[0];
issue = issue.with_span(ctx.span_from_statement_offset(*start, *end));
let edits = comment_aware_edits
.into_iter()
.map(|(edit_start, edit_end, replacement)| {
IssuePatchEdit::new(
ctx.span_from_statement_offset(edit_start, edit_end),
replacement,
)
})
.collect();
issue = issue.with_autofix_edits(IssueAutofixApplicability::Safe, edits);
} else if let Some((start, end)) = fixable_spans.first().copied() {
issue = issue.with_span(ctx.span_from_statement_offset(start, end));
let edits = fixable_spans
.into_iter()
.map(|(edit_start, edit_end)| {
IssuePatchEdit::new(
ctx.span_from_statement_offset(edit_start, edit_end),
" ",
)
})
.collect();
issue = issue.with_autofix_edits(IssueAutofixApplicability::Safe, edits);
}
vec![issue]
} else {
Vec::new()
}
}
}
fn select_modifier_violations_and_fixable_spans(ctx: &LintContext) -> Lt010ViolationResult {
let tokens =
tokenized_for_context(ctx).or_else(|| tokenized(ctx.statement_sql(), ctx.dialect()));
let Some(tokens) = tokens else {
return (false, Vec::new(), Vec::new());
};
let mut has_violation = false;
let mut fixable_spans = Vec::new();
let mut comment_aware_edits = Vec::new();
let sql = ctx.statement_sql();
for (index, token) in tokens.iter().enumerate() {
let Token::Word(word) = &token.token else {
continue;
};
if word.keyword != Keyword::SELECT {
continue;
}
let Some(next_index) = next_non_trivia_index(&tokens, index + 1) else {
continue;
};
let Token::Word(next_word) = &tokens[next_index].token else {
continue;
};
if !matches!(next_word.keyword, Keyword::DISTINCT | Keyword::ALL) {
continue;
}
if tokens[next_index].span.start.line > token.span.end.line {
has_violation = true;
let Some(select_end) = line_col_to_offset(
sql,
token.span.end.line as usize,
token.span.end.column as usize,
) else {
continue;
};
let Some(modifier_start) = line_col_to_offset(
sql,
tokens[next_index].span.start.line as usize,
tokens[next_index].span.start.column as usize,
) else {
continue;
};
let Some(modifier_end) = line_col_to_offset(
sql,
tokens[next_index].span.end.line as usize,
tokens[next_index].span.end.column as usize,
) else {
continue;
};
if trivia_between_is_whitespace_only(&tokens, index, next_index) {
if select_end < modifier_start {
fixable_spans.push((select_end, modifier_start));
}
} else {
let modifier_text = &sql[modifier_start..modifier_end];
let first_comment_start = (index + 1..next_index)
.filter(|&i| is_comment_token(&tokens[i].token))
.find_map(|i| {
line_col_to_offset(
sql,
tokens[i].span.start.line as usize,
tokens[i].span.start.column as usize,
)
});
if let Some(comment_start) = first_comment_start {
let indent = detect_indent(sql, modifier_start);
comment_aware_edits.push((
select_end,
comment_start,
format!(" {modifier_text}\n{indent}"),
));
let remove_end = skip_trailing_space(sql, modifier_end);
comment_aware_edits.push((modifier_start, remove_end, String::new()));
}
}
}
}
fixable_spans.sort_unstable();
fixable_spans.dedup();
(has_violation, fixable_spans, comment_aware_edits)
}
fn tokenized(sql: &str, dialect: Dialect) -> Option<Vec<TokenWithSpan>> {
let dialect = dialect.to_sqlparser_dialect();
let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
tokenizer.tokenize_with_location().ok()
}
fn tokenized_for_context(ctx: &LintContext) -> Option<Vec<TokenWithSpan>> {
let (statement_start_line, statement_start_column) =
offset_to_line_col(ctx.sql, ctx.statement_range.start)?;
ctx.with_document_tokens(|tokens| {
if tokens.is_empty() {
return None;
}
let mut out = Vec::new();
for token in tokens {
let Some((start, end)) = token_with_span_offsets(ctx.sql, token) else {
continue;
};
if start < ctx.statement_range.start || end > ctx.statement_range.end {
continue;
}
let Some(start_loc) = relative_location(
token.span.start,
statement_start_line,
statement_start_column,
) else {
continue;
};
let Some(end_loc) =
relative_location(token.span.end, statement_start_line, statement_start_column)
else {
continue;
};
out.push(TokenWithSpan::new(
token.token.clone(),
TokenSpan::new(start_loc, end_loc),
));
}
if out.is_empty() {
None
} else {
Some(out)
}
})
}
fn next_non_trivia_index(
tokens: &[sqlparser::tokenizer::TokenWithSpan],
mut index: usize,
) -> Option<usize> {
while index < tokens.len() {
if !is_trivia_token(&tokens[index].token) {
return Some(index);
}
index += 1;
}
None
}
fn is_trivia_token(token: &Token) -> bool {
matches!(
token,
Token::Whitespace(Whitespace::Space | Whitespace::Newline | Whitespace::Tab)
| Token::Whitespace(Whitespace::SingleLineComment { .. })
| Token::Whitespace(Whitespace::MultiLineComment(_))
)
}
fn is_comment_token(token: &Token) -> bool {
matches!(
token,
Token::Whitespace(Whitespace::SingleLineComment { .. })
| Token::Whitespace(Whitespace::MultiLineComment(_))
)
}
fn detect_indent(sql: &str, offset: usize) -> String {
let line_start = sql[..offset].rfind('\n').map(|pos| pos + 1).unwrap_or(0);
sql[line_start..]
.chars()
.take_while(|ch| ch.is_whitespace() && *ch != '\n')
.collect()
}
fn skip_trailing_space(sql: &str, offset: usize) -> usize {
let mut pos = offset;
for ch in sql[offset..].chars() {
if ch == ' ' {
pos += 1;
} else {
break;
}
}
pos
}
fn trivia_between_is_whitespace_only(tokens: &[TokenWithSpan], left: usize, right: usize) -> bool {
if right <= left + 1 {
return true;
}
tokens[left + 1..right].iter().all(|token| {
matches!(
token.token,
Token::Whitespace(Whitespace::Space | Whitespace::Newline | Whitespace::Tab)
)
})
}
fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
if line == 0 || column == 0 {
return None;
}
let mut current_line = 1usize;
let mut current_col = 1usize;
for (offset, ch) in sql.char_indices() {
if current_line == line && current_col == column {
return Some(offset);
}
if ch == '\n' {
current_line += 1;
current_col = 1;
} else {
current_col += 1;
}
}
if current_line == line && current_col == column {
return Some(sql.len());
}
None
}
fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
let start = line_col_to_offset(
sql,
token.span.start.line as usize,
token.span.start.column as usize,
)?;
let end = line_col_to_offset(
sql,
token.span.end.line as usize,
token.span.end.column as usize,
)?;
Some((start, end))
}
fn offset_to_line_col(sql: &str, offset: usize) -> Option<(usize, usize)> {
if offset > sql.len() {
return None;
}
if offset == sql.len() {
let mut line = 1usize;
let mut column = 1usize;
for ch in sql.chars() {
if ch == '\n' {
line += 1;
column = 1;
} else {
column += 1;
}
}
return Some((line, column));
}
let mut line = 1usize;
let mut column = 1usize;
for (index, ch) in sql.char_indices() {
if index == offset {
return Some((line, column));
}
if ch == '\n' {
line += 1;
column = 1;
} else {
column += 1;
}
}
None
}
fn relative_location(
location: Location,
statement_start_line: usize,
statement_start_column: usize,
) -> Option<Location> {
let line = location.line as usize;
let column = location.column as usize;
if line < statement_start_line {
return None;
}
if line == statement_start_line {
if column < statement_start_column {
return None;
}
return Some(Location::new(
1,
(column - statement_start_column + 1) as u64,
));
}
Some(Location::new(
(line - statement_start_line + 1) as u64,
column as u64,
))
}
#[cfg(test)]
mod tests {
use super::*;
use crate::parser::parse_sql;
use crate::types::IssueAutofixApplicability;
fn run(sql: &str) -> Vec<Issue> {
let statements = parse_sql(sql).expect("parse");
let rule = LayoutSelectModifiers;
statements
.iter()
.enumerate()
.flat_map(|(index, statement)| {
rule.check(
statement,
&LintContext {
sql,
statement_range: 0..sql.len(),
statement_index: index,
},
)
})
.collect()
}
fn apply_issue_autofix(sql: &str, issue: &Issue) -> Option<String> {
let autofix = issue.autofix.as_ref()?;
let mut out = sql.to_string();
let mut edits = autofix.edits.clone();
edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
for edit in edits.into_iter().rev() {
out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
}
Some(out)
}
#[test]
fn flags_distinct_on_next_line() {
let sql = "SELECT\nDISTINCT a\nFROM t";
let issues = run(sql);
assert_eq!(issues.len(), 1);
assert_eq!(issues[0].code, issue_codes::LINT_LT_010);
let autofix = issues[0].autofix.as_ref().expect("autofix metadata");
assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
assert_eq!(fixed, "SELECT DISTINCT a\nFROM t");
}
#[test]
fn does_not_flag_single_line_modifier() {
assert!(run("SELECT DISTINCT a FROM t").is_empty());
}
#[test]
fn does_not_flag_modifier_text_in_string() {
assert!(run("SELECT 'SELECT\nDISTINCT a' AS txt").is_empty());
}
#[test]
fn comment_between_select_and_modifier_has_autofix() {
let sql = "SELECT\n-- keep\nDISTINCT a\nFROM t";
let issues = run(sql);
assert_eq!(issues.len(), 1);
assert_eq!(issues[0].code, issue_codes::LINT_LT_010);
let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
assert_eq!(fixed, "SELECT DISTINCT\n-- keep\na\nFROM t");
}
#[test]
fn comment_between_select_and_distinct_with_indent() {
let sql = "SELECT\n -- The table contains duplicates, so we use DISTINCT.\n DISTINCT user_id\nFROM\n safe_user";
let issues = run(sql);
assert_eq!(issues.len(), 1);
let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
assert_eq!(
fixed,
"SELECT DISTINCT\n -- The table contains duplicates, so we use DISTINCT.\n user_id\nFROM\n safe_user"
);
}
}