// flowscope_core/linter/rules/lt_010.rs
use crate::linter::rule::{LintContext, LintRule};
use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit};
use sqlparser::ast::Statement;
use sqlparser::keywords::Keyword;
use sqlparser::tokenizer::{
    Location, Span as TokenSpan, Token, TokenWithSpan, Tokenizer, Whitespace,
};

/// LT010: `SELECT` modifiers (`DISTINCT`/`ALL`) must sit on the same line
/// as the `SELECT` keyword itself.
pub struct LayoutSelectModifiers;

// Byte ranges (statement-relative) of whitespace-only gaps that can each be
// collapsed to a single space.
type SimpleCollapseSpans = Vec<(usize, usize)>;
// `(start, end, replacement)` edits used when a comment sits between SELECT
// and its modifier, so the comment must be preserved by the fix.
type CommentAwareEdits = Vec<(usize, usize, String)>;
// `(has_violation, simple collapse spans, comment-aware edits)`.
type Lt010ViolationResult = (bool, SimpleCollapseSpans, CommentAwareEdits);
19
20impl LintRule for LayoutSelectModifiers {
21 fn code(&self) -> &'static str {
22 issue_codes::LINT_LT_010
23 }
24
25 fn name(&self) -> &'static str {
26 "Layout select modifiers"
27 }
28
29 fn description(&self) -> &'static str {
30 "'SELECT' modifiers (e.g. 'DISTINCT') must be on the same line as 'SELECT'."
31 }
32
33 fn check(&self, _statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
34 let (has_violation, fixable_spans, comment_aware_edits) =
35 select_modifier_violations_and_fixable_spans(ctx);
36 if has_violation {
37 let mut issue = Issue::info(
38 issue_codes::LINT_LT_010,
39 "SELECT modifiers (DISTINCT/ALL) should be consistently formatted.",
40 )
41 .with_statement(ctx.statement_index);
42
43 if !comment_aware_edits.is_empty() {
44 let (start, end, _) = &comment_aware_edits[0];
46 issue = issue.with_span(ctx.span_from_statement_offset(*start, *end));
47 let edits = comment_aware_edits
48 .into_iter()
49 .map(|(edit_start, edit_end, replacement)| {
50 IssuePatchEdit::new(
51 ctx.span_from_statement_offset(edit_start, edit_end),
52 replacement,
53 )
54 })
55 .collect();
56 issue = issue.with_autofix_edits(IssueAutofixApplicability::Safe, edits);
57 } else if let Some((start, end)) = fixable_spans.first().copied() {
58 issue = issue.with_span(ctx.span_from_statement_offset(start, end));
59 let edits = fixable_spans
60 .into_iter()
61 .map(|(edit_start, edit_end)| {
62 IssuePatchEdit::new(
63 ctx.span_from_statement_offset(edit_start, edit_end),
64 " ",
65 )
66 })
67 .collect();
68 issue = issue.with_autofix_edits(IssueAutofixApplicability::Safe, edits);
69 }
70
71 vec![issue]
72 } else {
73 Vec::new()
74 }
75 }
76}
77
/// Scans the statement's token stream for a `SELECT` whose `DISTINCT`/`ALL`
/// modifier starts on a later line, and gathers the autofix edits for it.
///
/// Returns `(has_violation, fixable_spans, comment_aware_edits)`:
/// * `fixable_spans` — statement-relative byte ranges of whitespace-only gaps
///   between `SELECT` and the modifier; each collapses to one space.
/// * `comment_aware_edits` — `(start, end, replacement)` edits for the case
///   where a comment sits in the gap: the modifier is hoisted up next to
///   `SELECT` while the comment stays on its own line.
fn select_modifier_violations_and_fixable_spans(ctx: &LintContext) -> Lt010ViolationResult {
    // Prefer the already-tokenized document (relocated to statement-relative
    // coordinates); fall back to tokenizing just this statement's SQL.
    let tokens =
        tokenized_for_context(ctx).or_else(|| tokenized(ctx.statement_sql(), ctx.dialect()));
    let Some(tokens) = tokens else {
        // Tokenization failed — report nothing rather than guess.
        return (false, Vec::new(), Vec::new());
    };

    let mut has_violation = false;
    let mut fixable_spans = Vec::new();
    let mut comment_aware_edits = Vec::new();
    let sql = ctx.statement_sql();

    for (index, token) in tokens.iter().enumerate() {
        let Token::Word(word) = &token.token else {
            continue;
        };

        if word.keyword != Keyword::SELECT {
            continue;
        }

        // First meaningful (non-whitespace, non-comment) token after SELECT.
        let Some(next_index) = next_non_trivia_index(&tokens, index + 1) else {
            continue;
        };
        let Token::Word(next_word) = &tokens[next_index].token else {
            continue;
        };

        if !matches!(next_word.keyword, Keyword::DISTINCT | Keyword::ALL) {
            continue;
        }

        // Violation: the modifier begins on a later line than SELECT ends on.
        if tokens[next_index].span.start.line > token.span.end.line {
            has_violation = true;

            // Map the relevant (line, column) positions to byte offsets; if
            // any mapping fails, keep the violation but skip its autofix.
            let Some(select_end) = line_col_to_offset(
                sql,
                token.span.end.line as usize,
                token.span.end.column as usize,
            ) else {
                continue;
            };
            let Some(modifier_start) = line_col_to_offset(
                sql,
                tokens[next_index].span.start.line as usize,
                tokens[next_index].span.start.column as usize,
            ) else {
                continue;
            };
            let Some(modifier_end) = line_col_to_offset(
                sql,
                tokens[next_index].span.end.line as usize,
                tokens[next_index].span.end.column as usize,
            ) else {
                continue;
            };

            if trivia_between_is_whitespace_only(&tokens, index, next_index) {
                // Simple case: collapse the whitespace gap to a single space.
                if select_end < modifier_start {
                    fixable_spans.push((select_end, modifier_start));
                }
            } else {
                // A comment sits between SELECT and the modifier.
                let modifier_text = &sql[modifier_start..modifier_end];

                // Byte offset of the first comment token inside the gap.
                let first_comment_start = (index + 1..next_index)
                    .filter(|&i| is_comment_token(&tokens[i].token))
                    .find_map(|i| {
                        line_col_to_offset(
                            sql,
                            tokens[i].span.start.line as usize,
                            tokens[i].span.start.column as usize,
                        )
                    });

                if let Some(comment_start) = first_comment_start {
                    // Move the modifier up beside SELECT; the comment keeps
                    // its own line with the modifier's original indentation.
                    let indent = detect_indent(sql, modifier_start);
                    comment_aware_edits.push((
                        select_end,
                        comment_start,
                        format!(" {modifier_text}\n{indent}"),
                    ));
                    // Delete the modifier from its old position, including any
                    // trailing spaces.
                    let remove_end = skip_trailing_space(sql, modifier_end);
                    comment_aware_edits.push((modifier_start, remove_end, String::new()));
                }
            }
        }
    }

    // Multiple SELECTs may yield unsorted or duplicate spans; normalize them.
    fixable_spans.sort_unstable();
    fixable_spans.dedup();
    (has_violation, fixable_spans, comment_aware_edits)
}
186
187fn tokenized(sql: &str, dialect: Dialect) -> Option<Vec<TokenWithSpan>> {
188 let dialect = dialect.to_sqlparser_dialect();
189 let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
190 tokenizer.tokenize_with_location().ok()
191}
192
193fn tokenized_for_context(ctx: &LintContext) -> Option<Vec<TokenWithSpan>> {
194 let (statement_start_line, statement_start_column) =
195 offset_to_line_col(ctx.sql, ctx.statement_range.start)?;
196
197 ctx.with_document_tokens(|tokens| {
198 if tokens.is_empty() {
199 return None;
200 }
201
202 let mut out = Vec::new();
203 for token in tokens {
204 let Some((start, end)) = token_with_span_offsets(ctx.sql, token) else {
205 continue;
206 };
207 if start < ctx.statement_range.start || end > ctx.statement_range.end {
208 continue;
209 }
210
211 let Some(start_loc) = relative_location(
212 token.span.start,
213 statement_start_line,
214 statement_start_column,
215 ) else {
216 continue;
217 };
218 let Some(end_loc) =
219 relative_location(token.span.end, statement_start_line, statement_start_column)
220 else {
221 continue;
222 };
223
224 out.push(TokenWithSpan::new(
225 token.token.clone(),
226 TokenSpan::new(start_loc, end_loc),
227 ));
228 }
229
230 if out.is_empty() {
231 None
232 } else {
233 Some(out)
234 }
235 })
236}
237
238fn next_non_trivia_index(
239 tokens: &[sqlparser::tokenizer::TokenWithSpan],
240 mut index: usize,
241) -> Option<usize> {
242 while index < tokens.len() {
243 if !is_trivia_token(&tokens[index].token) {
244 return Some(index);
245 }
246 index += 1;
247 }
248 None
249}
250
251fn is_trivia_token(token: &Token) -> bool {
252 matches!(
253 token,
254 Token::Whitespace(Whitespace::Space | Whitespace::Newline | Whitespace::Tab)
255 | Token::Whitespace(Whitespace::SingleLineComment { .. })
256 | Token::Whitespace(Whitespace::MultiLineComment(_))
257 )
258}
259
260fn is_comment_token(token: &Token) -> bool {
261 matches!(
262 token,
263 Token::Whitespace(Whitespace::SingleLineComment { .. })
264 | Token::Whitespace(Whitespace::MultiLineComment(_))
265 )
266}
267
/// Returns the leading whitespace (indentation) of the line containing byte
/// `offset` in `sql`. Collection stops at the first non-whitespace character
/// or newline, so an offset past the indent still yields only the indent.
fn detect_indent(sql: &str, offset: usize) -> String {
    let line_start = match sql[..offset].rfind('\n') {
        Some(newline_pos) => newline_pos + 1,
        None => 0,
    };
    let mut indent = String::new();
    for ch in sql[line_start..].chars() {
        if ch == '\n' || !ch.is_whitespace() {
            break;
        }
        indent.push(ch);
    }
    indent
}
276
/// Advances `offset` past any run of ASCII space characters in `sql` and
/// returns the resulting byte offset. Tabs and newlines are not skipped.
fn skip_trailing_space(sql: &str, offset: usize) -> usize {
    // A space is a single byte, so byte-wise counting matches the original
    // char-wise walk exactly.
    offset + sql[offset..].bytes().take_while(|&b| b == b' ').count()
}
289
290fn trivia_between_is_whitespace_only(tokens: &[TokenWithSpan], left: usize, right: usize) -> bool {
291 if right <= left + 1 {
292 return true;
293 }
294
295 tokens[left + 1..right].iter().all(|token| {
296 matches!(
297 token.token,
298 Token::Whitespace(Whitespace::Space | Whitespace::Newline | Whitespace::Tab)
299 )
300 })
301}
302
/// Converts a 1-based (line, column) position into a byte offset in `sql`.
/// Columns count characters, matching the tokenizer's location scheme; the
/// position one past the final character maps to `sql.len()`. Returns `None`
/// for zero coordinates or positions beyond the text.
fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
    if line == 0 || column == 0 {
        return None;
    }

    let (mut cur_line, mut cur_col) = (1usize, 1usize);

    for (offset, ch) in sql.char_indices() {
        if (cur_line, cur_col) == (line, column) {
            return Some(offset);
        }
        if ch == '\n' {
            cur_line += 1;
            cur_col = 1;
        } else {
            cur_col += 1;
        }
    }

    // End-of-text position (one past the last character).
    ((cur_line, cur_col) == (line, column)).then_some(sql.len())
}
330
331fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
332 let start = line_col_to_offset(
333 sql,
334 token.span.start.line as usize,
335 token.span.start.column as usize,
336 )?;
337 let end = line_col_to_offset(
338 sql,
339 token.span.end.line as usize,
340 token.span.end.column as usize,
341 )?;
342 Some((start, end))
343}
344
/// Converts a byte offset into a 1-based (line, column) position by counting
/// newlines in the prefix. `offset == sql.len()` maps to the position just
/// past the final character. Returns `None` when the offset is out of range
/// or not on a character boundary (`str::get` rejects both cases).
fn offset_to_line_col(sql: &str, offset: usize) -> Option<(usize, usize)> {
    let prefix = sql.get(..offset)?;

    let mut line = 1usize;
    let mut column = 1usize;
    for ch in prefix.chars() {
        if ch == '\n' {
            line += 1;
            column = 1;
        } else {
            column += 1;
        }
    }
    Some((line, column))
}
379
380fn relative_location(
381 location: Location,
382 statement_start_line: usize,
383 statement_start_column: usize,
384) -> Option<Location> {
385 let line = location.line as usize;
386 let column = location.column as usize;
387 if line < statement_start_line {
388 return None;
389 }
390
391 if line == statement_start_line {
392 if column < statement_start_column {
393 return None;
394 }
395 return Some(Location::new(
396 1,
397 (column - statement_start_column + 1) as u64,
398 ));
399 }
400
401 Some(Location::new(
402 (line - statement_start_line + 1) as u64,
403 column as u64,
404 ))
405}
406
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::parse_sql;
    use crate::types::IssueAutofixApplicability;

    /// Parses `sql` and runs the LT010 rule over every parsed statement,
    /// collecting all issues. Each statement is checked against the full
    /// source range, so spans and offsets stay document-relative.
    fn run(sql: &str) -> Vec<Issue> {
        let statements = parse_sql(sql).expect("parse");
        let rule = LayoutSelectModifiers;
        statements
            .iter()
            .enumerate()
            .flat_map(|(index, statement)| {
                rule.check(
                    statement,
                    &LintContext {
                        sql,
                        statement_range: 0..sql.len(),
                        statement_index: index,
                    },
                )
            })
            .collect()
    }

    /// Applies an issue's autofix edits to `sql`. Edits are applied
    /// rightmost-first so earlier replacements do not shift the byte offsets
    /// of later ones. Returns `None` when the issue carries no autofix.
    fn apply_issue_autofix(sql: &str, issue: &Issue) -> Option<String> {
        let autofix = issue.autofix.as_ref()?;
        let mut out = sql.to_string();
        let mut edits = autofix.edits.clone();
        edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
        for edit in edits.into_iter().rev() {
            out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
        }
        Some(out)
    }

    // DISTINCT on the line after SELECT is flagged and safely collapsed onto
    // the SELECT line.
    #[test]
    fn flags_distinct_on_next_line() {
        let sql = "SELECT\nDISTINCT a\nFROM t";
        let issues = run(sql);
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_LT_010);
        let autofix = issues[0].autofix.as_ref().expect("autofix metadata");
        assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT DISTINCT a\nFROM t");
    }

    // Modifier already on the SELECT line: no issue.
    #[test]
    fn does_not_flag_single_line_modifier() {
        assert!(run("SELECT DISTINCT a FROM t").is_empty());
    }

    // Keyword text inside a string literal must not trigger the rule.
    #[test]
    fn does_not_flag_modifier_text_in_string() {
        assert!(run("SELECT 'SELECT\nDISTINCT a' AS txt").is_empty());
    }

    // A comment between SELECT and DISTINCT: the fix hoists DISTINCT up and
    // keeps the comment on its own line.
    #[test]
    fn comment_between_select_and_modifier_has_autofix() {
        let sql = "SELECT\n-- keep\nDISTINCT a\nFROM t";
        let issues = run(sql);
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_LT_010);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT DISTINCT\n-- keep\na\nFROM t");
    }

    // Same as above, but the comment line's indentation must be preserved for
    // the line that remains after the modifier moves up.
    #[test]
    fn comment_between_select_and_distinct_with_indent() {
        let sql = "SELECT\n -- The table contains duplicates, so we use DISTINCT.\n DISTINCT user_id\nFROM\n safe_user";
        let issues = run(sql);
        assert_eq!(issues.len(), 1);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(
            fixed,
            "SELECT DISTINCT\n -- The table contains duplicates, so we use DISTINCT.\n user_id\nFROM\n safe_user"
        );
    }
}