1use crate::linter::config::LintConfig;
7use crate::linter::rule::{LintContext, LintRule};
8use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit};
9use sqlparser::ast::Statement;
10use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer, Whitespace};
11
/// Comma placement style between CTE definitions, read from the
/// `layout.commas` config section (see [`CommaLinePosition::from_config`]).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum CommaLinePosition {
    // Comma begins the line of the following CTE: `, next_cte AS (...)`.
    Leading,
    // Comma ends the line of the preceding CTE: `prev_cte AS (...),`.
    Trailing,
}
17
18impl CommaLinePosition {
19 fn from_config(config: &LintConfig) -> Self {
20 let Some(layout_commas) = config.config_section_object("layout.commas") else {
21 return Self::Trailing;
22 };
23
24 match layout_commas
25 .get("line_position")
26 .and_then(serde_json::Value::as_str)
27 .unwrap_or("trailing")
28 .to_ascii_lowercase()
29 .as_str()
30 {
31 "leading" => Self::Leading,
32 _ => Self::Trailing,
33 }
34 }
35}
36
/// LT08 lint rule: a blank line is required after each CTE's closing bracket.
pub struct LayoutCteNewline {
    // Configured comma placement; determines where the autofix inserts the
    // blank line relative to the comma separating two CTEs.
    comma_line_position: CommaLinePosition,
}
40
41impl LayoutCteNewline {
42 pub fn from_config(config: &LintConfig) -> Self {
43 Self {
44 comma_line_position: CommaLinePosition::from_config(config),
45 }
46 }
47}
48
49impl Default for LayoutCteNewline {
50 fn default() -> Self {
51 Self {
52 comma_line_position: CommaLinePosition::Trailing,
53 }
54 }
55}
56
57impl LintRule for LayoutCteNewline {
58 fn code(&self) -> &'static str {
59 issue_codes::LINT_LT_008
60 }
61
62 fn name(&self) -> &'static str {
63 "Layout CTE newline"
64 }
65
66 fn description(&self) -> &'static str {
67 "Blank line expected but not found after CTE closing bracket."
68 }
69
70 fn check(&self, statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
71 lt08_violation_spans(statement, ctx, self.comma_line_position)
72 .into_iter()
73 .map(|((start, end), fix_span)| {
74 let mut issue = Issue::info(
75 issue_codes::LINT_LT_008,
76 "Blank line expected but not found after CTE closing bracket.",
77 )
78 .with_statement(ctx.statement_index)
79 .with_span(ctx.span_from_statement_offset(start, end));
80 if let Some((fix_start, fix_end)) = fix_span {
81 issue = issue.with_autofix_edits(
82 IssueAutofixApplicability::Safe,
83 vec![IssuePatchEdit::new(
84 ctx.span_from_statement_offset(fix_start, fix_end),
85 "\n\n",
86 )],
87 );
88 }
89 issue
90 })
91 .collect()
92 }
93}
94
/// A lexed token paired with its byte offsets in the source SQL.
#[derive(Clone)]
struct LocatedToken {
    token: Token,
    // Byte offset of the token's first byte (used as inclusive slice start).
    start: usize,
    // Byte offset used as the exclusive slice end (`&sql[start..end]`).
    end: usize,
}
101
/// Statement-relative (start, end) byte span of an LT08 violation.
type Lt08Span = (usize, usize);
/// Statement-relative (start, end) byte span an autofix replaces with "\n\n".
type Lt08AutofixSpan = (usize, usize);
/// A violation span plus an optional autofix span when a safe fix is known.
type Lt08Violation = (Lt08Span, Option<Lt08AutofixSpan>);
105
/// Collects LT08 violations for one statement: each CTE closing `)` that is
/// not followed by a blank line yields a statement-relative span (pointing at
/// the next code token) plus an optional autofix span to replace with "\n\n".
fn lt08_violation_spans(
    statement: &Statement,
    ctx: &LintContext,
    comma_line_position: CommaLinePosition,
) -> Vec<Lt08Violation> {
    // Only queries with a WITH clause can contain CTEs.
    let Statement::Query(query) = statement else {
        return Vec::new();
    };
    let Some(with_clause) = &query.with else {
        return Vec::new();
    };

    // Token stream with absolute byte offsets; bail out when unavailable.
    let Some(tokens) = tokenize_with_offsets_for_context(ctx) else {
        return Vec::new();
    };

    let statement_start = ctx.statement_range.start;
    let mut spans = Vec::new();

    for cte in &with_clause.cte_tables {
        // Absolute offset of this CTE's closing parenthesis token.
        let Some(close_abs) = token_start_offset_for_context(ctx, &cte.closing_paren_token.0)
        else {
            continue;
        };

        // Ignore tokens that resolved outside this statement's range.
        if close_abs < ctx.statement_range.start || close_abs >= ctx.statement_range.end {
            continue;
        }

        // Count blank lines between the `)` and the next code token.
        let (blank_lines, next_code_span) =
            suffix_summary_after_offset(ctx.sql, &tokens, close_abs + 1, ctx.statement_range.end);

        if blank_lines == 0 {
            if let Some((next_start, next_end)) = next_code_span {
                let mut autofix_span = None;
                let gap_start = close_abs + 1;
                let next_token = &ctx.sql[next_start..next_end];

                // NOTE(review): suffix_summary_after_offset skips Token::Comma
                // when picking the next code token, so next_token looks unable
                // to ever be "," here — confirm whether this branch is live.
                if matches!(next_token, "," if matches!(comma_line_position, CommaLinePosition::Trailing))
                {
                    // Trailing-comma style: insert the blank line after the
                    // comma, preserving any comment between comma and next CTE.
                    let comma_end = next_end;
                    let (_blank_lines_after_comma, next_after_comma) = suffix_summary_after_offset(
                        ctx.sql,
                        &tokens,
                        comma_end,
                        ctx.statement_range.end,
                    );
                    if let Some((after_comma_start, _after_comma_end)) = next_after_comma {
                        if let Some((fix_start, fix_end)) =
                            whitespace_gap_span(ctx.sql, comma_end, after_comma_start)
                        {
                            autofix_span =
                                Some((fix_start - statement_start, fix_end - statement_start));
                        } else if let Some(comment_start) =
                            first_comment_start_in_range(&tokens, comma_end, after_comma_start)
                        {
                            // Gap contains a comment: fix only up to the
                            // comment so it is kept in place.
                            if let Some((fix_start, fix_end)) =
                                whitespace_gap_span(ctx.sql, comma_end, comment_start)
                            {
                                autofix_span =
                                    Some((fix_start - statement_start, fix_end - statement_start));
                            }
                        }
                    }
                } else if next_token.eq_ignore_ascii_case("SELECT")
                    || next_token.eq_ignore_ascii_case("INSERT")
                    || next_token.eq_ignore_ascii_case("UPDATE")
                    || next_token.eq_ignore_ascii_case("DELETE")
                {
                    // Final CTE followed directly by the main statement body:
                    // the whole whitespace gap is replaced with a blank line.
                    if let Some((fix_start, fix_end)) =
                        whitespace_gap_span(ctx.sql, gap_start, next_start)
                    {
                        autofix_span =
                            Some((fix_start - statement_start, fix_end - statement_start));
                    }
                } else if matches!(comma_line_position, CommaLinePosition::Trailing) {
                    // Next token begins another CTE: anchor the fix just after
                    // the comma separating the two CTEs.
                    if let Some(after_comma) =
                        first_comma_end_in_range(&tokens, gap_start, next_start)
                    {
                        if let Some((fix_start, fix_end)) =
                            whitespace_gap_span(ctx.sql, after_comma, next_start)
                        {
                            autofix_span =
                                Some((fix_start - statement_start, fix_end - statement_start));
                        } else if let Some(comment_start) =
                            first_comment_start_in_range(&tokens, after_comma, next_start)
                        {
                            // Comment between the CTEs: fix only the
                            // whitespace up to the comment, keeping it.
                            if let Some((fix_start, fix_end)) =
                                whitespace_gap_span(ctx.sql, after_comma, comment_start)
                            {
                                autofix_span =
                                    Some((fix_start - statement_start, fix_end - statement_start));
                            }
                        }
                    }
                } else if gap_start == next_start
                    && matches!(comma_line_position, CommaLinePosition::Leading)
                    && next_token == ","
                {
                    // NOTE(review): also depends on next_token being "," —
                    // see the note above about commas being skipped.
                    autofix_span = Some((gap_start - statement_start, gap_start - statement_start));
                }

                spans.push((
                    (next_start - statement_start, next_end - statement_start),
                    autofix_span,
                ));
            }
        }
    }

    // Leading-comma style: the token pass above can miss `), next AS (` on a
    // single line, so a byte-level scan adds that case if not already flagged.
    if matches!(comma_line_position, CommaLinePosition::Leading) {
        if let Some(comma_abs) = find_oneline_leading_cte_comma(
            ctx.sql,
            ctx.statement_range.start,
            ctx.statement_range.end,
        ) {
            let relative = comma_abs - statement_start;
            let has_existing = spans
                .iter()
                .any(|((start, end), _)| *start <= relative && relative < *end);
            if !has_existing {
                // Zero-width autofix span: insert "\n\n" before the comma.
                spans.push(((relative, relative + 1), Some((relative, relative))));
            }
        }
    }

    spans
}
234
/// Byte-level scan for a `), name AS (` sequence on a single line inside a
/// statement that starts with `WITH`; returns the absolute offset of the
/// first such comma. Used for leading-comma style, where the token-based
/// pass does not cover this shape.
fn find_oneline_leading_cte_comma(
    sql: &str,
    statement_start: usize,
    statement_end: usize,
) -> Option<usize> {
    let statement_sql = &sql[statement_start..statement_end];
    // Only statements that begin with WITH can contain CTEs.
    if !statement_sql
        .trim_start()
        .to_ascii_lowercase()
        .starts_with("with")
    {
        return None;
    }

    let bytes = statement_sql.as_bytes();
    let is_blank = |b: u8| b == b' ' || b == b'\t';
    let ident_start = |b: u8| b.is_ascii_alphabetic() || b == b'_';
    let ident_part = |b: u8| b.is_ascii_alphanumeric() || b == b'_' || b == b'$';

    for comma_index in 0..bytes.len() {
        if bytes[comma_index] != b',' {
            continue;
        }

        // Walk left over horizontal whitespace; the comma must sit directly
        // after a CTE's closing `)` on the same line.
        let mut left = comma_index;
        while left > 0 && is_blank(bytes[left - 1]) {
            left -= 1;
        }
        if left == 0 || bytes[left - 1] != b')' {
            continue;
        }
        if statement_sql[left - 1..comma_index].contains('\n')
            || statement_sql[left - 1..comma_index].contains('\r')
        {
            continue;
        }

        // Walk right: expect `<identifier> AS (` still on this line.
        let mut cursor = comma_index + 1;
        while cursor < bytes.len() && is_blank(bytes[cursor]) {
            cursor += 1;
        }
        if cursor >= bytes.len() || !ident_start(bytes[cursor]) {
            continue;
        }
        cursor += 1;
        while cursor < bytes.len() && ident_part(bytes[cursor]) {
            cursor += 1;
        }

        // Require at least one space between the identifier and `AS`.
        let ident_end = cursor;
        while cursor < bytes.len() && is_blank(bytes[cursor]) {
            cursor += 1;
        }
        if cursor == ident_end {
            continue;
        }
        if cursor + 2 > bytes.len() || !bytes[cursor..cursor + 2].eq_ignore_ascii_case(b"AS") {
            continue;
        }
        cursor += 2;
        while cursor < bytes.len() && is_blank(bytes[cursor]) {
            cursor += 1;
        }
        if cursor >= bytes.len() || bytes[cursor] != b'(' {
            continue;
        }

        return Some(statement_start + comma_index);
    }

    None
}
307
/// True for bytes that may begin an unquoted identifier (ASCII letter or `_`).
fn is_identifier_start(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'_')
}
311
/// True for bytes that may continue an unquoted identifier
/// (ASCII alphanumeric, `_`, or `$`).
fn is_identifier_part(byte: u8) -> bool {
    matches!(byte, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'$')
}
315
/// Case-insensitively tests whether `haystack[start..]` begins with `needle`.
/// Out-of-range `start` values simply yield `false`.
fn starts_with_ascii_case_insensitive(haystack: &[u8], start: usize, needle: &[u8]) -> bool {
    match haystack.get(start..) {
        Some(rest) if rest.len() >= needle.len() => {
            rest[..needle.len()].eq_ignore_ascii_case(needle)
        }
        _ => false,
    }
}
325
326fn tokenize_with_offsets(sql: &str, dialect: Dialect) -> Option<Vec<LocatedToken>> {
327 let dialect = dialect.to_sqlparser_dialect();
328 let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
329 let tokens = tokenizer.tokenize_with_location().ok()?;
330
331 let mut out = Vec::with_capacity(tokens.len());
332 for token in tokens {
333 let Some(start) = line_col_to_offset(
334 sql,
335 token.span.start.line as usize,
336 token.span.start.column as usize,
337 ) else {
338 continue;
339 };
340 let Some(end) = line_col_to_offset(
341 sql,
342 token.span.end.line as usize,
343 token.span.end.column as usize,
344 ) else {
345 continue;
346 };
347
348 out.push(LocatedToken {
349 token: token.token,
350 start,
351 end,
352 });
353 }
354
355 Some(out)
356}
357
358fn tokenize_with_offsets_for_context(ctx: &LintContext) -> Option<Vec<LocatedToken>> {
359 let tokens = ctx.with_document_tokens(|tokens| {
360 if tokens.is_empty() {
361 return None;
362 }
363
364 Some(
365 tokens
366 .iter()
367 .filter_map(|token| {
368 token_with_span_offsets(ctx.sql, token).map(|(start, end)| LocatedToken {
369 token: token.token.clone(),
370 start,
371 end,
372 })
373 })
374 .collect::<Vec<_>>(),
375 )
376 });
377
378 if let Some(tokens) = tokens {
379 return Some(tokens);
380 }
381
382 tokenize_with_offsets(ctx.sql, ctx.dialect())
383}
384
385fn token_start_offset(sql: &str, token: &TokenWithSpan) -> Option<usize> {
386 line_col_to_offset(
387 sql,
388 token.span.start.line as usize,
389 token.span.start.column as usize,
390 )
391}
392
393fn token_start_offset_for_context(ctx: &LintContext, token: &TokenWithSpan) -> Option<usize> {
394 if ctx.statement_range.start > 0 {
395 if let Some(abs_start) = token_start_offset(ctx.sql, token) {
396 if abs_start >= ctx.statement_range.start && abs_start < ctx.statement_range.end {
397 return Some(abs_start);
398 }
399 }
400
401 if let Some(rel_start) = token_start_offset(ctx.statement_sql(), token) {
402 let abs_start = ctx.statement_range.start + rel_start;
403 if abs_start < ctx.statement_range.end {
404 return Some(abs_start);
405 }
406 }
407
408 return None;
409 }
410
411 token_start_offset(ctx.statement_sql(), token).or_else(|| token_start_offset(ctx.sql, token))
412}
413
/// Scans tokens starting at `start_offset` (up to `statement_end`), counting
/// blank lines in the trivia and returning the span of the next "code" token.
///
/// Commas are skipped when locating the next code token, but they mark the
/// current line as non-blank so a comma-only line does not count as blank.
fn suffix_summary_after_offset(
    sql: &str,
    tokens: &[LocatedToken],
    start_offset: usize,
    statement_end: usize,
) -> (usize, Option<(usize, usize)>) {
    let mut blank_lines = 0usize;
    // True while the current line has seen only whitespace so far.
    let mut line_blank = false;

    for token in tokens {
        // Skip tokens before the scan window; stop at the statement end.
        if token.start < start_offset {
            continue;
        }
        if token.start >= statement_end {
            break;
        }

        match &token.token {
            Token::Comma => {
                // Comma: keep scanning, but the line is no longer blank.
                line_blank = false;
            }
            trivia if is_trivia_token(trivia) => {
                // Whitespace or comment: tally blank lines from its text.
                consume_text_for_blank_lines(
                    &sql[token.start..token.end],
                    &mut blank_lines,
                    &mut line_blank,
                );
            }
            // First real code token ends the scan.
            _ => return (blank_lines, Some((token.start, token.end))),
        }
    }

    (blank_lines, None)
}
448
/// Feeds a chunk of trivia text into the blank-line counter.
///
/// A line counts as blank when a newline is reached while `line_blank` is
/// still set (i.e. nothing but whitespace appeared since the last newline).
/// `\r\n` is treated as a single newline; a lone `\r` also ends a line.
fn consume_text_for_blank_lines(text: &str, blank_lines: &mut usize, line_blank: &mut bool) {
    let mut iter = text.chars().peekable();

    while let Some(ch) = iter.next() {
        if ch == '\n' || ch == '\r' {
            // Fold CRLF into one line ending.
            if ch == '\r' && iter.peek() == Some(&'\n') {
                iter.next();
            }
            *blank_lines += usize::from(*line_blank);
            *line_blank = true;
        } else if !ch.is_whitespace() {
            // Any visible character makes the current line non-blank.
            *line_blank = false;
        }
    }
}
474
/// Returns `(start, end)` when the byte range `start..end` of `sql` exists
/// and contains only whitespace; otherwise `None`.
///
/// Uses `str::get` so an out-of-range index or an index that falls inside a
/// multi-byte character yields `None` instead of panicking (direct slicing
/// would panic on a non-char-boundary offset).
fn whitespace_gap_span(sql: &str, start: usize, end: usize) -> Option<(usize, usize)> {
    if start > end {
        return None;
    }
    let gap = sql.get(start..end)?;
    gap.chars().all(char::is_whitespace).then_some((start, end))
}
486
487fn is_trivia_token(token: &Token) -> bool {
488 matches!(
489 token,
490 Token::Whitespace(Whitespace::Space | Whitespace::Tab | Whitespace::Newline)
491 | Token::Whitespace(Whitespace::SingleLineComment { .. })
492 | Token::Whitespace(Whitespace::MultiLineComment(_))
493 )
494}
495
496fn is_comment_token(token: &Token) -> bool {
497 matches!(
498 token,
499 Token::Whitespace(Whitespace::SingleLineComment { .. })
500 | Token::Whitespace(Whitespace::MultiLineComment(_))
501 )
502}
503
504fn first_comment_start_in_range(
505 tokens: &[LocatedToken],
506 start_offset: usize,
507 end_offset: usize,
508) -> Option<usize> {
509 tokens
510 .iter()
511 .find(|token| {
512 token.start >= start_offset
513 && token.start < end_offset
514 && is_comment_token(&token.token)
515 })
516 .map(|token| token.start)
517}
518
519fn first_comma_end_in_range(
520 tokens: &[LocatedToken],
521 start_offset: usize,
522 end_offset: usize,
523) -> Option<usize> {
524 tokens
525 .iter()
526 .find(|token| {
527 token.start >= start_offset
528 && token.start < end_offset
529 && matches!(token.token, Token::Comma)
530 })
531 .map(|token| token.end)
532}
533
/// Converts a 1-based (line, column) position into a byte offset in `sql`.
///
/// Columns count characters, not bytes. A position exactly one past the last
/// character maps to `sql.len()`; zero or otherwise out-of-range positions
/// yield `None`.
fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
    if line == 0 || column == 0 {
        return None;
    }

    let mut cursor = (1usize, 1usize);

    for (offset, ch) in sql.char_indices() {
        if cursor == (line, column) {
            return Some(offset);
        }

        cursor = if ch == '\n' {
            (cursor.0 + 1, 1)
        } else {
            (cursor.0, cursor.1 + 1)
        };
    }

    // The end-of-input position is addressable as well.
    (cursor == (line, column)).then_some(sql.len())
}
561
562fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
563 let start = line_col_to_offset(
564 sql,
565 token.span.start.line as usize,
566 token.span.start.column as usize,
567 )?;
568 let end = line_col_to_offset(
569 sql,
570 token.span.end.line as usize,
571 token.span.end.column as usize,
572 )?;
573 Some((start, end))
574}
575
#[cfg(test)]
mod tests {
    use super::*;
    use crate::linter::config::LintConfig;
    use crate::parser::parse_sql;
    use crate::types::IssueAutofixApplicability;

    /// Lints `sql` with the default rule (trailing-comma style), treating the
    /// whole input as the statement range for every parsed statement.
    fn run(sql: &str) -> Vec<Issue> {
        let statements = parse_sql(sql).expect("parse");
        let rule = LayoutCteNewline::default();
        statements
            .iter()
            .enumerate()
            .flat_map(|(index, statement)| {
                rule.check(
                    statement,
                    &LintContext {
                        sql,
                        statement_range: 0..sql.len(),
                        statement_index: index,
                    },
                )
            })
            .collect()
    }

    /// Applies one issue's autofix edits to `sql`, rightmost-first so earlier
    /// spans stay valid; `None` when the issue carries no autofix.
    fn apply_issue_autofix(sql: &str, issue: &Issue) -> Option<String> {
        let autofix = issue.autofix.as_ref()?;
        let mut out = sql.to_string();
        let mut edits = autofix.edits.clone();
        edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
        for edit in edits.into_iter().rev() {
            out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
        }
        Some(out)
    }

    // Both a zero-gap and a single-newline gap after the final CTE should be
    // flagged and fixed to exactly one blank line.
    #[test]
    fn flags_missing_blank_line_after_cte() {
        let inline_sql = "WITH cte AS (SELECT 1) SELECT * FROM cte";
        let inline_issues = run(inline_sql);
        assert!(!inline_issues.is_empty());
        let autofix = inline_issues[0].autofix.as_ref().expect("autofix metadata");
        assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
        let fixed = apply_issue_autofix(inline_sql, &inline_issues[0]).expect("apply autofix");
        assert_eq!(fixed, "WITH cte AS (SELECT 1)\n\nSELECT * FROM cte");

        let newline_sql = "WITH cte AS (SELECT 1)\nSELECT * FROM cte";
        let newline_issues = run(newline_sql);
        assert!(!newline_issues.is_empty());
        let newline_fixed =
            apply_issue_autofix(newline_sql, &newline_issues[0]).expect("apply newline autofix");
        assert_eq!(newline_fixed, "WITH cte AS (SELECT 1)\n\nSELECT * FROM cte");
    }

    // An existing blank line satisfies the rule.
    #[test]
    fn does_not_flag_with_blank_line_after_cte() {
        assert!(run("WITH cte AS (SELECT 1)\n\nSELECT * FROM cte").is_empty());
    }

    // Two CTEs with no blank separators produce one violation each.
    #[test]
    fn flags_each_missing_separator_between_multiple_ctes() {
        let issues = run("WITH a AS (SELECT 1),
-- comment between CTEs
b AS (SELECT 2)
SELECT * FROM b");
        assert_eq!(
            issues
                .iter()
                .filter(|issue| issue.code == issue_codes::LINT_LT_008)
                .count(),
            2,
        );
    }

    // A line holding only a comment is not blank, so the rule still fires.
    #[test]
    fn comment_only_line_is_not_a_blank_line_separator() {
        assert!(!run("WITH cte AS (SELECT 1)\n-- separator\nSELECT * FROM cte").is_empty());
    }

    // Leading-comma config: the blank line is inserted *before* the comma of
    // a one-line `), other_cte AS (` sequence.
    #[test]
    fn leading_comma_oneline_cte_autofix_inserts_blank_line_before_comma() {
        let sql =
            "with my_cte as (select 1), other_cte as (select 1) select * from my_cte\ncross join other_cte\n";
        let config = LintConfig {
            enabled: true,
            disabled_rules: vec![],
            rule_configs: std::collections::BTreeMap::from([(
                "layout.commas".to_string(),
                serde_json::json!({"line_position": "leading"}),
            )]),
        };
        let rule = LayoutCteNewline::from_config(&config);
        let statements = parse_sql(sql).expect("parse");
        let issues = statements
            .iter()
            .enumerate()
            .flat_map(|(index, statement)| {
                rule.check(
                    statement,
                    &LintContext {
                        sql,
                        statement_range: 0..sql.len(),
                        statement_index: index,
                    },
                )
            })
            .collect::<Vec<_>>();

        // Apply every issue's edits (rightmost-first within each issue).
        let fixed = issues.iter().fold(sql.to_string(), |current, issue| {
            let Some(autofix) = issue.autofix.as_ref() else {
                return current;
            };
            let mut edits = autofix.edits.clone();
            edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
            let mut updated = current;
            for edit in edits.into_iter().rev() {
                updated.replace_range(edit.span.start..edit.span.end, &edit.replacement);
            }
            updated
        });
        assert_eq!(
            fixed,
            "with my_cte as (select 1)\n\n, other_cte as (select 1)\n\nselect * from my_cte\ncross join other_cte\n"
        );
    }

    // Trailing-comma default: the blank line goes *after* the comma.
    #[test]
    fn trailing_comma_cte_autofix_inserts_blank_line_after_comma() {
        let sql = "WITH a AS (SELECT 1),\nb AS (SELECT 2)\nSELECT * FROM b";
        let issues = run(sql);
        assert!(!issues.is_empty());
        let mut edits = issues
            .iter()
            .filter_map(|issue| issue.autofix.as_ref())
            .flat_map(|autofix| autofix.edits.clone())
            .collect::<Vec<_>>();
        edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
        let mut fixed = sql.to_string();
        for edit in edits.into_iter().rev() {
            fixed.replace_range(edit.span.start..edit.span.end, &edit.replacement);
        }
        assert_eq!(
            fixed,
            "WITH a AS (SELECT 1),\n\nb AS (SELECT 2)\n\nSELECT * FROM b"
        );
    }

    // A comment between comma and next CTE survives the fix: the blank line
    // is inserted between the comma and the comment.
    #[test]
    fn trailing_comma_cte_autofix_preserves_comment_between_ctes() {
        let sql = "WITH a AS (SELECT 1),\n-- keep this note\nb AS (SELECT 2)\nSELECT * FROM b";
        let issues = run(sql);
        assert!(!issues.is_empty());
        let mut edits = issues
            .iter()
            .filter_map(|issue| issue.autofix.as_ref())
            .flat_map(|autofix| autofix.edits.clone())
            .collect::<Vec<_>>();
        edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
        let mut fixed = sql.to_string();
        for edit in edits.into_iter().rev() {
            fixed.replace_range(edit.span.start..edit.span.end, &edit.replacement);
        }
        assert_eq!(
            fixed,
            "WITH a AS (SELECT 1),\n\n-- keep this note\nb AS (SELECT 2)\n\nSELECT * FROM b"
        );
    }
}
744}