1use crate::linter::config::LintConfig;
8use crate::linter::rule::{LintContext, LintRule};
9use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit};
10use sqlparser::ast::Statement;
11use sqlparser::keywords::Keyword;
12use sqlparser::tokenizer::{Location, Span, Token, TokenWithSpan, Tokenizer, Whitespace};
13
/// Configured placement of a clause keyword relative to line breaks.
///
/// Enforced by the violation logic in `check_with_configs`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum KeywordLinePosition {
    /// Keyword must start its line; content may follow on the same line.
    Leading,
    /// Keyword must be the only content on its line.
    Alone,
    /// Keyword must end its line: content before it, nothing after it.
    Trailing,
    /// Placement is explicitly not checked for this clause.
    None,
}
25
26impl KeywordLinePosition {
27 fn parse(s: &str) -> Option<Self> {
28 match s.to_ascii_lowercase().as_str() {
29 "leading" => Some(Self::Leading),
30 "alone" => Some(Self::Alone),
31 "trailing" => Some(Self::Trailing),
32 "none" => Some(Self::None),
33 _ => Option::None,
34 }
35 }
36}
37
/// Per-clause keyword placement settings parsed from the LT014 rule config.
///
/// `None` means the clause has no explicit configuration and is not checked
/// in configurable mode.
#[derive(Clone, Debug, Default)]
struct ClauseConfigs {
    from_clause: Option<KeywordLinePosition>,
    where_clause: Option<KeywordLinePosition>,
    join_clause: Option<KeywordLinePosition>,
    join_on_condition: Option<KeywordLinePosition>,
    orderby_clause: Option<KeywordLinePosition>,
    // Lowercased exclusion names for ORDER BY (e.g. "window_specification",
    // "aggregate_order_by"); "__none__" is a sentinel disabling exclusions.
    orderby_exclusions: Vec<String>,
    groupby_clause: Option<KeywordLinePosition>,
    partitionby_clause: Option<KeywordLinePosition>,
    qualify_clause: Option<KeywordLinePosition>,
    select_clause: Option<KeywordLinePosition>,
    data_type: Option<KeywordLinePosition>,
    having_clause: Option<KeywordLinePosition>,
    limit_clause: Option<KeywordLinePosition>,
}
55
56impl ClauseConfigs {
57 fn from_lint_config(config: &LintConfig) -> Self {
58 let mut out = Self::default();
59
60 let obj = config.rule_config_object(issue_codes::LINT_LT_014);
61 let Some(obj) = obj else {
62 return out;
63 };
64
65 fn read_clause(
66 obj: &serde_json::Map<String, serde_json::Value>,
67 key: &str,
68 ) -> Option<KeywordLinePosition> {
69 let clause_obj = obj.get(key)?.as_object()?;
70 let pos_str = clause_obj.get("keyword_line_position")?.as_str()?;
71 KeywordLinePosition::parse(pos_str)
72 }
73
74 fn read_exclusions(
75 obj: &serde_json::Map<String, serde_json::Value>,
76 key: &str,
77 ) -> Vec<String> {
78 let Some(clause_obj) = obj.get(key).and_then(|v| v.as_object()) else {
79 return Vec::new();
80 };
81 let Some(excl) = clause_obj.get("keyword_line_position_exclusions") else {
82 return Vec::new();
83 };
84 if excl.is_null() {
87 return vec!["__none__".to_string()];
88 }
89 if let Some(s) = excl.as_str() {
90 if s.eq_ignore_ascii_case("None") {
91 return vec!["__none__".to_string()];
92 }
93 return s
94 .split(',')
95 .map(|s| s.trim().to_ascii_lowercase())
96 .filter(|s| !s.is_empty())
97 .collect();
98 }
99 Vec::new()
100 }
101
102 out.from_clause = read_clause(obj, "from_clause");
103 out.where_clause = read_clause(obj, "where_clause");
104 out.join_clause = read_clause(obj, "join_clause");
105 out.join_on_condition = read_clause(obj, "join_on_condition");
106 out.orderby_clause = read_clause(obj, "orderby_clause");
107 out.orderby_exclusions = read_exclusions(obj, "orderby_clause");
108 out.groupby_clause = read_clause(obj, "groupby_clause");
109 out.partitionby_clause = read_clause(obj, "partitionby_clause");
110 out.qualify_clause = read_clause(obj, "qualify_clause");
111 out.select_clause = read_clause(obj, "select_clause");
112 out.data_type = read_clause(obj, "data_type");
113 out.having_clause = read_clause(obj, "having_clause");
114 out.limit_clause = read_clause(obj, "limit_clause");
115
116 out
117 }
118
119 fn has_any_config(&self) -> bool {
121 self.from_clause.is_some()
122 || self.where_clause.is_some()
123 || self.join_clause.is_some()
124 || self.join_on_condition.is_some()
125 || self.orderby_clause.is_some()
126 || self.groupby_clause.is_some()
127 || self.partitionby_clause.is_some()
128 || self.qualify_clause.is_some()
129 || self.select_clause.is_some()
130 || self.data_type.is_some()
131 || self.having_clause.is_some()
132 || self.limit_clause.is_some()
133 }
134}
135
/// Lint rule LT014: clause keywords should follow a consistent standard for
/// sitting before/after line breaks.
#[derive(Default)]
pub struct LayoutKeywordNewline {
    // Per-clause placement settings; all-`None` selects the default mode.
    clause_configs: ClauseConfigs,
}
144
impl LayoutKeywordNewline {
    /// Builds the rule with clause placement settings read from `config`.
    pub fn from_config(config: &LintConfig) -> Self {
        Self {
            clause_configs: ClauseConfigs::from_lint_config(config),
        }
    }
}
152
impl LintRule for LayoutKeywordNewline {
    fn code(&self) -> &'static str {
        issue_codes::LINT_LT_014
    }

    fn name(&self) -> &'static str {
        "Layout keyword newline"
    }

    fn description(&self) -> &'static str {
        "Keyword clauses should follow a standard for being before/after newlines."
    }

    /// Runs the configurable per-clause check when any clause is configured;
    /// otherwise falls back to the default "major clauses line-broken like
    /// SELECT" heuristic.
    fn check(&self, _statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
        // Prefer re-spanned document tokens; helpers re-tokenize on None.
        let tokens = tokenized_for_context(ctx);
        let sql = ctx.statement_sql();

        if self.clause_configs.has_any_config() {
            return check_with_configs(sql, ctx, &self.clause_configs, tokens.as_deref());
        }

        // Default mode flags at most one span per statement.
        let Some((keyword_start, keyword_end)) =
            keyword_newline_violation_span(sql, ctx.dialect(), tokens.as_deref())
        else {
            return Vec::new();
        };

        let keyword_span = ctx.span_from_statement_offset(keyword_start, keyword_end);
        // Autofix: replace the whitespace run before the keyword with "\n".
        let ws_start = sql[..keyword_start].trim_end().len();
        let replace_span = ctx.span_from_statement_offset(ws_start, keyword_start);
        vec![Issue::info(
            issue_codes::LINT_LT_014,
            "Major clauses should be consistently line-broken.",
        )
        .with_statement(ctx.statement_index)
        .with_span(keyword_span)
        .with_autofix_edits(
            IssueAutofixApplicability::Safe,
            vec![IssuePatchEdit::new(replace_span, "\n")],
        )]
    }
}
196
/// A clause keyword found in configurable mode, with the line-placement
/// facts needed to evaluate the configured position.
#[derive(Clone, Debug)]
struct KeywordOccurrence {
    // Which config entry governs this keyword, e.g. "from_clause".
    clause_type: &'static str,
    // Byte offset of the keyword's first character in the statement SQL.
    start: usize,
    // Byte offset just past the keyword (past the second word for compounds
    // like ORDER BY).
    end: usize,
    // Non-whitespace exists earlier on the keyword's line.
    has_content_before: bool,
    // Non-whitespace other than ')' / ']' follows on the same line.
    has_content_after: bool,
    // Always `!has_content_before` (see `make_keyword_occurrence`).
    is_first_on_line: bool,
    // Keyword sits inside an OVER (...) window specification.
    in_window: bool,
    // ORDER BY inside an open call's parentheses (aggregate ordering).
    in_aggregate_order_by: bool,
}
221
222fn check_with_configs(
223 sql: &str,
224 ctx: &LintContext,
225 configs: &ClauseConfigs,
226 tokens: Option<&[TokenWithSpan]>,
227) -> Vec<Issue> {
228 let owned_tokens;
229 let tokens = if let Some(tokens) = tokens {
230 tokens
231 } else {
232 owned_tokens = match tokenized(sql, ctx.dialect()) {
233 Some(t) => t,
234 std::option::Option::None => return Vec::new(),
235 };
236 &owned_tokens
237 };
238
239 let occurrences = find_clause_keyword_occurrences(sql, tokens);
240
241 let mut issues = Vec::new();
242
243 for occ in &occurrences {
244 let Some(position) = config_for_clause(configs, occ) else {
245 continue;
246 };
247
248 if position == KeywordLinePosition::None {
249 continue;
250 }
251
252 if occ.clause_type == "orderby_clause" && !configs.orderby_exclusions.is_empty() {
254 let has_none_exclusion = configs.orderby_exclusions.iter().any(|e| e == "__none__");
255 if !has_none_exclusion {
256 let in_excluded = configs.orderby_exclusions.iter().any(|e| {
258 (e == "window_specification" && occ.in_window)
259 || (e == "aggregate_order_by" && occ.in_aggregate_order_by)
260 });
261 if in_excluded {
262 continue;
263 }
264 }
265 }
267
268 let violation = match position {
269 KeywordLinePosition::Leading => {
270 !occ.is_first_on_line
272 }
273 KeywordLinePosition::Alone => {
274 !occ.is_first_on_line || occ.has_content_after
276 }
277 KeywordLinePosition::Trailing => {
278 !occ.has_content_before || occ.has_content_after
280 }
281 KeywordLinePosition::None => false,
282 };
283
284 if !violation {
285 continue;
286 }
287
288 let keyword_span = ctx.span_from_statement_offset(occ.start, occ.end);
289 let edits = build_autofix_edits(sql, ctx, occ, position);
290
291 issues.push(
292 Issue::info(
293 issue_codes::LINT_LT_014,
294 "Keyword clauses should follow a standard for being before/after newlines.",
295 )
296 .with_statement(ctx.statement_index)
297 .with_span(keyword_span)
298 .with_autofix_edits(IssueAutofixApplicability::Safe, edits),
299 );
300 }
301
302 issues
303}
304
305fn config_for_clause(
306 configs: &ClauseConfigs,
307 occ: &KeywordOccurrence,
308) -> Option<KeywordLinePosition> {
309 match occ.clause_type {
310 "from_clause" => configs.from_clause,
311 "where_clause" => configs.where_clause,
312 "join_clause" => configs.join_clause,
313 "join_on_condition" => configs.join_on_condition,
314 "orderby_clause" => configs.orderby_clause,
315 "groupby_clause" => configs.groupby_clause,
316 "partitionby_clause" => configs.partitionby_clause,
317 "qualify_clause" => configs.qualify_clause,
318 "select_clause" => configs.select_clause,
319 "data_type" => configs.data_type,
320 "having_clause" => configs.having_clause,
321 "limit_clause" => configs.limit_clause,
322 _ => Option::None,
323 }
324}
325
326fn build_autofix_edits(
327 sql: &str,
328 ctx: &LintContext,
329 occ: &KeywordOccurrence,
330 position: KeywordLinePosition,
331) -> Vec<IssuePatchEdit> {
332 match position {
333 KeywordLinePosition::Leading => {
334 let ws_start = sql[..occ.start].trim_end().len();
336 let replace_span = ctx.span_from_statement_offset(ws_start, occ.start);
337 vec![IssuePatchEdit::new(replace_span, "\n")]
338 }
339 KeywordLinePosition::Alone => {
340 let mut edits = Vec::new();
341
342 if !occ.is_first_on_line {
344 let ws_start = sql[..occ.start].trim_end().len();
345 let replace_span = ctx.span_from_statement_offset(ws_start, occ.start);
346 edits.push(IssuePatchEdit::new(replace_span, "\n"));
347 }
348
349 if occ.has_content_after {
351 let after_keyword = &sql[occ.end..];
352 let content_offset = after_keyword
353 .find(|c: char| c != ' ' && c != '\t')
354 .unwrap_or(0);
355 let replace_start = occ.end;
356 let replace_end = occ.end + content_offset;
357 let replace_span = ctx.span_from_statement_offset(replace_start, replace_end);
358 edits.push(IssuePatchEdit::new(replace_span, "\n"));
359 }
360
361 edits
362 }
363 KeywordLinePosition::Trailing => {
364 let mut edits = Vec::new();
367
368 if !occ.has_content_before {
370 let ws_start = sql[..occ.start].trim_end().len();
371 let replace_span = ctx.span_from_statement_offset(ws_start, occ.start);
372 edits.push(IssuePatchEdit::new(replace_span, " "));
373 }
374
375 if occ.has_content_after {
377 let after_keyword = &sql[occ.end..];
378 let content_offset = after_keyword
379 .find(|c: char| c != ' ' && c != '\t')
380 .unwrap_or(0);
381 let replace_start = occ.end;
382 let replace_end = occ.end + content_offset;
383 let replace_span = ctx.span_from_statement_offset(replace_start, replace_end);
384 edits.push(IssuePatchEdit::new(replace_span, "\n"));
385 }
386
387 edits
388 }
389 KeywordLinePosition::None => Vec::new(),
390 }
391}
392
/// Scans the token stream for clause keywords, producing one
/// `KeywordOccurrence` per hit with its line placement plus whether it sits
/// inside an OVER (...) window or an open call's parentheses.
fn find_clause_keyword_occurrences(sql: &str, tokens: &[TokenWithSpan]) -> Vec<KeywordOccurrence> {
    // Pre-filter trivia so neighbor lookups skip whitespace and comments.
    let significant: Vec<(usize, &TokenWithSpan)> = tokens
        .iter()
        .enumerate()
        .filter(|(_, t)| !is_trivia_token(&t.token))
        .collect();

    let mut out = Vec::new();
    let mut paren_depth: i32 = 0;
    // Depth at which the current OVER (...) window opened, if inside one.
    let mut window_paren_depth: Option<i32> = Option::None;
    // Stack of paren depths opened directly after a word (treated as calls).
    let mut function_call_depths: Vec<i32> = Vec::new();

    let mut sig_idx = 0;
    while sig_idx < significant.len() {
        let (_, token) = significant[sig_idx];

        match &token.token {
            Token::LParen => {
                paren_depth += 1;
            }
            Token::RParen => {
                // Close any window / call scope that was opened at this depth.
                if window_paren_depth == Some(paren_depth) {
                    window_paren_depth = Option::None;
                }
                if function_call_depths.last() == Some(&paren_depth) {
                    function_call_depths.pop();
                }
                paren_depth -= 1;
            }
            _ => {}
        }

        let Token::Word(word) = &token.token else {
            sig_idx += 1;
            continue;
        };

        match word.keyword {
            Keyword::OVER => {
                // OVER followed by '(' starts a window specification.
                if let Some((_, next)) = significant.get(sig_idx + 1) {
                    if matches!(&next.token, Token::LParen) {
                        window_paren_depth = Some(paren_depth + 1);
                    }
                }
                sig_idx += 1;
            }
            Keyword::FROM => {
                if let Some(occ) = single_keyword_occurrence(
                    sql,
                    token,
                    "from_clause",
                    window_paren_depth.is_some(),
                    false,
                ) {
                    out.push(occ);
                }
                sig_idx += 1;
            }
            Keyword::WHERE => {
                if let Some(occ) = single_keyword_occurrence(
                    sql,
                    token,
                    "where_clause",
                    window_paren_depth.is_some(),
                    false,
                ) {
                    out.push(occ);
                }
                sig_idx += 1;
            }
            Keyword::ON => {
                // Treat ON as a join condition if ANY earlier significant
                // token is a join keyword — not just the nearest one.
                let is_join_on = significant[..sig_idx].iter().rev().any(|(_, t)| {
                    if let Token::Word(w) = &t.token {
                        matches!(
                            w.keyword,
                            Keyword::JOIN
                                | Keyword::INNER
                                | Keyword::LEFT
                                | Keyword::RIGHT
                                | Keyword::FULL
                                | Keyword::CROSS
                                | Keyword::OUTER
                        )
                    } else {
                        false
                    }
                });
                if is_join_on {
                    if let Some(occ) = single_keyword_occurrence(
                        sql,
                        token,
                        "join_on_condition",
                        window_paren_depth.is_some(),
                        false,
                    ) {
                        out.push(occ);
                    }
                }
                sig_idx += 1;
            }
            Keyword::QUALIFY => {
                if let Some(occ) = single_keyword_occurrence(
                    sql,
                    token,
                    "qualify_clause",
                    window_paren_depth.is_some(),
                    false,
                ) {
                    out.push(occ);
                }
                sig_idx += 1;
            }
            Keyword::SELECT => {
                if let Some(occ) =
                    single_keyword_occurrence(sql, token, "select_clause", false, false)
                {
                    out.push(occ);
                }
                sig_idx += 1;
            }
            Keyword::HAVING => {
                if let Some(occ) =
                    single_keyword_occurrence(sql, token, "having_clause", false, false)
                {
                    out.push(occ);
                }
                sig_idx += 1;
            }
            Keyword::LIMIT => {
                if let Some(occ) =
                    single_keyword_occurrence(sql, token, "limit_clause", false, false)
                {
                    out.push(occ);
                }
                sig_idx += 1;
            }
            Keyword::JOIN => {
                // Extend the span back over same-line modifiers so e.g.
                // "LEFT OUTER JOIN" is flagged as one keyword.
                let join_start = find_join_keyword_start(sql, &significant, sig_idx);
                if let Some(join_end) = token_end_offset(sql, token) {
                    if let Some(occ) = make_keyword_occurrence(
                        sql,
                        join_start,
                        join_end,
                        "join_clause",
                        window_paren_depth.is_some(),
                        false,
                    ) {
                        out.push(occ);
                    }
                }

                // NOTE(review): `JOIN (` is pushed as a call depth like any
                // word before '(' — this also matches derived tables; confirm
                // that is intended for the aggregate ORDER BY tracking.
                if let Some((_, next)) = significant.get(sig_idx + 1) {
                    if matches!(&next.token, Token::LParen) {
                        function_call_depths.push(paren_depth + 1);
                    }
                }

                sig_idx += 1;
            }
            Keyword::ORDER | Keyword::GROUP => {
                // Only a real clause when the next significant token is BY.
                let Some((_, next)) = significant.get(sig_idx + 1) else {
                    sig_idx += 1;
                    continue;
                };
                let is_by = matches!(&next.token, Token::Word(w) if w.keyword == Keyword::BY);
                if !is_by {
                    sig_idx += 1;
                    continue;
                }

                if let (Some(kw_start), Some(kw_end)) =
                    (token_start_offset(sql, token), token_end_offset(sql, next))
                {
                    let clause_type = if word.keyword == Keyword::ORDER {
                        "orderby_clause"
                    } else {
                        "groupby_clause"
                    };

                    // ORDER BY inside an open call scope counts as aggregate
                    // ordering for the exclusion logic.
                    let in_window = window_paren_depth.is_some();
                    let in_aggregate =
                        !function_call_depths.is_empty() && word.keyword == Keyword::ORDER;

                    if let Some(occ) = make_keyword_occurrence(
                        sql,
                        kw_start,
                        kw_end,
                        clause_type,
                        in_window,
                        in_aggregate,
                    ) {
                        out.push(occ);
                    }
                }

                // Skip past the BY token as well.
                sig_idx += 2;
            }
            Keyword::PARTITION => {
                // PARTITION only counts when directly followed by BY.
                let Some((_, next)) = significant.get(sig_idx + 1) else {
                    sig_idx += 1;
                    continue;
                };
                let is_by = matches!(&next.token, Token::Word(w) if w.keyword == Keyword::BY);
                if !is_by {
                    sig_idx += 1;
                    continue;
                }

                if let (Some(kw_start), Some(kw_end)) =
                    (token_start_offset(sql, token), token_end_offset(sql, next))
                {
                    if let Some(occ) = make_keyword_occurrence(
                        sql,
                        kw_start,
                        kw_end,
                        "partitionby_clause",
                        window_paren_depth.is_some(),
                        false,
                    ) {
                        out.push(occ);
                    }
                }

                sig_idx += 2;
            }
            Keyword::DOUBLE | Keyword::NOT => {
                // Two-word type keywords: DOUBLE PRECISION and NOT NULL.
                if let Some((_, next)) = significant.get(sig_idx + 1) {
                    let is_data_type_compound = match word.keyword {
                        Keyword::DOUBLE => {
                            matches!(&next.token, Token::Word(w) if w.keyword == Keyword::PRECISION)
                        }
                        Keyword::NOT => {
                            matches!(&next.token, Token::Word(w) if w.keyword == Keyword::NULL)
                        }
                        _ => false,
                    };
                    if is_data_type_compound {
                        if let (Some(kw_start), Some(kw_end)) =
                            (token_start_offset(sql, token), token_end_offset(sql, next))
                        {
                            if let Some(occ) = make_keyword_occurrence(
                                sql,
                                kw_start,
                                kw_end,
                                "data_type",
                                false,
                                false,
                            ) {
                                out.push(occ);
                            }
                        }

                        sig_idx += 2;
                        continue;
                    }
                }
                sig_idx += 1;
            }
            _ => {
                // Any other word directly followed by '(' is assumed to open
                // a function call (drives the aggregate ORDER BY flag).
                if let Some((_, next)) = significant.get(sig_idx + 1) {
                    if matches!(&next.token, Token::LParen) {
                        function_call_depths.push(paren_depth + 1);
                    }
                }
                sig_idx += 1;
            }
        }
    }

    out
}
681
682fn single_keyword_occurrence(
683 sql: &str,
684 token: &TokenWithSpan,
685 clause_type: &'static str,
686 in_window: bool,
687 in_aggregate: bool,
688) -> Option<KeywordOccurrence> {
689 let start = token_start_offset(sql, token)?;
690 let end = token_end_offset(sql, token)?;
691 make_keyword_occurrence(sql, start, end, clause_type, in_window, in_aggregate)
692}
693
694fn make_keyword_occurrence(
695 sql: &str,
696 start: usize,
697 end: usize,
698 clause_type: &'static str,
699 in_window: bool,
700 in_aggregate: bool,
701) -> Option<KeywordOccurrence> {
702 let line_start = sql[..start].rfind('\n').map_or(0, |i| i + 1);
703 let line_end = sql[end..].find('\n').map_or(sql.len(), |i| end + i);
704
705 let before_on_line = &sql[line_start..start];
706 let after_on_line = &sql[end..line_end];
707
708 let has_content_before = before_on_line.chars().any(|c| !c.is_ascii_whitespace());
709 let after_trimmed = after_on_line.trim_start();
712 let has_content_after = !after_trimmed.is_empty()
713 && !after_trimmed
714 .chars()
715 .all(|c| c == ')' || c == ']' || c.is_ascii_whitespace());
716 let is_first_on_line = !has_content_before;
717
718 Some(KeywordOccurrence {
719 clause_type,
720 start,
721 end,
722 has_content_before,
723 has_content_after,
724 is_first_on_line,
725 in_window,
726 in_aggregate_order_by: in_aggregate,
727 })
728}
729
730fn find_join_keyword_start(
733 sql: &str,
734 significant: &[(usize, &TokenWithSpan)],
735 sig_idx: usize,
736) -> usize {
737 let (_, token) = significant[sig_idx];
738 let join_start = match token_start_offset(sql, token) {
739 Some(s) => s,
740 std::option::Option::None => return 0,
741 };
742
743 let join_line_start = sql[..join_start].rfind('\n').map_or(0, |i| i + 1);
745
746 let mut earliest_start = join_start;
747 let mut look_back = sig_idx;
748 while look_back > 0 {
749 look_back -= 1;
750 let (_, prev) = significant[look_back];
751 let Token::Word(w) = &prev.token else {
752 break;
753 };
754 if !matches!(
755 w.keyword,
756 Keyword::LEFT
757 | Keyword::RIGHT
758 | Keyword::INNER
759 | Keyword::FULL
760 | Keyword::CROSS
761 | Keyword::OUTER
762 ) {
763 break;
764 }
765 let prev_start = match token_start_offset(sql, prev) {
766 Some(s) => s,
767 std::option::Option::None => break,
768 };
769 if prev_start < join_line_start {
771 break;
772 }
773 earliest_start = prev_start;
774 }
775
776 earliest_start
777}
778
/// Byte offset of the token's start within `sql`, if its line/column
/// resolves to a valid position.
fn token_start_offset(sql: &str, token: &TokenWithSpan) -> Option<usize> {
    line_col_to_offset(
        sql,
        token.span.start.line as usize,
        token.span.start.column as usize,
    )
}
786
/// Byte offset just past the token within `sql`, if its line/column
/// resolves to a valid position.
fn token_end_offset(sql: &str, token: &TokenWithSpan) -> Option<usize> {
    line_col_to_offset(
        sql,
        token.span.end.line as usize,
        token.span.end.column as usize,
    )
}
794
/// A major clause keyword found in default mode.
#[derive(Clone, Copy)]
struct ClauseOccurrence {
    // Line number (from the token span) where the keyword starts.
    line: u64,
    // Byte offset of the keyword's start within the statement SQL.
    start: usize,
    // Byte offset just past the keyword (past BY for GROUP/ORDER BY).
    end: usize,
}
805
/// Default-mode check: finds the first depth-0 SELECT that begins its own
/// line, then returns the span of the first major clause sharing that line —
/// but only when the statement also has other major clauses (a second one on
/// the SELECT line, or one on a later line), which makes the layout
/// inconsistent.
fn keyword_newline_violation_span(
    sql: &str,
    dialect: Dialect,
    tokens: Option<&[TokenWithSpan]>,
) -> Option<(usize, usize)> {
    // Tokenize on demand when the caller did not supply tokens.
    let owned_tokens;
    let tokens = if let Some(tokens) = tokens {
        tokens
    } else {
        owned_tokens = tokenized(sql, dialect)?;
        &owned_tokens
    };

    // Line of the first top-level SELECT that is first on its line.
    let select_line = {
        let mut depth = 0i32;
        let mut found = None;
        for token in tokens {
            match &token.token {
                Token::LParen => depth += 1,
                Token::RParen => depth -= 1,
                Token::Word(word) if word.keyword == Keyword::SELECT && depth == 0 => {
                    let select_start = line_col_to_offset(
                        sql,
                        token.span.start.line as usize,
                        token.span.start.column as usize,
                    );
                    if let Some(start) = select_start {
                        // Only whitespace may precede SELECT on its line.
                        let line_start = sql[..start].rfind('\n').map_or(0, |idx| idx + 1);
                        if sql[line_start..start].trim().is_empty() {
                            found = Some(token.span.start.line);
                            break;
                        }
                    }
                }
                _ => {}
            }
        }
        found
    }?;

    let clauses = major_clause_occurrences(sql, tokens)?;

    let mut clauses_on_select_line = clauses.iter().filter(|clause| clause.line == select_line);
    let first_clause_on_select_line = clauses_on_select_line.next()?;

    // A single inline clause with nothing below it is considered consistent.
    let has_second_clause_on_select_line = clauses_on_select_line.next().is_some();
    let has_major_clause_on_later_line = clauses.iter().any(|clause| clause.line > select_line);

    if !has_second_clause_on_select_line && !has_major_clause_on_later_line {
        return None;
    }

    Some((
        first_clause_on_select_line.start,
        first_clause_on_select_line.end,
    ))
}
866
/// Collects depth-0 FROM / WHERE / GROUP BY / ORDER BY occurrences with their
/// line numbers and byte spans. Returns `None` if any token's line/column
/// cannot be mapped back to a byte offset.
fn major_clause_occurrences(sql: &str, tokens: &[TokenWithSpan]) -> Option<Vec<ClauseOccurrence>> {
    // Flatten to significant tokens, each tagged with its paren depth.
    // Both '(' and ')' are recorded at the depth of the enclosing scope.
    let mut depth = 0i32;
    let mut significant: Vec<(&TokenWithSpan, i32)> = Vec::new();
    for token in tokens {
        match &token.token {
            Token::LParen => {
                significant.push((token, depth));
                depth += 1;
            }
            Token::RParen => {
                depth -= 1;
                significant.push((token, depth));
            }
            t if is_trivia_token(t) => {}
            _ => significant.push((token, depth)),
        }
    }

    let mut out = Vec::new();
    let mut index = 0usize;

    while index < significant.len() {
        let (token, token_depth) = significant[index];
        // Only clauses of the outermost query count as "major".
        if token_depth != 0 {
            index += 1;
            continue;
        }
        let Token::Word(word) = &token.token else {
            index += 1;
            continue;
        };

        match word.keyword {
            Keyword::FROM | Keyword::WHERE => {
                let start = line_col_to_offset(
                    sql,
                    token.span.start.line as usize,
                    token.span.start.column as usize,
                )?;
                let end = line_col_to_offset(
                    sql,
                    token.span.end.line as usize,
                    token.span.end.column as usize,
                )?;
                out.push(ClauseOccurrence {
                    line: token.span.start.line,
                    start,
                    end,
                });
                index += 1;
            }
            Keyword::GROUP | Keyword::ORDER => {
                // Only a clause when directly followed by BY; the recorded
                // span then covers both words.
                let Some((next, _)) = significant.get(index + 1) else {
                    index += 1;
                    continue;
                };

                let is_by = matches!(&next.token, Token::Word(next_word) if next_word.keyword == Keyword::BY);
                if !is_by {
                    index += 1;
                    continue;
                }

                let start = line_col_to_offset(
                    sql,
                    token.span.start.line as usize,
                    token.span.start.column as usize,
                )?;
                let end = line_col_to_offset(
                    sql,
                    next.span.end.line as usize,
                    next.span.end.column as usize,
                )?;
                out.push(ClauseOccurrence {
                    line: token.span.start.line,
                    start,
                    end,
                });
                index += 2;
            }
            _ => index += 1,
        }
    }

    Some(out)
}
955
/// Tokenizes `sql` with the sqlparser dialect matching `dialect`.
/// Returns `None` when the tokenizer reports an error.
fn tokenized(sql: &str, dialect: Dialect) -> Option<Vec<TokenWithSpan>> {
    let dialect = dialect.to_sqlparser_dialect();
    let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
    tokenizer.tokenize_with_location().ok()
}
965
/// Reuses the document-level token cache, keeping only tokens inside the
/// current statement and re-spanning them so the statement starts at 1:1.
/// Returns `None` when no usable tokens remain, letting callers fall back to
/// tokenizing the statement text directly.
fn tokenized_for_context(ctx: &LintContext) -> Option<Vec<TokenWithSpan>> {
    let (statement_start_line, statement_start_column) =
        offset_to_line_col(ctx.sql, ctx.statement_range.start)?;

    ctx.with_document_tokens(|tokens| {
        if tokens.is_empty() {
            return None;
        }

        let mut out = Vec::new();
        for token in tokens {
            // Drop tokens whose offsets cannot be resolved or that lie
            // outside the statement's byte range.
            let Some((start, end)) = token_with_span_offsets(ctx.sql, token) else {
                continue;
            };
            if start < ctx.statement_range.start || end > ctx.statement_range.end {
                continue;
            }

            // Rebase both span endpoints to statement-relative coordinates.
            let Some(start_loc) = relative_location(
                token.span.start,
                statement_start_line,
                statement_start_column,
            ) else {
                continue;
            };
            let Some(end_loc) =
                relative_location(token.span.end, statement_start_line, statement_start_column)
            else {
                continue;
            };

            out.push(TokenWithSpan::new(
                token.token.clone(),
                Span::new(start_loc, end_loc),
            ));
        }

        if out.is_empty() {
            None
        } else {
            Some(out)
        }
    })
}
1010
1011fn is_trivia_token(token: &Token) -> bool {
1012 matches!(
1013 token,
1014 Token::Whitespace(Whitespace::Space | Whitespace::Newline | Whitespace::Tab)
1015 | Token::Whitespace(Whitespace::SingleLineComment { .. })
1016 | Token::Whitespace(Whitespace::MultiLineComment(_))
1017 )
1018}
1019
/// Maps a 1-based (line, column) pair to a byte offset in `sql`.
///
/// Returns `None` for zero line/column or positions past the text; the
/// position one column past the final character maps to `sql.len()`.
fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
    if line == 0 || column == 0 {
        return None;
    }

    let mut cursor = (1usize, 1usize);
    for (offset, ch) in sql.char_indices() {
        if cursor == (line, column) {
            return Some(offset);
        }
        cursor = if ch == '\n' {
            (cursor.0 + 1, 1)
        } else {
            (cursor.0, cursor.1 + 1)
        };
    }

    // The end-of-text position is addressable one past the last character.
    (cursor == (line, column)).then_some(sql.len())
}
1047
1048fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
1049 let start = line_col_to_offset(
1050 sql,
1051 token.span.start.line as usize,
1052 token.span.start.column as usize,
1053 )?;
1054 let end = line_col_to_offset(
1055 sql,
1056 token.span.end.line as usize,
1057 token.span.end.column as usize,
1058 )?;
1059 Some((start, end))
1060}
1061
/// Maps a byte offset in `sql` to a 1-based (line, column) pair.
///
/// Returns `None` when the offset is past the end of the text or does not
/// land on a character boundary; `sql.len()` itself maps to the position one
/// past the final character.
fn offset_to_line_col(sql: &str, offset: usize) -> Option<(usize, usize)> {
    let mut line = 1usize;
    let mut column = 1usize;

    // Single forward scan; the previous implementation duplicated this loop
    // to special-case `offset == sql.len()`.
    for (index, ch) in sql.char_indices() {
        if index == offset {
            return Some((line, column));
        }
        if ch == '\n' {
            line += 1;
            column = 1;
        } else {
            column += 1;
        }
    }

    // After consuming every character, (line, column) is the end-of-text
    // position; any larger offset is out of range.
    if offset == sql.len() {
        Some((line, column))
    } else {
        None
    }
}
1096
1097fn relative_location(
1098 location: Location,
1099 statement_start_line: usize,
1100 statement_start_column: usize,
1101) -> Option<Location> {
1102 let line = location.line as usize;
1103 let column = location.column as usize;
1104 if line < statement_start_line {
1105 return None;
1106 }
1107
1108 if line == statement_start_line {
1109 if column < statement_start_column {
1110 return None;
1111 }
1112 return Some(Location::new(
1113 1,
1114 (column - statement_start_column + 1) as u64,
1115 ));
1116 }
1117
1118 Some(Location::new(
1119 (line - statement_start_line + 1) as u64,
1120 column as u64,
1121 ))
1122}
1123
1124#[cfg(test)]
1125mod tests {
1126 use super::*;
1127 use crate::linter::config::LintConfig;
1128 use crate::parser::parse_sql;
1129 use crate::types::IssueAutofixApplicability;
1130 use std::collections::BTreeMap;
1131
1132 fn run(sql: &str) -> Vec<Issue> {
1133 let rule = LayoutKeywordNewline::default();
1134 run_with_rule(sql, &rule)
1135 }
1136
1137 fn run_with_config(sql: &str, config: &LintConfig) -> Vec<Issue> {
1138 let rule = LayoutKeywordNewline::from_config(config);
1139 run_with_rule(sql, &rule)
1140 }
1141
1142 fn run_with_rule(sql: &str, rule: &LayoutKeywordNewline) -> Vec<Issue> {
1143 let statements = parse_sql(sql).expect("parse");
1144 statements
1145 .iter()
1146 .enumerate()
1147 .flat_map(|(index, statement)| {
1148 rule.check(
1149 statement,
1150 &LintContext {
1151 sql,
1152 statement_range: 0..sql.len(),
1153 statement_index: index,
1154 },
1155 )
1156 })
1157 .collect()
1158 }
1159
1160 fn apply_all_autofixes(sql: &str, issues: &[Issue]) -> String {
1161 let mut edits: Vec<IssuePatchEdit> = issues
1162 .iter()
1163 .filter_map(|i| i.autofix.as_ref())
1164 .flat_map(|a| a.edits.iter().cloned())
1165 .collect();
1166 edits.sort_by(|a, b| b.span.start.cmp(&a.span.start));
1167 let mut out = sql.to_string();
1168 for edit in edits {
1169 out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
1170 }
1171 out
1172 }
1173
1174 fn make_config(clause_configs: serde_json::Value) -> LintConfig {
1175 LintConfig {
1176 enabled: true,
1177 disabled_rules: vec![],
1178 rule_configs: BTreeMap::from([("layout.keyword_newline".to_string(), clause_configs)]),
1179 }
1180 }
1181
1182 #[test]
1185 fn flags_inconsistent_major_clause_placement() {
1186 assert!(!run("SELECT a FROM t WHERE a = 1").is_empty());
1187 assert!(!run("SELECT a FROM t\nWHERE a = 1").is_empty());
1188 }
1189
1190 #[test]
1191 fn does_not_flag_consistent_layout() {
1192 assert!(run("SELECT a FROM t").is_empty());
1193 assert!(run("SELECT a\nFROM t\nWHERE a = 1").is_empty());
1194 }
1195
1196 #[test]
1197 fn does_not_flag_clause_words_in_string_literal() {
1198 assert!(run("SELECT 'FROM t WHERE x = 1' AS txt").is_empty());
1199 }
1200
1201 #[test]
1202 fn emits_safe_autofix_patch_for_first_clause_on_select_line() {
1203 let sql = "SELECT a FROM t\nWHERE a = 1";
1204 let issues = run(sql);
1205 let issue = &issues[0];
1206 let autofix = issue.autofix.as_ref().expect("autofix metadata");
1207
1208 assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
1209 assert_eq!(autofix.edits.len(), 1);
1210
1211 let fixed = apply_all_autofixes(sql, &issues);
1212 assert!(
1213 fixed.contains("\nFROM t"),
1214 "expected FROM to move to new line: {fixed}"
1215 );
1216 }
1217
1218 #[test]
1219 fn does_not_flag_exists_subquery_select_from_inline() {
1220 let sql = "UPDATE t SET x = 1\nWHERE EXISTS (\n SELECT 1 FROM s\n WHERE s.id = t.id\n)";
1223 assert!(run(sql).is_empty());
1224 }
1225
1226 #[test]
1229 fn pass_leading_from_clause() {
1230 let config = make_config(serde_json::json!({
1231 "from_clause": {"keyword_line_position": "leading"}
1232 }));
1233 assert!(run_with_config("SELECT foo\nFROM bar\n", &config).is_empty());
1234 }
1235
1236 #[test]
1237 fn pass_alone_from_clause() {
1238 let config = make_config(serde_json::json!({
1239 "from_clause": {"keyword_line_position": "alone"}
1240 }));
1241 assert!(run_with_config("SELECT foo\nFROM\n bar\n", &config).is_empty());
1242 }
1243
1244 #[test]
1245 fn fail_leading_from_clause() {
1246 let config = make_config(serde_json::json!({
1247 "from_clause": {"keyword_line_position": "leading"}
1248 }));
1249 let sql = "SELECT foo FROM bar\n";
1250 let issues = run_with_config(sql, &config);
1251 assert!(!issues.is_empty(), "should flag FROM not on new line");
1252 let fixed = apply_all_autofixes(sql, &issues);
1253 assert_eq!(fixed, "SELECT foo\nFROM bar\n");
1254 }
1255
1256 #[test]
1257 fn fail_alone_from_clause() {
1258 let config = make_config(serde_json::json!({
1259 "from_clause": {"keyword_line_position": "alone"}
1260 }));
1261 let sql = "SELECT foo FROM bar\n";
1262 let issues = run_with_config(sql, &config);
1263 assert!(!issues.is_empty(), "should flag FROM not alone");
1264 let fixed = apply_all_autofixes(sql, &issues);
1265 assert_eq!(fixed, "SELECT foo\nFROM\nbar\n");
1266 }
1267
1268 #[test]
1269 fn pass_leading_join_clause() {
1270 let config = make_config(serde_json::json!({
1271 "join_clause": {"keyword_line_position": "leading"}
1272 }));
1273 let sql = "SELECT foo\nFROM bar a\nJOIN baz b\n ON a.id = b.id\nINNER JOIN qux c\n ON a.id = c.id\nLEFT OUTER JOIN quux d\n ON a.id = d.id\n";
1274 assert!(run_with_config(sql, &config).is_empty());
1275 }
1276
1277 #[test]
1278 fn fail_leading_join_clause() {
1279 let config = make_config(serde_json::json!({
1280 "join_clause": {"keyword_line_position": "leading"}
1281 }));
1282 let sql = "SELECT foo\nFROM bar a JOIN baz b\nON a.id = b.id INNER JOIN qux c\nON a.id = c.id LEFT OUTER JOIN quux d\nON a.id = d.id\n";
1283 let issues = run_with_config(sql, &config);
1284 assert!(!issues.is_empty(), "should flag JOINs not on new line");
1285 let fixed = apply_all_autofixes(sql, &issues);
1286 assert_eq!(
1287 fixed,
1288 "SELECT foo\nFROM bar a\nJOIN baz b\nON a.id = b.id\nINNER JOIN qux c\nON a.id = c.id\nLEFT OUTER JOIN quux d\nON a.id = d.id\n"
1289 );
1290 }
1291
1292 #[test]
1293 fn fail_alone_join_clause() {
1294 let config = make_config(serde_json::json!({
1295 "join_clause": {"keyword_line_position": "alone"}
1296 }));
1297 let sql = "SELECT foo\nFROM bar a JOIN baz b\nON a.id = b.id INNER JOIN qux c\nON a.id = c.id LEFT OUTER JOIN quux d\nON a.id = d.id\n";
1298 let issues = run_with_config(sql, &config);
1299 assert!(!issues.is_empty(), "should flag JOINs not alone");
1300 let fixed = apply_all_autofixes(sql, &issues);
1301 assert_eq!(
1302 fixed,
1303 "SELECT foo\nFROM bar a\nJOIN\nbaz b\nON a.id = b.id\nINNER JOIN\nqux c\nON a.id = c.id\nLEFT OUTER JOIN\nquux d\nON a.id = d.id\n"
1304 );
1305 }
1306
1307 #[test]
1308 fn pass_none_where_clause() {
1309 let config = make_config(serde_json::json!({
1310 "where_clause": {"keyword_line_position": "none"}
1311 }));
1312 assert!(run_with_config("SELECT a, b FROM tabx WHERE b = 2;\n", &config).is_empty());
1313 }
1314
1315 #[test]
1316 fn fail_leading_on_condition() {
1317 let config = make_config(serde_json::json!({
1318 "join_clause": {"keyword_line_position": "leading"},
1319 "join_on_condition": {"keyword_line_position": "leading"}
1320 }));
1321 let sql = "SELECT foo\nFROM bar a JOIN baz b ON a.id = b.id\n";
1322 let issues = run_with_config(sql, &config);
1323 assert!(!issues.is_empty());
1324 let fixed = apply_all_autofixes(sql, &issues);
1325 assert_eq!(
1326 fixed,
1327 "SELECT foo\nFROM bar a\nJOIN baz b\nON a.id = b.id\n"
1328 );
1329 }
1330
1331 #[test]
1332 fn fail_trailing_on_condition() {
1333 let config = make_config(serde_json::json!({
1334 "join_clause": {"keyword_line_position": "leading"},
1335 "join_on_condition": {"keyword_line_position": "trailing"}
1336 }));
1337 let sql = "SELECT foo\nFROM bar a JOIN baz b ON a.id = b.id\n";
1338 let issues = run_with_config(sql, &config);
1339 assert!(!issues.is_empty());
1340 let fixed = apply_all_autofixes(sql, &issues);
1341 assert_eq!(
1342 fixed,
1343 "SELECT foo\nFROM bar a\nJOIN baz b ON\na.id = b.id\n"
1344 );
1345 }
1346
1347 #[test]
1348 fn pass_leading_orderby_with_window_exclusion() {
1349 let config = make_config(serde_json::json!({
1350 "orderby_clause": {
1351 "keyword_line_position": "leading",
1352 "keyword_line_position_exclusions": "window_specification"
1353 }
1354 }));
1355 let sql = "SELECT\na,\nb,\nROW_NUMBER() OVER (PARTITION BY c ORDER BY d) AS e\nFROM f\nJOIN g\nON g.h = f.h\n";
1356 assert!(run_with_config(sql, &config).is_empty());
1357 }
1358
1359 #[test]
1360 fn fail_leading_orderby_except_window_outer_orderby() {
1361 let config = make_config(serde_json::json!({
1362 "orderby_clause": {
1363 "keyword_line_position": "leading",
1364 "keyword_line_position_exclusions": "window_specification"
1365 }
1366 }));
1367 let sql = "SELECT\na,\nb,\nROW_NUMBER() OVER (PARTITION BY c ORDER BY d) AS e\nFROM f\nJOIN g\nON g.h = f.h ORDER BY a\n";
1368 let issues = run_with_config(sql, &config);
1369 assert!(!issues.is_empty(), "should flag outer ORDER BY");
1370 let fixed = apply_all_autofixes(sql, &issues);
1371 assert_eq!(
1372 fixed,
1373 "SELECT\na,\nb,\nROW_NUMBER() OVER (PARTITION BY c ORDER BY d) AS e\nFROM f\nJOIN g\nON g.h = f.h\nORDER BY a\n"
1374 );
1375 }
1376
1377 #[test]
1378 fn fail_alone_window_function_partitionby_orderby() {
1379 let config = make_config(serde_json::json!({
1380 "partitionby_clause": {"keyword_line_position": "alone"},
1381 "orderby_clause": {
1382 "keyword_line_position": "alone",
1383 "keyword_line_position_exclusions": null
1384 }
1385 }));
1386 let sql = "SELECT\na,\nb,\nROW_NUMBER() OVER (PARTITION BY c ORDER BY d) AS e\nFROM f\nJOIN g\nON g.h = f.h\n";
1387 let issues = run_with_config(sql, &config);
1388 assert!(
1389 !issues.is_empty(),
1390 "should flag PARTITION BY and ORDER BY in window"
1391 );
1392 let fixed = apply_all_autofixes(sql, &issues);
1393 assert_eq!(
1394 fixed,
1395 "SELECT\na,\nb,\nROW_NUMBER() OVER (\nPARTITION BY\nc\nORDER BY\nd) AS e\nFROM f\nJOIN g\nON g.h = f.h\n"
1396 );
1397 }
1398
1399 #[test]
1400 fn fail_select_clause_alone() {
1401 let config = make_config(serde_json::json!({
1402 "select_clause": {"keyword_line_position": "alone"}
1403 }));
1404 let sql = "WITH some_cte AS (SELECT\n column1,\n column2\n FROM some_table\n) SELECT\n column1,\n column2\nFROM some_cte\n";
1405 let issues = run_with_config(sql, &config);
1406 assert!(!issues.is_empty(), "should flag SELECT not alone");
1407 }
1408
1409 #[test]
1410 fn fail_data_type_alone() {
1411 let config = make_config(serde_json::json!({
1412 "data_type": {"keyword_line_position": "alone"}
1413 }));
1414 let sql = "CREATE TABLE t (c1 DOUBLE PRECISION NOT NULL)\n";
1415 let issues = run_with_config(sql, &config);
1416 assert!(
1417 !issues.is_empty(),
1418 "should flag DOUBLE PRECISION and NOT NULL"
1419 );
1420 let fixed = apply_all_autofixes(sql, &issues);
1421 assert_eq!(fixed, "CREATE TABLE t (c1\nDOUBLE PRECISION\nNOT NULL)\n");
1422 }
1423
1424 #[test]
1425 fn pass_leading_orderby_except_window_and_aggregate() {
1426 let config = make_config(serde_json::json!({
1427 "orderby_clause": {
1428 "keyword_line_position": "leading",
1429 "keyword_line_position_exclusions": "window_specification, aggregate_order_by"
1430 }
1431 }));
1432 let sql = "SELECT\na,\nb,\nROW_NUMBER() OVER (PARTITION BY c ORDER BY d) AS e,\nSTRING_AGG(a ORDER BY b, c)\nFROM f\nJOIN g\nON g.h = f.h\n";
1433 assert!(run_with_config(sql, &config).is_empty());
1434 }
1435}