1use crate::linter::config::LintConfig;
8use crate::linter::rule::{LintContext, LintRule};
9use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit};
10use sqlparser::ast::Statement;
11use sqlparser::keywords::Keyword;
12use sqlparser::tokenizer::{Location, Span, Token, TokenWithSpan, Tokenizer, Whitespace};
13use std::collections::HashSet;
14
/// LT01 rule state: configuration resolved once for the "Layout spacing" rule.
pub struct LayoutSpacing {
    // Skip spacing checks inside templated (e.g. Jinja-style) regions when true.
    ignore_templated_areas: bool,
    // Layout config requested column alignment for alias expressions.
    align_alias_expression: bool,
    // Layout config requested column alignment for data types.
    align_data_type: bool,
    // Layout config requested column alignment for column constraints.
    align_column_constraint: bool,
    // The configured indent unit is tabs rather than spaces.
    align_with_tabs: bool,
    // Visual width of one tab stop; clamped to >= 1 in `from_config`.
    tab_space_size: usize,
}
23
24impl LayoutSpacing {
25 pub fn from_config(config: &LintConfig) -> Self {
26 let spacing_before_align = |type_name: &str| {
27 config
28 .config_section_object("layout.keyword_newline")
29 .and_then(|layout| layout.get(type_name))
30 .and_then(serde_json::Value::as_object)
31 .and_then(|entry| entry.get("spacing_before"))
32 .and_then(serde_json::Value::as_str)
33 .is_some_and(|value| value.to_ascii_lowercase().starts_with("align"))
34 };
35
36 Self {
37 ignore_templated_areas: config
38 .core_option_bool("ignore_templated_areas")
39 .unwrap_or(true),
40 align_alias_expression: spacing_before_align("alias_expression"),
41 align_data_type: spacing_before_align("data_type"),
42 align_column_constraint: spacing_before_align("column_constraint_segment"),
43 align_with_tabs: config
44 .section_option_str("indentation", "indent_unit")
45 .or_else(|| config.section_option_str("rules", "indent_unit"))
46 .is_some_and(|value| value.eq_ignore_ascii_case("tab")),
47 tab_space_size: config
48 .section_option_usize("indentation", "tab_space_size")
49 .or_else(|| config.section_option_usize("rules", "tab_space_size"))
50 .unwrap_or(4)
51 .max(1),
52 }
53 }
54
55 fn alignment_options(&self) -> Lt01AlignmentOptions {
56 Lt01AlignmentOptions {
57 alias_expression: self.align_alias_expression,
58 data_type: self.align_data_type,
59 column_constraint: self.align_column_constraint,
60 align_with_tabs: self.align_with_tabs,
61 tab_space_size: self.tab_space_size,
62 }
63 }
64}
65
66impl Default for LayoutSpacing {
67 fn default() -> Self {
68 Self {
69 ignore_templated_areas: true,
70 align_alias_expression: false,
71 align_data_type: false,
72 align_column_constraint: false,
73 align_with_tabs: false,
74 tab_space_size: 4,
75 }
76 }
77}
78
impl LintRule for LayoutSpacing {
    fn code(&self) -> &'static str {
        issue_codes::LINT_LT_001
    }

    fn name(&self) -> &'static str {
        "Layout spacing"
    }

    fn description(&self) -> &'static str {
        "Inappropriate Spacing."
    }

    /// Runs the LT01 spacing scan over the current statement and, in two
    /// fallback situations, re-scans the whole input as well:
    /// - parser-fragment fallback: the first statement covers only a prefix of
    ///   the input, more non-whitespace follows, and there is no terminating
    ///   `;` (the parser likely stopped early);
    /// - template-fragment fallback: templated input where the statement range
    ///   does not span the whole text.
    /// Duplicate spans from the re-scan are merged before issues are built.
    fn check(&self, _statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
        let mut violations =
            spacing_violations(ctx, self.ignore_templated_areas, self.alignment_options());
        // Is there anything meaningful after this statement's range?
        let has_remaining_non_whitespace = ctx.sql[ctx.statement_range.end..]
            .chars()
            .any(|ch| !ch.is_whitespace());
        let parser_fragment_fallback = ctx.statement_index == 0
            && ctx.statement_range.start == 0
            && ctx.statement_range.end < ctx.sql.len()
            && has_remaining_non_whitespace
            && !ctx.statement_sql().trim_end().ends_with(';');
        let template_fragment_fallback = ctx.statement_index == 0
            && contains_template_marker(ctx.sql)
            && (ctx.statement_range.start > 0 || ctx.statement_range.end < ctx.sql.len());
        if parser_fragment_fallback || template_fragment_fallback {
            // Re-scan the full input as one pseudo-statement so spacing
            // problems outside the parsed range are still reported.
            let full_ctx = LintContext {
                sql: ctx.sql,
                statement_range: 0..ctx.sql.len(),
                statement_index: 0,
            };
            violations.extend(spacing_violations(
                &full_ctx,
                self.ignore_templated_areas,
                self.alignment_options(),
            ));
            merge_violations_by_span(&mut violations);
        }

        violations
            .into_iter()
            .map(|((start, end), edits)| {
                let mut issue =
                    Issue::info(issue_codes::LINT_LT_001, "Inappropriate spacing found.")
                        .with_statement(ctx.statement_index)
                        .with_span(ctx.span_from_statement_offset(start, end));
                if !edits.is_empty() {
                    // Violation offsets are statement-relative; convert each
                    // edit to a file-level span before attaching the autofix.
                    let edits = edits
                        .into_iter()
                        .map(|(edit_start, edit_end, replacement)| {
                            IssuePatchEdit::new(
                                ctx.span_from_statement_offset(edit_start, edit_end),
                                replacement.to_string(),
                            )
                        })
                        .collect();
                    issue = issue.with_autofix_edits(IssueAutofixApplicability::Safe, edits);
                }
                issue
            })
            .collect()
    }
}
144
// Byte-offset span (start, end) within the scanned SQL text.
type Lt01Span = (usize, usize);
// Autofix edit: replace bytes [start, end) with the replacement string.
type Lt01AutofixEdit = (usize, usize, String);
// One violation: the reported span plus zero or more autofix edits.
type Lt01Violation = (Lt01Span, Vec<Lt01AutofixEdit>);
// Span of a templating marker within the SQL text.
type Lt01TemplateSpan = (usize, usize);
149
/// Sorts violations by span and collapses entries reporting the same span,
/// keeping the union of their autofix edits in first-seen order.
fn merge_violations_by_span(violations: &mut Vec<((usize, usize), Vec<(usize, usize, String)>)>) {
    violations.sort_unstable_by_key(|(span, _)| *span);

    let mut merged: Vec<((usize, usize), Vec<(usize, usize, String)>)> =
        Vec::with_capacity(violations.len());
    for (span, edits) in violations.drain(..) {
        match merged.last_mut() {
            Some((last_span, last_edits)) if *last_span == span => {
                // Same span seen again: keep one entry, folding in new edits.
                for edit in edits {
                    if !last_edits.contains(&edit) {
                        last_edits.push(edit);
                    }
                }
            }
            _ => merged.push((span, edits)),
        }
    }

    *violations = merged;
}
175
/// Alignment-related settings threaded through the detection helpers.
#[derive(Clone, Copy)]
struct Lt01AlignmentOptions {
    // Detect alias (`AS`) column alignment when true.
    alias_expression: bool,
    // Detect data-type column alignment inside CREATE TABLE when true.
    data_type: bool,
    // Detect column-constraint alignment inside CREATE TABLE when true.
    column_constraint: bool,
    // Alignment padding is expected to use tabs rather than spaces.
    align_with_tabs: bool,
    // Visual width of one tab stop.
    tab_space_size: usize,
}
184
/// Runs every LT01 detection pass over one statement's SQL and returns the
/// combined, span-sorted, span-deduplicated list of violations.
fn spacing_violations(
    ctx: &LintContext,
    ignore_templated_areas: bool,
    alignment: Lt01AlignmentOptions,
) -> Vec<Lt01Violation> {
    let sql = ctx.statement_sql();
    let mut violations = Vec::new();
    let templated_spans = template_spans(sql);
    // For templated input, tokenizing the raw text keeps template markers
    // visible; otherwise prefer the context's own tokenization. The other
    // source is the fallback in each case.
    let prefer_raw_template_tokens = ctx.is_templated() && contains_template_marker(sql);
    let tokens = if prefer_raw_template_tokens {
        tokenized(sql, ctx.dialect()).or_else(|| tokenized_for_context(ctx))
    } else {
        tokenized_for_context(ctx).or_else(|| tokenized(sql, ctx.dialect()))
    };
    // Without tokens no pair-based checks are possible; return empty.
    let Some(tokens) = tokens else {
        return violations;
    };

    let dialect = ctx.dialect();

    collect_trailing_whitespace_violations(sql, &mut violations);
    collect_pair_spacing_violations(sql, &tokens, dialect, &templated_spans, &mut violations);
    collect_ansi_national_string_literal_violations(
        sql,
        &tokens,
        dialect,
        &templated_spans,
        &mut violations,
    );
    // Template-string spacing only matters when templated areas are linted.
    if !ignore_templated_areas {
        collect_template_string_spacing_violations(sql, dialect, &templated_spans, &mut violations);
    }
    collect_alignment_detection_violations(sql, alignment, &mut violations);

    // Report each span at most once, in source order.
    violations.sort_unstable_by_key(|(span, _)| *span);
    violations.dedup_by_key(|(span, _)| *span);

    violations
}
224
/// Flags trailing spaces/tabs at the end of every line, each carrying a
/// deletion autofix that covers exactly the trailing run.
fn collect_trailing_whitespace_violations(
    sql: &str,
    violations: &mut Vec<((usize, usize), Vec<(usize, usize, String)>)>,
) {
    let mut line_start = 0usize;
    for line in sql.split('\n') {
        let kept = line.trim_end_matches([' ', '\t']);
        let start = line_start + kept.len();
        let end = line_start + line.len();
        if end > start {
            violations.push(((start, end), vec![(start, end, String::new())]));
        }
        // +1 skips the '\n' separator consumed by split().
        line_start += line.len() + 1;
    }
}
243
244fn collect_alignment_detection_violations(
245 sql: &str,
246 alignment: Lt01AlignmentOptions,
247 violations: &mut Vec<Lt01Violation>,
248) {
249 if alignment.alias_expression {
250 collect_alias_alignment_detection(
251 sql,
252 alignment.tab_space_size,
253 alignment.align_with_tabs,
254 violations,
255 );
256 }
257 if alignment.data_type || alignment.column_constraint {
258 collect_create_table_alignment_detection(sql, alignment.tab_space_size, violations);
259 }
260}
261
/// One `AS` occurrence within a run of consecutive alias lines.
#[derive(Clone, Copy)]
struct AliasAlignmentEntry {
    // Byte offset of the `AS` keyword within the scanned SQL.
    as_start: usize,
    // Visual column (tabs expanded) where `AS` begins on its line.
    visual_col: usize,
    // The whitespace immediately before `AS` consists of tabs only.
    separator_uses_tabs: bool,
}
268
/// Scans line-by-line for runs of consecutive `AS` alias lines and reports
/// any `AS` that does not sit at the run's rightmost column (or, in tab
/// mode, is padded with non-tab whitespace).
fn collect_alias_alignment_detection(
    sql: &str,
    tab_space_size: usize,
    align_with_tabs: bool,
    violations: &mut Vec<Lt01Violation>,
) {
    let lines: Vec<&str> = sql.split('\n').collect();
    // Alignment needs at least two lines to compare.
    if lines.len() < 2 {
        return;
    }

    let mut offset = 0usize;
    let mut current_group: Vec<AliasAlignmentEntry> = Vec::new();

    for line in &lines {
        let lower = line.to_ascii_lowercase();
        // Heuristic: a line containing " as " that is not a FROM clause.
        let alias_pos = lower.find(" as ");
        let is_alias_line = alias_pos.is_some() && !lower.trim_start().starts_with("from ");

        if is_alias_line {
            // +1 skips the space before `as`, landing on the keyword itself.
            let as_index = alias_pos.unwrap_or_default() + 1;
            current_group.push(AliasAlignmentEntry {
                as_start: offset + as_index,
                visual_col: visual_width(&line[..as_index], tab_space_size),
                separator_uses_tabs: alias_separator_uses_tabs(line, as_index),
            });
        } else if !current_group.is_empty() {
            // A non-alias line ends the current run; emit it and start fresh.
            emit_alias_alignment_group(&current_group, align_with_tabs, violations);
            current_group.clear();
        }

        // +1 accounts for the '\n' removed by split().
        offset += line.len() + 1;
    }

    // Flush a run that extends to the end of the input.
    if !current_group.is_empty() {
        emit_alias_alignment_group(&current_group, align_with_tabs, violations);
    }
}
307
/// Reports whether the whitespace immediately before the `AS` keyword (at
/// byte offset `as_index`) consists of one or more tabs and nothing else.
fn alias_separator_uses_tabs(line: &str, as_index: usize) -> bool {
    let prefix = &line[..as_index];
    // The separator is the trailing whitespace run of the prefix.
    let body = prefix.trim_end_matches(|ch: char| ch.is_whitespace());
    let separator = &prefix[body.len()..];
    !separator.is_empty() && separator.bytes().all(|b| b == b'\t')
}
319
320fn emit_alias_alignment_group(
321 group: &[AliasAlignmentEntry],
322 align_with_tabs: bool,
323 violations: &mut Vec<Lt01Violation>,
324) {
325 if group.len() < 2 {
326 return;
327 }
328 let target_col = group
329 .iter()
330 .map(|entry| entry.visual_col)
331 .max()
332 .unwrap_or(0);
333 for entry in group {
334 if entry.visual_col != target_col || (align_with_tabs && !entry.separator_uses_tabs) {
335 let end = entry.as_start + 2;
336 violations.push(((entry.as_start, end), Vec::new()));
337 }
338 }
339}
340
/// Line-based scan of CREATE TABLE bodies: records where each column
/// definition's second token (its data type) starts, then flags entries whose
/// type column differs from the group's rightmost one.
fn collect_create_table_alignment_detection(
    sql: &str,
    tab_space_size: usize,
    violations: &mut Vec<Lt01Violation>,
) {
    let lines: Vec<&str> = sql.split('\n').collect();
    let mut offset = 0usize;
    let mut in_create_table = false;
    let mut entries: Vec<(usize, usize)> = Vec::new();

    for line in &lines {
        let trimmed = line.trim_start();
        let upper = trimmed.to_ascii_uppercase();
        if !in_create_table && upper.starts_with("CREATE TABLE") {
            in_create_table = true;
        } else if in_create_table && (trimmed.starts_with(')') || trimmed.starts_with(';')) {
            // A closing paren/semicolon ends the column list: emit the group.
            emit_create_table_alignment_group(&entries, violations);
            entries.clear();
            in_create_table = false;
        }

        // Record a column-definition line; skip the CREATE TABLE header,
        // bare parens, blank lines, and line comments.
        if in_create_table
            && !trimmed.is_empty()
            && !trimmed.starts_with('(')
            && !trimmed.starts_with(')')
            && !trimmed.starts_with("--")
            && !upper.starts_with("CREATE TABLE")
        {
            if let Some(data_type_start) = second_token_start(trimmed) {
                let prefix_len = line.len() - trimmed.len();
                let absolute = offset + prefix_len + data_type_start;
                // Visual column of the data type, with tabs expanded.
                let visual = visual_width(&trimmed[..data_type_start], tab_space_size);
                entries.push((absolute, visual));
            }
        }

        // +1 accounts for the '\n' removed by split().
        offset += line.len() + 1;
    }

    // Flush a table body that runs to end-of-input without a closer.
    if in_create_table && !entries.is_empty() {
        emit_create_table_alignment_group(&entries, violations);
    }
}
384
/// Emits a violation for every CREATE TABLE column entry whose data-type
/// column differs from the group's rightmost one. Entries are
/// (byte offset, visual column) pairs.
fn emit_create_table_alignment_group(
    group: &[(usize, usize)],
    violations: &mut Vec<((usize, usize), Vec<(usize, usize, String)>)>,
) {
    // A single column definition has nothing to align against.
    if group.len() < 2 {
        return;
    }
    let target_col = group.iter().map(|&(_, col)| col).max().unwrap_or(0);
    violations.extend(
        group
            .iter()
            .filter(|&&(_, col)| col != target_col)
            // One-byte span at the data type's start; no autofix offered.
            .map(|&(start, _)| ((start, start + 1), Vec::new())),
    );
}
400
/// Returns the byte offset where the second whitespace-delimited token of
/// `line` begins, or `None` when the line holds fewer than two tokens.
fn second_token_start(line: &str) -> Option<usize> {
    // Skip any leading whitespace before the first token.
    let first_start = line.find(|ch: char| !ch.is_whitespace())?;
    // Find the whitespace run that terminates the first token…
    let after_first = line[first_start..]
        .find(char::is_whitespace)
        .map(|rel| first_start + rel)?;
    // …then the first non-whitespace character after it.
    let second_rel = line[after_first..].find(|ch: char| !ch.is_whitespace())?;
    Some(after_first + second_rel)
}
421
/// Computes the on-screen width of `text`, expanding each tab to the next
/// multiple of `tab_space_size`; every other character counts as one column.
fn visual_width(text: &str, tab_space_size: usize) -> usize {
    text.chars().fold(0usize, |width, ch| match ch {
        // A tab jumps to the next tab stop.
        '\t' => (width / tab_space_size + 1) * tab_space_size,
        _ => width + 1,
    })
}
434
/// Spacing expected between two adjacent non-trivia tokens.
/// (The "inline" variants are enforced even across newlines; the plain
/// variants tolerate a newline between the tokens.)
#[derive(Debug, Clone, Copy, PartialEq)]
enum ExpectedSpacing {
    /// Exactly one space on the same line; a newline between is acceptable.
    Single,
    /// No gap on the same line; a newline between is acceptable.
    None,
    /// No gap at all, even across newlines.
    NoneInline,
    /// No opinion; skip the pair entirely.
    Skip,
    /// Exactly one space, even across newlines.
    SingleInline,
}
453
/// Walks every adjacent pair of non-trivia tokens, asks `expected_spacing`
/// what the gap should look like, and records a violation (with an autofix
/// edit) whenever the actual gap disagrees. Pairs separated by comments,
/// inside template spans, or inside Snowflake MATCH_RECOGNIZE patterns are
/// exempted as appropriate.
fn collect_pair_spacing_violations(
    sql: &str,
    tokens: &[TokenWithSpan],
    dialect: Dialect,
    templated_spans: &[Lt01TemplateSpan],
    violations: &mut Vec<Lt01Violation>,
) {
    // Indices of tokens that participate in spacing (no whitespace/comments/EOF).
    let non_trivia: Vec<usize> = tokens
        .iter()
        .enumerate()
        .filter(|(_, t)| !is_trivia_token(&t.token) && !matches!(t.token, Token::EOF))
        .map(|(i, _)| i)
        .collect();
    let type_angle_tokens = if supports_type_angle_spacing(dialect) {
        type_angle_token_indices(tokens, &non_trivia)
    } else {
        HashSet::new()
    };
    let snowflake_pattern_tokens = if dialect == Dialect::Snowflake {
        snowflake_pattern_token_indices(tokens, &non_trivia)
    } else {
        HashSet::new()
    };

    for window in non_trivia.windows(2) {
        let left_idx = window[0];
        let right_idx = window[1];
        // Snowflake pattern tokens (e.g. MATCH_RECOGNIZE patterns) are exempt.
        if dialect == Dialect::Snowflake
            && (snowflake_pattern_tokens.contains(&left_idx)
                || snowflake_pattern_tokens.contains(&right_idx))
        {
            continue;
        }
        let left = &tokens[left_idx];
        let right = &tokens[right_idx];

        let Some((left_start, left_end)) = token_offsets(sql, left) else {
            continue;
        };
        let Some((right_start, _)) = token_offsets(sql, right) else {
            continue;
        };

        // Defensive: skip pairs with inconsistent or out-of-range offsets.
        if left_end > right_start || right_start > sql.len() || left_end > sql.len() {
            continue;
        }
        if overlaps_template_span(templated_spans, left_start, right_start) {
            continue;
        }

        let gap = &sql[left_end..right_start];
        let has_newline = gap.contains('\n') || gap.contains('\r');
        let has_comment = has_comment_between(tokens, left_idx, right_idx);

        // Type-parameter angle brackets (e.g. ARRAY<...>) are glued when the
        // dialect supports them; otherwise defer to the generic rules.
        let expected = if supports_type_angle_spacing(dialect)
            && is_type_angle_spacing_pair(left, right, left_idx, right_idx, &type_angle_tokens)
        {
            ExpectedSpacing::None
        } else {
            expected_spacing(left, right, tokens, left_idx, right_idx, dialect)
        };

        match expected {
            ExpectedSpacing::Skip => continue,
            ExpectedSpacing::None => {
                // Flag a same-line gap; a newline or comment is acceptable.
                if !gap.is_empty() && !has_newline && !has_comment {
                    let span = (left_end, right_start);
                    let edit = (left_end, right_start, String::new());
                    violations.push((span, vec![edit]));
                }
            }
            ExpectedSpacing::NoneInline => {
                // Flag any gap, even across newlines (comments still exempt).
                if !gap.is_empty() && !has_comment {
                    let span = (left_end, right_start);
                    let edit = (left_end, right_start, String::new());
                    violations.push((span, vec![edit]));
                }
            }
            ExpectedSpacing::Single => {
                if has_comment {
                    continue;
                }
                // A line break between the tokens is acceptable.
                if has_newline {
                    continue;
                }
                if gap == " " {
                    continue;
                }
                // Missing space after a comma: rewrite the comma itself so
                // the edit span is non-empty.
                if gap.is_empty() && matches!(left.token, Token::Comma) {
                    let replacement = format!("{} ", &sql[left_start..left_end]);
                    let span = (left_start, left_end);
                    let edit = (left_start, left_end, replacement);
                    violations.push((span, vec![edit]));
                    continue;
                }
                // Same trick for a glued EXISTS keyword (e.g. `EXISTS(`).
                if gap.is_empty() && is_exists_keyword_token(&left.token) {
                    let replacement = format!("{} ", &sql[left_start..left_end]);
                    let span = (left_start, left_end);
                    let edit = (left_start, left_end, replacement);
                    violations.push((span, vec![edit]));
                    continue;
                }
                // Otherwise normalize the gap to a single space.
                let span = (left_end, right_start);
                let edit = (left_end, right_start, " ".to_string());
                violations.push((span, vec![edit]));
            }
            ExpectedSpacing::SingleInline => {
                if has_comment {
                    continue;
                }
                if gap == " " {
                    continue;
                }
                let span = (left_end, right_start);
                let edit = (left_end, right_start, " ".to_string());
                violations.push((span, vec![edit]));
            }
        }
    }
}
586
/// Decides what spacing the pair (`left`, `right`) should have. The most
/// specific structural cases are checked first (qualified names, casts,
/// split operators, parens, brackets, commas), then dialect-specific shapes,
/// then generic word/literal/operator rules; the check order is load-bearing.
/// Returns `Skip` when the rule has no opinion.
fn expected_spacing(
    left: &TokenWithSpan,
    right: &TokenWithSpan,
    tokens: &[TokenWithSpan],
    left_idx: usize,
    right_idx: usize,
    dialect: Dialect,
) -> ExpectedSpacing {
    // Qualified names: no space around `.`.
    if matches!(left.token, Token::Period) || matches!(right.token, Token::Period) {
        return ExpectedSpacing::NoneInline;
    }

    // Casts: no space around `::`.
    if matches!(left.token, Token::DoubleColon) || matches!(right.token, Token::DoubleColon) {
        return ExpectedSpacing::NoneInline;
    }

    // Snowflake semi-structured access: no space around `:`.
    if dialect == Dialect::Snowflake
        && (matches!(left.token, Token::Colon) || matches!(right.token, Token::Colon))
    {
        return ExpectedSpacing::NoneInline;
    }

    // `> =`, `< =`, `< >`, `!= =`: halves of one operator stay glued.
    if is_split_compound_comparison_pair(left, right) {
        return ExpectedSpacing::NoneInline;
    }

    // T-SQL compound assignment halves (`+ =` etc.) stay glued.
    if dialect == Dialect::Mssql && is_tsql_compound_assignment_pair(left, right) {
        return ExpectedSpacing::NoneInline;
    }

    if matches!(right.token, Token::LParen) {
        return expected_spacing_before_lparen(left, tokens, left_idx, dialect);
    }

    if matches!(left.token, Token::RParen) {
        return expected_spacing_after_rparen(right, tokens, right_idx);
    }

    if matches!(right.token, Token::LBracket) {
        // A type keyword takes a space before `[`; subscripts stay glued.
        if is_type_keyword_for_bracket(&left.token) {
            return ExpectedSpacing::Single;
        }
        return ExpectedSpacing::None;
    }

    if matches!(left.token, Token::RBracket) {
        // `]` glues to a following `.`/`::`/`[`/`)`, otherwise takes a space.
        if matches!(
            right.token,
            Token::DoubleColon | Token::Period | Token::LBracket | Token::RParen
        ) {
            return ExpectedSpacing::None;
        }
        return ExpectedSpacing::Single;
    }

    // Commas hug the element before them and take one space after.
    if matches!(right.token, Token::Comma) {
        return ExpectedSpacing::None;
    }
    if matches!(left.token, Token::Comma) {
        return ExpectedSpacing::Single;
    }

    // Statement terminators: no opinion here.
    if matches!(right.token, Token::SemiColon) {
        return ExpectedSpacing::Skip;
    }
    if matches!(left.token, Token::SemiColon) {
        return ExpectedSpacing::Skip;
    }

    // Parenthesized contents hug the parens.
    if matches!(left.token, Token::LParen) {
        return ExpectedSpacing::None;
    }
    if matches!(right.token, Token::RParen) {
        return ExpectedSpacing::None;
    }

    // BigQuery `my-project.dataset` style hyphenated identifiers stay glued.
    if dialect == Dialect::Bigquery
        && is_bigquery_hyphenated_identifier_pair(left, right, tokens, left_idx, right_idx)
    {
        return ExpectedSpacing::None;
    }

    // Filesystem/stage paths: slashes stay glued to their segments.
    if is_filesystem_path_pair(left, right, tokens, left_idx, right_idx, dialect) {
        return ExpectedSpacing::NoneInline;
    }

    if is_binary_operator(&left.token) || is_binary_operator(&right.token) {
        // A sign used as a unary prefix gets no opinion.
        if is_unary_operator_pair(left, right, tokens, left_idx) {
            return ExpectedSpacing::Skip;
        }
        return ExpectedSpacing::Single;
    }

    if is_comparison_operator(&left.token) || is_comparison_operator(&right.token) {
        // NOTE(review): both arms return Single, so this T-SQL check is
        // currently redundant — confirm before removing or diverging.
        if dialect == Dialect::Mssql
            && is_tsql_assignment_rhs_pair(left, right, tokens, left_idx, right_idx)
        {
            return ExpectedSpacing::Single;
        }
        return ExpectedSpacing::Single;
    }

    if is_json_operator(&left.token) || is_json_operator(&right.token) {
        return ExpectedSpacing::Single;
    }

    // `*` is ambiguous (wildcard vs multiplication): no opinion.
    if matches!(left.token, Token::Mul) || matches!(right.token, Token::Mul) {
        return ExpectedSpacing::Skip;
    }

    // Adjacent identifiers/keywords want one space.
    if is_word_like(&left.token) && is_word_like(&right.token) {
        return ExpectedSpacing::Single;
    }

    if (is_word_like(&left.token) && is_literal(&right.token))
        || (is_literal(&left.token) && is_word_like(&right.token))
    {
        return ExpectedSpacing::Single;
    }

    if is_literal(&left.token) && is_literal(&right.token) {
        return ExpectedSpacing::Single;
    }

    // NOTE(review): `Number` is already covered by `is_literal`, so this
    // branch appears unreachable — confirm before removing.
    if (matches!(left.token, Token::Number(_, _)) && is_word_like(&right.token))
        || (is_word_like(&left.token) && matches!(right.token, Token::Number(_, _)))
    {
        return ExpectedSpacing::Single;
    }

    ExpectedSpacing::Skip
}
746
747fn is_binary_operator(token: &Token) -> bool {
752 matches!(
753 token,
754 Token::Plus
755 | Token::Minus
756 | Token::Div
757 | Token::Mod
758 | Token::StringConcat
759 | Token::Ampersand
760 | Token::Pipe
761 | Token::Caret
762 | Token::ShiftLeft
763 | Token::ShiftRight
764 | Token::Assignment
765 )
766}
767
768fn is_comparison_operator(token: &Token) -> bool {
769 matches!(
770 token,
771 Token::Eq
772 | Token::Neq
773 | Token::Lt
774 | Token::Gt
775 | Token::LtEq
776 | Token::GtEq
777 | Token::Spaceship
778 | Token::DoubleEq
779 | Token::TildeEqual
780 )
781}
782
783fn is_split_compound_comparison_pair(left: &TokenWithSpan, right: &TokenWithSpan) -> bool {
784 matches!(
785 (&left.token, &right.token),
786 (Token::Gt, Token::Eq)
787 | (Token::Lt, Token::Eq)
788 | (Token::Lt, Token::Gt)
789 | (Token::Neq, Token::Eq)
790 )
791}
792
793fn is_assignment_operator_token(token: &Token) -> bool {
794 matches!(
795 token,
796 Token::Plus
797 | Token::Minus
798 | Token::Mul
799 | Token::Div
800 | Token::Mod
801 | Token::Ampersand
802 | Token::Pipe
803 | Token::Caret
804 )
805}
806
807fn is_tsql_compound_assignment_pair(left: &TokenWithSpan, right: &TokenWithSpan) -> bool {
808 matches!(right.token, Token::Eq) && is_assignment_operator_token(&left.token)
809}
810
811fn is_tsql_assignment_rhs_pair(
812 left: &TokenWithSpan,
813 _right: &TokenWithSpan,
814 tokens: &[TokenWithSpan],
815 left_idx: usize,
816 _right_idx: usize,
817) -> bool {
818 if !matches!(left.token, Token::Eq) {
819 return false;
820 }
821 prev_non_trivia_index(tokens, left_idx)
822 .map(|index| is_assignment_operator_token(&tokens[index].token))
823 .unwrap_or(false)
824}
825
826fn is_json_operator(token: &Token) -> bool {
827 matches!(
828 token,
829 Token::Arrow
830 | Token::LongArrow
831 | Token::HashArrow
832 | Token::HashLongArrow
833 | Token::AtArrow
834 | Token::ArrowAt
835 )
836}
837
838fn is_word_like(token: &Token) -> bool {
839 matches!(token, Token::Word(_) | Token::Placeholder(_))
840}
841
842fn is_literal(token: &Token) -> bool {
843 matches!(
844 token,
845 Token::SingleQuotedString(_)
846 | Token::DoubleQuotedString(_)
847 | Token::TripleSingleQuotedString(_)
848 | Token::TripleDoubleQuotedString(_)
849 | Token::NationalStringLiteral(_)
850 | Token::EscapedStringLiteral(_)
851 | Token::UnicodeStringLiteral(_)
852 | Token::HexStringLiteral(_)
853 | Token::SingleQuotedByteStringLiteral(_)
854 | Token::DoubleQuotedByteStringLiteral(_)
855 | Token::Number(_, _)
856 )
857}
858
859fn is_type_keyword_for_bracket(token: &Token) -> bool {
860 if let Token::Word(w) = token {
861 if w.quote_style.is_some() {
862 return false;
863 }
864 matches!(
865 w.value.to_ascii_uppercase().as_str(),
866 "TEXT"
867 | "UUID"
868 | "INT"
869 | "INTEGER"
870 | "BIGINT"
871 | "SMALLINT"
872 | "VARCHAR"
873 | "CHAR"
874 | "BOOLEAN"
875 | "BOOL"
876 | "NUMERIC"
877 | "DECIMAL"
878 | "FLOAT"
879 | "DOUBLE"
880 | "DATE"
881 | "TIME"
882 | "TIMESTAMP"
883 | "INTERVAL"
884 | "JSONB"
885 | "JSON"
886 | "BYTEA"
887 | "REAL"
888 | "SERIAL"
889 | "BIGSERIAL"
890 | "INET"
891 | "CIDR"
892 | "MACADDR"
893 )
894 } else {
895 false
896 }
897}
898
899fn is_exists_keyword_token(token: &Token) -> bool {
900 matches!(token, Token::Word(word) if word.keyword == Keyword::EXISTS)
901}
902
903fn is_ddl_object_keyword(token: &Token) -> bool {
906 if let Token::Word(w) = token {
907 matches!(
908 w.keyword,
909 Keyword::TABLE
910 | Keyword::VIEW
911 | Keyword::INDEX
912 | Keyword::FUNCTION
913 | Keyword::PROCEDURE
914 | Keyword::TRIGGER
915 | Keyword::SEQUENCE
916 | Keyword::TYPE
917 | Keyword::SCHEMA
918 | Keyword::DATABASE
919 )
920 } else {
921 false
922 }
923}
924
/// Walks left from a name token across `schema.`-style qualifiers and checks
/// whether the chain is introduced by a DDL object keyword (TABLE, VIEW, …)
/// that itself sits in a CREATE/ALTER/DROP/TRUNCATE context.
fn is_qualified_ddl_object_name(tokens: &[TokenWithSpan], word_index: usize) -> bool {
    let mut cursor = word_index;

    loop {
        let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) else {
            return false;
        };

        // Step over `qualifier .` pairs to reach the front of the name.
        if matches!(tokens[prev_idx].token, Token::Period) {
            let Some(prev_word_idx) = prev_non_trivia_index(tokens, prev_idx) else {
                return false;
            };
            if !is_word_like(&tokens[prev_word_idx].token) {
                return false;
            }
            cursor = prev_word_idx;
            continue;
        }

        // The name must be introduced by a DDL object keyword…
        if !is_ddl_object_keyword(&tokens[prev_idx].token) {
            return false;
        }
        // …which itself must be in a definition statement context.
        return is_ddl_object_definition_context(tokens, prev_idx);
    }
}
950
/// Walks left from a name token across `schema.`-style qualifiers and
/// reports whether the name is the target of a `REFERENCES` clause.
fn is_reference_target_name(tokens: &[TokenWithSpan], word_index: usize) -> bool {
    let mut cursor = word_index;

    loop {
        let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) else {
            return false;
        };

        // Step over `qualifier .` pairs to reach the front of the name.
        if matches!(tokens[prev_idx].token, Token::Period) {
            let Some(prev_word_idx) = prev_non_trivia_index(tokens, prev_idx) else {
                return false;
            };
            if !is_word_like(&tokens[prev_word_idx].token) {
                return false;
            }
            cursor = prev_word_idx;
            continue;
        }

        // The token directly before the (qualified) name decides the answer.
        let Token::Word(prev_word) = &tokens[prev_idx].token else {
            return false;
        };

        return prev_word.keyword == Keyword::REFERENCES;
    }
}
977
/// Scans left from a name token for a `COPY INTO` prefix, giving up at
/// clause keywords, structural punctuation, or after 64 steps (a guard
/// against degenerate token streams).
fn is_copy_into_target_name(tokens: &[TokenWithSpan], word_index: usize) -> bool {
    let mut cursor = word_index;
    let mut steps = 0usize;

    while let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) {
        match &tokens[prev_idx].token {
            Token::Word(word) if word.keyword == Keyword::INTO => {
                // `INTO` found: qualifies only when directly preceded by `COPY`.
                let Some(copy_idx) = prev_non_trivia_index(tokens, prev_idx) else {
                    return false;
                };
                return matches!(
                    &tokens[copy_idx].token,
                    Token::Word(copy_word) if copy_word.keyword == Keyword::COPY
                );
            }
            Token::Word(word)
                if matches!(
                    word.keyword,
                    Keyword::FROM
                        | Keyword::SELECT
                        | Keyword::WHERE
                        | Keyword::JOIN
                        | Keyword::ON
                        | Keyword::HAVING
                ) =>
            {
                // Hitting a clause keyword means this is not a COPY target.
                return false;
            }
            // Structural punctuation also ends the search unsuccessfully.
            Token::SemiColon | Token::Comma | Token::LParen | Token::RParen => return false,
            _ => {}
        }

        cursor = prev_idx;
        steps += 1;
        if steps > 64 {
            return false;
        }
    }

    false
}
1019
/// Scans left from a name token for an `INSERT INTO` prefix, allowing only
/// periods and unquoted words in between (i.e. a dotted table name) and
/// capping the walk at 16 steps.
fn is_insert_into_target_name(tokens: &[TokenWithSpan], word_index: usize) -> bool {
    let mut cursor = word_index;
    let mut steps = 0usize;

    while let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) {
        match &tokens[prev_idx].token {
            Token::Word(word) if word.keyword == Keyword::INTO => {
                // `INTO` found: qualifies only when preceded by `INSERT`.
                let Some(insert_idx) = prev_non_trivia_index(tokens, prev_idx) else {
                    return false;
                };
                return matches!(
                    &tokens[insert_idx].token,
                    Token::Word(w) if w.keyword == Keyword::INSERT
                );
            }
            // Dotted-name components are tolerated on the way to INTO.
            Token::Period => {}
            Token::Word(word) if word.quote_style.is_none() => {}
            // Anything else means this is not an INSERT target name.
            _ => return false,
        }

        cursor = prev_idx;
        steps += 1;
        if steps > 16 {
            return false;
        }
    }

    false
}
1054
1055fn is_ddl_object_definition_context(tokens: &[TokenWithSpan], ddl_keyword_index: usize) -> bool {
1056 let Some(prev_idx) = prev_non_trivia_index(tokens, ddl_keyword_index) else {
1057 return false;
1058 };
1059 let Token::Word(prev_word) = &tokens[prev_idx].token else {
1060 return false;
1061 };
1062
1063 if matches!(
1064 prev_word.keyword,
1065 Keyword::CREATE | Keyword::ALTER | Keyword::DROP | Keyword::TRUNCATE
1066 ) {
1067 return true;
1068 }
1069
1070 if prev_word.keyword == Keyword::OR {
1071 if let Some(prev_prev_idx) = prev_non_trivia_index(tokens, prev_idx) {
1072 if let Token::Word(prev_prev_word) = &tokens[prev_prev_idx].token {
1073 return matches!(prev_prev_word.keyword, Keyword::CREATE | Keyword::ALTER);
1074 }
1075 }
1076 }
1077
1078 false
1079}
1080
/// Detects `+`/`-` used as a unary sign within the pair, so the pair can be
/// skipped by the binary-operator spacing rule.
fn is_unary_operator_pair(
    left: &TokenWithSpan,
    right: &TokenWithSpan,
    tokens: &[TokenWithSpan],
    left_idx: usize,
) -> bool {
    // Right token is a sign directly after a prefix context (e.g. `( -1`).
    if matches!(right.token, Token::Plus | Token::Minus)
        && is_unary_prefix_context(&tokens[left_idx].token)
    {
        return true;
    }
    // Left token is a sign whose own predecessor is a prefix context.
    if matches!(left.token, Token::Plus | Token::Minus) {
        if let Some(prev_idx) = prev_non_trivia_index(tokens, left_idx) {
            if is_unary_prefix_context(&tokens[prev_idx].token) {
                return true;
            }
        } else {
            // No previous token at all: a leading sign at start of input.
            return true;
        }
    }
    false
}
1107
/// Detects the pieces of a BigQuery hyphenated project identifier
/// (`my-project.dataset`): a `-` between word tokens that is later followed
/// by a `.` qualifier, in which case the tokens must stay glued.
fn is_bigquery_hyphenated_identifier_pair(
    left: &TokenWithSpan,
    right: &TokenWithSpan,
    tokens: &[TokenWithSpan],
    left_idx: usize,
    right_idx: usize,
) -> bool {
    // Case 1: the pair is `word -`; require a word after, then a `.`.
    if matches!(right.token, Token::Minus) {
        if !matches!(left.token, Token::Word(_)) {
            return false;
        }
        let Some(next_word_idx) = next_non_trivia_index(tokens, right_idx + 1) else {
            return false;
        };
        if !matches!(tokens[next_word_idx].token, Token::Word(_)) {
            return false;
        }
        let Some(next_after_word_idx) = next_non_trivia_index(tokens, next_word_idx + 1) else {
            return false;
        };
        return matches!(tokens[next_after_word_idx].token, Token::Period);
    }

    // Case 2: the pair is `- word`; require a word before and a `.` after.
    if matches!(left.token, Token::Minus) {
        if !matches!(right.token, Token::Word(_)) {
            return false;
        }
        let Some(prev_word_idx) = prev_non_trivia_index(tokens, left_idx) else {
            return false;
        };
        if !matches!(tokens[prev_word_idx].token, Token::Word(_)) {
            return false;
        }
        let Some(next_idx) = next_non_trivia_index(tokens, right_idx + 1) else {
            return false;
        };
        return matches!(tokens[next_idx].token, Token::Period);
    }

    false
}
1149
/// Detects `/`-separated filesystem/stage path fragments (Databricks,
/// ClickHouse, Snowflake) so the slash is not treated as division that needs
/// spaces. Requires a word on at least one side of the `/` plus a nearby
/// path context: a JAR/MODEL keyword, or an `@stage` reference for Snowflake.
fn is_filesystem_path_pair(
    left: &TokenWithSpan,
    right: &TokenWithSpan,
    tokens: &[TokenWithSpan],
    left_idx: usize,
    right_idx: usize,
    dialect: Dialect,
) -> bool {
    if !matches!(
        dialect,
        Dialect::Databricks | Dialect::Clickhouse | Dialect::Snowflake
    ) {
        return false;
    }

    // Locate the `/` within the pair (either side).
    let div_index = if matches!(left.token, Token::Div) {
        Some(left_idx)
    } else if matches!(right.token, Token::Div) {
        // `JAR /path` keeps its space: the keyword itself is not a path
        // segment unless it is already inside one (preceded by another `/`).
        let left_is_context_keyword = is_path_context_keyword_token(&left.token);
        let left_is_path_segment = prev_non_trivia_index(tokens, left_idx)
            .is_some_and(|idx| matches!(tokens[idx].token, Token::Div));
        if left_is_context_keyword && !left_is_path_segment {
            return false;
        }
        Some(right_idx)
    } else {
        None
    };
    let Some(div_index) = div_index else {
        return false;
    };

    // A path slash has a word token on at least one side.
    let prev_idx = prev_non_trivia_index(tokens, div_index);
    let next_idx = next_non_trivia_index(tokens, div_index + 1);
    let prev_ok = prev_idx.is_some_and(|idx| matches!(tokens[idx].token, Token::Word(_)));
    let next_ok = next_idx.is_some_and(|idx| matches!(tokens[idx].token, Token::Word(_)));
    if !(prev_ok || next_ok) {
        return false;
    }

    // Snowflake paths live after a stage reference (`@stage/...`).
    if dialect == Dialect::Snowflake {
        return snowflake_stage_path_context_within(tokens, div_index, 12);
    }

    path_context_keyword_within(tokens, div_index, 6)
}
1196
1197fn is_path_context_keyword_token(token: &Token) -> bool {
1198 let Token::Word(word) = token else {
1199 return false;
1200 };
1201 word.value.eq_ignore_ascii_case("JAR") || word.value.eq_ignore_ascii_case("MODEL")
1202}
1203
/// Looks back up to `limit` non-trivia tokens for a JAR/MODEL word that
/// marks a filesystem-path context.
fn path_context_keyword_within(tokens: &[TokenWithSpan], from_idx: usize, limit: usize) -> bool {
    let mut cursor = from_idx;
    let mut steps = 0usize;
    while let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) {
        if let Token::Word(word) = &tokens[prev_idx].token {
            if matches!(word.keyword, Keyword::JAR) {
                return true;
            }
            // NOTE(review): the value comparison below already covers "JAR"
            // (plus "MODEL"), making the keyword check above redundant —
            // confirm before simplifying.
            if word.value.eq_ignore_ascii_case("JAR") || word.value.eq_ignore_ascii_case("MODEL") {
                return true;
            }
        }
        cursor = prev_idx;
        steps += 1;
        if steps >= limit {
            break;
        }
    }
    false
}
1224
/// Looks back up to `limit` non-trivia tokens for a Snowflake stage
/// reference: a bare `@` token or an `@`-prefixed word.
fn snowflake_stage_path_context_within(
    tokens: &[TokenWithSpan],
    from_idx: usize,
    limit: usize,
) -> bool {
    let mut cursor = from_idx;
    let mut steps = 0usize;
    while let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) {
        match &tokens[prev_idx].token {
            Token::AtSign => return true,
            Token::Word(word) if word.value.starts_with('@') => return true,
            _ => {}
        }
        cursor = prev_idx;
        steps += 1;
        if steps >= limit {
            break;
        }
    }
    false
}
1246
1247fn is_unary_prefix_context(token: &Token) -> bool {
1249 if matches!(
1250 token,
1251 Token::Comma
1252 | Token::LParen
1253 | Token::Eq
1254 | Token::Neq
1255 | Token::Lt
1256 | Token::Gt
1257 | Token::LtEq
1258 | Token::GtEq
1259 ) {
1260 return true;
1261 }
1262 if let Token::Word(w) = token {
1263 if matches!(
1264 w.keyword,
1265 Keyword::SELECT
1266 | Keyword::WHERE
1267 | Keyword::WHEN
1268 | Keyword::THEN
1269 | Keyword::ELSE
1270 | Keyword::AND
1271 | Keyword::OR
1272 | Keyword::ON
1273 | Keyword::SET
1274 | Keyword::CASE
1275 | Keyword::BETWEEN
1276 | Keyword::IN
1277 | Keyword::VALUES
1278 | Keyword::INTERVAL
1279 | Keyword::YEAR
1280 | Keyword::MONTH
1281 | Keyword::DAY
1282 | Keyword::HOUR
1283 | Keyword::MINUTE
1284 | Keyword::SECOND
1285 | Keyword::RETURN
1286 | Keyword::RETURNS
1287 ) {
1288 return true;
1289 }
1290 }
1291 false
1292}
1293
/// Decides the expected whitespace between `left` and a following `(`.
///
/// Arm order is significant: earlier guards (e.g. the `is_literal` guard)
/// shadow later concrete patterns.
fn expected_spacing_before_lparen(
    left: &TokenWithSpan,
    tokens: &[TokenWithSpan],
    left_idx: usize,
    dialect: Dialect,
) -> ExpectedSpacing {
    match &left.token {
        // Unquoted words: keywords and identifiers get case-by-case rules.
        Token::Word(w) if w.quote_style.is_none() => {
            if dialect == Dialect::Snowflake {
                // MATCH_RECOGNIZE (...) / PATTERN (...) keep one space.
                if w.value.eq_ignore_ascii_case("MATCH_RECOGNIZE")
                    || w.value.eq_ignore_ascii_case("PATTERN")
                {
                    return ExpectedSpacing::Single;
                }
                // match_condition(...) is written inline, no space.
                if w.value.eq_ignore_ascii_case("MATCH_CONDITION") {
                    return ExpectedSpacing::NoneInline;
                }
            }
            // EXISTS is a predicate (space) after logical/comparison context,
            // otherwise treated like a function name (no space).
            if w.value.eq_ignore_ascii_case("EXISTS") {
                if exists_requires_space_before_lparen(tokens, left_idx) {
                    return ExpectedSpacing::Single;
                }
                return ExpectedSpacing::NoneInline;
            }
            if is_keyword_requiring_space_before_paren(w.keyword) {
                // AS( must stay on the same line as the paren (CTE bodies).
                if matches!(w.keyword, Keyword::AS) {
                    return ExpectedSpacing::SingleInline;
                }
                return ExpectedSpacing::Single;
            }
            // INSERT INTO name (cols) — target name takes a space.
            if is_insert_into_target_name(tokens, left_idx) {
                return ExpectedSpacing::Single;
            }
            if w.keyword == Keyword::NoKeyword {
                // REFERENCES tab(cols) and COPY INTO tab(cols) want a space.
                if is_reference_target_name(tokens, left_idx) {
                    return ExpectedSpacing::Single;
                }
                if is_copy_into_target_name(tokens, left_idx) {
                    return ExpectedSpacing::Single;
                }
                // Qualified DDL names (db.schema.tbl) are left alone.
                if is_qualified_ddl_object_name(tokens, left_idx) {
                    return ExpectedSpacing::Skip;
                }
            }
            // Plain identifiers read as function calls: no space, same line.
            ExpectedSpacing::NoneInline
        }
        Token::RParen | Token::RBracket => ExpectedSpacing::Single,
        _ if is_literal(&left.token) => ExpectedSpacing::Single,
        // NOTE(review): if `is_literal` already matches numeric tokens, this
        // arm is unreachable and its `None` never applies — confirm against
        // `is_literal`'s definition.
        Token::Number(_, _) => ExpectedSpacing::None,
        Token::Comma => ExpectedSpacing::Single,
        // Operators are handled by the operator-spacing checks, not here.
        _ if is_binary_operator(&left.token) || is_comparison_operator(&left.token) => {
            ExpectedSpacing::Skip
        }
        _ => ExpectedSpacing::Skip,
    }
}
1366
1367fn exists_requires_space_before_lparen(tokens: &[TokenWithSpan], left_idx: usize) -> bool {
1368 let Some(prev_idx) = prev_non_trivia_index(tokens, left_idx) else {
1369 return false;
1370 };
1371
1372 match &tokens[prev_idx].token {
1373 Token::Word(word) => {
1374 matches!(
1375 word.keyword,
1376 Keyword::AND
1377 | Keyword::OR
1378 | Keyword::NOT
1379 | Keyword::WHERE
1380 | Keyword::HAVING
1381 | Keyword::WHEN
1382 | Keyword::THEN
1383 | Keyword::ELSE
1384 ) || matches!(
1385 word.value.to_ascii_uppercase().as_str(),
1386 "AND" | "OR" | "NOT" | "WHERE" | "HAVING" | "WHEN" | "THEN" | "ELSE"
1387 )
1388 }
1389 Token::RParen
1390 | Token::LParen
1391 | Token::Eq
1392 | Token::Neq
1393 | Token::Lt
1394 | Token::Gt
1395 | Token::LtEq
1396 | Token::GtEq => true,
1397 _ => false,
1398 }
1399}
1400
1401fn is_keyword_requiring_space_before_paren(keyword: Keyword) -> bool {
1403 matches!(
1404 keyword,
1405 Keyword::AS
1406 | Keyword::USING
1407 | Keyword::FROM
1408 | Keyword::JOIN
1409 | Keyword::ON
1410 | Keyword::WHERE
1411 | Keyword::IN
1412 | Keyword::BETWEEN
1413 | Keyword::WHEN
1414 | Keyword::THEN
1415 | Keyword::ELSE
1416 | Keyword::AND
1417 | Keyword::OR
1418 | Keyword::NOT
1419 | Keyword::HAVING
1420 | Keyword::OVER
1421 | Keyword::PARTITION
1422 | Keyword::ORDER
1423 | Keyword::GROUP
1424 | Keyword::LIMIT
1425 | Keyword::UNION
1426 | Keyword::INTERSECT
1427 | Keyword::EXCEPT
1428 | Keyword::RECURSIVE
1429 | Keyword::WITH
1430 | Keyword::SELECT
1431 | Keyword::INTO
1432 | Keyword::TABLE
1433 | Keyword::VALUES
1434 | Keyword::SET
1435 | Keyword::RETURNS
1436 | Keyword::FILTER
1437 | Keyword::CONFLICT
1438 | Keyword::BY
1439 )
1440}
1441
1442fn expected_spacing_after_rparen(
1444 right: &TokenWithSpan,
1445 _tokens: &[TokenWithSpan],
1446 _right_idx: usize,
1447) -> ExpectedSpacing {
1448 match &right.token {
1449 Token::Period | Token::DoubleColon | Token::LBracket | Token::RBracket => {
1451 ExpectedSpacing::None
1452 }
1453 Token::Comma => ExpectedSpacing::None,
1455 Token::SemiColon => ExpectedSpacing::Skip,
1457 Token::RParen => ExpectedSpacing::None,
1459 Token::LParen => ExpectedSpacing::Single,
1461 _ => ExpectedSpacing::Single,
1463 }
1464}
1465
1466fn has_comment_between(tokens: &[TokenWithSpan], left: usize, right: usize) -> bool {
1467 tokens[left + 1..right].iter().any(|t| {
1468 matches!(
1469 t.token,
1470 Token::Whitespace(Whitespace::SingleLineComment { .. })
1471 | Token::Whitespace(Whitespace::MultiLineComment(_))
1472 )
1473 })
1474}
1475
1476fn template_spans(sql: &str) -> Vec<Lt01TemplateSpan> {
1477 let mut spans = Vec::new();
1478 let mut index = 0usize;
1479 while let Some((open, close)) = find_next_template_open(sql, index) {
1480 let payload_start = open + 2;
1481 if let Some(rel_close) = sql[payload_start..].find(close) {
1482 let close_index = payload_start + rel_close + close.len();
1483 spans.push((open, close_index));
1484 index = close_index;
1485 } else {
1486 spans.push((open, sql.len()));
1487 break;
1488 }
1489 }
1490 spans
1491}
1492
/// Finds the earliest Jinja opener (`{{`, `{%`, `{#`) at or after byte
/// offset `from`, returning its absolute position and the matching closer.
fn find_next_template_open(sql: &str, from: usize) -> Option<(usize, &'static str)> {
    let rest = sql.get(from..)?;
    let mut best: Option<(usize, &'static str)> = None;
    for (open, close) in [("{{", "}}"), ("{%", "%}"), ("{#", "#}")] {
        if let Some(offset) = rest.find(open) {
            let absolute = from + offset;
            if best.map_or(true, |(index, _)| absolute < index) {
                best = Some((absolute, close));
            }
        }
    }
    best
}
1500
/// Cheap pre-check: does `sql` contain any Jinja opener at all?
fn contains_template_marker(sql: &str) -> bool {
    ["{{", "{%", "{#"].iter().any(|marker| sql.contains(marker))
}
1504
1505fn overlaps_template_span(spans: &[Lt01TemplateSpan], start: usize, end: usize) -> bool {
1506 spans
1507 .iter()
1508 .any(|(template_start, template_end)| start < *template_end && end > *template_start)
1509}
1510
/// Records a violation for each national string literal (`N'...'`),
/// rewriting it to `N '...'` (a space after the `N` prefix).
///
/// Skipped for MSSQL, where `N'...'` is the native national-string syntax.
/// NOTE(review): presumably in other dialects the `N` reads as an
/// identifier needing separation — confirm against the rule's intent.
fn collect_ansi_national_string_literal_violations(
    sql: &str,
    tokens: &[TokenWithSpan],
    dialect: Dialect,
    templated_spans: &[Lt01TemplateSpan],
    violations: &mut Vec<Lt01Violation>,
) {
    if matches!(dialect, Dialect::Mssql) {
        return;
    }

    for token in tokens {
        let Token::NationalStringLiteral(_) = token.token else {
            continue;
        };
        // Map the token span back to byte offsets in the raw SQL text.
        let Some((start, end)) = token_offsets(sql, token) else {
            continue;
        };
        // Discard degenerate offsets and anything inside a template region.
        if start >= end || end > sql.len() || overlaps_template_span(templated_spans, start, end) {
            continue;
        }
        let raw = &sql[start..end];
        // Shortest possible literal is N'' (3 bytes).
        if raw.len() < 3 {
            continue;
        }
        let Some(prefix) = raw.chars().next() else {
            continue;
        };
        // Only rewrite the exact N'...' / n'...' shape.
        if !(prefix == 'N' || prefix == 'n') || !raw[1..].starts_with('\'') {
            continue;
        }
        // Keep the original prefix casing; insert a single space after it.
        let replacement = format!("{prefix} {}", &raw[1..]);
        violations.push(((start, end), vec![(start, end, replacement)]));
    }
}
1546
/// Lints SQL embedded in string literals inside template regions, e.g.
/// `{{ 'SELECT 1 ,4' }}`.
///
/// Each quoted fragment is tokenized and checked on its own; resulting
/// violations are re-based to absolute offsets but reported without edits
/// (detection-only), since rewriting inside templates is not safe.
fn collect_template_string_spacing_violations(
    sql: &str,
    dialect: Dialect,
    templated_spans: &[Lt01TemplateSpan],
    violations: &mut Vec<Lt01Violation>,
) {
    for (template_start, template_end) in templated_spans {
        let mut cursor = *template_start;
        while cursor < *template_end {
            // Find the next ' or " opening quote within this template span.
            let Some((quote_start, quote_char)) = next_quote_in_range(sql, cursor, *template_end)
            else {
                break;
            };
            let Some(quote_end) =
                find_closing_quote(sql, quote_start + 1, *template_end, quote_char)
            else {
                break;
            };
            let content = &sql[quote_start + 1..quote_end];
            // Fragments that fail to tokenize are skipped, not reported.
            let Some(tokens) = tokenized(content, dialect) else {
                cursor = quote_end + 1;
                continue;
            };

            // Run the normal checks on the fragment, with no nested
            // template spans (we are already inside one).
            let mut fragment_violations = Vec::new();
            collect_pair_spacing_violations(
                content,
                &tokens,
                dialect,
                &[],
                &mut fragment_violations,
            );
            collect_ansi_national_string_literal_violations(
                content,
                &tokens,
                dialect,
                &[],
                &mut fragment_violations,
            );

            for ((start, end), _) in fragment_violations {
                if start >= end || end > content.len() {
                    continue;
                }
                // Shift fragment-relative offsets past the opening quote.
                let absolute_start = quote_start + 1 + start;
                let absolute_end = quote_start + 1 + end;
                // Empty edit list: detection-only inside templates.
                violations.push(((absolute_start, absolute_end), Vec::new()));
            }

            cursor = quote_end + 1;
        }
    }
}
1600
/// Byte position and kind of the first `'` or `"` in `sql[start..end)`,
/// stepping one full character at a time so multibyte text is never split.
fn next_quote_in_range(sql: &str, start: usize, end: usize) -> Option<(usize, char)> {
    let mut at = start;
    while at < end {
        let ch = sql[at..].chars().next()?;
        match ch {
            '\'' | '"' => return Some((at, ch)),
            _ => at += ch.len_utf8(),
        }
    }
    None
}
1612
/// Byte position of the unescaped `quote` that closes a literal, scanning
/// `sql[start..end)`. Backslash escapes the following character.
fn find_closing_quote(sql: &str, start: usize, end: usize, quote: char) -> Option<usize> {
    let mut at = start;
    while at < end {
        let ch = sql[at..].chars().next()?;
        match ch {
            // Backslash escapes the next character when one fits in range;
            // a trailing backslash at the boundary is treated literally.
            '\\' if at + 1 < end => {
                let escaped = sql[at + 1..].chars().next()?;
                at += 1 + escaped.len_utf8();
            }
            c if c == quote => return Some(at),
            c => at += c.len_utf8(),
        }
    }
    None
}
1632
/// Token indices belonging to Snowflake `PATTERN ( ... )` bodies, where
/// regex-like contents (e.g. `(A | B){5} C+`) must be exempt from normal
/// spacing checks.
///
/// `non_trivia` is the ordered list of non-whitespace token indices; the
/// returned set contains raw `tokens` indices from the opening paren
/// through its matching close.
fn snowflake_pattern_token_indices(
    tokens: &[TokenWithSpan],
    non_trivia: &[usize],
) -> HashSet<usize> {
    let mut out = HashSet::new();
    // `cursor` is a position within `non_trivia`, not a token index.
    let mut cursor = 0usize;

    while cursor < non_trivia.len() {
        let token_index = non_trivia[cursor];
        let Some(Token::Word(word)) = Some(&tokens[token_index].token) else {
            cursor += 1;
            continue;
        };
        if !word.value.eq_ignore_ascii_case("PATTERN") {
            cursor += 1;
            continue;
        }

        // Locate the first `(` after the PATTERN keyword.
        let Some(paren_pos) = ((cursor + 1)..non_trivia.len())
            .find(|idx| matches!(tokens[non_trivia[*idx]].token, Token::LParen))
        else {
            cursor += 1;
            continue;
        };

        // Walk forward tracking paren depth to find the matching close.
        let mut depth = 0usize;
        let mut end_pos = None;
        for (pos, idx) in non_trivia.iter().copied().enumerate().skip(paren_pos) {
            match tokens[idx].token {
                Token::LParen => depth += 1,
                Token::RParen => {
                    // Defensive: a stray `)` before any `(` aborts the scan
                    // (unreachable when the scan starts at an LParen).
                    if depth == 0 {
                        break;
                    }
                    depth -= 1;
                    if depth == 0 {
                        end_pos = Some(pos);
                        break;
                    }
                }
                _ => {}
            }
        }

        // Unbalanced parens: skip this PATTERN and keep scanning.
        let Some(end_pos) = end_pos else {
            cursor += 1;
            continue;
        };
        // Mark every token from `(` through the matching `)` inclusive.
        for idx in non_trivia.iter().take(end_pos + 1).skip(paren_pos) {
            out.insert(*idx);
        }
        cursor = end_pos + 1;
    }

    out
}
1689
/// Token indices of `<`/`>`/`>>` that delimit type expressions such as
/// `ARRAY<INT>` or `STRUCT<STRUCT<INT>>`, so they are not treated as
/// comparison operators.
///
/// Uses a stack of open angle brackets; `>>` closes two levels at once.
fn type_angle_token_indices(tokens: &[TokenWithSpan], non_trivia: &[usize]) -> HashSet<usize> {
    let mut out = HashSet::new();
    let mut stack = Vec::<usize>::new();

    for (pos, token_idx) in non_trivia.iter().copied().enumerate() {
        let token = &tokens[token_idx].token;
        match token {
            Token::Lt => {
                // A `<` opens a type argument list only when the previous
                // non-trivia token is ARRAY/STRUCT/MAP.
                let prev_idx = pos
                    .checked_sub(1)
                    .and_then(|value| non_trivia.get(value).copied());
                if prev_idx.is_some_and(|idx| is_type_constructor(&tokens[idx].token)) {
                    out.insert(token_idx);
                    stack.push(token_idx);
                }
            }
            Token::Gt => {
                // `>` closes the innermost open angle, if any.
                if !stack.is_empty() {
                    out.insert(token_idx);
                    stack.pop();
                }
            }
            Token::ShiftRight => {
                // `>>` (tokenized as shift-right) closes two nested angles.
                if stack.len() >= 2 {
                    out.insert(token_idx);
                    stack.pop();
                    stack.pop();
                }
            }
            _ => {}
        }
    }

    out
}
1725
1726fn supports_type_angle_spacing(dialect: Dialect) -> bool {
1727 matches!(
1728 dialect,
1729 Dialect::Bigquery | Dialect::Hive | Dialect::Databricks
1730 )
1731}
1732
1733fn is_type_constructor(token: &Token) -> bool {
1734 let Token::Word(word) = token else {
1735 return false;
1736 };
1737 word.value.eq_ignore_ascii_case("ARRAY")
1738 || word.value.eq_ignore_ascii_case("STRUCT")
1739 || word.value.eq_ignore_ascii_case("MAP")
1740}
1741
1742fn is_type_angle_spacing_pair(
1743 left: &TokenWithSpan,
1744 right: &TokenWithSpan,
1745 left_idx: usize,
1746 right_idx: usize,
1747 type_angle_tokens: &HashSet<usize>,
1748) -> bool {
1749 let left_is_type_angle = type_angle_tokens.contains(&left_idx);
1750 let right_is_type_angle = type_angle_tokens.contains(&right_idx);
1751
1752 if right_is_type_angle && matches!(right.token, Token::Lt | Token::Gt | Token::ShiftRight) {
1753 return true;
1754 }
1755 if left_is_type_angle && matches!(left.token, Token::Lt) {
1756 return true;
1757 }
1758 if left_is_type_angle
1759 && matches!(left.token, Token::Gt | Token::ShiftRight)
1760 && matches!(
1761 right.token,
1762 Token::Comma | Token::RParen | Token::RBracket | Token::LBracket | Token::Gt
1763 )
1764 {
1765 return true;
1766 }
1767
1768 false
1769}
1770
/// Tokenizes `sql` with the sqlparser dialect matching `dialect`, returning
/// `None` when tokenization fails (e.g. unterminated strings).
fn tokenized(sql: &str, dialect: Dialect) -> Option<Vec<TokenWithSpan>> {
    let dialect = dialect.to_sqlparser_dialect();
    let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
    tokenizer.tokenize_with_location().ok()
}
1780
/// Extracts the document-level token stream restricted to the current
/// statement, with each token's span rebased so line/column 1,1 is the
/// statement's start.
///
/// Returns `None` when no tokens fall inside the statement range or the
/// statement start cannot be located.
fn tokenized_for_context(ctx: &LintContext) -> Option<Vec<TokenWithSpan>> {
    let (statement_start_line, statement_start_column) =
        offset_to_line_col(ctx.sql, ctx.statement_range.start)?;

    ctx.with_document_tokens(|tokens| {
        if tokens.is_empty() {
            return None;
        }

        let mut out = Vec::new();
        for token in tokens {
            // Drop tokens whose byte range can't be resolved or that fall
            // outside the statement being linted.
            let Some((start, end)) = token_with_span_offsets(ctx.sql, token) else {
                continue;
            };
            if start < ctx.statement_range.start || end > ctx.statement_range.end {
                continue;
            }

            // Rebase both span endpoints to statement-relative coordinates;
            // tokens before the statement start are skipped.
            let Some(start_loc) = relative_location(
                token.span.start,
                statement_start_line,
                statement_start_column,
            ) else {
                continue;
            };
            let Some(end_loc) =
                relative_location(token.span.end, statement_start_line, statement_start_column)
            else {
                continue;
            };

            out.push(TokenWithSpan::new(
                token.token.clone(),
                Span::new(start_loc, end_loc),
            ));
        }

        if out.is_empty() {
            None
        } else {
            Some(out)
        }
    })
}
1825
1826fn token_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
1827 let start = line_col_to_offset(
1828 sql,
1829 token.span.start.line as usize,
1830 token.span.start.column as usize,
1831 )?;
1832 let end = line_col_to_offset(
1833 sql,
1834 token.span.end.line as usize,
1835 token.span.end.column as usize,
1836 )?;
1837 Some((start, end))
1838}
1839
1840fn next_non_trivia_index(tokens: &[TokenWithSpan], mut index: usize) -> Option<usize> {
1841 while index < tokens.len() {
1842 if !is_trivia_token(&tokens[index].token) {
1843 return Some(index);
1844 }
1845 index += 1;
1846 }
1847 None
1848}
1849
1850fn prev_non_trivia_index(tokens: &[TokenWithSpan], mut index: usize) -> Option<usize> {
1851 while index > 0 {
1852 index -= 1;
1853 if !is_trivia_token(&tokens[index].token) {
1854 return Some(index);
1855 }
1856 }
1857 None
1858}
1859
1860fn is_trivia_token(token: &Token) -> bool {
1861 matches!(
1862 token,
1863 Token::Whitespace(Whitespace::Space | Whitespace::Newline | Whitespace::Tab)
1864 | Token::Whitespace(Whitespace::SingleLineComment { .. })
1865 | Token::Whitespace(Whitespace::MultiLineComment(_))
1866 )
1867}
1868
/// Converts 1-based `(line, column)` coordinates into a byte offset,
/// counting columns in characters. Returns `None` for zero coordinates or
/// positions beyond the end of `sql`; the position one past the final
/// character maps to `sql.len()`.
fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
    if line == 0 || column == 0 {
        return None;
    }

    let (mut cur_line, mut cur_col) = (1usize, 1usize);
    for (offset, ch) in sql.char_indices() {
        if (cur_line, cur_col) == (line, column) {
            return Some(offset);
        }
        match ch {
            '\n' => {
                cur_line += 1;
                cur_col = 1;
            }
            _ => cur_col += 1,
        }
    }

    // End-of-input is a valid position (one past the last character).
    ((cur_line, cur_col) == (line, column)).then_some(sql.len())
}
1896
1897fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
1898 let start = line_col_to_offset(
1899 sql,
1900 token.span.start.line as usize,
1901 token.span.start.column as usize,
1902 )?;
1903 let end = line_col_to_offset(
1904 sql,
1905 token.span.end.line as usize,
1906 token.span.end.column as usize,
1907 )?;
1908 Some((start, end))
1909}
1910
/// Converts a byte `offset` into 1-based `(line, column)` coordinates,
/// counting columns in characters.
///
/// Returns `None` when `offset` is past the end of `sql` or does not fall
/// on a character boundary. (The previous implementation's scan never
/// matched a mid-character offset and fell through to wrongly report the
/// end-of-input position.) Results for all valid offsets are unchanged.
fn offset_to_line_col(sql: &str, offset: usize) -> Option<(usize, usize)> {
    // `get` rejects both out-of-range offsets and offsets that would split
    // a multibyte character; `..len` is always accepted.
    let prefix = sql.get(..offset)?;
    let line = 1 + prefix.matches('\n').count();
    // Column restarts after the last newline; count characters, not bytes.
    let last_line = prefix.rsplit('\n').next().unwrap_or("");
    Some((line, last_line.chars().count() + 1))
}
1940
1941fn relative_location(
1942 location: Location,
1943 statement_start_line: usize,
1944 statement_start_column: usize,
1945) -> Option<Location> {
1946 if location.line == 0 || location.column == 0 {
1947 return None;
1948 }
1949
1950 let line = location.line as usize;
1951 let column = location.column as usize;
1952 if line < statement_start_line {
1953 return None;
1954 }
1955
1956 let relative_line = line - statement_start_line + 1;
1957 let relative_column = if line == statement_start_line {
1958 if column < statement_start_column {
1959 return None;
1960 }
1961 column - statement_start_column + 1
1962 } else {
1963 column
1964 };
1965
1966 Some(Location::new(relative_line as u64, relative_column as u64))
1967}
1968
1969#[cfg(test)]
1970mod tests {
1971 use super::*;
1972 use crate::linter::rule::with_active_dialect;
1973 use crate::parser::parse_sql;
1974 use crate::types::{Dialect, IssueAutofixApplicability};
1975
1976 fn run(sql: &str) -> Vec<Issue> {
1977 run_with_dialect(sql, Dialect::Generic)
1978 }
1979
1980 fn run_with_dialect(sql: &str, dialect: Dialect) -> Vec<Issue> {
1981 let statements = parse_sql(sql).expect("parse");
1982 let rule = LayoutSpacing::default();
1983 with_active_dialect(dialect, || {
1984 statements
1985 .iter()
1986 .enumerate()
1987 .flat_map(|(index, statement)| {
1988 rule.check(
1989 statement,
1990 &LintContext {
1991 sql,
1992 statement_range: 0..sql.len(),
1993 statement_index: index,
1994 },
1995 )
1996 })
1997 .collect()
1998 })
1999 }
2000
2001 fn run_statementless_with_dialect(sql: &str, dialect: Dialect) -> Vec<Issue> {
2002 run_statementless_with_rule(sql, dialect, LayoutSpacing::default())
2003 }
2004
2005 fn run_statementless_with_rule(sql: &str, dialect: Dialect, rule: LayoutSpacing) -> Vec<Issue> {
2006 let placeholder = parse_sql("SELECT 1").expect("parse placeholder");
2007 with_active_dialect(dialect, || {
2008 rule.check(
2009 &placeholder[0],
2010 &LintContext {
2011 sql,
2012 statement_range: 0..sql.len(),
2013 statement_index: 0,
2014 },
2015 )
2016 })
2017 }
2018
2019 fn apply_all_issue_autofixes(sql: &str, issues: &[Issue]) -> String {
2020 let mut out = sql.to_string();
2021 let mut edits = issues
2022 .iter()
2023 .filter_map(|issue| issue.autofix.as_ref())
2024 .flat_map(|autofix| autofix.edits.clone())
2025 .collect::<Vec<_>>();
2026 edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
2027 for edit in edits.into_iter().rev() {
2028 out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
2029 }
2030 out
2031 }
2032
2033 #[test]
2034 fn allows_bigquery_array_type_angle_brackets_without_spaces() {
2035 let issues = run_with_dialect(
2036 "SELECT ARRAY<FLOAT64>[1, 2, 3] AS floats;",
2037 Dialect::Bigquery,
2038 );
2039 assert!(issues.is_empty());
2040 }
2041
2042 #[test]
2043 fn allows_create_table_with_qualified_name_before_column_list() {
2044 let issues = run("CREATE TABLE db.schema_name.tbl_name (id INT)");
2045 assert!(issues.is_empty());
2046 }
2047
2048 #[test]
2049 fn fixes_reference_target_column_list_spacing() {
2050 let sql = "create table tab1 (b int references tab2(b))";
2051 let issues = run_statementless_with_dialect(sql, Dialect::Ansi);
2052 assert!(!issues.is_empty());
2053 let fixed = apply_all_issue_autofixes(sql, &issues);
2054 assert_eq!(fixed, "create table tab1 (b int references tab2 (b))");
2055 }
2056
2057 #[test]
2058 fn allows_bigquery_hyphenated_project_identifier() {
2059 let issues = run_statementless_with_dialect(
2060 "SELECT col_foo FROM foo-bar.foo.bar",
2061 Dialect::Bigquery,
2062 );
2063 assert!(issues.is_empty());
2064 }
2065
2066 #[test]
2067 fn allows_bigquery_function_array_offset_access() {
2068 let sql = "SELECT testFunction(a)[OFFSET(0)].* FROM table1";
2069 let issues = run_statementless_with_dialect(sql, Dialect::Bigquery);
2070 assert!(issues.is_empty());
2071 }
2072
2073 #[test]
2074 fn allows_hive_struct_and_array_datatype_angles() {
2075 let sql = "select col1::STRUCT<foo: int>, col2::ARRAY<int> from t";
2076 let issues = run_statementless_with_dialect(sql, Dialect::Hive);
2077 assert!(issues.is_empty());
2078 }
2079
2080 #[test]
2081 fn allows_sparksql_file_literal_path() {
2082 let sql = "ADD JAR path/to/some.jar;";
2083 let issues = run_statementless_with_dialect(sql, Dialect::Databricks);
2084 assert!(issues.is_empty());
2085 }
2086
2087 #[test]
2088 fn allows_clickhouse_system_model_path() {
2089 let sql = "SYSTEM RELOAD MODEL /model/path;";
2090 let issues = run_statementless_with_dialect(sql, Dialect::Clickhouse);
2091 assert!(issues.is_empty(), "unexpected issues: {issues:?}");
2092 }
2093
2094 #[test]
2095 fn detects_alias_alignment_when_configured() {
2096 let sql = "SELECT\n\tcol1 AS a,\n\tlonger_col AS b\nFROM t";
2097 let issues = run_statementless_with_rule(
2098 sql,
2099 Dialect::Ansi,
2100 LayoutSpacing {
2101 align_alias_expression: true,
2102 tab_space_size: 4,
2103 ..LayoutSpacing::default()
2104 },
2105 );
2106 assert!(!issues.is_empty());
2107 }
2108
2109 #[test]
2110 fn detects_alias_alignment_with_tabs_when_columns_are_equal_width() {
2111 let sql = "SELECT\n\tcol1 AS alias1,\n\tcol2 AS alias2\nFROM table1";
2112 let issues = run_statementless_with_rule(
2113 sql,
2114 Dialect::Ansi,
2115 LayoutSpacing {
2116 align_alias_expression: true,
2117 align_with_tabs: true,
2118 tab_space_size: 4,
2119 ..LayoutSpacing::default()
2120 },
2121 );
2122 assert!(
2123 !issues.is_empty(),
2124 "tab indentation alignment should flag spaces before AS"
2125 );
2126 }
2127
2128 #[test]
2129 fn detects_create_table_datatype_alignment_when_configured() {
2130 let sql = "CREATE TABLE tbl (\n foo VARCHAR(25) NOT NULL,\n barbar INT NULL\n)";
2131 let issues = run_statementless_with_rule(
2132 sql,
2133 Dialect::Ansi,
2134 LayoutSpacing {
2135 align_data_type: true,
2136 ..LayoutSpacing::default()
2137 },
2138 );
2139 assert!(!issues.is_empty());
2140 }
2141
2142 #[test]
2143 fn does_not_flag_create_table_alignment_when_columns_are_already_aligned() {
2144 let sql = "CREATE TABLE foo (\n x INT NOT NULL PRIMARY KEY,\n y INT NULL,\n z INT NULL\n);";
2145 let issues = run_statementless_with_rule(
2146 sql,
2147 Dialect::Ansi,
2148 LayoutSpacing {
2149 align_data_type: true,
2150 align_column_constraint: true,
2151 ..LayoutSpacing::default()
2152 },
2153 );
2154 assert!(
2155 issues.is_empty(),
2156 "expected no LT01 alignment issues: {issues:?}"
2157 );
2158 }
2159
2160 #[test]
2161 fn statementless_fixes_comment_on_function_spacing() {
2162 let sql = "COMMENT ON FUNCTION x (foo) IS 'y';";
2163 let issues = run_statementless_with_dialect(sql, Dialect::Postgres);
2164 assert!(!issues.is_empty());
2165 let fixed = apply_all_issue_autofixes(sql, &issues);
2166 assert_eq!(fixed, "COMMENT ON FUNCTION x(foo) IS 'y';");
2167 }
2168
2169 #[test]
2170 fn statementless_fixes_split_tsql_comparison_operator() {
2171 let sql = "SELECT col1 FROM table1 WHERE 1 > = 1";
2172 let issues = run_statementless_with_dialect(sql, Dialect::Mssql);
2173 assert!(!issues.is_empty());
2174 let fixed = apply_all_issue_autofixes(sql, &issues);
2175 assert_eq!(fixed, "SELECT col1 FROM table1 WHERE 1 >= 1");
2176 }
2177
2178 #[test]
2179 fn statementless_fixes_tsql_compound_assignment_operator() {
2180 let sql = "SET @param1+=1";
2181 let issues = run_statementless_with_dialect(sql, Dialect::Mssql);
2182 assert!(!issues.is_empty());
2183 let fixed = apply_all_issue_autofixes(sql, &issues);
2184 assert_eq!(fixed, "SET @param1 += 1");
2185 }
2186
2187 #[test]
2188 fn allows_sparksql_multi_unit_interval_minus() {
2189 let sql = "SELECT INTERVAL -2 HOUR '3' MINUTE AS col;";
2190 let issues = run_statementless_with_dialect(sql, Dialect::Databricks);
2191 assert!(issues.is_empty());
2192 }
2193
2194 #[test]
2195 fn ignore_templated_areas_skips_template_artifacts() {
2196 let sql = "{{ 'SELECT 1, 4' }}, 5, 6";
2197 let issues = run_statementless_with_rule(
2198 sql,
2199 Dialect::Generic,
2200 LayoutSpacing {
2201 ignore_templated_areas: true,
2202 ..LayoutSpacing::default()
2203 },
2204 );
2205 assert!(issues.is_empty(), "template-only spacing should be ignored");
2206 }
2207
2208 #[test]
2209 fn ignore_templated_areas_still_fixes_non_template_region() {
2210 let sql = "{{ 'SELECT 1, 4' }}, 5 , 6";
2211 let issues = run_statementless_with_rule(
2212 sql,
2213 Dialect::Generic,
2214 LayoutSpacing {
2215 ignore_templated_areas: true,
2216 ..LayoutSpacing::default()
2217 },
2218 );
2219 assert!(!issues.is_empty());
2220 let fixed = apply_all_issue_autofixes(sql, &issues);
2221 assert_eq!(fixed, "{{ 'SELECT 1, 4' }}, 5, 6");
2222 }
2223
2224 #[test]
2225 fn templated_string_content_is_checked_when_not_ignored() {
2226 let sql = "{{ 'SELECT 1 ,4' }}";
2227 let issues = run_statementless_with_rule(
2228 sql,
2229 Dialect::Generic,
2230 LayoutSpacing {
2231 ignore_templated_areas: false,
2232 ..LayoutSpacing::default()
2233 },
2234 );
2235 assert!(!issues.is_empty());
2236 assert!(
2237 issues.iter().all(|issue| issue.autofix.is_none()),
2238 "template-internal checks are detection-only"
2239 );
2240 }
2241
2242 #[test]
2243 fn templated_string_content_passes_when_clean() {
2244 let sql = "{{ 'SELECT 1, 4' }}";
2245 let issues = run_statementless_with_rule(
2246 sql,
2247 Dialect::Generic,
2248 LayoutSpacing {
2249 ignore_templated_areas: false,
2250 ..LayoutSpacing::default()
2251 },
2252 );
2253 assert!(issues.is_empty());
2254 }
2255
2256 #[test]
2257 fn allows_snowflake_match_recognize_pattern_spacing() {
2258 let sql = "select * from stock_price_history\n match_recognize (\n pattern ((A | B){5} C+)\n )";
2259 let issues = run_statementless_with_dialect(sql, Dialect::Snowflake);
2260 assert!(issues.is_empty(), "snowflake pattern syntax should pass");
2261 }
2262
2263 #[test]
2264 fn fixes_snowflake_match_condition_newline_before_paren() {
2265 let sql = "select\n table1.pk1\nfrom table1\n asof join\n table2\n match_condition\n (t1 > t2)";
2266 let issues = run_with_dialect(sql, Dialect::Snowflake);
2267 assert!(!issues.is_empty());
2268 let fixed = apply_all_issue_autofixes(sql, &issues);
2269 assert!(
2270 fixed.contains("match_condition(t1 > t2)"),
2271 "expected inline match_condition: {fixed}"
2272 );
2273 }
2274
2275 #[test]
2276 fn fixes_snowflake_copy_into_target_column_list_spacing() {
2277 let sql = "copy into DB.SCHEMA.ProblemHere(col1)\nfrom @my_stage/file";
2278 let issues = run_statementless_with_dialect(sql, Dialect::Snowflake);
2279 assert!(!issues.is_empty());
2280 let fixed = apply_all_issue_autofixes(sql, &issues);
2281 assert!(
2282 fixed.contains("DB.SCHEMA.ProblemHere (col1)"),
2283 "fixed: {fixed}"
2284 );
2285 }
2286
2287 #[test]
2288 fn fixes_snowflake_copy_into_target_column_list_spacing_with_placeholder_prefix() {
2289 let sql = "copy into ${env}_ENT_LANDING.SCHEMA_NAME.ProblemHere(col1)\nfrom @my_stage/file";
2290 let issues = run_statementless_with_dialect(sql, Dialect::Snowflake);
2291 assert!(!issues.is_empty());
2292 let fixed = apply_all_issue_autofixes(sql, &issues);
2293 assert!(
2294 fixed.contains(".SCHEMA_NAME.ProblemHere (col1)"),
2295 "fixed: {fixed}"
2296 );
2297 }
2298
2299 #[test]
2300 fn allows_snowflake_stage_path_without_spacing_around_slash() {
2301 let sql = "copy into t from @my_stage/file";
2302 let issues = run_statementless_with_dialect(sql, Dialect::Snowflake);
2303 assert!(
2304 issues.is_empty(),
2305 "snowflake stage path should not force spaces around slash: {issues:?}"
2306 );
2307 }
2308
2309 #[test]
2312 fn flags_trailing_whitespace() {
2313 let sql = "SELECT 1 \n";
2314 let issues = run(sql);
2315 assert!(!issues.is_empty(), "should flag trailing whitespace");
2316 let fixed = apply_all_issue_autofixes(sql, &issues);
2317 assert_eq!(fixed, "SELECT 1\n");
2318 }
2319
2320 #[test]
2321 fn flags_trailing_whitespace_on_initial_blank_line() {
2322 let sql = " \nSELECT 1 \n";
2323 let issues = run(sql);
2324 assert!(!issues.is_empty());
2325 let fixed = apply_all_issue_autofixes(sql, &issues);
2326 assert_eq!(fixed, "\nSELECT 1\n");
2327 }
2328
2329 #[test]
2332 fn flags_compact_operator() {
2333 let sql = "SELECT 1+2";
2334 let issues = run(sql);
2335 assert!(!issues.is_empty(), "should flag compact 1+2");
2336 let fixed = apply_all_issue_autofixes(sql, &issues);
2337 assert_eq!(fixed, "SELECT 1 + 2");
2338 }
2339
2340 #[test]
2341 fn flags_compact_operator_expression() {
2342 let sql = "select\n field,\n date(field_1)-date(field_2) as diff\nfrom tbl";
2343 let issues = run(sql);
2344 assert!(!issues.is_empty());
2345 let fixed = apply_all_issue_autofixes(sql, &issues);
2346 assert!(
2347 fixed.contains("date(field_1) - date(field_2)"),
2348 "should fix operator spacing: {fixed}"
2349 );
2350 }
2351
2352 #[test]
2353 fn flags_plus_between_identifier_and_literal() {
2354 let sql = "SELECT a +'b'+ 'c' FROM tbl";
2355 let issues = run(sql);
2356 assert!(
2357 !issues.is_empty(),
2358 "should flag operator spacing around string literals"
2359 );
2360 let fixed = apply_all_issue_autofixes(sql, &issues);
2361 assert_eq!(fixed, "SELECT a + 'b' + 'c' FROM tbl");
2362 }
2363
2364 #[test]
2365 fn does_not_flag_simple_spacing() {
2366 assert!(run("SELECT * FROM t WHERE a = 1").is_empty());
2367 }
2368
2369 #[test]
2370 fn does_not_flag_sign_indicators() {
2371 let issues = run("SELECT 1, +2, -4");
2372 assert!(
2374 issues.is_empty(),
2375 "unary signs should not be flagged: {issues:?}"
2376 );
2377 }
2378
2379 #[test]
2380 fn does_not_flag_newline_operator() {
2381 assert!(run("SELECT 1\n+ 2").is_empty());
2382 assert!(run("SELECT 1\n + 2").is_empty());
2383 }
2384
2385 #[test]
2388 fn flags_space_before_comma() {
2389 let sql = "SELECT 1 ,4";
2390 let issues = run(sql);
2391 assert!(!issues.is_empty(), "should flag space before comma");
2392 let fixed = apply_all_issue_autofixes(sql, &issues);
2393 assert_eq!(fixed, "SELECT 1, 4");
2394 }
2395
2396 #[test]
2397 fn flags_no_space_after_comma() {
2398 let sql = "SELECT 1,4";
2399 let issues = run(sql);
2400 assert!(!issues.is_empty(), "should flag missing space after comma");
2401 let fixed = apply_all_issue_autofixes(sql, &issues);
2402 assert_eq!(fixed, "SELECT 1, 4");
2403 }
2404
    #[test]
    fn flags_excessive_space_after_comma() {
        // Multiple spaces after a comma collapse to a single space.
        // NOTE(review): the input literal should contain more than one space
        // after the comma (otherwise this test is self-contradictory) —
        // confirm the extra whitespace was not lost in formatting.
        let sql = "SELECT 1, 4";
        let issues = run(sql);
        assert!(
            !issues.is_empty(),
            "should flag excessive space after comma"
        );
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT 1, 4");
    }
2416
2417 #[test]
2420 fn flags_missing_space_before_paren_after_keyword() {
2421 let sql = "SELECT * FROM(SELECT 1 AS C1)AS T1;";
2422 let issues = run(sql);
2423 assert!(!issues.is_empty(), "should flag FROM( and )AS: {issues:?}");
2424 let fixed = apply_all_issue_autofixes(sql, &issues);
2425 assert_eq!(fixed, "SELECT * FROM (SELECT 1 AS C1) AS T1;");
2426 }
2427
2428 #[test]
2431 fn flags_cte_missing_space_after_as() {
2432 let sql = "WITH a AS(select 1) select * from a";
2433 let issues = run(sql);
2434 assert!(!issues.is_empty(), "should flag AS(");
2435 let fixed = apply_all_issue_autofixes(sql, &issues);
2436 assert_eq!(fixed, "WITH a AS (select 1) select * from a");
2437 }
2438
    #[test]
    fn flags_cte_multiple_spaces_after_as() {
        // Multiple spaces between AS and ( collapse to a single space.
        // NOTE(review): the input literal should contain more than one space
        // between `AS` and `(` (otherwise this test is self-contradictory) —
        // confirm the extra whitespace was not lost in formatting.
        let sql = "WITH a AS (select 1) select * from a";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag AS (");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "WITH a AS (select 1) select * from a");
    }
2447
2448 #[test]
2449 fn flags_missing_space_after_using() {
2450 let sql = "select * from a JOIN b USING(x)";
2451 let issues = run(sql);
2452 assert!(!issues.is_empty(), "should flag USING(");
2453 let fixed = apply_all_issue_autofixes(sql, &issues);
2454 assert_eq!(fixed, "select * from a JOIN b USING (x)");
2455 }
2456
    #[test]
    fn flags_excessive_whitespace() {
        // A run of spaces between tokens collapses to a single space.
        // NOTE(review): the input literal should contain multiple spaces
        // between `SELECT` and `1` (as displayed, input equals the expected
        // output, which would contradict the non-empty-issues assertion) —
        // confirm the whitespace was not lost in formatting.
        let sql = "SELECT 1";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag excessive whitespace");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT 1");
    }
2467
    #[test]
    fn flags_excessive_whitespace_multi() {
        // Excess spaces around operators collapse while the line comment and
        // the leading indentation after the newline are preserved.
        // NOTE(review): the input literal should contain multi-space runs
        // around the `+` operators (as displayed, input equals the expected
        // output) — confirm the whitespace was not lost in formatting.
        let sql = "select\n 1 + 2 + 3 + 4 -- Comment\nfrom foo";
        let issues = run(sql);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(
            fixed,
            "select\n 1 + 2 + 3 + 4 -- Comment\nfrom foo"
        );
    }
2479
2480 #[test]
2483 fn flags_literal_operator_spacing() {
2484 let sql = "SELECT ('foo'||'bar') as buzz";
2485 let issues = run(sql);
2486 assert!(
2487 !issues.is_empty(),
2488 "should flag compact || operator: {issues:?}"
2489 );
2490 let fixed = apply_all_issue_autofixes(sql, &issues);
2491 assert_eq!(fixed, "SELECT ('foo' || 'bar') as buzz");
2492 }
2493
    #[test]
    fn flags_literal_as_spacing() {
        // A string literal followed immediately by AS must gain a space.
        // NOTE(review): the indentation after `\n` inside the literals may
        // originally have been wider — confirm it survived formatting.
        let sql = "SELECT\n 'foo'AS bar\nFROM foo";
        let issues = run(sql);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT\n 'foo' AS bar\nFROM foo");
    }
2502
2503 #[test]
2504 fn flags_ansi_national_string_literal_spacing() {
2505 let sql = "SELECT a + N'b' + N'c' FROM tbl;";
2506 let issues = run_with_dialect(sql, Dialect::Ansi);
2507 assert!(!issues.is_empty());
2508 let fixed = apply_all_issue_autofixes(sql, &issues);
2509 assert_eq!(fixed, "SELECT a + N 'b' + N 'c' FROM tbl;");
2510 }
2511
2512 #[test]
2515 fn does_not_flag_function_call() {
2516 assert!(run("SELECT foo(5) FROM T1;").is_empty());
2517 assert!(run("SELECT COUNT(*) FROM tbl\n\n").is_empty());
2518 }
2519
2520 #[test]
2523 fn flags_spaced_cast_operator() {
2524 let sql = "SELECT '1' :: INT;";
2525 let issues = run(sql);
2526 assert!(!issues.is_empty(), "should flag space around ::");
2527 let fixed = apply_all_issue_autofixes(sql, &issues);
2528 assert_eq!(fixed, "SELECT '1'::INT;");
2529 }
2530
2531 #[test]
2534 fn flags_compact_json_arrow_operator() {
2535 let sql = "SELECT payload->>'id' FROM t";
2536 let issues = run(sql);
2537 assert!(
2538 issues.len() >= 2,
2539 "should flag 2+ violations for compact json-arrow"
2540 );
2541 assert!(
2542 issues
2543 .iter()
2544 .all(|issue| issue.autofix.as_ref().is_some_and(
2545 |autofix| autofix.applicability == IssueAutofixApplicability::Safe
2546 )),
2547 "expected safe autofix metadata"
2548 );
2549
2550 let fixed = apply_all_issue_autofixes(sql, &issues);
2551 assert_eq!(fixed, "SELECT payload ->> 'id' FROM t");
2552 }
2553
    #[test]
    fn does_not_flag_exists_without_space_before_parenthesis() {
        // `EXISTS(` with no space before the parenthesis is accepted in a
        // select list.
        // NOTE(review): indentation after `\n` inside the literal may
        // originally have been wider — confirm it survived formatting.
        let no_space = "SELECT\n EXISTS(\n SELECT 1\n ) AS has_row\nFROM t";
        assert!(run(no_space).is_empty());
    }
2559
    #[test]
    fn flags_space_before_exists_parenthesis_in_select_list() {
        // In a select list, a space between EXISTS and ( is flagged and the
        // fix removes it.
        // NOTE(review): indentation after `\n` inside the literal may
        // originally have been wider — confirm it survived formatting.
        let sql = "SELECT 1,\n EXISTS (\n SELECT 1\n ) AS has_row\nFROM t";
        let issues = run(sql);
        assert!(
            !issues.is_empty(),
            "expected EXISTS-space violation in select list"
        );
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert!(
            fixed.contains("EXISTS(\n"),
            "expected EXISTS( after fix, got: {fixed}"
        );
    }
2574
    #[test]
    fn requires_space_before_exists_parenthesis_after_where() {
        // After WHERE, the opposite convention applies: `EXISTS(` is flagged
        // and the fix inserts a space before the parenthesis.
        // NOTE(review): indentation after `\n` inside the literal may
        // originally have been wider — confirm it survived formatting.
        let sql = "SELECT 1\nWHERE EXISTS(\n SELECT 1\n)";
        let issues = run(sql);
        assert!(
            !issues.is_empty(),
            "expected missing-space violation for WHERE EXISTS("
        );
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert!(
            fixed.contains("WHERE EXISTS (\n"),
            "expected WHERE EXISTS ( after fix, got: {fixed}"
        );
    }
2589
2590 #[test]
2591 fn merge_violations_prefers_fixable_duplicate_span() {
2592 let mut violations = vec![
2593 ((10, 10), Vec::new()),
2594 ((10, 10), vec![(10, 10, " ".to_string())]),
2595 ];
2596 merge_violations_by_span(&mut violations);
2597 assert_eq!(violations.len(), 1);
2598 assert_eq!(violations[0].0, (10, 10));
2599 assert_eq!(violations[0].1, vec![(10, 10, " ".to_string())]);
2600 }
2601
2602 #[test]
2605 fn does_not_flag_spacing_patterns_inside_literals_or_comments() {
2606 let issues = run("SELECT 'payload->>''id''' AS txt -- EXISTS (\nFROM t");
2607 assert!(
2608 issues.is_empty(),
2609 "should not flag content inside literals/comments: {issues:?}"
2610 );
2611 }
2612
2613 #[test]
2614 fn does_not_flag_correct_comma_spacing() {
2615 assert!(run("SELECT 1, 4").is_empty());
2616 }
2617
2618 #[test]
2619 fn does_not_flag_correct_cast() {
2620 assert!(run("SELECT '1'::INT;").is_empty());
2621 }
2622
2623 #[test]
2624 fn does_not_flag_qualified_identifiers() {
2625 assert!(run("SELECT a.b FROM c.d").is_empty());
2627 }
2628
2629 #[test]
2630 fn does_not_flag_newline_after_using() {
2631 assert!(
2632 run("select * from a JOIN b USING\n(x)").is_empty(),
2633 "newline between USING and ( should be acceptable"
2634 );
2635 }
2636
    #[test]
    fn flags_cte_newline_after_as() {
        // A newline between AS and ( in a CTE is flagged; the fix pulls the
        // parenthesis onto the AS line with a single space.
        // NOTE(review): indentation after `\n` inside the literals may
        // originally have been wider — confirm it survived formatting.
        let sql = "WITH a AS\n(\n select 1\n)\nselect * from a";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag AS + newline + (");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "WITH a AS (\n select 1\n)\nselect * from a");
    }
2645
    #[test]
    fn flags_cte_newline_and_spaces_after_as() {
        // Several blank lines plus leading spaces between AS and ( all
        // collapse to a single space.
        // NOTE(review): the run of spaces before `(` and the indentation
        // after `\n` may originally have been wider — confirm they survived
        // formatting.
        let sql = "WITH a AS\n\n\n (\n select 1\n)\nselect * from a";
        let issues = run(sql);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "WITH a AS (\n select 1\n)\nselect * from a");
    }
2654
2655 #[test]
2656 fn does_not_flag_comment_after_as() {
2657 assert!(
2659 run("WITH\na AS -- comment\n(\nselect 1\n)\nselect * from a").is_empty(),
2660 "comment between AS and ( should be acceptable"
2661 );
2662 }
2663
    #[test]
    fn insert_into_table_paren_allows_space() {
        // A space between the target table name and the column-list paren of
        // INSERT INTO must not raise LT01.
        // NOTE(review): indentation after `\n` inside the literal may
        // originally have been wider — confirm it survived formatting.
        let issues = run("INSERT INTO metrics.cold_start_daily (\n workspace_id\n) SELECT 1");
        let lt01 = issues
            .iter()
            .filter(|i| i.code == "LT01")
            .collect::<Vec<_>>();
        assert!(
            lt01.is_empty(),
            "INSERT INTO table ( should not flag LT01, got: {lt01:?}"
        );
    }
2677
    #[test]
    fn insert_into_table_paren_with_cte() {
        // Same INSERT INTO allowance, but with a leading CTE and under the
        // Postgres dialect.
        // NOTE(review): indentation after `\n` inside the literal may
        // originally have been wider — confirm it survived formatting.
        let sql = "WITH starts AS (\n SELECT 1\n)\nINSERT INTO metrics.cold_start_daily (\n workspace_id\n) SELECT workspace_id FROM starts";
        let issues = run_with_dialect(sql, Dialect::Postgres);
        let lt01 = issues
            .iter()
            .filter(|i| i.code == "LT01")
            .collect::<Vec<_>>();
        assert!(
            lt01.is_empty(),
            "INSERT INTO table ( with CTE should not flag LT01, got: {lt01:?}"
        );
    }
2692
    #[test]
    fn insert_into_table_paren_on_conflict() {
        // Full INSERT ... ON CONFLICT statement (statementless run, Postgres
        // dialect): none of the parenthesised segments should trigger LT01.
        // NOTE(review): the SQL's internal indentation may originally have
        // been wider — confirm it survived formatting.
        let sql = "\
WITH cte AS (
 SELECT workspace_id
 FROM ledger.query_history
 WHERE start_time >= $1
)

INSERT INTO metrics.cold_start_daily (
 workspace_id
)
SELECT workspace_id
FROM cte
ON CONFLICT (workspace_id) DO UPDATE
 SET workspace_id = excluded.workspace_id";
        let issues = run_statementless_with_dialect(sql, Dialect::Postgres);
        let lt01 = issues
            .iter()
            .filter(|i| i.code == "LT01")
            .collect::<Vec<_>>();
        assert!(
            lt01.is_empty(),
            "INSERT INTO table ( with ON CONFLICT should not flag LT01, got: {lt01:?}"
        );
    }
2720}