1use crate::linter::config::LintConfig;
8use crate::linter::rule::{LintContext, LintRule};
9use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit};
10use sqlparser::ast::Statement;
11use sqlparser::keywords::Keyword;
12use sqlparser::tokenizer::{Location, Span, Token, TokenWithSpan, Tokenizer, Whitespace};
13use std::collections::HashSet;
14
/// Rule LT01: flags inappropriate spacing — trailing whitespace, missing or
/// extra spaces between tokens, and (optionally) misaligned alias / data-type
/// columns.
pub struct LayoutSpacing {
    // Skip spacing checks inside templated regions when true.
    ignore_templated_areas: bool,
    // Detect misaligned `AS` keywords across consecutive alias lines.
    align_alias_expression: bool,
    // Detect misaligned data types in CREATE TABLE column lists.
    align_data_type: bool,
    // Detect misaligned column constraints in CREATE TABLE column lists.
    align_column_constraint: bool,
    // When aligning, the separator before the aligned token must be tabs.
    align_with_tabs: bool,
    // Display width of a tab stop; always >= 1.
    tab_space_size: usize,
}
23
24impl LayoutSpacing {
25 pub fn from_config(config: &LintConfig) -> Self {
26 let spacing_before_align = |type_name: &str| {
27 config
28 .config_section_object("layout.keyword_newline")
29 .and_then(|layout| layout.get(type_name))
30 .and_then(serde_json::Value::as_object)
31 .and_then(|entry| entry.get("spacing_before"))
32 .and_then(serde_json::Value::as_str)
33 .is_some_and(|value| value.to_ascii_lowercase().starts_with("align"))
34 };
35
36 Self {
37 ignore_templated_areas: config
38 .core_option_bool("ignore_templated_areas")
39 .unwrap_or(true),
40 align_alias_expression: spacing_before_align("alias_expression"),
41 align_data_type: spacing_before_align("data_type"),
42 align_column_constraint: spacing_before_align("column_constraint_segment"),
43 align_with_tabs: config
44 .section_option_str("indentation", "indent_unit")
45 .or_else(|| config.section_option_str("rules", "indent_unit"))
46 .is_some_and(|value| value.eq_ignore_ascii_case("tab")),
47 tab_space_size: config
48 .section_option_usize("indentation", "tab_space_size")
49 .or_else(|| config.section_option_usize("rules", "tab_space_size"))
50 .unwrap_or(4)
51 .max(1),
52 }
53 }
54
55 fn alignment_options(&self) -> Lt01AlignmentOptions {
56 Lt01AlignmentOptions {
57 alias_expression: self.align_alias_expression,
58 data_type: self.align_data_type,
59 column_constraint: self.align_column_constraint,
60 align_with_tabs: self.align_with_tabs,
61 tab_space_size: self.tab_space_size,
62 }
63 }
64}
65
66impl Default for LayoutSpacing {
67 fn default() -> Self {
68 Self {
69 ignore_templated_areas: true,
70 align_alias_expression: false,
71 align_data_type: false,
72 align_column_constraint: false,
73 align_with_tabs: false,
74 tab_space_size: 4,
75 }
76 }
77}
78
impl LintRule for LayoutSpacing {
    fn code(&self) -> &'static str {
        issue_codes::LINT_LT_001
    }

    fn name(&self) -> &'static str {
        "Layout spacing"
    }

    fn description(&self) -> &'static str {
        "Inappropriate Spacing."
    }

    /// Collect LT01 spacing violations for the statement and convert them
    /// into issues carrying optional safe autofix edits.
    fn check(&self, _statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
        let mut violations =
            spacing_violations(ctx, self.ignore_templated_areas, self.alignment_options());
        let has_remaining_non_whitespace = ctx.sql[ctx.statement_range.end..]
            .chars()
            .any(|ch| !ch.is_whitespace());
        // Fallback 1: the parser consumed only a prefix of the input (more
        // non-whitespace text remains and the statement is unterminated);
        // re-scan the whole input so text past the parse stop is linted too.
        let parser_fragment_fallback = ctx.statement_index == 0
            && ctx.statement_range.start == 0
            && ctx.statement_range.end < ctx.sql.len()
            && has_remaining_non_whitespace
            && !ctx.statement_sql().trim_end().ends_with(';');
        // Fallback 2: templated input where the statement range covers only
        // part of the raw text.
        let template_fragment_fallback = ctx.statement_index == 0
            && contains_template_marker(ctx.sql)
            && (ctx.statement_range.start > 0 || ctx.statement_range.end < ctx.sql.len());
        if parser_fragment_fallback || template_fragment_fallback {
            let full_ctx = LintContext {
                sql: ctx.sql,
                statement_range: 0..ctx.sql.len(),
                statement_index: 0,
            };
            violations.extend(spacing_violations(
                &full_ctx,
                self.ignore_templated_areas,
                self.alignment_options(),
            ));
            // The two passes may report the same span; merge their edits.
            merge_violations_by_span(&mut violations);
        }

        violations
            .into_iter()
            .map(|((start, end), edits)| {
                let mut issue =
                    Issue::info(issue_codes::LINT_LT_001, "Inappropriate spacing found.")
                        .with_statement(ctx.statement_index)
                        .with_span(ctx.span_from_statement_offset(start, end));
                // Detection-only violations (empty edit list) get no autofix.
                if !edits.is_empty() {
                    let edits = edits
                        .into_iter()
                        .map(|(edit_start, edit_end, replacement)| {
                            IssuePatchEdit::new(
                                ctx.span_from_statement_offset(edit_start, edit_end),
                                replacement.to_string(),
                            )
                        })
                        .collect();
                    issue = issue.with_autofix_edits(IssueAutofixApplicability::Safe, edits);
                }
                issue
            })
            .collect()
    }
}
144
// Byte-offset span (start, end) of a violation within the statement text.
type Lt01Span = (usize, usize);
// Autofix edit: replace sql[start..end] with the replacement string.
type Lt01AutofixEdit = (usize, usize, String);
// A flagged span plus the (possibly empty) list of edits that would fix it.
type Lt01Violation = (Lt01Span, Vec<Lt01AutofixEdit>);
// Byte-offset span of a templated region inside the statement text.
type Lt01TemplateSpan = (usize, usize);
149
150fn merge_violations_by_span(violations: &mut Vec<Lt01Violation>) {
151 violations.sort_unstable_by_key(|(span, _)| *span);
152 let mut merged: Vec<Lt01Violation> = Vec::with_capacity(violations.len());
153
154 for (span, edits) in violations.drain(..) {
155 if let Some((last_span, last_edits)) = merged.last_mut() {
156 if *last_span == span {
157 if last_edits.is_empty() && !edits.is_empty() {
158 *last_edits = edits;
159 } else if !last_edits.is_empty() && !edits.is_empty() {
160 for edit in edits {
161 if !last_edits.contains(&edit) {
162 last_edits.push(edit);
163 }
164 }
165 }
166 continue;
167 }
168 }
169
170 merged.push((span, edits));
171 }
172
173 *violations = merged;
174}
175
/// Alignment-related settings passed down to the detection helpers.
#[derive(Clone, Copy)]
struct Lt01AlignmentOptions {
    // Check alias `AS` alignment across consecutive lines.
    alias_expression: bool,
    // Check data-type alignment inside CREATE TABLE.
    data_type: bool,
    // Check column-constraint alignment inside CREATE TABLE.
    column_constraint: bool,
    // Require tab characters as the alignment separator.
    align_with_tabs: bool,
    // Display width of a tab stop.
    tab_space_size: usize,
}
184
/// Core LT01 pass: tokenize one statement and gather all spacing
/// violations, returned sorted and deduplicated by span.
fn spacing_violations(
    ctx: &LintContext,
    ignore_templated_areas: bool,
    alignment: Lt01AlignmentOptions,
) -> Vec<Lt01Violation> {
    let sql = ctx.statement_sql();
    let mut violations = Vec::new();
    let templated_spans = template_spans(sql);
    // For templated SQL prefer tokenizing the raw text (template markers
    // stay visible); otherwise prefer the context-provided tokenization.
    let prefer_raw_template_tokens = ctx.is_templated() && contains_template_marker(sql);
    let tokens = if prefer_raw_template_tokens {
        tokenized(sql, ctx.dialect()).or_else(|| tokenized_for_context(ctx))
    } else {
        tokenized_for_context(ctx).or_else(|| tokenized(sql, ctx.dialect()))
    };
    // Without any tokenization we cannot reason about spacing at all.
    let Some(tokens) = tokens else {
        return violations;
    };

    let dialect = ctx.dialect();

    collect_trailing_whitespace_violations(sql, &mut violations);
    collect_pair_spacing_violations(sql, &tokens, dialect, &templated_spans, &mut violations);
    collect_ansi_national_string_literal_violations(
        sql,
        &tokens,
        dialect,
        &templated_spans,
        &mut violations,
    );
    // Spacing inside templated regions is only linted when configured.
    if !ignore_templated_areas {
        collect_template_string_spacing_violations(sql, dialect, &templated_spans, &mut violations);
    }
    collect_alignment_detection_violations(sql, alignment, &mut violations);

    // Deterministic order, one violation per span.
    violations.sort_unstable_by_key(|(span, _)| *span);
    violations.dedup_by_key(|(span, _)| *span);

    violations
}
224
225fn collect_trailing_whitespace_violations(sql: &str, violations: &mut Vec<Lt01Violation>) {
230 let mut offset = 0;
231 for line in sql.split('\n') {
232 let trimmed = line.trim_end_matches([' ', '\t']);
233 let trailing_start = offset + trimmed.len();
234 let trailing_end = offset + line.len();
235 if trailing_end > trailing_start {
236 let span = (trailing_start, trailing_end);
237 let edit = (trailing_start, trailing_end, String::new());
238 violations.push((span, vec![edit]));
239 }
240 offset += line.len() + 1; }
242}
243
244fn collect_alignment_detection_violations(
245 sql: &str,
246 alignment: Lt01AlignmentOptions,
247 violations: &mut Vec<Lt01Violation>,
248) {
249 if alignment.alias_expression {
250 collect_alias_alignment_detection(
251 sql,
252 alignment.tab_space_size,
253 alignment.align_with_tabs,
254 violations,
255 );
256 }
257 if alignment.data_type || alignment.column_constraint {
258 collect_create_table_alignment_detection(sql, alignment.tab_space_size, violations);
259 }
260}
261
/// One alias line inside a candidate alignment group.
#[derive(Clone, Copy)]
struct AliasAlignmentEntry {
    // Byte offset of the `AS` keyword within the statement text.
    as_start: usize,
    // Display column of the `AS` keyword (tabs expanded).
    visual_col: usize,
    // Whether the whitespace run before `AS` is entirely tabs.
    separator_uses_tabs: bool,
}
268
269fn collect_alias_alignment_detection(
270 sql: &str,
271 tab_space_size: usize,
272 align_with_tabs: bool,
273 violations: &mut Vec<Lt01Violation>,
274) {
275 let lines: Vec<&str> = sql.split('\n').collect();
276 if lines.len() < 2 {
277 return;
278 }
279
280 let mut offset = 0usize;
281 let mut current_group: Vec<AliasAlignmentEntry> = Vec::new();
282
283 for line in &lines {
284 let lower = line.to_ascii_lowercase();
285 let alias_pos = lower.find(" as ");
286 let is_alias_line = alias_pos.is_some() && !lower.trim_start().starts_with("from ");
287
288 if is_alias_line {
289 let as_index = alias_pos.unwrap_or_default() + 1;
290 current_group.push(AliasAlignmentEntry {
291 as_start: offset + as_index,
292 visual_col: visual_width(&line[..as_index], tab_space_size),
293 separator_uses_tabs: alias_separator_uses_tabs(line, as_index),
294 });
295 } else if !current_group.is_empty() {
296 emit_alias_alignment_group(¤t_group, align_with_tabs, violations);
297 current_group.clear();
298 }
299
300 offset += line.len() + 1;
301 }
302
303 if !current_group.is_empty() {
304 emit_alias_alignment_group(¤t_group, align_with_tabs, violations);
305 }
306}
307
/// Return true when the whitespace run immediately before the alias `AS`
/// keyword (at byte offset `as_index`) consists entirely of tab characters.
fn alias_separator_uses_tabs(line: &str, as_index: usize) -> bool {
    let prefix = &line[..as_index];
    // Byte offset just past the last non-whitespace char in the prefix.
    let mut separator_start = 0;
    for (idx, ch) in prefix.char_indices() {
        if !ch.is_whitespace() {
            separator_start = idx + ch.len_utf8();
        }
    }
    let separator = &prefix[separator_start..];
    // 0x09 never appears inside a multi-byte UTF-8 sequence, so a byte scan
    // is equivalent to a char scan here.
    !separator.is_empty() && separator.bytes().all(|b| b == b'\t')
}
319
320fn emit_alias_alignment_group(
321 group: &[AliasAlignmentEntry],
322 align_with_tabs: bool,
323 violations: &mut Vec<Lt01Violation>,
324) {
325 if group.len() < 2 {
326 return;
327 }
328 let target_col = group
329 .iter()
330 .map(|entry| entry.visual_col)
331 .max()
332 .unwrap_or(0);
333 for entry in group {
334 if entry.visual_col != target_col || (align_with_tabs && !entry.separator_uses_tabs) {
335 let end = entry.as_start + 2;
336 violations.push(((entry.as_start, end), Vec::new()));
337 }
338 }
339}
340
341fn collect_create_table_alignment_detection(
342 sql: &str,
343 tab_space_size: usize,
344 violations: &mut Vec<Lt01Violation>,
345) {
346 let lines: Vec<&str> = sql.split('\n').collect();
347 let mut offset = 0usize;
348 let mut in_create_table = false;
349 let mut entries: Vec<(usize, usize)> = Vec::new();
350
351 for line in &lines {
352 let trimmed = line.trim_start();
353 let upper = trimmed.to_ascii_uppercase();
354 if !in_create_table && upper.starts_with("CREATE TABLE") {
355 in_create_table = true;
356 } else if in_create_table && (trimmed.starts_with(')') || trimmed.starts_with(';')) {
357 emit_create_table_alignment_group(&entries, violations);
358 entries.clear();
359 in_create_table = false;
360 }
361
362 if in_create_table
363 && !trimmed.is_empty()
364 && !trimmed.starts_with('(')
365 && !trimmed.starts_with(')')
366 && !trimmed.starts_with("--")
367 && !upper.starts_with("CREATE TABLE")
368 {
369 if let Some(data_type_start) = second_token_start(trimmed) {
370 let prefix_len = line.len() - trimmed.len();
371 let absolute = offset + prefix_len + data_type_start;
372 let visual = visual_width(&trimmed[..data_type_start], tab_space_size);
373 entries.push((absolute, visual));
374 }
375 }
376
377 offset += line.len() + 1;
378 }
379
380 if in_create_table && !entries.is_empty() {
381 emit_create_table_alignment_group(&entries, violations);
382 }
383}
384
385fn emit_create_table_alignment_group(
386 group: &[(usize, usize)],
387 violations: &mut Vec<Lt01Violation>,
388) {
389 if group.len() < 2 {
390 return;
391 }
392 let target_col = group.iter().map(|(_, col)| *col).max().unwrap_or(0);
393 for (start, col) in group {
394 if *col != target_col {
395 let end = *start + 1;
396 violations.push(((*start, end), Vec::new()));
397 }
398 }
399}
400
/// Byte offset of the start of the second whitespace-separated token in
/// `line`, or `None` when the line holds fewer than two tokens.
fn second_token_start(line: &str) -> Option<usize> {
    line.char_indices()
        .skip_while(|(_, ch)| ch.is_whitespace()) // leading indentation
        .skip_while(|(_, ch)| !ch.is_whitespace()) // first token
        .skip_while(|(_, ch)| ch.is_whitespace()) // separator run
        .next()
        .map(|(index, _)| index)
}
421
/// Display width of `text`, expanding each tab to the next multiple of
/// `tab_space_size`; every other character counts as one column.
fn visual_width(text: &str, tab_space_size: usize) -> usize {
    text.chars().fold(0usize, |width, ch| {
        if ch == '\t' {
            // Jump to the next tab stop.
            (width / tab_space_size + 1) * tab_space_size
        } else {
            width + 1
        }
    })
}
434
/// How much whitespace is expected between two adjacent non-trivia tokens.
#[derive(Debug, Clone, Copy, PartialEq)]
enum ExpectedSpacing {
    /// Exactly one space; newlines and comments between the pair are tolerated.
    Single,
    /// No whitespace; newlines and comments between the pair are tolerated.
    None,
    /// No whitespace, even across newlines (comments still tolerated).
    NoneInline,
    /// No opinion about this pair — skip it.
    Skip,
    /// Exactly one space, even across newlines (comments still tolerated).
    SingleInline,
}
453
/// Walk adjacent non-trivia token pairs and flag any whose actual gap text
/// disagrees with the spacing `expected_spacing` prescribes for the pair.
fn collect_pair_spacing_violations(
    sql: &str,
    tokens: &[TokenWithSpan],
    dialect: Dialect,
    templated_spans: &[Lt01TemplateSpan],
    violations: &mut Vec<Lt01Violation>,
) {
    // Indices of tokens that are neither whitespace/comments nor EOF.
    let non_trivia: Vec<usize> = tokens
        .iter()
        .enumerate()
        .filter(|(_, t)| !is_trivia_token(&t.token) && !matches!(t.token, Token::EOF))
        .map(|(i, _)| i)
        .collect();
    // Tokens inside `<...>` of parameterized types (dialect-dependent).
    let type_angle_tokens = if supports_type_angle_spacing(dialect) {
        type_angle_token_indices(tokens, &non_trivia)
    } else {
        HashSet::new()
    };
    // Tokens inside Snowflake MATCH_RECOGNIZE patterns are exempt entirely.
    let snowflake_pattern_tokens = if dialect == Dialect::Snowflake {
        snowflake_pattern_token_indices(tokens, &non_trivia)
    } else {
        HashSet::new()
    };

    for window in non_trivia.windows(2) {
        let left_idx = window[0];
        let right_idx = window[1];
        if dialect == Dialect::Snowflake
            && (snowflake_pattern_tokens.contains(&left_idx)
                || snowflake_pattern_tokens.contains(&right_idx))
        {
            continue;
        }
        let left = &tokens[left_idx];
        let right = &tokens[right_idx];

        let Some((left_start, left_end)) = token_offsets(sql, left) else {
            continue;
        };
        let Some((right_start, _)) = token_offsets(sql, right) else {
            continue;
        };

        // Defensive: skip pairs with inconsistent/out-of-range offsets.
        if left_end > right_start || right_start > sql.len() || left_end > sql.len() {
            continue;
        }
        // Spacing inside templated regions is handled elsewhere (if at all).
        if overlaps_template_span(templated_spans, left_start, right_start) {
            continue;
        }

        let gap = &sql[left_end..right_start];
        let has_newline = gap.contains('\n') || gap.contains('\r');
        let has_comment = has_comment_between(tokens, left_idx, right_idx);

        // Type-angle pairs (e.g. inside ARRAY<...>) are always glued.
        let expected = if supports_type_angle_spacing(dialect)
            && is_type_angle_spacing_pair(left, right, left_idx, right_idx, &type_angle_tokens)
        {
            ExpectedSpacing::None
        } else {
            expected_spacing(left, right, tokens, left_idx, right_idx, dialect)
        };

        match expected {
            ExpectedSpacing::Skip => continue,
            ExpectedSpacing::None => {
                // Extra whitespace between glued tokens: delete the gap.
                if !gap.is_empty() && !has_newline && !has_comment {
                    let span = (left_end, right_start);
                    let edit = (left_end, right_start, String::new());
                    violations.push((span, vec![edit]));
                }
            }
            ExpectedSpacing::NoneInline => {
                // Same as None, but a newline gap is also a violation.
                if !gap.is_empty() && !has_comment {
                    let span = (left_end, right_start);
                    let edit = (left_end, right_start, String::new());
                    violations.push((span, vec![edit]));
                }
            }
            ExpectedSpacing::Single => {
                if has_comment {
                    continue;
                }
                // A line break is an acceptable substitute for the space.
                if has_newline {
                    continue;
                }
                if gap == " " {
                    continue;
                }
                // `a,b`: anchor the edit on the comma itself so the fix is
                // "replace ',' with ', '".
                if gap.is_empty() && matches!(left.token, Token::Comma) {
                    let replacement = format!("{} ", &sql[left_start..left_end]);
                    let span = (left_start, left_end);
                    let edit = (left_start, left_end, replacement);
                    violations.push((span, vec![edit]));
                    continue;
                }
                // Same anchoring for `EXISTS(`-style missing spaces.
                if gap.is_empty() && is_exists_keyword_token(&left.token) {
                    let replacement = format!("{} ", &sql[left_start..left_end]);
                    let span = (left_start, left_end);
                    let edit = (left_start, left_end, replacement);
                    violations.push((span, vec![edit]));
                    continue;
                }
                // Otherwise: normalize the gap to a single space.
                let span = (left_end, right_start);
                let edit = (left_end, right_start, " ".to_string());
                violations.push((span, vec![edit]));
            }
            ExpectedSpacing::SingleInline => {
                if has_comment {
                    continue;
                }
                if gap == " " {
                    continue;
                }
                // Anything else (including a newline) becomes one space.
                let span = (left_end, right_start);
                let edit = (left_end, right_start, " ".to_string());
                violations.push((span, vec![edit]));
            }
        }
    }
}
586
/// Decide the expected spacing between two adjacent non-trivia tokens.
///
/// Checks are ordered by specificity — earlier rules win. `tokens` plus the
/// indices provide look-behind/look-ahead context for the heuristics.
fn expected_spacing(
    left: &TokenWithSpan,
    right: &TokenWithSpan,
    tokens: &[TokenWithSpan],
    left_idx: usize,
    right_idx: usize,
    dialect: Dialect,
) -> ExpectedSpacing {
    // Qualified names: no space around `.`.
    if matches!(left.token, Token::Period) || matches!(right.token, Token::Period) {
        return ExpectedSpacing::NoneInline;
    }

    // Casts: no space around `::`.
    if matches!(left.token, Token::DoubleColon) || matches!(right.token, Token::DoubleColon) {
        return ExpectedSpacing::NoneInline;
    }

    // Snowflake semi-structured access: no space around `:`.
    if dialect == Dialect::Snowflake
        && (matches!(left.token, Token::Colon) || matches!(right.token, Token::Colon))
    {
        return ExpectedSpacing::NoneInline;
    }

    // Compound comparison split by the tokenizer (`>` `=` etc.): keep glued.
    if is_split_compound_comparison_pair(left, right) {
        return ExpectedSpacing::NoneInline;
    }

    // T-SQL compound assignment halves (`+` `=`): keep glued.
    if dialect == Dialect::Mssql && is_tsql_compound_assignment_pair(left, right) {
        return ExpectedSpacing::NoneInline;
    }

    if matches!(right.token, Token::LParen) {
        return expected_spacing_before_lparen(left, tokens, left_idx, dialect);
    }

    if matches!(left.token, Token::RParen) {
        return expected_spacing_after_rparen(right, tokens, right_idx);
    }

    // `[` after a type keyword is array syntax (space); otherwise it is
    // subscripting (no space).
    if matches!(right.token, Token::LBracket) {
        if is_type_keyword_for_bracket(&left.token) {
            return ExpectedSpacing::Single;
        }
        return ExpectedSpacing::None;
    }

    if matches!(left.token, Token::RBracket) {
        if matches!(
            right.token,
            Token::DoubleColon | Token::Period | Token::LBracket | Token::RParen
        ) {
            return ExpectedSpacing::None;
        }
        return ExpectedSpacing::Single;
    }

    // Commas: glued on the left, one space on the right.
    if matches!(right.token, Token::Comma) {
        return ExpectedSpacing::None;
    }
    if matches!(left.token, Token::Comma) {
        return ExpectedSpacing::Single;
    }

    // Spacing around statement terminators is out of scope for this rule.
    if matches!(right.token, Token::SemiColon) {
        return ExpectedSpacing::Skip;
    }
    if matches!(left.token, Token::SemiColon) {
        return ExpectedSpacing::Skip;
    }

    // Parenthesized contents hug their parentheses.
    if matches!(left.token, Token::LParen) {
        return ExpectedSpacing::None;
    }
    if matches!(right.token, Token::RParen) {
        return ExpectedSpacing::None;
    }

    // BigQuery project names may contain hyphens (`my-project.dataset`).
    if dialect == Dialect::Bigquery
        && is_bigquery_hyphenated_identifier_pair(left, right, tokens, left_idx, right_idx)
    {
        return ExpectedSpacing::None;
    }

    // `/` inside stage / JAR / MODEL paths is a separator, not division.
    if is_filesystem_path_pair(left, right, tokens, left_idx, right_idx, dialect) {
        return ExpectedSpacing::NoneInline;
    }

    if is_binary_operator(&left.token) || is_binary_operator(&right.token) {
        // Unary `+`/`-` signs are exempt from binary-operator spacing.
        if is_unary_operator_pair(left, right, tokens, left_idx) {
            return ExpectedSpacing::Skip;
        }
        return ExpectedSpacing::Single;
    }

    if is_comparison_operator(&left.token) || is_comparison_operator(&right.token) {
        // NOTE(review): both branches return Single, so the MSSQL check is
        // currently redundant — presumably a leftover from an earlier
        // distinction; confirm before simplifying.
        if dialect == Dialect::Mssql
            && is_tsql_assignment_rhs_pair(left, right, tokens, left_idx, right_idx)
        {
            return ExpectedSpacing::Single;
        }
        return ExpectedSpacing::Single;
    }

    if is_json_operator(&left.token) || is_json_operator(&right.token) {
        return ExpectedSpacing::Single;
    }

    // `*` is ambiguous (wildcard vs multiplication); stay silent.
    if matches!(left.token, Token::Mul) || matches!(right.token, Token::Mul) {
        return ExpectedSpacing::Skip;
    }

    // Adjacent words / literals always need one separating space.
    if is_word_like(&left.token) && is_word_like(&right.token) {
        return ExpectedSpacing::Single;
    }

    if (is_word_like(&left.token) && is_literal(&right.token))
        || (is_literal(&left.token) && is_word_like(&right.token))
    {
        return ExpectedSpacing::Single;
    }

    if is_literal(&left.token) && is_literal(&right.token) {
        return ExpectedSpacing::Single;
    }

    if (matches!(left.token, Token::Number(_, _)) && is_word_like(&right.token))
        || (is_word_like(&left.token) && matches!(right.token, Token::Number(_, _)))
    {
        return ExpectedSpacing::Single;
    }

    // Unrecognized pair: no opinion.
    ExpectedSpacing::Skip
}
746
747fn is_binary_operator(token: &Token) -> bool {
752 matches!(
753 token,
754 Token::Plus
755 | Token::Minus
756 | Token::Div
757 | Token::Mod
758 | Token::StringConcat
759 | Token::Ampersand
760 | Token::Pipe
761 | Token::Caret
762 | Token::ShiftLeft
763 | Token::ShiftRight
764 | Token::Assignment
765 )
766}
767
768fn is_comparison_operator(token: &Token) -> bool {
769 matches!(
770 token,
771 Token::Eq
772 | Token::Neq
773 | Token::Lt
774 | Token::Gt
775 | Token::LtEq
776 | Token::GtEq
777 | Token::Spaceship
778 | Token::DoubleEq
779 | Token::TildeEqual
780 )
781}
782
783fn is_split_compound_comparison_pair(left: &TokenWithSpan, right: &TokenWithSpan) -> bool {
784 matches!(
785 (&left.token, &right.token),
786 (Token::Gt, Token::Eq)
787 | (Token::Lt, Token::Eq)
788 | (Token::Lt, Token::Gt)
789 | (Token::Neq, Token::Eq)
790 )
791}
792
793fn is_assignment_operator_token(token: &Token) -> bool {
794 matches!(
795 token,
796 Token::Plus
797 | Token::Minus
798 | Token::Mul
799 | Token::Div
800 | Token::Mod
801 | Token::Ampersand
802 | Token::Pipe
803 | Token::Caret
804 )
805}
806
807fn is_tsql_compound_assignment_pair(left: &TokenWithSpan, right: &TokenWithSpan) -> bool {
808 matches!(right.token, Token::Eq) && is_assignment_operator_token(&left.token)
809}
810
811fn is_tsql_assignment_rhs_pair(
812 left: &TokenWithSpan,
813 _right: &TokenWithSpan,
814 tokens: &[TokenWithSpan],
815 left_idx: usize,
816 _right_idx: usize,
817) -> bool {
818 if !matches!(left.token, Token::Eq) {
819 return false;
820 }
821 prev_non_trivia_index(tokens, left_idx)
822 .map(|index| is_assignment_operator_token(&tokens[index].token))
823 .unwrap_or(false)
824}
825
826fn is_json_operator(token: &Token) -> bool {
827 matches!(
828 token,
829 Token::Arrow
830 | Token::LongArrow
831 | Token::HashArrow
832 | Token::HashLongArrow
833 | Token::AtArrow
834 | Token::ArrowAt
835 )
836}
837
838fn is_word_like(token: &Token) -> bool {
839 matches!(token, Token::Word(_) | Token::Placeholder(_))
840}
841
842fn is_literal(token: &Token) -> bool {
843 matches!(
844 token,
845 Token::SingleQuotedString(_)
846 | Token::DoubleQuotedString(_)
847 | Token::TripleSingleQuotedString(_)
848 | Token::TripleDoubleQuotedString(_)
849 | Token::NationalStringLiteral(_)
850 | Token::EscapedStringLiteral(_)
851 | Token::UnicodeStringLiteral(_)
852 | Token::HexStringLiteral(_)
853 | Token::SingleQuotedByteStringLiteral(_)
854 | Token::DoubleQuotedByteStringLiteral(_)
855 | Token::Number(_, _)
856 )
857}
858
859fn is_type_keyword_for_bracket(token: &Token) -> bool {
860 if let Token::Word(w) = token {
861 if w.quote_style.is_some() {
862 return false;
863 }
864 matches!(
865 w.value.to_ascii_uppercase().as_str(),
866 "TEXT"
867 | "UUID"
868 | "INT"
869 | "INTEGER"
870 | "BIGINT"
871 | "SMALLINT"
872 | "VARCHAR"
873 | "CHAR"
874 | "BOOLEAN"
875 | "BOOL"
876 | "NUMERIC"
877 | "DECIMAL"
878 | "FLOAT"
879 | "DOUBLE"
880 | "DATE"
881 | "TIME"
882 | "TIMESTAMP"
883 | "INTERVAL"
884 | "JSONB"
885 | "JSON"
886 | "BYTEA"
887 | "REAL"
888 | "SERIAL"
889 | "BIGSERIAL"
890 | "INET"
891 | "CIDR"
892 | "MACADDR"
893 )
894 } else {
895 false
896 }
897}
898
899fn is_exists_keyword_token(token: &Token) -> bool {
900 matches!(token, Token::Word(word) if word.keyword == Keyword::EXISTS)
901}
902
903fn is_ddl_object_keyword(token: &Token) -> bool {
906 if let Token::Word(w) = token {
907 matches!(
908 w.keyword,
909 Keyword::TABLE
910 | Keyword::VIEW
911 | Keyword::INDEX
912 | Keyword::FUNCTION
913 | Keyword::PROCEDURE
914 | Keyword::TRIGGER
915 | Keyword::SEQUENCE
916 | Keyword::TYPE
917 | Keyword::SCHEMA
918 | Keyword::DATABASE
919 )
920 } else {
921 false
922 }
923}
924
/// Walk left from `word_index` across a dotted (schema-qualified) name;
/// true when the name is introduced by a DDL object keyword that itself
/// sits in a CREATE/ALTER/DROP/TRUNCATE header.
fn is_qualified_ddl_object_name(tokens: &[TokenWithSpan], word_index: usize) -> bool {
    let mut cursor = word_index;

    loop {
        let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) else {
            return false;
        };

        // Step over `.` plus its qualifier word and keep scanning left.
        if matches!(tokens[prev_idx].token, Token::Period) {
            let Some(prev_word_idx) = prev_non_trivia_index(tokens, prev_idx) else {
                return false;
            };
            if !is_word_like(&tokens[prev_word_idx].token) {
                return false;
            }
            cursor = prev_word_idx;
            continue;
        }

        // The token before the (qualified) name must be TABLE/VIEW/etc. ...
        if !is_ddl_object_keyword(&tokens[prev_idx].token) {
            return false;
        }
        // ...and that keyword must belong to an actual DDL statement header.
        return is_ddl_object_definition_context(tokens, prev_idx);
    }
}
950
951fn is_reference_target_name(tokens: &[TokenWithSpan], word_index: usize) -> bool {
952 let mut cursor = word_index;
953
954 loop {
955 let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) else {
956 return false;
957 };
958
959 if matches!(tokens[prev_idx].token, Token::Period) {
960 let Some(prev_word_idx) = prev_non_trivia_index(tokens, prev_idx) else {
961 return false;
962 };
963 if !is_word_like(&tokens[prev_word_idx].token) {
964 return false;
965 }
966 cursor = prev_word_idx;
967 continue;
968 }
969
970 let Token::Word(prev_word) = &tokens[prev_idx].token else {
971 return false;
972 };
973
974 return prev_word.keyword == Keyword::REFERENCES;
975 }
976}
977
/// Walk left from `word_index` to decide whether the word is the target of
/// a `COPY INTO <name>` statement. The scan stops at clause keywords or
/// punctuation that rule that position out, and is capped at 64 tokens.
fn is_copy_into_target_name(tokens: &[TokenWithSpan], word_index: usize) -> bool {
    let mut cursor = word_index;
    let mut steps = 0usize;

    while let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) {
        match &tokens[prev_idx].token {
            // Found `INTO`; it must itself be preceded by `COPY`.
            Token::Word(word) if word.keyword == Keyword::INTO => {
                let Some(copy_idx) = prev_non_trivia_index(tokens, prev_idx) else {
                    return false;
                };
                return matches!(
                    &tokens[copy_idx].token,
                    Token::Word(copy_word) if copy_word.keyword == Keyword::COPY
                );
            }
            // Any other clause keyword means this is not a COPY INTO target.
            Token::Word(word)
                if matches!(
                    word.keyword,
                    Keyword::FROM
                        | Keyword::SELECT
                        | Keyword::WHERE
                        | Keyword::JOIN
                        | Keyword::ON
                        | Keyword::HAVING
                ) =>
            {
                return false;
            }
            // Punctuation boundaries also end the search.
            Token::SemiColon | Token::Comma | Token::LParen | Token::RParen => return false,
            _ => {}
        }

        cursor = prev_idx;
        steps += 1;
        // Safety valve against pathological token streams.
        if steps > 64 {
            return false;
        }
    }

    false
}
1019
/// Walk left from `word_index` to decide whether the word belongs to an
/// `INSERT INTO <name>` target. Only dots and unquoted words may occur
/// between the word and `INTO`; the scan is capped at 16 tokens.
fn is_insert_into_target_name(tokens: &[TokenWithSpan], word_index: usize) -> bool {
    let mut cursor = word_index;
    let mut steps = 0usize;

    while let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) {
        match &tokens[prev_idx].token {
            // Found `INTO`; it must itself be preceded by `INSERT`.
            Token::Word(word) if word.keyword == Keyword::INTO => {
                let Some(insert_idx) = prev_non_trivia_index(tokens, prev_idx) else {
                    return false;
                };
                return matches!(
                    &tokens[insert_idx].token,
                    Token::Word(w) if w.keyword == Keyword::INSERT
                );
            }
            // Dots and unquoted name parts are allowed inside the target.
            Token::Period => {}
            Token::Word(word) if word.quote_style.is_none() => {}
            _ => return false,
        }

        cursor = prev_idx;
        steps += 1;
        // Safety valve: qualified target names are short.
        if steps > 16 {
            return false;
        }
    }

    false
}
1054
1055fn is_ddl_object_definition_context(tokens: &[TokenWithSpan], ddl_keyword_index: usize) -> bool {
1056 let Some(prev_idx) = prev_non_trivia_index(tokens, ddl_keyword_index) else {
1057 return false;
1058 };
1059 let Token::Word(prev_word) = &tokens[prev_idx].token else {
1060 return false;
1061 };
1062
1063 if matches!(
1064 prev_word.keyword,
1065 Keyword::CREATE | Keyword::ALTER | Keyword::DROP | Keyword::TRUNCATE
1066 ) {
1067 return true;
1068 }
1069
1070 if prev_word.keyword == Keyword::OR {
1071 if let Some(prev_prev_idx) = prev_non_trivia_index(tokens, prev_idx) {
1072 if let Token::Word(prev_prev_word) = &tokens[prev_prev_idx].token {
1073 return matches!(prev_prev_word.keyword, Keyword::CREATE | Keyword::ALTER);
1074 }
1075 }
1076 }
1077
1078 false
1079}
1080
1081fn is_unary_operator_pair(
1083 left: &TokenWithSpan,
1084 right: &TokenWithSpan,
1085 tokens: &[TokenWithSpan],
1086 left_idx: usize,
1087) -> bool {
1088 if matches!(right.token, Token::Plus | Token::Minus)
1090 && is_unary_prefix_context(&tokens[left_idx].token)
1091 {
1092 return true;
1093 }
1094 if matches!(left.token, Token::Plus | Token::Minus) {
1096 if let Some(prev_idx) = prev_non_trivia_index(tokens, left_idx) {
1097 if is_unary_prefix_context(&tokens[prev_idx].token) {
1098 return true;
1099 }
1100 } else {
1101 return true;
1103 }
1104 }
1105 false
1106}
1107
/// BigQuery permits hyphens in unquoted project identifiers
/// (`my-project.dataset.table`), which the tokenizer splits into
/// `word - word`. Treat such a triple as one identifier — no spacing
/// required around the `-` — but only when a `.` follows, which
/// distinguishes it from subtraction.
fn is_bigquery_hyphenated_identifier_pair(
    left: &TokenWithSpan,
    right: &TokenWithSpan,
    tokens: &[TokenWithSpan],
    left_idx: usize,
    right_idx: usize,
) -> bool {
    // Pair is (word, `-`): require `word - word .` ahead.
    if matches!(right.token, Token::Minus) {
        if !matches!(left.token, Token::Word(_)) {
            return false;
        }
        let Some(next_word_idx) = next_non_trivia_index(tokens, right_idx + 1) else {
            return false;
        };
        if !matches!(tokens[next_word_idx].token, Token::Word(_)) {
            return false;
        }
        let Some(next_after_word_idx) = next_non_trivia_index(tokens, next_word_idx + 1) else {
            return false;
        };
        return matches!(tokens[next_after_word_idx].token, Token::Period);
    }

    // Pair is (`-`, word): require a word before the `-` and a `.` after.
    if matches!(left.token, Token::Minus) {
        if !matches!(right.token, Token::Word(_)) {
            return false;
        }
        let Some(prev_word_idx) = prev_non_trivia_index(tokens, left_idx) else {
            return false;
        };
        if !matches!(tokens[prev_word_idx].token, Token::Word(_)) {
            return false;
        }
        let Some(next_idx) = next_non_trivia_index(tokens, right_idx + 1) else {
            return false;
        };
        return matches!(tokens[next_idx].token, Token::Period);
    }

    false
}
1149
/// Heuristic: `/` acting as a filesystem/stage path separator rather than
/// division (Databricks/ClickHouse `JAR`/`MODEL` paths, Snowflake stage
/// paths). Such pairs are exempt from binary-operator spacing.
fn is_filesystem_path_pair(
    left: &TokenWithSpan,
    right: &TokenWithSpan,
    tokens: &[TokenWithSpan],
    left_idx: usize,
    right_idx: usize,
    dialect: Dialect,
) -> bool {
    // Only these dialects use slash-separated path arguments here.
    if !matches!(
        dialect,
        Dialect::Databricks | Dialect::Clickhouse | Dialect::Snowflake
    ) {
        return false;
    }

    // Locate which side of the pair is the `/` token.
    let div_index = if matches!(left.token, Token::Div) {
        Some(left_idx)
    } else if matches!(right.token, Token::Div) {
        // `JAR /path` style: a space between the keyword and the leading
        // slash is fine — unless we are already inside a path (previous
        // token was itself a `/`).
        let left_is_context_keyword = is_path_context_keyword_token(&left.token);
        let left_is_path_segment = prev_non_trivia_index(tokens, left_idx)
            .is_some_and(|idx| matches!(tokens[idx].token, Token::Div));
        if left_is_context_keyword && !left_is_path_segment {
            return false;
        }
        Some(right_idx)
    } else {
        None
    };
    let Some(div_index) = div_index else {
        return false;
    };

    // The slash must touch a word on at least one side to look path-like.
    let prev_idx = prev_non_trivia_index(tokens, div_index);
    let next_idx = next_non_trivia_index(tokens, div_index + 1);
    let prev_ok = prev_idx.is_some_and(|idx| matches!(tokens[idx].token, Token::Word(_)));
    let next_ok = next_idx.is_some_and(|idx| matches!(tokens[idx].token, Token::Word(_)));
    if !(prev_ok || next_ok) {
        return false;
    }

    // Snowflake paths hang off a stage reference (`@stage/...`).
    if dialect == Dialect::Snowflake {
        return snowflake_stage_path_context_within(tokens, div_index, 12);
    }

    path_context_keyword_within(tokens, div_index, 6)
}
1196
1197fn is_path_context_keyword_token(token: &Token) -> bool {
1198 let Token::Word(word) = token else {
1199 return false;
1200 };
1201 word.value.eq_ignore_ascii_case("JAR") || word.value.eq_ignore_ascii_case("MODEL")
1202}
1203
1204fn path_context_keyword_within(tokens: &[TokenWithSpan], from_idx: usize, limit: usize) -> bool {
1205 let mut cursor = from_idx;
1206 let mut steps = 0usize;
1207 while let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) {
1208 if let Token::Word(word) = &tokens[prev_idx].token {
1209 if matches!(word.keyword, Keyword::JAR) {
1210 return true;
1211 }
1212 if word.value.eq_ignore_ascii_case("JAR") || word.value.eq_ignore_ascii_case("MODEL") {
1213 return true;
1214 }
1215 }
1216 cursor = prev_idx;
1217 steps += 1;
1218 if steps >= limit {
1219 break;
1220 }
1221 }
1222 false
1223}
1224
1225fn snowflake_stage_path_context_within(
1226 tokens: &[TokenWithSpan],
1227 from_idx: usize,
1228 limit: usize,
1229) -> bool {
1230 let mut cursor = from_idx;
1231 let mut steps = 0usize;
1232 while let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) {
1233 match &tokens[prev_idx].token {
1234 Token::AtSign => return true,
1235 Token::Word(word) if word.value.starts_with('@') => return true,
1236 _ => {}
1237 }
1238 cursor = prev_idx;
1239 steps += 1;
1240 if steps >= limit {
1241 break;
1242 }
1243 }
1244 false
1245}
1246
1247fn is_unary_prefix_context(token: &Token) -> bool {
1249 if matches!(
1250 token,
1251 Token::Comma
1252 | Token::LParen
1253 | Token::Eq
1254 | Token::Neq
1255 | Token::Lt
1256 | Token::Gt
1257 | Token::LtEq
1258 | Token::GtEq
1259 ) {
1260 return true;
1261 }
1262 if let Token::Word(w) = token {
1263 if matches!(
1264 w.keyword,
1265 Keyword::SELECT
1266 | Keyword::WHERE
1267 | Keyword::WHEN
1268 | Keyword::THEN
1269 | Keyword::ELSE
1270 | Keyword::AND
1271 | Keyword::OR
1272 | Keyword::ON
1273 | Keyword::SET
1274 | Keyword::CASE
1275 | Keyword::BETWEEN
1276 | Keyword::IN
1277 | Keyword::VALUES
1278 | Keyword::INTERVAL
1279 | Keyword::YEAR
1280 | Keyword::MONTH
1281 | Keyword::DAY
1282 | Keyword::HOUR
1283 | Keyword::MINUTE
1284 | Keyword::SECOND
1285 | Keyword::RETURN
1286 | Keyword::RETURNS
1287 ) {
1288 return true;
1289 }
1290 }
1291 false
1292}
1293
/// Decide what whitespace is expected between `left` and a following `(`.
///
/// Returns an `ExpectedSpacing` variant (declared elsewhere in this file):
/// `Single`/`SingleInline` when one space is required, `None`/`NoneInline`
/// when the paren must hug the previous token, and `Skip` when this pair
/// should not be judged. Arm order matters: guards are tried top-to-bottom.
fn expected_spacing_before_lparen(
    left: &TokenWithSpan,
    tokens: &[TokenWithSpan],
    left_idx: usize,
    dialect: Dialect,
) -> ExpectedSpacing {
    match &left.token {
        // Unquoted word: may be a function name, keyword, or object name.
        Token::Word(w) if w.quote_style.is_none() => {
            // Snowflake-specific clauses checked by name (not by keyword).
            if dialect == Dialect::Snowflake {
                if w.value.eq_ignore_ascii_case("MATCH_RECOGNIZE")
                    || w.value.eq_ignore_ascii_case("PATTERN")
                {
                    return ExpectedSpacing::Single;
                }
                if w.value.eq_ignore_ascii_case("MATCH_CONDITION") {
                    return ExpectedSpacing::NoneInline;
                }
            }
            // EXISTS: spaced when used as a predicate keyword, tight when
            // used function-style — decided from the preceding token.
            if w.value.eq_ignore_ascii_case("EXISTS") {
                if exists_requires_space_before_lparen(tokens, left_idx) {
                    return ExpectedSpacing::Single;
                }
                return ExpectedSpacing::NoneInline;
            }
            // Clause keywords (FROM, USING, AS, ...) take a space before `(`.
            if is_keyword_requiring_space_before_paren(w.keyword) {
                // AS is only required to be spaced when on the same line.
                if matches!(w.keyword, Keyword::AS) {
                    return ExpectedSpacing::SingleInline;
                }
                return ExpectedSpacing::Single;
            }
            // INSERT INTO target_name (cols...) — column list gets a space.
            if is_insert_into_target_name(tokens, left_idx) {
                return ExpectedSpacing::Single;
            }
            // Plain identifiers: distinguish column-list targets from
            // function-call syntax.
            if w.keyword == Keyword::NoKeyword {
                if is_reference_target_name(tokens, left_idx) {
                    return ExpectedSpacing::Single;
                }
                if is_copy_into_target_name(tokens, left_idx) {
                    return ExpectedSpacing::Single;
                }
                if is_qualified_ddl_object_name(tokens, left_idx) {
                    return ExpectedSpacing::Skip;
                }
            }
            // Default for words: treat as a function call, `name(`.
            ExpectedSpacing::NoneInline
        }
        // `) (` and `] (` pairs take a space.
        Token::RParen | Token::RBracket => ExpectedSpacing::Single,
        _ if is_literal(&left.token) => ExpectedSpacing::Single,
        // NOTE(review): if `is_literal` above already matches
        // `Token::Number`, this arm is unreachable — confirm before relying
        // on `None` here.
        Token::Number(_, _) => ExpectedSpacing::None,
        Token::Comma => ExpectedSpacing::Single,
        // NOTE(review): this guard arm yields the same result as the
        // catch-all below, so it is informational only.
        _ if is_binary_operator(&left.token) || is_comparison_operator(&left.token) => {
            ExpectedSpacing::Skip
        }
        _ => ExpectedSpacing::Skip,
    }
}
1366
1367fn exists_requires_space_before_lparen(tokens: &[TokenWithSpan], left_idx: usize) -> bool {
1368 let Some(prev_idx) = prev_non_trivia_index(tokens, left_idx) else {
1369 return false;
1370 };
1371
1372 match &tokens[prev_idx].token {
1373 Token::Word(word) => {
1374 matches!(
1375 word.keyword,
1376 Keyword::AND
1377 | Keyword::OR
1378 | Keyword::NOT
1379 | Keyword::WHERE
1380 | Keyword::HAVING
1381 | Keyword::WHEN
1382 | Keyword::THEN
1383 | Keyword::ELSE
1384 ) || matches!(
1385 word.value.to_ascii_uppercase().as_str(),
1386 "AND" | "OR" | "NOT" | "WHERE" | "HAVING" | "WHEN" | "THEN" | "ELSE"
1387 )
1388 }
1389 Token::RParen
1390 | Token::LParen
1391 | Token::Eq
1392 | Token::Neq
1393 | Token::Lt
1394 | Token::Gt
1395 | Token::LtEq
1396 | Token::GtEq => true,
1397 _ => false,
1398 }
1399}
1400
1401fn is_keyword_requiring_space_before_paren(keyword: Keyword) -> bool {
1403 matches!(
1404 keyword,
1405 Keyword::AS
1406 | Keyword::USING
1407 | Keyword::FROM
1408 | Keyword::JOIN
1409 | Keyword::ON
1410 | Keyword::WHERE
1411 | Keyword::IN
1412 | Keyword::BETWEEN
1413 | Keyword::WHEN
1414 | Keyword::THEN
1415 | Keyword::ELSE
1416 | Keyword::AND
1417 | Keyword::OR
1418 | Keyword::NOT
1419 | Keyword::HAVING
1420 | Keyword::OVER
1421 | Keyword::PARTITION
1422 | Keyword::ORDER
1423 | Keyword::GROUP
1424 | Keyword::LIMIT
1425 | Keyword::UNION
1426 | Keyword::INTERSECT
1427 | Keyword::EXCEPT
1428 | Keyword::RECURSIVE
1429 | Keyword::WITH
1430 | Keyword::SELECT
1431 | Keyword::INTO
1432 | Keyword::TABLE
1433 | Keyword::VALUES
1434 | Keyword::SET
1435 | Keyword::RETURNS
1436 | Keyword::FILTER
1437 | Keyword::CONFLICT
1438 | Keyword::BY
1439 )
1440}
1441
1442fn expected_spacing_after_rparen(
1444 right: &TokenWithSpan,
1445 _tokens: &[TokenWithSpan],
1446 _right_idx: usize,
1447) -> ExpectedSpacing {
1448 match &right.token {
1449 Token::Period | Token::DoubleColon | Token::LBracket | Token::RBracket => {
1451 ExpectedSpacing::None
1452 }
1453 Token::Comma => ExpectedSpacing::None,
1455 Token::SemiColon => ExpectedSpacing::Skip,
1457 Token::RParen => ExpectedSpacing::None,
1459 Token::LParen => ExpectedSpacing::Single,
1461 _ => ExpectedSpacing::Single,
1463 }
1464}
1465
1466fn has_comment_between(tokens: &[TokenWithSpan], left: usize, right: usize) -> bool {
1467 tokens[left + 1..right].iter().any(|t| {
1468 matches!(
1469 t.token,
1470 Token::Whitespace(Whitespace::SingleLineComment { .. })
1471 | Token::Whitespace(Whitespace::MultiLineComment(_))
1472 )
1473 })
1474}
1475
1476fn template_spans(sql: &str) -> Vec<Lt01TemplateSpan> {
1477 let mut spans = Vec::new();
1478 let mut index = 0usize;
1479 while let Some((open, close)) = find_next_template_open(sql, index) {
1480 let payload_start = open + 2;
1481 if let Some(rel_close) = sql[payload_start..].find(close) {
1482 let close_index = payload_start + rel_close + close.len();
1483 spans.push((open, close_index));
1484 index = close_index;
1485 } else {
1486 spans.push((open, sql.len()));
1487 break;
1488 }
1489 }
1490 spans
1491}
1492
/// Locate the earliest template opener (`{{`, `{%`, or `{#`) at or after
/// byte offset `from`, returning its absolute position and the matching
/// closer. Returns `None` when `from` is out of range or no opener exists.
fn find_next_template_open(sql: &str, from: usize) -> Option<(usize, &'static str)> {
    let remainder = sql.get(from..)?;
    let mut best: Option<(usize, &'static str)> = None;
    for (open, close) in [("{{", "}}"), ("{%", "%}"), ("{#", "#}")] {
        if let Some(offset) = remainder.find(open) {
            let absolute = from + offset;
            // Keep whichever opener occurs first in the input.
            if best.is_none_or(|(current, _)| absolute < current) {
                best = Some((absolute, close));
            }
        }
    }
    best
}
1500
/// Cheap pre-check: does `sql` contain any Jinja-style opener at all?
fn contains_template_marker(sql: &str) -> bool {
    ["{{", "{%", "{#"].iter().any(|marker| sql.contains(marker))
}
1504
1505fn overlaps_template_span(spans: &[Lt01TemplateSpan], start: usize, end: usize) -> bool {
1506 spans
1507 .iter()
1508 .any(|(template_start, template_end)| start < *template_end && end > *template_start)
1509}
1510
/// Flag national string literals written as `N'...'` and propose inserting
/// a space between the prefix and the quote (`N '...'`).
///
/// Skipped entirely for MSSQL, where the fused `N'...'` form is the
/// expected spelling. Literals that overlap templated spans, or whose
/// spans cannot be mapped back to byte offsets, are ignored. Each recorded
/// violation carries one autofix edit rewriting the literal in place.
fn collect_ansi_national_string_literal_violations(
    sql: &str,
    tokens: &[TokenWithSpan],
    dialect: Dialect,
    templated_spans: &[Lt01TemplateSpan],
    violations: &mut Vec<Lt01Violation>,
) {
    if matches!(dialect, Dialect::Mssql) {
        return;
    }

    for token in tokens {
        let Token::NationalStringLiteral(_) = token.token else {
            continue;
        };
        let Some((start, end)) = token_offsets(sql, token) else {
            continue;
        };
        // Sanity-check the byte range and skip templated regions.
        if start >= end || end > sql.len() || overlaps_template_span(templated_spans, start, end) {
            continue;
        }
        let raw = &sql[start..end];
        // Shortest possible literal is N'' — three bytes.
        if raw.len() < 3 {
            continue;
        }
        let Some(prefix) = raw.chars().next() else {
            continue;
        };
        // Only act on a literal shaped exactly like N'...' / n'...'.
        // (Short-circuit keeps `raw[1..]` safe: it is only sliced when the
        // first char is the one-byte 'N'/'n'.)
        if !(prefix == 'N' || prefix == 'n') || !raw[1..].starts_with('\'') {
            continue;
        }
        // Preserve the original prefix case; insert a single space after it.
        let replacement = format!("{prefix} {}", &raw[1..]);
        violations.push(((start, end), vec![(start, end, replacement)]));
    }
}
1546
/// Look inside quoted string fragments within template spans (e.g.
/// `{{ 'SELECT 1 ,4' }}`), tokenize each fragment as SQL, and re-run the
/// pair-spacing and national-literal checks on it.
///
/// Violations found this way are recorded with an empty edit list — i.e.
/// detection-only, no autofix — because rewriting text inside a template
/// expression is not safe. Offsets are rebased from fragment-relative to
/// absolute positions in `sql`.
fn collect_template_string_spacing_violations(
    sql: &str,
    dialect: Dialect,
    templated_spans: &[Lt01TemplateSpan],
    violations: &mut Vec<Lt01Violation>,
) {
    for (template_start, template_end) in templated_spans {
        let mut cursor = *template_start;
        while cursor < *template_end {
            // Next quoted fragment within this template span.
            let Some((quote_start, quote_char)) = next_quote_in_range(sql, cursor, *template_end)
            else {
                break;
            };
            let Some(quote_end) =
                find_closing_quote(sql, quote_start + 1, *template_end, quote_char)
            else {
                break;
            };
            let content = &sql[quote_start + 1..quote_end];
            // If the fragment does not tokenize as SQL, skip past it.
            let Some(tokens) = tokenized(content, dialect) else {
                cursor = quote_end + 1;
                continue;
            };

            // Run the fragment through the same checks as top-level SQL,
            // with no nested template spans to exclude.
            let mut fragment_violations = Vec::new();
            collect_pair_spacing_violations(
                content,
                &tokens,
                dialect,
                &[],
                &mut fragment_violations,
            );
            collect_ansi_national_string_literal_violations(
                content,
                &tokens,
                dialect,
                &[],
                &mut fragment_violations,
            );

            // Rebase offsets to the full document; drop any proposed edits.
            for ((start, end), _) in fragment_violations {
                if start >= end || end > content.len() {
                    continue;
                }
                let absolute_start = quote_start + 1 + start;
                let absolute_end = quote_start + 1 + end;
                violations.push(((absolute_start, absolute_end), Vec::new()));
            }

            cursor = quote_end + 1;
        }
    }
}
1600
/// Byte index and character of the first `'` or `"` in `sql[start..end)`,
/// scanning forward along UTF-8 character boundaries.
fn next_quote_in_range(sql: &str, start: usize, end: usize) -> Option<(usize, char)> {
    let mut cursor = start;
    while cursor < end {
        let current = sql[cursor..].chars().next()?;
        match current {
            '\'' | '"' => return Some((cursor, current)),
            _ => cursor += current.len_utf8(),
        }
    }
    None
}
1612
/// Byte index of the unescaped closing `quote` character in
/// `sql[start..end)`. A backslash escapes the character after it (the
/// escaped character is skipped without being compared against `quote`).
/// Returns `None` when the range ends before a closing quote appears.
fn find_closing_quote(sql: &str, start: usize, end: usize, quote: char) -> Option<usize> {
    let mut cursor = start;
    while cursor < end {
        let current = sql[cursor..].chars().next()?;
        if current == '\\' {
            let escaped_at = cursor + current.len_utf8();
            if escaped_at < end {
                // Skip past the escaped character entirely.
                let escaped = sql[escaped_at..].chars().next()?;
                cursor = escaped_at + escaped.len_utf8();
                continue;
            }
        }
        if current == quote {
            return Some(cursor);
        }
        cursor += current.len_utf8();
    }
    None
}
1632
/// Collect the indices of all tokens inside Snowflake `PATTERN ( ... )`
/// clauses, from each opening paren through its matching close paren
/// inclusive, so callers can exempt that regex-like syntax (e.g.
/// `(A | B){5} C+`) from the normal spacing rules.
///
/// `non_trivia` is the pre-computed list of non-trivia token indices;
/// the returned set holds indices into `tokens`.
fn snowflake_pattern_token_indices(
    tokens: &[TokenWithSpan],
    non_trivia: &[usize],
) -> HashSet<usize> {
    let mut out = HashSet::new();
    // Cursor walks positions in `non_trivia`, not raw token indices.
    let mut cursor = 0usize;

    while cursor < non_trivia.len() {
        let token_index = non_trivia[cursor];
        let Token::Word(word) = &tokens[token_index].token else {
            cursor += 1;
            continue;
        };
        if !word.value.eq_ignore_ascii_case("PATTERN") {
            cursor += 1;
            continue;
        }

        // First `(` after the PATTERN word; without one, keep scanning.
        let Some(paren_pos) = ((cursor + 1)..non_trivia.len())
            .find(|idx| matches!(tokens[non_trivia[*idx]].token, Token::LParen))
        else {
            cursor += 1;
            continue;
        };

        // Depth-match parens starting at `paren_pos` to find the close.
        let mut depth = 0usize;
        let mut end_pos = None;
        for (pos, idx) in non_trivia.iter().copied().enumerate().skip(paren_pos) {
            match tokens[idx].token {
                Token::LParen => depth += 1,
                Token::RParen => {
                    // A stray `)` before any `(` would underflow: bail out.
                    if depth == 0 {
                        break;
                    }
                    depth -= 1;
                    if depth == 0 {
                        end_pos = Some(pos);
                        break;
                    }
                }
                _ => {}
            }
        }

        // Unbalanced parens: skip this PATTERN occurrence.
        let Some(end_pos) = end_pos else {
            cursor += 1;
            continue;
        };
        // Mark every non-trivia token from `(` through `)` inclusive.
        for idx in non_trivia.iter().take(end_pos + 1).skip(paren_pos) {
            out.insert(*idx);
        }
        cursor = end_pos + 1;
    }

    out
}
1689
/// Identify `<` / `>` / `>>` tokens that act as type-parameter angle
/// brackets (e.g. `ARRAY<INT>`, `STRUCT<ARRAY<INT>>`) rather than
/// comparison operators, using a stack of open brackets.
///
/// `non_trivia` is the pre-computed list of non-trivia token indices;
/// the returned set holds indices into `tokens`.
fn type_angle_token_indices(tokens: &[TokenWithSpan], non_trivia: &[usize]) -> HashSet<usize> {
    let mut out = HashSet::new();
    // Token indices of currently-open `<` brackets.
    let mut stack = Vec::<usize>::new();

    for (pos, token_idx) in non_trivia.iter().copied().enumerate() {
        let token = &tokens[token_idx].token;
        match token {
            Token::Lt => {
                // `<` opens a bracket only when the previous non-trivia
                // token is ARRAY / STRUCT / MAP.
                let prev_idx = pos
                    .checked_sub(1)
                    .and_then(|value| non_trivia.get(value).copied());
                if prev_idx.is_some_and(|idx| is_type_constructor(&tokens[idx].token)) {
                    out.insert(token_idx);
                    stack.push(token_idx);
                }
            }
            // `>` closes the innermost open bracket; a `>` with no open
            // bracket is left alone (ordinary comparison).
            Token::Gt if !stack.is_empty() => {
                out.insert(token_idx);
                stack.pop();
            }
            // The tokenizer emits `>>` as ShiftRight; inside nested types it
            // closes two brackets at once (e.g. `ARRAY<ARRAY<INT>>`).
            Token::ShiftRight if stack.len() >= 2 => {
                out.insert(token_idx);
                stack.pop();
                stack.pop();
            }
            _ => {}
        }
    }

    out
}
1721
/// Dialects whose type syntax uses angle-bracketed element types such as
/// `ARRAY<INT>` / `STRUCT<...>`, so `<`, `>`, and `>>` there may be
/// brackets rather than comparison operators.
fn supports_type_angle_spacing(dialect: Dialect) -> bool {
    matches!(
        dialect,
        Dialect::Bigquery | Dialect::Hive | Dialect::Databricks
    )
}
1728
1729fn is_type_constructor(token: &Token) -> bool {
1730 let Token::Word(word) = token else {
1731 return false;
1732 };
1733 word.value.eq_ignore_ascii_case("ARRAY")
1734 || word.value.eq_ignore_ascii_case("STRUCT")
1735 || word.value.eq_ignore_ascii_case("MAP")
1736}
1737
1738fn is_type_angle_spacing_pair(
1739 left: &TokenWithSpan,
1740 right: &TokenWithSpan,
1741 left_idx: usize,
1742 right_idx: usize,
1743 type_angle_tokens: &HashSet<usize>,
1744) -> bool {
1745 let left_is_type_angle = type_angle_tokens.contains(&left_idx);
1746 let right_is_type_angle = type_angle_tokens.contains(&right_idx);
1747
1748 if right_is_type_angle && matches!(right.token, Token::Lt | Token::Gt | Token::ShiftRight) {
1749 return true;
1750 }
1751 if left_is_type_angle && matches!(left.token, Token::Lt) {
1752 return true;
1753 }
1754 if left_is_type_angle
1755 && matches!(left.token, Token::Gt | Token::ShiftRight)
1756 && matches!(
1757 right.token,
1758 Token::Comma | Token::RParen | Token::RBracket | Token::LBracket | Token::Gt
1759 )
1760 {
1761 return true;
1762 }
1763
1764 false
1765}
1766
1767fn tokenized(sql: &str, dialect: Dialect) -> Option<Vec<TokenWithSpan>> {
1772 let dialect = dialect.to_sqlparser_dialect();
1773 let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
1774 tokenizer.tokenize_with_location().ok()
1775}
1776
/// Rebuild the token stream for the current statement from the cached
/// document-wide tokens.
///
/// Keeps only tokens whose byte range falls inside `ctx.statement_range`,
/// and rebases each token's line/column span so it is 1-based relative to
/// the statement's first character (matching what tokenizing the statement
/// text alone would produce). Returns `None` when no tokens land inside
/// the statement or the statement start cannot be located.
fn tokenized_for_context(ctx: &LintContext) -> Option<Vec<TokenWithSpan>> {
    let (statement_start_line, statement_start_column) =
        offset_to_line_col(ctx.sql, ctx.statement_range.start)?;

    ctx.with_document_tokens(|tokens| {
        if tokens.is_empty() {
            return None;
        }

        let mut out = Vec::new();
        for token in tokens {
            // Skip tokens whose span cannot be mapped back to byte offsets.
            let Some((start, end)) = token_with_span_offsets(ctx.sql, token) else {
                continue;
            };
            // Keep only tokens fully inside this statement's byte range.
            if start < ctx.statement_range.start || end > ctx.statement_range.end {
                continue;
            }

            // Rebase both span endpoints to statement-relative coordinates.
            let Some(start_loc) = relative_location(
                token.span.start,
                statement_start_line,
                statement_start_column,
            ) else {
                continue;
            };
            let Some(end_loc) =
                relative_location(token.span.end, statement_start_line, statement_start_column)
            else {
                continue;
            };

            out.push(TokenWithSpan::new(
                token.token.clone(),
                Span::new(start_loc, end_loc),
            ));
        }

        if out.is_empty() {
            None
        } else {
            Some(out)
        }
    })
}
1821
1822fn token_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
1823 let start = line_col_to_offset(
1824 sql,
1825 token.span.start.line as usize,
1826 token.span.start.column as usize,
1827 )?;
1828 let end = line_col_to_offset(
1829 sql,
1830 token.span.end.line as usize,
1831 token.span.end.column as usize,
1832 )?;
1833 Some((start, end))
1834}
1835
1836fn next_non_trivia_index(tokens: &[TokenWithSpan], mut index: usize) -> Option<usize> {
1837 while index < tokens.len() {
1838 if !is_trivia_token(&tokens[index].token) {
1839 return Some(index);
1840 }
1841 index += 1;
1842 }
1843 None
1844}
1845
1846fn prev_non_trivia_index(tokens: &[TokenWithSpan], mut index: usize) -> Option<usize> {
1847 while index > 0 {
1848 index -= 1;
1849 if !is_trivia_token(&tokens[index].token) {
1850 return Some(index);
1851 }
1852 }
1853 None
1854}
1855
1856fn is_trivia_token(token: &Token) -> bool {
1857 matches!(
1858 token,
1859 Token::Whitespace(Whitespace::Space | Whitespace::Newline | Whitespace::Tab)
1860 | Token::Whitespace(Whitespace::SingleLineComment { .. })
1861 | Token::Whitespace(Whitespace::MultiLineComment(_))
1862 )
1863}
1864
/// Convert a 1-based (line, column) position into a byte offset in `sql`.
///
/// Columns count characters, not bytes. The position one past the final
/// character maps to `sql.len()`. Returns `None` for zero line/column or a
/// position beyond the input.
fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
    if line == 0 || column == 0 {
        return None;
    }

    let (mut at_line, mut at_col) = (1usize, 1usize);
    for (offset, ch) in sql.char_indices() {
        if (at_line, at_col) == (line, column) {
            return Some(offset);
        }
        if ch == '\n' {
            at_line += 1;
            at_col = 1;
        } else {
            at_col += 1;
        }
    }

    // The position just past the last character maps to the input length.
    ((at_line, at_col) == (line, column)).then_some(sql.len())
}
1892
/// Convert a token's 1-based line/column span into `(start, end)` byte
/// offsets within `sql`, using `line_col_to_offset` for both endpoints.
/// Returns `None` when either endpoint does not map onto `sql`.
fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
    let start = line_col_to_offset(
        sql,
        token.span.start.line as usize,
        token.span.start.column as usize,
    )?;
    let end = line_col_to_offset(
        sql,
        token.span.end.line as usize,
        token.span.end.column as usize,
    )?;
    Some((start, end))
}
1906
/// Convert a byte `offset` into a 1-based (line, column) position in `sql`.
///
/// Columns count characters. `offset == sql.len()` (one past the last
/// character) is valid and maps to the position after the final character.
/// Returns `None` for offsets beyond the input, and — fixed here — for
/// offsets that land inside a multi-byte UTF-8 character (previously such
/// offsets fell through the scan loop and incorrectly reported the
/// end-of-string position).
fn offset_to_line_col(sql: &str, offset: usize) -> Option<(usize, usize)> {
    if offset > sql.len() {
        return None;
    }
    if offset == sql.len() {
        // Line = newline count + 1; column = chars on the last line + 1.
        let line = 1 + sql.bytes().filter(|byte| *byte == b'\n').count();
        let column = sql
            .rsplit_once('\n')
            .map_or(sql.chars().count() + 1, |(_, tail)| {
                tail.chars().count() + 1
            });
        return Some((line, column));
    }

    let mut line = 1usize;
    let mut column = 1usize;
    for (index, ch) in sql.char_indices() {
        if index == offset {
            return Some((line, column));
        }
        if ch == '\n' {
            line += 1;
            column = 1;
        } else {
            column += 1;
        }
    }
    // offset < sql.len() but matched no char boundary: it points into the
    // middle of a multi-byte character, so it has no line/column.
    None
}
1936
1937fn relative_location(
1938 location: Location,
1939 statement_start_line: usize,
1940 statement_start_column: usize,
1941) -> Option<Location> {
1942 if location.line == 0 || location.column == 0 {
1943 return None;
1944 }
1945
1946 let line = location.line as usize;
1947 let column = location.column as usize;
1948 if line < statement_start_line {
1949 return None;
1950 }
1951
1952 let relative_line = line - statement_start_line + 1;
1953 let relative_column = if line == statement_start_line {
1954 if column < statement_start_column {
1955 return None;
1956 }
1957 column - statement_start_column + 1
1958 } else {
1959 column
1960 };
1961
1962 Some(Location::new(relative_line as u64, relative_column as u64))
1963}
1964
1965#[cfg(test)]
1966mod tests {
1967 use super::*;
1968 use crate::linter::rule::with_active_dialect;
1969 use crate::parser::parse_sql;
1970 use crate::types::{Dialect, IssueAutofixApplicability};
1971
1972 fn run(sql: &str) -> Vec<Issue> {
1973 run_with_dialect(sql, Dialect::Generic)
1974 }
1975
1976 fn run_with_dialect(sql: &str, dialect: Dialect) -> Vec<Issue> {
1977 let statements = parse_sql(sql).expect("parse");
1978 let rule = LayoutSpacing::default();
1979 with_active_dialect(dialect, || {
1980 statements
1981 .iter()
1982 .enumerate()
1983 .flat_map(|(index, statement)| {
1984 rule.check(
1985 statement,
1986 &LintContext {
1987 sql,
1988 statement_range: 0..sql.len(),
1989 statement_index: index,
1990 },
1991 )
1992 })
1993 .collect()
1994 })
1995 }
1996
1997 fn run_statementless_with_dialect(sql: &str, dialect: Dialect) -> Vec<Issue> {
1998 run_statementless_with_rule(sql, dialect, LayoutSpacing::default())
1999 }
2000
2001 fn run_statementless_with_rule(sql: &str, dialect: Dialect, rule: LayoutSpacing) -> Vec<Issue> {
2002 let placeholder = parse_sql("SELECT 1").expect("parse placeholder");
2003 with_active_dialect(dialect, || {
2004 rule.check(
2005 &placeholder[0],
2006 &LintContext {
2007 sql,
2008 statement_range: 0..sql.len(),
2009 statement_index: 0,
2010 },
2011 )
2012 })
2013 }
2014
2015 fn apply_all_issue_autofixes(sql: &str, issues: &[Issue]) -> String {
2016 let mut out = sql.to_string();
2017 let mut edits = issues
2018 .iter()
2019 .filter_map(|issue| issue.autofix.as_ref())
2020 .flat_map(|autofix| autofix.edits.clone())
2021 .collect::<Vec<_>>();
2022 edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
2023 for edit in edits.into_iter().rev() {
2024 out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
2025 }
2026 out
2027 }
2028
2029 #[test]
2030 fn allows_bigquery_array_type_angle_brackets_without_spaces() {
2031 let issues = run_with_dialect(
2032 "SELECT ARRAY<FLOAT64>[1, 2, 3] AS floats;",
2033 Dialect::Bigquery,
2034 );
2035 assert!(issues.is_empty());
2036 }
2037
2038 #[test]
2039 fn allows_create_table_with_qualified_name_before_column_list() {
2040 let issues = run("CREATE TABLE db.schema_name.tbl_name (id INT)");
2041 assert!(issues.is_empty());
2042 }
2043
2044 #[test]
2045 fn fixes_reference_target_column_list_spacing() {
2046 let sql = "create table tab1 (b int references tab2(b))";
2047 let issues = run_statementless_with_dialect(sql, Dialect::Ansi);
2048 assert!(!issues.is_empty());
2049 let fixed = apply_all_issue_autofixes(sql, &issues);
2050 assert_eq!(fixed, "create table tab1 (b int references tab2 (b))");
2051 }
2052
2053 #[test]
2054 fn allows_bigquery_hyphenated_project_identifier() {
2055 let issues = run_statementless_with_dialect(
2056 "SELECT col_foo FROM foo-bar.foo.bar",
2057 Dialect::Bigquery,
2058 );
2059 assert!(issues.is_empty());
2060 }
2061
2062 #[test]
2063 fn allows_bigquery_function_array_offset_access() {
2064 let sql = "SELECT testFunction(a)[OFFSET(0)].* FROM table1";
2065 let issues = run_statementless_with_dialect(sql, Dialect::Bigquery);
2066 assert!(issues.is_empty());
2067 }
2068
2069 #[test]
2070 fn allows_hive_struct_and_array_datatype_angles() {
2071 let sql = "select col1::STRUCT<foo: int>, col2::ARRAY<int> from t";
2072 let issues = run_statementless_with_dialect(sql, Dialect::Hive);
2073 assert!(issues.is_empty());
2074 }
2075
2076 #[test]
2077 fn allows_sparksql_file_literal_path() {
2078 let sql = "ADD JAR path/to/some.jar;";
2079 let issues = run_statementless_with_dialect(sql, Dialect::Databricks);
2080 assert!(issues.is_empty());
2081 }
2082
2083 #[test]
2084 fn allows_clickhouse_system_model_path() {
2085 let sql = "SYSTEM RELOAD MODEL /model/path;";
2086 let issues = run_statementless_with_dialect(sql, Dialect::Clickhouse);
2087 assert!(issues.is_empty(), "unexpected issues: {issues:?}");
2088 }
2089
2090 #[test]
2091 fn detects_alias_alignment_when_configured() {
2092 let sql = "SELECT\n\tcol1 AS a,\n\tlonger_col AS b\nFROM t";
2093 let issues = run_statementless_with_rule(
2094 sql,
2095 Dialect::Ansi,
2096 LayoutSpacing {
2097 align_alias_expression: true,
2098 tab_space_size: 4,
2099 ..LayoutSpacing::default()
2100 },
2101 );
2102 assert!(!issues.is_empty());
2103 }
2104
2105 #[test]
2106 fn detects_alias_alignment_with_tabs_when_columns_are_equal_width() {
2107 let sql = "SELECT\n\tcol1 AS alias1,\n\tcol2 AS alias2\nFROM table1";
2108 let issues = run_statementless_with_rule(
2109 sql,
2110 Dialect::Ansi,
2111 LayoutSpacing {
2112 align_alias_expression: true,
2113 align_with_tabs: true,
2114 tab_space_size: 4,
2115 ..LayoutSpacing::default()
2116 },
2117 );
2118 assert!(
2119 !issues.is_empty(),
2120 "tab indentation alignment should flag spaces before AS"
2121 );
2122 }
2123
2124 #[test]
2125 fn detects_create_table_datatype_alignment_when_configured() {
2126 let sql = "CREATE TABLE tbl (\n foo VARCHAR(25) NOT NULL,\n barbar INT NULL\n)";
2127 let issues = run_statementless_with_rule(
2128 sql,
2129 Dialect::Ansi,
2130 LayoutSpacing {
2131 align_data_type: true,
2132 ..LayoutSpacing::default()
2133 },
2134 );
2135 assert!(!issues.is_empty());
2136 }
2137
2138 #[test]
2139 fn does_not_flag_create_table_alignment_when_columns_are_already_aligned() {
2140 let sql = "CREATE TABLE foo (\n x INT NOT NULL PRIMARY KEY,\n y INT NULL,\n z INT NULL\n);";
2141 let issues = run_statementless_with_rule(
2142 sql,
2143 Dialect::Ansi,
2144 LayoutSpacing {
2145 align_data_type: true,
2146 align_column_constraint: true,
2147 ..LayoutSpacing::default()
2148 },
2149 );
2150 assert!(
2151 issues.is_empty(),
2152 "expected no LT01 alignment issues: {issues:?}"
2153 );
2154 }
2155
2156 #[test]
2157 fn statementless_fixes_comment_on_function_spacing() {
2158 let sql = "COMMENT ON FUNCTION x (foo) IS 'y';";
2159 let issues = run_statementless_with_dialect(sql, Dialect::Postgres);
2160 assert!(!issues.is_empty());
2161 let fixed = apply_all_issue_autofixes(sql, &issues);
2162 assert_eq!(fixed, "COMMENT ON FUNCTION x(foo) IS 'y';");
2163 }
2164
2165 #[test]
2166 fn statementless_fixes_split_tsql_comparison_operator() {
2167 let sql = "SELECT col1 FROM table1 WHERE 1 > = 1";
2168 let issues = run_statementless_with_dialect(sql, Dialect::Mssql);
2169 assert!(!issues.is_empty());
2170 let fixed = apply_all_issue_autofixes(sql, &issues);
2171 assert_eq!(fixed, "SELECT col1 FROM table1 WHERE 1 >= 1");
2172 }
2173
2174 #[test]
2175 fn statementless_fixes_tsql_compound_assignment_operator() {
2176 let sql = "SET @param1+=1";
2177 let issues = run_statementless_with_dialect(sql, Dialect::Mssql);
2178 assert!(!issues.is_empty());
2179 let fixed = apply_all_issue_autofixes(sql, &issues);
2180 assert_eq!(fixed, "SET @param1 += 1");
2181 }
2182
2183 #[test]
2184 fn allows_sparksql_multi_unit_interval_minus() {
2185 let sql = "SELECT INTERVAL -2 HOUR '3' MINUTE AS col;";
2186 let issues = run_statementless_with_dialect(sql, Dialect::Databricks);
2187 assert!(issues.is_empty());
2188 }
2189
2190 #[test]
2191 fn ignore_templated_areas_skips_template_artifacts() {
2192 let sql = "{{ 'SELECT 1, 4' }}, 5, 6";
2193 let issues = run_statementless_with_rule(
2194 sql,
2195 Dialect::Generic,
2196 LayoutSpacing {
2197 ignore_templated_areas: true,
2198 ..LayoutSpacing::default()
2199 },
2200 );
2201 assert!(issues.is_empty(), "template-only spacing should be ignored");
2202 }
2203
2204 #[test]
2205 fn ignore_templated_areas_still_fixes_non_template_region() {
2206 let sql = "{{ 'SELECT 1, 4' }}, 5 , 6";
2207 let issues = run_statementless_with_rule(
2208 sql,
2209 Dialect::Generic,
2210 LayoutSpacing {
2211 ignore_templated_areas: true,
2212 ..LayoutSpacing::default()
2213 },
2214 );
2215 assert!(!issues.is_empty());
2216 let fixed = apply_all_issue_autofixes(sql, &issues);
2217 assert_eq!(fixed, "{{ 'SELECT 1, 4' }}, 5, 6");
2218 }
2219
2220 #[test]
2221 fn templated_string_content_is_checked_when_not_ignored() {
2222 let sql = "{{ 'SELECT 1 ,4' }}";
2223 let issues = run_statementless_with_rule(
2224 sql,
2225 Dialect::Generic,
2226 LayoutSpacing {
2227 ignore_templated_areas: false,
2228 ..LayoutSpacing::default()
2229 },
2230 );
2231 assert!(!issues.is_empty());
2232 assert!(
2233 issues.iter().all(|issue| issue.autofix.is_none()),
2234 "template-internal checks are detection-only"
2235 );
2236 }
2237
2238 #[test]
2239 fn templated_string_content_passes_when_clean() {
2240 let sql = "{{ 'SELECT 1, 4' }}";
2241 let issues = run_statementless_with_rule(
2242 sql,
2243 Dialect::Generic,
2244 LayoutSpacing {
2245 ignore_templated_areas: false,
2246 ..LayoutSpacing::default()
2247 },
2248 );
2249 assert!(issues.is_empty());
2250 }
2251
2252 #[test]
2253 fn allows_snowflake_match_recognize_pattern_spacing() {
2254 let sql = "select * from stock_price_history\n match_recognize (\n pattern ((A | B){5} C+)\n )";
2255 let issues = run_statementless_with_dialect(sql, Dialect::Snowflake);
2256 assert!(issues.is_empty(), "snowflake pattern syntax should pass");
2257 }
2258
2259 #[test]
2260 fn fixes_snowflake_match_condition_newline_before_paren() {
2261 let sql = "select\n table1.pk1\nfrom table1\n asof join\n table2\n match_condition\n (t1 > t2)";
2262 let issues = run_with_dialect(sql, Dialect::Snowflake);
2263 assert!(!issues.is_empty());
2264 let fixed = apply_all_issue_autofixes(sql, &issues);
2265 assert!(
2266 fixed.contains("match_condition(t1 > t2)"),
2267 "expected inline match_condition: {fixed}"
2268 );
2269 }
2270
2271 #[test]
2272 fn fixes_snowflake_copy_into_target_column_list_spacing() {
2273 let sql = "copy into DB.SCHEMA.ProblemHere(col1)\nfrom @my_stage/file";
2274 let issues = run_statementless_with_dialect(sql, Dialect::Snowflake);
2275 assert!(!issues.is_empty());
2276 let fixed = apply_all_issue_autofixes(sql, &issues);
2277 assert!(
2278 fixed.contains("DB.SCHEMA.ProblemHere (col1)"),
2279 "fixed: {fixed}"
2280 );
2281 }
2282
2283 #[test]
2284 fn fixes_snowflake_copy_into_target_column_list_spacing_with_placeholder_prefix() {
2285 let sql = "copy into ${env}_ENT_LANDING.SCHEMA_NAME.ProblemHere(col1)\nfrom @my_stage/file";
2286 let issues = run_statementless_with_dialect(sql, Dialect::Snowflake);
2287 assert!(!issues.is_empty());
2288 let fixed = apply_all_issue_autofixes(sql, &issues);
2289 assert!(
2290 fixed.contains(".SCHEMA_NAME.ProblemHere (col1)"),
2291 "fixed: {fixed}"
2292 );
2293 }
2294
2295 #[test]
2296 fn allows_snowflake_stage_path_without_spacing_around_slash() {
2297 let sql = "copy into t from @my_stage/file";
2298 let issues = run_statementless_with_dialect(sql, Dialect::Snowflake);
2299 assert!(
2300 issues.is_empty(),
2301 "snowflake stage path should not force spaces around slash: {issues:?}"
2302 );
2303 }
2304
2305 #[test]
2308 fn flags_trailing_whitespace() {
2309 let sql = "SELECT 1 \n";
2310 let issues = run(sql);
2311 assert!(!issues.is_empty(), "should flag trailing whitespace");
2312 let fixed = apply_all_issue_autofixes(sql, &issues);
2313 assert_eq!(fixed, "SELECT 1\n");
2314 }
2315
2316 #[test]
2317 fn flags_trailing_whitespace_on_initial_blank_line() {
2318 let sql = " \nSELECT 1 \n";
2319 let issues = run(sql);
2320 assert!(!issues.is_empty());
2321 let fixed = apply_all_issue_autofixes(sql, &issues);
2322 assert_eq!(fixed, "\nSELECT 1\n");
2323 }
2324
2325 #[test]
2328 fn flags_compact_operator() {
2329 let sql = "SELECT 1+2";
2330 let issues = run(sql);
2331 assert!(!issues.is_empty(), "should flag compact 1+2");
2332 let fixed = apply_all_issue_autofixes(sql, &issues);
2333 assert_eq!(fixed, "SELECT 1 + 2");
2334 }
2335
2336 #[test]
2337 fn flags_compact_operator_expression() {
2338 let sql = "select\n field,\n date(field_1)-date(field_2) as diff\nfrom tbl";
2339 let issues = run(sql);
2340 assert!(!issues.is_empty());
2341 let fixed = apply_all_issue_autofixes(sql, &issues);
2342 assert!(
2343 fixed.contains("date(field_1) - date(field_2)"),
2344 "should fix operator spacing: {fixed}"
2345 );
2346 }
2347
2348 #[test]
2349 fn flags_plus_between_identifier_and_literal() {
2350 let sql = "SELECT a +'b'+ 'c' FROM tbl";
2351 let issues = run(sql);
2352 assert!(
2353 !issues.is_empty(),
2354 "should flag operator spacing around string literals"
2355 );
2356 let fixed = apply_all_issue_autofixes(sql, &issues);
2357 assert_eq!(fixed, "SELECT a + 'b' + 'c' FROM tbl");
2358 }
2359
2360 #[test]
2361 fn does_not_flag_simple_spacing() {
2362 assert!(run("SELECT * FROM t WHERE a = 1").is_empty());
2363 }
2364
2365 #[test]
2366 fn does_not_flag_sign_indicators() {
2367 let issues = run("SELECT 1, +2, -4");
2368 assert!(
2370 issues.is_empty(),
2371 "unary signs should not be flagged: {issues:?}"
2372 );
2373 }
2374
2375 #[test]
2376 fn does_not_flag_newline_operator() {
2377 assert!(run("SELECT 1\n+ 2").is_empty());
2378 assert!(run("SELECT 1\n + 2").is_empty());
2379 }
2380
2381 #[test]
2384 fn flags_space_before_comma() {
2385 let sql = "SELECT 1 ,4";
2386 let issues = run(sql);
2387 assert!(!issues.is_empty(), "should flag space before comma");
2388 let fixed = apply_all_issue_autofixes(sql, &issues);
2389 assert_eq!(fixed, "SELECT 1, 4");
2390 }
2391
2392 #[test]
2393 fn flags_no_space_after_comma() {
2394 let sql = "SELECT 1,4";
2395 let issues = run(sql);
2396 assert!(!issues.is_empty(), "should flag missing space after comma");
2397 let fixed = apply_all_issue_autofixes(sql, &issues);
2398 assert_eq!(fixed, "SELECT 1, 4");
2399 }
2400
    /// A run of spaces after a comma is excessive; the autofix collapses it
    /// to a single space.
    /// NOTE(review): the input literal must contain more than one space after
    /// the comma for this test to be meaningful; whitespace may have been
    /// collapsed in transit — verify the literal against version control.
    #[test]
    fn flags_excessive_space_after_comma() {
        let sql = "SELECT 1,  4";
        let issues = run(sql);
        assert!(
            !issues.is_empty(),
            "should flag excessive space after comma"
        );
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT 1, 4");
    }
2412
2413 #[test]
2416 fn flags_missing_space_before_paren_after_keyword() {
2417 let sql = "SELECT * FROM(SELECT 1 AS C1)AS T1;";
2418 let issues = run(sql);
2419 assert!(!issues.is_empty(), "should flag FROM( and )AS: {issues:?}");
2420 let fixed = apply_all_issue_autofixes(sql, &issues);
2421 assert_eq!(fixed, "SELECT * FROM (SELECT 1 AS C1) AS T1;");
2422 }
2423
2424 #[test]
2427 fn flags_cte_missing_space_after_as() {
2428 let sql = "WITH a AS(select 1) select * from a";
2429 let issues = run(sql);
2430 assert!(!issues.is_empty(), "should flag AS(");
2431 let fixed = apply_all_issue_autofixes(sql, &issues);
2432 assert_eq!(fixed, "WITH a AS (select 1) select * from a");
2433 }
2434
    /// Multiple spaces between a CTE's `AS` and `(` collapse to a single one.
    /// NOTE(review): the input literal must contain more than one space after
    /// `AS` for this test to differ from its expected output; whitespace may
    /// have been collapsed in transit — verify against version control.
    #[test]
    fn flags_cte_multiple_spaces_after_as() {
        let sql = "WITH a AS  (select 1) select * from a";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag AS (");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "WITH a AS (select 1) select * from a");
    }
2443
2444 #[test]
2445 fn flags_missing_space_after_using() {
2446 let sql = "select * from a JOIN b USING(x)";
2447 let issues = run(sql);
2448 assert!(!issues.is_empty(), "should flag USING(");
2449 let fixed = apply_all_issue_autofixes(sql, &issues);
2450 assert_eq!(fixed, "select * from a JOIN b USING (x)");
2451 }
2452
    /// A run of spaces between two tokens is excessive whitespace and
    /// collapses to a single space.
    /// NOTE(review): the input literal must contain more than one space for
    /// this test to be meaningful; whitespace may have been collapsed in
    /// transit — verify against version control.
    #[test]
    fn flags_excessive_whitespace() {
        let sql = "SELECT  1";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag excessive whitespace");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT 1");
    }
2463
    /// Excessive interior whitespace in a multi-line expression is collapsed
    /// while line-leading indentation and the trailing comment survive.
    /// NOTE(review): input and expected literals render identically here —
    /// the input is expected to carry extra interior spaces and whitespace
    /// appears to have been collapsed in transit; verify against version
    /// control before trusting this fixture.
    #[test]
    fn flags_excessive_whitespace_multi() {
        let sql = "select\n 1 + 2 + 3 + 4 -- Comment\nfrom foo";
        let issues = run(sql);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(
            fixed,
            "select\n 1 + 2 + 3 + 4 -- Comment\nfrom foo"
        );
    }
2475
2476 #[test]
2479 fn flags_literal_operator_spacing() {
2480 let sql = "SELECT ('foo'||'bar') as buzz";
2481 let issues = run(sql);
2482 assert!(
2483 !issues.is_empty(),
2484 "should flag compact || operator: {issues:?}"
2485 );
2486 let fixed = apply_all_issue_autofixes(sql, &issues);
2487 assert_eq!(fixed, "SELECT ('foo' || 'bar') as buzz");
2488 }
2489
2490 #[test]
2491 fn flags_literal_as_spacing() {
2492 let sql = "SELECT\n 'foo'AS bar\nFROM foo";
2493 let issues = run(sql);
2494 assert!(!issues.is_empty());
2495 let fixed = apply_all_issue_autofixes(sql, &issues);
2496 assert_eq!(fixed, "SELECT\n 'foo' AS bar\nFROM foo");
2497 }
2498
    /// ANSI dialect: the rule pads the gap between `N` and the quoted string.
    /// NOTE(review): the expected output splits `N'b'` into `N 'b'`, which
    /// turns a national-string literal into an identifier followed by a plain
    /// string — this pins current behavior but looks like a rule bug worth
    /// confirming rather than an intended fix.
    #[test]
    fn flags_ansi_national_string_literal_spacing() {
        let sql = "SELECT a + N'b' + N'c' FROM tbl;";
        let issues = run_with_dialect(sql, Dialect::Ansi);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT a + N 'b' + N 'c' FROM tbl;");
    }
2507
2508 #[test]
2511 fn does_not_flag_function_call() {
2512 assert!(run("SELECT foo(5) FROM T1;").is_empty());
2513 assert!(run("SELECT COUNT(*) FROM tbl\n\n").is_empty());
2514 }
2515
2516 #[test]
2519 fn flags_spaced_cast_operator() {
2520 let sql = "SELECT '1' :: INT;";
2521 let issues = run(sql);
2522 assert!(!issues.is_empty(), "should flag space around ::");
2523 let fixed = apply_all_issue_autofixes(sql, &issues);
2524 assert_eq!(fixed, "SELECT '1'::INT;");
2525 }
2526
2527 #[test]
2530 fn flags_compact_json_arrow_operator() {
2531 let sql = "SELECT payload->>'id' FROM t";
2532 let issues = run(sql);
2533 assert!(
2534 issues.len() >= 2,
2535 "should flag 2+ violations for compact json-arrow"
2536 );
2537 assert!(
2538 issues
2539 .iter()
2540 .all(|issue| issue.autofix.as_ref().is_some_and(
2541 |autofix| autofix.applicability == IssueAutofixApplicability::Safe
2542 )),
2543 "expected safe autofix metadata"
2544 );
2545
2546 let fixed = apply_all_issue_autofixes(sql, &issues);
2547 assert_eq!(fixed, "SELECT payload ->> 'id' FROM t");
2548 }
2549
    /// `EXISTS(` with no preceding space is acceptable in a select list.
    #[test]
    fn does_not_flag_exists_without_space_before_parenthesis() {
        let no_space = "SELECT\n EXISTS(\n SELECT 1\n ) AS has_row\nFROM t";
        assert!(run(no_space).is_empty());
    }
2555
    /// In a select list, `EXISTS (` is flagged and the autofix removes the
    /// space so it becomes `EXISTS(` — the inverse of the WHERE-clause rule.
    #[test]
    fn flags_space_before_exists_parenthesis_in_select_list() {
        let sql = "SELECT 1,\n EXISTS (\n SELECT 1\n ) AS has_row\nFROM t";
        let issues = run(sql);
        assert!(
            !issues.is_empty(),
            "expected EXISTS-space violation in select list"
        );
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert!(
            fixed.contains("EXISTS(\n"),
            "expected EXISTS( after fix, got: {fixed}"
        );
    }
2570
    /// After WHERE, `EXISTS(` must gain a space: the fix yields `EXISTS (` —
    /// the opposite treatment from the select-list context.
    #[test]
    fn requires_space_before_exists_parenthesis_after_where() {
        let sql = "SELECT 1\nWHERE EXISTS(\n SELECT 1\n)";
        let issues = run(sql);
        assert!(
            !issues.is_empty(),
            "expected missing-space violation for WHERE EXISTS("
        );
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert!(
            fixed.contains("WHERE EXISTS (\n"),
            "expected WHERE EXISTS ( after fix, got: {fixed}"
        );
    }
2585
2586 #[test]
2587 fn merge_violations_prefers_fixable_duplicate_span() {
2588 let mut violations = vec![
2589 ((10, 10), Vec::new()),
2590 ((10, 10), vec![(10, 10, " ".to_string())]),
2591 ];
2592 merge_violations_by_span(&mut violations);
2593 assert_eq!(violations.len(), 1);
2594 assert_eq!(violations[0].0, (10, 10));
2595 assert_eq!(violations[0].1, vec![(10, 10, " ".to_string())]);
2596 }
2597
    /// Trigger patterns that appear only inside a string literal (`->>`,
    /// doubled quotes) or a line comment (`EXISTS (`) must not be flagged.
    #[test]
    fn does_not_flag_spacing_patterns_inside_literals_or_comments() {
        let issues = run("SELECT 'payload->>''id''' AS txt -- EXISTS (\nFROM t");
        assert!(
            issues.is_empty(),
            "should not flag content inside literals/comments: {issues:?}"
        );
    }
2608
2609 #[test]
2610 fn does_not_flag_correct_comma_spacing() {
2611 assert!(run("SELECT 1, 4").is_empty());
2612 }
2613
2614 #[test]
2615 fn does_not_flag_correct_cast() {
2616 assert!(run("SELECT '1'::INT;").is_empty());
2617 }
2618
    /// Dots inside qualified names (`a.b`, `c.d`) are not operators and need
    /// no surrounding whitespace.
    #[test]
    fn does_not_flag_qualified_identifiers() {
        assert!(run("SELECT a.b FROM c.d").is_empty());
    }
2624
2625 #[test]
2626 fn does_not_flag_newline_after_using() {
2627 assert!(
2628 run("select * from a JOIN b USING\n(x)").is_empty(),
2629 "newline between USING and ( should be acceptable"
2630 );
2631 }
2632
2633 #[test]
2634 fn flags_cte_newline_after_as() {
2635 let sql = "WITH a AS\n(\n select 1\n)\nselect * from a";
2636 let issues = run(sql);
2637 assert!(!issues.is_empty(), "should flag AS + newline + (");
2638 let fixed = apply_all_issue_autofixes(sql, &issues);
2639 assert_eq!(fixed, "WITH a AS (\n select 1\n)\nselect * from a");
2640 }
2641
    /// Blank lines plus indentation between a CTE's `AS` and its `(` all
    /// collapse to a single `AS (`.
    #[test]
    fn flags_cte_newline_and_spaces_after_as() {
        let sql = "WITH a AS\n\n\n (\n select 1\n)\nselect * from a";
        let issues = run(sql);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "WITH a AS (\n select 1\n)\nselect * from a");
    }
2650
    /// A trailing comment between `AS` and a newline-led `(` is a layout the
    /// rule leaves alone (collapsing it would move or destroy the comment).
    #[test]
    fn does_not_flag_comment_after_as() {
        assert!(
            run("WITH\na AS -- comment\n(\nselect 1\n)\nselect * from a").is_empty(),
            "comment between AS and ( should be acceptable"
        );
    }
2659
    /// `INSERT INTO table (` keeps its space before the column-list paren;
    /// only LT01 issues are filtered because other rules may legitimately
    /// fire on this fixture.
    #[test]
    fn insert_into_table_paren_allows_space() {
        let issues = run("INSERT INTO metrics.cold_start_daily (\n workspace_id\n) SELECT 1");
        let lt01 = issues
            .iter()
            .filter(|i| i.code == "LT01")
            .collect::<Vec<_>>();
        assert!(
            lt01.is_empty(),
            "INSERT INTO table ( should not flag LT01, got: {lt01:?}"
        );
    }
2673
    /// Same exemption as above, but with a leading CTE and the Postgres
    /// dialect — the INSERT target paren must still not trip LT01.
    #[test]
    fn insert_into_table_paren_with_cte() {
        let sql = "WITH starts AS (\n SELECT 1\n)\nINSERT INTO metrics.cold_start_daily (\n workspace_id\n) SELECT workspace_id FROM starts";
        let issues = run_with_dialect(sql, Dialect::Postgres);
        let lt01 = issues
            .iter()
            .filter(|i| i.code == "LT01")
            .collect::<Vec<_>>();
        assert!(
            lt01.is_empty(),
            "INSERT INTO table ( with CTE should not flag LT01, got: {lt01:?}"
        );
    }
2688
    /// Full Postgres upsert shape (CTE + INSERT + ON CONFLICT ... DO UPDATE),
    /// run through the statement-less entry point: neither the INSERT target
    /// paren nor the ON CONFLICT column list may trip LT01.
    #[test]
    fn insert_into_table_paren_on_conflict() {
        let sql = "\
WITH cte AS (
    SELECT workspace_id
    FROM ledger.query_history
    WHERE start_time >= $1
)

INSERT INTO metrics.cold_start_daily (
    workspace_id
)
SELECT workspace_id
FROM cte
ON CONFLICT (workspace_id) DO UPDATE
    SET workspace_id = excluded.workspace_id";
        let issues = run_statementless_with_dialect(sql, Dialect::Postgres);
        let lt01 = issues
            .iter()
            .filter(|i| i.code == "LT01")
            .collect::<Vec<_>>();
        assert!(
            lt01.is_empty(),
            "INSERT INTO table ( with ON CONFLICT should not flag LT01, got: {lt01:?}"
        );
    }
2716}