use crate::linter::config::LintConfig;
use crate::linter::rule::{LintContext, LintRule};
use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit};
use sqlparser::ast::Statement;
use sqlparser::keywords::Keyword;
use sqlparser::tokenizer::{Location, Span, Token, TokenWithSpan, Tokenizer, Whitespace};
use std::collections::HashSet;
/// Lint rule for layout spacing (reported as `LINT_LT_001`) together with its options.
pub struct LayoutSpacing {
/// When true, the spacing check inside template string literals is skipped.
ignore_templated_areas: bool,
/// Enables detection of misaligned ` AS ` aliases on consecutive lines.
align_alias_expression: bool,
/// Enables alignment detection inside `CREATE TABLE` bodies.
align_data_type: bool,
/// Also enables alignment detection inside `CREATE TABLE` bodies.
align_column_constraint: bool,
/// When true, aligned aliases must be separated from the expression by tabs only.
align_with_tabs: bool,
/// Number of columns a tab advances to when computing visual widths.
tab_space_size: usize,
}
impl LayoutSpacing {
    /// Builds the rule from `config`, falling back to the built-in defaults
    /// for every option that is not set.
    pub fn from_config(config: &LintConfig) -> Self {
        // True when `<type_name>.spacing_before` is a string that starts with
        // "align" (case-insensitive).
        // NOTE(review): the lookup goes through the "layout.keyword_newline"
        // section; confirm this is the intended section for per-type spacing.
        let wants_alignment = |type_name: &str| -> bool {
            config
                .config_section_object("layout.keyword_newline")
                .and_then(|layout| layout.get(type_name))
                .and_then(serde_json::Value::as_object)
                .and_then(|entry| entry.get("spacing_before"))
                .and_then(serde_json::Value::as_str)
                .map(str::to_ascii_lowercase)
                .is_some_and(|lowered| lowered.starts_with("align"))
        };

        let ignore_templated_areas = config
            .core_option_bool("ignore_templated_areas")
            .unwrap_or(true);
        let align_alias_expression = wants_alignment("alias_expression");
        let align_data_type = wants_alignment("data_type");
        let align_column_constraint = wants_alignment("column_constraint_segment");

        // The "indentation" section takes precedence over "rules".
        let indent_unit = config
            .section_option_str("indentation", "indent_unit")
            .or_else(|| config.section_option_str("rules", "indent_unit"));
        let align_with_tabs = indent_unit.is_some_and(|unit| unit.eq_ignore_ascii_case("tab"));

        // Clamped to at least 1 so tab-stop arithmetic never divides by zero.
        let tab_space_size = config
            .section_option_usize("indentation", "tab_space_size")
            .or_else(|| config.section_option_usize("rules", "tab_space_size"))
            .unwrap_or(4)
            .max(1);

        Self {
            ignore_templated_areas,
            align_alias_expression,
            align_data_type,
            align_column_constraint,
            align_with_tabs,
            tab_space_size,
        }
    }

    /// Copies the alignment-related settings into an `Lt01AlignmentOptions` value.
    fn alignment_options(&self) -> Lt01AlignmentOptions {
        let Self {
            align_alias_expression,
            align_data_type,
            align_column_constraint,
            align_with_tabs,
            tab_space_size,
            ..
        } = *self;
        Lt01AlignmentOptions {
            alias_expression: align_alias_expression,
            data_type: align_data_type,
            column_constraint: align_column_constraint,
            align_with_tabs,
            tab_space_size,
        }
    }
}
impl Default for LayoutSpacing {
    /// Defaults: templated areas ignored, every alignment check disabled,
    /// space-based alignment and a tab width of 4.
    fn default() -> Self {
        let alignment_disabled = false;
        Self {
            ignore_templated_areas: true,
            align_alias_expression: alignment_disabled,
            align_data_type: alignment_disabled,
            align_column_constraint: alignment_disabled,
            align_with_tabs: false,
            tab_space_size: 4,
        }
    }
}
impl LintRule for LayoutSpacing {
    /// Issue code reported by this rule.
    fn code(&self) -> &'static str {
        issue_codes::LINT_LT_001
    }

    /// Human-readable rule name.
    fn name(&self) -> &'static str {
        "Layout spacing"
    }

    /// Short rule description.
    fn description(&self) -> &'static str {
        "Inappropriate Spacing."
    }

    /// Reports spacing violations for the statement in `ctx`.
    ///
    /// For the first statement, when the statement range does not cover the
    /// whole SQL text (parser fragment or templated input), the whole text is
    /// scanned as well and the two result sets are merged by span.
    fn check(&self, _statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
        let options = self.alignment_options();
        let mut found = spacing_violations(ctx, self.ignore_templated_areas, options);

        let range = &ctx.statement_range;
        let first_statement = ctx.statement_index == 0;
        let ends_before_eof = range.end < ctx.sql.len();
        let trailing_content = ctx.sql[range.end..]
            .chars()
            .any(|ch| !ch.is_whitespace());

        // The statement stops early, more SQL follows, and it is not
        // terminated by a semicolon.
        let parser_fragment = first_statement
            && range.start == 0
            && ends_before_eof
            && trailing_content
            && !ctx.statement_sql().trim_end().ends_with(';');
        // Templated input whose first statement does not span the whole text.
        let template_fragment = first_statement
            && contains_template_marker(ctx.sql)
            && (range.start > 0 || ends_before_eof);

        if parser_fragment || template_fragment {
            let whole_text = LintContext {
                sql: ctx.sql,
                statement_range: 0..ctx.sql.len(),
                statement_index: 0,
            };
            // NOTE(review): these violations are computed against the whole
            // text but converted to spans through `ctx` below; confirm the
            // offsets line up when `ctx.statement_range.start > 0`.
            found.extend(spacing_violations(
                &whole_text,
                self.ignore_templated_areas,
                options,
            ));
            merge_violations_by_span(&mut found);
        }

        found
            .into_iter()
            .map(|((start, end), edits)| {
                let issue = Issue::info(issue_codes::LINT_LT_001, "Inappropriate spacing found.")
                    .with_statement(ctx.statement_index)
                    .with_span(ctx.span_from_statement_offset(start, end));
                if edits.is_empty() {
                    return issue;
                }
                let patches = edits
                    .into_iter()
                    .map(|(from, to, text)| {
                        IssuePatchEdit::new(
                            ctx.span_from_statement_offset(from, to),
                            text.to_string(),
                        )
                    })
                    .collect();
                issue.with_autofix_edits(IssueAutofixApplicability::Safe, patches)
            })
            .collect()
    }
}
/// `(start, end)` byte offsets of a violation within the statement SQL.
type Lt01Span = (usize, usize);
/// One autofix edit: `(start, end, replacement)`.
type Lt01AutofixEdit = (usize, usize, String);
/// A violation span plus its autofix edits (empty when no fix is offered).
type Lt01Violation = (Lt01Span, Vec<Lt01AutofixEdit>);
/// `(start, end)` byte offsets of a template tag, end exclusive.
type Lt01TemplateSpan = (usize, usize);
/// Sorts `violations` by span and collapses entries that share a span.
///
/// When spans collide, an entry without edits adopts the other entry's edits;
/// when both carry edits, edits not already present are appended.
fn merge_violations_by_span(violations: &mut Vec<Lt01Violation>) {
    violations.sort_unstable_by_key(|(span, _)| *span);
    let mut merged: Vec<Lt01Violation> = Vec::with_capacity(violations.len());
    for (span, edits) in std::mem::take(violations) {
        let same_span_entry = merged.last_mut().filter(|(prev_span, _)| *prev_span == span);
        if let Some((_, kept_edits)) = same_span_entry {
            if kept_edits.is_empty() {
                // Nothing to preserve: take whatever the duplicate offers.
                *kept_edits = edits;
            } else {
                for edit in edits {
                    if !kept_edits.contains(&edit) {
                        kept_edits.push(edit);
                    }
                }
            }
            continue;
        }
        merged.push((span, edits));
    }
    *violations = merged;
}
/// Alignment settings handed to the alignment detection passes.
#[derive(Clone, Copy)]
struct Lt01AlignmentOptions {
/// Run alias (` AS `) alignment detection.
alias_expression: bool,
/// Run `CREATE TABLE` alignment detection.
data_type: bool,
/// Also runs `CREATE TABLE` alignment detection.
column_constraint: bool,
/// Require tab-only separators before aligned aliases.
align_with_tabs: bool,
/// Tab stop width used when computing visual columns.
tab_space_size: usize,
}
/// Runs every LT01 spacing pass over the statement in `ctx` and returns the
/// violations sorted by span, keeping one entry per span.
///
/// Returns an empty list when the statement cannot be tokenized.
fn spacing_violations(
    ctx: &LintContext,
    ignore_templated_areas: bool,
    alignment: Lt01AlignmentOptions,
) -> Vec<Lt01Violation> {
    let sql = ctx.statement_sql();
    let templated_spans = template_spans(sql);

    // For templated input that still contains template markers, prefer
    // tokenizing the raw text and fall back to the context tokens; otherwise
    // try the context tokens first.
    let raw_first = ctx.is_templated() && contains_template_marker(sql);
    let raw_tokens = || tokenized(sql, ctx.dialect());
    let context_tokens = || tokenized_for_context(ctx);
    let tokens = if raw_first {
        raw_tokens().or_else(context_tokens)
    } else {
        context_tokens().or_else(raw_tokens)
    };
    let Some(tokens) = tokens else {
        return Vec::new();
    };

    let dialect = ctx.dialect();
    let mut found = Vec::new();
    collect_trailing_whitespace_violations(sql, &mut found);
    collect_pair_spacing_violations(sql, &tokens, dialect, &templated_spans, &mut found);
    collect_ansi_national_string_literal_violations(
        sql,
        &tokens,
        dialect,
        &templated_spans,
        &mut found,
    );
    if !ignore_templated_areas {
        collect_template_string_spacing_violations(sql, dialect, &templated_spans, &mut found);
    }
    collect_alignment_detection_violations(sql, alignment, &mut found);

    found.sort_unstable_by_key(|(span, _)| *span);
    found.dedup_by_key(|(span, _)| *span);
    found
}
/// Records a violation, with a delete edit, for every run of spaces or tabs
/// that ends a `\n`-separated line.
fn collect_trailing_whitespace_violations(sql: &str, violations: &mut Vec<Lt01Violation>) {
    let mut line_start = 0usize;
    for line in sql.split('\n') {
        let line_end = line_start + line.len();
        let kept_len = line
            .trim_end_matches(|ch: char| ch == ' ' || ch == '\t')
            .len();
        let whitespace_start = line_start + kept_len;
        if whitespace_start < line_end {
            violations.push((
                (whitespace_start, line_end),
                vec![(whitespace_start, line_end, String::new())],
            ));
        }
        // Skip over the '\n' that `split` consumed.
        line_start = line_end + 1;
    }
}
fn collect_alignment_detection_violations(
sql: &str,
alignment: Lt01AlignmentOptions,
violations: &mut Vec<Lt01Violation>,
) {
if alignment.alias_expression {
collect_alias_alignment_detection(
sql,
alignment.tab_space_size,
alignment.align_with_tabs,
violations,
);
}
if alignment.data_type || alignment.column_constraint {
collect_create_table_alignment_detection(sql, alignment.tab_space_size, violations);
}
}
/// One aliased line inside a group of consecutive alias lines.
#[derive(Clone, Copy)]
struct AliasAlignmentEntry {
/// Byte offset of the `as` keyword within the scanned SQL.
as_start: usize,
/// Visual column of the `as` keyword after tab expansion.
visual_col: usize,
/// True when the whitespace directly before `as` consists only of tabs.
separator_uses_tabs: bool,
}
/// Detects misaligned ` AS ` aliases across runs of consecutive lines.
///
/// A line joins the current group when it contains ` as ` (case-insensitive)
/// and does not start with `from `; any other line closes the group, which is
/// then handed to `emit_alias_alignment_group`. Single-line input is ignored.
fn collect_alias_alignment_detection(
    sql: &str,
    tab_space_size: usize,
    align_with_tabs: bool,
    violations: &mut Vec<Lt01Violation>,
) {
    // Alignment needs at least two lines to compare.
    if !sql.contains('\n') {
        return;
    }
    let mut offset = 0usize;
    let mut current_group: Vec<AliasAlignmentEntry> = Vec::new();
    for line in sql.split('\n') {
        // ASCII lowercasing keeps byte offsets valid for slicing `line`.
        let lower = line.to_ascii_lowercase();
        let alias_pos = lower
            .find(" as ")
            .filter(|_| !lower.trim_start().starts_with("from "));
        match alias_pos {
            Some(space_index) => {
                // Step over the leading space so the index points at `as`.
                let as_index = space_index + 1;
                current_group.push(AliasAlignmentEntry {
                    as_start: offset + as_index,
                    visual_col: visual_width(&line[..as_index], tab_space_size),
                    separator_uses_tabs: alias_separator_uses_tabs(line, as_index),
                });
            }
            None => {
                if !current_group.is_empty() {
                    emit_alias_alignment_group(&current_group, align_with_tabs, violations);
                    current_group.clear();
                }
            }
        }
        // Account for the '\n' consumed by `split`.
        offset += line.len() + 1;
    }
    if !current_group.is_empty() {
        emit_alias_alignment_group(&current_group, align_with_tabs, violations);
    }
}
/// Returns true when the whitespace run directly before `as_index` in `line`
/// is non-empty and made up of tab characters only.
fn alias_separator_uses_tabs(line: &str, as_index: usize) -> bool {
    let before_as = &line[..as_index];
    // Everything after the last non-whitespace character is the separator.
    let separator = &before_as[before_as.trim_end().len()..];
    !separator.is_empty() && separator.bytes().all(|byte| byte == b'\t')
}
/// Flags every alias in `group` whose `as` keyword is not at the group's
/// right-most visual column, or, when `align_with_tabs` is set, whose
/// separator is not tab-only. Groups with fewer than two entries are ignored.
///
/// Each violation covers the two bytes of the `as` keyword and has no autofix.
fn emit_alias_alignment_group(
    group: &[AliasAlignmentEntry],
    align_with_tabs: bool,
    violations: &mut Vec<Lt01Violation>,
) {
    if group.len() < 2 {
        return;
    }
    let Some(target_col) = group.iter().map(|entry| entry.visual_col).max() else {
        return;
    };
    let misaligned = group.iter().filter(|entry| {
        entry.visual_col != target_col || (align_with_tabs && !entry.separator_uses_tabs)
    });
    violations.extend(misaligned.map(|entry| ((entry.as_start, entry.as_start + 2), Vec::new())));
}
/// Detects misaligned second tokens on the lines of a `CREATE TABLE` body.
///
/// Scanning starts at a line beginning with `CREATE TABLE` and stops at a
/// line beginning with `)` or `;`. For each remaining non-empty,
/// non-comment line the start of its second whitespace-separated token is
/// recorded, and the collected group is passed to
/// `emit_create_table_alignment_group`.
fn collect_create_table_alignment_detection(
    sql: &str,
    tab_space_size: usize,
    violations: &mut Vec<Lt01Violation>,
) {
    let mut line_start = 0usize;
    let mut inside_create = false;
    let mut columns: Vec<(usize, usize)> = Vec::new();

    for line in sql.split('\n') {
        let body = line.trim_start();
        let starts_create = body.to_ascii_uppercase().starts_with("CREATE TABLE");
        let closes_body = body.starts_with(')') || body.starts_with(';');

        if !inside_create && starts_create {
            inside_create = true;
        } else if inside_create && closes_body {
            emit_create_table_alignment_group(&columns, violations);
            columns.clear();
            inside_create = false;
        }

        let is_column_line = inside_create
            && !body.is_empty()
            && !starts_create
            && !["(", ")", "--"].iter().any(|prefix| body.starts_with(prefix));
        if is_column_line {
            if let Some(second_start) = second_token_start(body) {
                let indent_len = line.len() - body.len();
                columns.push((
                    line_start + indent_len + second_start,
                    // Width is measured from the first token, not from the line start.
                    visual_width(&body[..second_start], tab_space_size),
                ));
            }
        }
        // Account for the '\n' consumed by `split`.
        line_start += line.len() + 1;
    }

    // The statement ended without a closing line: still report the group.
    if inside_create && !columns.is_empty() {
        emit_create_table_alignment_group(&columns, violations);
    }
}
/// Flags every `(offset, visual column)` entry whose column differs from the
/// group's largest column. Groups with fewer than two entries are ignored.
///
/// Each violation covers a single byte at the entry offset and has no autofix.
fn emit_create_table_alignment_group(
    group: &[(usize, usize)],
    violations: &mut Vec<Lt01Violation>,
) {
    if group.len() < 2 {
        return;
    }
    let Some(target_col) = group.iter().map(|&(_, col)| col).max() else {
        return;
    };
    violations.extend(
        group
            .iter()
            .filter(|&&(_, col)| col != target_col)
            .map(|&(start, _)| ((start, start + 1), Vec::new())),
    );
}
/// Returns the byte index at which the second whitespace-separated token of
/// `line` starts, or `None` when the line has fewer than two tokens.
fn second_token_start(line: &str) -> Option<usize> {
    // Skip leading whitespace to reach the first token.
    let first_start = line.len() - line.trim_start().len();
    // End of the first token: the next whitespace character, if any.
    let first_end = first_start + line[first_start..].find(char::is_whitespace)?;
    let after_first = &line[first_end..];
    let gap_len = after_first.len() - after_first.trim_start().len();
    // Only whitespace after the first token means there is no second token.
    (gap_len < after_first.len()).then_some(first_end + gap_len)
}
/// Returns the visual width of `text`, where a tab advances to the next
/// multiple of `tab_space_size` and every other character counts as one column.
///
/// A `tab_space_size` of 0 is treated as 1 so the tab-stop arithmetic cannot
/// divide by zero.
fn visual_width(text: &str, tab_space_size: usize) -> usize {
    let tab_stop = tab_space_size.max(1);
    text.chars().fold(0usize, |width, ch| {
        if ch == '\t' {
            (width / tab_stop + 1) * tab_stop
        } else {
            width + 1
        }
    })
}
/// Spacing expected between two adjacent non-trivia tokens.
#[derive(Debug, Clone, Copy, PartialEq)]
enum ExpectedSpacing {
/// Exactly one space; gaps containing a newline or a comment are not checked.
Single,
/// No whitespace; gaps containing a newline or a comment are not flagged.
None,
/// No whitespace, flagged even across a newline; gaps containing a comment are not flagged.
NoneInline,
/// The pair is not checked.
Skip,
/// Exactly one space, flagged even across a newline; gaps containing a comment are not checked.
SingleInline,
}
/// Walks every pair of adjacent non-trivia tokens and records a violation,
/// with an autofix edit, when the text between them does not match the
/// spacing expected for that pair.
fn collect_pair_spacing_violations(
sql: &str,
tokens: &[TokenWithSpan],
dialect: Dialect,
templated_spans: &[Lt01TemplateSpan],
violations: &mut Vec<Lt01Violation>,
) {
// Indices of tokens that are neither trivia nor EOF.
let non_trivia: Vec<usize> = tokens
.iter()
.enumerate()
.filter(|(_, t)| !is_trivia_token(&t.token) && !matches!(t.token, Token::EOF))
.map(|(i, _)| i)
.collect();
// Only computed for dialects that support type-angle spacing.
let type_angle_tokens = if supports_type_angle_spacing(dialect) {
type_angle_token_indices(tokens, &non_trivia)
} else {
HashSet::new()
};
// Only computed for Snowflake; pairs touching these tokens are skipped below.
let snowflake_pattern_tokens = if dialect == Dialect::Snowflake {
snowflake_pattern_token_indices(tokens, &non_trivia)
} else {
HashSet::new()
};
for window in non_trivia.windows(2) {
let left_idx = window[0];
let right_idx = window[1];
if dialect == Dialect::Snowflake
&& (snowflake_pattern_tokens.contains(&left_idx)
|| snowflake_pattern_tokens.contains(&right_idx))
{
continue;
}
let left = &tokens[left_idx];
let right = &tokens[right_idx];
// Pairs whose offsets cannot be resolved are skipped.
let Some((left_start, left_end)) = token_offsets(sql, left) else {
continue;
};
let Some((right_start, _)) = token_offsets(sql, right) else {
continue;
};
// Guard the slice below against overlapping or out-of-range offsets.
if left_end > right_start || right_start > sql.len() || left_end > sql.len() {
continue;
}
// Pairs that overlap a template tag are not checked.
if overlaps_template_span(templated_spans, left_start, right_start) {
continue;
}
// Raw text between the two tokens.
let gap = &sql[left_end..right_start];
let has_newline = gap.contains('\n') || gap.contains('\r');
let has_comment = has_comment_between(tokens, left_idx, right_idx);
// Type-angle pairs take no whitespace; everything else uses the general rules.
let expected = if supports_type_angle_spacing(dialect)
&& is_type_angle_spacing_pair(left, right, left_idx, right_idx, &type_angle_tokens)
{
ExpectedSpacing::None
} else {
expected_spacing(left, right, tokens, left_idx, right_idx, dialect)
};
match expected {
ExpectedSpacing::Skip => continue,
ExpectedSpacing::None => {
// Flag any gap unless it contains a newline or a comment; the fix deletes it.
if !gap.is_empty() && !has_newline && !has_comment {
let span = (left_end, right_start);
let edit = (left_end, right_start, String::new());
violations.push((span, vec![edit]));
}
}
ExpectedSpacing::NoneInline => {
// Like `None`, but a newline in the gap is flagged too.
if !gap.is_empty() && !has_comment {
let span = (left_end, right_start);
let edit = (left_end, right_start, String::new());
violations.push((span, vec![edit]));
}
}
ExpectedSpacing::Single => {
if has_comment {
continue;
}
if has_newline {
continue;
}
if gap == " " {
continue;
}
// Missing space after a comma: the span and edit cover the comma
// itself, replacing it with the comma followed by a space.
if gap.is_empty() && matches!(left.token, Token::Comma) {
let replacement = format!("{} ", &sql[left_start..left_end]);
let span = (left_start, left_end);
let edit = (left_start, left_end, replacement);
violations.push((span, vec![edit]));
continue;
}
// Same treatment for a missing space after EXISTS.
if gap.is_empty() && is_exists_keyword_token(&left.token) {
let replacement = format!("{} ", &sql[left_start..left_end]);
let span = (left_start, left_end);
let edit = (left_start, left_end, replacement);
violations.push((span, vec![edit]));
continue;
}
// Otherwise replace the gap (possibly empty) with a single space.
let span = (left_end, right_start);
let edit = (left_end, right_start, " ".to_string());
violations.push((span, vec![edit]));
}
ExpectedSpacing::SingleInline => {
// Like `Single`, but a newline in the gap is flagged too.
if has_comment {
continue;
}
if gap == " " {
continue;
}
let span = (left_end, right_start);
let edit = (left_end, right_start, " ".to_string());
violations.push((span, vec![edit]));
}
}
}
}
/// Decides the spacing expected between two adjacent non-trivia tokens.
///
/// The checks run top to bottom and the first match wins, so their order
/// matters. Pairs matched by no rule return `ExpectedSpacing::Skip`.
fn expected_spacing(
left: &TokenWithSpan,
right: &TokenWithSpan,
tokens: &[TokenWithSpan],
left_idx: usize,
right_idx: usize,
dialect: Dialect,
) -> ExpectedSpacing {
// `.` and `::` bind tightly on both sides.
if matches!(left.token, Token::Period) || matches!(right.token, Token::Period) {
return ExpectedSpacing::NoneInline;
}
if matches!(left.token, Token::DoubleColon) || matches!(right.token, Token::DoubleColon) {
return ExpectedSpacing::NoneInline;
}
// In Snowflake a single `:` also takes no surrounding whitespace.
if dialect == Dialect::Snowflake
&& (matches!(left.token, Token::Colon) || matches!(right.token, Token::Colon))
{
return ExpectedSpacing::NoneInline;
}
// Two tokens that together form a comparison operator must stay joined.
if is_split_compound_comparison_pair(left, right) {
return ExpectedSpacing::NoneInline;
}
if dialect == Dialect::Mssql && is_tsql_compound_assignment_pair(left, right) {
return ExpectedSpacing::NoneInline;
}
// Parentheses are delegated to dedicated helpers.
if matches!(right.token, Token::LParen) {
return expected_spacing_before_lparen(left, tokens, left_idx, dialect);
}
if matches!(left.token, Token::RParen) {
return expected_spacing_after_rparen(right, tokens, right_idx);
}
// `[` takes a space only after a type keyword.
if matches!(right.token, Token::LBracket) {
if is_type_keyword_for_bracket(&left.token) {
return ExpectedSpacing::Single;
}
return ExpectedSpacing::None;
}
if matches!(left.token, Token::RBracket) {
if matches!(
right.token,
Token::DoubleColon | Token::Period | Token::LBracket | Token::RParen
) {
return ExpectedSpacing::None;
}
return ExpectedSpacing::Single;
}
// No space before a comma, one space after it.
if matches!(right.token, Token::Comma) {
return ExpectedSpacing::None;
}
if matches!(left.token, Token::Comma) {
return ExpectedSpacing::Single;
}
// Spacing around semicolons is not checked.
if matches!(right.token, Token::SemiColon) {
return ExpectedSpacing::Skip;
}
if matches!(left.token, Token::SemiColon) {
return ExpectedSpacing::Skip;
}
// No padding just inside parentheses.
if matches!(left.token, Token::LParen) {
return ExpectedSpacing::None;
}
if matches!(right.token, Token::RParen) {
return ExpectedSpacing::None;
}
if dialect == Dialect::Bigquery
&& is_bigquery_hyphenated_identifier_pair(left, right, tokens, left_idx, right_idx)
{
return ExpectedSpacing::None;
}
if is_filesystem_path_pair(left, right, tokens, left_idx, right_idx, dialect) {
return ExpectedSpacing::NoneInline;
}
// Binary operators take a space on each side, except in unary position.
if is_binary_operator(&left.token) || is_binary_operator(&right.token) {
if is_unary_operator_pair(left, right, tokens, left_idx) {
return ExpectedSpacing::Skip;
}
return ExpectedSpacing::Single;
}
if is_comparison_operator(&left.token) || is_comparison_operator(&right.token) {
// NOTE(review): both paths below return `Single`, so the Mssql
// assignment check currently has no effect on the result.
if dialect == Dialect::Mssql
&& is_tsql_assignment_rhs_pair(left, right, tokens, left_idx, right_idx)
{
return ExpectedSpacing::Single;
}
return ExpectedSpacing::Single;
}
if is_json_operator(&left.token) || is_json_operator(&right.token) {
return ExpectedSpacing::Single;
}
// Spacing around `*` is not checked.
if matches!(left.token, Token::Mul) || matches!(right.token, Token::Mul) {
return ExpectedSpacing::Skip;
}
// Words and literals are separated by exactly one space.
if is_word_like(&left.token) && is_word_like(&right.token) {
return ExpectedSpacing::Single;
}
if (is_word_like(&left.token) && is_literal(&right.token))
|| (is_literal(&left.token) && is_word_like(&right.token))
{
return ExpectedSpacing::Single;
}
if is_literal(&left.token) && is_literal(&right.token) {
return ExpectedSpacing::Single;
}
if (matches!(left.token, Token::Number(_, _)) && is_word_like(&right.token))
|| (is_word_like(&left.token) && matches!(right.token, Token::Number(_, _)))
{
return ExpectedSpacing::Single;
}
ExpectedSpacing::Skip
}
/// Returns true for arithmetic, concatenation, bitwise, shift and
/// assignment operator tokens.
fn is_binary_operator(token: &Token) -> bool {
    match token {
        Token::Plus | Token::Minus | Token::Div | Token::Mod => true,
        Token::StringConcat | Token::Assignment => true,
        Token::Ampersand | Token::Pipe | Token::Caret => true,
        Token::ShiftLeft | Token::ShiftRight => true,
        _ => false,
    }
}
/// Returns true for comparison operator tokens.
fn is_comparison_operator(token: &Token) -> bool {
    match token {
        Token::Eq | Token::Neq | Token::DoubleEq => true,
        Token::Lt | Token::Gt | Token::LtEq | Token::GtEq => true,
        Token::Spaceship | Token::TildeEqual => true,
        _ => false,
    }
}
/// Returns true when the two tokens are the halves of a compound comparison
/// operator: `>` `=`, `<` `=`, `<` `>`, or a not-equal token followed by `=`.
fn is_split_compound_comparison_pair(left: &TokenWithSpan, right: &TokenWithSpan) -> bool {
    match (&left.token, &right.token) {
        (Token::Gt | Token::Lt | Token::Neq, Token::Eq) => true,
        (Token::Lt, Token::Gt) => true,
        _ => false,
    }
}
/// Returns true for operator tokens that can precede `=` in a compound
/// assignment (arithmetic and bitwise operators).
fn is_assignment_operator_token(token: &Token) -> bool {
    match token {
        Token::Plus | Token::Minus | Token::Mul | Token::Div | Token::Mod => true,
        Token::Ampersand | Token::Pipe | Token::Caret => true,
        _ => false,
    }
}
/// Returns true when `left` is an assignment-capable operator and `right` is `=`.
fn is_tsql_compound_assignment_pair(left: &TokenWithSpan, right: &TokenWithSpan) -> bool {
    let operator_on_left = is_assignment_operator_token(&left.token);
    let equals_on_right = matches!(right.token, Token::Eq);
    operator_on_left && equals_on_right
}
/// Returns true when `left` is `=` and the non-trivia token before it is an
/// assignment-capable operator, i.e. `left` closes a compound assignment.
fn is_tsql_assignment_rhs_pair(
    left: &TokenWithSpan,
    _right: &TokenWithSpan,
    tokens: &[TokenWithSpan],
    left_idx: usize,
    _right_idx: usize,
) -> bool {
    matches!(left.token, Token::Eq)
        && prev_non_trivia_index(tokens, left_idx)
            .is_some_and(|before| is_assignment_operator_token(&tokens[before].token))
}
/// Returns true for the arrow-style operator tokens handled as JSON operators.
fn is_json_operator(token: &Token) -> bool {
    match token {
        Token::Arrow | Token::LongArrow => true,
        Token::HashArrow | Token::HashLongArrow => true,
        Token::AtArrow | Token::ArrowAt => true,
        _ => false,
    }
}
/// Returns true for word and placeholder tokens.
fn is_word_like(token: &Token) -> bool {
    match token {
        Token::Word(_) | Token::Placeholder(_) => true,
        _ => false,
    }
}
/// Returns true for string-literal tokens of any flavour and for numbers.
fn is_literal(token: &Token) -> bool {
    match token {
        Token::Number(_, _) => true,
        Token::SingleQuotedString(_) | Token::DoubleQuotedString(_) => true,
        Token::TripleSingleQuotedString(_) | Token::TripleDoubleQuotedString(_) => true,
        Token::NationalStringLiteral(_)
        | Token::EscapedStringLiteral(_)
        | Token::UnicodeStringLiteral(_)
        | Token::HexStringLiteral(_) => true,
        Token::SingleQuotedByteStringLiteral(_) | Token::DoubleQuotedByteStringLiteral(_) => true,
        _ => false,
    }
}
/// Returns true when `token` is an unquoted word naming one of the data
/// types that take a space before `[` (compared case-insensitively).
fn is_type_keyword_for_bracket(token: &Token) -> bool {
    const TYPE_NAMES: [&str; 27] = [
        "TEXT",
        "UUID",
        "INT",
        "INTEGER",
        "BIGINT",
        "SMALLINT",
        "VARCHAR",
        "CHAR",
        "BOOLEAN",
        "BOOL",
        "NUMERIC",
        "DECIMAL",
        "FLOAT",
        "DOUBLE",
        "DATE",
        "TIME",
        "TIMESTAMP",
        "INTERVAL",
        "JSONB",
        "JSON",
        "BYTEA",
        "REAL",
        "SERIAL",
        "BIGSERIAL",
        "INET",
        "CIDR",
        "MACADDR",
    ];
    let Token::Word(word) = token else {
        return false;
    };
    // Quoted identifiers are never treated as type keywords.
    word.quote_style.is_none()
        && TYPE_NAMES
            .iter()
            .any(|name| word.value.eq_ignore_ascii_case(name))
}
/// Returns true when `token` is a word whose keyword is `EXISTS`.
fn is_exists_keyword_token(token: &Token) -> bool {
    if let Token::Word(word) = token {
        word.keyword == Keyword::EXISTS
    } else {
        false
    }
}
/// Returns true when `token` is a word whose keyword names a DDL object
/// kind (table, view, index, function, ...).
fn is_ddl_object_keyword(token: &Token) -> bool {
    let Token::Word(word) = token else {
        return false;
    };
    match word.keyword {
        Keyword::TABLE | Keyword::VIEW | Keyword::INDEX => true,
        Keyword::FUNCTION | Keyword::PROCEDURE | Keyword::TRIGGER => true,
        Keyword::SEQUENCE | Keyword::TYPE => true,
        Keyword::SCHEMA | Keyword::DATABASE => true,
        _ => false,
    }
}
/// Returns true when the word at `word_index` is the last part of a
/// (possibly dot-qualified) name that directly follows a DDL object keyword
/// used in a definition context.
fn is_qualified_ddl_object_name(tokens: &[TokenWithSpan], word_index: usize) -> bool {
    let mut name_part = word_index;
    while let Some(before) = prev_non_trivia_index(tokens, name_part) {
        if !matches!(tokens[before].token, Token::Period) {
            // Reached the token in front of the whole qualified name.
            return is_ddl_object_keyword(&tokens[before].token)
                && is_ddl_object_definition_context(tokens, before);
        }
        // A period must be preceded by another word-like qualifier.
        match prev_non_trivia_index(tokens, before) {
            Some(qualifier) if is_word_like(&tokens[qualifier].token) => name_part = qualifier,
            _ => return false,
        }
    }
    false
}
/// Returns true when the word at `word_index` is the last part of a
/// (possibly dot-qualified) name that directly follows `REFERENCES`.
fn is_reference_target_name(tokens: &[TokenWithSpan], word_index: usize) -> bool {
    let mut name_part = word_index;
    while let Some(before) = prev_non_trivia_index(tokens, name_part) {
        if !matches!(tokens[before].token, Token::Period) {
            // Reached the token in front of the whole qualified name.
            return matches!(
                &tokens[before].token,
                Token::Word(word) if word.keyword == Keyword::REFERENCES
            );
        }
        // A period must be preceded by another word-like qualifier.
        match prev_non_trivia_index(tokens, before) {
            Some(qualifier) if is_word_like(&tokens[qualifier].token) => name_part = qualifier,
            _ => return false,
        }
    }
    false
}
/// Returns true when scanning backwards from `word_index` reaches
/// `COPY INTO` before hitting a clause keyword, separator, or parenthesis.
///
/// At most 65 preceding non-trivia tokens are inspected.
fn is_copy_into_target_name(tokens: &[TokenWithSpan], word_index: usize) -> bool {
    let mut cursor = word_index;
    for _ in 0..=64 {
        let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) else {
            return false;
        };
        let token = &tokens[prev_idx].token;
        if let Token::Word(word) = token {
            if word.keyword == Keyword::INTO {
                // `INTO` only counts when it is directly preceded by `COPY`.
                return prev_non_trivia_index(tokens, prev_idx).is_some_and(|copy_idx| {
                    matches!(
                        &tokens[copy_idx].token,
                        Token::Word(copy_word) if copy_word.keyword == Keyword::COPY
                    )
                });
            }
            let is_clause_keyword = matches!(
                word.keyword,
                Keyword::FROM
                    | Keyword::SELECT
                    | Keyword::WHERE
                    | Keyword::JOIN
                    | Keyword::ON
                    | Keyword::HAVING
            );
            if is_clause_keyword {
                return false;
            }
        } else if matches!(
            token,
            Token::SemiColon | Token::Comma | Token::LParen | Token::RParen
        ) {
            return false;
        }
        cursor = prev_idx;
    }
    false
}
/// Returns true when scanning backwards from `word_index` over periods and
/// unquoted words reaches `INSERT INTO`.
///
/// At most 17 preceding non-trivia tokens are inspected.
fn is_insert_into_target_name(tokens: &[TokenWithSpan], word_index: usize) -> bool {
    let mut cursor = word_index;
    for _ in 0..=16 {
        let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) else {
            return false;
        };
        match &tokens[prev_idx].token {
            Token::Word(word) if word.keyword == Keyword::INTO => {
                // `INTO` only counts when it is directly preceded by `INSERT`.
                return prev_non_trivia_index(tokens, prev_idx).is_some_and(|insert_idx| {
                    matches!(
                        &tokens[insert_idx].token,
                        Token::Word(w) if w.keyword == Keyword::INSERT
                    )
                });
            }
            // Parts of the qualified target name: keep walking.
            Token::Period => {}
            Token::Word(word) if word.quote_style.is_none() => {}
            _ => return false,
        }
        cursor = prev_idx;
    }
    false
}
fn is_ddl_object_definition_context(tokens: &[TokenWithSpan], ddl_keyword_index: usize) -> bool {
let Some(prev_idx) = prev_non_trivia_index(tokens, ddl_keyword_index) else {
return false;
};
let Token::Word(prev_word) = &tokens[prev_idx].token else {
return false;
};
if matches!(
prev_word.keyword,
Keyword::CREATE | Keyword::ALTER | Keyword::DROP | Keyword::TRUNCATE
) {
return true;
}
if prev_word.keyword == Keyword::OR {
if let Some(prev_prev_idx) = prev_non_trivia_index(tokens, prev_idx) {
if let Token::Word(prev_prev_word) = &tokens[prev_prev_idx].token {
return matches!(prev_prev_word.keyword, Keyword::CREATE | Keyword::ALTER);
}
}
}
false
}
fn is_unary_operator_pair(
left: &TokenWithSpan,
right: &TokenWithSpan,
tokens: &[TokenWithSpan],
left_idx: usize,
) -> bool {
if matches!(right.token, Token::Plus | Token::Minus)
&& is_unary_prefix_context(&tokens[left_idx].token)
{
return true;
}
if matches!(left.token, Token::Plus | Token::Minus) {
if let Some(prev_idx) = prev_non_trivia_index(tokens, left_idx) {
if is_unary_prefix_context(&tokens[prev_idx].token) {
return true;
}
} else {
return true;
}
}
false
}
/// Returns true when the pair belongs to a `word-word.` sequence, i.e. a
/// hyphenated identifier followed by a period.
///
/// Handles both positions of the hyphen: `word` `-` (hyphen on the right)
/// and `-` `word` (hyphen on the left).
fn is_bigquery_hyphenated_identifier_pair(
    left: &TokenWithSpan,
    right: &TokenWithSpan,
    tokens: &[TokenWithSpan],
    left_idx: usize,
    right_idx: usize,
) -> bool {
    let word_at = |idx: usize| matches!(tokens[idx].token, Token::Word(_));
    let period_at = |idx: usize| matches!(tokens[idx].token, Token::Period);

    if matches!(right.token, Token::Minus) {
        if !matches!(left.token, Token::Word(_)) {
            return false;
        }
        // word - <word> <.>
        let Some(after_hyphen) = next_non_trivia_index(tokens, right_idx + 1) else {
            return false;
        };
        if !word_at(after_hyphen) {
            return false;
        }
        return next_non_trivia_index(tokens, after_hyphen + 1).is_some_and(period_at);
    }

    if matches!(left.token, Token::Minus) {
        if !matches!(right.token, Token::Word(_)) {
            return false;
        }
        // <word> - word <.>
        let Some(before_hyphen) = prev_non_trivia_index(tokens, left_idx) else {
            return false;
        };
        if !word_at(before_hyphen) {
            return false;
        }
        return next_non_trivia_index(tokens, right_idx + 1).is_some_and(period_at);
    }

    false
}
/// Returns true when the pair touches a `/` that is part of a filesystem or
/// stage path rather than a division.
///
/// Only applies to Databricks, Clickhouse and Snowflake. The slash must have
/// a word on at least one side and sit within reach of a path context: a
/// stage marker for Snowflake, a path keyword for the other dialects.
fn is_filesystem_path_pair(
    left: &TokenWithSpan,
    right: &TokenWithSpan,
    tokens: &[TokenWithSpan],
    left_idx: usize,
    right_idx: usize,
    dialect: Dialect,
) -> bool {
    let dialect_has_paths = matches!(
        dialect,
        Dialect::Databricks | Dialect::Clickhouse | Dialect::Snowflake
    );
    if !dialect_has_paths {
        return false;
    }

    let div_index = if matches!(left.token, Token::Div) {
        left_idx
    } else if matches!(right.token, Token::Div) {
        let left_is_context_keyword = is_path_context_keyword_token(&left.token);
        let left_follows_slash = prev_non_trivia_index(tokens, left_idx)
            .is_some_and(|idx| matches!(tokens[idx].token, Token::Div));
        // A context keyword directly before the slash is not a path segment
        // unless it already follows another slash.
        if left_is_context_keyword && !left_follows_slash {
            return false;
        }
        right_idx
    } else {
        return false;
    };

    let word_at = |idx: Option<usize>| idx.is_some_and(|i| matches!(tokens[i].token, Token::Word(_)));
    let word_before = word_at(prev_non_trivia_index(tokens, div_index));
    let word_after = word_at(next_non_trivia_index(tokens, div_index + 1));
    if !word_before && !word_after {
        return false;
    }

    match dialect {
        Dialect::Snowflake => snowflake_stage_path_context_within(tokens, div_index, 12),
        _ => path_context_keyword_within(tokens, div_index, 6),
    }
}
/// Returns true when `token` is a word spelled `JAR` or `MODEL`
/// (case-insensitive).
fn is_path_context_keyword_token(token: &Token) -> bool {
    match token {
        Token::Word(word) => ["JAR", "MODEL"]
            .iter()
            .any(|name| word.value.eq_ignore_ascii_case(name)),
        _ => false,
    }
}
/// Returns true when a `JAR` or `MODEL` word appears among the non-trivia
/// tokens preceding `from_idx`.
///
/// Up to `limit` tokens are inspected, and always at least one.
fn path_context_keyword_within(tokens: &[TokenWithSpan], from_idx: usize, limit: usize) -> bool {
    let mut cursor = from_idx;
    let mut remaining = limit.max(1);
    while remaining > 0 {
        let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) else {
            break;
        };
        if let Token::Word(word) = &tokens[prev_idx].token {
            let is_context_word = matches!(word.keyword, Keyword::JAR)
                || word.value.eq_ignore_ascii_case("JAR")
                || word.value.eq_ignore_ascii_case("MODEL");
            if is_context_word {
                return true;
            }
        }
        cursor = prev_idx;
        remaining -= 1;
    }
    false
}
/// Returns true when an `@` token, or a word starting with `@`, appears
/// among the non-trivia tokens preceding `from_idx`.
///
/// Up to `limit` tokens are inspected, and always at least one.
fn snowflake_stage_path_context_within(
    tokens: &[TokenWithSpan],
    from_idx: usize,
    limit: usize,
) -> bool {
    let mut cursor = from_idx;
    let mut remaining = limit.max(1);
    while remaining > 0 {
        let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) else {
            break;
        };
        let is_stage_marker = match &tokens[prev_idx].token {
            Token::AtSign => true,
            Token::Word(word) => word.value.starts_with('@'),
            _ => false,
        };
        if is_stage_marker {
            return true;
        }
        cursor = prev_idx;
        remaining -= 1;
    }
    false
}
/// Returns true when a `+`/`-` directly after `token` should be read as a
/// sign: after a comma, `(`, a comparison operator, or one of the listed
/// keywords.
fn is_unary_prefix_context(token: &Token) -> bool {
    match token {
        Token::Comma | Token::LParen => true,
        Token::Eq | Token::Neq | Token::Lt | Token::Gt | Token::LtEq | Token::GtEq => true,
        Token::Word(word) => matches!(
            word.keyword,
            Keyword::SELECT
                | Keyword::WHERE
                | Keyword::WHEN
                | Keyword::THEN
                | Keyword::ELSE
                | Keyword::AND
                | Keyword::OR
                | Keyword::ON
                | Keyword::SET
                | Keyword::CASE
                | Keyword::BETWEEN
                | Keyword::IN
                | Keyword::VALUES
                | Keyword::INTERVAL
                | Keyword::YEAR
                | Keyword::MONTH
                | Keyword::DAY
                | Keyword::HOUR
                | Keyword::MINUTE
                | Keyword::SECOND
                | Keyword::RETURN
                | Keyword::RETURNS
        ),
        _ => false,
    }
}
/// Decides the spacing expected between `left` and a following `(`.
///
/// For unquoted words the checks run top to bottom and the first match wins;
/// an unquoted word matched by none of them takes no space before the
/// parenthesis.
fn expected_spacing_before_lparen(
left: &TokenWithSpan,
tokens: &[TokenWithSpan],
left_idx: usize,
dialect: Dialect,
) -> ExpectedSpacing {
match &left.token {
Token::Word(w) if w.quote_style.is_none() => {
// Snowflake-specific words come first.
if dialect == Dialect::Snowflake {
if w.value.eq_ignore_ascii_case("MATCH_RECOGNIZE")
|| w.value.eq_ignore_ascii_case("PATTERN")
{
return ExpectedSpacing::Single;
}
if w.value.eq_ignore_ascii_case("MATCH_CONDITION") {
return ExpectedSpacing::NoneInline;
}
}
// EXISTS takes a space only in the contexts accepted by the helper.
if w.value.eq_ignore_ascii_case("EXISTS") {
if exists_requires_space_before_lparen(tokens, left_idx) {
return ExpectedSpacing::Single;
}
return ExpectedSpacing::NoneInline;
}
if is_keyword_requiring_space_before_paren(w.keyword) {
// After AS the single space is enforced even across a newline.
if matches!(w.keyword, Keyword::AS) {
return ExpectedSpacing::SingleInline;
}
return ExpectedSpacing::Single;
}
if is_insert_into_target_name(tokens, left_idx) {
return ExpectedSpacing::Single;
}
// The remaining name-based checks apply to non-keyword words only.
if w.keyword == Keyword::NoKeyword {
if is_reference_target_name(tokens, left_idx) {
return ExpectedSpacing::Single;
}
if is_copy_into_target_name(tokens, left_idx) {
return ExpectedSpacing::Single;
}
if is_qualified_ddl_object_name(tokens, left_idx) {
return ExpectedSpacing::Skip;
}
}
ExpectedSpacing::NoneInline
}
Token::RParen | Token::RBracket => ExpectedSpacing::Single,
_ if is_literal(&left.token) => ExpectedSpacing::Single,
// NOTE(review): `is_literal` in this file matches `Token::Number`, so the
// guard above already catches numbers and this arm is never reached.
// Confirm which result is intended for a number before `(`.
Token::Number(_, _) => ExpectedSpacing::None,
Token::Comma => ExpectedSpacing::Single,
// NOTE(review): this arm returns the same value as the catch-all below.
_ if is_binary_operator(&left.token) || is_comparison_operator(&left.token) => {
ExpectedSpacing::Skip
}
_ => ExpectedSpacing::Skip,
}
}
/// Returns true when the `EXISTS` at `left_idx` must be followed by a space
/// before `(`: it follows a boolean or conditional word, a parenthesis, or
/// a comparison operator. Returns false when nothing precedes it.
fn exists_requires_space_before_lparen(tokens: &[TokenWithSpan], left_idx: usize) -> bool {
    const CLAUSE_WORDS: [&str; 8] = [
        "AND", "OR", "NOT", "WHERE", "HAVING", "WHEN", "THEN", "ELSE",
    ];
    let Some(prev_idx) = prev_non_trivia_index(tokens, left_idx) else {
        return false;
    };
    match &tokens[prev_idx].token {
        Token::Word(word) => {
            let keyword_matches = matches!(
                word.keyword,
                Keyword::AND
                    | Keyword::OR
                    | Keyword::NOT
                    | Keyword::WHERE
                    | Keyword::HAVING
                    | Keyword::WHEN
                    | Keyword::THEN
                    | Keyword::ELSE
            );
            // Fall back to the spelling when the keyword was not classified.
            keyword_matches
                || CLAUSE_WORDS
                    .iter()
                    .any(|name| word.value.eq_ignore_ascii_case(name))
        }
        other => matches!(
            other,
            Token::RParen
                | Token::LParen
                | Token::Eq
                | Token::Neq
                | Token::Lt
                | Token::Gt
                | Token::LtEq
                | Token::GtEq
        ),
    }
}
/// Returns true for keywords that take a space before a following `(`.
fn is_keyword_requiring_space_before_paren(keyword: Keyword) -> bool {
    match keyword {
        Keyword::AS | Keyword::USING | Keyword::FROM | Keyword::JOIN | Keyword::ON => true,
        Keyword::WHERE | Keyword::IN | Keyword::BETWEEN | Keyword::HAVING => true,
        Keyword::WHEN | Keyword::THEN | Keyword::ELSE => true,
        Keyword::AND | Keyword::OR | Keyword::NOT => true,
        Keyword::OVER | Keyword::PARTITION | Keyword::ORDER | Keyword::GROUP | Keyword::BY => true,
        Keyword::LIMIT | Keyword::FILTER | Keyword::CONFLICT => true,
        Keyword::UNION | Keyword::INTERSECT | Keyword::EXCEPT => true,
        Keyword::RECURSIVE | Keyword::WITH | Keyword::SELECT => true,
        Keyword::INTO | Keyword::TABLE | Keyword::VALUES | Keyword::SET => true,
        Keyword::RETURNS => true,
        _ => false,
    }
}
/// Decides the spacing expected between `)` and the token that follows it:
/// none before punctuation that attaches to the parenthesis, unchecked
/// before `;`, and a single space before anything else.
fn expected_spacing_after_rparen(
    right: &TokenWithSpan,
    _tokens: &[TokenWithSpan],
    _right_idx: usize,
) -> ExpectedSpacing {
    match &right.token {
        Token::SemiColon => ExpectedSpacing::Skip,
        Token::Period
        | Token::DoubleColon
        | Token::LBracket
        | Token::RBracket
        | Token::Comma
        | Token::RParen => ExpectedSpacing::None,
        _ => ExpectedSpacing::Single,
    }
}
/// Returns true when a single-line or multi-line comment token lies strictly
/// between the tokens at indices `left` and `right`.
fn has_comment_between(tokens: &[TokenWithSpan], left: usize, right: usize) -> bool {
    for between in &tokens[left + 1..right] {
        if let Token::Whitespace(whitespace) = &between.token {
            let is_comment = matches!(
                whitespace,
                Whitespace::SingleLineComment { .. } | Whitespace::MultiLineComment(_)
            );
            if is_comment {
                return true;
            }
        }
    }
    false
}
/// Returns the `(start, end)` byte ranges of every template tag in `sql`
/// (`{{ }}`, `{% %}`, `{# #}`), in order of appearance.
///
/// An unterminated tag extends to the end of `sql` and stops the scan.
fn template_spans(sql: &str) -> Vec<Lt01TemplateSpan> {
    let mut spans = Vec::new();
    let mut search_from = 0usize;
    loop {
        let Some((open, close)) = find_next_template_open(sql, search_from) else {
            break;
        };
        // Opening markers are two bytes long.
        let body_start = open + 2;
        match sql[body_start..].find(close) {
            Some(rel_close) => {
                let tag_end = body_start + rel_close + close.len();
                spans.push((open, tag_end));
                search_from = tag_end;
            }
            None => {
                spans.push((open, sql.len()));
                break;
            }
        }
    }
    spans
}
/// Finds the earliest template opening delimiter (`{{`, `{%` or `{#`) at or
/// after byte offset `from`.
///
/// Returns the absolute byte offset of the opener together with the closing
/// delimiter that pairs with it, or `None` when no opener is present or `from`
/// is not a valid slice start for `sql`.
fn find_next_template_open(sql: &str, from: usize) -> Option<(usize, &'static str)> {
    const DELIMITERS: [(&str, &str); 3] = [("{{", "}}"), ("{%", "%}"), ("{#", "#}")];

    let haystack = sql.get(from..)?;
    let mut earliest: Option<(usize, &'static str)> = None;
    for &(opener, closer) in DELIMITERS.iter() {
        if let Some(rel) = haystack.find(opener) {
            let candidate = from + rel;
            // Replace only on a strictly smaller offset so the first-listed
            // delimiter wins a tie.
            let is_earlier = match earliest {
                Some((best, _)) => candidate < best,
                None => true,
            };
            if is_earlier {
                earliest = Some((candidate, closer));
            }
        }
    }
    earliest
}
/// Returns `true` when `sql` contains any template opening delimiter
/// (`{{`, `{%` or `{#`).
fn contains_template_marker(sql: &str) -> bool {
    ["{{", "{%", "{#"]
        .iter()
        .any(|marker| sql.contains(*marker))
}
/// Returns `true` when the half-open byte range `start..end` intersects at
/// least one of the template `spans`.
fn overlaps_template_span(spans: &[Lt01TemplateSpan], start: usize, end: usize) -> bool {
    for (span_start, span_end) in spans {
        // Two ranges are disjoint when one ends at or before the other begins.
        let disjoint = start >= *span_end || end <= *span_start;
        if !disjoint {
            return true;
        }
    }
    false
}
/// Records a violation, with a fix, for every national string literal written
/// as `N'...'` / `n'...'`; the fix inserts one space after the prefix letter.
///
/// Nothing is reported for `Dialect::Mssql`. Literals whose byte range cannot
/// be resolved, is empty or out of bounds, is shorter than three bytes, or
/// overlaps one of `templated_spans` are skipped.
fn collect_ansi_national_string_literal_violations(
    sql: &str,
    tokens: &[TokenWithSpan],
    dialect: Dialect,
    templated_spans: &[Lt01TemplateSpan],
    violations: &mut Vec<Lt01Violation>,
) {
    if matches!(dialect, Dialect::Mssql) {
        return;
    }

    let national_literals = tokens
        .iter()
        .filter(|candidate| matches!(candidate.token, Token::NationalStringLiteral(_)));

    for literal in national_literals {
        let (start, end) = match token_offsets(sql, literal) {
            Some(range) => range,
            None => continue,
        };
        let in_bounds = start < end && end <= sql.len();
        if !in_bounds || overlaps_template_span(templated_spans, start, end) {
            continue;
        }

        let raw = &sql[start..end];
        if raw.len() < 3 {
            continue;
        }
        // Only the `N'...'` / `n'...'` spelling is rewritten.
        let Some(body) = raw.strip_prefix('N').or_else(|| raw.strip_prefix('n')) else {
            continue;
        };
        if !body.starts_with('\'') {
            continue;
        }

        // The prefix letter is a single ASCII byte, so `..1` is a safe slice.
        let prefix = &raw[..1];
        let replacement = format!("{prefix} {body}");
        violations.push(((start, end), vec![(start, end, replacement)]));
    }
}
/// Runs the spacing checks on the contents of quoted strings found inside
/// template spans and reports the results as detection-only violations.
///
/// Within each span, every `'...'` or `"..."` string is tokenized as SQL and
/// passed to `collect_pair_spacing_violations` and
/// `collect_ansi_national_string_literal_violations`. Each resulting range is
/// translated back to offsets in `sql` and pushed with an empty edit list, so
/// no fix is offered. Scanning of a span stops at the first quote that has no
/// closing partner; strings that fail to tokenize are skipped.
fn collect_template_string_spacing_violations(
    sql: &str,
    dialect: Dialect,
    templated_spans: &[Lt01TemplateSpan],
    violations: &mut Vec<Lt01Violation>,
) {
    for (span_start, span_end) in templated_spans {
        let (span_start, span_end) = (*span_start, *span_end);
        let mut scan_from = span_start;

        while scan_from < span_end {
            let Some((open_quote, quote)) = next_quote_in_range(sql, scan_from, span_end) else {
                break;
            };
            let body_start = open_quote + 1;
            let Some(close_quote) = find_closing_quote(sql, body_start, span_end, quote) else {
                break;
            };
            // Whatever happens below, the next scan resumes after this string.
            scan_from = close_quote + 1;

            let literal_body = &sql[body_start..close_quote];
            let Some(fragment_tokens) = tokenized(literal_body, dialect) else {
                continue;
            };

            let mut found = Vec::new();
            collect_pair_spacing_violations(
                literal_body,
                &fragment_tokens,
                dialect,
                &[],
                &mut found,
            );
            collect_ansi_national_string_literal_violations(
                literal_body,
                &fragment_tokens,
                dialect,
                &[],
                &mut found,
            );

            for ((rel_start, rel_end), _) in found {
                let usable = rel_start < rel_end && rel_end <= literal_body.len();
                if usable {
                    // Fragment offsets are relative to the string body.
                    violations.push(((body_start + rel_start, body_start + rel_end), Vec::new()));
                }
            }
        }
    }
}
/// Finds the first `'` or `"` whose byte offset lies in `start..end`.
///
/// Returns the quote's byte offset and the quote character, or `None` when the
/// range is empty or holds no quote.
///
/// # Panics
///
/// Panics if the range is non-empty and `start` is not a valid slice start
/// for `sql`.
fn next_quote_in_range(sql: &str, start: usize, end: usize) -> Option<(usize, char)> {
    if start >= end {
        return None;
    }
    sql[start..]
        .char_indices()
        .map(|(rel, ch)| (start + rel, ch))
        .take_while(|(pos, _)| *pos < end)
        .find(|(_, ch)| matches!(ch, '\'' | '"'))
}
/// Finds the first unescaped occurrence of `quote` whose byte offset lies in
/// `start..end`.
///
/// A backslash escapes the character after it, provided that character starts
/// before `end`; the escaped character is never treated as the closing quote.
/// Returns the byte offset of the closing quote, or `None` if there is none.
///
/// # Panics
///
/// Panics if the range is non-empty and `start` is not a valid slice start
/// for `sql`.
fn find_closing_quote(sql: &str, start: usize, end: usize, quote: char) -> Option<usize> {
    if start >= end {
        return None;
    }
    let mut chars = sql[start..]
        .char_indices()
        .map(|(rel, ch)| (start + rel, ch));

    while let Some((pos, ch)) = chars.next() {
        if pos >= end {
            break;
        }
        // A backslash is one byte, so the escaped character starts at `pos + 1`.
        if ch == '\\' && pos + 1 < end {
            // Consume the escaped character; a dangling escape at the end of
            // input means there is no closing quote.
            chars.next()?;
            continue;
        }
        if ch == quote {
            return Some(pos);
        }
    }
    None
}
/// Collects the token indices that belong to a parenthesised group following
/// a `PATTERN` word (matched case-insensitively).
///
/// `non_trivia` holds indices into `tokens`. For each `PATTERN` word the next
/// `(` is located and its balanced closing `)` found; every non-trivia token
/// from that `(` through the `)` is added to the result, and scanning resumes
/// after the group. A `PATTERN` with no following `(`, or with an unbalanced
/// group, contributes nothing.
///
/// NOTE(review): the opening `(` is the first one anywhere after `PATTERN`,
/// not necessarily the directly adjacent token; confirm that is intended.
fn snowflake_pattern_token_indices(
    tokens: &[TokenWithSpan],
    non_trivia: &[usize],
) -> HashSet<usize> {
    let mut exempt = HashSet::new();
    let mut pos = 0usize;

    while pos < non_trivia.len() {
        let at_pattern_keyword = matches!(
            &tokens[non_trivia[pos]].token,
            Token::Word(word) if word.value.eq_ignore_ascii_case("PATTERN")
        );
        if !at_pattern_keyword {
            pos += 1;
            continue;
        }

        let open = non_trivia[pos + 1..]
            .iter()
            .position(|idx| matches!(tokens[*idx].token, Token::LParen))
            .map(|rel| pos + 1 + rel);

        let close = open.and_then(|open| {
            let mut depth = 0usize;
            non_trivia[open..]
                .iter()
                .position(|idx| match tokens[*idx].token {
                    Token::LParen => {
                        depth += 1;
                        false
                    }
                    // The walk starts on `(`, and stops as soon as depth
                    // returns to zero, so depth is at least 1 here.
                    Token::RParen => {
                        depth -= 1;
                        depth == 0
                    }
                    _ => false,
                })
                .map(|rel| open + rel)
        });

        match open.zip(close) {
            Some((open, close)) => {
                exempt.extend(non_trivia[open..=close].iter().copied());
                pos = close + 1;
            }
            None => pos += 1,
        }
    }
    exempt
}
fn type_angle_token_indices(tokens: &[TokenWithSpan], non_trivia: &[usize]) -> HashSet<usize> {
let mut out = HashSet::new();
let mut stack = Vec::<usize>::new();
for (pos, token_idx) in non_trivia.iter().copied().enumerate() {
let token = &tokens[token_idx].token;
match token {
Token::Lt => {
let prev_idx = pos
.checked_sub(1)
.and_then(|value| non_trivia.get(value).copied());
if prev_idx.is_some_and(|idx| is_type_constructor(&tokens[idx].token)) {
out.insert(token_idx);
stack.push(token_idx);
}
}
Token::Gt if !stack.is_empty() => {
out.insert(token_idx);
stack.pop();
}
Token::ShiftRight if stack.len() >= 2 => {
out.insert(token_idx);
stack.pop();
stack.pop();
}
_ => {}
}
}
out
}
/// Returns `true` for the dialects where angle-bracket type spacing applies:
/// BigQuery, Databricks and Hive.
fn supports_type_angle_spacing(dialect: Dialect) -> bool {
    matches!(
        dialect,
        Dialect::Databricks | Dialect::Hive | Dialect::Bigquery
    )
}
/// Returns `true` when `token` is a word equal (ignoring ASCII case) to
/// `ARRAY`, `STRUCT` or `MAP`.
fn is_type_constructor(token: &Token) -> bool {
    match token {
        Token::Word(word) => ["ARRAY", "STRUCT", "MAP"]
            .iter()
            .any(|name| word.value.eq_ignore_ascii_case(name)),
        _ => false,
    }
}
/// Decides whether the adjacent pair `left`/`right` touches a type-parameter
/// angle bracket recorded in `type_angle_tokens`.
///
/// The pair qualifies when:
/// * `right` is a recorded `<`, `>` or `>>`; or
/// * `left` is a recorded `<`; or
/// * `left` is a recorded `>` / `>>` and `right` is `,`, `)`, `]`, `[` or `>`.
fn is_type_angle_spacing_pair(
    left: &TokenWithSpan,
    right: &TokenWithSpan,
    left_idx: usize,
    right_idx: usize,
    type_angle_tokens: &HashSet<usize>,
) -> bool {
    let right_is_angle = type_angle_tokens.contains(&right_idx)
        && matches!(right.token, Token::Lt | Token::Gt | Token::ShiftRight);
    if right_is_angle {
        return true;
    }
    if !type_angle_tokens.contains(&left_idx) {
        return false;
    }
    match left.token {
        Token::Lt => true,
        Token::Gt | Token::ShiftRight => matches!(
            right.token,
            Token::Comma | Token::RParen | Token::RBracket | Token::LBracket | Token::Gt
        ),
        _ => false,
    }
}
/// Tokenizes `sql` with the sqlparser dialect corresponding to `dialect`,
/// keeping source locations. Returns `None` if tokenization fails.
fn tokenized(sql: &str, dialect: Dialect) -> Option<Vec<TokenWithSpan>> {
    let parser_dialect = dialect.to_sqlparser_dialect();
    Tokenizer::new(parser_dialect.as_ref(), sql)
        .tokenize_with_location()
        .ok()
}
/// Builds the token list for the statement described by `ctx` from the
/// document-level tokens, with spans rebased to the statement.
///
/// Only tokens whose byte range lies fully inside `ctx.statement_range` are
/// kept. Each kept token is cloned and its span re-expressed relative to the
/// statement start, so the statement's first character is line 1, column 1.
/// Tokens whose offsets or locations cannot be resolved are dropped.
///
/// Returns `None` when the statement start cannot be converted to a line and
/// column, when there are no document tokens, or when no token survives.
fn tokenized_for_context(ctx: &LintContext) -> Option<Vec<TokenWithSpan>> {
    let (first_line, first_column) = offset_to_line_col(ctx.sql, ctx.statement_range.start)?;

    ctx.with_document_tokens(|tokens| {
        if tokens.is_empty() {
            return None;
        }
        let statement = &ctx.statement_range;

        let rebased: Vec<TokenWithSpan> = tokens
            .iter()
            .filter_map(|token| {
                let (start, end) = token_with_span_offsets(ctx.sql, token)?;
                if start < statement.start || end > statement.end {
                    return None;
                }
                let start_loc = relative_location(token.span.start, first_line, first_column)?;
                let end_loc = relative_location(token.span.end, first_line, first_column)?;
                Some(TokenWithSpan::new(
                    token.token.clone(),
                    Span::new(start_loc, end_loc),
                ))
            })
            .collect();

        (!rebased.is_empty()).then_some(rebased)
    })
}
fn token_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
let start = line_col_to_offset(
sql,
token.span.start.line as usize,
token.span.start.column as usize,
)?;
let end = line_col_to_offset(
sql,
token.span.end.line as usize,
token.span.end.column as usize,
)?;
Some((start, end))
}
/// Returns the position of the first token at or after `index` that is not
/// trivia according to `is_trivia_token`, or `None` if every remaining token
/// is trivia or `index` is past the end.
fn next_non_trivia_index(tokens: &[TokenWithSpan], index: usize) -> Option<usize> {
    tokens
        .get(index..)?
        .iter()
        .position(|candidate| !is_trivia_token(&candidate.token))
        .map(|rel| index + rel)
}
/// Returns the position of the closest token strictly before `index` that is
/// not trivia according to `is_trivia_token`, or `None` if there is none.
///
/// # Panics
///
/// Panics if `index` is greater than `tokens.len()`.
fn prev_non_trivia_index(tokens: &[TokenWithSpan], index: usize) -> Option<usize> {
    tokens[..index]
        .iter()
        .rposition(|candidate| !is_trivia_token(&candidate.token))
}
/// Returns `true` for whitespace tokens that are a space, newline, tab,
/// single-line comment or multi-line comment.
fn is_trivia_token(token: &Token) -> bool {
    let Token::Whitespace(whitespace) = token else {
        return false;
    };
    matches!(
        whitespace,
        Whitespace::Space
            | Whitespace::Newline
            | Whitespace::Tab
            | Whitespace::SingleLineComment { .. }
            | Whitespace::MultiLineComment(_)
    )
}
/// Converts a 1-based `line` / `column` position into a byte offset in `sql`.
///
/// Lines are separated by `\n` and columns count characters, not bytes. The
/// column one past a line's last character is valid: it maps to that line's
/// `\n`, or to `sql.len()` on the final line. Returns `None` when `line` or
/// `column` is zero or the position does not exist.
fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
    if line == 0 || column == 0 {
        return None;
    }

    // Line N (N > 1) begins right after the (N-1)-th newline.
    let line_start = if line == 1 {
        0
    } else {
        sql.match_indices('\n')
            .nth(line - 2)
            .map(|(newline_at, _)| newline_at + 1)?
    };

    let rest = &sql[line_start..];
    let line_text = rest.find('\n').map_or(rest, |newline_at| &rest[..newline_at]);

    // Candidate offsets: one per character, plus the slot just past the last
    // character (the newline itself or the end of input).
    line_text
        .char_indices()
        .map(|(rel, _)| rel)
        .chain(std::iter::once(line_text.len()))
        .nth(column - 1)
        .map(|rel| line_start + rel)
}
/// Converts the start and end locations of `token` into byte offsets in `sql`.
///
/// Returns `None` if either location cannot be mapped by `line_col_to_offset`.
///
/// NOTE(review): this performs the same computation as `token_offsets`; the
/// two could be merged.
fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
    let span = &token.span;
    line_col_to_offset(sql, span.start.line as usize, span.start.column as usize).and_then(
        |start| {
            line_col_to_offset(sql, span.end.line as usize, span.end.column as usize)
                .map(|end| (start, end))
        },
    )
}
/// Converts a byte `offset` in `sql` into a 1-based `(line, column)` pair.
///
/// Lines are separated by `\n` and columns count characters, not bytes.
/// `offset == sql.len()` is accepted and yields the position just past the
/// last character.
///
/// Returns `None` when `offset` is beyond the end of `sql` or falls inside a
/// multi-byte character. Such an offset has no line/column; reporting the
/// end-of-input position for it would silently point at the wrong place.
fn offset_to_line_col(sql: &str, offset: usize) -> Option<(usize, usize)> {
    // `str::get` rejects out-of-range offsets and offsets that are not on a
    // character boundary.
    let prefix = sql.get(..offset)?;

    let line = 1 + prefix.bytes().filter(|byte| *byte == b'\n').count();
    // The current line begins right after the last newline in the prefix.
    let line_start = prefix.rfind('\n').map_or(0, |newline_at| newline_at + 1);
    let column = prefix[line_start..].chars().count() + 1;

    Some((line, column))
}
/// Re-expresses `location` relative to a statement that begins at
/// (`statement_start_line`, `statement_start_column`), both 1-based.
///
/// The line is shifted so the statement's first line becomes line 1. The
/// column is shifted only on that first line; later lines keep their column.
/// Returns `None` if `location` has a zero line or column, or lies before the
/// statement start.
fn relative_location(
    location: Location,
    statement_start_line: usize,
    statement_start_column: usize,
) -> Option<Location> {
    if location.line == 0 || location.column == 0 {
        return None;
    }
    let (line, column) = (location.line as usize, location.column as usize);

    // `checked_sub` doubles as the "is before the statement" test.
    let line_delta = line.checked_sub(statement_start_line)?;
    let relative_column = if line_delta == 0 {
        column.checked_sub(statement_start_column)? + 1
    } else {
        column
    };

    Some(Location::new(
        (line_delta + 1) as u64,
        relative_column as u64,
    ))
}
#[cfg(test)]
mod tests {
use super::*;
use crate::linter::rule::with_active_dialect;
use crate::parser::parse_sql;
use crate::types::{Dialect, IssueAutofixApplicability};
/// Test helper: lints `sql` under `Dialect::Generic` via `run_with_dialect`.
fn run(sql: &str) -> Vec<Issue> {
run_with_dialect(sql, Dialect::Generic)
}
/// Test helper: parses `sql`, then runs the default `LayoutSpacing` rule on
/// every parsed statement with `dialect` active and returns all issues.
///
/// Panics (via `expect`) if `sql` does not parse.
fn run_with_dialect(sql: &str, dialect: Dialect) -> Vec<Issue> {
let statements = parse_sql(sql).expect("parse");
let rule = LayoutSpacing::default();
with_active_dialect(dialect, || {
statements
.iter()
.enumerate()
.flat_map(|(index, statement)| {
rule.check(
statement,
&LintContext {
sql,
// Every statement is given the whole input as its range.
statement_range: 0..sql.len(),
statement_index: index,
},
)
})
.collect()
})
}
/// Test helper: like `run_statementless_with_rule`, using the default
/// `LayoutSpacing` configuration.
fn run_statementless_with_dialect(sql: &str, dialect: Dialect) -> Vec<Issue> {
run_statementless_with_rule(sql, dialect, LayoutSpacing::default())
}
/// Test helper: runs `rule` over `sql` without parsing `sql` itself.
///
/// A parsed `SELECT 1` is passed as the statement argument, while the lint
/// context still carries the real `sql` text and its full range. This lets
/// tests cover input that `parse_sql` is not asked to handle.
fn run_statementless_with_rule(sql: &str, dialect: Dialect, rule: LayoutSpacing) -> Vec<Issue> {
// Stand-in statement; only the context's `sql` carries the text under test.
let placeholder = parse_sql("SELECT 1").expect("parse placeholder");
with_active_dialect(dialect, || {
rule.check(
&placeholder[0],
&LintContext {
sql,
statement_range: 0..sql.len(),
statement_index: 0,
},
)
})
}
/// Test helper: applies every autofix edit carried by `issues` to `sql` and
/// returns the patched text.
///
/// Edits are ordered by span and applied from the last to the first, so
/// replacing a later span does not shift the offsets of earlier ones.
/// NOTE(review): this presumes edits do not overlap; confirm against the
/// rule's output.
fn apply_all_issue_autofixes(sql: &str, issues: &[Issue]) -> String {
    let mut pending: Vec<_> = issues
        .iter()
        .filter_map(|issue| issue.autofix.as_ref())
        .flat_map(|autofix| autofix.edits.clone())
        .collect();
    pending.sort_by_key(|edit| (edit.span.start, edit.span.end));

    pending
        .iter()
        .rev()
        .fold(sql.to_string(), |mut patched, edit| {
            patched.replace_range(edit.span.start..edit.span.end, &edit.replacement);
            patched
        })
}
#[test]
fn allows_bigquery_array_type_angle_brackets_without_spaces() {
let issues = run_with_dialect(
"SELECT ARRAY<FLOAT64>[1, 2, 3] AS floats;",
Dialect::Bigquery,
);
assert!(issues.is_empty());
}
#[test]
fn allows_create_table_with_qualified_name_before_column_list() {
let issues = run("CREATE TABLE db.schema_name.tbl_name (id INT)");
assert!(issues.is_empty());
}
#[test]
fn fixes_reference_target_column_list_spacing() {
let sql = "create table tab1 (b int references tab2(b))";
let issues = run_statementless_with_dialect(sql, Dialect::Ansi);
assert!(!issues.is_empty());
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "create table tab1 (b int references tab2 (b))");
}
#[test]
fn allows_bigquery_hyphenated_project_identifier() {
let issues = run_statementless_with_dialect(
"SELECT col_foo FROM foo-bar.foo.bar",
Dialect::Bigquery,
);
assert!(issues.is_empty());
}
#[test]
fn allows_bigquery_function_array_offset_access() {
let sql = "SELECT testFunction(a)[OFFSET(0)].* FROM table1";
let issues = run_statementless_with_dialect(sql, Dialect::Bigquery);
assert!(issues.is_empty());
}
#[test]
fn allows_hive_struct_and_array_datatype_angles() {
let sql = "select col1::STRUCT<foo: int>, col2::ARRAY<int> from t";
let issues = run_statementless_with_dialect(sql, Dialect::Hive);
assert!(issues.is_empty());
}
#[test]
fn allows_sparksql_file_literal_path() {
let sql = "ADD JAR path/to/some.jar;";
let issues = run_statementless_with_dialect(sql, Dialect::Databricks);
assert!(issues.is_empty());
}
#[test]
fn allows_clickhouse_system_model_path() {
let sql = "SYSTEM RELOAD MODEL /model/path;";
let issues = run_statementless_with_dialect(sql, Dialect::Clickhouse);
assert!(issues.is_empty(), "unexpected issues: {issues:?}");
}
#[test]
fn detects_alias_alignment_when_configured() {
let sql = "SELECT\n\tcol1 AS a,\n\tlonger_col AS b\nFROM t";
let issues = run_statementless_with_rule(
sql,
Dialect::Ansi,
LayoutSpacing {
align_alias_expression: true,
tab_space_size: 4,
..LayoutSpacing::default()
},
);
assert!(!issues.is_empty());
}
#[test]
fn detects_alias_alignment_with_tabs_when_columns_are_equal_width() {
let sql = "SELECT\n\tcol1 AS alias1,\n\tcol2 AS alias2\nFROM table1";
let issues = run_statementless_with_rule(
sql,
Dialect::Ansi,
LayoutSpacing {
align_alias_expression: true,
align_with_tabs: true,
tab_space_size: 4,
..LayoutSpacing::default()
},
);
assert!(
!issues.is_empty(),
"tab indentation alignment should flag spaces before AS"
);
}
#[test]
fn detects_create_table_datatype_alignment_when_configured() {
let sql = "CREATE TABLE tbl (\n foo VARCHAR(25) NOT NULL,\n barbar INT NULL\n)";
let issues = run_statementless_with_rule(
sql,
Dialect::Ansi,
LayoutSpacing {
align_data_type: true,
..LayoutSpacing::default()
},
);
assert!(!issues.is_empty());
}
#[test]
fn does_not_flag_create_table_alignment_when_columns_are_already_aligned() {
let sql = "CREATE TABLE foo (\n x INT NOT NULL PRIMARY KEY,\n y INT NULL,\n z INT NULL\n);";
let issues = run_statementless_with_rule(
sql,
Dialect::Ansi,
LayoutSpacing {
align_data_type: true,
align_column_constraint: true,
..LayoutSpacing::default()
},
);
assert!(
issues.is_empty(),
"expected no LT01 alignment issues: {issues:?}"
);
}
#[test]
fn statementless_fixes_comment_on_function_spacing() {
let sql = "COMMENT ON FUNCTION x (foo) IS 'y';";
let issues = run_statementless_with_dialect(sql, Dialect::Postgres);
assert!(!issues.is_empty());
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "COMMENT ON FUNCTION x(foo) IS 'y';");
}
#[test]
fn statementless_fixes_split_tsql_comparison_operator() {
let sql = "SELECT col1 FROM table1 WHERE 1 > = 1";
let issues = run_statementless_with_dialect(sql, Dialect::Mssql);
assert!(!issues.is_empty());
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "SELECT col1 FROM table1 WHERE 1 >= 1");
}
#[test]
fn statementless_fixes_tsql_compound_assignment_operator() {
let sql = "SET @param1+=1";
let issues = run_statementless_with_dialect(sql, Dialect::Mssql);
assert!(!issues.is_empty());
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "SET @param1 += 1");
}
#[test]
fn allows_sparksql_multi_unit_interval_minus() {
let sql = "SELECT INTERVAL -2 HOUR '3' MINUTE AS col;";
let issues = run_statementless_with_dialect(sql, Dialect::Databricks);
assert!(issues.is_empty());
}
#[test]
fn ignore_templated_areas_skips_template_artifacts() {
let sql = "{{ 'SELECT 1, 4' }}, 5, 6";
let issues = run_statementless_with_rule(
sql,
Dialect::Generic,
LayoutSpacing {
ignore_templated_areas: true,
..LayoutSpacing::default()
},
);
assert!(issues.is_empty(), "template-only spacing should be ignored");
}
#[test]
fn ignore_templated_areas_still_fixes_non_template_region() {
let sql = "{{ 'SELECT 1, 4' }}, 5 , 6";
let issues = run_statementless_with_rule(
sql,
Dialect::Generic,
LayoutSpacing {
ignore_templated_areas: true,
..LayoutSpacing::default()
},
);
assert!(!issues.is_empty());
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "{{ 'SELECT 1, 4' }}, 5, 6");
}
#[test]
fn templated_string_content_is_checked_when_not_ignored() {
let sql = "{{ 'SELECT 1 ,4' }}";
let issues = run_statementless_with_rule(
sql,
Dialect::Generic,
LayoutSpacing {
ignore_templated_areas: false,
..LayoutSpacing::default()
},
);
assert!(!issues.is_empty());
assert!(
issues.iter().all(|issue| issue.autofix.is_none()),
"template-internal checks are detection-only"
);
}
#[test]
fn templated_string_content_passes_when_clean() {
let sql = "{{ 'SELECT 1, 4' }}";
let issues = run_statementless_with_rule(
sql,
Dialect::Generic,
LayoutSpacing {
ignore_templated_areas: false,
..LayoutSpacing::default()
},
);
assert!(issues.is_empty());
}
#[test]
fn allows_snowflake_match_recognize_pattern_spacing() {
let sql = "select * from stock_price_history\n match_recognize (\n pattern ((A | B){5} C+)\n )";
let issues = run_statementless_with_dialect(sql, Dialect::Snowflake);
assert!(issues.is_empty(), "snowflake pattern syntax should pass");
}
#[test]
fn fixes_snowflake_match_condition_newline_before_paren() {
let sql = "select\n table1.pk1\nfrom table1\n asof join\n table2\n match_condition\n (t1 > t2)";
let issues = run_with_dialect(sql, Dialect::Snowflake);
assert!(!issues.is_empty());
let fixed = apply_all_issue_autofixes(sql, &issues);
assert!(
fixed.contains("match_condition(t1 > t2)"),
"expected inline match_condition: {fixed}"
);
}
#[test]
fn fixes_snowflake_copy_into_target_column_list_spacing() {
let sql = "copy into DB.SCHEMA.ProblemHere(col1)\nfrom @my_stage/file";
let issues = run_statementless_with_dialect(sql, Dialect::Snowflake);
assert!(!issues.is_empty());
let fixed = apply_all_issue_autofixes(sql, &issues);
assert!(
fixed.contains("DB.SCHEMA.ProblemHere (col1)"),
"fixed: {fixed}"
);
}
#[test]
fn fixes_snowflake_copy_into_target_column_list_spacing_with_placeholder_prefix() {
let sql = "copy into ${env}_ENT_LANDING.SCHEMA_NAME.ProblemHere(col1)\nfrom @my_stage/file";
let issues = run_statementless_with_dialect(sql, Dialect::Snowflake);
assert!(!issues.is_empty());
let fixed = apply_all_issue_autofixes(sql, &issues);
assert!(
fixed.contains(".SCHEMA_NAME.ProblemHere (col1)"),
"fixed: {fixed}"
);
}
#[test]
fn allows_snowflake_stage_path_without_spacing_around_slash() {
let sql = "copy into t from @my_stage/file";
let issues = run_statementless_with_dialect(sql, Dialect::Snowflake);
assert!(
issues.is_empty(),
"snowflake stage path should not force spaces around slash: {issues:?}"
);
}
#[test]
fn flags_trailing_whitespace() {
let sql = "SELECT 1 \n";
let issues = run(sql);
assert!(!issues.is_empty(), "should flag trailing whitespace");
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "SELECT 1\n");
}
#[test]
fn flags_trailing_whitespace_on_initial_blank_line() {
let sql = " \nSELECT 1 \n";
let issues = run(sql);
assert!(!issues.is_empty());
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "\nSELECT 1\n");
}
#[test]
fn flags_compact_operator() {
let sql = "SELECT 1+2";
let issues = run(sql);
assert!(!issues.is_empty(), "should flag compact 1+2");
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "SELECT 1 + 2");
}
#[test]
fn flags_compact_operator_expression() {
let sql = "select\n field,\n date(field_1)-date(field_2) as diff\nfrom tbl";
let issues = run(sql);
assert!(!issues.is_empty());
let fixed = apply_all_issue_autofixes(sql, &issues);
assert!(
fixed.contains("date(field_1) - date(field_2)"),
"should fix operator spacing: {fixed}"
);
}
#[test]
fn flags_plus_between_identifier_and_literal() {
let sql = "SELECT a +'b'+ 'c' FROM tbl";
let issues = run(sql);
assert!(
!issues.is_empty(),
"should flag operator spacing around string literals"
);
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "SELECT a + 'b' + 'c' FROM tbl");
}
#[test]
fn does_not_flag_simple_spacing() {
assert!(run("SELECT * FROM t WHERE a = 1").is_empty());
}
#[test]
fn does_not_flag_sign_indicators() {
let issues = run("SELECT 1, +2, -4");
assert!(
issues.is_empty(),
"unary signs should not be flagged: {issues:?}"
);
}
#[test]
fn does_not_flag_newline_operator() {
assert!(run("SELECT 1\n+ 2").is_empty());
assert!(run("SELECT 1\n + 2").is_empty());
}
#[test]
fn flags_space_before_comma() {
let sql = "SELECT 1 ,4";
let issues = run(sql);
assert!(!issues.is_empty(), "should flag space before comma");
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "SELECT 1, 4");
}
#[test]
fn flags_no_space_after_comma() {
let sql = "SELECT 1,4";
let issues = run(sql);
assert!(!issues.is_empty(), "should flag missing space after comma");
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "SELECT 1, 4");
}
#[test]
fn flags_excessive_space_after_comma() {
let sql = "SELECT 1, 4";
let issues = run(sql);
assert!(
!issues.is_empty(),
"should flag excessive space after comma"
);
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "SELECT 1, 4");
}
#[test]
fn flags_missing_space_before_paren_after_keyword() {
let sql = "SELECT * FROM(SELECT 1 AS C1)AS T1;";
let issues = run(sql);
assert!(!issues.is_empty(), "should flag FROM( and )AS: {issues:?}");
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "SELECT * FROM (SELECT 1 AS C1) AS T1;");
}
#[test]
fn flags_cte_missing_space_after_as() {
let sql = "WITH a AS(select 1) select * from a";
let issues = run(sql);
assert!(!issues.is_empty(), "should flag AS(");
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "WITH a AS (select 1) select * from a");
}
#[test]
fn flags_cte_multiple_spaces_after_as() {
let sql = "WITH a AS (select 1) select * from a";
let issues = run(sql);
assert!(!issues.is_empty(), "should flag AS (");
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "WITH a AS (select 1) select * from a");
}
#[test]
fn flags_missing_space_after_using() {
let sql = "select * from a JOIN b USING(x)";
let issues = run(sql);
assert!(!issues.is_empty(), "should flag USING(");
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "select * from a JOIN b USING (x)");
}
#[test]
fn flags_excessive_whitespace() {
let sql = "SELECT 1";
let issues = run(sql);
assert!(!issues.is_empty(), "should flag excessive whitespace");
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "SELECT 1");
}
#[test]
fn flags_excessive_whitespace_multi() {
let sql = "select\n 1 + 2 + 3 + 4 -- Comment\nfrom foo";
let issues = run(sql);
assert!(!issues.is_empty());
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(
fixed,
"select\n 1 + 2 + 3 + 4 -- Comment\nfrom foo"
);
}
#[test]
fn flags_literal_operator_spacing() {
let sql = "SELECT ('foo'||'bar') as buzz";
let issues = run(sql);
assert!(
!issues.is_empty(),
"should flag compact || operator: {issues:?}"
);
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "SELECT ('foo' || 'bar') as buzz");
}
#[test]
fn flags_literal_as_spacing() {
let sql = "SELECT\n 'foo'AS bar\nFROM foo";
let issues = run(sql);
assert!(!issues.is_empty());
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "SELECT\n 'foo' AS bar\nFROM foo");
}
#[test]
fn flags_ansi_national_string_literal_spacing() {
let sql = "SELECT a + N'b' + N'c' FROM tbl;";
let issues = run_with_dialect(sql, Dialect::Ansi);
assert!(!issues.is_empty());
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "SELECT a + N 'b' + N 'c' FROM tbl;");
}
#[test]
fn does_not_flag_function_call() {
assert!(run("SELECT foo(5) FROM T1;").is_empty());
assert!(run("SELECT COUNT(*) FROM tbl\n\n").is_empty());
}
#[test]
fn flags_spaced_cast_operator() {
let sql = "SELECT '1' :: INT;";
let issues = run(sql);
assert!(!issues.is_empty(), "should flag space around ::");
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "SELECT '1'::INT;");
}
#[test]
fn flags_compact_json_arrow_operator() {
let sql = "SELECT payload->>'id' FROM t";
let issues = run(sql);
assert!(
issues.len() >= 2,
"should flag 2+ violations for compact json-arrow"
);
assert!(
issues
.iter()
.all(|issue| issue.autofix.as_ref().is_some_and(
|autofix| autofix.applicability == IssueAutofixApplicability::Safe
)),
"expected safe autofix metadata"
);
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "SELECT payload ->> 'id' FROM t");
}
#[test]
fn does_not_flag_exists_without_space_before_parenthesis() {
let no_space = "SELECT\n EXISTS(\n SELECT 1\n ) AS has_row\nFROM t";
assert!(run(no_space).is_empty());
}
#[test]
fn flags_space_before_exists_parenthesis_in_select_list() {
let sql = "SELECT 1,\n EXISTS (\n SELECT 1\n ) AS has_row\nFROM t";
let issues = run(sql);
assert!(
!issues.is_empty(),
"expected EXISTS-space violation in select list"
);
let fixed = apply_all_issue_autofixes(sql, &issues);
assert!(
fixed.contains("EXISTS(\n"),
"expected EXISTS( after fix, got: {fixed}"
);
}
#[test]
fn requires_space_before_exists_parenthesis_after_where() {
let sql = "SELECT 1\nWHERE EXISTS(\n SELECT 1\n)";
let issues = run(sql);
assert!(
!issues.is_empty(),
"expected missing-space violation for WHERE EXISTS("
);
let fixed = apply_all_issue_autofixes(sql, &issues);
assert!(
fixed.contains("WHERE EXISTS (\n"),
"expected WHERE EXISTS ( after fix, got: {fixed}"
);
}
#[test]
fn merge_violations_prefers_fixable_duplicate_span() {
let mut violations = vec![
((10, 10), Vec::new()),
((10, 10), vec![(10, 10, " ".to_string())]),
];
merge_violations_by_span(&mut violations);
assert_eq!(violations.len(), 1);
assert_eq!(violations[0].0, (10, 10));
assert_eq!(violations[0].1, vec![(10, 10, " ".to_string())]);
}
#[test]
fn does_not_flag_spacing_patterns_inside_literals_or_comments() {
let issues = run("SELECT 'payload->>''id''' AS txt -- EXISTS (\nFROM t");
assert!(
issues.is_empty(),
"should not flag content inside literals/comments: {issues:?}"
);
}
#[test]
fn does_not_flag_correct_comma_spacing() {
assert!(run("SELECT 1, 4").is_empty());
}
#[test]
fn does_not_flag_correct_cast() {
assert!(run("SELECT '1'::INT;").is_empty());
}
#[test]
fn does_not_flag_qualified_identifiers() {
assert!(run("SELECT a.b FROM c.d").is_empty());
}
#[test]
fn does_not_flag_newline_after_using() {
assert!(
run("select * from a JOIN b USING\n(x)").is_empty(),
"newline between USING and ( should be acceptable"
);
}
#[test]
fn flags_cte_newline_after_as() {
let sql = "WITH a AS\n(\n select 1\n)\nselect * from a";
let issues = run(sql);
assert!(!issues.is_empty(), "should flag AS + newline + (");
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "WITH a AS (\n select 1\n)\nselect * from a");
}
#[test]
fn flags_cte_newline_and_spaces_after_as() {
let sql = "WITH a AS\n\n\n (\n select 1\n)\nselect * from a";
let issues = run(sql);
assert!(!issues.is_empty());
let fixed = apply_all_issue_autofixes(sql, &issues);
assert_eq!(fixed, "WITH a AS (\n select 1\n)\nselect * from a");
}
#[test]
fn does_not_flag_comment_after_as() {
assert!(
run("WITH\na AS -- comment\n(\nselect 1\n)\nselect * from a").is_empty(),
"comment between AS and ( should be acceptable"
);
}
#[test]
fn insert_into_table_paren_allows_space() {
let issues = run("INSERT INTO metrics.cold_start_daily (\n workspace_id\n) SELECT 1");
let lt01 = issues
.iter()
.filter(|i| i.code == "LT01")
.collect::<Vec<_>>();
assert!(
lt01.is_empty(),
"INSERT INTO table ( should not flag LT01, got: {lt01:?}"
);
}
#[test]
fn insert_into_table_paren_with_cte() {
let sql = "WITH starts AS (\n SELECT 1\n)\nINSERT INTO metrics.cold_start_daily (\n workspace_id\n) SELECT workspace_id FROM starts";
let issues = run_with_dialect(sql, Dialect::Postgres);
let lt01 = issues
.iter()
.filter(|i| i.code == "LT01")
.collect::<Vec<_>>();
assert!(
lt01.is_empty(),
"INSERT INTO table ( with CTE should not flag LT01, got: {lt01:?}"
);
}
#[test]
fn insert_into_table_paren_on_conflict() {
let sql = "\
WITH cte AS (
SELECT workspace_id
FROM ledger.query_history
WHERE start_time >= $1
)
INSERT INTO metrics.cold_start_daily (
workspace_id
)
SELECT workspace_id
FROM cte
ON CONFLICT (workspace_id) DO UPDATE
SET workspace_id = excluded.workspace_id";
let issues = run_statementless_with_dialect(sql, Dialect::Postgres);
let lt01 = issues
.iter()
.filter(|i| i.code == "LT01")
.collect::<Vec<_>>();
assert!(
lt01.is_empty(),
"INSERT INTO table ( with ON CONFLICT should not flag LT01, got: {lt01:?}"
);
}
}