// flowscope_core/linter/rules/lt_001.rs
1//! LINT_LT_001: Layout spacing.
2//!
3//! SQLFluff LT01 parity: comprehensive spacing checks covering operators,
4//! commas, brackets, keywords, literals, trailing whitespace, excessive
5//! whitespace, and cast operators.
6
7use crate::linter::config::LintConfig;
8use crate::linter::rule::{LintContext, LintRule};
9use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit};
10use sqlparser::ast::Statement;
11use sqlparser::keywords::Keyword;
12use sqlparser::tokenizer::{Location, Span, Token, TokenWithSpan, Tokenizer, Whitespace};
13use std::collections::HashSet;
14
/// Rule state for LINT_LT_001, resolved once from configuration.
pub struct LayoutSpacing {
    // When true, spacing inside templated regions is not reported.
    ignore_templated_areas: bool,
    // True when layout config requests `spacing_before: align*` for the
    // corresponding segment type (see `from_config`).
    align_alias_expression: bool,
    align_data_type: bool,
    align_column_constraint: bool,
    // True when `indent_unit` is configured as "tab".
    align_with_tabs: bool,
    // Visual width of a tab stop; clamped to at least 1 in `from_config`.
    tab_space_size: usize,
}
23
24impl LayoutSpacing {
25    pub fn from_config(config: &LintConfig) -> Self {
26        let spacing_before_align = |type_name: &str| {
27            config
28                .config_section_object("layout.keyword_newline")
29                .and_then(|layout| layout.get(type_name))
30                .and_then(serde_json::Value::as_object)
31                .and_then(|entry| entry.get("spacing_before"))
32                .and_then(serde_json::Value::as_str)
33                .is_some_and(|value| value.to_ascii_lowercase().starts_with("align"))
34        };
35
36        Self {
37            ignore_templated_areas: config
38                .core_option_bool("ignore_templated_areas")
39                .unwrap_or(true),
40            align_alias_expression: spacing_before_align("alias_expression"),
41            align_data_type: spacing_before_align("data_type"),
42            align_column_constraint: spacing_before_align("column_constraint_segment"),
43            align_with_tabs: config
44                .section_option_str("indentation", "indent_unit")
45                .or_else(|| config.section_option_str("rules", "indent_unit"))
46                .is_some_and(|value| value.eq_ignore_ascii_case("tab")),
47            tab_space_size: config
48                .section_option_usize("indentation", "tab_space_size")
49                .or_else(|| config.section_option_usize("rules", "tab_space_size"))
50                .unwrap_or(4)
51                .max(1),
52        }
53    }
54
55    fn alignment_options(&self) -> Lt01AlignmentOptions {
56        Lt01AlignmentOptions {
57            alias_expression: self.align_alias_expression,
58            data_type: self.align_data_type,
59            column_constraint: self.align_column_constraint,
60            align_with_tabs: self.align_with_tabs,
61            tab_space_size: self.tab_space_size,
62        }
63    }
64}
65
66impl Default for LayoutSpacing {
67    fn default() -> Self {
68        Self {
69            ignore_templated_areas: true,
70            align_alias_expression: false,
71            align_data_type: false,
72            align_column_constraint: false,
73            align_with_tabs: false,
74            tab_space_size: 4,
75        }
76    }
77}
78
impl LintRule for LayoutSpacing {
    fn code(&self) -> &'static str {
        issue_codes::LINT_LT_001
    }

    fn name(&self) -> &'static str {
        "Layout spacing"
    }

    fn description(&self) -> &'static str {
        "Inappropriate Spacing."
    }

    /// Run the spacing checks for one statement and convert the raw
    /// violations into `Issue`s carrying optional autofix edits.
    fn check(&self, _statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
        let mut violations =
            spacing_violations(ctx, self.ignore_templated_areas, self.alignment_options());
        // Detect when this statement's range does not cover the whole input:
        // either the parser stopped early (unterminated first statement with
        // trailing non-whitespace) or template markers pushed content outside
        // the parsed range. In both cases, rescan the full input below.
        let has_remaining_non_whitespace = ctx.sql[ctx.statement_range.end..]
            .chars()
            .any(|ch| !ch.is_whitespace());
        let parser_fragment_fallback = ctx.statement_index == 0
            && ctx.statement_range.start == 0
            && ctx.statement_range.end < ctx.sql.len()
            && has_remaining_non_whitespace
            && !ctx.statement_sql().trim_end().ends_with(';');
        let template_fragment_fallback = ctx.statement_index == 0
            && contains_template_marker(ctx.sql)
            && (ctx.statement_range.start > 0 || ctx.statement_range.end < ctx.sql.len());
        if parser_fragment_fallback || template_fragment_fallback {
            // Rescan with a context spanning the entire input, then merge so
            // duplicate spans collapse into a single issue.
            // NOTE(review): full-input offsets are later passed through
            // `ctx.span_from_statement_offset`, which looks statement-relative;
            // for `template_fragment_fallback` with `statement_range.start > 0`
            // the two offset bases differ — confirm the mapping is intended.
            let full_ctx = LintContext {
                sql: ctx.sql,
                statement_range: 0..ctx.sql.len(),
                statement_index: 0,
            };
            violations.extend(spacing_violations(
                &full_ctx,
                self.ignore_templated_areas,
                self.alignment_options(),
            ));
            merge_violations_by_span(&mut violations);
        }

        violations
            .into_iter()
            .map(|((start, end), edits)| {
                let mut issue =
                    Issue::info(issue_codes::LINT_LT_001, "Inappropriate spacing found.")
                        .with_statement(ctx.statement_index)
                        .with_span(ctx.span_from_statement_offset(start, end));
                // Violations without edits are detection-only (no autofix).
                if !edits.is_empty() {
                    let edits = edits
                        .into_iter()
                        .map(|(edit_start, edit_end, replacement)| {
                            IssuePatchEdit::new(
                                ctx.span_from_statement_offset(edit_start, edit_end),
                                replacement.to_string(),
                            )
                        })
                        .collect();
                    issue = issue.with_autofix_edits(IssueAutofixApplicability::Safe, edits);
                }
                issue
            })
            .collect()
    }
}
144
/// Statement-relative byte range of a reported violation: `(start, end)`.
type Lt01Span = (usize, usize);
/// A replacement edit: `(start, end, replacement_text)`.
type Lt01AutofixEdit = (usize, usize, String);
/// A reported span plus the edits that would fix it (may be empty).
type Lt01Violation = (Lt01Span, Vec<Lt01AutofixEdit>);
/// Byte range of a templated region inside the statement text.
type Lt01TemplateSpan = (usize, usize);

/// Sort violations by span and collapse entries that share the same span,
/// unioning their edit lists (first-seen edit order wins, repeats dropped).
fn merge_violations_by_span(violations: &mut Vec<Lt01Violation>) {
    violations.sort_unstable_by_key(|(span, _)| *span);

    let mut merged: Vec<Lt01Violation> = Vec::with_capacity(violations.len());
    for (span, edits) in violations.drain(..) {
        match merged.last_mut() {
            Some((prev_span, prev_edits)) if *prev_span == span => {
                if prev_edits.is_empty() {
                    // An edit-less duplicate adopts the other entry's edits.
                    *prev_edits = edits;
                } else {
                    // Union edits, keeping first-seen order.
                    for edit in edits {
                        if !prev_edits.contains(&edit) {
                            prev_edits.push(edit);
                        }
                    }
                }
            }
            _ => merged.push((span, edits)),
        }
    }

    *violations = merged;
}
175
/// Alignment-related knobs passed down to the detection helpers.
#[derive(Clone, Copy)]
struct Lt01AlignmentOptions {
    // Check `AS` alias alignment across consecutive lines.
    alias_expression: bool,
    // Check data-type column alignment inside CREATE TABLE bodies.
    data_type: bool,
    // Column constraints share the CREATE TABLE alignment scan.
    column_constraint: bool,
    // Alias separators must consist of tabs only when set.
    align_with_tabs: bool,
    // Tab stop width used for visual-column computation.
    tab_space_size: usize,
}
184
/// Produce all LT01 spacing violations for a single statement.
///
/// Tokenizes the statement, then layers the individual checks: trailing
/// whitespace, token-pair spacing, `N''` literal spacing, optional
/// template-string spacing, and alignment detection. The result is sorted
/// and de-duplicated by span.
fn spacing_violations(
    ctx: &LintContext,
    ignore_templated_areas: bool,
    alignment: Lt01AlignmentOptions,
) -> Vec<Lt01Violation> {
    let sql = ctx.statement_sql();
    let mut violations = Vec::new();
    let templated_spans = template_spans(sql);
    // When the raw text still holds template markers, try tokenizing it
    // as-is first — presumably so spans line up with the raw source;
    // otherwise prefer the context's tokenization. TODO confirm intent.
    let prefer_raw_template_tokens = ctx.is_templated() && contains_template_marker(sql);
    let tokens = if prefer_raw_template_tokens {
        tokenized(sql, ctx.dialect()).or_else(|| tokenized_for_context(ctx))
    } else {
        tokenized_for_context(ctx).or_else(|| tokenized(sql, ctx.dialect()))
    };
    let Some(tokens) = tokens else {
        // Untokenizable input: nothing to report.
        return violations;
    };

    let dialect = ctx.dialect();

    collect_trailing_whitespace_violations(sql, &mut violations);
    collect_pair_spacing_violations(sql, &tokens, dialect, &templated_spans, &mut violations);
    collect_ansi_national_string_literal_violations(
        sql,
        &tokens,
        dialect,
        &templated_spans,
        &mut violations,
    );
    if !ignore_templated_areas {
        collect_template_string_spacing_violations(sql, dialect, &templated_spans, &mut violations);
    }
    collect_alignment_detection_violations(sql, alignment, &mut violations);

    // Different collectors can flag the same span; keep the first of each.
    violations.sort_unstable_by_key(|(span, _)| *span);
    violations.dedup_by_key(|(span, _)| *span);

    violations
}
224
225// ---------------------------------------------------------------------------
226// Trailing whitespace
227// ---------------------------------------------------------------------------
228
229fn collect_trailing_whitespace_violations(sql: &str, violations: &mut Vec<Lt01Violation>) {
230    let mut offset = 0;
231    for line in sql.split('\n') {
232        let trimmed = line.trim_end_matches([' ', '\t']);
233        let trailing_start = offset + trimmed.len();
234        let trailing_end = offset + line.len();
235        if trailing_end > trailing_start {
236            let span = (trailing_start, trailing_end);
237            let edit = (trailing_start, trailing_end, String::new());
238            violations.push((span, vec![edit]));
239        }
240        offset += line.len() + 1; // +1 for the \n
241    }
242}
243
244fn collect_alignment_detection_violations(
245    sql: &str,
246    alignment: Lt01AlignmentOptions,
247    violations: &mut Vec<Lt01Violation>,
248) {
249    if alignment.alias_expression {
250        collect_alias_alignment_detection(
251            sql,
252            alignment.tab_space_size,
253            alignment.align_with_tabs,
254            violations,
255        );
256    }
257    if alignment.data_type || alignment.column_constraint {
258        collect_create_table_alignment_detection(sql, alignment.tab_space_size, violations);
259    }
260}
261
/// One alias (`AS`) occurrence collected for group alignment checks.
#[derive(Clone, Copy)]
struct AliasAlignmentEntry {
    // Statement-relative byte offset where the `AS` keyword starts.
    as_start: usize,
    // Visual column (tabs expanded) at which the `AS` keyword starts.
    visual_col: usize,
    // True when the whitespace run directly before `AS` is tabs only.
    separator_uses_tabs: bool,
}
268
269fn collect_alias_alignment_detection(
270    sql: &str,
271    tab_space_size: usize,
272    align_with_tabs: bool,
273    violations: &mut Vec<Lt01Violation>,
274) {
275    let lines: Vec<&str> = sql.split('\n').collect();
276    if lines.len() < 2 {
277        return;
278    }
279
280    let mut offset = 0usize;
281    let mut current_group: Vec<AliasAlignmentEntry> = Vec::new();
282
283    for line in &lines {
284        let lower = line.to_ascii_lowercase();
285        let alias_pos = lower.find(" as ");
286        let is_alias_line = alias_pos.is_some() && !lower.trim_start().starts_with("from ");
287
288        if is_alias_line {
289            let as_index = alias_pos.unwrap_or_default() + 1;
290            current_group.push(AliasAlignmentEntry {
291                as_start: offset + as_index,
292                visual_col: visual_width(&line[..as_index], tab_space_size),
293                separator_uses_tabs: alias_separator_uses_tabs(line, as_index),
294            });
295        } else if !current_group.is_empty() {
296            emit_alias_alignment_group(&current_group, align_with_tabs, violations);
297            current_group.clear();
298        }
299
300        offset += line.len() + 1;
301    }
302
303    if !current_group.is_empty() {
304        emit_alias_alignment_group(&current_group, align_with_tabs, violations);
305    }
306}
307
/// True when the whitespace run immediately before the `AS` keyword
/// consists purely of tab characters (and is non-empty).
fn alias_separator_uses_tabs(line: &str, as_index: usize) -> bool {
    let prefix = &line[..as_index];
    // Everything after the last non-whitespace character is the separator.
    let kept = prefix.trim_end_matches(char::is_whitespace);
    let separator = &prefix[kept.len()..];
    !separator.is_empty() && separator.bytes().all(|byte| byte == b'\t')
}
319
320fn emit_alias_alignment_group(
321    group: &[AliasAlignmentEntry],
322    align_with_tabs: bool,
323    violations: &mut Vec<Lt01Violation>,
324) {
325    if group.len() < 2 {
326        return;
327    }
328    let target_col = group
329        .iter()
330        .map(|entry| entry.visual_col)
331        .max()
332        .unwrap_or(0);
333    for entry in group {
334        if entry.visual_col != target_col || (align_with_tabs && !entry.separator_uses_tabs) {
335            let end = entry.as_start + 2;
336            violations.push(((entry.as_start, end), Vec::new()));
337        }
338    }
339}
340
/// Scan CREATE TABLE bodies and flag data-type columns whose visual start
/// column differs from the right-most one in the same definition group.
///
/// This is a line-oriented heuristic, not a parse: a group starts after a
/// line beginning with `CREATE TABLE` and ends at a line starting with `)`
/// or `;`. Violations are detection-only (no autofix edits).
fn collect_create_table_alignment_detection(
    sql: &str,
    tab_space_size: usize,
    violations: &mut Vec<Lt01Violation>,
) {
    let lines: Vec<&str> = sql.split('\n').collect();
    let mut offset = 0usize;
    let mut in_create_table = false;
    // (absolute byte offset of the data type, visual column) per column line.
    let mut entries: Vec<(usize, usize)> = Vec::new();

    for line in &lines {
        let trimmed = line.trim_start();
        let upper = trimmed.to_ascii_uppercase();
        if !in_create_table && upper.starts_with("CREATE TABLE") {
            in_create_table = true;
        } else if in_create_table && (trimmed.starts_with(')') || trimmed.starts_with(';')) {
            // End of the column list: evaluate the collected group.
            emit_create_table_alignment_group(&entries, violations);
            entries.clear();
            in_create_table = false;
        }

        // Collect column-definition lines, skipping the opening/closing
        // punctuation, `--` comments, and the CREATE TABLE line itself.
        if in_create_table
            && !trimmed.is_empty()
            && !trimmed.starts_with('(')
            && !trimmed.starts_with(')')
            && !trimmed.starts_with("--")
            && !upper.starts_with("CREATE TABLE")
        {
            if let Some(data_type_start) = second_token_start(trimmed) {
                let prefix_len = line.len() - trimmed.len();
                let absolute = offset + prefix_len + data_type_start;
                let visual = visual_width(&trimmed[..data_type_start], tab_space_size);
                entries.push((absolute, visual));
            }
        }

        offset += line.len() + 1; // +1 for the '\n'
    }

    // Unterminated CREATE TABLE at end of input: still evaluate the group.
    if in_create_table && !entries.is_empty() {
        emit_create_table_alignment_group(&entries, violations);
    }
}
384
385fn emit_create_table_alignment_group(
386    group: &[(usize, usize)],
387    violations: &mut Vec<Lt01Violation>,
388) {
389    if group.len() < 2 {
390        return;
391    }
392    let target_col = group.iter().map(|(_, col)| *col).max().unwrap_or(0);
393    for (start, col) in group {
394        if *col != target_col {
395            let end = *start + 1;
396            violations.push(((*start, end), Vec::new()));
397        }
398    }
399}
400
/// Byte index where the second whitespace-delimited token of `line`
/// begins, or `None` when the line has fewer than two tokens.
fn second_token_start(line: &str) -> Option<usize> {
    let mut prev_was_ws = true;
    let mut tokens_started = 0usize;

    for (index, ch) in line.char_indices() {
        let is_ws = ch.is_whitespace();
        // A token starts at a non-whitespace char preceded by whitespace.
        if !is_ws && prev_was_ws {
            tokens_started += 1;
            if tokens_started == 2 {
                return Some(index);
            }
        }
        prev_was_ws = is_ws;
    }
    None
}
421
/// Visual column width of `text`: a tab advances to the next multiple of
/// `tab_space_size`; every other character counts as one column.
fn visual_width(text: &str, tab_space_size: usize) -> usize {
    text.chars().fold(0usize, |width, ch| match ch {
        '\t' => (width / tab_space_size + 1) * tab_space_size,
        _ => width + 1,
    })
}
434
435// ---------------------------------------------------------------------------
436// Pair-based spacing: walk consecutive non-trivia token pairs
437// ---------------------------------------------------------------------------
438
/// Expected spacing between two adjacent non-trivia tokens.
///
/// `expected_spacing` picks exactly one variant per token pair; the match
/// in `collect_pair_spacing_violations` turns a mismatching gap into a
/// violation.
#[derive(Debug, Clone, Copy, PartialEq)]
enum ExpectedSpacing {
    /// Exactly one space required (or newline acceptable).
    Single,
    /// No space allowed (tokens must be adjacent).
    None,
    /// No space allowed, including across newlines.
    NoneInline,
    /// Do not check this pair (e.g. start/end of statement).
    Skip,
    /// Single space required, and if there's a newline between, replace with single space.
    SingleInline,
}
453
/// Walk consecutive non-trivia token pairs and flag any gap between them
/// that disagrees with the `ExpectedSpacing` decided for that pair.
fn collect_pair_spacing_violations(
    sql: &str,
    tokens: &[TokenWithSpan],
    dialect: Dialect,
    templated_spans: &[Lt01TemplateSpan],
    violations: &mut Vec<Lt01Violation>,
) {
    // Indices of tokens that are neither trivia (whitespace/comments) nor EOF.
    let non_trivia: Vec<usize> = tokens
        .iter()
        .enumerate()
        .filter(|(_, t)| !is_trivia_token(&t.token) && !matches!(t.token, Token::EOF))
        .map(|(i, _)| i)
        .collect();
    // Tokens inside angle-bracket type parameters are forced to `None`
    // spacing on dialects that support that syntax.
    let type_angle_tokens = if supports_type_angle_spacing(dialect) {
        type_angle_token_indices(tokens, &non_trivia)
    } else {
        HashSet::new()
    };
    // Snowflake pattern-argument tokens are exempt from pair checks.
    let snowflake_pattern_tokens = if dialect == Dialect::Snowflake {
        snowflake_pattern_token_indices(tokens, &non_trivia)
    } else {
        HashSet::new()
    };

    for window in non_trivia.windows(2) {
        let left_idx = window[0];
        let right_idx = window[1];
        if dialect == Dialect::Snowflake
            && (snowflake_pattern_tokens.contains(&left_idx)
                || snowflake_pattern_tokens.contains(&right_idx))
        {
            continue;
        }
        let left = &tokens[left_idx];
        let right = &tokens[right_idx];

        // Skip pairs whose spans cannot be mapped back into `sql`.
        let Some((left_start, left_end)) = token_offsets(sql, left) else {
            continue;
        };
        let Some((right_start, _)) = token_offsets(sql, right) else {
            continue;
        };

        // Defensive: ignore out-of-order or out-of-range offsets.
        if left_end > right_start || right_start > sql.len() || left_end > sql.len() {
            continue;
        }
        // Never report inside templated regions.
        if overlaps_template_span(templated_spans, left_start, right_start) {
            continue;
        }

        let gap = &sql[left_end..right_start];
        let has_newline = gap.contains('\n') || gap.contains('\r');
        let has_comment = has_comment_between(tokens, left_idx, right_idx);

        let expected = if supports_type_angle_spacing(dialect)
            && is_type_angle_spacing_pair(left, right, left_idx, right_idx, &type_angle_tokens)
        {
            ExpectedSpacing::None
        } else {
            expected_spacing(left, right, tokens, left_idx, right_idx, dialect)
        };

        match expected {
            ExpectedSpacing::Skip => continue,
            ExpectedSpacing::None => {
                // Tokens should be adjacent, no whitespace allowed.
                if !gap.is_empty() && !has_newline && !has_comment {
                    let span = (left_end, right_start);
                    let edit = (left_end, right_start, String::new());
                    violations.push((span, vec![edit]));
                }
            }
            ExpectedSpacing::NoneInline => {
                // As above, but a newline gap is also a violation.
                if !gap.is_empty() && !has_comment {
                    let span = (left_end, right_start);
                    let edit = (left_end, right_start, String::new());
                    violations.push((span, vec![edit]));
                }
            }
            ExpectedSpacing::Single => {
                if has_comment {
                    continue;
                }
                if has_newline {
                    // Newline is acceptable as a separator for single-space contexts.
                    // But check if there's excessive inline space on the same line
                    // before or after the newline.
                    continue;
                }
                if gap == " " {
                    // Correct single space.
                    continue;
                }
                if gap.is_empty() && matches!(left.token, Token::Comma) {
                    // Avoid zero-width insert edits touching the next token.
                    // Replacing the comma token itself allows CP02/LT01 fixes
                    // to coexist in the same pass.
                    let replacement = format!("{} ", &sql[left_start..left_end]);
                    let span = (left_start, left_end);
                    let edit = (left_start, left_end, replacement);
                    violations.push((span, vec![edit]));
                    continue;
                }
                if gap.is_empty() && is_exists_keyword_token(&left.token) {
                    // Zero-width inserts are filtered by the fix planner.
                    // Replace the EXISTS token itself to preserve fixability.
                    let replacement = format!("{} ", &sql[left_start..left_end]);
                    let span = (left_start, left_end);
                    let edit = (left_start, left_end, replacement);
                    violations.push((span, vec![edit]));
                    continue;
                }
                // Either missing space (gap is empty) or excessive space (multiple spaces).
                let span = (left_end, right_start);
                let edit = (left_end, right_start, " ".to_string());
                violations.push((span, vec![edit]));
            }
            ExpectedSpacing::SingleInline => {
                if has_comment {
                    continue;
                }
                if gap == " " {
                    continue;
                }
                // Replace whatever gap (including newlines) with single space.
                let span = (left_end, right_start);
                let edit = (left_end, right_start, " ".to_string());
                violations.push((span, vec![edit]));
            }
        }
    }
}
586
/// Determine expected spacing between two adjacent non-trivia tokens.
///
/// Branch order is significant: earlier, more specific rules take priority
/// over the generic word/literal rules at the bottom.
fn expected_spacing(
    left: &TokenWithSpan,
    right: &TokenWithSpan,
    tokens: &[TokenWithSpan],
    left_idx: usize,
    right_idx: usize,
    dialect: Dialect,
) -> ExpectedSpacing {
    // --- Period (dot) for qualified identifiers: no space around ---
    if matches!(left.token, Token::Period) || matches!(right.token, Token::Period) {
        return ExpectedSpacing::NoneInline;
    }

    // --- Cast operator (::) ---
    if matches!(left.token, Token::DoubleColon) || matches!(right.token, Token::DoubleColon) {
        return ExpectedSpacing::NoneInline;
    }

    // --- Snowflake colon (semi-structured access): no space around ---
    if dialect == Dialect::Snowflake
        && (matches!(left.token, Token::Colon) || matches!(right.token, Token::Colon))
    {
        // Snowflake a:b:c syntax — no spaces around colon
        return ExpectedSpacing::NoneInline;
    }

    // --- Split compound comparison operators (>,<,!) + = ---
    if is_split_compound_comparison_pair(left, right) {
        return ExpectedSpacing::NoneInline;
    }

    // --- TSQL compound assignment operators (+=, -=, etc.) ---
    if dialect == Dialect::Mssql && is_tsql_compound_assignment_pair(left, right) {
        return ExpectedSpacing::NoneInline;
    }

    // --- Left paren: usually no space before (function calls) ---
    if matches!(right.token, Token::LParen) {
        return expected_spacing_before_lparen(left, tokens, left_idx, dialect);
    }

    // --- Right paren followed by something ---
    if matches!(left.token, Token::RParen) {
        return expected_spacing_after_rparen(right, tokens, right_idx);
    }

    // --- Left bracket: no space before in most contexts ---
    if matches!(right.token, Token::LBracket) {
        // text[] type syntax needs a space, but array access doesn't.
        if is_type_keyword_for_bracket(&left.token) {
            return ExpectedSpacing::Single;
        }
        return ExpectedSpacing::None;
    }

    // --- Right bracket ---
    if matches!(left.token, Token::RBracket) {
        // After ] usually no space before :: or . or [ or )
        if matches!(
            right.token,
            Token::DoubleColon | Token::Period | Token::LBracket | Token::RParen
        ) {
            return ExpectedSpacing::None;
        }
        return ExpectedSpacing::Single;
    }

    // --- Comma: no space before, single space after ---
    if matches!(right.token, Token::Comma) {
        return ExpectedSpacing::None;
    }
    if matches!(left.token, Token::Comma) {
        return ExpectedSpacing::Single;
    }

    // --- Semicolon: no space before ---
    if matches!(right.token, Token::SemiColon) {
        return ExpectedSpacing::Skip;
    }
    if matches!(left.token, Token::SemiColon) {
        return ExpectedSpacing::Skip;
    }

    // --- Inside parens: no space after ( or before ) ---
    if matches!(left.token, Token::LParen) {
        return ExpectedSpacing::None;
    }
    if matches!(right.token, Token::RParen) {
        return ExpectedSpacing::None;
    }

    // --- BigQuery project identifiers can include hyphens before dataset/table ---
    if dialect == Dialect::Bigquery
        && is_bigquery_hyphenated_identifier_pair(left, right, tokens, left_idx, right_idx)
    {
        return ExpectedSpacing::None;
    }

    if is_filesystem_path_pair(left, right, tokens, left_idx, right_idx, dialect) {
        return ExpectedSpacing::NoneInline;
    }

    // --- Binary operators: single space on each side ---
    if is_binary_operator(&left.token) || is_binary_operator(&right.token) {
        // Special: unary minus/plus (sign indicators) — skip
        if is_unary_operator_pair(left, right, tokens, left_idx) {
            return ExpectedSpacing::Skip;
        }
        return ExpectedSpacing::Single;
    }

    // --- Comparison operators: single space around ---
    if is_comparison_operator(&left.token) || is_comparison_operator(&right.token) {
        // NOTE(review): both arms below return `Single`, so this T-SQL
        // assignment-RHS special case is currently a no-op — confirm
        // whether a different spacing was intended here.
        if dialect == Dialect::Mssql
            && is_tsql_assignment_rhs_pair(left, right, tokens, left_idx, right_idx)
        {
            return ExpectedSpacing::Single;
        }
        return ExpectedSpacing::Single;
    }

    // --- JSON operators (arrow, long arrow, etc.) ---
    if is_json_operator(&left.token) || is_json_operator(&right.token) {
        return ExpectedSpacing::Single;
    }

    // --- Star/Mul as wildcard inside COUNT(*) etc. ---
    if matches!(left.token, Token::Mul) || matches!(right.token, Token::Mul) {
        // If inside parens: skip (could be wildcard)
        return ExpectedSpacing::Skip;
    }

    // --- Keywords and identifiers: single space between ---
    if is_word_like(&left.token) && is_word_like(&right.token) {
        return ExpectedSpacing::Single;
    }

    // --- Word followed by literal or vice versa ---
    if (is_word_like(&left.token) && is_literal(&right.token))
        || (is_literal(&left.token) && is_word_like(&right.token))
    {
        return ExpectedSpacing::Single;
    }

    // --- Literal followed by literal ---
    if is_literal(&left.token) && is_literal(&right.token) {
        return ExpectedSpacing::Single;
    }

    // --- Number followed by word or vice versa ---
    if (matches!(left.token, Token::Number(_, _)) && is_word_like(&right.token))
        || (is_word_like(&left.token) && matches!(right.token, Token::Number(_, _)))
    {
        return ExpectedSpacing::Single;
    }

    // Anything not classified above is left unchecked.
    ExpectedSpacing::Skip
}
746
747// ---------------------------------------------------------------------------
748// Token classification helpers
749// ---------------------------------------------------------------------------
750
/// Arithmetic / bitwise / concatenation operators that take a single space
/// on both sides. `Token::Mul` is deliberately absent: `*` may be a
/// wildcard and is special-cased in `expected_spacing`.
fn is_binary_operator(token: &Token) -> bool {
    matches!(
        token,
        Token::Plus
            | Token::Minus
            | Token::Div
            | Token::Mod
            | Token::StringConcat
            | Token::Ampersand
            | Token::Pipe
            | Token::Caret
            | Token::ShiftLeft
            | Token::ShiftRight
            | Token::Assignment
    )
}
767
/// Comparison operators that take a single space on both sides.
fn is_comparison_operator(token: &Token) -> bool {
    matches!(
        token,
        Token::Eq
            | Token::Neq
            | Token::Lt
            | Token::Gt
            | Token::LtEq
            | Token::GtEq
            | Token::Spaceship
            | Token::DoubleEq
            | Token::TildeEqual
    )
}
782
/// Adjacent pairs the tokenizer split out of a compound comparison
/// operator (`>` `=`, `<` `=`, `<` `>`, `!=` `=`); the halves must stay
/// glued together with no whitespace between them.
fn is_split_compound_comparison_pair(left: &TokenWithSpan, right: &TokenWithSpan) -> bool {
    matches!(
        (&left.token, &right.token),
        (Token::Gt, Token::Eq)
            | (Token::Lt, Token::Eq)
            | (Token::Lt, Token::Gt)
            | (Token::Neq, Token::Eq)
    )
}
792
/// Operator tokens that can form the first half of a T-SQL compound
/// assignment (`+=`, `-=`, `*=`, `/=`, `%=`, `&=`, `|=`, `^=`).
fn is_assignment_operator_token(token: &Token) -> bool {
    matches!(
        token,
        Token::Plus
            | Token::Minus
            | Token::Mul
            | Token::Div
            | Token::Mod
            | Token::Ampersand
            | Token::Pipe
            | Token::Caret
    )
}
806
807fn is_tsql_compound_assignment_pair(left: &TokenWithSpan, right: &TokenWithSpan) -> bool {
808    matches!(right.token, Token::Eq) && is_assignment_operator_token(&left.token)
809}
810
811fn is_tsql_assignment_rhs_pair(
812    left: &TokenWithSpan,
813    _right: &TokenWithSpan,
814    tokens: &[TokenWithSpan],
815    left_idx: usize,
816    _right_idx: usize,
817) -> bool {
818    if !matches!(left.token, Token::Eq) {
819        return false;
820    }
821    prev_non_trivia_index(tokens, left_idx)
822        .map(|index| is_assignment_operator_token(&tokens[index].token))
823        .unwrap_or(false)
824}
825
/// JSON access/containment operator tokens (`->`, `->>`, `#>`, `#>>`,
/// `@>`, `<@`); all take a single space on both sides.
fn is_json_operator(token: &Token) -> bool {
    matches!(
        token,
        Token::Arrow
            | Token::LongArrow
            | Token::HashArrow
            | Token::HashLongArrow
            | Token::AtArrow
            | Token::ArrowAt
    )
}
837
/// Keyword/identifier-like tokens: words and bind placeholders.
fn is_word_like(token: &Token) -> bool {
    matches!(token, Token::Word(_) | Token::Placeholder(_))
}
841
/// String, byte-string, hex, and numeric literal tokens.
fn is_literal(token: &Token) -> bool {
    matches!(
        token,
        Token::SingleQuotedString(_)
            | Token::DoubleQuotedString(_)
            | Token::TripleSingleQuotedString(_)
            | Token::TripleDoubleQuotedString(_)
            | Token::NationalStringLiteral(_)
            | Token::EscapedStringLiteral(_)
            | Token::UnicodeStringLiteral(_)
            | Token::HexStringLiteral(_)
            | Token::SingleQuotedByteStringLiteral(_)
            | Token::DoubleQuotedByteStringLiteral(_)
            | Token::Number(_, _)
    )
}
858
859fn is_type_keyword_for_bracket(token: &Token) -> bool {
860    if let Token::Word(w) = token {
861        if w.quote_style.is_some() {
862            return false;
863        }
864        matches!(
865            w.value.to_ascii_uppercase().as_str(),
866            "TEXT"
867                | "UUID"
868                | "INT"
869                | "INTEGER"
870                | "BIGINT"
871                | "SMALLINT"
872                | "VARCHAR"
873                | "CHAR"
874                | "BOOLEAN"
875                | "BOOL"
876                | "NUMERIC"
877                | "DECIMAL"
878                | "FLOAT"
879                | "DOUBLE"
880                | "DATE"
881                | "TIME"
882                | "TIMESTAMP"
883                | "INTERVAL"
884                | "JSONB"
885                | "JSON"
886                | "BYTEA"
887                | "REAL"
888                | "SERIAL"
889                | "BIGSERIAL"
890                | "INET"
891                | "CIDR"
892                | "MACADDR"
893        )
894    } else {
895        false
896    }
897}
898
899fn is_exists_keyword_token(token: &Token) -> bool {
900    matches!(token, Token::Word(word) if word.keyword == Keyword::EXISTS)
901}
902
903/// Check if a token is a DDL keyword after which the next word is an object name
904/// (table, view, index, etc.) — not a function call.
905fn is_ddl_object_keyword(token: &Token) -> bool {
906    if let Token::Word(w) = token {
907        matches!(
908            w.keyword,
909            Keyword::TABLE
910                | Keyword::VIEW
911                | Keyword::INDEX
912                | Keyword::FUNCTION
913                | Keyword::PROCEDURE
914                | Keyword::TRIGGER
915                | Keyword::SEQUENCE
916                | Keyword::TYPE
917                | Keyword::SCHEMA
918                | Keyword::DATABASE
919        )
920    } else {
921        false
922    }
923}
924
/// Walk left from the word at `word_index` through a dotted qualifier chain
/// (`schema.name`, `db.schema.name`, …) and report whether the whole name is
/// the object of a DDL statement such as `CREATE TABLE schema.name (...)`.
fn is_qualified_ddl_object_name(tokens: &[TokenWithSpan], word_index: usize) -> bool {
    let mut cursor = word_index;

    loop {
        // Nothing to the left — cannot be a DDL object name.
        let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) else {
            return false;
        };

        // A `.` before the cursor: step over one `<word>.` qualifier and keep walking.
        if matches!(tokens[prev_idx].token, Token::Period) {
            let Some(prev_word_idx) = prev_non_trivia_index(tokens, prev_idx) else {
                return false;
            };
            if !is_word_like(&tokens[prev_word_idx].token) {
                return false;
            }
            cursor = prev_word_idx;
            continue;
        }

        // Qualifier chain exhausted: the token before the name must be a DDL
        // object keyword (TABLE/VIEW/INDEX/...) ...
        if !is_ddl_object_keyword(&tokens[prev_idx].token) {
            return false;
        }
        // ... itself preceded by CREATE/ALTER/DROP/TRUNCATE (or CREATE OR REPLACE).
        return is_ddl_object_definition_context(tokens, prev_idx);
    }
}
950
/// Walk left from the word at `word_index` through a dotted qualifier chain
/// and report whether the name is the target of a `REFERENCES` clause
/// (e.g. `REFERENCES other_schema.other_table (...)`).
fn is_reference_target_name(tokens: &[TokenWithSpan], word_index: usize) -> bool {
    let mut cursor = word_index;

    loop {
        let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) else {
            return false;
        };

        // Step over `<word>.` qualifiers (schema.table, db.schema.table, ...).
        if matches!(tokens[prev_idx].token, Token::Period) {
            let Some(prev_word_idx) = prev_non_trivia_index(tokens, prev_idx) else {
                return false;
            };
            if !is_word_like(&tokens[prev_word_idx].token) {
                return false;
            }
            cursor = prev_word_idx;
            continue;
        }

        // The token immediately before the (qualified) name must be REFERENCES.
        let Token::Word(prev_word) = &tokens[prev_idx].token else {
            return false;
        };

        return prev_word.keyword == Keyword::REFERENCES;
    }
}
977
/// Check whether `word_index` is (part of) the target name of a
/// `COPY INTO <name> (...)` statement.
///
/// Scans backwards over non-trivia tokens until it sees `INTO` preceded by
/// `COPY`; bails out on clause keywords or punctuation that prove we are no
/// longer inside a COPY INTO target, and caps the scan at 64 steps.
fn is_copy_into_target_name(tokens: &[TokenWithSpan], word_index: usize) -> bool {
    let mut cursor = word_index;
    let mut steps = 0usize;

    while let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) {
        match &tokens[prev_idx].token {
            // Found INTO — confirm the token before it is COPY.
            Token::Word(word) if word.keyword == Keyword::INTO => {
                let Some(copy_idx) = prev_non_trivia_index(tokens, prev_idx) else {
                    return false;
                };
                return matches!(
                    &tokens[copy_idx].token,
                    Token::Word(copy_word) if copy_word.keyword == Keyword::COPY
                );
            }
            // Clause keywords mean we are past any COPY INTO target.
            Token::Word(word)
                if matches!(
                    word.keyword,
                    Keyword::FROM
                        | Keyword::SELECT
                        | Keyword::WHERE
                        | Keyword::JOIN
                        | Keyword::ON
                        | Keyword::HAVING
                ) =>
            {
                return false;
            }
            // Statement/list punctuation also ends the scan.
            Token::SemiColon | Token::Comma | Token::LParen | Token::RParen => return false,
            // Anything else (identifier parts, `.`, stage refs, ...) — keep walking.
            _ => {}
        }

        cursor = prev_idx;
        steps += 1;
        // Bound the backward scan so pathological inputs stay cheap.
        if steps > 64 {
            return false;
        }
    }

    false
}
1019
/// Check if `word_index` is the table/view name in an `INSERT INTO schema.table` context.
///
/// Scans backwards over non-trivia tokens, allowing only dotted-qualifier
/// parts between the word and `INTO`; any other token disqualifies.
fn is_insert_into_target_name(tokens: &[TokenWithSpan], word_index: usize) -> bool {
    let mut cursor = word_index;
    let mut steps = 0usize;

    while let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) {
        match &tokens[prev_idx].token {
            Token::Word(word) if word.keyword == Keyword::INTO => {
                // Check for INSERT before INTO.
                let Some(insert_idx) = prev_non_trivia_index(tokens, prev_idx) else {
                    return false;
                };
                return matches!(
                    &tokens[insert_idx].token,
                    Token::Word(w) if w.keyword == Keyword::INSERT
                );
            }
            // Walk through schema qualifiers (schema.table).
            Token::Period => {}
            // Accept any unquoted word as a schema/table identifier — the name
            // may coincide with a SQL keyword (e.g. `metrics`, `daily`).
            Token::Word(word) if word.quote_style.is_none() => {}
            // Any other token means this is not an INSERT INTO target.
            _ => return false,
        }

        cursor = prev_idx;
        steps += 1;
        // Qualified names are short — bound the scan tightly.
        if steps > 16 {
            return false;
        }
    }

    false
}
1054
1055fn is_ddl_object_definition_context(tokens: &[TokenWithSpan], ddl_keyword_index: usize) -> bool {
1056    let Some(prev_idx) = prev_non_trivia_index(tokens, ddl_keyword_index) else {
1057        return false;
1058    };
1059    let Token::Word(prev_word) = &tokens[prev_idx].token else {
1060        return false;
1061    };
1062
1063    if matches!(
1064        prev_word.keyword,
1065        Keyword::CREATE | Keyword::ALTER | Keyword::DROP | Keyword::TRUNCATE
1066    ) {
1067        return true;
1068    }
1069
1070    if prev_word.keyword == Keyword::OR {
1071        if let Some(prev_prev_idx) = prev_non_trivia_index(tokens, prev_idx) {
1072            if let Token::Word(prev_prev_word) = &tokens[prev_prev_idx].token {
1073                return matches!(prev_prev_word.keyword, Keyword::CREATE | Keyword::ALTER);
1074            }
1075        }
1076    }
1077
1078    false
1079}
1080
1081/// Check if this pair involves a unary +/- (sign indicator) rather than binary.
1082fn is_unary_operator_pair(
1083    left: &TokenWithSpan,
1084    right: &TokenWithSpan,
1085    tokens: &[TokenWithSpan],
1086    left_idx: usize,
1087) -> bool {
1088    // Case 1: right token is +/- and left context suggests unary
1089    if matches!(right.token, Token::Plus | Token::Minus)
1090        && is_unary_prefix_context(&tokens[left_idx].token)
1091    {
1092        return true;
1093    }
1094    // Case 2: left token is +/- and the token before it suggests unary
1095    if matches!(left.token, Token::Plus | Token::Minus) {
1096        if let Some(prev_idx) = prev_non_trivia_index(tokens, left_idx) {
1097            if is_unary_prefix_context(&tokens[prev_idx].token) {
1098                return true;
1099            }
1100        } else {
1101            // No previous token — start of statement, so it's unary
1102            return true;
1103        }
1104    }
1105    false
1106}
1107
/// Detect BigQuery hyphenated project identifiers (`my-project.dataset.tbl`)
/// so the `-` between words is treated as part of the name rather than a
/// binary minus that needs surrounding spaces.
///
/// Handles both orientations of the pair: the `-` as the right token
/// (`word` + `-`) and as the left token (`-` + `word`). In both cases the
/// word following the hyphenated segment must be followed by a `.` for the
/// sequence to read as a dotted identifier.
fn is_bigquery_hyphenated_identifier_pair(
    left: &TokenWithSpan,
    right: &TokenWithSpan,
    tokens: &[TokenWithSpan],
    left_idx: usize,
    right_idx: usize,
) -> bool {
    // Pair is `word` + `-`: require `word - word .` ahead.
    if matches!(right.token, Token::Minus) {
        if !matches!(left.token, Token::Word(_)) {
            return false;
        }
        let Some(next_word_idx) = next_non_trivia_index(tokens, right_idx + 1) else {
            return false;
        };
        if !matches!(tokens[next_word_idx].token, Token::Word(_)) {
            return false;
        }
        let Some(next_after_word_idx) = next_non_trivia_index(tokens, next_word_idx + 1) else {
            return false;
        };
        return matches!(tokens[next_after_word_idx].token, Token::Period);
    }

    // Pair is `-` + `word`: require a word behind the `-` and a `.` after `word`.
    if matches!(left.token, Token::Minus) {
        if !matches!(right.token, Token::Word(_)) {
            return false;
        }
        let Some(prev_word_idx) = prev_non_trivia_index(tokens, left_idx) else {
            return false;
        };
        if !matches!(tokens[prev_word_idx].token, Token::Word(_)) {
            return false;
        }
        let Some(next_idx) = next_non_trivia_index(tokens, right_idx + 1) else {
            return false;
        };
        return matches!(tokens[next_idx].token, Token::Period);
    }

    false
}
1149
/// Heuristic: decide whether a `/` in this token pair is a filesystem/stage
/// path separator (e.g. `ADD JAR /tmp/foo.jar`, `@stage/dir/file`) rather
/// than a division operator. Only Databricks, ClickHouse and Snowflake are
/// considered; all other dialects keep normal operator spacing.
fn is_filesystem_path_pair(
    left: &TokenWithSpan,
    right: &TokenWithSpan,
    tokens: &[TokenWithSpan],
    left_idx: usize,
    right_idx: usize,
    dialect: Dialect,
) -> bool {
    if !matches!(
        dialect,
        Dialect::Databricks | Dialect::Clickhouse | Dialect::Snowflake
    ) {
        return false;
    }

    // Work out which side of the pair is the `/`, if either.
    let div_index = if matches!(left.token, Token::Div) {
        Some(left_idx)
    } else if matches!(right.token, Token::Div) {
        // `JAR /path` style: when a JAR/MODEL keyword directly precedes the
        // `/`, the pair itself should keep its space (the path only starts at
        // the `/`) — unless that word is really a path segment, i.e. it is
        // itself preceded by another `/`.
        let left_is_context_keyword = is_path_context_keyword_token(&left.token);
        let left_is_path_segment = prev_non_trivia_index(tokens, left_idx)
            .is_some_and(|idx| matches!(tokens[idx].token, Token::Div));
        if left_is_context_keyword && !left_is_path_segment {
            return false;
        }
        Some(right_idx)
    } else {
        None
    };
    let Some(div_index) = div_index else {
        return false;
    };

    // A path slash needs a word on at least one side.
    let prev_idx = prev_non_trivia_index(tokens, div_index);
    let next_idx = next_non_trivia_index(tokens, div_index + 1);
    let prev_ok = prev_idx.is_some_and(|idx| matches!(tokens[idx].token, Token::Word(_)));
    let next_ok = next_idx.is_some_and(|idx| matches!(tokens[idx].token, Token::Word(_)));
    if !(prev_ok || next_ok) {
        return false;
    }

    // Snowflake paths hang off an `@stage` reference; the others off a nearby
    // JAR/MODEL keyword. Both scans are bounded.
    if dialect == Dialect::Snowflake {
        return snowflake_stage_path_context_within(tokens, div_index, 12);
    }

    path_context_keyword_within(tokens, div_index, 6)
}
1196
1197fn is_path_context_keyword_token(token: &Token) -> bool {
1198    let Token::Word(word) = token else {
1199        return false;
1200    };
1201    word.value.eq_ignore_ascii_case("JAR") || word.value.eq_ignore_ascii_case("MODEL")
1202}
1203
1204fn path_context_keyword_within(tokens: &[TokenWithSpan], from_idx: usize, limit: usize) -> bool {
1205    let mut cursor = from_idx;
1206    let mut steps = 0usize;
1207    while let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) {
1208        if let Token::Word(word) = &tokens[prev_idx].token {
1209            if matches!(word.keyword, Keyword::JAR) {
1210                return true;
1211            }
1212            if word.value.eq_ignore_ascii_case("JAR") || word.value.eq_ignore_ascii_case("MODEL") {
1213                return true;
1214            }
1215        }
1216        cursor = prev_idx;
1217        steps += 1;
1218        if steps >= limit {
1219            break;
1220        }
1221    }
1222    false
1223}
1224
1225fn snowflake_stage_path_context_within(
1226    tokens: &[TokenWithSpan],
1227    from_idx: usize,
1228    limit: usize,
1229) -> bool {
1230    let mut cursor = from_idx;
1231    let mut steps = 0usize;
1232    while let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) {
1233        match &tokens[prev_idx].token {
1234            Token::AtSign => return true,
1235            Token::Word(word) if word.value.starts_with('@') => return true,
1236            _ => {}
1237        }
1238        cursor = prev_idx;
1239        steps += 1;
1240        if steps >= limit {
1241            break;
1242        }
1243    }
1244    false
1245}
1246
1247/// Check if a token is a context where the following +/- is unary.
1248fn is_unary_prefix_context(token: &Token) -> bool {
1249    if matches!(
1250        token,
1251        Token::Comma
1252            | Token::LParen
1253            | Token::Eq
1254            | Token::Neq
1255            | Token::Lt
1256            | Token::Gt
1257            | Token::LtEq
1258            | Token::GtEq
1259    ) {
1260        return true;
1261    }
1262    if let Token::Word(w) = token {
1263        if matches!(
1264            w.keyword,
1265            Keyword::SELECT
1266                | Keyword::WHERE
1267                | Keyword::WHEN
1268                | Keyword::THEN
1269                | Keyword::ELSE
1270                | Keyword::AND
1271                | Keyword::OR
1272                | Keyword::ON
1273                | Keyword::SET
1274                | Keyword::CASE
1275                | Keyword::BETWEEN
1276                | Keyword::IN
1277                | Keyword::VALUES
1278                | Keyword::INTERVAL
1279                | Keyword::YEAR
1280                | Keyword::MONTH
1281                | Keyword::DAY
1282                | Keyword::HOUR
1283                | Keyword::MINUTE
1284                | Keyword::SECOND
1285                | Keyword::RETURN
1286                | Keyword::RETURNS
1287        ) {
1288            return true;
1289        }
1290    }
1291    false
1292}
1293
1294/// Expected spacing before left-paren.
1295fn expected_spacing_before_lparen(
1296    left: &TokenWithSpan,
1297    tokens: &[TokenWithSpan],
1298    left_idx: usize,
1299    dialect: Dialect,
1300) -> ExpectedSpacing {
1301    match &left.token {
1302        // Function call: no space between function name and (
1303        Token::Word(w) if w.quote_style.is_none() => {
1304            if dialect == Dialect::Snowflake {
1305                if w.value.eq_ignore_ascii_case("MATCH_RECOGNIZE")
1306                    || w.value.eq_ignore_ascii_case("PATTERN")
1307                {
1308                    return ExpectedSpacing::Single;
1309                }
1310                if w.value.eq_ignore_ascii_case("MATCH_CONDITION") {
1311                    return ExpectedSpacing::NoneInline;
1312                }
1313            }
1314            if w.value.eq_ignore_ascii_case("EXISTS") {
1315                if exists_requires_space_before_lparen(tokens, left_idx) {
1316                    return ExpectedSpacing::Single;
1317                }
1318                return ExpectedSpacing::NoneInline;
1319            }
1320            // Keywords that should have a space before (
1321            if is_keyword_requiring_space_before_paren(w.keyword) {
1322                // AS in CTE: `AS (` should be single-inline (collapse newlines to space)
1323                // USING, FROM, etc.: single space (newline acceptable)
1324                if matches!(w.keyword, Keyword::AS) {
1325                    return ExpectedSpacing::SingleInline;
1326                }
1327                return ExpectedSpacing::Single;
1328            }
1329            // INSERT INTO table_name ( — the ( opens a column list.
1330            // Checked before the NoKeyword guard because the table name may
1331            // coincide with a SQL keyword (e.g., metrics.daily → daily is Keyword).
1332            if is_insert_into_target_name(tokens, left_idx) {
1333                return ExpectedSpacing::Single;
1334            }
1335            // Check if this word is a table/view name after CREATE TABLE/VIEW —
1336            // the ( opens a column list, not a function call, so skip.
1337            if w.keyword == Keyword::NoKeyword {
1338                if is_reference_target_name(tokens, left_idx) {
1339                    return ExpectedSpacing::Single;
1340                }
1341                if is_copy_into_target_name(tokens, left_idx) {
1342                    return ExpectedSpacing::Single;
1343                }
1344                if is_qualified_ddl_object_name(tokens, left_idx) {
1345                    return ExpectedSpacing::Skip;
1346                }
1347            }
1348            // Regular function call or type name: no space
1349            ExpectedSpacing::NoneInline
1350        }
1351        // After closing paren/bracket: single space (subquery, etc.)
1352        Token::RParen | Token::RBracket => ExpectedSpacing::Single,
1353        // After literal: single space
1354        _ if is_literal(&left.token) => ExpectedSpacing::Single,
1355        // After number: no space (could be type precision like numeric(5,2))
1356        Token::Number(_, _) => ExpectedSpacing::None,
1357        // After comma: single space
1358        Token::Comma => ExpectedSpacing::Single,
1359        // After operator: skip
1360        _ if is_binary_operator(&left.token) || is_comparison_operator(&left.token) => {
1361            ExpectedSpacing::Skip
1362        }
1363        _ => ExpectedSpacing::Skip,
1364    }
1365}
1366
1367fn exists_requires_space_before_lparen(tokens: &[TokenWithSpan], left_idx: usize) -> bool {
1368    let Some(prev_idx) = prev_non_trivia_index(tokens, left_idx) else {
1369        return false;
1370    };
1371
1372    match &tokens[prev_idx].token {
1373        Token::Word(word) => {
1374            matches!(
1375                word.keyword,
1376                Keyword::AND
1377                    | Keyword::OR
1378                    | Keyword::NOT
1379                    | Keyword::WHERE
1380                    | Keyword::HAVING
1381                    | Keyword::WHEN
1382                    | Keyword::THEN
1383                    | Keyword::ELSE
1384            ) || matches!(
1385                word.value.to_ascii_uppercase().as_str(),
1386                "AND" | "OR" | "NOT" | "WHERE" | "HAVING" | "WHEN" | "THEN" | "ELSE"
1387            )
1388        }
1389        Token::RParen
1390        | Token::LParen
1391        | Token::Eq
1392        | Token::Neq
1393        | Token::Lt
1394        | Token::Gt
1395        | Token::LtEq
1396        | Token::GtEq => true,
1397        _ => false,
1398    }
1399}
1400
1401/// Keywords that should have a space before `(`.
1402fn is_keyword_requiring_space_before_paren(keyword: Keyword) -> bool {
1403    matches!(
1404        keyword,
1405        Keyword::AS
1406            | Keyword::USING
1407            | Keyword::FROM
1408            | Keyword::JOIN
1409            | Keyword::ON
1410            | Keyword::WHERE
1411            | Keyword::IN
1412            | Keyword::BETWEEN
1413            | Keyword::WHEN
1414            | Keyword::THEN
1415            | Keyword::ELSE
1416            | Keyword::AND
1417            | Keyword::OR
1418            | Keyword::NOT
1419            | Keyword::HAVING
1420            | Keyword::OVER
1421            | Keyword::PARTITION
1422            | Keyword::ORDER
1423            | Keyword::GROUP
1424            | Keyword::LIMIT
1425            | Keyword::UNION
1426            | Keyword::INTERSECT
1427            | Keyword::EXCEPT
1428            | Keyword::RECURSIVE
1429            | Keyword::WITH
1430            | Keyword::SELECT
1431            | Keyword::INTO
1432            | Keyword::TABLE
1433            | Keyword::VALUES
1434            | Keyword::SET
1435            | Keyword::RETURNS
1436            | Keyword::FILTER
1437            | Keyword::CONFLICT
1438            | Keyword::BY
1439    )
1440}
1441
1442/// Expected spacing after right-paren.
1443fn expected_spacing_after_rparen(
1444    right: &TokenWithSpan,
1445    _tokens: &[TokenWithSpan],
1446    _right_idx: usize,
1447) -> ExpectedSpacing {
1448    match &right.token {
1449        // ) followed by . or :: or [ — no space
1450        Token::Period | Token::DoubleColon | Token::LBracket | Token::RBracket => {
1451            ExpectedSpacing::None
1452        }
1453        // ) followed by , — no space before comma
1454        Token::Comma => ExpectedSpacing::None,
1455        // ) followed by ; — no space
1456        Token::SemiColon => ExpectedSpacing::Skip,
1457        // ) followed by ) — no space
1458        Token::RParen => ExpectedSpacing::None,
1459        // ) followed by ( — single space
1460        Token::LParen => ExpectedSpacing::Single,
1461        // ) followed by keyword or identifier — single space
1462        _ => ExpectedSpacing::Single,
1463    }
1464}
1465
1466fn has_comment_between(tokens: &[TokenWithSpan], left: usize, right: usize) -> bool {
1467    tokens[left + 1..right].iter().any(|t| {
1468        matches!(
1469            t.token,
1470            Token::Whitespace(Whitespace::SingleLineComment { .. })
1471                | Token::Whitespace(Whitespace::MultiLineComment(_))
1472        )
1473    })
1474}
1475
1476fn template_spans(sql: &str) -> Vec<Lt01TemplateSpan> {
1477    let mut spans = Vec::new();
1478    let mut index = 0usize;
1479    while let Some((open, close)) = find_next_template_open(sql, index) {
1480        let payload_start = open + 2;
1481        if let Some(rel_close) = sql[payload_start..].find(close) {
1482            let close_index = payload_start + rel_close + close.len();
1483            spans.push((open, close_index));
1484            index = close_index;
1485        } else {
1486            spans.push((open, sql.len()));
1487            break;
1488        }
1489    }
1490    spans
1491}
1492
/// Find the earliest template opener (`{{`, `{%`, or `{#`) at or after byte
/// offset `from`, returning its absolute offset and the matching closer.
fn find_next_template_open(sql: &str, from: usize) -> Option<(usize, &'static str)> {
    const MARKERS: [(&str, &str); 3] = [("{{", "}}"), ("{%", "%}"), ("{#", "#}")];
    let rest = sql.get(from..)?;
    let mut best: Option<(usize, &'static str)> = None;
    for (open, close) in MARKERS {
        if let Some(offset) = rest.find(open) {
            let absolute = from + offset;
            if best.map_or(true, |(index, _)| absolute < index) {
                best = Some((absolute, close));
            }
        }
    }
    best
}
1500
/// Quick check: does the input contain any Jinja-style template opener?
fn contains_template_marker(sql: &str) -> bool {
    ["{{", "{%", "{#"].iter().any(|marker| sql.contains(marker))
}
1504
1505fn overlaps_template_span(spans: &[Lt01TemplateSpan], start: usize, end: usize) -> bool {
1506    spans
1507        .iter()
1508        .any(|(template_start, template_end)| start < *template_end && end > *template_start)
1509}
1510
/// Flag `N'...'` national string literals outside MSSQL (SQLFluff LT01
/// parity): the suggested fix inserts a space after the `N` prefix, turning
/// `N'...'` into `N '...'` so the prefix reads as a separate word.
///
/// NOTE(review): splitting the prefix changes how a re-parse would read the
/// literal — presumably intended only for dialects where `N` is not a literal
/// prefix; confirm against SQLFluff's behavior.
fn collect_ansi_national_string_literal_violations(
    sql: &str,
    tokens: &[TokenWithSpan],
    dialect: Dialect,
    templated_spans: &[Lt01TemplateSpan],
    violations: &mut Vec<Lt01Violation>,
) {
    // T-SQL legitimately uses N'...' for nvarchar literals — leave untouched.
    if matches!(dialect, Dialect::Mssql) {
        return;
    }

    for token in tokens {
        let Token::NationalStringLiteral(_) = token.token else {
            continue;
        };
        // Map the token's line/column span back to byte offsets in `sql`.
        let Some((start, end)) = token_offsets(sql, token) else {
            continue;
        };
        // Skip degenerate spans and anything inside a templated region.
        if start >= end || end > sql.len() || overlaps_template_span(templated_spans, start, end) {
            continue;
        }
        let raw = &sql[start..end];
        // Shortest valid form is N'' — three bytes.
        if raw.len() < 3 {
            continue;
        }
        let Some(prefix) = raw.chars().next() else {
            continue;
        };
        // Require the `N`/`n` prefix immediately followed by a single quote.
        // (`raw[1..]` is safe here: the prefix just matched a 1-byte char.)
        if !(prefix == 'N' || prefix == 'n') || !raw[1..].starts_with('\'') {
            continue;
        }
        // Violation covers the whole literal; the patch re-emits it with a
        // space between the prefix and the opening quote.
        let replacement = format!("{prefix} {}", &raw[1..]);
        violations.push(((start, end), vec![(start, end, replacement)]));
    }
}
1546
/// Lint SQL fragments embedded in quoted strings *inside* templated regions
/// (e.g. `{{ config(where="a=b") }}`): each quoted string is tokenized as SQL
/// and run through the pair-spacing and national-literal checks, and any
/// violations are re-emitted at absolute document offsets.
fn collect_template_string_spacing_violations(
    sql: &str,
    dialect: Dialect,
    templated_spans: &[Lt01TemplateSpan],
    violations: &mut Vec<Lt01Violation>,
) {
    for (template_start, template_end) in templated_spans {
        let mut cursor = *template_start;
        while cursor < *template_end {
            // Find the next quoted string inside this template region.
            let Some((quote_start, quote_char)) = next_quote_in_range(sql, cursor, *template_end)
            else {
                break;
            };
            let Some(quote_end) =
                find_closing_quote(sql, quote_start + 1, *template_end, quote_char)
            else {
                break;
            };
            // Tokenize the string contents as SQL; skip fragments that don't lex.
            let content = &sql[quote_start + 1..quote_end];
            let Some(tokens) = tokenized(content, dialect) else {
                cursor = quote_end + 1;
                continue;
            };

            // Run the standard checks on the fragment in isolation (no
            // templated spans apply within the fragment itself).
            let mut fragment_violations = Vec::new();
            collect_pair_spacing_violations(
                content,
                &tokens,
                dialect,
                &[],
                &mut fragment_violations,
            );
            collect_ansi_national_string_literal_violations(
                content,
                &tokens,
                dialect,
                &[],
                &mut fragment_violations,
            );

            // Re-anchor fragment-relative offsets to the full document. The
            // fix edits are dropped (empty list) — presumably because
            // auto-rewriting inside templated strings is unsafe; confirm.
            for ((start, end), _) in fragment_violations {
                if start >= end || end > content.len() {
                    continue;
                }
                let absolute_start = quote_start + 1 + start;
                let absolute_end = quote_start + 1 + end;
                violations.push(((absolute_start, absolute_end), Vec::new()));
            }

            cursor = quote_end + 1;
        }
    }
}
1600
/// Locate the first single or double quote in `sql[start..end)`, returning
/// its byte offset and the quote character found.
fn next_quote_in_range(sql: &str, start: usize, end: usize) -> Option<(usize, char)> {
    let mut offset = start;
    while offset < end {
        let ch = sql[offset..].chars().next()?;
        match ch {
            '\'' | '"' => return Some((offset, ch)),
            _ => offset += ch.len_utf8(),
        }
    }
    None
}
1612
/// Find the closing `quote` in `sql[start..end)`, honoring backslash escapes:
/// a backslash consumes the character that follows it (when one fits in range).
fn find_closing_quote(sql: &str, start: usize, end: usize, quote: char) -> Option<usize> {
    let mut offset = start;
    while offset < end {
        let ch = sql[offset..].chars().next()?;
        let ch_end = offset + ch.len_utf8();
        if ch == '\\' && ch_end < end {
            // Skip both the backslash and the escaped character.
            let escaped = sql[ch_end..].chars().next()?;
            offset = ch_end + escaped.len_utf8();
            continue;
        }
        if ch == quote {
            return Some(offset);
        }
        offset = ch_end;
    }
    None
}
1632
/// Collect indices of every token inside Snowflake `PATTERN ( ... )` groups
/// (MATCH_RECOGNIZE row patterns), whose regex-like contents are exempt from
/// normal spacing rules.
///
/// `non_trivia` is the precomputed, ordered list of non-trivia token indices;
/// the returned set holds raw token indices from the opening `(` through the
/// matching `)`, inclusive.
fn snowflake_pattern_token_indices(
    tokens: &[TokenWithSpan],
    non_trivia: &[usize],
) -> HashSet<usize> {
    let mut out = HashSet::new();
    let mut cursor = 0usize;

    while cursor < non_trivia.len() {
        let token_index = non_trivia[cursor];
        // Look for the PATTERN word (matched by raw value, not keyword).
        let Token::Word(word) = &tokens[token_index].token else {
            cursor += 1;
            continue;
        };
        if !word.value.eq_ignore_ascii_case("PATTERN") {
            cursor += 1;
            continue;
        }

        // Find the first `(` after PATTERN.
        let Some(paren_pos) = ((cursor + 1)..non_trivia.len())
            .find(|idx| matches!(tokens[non_trivia[*idx]].token, Token::LParen))
        else {
            cursor += 1;
            continue;
        };

        // Walk from that `(` tracking nesting depth to find its matching `)`.
        let mut depth = 0usize;
        let mut end_pos = None;
        for (pos, idx) in non_trivia.iter().copied().enumerate().skip(paren_pos) {
            match tokens[idx].token {
                Token::LParen => depth += 1,
                Token::RParen => {
                    // Stray `)` with nothing open — abandon this group.
                    if depth == 0 {
                        break;
                    }
                    depth -= 1;
                    if depth == 0 {
                        end_pos = Some(pos);
                        break;
                    }
                }
                _ => {}
            }
        }

        // Unterminated group: skip this PATTERN and keep scanning.
        let Some(end_pos) = end_pos else {
            cursor += 1;
            continue;
        };
        // Record every raw token index from `(` through `)` inclusive.
        for idx in non_trivia.iter().take(end_pos + 1).skip(paren_pos) {
            out.insert(*idx);
        }
        cursor = end_pos + 1;
    }

    out
}
1689
/// Collect indices of `<` / `>` / `>>` tokens that delimit angle-bracket type
/// parameters (`ARRAY<INT>`, `STRUCT<a INT>`, `MAP<K, V>`).
///
/// A `<` counts only when the previous non-trivia token is ARRAY/STRUCT/MAP;
/// `>` closes one open bracket and `>>` closes two (the tokenizer lexes the
/// tail of `ARRAY<ARRAY<INT>>` as a single ShiftRight token).
fn type_angle_token_indices(tokens: &[TokenWithSpan], non_trivia: &[usize]) -> HashSet<usize> {
    let mut out = HashSet::new();
    // Stack of raw token indices for currently-open type `<` brackets.
    let mut stack = Vec::<usize>::new();

    for (pos, token_idx) in non_trivia.iter().copied().enumerate() {
        let token = &tokens[token_idx].token;
        match token {
            Token::Lt => {
                // Previous non-trivia token, if any (`pos` indexes `non_trivia`).
                let prev_idx = pos
                    .checked_sub(1)
                    .and_then(|value| non_trivia.get(value).copied());
                if prev_idx.is_some_and(|idx| is_type_constructor(&tokens[idx].token)) {
                    out.insert(token_idx);
                    stack.push(token_idx);
                }
            }
            Token::Gt if !stack.is_empty() => {
                out.insert(token_idx);
                stack.pop();
            }
            Token::ShiftRight if stack.len() >= 2 => {
                out.insert(token_idx);
                stack.pop();
                stack.pop();
            }
            _ => {}
        }
    }

    out
}
1721
1722fn supports_type_angle_spacing(dialect: Dialect) -> bool {
1723    matches!(
1724        dialect,
1725        Dialect::Bigquery | Dialect::Hive | Dialect::Databricks
1726    )
1727}
1728
1729fn is_type_constructor(token: &Token) -> bool {
1730    let Token::Word(word) = token else {
1731        return false;
1732    };
1733    word.value.eq_ignore_ascii_case("ARRAY")
1734        || word.value.eq_ignore_ascii_case("STRUCT")
1735        || word.value.eq_ignore_ascii_case("MAP")
1736}
1737
1738fn is_type_angle_spacing_pair(
1739    left: &TokenWithSpan,
1740    right: &TokenWithSpan,
1741    left_idx: usize,
1742    right_idx: usize,
1743    type_angle_tokens: &HashSet<usize>,
1744) -> bool {
1745    let left_is_type_angle = type_angle_tokens.contains(&left_idx);
1746    let right_is_type_angle = type_angle_tokens.contains(&right_idx);
1747
1748    if right_is_type_angle && matches!(right.token, Token::Lt | Token::Gt | Token::ShiftRight) {
1749        return true;
1750    }
1751    if left_is_type_angle && matches!(left.token, Token::Lt) {
1752        return true;
1753    }
1754    if left_is_type_angle
1755        && matches!(left.token, Token::Gt | Token::ShiftRight)
1756        && matches!(
1757            right.token,
1758            Token::Comma | Token::RParen | Token::RBracket | Token::LBracket | Token::Gt
1759        )
1760    {
1761        return true;
1762    }
1763
1764    false
1765}
1766
1767// ---------------------------------------------------------------------------
1768// Token utilities
1769// ---------------------------------------------------------------------------
1770
1771fn tokenized(sql: &str, dialect: Dialect) -> Option<Vec<TokenWithSpan>> {
1772    let dialect = dialect.to_sqlparser_dialect();
1773    let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
1774    tokenizer.tokenize_with_location().ok()
1775}
1776
/// Re-slice the document-level token stream down to the current statement and
/// rebase every token span so line/column are relative to the statement start
/// (matching what tokenizing the statement text alone would produce).
///
/// Returns `None` when the statement start can't be located, the document has
/// no tokens, or no token falls inside the statement range.
fn tokenized_for_context(ctx: &LintContext) -> Option<Vec<TokenWithSpan>> {
    // Line/column of the statement's first byte within the full document.
    let (statement_start_line, statement_start_column) =
        offset_to_line_col(ctx.sql, ctx.statement_range.start)?;

    ctx.with_document_tokens(|tokens| {
        if tokens.is_empty() {
            return None;
        }

        let mut out = Vec::new();
        for token in tokens {
            // Keep only tokens lying fully inside the statement's byte range.
            let Some((start, end)) = token_with_span_offsets(ctx.sql, token) else {
                continue;
            };
            if start < ctx.statement_range.start || end > ctx.statement_range.end {
                continue;
            }

            // Rebase both span endpoints to statement-relative coordinates;
            // drop tokens whose spans cannot be rebased.
            let Some(start_loc) = relative_location(
                token.span.start,
                statement_start_line,
                statement_start_column,
            ) else {
                continue;
            };
            let Some(end_loc) =
                relative_location(token.span.end, statement_start_line, statement_start_column)
            else {
                continue;
            };

            out.push(TokenWithSpan::new(
                token.token.clone(),
                Span::new(start_loc, end_loc),
            ));
        }

        if out.is_empty() {
            None
        } else {
            Some(out)
        }
    })
}
1821
1822fn token_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
1823    let start = line_col_to_offset(
1824        sql,
1825        token.span.start.line as usize,
1826        token.span.start.column as usize,
1827    )?;
1828    let end = line_col_to_offset(
1829        sql,
1830        token.span.end.line as usize,
1831        token.span.end.column as usize,
1832    )?;
1833    Some((start, end))
1834}
1835
1836fn next_non_trivia_index(tokens: &[TokenWithSpan], mut index: usize) -> Option<usize> {
1837    while index < tokens.len() {
1838        if !is_trivia_token(&tokens[index].token) {
1839            return Some(index);
1840        }
1841        index += 1;
1842    }
1843    None
1844}
1845
1846fn prev_non_trivia_index(tokens: &[TokenWithSpan], mut index: usize) -> Option<usize> {
1847    while index > 0 {
1848        index -= 1;
1849        if !is_trivia_token(&tokens[index].token) {
1850            return Some(index);
1851        }
1852    }
1853    None
1854}
1855
1856fn is_trivia_token(token: &Token) -> bool {
1857    matches!(
1858        token,
1859        Token::Whitespace(Whitespace::Space | Whitespace::Newline | Whitespace::Tab)
1860            | Token::Whitespace(Whitespace::SingleLineComment { .. })
1861            | Token::Whitespace(Whitespace::MultiLineComment(_))
1862    )
1863}
1864
/// Converts a 1-based (line, column) position (columns counted in chars) into
/// a byte offset in `sql`. Returns `None` for zero coordinates or positions
/// past the one-past-the-end position.
fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
    if line == 0 || column == 0 {
        return None;
    }

    let (mut cur_line, mut cur_col) = (1usize, 1usize);
    for (byte_idx, ch) in sql.char_indices() {
        if (cur_line, cur_col) == (line, column) {
            return Some(byte_idx);
        }
        if ch == '\n' {
            cur_line += 1;
            cur_col = 1;
        } else {
            cur_col += 1;
        }
    }

    // The position one past the final character maps to `sql.len()`.
    ((cur_line, cur_col) == (line, column)).then_some(sql.len())
}
1892
1893fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
1894    let start = line_col_to_offset(
1895        sql,
1896        token.span.start.line as usize,
1897        token.span.start.column as usize,
1898    )?;
1899    let end = line_col_to_offset(
1900        sql,
1901        token.span.end.line as usize,
1902        token.span.end.column as usize,
1903    )?;
1904    Some((start, end))
1905}
1906
/// Converts a byte offset into a 1-based (line, column) position (columns
/// counted in chars). `offset == sql.len()` yields the one-past-the-end
/// position; offsets beyond that return `None`.
fn offset_to_line_col(sql: &str, offset: usize) -> Option<(usize, usize)> {
    if offset > sql.len() {
        return None;
    }
    if offset == sql.len() {
        // End position: lines are newline count + 1, column is the char
        // length of the final line + 1.
        let line = 1 + sql.bytes().filter(|&byte| byte == b'\n').count();
        let column = match sql.rsplit_once('\n') {
            Some((_, tail)) => tail.chars().count() + 1,
            None => sql.chars().count() + 1,
        };
        return Some((line, column));
    }

    let (mut line, mut column) = (1usize, 1usize);
    for (byte_idx, ch) in sql.char_indices() {
        if byte_idx == offset {
            return Some((line, column));
        }
        if ch == '\n' {
            line += 1;
            column = 1;
        } else {
            column += 1;
        }
    }
    // Reached only for offsets inside a multi-byte final character; they
    // fall through to the end position (preserved legacy behavior).
    Some((line, column))
}
1936
1937fn relative_location(
1938    location: Location,
1939    statement_start_line: usize,
1940    statement_start_column: usize,
1941) -> Option<Location> {
1942    if location.line == 0 || location.column == 0 {
1943        return None;
1944    }
1945
1946    let line = location.line as usize;
1947    let column = location.column as usize;
1948    if line < statement_start_line {
1949        return None;
1950    }
1951
1952    let relative_line = line - statement_start_line + 1;
1953    let relative_column = if line == statement_start_line {
1954        if column < statement_start_column {
1955            return None;
1956        }
1957        column - statement_start_column + 1
1958    } else {
1959        column
1960    };
1961
1962    Some(Location::new(relative_line as u64, relative_column as u64))
1963}
1964
#[cfg(test)]
mod tests {
    use super::*;
    use crate::linter::rule::with_active_dialect;
    use crate::parser::parse_sql;
    use crate::types::{Dialect, IssueAutofixApplicability};

    // Lints `sql` with the default rule under the Generic dialect.
    fn run(sql: &str) -> Vec<Issue> {
        run_with_dialect(sql, Dialect::Generic)
    }

    // Parses `sql` and runs the default rule over every statement under
    // `dialect`, collecting all issues.
    fn run_with_dialect(sql: &str, dialect: Dialect) -> Vec<Issue> {
        let statements = parse_sql(sql).expect("parse");
        let rule = LayoutSpacing::default();
        with_active_dialect(dialect, || {
            statements
                .iter()
                .enumerate()
                .flat_map(|(index, statement)| {
                    rule.check(
                        statement,
                        &LintContext {
                            sql,
                            statement_range: 0..sql.len(),
                            statement_index: index,
                        },
                    )
                })
                .collect()
        })
    }

    // Runs the default rule against raw `sql` without parsing it, exercising
    // the token-level fallback path (a placeholder AST is supplied).
    fn run_statementless_with_dialect(sql: &str, dialect: Dialect) -> Vec<Issue> {
        run_statementless_with_rule(sql, dialect, LayoutSpacing::default())
    }

    // Same as above but with a caller-configured rule instance; the checked
    // statement is a placeholder so only `ctx.sql` drives the result.
    fn run_statementless_with_rule(sql: &str, dialect: Dialect, rule: LayoutSpacing) -> Vec<Issue> {
        let placeholder = parse_sql("SELECT 1").expect("parse placeholder");
        with_active_dialect(dialect, || {
            rule.check(
                &placeholder[0],
                &LintContext {
                    sql,
                    statement_range: 0..sql.len(),
                    statement_index: 0,
                },
            )
        })
    }

    // Applies every autofix edit from `issues` to `sql`. Edits are sorted by
    // span and applied right-to-left so earlier offsets stay valid.
    fn apply_all_issue_autofixes(sql: &str, issues: &[Issue]) -> String {
        let mut out = sql.to_string();
        let mut edits = issues
            .iter()
            .filter_map(|issue| issue.autofix.as_ref())
            .flat_map(|autofix| autofix.edits.clone())
            .collect::<Vec<_>>();
        edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
        for edit in edits.into_iter().rev() {
            out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
        }
        out
    }

    #[test]
    fn allows_bigquery_array_type_angle_brackets_without_spaces() {
        let issues = run_with_dialect(
            "SELECT ARRAY<FLOAT64>[1, 2, 3] AS floats;",
            Dialect::Bigquery,
        );
        assert!(issues.is_empty());
    }

    #[test]
    fn allows_create_table_with_qualified_name_before_column_list() {
        let issues = run("CREATE TABLE db.schema_name.tbl_name (id INT)");
        assert!(issues.is_empty());
    }

    #[test]
    fn fixes_reference_target_column_list_spacing() {
        let sql = "create table tab1 (b int references tab2(b))";
        let issues = run_statementless_with_dialect(sql, Dialect::Ansi);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "create table tab1 (b int references tab2 (b))");
    }

    #[test]
    fn allows_bigquery_hyphenated_project_identifier() {
        let issues = run_statementless_with_dialect(
            "SELECT col_foo FROM foo-bar.foo.bar",
            Dialect::Bigquery,
        );
        assert!(issues.is_empty());
    }

    #[test]
    fn allows_bigquery_function_array_offset_access() {
        let sql = "SELECT testFunction(a)[OFFSET(0)].* FROM table1";
        let issues = run_statementless_with_dialect(sql, Dialect::Bigquery);
        assert!(issues.is_empty());
    }

    #[test]
    fn allows_hive_struct_and_array_datatype_angles() {
        let sql = "select col1::STRUCT<foo: int>, col2::ARRAY<int> from t";
        let issues = run_statementless_with_dialect(sql, Dialect::Hive);
        assert!(issues.is_empty());
    }

    #[test]
    fn allows_sparksql_file_literal_path() {
        let sql = "ADD JAR path/to/some.jar;";
        let issues = run_statementless_with_dialect(sql, Dialect::Databricks);
        assert!(issues.is_empty());
    }

    #[test]
    fn allows_clickhouse_system_model_path() {
        let sql = "SYSTEM RELOAD MODEL /model/path;";
        let issues = run_statementless_with_dialect(sql, Dialect::Clickhouse);
        assert!(issues.is_empty(), "unexpected issues: {issues:?}");
    }

    // --- Alignment configuration tests ---

    #[test]
    fn detects_alias_alignment_when_configured() {
        let sql = "SELECT\n\tcol1 AS a,\n\tlonger_col AS b\nFROM t";
        let issues = run_statementless_with_rule(
            sql,
            Dialect::Ansi,
            LayoutSpacing {
                align_alias_expression: true,
                tab_space_size: 4,
                ..LayoutSpacing::default()
            },
        );
        assert!(!issues.is_empty());
    }

    #[test]
    fn detects_alias_alignment_with_tabs_when_columns_are_equal_width() {
        let sql = "SELECT\n\tcol1 AS alias1,\n\tcol2 AS alias2\nFROM table1";
        let issues = run_statementless_with_rule(
            sql,
            Dialect::Ansi,
            LayoutSpacing {
                align_alias_expression: true,
                align_with_tabs: true,
                tab_space_size: 4,
                ..LayoutSpacing::default()
            },
        );
        assert!(
            !issues.is_empty(),
            "tab indentation alignment should flag spaces before AS"
        );
    }

    #[test]
    fn detects_create_table_datatype_alignment_when_configured() {
        let sql = "CREATE TABLE tbl (\n    foo VARCHAR(25) NOT NULL,\n    barbar INT NULL\n)";
        let issues = run_statementless_with_rule(
            sql,
            Dialect::Ansi,
            LayoutSpacing {
                align_data_type: true,
                ..LayoutSpacing::default()
            },
        );
        assert!(!issues.is_empty());
    }

    #[test]
    fn does_not_flag_create_table_alignment_when_columns_are_already_aligned() {
        let sql = "CREATE TABLE foo (\n    x INT NOT NULL PRIMARY KEY,\n    y INT NULL,\n    z INT NULL\n);";
        let issues = run_statementless_with_rule(
            sql,
            Dialect::Ansi,
            LayoutSpacing {
                align_data_type: true,
                align_column_constraint: true,
                ..LayoutSpacing::default()
            },
        );
        assert!(
            issues.is_empty(),
            "expected no LT01 alignment issues: {issues:?}"
        );
    }

    #[test]
    fn statementless_fixes_comment_on_function_spacing() {
        let sql = "COMMENT ON FUNCTION x (foo) IS 'y';";
        let issues = run_statementless_with_dialect(sql, Dialect::Postgres);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "COMMENT ON FUNCTION x(foo) IS 'y';");
    }

    #[test]
    fn statementless_fixes_split_tsql_comparison_operator() {
        let sql = "SELECT col1 FROM table1 WHERE 1 > = 1";
        let issues = run_statementless_with_dialect(sql, Dialect::Mssql);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT col1 FROM table1 WHERE 1 >= 1");
    }

    #[test]
    fn statementless_fixes_tsql_compound_assignment_operator() {
        let sql = "SET @param1+=1";
        let issues = run_statementless_with_dialect(sql, Dialect::Mssql);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SET @param1 += 1");
    }

    #[test]
    fn allows_sparksql_multi_unit_interval_minus() {
        let sql = "SELECT INTERVAL -2 HOUR '3' MINUTE AS col;";
        let issues = run_statementless_with_dialect(sql, Dialect::Databricks);
        assert!(issues.is_empty());
    }

    // --- Templated-area handling tests ---

    #[test]
    fn ignore_templated_areas_skips_template_artifacts() {
        let sql = "{{ 'SELECT 1, 4' }}, 5, 6";
        let issues = run_statementless_with_rule(
            sql,
            Dialect::Generic,
            LayoutSpacing {
                ignore_templated_areas: true,
                ..LayoutSpacing::default()
            },
        );
        assert!(issues.is_empty(), "template-only spacing should be ignored");
    }

    #[test]
    fn ignore_templated_areas_still_fixes_non_template_region() {
        let sql = "{{ 'SELECT 1, 4' }}, 5 , 6";
        let issues = run_statementless_with_rule(
            sql,
            Dialect::Generic,
            LayoutSpacing {
                ignore_templated_areas: true,
                ..LayoutSpacing::default()
            },
        );
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "{{ 'SELECT 1, 4' }}, 5, 6");
    }

    #[test]
    fn templated_string_content_is_checked_when_not_ignored() {
        let sql = "{{ 'SELECT 1 ,4' }}";
        let issues = run_statementless_with_rule(
            sql,
            Dialect::Generic,
            LayoutSpacing {
                ignore_templated_areas: false,
                ..LayoutSpacing::default()
            },
        );
        assert!(!issues.is_empty());
        assert!(
            issues.iter().all(|issue| issue.autofix.is_none()),
            "template-internal checks are detection-only"
        );
    }

    #[test]
    fn templated_string_content_passes_when_clean() {
        let sql = "{{ 'SELECT 1, 4' }}";
        let issues = run_statementless_with_rule(
            sql,
            Dialect::Generic,
            LayoutSpacing {
                ignore_templated_areas: false,
                ..LayoutSpacing::default()
            },
        );
        assert!(issues.is_empty());
    }

    // --- Snowflake-specific syntax tests ---

    #[test]
    fn allows_snowflake_match_recognize_pattern_spacing() {
        let sql = "select * from stock_price_history\n  match_recognize (\n    pattern ((A | B){5} C+)\n  )";
        let issues = run_statementless_with_dialect(sql, Dialect::Snowflake);
        assert!(issues.is_empty(), "snowflake pattern syntax should pass");
    }

    #[test]
    fn fixes_snowflake_match_condition_newline_before_paren() {
        let sql = "select\n    table1.pk1\nfrom table1\n    asof join\n    table2\n    match_condition\n    (t1 > t2)";
        let issues = run_with_dialect(sql, Dialect::Snowflake);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert!(
            fixed.contains("match_condition(t1 > t2)"),
            "expected inline match_condition: {fixed}"
        );
    }

    #[test]
    fn fixes_snowflake_copy_into_target_column_list_spacing() {
        let sql = "copy into DB.SCHEMA.ProblemHere(col1)\nfrom @my_stage/file";
        let issues = run_statementless_with_dialect(sql, Dialect::Snowflake);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert!(
            fixed.contains("DB.SCHEMA.ProblemHere (col1)"),
            "fixed: {fixed}"
        );
    }

    #[test]
    fn fixes_snowflake_copy_into_target_column_list_spacing_with_placeholder_prefix() {
        let sql = "copy into ${env}_ENT_LANDING.SCHEMA_NAME.ProblemHere(col1)\nfrom @my_stage/file";
        let issues = run_statementless_with_dialect(sql, Dialect::Snowflake);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert!(
            fixed.contains(".SCHEMA_NAME.ProblemHere (col1)"),
            "fixed: {fixed}"
        );
    }

    #[test]
    fn allows_snowflake_stage_path_without_spacing_around_slash() {
        let sql = "copy into t from @my_stage/file";
        let issues = run_statementless_with_dialect(sql, Dialect::Snowflake);
        assert!(
            issues.is_empty(),
            "snowflake stage path should not force spaces around slash: {issues:?}"
        );
    }

    // --- Trailing whitespace tests ---

    #[test]
    fn flags_trailing_whitespace() {
        let sql = "SELECT 1     \n";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag trailing whitespace");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT 1\n");
    }

    #[test]
    fn flags_trailing_whitespace_on_initial_blank_line() {
        let sql = " \nSELECT 1     \n";
        let issues = run(sql);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "\nSELECT 1\n");
    }

    // --- Operator spacing tests ---

    #[test]
    fn flags_compact_operator() {
        let sql = "SELECT 1+2";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag compact 1+2");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT 1 + 2");
    }

    #[test]
    fn flags_compact_operator_expression() {
        let sql = "select\n    field,\n    date(field_1)-date(field_2) as diff\nfrom tbl";
        let issues = run(sql);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert!(
            fixed.contains("date(field_1) - date(field_2)"),
            "should fix operator spacing: {fixed}"
        );
    }

    #[test]
    fn flags_plus_between_identifier_and_literal() {
        let sql = "SELECT a +'b'+ 'c' FROM tbl";
        let issues = run(sql);
        assert!(
            !issues.is_empty(),
            "should flag operator spacing around string literals"
        );
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT a + 'b' + 'c' FROM tbl");
    }

    #[test]
    fn does_not_flag_simple_spacing() {
        assert!(run("SELECT * FROM t WHERE a = 1").is_empty());
    }

    #[test]
    fn does_not_flag_sign_indicators() {
        let issues = run("SELECT 1, +2, -4");
        // Sign indicators before numbers should not be flagged
        assert!(
            issues.is_empty(),
            "unary signs should not be flagged: {issues:?}"
        );
    }

    #[test]
    fn does_not_flag_newline_operator() {
        assert!(run("SELECT 1\n+ 2").is_empty());
        assert!(run("SELECT 1\n    + 2").is_empty());
    }

    // --- Comma spacing tests ---

    #[test]
    fn flags_space_before_comma() {
        let sql = "SELECT 1 ,4";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag space before comma");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT 1, 4");
    }

    #[test]
    fn flags_no_space_after_comma() {
        let sql = "SELECT 1,4";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag missing space after comma");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT 1, 4");
    }

    #[test]
    fn flags_excessive_space_after_comma() {
        let sql = "SELECT 1,   4";
        let issues = run(sql);
        assert!(
            !issues.is_empty(),
            "should flag excessive space after comma"
        );
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT 1, 4");
    }

    // --- Bracket spacing tests ---

    #[test]
    fn flags_missing_space_before_paren_after_keyword() {
        let sql = "SELECT * FROM(SELECT 1 AS C1)AS T1;";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag FROM( and )AS: {issues:?}");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT * FROM (SELECT 1 AS C1) AS T1;");
    }

    // --- Missing space tests ---

    #[test]
    fn flags_cte_missing_space_after_as() {
        let sql = "WITH a AS(select 1) select * from a";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag AS(");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "WITH a AS (select 1) select * from a");
    }

    #[test]
    fn flags_cte_multiple_spaces_after_as() {
        let sql = "WITH a AS  (select 1) select * from a";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag AS  (");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "WITH a AS (select 1) select * from a");
    }

    #[test]
    fn flags_missing_space_after_using() {
        let sql = "select * from a JOIN b USING(x)";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag USING(");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "select * from a JOIN b USING (x)");
    }

    // --- Excessive whitespace tests ---

    #[test]
    fn flags_excessive_whitespace() {
        let sql = "SELECT     1";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag excessive whitespace");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT 1");
    }

    #[test]
    fn flags_excessive_whitespace_multi() {
        let sql = "select\n    1 + 2     + 3     + 4        -- Comment\nfrom     foo";
        let issues = run(sql);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(
            fixed,
            "select\n    1 + 2 + 3 + 4        -- Comment\nfrom foo"
        );
    }

    // --- Literal spacing tests ---

    #[test]
    fn flags_literal_operator_spacing() {
        let sql = "SELECT ('foo'||'bar') as buzz";
        let issues = run(sql);
        assert!(
            !issues.is_empty(),
            "should flag compact || operator: {issues:?}"
        );
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT ('foo' || 'bar') as buzz");
    }

    #[test]
    fn flags_literal_as_spacing() {
        let sql = "SELECT\n    'foo'AS   bar\nFROM foo";
        let issues = run(sql);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT\n    'foo' AS bar\nFROM foo");
    }

    #[test]
    fn flags_ansi_national_string_literal_spacing() {
        let sql = "SELECT a + N'b' + N'c' FROM tbl;";
        let issues = run_with_dialect(sql, Dialect::Ansi);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT a + N 'b' + N 'c' FROM tbl;");
    }

    // --- Function spacing tests ---

    #[test]
    fn does_not_flag_function_call() {
        assert!(run("SELECT foo(5) FROM T1;").is_empty());
        assert!(run("SELECT COUNT(*) FROM tbl\n\n").is_empty());
    }

    // --- Cast operator tests ---

    #[test]
    fn flags_spaced_cast_operator() {
        let sql = "SELECT '1' :: INT;";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag space around ::");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT '1'::INT;");
    }

    // --- JSON arrow tests ---

    #[test]
    fn flags_compact_json_arrow_operator() {
        let sql = "SELECT payload->>'id' FROM t";
        let issues = run(sql);
        assert!(
            issues.len() >= 2,
            "should flag 2+ violations for compact json-arrow"
        );
        assert!(
            issues
                .iter()
                .all(|issue| issue.autofix.as_ref().is_some_and(
                    |autofix| autofix.applicability == IssueAutofixApplicability::Safe
                )),
            "expected safe autofix metadata"
        );

        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT payload ->> 'id' FROM t");
    }

    // --- EXISTS spacing tests ---

    #[test]
    fn does_not_flag_exists_without_space_before_parenthesis() {
        let no_space = "SELECT\n    EXISTS(\n        SELECT 1\n    ) AS has_row\nFROM t";
        assert!(run(no_space).is_empty());
    }

    #[test]
    fn flags_space_before_exists_parenthesis_in_select_list() {
        let sql = "SELECT 1,\n    EXISTS (\n        SELECT 1\n    ) AS has_row\nFROM t";
        let issues = run(sql);
        assert!(
            !issues.is_empty(),
            "expected EXISTS-space violation in select list"
        );
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert!(
            fixed.contains("EXISTS(\n"),
            "expected EXISTS( after fix, got: {fixed}"
        );
    }

    #[test]
    fn requires_space_before_exists_parenthesis_after_where() {
        let sql = "SELECT 1\nWHERE EXISTS(\n    SELECT 1\n)";
        let issues = run(sql);
        assert!(
            !issues.is_empty(),
            "expected missing-space violation for WHERE EXISTS("
        );
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert!(
            fixed.contains("WHERE EXISTS (\n"),
            "expected WHERE EXISTS ( after fix, got: {fixed}"
        );
    }

    #[test]
    fn merge_violations_prefers_fixable_duplicate_span() {
        let mut violations = vec![
            ((10, 10), Vec::new()),
            ((10, 10), vec![(10, 10, " ".to_string())]),
        ];
        merge_violations_by_span(&mut violations);
        assert_eq!(violations.len(), 1);
        assert_eq!(violations[0].0, (10, 10));
        assert_eq!(violations[0].1, vec![(10, 10, " ".to_string())]);
    }

    // --- Safe pass cases ---

    #[test]
    fn does_not_flag_spacing_patterns_inside_literals_or_comments() {
        let issues = run("SELECT 'payload->>''id''' AS txt -- EXISTS (\nFROM t");
        assert!(
            issues.is_empty(),
            "should not flag content inside literals/comments: {issues:?}"
        );
    }

    #[test]
    fn does_not_flag_correct_comma_spacing() {
        assert!(run("SELECT 1, 4").is_empty());
    }

    #[test]
    fn does_not_flag_correct_cast() {
        assert!(run("SELECT '1'::INT;").is_empty());
    }

    #[test]
    fn does_not_flag_qualified_identifiers() {
        // Dot-separated identifiers should not have spaces
        assert!(run("SELECT a.b FROM c.d").is_empty());
    }

    #[test]
    fn does_not_flag_newline_after_using() {
        assert!(
            run("select * from a JOIN b USING\n(x)").is_empty(),
            "newline between USING and ( should be acceptable"
        );
    }

    #[test]
    fn flags_cte_newline_after_as() {
        let sql = "WITH a AS\n(\n  select 1\n)\nselect * from a";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag AS + newline + (");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "WITH a AS (\n  select 1\n)\nselect * from a");
    }

    #[test]
    fn flags_cte_newline_and_spaces_after_as() {
        let sql = "WITH a AS\n\n\n    (\n  select 1\n)\nselect * from a";
        let issues = run(sql);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "WITH a AS (\n  select 1\n)\nselect * from a");
    }

    #[test]
    fn does_not_flag_comment_after_as() {
        // When there's a comment between AS and (, it should pass
        assert!(
            run("WITH\na AS -- comment\n(\nselect 1\n)\nselect * from a").is_empty(),
            "comment between AS and ( should be acceptable"
        );
    }

    #[test]
    fn insert_into_table_paren_allows_space() {
        // Space before ( in INSERT INTO table ( should be fine.
        let issues = run("INSERT INTO metrics.cold_start_daily (\n    workspace_id\n) SELECT 1");
        let lt01 = issues
            .iter()
            .filter(|i| i.code == "LT01")
            .collect::<Vec<_>>();
        assert!(
            lt01.is_empty(),
            "INSERT INTO table ( should not flag LT01, got: {lt01:?}"
        );
    }

    #[test]
    fn insert_into_table_paren_with_cte() {
        // CTE + INSERT INTO: both parsed-statement and fallback paths.
        let sql = "WITH starts AS (\n    SELECT 1\n)\nINSERT INTO metrics.cold_start_daily (\n    workspace_id\n) SELECT workspace_id FROM starts";
        let issues = run_with_dialect(sql, Dialect::Postgres);
        let lt01 = issues
            .iter()
            .filter(|i| i.code == "LT01")
            .collect::<Vec<_>>();
        assert!(
            lt01.is_empty(),
            "INSERT INTO table ( with CTE should not flag LT01, got: {lt01:?}"
        );
    }

    #[test]
    fn insert_into_table_paren_on_conflict() {
        // Regression: CTE + INSERT INTO + ON CONFLICT via statementless path.
        let sql = "\
WITH cte AS (
    SELECT workspace_id
    FROM ledger.query_history
    WHERE start_time >= $1
)

INSERT INTO metrics.cold_start_daily (
    workspace_id
)
SELECT workspace_id
FROM cte
ON CONFLICT (workspace_id) DO UPDATE
    SET workspace_id = excluded.workspace_id";
        let issues = run_statementless_with_dialect(sql, Dialect::Postgres);
        let lt01 = issues
            .iter()
            .filter(|i| i.code == "LT01")
            .collect::<Vec<_>>();
        assert!(
            lt01.is_empty(),
            "INSERT INTO table ( with ON CONFLICT should not flag LT01, got: {lt01:?}"
        );
    }
}