// flowscope_core/linter/rules/lt_001.rs

1//! LINT_LT_001: Layout spacing.
2//!
3//! SQLFluff LT01 parity: comprehensive spacing checks covering operators,
4//! commas, brackets, keywords, literals, trailing whitespace, excessive
5//! whitespace, and cast operators.
6
7use crate::linter::config::LintConfig;
8use crate::linter::rule::{LintContext, LintRule};
9use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit};
10use sqlparser::ast::Statement;
11use sqlparser::keywords::Keyword;
12use sqlparser::tokenizer::{Location, Span, Token, TokenWithSpan, Tokenizer, Whitespace};
13use std::collections::HashSet;
14
/// Rule LT01: flags inappropriate spacing (SQLFluff LT01 parity).
///
/// Settings are resolved from [`LintConfig`] in `from_config`; the `Default`
/// impl uses the same fallback values.
pub struct LayoutSpacing {
    // When true, spacing inside templated regions is not checked.
    ignore_templated_areas: bool,
    // Alignment detection toggles, derived from `layout.keyword_newline`
    // entries whose `spacing_before` value starts with "align".
    align_alias_expression: bool,
    align_data_type: bool,
    align_column_constraint: bool,
    // True when `indent_unit` is "tab"; aligned separators must then be tabs.
    align_with_tabs: bool,
    // Visual width of one tab stop; clamped to at least 1 in `from_config`.
    tab_space_size: usize,
}
23
24impl LayoutSpacing {
25    pub fn from_config(config: &LintConfig) -> Self {
26        let spacing_before_align = |type_name: &str| {
27            config
28                .config_section_object("layout.keyword_newline")
29                .and_then(|layout| layout.get(type_name))
30                .and_then(serde_json::Value::as_object)
31                .and_then(|entry| entry.get("spacing_before"))
32                .and_then(serde_json::Value::as_str)
33                .is_some_and(|value| value.to_ascii_lowercase().starts_with("align"))
34        };
35
36        Self {
37            ignore_templated_areas: config
38                .core_option_bool("ignore_templated_areas")
39                .unwrap_or(true),
40            align_alias_expression: spacing_before_align("alias_expression"),
41            align_data_type: spacing_before_align("data_type"),
42            align_column_constraint: spacing_before_align("column_constraint_segment"),
43            align_with_tabs: config
44                .section_option_str("indentation", "indent_unit")
45                .or_else(|| config.section_option_str("rules", "indent_unit"))
46                .is_some_and(|value| value.eq_ignore_ascii_case("tab")),
47            tab_space_size: config
48                .section_option_usize("indentation", "tab_space_size")
49                .or_else(|| config.section_option_usize("rules", "tab_space_size"))
50                .unwrap_or(4)
51                .max(1),
52        }
53    }
54
55    fn alignment_options(&self) -> Lt01AlignmentOptions {
56        Lt01AlignmentOptions {
57            alias_expression: self.align_alias_expression,
58            data_type: self.align_data_type,
59            column_constraint: self.align_column_constraint,
60            align_with_tabs: self.align_with_tabs,
61            tab_space_size: self.tab_space_size,
62        }
63    }
64}
65
impl Default for LayoutSpacing {
    /// Defaults mirror the `from_config` fallbacks used when no
    /// configuration entry is present.
    fn default() -> Self {
        Self {
            ignore_templated_areas: true,
            align_alias_expression: false,
            align_data_type: false,
            align_column_constraint: false,
            align_with_tabs: false,
            tab_space_size: 4,
        }
    }
}
78
impl LintRule for LayoutSpacing {
    fn code(&self) -> &'static str {
        issue_codes::LINT_LT_001
    }

    fn name(&self) -> &'static str {
        "Layout spacing"
    }

    fn description(&self) -> &'static str {
        "Inappropriate Spacing."
    }

    /// Collects spacing violations for this statement and converts each
    /// violation span into an [`Issue`], attaching safe autofix edits when
    /// the collector produced any.
    fn check(&self, _statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
        let mut violations =
            spacing_violations(ctx, self.ignore_templated_areas, self.alignment_options());
        // Is there unparsed, non-whitespace text after this statement's range?
        let has_remaining_non_whitespace = ctx.sql[ctx.statement_range.end..]
            .chars()
            .any(|ch| !ch.is_whitespace());
        // Fallback 1: the parser stopped early on the first statement (no
        // trailing `;` and unparsed text remains) — rescan the full input.
        let parser_fragment_fallback = ctx.statement_index == 0
            && ctx.statement_range.start == 0
            && ctx.statement_range.end < ctx.sql.len()
            && has_remaining_non_whitespace
            && !ctx.statement_sql().trim_end().ends_with(';');
        // Fallback 2: templated input where the statement range covers only
        // part of the raw SQL — rescan the full input as well.
        let template_fragment_fallback = ctx.statement_index == 0
            && contains_template_marker(ctx.sql)
            && (ctx.statement_range.start > 0 || ctx.statement_range.end < ctx.sql.len());
        if parser_fragment_fallback || template_fragment_fallback {
            let full_ctx = LintContext {
                sql: ctx.sql,
                statement_range: 0..ctx.sql.len(),
                statement_index: 0,
            };
            violations.extend(spacing_violations(
                &full_ctx,
                self.ignore_templated_areas,
                self.alignment_options(),
            ));
            // Collapse spans reported by both passes into one violation each.
            merge_violations_by_span(&mut violations);
            // NOTE(review): offsets from the full-input pass are later mapped
            // through `ctx.span_from_statement_offset`, which presumably
            // expects statement-relative offsets; when
            // `statement_range.start > 0` (template fallback) the two passes
            // may disagree — confirm against `span_from_statement_offset`.
        }

        violations
            .into_iter()
            .map(|((start, end), edits)| {
                let mut issue =
                    Issue::info(issue_codes::LINT_LT_001, "Inappropriate spacing found.")
                        .with_statement(ctx.statement_index)
                        .with_span(ctx.span_from_statement_offset(start, end));
                if !edits.is_empty() {
                    let edits = edits
                        .into_iter()
                        .map(|(edit_start, edit_end, replacement)| {
                            IssuePatchEdit::new(
                                ctx.span_from_statement_offset(edit_start, edit_end),
                                replacement.to_string(),
                            )
                        })
                        .collect();
                    issue = issue.with_autofix_edits(IssueAutofixApplicability::Safe, edits);
                }
                issue
            })
            .collect()
    }
}
144
/// Byte-offset span `(start, end)` within the scanned SQL.
type Lt01Span = (usize, usize);
/// Replacement edit: `(start, end, replacement_text)`.
type Lt01AutofixEdit = (usize, usize, String);
/// A flagged span plus the edits that would fix it (possibly empty).
type Lt01Violation = (Lt01Span, Vec<Lt01AutofixEdit>);
/// Span of a templated region within the SQL.
type Lt01TemplateSpan = (usize, usize);

/// Sorts violations by span and collapses entries sharing the same span,
/// keeping the union of their autofix edits.
fn merge_violations_by_span(violations: &mut Vec<Lt01Violation>) {
    violations.sort_unstable_by_key(|(span, _)| *span);

    let mut merged: Vec<Lt01Violation> = Vec::with_capacity(violations.len());
    for (span, edits) in violations.drain(..) {
        match merged.last_mut() {
            Some((prev_span, prev_edits)) if *prev_span == span => {
                if prev_edits.is_empty() {
                    // Adopt the first non-empty edit set wholesale.
                    *prev_edits = edits;
                } else {
                    // Append only edits not already recorded for this span.
                    for edit in edits {
                        if !prev_edits.contains(&edit) {
                            prev_edits.push(edit);
                        }
                    }
                }
            }
            _ => merged.push((span, edits)),
        }
    }

    *violations = merged;
}
175
/// Alignment-related settings threaded into the violation collectors.
#[derive(Clone, Copy)]
struct Lt01AlignmentOptions {
    // Detect misaligned `AS` keywords across consecutive alias lines.
    alias_expression: bool,
    // Detect misaligned second tokens in CREATE TABLE column lists.
    data_type: bool,
    // Triggers the same CREATE TABLE detection as `data_type`.
    column_constraint: bool,
    // The whitespace before aligned `AS` must consist of tabs.
    align_with_tabs: bool,
    // Visual width of one tab stop for column computation.
    tab_space_size: usize,
}
184
/// Gathers all LT01 spacing violations for one statement.
///
/// Tokenizes the statement (preferring raw-text tokenization when the
/// statement still contains template markers), runs each collector in turn,
/// then sorts and dedups by `(start, end)` span so overlapping collectors
/// report each span only once.
fn spacing_violations(
    ctx: &LintContext,
    ignore_templated_areas: bool,
    alignment: Lt01AlignmentOptions,
) -> Vec<Lt01Violation> {
    let sql = ctx.statement_sql();
    let mut violations = Vec::new();
    let templated_spans = template_spans(sql);
    // Choose which tokenizer to try first; the other is the fallback.
    let prefer_raw_template_tokens = ctx.is_templated() && contains_template_marker(sql);
    let tokens = if prefer_raw_template_tokens {
        tokenized(sql, ctx.dialect()).or_else(|| tokenized_for_context(ctx))
    } else {
        tokenized_for_context(ctx).or_else(|| tokenized(sql, ctx.dialect()))
    };
    // If neither tokenizer succeeds there is nothing to check.
    let Some(tokens) = tokens else {
        return violations;
    };

    let dialect = ctx.dialect();

    collect_trailing_whitespace_violations(sql, &mut violations);
    collect_pair_spacing_violations(sql, &tokens, dialect, &templated_spans, &mut violations);
    collect_ansi_national_string_literal_violations(
        sql,
        &tokens,
        dialect,
        &templated_spans,
        &mut violations,
    );
    // Template bodies are only checked when explicitly configured.
    if !ignore_templated_areas {
        collect_template_string_spacing_violations(sql, dialect, &templated_spans, &mut violations);
    }
    collect_alignment_detection_violations(sql, alignment, &mut violations);

    violations.sort_unstable_by_key(|(span, _)| *span);
    violations.dedup_by_key(|(span, _)| *span);

    violations
}
224
225// ---------------------------------------------------------------------------
226// Trailing whitespace
227// ---------------------------------------------------------------------------
228
229fn collect_trailing_whitespace_violations(sql: &str, violations: &mut Vec<Lt01Violation>) {
230    let mut offset = 0;
231    for line in sql.split('\n') {
232        let trimmed = line.trim_end_matches([' ', '\t']);
233        let trailing_start = offset + trimmed.len();
234        let trailing_end = offset + line.len();
235        if trailing_end > trailing_start {
236            let span = (trailing_start, trailing_end);
237            let edit = (trailing_start, trailing_end, String::new());
238            violations.push((span, vec![edit]));
239        }
240        offset += line.len() + 1; // +1 for the \n
241    }
242}
243
244fn collect_alignment_detection_violations(
245    sql: &str,
246    alignment: Lt01AlignmentOptions,
247    violations: &mut Vec<Lt01Violation>,
248) {
249    if alignment.alias_expression {
250        collect_alias_alignment_detection(
251            sql,
252            alignment.tab_space_size,
253            alignment.align_with_tabs,
254            violations,
255        );
256    }
257    if alignment.data_type || alignment.column_constraint {
258        collect_create_table_alignment_detection(sql, alignment.tab_space_size, violations);
259    }
260}
261
/// One `AS` keyword occurrence within a run of consecutive alias lines.
#[derive(Clone, Copy)]
struct AliasAlignmentEntry {
    // Byte offset of the `AS` keyword within the scanned SQL.
    as_start: usize,
    // Visual (tab-expanded) column at which `AS` starts.
    visual_col: usize,
    // Whether the whitespace immediately before `AS` is all tabs.
    separator_uses_tabs: bool,
}
268
269fn collect_alias_alignment_detection(
270    sql: &str,
271    tab_space_size: usize,
272    align_with_tabs: bool,
273    violations: &mut Vec<Lt01Violation>,
274) {
275    let lines: Vec<&str> = sql.split('\n').collect();
276    if lines.len() < 2 {
277        return;
278    }
279
280    let mut offset = 0usize;
281    let mut current_group: Vec<AliasAlignmentEntry> = Vec::new();
282
283    for line in &lines {
284        let lower = line.to_ascii_lowercase();
285        let alias_pos = lower.find(" as ");
286        let is_alias_line = alias_pos.is_some() && !lower.trim_start().starts_with("from ");
287
288        if is_alias_line {
289            let as_index = alias_pos.unwrap_or_default() + 1;
290            current_group.push(AliasAlignmentEntry {
291                as_start: offset + as_index,
292                visual_col: visual_width(&line[..as_index], tab_space_size),
293                separator_uses_tabs: alias_separator_uses_tabs(line, as_index),
294            });
295        } else if !current_group.is_empty() {
296            emit_alias_alignment_group(&current_group, align_with_tabs, violations);
297            current_group.clear();
298        }
299
300        offset += line.len() + 1;
301    }
302
303    if !current_group.is_empty() {
304        emit_alias_alignment_group(&current_group, align_with_tabs, violations);
305    }
306}
307
/// True when the whitespace run immediately before the `AS` keyword is
/// non-empty and consists entirely of tab characters.
fn alias_separator_uses_tabs(line: &str, as_index: usize) -> bool {
    let prefix = &line[..as_index];
    // Everything after the last non-whitespace character is the separator.
    let separator = &prefix[prefix.trim_end().len()..];
    !separator.is_empty() && separator.bytes().all(|b| b == b'\t')
}
319
320fn emit_alias_alignment_group(
321    group: &[AliasAlignmentEntry],
322    align_with_tabs: bool,
323    violations: &mut Vec<Lt01Violation>,
324) {
325    if group.len() < 2 {
326        return;
327    }
328    let target_col = group
329        .iter()
330        .map(|entry| entry.visual_col)
331        .max()
332        .unwrap_or(0);
333    for entry in group {
334        if entry.visual_col != target_col || (align_with_tabs && !entry.separator_uses_tabs) {
335            let end = entry.as_start + 2;
336            violations.push(((entry.as_start, end), Vec::new()));
337        }
338    }
339}
340
/// Detects misaligned columns inside CREATE TABLE bodies: within one block,
/// every line's second token (presumably the data type — confirm against
/// callers) should start at the same visual column. Detection only.
fn collect_create_table_alignment_detection(
    sql: &str,
    tab_space_size: usize,
    violations: &mut Vec<Lt01Violation>,
) {
    let lines: Vec<&str> = sql.split('\n').collect();
    let mut offset = 0usize;
    let mut in_create_table = false;
    // (absolute byte offset of the second token, its visual column)
    let mut entries: Vec<(usize, usize)> = Vec::new();

    for line in &lines {
        let trimmed = line.trim_start();
        let upper = trimmed.to_ascii_uppercase();
        if !in_create_table && upper.starts_with("CREATE TABLE") {
            in_create_table = true;
        } else if in_create_table && (trimmed.starts_with(')') || trimmed.starts_with(';')) {
            // End of the column list: evaluate the collected group.
            emit_create_table_alignment_group(&entries, violations);
            entries.clear();
            in_create_table = false;
        }

        // Collect column-definition lines, skipping the CREATE TABLE line
        // itself, lone parens, blanks, and `--` comments.
        if in_create_table
            && !trimmed.is_empty()
            && !trimmed.starts_with('(')
            && !trimmed.starts_with(')')
            && !trimmed.starts_with("--")
            && !upper.starts_with("CREATE TABLE")
        {
            if let Some(data_type_start) = second_token_start(trimmed) {
                let prefix_len = line.len() - trimmed.len();
                let absolute = offset + prefix_len + data_type_start;
                let visual = visual_width(&trimmed[..data_type_start], tab_space_size);
                entries.push((absolute, visual));
            }
        }

        offset += line.len() + 1; // +1 for the '\n' separator
    }

    // Unterminated CREATE TABLE at end of input: still evaluate the group.
    if in_create_table && !entries.is_empty() {
        emit_create_table_alignment_group(&entries, violations);
    }
}
384
385fn emit_create_table_alignment_group(
386    group: &[(usize, usize)],
387    violations: &mut Vec<Lt01Violation>,
388) {
389    if group.len() < 2 {
390        return;
391    }
392    let target_col = group.iter().map(|(_, col)| *col).max().unwrap_or(0);
393    for (start, col) in group {
394        if *col != target_col {
395            let end = *start + 1;
396            violations.push(((*start, end), Vec::new()));
397        }
398    }
399}
400
/// Byte offset of the start of the second whitespace-separated token, or
/// `None` when the line holds fewer than two tokens.
fn second_token_start(line: &str) -> Option<usize> {
    let not_ws = |c: char| !c.is_whitespace();
    // Start of the first token.
    let first = line.find(not_ws)?;
    // First whitespace after the first token.
    let gap = first + line[first..].find(char::is_whitespace)?;
    // Start of the second token, if any.
    let second = gap + line[gap..].find(not_ws)?;
    Some(second)
}
421
/// Visual column width of `text`, expanding tabs to the next multiple of
/// `tab_space_size`; every other character counts as one column.
fn visual_width(text: &str, tab_space_size: usize) -> usize {
    text.chars().fold(0usize, |width, ch| match ch {
        // Advance to the next tab stop.
        '\t' => (width / tab_space_size + 1) * tab_space_size,
        _ => width + 1,
    })
}
434
435// ---------------------------------------------------------------------------
436// Pair-based spacing: walk consecutive non-trivia token pairs
437// ---------------------------------------------------------------------------
438
/// Expected spacing between two adjacent non-trivia tokens.
#[derive(Debug, Clone, Copy, PartialEq)]
enum ExpectedSpacing {
    /// Exactly one space required; a newline between the tokens is also
    /// acceptable and is left untouched.
    Single,
    /// No space allowed (tokens must be adjacent); pairs separated by a
    /// newline or comment are exempt.
    None,
    /// No space allowed, even across newlines (e.g. around `.` and `::`).
    NoneInline,
    /// Do not check this pair (e.g. start/end of statement).
    Skip,
    /// Single space required; any other gap, including a newline, is
    /// replaced with a single space.
    SingleInline,
}
453
/// Walks consecutive non-trivia token pairs and records spacing violations
/// according to [`expected_spacing`].
///
/// Pairs are skipped when their byte offsets cannot be resolved, when the
/// gap overlaps a templated span, or when dialect-specific exemptions apply
/// (type angle-bracket tokens, Snowflake pattern tokens).
fn collect_pair_spacing_violations(
    sql: &str,
    tokens: &[TokenWithSpan],
    dialect: Dialect,
    templated_spans: &[Lt01TemplateSpan],
    violations: &mut Vec<Lt01Violation>,
) {
    // Indices of tokens that are neither whitespace/comments nor EOF.
    let non_trivia: Vec<usize> = tokens
        .iter()
        .enumerate()
        .filter(|(_, t)| !is_trivia_token(&t.token) && !matches!(t.token, Token::EOF))
        .map(|(i, _)| i)
        .collect();
    // Token indices that belong to type angle-bracket syntax (dialects that
    // support it); these pairs are forced to `None` spacing below.
    let type_angle_tokens = if supports_type_angle_spacing(dialect) {
        type_angle_token_indices(tokens, &non_trivia)
    } else {
        HashSet::new()
    };
    // Snowflake pattern tokens are excluded from pair checks entirely.
    let snowflake_pattern_tokens = if dialect == Dialect::Snowflake {
        snowflake_pattern_token_indices(tokens, &non_trivia)
    } else {
        HashSet::new()
    };

    for window in non_trivia.windows(2) {
        let left_idx = window[0];
        let right_idx = window[1];
        if dialect == Dialect::Snowflake
            && (snowflake_pattern_tokens.contains(&left_idx)
                || snowflake_pattern_tokens.contains(&right_idx))
        {
            continue;
        }
        let left = &tokens[left_idx];
        let right = &tokens[right_idx];

        let Some((left_start, left_end)) = token_offsets(sql, left) else {
            continue;
        };
        let Some((right_start, _)) = token_offsets(sql, right) else {
            continue;
        };

        // Defensive: discard pairs with inconsistent or out-of-range offsets.
        if left_end > right_start || right_start > sql.len() || left_end > sql.len() {
            continue;
        }
        if overlaps_template_span(templated_spans, left_start, right_start) {
            continue;
        }

        // The raw text between the two tokens.
        let gap = &sql[left_end..right_start];
        let has_newline = gap.contains('\n') || gap.contains('\r');
        let has_comment = has_comment_between(tokens, left_idx, right_idx);

        let expected = if supports_type_angle_spacing(dialect)
            && is_type_angle_spacing_pair(left, right, left_idx, right_idx, &type_angle_tokens)
        {
            ExpectedSpacing::None
        } else {
            expected_spacing(left, right, tokens, left_idx, right_idx, dialect)
        };

        match expected {
            ExpectedSpacing::Skip => continue,
            ExpectedSpacing::None => {
                // Tokens should be adjacent, no whitespace allowed.
                if !gap.is_empty() && !has_newline && !has_comment {
                    let span = (left_end, right_start);
                    let edit = (left_end, right_start, String::new());
                    violations.push((span, vec![edit]));
                }
            }
            ExpectedSpacing::NoneInline => {
                // Same as `None`, but newlines in the gap are also removed.
                if !gap.is_empty() && !has_comment {
                    let span = (left_end, right_start);
                    let edit = (left_end, right_start, String::new());
                    violations.push((span, vec![edit]));
                }
            }
            ExpectedSpacing::Single => {
                if has_comment {
                    continue;
                }
                if has_newline {
                    // Newline is acceptable as a separator for single-space contexts.
                    // But check if there's excessive inline space on the same line
                    // before or after the newline.
                    continue;
                }
                if gap == " " {
                    // Correct single space.
                    continue;
                }
                if gap.is_empty() && matches!(left.token, Token::Comma) {
                    // Avoid zero-width insert edits touching the next token.
                    // Replacing the comma token itself allows CP02/LT01 fixes
                    // to coexist in the same pass.
                    let replacement = format!("{} ", &sql[left_start..left_end]);
                    let span = (left_start, left_end);
                    let edit = (left_start, left_end, replacement);
                    violations.push((span, vec![edit]));
                    continue;
                }
                if gap.is_empty() && is_exists_keyword_token(&left.token) {
                    // Zero-width inserts are filtered by the fix planner.
                    // Replace the EXISTS token itself to preserve fixability.
                    let replacement = format!("{} ", &sql[left_start..left_end]);
                    let span = (left_start, left_end);
                    let edit = (left_start, left_end, replacement);
                    violations.push((span, vec![edit]));
                    continue;
                }
                // Either missing space (gap is empty) or excessive space (multiple spaces).
                let span = (left_end, right_start);
                let edit = (left_end, right_start, " ".to_string());
                violations.push((span, vec![edit]));
            }
            ExpectedSpacing::SingleInline => {
                if has_comment {
                    continue;
                }
                if gap == " " {
                    continue;
                }
                // Replace whatever gap (including newlines) with single space.
                let span = (left_end, right_start);
                let edit = (left_end, right_start, " ".to_string());
                violations.push((span, vec![edit]));
            }
        }
    }
}
586
/// Determine expected spacing between two adjacent non-trivia tokens.
///
/// Rules are ordered: structural punctuation (`.`/`::`), dialect specials,
/// parens/brackets, separators, then operator and word/literal pairs. The
/// first matching rule wins; unrecognized pairs are skipped.
fn expected_spacing(
    left: &TokenWithSpan,
    right: &TokenWithSpan,
    tokens: &[TokenWithSpan],
    left_idx: usize,
    right_idx: usize,
    dialect: Dialect,
) -> ExpectedSpacing {
    // --- Period (dot) for qualified identifiers: no space around ---
    if matches!(left.token, Token::Period) || matches!(right.token, Token::Period) {
        return ExpectedSpacing::NoneInline;
    }

    // --- Cast operator (::) ---
    if matches!(left.token, Token::DoubleColon) || matches!(right.token, Token::DoubleColon) {
        return ExpectedSpacing::NoneInline;
    }

    // --- Snowflake colon (semi-structured access): no space around ---
    if dialect == Dialect::Snowflake
        && (matches!(left.token, Token::Colon) || matches!(right.token, Token::Colon))
    {
        // Snowflake a:b:c syntax — no spaces around colon
        return ExpectedSpacing::NoneInline;
    }

    // --- Split compound comparison operators (>,<,!) + = ---
    if is_split_compound_comparison_pair(left, right) {
        return ExpectedSpacing::NoneInline;
    }

    // --- TSQL compound assignment operators (+=, -=, etc.) ---
    if dialect == Dialect::Mssql && is_tsql_compound_assignment_pair(left, right) {
        return ExpectedSpacing::NoneInline;
    }

    // --- Left paren: usually no space before (function calls) ---
    if matches!(right.token, Token::LParen) {
        return expected_spacing_before_lparen(left, tokens, left_idx, dialect);
    }

    // --- Right paren followed by something ---
    if matches!(left.token, Token::RParen) {
        return expected_spacing_after_rparen(right, tokens, right_idx);
    }

    // --- Left bracket: no space before in most contexts ---
    if matches!(right.token, Token::LBracket) {
        // text[] type syntax needs a space, but array access doesn't.
        if is_type_keyword_for_bracket(&left.token) {
            return ExpectedSpacing::Single;
        }
        return ExpectedSpacing::None;
    }

    // --- Right bracket ---
    if matches!(left.token, Token::RBracket) {
        // After ] usually no space before :: or . or [ or )
        if matches!(
            right.token,
            Token::DoubleColon | Token::Period | Token::LBracket | Token::RParen
        ) {
            return ExpectedSpacing::None;
        }
        return ExpectedSpacing::Single;
    }

    // --- Comma: no space before, single space after ---
    if matches!(right.token, Token::Comma) {
        return ExpectedSpacing::None;
    }
    if matches!(left.token, Token::Comma) {
        return ExpectedSpacing::Single;
    }

    // --- Semicolon: no space before ---
    if matches!(right.token, Token::SemiColon) {
        return ExpectedSpacing::Skip;
    }
    if matches!(left.token, Token::SemiColon) {
        return ExpectedSpacing::Skip;
    }

    // --- Inside parens: no space after ( or before ) ---
    if matches!(left.token, Token::LParen) {
        return ExpectedSpacing::None;
    }
    if matches!(right.token, Token::RParen) {
        return ExpectedSpacing::None;
    }

    // --- BigQuery project identifiers can include hyphens before dataset/table ---
    if dialect == Dialect::Bigquery
        && is_bigquery_hyphenated_identifier_pair(left, right, tokens, left_idx, right_idx)
    {
        return ExpectedSpacing::None;
    }

    if is_filesystem_path_pair(left, right, tokens, left_idx, right_idx, dialect) {
        return ExpectedSpacing::NoneInline;
    }

    // --- Binary operators: single space on each side ---
    if is_binary_operator(&left.token) || is_binary_operator(&right.token) {
        // Special: unary minus/plus (sign indicators) — skip
        if is_unary_operator_pair(left, right, tokens, left_idx) {
            return ExpectedSpacing::Skip;
        }
        return ExpectedSpacing::Single;
    }

    // --- Comparison operators: single space around ---
    if is_comparison_operator(&left.token) || is_comparison_operator(&right.token) {
        // NOTE(review): both branches below return `Single`, so the T-SQL
        // assignment-RHS special case is currently redundant — confirm
        // whether a different spacing (e.g. `Skip`) was intended here.
        if dialect == Dialect::Mssql
            && is_tsql_assignment_rhs_pair(left, right, tokens, left_idx, right_idx)
        {
            return ExpectedSpacing::Single;
        }
        return ExpectedSpacing::Single;
    }

    // --- JSON operators (arrow, long arrow, etc.) ---
    if is_json_operator(&left.token) || is_json_operator(&right.token) {
        return ExpectedSpacing::Single;
    }

    // --- Star/Mul as wildcard inside COUNT(*) etc. ---
    if matches!(left.token, Token::Mul) || matches!(right.token, Token::Mul) {
        // If inside parens: skip (could be wildcard)
        return ExpectedSpacing::Skip;
    }

    // --- Keywords and identifiers: single space between ---
    if is_word_like(&left.token) && is_word_like(&right.token) {
        return ExpectedSpacing::Single;
    }

    // --- Word followed by literal or vice versa ---
    if (is_word_like(&left.token) && is_literal(&right.token))
        || (is_literal(&left.token) && is_word_like(&right.token))
    {
        return ExpectedSpacing::Single;
    }

    // --- Literal followed by literal ---
    if is_literal(&left.token) && is_literal(&right.token) {
        return ExpectedSpacing::Single;
    }

    // --- Number followed by word or vice versa ---
    if (matches!(left.token, Token::Number(_, _)) && is_word_like(&right.token))
        || (is_word_like(&left.token) && matches!(right.token, Token::Number(_, _)))
    {
        return ExpectedSpacing::Single;
    }

    ExpectedSpacing::Skip
}
746
747// ---------------------------------------------------------------------------
748// Token classification helpers
749// ---------------------------------------------------------------------------
750
/// Operators treated as binary (arithmetic, bitwise, string concat,
/// assignment); they require a single space on each side.
fn is_binary_operator(token: &Token) -> bool {
    matches!(
        token,
        Token::Plus
            | Token::Minus
            | Token::Div
            | Token::Mod
            | Token::StringConcat
            | Token::Ampersand
            | Token::Pipe
            | Token::Caret
            | Token::ShiftLeft
            | Token::ShiftRight
            | Token::Assignment
    )
}
767
/// Comparison operators; they require a single space on each side.
fn is_comparison_operator(token: &Token) -> bool {
    matches!(
        token,
        Token::Eq
            | Token::Neq
            | Token::Lt
            | Token::Gt
            | Token::LtEq
            | Token::GtEq
            | Token::Spaceship
            | Token::DoubleEq
            | Token::TildeEqual
    )
}
782
/// Adjacent token pairs that the tokenizer split out of a compound
/// comparison operator (e.g. `> =`, `< =`, `< >`); no space may sit
/// between them.
fn is_split_compound_comparison_pair(left: &TokenWithSpan, right: &TokenWithSpan) -> bool {
    matches!(
        (&left.token, &right.token),
        (Token::Gt, Token::Eq)
            | (Token::Lt, Token::Eq)
            | (Token::Lt, Token::Gt)
            | (Token::Neq, Token::Eq)
    )
}
792
/// Operator tokens that can precede `=` to form a T-SQL compound assignment
/// (`+=`, `-=`, `*=`, …); see `is_tsql_compound_assignment_pair`.
fn is_assignment_operator_token(token: &Token) -> bool {
    matches!(
        token,
        Token::Plus
            | Token::Minus
            | Token::Mul
            | Token::Div
            | Token::Mod
            | Token::Ampersand
            | Token::Pipe
            | Token::Caret
    )
}
806
807fn is_tsql_compound_assignment_pair(left: &TokenWithSpan, right: &TokenWithSpan) -> bool {
808    matches!(right.token, Token::Eq) && is_assignment_operator_token(&left.token)
809}
810
811fn is_tsql_assignment_rhs_pair(
812    left: &TokenWithSpan,
813    _right: &TokenWithSpan,
814    tokens: &[TokenWithSpan],
815    left_idx: usize,
816    _right_idx: usize,
817) -> bool {
818    if !matches!(left.token, Token::Eq) {
819        return false;
820    }
821    prev_non_trivia_index(tokens, left_idx)
822        .map(|index| is_assignment_operator_token(&tokens[index].token))
823        .unwrap_or(false)
824}
825
/// JSON-style arrow operators; spaced like binary operators (single space
/// on each side).
fn is_json_operator(token: &Token) -> bool {
    matches!(
        token,
        Token::Arrow
            | Token::LongArrow
            | Token::HashArrow
            | Token::HashLongArrow
            | Token::AtArrow
            | Token::ArrowAt
    )
}
837
/// Tokens that read as words: identifiers/keywords and placeholders.
fn is_word_like(token: &Token) -> bool {
    matches!(token, Token::Word(_) | Token::Placeholder(_))
}
841
/// String, byte-string, and numeric literal tokens.
fn is_literal(token: &Token) -> bool {
    matches!(
        token,
        Token::SingleQuotedString(_)
            | Token::DoubleQuotedString(_)
            | Token::TripleSingleQuotedString(_)
            | Token::TripleDoubleQuotedString(_)
            | Token::NationalStringLiteral(_)
            | Token::EscapedStringLiteral(_)
            | Token::UnicodeStringLiteral(_)
            | Token::HexStringLiteral(_)
            | Token::SingleQuotedByteStringLiteral(_)
            | Token::DoubleQuotedByteStringLiteral(_)
            | Token::Number(_, _)
    )
}
858
859fn is_type_keyword_for_bracket(token: &Token) -> bool {
860    if let Token::Word(w) = token {
861        if w.quote_style.is_some() {
862            return false;
863        }
864        matches!(
865            w.value.to_ascii_uppercase().as_str(),
866            "TEXT"
867                | "UUID"
868                | "INT"
869                | "INTEGER"
870                | "BIGINT"
871                | "SMALLINT"
872                | "VARCHAR"
873                | "CHAR"
874                | "BOOLEAN"
875                | "BOOL"
876                | "NUMERIC"
877                | "DECIMAL"
878                | "FLOAT"
879                | "DOUBLE"
880                | "DATE"
881                | "TIME"
882                | "TIMESTAMP"
883                | "INTERVAL"
884                | "JSONB"
885                | "JSON"
886                | "BYTEA"
887                | "REAL"
888                | "SERIAL"
889                | "BIGSERIAL"
890                | "INET"
891                | "CIDR"
892                | "MACADDR"
893        )
894    } else {
895        false
896    }
897}
898
899fn is_exists_keyword_token(token: &Token) -> bool {
900    matches!(token, Token::Word(word) if word.keyword == Keyword::EXISTS)
901}
902
903/// Check if a token is a DDL keyword after which the next word is an object name
904/// (table, view, index, etc.) — not a function call.
905fn is_ddl_object_keyword(token: &Token) -> bool {
906    if let Token::Word(w) = token {
907        matches!(
908            w.keyword,
909            Keyword::TABLE
910                | Keyword::VIEW
911                | Keyword::INDEX
912                | Keyword::FUNCTION
913                | Keyword::PROCEDURE
914                | Keyword::TRIGGER
915                | Keyword::SEQUENCE
916                | Keyword::TYPE
917                | Keyword::SCHEMA
918                | Keyword::DATABASE
919        )
920    } else {
921        false
922    }
923}
924
/// Check if the word at `word_index` is (part of) a possibly schema-qualified
/// object name in a DDL statement, e.g. `my_schema.my_table` in
/// `CREATE TABLE my_schema.my_table (…)`.
fn is_qualified_ddl_object_name(tokens: &[TokenWithSpan], word_index: usize) -> bool {
    let mut cursor = word_index;

    loop {
        // Look at the non-trivia token immediately left of the cursor.
        let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) else {
            return false;
        };

        // Hop over each `word .` qualifier pair (schema.name) and keep
        // walking left from the qualifying word.
        if matches!(tokens[prev_idx].token, Token::Period) {
            let Some(prev_word_idx) = prev_non_trivia_index(tokens, prev_idx) else {
                return false;
            };
            if !is_word_like(&tokens[prev_word_idx].token) {
                return false;
            }
            cursor = prev_word_idx;
            continue;
        }

        // The name must be introduced by a DDL object keyword (TABLE, VIEW, …)
        // that itself sits in a CREATE/ALTER/DROP/TRUNCATE context.
        if !is_ddl_object_keyword(&tokens[prev_idx].token) {
            return false;
        }
        return is_ddl_object_definition_context(tokens, prev_idx);
    }
}
950
/// Check if the word at `word_index` is the (possibly schema-qualified) table
/// name immediately following a `REFERENCES` keyword in a foreign-key clause.
fn is_reference_target_name(tokens: &[TokenWithSpan], word_index: usize) -> bool {
    let mut cursor = word_index;

    loop {
        let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) else {
            return false;
        };

        // Walk left over `word .` qualifier pairs (schema.table).
        if matches!(tokens[prev_idx].token, Token::Period) {
            let Some(prev_word_idx) = prev_non_trivia_index(tokens, prev_idx) else {
                return false;
            };
            if !is_word_like(&tokens[prev_word_idx].token) {
                return false;
            }
            cursor = prev_word_idx;
            continue;
        }

        // The token introducing the name must be a word …
        let Token::Word(prev_word) = &tokens[prev_idx].token else {
            return false;
        };

        // … and specifically the REFERENCES keyword.
        return prev_word.keyword == Keyword::REFERENCES;
    }
}
977
/// Check if the word at `word_index` is the target name of a `COPY INTO …`
/// statement, by scanning backwards for an `INTO` that is itself preceded
/// by `COPY`.
fn is_copy_into_target_name(tokens: &[TokenWithSpan], word_index: usize) -> bool {
    let mut cursor = word_index;
    // Hop counter guards against pathological token streams.
    let mut steps = 0usize;

    while let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) {
        match &tokens[prev_idx].token {
            // Found INTO — accept only if COPY comes directly before it.
            Token::Word(word) if word.keyword == Keyword::INTO => {
                let Some(copy_idx) = prev_non_trivia_index(tokens, prev_idx) else {
                    return false;
                };
                return matches!(
                    &tokens[copy_idx].token,
                    Token::Word(copy_word) if copy_word.keyword == Keyword::COPY
                );
            }
            // Any clause keyword means we have left the COPY INTO target
            // position — this word belongs to something else.
            Token::Word(word)
                if matches!(
                    word.keyword,
                    Keyword::FROM
                        | Keyword::SELECT
                        | Keyword::WHERE
                        | Keyword::JOIN
                        | Keyword::ON
                        | Keyword::HAVING
                ) =>
            {
                return false;
            }
            // Statement/list punctuation likewise ends the search.
            Token::SemiColon | Token::Comma | Token::LParen | Token::RParen => return false,
            // Anything else (qualifiers, periods, stage names, …): keep going.
            _ => {}
        }

        cursor = prev_idx;
        steps += 1;
        // Bail out after 64 hops rather than scanning the whole statement.
        if steps > 64 {
            return false;
        }
    }

    false
}
1019
/// Check if `word_index` is the table/view name in an `INSERT INTO schema.table` context.
fn is_insert_into_target_name(tokens: &[TokenWithSpan], word_index: usize) -> bool {
    let mut cursor = word_index;
    // Hop counter: a real qualified name is short, so give up quickly.
    let mut steps = 0usize;

    while let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) {
        match &tokens[prev_idx].token {
            Token::Word(word) if word.keyword == Keyword::INTO => {
                // Check for INSERT before INTO.
                let Some(insert_idx) = prev_non_trivia_index(tokens, prev_idx) else {
                    return false;
                };
                return matches!(
                    &tokens[insert_idx].token,
                    Token::Word(w) if w.keyword == Keyword::INSERT
                );
            }
            // Walk through schema qualifiers (schema.table).
            Token::Period => {}
            // Accept any unquoted word as a schema/table identifier — the name
            // may coincide with a SQL keyword (e.g. `metrics`, `daily`).
            Token::Word(word) if word.quote_style.is_none() => {}
            // Anything else cannot appear inside an INSERT INTO target name.
            _ => return false,
        }

        cursor = prev_idx;
        steps += 1;
        // Qualified names never need more than a few hops; 16 is generous.
        if steps > 16 {
            return false;
        }
    }

    false
}
1054
1055fn is_ddl_object_definition_context(tokens: &[TokenWithSpan], ddl_keyword_index: usize) -> bool {
1056    let Some(prev_idx) = prev_non_trivia_index(tokens, ddl_keyword_index) else {
1057        return false;
1058    };
1059    let Token::Word(prev_word) = &tokens[prev_idx].token else {
1060        return false;
1061    };
1062
1063    if matches!(
1064        prev_word.keyword,
1065        Keyword::CREATE | Keyword::ALTER | Keyword::DROP | Keyword::TRUNCATE
1066    ) {
1067        return true;
1068    }
1069
1070    if prev_word.keyword == Keyword::OR {
1071        if let Some(prev_prev_idx) = prev_non_trivia_index(tokens, prev_idx) {
1072            if let Token::Word(prev_prev_word) = &tokens[prev_prev_idx].token {
1073                return matches!(prev_prev_word.keyword, Keyword::CREATE | Keyword::ALTER);
1074            }
1075        }
1076    }
1077
1078    false
1079}
1080
1081/// Check if this pair involves a unary +/- (sign indicator) rather than binary.
1082fn is_unary_operator_pair(
1083    left: &TokenWithSpan,
1084    right: &TokenWithSpan,
1085    tokens: &[TokenWithSpan],
1086    left_idx: usize,
1087) -> bool {
1088    // Case 1: right token is +/- and left context suggests unary
1089    if matches!(right.token, Token::Plus | Token::Minus)
1090        && is_unary_prefix_context(&tokens[left_idx].token)
1091    {
1092        return true;
1093    }
1094    // Case 2: left token is +/- and the token before it suggests unary
1095    if matches!(left.token, Token::Plus | Token::Minus) {
1096        if let Some(prev_idx) = prev_non_trivia_index(tokens, left_idx) {
1097            if is_unary_prefix_context(&tokens[prev_idx].token) {
1098                return true;
1099            }
1100        } else {
1101            // No previous token — start of statement, so it's unary
1102            return true;
1103        }
1104    }
1105    false
1106}
1107
/// Check whether the pair straddles a hyphen inside a BigQuery-style
/// hyphenated identifier (e.g. the project id in `my-project.dataset.table`).
/// The tokenizer lexes it as `word - word . …`, so the hyphen must not be
/// treated as subtraction for spacing purposes.
fn is_bigquery_hyphenated_identifier_pair(
    left: &TokenWithSpan,
    right: &TokenWithSpan,
    tokens: &[TokenWithSpan],
    left_idx: usize,
    right_idx: usize,
) -> bool {
    // Shape `word -`: require `word - word .` looking rightwards.
    if matches!(right.token, Token::Minus) {
        if !matches!(left.token, Token::Word(_)) {
            return false;
        }
        let Some(next_word_idx) = next_non_trivia_index(tokens, right_idx + 1) else {
            return false;
        };
        if !matches!(tokens[next_word_idx].token, Token::Word(_)) {
            return false;
        }
        // The word after the hyphen must itself be followed by a `.`.
        let Some(next_after_word_idx) = next_non_trivia_index(tokens, next_word_idx + 1) else {
            return false;
        };
        return matches!(tokens[next_after_word_idx].token, Token::Period);
    }

    // Shape `- word`: mirror image — a word before the hyphen and a `.`
    // right after this word.
    if matches!(left.token, Token::Minus) {
        if !matches!(right.token, Token::Word(_)) {
            return false;
        }
        let Some(prev_word_idx) = prev_non_trivia_index(tokens, left_idx) else {
            return false;
        };
        if !matches!(tokens[prev_word_idx].token, Token::Word(_)) {
            return false;
        }
        let Some(next_idx) = next_non_trivia_index(tokens, right_idx + 1) else {
            return false;
        };
        return matches!(tokens[next_idx].token, Token::Period);
    }

    false
}
1149
/// Check if a `/` in this pair is a filesystem-path separator rather than a
/// division operator (e.g. `ADD JAR /tmp/foo.jar`, `@stage/dir/file`).
fn is_filesystem_path_pair(
    left: &TokenWithSpan,
    right: &TokenWithSpan,
    tokens: &[TokenWithSpan],
    left_idx: usize,
    right_idx: usize,
    dialect: Dialect,
) -> bool {
    // Only these dialects use bare slash-separated paths in SQL text.
    if !matches!(
        dialect,
        Dialect::Databricks | Dialect::Clickhouse | Dialect::Snowflake
    ) {
        return false;
    }

    // Locate the slash in the pair (it can sit on either side).
    let div_index = if matches!(left.token, Token::Div) {
        Some(left_idx)
    } else if matches!(right.token, Token::Div) {
        // `JAR /path`: the pair (keyword, slash) keeps its space, so it is
        // excluded here — unless the keyword word is itself already a path
        // segment (i.e. it is preceded by another slash).
        let left_is_context_keyword = is_path_context_keyword_token(&left.token);
        let left_is_path_segment = prev_non_trivia_index(tokens, left_idx)
            .is_some_and(|idx| matches!(tokens[idx].token, Token::Div));
        if left_is_context_keyword && !left_is_path_segment {
            return false;
        }
        Some(right_idx)
    } else {
        None
    };
    let Some(div_index) = div_index else {
        return false;
    };

    // A path slash must touch a word on at least one side.
    let prev_idx = prev_non_trivia_index(tokens, div_index);
    let next_idx = next_non_trivia_index(tokens, div_index + 1);
    let prev_ok = prev_idx.is_some_and(|idx| matches!(tokens[idx].token, Token::Word(_)));
    let next_ok = next_idx.is_some_and(|idx| matches!(tokens[idx].token, Token::Word(_)));
    if !(prev_ok || next_ok) {
        return false;
    }

    // Snowflake paths appear after a stage reference (`@stage/…`); the other
    // dialects use JAR/MODEL keywords as the path context.
    if dialect == Dialect::Snowflake {
        return snowflake_stage_path_context_within(tokens, div_index, 12);
    }

    path_context_keyword_within(tokens, div_index, 6)
}
1196
1197fn is_path_context_keyword_token(token: &Token) -> bool {
1198    let Token::Word(word) = token else {
1199        return false;
1200    };
1201    word.value.eq_ignore_ascii_case("JAR") || word.value.eq_ignore_ascii_case("MODEL")
1202}
1203
1204fn path_context_keyword_within(tokens: &[TokenWithSpan], from_idx: usize, limit: usize) -> bool {
1205    let mut cursor = from_idx;
1206    let mut steps = 0usize;
1207    while let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) {
1208        if let Token::Word(word) = &tokens[prev_idx].token {
1209            if matches!(word.keyword, Keyword::JAR) {
1210                return true;
1211            }
1212            if word.value.eq_ignore_ascii_case("JAR") || word.value.eq_ignore_ascii_case("MODEL") {
1213                return true;
1214            }
1215        }
1216        cursor = prev_idx;
1217        steps += 1;
1218        if steps >= limit {
1219            break;
1220        }
1221    }
1222    false
1223}
1224
1225fn snowflake_stage_path_context_within(
1226    tokens: &[TokenWithSpan],
1227    from_idx: usize,
1228    limit: usize,
1229) -> bool {
1230    let mut cursor = from_idx;
1231    let mut steps = 0usize;
1232    while let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) {
1233        match &tokens[prev_idx].token {
1234            Token::AtSign => return true,
1235            Token::Word(word) if word.value.starts_with('@') => return true,
1236            _ => {}
1237        }
1238        cursor = prev_idx;
1239        steps += 1;
1240        if steps >= limit {
1241            break;
1242        }
1243    }
1244    false
1245}
1246
1247/// Check if a token is a context where the following +/- is unary.
1248fn is_unary_prefix_context(token: &Token) -> bool {
1249    if matches!(
1250        token,
1251        Token::Comma
1252            | Token::LParen
1253            | Token::Eq
1254            | Token::Neq
1255            | Token::Lt
1256            | Token::Gt
1257            | Token::LtEq
1258            | Token::GtEq
1259    ) {
1260        return true;
1261    }
1262    if let Token::Word(w) = token {
1263        if matches!(
1264            w.keyword,
1265            Keyword::SELECT
1266                | Keyword::WHERE
1267                | Keyword::WHEN
1268                | Keyword::THEN
1269                | Keyword::ELSE
1270                | Keyword::AND
1271                | Keyword::OR
1272                | Keyword::ON
1273                | Keyword::SET
1274                | Keyword::CASE
1275                | Keyword::BETWEEN
1276                | Keyword::IN
1277                | Keyword::VALUES
1278                | Keyword::INTERVAL
1279                | Keyword::YEAR
1280                | Keyword::MONTH
1281                | Keyword::DAY
1282                | Keyword::HOUR
1283                | Keyword::MINUTE
1284                | Keyword::SECOND
1285                | Keyword::RETURN
1286                | Keyword::RETURNS
1287        ) {
1288            return true;
1289        }
1290    }
1291    false
1292}
1293
1294/// Expected spacing before left-paren.
1295fn expected_spacing_before_lparen(
1296    left: &TokenWithSpan,
1297    tokens: &[TokenWithSpan],
1298    left_idx: usize,
1299    dialect: Dialect,
1300) -> ExpectedSpacing {
1301    match &left.token {
1302        // Function call: no space between function name and (
1303        Token::Word(w) if w.quote_style.is_none() => {
1304            if dialect == Dialect::Snowflake {
1305                if w.value.eq_ignore_ascii_case("MATCH_RECOGNIZE")
1306                    || w.value.eq_ignore_ascii_case("PATTERN")
1307                {
1308                    return ExpectedSpacing::Single;
1309                }
1310                if w.value.eq_ignore_ascii_case("MATCH_CONDITION") {
1311                    return ExpectedSpacing::NoneInline;
1312                }
1313            }
1314            if w.value.eq_ignore_ascii_case("EXISTS") {
1315                if exists_requires_space_before_lparen(tokens, left_idx) {
1316                    return ExpectedSpacing::Single;
1317                }
1318                return ExpectedSpacing::NoneInline;
1319            }
1320            // Keywords that should have a space before (
1321            if is_keyword_requiring_space_before_paren(w.keyword) {
1322                // AS in CTE: `AS (` should be single-inline (collapse newlines to space)
1323                // USING, FROM, etc.: single space (newline acceptable)
1324                if matches!(w.keyword, Keyword::AS) {
1325                    return ExpectedSpacing::SingleInline;
1326                }
1327                return ExpectedSpacing::Single;
1328            }
1329            // INSERT INTO table_name ( — the ( opens a column list.
1330            // Checked before the NoKeyword guard because the table name may
1331            // coincide with a SQL keyword (e.g., metrics.daily → daily is Keyword).
1332            if is_insert_into_target_name(tokens, left_idx) {
1333                return ExpectedSpacing::Single;
1334            }
1335            // Check if this word is a table/view name after CREATE TABLE/VIEW —
1336            // the ( opens a column list, not a function call, so skip.
1337            if w.keyword == Keyword::NoKeyword {
1338                if is_reference_target_name(tokens, left_idx) {
1339                    return ExpectedSpacing::Single;
1340                }
1341                if is_copy_into_target_name(tokens, left_idx) {
1342                    return ExpectedSpacing::Single;
1343                }
1344                if is_qualified_ddl_object_name(tokens, left_idx) {
1345                    return ExpectedSpacing::Skip;
1346                }
1347            }
1348            // Regular function call or type name: no space
1349            ExpectedSpacing::NoneInline
1350        }
1351        // After closing paren/bracket: single space (subquery, etc.)
1352        Token::RParen | Token::RBracket => ExpectedSpacing::Single,
1353        // After literal: single space
1354        _ if is_literal(&left.token) => ExpectedSpacing::Single,
1355        // After number: no space (could be type precision like numeric(5,2))
1356        Token::Number(_, _) => ExpectedSpacing::None,
1357        // After comma: single space
1358        Token::Comma => ExpectedSpacing::Single,
1359        // After operator: skip
1360        _ if is_binary_operator(&left.token) || is_comparison_operator(&left.token) => {
1361            ExpectedSpacing::Skip
1362        }
1363        _ => ExpectedSpacing::Skip,
1364    }
1365}
1366
1367fn exists_requires_space_before_lparen(tokens: &[TokenWithSpan], left_idx: usize) -> bool {
1368    let Some(prev_idx) = prev_non_trivia_index(tokens, left_idx) else {
1369        return false;
1370    };
1371
1372    match &tokens[prev_idx].token {
1373        Token::Word(word) => {
1374            matches!(
1375                word.keyword,
1376                Keyword::AND
1377                    | Keyword::OR
1378                    | Keyword::NOT
1379                    | Keyword::WHERE
1380                    | Keyword::HAVING
1381                    | Keyword::WHEN
1382                    | Keyword::THEN
1383                    | Keyword::ELSE
1384            ) || matches!(
1385                word.value.to_ascii_uppercase().as_str(),
1386                "AND" | "OR" | "NOT" | "WHERE" | "HAVING" | "WHEN" | "THEN" | "ELSE"
1387            )
1388        }
1389        Token::RParen
1390        | Token::LParen
1391        | Token::Eq
1392        | Token::Neq
1393        | Token::Lt
1394        | Token::Gt
1395        | Token::LtEq
1396        | Token::GtEq => true,
1397        _ => false,
1398    }
1399}
1400
1401/// Keywords that should have a space before `(`.
1402fn is_keyword_requiring_space_before_paren(keyword: Keyword) -> bool {
1403    matches!(
1404        keyword,
1405        Keyword::AS
1406            | Keyword::USING
1407            | Keyword::FROM
1408            | Keyword::JOIN
1409            | Keyword::ON
1410            | Keyword::WHERE
1411            | Keyword::IN
1412            | Keyword::BETWEEN
1413            | Keyword::WHEN
1414            | Keyword::THEN
1415            | Keyword::ELSE
1416            | Keyword::AND
1417            | Keyword::OR
1418            | Keyword::NOT
1419            | Keyword::HAVING
1420            | Keyword::OVER
1421            | Keyword::PARTITION
1422            | Keyword::ORDER
1423            | Keyword::GROUP
1424            | Keyword::LIMIT
1425            | Keyword::UNION
1426            | Keyword::INTERSECT
1427            | Keyword::EXCEPT
1428            | Keyword::RECURSIVE
1429            | Keyword::WITH
1430            | Keyword::SELECT
1431            | Keyword::INTO
1432            | Keyword::TABLE
1433            | Keyword::VALUES
1434            | Keyword::SET
1435            | Keyword::RETURNS
1436            | Keyword::FILTER
1437            | Keyword::CONFLICT
1438            | Keyword::BY
1439    )
1440}
1441
1442/// Expected spacing after right-paren.
1443fn expected_spacing_after_rparen(
1444    right: &TokenWithSpan,
1445    _tokens: &[TokenWithSpan],
1446    _right_idx: usize,
1447) -> ExpectedSpacing {
1448    match &right.token {
1449        // ) followed by . or :: or [ — no space
1450        Token::Period | Token::DoubleColon | Token::LBracket | Token::RBracket => {
1451            ExpectedSpacing::None
1452        }
1453        // ) followed by , — no space before comma
1454        Token::Comma => ExpectedSpacing::None,
1455        // ) followed by ; — no space
1456        Token::SemiColon => ExpectedSpacing::Skip,
1457        // ) followed by ) — no space
1458        Token::RParen => ExpectedSpacing::None,
1459        // ) followed by ( — single space
1460        Token::LParen => ExpectedSpacing::Single,
1461        // ) followed by keyword or identifier — single space
1462        _ => ExpectedSpacing::Single,
1463    }
1464}
1465
1466fn has_comment_between(tokens: &[TokenWithSpan], left: usize, right: usize) -> bool {
1467    tokens[left + 1..right].iter().any(|t| {
1468        matches!(
1469            t.token,
1470            Token::Whitespace(Whitespace::SingleLineComment { .. })
1471                | Token::Whitespace(Whitespace::MultiLineComment(_))
1472        )
1473    })
1474}
1475
1476fn template_spans(sql: &str) -> Vec<Lt01TemplateSpan> {
1477    let mut spans = Vec::new();
1478    let mut index = 0usize;
1479    while let Some((open, close)) = find_next_template_open(sql, index) {
1480        let payload_start = open + 2;
1481        if let Some(rel_close) = sql[payload_start..].find(close) {
1482            let close_index = payload_start + rel_close + close.len();
1483            spans.push((open, close_index));
1484            index = close_index;
1485        } else {
1486            spans.push((open, sql.len()));
1487            break;
1488        }
1489    }
1490    spans
1491}
1492
/// Find the earliest Jinja opening marker at or after byte `from`, returning
/// its absolute position together with the matching closing marker.
fn find_next_template_open(sql: &str, from: usize) -> Option<(usize, &'static str)> {
    const MARKERS: [(&str, &str); 3] = [("{{", "}}"), ("{%", "%}"), ("{#", "#}")];
    let rest = sql.get(from..)?;
    let mut best: Option<(usize, &'static str)> = None;
    for (open, close) in MARKERS {
        if let Some(offset) = rest.find(open) {
            let position = from + offset;
            if best.map_or(true, |(current, _)| position < current) {
                best = Some((position, close));
            }
        }
    }
    best
}
1500
/// Cheap pre-filter: does the text contain any Jinja opening marker?
fn contains_template_marker(sql: &str) -> bool {
    ["{{", "{%", "{#"].iter().any(|marker| sql.contains(marker))
}
1504
1505fn overlaps_template_span(spans: &[Lt01TemplateSpan], start: usize, end: usize) -> bool {
1506    spans
1507        .iter()
1508        .any(|(template_start, template_end)| start < *template_end && end > *template_start)
1509}
1510
/// Flag `N'…'` national string literals outside MSSQL and record an autofix
/// that inserts a space between the `N` prefix and the opening quote.
///
/// NOTE(review): outside T-SQL the `N` prefix presumably reads as a plain
/// identifier abutting a string literal — confirm against SQLFluff LT01.
fn collect_ansi_national_string_literal_violations(
    sql: &str,
    tokens: &[TokenWithSpan],
    dialect: Dialect,
    templated_spans: &[Lt01TemplateSpan],
    violations: &mut Vec<Lt01Violation>,
) {
    // MSSQL gives N'…' real semantics; never flag it there.
    if matches!(dialect, Dialect::Mssql) {
        return;
    }

    for token in tokens {
        let Token::NationalStringLiteral(_) = token.token else {
            continue;
        };
        let Some((start, end)) = token_offsets(sql, token) else {
            continue;
        };
        // Skip degenerate ranges and anything inside a template expression.
        if start >= end || end > sql.len() || overlaps_template_span(templated_spans, start, end) {
            continue;
        }
        let raw = &sql[start..end];
        // The shortest possible literal is N''.
        if raw.len() < 3 {
            continue;
        }
        let Some(prefix) = raw.chars().next() else {
            continue;
        };
        // Require the exact shape N'… (either case). Short-circuit keeps the
        // raw[1..] slice safe: it is only taken when the prefix is the
        // one-byte ASCII N/n.
        if !(prefix == 'N' || prefix == 'n') || !raw[1..].starts_with('\'') {
            continue;
        }
        // Autofix: one space between the prefix and the opening quote.
        let replacement = format!("{prefix} {}", &raw[1..]);
        violations.push(((start, end), vec![(start, end, replacement)]));
    }
}
1546
/// Run the spacing checks on SQL fragments embedded in quoted strings inside
/// template expressions (e.g. `{{ helper("a  =  b") }}`).
///
/// Violations are reported at their absolute byte offsets but carry NO
/// autofix edits — rewriting text inside a template is not safe.
fn collect_template_string_spacing_violations(
    sql: &str,
    dialect: Dialect,
    templated_spans: &[Lt01TemplateSpan],
    violations: &mut Vec<Lt01Violation>,
) {
    for (template_start, template_end) in templated_spans {
        let mut cursor = *template_start;
        while cursor < *template_end {
            // Find the next quoted fragment within this template span.
            let Some((quote_start, quote_char)) = next_quote_in_range(sql, cursor, *template_end)
            else {
                break;
            };
            let Some(quote_end) =
                find_closing_quote(sql, quote_start + 1, *template_end, quote_char)
            else {
                break;
            };
            // Tokenize the quoted content as stand-alone SQL; fragments that
            // do not tokenize are skipped entirely.
            let content = &sql[quote_start + 1..quote_end];
            let Some(tokens) = tokenized(content, dialect) else {
                cursor = quote_end + 1;
                continue;
            };

            // Apply the same pair-spacing and N'…' checks to the fragment
            // (with no nested template spans).
            let mut fragment_violations = Vec::new();
            collect_pair_spacing_violations(
                content,
                &tokens,
                dialect,
                &[],
                &mut fragment_violations,
            );
            collect_ansi_national_string_literal_violations(
                content,
                &tokens,
                dialect,
                &[],
                &mut fragment_violations,
            );

            // Map fragment-relative ranges back to absolute offsets and drop
            // the autofix edits (empty Vec).
            for ((start, end), _) in fragment_violations {
                if start >= end || end > content.len() {
                    continue;
                }
                let absolute_start = quote_start + 1 + start;
                let absolute_end = quote_start + 1 + end;
                violations.push(((absolute_start, absolute_end), Vec::new()));
            }

            cursor = quote_end + 1;
        }
    }
}
1600
/// Find the first `'` or `"` in `sql[start..end)`, returning its byte offset
/// and the quote character.
///
/// Uses checked slicing so a `start` that is not a char boundary (or is out
/// of range) yields `None` instead of panicking.
fn next_quote_in_range(sql: &str, start: usize, end: usize) -> Option<(usize, char)> {
    let mut index = start;
    while index < end {
        let ch = sql.get(index..)?.chars().next()?;
        if ch == '\'' || ch == '"' {
            return Some((index, ch));
        }
        index += ch.len_utf8();
    }
    None
}
1612
/// Find the unescaped closing `quote` in `sql[start..end)`; a backslash
/// escapes the character that follows it.
///
/// Uses checked slicing so a non-char-boundary or out-of-range `start`
/// yields `None` instead of panicking.
fn find_closing_quote(sql: &str, start: usize, end: usize, quote: char) -> Option<usize> {
    let mut index = start;
    while index < end {
        let ch = sql.get(index..)?.chars().next()?;
        if ch == '\\' {
            // Skip the escaped character (when it is still inside the range).
            let next = index + ch.len_utf8();
            if next < end {
                let escaped = sql.get(next..)?.chars().next()?;
                index = next + escaped.len_utf8();
                continue;
            }
        }
        if ch == quote {
            return Some(index);
        }
        index += ch.len_utf8();
    }
    None
}
1632
/// Collect the indices of every token inside a `PATTERN ( … )` clause
/// (Snowflake MATCH_RECOGNIZE), parens included, so the regex-like pattern
/// body can be exempted from normal operator-spacing checks.
fn snowflake_pattern_token_indices(
    tokens: &[TokenWithSpan],
    non_trivia: &[usize],
) -> HashSet<usize> {
    let mut out = HashSet::new();
    let mut cursor = 0usize;

    while cursor < non_trivia.len() {
        let token_index = non_trivia[cursor];
        // Look for the PATTERN word, matched by value case-insensitively.
        let Token::Word(word) = &tokens[token_index].token else {
            cursor += 1;
            continue;
        };
        if !word.value.eq_ignore_ascii_case("PATTERN") {
            cursor += 1;
            continue;
        }

        // Find the opening paren that follows PATTERN.
        let Some(paren_pos) = ((cursor + 1)..non_trivia.len())
            .find(|idx| matches!(tokens[non_trivia[*idx]].token, Token::LParen))
        else {
            cursor += 1;
            continue;
        };

        // Walk to the matching close paren, tracking nesting depth.
        let mut depth = 0usize;
        let mut end_pos = None;
        for (pos, idx) in non_trivia.iter().copied().enumerate().skip(paren_pos) {
            match tokens[idx].token {
                Token::LParen => depth += 1,
                Token::RParen => {
                    if depth == 0 {
                        break;
                    }
                    depth -= 1;
                    if depth == 0 {
                        end_pos = Some(pos);
                        break;
                    }
                }
                _ => {}
            }
        }

        // Unbalanced parens: skip this PATTERN and keep scanning.
        let Some(end_pos) = end_pos else {
            cursor += 1;
            continue;
        };
        // Mark everything from `(` through the matching `)` inclusive.
        for idx in non_trivia.iter().take(end_pos + 1).skip(paren_pos) {
            out.insert(*idx);
        }
        cursor = end_pos + 1;
    }

    out
}
1689
/// Collect the indices of `<`, `>`, and `>>` tokens that delimit container
/// type parameters (`ARRAY<…>`, `STRUCT<…>`, `MAP<…>`), so they are not
/// treated as comparison/shift operators by the spacing checks.
fn type_angle_token_indices(tokens: &[TokenWithSpan], non_trivia: &[usize]) -> HashSet<usize> {
    let mut out = HashSet::new();
    // Stack of currently open type `<` brackets.
    let mut stack = Vec::<usize>::new();

    for (pos, token_idx) in non_trivia.iter().copied().enumerate() {
        let token = &tokens[token_idx].token;
        match token {
            Token::Lt => {
                // `<` opens a type list only directly after ARRAY/STRUCT/MAP.
                let prev_idx = pos
                    .checked_sub(1)
                    .and_then(|value| non_trivia.get(value).copied());
                if prev_idx.is_some_and(|idx| is_type_constructor(&tokens[idx].token)) {
                    out.insert(token_idx);
                    stack.push(token_idx);
                }
            }
            Token::Gt => {
                // `>` closes one open type bracket, if any.
                if !stack.is_empty() {
                    out.insert(token_idx);
                    stack.pop();
                }
            }
            Token::ShiftRight => {
                // Two adjacent closers lex as `>>`; treat the token as
                // closing two open type brackets at once.
                if stack.len() >= 2 {
                    out.insert(token_idx);
                    stack.pop();
                    stack.pop();
                }
            }
            _ => {}
        }
    }

    out
}
1725
1726fn supports_type_angle_spacing(dialect: Dialect) -> bool {
1727    matches!(
1728        dialect,
1729        Dialect::Bigquery | Dialect::Hive | Dialect::Databricks
1730    )
1731}
1732
1733fn is_type_constructor(token: &Token) -> bool {
1734    let Token::Word(word) = token else {
1735        return false;
1736    };
1737    word.value.eq_ignore_ascii_case("ARRAY")
1738        || word.value.eq_ignore_ascii_case("STRUCT")
1739        || word.value.eq_ignore_ascii_case("MAP")
1740}
1741
1742fn is_type_angle_spacing_pair(
1743    left: &TokenWithSpan,
1744    right: &TokenWithSpan,
1745    left_idx: usize,
1746    right_idx: usize,
1747    type_angle_tokens: &HashSet<usize>,
1748) -> bool {
1749    let left_is_type_angle = type_angle_tokens.contains(&left_idx);
1750    let right_is_type_angle = type_angle_tokens.contains(&right_idx);
1751
1752    if right_is_type_angle && matches!(right.token, Token::Lt | Token::Gt | Token::ShiftRight) {
1753        return true;
1754    }
1755    if left_is_type_angle && matches!(left.token, Token::Lt) {
1756        return true;
1757    }
1758    if left_is_type_angle
1759        && matches!(left.token, Token::Gt | Token::ShiftRight)
1760        && matches!(
1761            right.token,
1762            Token::Comma | Token::RParen | Token::RBracket | Token::LBracket | Token::Gt
1763        )
1764    {
1765        return true;
1766    }
1767
1768    false
1769}
1770
1771// ---------------------------------------------------------------------------
1772// Token utilities
1773// ---------------------------------------------------------------------------
1774
1775fn tokenized(sql: &str, dialect: Dialect) -> Option<Vec<TokenWithSpan>> {
1776    let dialect = dialect.to_sqlparser_dialect();
1777    let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
1778    tokenizer.tokenize_with_location().ok()
1779}
1780
/// Builds a token stream for the current statement from the context's
/// document-wide tokens: keeps only tokens fully inside the statement's
/// byte range and rebases their spans so they are 1,1-relative to the
/// statement start (as if the statement text had been tokenized alone).
///
/// Returns `None` when the statement start cannot be resolved, the document
/// has no tokens, or no token survives the filtering.
fn tokenized_for_context(ctx: &LintContext) -> Option<Vec<TokenWithSpan>> {
    let (statement_start_line, statement_start_column) =
        offset_to_line_col(ctx.sql, ctx.statement_range.start)?;

    ctx.with_document_tokens(|tokens| {
        if tokens.is_empty() {
            return None;
        }

        let mut out = Vec::new();
        for token in tokens {
            // Skip tokens whose spans cannot be mapped back to byte offsets.
            let Some((start, end)) = token_with_span_offsets(ctx.sql, token) else {
                continue;
            };
            // Keep only tokens fully contained in the statement's range;
            // tokens straddling the boundary are dropped.
            if start < ctx.statement_range.start || end > ctx.statement_range.end {
                continue;
            }

            // Rebase both span endpoints; drop the token if either endpoint
            // falls before the statement start.
            let Some(start_loc) = relative_location(
                token.span.start,
                statement_start_line,
                statement_start_column,
            ) else {
                continue;
            };
            let Some(end_loc) =
                relative_location(token.span.end, statement_start_line, statement_start_column)
            else {
                continue;
            };

            out.push(TokenWithSpan::new(
                token.token.clone(),
                Span::new(start_loc, end_loc),
            ));
        }

        if out.is_empty() {
            None
        } else {
            Some(out)
        }
    })
}
1825
1826fn token_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
1827    let start = line_col_to_offset(
1828        sql,
1829        token.span.start.line as usize,
1830        token.span.start.column as usize,
1831    )?;
1832    let end = line_col_to_offset(
1833        sql,
1834        token.span.end.line as usize,
1835        token.span.end.column as usize,
1836    )?;
1837    Some((start, end))
1838}
1839
1840fn next_non_trivia_index(tokens: &[TokenWithSpan], mut index: usize) -> Option<usize> {
1841    while index < tokens.len() {
1842        if !is_trivia_token(&tokens[index].token) {
1843            return Some(index);
1844        }
1845        index += 1;
1846    }
1847    None
1848}
1849
1850fn prev_non_trivia_index(tokens: &[TokenWithSpan], mut index: usize) -> Option<usize> {
1851    while index > 0 {
1852        index -= 1;
1853        if !is_trivia_token(&tokens[index].token) {
1854            return Some(index);
1855        }
1856    }
1857    None
1858}
1859
1860fn is_trivia_token(token: &Token) -> bool {
1861    matches!(
1862        token,
1863        Token::Whitespace(Whitespace::Space | Whitespace::Newline | Whitespace::Tab)
1864            | Token::Whitespace(Whitespace::SingleLineComment { .. })
1865            | Token::Whitespace(Whitespace::MultiLineComment(_))
1866    )
1867}
1868
/// Converts a 1-based (line, column) position into a byte offset in `sql`.
/// Columns count characters, not bytes. A position exactly one past the
/// final character maps to `sql.len()`; anything else out of range (or a
/// zero line/column) yields `None`.
fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
    // Line and column numbers are 1-based; zero is never valid.
    if line == 0 || column == 0 {
        return None;
    }

    let (mut cursor_line, mut cursor_col) = (1usize, 1usize);
    for (offset, ch) in sql.char_indices() {
        if (cursor_line, cursor_col) == (line, column) {
            return Some(offset);
        }
        if ch == '\n' {
            cursor_line += 1;
            cursor_col = 1;
        } else {
            cursor_col += 1;
        }
    }

    // The requested position may sit exactly one past the last character.
    ((cursor_line, cursor_col) == (line, column)).then_some(sql.len())
}
1896
1897fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
1898    let start = line_col_to_offset(
1899        sql,
1900        token.span.start.line as usize,
1901        token.span.start.column as usize,
1902    )?;
1903    let end = line_col_to_offset(
1904        sql,
1905        token.span.end.line as usize,
1906        token.span.end.column as usize,
1907    )?;
1908    Some((start, end))
1909}
1910
/// Converts a byte `offset` into a 1-based (line, column) position in `sql`.
/// Columns count characters. `offset == sql.len()` maps to the position one
/// past the final character.
///
/// Returns `None` when `offset` is past the end of `sql`, or when it points
/// into the middle of a multi-byte character (previously this fell through
/// and wrongly reported the end-of-string position).
fn offset_to_line_col(sql: &str, offset: usize) -> Option<(usize, usize)> {
    if offset > sql.len() {
        return None;
    }
    if offset == sql.len() {
        // One past the last character: line is 1 + newline count; column is
        // 1 + characters after the last newline (or the whole text).
        let line = 1 + sql.as_bytes().iter().filter(|byte| **byte == b'\n').count();
        let column = sql
            .rsplit_once('\n')
            .map_or(sql.chars().count() + 1, |(_, tail)| {
                tail.chars().count() + 1
            });
        return Some((line, column));
    }

    let mut line = 1usize;
    let mut column = 1usize;
    for (index, ch) in sql.char_indices() {
        if index == offset {
            return Some((line, column));
        }
        if ch == '\n' {
            line += 1;
            column = 1;
        } else {
            column += 1;
        }
    }

    // offset < sql.len() but matched no char boundary: it points inside a
    // multi-byte character and has no meaningful line/column.
    None
}
1940
1941fn relative_location(
1942    location: Location,
1943    statement_start_line: usize,
1944    statement_start_column: usize,
1945) -> Option<Location> {
1946    if location.line == 0 || location.column == 0 {
1947        return None;
1948    }
1949
1950    let line = location.line as usize;
1951    let column = location.column as usize;
1952    if line < statement_start_line {
1953        return None;
1954    }
1955
1956    let relative_line = line - statement_start_line + 1;
1957    let relative_column = if line == statement_start_line {
1958        if column < statement_start_column {
1959            return None;
1960        }
1961        column - statement_start_column + 1
1962    } else {
1963        column
1964    };
1965
1966    Some(Location::new(relative_line as u64, relative_column as u64))
1967}
1968
#[cfg(test)]
mod tests {
    use super::*;
    use crate::linter::rule::with_active_dialect;
    use crate::parser::parse_sql;
    use crate::types::{Dialect, IssueAutofixApplicability};

    // Lint `sql` with default settings under the Generic dialect.
    fn run(sql: &str) -> Vec<Issue> {
        run_with_dialect(sql, Dialect::Generic)
    }

    // Parse `sql` and run the default rule over every parsed statement;
    // each statement sees the whole document as its range.
    fn run_with_dialect(sql: &str, dialect: Dialect) -> Vec<Issue> {
        let statements = parse_sql(sql).expect("parse");
        let rule = LayoutSpacing::default();
        with_active_dialect(dialect, || {
            statements
                .iter()
                .enumerate()
                .flat_map(|(index, statement)| {
                    rule.check(
                        statement,
                        &LintContext {
                            sql,
                            statement_range: 0..sql.len(),
                            statement_index: index,
                        },
                    )
                })
                .collect()
        })
    }

    // Run the default rule against `sql` using a placeholder parsed
    // statement, so the rule's token-based (statementless) path does the work.
    fn run_statementless_with_dialect(sql: &str, dialect: Dialect) -> Vec<Issue> {
        run_statementless_with_rule(sql, dialect, LayoutSpacing::default())
    }

    // Statementless run with a caller-configured rule instance.
    fn run_statementless_with_rule(sql: &str, dialect: Dialect, rule: LayoutSpacing) -> Vec<Issue> {
        let placeholder = parse_sql("SELECT 1").expect("parse placeholder");
        with_active_dialect(dialect, || {
            rule.check(
                &placeholder[0],
                &LintContext {
                    sql,
                    statement_range: 0..sql.len(),
                    statement_index: 0,
                },
            )
        })
    }

    // Apply every autofix edit from `issues` to `sql`. Edits are applied in
    // reverse span order so earlier byte offsets stay valid while later
    // spans are replaced.
    fn apply_all_issue_autofixes(sql: &str, issues: &[Issue]) -> String {
        let mut out = sql.to_string();
        let mut edits = issues
            .iter()
            .filter_map(|issue| issue.autofix.as_ref())
            .flat_map(|autofix| autofix.edits.clone())
            .collect::<Vec<_>>();
        edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
        for edit in edits.into_iter().rev() {
            out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
        }
        out
    }

    #[test]
    fn allows_bigquery_array_type_angle_brackets_without_spaces() {
        let issues = run_with_dialect(
            "SELECT ARRAY<FLOAT64>[1, 2, 3] AS floats;",
            Dialect::Bigquery,
        );
        assert!(issues.is_empty());
    }

    #[test]
    fn allows_create_table_with_qualified_name_before_column_list() {
        let issues = run("CREATE TABLE db.schema_name.tbl_name (id INT)");
        assert!(issues.is_empty());
    }

    #[test]
    fn fixes_reference_target_column_list_spacing() {
        let sql = "create table tab1 (b int references tab2(b))";
        let issues = run_statementless_with_dialect(sql, Dialect::Ansi);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "create table tab1 (b int references tab2 (b))");
    }

    #[test]
    fn allows_bigquery_hyphenated_project_identifier() {
        let issues = run_statementless_with_dialect(
            "SELECT col_foo FROM foo-bar.foo.bar",
            Dialect::Bigquery,
        );
        assert!(issues.is_empty());
    }

    #[test]
    fn allows_bigquery_function_array_offset_access() {
        let sql = "SELECT testFunction(a)[OFFSET(0)].* FROM table1";
        let issues = run_statementless_with_dialect(sql, Dialect::Bigquery);
        assert!(issues.is_empty());
    }

    #[test]
    fn allows_hive_struct_and_array_datatype_angles() {
        let sql = "select col1::STRUCT<foo: int>, col2::ARRAY<int> from t";
        let issues = run_statementless_with_dialect(sql, Dialect::Hive);
        assert!(issues.is_empty());
    }

    #[test]
    fn allows_sparksql_file_literal_path() {
        let sql = "ADD JAR path/to/some.jar;";
        let issues = run_statementless_with_dialect(sql, Dialect::Databricks);
        assert!(issues.is_empty());
    }

    #[test]
    fn allows_clickhouse_system_model_path() {
        let sql = "SYSTEM RELOAD MODEL /model/path;";
        let issues = run_statementless_with_dialect(sql, Dialect::Clickhouse);
        assert!(issues.is_empty(), "unexpected issues: {issues:?}");
    }

    #[test]
    fn detects_alias_alignment_when_configured() {
        let sql = "SELECT\n\tcol1 AS a,\n\tlonger_col AS b\nFROM t";
        let issues = run_statementless_with_rule(
            sql,
            Dialect::Ansi,
            LayoutSpacing {
                align_alias_expression: true,
                tab_space_size: 4,
                ..LayoutSpacing::default()
            },
        );
        assert!(!issues.is_empty());
    }

    #[test]
    fn detects_alias_alignment_with_tabs_when_columns_are_equal_width() {
        let sql = "SELECT\n\tcol1 AS alias1,\n\tcol2 AS alias2\nFROM table1";
        let issues = run_statementless_with_rule(
            sql,
            Dialect::Ansi,
            LayoutSpacing {
                align_alias_expression: true,
                align_with_tabs: true,
                tab_space_size: 4,
                ..LayoutSpacing::default()
            },
        );
        assert!(
            !issues.is_empty(),
            "tab indentation alignment should flag spaces before AS"
        );
    }

    #[test]
    fn detects_create_table_datatype_alignment_when_configured() {
        let sql = "CREATE TABLE tbl (\n    foo VARCHAR(25) NOT NULL,\n    barbar INT NULL\n)";
        let issues = run_statementless_with_rule(
            sql,
            Dialect::Ansi,
            LayoutSpacing {
                align_data_type: true,
                ..LayoutSpacing::default()
            },
        );
        assert!(!issues.is_empty());
    }

    #[test]
    fn does_not_flag_create_table_alignment_when_columns_are_already_aligned() {
        let sql = "CREATE TABLE foo (\n    x INT NOT NULL PRIMARY KEY,\n    y INT NULL,\n    z INT NULL\n);";
        let issues = run_statementless_with_rule(
            sql,
            Dialect::Ansi,
            LayoutSpacing {
                align_data_type: true,
                align_column_constraint: true,
                ..LayoutSpacing::default()
            },
        );
        assert!(
            issues.is_empty(),
            "expected no LT01 alignment issues: {issues:?}"
        );
    }

    #[test]
    fn statementless_fixes_comment_on_function_spacing() {
        let sql = "COMMENT ON FUNCTION x (foo) IS 'y';";
        let issues = run_statementless_with_dialect(sql, Dialect::Postgres);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "COMMENT ON FUNCTION x(foo) IS 'y';");
    }

    #[test]
    fn statementless_fixes_split_tsql_comparison_operator() {
        let sql = "SELECT col1 FROM table1 WHERE 1 > = 1";
        let issues = run_statementless_with_dialect(sql, Dialect::Mssql);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT col1 FROM table1 WHERE 1 >= 1");
    }

    #[test]
    fn statementless_fixes_tsql_compound_assignment_operator() {
        let sql = "SET @param1+=1";
        let issues = run_statementless_with_dialect(sql, Dialect::Mssql);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SET @param1 += 1");
    }

    #[test]
    fn allows_sparksql_multi_unit_interval_minus() {
        let sql = "SELECT INTERVAL -2 HOUR '3' MINUTE AS col;";
        let issues = run_statementless_with_dialect(sql, Dialect::Databricks);
        assert!(issues.is_empty());
    }

    #[test]
    fn ignore_templated_areas_skips_template_artifacts() {
        let sql = "{{ 'SELECT 1, 4' }}, 5, 6";
        let issues = run_statementless_with_rule(
            sql,
            Dialect::Generic,
            LayoutSpacing {
                ignore_templated_areas: true,
                ..LayoutSpacing::default()
            },
        );
        assert!(issues.is_empty(), "template-only spacing should be ignored");
    }

    #[test]
    fn ignore_templated_areas_still_fixes_non_template_region() {
        let sql = "{{ 'SELECT 1, 4' }}, 5 , 6";
        let issues = run_statementless_with_rule(
            sql,
            Dialect::Generic,
            LayoutSpacing {
                ignore_templated_areas: true,
                ..LayoutSpacing::default()
            },
        );
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "{{ 'SELECT 1, 4' }}, 5, 6");
    }

    #[test]
    fn templated_string_content_is_checked_when_not_ignored() {
        let sql = "{{ 'SELECT 1 ,4' }}";
        let issues = run_statementless_with_rule(
            sql,
            Dialect::Generic,
            LayoutSpacing {
                ignore_templated_areas: false,
                ..LayoutSpacing::default()
            },
        );
        assert!(!issues.is_empty());
        assert!(
            issues.iter().all(|issue| issue.autofix.is_none()),
            "template-internal checks are detection-only"
        );
    }

    #[test]
    fn templated_string_content_passes_when_clean() {
        let sql = "{{ 'SELECT 1, 4' }}";
        let issues = run_statementless_with_rule(
            sql,
            Dialect::Generic,
            LayoutSpacing {
                ignore_templated_areas: false,
                ..LayoutSpacing::default()
            },
        );
        assert!(issues.is_empty());
    }

    #[test]
    fn allows_snowflake_match_recognize_pattern_spacing() {
        let sql = "select * from stock_price_history\n  match_recognize (\n    pattern ((A | B){5} C+)\n  )";
        let issues = run_statementless_with_dialect(sql, Dialect::Snowflake);
        assert!(issues.is_empty(), "snowflake pattern syntax should pass");
    }

    #[test]
    fn fixes_snowflake_match_condition_newline_before_paren() {
        let sql = "select\n    table1.pk1\nfrom table1\n    asof join\n    table2\n    match_condition\n    (t1 > t2)";
        let issues = run_with_dialect(sql, Dialect::Snowflake);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert!(
            fixed.contains("match_condition(t1 > t2)"),
            "expected inline match_condition: {fixed}"
        );
    }

    #[test]
    fn fixes_snowflake_copy_into_target_column_list_spacing() {
        let sql = "copy into DB.SCHEMA.ProblemHere(col1)\nfrom @my_stage/file";
        let issues = run_statementless_with_dialect(sql, Dialect::Snowflake);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert!(
            fixed.contains("DB.SCHEMA.ProblemHere (col1)"),
            "fixed: {fixed}"
        );
    }

    #[test]
    fn fixes_snowflake_copy_into_target_column_list_spacing_with_placeholder_prefix() {
        let sql = "copy into ${env}_ENT_LANDING.SCHEMA_NAME.ProblemHere(col1)\nfrom @my_stage/file";
        let issues = run_statementless_with_dialect(sql, Dialect::Snowflake);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert!(
            fixed.contains(".SCHEMA_NAME.ProblemHere (col1)"),
            "fixed: {fixed}"
        );
    }

    #[test]
    fn allows_snowflake_stage_path_without_spacing_around_slash() {
        let sql = "copy into t from @my_stage/file";
        let issues = run_statementless_with_dialect(sql, Dialect::Snowflake);
        assert!(
            issues.is_empty(),
            "snowflake stage path should not force spaces around slash: {issues:?}"
        );
    }

    // --- Trailing whitespace tests ---

    #[test]
    fn flags_trailing_whitespace() {
        let sql = "SELECT 1     \n";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag trailing whitespace");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT 1\n");
    }

    #[test]
    fn flags_trailing_whitespace_on_initial_blank_line() {
        let sql = " \nSELECT 1     \n";
        let issues = run(sql);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "\nSELECT 1\n");
    }

    // --- Operator spacing tests ---

    #[test]
    fn flags_compact_operator() {
        let sql = "SELECT 1+2";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag compact 1+2");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT 1 + 2");
    }

    #[test]
    fn flags_compact_operator_expression() {
        let sql = "select\n    field,\n    date(field_1)-date(field_2) as diff\nfrom tbl";
        let issues = run(sql);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert!(
            fixed.contains("date(field_1) - date(field_2)"),
            "should fix operator spacing: {fixed}"
        );
    }

    #[test]
    fn flags_plus_between_identifier_and_literal() {
        let sql = "SELECT a +'b'+ 'c' FROM tbl";
        let issues = run(sql);
        assert!(
            !issues.is_empty(),
            "should flag operator spacing around string literals"
        );
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT a + 'b' + 'c' FROM tbl");
    }

    #[test]
    fn does_not_flag_simple_spacing() {
        assert!(run("SELECT * FROM t WHERE a = 1").is_empty());
    }

    #[test]
    fn does_not_flag_sign_indicators() {
        let issues = run("SELECT 1, +2, -4");
        // Sign indicators before numbers should not be flagged
        assert!(
            issues.is_empty(),
            "unary signs should not be flagged: {issues:?}"
        );
    }

    #[test]
    fn does_not_flag_newline_operator() {
        assert!(run("SELECT 1\n+ 2").is_empty());
        assert!(run("SELECT 1\n    + 2").is_empty());
    }

    // --- Comma spacing tests ---

    #[test]
    fn flags_space_before_comma() {
        let sql = "SELECT 1 ,4";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag space before comma");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT 1, 4");
    }

    #[test]
    fn flags_no_space_after_comma() {
        let sql = "SELECT 1,4";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag missing space after comma");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT 1, 4");
    }

    #[test]
    fn flags_excessive_space_after_comma() {
        let sql = "SELECT 1,   4";
        let issues = run(sql);
        assert!(
            !issues.is_empty(),
            "should flag excessive space after comma"
        );
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT 1, 4");
    }

    // --- Bracket spacing tests ---

    #[test]
    fn flags_missing_space_before_paren_after_keyword() {
        let sql = "SELECT * FROM(SELECT 1 AS C1)AS T1;";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag FROM( and )AS: {issues:?}");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT * FROM (SELECT 1 AS C1) AS T1;");
    }

    // --- Missing space tests ---

    #[test]
    fn flags_cte_missing_space_after_as() {
        let sql = "WITH a AS(select 1) select * from a";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag AS(");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "WITH a AS (select 1) select * from a");
    }

    #[test]
    fn flags_cte_multiple_spaces_after_as() {
        let sql = "WITH a AS  (select 1) select * from a";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag AS  (");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "WITH a AS (select 1) select * from a");
    }

    #[test]
    fn flags_missing_space_after_using() {
        let sql = "select * from a JOIN b USING(x)";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag USING(");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "select * from a JOIN b USING (x)");
    }

    // --- Excessive whitespace tests ---

    #[test]
    fn flags_excessive_whitespace() {
        let sql = "SELECT     1";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag excessive whitespace");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT 1");
    }

    #[test]
    fn flags_excessive_whitespace_multi() {
        let sql = "select\n    1 + 2     + 3     + 4        -- Comment\nfrom     foo";
        let issues = run(sql);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(
            fixed,
            "select\n    1 + 2 + 3 + 4        -- Comment\nfrom foo"
        );
    }

    // --- Literal spacing tests ---

    #[test]
    fn flags_literal_operator_spacing() {
        let sql = "SELECT ('foo'||'bar') as buzz";
        let issues = run(sql);
        assert!(
            !issues.is_empty(),
            "should flag compact || operator: {issues:?}"
        );
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT ('foo' || 'bar') as buzz");
    }

    #[test]
    fn flags_literal_as_spacing() {
        let sql = "SELECT\n    'foo'AS   bar\nFROM foo";
        let issues = run(sql);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT\n    'foo' AS bar\nFROM foo");
    }

    #[test]
    fn flags_ansi_national_string_literal_spacing() {
        let sql = "SELECT a + N'b' + N'c' FROM tbl;";
        let issues = run_with_dialect(sql, Dialect::Ansi);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT a + N 'b' + N 'c' FROM tbl;");
    }

    // --- Function spacing tests ---

    #[test]
    fn does_not_flag_function_call() {
        assert!(run("SELECT foo(5) FROM T1;").is_empty());
        assert!(run("SELECT COUNT(*) FROM tbl\n\n").is_empty());
    }

    // --- Cast operator tests ---

    #[test]
    fn flags_spaced_cast_operator() {
        let sql = "SELECT '1' :: INT;";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag space around ::");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT '1'::INT;");
    }

    // --- JSON arrow tests ---

    #[test]
    fn flags_compact_json_arrow_operator() {
        let sql = "SELECT payload->>'id' FROM t";
        let issues = run(sql);
        assert!(
            issues.len() >= 2,
            "should flag 2+ violations for compact json-arrow"
        );
        assert!(
            issues
                .iter()
                .all(|issue| issue.autofix.as_ref().is_some_and(
                    |autofix| autofix.applicability == IssueAutofixApplicability::Safe
                )),
            "expected safe autofix metadata"
        );

        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT payload ->> 'id' FROM t");
    }

    #[test]
    fn does_not_flag_exists_without_space_before_parenthesis() {
        let no_space = "SELECT\n    EXISTS(\n        SELECT 1\n    ) AS has_row\nFROM t";
        assert!(run(no_space).is_empty());
    }

    #[test]
    fn flags_space_before_exists_parenthesis_in_select_list() {
        let sql = "SELECT 1,\n    EXISTS (\n        SELECT 1\n    ) AS has_row\nFROM t";
        let issues = run(sql);
        assert!(
            !issues.is_empty(),
            "expected EXISTS-space violation in select list"
        );
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert!(
            fixed.contains("EXISTS(\n"),
            "expected EXISTS( after fix, got: {fixed}"
        );
    }

    #[test]
    fn requires_space_before_exists_parenthesis_after_where() {
        let sql = "SELECT 1\nWHERE EXISTS(\n    SELECT 1\n)";
        let issues = run(sql);
        assert!(
            !issues.is_empty(),
            "expected missing-space violation for WHERE EXISTS("
        );
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert!(
            fixed.contains("WHERE EXISTS (\n"),
            "expected WHERE EXISTS ( after fix, got: {fixed}"
        );
    }

    #[test]
    fn merge_violations_prefers_fixable_duplicate_span() {
        let mut violations = vec![
            ((10, 10), Vec::new()),
            ((10, 10), vec![(10, 10, " ".to_string())]),
        ];
        merge_violations_by_span(&mut violations);
        assert_eq!(violations.len(), 1);
        assert_eq!(violations[0].0, (10, 10));
        assert_eq!(violations[0].1, vec![(10, 10, " ".to_string())]);
    }

    // --- Safe pass cases ---

    #[test]
    fn does_not_flag_spacing_patterns_inside_literals_or_comments() {
        let issues = run("SELECT 'payload->>''id''' AS txt -- EXISTS (\nFROM t");
        assert!(
            issues.is_empty(),
            "should not flag content inside literals/comments: {issues:?}"
        );
    }

    #[test]
    fn does_not_flag_correct_comma_spacing() {
        assert!(run("SELECT 1, 4").is_empty());
    }

    #[test]
    fn does_not_flag_correct_cast() {
        assert!(run("SELECT '1'::INT;").is_empty());
    }

    #[test]
    fn does_not_flag_qualified_identifiers() {
        // Dot-separated identifiers should not have spaces
        assert!(run("SELECT a.b FROM c.d").is_empty());
    }

    #[test]
    fn does_not_flag_newline_after_using() {
        assert!(
            run("select * from a JOIN b USING\n(x)").is_empty(),
            "newline between USING and ( should be acceptable"
        );
    }

    #[test]
    fn flags_cte_newline_after_as() {
        let sql = "WITH a AS\n(\n  select 1\n)\nselect * from a";
        let issues = run(sql);
        assert!(!issues.is_empty(), "should flag AS + newline + (");
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "WITH a AS (\n  select 1\n)\nselect * from a");
    }

    #[test]
    fn flags_cte_newline_and_spaces_after_as() {
        let sql = "WITH a AS\n\n\n    (\n  select 1\n)\nselect * from a";
        let issues = run(sql);
        assert!(!issues.is_empty());
        let fixed = apply_all_issue_autofixes(sql, &issues);
        assert_eq!(fixed, "WITH a AS (\n  select 1\n)\nselect * from a");
    }

    #[test]
    fn does_not_flag_comment_after_as() {
        // When there's a comment between AS and (, it should pass
        assert!(
            run("WITH\na AS -- comment\n(\nselect 1\n)\nselect * from a").is_empty(),
            "comment between AS and ( should be acceptable"
        );
    }

    #[test]
    fn insert_into_table_paren_allows_space() {
        // Space before ( in INSERT INTO table ( should be fine.
        let issues = run("INSERT INTO metrics.cold_start_daily (\n    workspace_id\n) SELECT 1");
        let lt01 = issues
            .iter()
            .filter(|i| i.code == "LT01")
            .collect::<Vec<_>>();
        assert!(
            lt01.is_empty(),
            "INSERT INTO table ( should not flag LT01, got: {lt01:?}"
        );
    }

    #[test]
    fn insert_into_table_paren_with_cte() {
        // CTE + INSERT INTO: both parsed-statement and fallback paths.
        let sql = "WITH starts AS (\n    SELECT 1\n)\nINSERT INTO metrics.cold_start_daily (\n    workspace_id\n) SELECT workspace_id FROM starts";
        let issues = run_with_dialect(sql, Dialect::Postgres);
        let lt01 = issues
            .iter()
            .filter(|i| i.code == "LT01")
            .collect::<Vec<_>>();
        assert!(
            lt01.is_empty(),
            "INSERT INTO table ( with CTE should not flag LT01, got: {lt01:?}"
        );
    }

    #[test]
    fn insert_into_table_paren_on_conflict() {
        // Regression: CTE + INSERT INTO + ON CONFLICT via statementless path.
        let sql = "\
WITH cte AS (
    SELECT workspace_id
    FROM ledger.query_history
    WHERE start_time >= $1
)

INSERT INTO metrics.cold_start_daily (
    workspace_id
)
SELECT workspace_id
FROM cte
ON CONFLICT (workspace_id) DO UPDATE
    SET workspace_id = excluded.workspace_id";
        let issues = run_statementless_with_dialect(sql, Dialect::Postgres);
        let lt01 = issues
            .iter()
            .filter(|i| i.code == "LT01")
            .collect::<Vec<_>>();
        assert!(
            lt01.is_empty(),
            "INSERT INTO table ( with ON CONFLICT should not flag LT01, got: {lt01:?}"
        );
    }
}