Skip to main content

flowscope_core/linter/rules/
lt_011.rs

1//! LINT_LT_011: Layout set operators.
2//!
3//! SQLFluff LT11 parity (current scope): enforce own-line placement for set
4//! operators in multiline statements.
5
6use crate::linter::config::LintConfig;
7use crate::linter::rule::{LintContext, LintRule};
8use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit};
9use sqlparser::ast::Statement;
10use sqlparser::keywords::Keyword;
11use sqlparser::tokenizer::{
12    Location, Span as TokenSpan, Token, TokenWithSpan, Tokenizer, Whitespace,
13};
14
15#[derive(Clone, Copy, Debug, Eq, PartialEq)]
16enum SetOperatorLinePosition {
17    AloneStrict,
18    Leading,
19    Trailing,
20}
21
22impl SetOperatorLinePosition {
23    fn from_config(config: &LintConfig) -> Self {
24        match config
25            .rule_option_str(issue_codes::LINT_LT_011, "line_position")
26            .unwrap_or("alone:strict")
27            .to_ascii_lowercase()
28            .as_str()
29        {
30            "leading" => Self::Leading,
31            "trailing" => Self::Trailing,
32            _ => Self::AloneStrict,
33        }
34    }
35}
36
37pub struct LayoutSetOperators {
38    line_position: SetOperatorLinePosition,
39}
40
41impl LayoutSetOperators {
42    pub fn from_config(config: &LintConfig) -> Self {
43        Self {
44            line_position: SetOperatorLinePosition::from_config(config),
45        }
46    }
47}
48
49impl Default for LayoutSetOperators {
50    fn default() -> Self {
51        Self {
52            line_position: SetOperatorLinePosition::AloneStrict,
53        }
54    }
55}
56
57impl LintRule for LayoutSetOperators {
58    fn code(&self) -> &'static str {
59        issue_codes::LINT_LT_011
60    }
61
62    fn name(&self) -> &'static str {
63        "Layout set operators"
64    }
65
66    fn description(&self) -> &'static str {
67        "Set operators should be surrounded by newlines."
68    }
69
70    fn check(&self, _statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
71        let (has_violation, edit_spans) =
72            set_operator_layout_violation_and_fixable_spans(ctx, self.line_position);
73        if has_violation {
74            let mut issue = Issue::info(
75                issue_codes::LINT_LT_011,
76                "Set operator line placement appears inconsistent.",
77            )
78            .with_statement(ctx.statement_index);
79
80            if let Some((start, end)) = edit_spans.first().copied() {
81                issue = issue.with_span(ctx.span_from_statement_offset(start, end));
82                let edits = edit_spans
83                    .into_iter()
84                    .map(|(edit_start, edit_end)| {
85                        IssuePatchEdit::new(
86                            ctx.span_from_statement_offset(edit_start, edit_end),
87                            "\n",
88                        )
89                    })
90                    .collect();
91                issue = issue.with_autofix_edits(IssueAutofixApplicability::Safe, edits);
92            }
93
94            vec![issue]
95        } else {
96            Vec::new()
97        }
98    }
99}
100
101fn set_operator_layout_violation_and_fixable_spans(
102    ctx: &LintContext,
103    line_position: SetOperatorLinePosition,
104) -> (bool, Vec<(usize, usize)>) {
105    let tokens =
106        tokenized_for_context(ctx).or_else(|| tokenized(ctx.statement_sql(), ctx.dialect()));
107    let Some(tokens) = tokens else {
108        return (false, Vec::new());
109    };
110    let sql = ctx.statement_sql();
111
112    let significant_tokens: Vec<(usize, &TokenWithSpan)> = tokens
113        .iter()
114        .enumerate()
115        .filter(|(_, token)| !is_trivia_token(&token.token))
116        .collect();
117
118    let has_set_operator = significant_tokens
119        .iter()
120        .any(|(_, token)| set_operator_keyword(&token.token).is_some());
121    if !has_set_operator {
122        return (false, Vec::new());
123    }
124    let mut has_violation = false;
125    let mut edit_spans = Vec::new();
126
127    for (position, (_, token)) in significant_tokens.iter().enumerate() {
128        let Some(keyword) = set_operator_keyword(&token.token) else {
129            continue;
130        };
131
132        let operator_end = if keyword == Keyword::UNION
133            && matches!(
134                significant_tokens.get(position + 1).map(|(_, t)| &t.token),
135                Some(Token::Word(word)) if word.keyword == Keyword::ALL
136            ) {
137            position + 1
138        } else {
139            position
140        };
141
142        let Some((_, prev_token)) = position
143            .checked_sub(1)
144            .and_then(|idx| significant_tokens.get(idx))
145        else {
146            continue;
147        };
148        let Some((_, next_token)) = significant_tokens.get(operator_end + 1) else {
149            continue;
150        };
151
152        let operator_line = token.span.start.line;
153        let line_break_before = prev_token.span.start.line < operator_line;
154        let line_break_after = next_token.span.start.line > operator_line;
155
156        let placement_violation = match line_position {
157            SetOperatorLinePosition::AloneStrict => !line_break_before || !line_break_after,
158            SetOperatorLinePosition::Leading => !line_break_before || line_break_after,
159            SetOperatorLinePosition::Trailing => line_break_before || !line_break_after,
160        };
161
162        if placement_violation {
163            has_violation = true;
164        } else {
165            continue;
166        }
167
168        if !matches!(line_position, SetOperatorLinePosition::AloneStrict) {
169            continue;
170        }
171
172        if !line_break_before {
173            let Some(gap_start) = token_end_offset(sql, prev_token) else {
174                continue;
175            };
176            let Some(gap_end) = token_start_offset(sql, token) else {
177                continue;
178            };
179            if gap_start <= gap_end && gap_is_whitespace_only(sql, gap_start, gap_end) {
180                edit_spans.push((gap_start, gap_end));
181            }
182        }
183
184        if !line_break_after {
185            let Some((_, operator_end_token)) = significant_tokens.get(operator_end) else {
186                continue;
187            };
188            let Some(gap_start) = token_end_offset(sql, operator_end_token) else {
189                continue;
190            };
191            let Some(gap_end) = token_start_offset(sql, next_token) else {
192                continue;
193            };
194            if gap_start <= gap_end && gap_is_whitespace_only(sql, gap_start, gap_end) {
195                edit_spans.push((gap_start, gap_end));
196            }
197        }
198    }
199
200    edit_spans.sort_unstable();
201    edit_spans.dedup();
202    (has_violation, edit_spans)
203}
204
205fn tokenized(sql: &str, dialect: Dialect) -> Option<Vec<TokenWithSpan>> {
206    let dialect = dialect.to_sqlparser_dialect();
207    let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
208    tokenizer.tokenize_with_location().ok()
209}
210
211fn tokenized_for_context(ctx: &LintContext) -> Option<Vec<TokenWithSpan>> {
212    let (statement_start_line, statement_start_column) =
213        offset_to_line_col(ctx.sql, ctx.statement_range.start)?;
214
215    ctx.with_document_tokens(|tokens| {
216        if tokens.is_empty() {
217            return None;
218        }
219
220        let mut out = Vec::new();
221        for token in tokens {
222            let Some((start, end)) = token_with_span_offsets(ctx.sql, token) else {
223                continue;
224            };
225            if start < ctx.statement_range.start || end > ctx.statement_range.end {
226                continue;
227            }
228
229            let Some(start_loc) = relative_location(
230                token.span.start,
231                statement_start_line,
232                statement_start_column,
233            ) else {
234                continue;
235            };
236            let Some(end_loc) =
237                relative_location(token.span.end, statement_start_line, statement_start_column)
238            else {
239                continue;
240            };
241
242            out.push(TokenWithSpan::new(
243                token.token.clone(),
244                TokenSpan::new(start_loc, end_loc),
245            ));
246        }
247
248        if out.is_empty() {
249            None
250        } else {
251            Some(out)
252        }
253    })
254}
255
256fn set_operator_keyword(token: &Token) -> Option<Keyword> {
257    let Token::Word(word) = token else {
258        return None;
259    };
260
261    match word.keyword {
262        Keyword::UNION | Keyword::INTERSECT | Keyword::EXCEPT => Some(word.keyword),
263        _ => None,
264    }
265}
266
267fn is_trivia_token(token: &Token) -> bool {
268    matches!(
269        token,
270        Token::Whitespace(Whitespace::Space | Whitespace::Newline | Whitespace::Tab)
271            | Token::Whitespace(Whitespace::SingleLineComment { .. })
272            | Token::Whitespace(Whitespace::MultiLineComment(_))
273    )
274}
275
/// Converts a 1-based `(line, column)` position into a byte offset in `sql`.
///
/// Columns are counted in characters and lines are separated by `'\n'`.
/// The position just past the last character maps to `sql.len()`. Returns
/// `None` when `line` or `column` is zero or the position does not exist.
fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
    if line == 0 || column == 0 {
        return None;
    }

    let target = (line, column);
    let mut cursor = (1usize, 1usize);

    for (offset, ch) in sql.char_indices() {
        if cursor == target {
            return Some(offset);
        }

        cursor = if ch == '\n' {
            (cursor.0 + 1, 1)
        } else {
            (cursor.0, cursor.1 + 1)
        };
    }

    // The cursor now sits one past the final character.
    (cursor == target).then_some(sql.len())
}
303
304fn token_start_offset(sql: &str, token: &TokenWithSpan) -> Option<usize> {
305    line_col_to_offset(
306        sql,
307        token.span.start.line as usize,
308        token.span.start.column as usize,
309    )
310}
311
312fn token_end_offset(sql: &str, token: &TokenWithSpan) -> Option<usize> {
313    line_col_to_offset(
314        sql,
315        token.span.end.line as usize,
316        token.span.end.column as usize,
317    )
318}
319
/// Returns `true` when the byte range `start..end` of `sql` consists only of
/// whitespace characters (an empty range counts as whitespace-only).
///
/// Returns `false` instead of panicking when the range is reversed, extends
/// past the end of `sql`, or does not fall on character boundaries.
fn gap_is_whitespace_only(sql: &str, start: usize, end: usize) -> bool {
    // `str::get` performs every bounds and char-boundary check in one step.
    let Some(gap) = sql.get(start..end) else {
        return false;
    };

    gap.chars().all(char::is_whitespace)
}
327
328fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
329    let start = line_col_to_offset(
330        sql,
331        token.span.start.line as usize,
332        token.span.start.column as usize,
333    )?;
334    let end = line_col_to_offset(
335        sql,
336        token.span.end.line as usize,
337        token.span.end.column as usize,
338    )?;
339    Some((start, end))
340}
341
/// Converts a byte `offset` in `sql` into a 1-based `(line, column)` pair.
///
/// Columns are counted in characters and lines are separated by `'\n'`.
/// `offset == sql.len()` yields the position just past the last character.
/// Returns `None` when `offset` is past the end of `sql` or does not fall on
/// a character boundary.
fn offset_to_line_col(sql: &str, offset: usize) -> Option<(usize, usize)> {
    let (mut line, mut column) = (1usize, 1usize);

    for (index, ch) in sql.char_indices() {
        if index == offset {
            return Some((line, column));
        }
        match ch {
            '\n' => {
                line += 1;
                column = 1;
            }
            _ => column += 1,
        }
    }

    // Only the end-of-input offset can still be valid after the walk.
    (offset == sql.len()).then_some((line, column))
}
376
377fn relative_location(
378    location: Location,
379    statement_start_line: usize,
380    statement_start_column: usize,
381) -> Option<Location> {
382    let line = location.line as usize;
383    let column = location.column as usize;
384    if line < statement_start_line {
385        return None;
386    }
387
388    if line == statement_start_line {
389        if column < statement_start_column {
390            return None;
391        }
392        return Some(Location::new(
393            1,
394            (column - statement_start_column + 1) as u64,
395        ));
396    }
397
398    Some(Location::new(
399        (line - statement_start_line + 1) as u64,
400        column as u64,
401    ))
402}
403
#[cfg(test)]
mod tests {
    use super::*;
    use crate::linter::config::LintConfig;
    use crate::parser::parse_sql;
    use crate::types::IssueAutofixApplicability;

    /// Parses `sql` and runs `rule` on every statement, treating the whole
    /// input as the statement range.
    fn run_with_rule(sql: &str, rule: &LayoutSetOperators) -> Vec<Issue> {
        let statements = parse_sql(sql).expect("parse");
        let mut issues = Vec::new();
        for (index, statement) in statements.iter().enumerate() {
            let ctx = LintContext {
                sql,
                statement_range: 0..sql.len(),
                statement_index: index,
            };
            issues.extend(rule.check(statement, &ctx));
        }
        issues
    }

    /// Runs the rule with its default configuration.
    fn run(sql: &str) -> Vec<Issue> {
        run_with_rule(sql, &LayoutSetOperators::default())
    }

    /// Builds the rule from a config that sets `line_position` under
    /// `config_key`.
    fn rule_with_line_position(config_key: &str, line_position: &str) -> LayoutSetOperators {
        let config = LintConfig {
            enabled: true,
            disabled_rules: vec![],
            rule_configs: std::collections::BTreeMap::from([(
                config_key.to_string(),
                serde_json::json!({"line_position": line_position}),
            )]),
        };
        LayoutSetOperators::from_config(&config)
    }

    /// Applies the issue's autofix edits to `sql`, last edit first so earlier
    /// offsets stay valid. Returns `None` when the issue has no autofix.
    fn apply_issue_autofix(sql: &str, issue: &Issue) -> Option<String> {
        let autofix = issue.autofix.as_ref()?;
        let mut edits = autofix.edits.clone();
        edits.sort_by_key(|edit| (edit.span.start, edit.span.end));

        let fixed = edits.iter().rev().fold(sql.to_string(), |mut text, edit| {
            text.replace_range(edit.span.start..edit.span.end, &edit.replacement);
            text
        });
        Some(fixed)
    }

    #[test]
    fn flags_inline_set_operator_in_multiline_statement() {
        let sql = "SELECT 1 UNION SELECT 2\nUNION SELECT 3";
        let issues = run(sql);
        assert_eq!(issues.len(), 1);

        let issue = &issues[0];
        assert_eq!(issue.code, issue_codes::LINT_LT_011);
        let autofix = issue.autofix.as_ref().expect("autofix metadata");
        assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);

        let fixed = apply_issue_autofix(sql, issue).expect("apply autofix");
        assert_eq!(fixed, "SELECT 1\nUNION\nSELECT 2\nUNION\nSELECT 3");
    }

    #[test]
    fn flags_inline_set_operator_in_single_line_statement() {
        let issues = run("SELECT 1 UNION SELECT 2");
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_LT_011);
    }

    #[test]
    fn does_not_flag_own_line_set_operators() {
        let issues = run("SELECT 1\nUNION\nSELECT 2\nUNION\nSELECT 3");
        assert!(issues.is_empty());
    }

    #[test]
    fn does_not_flag_own_line_union_all() {
        let issues = run("SELECT 1\nUNION ALL\nSELECT 2");
        assert!(issues.is_empty());
    }

    #[test]
    fn leading_line_position_accepts_leading_operators() {
        let rule = rule_with_line_position("layout.set_operators", "leading");
        let issues = run_with_rule("SELECT 1\nUNION SELECT 2\nUNION SELECT 3", &rule);
        assert!(issues.is_empty());
    }

    #[test]
    fn trailing_line_position_flags_leading_operators() {
        let rule = rule_with_line_position("LINT_LT_011", "trailing");
        let issues = run_with_rule("SELECT 1\nUNION SELECT 2", &rule);
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_LT_011);
    }

    #[test]
    fn inline_set_operator_with_comment_gap_preserves_comment() {
        let sql = "SELECT 1 /* keep */ UNION SELECT 2";
        let issues = run(sql);
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_LT_011);

        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert!(
            fixed.contains("/* keep */"),
            "LT011 autofix should preserve comment trivia: {fixed}"
        );
    }
}