Skip to main content

sqrust_rules/layout/
set_operator_new_line.rs

1use sqrust_core::{Diagnostic, FileContext, Rule};
2use crate::capitalisation::{is_word_char, SkipMap};
3
/// Layout rule reported as `Layout/SetOperatorNewLine`.
///
/// Flags `UNION`, `INTERSECT` and `EXCEPT` keywords (optionally followed by
/// `ALL` or `DISTINCT`) that are not alone on their line.
pub struct SetOperatorNewLine;
5
6impl Rule for SetOperatorNewLine {
7    fn name(&self) -> &'static str {
8        "Layout/SetOperatorNewLine"
9    }
10
11    fn check(&self, ctx: &FileContext) -> Vec<Diagnostic> {
12        let source = &ctx.source;
13        let bytes = source.as_bytes();
14        let len = bytes.len();
15        let skip_map = SkipMap::build(source);
16        let mut diags = Vec::new();
17        let mut i = 0;
18
19        while i < len {
20            if !skip_map.is_code(i) {
21                i += 1;
22                continue;
23            }
24
25            if let Some((kw_start, kw_end)) = try_set_op(bytes, &skip_map, i) {
26                // Skip ALL or DISTINCT after operator (horizontal whitespace only).
27                let mut after_kw = skip_ws_h(bytes, kw_end);
28                if let Some(e) = match_kw(bytes, &skip_map, after_kw, b"ALL") {
29                    after_kw = skip_ws_h(bytes, e);
30                } else if let Some(e) = match_kw(bytes, &skip_map, after_kw, b"DISTINCT") {
31                    after_kw = skip_ws_h(bytes, e);
32                }
33
34                // Check: only whitespace before kw_start on same line?
35                let newline_before = only_ws_before_on_line(bytes, kw_start);
36                // Check: at after_kw, is there a newline, EOF, or line comment?
37                let newline_after = after_kw >= len
38                    || bytes[after_kw] == b'\n'
39                    || bytes[after_kw] == b'\r'
40                    || (after_kw + 1 < len
41                        && bytes[after_kw] == b'-'
42                        && bytes[after_kw + 1] == b'-');
43
44                if !newline_before || !newline_after {
45                    let (line, col) = offset_to_line_col(source, kw_start);
46                    diags.push(Diagnostic {
47                        rule: "Layout/SetOperatorNewLine",
48                        message: "Set operator (UNION/INTERSECT/EXCEPT) must be on its own line, surrounded by newlines".to_string(),
49                        line,
50                        col,
51                    });
52                }
53
54                i = kw_end;
55                continue;
56            }
57
58            i += 1;
59        }
60
61        diags
62    }
63}
64
65fn try_set_op(bytes: &[u8], skip_map: &SkipMap, i: usize) -> Option<(usize, usize)> {
66    for kw in &[b"UNION" as &[u8], b"INTERSECT", b"EXCEPT"] {
67        if let Some(end) = match_kw(bytes, skip_map, i, kw) {
68            return Some((i, end));
69        }
70    }
71    None
72}
73
/// Returns `true` when nothing but spaces and tabs sits between the start of
/// the current line and byte offset `i`.
///
/// Walking backwards from `i`, the first byte that is neither a space nor a
/// tab decides the answer: a `\n` means the line start was reached (`true`),
/// anything else means other content precedes `i` on the line (`false`).
/// Reaching the start of the buffer also counts as a line start.
fn only_ws_before_on_line(bytes: &[u8], i: usize) -> bool {
    bytes[..i]
        .iter()
        .rev()
        .find(|&&b| b != b' ' && b != b'\t')
        .map_or(true, |&b| b == b'\n')
}
89
/// Advances past horizontal whitespace (spaces and tabs) starting at `i`.
///
/// Returns the offset of the first byte at or after `i` that is neither a
/// space nor a tab, or `bytes.len()` when the run reaches the end of the
/// buffer. An `i` beyond the end of `bytes` is returned unchanged.
fn skip_ws_h(bytes: &[u8], i: usize) -> usize {
    let run = bytes.get(i..).map_or(0, |rest| {
        rest.iter()
            .take_while(|&&b| b == b' ' || b == b'\t')
            .count()
    });
    i + run
}
96
97fn match_kw(bytes: &[u8], skip_map: &SkipMap, i: usize, kw: &[u8]) -> Option<usize> {
98    let len = bytes.len();
99    let kw_len = kw.len();
100    if i + kw_len > len {
101        return None;
102    }
103    if !skip_map.is_code(i) {
104        return None;
105    }
106    let before_ok = i == 0 || !is_word_char(bytes[i - 1]);
107    if !before_ok {
108        return None;
109    }
110    let matches = bytes[i..i + kw_len]
111        .iter()
112        .zip(kw.iter())
113        .all(|(&a, &b)| a.to_ascii_uppercase() == b.to_ascii_uppercase());
114    if !matches {
115        return None;
116    }
117    let end = i + kw_len;
118    if end < len && is_word_char(bytes[end]) {
119        return None;
120    }
121    Some(end)
122}
123
/// Converts a byte offset into a 1-based `(line, column)` pair.
///
/// Lines are delimited by `\n`. The column is the 1-based byte distance from
/// the start of the line that contains `offset`.
///
/// An `offset` past the end of `source` is clamped to `source.len()` for both
/// the line and the column, so the result always refers to a position inside
/// (or directly after) the text. The computation works on raw bytes, so an
/// offset that falls inside a multi-byte UTF-8 character does not panic.
fn offset_to_line_col(source: &str, offset: usize) -> (usize, usize) {
    let bytes = source.as_bytes();
    // Clamp once and use the clamped value everywhere, keeping line and
    // column consistent with each other.
    let offset = offset.min(bytes.len());
    let before = &bytes[..offset];

    let line = before.iter().filter(|&&b| b == b'\n').count() + 1;
    // Offset of the first byte of the current line.
    let line_start = before
        .iter()
        .rposition(|&b| b == b'\n')
        .map_or(0, |newline| newline + 1);

    (line, offset - line_start + 1)
}