Skip to main content

sqrust_rules/layout/
select_target_new_line.rs

1use sqrust_core::{Diagnostic, FileContext, Rule};
2use crate::capitalisation::{is_word_char, SkipMap};
3
4pub struct SelectTargetNewLine;
5
6impl Rule for SelectTargetNewLine {
7    fn name(&self) -> &'static str {
8        "Layout/SelectTargetNewLine"
9    }
10
11    fn check(&self, ctx: &FileContext) -> Vec<Diagnostic> {
12        let source = &ctx.source;
13        let bytes = source.as_bytes();
14        let len = bytes.len();
15        let skip_map = SkipMap::build(source);
16        let mut diags = Vec::new();
17        let mut i = 0;
18
19        while i < len {
20            if !skip_map.is_code(i) {
21                i += 1;
22                continue;
23            }
24
25            if let Some(after_select) = match_kw(bytes, &skip_map, i, b"SELECT") {
26                // Skip DISTINCT or ALL.
27                let mut pos = skip_ws(bytes, after_select);
28                if let Some(p) = match_kw(bytes, &skip_map, pos, b"DISTINCT") {
29                    pos = skip_ws(bytes, p);
30                } else if let Some(p) = match_kw(bytes, &skip_map, pos, b"ALL") {
31                    pos = skip_ws(bytes, p);
32                }
33
34                if let Some(violation_pos) = scan_select_targets(bytes, &skip_map, pos) {
35                    let (line, col) = offset_to_line_col(source, violation_pos);
36                    diags.push(Diagnostic {
37                        rule: "Layout/SelectTargetNewLine",
38                        message: "Multiple SELECT columns on the same line; put each column on its own line".to_string(),
39                        line,
40                        col,
41                    });
42                }
43                i = after_select;
44                continue;
45            }
46
47            i += 1;
48        }
49
50        diags
51    }
52}
53
54/// Scans the SELECT column list starting at `scan_start`.
55/// Returns `Some(pos)` at the position of the first column that is not on its
56/// own line (i.e., two consecutive columns are not separated by a newline).
57/// Returns `None` if every column separation is newline-delimited.
58///
59/// The check per top-level comma: after the comma, is there a newline before
60/// the next non-whitespace, non-skip code byte? If not → violation at the
61/// position right after the comma (the next column starts there on the same line).
62fn scan_select_targets(bytes: &[u8], skip_map: &SkipMap, scan_start: usize) -> Option<usize> {
63    let len = bytes.len();
64    let stop_kws: &[&[u8]] = &[
65        b"FROM", b"WHERE", b"GROUP", b"ORDER", b"HAVING", b"LIMIT",
66        b"UNION", b"INTERSECT", b"EXCEPT", b"FETCH",
67    ];
68    let mut i = scan_start;
69    let mut depth = 0i32;
70
71    while i < len {
72        if !skip_map.is_code(i) {
73            i += 1;
74            continue;
75        }
76        let b = bytes[i];
77
78        if b == b'(' {
79            depth += 1;
80            i += 1;
81            continue;
82        }
83        if b == b')' {
84            if depth > 0 {
85                depth -= 1;
86                i += 1;
87                continue;
88            } else {
89                break;
90            }
91        }
92
93        if depth == 0 {
94            // Check stop keywords.
95            let mut at_stop = false;
96            for kw in stop_kws {
97                if match_kw(bytes, skip_map, i, kw).is_some() {
98                    at_stop = true;
99                    break;
100                }
101            }
102            if at_stop || b == b';' {
103                break;
104            }
105
106            if b == b',' {
107                // After this comma, check if there is a newline before the next column.
108                // Scan forward through whitespace and skip-map bytes; if we reach
109                // a code byte without passing through a '\n', it's a violation.
110                let comma_pos = i;
111                let mut j = i + 1;
112                let mut found_newline = false;
113                while j < len {
114                    if skip_map.is_code(j) && bytes[j] == b'\n' {
115                        found_newline = true;
116                        break;
117                    }
118                    // If we hit a non-whitespace code byte before a newline, violation.
119                    if skip_map.is_code(j)
120                        && bytes[j] != b' '
121                        && bytes[j] != b'\t'
122                        && bytes[j] != b'\r'
123                    {
124                        // Also check if we're at a stop keyword — if so, no more columns.
125                        let mut at_stop2 = false;
126                        for kw in stop_kws {
127                            if match_kw(bytes, skip_map, j, kw).is_some() {
128                                at_stop2 = true;
129                                break;
130                            }
131                        }
132                        if !at_stop2 && bytes[j] != b';' {
133                            return Some(comma_pos + 1);
134                        }
135                        break;
136                    }
137                    j += 1;
138                }
139                if found_newline {
140                    i += 1;
141                    continue;
142                }
143                i += 1;
144                continue;
145            }
146        }
147
148        i += 1;
149    }
150
151    // If we found commas but no violations above, also check that the
152    // first column is not on the same line as the second. The comma-after
153    // check above handles this since it checks from AFTER each comma.
154    // Return None if no violation found.
155    None
156}
157
158fn match_kw(bytes: &[u8], skip_map: &SkipMap, i: usize, kw: &[u8]) -> Option<usize> {
159    let len = bytes.len();
160    let kw_len = kw.len();
161    if i + kw_len > len {
162        return None;
163    }
164    if !skip_map.is_code(i) {
165        return None;
166    }
167    let before_ok = i == 0 || !is_word_char(bytes[i - 1]);
168    if !before_ok {
169        return None;
170    }
171    let matches = bytes[i..i + kw_len]
172        .iter()
173        .zip(kw.iter())
174        .all(|(&a, &b)| a.to_ascii_uppercase() == b.to_ascii_uppercase());
175    if !matches {
176        return None;
177    }
178    let end = i + kw_len;
179    if end < len && is_word_char(bytes[end]) {
180        return None;
181    }
182    Some(end)
183}
184
/// Advances `i` past any run of spaces, tabs, line feeds and carriage returns
/// and returns the offset of the first byte that is none of those (or the
/// end of `bytes`). An `i` at or beyond the end is returned unchanged.
fn skip_ws(bytes: &[u8], mut i: usize) -> usize {
    while let Some(&b) = bytes.get(i) {
        if !matches!(b, b' ' | b'\t' | b'\n' | b'\r') {
            break;
        }
        i += 1;
    }
    i
}
193
/// Converts a byte `offset` into `source` to a 1-based `(line, column)` pair.
///
/// The column is counted in bytes from the start of the line. An `offset`
/// past the end of `source` is clamped to the end for both the line and the
/// column computation.
///
/// The work is done on the raw bytes rather than on a `str` slice so that an
/// offset falling inside a multi-byte UTF-8 character cannot panic. Counting
/// `\n` bytes is equivalent to counting `'\n'` chars because the byte 0x0A
/// never occurs inside a multi-byte UTF-8 sequence.
fn offset_to_line_col(source: &str, offset: usize) -> (usize, usize) {
    let bytes = source.as_bytes();
    let offset = offset.min(bytes.len());
    let before = &bytes[..offset];

    let line = before.iter().filter(|&&b| b == b'\n').count() + 1;
    // Byte index at which the current line begins (just after the last newline).
    let line_start = before
        .iter()
        .rposition(|&b| b == b'\n')
        .map_or(0, |newline| newline + 1);

    (line, offset - line_start + 1)
}