Skip to main content

sqrust_rules/lint/
empty_string_comparison.rs

1use sqrust_core::{Diagnostic, FileContext, Rule};
2
/// Lint rule that reports comparisons against the empty string literal `''`.
///
/// The type carries no state, so it is trivially copyable and constructible
/// via [`Default`].
#[derive(Debug, Clone, Copy, Default)]
pub struct EmptyStringComparison;
4
5impl Rule for EmptyStringComparison {
6    fn name(&self) -> &'static str {
7        "Lint/EmptyStringComparison"
8    }
9
10    fn check(&self, ctx: &FileContext) -> Vec<Diagnostic> {
11        let source = &ctx.source;
12        let bytes = source.as_bytes();
13        let len = bytes.len();
14        let skip = build_skip(bytes);
15
16        let mut diags = Vec::new();
17        let mut i = 0;
18
19        while i < len {
20            // Only examine bytes outside strings/comments.
21            if skip[i] {
22                i += 1;
23                continue;
24            }
25
26            // Try to match one of: `!=`, `<>`, `=` (in that order so we
27            // don't accidentally consume just the `!` or `<` alone).
28            let (op_len, op_start) = if i + 1 < len
29                && bytes[i] == b'!'
30                && bytes[i + 1] == b'='
31                && !skip[i + 1]
32            {
33                (2, i)
34            } else if i + 1 < len
35                && bytes[i] == b'<'
36                && bytes[i + 1] == b'>'
37                && !skip[i + 1]
38            {
39                (2, i)
40            } else if bytes[i] == b'=' {
41                (1, i)
42            } else {
43                i += 1;
44                continue;
45            };
46
47            let after_op = op_start + op_len;
48
49            // Skip whitespace after operator (outside skip regions).
50            let mut j = after_op;
51            while j < len && bytes[j].is_ascii_whitespace() && !skip[j] {
52                j += 1;
53            }
54
55            // Check for empty string: two consecutive single quotes.
56            // The skip table marks both quotes as `true` (they are the
57            // delimiters of an empty string literal), so we test the raw
58            // bytes directly — the operator position guards us against
59            // being inside a comment or string already.
60            if j + 1 < len && bytes[j] == b'\'' && bytes[j + 1] == b'\'' {
61                // Make sure this isn't the start of a longer string (e.g. `'''`)
62                // — three quotes would mean an escaped-quote string, not empty.
63                // Actually `'''` in SQL is the string `'` (one quote char).
64                // We still flag because the VALUE being compared to is a
65                // single-quote character, not truly empty; however the plan
66                // specifically calls out `'it''s'` as NOT flagged (escaped
67                // quote inside a string). The skip table handles that: inside
68                // `'it''s'`, position of the inner `''` is already in the skip
69                // region because the outer string started at the first `'`.
70                // Here we are OUTSIDE any skip region (the operator was outside
71                // skip), so `bytes[j]` is the start of a new literal.
72                // Two consecutive quotes with nothing else = empty string.
73                let (line, col) = line_col(source, op_start);
74                diags.push(Diagnostic {
75                    rule: self.name(),
76                    message: "Comparison with empty string; consider checking for NULL as well"
77                        .to_string(),
78                    line,
79                    col,
80                });
81
82                // Advance past the operator and empty string so we don't
83                // re-scan them.
84                i = j + 2;
85                continue;
86            }
87
88            i += op_len;
89        }
90
91        diags
92    }
93}
94
/// Reports whether `ch` is an ASCII letter, an ASCII digit, or an underscore
/// (i.e. matches `[a-zA-Z0-9_]`).
#[inline]
fn is_word_char(ch: u8) -> bool {
    matches!(ch, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
}
100
/// Translates a byte `offset` into `source` to a 1-based `(line, col)` pair.
///
/// The line is one more than the number of `\n` bytes before `offset`; the
/// column is the byte distance from the start of that line, plus one.
/// Slicing panics if `offset` is out of range or not on a char boundary.
fn line_col(source: &str, offset: usize) -> (usize, usize) {
    let prefix = &source[..offset];
    let newlines_before = prefix.bytes().filter(|&b| b == b'\n').count();
    // Byte index where the current line begins (just past the last newline).
    let line_start = match prefix.rfind('\n') {
        Some(newline_at) => newline_at + 1,
        None => 0,
    };
    (newlines_before + 1, offset - line_start + 1)
}
108
109/// Builds a skip table: `true` for every byte offset that is inside a
110/// string literal, line comment, block comment, or quoted identifier.
111fn build_skip(bytes: &[u8]) -> Vec<bool> {
112    let len = bytes.len();
113    let mut skip = vec![false; len];
114    let mut i = 0;
115
116    while i < len {
117        // Line comment: -- ... end-of-line
118        if i + 1 < len && bytes[i] == b'-' && bytes[i + 1] == b'-' {
119            skip[i] = true;
120            skip[i + 1] = true;
121            i += 2;
122            while i < len && bytes[i] != b'\n' {
123                skip[i] = true;
124                i += 1;
125            }
126            continue;
127        }
128
129        // Block comment: /* ... */
130        if i + 1 < len && bytes[i] == b'/' && bytes[i + 1] == b'*' {
131            skip[i] = true;
132            skip[i + 1] = true;
133            i += 2;
134            while i < len {
135                if i + 1 < len && bytes[i] == b'*' && bytes[i + 1] == b'/' {
136                    skip[i] = true;
137                    skip[i + 1] = true;
138                    i += 2;
139                    break;
140                }
141                skip[i] = true;
142                i += 1;
143            }
144            continue;
145        }
146
147        // Single-quoted string: '...' with '' as escaped quote
148        if bytes[i] == b'\'' {
149            skip[i] = true;
150            i += 1;
151            while i < len {
152                if bytes[i] == b'\'' {
153                    skip[i] = true;
154                    i += 1;
155                    // '' inside a string is an escaped quote — continue in string
156                    if i < len && bytes[i] == b'\'' {
157                        skip[i] = true;
158                        i += 1;
159                        continue;
160                    }
161                    break; // end of string
162                }
163                skip[i] = true;
164                i += 1;
165            }
166            continue;
167        }
168
169        // Double-quoted identifier: "..."
170        if bytes[i] == b'"' {
171            skip[i] = true;
172            i += 1;
173            while i < len && bytes[i] != b'"' {
174                skip[i] = true;
175                i += 1;
176            }
177            if i < len {
178                skip[i] = true;
179                i += 1;
180            }
181            continue;
182        }
183
184        // Backtick identifier: `...`
185        if bytes[i] == b'`' {
186            skip[i] = true;
187            i += 1;
188            while i < len && bytes[i] != b'`' {
189                skip[i] = true;
190                i += 1;
191            }
192            if i < len {
193                skip[i] = true;
194                i += 1;
195            }
196            continue;
197        }
198
199        i += 1;
200    }
201
202    // Suppress unused-import lint — `is_word_char` is defined for potential
203    // future use (e.g. word-boundary guards around operators).
204    let _ = is_word_char;
205
206    skip
207}