sqrust_rules/lint/empty_string_comparison.rs
1use sqrust_core::{Diagnostic, FileContext, Rule};
2
/// Lint rule that reports comparisons against the empty string literal `''`.
///
/// The rule works on the raw source text: it looks for `=`, `!=` or `<>`
/// outside of strings, comments and quoted identifiers, followed by `''`,
/// and emits a diagnostic suggesting that NULL be checked as well.
pub struct EmptyStringComparison;
4
5impl Rule for EmptyStringComparison {
6 fn name(&self) -> &'static str {
7 "Lint/EmptyStringComparison"
8 }
9
10 fn check(&self, ctx: &FileContext) -> Vec<Diagnostic> {
11 let source = &ctx.source;
12 let bytes = source.as_bytes();
13 let len = bytes.len();
14 let skip = build_skip(bytes);
15
16 let mut diags = Vec::new();
17 let mut i = 0;
18
19 while i < len {
20 // Only examine bytes outside strings/comments.
21 if skip[i] {
22 i += 1;
23 continue;
24 }
25
26 // Try to match one of: `!=`, `<>`, `=` (in that order so we
27 // don't accidentally consume just the `!` or `<` alone).
28 let (op_len, op_start) = if i + 1 < len
29 && bytes[i] == b'!'
30 && bytes[i + 1] == b'='
31 && !skip[i + 1]
32 {
33 (2, i)
34 } else if i + 1 < len
35 && bytes[i] == b'<'
36 && bytes[i + 1] == b'>'
37 && !skip[i + 1]
38 {
39 (2, i)
40 } else if bytes[i] == b'=' {
41 (1, i)
42 } else {
43 i += 1;
44 continue;
45 };
46
47 let after_op = op_start + op_len;
48
49 // Skip whitespace after operator (outside skip regions).
50 let mut j = after_op;
51 while j < len && bytes[j].is_ascii_whitespace() && !skip[j] {
52 j += 1;
53 }
54
55 // Check for empty string: two consecutive single quotes.
56 // The skip table marks both quotes as `true` (they are the
57 // delimiters of an empty string literal), so we test the raw
58 // bytes directly — the operator position guards us against
59 // being inside a comment or string already.
60 if j + 1 < len && bytes[j] == b'\'' && bytes[j + 1] == b'\'' {
61 // Make sure this isn't the start of a longer string (e.g. `'''`)
62 // — three quotes would mean an escaped-quote string, not empty.
63 // Actually `'''` in SQL is the string `'` (one quote char).
64 // We still flag because the VALUE being compared to is a
65 // single-quote character, not truly empty; however the plan
66 // specifically calls out `'it''s'` as NOT flagged (escaped
67 // quote inside a string). The skip table handles that: inside
68 // `'it''s'`, position of the inner `''` is already in the skip
69 // region because the outer string started at the first `'`.
70 // Here we are OUTSIDE any skip region (the operator was outside
71 // skip), so `bytes[j]` is the start of a new literal.
72 // Two consecutive quotes with nothing else = empty string.
73 let (line, col) = line_col(source, op_start);
74 diags.push(Diagnostic {
75 rule: self.name(),
76 message: "Comparison with empty string; consider checking for NULL as well"
77 .to_string(),
78 line,
79 col,
80 });
81
82 // Advance past the operator and empty string so we don't
83 // re-scan them.
84 i = j + 2;
85 continue;
86 }
87
88 i += op_len;
89 }
90
91 diags
92 }
93}
94
/// Reports whether `ch` is an ASCII letter, an ASCII digit, or an underscore
/// (i.e. matches `[a-zA-Z0-9_]`).
#[inline]
fn is_word_char(ch: u8) -> bool {
    matches!(ch, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
}
100
/// Translates a byte offset into `source` into a 1-based `(line, col)` pair.
///
/// The column is measured in bytes from the start of the line containing
/// `offset`. Slicing panics if `offset` is past the end of `source` or does
/// not fall on a character boundary.
fn line_col(source: &str, offset: usize) -> (usize, usize) {
    let prefix = &source[..offset];

    // Each newline before the offset moves the position one line down.
    let line = 1 + prefix.bytes().filter(|&b| b == b'\n').count();

    // The column is the distance from the last newline, or from the start of
    // the text when the offset is on the first line.
    let col = match prefix.rfind('\n') {
        Some(newline_at) => offset - newline_at,
        None => offset + 1,
    };

    (line, col)
}
108
109/// Builds a skip table: `true` for every byte offset that is inside a
110/// string literal, line comment, block comment, or quoted identifier.
111fn build_skip(bytes: &[u8]) -> Vec<bool> {
112 let len = bytes.len();
113 let mut skip = vec![false; len];
114 let mut i = 0;
115
116 while i < len {
117 // Line comment: -- ... end-of-line
118 if i + 1 < len && bytes[i] == b'-' && bytes[i + 1] == b'-' {
119 skip[i] = true;
120 skip[i + 1] = true;
121 i += 2;
122 while i < len && bytes[i] != b'\n' {
123 skip[i] = true;
124 i += 1;
125 }
126 continue;
127 }
128
129 // Block comment: /* ... */
130 if i + 1 < len && bytes[i] == b'/' && bytes[i + 1] == b'*' {
131 skip[i] = true;
132 skip[i + 1] = true;
133 i += 2;
134 while i < len {
135 if i + 1 < len && bytes[i] == b'*' && bytes[i + 1] == b'/' {
136 skip[i] = true;
137 skip[i + 1] = true;
138 i += 2;
139 break;
140 }
141 skip[i] = true;
142 i += 1;
143 }
144 continue;
145 }
146
147 // Single-quoted string: '...' with '' as escaped quote
148 if bytes[i] == b'\'' {
149 skip[i] = true;
150 i += 1;
151 while i < len {
152 if bytes[i] == b'\'' {
153 skip[i] = true;
154 i += 1;
155 // '' inside a string is an escaped quote — continue in string
156 if i < len && bytes[i] == b'\'' {
157 skip[i] = true;
158 i += 1;
159 continue;
160 }
161 break; // end of string
162 }
163 skip[i] = true;
164 i += 1;
165 }
166 continue;
167 }
168
169 // Double-quoted identifier: "..."
170 if bytes[i] == b'"' {
171 skip[i] = true;
172 i += 1;
173 while i < len && bytes[i] != b'"' {
174 skip[i] = true;
175 i += 1;
176 }
177 if i < len {
178 skip[i] = true;
179 i += 1;
180 }
181 continue;
182 }
183
184 // Backtick identifier: `...`
185 if bytes[i] == b'`' {
186 skip[i] = true;
187 i += 1;
188 while i < len && bytes[i] != b'`' {
189 skip[i] = true;
190 i += 1;
191 }
192 if i < len {
193 skip[i] = true;
194 i += 1;
195 }
196 continue;
197 }
198
199 i += 1;
200 }
201
202 // Suppress unused-import lint — `is_word_char` is defined for potential
203 // future use (e.g. word-boundary guards around operators).
204 let _ = is_word_char;
205
206 skip
207}