Skip to main content

sqrust_rules/lint/
create_sequence_statement.rs

1use sqrust_core::{Diagnostic, FileContext, Rule};
2use std::collections::HashSet;
3
4pub struct CreateSequenceStatement;
5
6impl Rule for CreateSequenceStatement {
7    fn name(&self) -> &'static str {
8        "Lint/CreateSequenceStatement"
9    }
10
11    fn check(&self, ctx: &FileContext) -> Vec<Diagnostic> {
12        let source = &ctx.source;
13        let skip = build_skip_set(source);
14        let lower = source.to_lowercase();
15        let bytes = lower.as_bytes();
16        let len = bytes.len();
17        let mut diags = Vec::new();
18        let mut i = 0;
19
20        while i < len {
21            // Skip positions inside strings or comments.
22            if skip.contains(&i) {
23                i += 1;
24                continue;
25            }
26
27            // Look for "create" keyword (case-insensitive via lower).
28            if !lower[i..].starts_with("create") {
29                i += 1;
30                continue;
31            }
32
33            let create_start = i;
34            let create_end = i + 6;
35
36            // Word-boundary check before "create".
37            let before_ok = create_start == 0 || {
38                let b = bytes[create_start - 1];
39                !b.is_ascii_alphanumeric() && b != b'_'
40            };
41
42            // Word-boundary check after "create".
43            let after_ok = create_end >= len || {
44                let b = bytes[create_end];
45                !b.is_ascii_alphanumeric() && b != b'_'
46            };
47
48            if !before_ok || !after_ok {
49                i += 1;
50                continue;
51            }
52
53            // Skip whitespace to find next keyword.
54            let mut j = create_end;
55            while j < len
56                && (bytes[j] == b' '
57                    || bytes[j] == b'\t'
58                    || bytes[j] == b'\r'
59                    || bytes[j] == b'\n')
60                && !skip.contains(&j)
61            {
62                j += 1;
63            }
64
65            // Skip over optional "OR REPLACE" (word boundaries checked loosely — just skip if present).
66            // Not needed for sequences, but skip "IF NOT EXISTS" tokens or just check for "sequence".
67            // Directly check for the "sequence" keyword (allowing "IF NOT EXISTS" between CREATE and SEQUENCE
68            // by scanning forward for "sequence" with word boundaries before the next semicolon or EOF).
69            let stmt_end = find_stmt_end(&lower, &skip, j);
70            let stmt_slice = &lower[j..stmt_end];
71
72            if contains_word_boundary_keyword(stmt_slice, "sequence") {
73                let (line, col) = offset_to_line_col(source, create_start);
74                diags.push(Diagnostic {
75                    rule: self.name(),
76                    message: "CREATE SEQUENCE is not universally supported; MySQL uses AUTO_INCREMENT, SQLite uses AUTOINCREMENT — check dialect compatibility".to_string(),
77                    line,
78                    col,
79                });
80
81                // Advance past this statement.
82                i = stmt_end + 1;
83                continue;
84            }
85
86            i += 1;
87        }
88
89        diags
90    }
91}
92
/// Locates the end of the statement that starts at or after `from`.
///
/// Returns the byte offset of the first `;` at or beyond `from` whose offset is
/// not contained in `skip`. When no such `;` exists, the length of `lower` is
/// returned instead.
fn find_stmt_end(lower: &str, skip: &HashSet<usize>, from: usize) -> usize {
    lower
        .bytes()
        .enumerate()
        .skip(from)
        .find(|(idx, byte)| *byte == b';' && !skip.contains(idx))
        .map_or(lower.len(), |(idx, _)| idx)
}
107
/// Returns `true` if `keyword` appears in `text` as a whole word.
///
/// An occurrence counts as a whole word when the byte immediately before it and
/// the byte immediately after it are each either absent (start/end of `text`)
/// or not an ASCII letter, ASCII digit or `_`. The comparison is
/// case-sensitive, so callers are expected to pass text and keyword in the
/// same case.
fn contains_word_boundary_keyword(text: &str, keyword: &str) -> bool {
    let bytes = text.as_bytes();
    let text_len = bytes.len();
    let kw_len = keyword.len();
    let is_word_byte = |b: u8| b.is_ascii_alphanumeric() || b == b'_';
    let mut search_from = 0;

    while search_from < text_len {
        let Some(rel) = text[search_from..].find(keyword) else {
            break;
        };
        let start = search_from + rel;
        let end = start + kw_len;

        let before_ok = start == 0 || !is_word_byte(bytes[start - 1]);
        let after_ok = end >= text_len || !is_word_byte(bytes[end]);
        if before_ok && after_ok {
            return true;
        }

        // Resume one whole character later. Stepping a single byte could land
        // inside a multi-byte character and make the next slice panic.
        let step = text[start..].chars().next().map_or(1, char::len_utf8);
        search_from = start + step;
    }

    false
}
139
/// Converts a byte offset in `source` to a 1-indexed `(line, col)` pair.
///
/// `line` is one more than the number of `\n` bytes before `offset`; `col` is
/// the 1-indexed distance in bytes from the start of that line.
///
/// The computation works on raw bytes, so an `offset` that falls inside a
/// multi-byte character does not panic. An `offset` beyond the end of `source`
/// is clamped to `source.len()`.
fn offset_to_line_col(source: &str, offset: usize) -> (usize, usize) {
    let bytes = source.as_bytes();
    let end = offset.min(bytes.len());
    let before = &bytes[..end];

    let line = before.iter().filter(|&&b| b == b'\n').count() + 1;
    let col = match before.iter().rposition(|&b| b == b'\n') {
        // Bytes after the last newline, plus one for 1-indexing.
        Some(newline) => end - newline,
        None => end + 1,
    };
    (line, col)
}
147
/// Collects the byte offsets of `source` that the scanner must ignore.
///
/// The returned set contains every byte (delimiters included) belonging to:
/// - a line comment: `--` up to, but not including, the next `\n`;
/// - a block comment: `/*` through the first following `*/`;
/// - a single-quoted string, where `''` inside the string is an escaped quote;
/// - a double-quoted identifier: `"..."`;
/// - a backtick identifier: `` `...` ``.
///
/// A construct that is never closed extends to the end of `source`.
fn build_skip_set(source: &str) -> HashSet<usize> {
    let bytes = source.as_bytes();
    let len = bytes.len();
    let mut skip = HashSet::new();
    let mut pos = 0;

    while pos < len {
        let next = bytes.get(pos + 1).copied();

        // `region_end` is the exclusive end of the region that opens at `pos`.
        let region_end = match (bytes[pos], next) {
            // Line comment: runs until the newline, which is not part of it.
            (b'-', Some(b'-')) => bytes[pos + 2..]
                .iter()
                .position(|&b| b == b'\n')
                .map_or(len, |off| pos + 2 + off),

            // Block comment: the terminator is searched after the opening `/*`.
            (b'/', Some(b'*')) => bytes[pos + 2..]
                .windows(2)
                .position(|pair| pair[0] == b'*' && pair[1] == b'/')
                .map_or(len, |off| pos + 2 + off + 2),

            // Single-quoted string: a doubled quote continues the string.
            (b'\'', _) => {
                let mut cursor = pos + 1;
                loop {
                    match bytes[cursor..].iter().position(|&b| b == b'\'') {
                        None => break len,
                        Some(off) => {
                            let quote = cursor + off;
                            if bytes.get(quote + 1) == Some(&b'\'') {
                                cursor = quote + 2;
                            } else {
                                break quote + 1;
                            }
                        }
                    }
                }
            }

            // Quoted identifier: ends at the next matching delimiter.
            (delim @ (b'"' | b'`'), _) => bytes[pos + 1..]
                .iter()
                .position(|&b| b == delim)
                .map_or(len, |off| pos + 1 + off + 1),

            _ => {
                pos += 1;
                continue;
            }
        };

        skip.extend(pos..region_end);
        pos = region_end;
    }

    skip
}