1use std::collections::{HashMap, HashSet};
8use std::ops::Range;
9
10use sqlparser::keywords::Keyword;
11use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer, Whitespace};
12
13use crate::analyzer::helpers::line_col_to_offset;
14use crate::linter::config::canonicalize_rule_code;
15use crate::types::{Dialect, Span};
16
/// A parsed SQL statement paired with its position inside the linted source.
pub struct LintStatement<'a> {
    /// Parsed AST for this statement (borrowed from the parse result).
    pub statement: &'a sqlparser::ast::Statement,
    /// Zero-based index of the statement within the document.
    pub statement_index: usize,
    /// Offset range the statement occupies in the linted SQL text
    /// (compared against token span start offsets for attribution).
    pub statement_range: Range<usize>,
}
26
/// Coarse classification of a lexed token, consumed by lint rules.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LintTokenKind {
    /// A word sqlparser recognizes as a SQL keyword.
    Keyword,
    /// A word that is not a keyword (table, column, alias names, ...).
    Identifier,
    /// Number or string literal.
    Literal,
    /// Comparison, arithmetic, or string-concatenation operator.
    Operator,
    /// Punctuation: commas, parentheses, brackets, colons, etc.
    Symbol,
    /// Single-line (`--`/`#`) or multi-line (`/* */`) comment.
    Comment,
    /// Non-comment whitespace.
    Whitespace,
    /// Any token not covered by the other variants.
    Other,
}
39
/// A single lexed token with its classification and location.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LintToken {
    /// Coarse token category (keyword, identifier, comment, ...).
    pub kind: LintTokenKind,
    /// Offset span of the token within the linted SQL text.
    pub span: Span,
    /// The token rendered back to text.
    pub text: String,
    /// Index of the statement whose range contains this token's start
    /// offset, or `None` when the token falls outside every statement.
    pub statement_index: Option<usize>,
}
48
/// Suppression requested by a single per-line `noqa` comment.
#[derive(Debug, Clone)]
enum NoqaDirective {
    /// Suppress every rule on the line.
    All,
    /// Suppress only the listed (canonicalized) rule codes.
    Rules(HashSet<String>),
}
54
/// Line range covered by a `noqa: disable=all` ... `noqa: enable=all` pair.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct NoqaDisableRange {
    /// First suppressed line (1-based, inclusive).
    start_line: usize,
    /// Last suppressed line (inclusive); `None` when the disable block was
    /// never re-enabled and runs to the end of the document.
    end_line: Option<usize>,
}
60
/// All `noqa` suppressions extracted from a document's comment tokens.
#[derive(Debug, Clone, Default)]
pub struct NoqaMap {
    /// Per-line directives keyed by 1-based line number.
    directives: HashMap<usize, NoqaDirective>,
    /// Line ranges in which every rule is disabled via `disable=all`.
    disable_all_ranges: Vec<NoqaDisableRange>,
}
67
68impl NoqaMap {
69 pub fn is_suppressed(&self, line: usize, code: &str) -> bool {
71 if self.disable_all_ranges.iter().any(|range| {
72 line >= range.start_line
73 && range
74 .end_line
75 .map(|end_line| line <= end_line)
76 .unwrap_or(true)
77 }) {
78 return true;
79 }
80
81 let Some(directive) = self.directives.get(&line) else {
82 return false;
83 };
84
85 match directive {
86 NoqaDirective::All => true,
87 NoqaDirective::Rules(rules) => {
88 let canonical = canonicalize_rule_code(code)
89 .unwrap_or_else(|| code.trim().to_ascii_uppercase());
90 rules.contains(&canonical)
91 }
92 }
93 }
94
95 fn suppress_all(&mut self, line: usize) {
96 self.directives.insert(line, NoqaDirective::All);
97 }
98
99 fn suppress_rules(&mut self, line: usize, codes: HashSet<String>) {
100 match self.directives.get_mut(&line) {
101 Some(NoqaDirective::All) => {}
102 Some(NoqaDirective::Rules(existing)) => existing.extend(codes),
103 None => {
104 self.directives.insert(line, NoqaDirective::Rules(codes));
105 }
106 }
107 }
108
109 fn suppress_all_range(&mut self, start_line: usize, end_line: Option<usize>) {
110 self.disable_all_ranges.push(NoqaDisableRange {
111 start_line,
112 end_line,
113 });
114 }
115}
116
/// Everything lint rules need in order to inspect one SQL document.
pub struct LintDocument<'a> {
    /// The SQL text that was parsed and tokenized.
    pub sql: &'a str,
    /// Original SQL before transformation, when it differs from `sql`.
    /// NOTE(review): presumably set when preprocessing/templating rewrote the
    /// input — confirm with callers.
    pub source_sql: Option<&'a str>,
    /// Per-statement ranges into the original source, when known; defaults to
    /// all `None` when no mapping is supplied.
    pub source_statement_ranges: Vec<Option<Range<usize>>>,
    /// Dialect used for parsing and tokenization.
    pub dialect: Dialect,
    /// Parsed statements with their positions.
    pub statements: Vec<LintStatement<'a>>,
    /// Classified tokens derived from `raw_tokens`.
    pub tokens: Vec<LintToken>,
    /// Raw sqlparser tokens with location information.
    pub raw_tokens: Vec<TokenWithSpan>,
    /// `noqa` suppressions extracted from comment tokens.
    pub noqa: NoqaMap,
    /// `true` when the caller reports the statements came from a fallback
    /// parsing path.
    pub parser_fallback_used: bool,
    /// `true` when tokenization failed; `tokens`/`raw_tokens` are then empty.
    pub tokenizer_fallback_used: bool,
}
130
131impl<'a> LintDocument<'a> {
132 #[must_use]
134 pub fn new(sql: &'a str, dialect: Dialect, statements: Vec<LintStatement<'a>>) -> Self {
135 Self::new_with_parser_fallback_and_source(sql, None, dialect, statements, false, None)
136 }
137
138 #[must_use]
140 pub fn new_with_parser_fallback(
141 sql: &'a str,
142 dialect: Dialect,
143 statements: Vec<LintStatement<'a>>,
144 parser_fallback_used: bool,
145 ) -> Self {
146 Self::new_with_parser_fallback_and_source(
147 sql,
148 None,
149 dialect,
150 statements,
151 parser_fallback_used,
152 None,
153 )
154 }
155
156 #[must_use]
159 pub fn new_with_parser_fallback_and_source(
160 sql: &'a str,
161 source_sql: Option<&'a str>,
162 dialect: Dialect,
163 statements: Vec<LintStatement<'a>>,
164 parser_fallback_used: bool,
165 source_statement_ranges: Option<Vec<Option<Range<usize>>>>,
166 ) -> Self {
167 let (tokens, raw_tokens, tokenizer_fallback_used) =
168 match tokenize_sql(sql, dialect, &statements) {
169 Ok((tokens, raw_tokens)) => (tokens, raw_tokens, false),
170 Err(_) => (Vec::new(), Vec::new(), true),
171 };
172 let noqa = extract_noqa(sql, &tokens);
173
174 Self {
175 sql,
176 source_sql,
177 source_statement_ranges: source_statement_ranges
178 .unwrap_or_else(|| vec![None; statements.len()]),
179 dialect,
180 statements,
181 tokens,
182 raw_tokens,
183 noqa,
184 parser_fallback_used,
185 tokenizer_fallback_used,
186 }
187 }
188}
189
190fn extract_noqa(sql: &str, tokens: &[LintToken]) -> NoqaMap {
191 let mut directives = NoqaMap::default();
192 let mut disable_all_start: Option<usize> = None;
193
194 for token in tokens {
195 if token.kind != LintTokenKind::Comment {
196 continue;
197 }
198
199 let Some(parsed) = parse_noqa_comment(&token.text) else {
200 continue;
201 };
202
203 let start_line = offset_to_line(sql, token.span.start);
204 let end_offset = token.span.end.saturating_sub(1);
205 let end_line = offset_to_line(sql, end_offset);
206 match parsed {
207 ParsedNoqa::All => {
208 for line in start_line..=end_line {
209 directives.suppress_all(line);
210 }
211 }
212 ParsedNoqa::Rules(rules) => {
213 for line in start_line..=end_line {
214 directives.suppress_rules(line, rules.clone());
215 }
216 }
217 ParsedNoqa::DisableAll => {
218 if disable_all_start.is_none() {
219 disable_all_start = Some(start_line);
220 }
221 }
222 ParsedNoqa::EnableAll => {
223 if let Some(start_line) = disable_all_start.take() {
224 directives.suppress_all_range(start_line, Some(end_line));
225 }
226 }
227 }
228 }
229
230 if let Some(start_line) = disable_all_start {
231 directives.suppress_all_range(start_line, None);
232 }
233
234 directives
235}
236
/// The meaning of a single parsed `noqa` comment.
enum ParsedNoqa {
    /// `noqa` with no rule list: suppress everything on the line.
    All,
    /// `noqa: CODE[, CODE...]`: suppress the listed canonical codes.
    Rules(HashSet<String>),
    /// `noqa: disable=all`: begin suppressing every rule.
    DisableAll,
    /// `noqa: enable=all`: end a `disable=all` block.
    EnableAll,
}
243
244fn parse_noqa_comment(comment_text: &str) -> Option<ParsedNoqa> {
245 let body = comment_body(comment_text);
246 let lowered = body.to_ascii_lowercase();
247 let mut search_start = 0usize;
248 let mut marker_pos = None;
249
250 while let Some(rel) = lowered[search_start..].find("noqa") {
251 let absolute = search_start + rel;
252 let prefix = &body[..absolute];
253 if prefix.trim().is_empty() || prefix.trim_end().ends_with("--") {
254 marker_pos = Some(absolute);
255 break;
256 }
257 search_start = absolute + 4;
258 }
259
260 let marker_pos = marker_pos?;
261 let suffix = body[marker_pos + 4..].trim();
262
263 if suffix.is_empty() {
264 return Some(ParsedNoqa::All);
265 }
266
267 let Some(rule_list) = suffix.strip_prefix(':') else {
268 return Some(ParsedNoqa::All);
269 };
270 let rule_list = rule_list.trim();
271 if rule_list.is_empty() {
272 return Some(ParsedNoqa::All);
273 }
274
275 if rule_list.eq_ignore_ascii_case("disable=all") {
276 return Some(ParsedNoqa::DisableAll);
277 }
278 if rule_list.eq_ignore_ascii_case("enable=all") {
279 return Some(ParsedNoqa::EnableAll);
280 }
281
282 let mut rules = HashSet::new();
283 for item in rule_list.split(',') {
284 let token = item
285 .trim()
286 .trim_matches(|c: char| matches!(c, '"' | '\'' | '`' | ';'));
287 if token.is_empty() {
288 continue;
289 }
290 if let Some(code) = canonicalize_rule_code(token) {
291 rules.insert(code);
292 }
293 }
294
295 if rules.is_empty() {
296 return None;
297 }
298
299 Some(ParsedNoqa::Rules(rules))
300}
301
/// Strips comment delimiters (`/* */`, `--`, `#`) and surrounding whitespace,
/// returning the comment's inner text. Text without a recognized delimiter is
/// returned trimmed but otherwise unchanged.
fn comment_body(comment_text: &str) -> &str {
    let text = comment_text.trim();
    match text
        .strip_prefix("/*")
        .and_then(|inner| inner.strip_suffix("*/"))
    {
        Some(inner) => inner.trim(),
        None => text
            .strip_prefix("--")
            .or_else(|| text.strip_prefix('#'))
            .map(str::trim)
            .unwrap_or(text),
    }
}
318
/// Converts a byte offset into a 1-based line number by counting the
/// newlines that precede it. Offsets past the end of `sql` are clamped.
fn offset_to_line(sql: &str, offset: usize) -> usize {
    let clamped = offset.min(sql.len());
    sql.bytes().take(clamped).filter(|&b| b == b'\n').count() + 1
}
327
328fn tokenize_sql(
329 sql: &str,
330 dialect: Dialect,
331 statements: &[LintStatement<'_>],
332) -> Result<(Vec<LintToken>, Vec<TokenWithSpan>), String> {
333 let dialect = dialect.to_sqlparser_dialect();
334 let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
335 let raw_tokens: Vec<TokenWithSpan> = tokenizer
336 .tokenize_with_location()
337 .map_err(|error| error.to_string())?;
338
339 let mut out = Vec::with_capacity(raw_tokens.len());
340
341 for token in &raw_tokens {
342 let Some(span) = token_span_to_offsets(sql, &token.span) else {
343 continue;
344 };
345
346 let statement_index = statements
347 .iter()
348 .find(|statement| {
349 span.start >= statement.statement_range.start
350 && span.start < statement.statement_range.end
351 })
352 .map(|statement| statement.statement_index);
353
354 out.push(LintToken {
355 kind: classify_token(&token.token),
356 span,
357 text: token.token.to_string(),
358 statement_index,
359 });
360 }
361
362 Ok((out, raw_tokens))
363}
364
365fn token_span_to_offsets(sql: &str, span: &sqlparser::tokenizer::Span) -> Option<Span> {
366 let start = line_col_to_offset(sql, span.start.line as usize, span.start.column as usize)?;
367 let end = line_col_to_offset(sql, span.end.line as usize, span.end.column as usize)?;
368 Some(Span::new(start, end))
369}
370
371fn classify_token(token: &Token) -> LintTokenKind {
372 match token {
373 Token::Word(word) if word.keyword != Keyword::NoKeyword => LintTokenKind::Keyword,
374 Token::Word(_) => LintTokenKind::Identifier,
375 Token::Number(_, _)
376 | Token::SingleQuotedString(_)
377 | Token::DoubleQuotedString(_)
378 | Token::NationalStringLiteral(_)
379 | Token::EscapedStringLiteral(_)
380 | Token::HexStringLiteral(_) => LintTokenKind::Literal,
381 Token::Eq
382 | Token::Neq
383 | Token::Lt
384 | Token::Gt
385 | Token::LtEq
386 | Token::GtEq
387 | Token::Plus
388 | Token::Minus
389 | Token::Mul
390 | Token::Div
391 | Token::Mod
392 | Token::StringConcat => LintTokenKind::Operator,
393 Token::Comma
394 | Token::Period
395 | Token::LParen
396 | Token::RParen
397 | Token::SemiColon
398 | Token::LBracket
399 | Token::RBracket
400 | Token::LBrace
401 | Token::RBrace
402 | Token::Colon
403 | Token::DoubleColon
404 | Token::Assignment => LintTokenKind::Symbol,
405 Token::Whitespace(Whitespace::SingleLineComment { .. })
406 | Token::Whitespace(Whitespace::MultiLineComment(_)) => LintTokenKind::Comment,
407 Token::Whitespace(_) => LintTokenKind::Whitespace,
408 _ => LintTokenKind::Other,
409 }
410}
411
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::parse_sql_with_dialect;

    // Tokens must be attributed to the statement whose range contains them.
    #[test]
    fn builds_tokens_with_statement_mapping() {
        let sql = "SELECT 1; SELECT 2";
        let statements = parse_sql_with_dialect(sql, Dialect::Generic).expect("parse");

        let lint_statements = statements
            .iter()
            .enumerate()
            .map(|(index, statement)| LintStatement {
                statement,
                statement_index: index,
                // Hand-computed offset ranges for "SELECT 1" and "SELECT 2".
                statement_range: if index == 0 { 0..8 } else { 9..17 },
            })
            .collect::<Vec<_>>();

        let document = LintDocument::new(sql, Dialect::Generic, lint_statements);

        assert!(!document.tokens.is_empty());
        assert!(document
            .tokens
            .iter()
            .any(|token| token.statement_index == Some(0)));
        assert!(document
            .tokens
            .iter()
            .any(|token| token.statement_index == Some(1)));
    }

    // The `parser_fallback_used` flag must be carried through the constructor.
    #[test]
    fn records_parser_fallback_provenance() {
        let sql = "SELECT 1";
        let statements = parse_sql_with_dialect(sql, Dialect::Generic).expect("parse");
        let lint_statements = statements
            .iter()
            .enumerate()
            .map(|(index, statement)| LintStatement {
                statement,
                statement_index: index,
                statement_range: 0..sql.len(),
            })
            .collect::<Vec<_>>();

        let document =
            LintDocument::new_with_parser_fallback(sql, Dialect::Generic, lint_statements, true);

        assert!(document.parser_fallback_used);
    }

    // Per-line `noqa` directives: rule lists suppress only the listed codes,
    // a bare `noqa` suppresses everything on its line.
    #[test]
    fn parses_noqa_directives() {
        let sql = "SELECT a FROM foo -- noqa: AL01, ambiguous.join\nSELECT 1 -- noqa";
        let document = LintDocument::new(sql, Dialect::Generic, Vec::new());

        assert!(document.noqa.is_suppressed(1, "AL01"));
        assert!(document.noqa.is_suppressed(1, "LINT_AM_005"));
        assert!(!document.noqa.is_suppressed(1, "LINT_RF_001"));
        assert!(document.noqa.is_suppressed(2, "LINT_RF_001"));
    }

    // A disable=all/enable=all pair suppresses lines inside the range only.
    #[test]
    fn parses_disable_enable_all_noqa_directives() {
        let sql = "/* -- noqa: disable=all */\nSELECT 1\n/* noqa: enable=all */\nSELECT 2";
        let document = LintDocument::new(sql, Dialect::Generic, Vec::new());

        assert!(document.noqa.is_suppressed(2, "LINT_LT_005"));
        assert!(!document.noqa.is_suppressed(4, "LINT_LT_005"));
    }

    // In a block comment, `noqa` must be the first word or preceded by `--`;
    // other leading text invalidates the directive.
    #[test]
    fn ignores_invalid_disable_all_without_double_dash_prefix() {
        let sql = "/* This won't work: noqa: disable=all */\nSELECT 1";
        let document = LintDocument::new(sql, Dialect::Generic, Vec::new());
        assert!(!document.noqa.is_suppressed(2, "LINT_LT_005"));
    }

    // Trailing text after `disable=all` makes the rule list unrecognizable,
    // so the directive is dropped entirely.
    #[test]
    fn ignores_invalid_disable_all_with_trailing_text() {
        let sql = "/* -- noqa: disable=all Invalid declaration */\nSELECT 1";
        let document = LintDocument::new(sql, Dialect::Generic, Vec::new());
        assert!(!document.noqa.is_suppressed(2, "LINT_LT_005"));
    }
}
498}