use std::collections::{HashMap, HashSet};
use std::ops::Range;
use sqlparser::keywords::Keyword;
use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer, Whitespace};
use crate::analyzer::helpers::line_col_to_offset;
use crate::linter::config::canonicalize_rule_code;
use crate::types::{Dialect, Span};
/// A parsed statement together with its position inside the linted SQL text.
pub struct LintStatement<'a> {
/// Borrowed AST node of the statement.
pub statement: &'a sqlparser::ast::Statement,
/// Index of the statement; copied onto every token attributed to it.
pub statement_index: usize,
/// Offset range of the statement; a token belongs to the statement when the
/// token's start offset falls inside this half-open range.
pub statement_range: Range<usize>,
}
/// Coarse category assigned to each tokenizer token by `classify_token`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LintTokenKind {
/// A word token that the tokenizer recognised as a keyword.
Keyword,
/// A word token that is not a keyword.
Identifier,
/// Number and string literal tokens.
Literal,
/// Comparison, arithmetic and string-concatenation operator tokens.
Operator,
/// Punctuation such as commas, periods, brackets, colons and semicolons.
Symbol,
/// Single-line and multi-line comments.
Comment,
/// Whitespace that is not a comment.
Whitespace,
/// Any token not covered by the categories above.
Other,
}
/// A tokenizer token annotated with the information lint rules need.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LintToken {
/// Category of the token.
pub kind: LintTokenKind,
/// Offset span of the token, derived from the tokenizer's line/column span.
pub span: Span,
/// Textual rendering of the token (its `to_string()` output).
pub text: String,
/// Index of the statement whose range contains the token's start offset,
/// or `None` when no statement range contains it.
pub statement_index: Option<usize>,
}
/// Suppression recorded for a single line.
#[derive(Debug, Clone)]
enum NoqaDirective {
/// Every rule is suppressed on the line.
All,
/// Only the listed rule codes are suppressed on the line.
Rules(HashSet<String>),
}
/// A span of lines on which every rule is suppressed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct NoqaDisableRange {
/// First suppressed line (inclusive).
start_line: usize,
/// Last suppressed line (inclusive); `None` means the range never closes.
end_line: Option<usize>,
}
/// Lookup structure answering "is this rule suppressed on this line?".
#[derive(Debug, Clone, Default)]
pub struct NoqaMap {
/// Per-line suppressions, keyed by line number.
directives: HashMap<usize, NoqaDirective>,
/// Line ranges on which every rule is suppressed.
disable_all_ranges: Vec<NoqaDisableRange>,
}
impl NoqaMap {
    /// Reports whether a violation of rule `code` on `line` is silenced.
    ///
    /// A line is suppressed when it lies inside any recorded disable-all range
    /// (a range without an end line never closes), when the line carries a
    /// blanket suppression, or when its rule list contains `code`. The code is
    /// canonicalized before the lookup; when canonicalization yields nothing,
    /// the trimmed, upper-cased input is used instead.
    pub fn is_suppressed(&self, line: usize, code: &str) -> bool {
        let inside_disabled_range = self.disable_all_ranges.iter().any(|range| {
            let not_past_end = match range.end_line {
                Some(end_line) => line <= end_line,
                None => true,
            };
            range.start_line <= line && not_past_end
        });
        if inside_disabled_range {
            return true;
        }

        match self.directives.get(&line) {
            None => false,
            Some(NoqaDirective::All) => true,
            Some(NoqaDirective::Rules(rules)) => {
                let wanted = canonicalize_rule_code(code)
                    .unwrap_or_else(|| code.trim().to_ascii_uppercase());
                rules.contains(&wanted)
            }
        }
    }

    /// Marks `line` as suppressing every rule, replacing any rule list
    /// previously stored for it.
    fn suppress_all(&mut self, line: usize) {
        self.directives.insert(line, NoqaDirective::All);
    }

    /// Adds `codes` to the rules suppressed on `line`.
    ///
    /// A line that already suppresses everything is left untouched.
    fn suppress_rules(&mut self, line: usize, codes: HashSet<String>) {
        let slot = self
            .directives
            .entry(line)
            .or_insert_with(|| NoqaDirective::Rules(HashSet::new()));
        if let NoqaDirective::Rules(existing) = slot {
            existing.extend(codes);
        }
    }

    /// Records a range of lines on which every rule is suppressed; an
    /// `end_line` of `None` leaves the range open-ended.
    fn suppress_all_range(&mut self, start_line: usize, end_line: Option<usize>) {
        let range = NoqaDisableRange {
            start_line,
            end_line,
        };
        self.disable_all_ranges.push(range);
    }
}
/// Everything the linter knows about one SQL input: text, statements, tokens
/// and suppression directives.
pub struct LintDocument<'a> {
/// SQL text that was tokenized.
pub sql: &'a str,
/// Optional source SQL supplied by the caller alongside `sql`.
pub source_sql: Option<&'a str>,
/// Caller-supplied ranges, or one `None` per statement when none were given.
pub source_statement_ranges: Vec<Option<Range<usize>>>,
/// Dialect used for tokenization.
pub dialect: Dialect,
/// Statements contained in the document.
pub statements: Vec<LintStatement<'a>>,
/// Classified tokens; empty when tokenization failed.
pub tokens: Vec<LintToken>,
/// Tokens exactly as produced by the tokenizer; empty when tokenization failed.
pub raw_tokens: Vec<TokenWithSpan>,
/// Suppression directives extracted from comment tokens.
pub noqa: NoqaMap,
/// Flag passed in by the caller recording that a parser fallback was used.
pub parser_fallback_used: bool,
/// `true` when tokenization failed and the token lists were left empty.
pub tokenizer_fallback_used: bool,
}
impl<'a> LintDocument<'a> {
    /// Builds a document with no parser fallback, no separate source SQL and
    /// no source statement ranges.
    #[must_use]
    pub fn new(sql: &'a str, dialect: Dialect, statements: Vec<LintStatement<'a>>) -> Self {
        Self::new_with_parser_fallback(sql, dialect, statements, false)
    }

    /// Builds a document, recording whether a parser fallback was used.
    #[must_use]
    pub fn new_with_parser_fallback(
        sql: &'a str,
        dialect: Dialect,
        statements: Vec<LintStatement<'a>>,
        parser_fallback_used: bool,
    ) -> Self {
        let no_source_sql = None;
        let no_source_ranges = None;
        Self::new_with_parser_fallback_and_source(
            sql,
            no_source_sql,
            dialect,
            statements,
            parser_fallback_used,
            no_source_ranges,
        )
    }

    /// Builds a document with full control over every input.
    ///
    /// `sql` is tokenized with `dialect`; if tokenization fails the token
    /// lists are left empty and `tokenizer_fallback_used` is set. Suppression
    /// directives are extracted from the resulting tokens. When
    /// `source_statement_ranges` is `None`, one `None` entry per statement is
    /// stored instead.
    #[must_use]
    pub fn new_with_parser_fallback_and_source(
        sql: &'a str,
        source_sql: Option<&'a str>,
        dialect: Dialect,
        statements: Vec<LintStatement<'a>>,
        parser_fallback_used: bool,
        source_statement_ranges: Option<Vec<Option<Range<usize>>>>,
    ) -> Self {
        let tokenized = tokenize_sql(sql, dialect, &statements);
        let tokenizer_fallback_used = tokenized.is_err();
        let (tokens, raw_tokens) = tokenized.unwrap_or_else(|_| (Vec::new(), Vec::new()));

        let noqa = extract_noqa(sql, &tokens);

        let source_statement_ranges = match source_statement_ranges {
            Some(ranges) => ranges,
            None => vec![None; statements.len()],
        };

        Self {
            sql,
            source_sql,
            source_statement_ranges,
            dialect,
            statements,
            tokens,
            raw_tokens,
            noqa,
            parser_fallback_used,
            tokenizer_fallback_used,
        }
    }
}
/// Collects every `noqa` directive found in the comment tokens of `sql`.
///
/// Line-level directives apply to each line the comment spans. A
/// `disable=all` directive opens a range at the comment's first line (further
/// `disable=all` directives are ignored while one is open) and `enable=all`
/// closes it at the comment's last line. A range still open after the last
/// token is recorded without an end line.
fn extract_noqa(sql: &str, tokens: &[LintToken]) -> NoqaMap {
    let mut map = NoqaMap::default();
    let mut open_disable: Option<usize> = None;

    let comments = tokens
        .iter()
        .filter(|token| token.kind == LintTokenKind::Comment);

    for comment in comments {
        let directive = match parse_noqa_comment(&comment.text) {
            Some(directive) => directive,
            None => continue,
        };

        let first_line = offset_to_line(sql, comment.span.start);
        // The span end is exclusive; step back one so the last line is the
        // line of the comment's final character.
        let last_line = offset_to_line(sql, comment.span.end.saturating_sub(1));

        match directive {
            ParsedNoqa::All => {
                (first_line..=last_line).for_each(|line| map.suppress_all(line));
            }
            ParsedNoqa::Rules(rules) => {
                (first_line..=last_line).for_each(|line| map.suppress_rules(line, rules.clone()));
            }
            ParsedNoqa::DisableAll => {
                // Keep the earliest opening line when a range is already open.
                open_disable = open_disable.or(Some(first_line));
            }
            ParsedNoqa::EnableAll => {
                if let Some(opened_at) = open_disable.take() {
                    map.suppress_all_range(opened_at, Some(last_line));
                }
            }
        }
    }

    if let Some(opened_at) = open_disable {
        map.suppress_all_range(opened_at, None);
    }

    map
}
/// Outcome of parsing a single comment for a `noqa` directive.
enum ParsedNoqa {
/// Suppress every rule on the comment's lines.
All,
/// Suppress only the listed rule codes on the comment's lines.
Rules(HashSet<String>),
/// `noqa: disable=all` — start suppressing every rule.
DisableAll,
/// `noqa: enable=all` — stop a previously started disable-all range.
EnableAll,
}
/// Parses the text of one comment token into a `noqa` directive.
///
/// The `noqa` marker is matched case-insensitively and only counts when it is
/// the first thing in the comment body or directly follows a `--` (ignoring
/// whitespace). What follows the marker decides the result:
///
/// * nothing, text not starting with `:`, or an empty list after `:` gives
///   a blanket suppression;
/// * `disable=all` / `enable=all` (case-insensitive) give range directives;
/// * otherwise a comma-separated list of rule codes, each trimmed of
///   whitespace, quotes, backticks and semicolons and then canonicalized.
///
/// Returns `None` when there is no valid marker or when no listed code could
/// be canonicalized.
fn parse_noqa_comment(comment_text: &str) -> Option<ParsedNoqa> {
    const MARKER: &str = "noqa";

    let body = comment_body(comment_text);
    // ASCII lower-casing keeps byte offsets aligned with `body`.
    let lowered = body.to_ascii_lowercase();

    let marker_pos = lowered
        .match_indices(MARKER)
        .map(|(position, _)| position)
        .find(|&position| {
            let before = &body[..position];
            before.trim().is_empty() || before.trim_end().ends_with("--")
        })?;

    let after_marker = body[marker_pos + MARKER.len()..].trim();
    let rule_list = match after_marker.strip_prefix(':').map(str::trim) {
        Some(list) if !list.is_empty() => list,
        _ => return Some(ParsedNoqa::All),
    };

    if rule_list.eq_ignore_ascii_case("disable=all") {
        return Some(ParsedNoqa::DisableAll);
    }
    if rule_list.eq_ignore_ascii_case("enable=all") {
        return Some(ParsedNoqa::EnableAll);
    }

    let rules: HashSet<String> = rule_list
        .split(',')
        .map(|item| {
            item.trim()
                .trim_matches(|c: char| matches!(c, '"' | '\'' | '`' | ';'))
        })
        .filter(|item| !item.is_empty())
        .filter_map(|item| canonicalize_rule_code(item))
        .collect();

    if rules.is_empty() {
        None
    } else {
        Some(ParsedNoqa::Rules(rules))
    }
}
/// Strips comment delimiters and surrounding whitespace from `comment_text`.
///
/// Recognises `/* ... */` (both delimiters required), a leading `--` and a
/// leading `#`, tried in that order. Text matching none of them is returned
/// trimmed but otherwise unchanged.
fn comment_body(comment_text: &str) -> &str {
    let text = comment_text.trim();

    let block_comment = text
        .strip_prefix("/*")
        .and_then(|rest| rest.strip_suffix("*/"));

    block_comment
        .or_else(|| text.strip_prefix("--"))
        .or_else(|| text.strip_prefix('#'))
        .map_or(text, str::trim)
}
/// Converts a byte offset into a 1-based line number.
///
/// Counts the newline bytes strictly before `offset`; offsets past the end of
/// `sql` are clamped to its length.
fn offset_to_line(sql: &str, offset: usize) -> usize {
    let limit = offset.min(sql.len());
    let newlines_before = sql.as_bytes()[..limit]
        .iter()
        .filter(|&&byte| byte == b'\n')
        .count();
    newlines_before + 1
}
/// Tokenizes `sql` with the given dialect and classifies each token.
///
/// Returns the classified tokens together with the raw tokenizer output.
/// Tokens whose span cannot be converted to offsets are omitted from the
/// classified list but remain in the raw list. Each classified token is
/// attributed to the first statement whose range contains the token's start
/// offset.
///
/// # Errors
///
/// Returns the tokenizer's error message when tokenization fails.
fn tokenize_sql(
    sql: &str,
    dialect: Dialect,
    statements: &[LintStatement<'_>],
) -> Result<(Vec<LintToken>, Vec<TokenWithSpan>), String> {
    let parser_dialect = dialect.to_sqlparser_dialect();
    let raw_tokens: Vec<TokenWithSpan> =
        match Tokenizer::new(parser_dialect.as_ref(), sql).tokenize_with_location() {
            Ok(raw_tokens) => raw_tokens,
            Err(error) => return Err(error.to_string()),
        };

    // Index of the first statement whose half-open range covers `offset`.
    let owning_statement = |offset: usize| {
        statements
            .iter()
            .find(|candidate| {
                let range = &candidate.statement_range;
                range.start <= offset && offset < range.end
            })
            .map(|candidate| candidate.statement_index)
    };

    let lint_tokens: Vec<LintToken> = raw_tokens
        .iter()
        .filter_map(|raw| {
            let span = token_span_to_offsets(sql, &raw.span)?;
            let statement_index = owning_statement(span.start);
            Some(LintToken {
                kind: classify_token(&raw.token),
                span,
                text: raw.token.to_string(),
                statement_index,
            })
        })
        .collect();

    Ok((lint_tokens, raw_tokens))
}
/// Converts a tokenizer line/column span into an offset-based [`Span`].
///
/// Returns `None` when either endpoint cannot be resolved by
/// `line_col_to_offset`; the end is not looked up if the start fails.
fn token_span_to_offsets(sql: &str, span: &sqlparser::tokenizer::Span) -> Option<Span> {
    let (from, to) = (&span.start, &span.end);
    line_col_to_offset(sql, from.line as usize, from.column as usize).and_then(|start| {
        line_col_to_offset(sql, to.line as usize, to.column as usize)
            .map(|end| Span::new(start, end))
    })
}
/// Maps a tokenizer token onto the coarse [`LintTokenKind`] categories.
///
/// Words are keywords or identifiers depending on whether the tokenizer
/// tagged them with a keyword; comments are split out from other whitespace;
/// anything not explicitly listed becomes [`LintTokenKind::Other`].
fn classify_token(token: &Token) -> LintTokenKind {
    match token {
        Token::Word(word) => {
            if word.keyword == Keyword::NoKeyword {
                LintTokenKind::Identifier
            } else {
                LintTokenKind::Keyword
            }
        }
        Token::Whitespace(whitespace) => match whitespace {
            Whitespace::SingleLineComment { .. } | Whitespace::MultiLineComment(_) => {
                LintTokenKind::Comment
            }
            _ => LintTokenKind::Whitespace,
        },
        other => {
            let is_literal = matches!(
                other,
                Token::Number(_, _)
                    | Token::SingleQuotedString(_)
                    | Token::DoubleQuotedString(_)
                    | Token::NationalStringLiteral(_)
                    | Token::EscapedStringLiteral(_)
                    | Token::HexStringLiteral(_)
            );
            let is_operator = matches!(
                other,
                Token::Eq
                    | Token::Neq
                    | Token::Lt
                    | Token::Gt
                    | Token::LtEq
                    | Token::GtEq
                    | Token::Plus
                    | Token::Minus
                    | Token::Mul
                    | Token::Div
                    | Token::Mod
                    | Token::StringConcat
            );
            let is_symbol = matches!(
                other,
                Token::Comma
                    | Token::Period
                    | Token::LParen
                    | Token::RParen
                    | Token::SemiColon
                    | Token::LBracket
                    | Token::RBracket
                    | Token::LBrace
                    | Token::RBrace
                    | Token::Colon
                    | Token::DoubleColon
                    | Token::Assignment
            );

            if is_literal {
                LintTokenKind::Literal
            } else if is_operator {
                LintTokenKind::Operator
            } else if is_symbol {
                LintTokenKind::Symbol
            } else {
                LintTokenKind::Other
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::parse_sql_with_dialect;

    /// Tokens are attributed to the statement whose range contains them.
    #[test]
    fn builds_tokens_with_statement_mapping() {
        let sql = "SELECT 1; SELECT 2";
        let statements = parse_sql_with_dialect(sql, Dialect::Generic).expect("parse");
        // "SELECT 1" occupies bytes 0..8, the `;` sits at 8 and the separating
        // space at 9, so "SELECT 2" occupies 10..18 (the end of the input).
        let lint_statements = statements
            .iter()
            .enumerate()
            .map(|(index, statement)| LintStatement {
                statement,
                statement_index: index,
                statement_range: if index == 0 { 0..8 } else { 10..sql.len() },
            })
            .collect::<Vec<_>>();

        let document = LintDocument::new(sql, Dialect::Generic, lint_statements);

        assert!(!document.tokens.is_empty());
        assert!(document
            .tokens
            .iter()
            .any(|token| token.statement_index == Some(0)));
        assert!(document
            .tokens
            .iter()
            .any(|token| token.statement_index == Some(1)));
    }

    /// The parser-fallback flag passed to the constructor is stored as given.
    #[test]
    fn records_parser_fallback_provenance() {
        let sql = "SELECT 1";
        let statements = parse_sql_with_dialect(sql, Dialect::Generic).expect("parse");
        let lint_statements = statements
            .iter()
            .enumerate()
            .map(|(index, statement)| LintStatement {
                statement,
                statement_index: index,
                statement_range: 0..sql.len(),
            })
            .collect::<Vec<_>>();

        let document =
            LintDocument::new_with_parser_fallback(sql, Dialect::Generic, lint_statements, true);

        assert!(document.parser_fallback_used);
    }

    /// Rule-specific and blanket `noqa` comments apply to their own line only.
    #[test]
    fn parses_noqa_directives() {
        let sql = "SELECT a FROM foo -- noqa: AL01, ambiguous.join\nSELECT 1 -- noqa";
        let document = LintDocument::new(sql, Dialect::Generic, Vec::new());

        assert!(document.noqa.is_suppressed(1, "AL01"));
        assert!(document.noqa.is_suppressed(1, "LINT_AM_005"));
        assert!(!document.noqa.is_suppressed(1, "LINT_RF_001"));
        assert!(document.noqa.is_suppressed(2, "LINT_RF_001"));
    }

    /// `disable=all` suppresses everything until the matching `enable=all`.
    #[test]
    fn parses_disable_enable_all_noqa_directives() {
        let sql = "/* -- noqa: disable=all */\nSELECT 1\n/* noqa: enable=all */\nSELECT 2";
        let document = LintDocument::new(sql, Dialect::Generic, Vec::new());

        assert!(document.noqa.is_suppressed(2, "LINT_LT_005"));
        assert!(!document.noqa.is_suppressed(4, "LINT_LT_005"));
    }

    /// A `noqa` marker preceded by ordinary text (no `--`) is not a directive.
    #[test]
    fn ignores_invalid_disable_all_without_double_dash_prefix() {
        let sql = "/* This won't work: noqa: disable=all */\nSELECT 1";
        let document = LintDocument::new(sql, Dialect::Generic, Vec::new());

        assert!(!document.noqa.is_suppressed(2, "LINT_LT_005"));
    }

    /// Trailing text after `disable=all` invalidates the directive.
    #[test]
    fn ignores_invalid_disable_all_with_trailing_text() {
        let sql = "/* -- noqa: disable=all Invalid declaration */\nSELECT 1";
        let document = LintDocument::new(sql, Dialect::Generic, Vec::new());

        assert!(!document.noqa.is_suppressed(2, "LINT_LT_005"));
    }
}