// flowscope_core/linter/document.rs
//! Shared linter document model.
//!
//! A `LintDocument` is constructed once per SQL source and reused across all
//! rule engines. It carries source text, dialect metadata, parsed statements,
//! and tokenizer output with stable spans.

7use std::collections::{HashMap, HashSet};
8use std::ops::Range;
9
10use sqlparser::keywords::Keyword;
11use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer, Whitespace};
12
13use crate::analyzer::helpers::line_col_to_offset;
14use crate::linter::config::canonicalize_rule_code;
15use crate::types::{Dialect, Span};
16
17/// A parsed statement entry within a lint document.
18pub struct LintStatement<'a> {
19    /// Parsed statement AST.
20    pub statement: &'a sqlparser::ast::Statement,
21    /// Zero-based statement index in the overall analysis batch.
22    pub statement_index: usize,
23    /// Byte range of the statement within the source SQL.
24    pub statement_range: Range<usize>,
25}
26
/// Token class used by lexical/document lint engines.
///
/// Assigned by `classify_token` from the raw sqlparser token stream.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LintTokenKind {
    /// A word the dialect recognises as a keyword.
    Keyword,
    /// A word with no keyword meaning (table, column, alias names, ...).
    Identifier,
    /// Number or string literal (single/double-quoted, national, escaped, hex).
    Literal,
    /// Comparison, arithmetic, or string-concatenation operator.
    Operator,
    /// Punctuation: commas, parens, brackets, braces, colons, semicolon, assignment.
    Symbol,
    /// Single-line or multi-line comment (sqlparser tokenizes these as whitespace).
    Comment,
    /// Non-comment whitespace.
    Whitespace,
    /// Any token not covered by the classes above.
    Other,
}
39
/// A token emitted by the SQL tokenizer with stable source span.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LintToken {
    /// Lexical class of the token.
    pub kind: LintTokenKind,
    /// Byte-offset span of the token within the source SQL.
    pub span: Span,
    /// Textual rendering of the token (the tokenizer's `Display` output).
    pub text: String,
    /// Index of the statement whose byte range contains the token's start
    /// offset, or `None` when the token falls outside every statement.
    pub statement_index: Option<usize>,
}
48
/// Suppression recorded for a single source line.
#[derive(Debug, Clone)]
enum NoqaDirective {
    /// Bare `-- noqa`: suppress every rule on the line.
    All,
    /// `-- noqa: <codes>`: suppress only the listed canonical rule codes.
    Rules(HashSet<String>),
}
54
/// A `disable=all` .. `enable=all` region, in 1-based line numbers (inclusive).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct NoqaDisableRange {
    /// First suppressed line (the line of the `disable=all` comment).
    start_line: usize,
    /// Last suppressed line, or `None` when the region runs to end of file.
    end_line: Option<usize>,
}
60
/// `-- noqa` suppression directives indexed by 1-based line number.
#[derive(Debug, Clone, Default)]
pub struct NoqaMap {
    /// Per-line directives: bare `noqa` or rule-specific code lists.
    directives: HashMap<usize, NoqaDirective>,
    /// Multi-line `disable=all`/`enable=all` suppression regions.
    disable_all_ranges: Vec<NoqaDisableRange>,
}
67
68impl NoqaMap {
69    /// Returns true if `code` is suppressed on `line`.
70    pub fn is_suppressed(&self, line: usize, code: &str) -> bool {
71        if self.disable_all_ranges.iter().any(|range| {
72            line >= range.start_line
73                && range
74                    .end_line
75                    .map(|end_line| line <= end_line)
76                    .unwrap_or(true)
77        }) {
78            return true;
79        }
80
81        let Some(directive) = self.directives.get(&line) else {
82            return false;
83        };
84
85        match directive {
86            NoqaDirective::All => true,
87            NoqaDirective::Rules(rules) => {
88                let canonical = canonicalize_rule_code(code)
89                    .unwrap_or_else(|| code.trim().to_ascii_uppercase());
90                rules.contains(&canonical)
91            }
92        }
93    }
94
95    fn suppress_all(&mut self, line: usize) {
96        self.directives.insert(line, NoqaDirective::All);
97    }
98
99    fn suppress_rules(&mut self, line: usize, codes: HashSet<String>) {
100        match self.directives.get_mut(&line) {
101            Some(NoqaDirective::All) => {}
102            Some(NoqaDirective::Rules(existing)) => existing.extend(codes),
103            None => {
104                self.directives.insert(line, NoqaDirective::Rules(codes));
105            }
106        }
107    }
108
109    fn suppress_all_range(&mut self, start_line: usize, end_line: Option<usize>) {
110        self.disable_all_ranges.push(NoqaDisableRange {
111            start_line,
112            end_line,
113        });
114    }
115}
116
/// Normalized lint input model for a single SQL source.
pub struct LintDocument<'a> {
    /// The SQL text that was parsed and tokenized.
    pub sql: &'a str,
    /// Optional untemplated source text this SQL was rendered from.
    pub source_sql: Option<&'a str>,
    /// Per-statement byte ranges into the untemplated source; `None` entries
    /// mean no mapping is available for that statement.
    pub source_statement_ranges: Vec<Option<Range<usize>>>,
    /// Dialect used for parsing and tokenization.
    pub dialect: Dialect,
    /// Parsed statements with their indices and byte ranges.
    pub statements: Vec<LintStatement<'a>>,
    /// Classified tokens with byte-offset spans and statement attribution.
    pub tokens: Vec<LintToken>,
    /// Raw tokenizer output, retained alongside the classified tokens.
    pub raw_tokens: Vec<TokenWithSpan>,
    /// `-- noqa` suppression directives extracted from comments.
    pub noqa: NoqaMap,
    /// True when statement parsing fell back to a degraded path (set by caller).
    pub parser_fallback_used: bool,
    /// True when tokenization failed and the token lists were left empty.
    pub tokenizer_fallback_used: bool,
}
130
131impl<'a> LintDocument<'a> {
132    /// Build a lint document from source SQL and parsed statements.
133    #[must_use]
134    pub fn new(sql: &'a str, dialect: Dialect, statements: Vec<LintStatement<'a>>) -> Self {
135        Self::new_with_parser_fallback_and_source(sql, None, dialect, statements, false, None)
136    }
137
138    /// Build a lint document with parser fallback provenance metadata.
139    #[must_use]
140    pub fn new_with_parser_fallback(
141        sql: &'a str,
142        dialect: Dialect,
143        statements: Vec<LintStatement<'a>>,
144        parser_fallback_used: bool,
145    ) -> Self {
146        Self::new_with_parser_fallback_and_source(
147            sql,
148            None,
149            dialect,
150            statements,
151            parser_fallback_used,
152            None,
153        )
154    }
155
156    /// Build a lint document with parser fallback metadata and optional
157    /// untemplated source mapping.
158    #[must_use]
159    pub fn new_with_parser_fallback_and_source(
160        sql: &'a str,
161        source_sql: Option<&'a str>,
162        dialect: Dialect,
163        statements: Vec<LintStatement<'a>>,
164        parser_fallback_used: bool,
165        source_statement_ranges: Option<Vec<Option<Range<usize>>>>,
166    ) -> Self {
167        let (tokens, raw_tokens, tokenizer_fallback_used) =
168            match tokenize_sql(sql, dialect, &statements) {
169                Ok((tokens, raw_tokens)) => (tokens, raw_tokens, false),
170                Err(_) => (Vec::new(), Vec::new(), true),
171            };
172        let noqa = extract_noqa(sql, &tokens);
173
174        Self {
175            sql,
176            source_sql,
177            source_statement_ranges: source_statement_ranges
178                .unwrap_or_else(|| vec![None; statements.len()]),
179            dialect,
180            statements,
181            tokens,
182            raw_tokens,
183            noqa,
184            parser_fallback_used,
185            tokenizer_fallback_used,
186        }
187    }
188}
189
190fn extract_noqa(sql: &str, tokens: &[LintToken]) -> NoqaMap {
191    let mut directives = NoqaMap::default();
192    let mut disable_all_start: Option<usize> = None;
193
194    for token in tokens {
195        if token.kind != LintTokenKind::Comment {
196            continue;
197        }
198
199        let Some(parsed) = parse_noqa_comment(&token.text) else {
200            continue;
201        };
202
203        let start_line = offset_to_line(sql, token.span.start);
204        let end_offset = token.span.end.saturating_sub(1);
205        let end_line = offset_to_line(sql, end_offset);
206        match parsed {
207            ParsedNoqa::All => {
208                for line in start_line..=end_line {
209                    directives.suppress_all(line);
210                }
211            }
212            ParsedNoqa::Rules(rules) => {
213                for line in start_line..=end_line {
214                    directives.suppress_rules(line, rules.clone());
215                }
216            }
217            ParsedNoqa::DisableAll => {
218                if disable_all_start.is_none() {
219                    disable_all_start = Some(start_line);
220                }
221            }
222            ParsedNoqa::EnableAll => {
223                if let Some(start_line) = disable_all_start.take() {
224                    directives.suppress_all_range(start_line, Some(end_line));
225                }
226            }
227        }
228    }
229
230    if let Some(start_line) = disable_all_start {
231        directives.suppress_all_range(start_line, None);
232    }
233
234    directives
235}
236
/// Result of parsing one comment's `noqa` directive.
enum ParsedNoqa {
    /// Bare `noqa` (or `noqa` without a `:` rule list): suppress everything.
    All,
    /// `noqa: <codes>` with at least one recognised rule code.
    Rules(HashSet<String>),
    /// `noqa: disable=all`: open a suppress-everything region.
    DisableAll,
    /// `noqa: enable=all`: close the currently open region, if any.
    EnableAll,
}
243
244fn parse_noqa_comment(comment_text: &str) -> Option<ParsedNoqa> {
245    let body = comment_body(comment_text);
246    let lowered = body.to_ascii_lowercase();
247    let mut search_start = 0usize;
248    let mut marker_pos = None;
249
250    while let Some(rel) = lowered[search_start..].find("noqa") {
251        let absolute = search_start + rel;
252        let prefix = &body[..absolute];
253        if prefix.trim().is_empty() || prefix.trim_end().ends_with("--") {
254            marker_pos = Some(absolute);
255            break;
256        }
257        search_start = absolute + 4;
258    }
259
260    let marker_pos = marker_pos?;
261    let suffix = body[marker_pos + 4..].trim();
262
263    if suffix.is_empty() {
264        return Some(ParsedNoqa::All);
265    }
266
267    let Some(rule_list) = suffix.strip_prefix(':') else {
268        return Some(ParsedNoqa::All);
269    };
270    let rule_list = rule_list.trim();
271    if rule_list.is_empty() {
272        return Some(ParsedNoqa::All);
273    }
274
275    if rule_list.eq_ignore_ascii_case("disable=all") {
276        return Some(ParsedNoqa::DisableAll);
277    }
278    if rule_list.eq_ignore_ascii_case("enable=all") {
279        return Some(ParsedNoqa::EnableAll);
280    }
281
282    let mut rules = HashSet::new();
283    for item in rule_list.split(',') {
284        let token = item
285            .trim()
286            .trim_matches(|c: char| matches!(c, '"' | '\'' | '`' | ';'));
287        if token.is_empty() {
288            continue;
289        }
290        if let Some(code) = canonicalize_rule_code(token) {
291            rules.insert(code);
292        }
293    }
294
295    if rules.is_empty() {
296        return None;
297    }
298
299    Some(ParsedNoqa::Rules(rules))
300}
301
/// Strip comment delimiters and surrounding whitespace from comment text.
///
/// Handles `/* ... */`, `--`, and `#` comment forms; any other input is
/// returned trimmed but otherwise unchanged.
fn comment_body(comment_text: &str) -> &str {
    let trimmed = comment_text.trim();

    let block_inner = trimmed
        .strip_prefix("/*")
        .and_then(|rest| rest.strip_suffix("*/"));

    match block_inner {
        Some(inner) => inner.trim(),
        None => trimmed
            .strip_prefix("--")
            .or_else(|| trimmed.strip_prefix('#'))
            .map_or(trimmed, str::trim),
    }
}
318
/// Convert a byte offset into a 1-based line number.
///
/// Offsets past the end of `sql` are clamped to the final line.
fn offset_to_line(sql: &str, offset: usize) -> usize {
    let clamped = offset.min(sql.len());
    let newlines = sql.bytes().take(clamped).filter(|&b| b == b'\n').count();
    newlines + 1
}
327
328fn tokenize_sql(
329    sql: &str,
330    dialect: Dialect,
331    statements: &[LintStatement<'_>],
332) -> Result<(Vec<LintToken>, Vec<TokenWithSpan>), String> {
333    let dialect = dialect.to_sqlparser_dialect();
334    let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
335    let raw_tokens: Vec<TokenWithSpan> = tokenizer
336        .tokenize_with_location()
337        .map_err(|error| error.to_string())?;
338
339    let mut out = Vec::with_capacity(raw_tokens.len());
340
341    for token in &raw_tokens {
342        let Some(span) = token_span_to_offsets(sql, &token.span) else {
343            continue;
344        };
345
346        let statement_index = statements
347            .iter()
348            .find(|statement| {
349                span.start >= statement.statement_range.start
350                    && span.start < statement.statement_range.end
351            })
352            .map(|statement| statement.statement_index);
353
354        out.push(LintToken {
355            kind: classify_token(&token.token),
356            span,
357            text: token.token.to_string(),
358            statement_index,
359        });
360    }
361
362    Ok((out, raw_tokens))
363}
364
365fn token_span_to_offsets(sql: &str, span: &sqlparser::tokenizer::Span) -> Option<Span> {
366    let start = line_col_to_offset(sql, span.start.line as usize, span.start.column as usize)?;
367    let end = line_col_to_offset(sql, span.end.line as usize, span.end.column as usize)?;
368    Some(Span::new(start, end))
369}
370
371fn classify_token(token: &Token) -> LintTokenKind {
372    match token {
373        Token::Word(word) if word.keyword != Keyword::NoKeyword => LintTokenKind::Keyword,
374        Token::Word(_) => LintTokenKind::Identifier,
375        Token::Number(_, _)
376        | Token::SingleQuotedString(_)
377        | Token::DoubleQuotedString(_)
378        | Token::NationalStringLiteral(_)
379        | Token::EscapedStringLiteral(_)
380        | Token::HexStringLiteral(_) => LintTokenKind::Literal,
381        Token::Eq
382        | Token::Neq
383        | Token::Lt
384        | Token::Gt
385        | Token::LtEq
386        | Token::GtEq
387        | Token::Plus
388        | Token::Minus
389        | Token::Mul
390        | Token::Div
391        | Token::Mod
392        | Token::StringConcat => LintTokenKind::Operator,
393        Token::Comma
394        | Token::Period
395        | Token::LParen
396        | Token::RParen
397        | Token::SemiColon
398        | Token::LBracket
399        | Token::RBracket
400        | Token::LBrace
401        | Token::RBrace
402        | Token::Colon
403        | Token::DoubleColon
404        | Token::Assignment => LintTokenKind::Symbol,
405        Token::Whitespace(Whitespace::SingleLineComment { .. })
406        | Token::Whitespace(Whitespace::MultiLineComment(_)) => LintTokenKind::Comment,
407        Token::Whitespace(_) => LintTokenKind::Whitespace,
408        _ => LintTokenKind::Other,
409    }
410}
411
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::parse_sql_with_dialect;

    #[test]
    fn builds_tokens_with_statement_mapping() {
        // Two statements; the byte ranges below are hard-coded to match
        // "SELECT 1" (0..8) and "SELECT 2" (9..17) in the source string.
        let sql = "SELECT 1; SELECT 2";
        let statements = parse_sql_with_dialect(sql, Dialect::Generic).expect("parse");

        let lint_statements = statements
            .iter()
            .enumerate()
            .map(|(index, statement)| LintStatement {
                statement,
                statement_index: index,
                statement_range: if index == 0 { 0..8 } else { 9..17 },
            })
            .collect::<Vec<_>>();

        let document = LintDocument::new(sql, Dialect::Generic, lint_statements);

        // Tokenization succeeded, and tokens were attributed to both statements.
        assert!(!document.tokens.is_empty());
        assert!(document
            .tokens
            .iter()
            .any(|token| token.statement_index == Some(0)));
        assert!(document
            .tokens
            .iter()
            .any(|token| token.statement_index == Some(1)));
    }

    #[test]
    fn records_parser_fallback_provenance() {
        // The fallback flag passed to the constructor must be surfaced as-is.
        let sql = "SELECT 1";
        let statements = parse_sql_with_dialect(sql, Dialect::Generic).expect("parse");
        let lint_statements = statements
            .iter()
            .enumerate()
            .map(|(index, statement)| LintStatement {
                statement,
                statement_index: index,
                statement_range: 0..sql.len(),
            })
            .collect::<Vec<_>>();

        let document =
            LintDocument::new_with_parser_fallback(sql, Dialect::Generic, lint_statements, true);

        assert!(document.parser_fallback_used);
    }

    #[test]
    fn parses_noqa_directives() {
        // Line 1: rule-specific suppression (code + alias); line 2: blanket noqa.
        let sql = "SELECT a FROM foo -- noqa: AL01, ambiguous.join\nSELECT 1 -- noqa";
        let document = LintDocument::new(sql, Dialect::Generic, Vec::new());

        assert!(document.noqa.is_suppressed(1, "AL01"));
        assert!(document.noqa.is_suppressed(1, "LINT_AM_005"));
        assert!(!document.noqa.is_suppressed(1, "LINT_RF_001"));
        assert!(document.noqa.is_suppressed(2, "LINT_RF_001"));
    }

    #[test]
    fn parses_disable_enable_all_noqa_directives() {
        // Lines between disable=all and enable=all are suppressed; lines after
        // the enable=all comment are not.
        let sql = "/* -- noqa: disable=all */\nSELECT 1\n/* noqa: enable=all */\nSELECT 2";
        let document = LintDocument::new(sql, Dialect::Generic, Vec::new());

        assert!(document.noqa.is_suppressed(2, "LINT_LT_005"));
        assert!(!document.noqa.is_suppressed(4, "LINT_LT_005"));
    }

    #[test]
    fn ignores_invalid_disable_all_without_double_dash_prefix() {
        // Inside a block comment, the marker must follow `--` (or start the
        // body); arbitrary leading text invalidates the directive.
        let sql = "/* This won't work: noqa: disable=all */\nSELECT 1";
        let document = LintDocument::new(sql, Dialect::Generic, Vec::new());
        assert!(!document.noqa.is_suppressed(2, "LINT_LT_005"));
    }

    #[test]
    fn ignores_invalid_disable_all_with_trailing_text() {
        // `disable=all` must be the entire rule list; trailing text makes it
        // an ordinary (unrecognised) rule list, which is dropped.
        let sql = "/* -- noqa: disable=all Invalid declaration */\nSELECT 1";
        let document = LintDocument::new(sql, Dialect::Generic, Vec::new());
        assert!(!document.noqa.is_suppressed(2, "LINT_LT_005"));
    }
}