Skip to main content

flowscope_core/linter/
mod.rs

//! SQL linter module.
//!
//! Provides a modular linting system split into semantic, lexical, and document
//! engines. Semantic checks are AST-driven, while lexical/document checks can
//! use tokenizer-level context.
6
7pub mod config;
8pub mod document;
9pub mod helpers;
10pub mod rule;
11pub mod rules;
12pub(crate) mod visit;
13
14use config::LintConfig;
15use document::{LintDocument, LintStatement};
16use rule::{
17    with_active_dialect, with_active_document_tokens, with_active_is_templated, LintContext,
18    LintRule,
19};
20use sqlparser::ast::Statement;
21use std::borrow::Cow;
22
23use crate::{
24    parser::parse_sql,
25    types::{Issue, LintConfidence, LintEngine, LintFallbackSource, Severity},
26    Dialect,
27};
28
/// The SQL linter, holding a set of rules and configuration.
pub struct Linter {
    /// Rule instances evaluated by `check_document`; built in `Linter::new`
    /// from `rules::all_rules(&config)`.
    rules: Vec<Box<dyn LintRule>>,
    /// Lint configuration: the master `enabled` flag, per-rule enablement
    /// (`is_rule_enabled`), and core options (`core_option_bool`).
    config: LintConfig,
}
34
35impl Linter {
36    /// Creates a new linter with the given configuration.
37    pub fn new(config: LintConfig) -> Self {
38        Self {
39            rules: rules::all_rules(&config),
40            config,
41        }
42    }
43
44    /// Returns true if linting is enabled.
45    pub fn is_enabled(&self) -> bool {
46        self.config.enabled
47    }
48
49    /// Checks a full lint document across semantic, lexical, and document engines.
50    pub fn check_document(&self, document: &LintDocument<'_>) -> Vec<Issue> {
51        if !self.config.enabled {
52            return Vec::new();
53        }
54
55        let is_templated = document.source_sql.is_some();
56        with_active_is_templated(is_templated, || {
57            with_active_document_tokens(&document.raw_tokens, || {
58                let mut issues = Vec::new();
59
60                for engine in [
61                    LintEngine::Semantic,
62                    LintEngine::Lexical,
63                    LintEngine::Document,
64                ] {
65                    for rule in &self.rules {
66                        if !self.config.is_rule_enabled(rule.code())
67                            || rule_engine(rule.code()) != engine
68                            || !rule_supported_in_dialect(rule.code(), document.dialect)
69                        {
70                            continue;
71                        }
72
73                        let (confidence, fallback) =
74                            lint_quality_for_rule(rule.code(), engine, document);
75
76                        if rule_uses_document_scope(rule.code()) {
77                            let Some(synthetic_statement) = parse_sql("SELECT 1")
78                                .ok()
79                                .and_then(|mut statements| statements.drain(..).next())
80                            else {
81                                continue;
82                            };
83
84                            let document_scope_sql =
85                                document_scope_sql_for_rule(&self.config, rule.code(), document);
86                            let ctx = LintContext {
87                                sql: document_scope_sql.as_ref(),
88                                statement_range: 0..document_scope_sql.len(),
89                                statement_index: 0,
90                            };
91
92                            with_active_dialect(document.dialect, || {
93                                for issue in rule.check(&synthetic_statement, &ctx) {
94                                    let mut issue = issue
95                                        .with_lint_engine(engine)
96                                        .with_lint_confidence(confidence);
97
98                                    if let Some(source) = fallback {
99                                        issue = issue.with_lint_fallback_source(source);
100                                    }
101
102                                    let sqlfluff_name = rule.sqlfluff_name();
103                                    if !sqlfluff_name.is_empty() {
104                                        issue = issue.with_sqlfluff_name(sqlfluff_name);
105                                    }
106
107                                    issues.push(issue);
108                                }
109                            });
110                            continue;
111                        }
112
113                        if document.statements.is_empty() {
114                            if !rule_supports_statementless_fallback(rule.code()) {
115                                continue;
116                            }
117
118                            let Some(synthetic_statement) = parse_sql("SELECT 1")
119                                .ok()
120                                .and_then(|mut statements| statements.drain(..).next())
121                            else {
122                                continue;
123                            };
124
125                            let ctx = LintContext {
126                                sql: document.sql,
127                                statement_range: 0..document.sql.len(),
128                                statement_index: 0,
129                            };
130
131                            with_active_dialect(document.dialect, || {
132                                for issue in rule.check(&synthetic_statement, &ctx) {
133                                    let mut issue = issue
134                                        .with_lint_engine(engine)
135                                        .with_lint_confidence(confidence);
136
137                                    if let Some(source) = fallback {
138                                        issue = issue.with_lint_fallback_source(source);
139                                    }
140
141                                    let sqlfluff_name = rule.sqlfluff_name();
142                                    if !sqlfluff_name.is_empty() {
143                                        issue = issue.with_sqlfluff_name(sqlfluff_name);
144                                    }
145
146                                    issues.push(issue);
147                                }
148                            });
149                            continue;
150                        }
151
152                        for statement in &document.statements {
153                            let (ctx_sql, ctx_statement_range) = if matches!(
154                                rule.code(),
155                                crate::types::issue_codes::LINT_LT_002
156                                    | crate::types::issue_codes::LINT_LT_005
157                                    | crate::types::issue_codes::LINT_LT_004
158                                    | crate::types::issue_codes::LINT_LT_007
159                                    | crate::types::issue_codes::LINT_LT_012
160                                    | crate::types::issue_codes::LINT_LT_013
161                                    | crate::types::issue_codes::LINT_CV_009
162                                    | crate::types::issue_codes::LINT_CV_010
163                                    | crate::types::issue_codes::LINT_ST_004
164                            ) {
165                                if matches!(
166                                    rule.code(),
167                                    crate::types::issue_codes::LINT_LT_012
168                                        | crate::types::issue_codes::LINT_LT_013
169                                ) {
170                                    if let Some(source_sql) = document.source_sql {
171                                        (source_sql, 0..source_sql.len())
172                                    } else {
173                                        (document.sql, statement.statement_range.clone())
174                                    }
175                                } else {
176                                    match (
177                                        document.source_sql,
178                                        document
179                                            .source_statement_ranges
180                                            .get(statement.statement_index)
181                                            .and_then(|range| range.clone()),
182                                    ) {
183                                        (Some(source_sql), Some(source_statement_range)) => {
184                                            (source_sql, source_statement_range)
185                                        }
186                                        _ => (document.sql, statement.statement_range.clone()),
187                                    }
188                                }
189                            } else if rule.code() == crate::types::issue_codes::LINT_LT_001 {
190                                // LT01 needs trailing whitespace visible so it can
191                                // detect and fix trailing spaces/tabs on lines.
192                                // The normal statement range trims whitespace, so
193                                // extend it to include trailing whitespace up to
194                                // the next newline (inclusive).
195                                let lt01_ignore_templated = self
196                                    .config
197                                    .core_option_bool("ignore_templated_areas")
198                                    .unwrap_or(true);
199                                match (
200                                    document.source_sql,
201                                    document
202                                        .source_statement_ranges
203                                        .get(statement.statement_index)
204                                        .and_then(|range| range.clone()),
205                                ) {
206                                    (Some(source_sql), Some(source_statement_range))
207                                        if lt01_ignore_templated =>
208                                    {
209                                        let range = extend_range_with_trailing_whitespace(
210                                            source_sql,
211                                            &source_statement_range,
212                                            next_source_statement_start(
213                                                &document.source_statement_ranges,
214                                                statement.statement_index,
215                                            ),
216                                        );
217                                        (source_sql, range)
218                                    }
219                                    _ => {
220                                        let range = extend_range_with_trailing_whitespace(
221                                            document.sql,
222                                            &statement.statement_range,
223                                            next_statement_start(
224                                                &document.statements,
225                                                statement.statement_index,
226                                            ),
227                                        );
228                                        (document.sql, range)
229                                    }
230                                }
231                            } else {
232                                (document.sql, statement.statement_range.clone())
233                            };
234
235                            let ctx = LintContext {
236                                sql: ctx_sql,
237                                statement_range: ctx_statement_range,
238                                statement_index: statement.statement_index,
239                            };
240
241                            with_active_dialect(document.dialect, || {
242                                for issue in rule.check(statement.statement, &ctx) {
243                                    let mut issue = issue
244                                        .with_lint_engine(engine)
245                                        .with_lint_confidence(confidence);
246
247                                    if let Some(source) = fallback {
248                                        issue = issue.with_lint_fallback_source(source);
249                                    }
250
251                                    let sqlfluff_name = rule.sqlfluff_name();
252                                    if !sqlfluff_name.is_empty() {
253                                        issue = issue.with_sqlfluff_name(sqlfluff_name);
254                                    }
255
256                                    issues.push(issue);
257                                }
258                            });
259                        }
260                    }
261                }
262
263                let issues = suppress_noqa_issues(issues, document);
264                normalize_issues(issues)
265            })
266        })
267    }
268
269    /// Checks a single statement against all enabled lint rules.
270    ///
271    /// This adapter is kept for tests and rule-level helpers. Production paths
272    /// should prefer `check_document()`.
273    pub fn check_statement(&self, stmt: &Statement, ctx: &LintContext) -> Vec<Issue> {
274        let document = LintDocument::new(
275            ctx.sql,
276            crate::Dialect::Generic,
277            vec![LintStatement {
278                statement: stmt,
279                statement_index: ctx.statement_index,
280                statement_range: ctx.statement_range.clone(),
281            }],
282        );
283        self.check_document(&document)
284    }
285}
286
/// Extends a statement range to include trailing whitespace (spaces, tabs) and
/// the terminating newline. This is used by LT01 so it can detect and fix
/// trailing whitespace that `trim_statement_range` normally strips.
///
/// * `sql` — the text `range` indexes into (byte offsets).
/// * `range` — the statement's trimmed byte range; its start is kept as-is.
/// * `next_start` — byte offset where the next statement begins, if any. The
///   extension never crosses it. Values past the end of `sql` are clamped to
///   `sql.len()` so an inconsistent offset cannot cause an out-of-bounds index.
///
/// Scanning stops after the first line terminator (`\n`, `\r\n`, or a lone
/// `\r`) or at the first byte that is not a space or tab.
fn extend_range_with_trailing_whitespace(
    sql: &str,
    range: &std::ops::Range<usize>,
    next_start: Option<usize>,
) -> std::ops::Range<usize> {
    let bytes = sql.as_bytes();
    // Never scan past the buffer, even if the caller's offset is stale.
    let limit = next_start.map_or(bytes.len(), |start| start.min(bytes.len()));
    let mut end = range.end;
    while end < limit {
        match bytes[end] {
            b' ' | b'\t' => end += 1,
            b'\n' => {
                end += 1;
                break;
            }
            b'\r' => {
                end += 1;
                // Treat CRLF as a single line terminator.
                if end < limit && bytes[end] == b'\n' {
                    end += 1;
                }
                break;
            }
            _ => break,
        }
    }
    range.start..end
}
317
318/// Returns the start byte of the next statement's range, if any.
319fn next_statement_start(statements: &[LintStatement], current_index: usize) -> Option<usize> {
320    statements
321        .iter()
322        .find(|s| s.statement_index == current_index + 1)
323        .map(|s| s.statement_range.start)
324}
325
/// Returns the start offset of the first mapped source range that sits at a
/// position strictly after `current_index`. Unmapped (`None`) entries are
/// skipped; `None` is returned when no later entry has a range.
fn next_source_statement_start(
    source_statement_ranges: &[Option<std::ops::Range<usize>>],
    current_index: usize,
) -> Option<usize> {
    for (position, candidate) in source_statement_ranges.iter().enumerate() {
        if position <= current_index {
            continue;
        }
        if let Some(mapped) = candidate {
            return Some(mapped.start);
        }
    }
    None
}
339
340fn normalize_issues(mut issues: Vec<Issue>) -> Vec<Issue> {
341    issues.sort_by(|left, right| issue_sort_key(left).cmp(&issue_sort_key(right)));
342    issues.dedup_by(|left, right| {
343        left.span.is_some()
344            && right.span.is_some()
345            && left.statement_index == right.statement_index
346            && left.span == right.span
347            && left.severity == right.severity
348            && left.code == right.code
349            && left.message == right.message
350            && left.autofix == right.autofix
351    });
352    issues
353}
354
355fn issue_sort_key(
356    issue: &Issue,
357) -> (
358    usize,
359    usize,
360    usize,
361    u8,
362    &str,
363    &str,
364    Option<&crate::types::IssueAutofix>,
365) {
366    (
367        issue.statement_index.unwrap_or(usize::MAX),
368        issue.span.map_or(usize::MAX, |span| span.start),
369        issue.span.map_or(usize::MAX, |span| span.end),
370        severity_rank(issue.severity),
371        issue.code.as_str(),
372        issue.message.as_str(),
373        issue.autofix.as_ref(),
374    )
375}
376
377const fn severity_rank(severity: Severity) -> u8 {
378    match severity {
379        Severity::Error => 0,
380        Severity::Warning => 1,
381        Severity::Info => 2,
382    }
383}
384
385fn rule_engine(code: &str) -> LintEngine {
386    match code {
387        crate::types::issue_codes::LINT_LT_012
388        | crate::types::issue_codes::LINT_LT_013
389        | crate::types::issue_codes::LINT_LT_015
390        | crate::types::issue_codes::LINT_ST_012 => LintEngine::Document,
391        c if c.starts_with("LINT_CP_")
392            || c.starts_with("LINT_JJ_")
393            || c.starts_with("LINT_LT_")
394            || c.starts_with("LINT_TQ_") =>
395        {
396            LintEngine::Lexical
397        }
398        _ => LintEngine::Semantic,
399    }
400}
401
402fn rule_supported_in_dialect(code: &str, dialect: Dialect) -> bool {
403    match code {
404        crate::types::issue_codes::LINT_AM_007 => matches!(
405            dialect,
406            Dialect::Generic
407                | Dialect::Ansi
408                | Dialect::Bigquery
409                | Dialect::Clickhouse
410                | Dialect::Databricks
411                | Dialect::Hive
412                | Dialect::Mysql
413                | Dialect::Redshift
414                | Dialect::Snowflake
415        ),
416        _ => true,
417    }
418}
419
420fn lint_quality_for_rule(
421    code: &str,
422    engine: LintEngine,
423    document: &LintDocument<'_>,
424) -> (LintConfidence, Option<LintFallbackSource>) {
425    if document.parser_fallback_used {
426        return (
427            LintConfidence::Medium,
428            Some(LintFallbackSource::ParserFallback),
429        );
430    }
431
432    if document.tokenizer_fallback_used && engine != LintEngine::Semantic {
433        return (
434            LintConfidence::Medium,
435            Some(LintFallbackSource::TokenizerFallback),
436        );
437    }
438
439    if ast_rule_code(code) {
440        return (LintConfidence::High, None);
441    }
442
443    (LintConfidence::Low, Some(LintFallbackSource::HeuristicRule))
444}
445
446fn ast_rule_code(code: &str) -> bool {
447    matches!(
448        code,
449        crate::types::issue_codes::LINT_AL_003
450            | crate::types::issue_codes::LINT_AL_004
451            | crate::types::issue_codes::LINT_AL_005
452            | crate::types::issue_codes::LINT_AL_006
453            | crate::types::issue_codes::LINT_AL_007
454            | crate::types::issue_codes::LINT_AL_008
455            | crate::types::issue_codes::LINT_AL_009
456            | crate::types::issue_codes::LINT_AM_001
457            | crate::types::issue_codes::LINT_AM_002
458            | crate::types::issue_codes::LINT_AM_003
459            | crate::types::issue_codes::LINT_AM_004
460            | crate::types::issue_codes::LINT_AM_005
461            | crate::types::issue_codes::LINT_AM_006
462            | crate::types::issue_codes::LINT_AM_007
463            | crate::types::issue_codes::LINT_AM_008
464            | crate::types::issue_codes::LINT_CV_002
465            | crate::types::issue_codes::LINT_CV_004
466            | crate::types::issue_codes::LINT_CV_005
467            | crate::types::issue_codes::LINT_CV_008
468            | crate::types::issue_codes::LINT_CV_012
469            | crate::types::issue_codes::LINT_RF_001
470            | crate::types::issue_codes::LINT_RF_002
471            | crate::types::issue_codes::LINT_RF_003
472            | crate::types::issue_codes::LINT_ST_001
473            | crate::types::issue_codes::LINT_ST_002
474            | crate::types::issue_codes::LINT_ST_003
475            | crate::types::issue_codes::LINT_ST_004
476            | crate::types::issue_codes::LINT_ST_005
477            | crate::types::issue_codes::LINT_ST_006
478            | crate::types::issue_codes::LINT_ST_007
479            | crate::types::issue_codes::LINT_ST_008
480            | crate::types::issue_codes::LINT_ST_009
481            | crate::types::issue_codes::LINT_ST_010
482            | crate::types::issue_codes::LINT_ST_011
483    )
484}
485
486fn rule_uses_document_scope(code: &str) -> bool {
487    matches!(
488        code,
489        crate::types::issue_codes::LINT_CP_001
490            | crate::types::issue_codes::LINT_CP_003
491            | crate::types::issue_codes::LINT_CP_004
492            | crate::types::issue_codes::LINT_CP_005
493            | crate::types::issue_codes::LINT_JJ_001
494    )
495}
496
497fn rule_supports_statementless_fallback(code: &str) -> bool {
498    matches!(
499        code,
500        crate::types::issue_codes::LINT_LT_001
501            | crate::types::issue_codes::LINT_LT_002
502            | crate::types::issue_codes::LINT_LT_003
503            | crate::types::issue_codes::LINT_LT_005
504            | crate::types::issue_codes::LINT_LT_012
505            | crate::types::issue_codes::LINT_AL_007
506            | crate::types::issue_codes::LINT_AL_008
507            | crate::types::issue_codes::LINT_AM_004
508            | crate::types::issue_codes::LINT_CV_001
509            | crate::types::issue_codes::LINT_RF_006
510            | crate::types::issue_codes::LINT_ST_002
511            | crate::types::issue_codes::LINT_TQ_001
512            | crate::types::issue_codes::LINT_TQ_002
513            | crate::types::issue_codes::LINT_CP_001
514            | crate::types::issue_codes::LINT_CP_002
515            | crate::types::issue_codes::LINT_CP_003
516            | crate::types::issue_codes::LINT_CP_004
517            | crate::types::issue_codes::LINT_CP_005
518            | crate::types::issue_codes::LINT_ST_004
519    )
520}
521
522fn document_scope_sql_for_rule<'a>(
523    config: &LintConfig,
524    code: &str,
525    document: &LintDocument<'a>,
526) -> Cow<'a, str> {
527    if !rule_uses_document_scope(code) {
528        return Cow::Borrowed(document.sql);
529    }
530
531    // JJ01 checks Jinja delimiter padding in the raw source, so it must
532    // always see the untemplated SQL when templating has been applied.
533    if code == crate::types::issue_codes::LINT_JJ_001 {
534        if let Some(source_sql) = document.source_sql {
535            return Cow::Borrowed(source_sql);
536        }
537        return Cow::Borrowed(document.sql);
538    }
539
540    // CP03 must apply patches against the original source text so fix spans
541    // remain valid when templated regions expand/contract during rendering.
542    if code == crate::types::issue_codes::LINT_CP_003 {
543        if let Some(source_sql) = document.source_sql {
544            return Cow::Borrowed(source_sql);
545        }
546    }
547
548    if !config
549        .core_option_bool("ignore_templated_areas")
550        .unwrap_or(false)
551    {
552        return Cow::Borrowed(document.sql);
553    }
554    let Some(source_sql) = document.source_sql else {
555        return Cow::Borrowed(document.sql);
556    };
557    Cow::Owned(strip_templated_areas(source_sql))
558}
559
/// Blanks out every template tag (`{{ … }}`, `{% … %}`, `{# … #}`) in `sql`.
///
/// Text outside tags is copied verbatim. Inside a tag, newlines are kept and
/// every other byte becomes a space, so the result has the same byte length
/// and the same line breaks as the input; byte offsets into the result are
/// therefore also valid offsets into `sql`. A tag that is never closed is
/// masked through to the end of the input.
fn strip_templated_areas(sql: &str) -> String {
    let mut out = String::with_capacity(sql.len());
    let mut index = 0usize;

    while let Some((open_index, close_marker)) = find_next_template_open(sql, index) {
        out.push_str(&sql[index..open_index]);
        // Search for the closer after the two-byte opener so that the opener
        // itself can never be mistaken for part of the closing marker.
        let marker_start = open_index + 2;
        match sql[marker_start..].find(close_marker) {
            Some(close_offset) => {
                let close_index = marker_start + close_offset + close_marker.len();
                out.push_str(&mask_non_newlines(&sql[open_index..close_index]));
                index = close_index;
            }
            None => {
                out.push_str(&mask_non_newlines(&sql[open_index..]));
                return out;
            }
        }
    }

    out.push_str(&sql[index..]);
    out
}

/// Finds the earliest template opener at or after byte offset `from`.
///
/// Returns the opener's byte offset together with the closing marker that
/// matches it, or `None` when no opener remains or `from` is not a valid
/// slice start for `sql`.
fn find_next_template_open(sql: &str, from: usize) -> Option<(usize, &'static str)> {
    let rest = sql.get(from..)?;
    let candidates = [("{{", "}}"), ("{%", "%}"), ("{#", "#}")];

    candidates
        .into_iter()
        .filter_map(|(open, close)| rest.find(open).map(|offset| (from + offset, close)))
        .min_by_key(|(index, _)| *index)
}

/// Replaces every byte of `segment` except `\n` with a single ASCII space.
///
/// Masking works per byte rather than per `char`, so a multi-byte character
/// becomes as many spaces as it has bytes and the output length always equals
/// `segment.len()`. The bytes of a multi-byte UTF-8 sequence are all >= 0x80,
/// so none of them can be confused with `\n`.
fn mask_non_newlines(segment: &str) -> String {
    segment
        .bytes()
        .map(|byte| if byte == b'\n' { '\n' } else { ' ' })
        .collect()
}
597
598fn suppress_noqa_issues(issues: Vec<Issue>, document: &LintDocument<'_>) -> Vec<Issue> {
599    issues
600        .into_iter()
601        .filter(|issue| {
602            let Some(line) = issue_line(issue, document) else {
603                return true;
604            };
605            !document.noqa.is_suppressed(line, &issue.code)
606        })
607        .collect()
608}
609
610fn issue_line(issue: &Issue, document: &LintDocument<'_>) -> Option<usize> {
611    if let Some(span) = issue.span {
612        return Some(offset_to_line(document.sql, span.start));
613    }
614
615    let statement_index = issue.statement_index?;
616    let statement = document
617        .statements
618        .iter()
619        .find(|statement| statement.statement_index == statement_index)?;
620    Some(offset_to_line(
621        document.sql,
622        statement.statement_range.start,
623    ))
624}
625
/// Converts a byte offset into a 1-based line number by counting the `\n`
/// bytes that precede it. Offsets past the end of `sql` are treated as the
/// end of `sql`.
fn offset_to_line(sql: &str, offset: usize) -> usize {
    let prefix_len = offset.min(sql.len());
    let newlines_before = sql.as_bytes()[..prefix_len]
        .iter()
        .filter(|&&byte| byte == b'\n')
        .count();
    newlines_before + 1
}
634
#[cfg(test)]
mod tests {
    use super::{normalize_issues, strip_templated_areas};
    use crate::types::{Issue, IssueAutofixApplicability, IssuePatchEdit, Span};

    /// Builds the warning shared by the de-duplication tests: same code,
    /// message, statement, span, and edit — only the applicability varies.
    fn issue_with_fix(applicability: IssueAutofixApplicability) -> Issue {
        Issue::warning("LINT_X", "lint message")
            .with_statement(0)
            .with_span(Span::new(0, 1))
            .with_autofix_edits(
                applicability,
                vec![IssuePatchEdit::new(Span::new(0, 1), "x")],
            )
    }

    #[test]
    fn strip_templated_areas_preserves_lines_and_replaces_tag_content() {
        let sql = "SELECT {{ \"x\" }} AS x\nFROM t\nWHERE {% if true %}1{% endif %} = 1";
        let stripped = strip_templated_areas(sql);

        // Masking must not add or remove line breaks.
        assert_eq!(stripped.lines().count(), sql.lines().count());
        // Tag openers are gone ...
        for opener in ["{{", "{%"] {
            assert!(!stripped.contains(opener));
        }
        // ... while plain SQL around them survives.
        for fragment in ["SELECT", "FROM t"] {
            assert!(stripped.contains(fragment));
        }
    }

    #[test]
    fn normalize_issues_keeps_distinct_autofix_metadata() {
        let unsafe_fix = issue_with_fix(IssueAutofixApplicability::Unsafe);
        let safe = issue_with_fix(IssueAutofixApplicability::Safe);

        let normalized = normalize_issues(vec![unsafe_fix, safe]);
        assert_eq!(normalized.len(), 2);
    }

    #[test]
    fn normalize_issues_dedups_when_autofix_matches() {
        let first = issue_with_fix(IssueAutofixApplicability::Safe);
        let second = first.clone();

        let normalized = normalize_issues(vec![first, second]);
        assert_eq!(normalized.len(), 1);
    }
}