1pub mod config;
8pub mod document;
9pub mod helpers;
10pub mod rule;
11pub mod rules;
12pub(crate) mod visit;
13
14use config::LintConfig;
15use document::{LintDocument, LintStatement};
16use rule::{
17 with_active_dialect, with_active_document_tokens, with_active_is_templated, LintContext,
18 LintRule,
19};
20use sqlparser::ast::Statement;
21use std::borrow::Cow;
22
23use crate::{
24 parser::parse_sql,
25 types::{Issue, LintConfidence, LintEngine, LintFallbackSource, Severity},
26 Dialect,
27};
28
/// Runs the configured lint rules over SQL documents and statements.
pub struct Linter {
    /// Rule instances produced by `rules::all_rules` for `config`.
    rules: Vec<Box<dyn LintRule>>,
    /// Configuration consulted for the global `enabled` flag, per-rule
    /// enablement and core options such as `ignore_templated_areas`.
    config: LintConfig,
}
34
35impl Linter {
36 pub fn new(config: LintConfig) -> Self {
38 Self {
39 rules: rules::all_rules(&config),
40 config,
41 }
42 }
43
44 pub fn is_enabled(&self) -> bool {
46 self.config.enabled
47 }
48
49 pub fn check_document(&self, document: &LintDocument<'_>) -> Vec<Issue> {
51 if !self.config.enabled {
52 return Vec::new();
53 }
54
55 let is_templated = document.source_sql.is_some();
56 with_active_is_templated(is_templated, || {
57 with_active_document_tokens(&document.raw_tokens, || {
58 let mut issues = Vec::new();
59
60 for engine in [
61 LintEngine::Semantic,
62 LintEngine::Lexical,
63 LintEngine::Document,
64 ] {
65 for rule in &self.rules {
66 if !self.config.is_rule_enabled(rule.code())
67 || rule_engine(rule.code()) != engine
68 || !rule_supported_in_dialect(rule.code(), document.dialect)
69 {
70 continue;
71 }
72
73 let (confidence, fallback) =
74 lint_quality_for_rule(rule.code(), engine, document);
75
76 if rule_uses_document_scope(rule.code()) {
77 let Some(synthetic_statement) = parse_sql("SELECT 1")
78 .ok()
79 .and_then(|mut statements| statements.drain(..).next())
80 else {
81 continue;
82 };
83
84 let document_scope_sql =
85 document_scope_sql_for_rule(&self.config, rule.code(), document);
86 let ctx = LintContext {
87 sql: document_scope_sql.as_ref(),
88 statement_range: 0..document_scope_sql.len(),
89 statement_index: 0,
90 };
91
92 with_active_dialect(document.dialect, || {
93 for issue in rule.check(&synthetic_statement, &ctx) {
94 let mut issue = issue
95 .with_lint_engine(engine)
96 .with_lint_confidence(confidence);
97
98 if let Some(source) = fallback {
99 issue = issue.with_lint_fallback_source(source);
100 }
101
102 let sqlfluff_name = rule.sqlfluff_name();
103 if !sqlfluff_name.is_empty() {
104 issue = issue.with_sqlfluff_name(sqlfluff_name);
105 }
106
107 issues.push(issue);
108 }
109 });
110 continue;
111 }
112
113 if document.statements.is_empty() {
114 if !rule_supports_statementless_fallback(rule.code()) {
115 continue;
116 }
117
118 let Some(synthetic_statement) = parse_sql("SELECT 1")
119 .ok()
120 .and_then(|mut statements| statements.drain(..).next())
121 else {
122 continue;
123 };
124
125 let ctx = LintContext {
126 sql: document.sql,
127 statement_range: 0..document.sql.len(),
128 statement_index: 0,
129 };
130
131 with_active_dialect(document.dialect, || {
132 for issue in rule.check(&synthetic_statement, &ctx) {
133 let mut issue = issue
134 .with_lint_engine(engine)
135 .with_lint_confidence(confidence);
136
137 if let Some(source) = fallback {
138 issue = issue.with_lint_fallback_source(source);
139 }
140
141 let sqlfluff_name = rule.sqlfluff_name();
142 if !sqlfluff_name.is_empty() {
143 issue = issue.with_sqlfluff_name(sqlfluff_name);
144 }
145
146 issues.push(issue);
147 }
148 });
149 continue;
150 }
151
152 for statement in &document.statements {
153 let (ctx_sql, ctx_statement_range) = if matches!(
154 rule.code(),
155 crate::types::issue_codes::LINT_LT_002
156 | crate::types::issue_codes::LINT_LT_005
157 | crate::types::issue_codes::LINT_LT_004
158 | crate::types::issue_codes::LINT_LT_007
159 | crate::types::issue_codes::LINT_LT_012
160 | crate::types::issue_codes::LINT_LT_013
161 | crate::types::issue_codes::LINT_CV_009
162 | crate::types::issue_codes::LINT_CV_010
163 | crate::types::issue_codes::LINT_ST_004
164 ) {
165 if matches!(
166 rule.code(),
167 crate::types::issue_codes::LINT_LT_012
168 | crate::types::issue_codes::LINT_LT_013
169 ) {
170 if let Some(source_sql) = document.source_sql {
171 (source_sql, 0..source_sql.len())
172 } else {
173 (document.sql, statement.statement_range.clone())
174 }
175 } else {
176 match (
177 document.source_sql,
178 document
179 .source_statement_ranges
180 .get(statement.statement_index)
181 .and_then(|range| range.clone()),
182 ) {
183 (Some(source_sql), Some(source_statement_range)) => {
184 (source_sql, source_statement_range)
185 }
186 _ => (document.sql, statement.statement_range.clone()),
187 }
188 }
189 } else if rule.code() == crate::types::issue_codes::LINT_LT_001 {
190 let lt01_ignore_templated = self
196 .config
197 .core_option_bool("ignore_templated_areas")
198 .unwrap_or(true);
199 match (
200 document.source_sql,
201 document
202 .source_statement_ranges
203 .get(statement.statement_index)
204 .and_then(|range| range.clone()),
205 ) {
206 (Some(source_sql), Some(source_statement_range))
207 if lt01_ignore_templated =>
208 {
209 let range = extend_range_with_trailing_whitespace(
210 source_sql,
211 &source_statement_range,
212 next_source_statement_start(
213 &document.source_statement_ranges,
214 statement.statement_index,
215 ),
216 );
217 (source_sql, range)
218 }
219 _ => {
220 let range = extend_range_with_trailing_whitespace(
221 document.sql,
222 &statement.statement_range,
223 next_statement_start(
224 &document.statements,
225 statement.statement_index,
226 ),
227 );
228 (document.sql, range)
229 }
230 }
231 } else {
232 (document.sql, statement.statement_range.clone())
233 };
234
235 let ctx = LintContext {
236 sql: ctx_sql,
237 statement_range: ctx_statement_range,
238 statement_index: statement.statement_index,
239 };
240
241 with_active_dialect(document.dialect, || {
242 for issue in rule.check(statement.statement, &ctx) {
243 let mut issue = issue
244 .with_lint_engine(engine)
245 .with_lint_confidence(confidence);
246
247 if let Some(source) = fallback {
248 issue = issue.with_lint_fallback_source(source);
249 }
250
251 let sqlfluff_name = rule.sqlfluff_name();
252 if !sqlfluff_name.is_empty() {
253 issue = issue.with_sqlfluff_name(sqlfluff_name);
254 }
255
256 issues.push(issue);
257 }
258 });
259 }
260 }
261 }
262
263 let issues = suppress_noqa_issues(issues, document);
264 normalize_issues(issues)
265 })
266 })
267 }
268
269 pub fn check_statement(&self, stmt: &Statement, ctx: &LintContext) -> Vec<Issue> {
274 let document = LintDocument::new(
275 ctx.sql,
276 crate::Dialect::Generic,
277 vec![LintStatement {
278 statement: stmt,
279 statement_index: ctx.statement_index,
280 statement_range: ctx.statement_range.clone(),
281 }],
282 );
283 self.check_document(&document)
284 }
285}
286
/// Extends `range` over the whitespace that directly follows it.
///
/// Starting at `range.end`, any run of spaces and tabs is absorbed, followed
/// by at most one line ending (`\n`, `\r` or `\r\n`). Scanning never goes past
/// `next_start` (when given) or the end of `sql`. The start of the range is
/// left untouched.
///
/// A `next_start` beyond the end of `sql` is clamped to `sql.len()`, and a
/// `range.end` at or beyond the scan limit returns the range unchanged, so the
/// function never indexes out of bounds.
fn extend_range_with_trailing_whitespace(
    sql: &str,
    range: &std::ops::Range<usize>,
    next_start: Option<usize>,
) -> std::ops::Range<usize> {
    let bytes = sql.as_bytes();
    let limit = next_start.map_or(bytes.len(), |start| start.min(bytes.len()));

    // `get` yields `None` when `range.end > limit`; nothing to extend then.
    let Some(tail) = bytes.get(range.end..limit) else {
        return range.clone();
    };

    let inline_whitespace = tail
        .iter()
        .take_while(|byte| matches!(**byte, b' ' | b'\t'))
        .count();
    let line_ending = match &tail[inline_whitespace..] {
        [b'\r', b'\n', ..] => 2,
        [b'\r', ..] | [b'\n', ..] => 1,
        _ => 0,
    };

    range.start..range.end + inline_whitespace + line_ending
}
317
318fn next_statement_start(statements: &[LintStatement], current_index: usize) -> Option<usize> {
320 statements
321 .iter()
322 .find(|s| s.statement_index == current_index + 1)
323 .map(|s| s.statement_range.start)
324}
325
/// Returns the start offset of the first present range located after
/// position `current_index` in `source_statement_ranges`.
///
/// Entries that are `None` are skipped; `None` is returned when no later
/// entry holds a range.
fn next_source_statement_start(
    source_statement_ranges: &[Option<std::ops::Range<usize>>],
    current_index: usize,
) -> Option<usize> {
    source_statement_ranges
        .iter()
        // Saturating keeps `usize::MAX` from overflowing; it then skips everything.
        .skip(current_index.saturating_add(1))
        .flatten()
        .map(|later_range| later_range.start)
        .next()
}
339
340fn normalize_issues(mut issues: Vec<Issue>) -> Vec<Issue> {
341 issues.sort_by(|left, right| issue_sort_key(left).cmp(&issue_sort_key(right)));
342 issues.dedup_by(|left, right| {
343 left.span.is_some()
344 && right.span.is_some()
345 && left.statement_index == right.statement_index
346 && left.span == right.span
347 && left.severity == right.severity
348 && left.code == right.code
349 && left.message == right.message
350 && left.autofix == right.autofix
351 });
352 issues
353}
354
355fn issue_sort_key(
356 issue: &Issue,
357) -> (
358 usize,
359 usize,
360 usize,
361 u8,
362 &str,
363 &str,
364 Option<&crate::types::IssueAutofix>,
365) {
366 (
367 issue.statement_index.unwrap_or(usize::MAX),
368 issue.span.map_or(usize::MAX, |span| span.start),
369 issue.span.map_or(usize::MAX, |span| span.end),
370 severity_rank(issue.severity),
371 issue.code.as_str(),
372 issue.message.as_str(),
373 issue.autofix.as_ref(),
374 )
375}
376
377const fn severity_rank(severity: Severity) -> u8 {
378 match severity {
379 Severity::Error => 0,
380 Severity::Warning => 1,
381 Severity::Info => 2,
382 }
383}
384
385fn rule_engine(code: &str) -> LintEngine {
386 match code {
387 crate::types::issue_codes::LINT_LT_012
388 | crate::types::issue_codes::LINT_LT_013
389 | crate::types::issue_codes::LINT_LT_015
390 | crate::types::issue_codes::LINT_ST_012 => LintEngine::Document,
391 c if c.starts_with("LINT_CP_")
392 || c.starts_with("LINT_JJ_")
393 || c.starts_with("LINT_LT_")
394 || c.starts_with("LINT_TQ_") =>
395 {
396 LintEngine::Lexical
397 }
398 _ => LintEngine::Semantic,
399 }
400}
401
402fn rule_supported_in_dialect(code: &str, dialect: Dialect) -> bool {
403 match code {
404 crate::types::issue_codes::LINT_AM_007 => matches!(
405 dialect,
406 Dialect::Generic
407 | Dialect::Ansi
408 | Dialect::Bigquery
409 | Dialect::Clickhouse
410 | Dialect::Databricks
411 | Dialect::Hive
412 | Dialect::Mysql
413 | Dialect::Redshift
414 | Dialect::Snowflake
415 ),
416 _ => true,
417 }
418}
419
420fn lint_quality_for_rule(
421 code: &str,
422 engine: LintEngine,
423 document: &LintDocument<'_>,
424) -> (LintConfidence, Option<LintFallbackSource>) {
425 if document.parser_fallback_used {
426 return (
427 LintConfidence::Medium,
428 Some(LintFallbackSource::ParserFallback),
429 );
430 }
431
432 if document.tokenizer_fallback_used && engine != LintEngine::Semantic {
433 return (
434 LintConfidence::Medium,
435 Some(LintFallbackSource::TokenizerFallback),
436 );
437 }
438
439 if ast_rule_code(code) {
440 return (LintConfidence::High, None);
441 }
442
443 (LintConfidence::Low, Some(LintFallbackSource::HeuristicRule))
444}
445
446fn ast_rule_code(code: &str) -> bool {
447 matches!(
448 code,
449 crate::types::issue_codes::LINT_AL_003
450 | crate::types::issue_codes::LINT_AL_004
451 | crate::types::issue_codes::LINT_AL_005
452 | crate::types::issue_codes::LINT_AL_006
453 | crate::types::issue_codes::LINT_AL_007
454 | crate::types::issue_codes::LINT_AL_008
455 | crate::types::issue_codes::LINT_AL_009
456 | crate::types::issue_codes::LINT_AM_001
457 | crate::types::issue_codes::LINT_AM_002
458 | crate::types::issue_codes::LINT_AM_003
459 | crate::types::issue_codes::LINT_AM_004
460 | crate::types::issue_codes::LINT_AM_005
461 | crate::types::issue_codes::LINT_AM_006
462 | crate::types::issue_codes::LINT_AM_007
463 | crate::types::issue_codes::LINT_AM_008
464 | crate::types::issue_codes::LINT_CV_002
465 | crate::types::issue_codes::LINT_CV_004
466 | crate::types::issue_codes::LINT_CV_005
467 | crate::types::issue_codes::LINT_CV_008
468 | crate::types::issue_codes::LINT_CV_012
469 | crate::types::issue_codes::LINT_RF_001
470 | crate::types::issue_codes::LINT_RF_002
471 | crate::types::issue_codes::LINT_RF_003
472 | crate::types::issue_codes::LINT_ST_001
473 | crate::types::issue_codes::LINT_ST_002
474 | crate::types::issue_codes::LINT_ST_003
475 | crate::types::issue_codes::LINT_ST_004
476 | crate::types::issue_codes::LINT_ST_005
477 | crate::types::issue_codes::LINT_ST_006
478 | crate::types::issue_codes::LINT_ST_007
479 | crate::types::issue_codes::LINT_ST_008
480 | crate::types::issue_codes::LINT_ST_009
481 | crate::types::issue_codes::LINT_ST_010
482 | crate::types::issue_codes::LINT_ST_011
483 )
484}
485
486fn rule_uses_document_scope(code: &str) -> bool {
487 matches!(
488 code,
489 crate::types::issue_codes::LINT_CP_001
490 | crate::types::issue_codes::LINT_CP_003
491 | crate::types::issue_codes::LINT_CP_004
492 | crate::types::issue_codes::LINT_CP_005
493 | crate::types::issue_codes::LINT_JJ_001
494 )
495}
496
497fn rule_supports_statementless_fallback(code: &str) -> bool {
498 matches!(
499 code,
500 crate::types::issue_codes::LINT_LT_001
501 | crate::types::issue_codes::LINT_LT_002
502 | crate::types::issue_codes::LINT_LT_003
503 | crate::types::issue_codes::LINT_LT_005
504 | crate::types::issue_codes::LINT_LT_012
505 | crate::types::issue_codes::LINT_AL_007
506 | crate::types::issue_codes::LINT_AL_008
507 | crate::types::issue_codes::LINT_AM_004
508 | crate::types::issue_codes::LINT_CV_001
509 | crate::types::issue_codes::LINT_RF_006
510 | crate::types::issue_codes::LINT_ST_002
511 | crate::types::issue_codes::LINT_TQ_001
512 | crate::types::issue_codes::LINT_TQ_002
513 | crate::types::issue_codes::LINT_CP_001
514 | crate::types::issue_codes::LINT_CP_002
515 | crate::types::issue_codes::LINT_CP_003
516 | crate::types::issue_codes::LINT_CP_004
517 | crate::types::issue_codes::LINT_CP_005
518 | crate::types::issue_codes::LINT_ST_004
519 )
520}
521
522fn document_scope_sql_for_rule<'a>(
523 config: &LintConfig,
524 code: &str,
525 document: &LintDocument<'a>,
526) -> Cow<'a, str> {
527 if !rule_uses_document_scope(code) {
528 return Cow::Borrowed(document.sql);
529 }
530
531 if code == crate::types::issue_codes::LINT_JJ_001 {
534 if let Some(source_sql) = document.source_sql {
535 return Cow::Borrowed(source_sql);
536 }
537 return Cow::Borrowed(document.sql);
538 }
539
540 if code == crate::types::issue_codes::LINT_CP_003 {
543 if let Some(source_sql) = document.source_sql {
544 return Cow::Borrowed(source_sql);
545 }
546 }
547
548 if !config
549 .core_option_bool("ignore_templated_areas")
550 .unwrap_or(false)
551 {
552 return Cow::Borrowed(document.sql);
553 }
554 let Some(source_sql) = document.source_sql else {
555 return Cow::Borrowed(document.sql);
556 };
557 Cow::Owned(strip_templated_areas(source_sql))
558}
559
560fn strip_templated_areas(sql: &str) -> String {
561 let mut out = String::with_capacity(sql.len());
562 let mut index = 0usize;
563
564 while let Some((open_index, close_marker)) = find_next_template_open(sql, index) {
565 out.push_str(&sql[index..open_index]);
566 let marker_start = open_index + 2;
567 if let Some(close_offset) = sql[marker_start..].find(close_marker) {
568 let close_index = marker_start + close_offset + close_marker.len();
569 out.push_str(&mask_non_newlines(&sql[open_index..close_index]));
570 index = close_index;
571 } else {
572 out.push_str(&mask_non_newlines(&sql[open_index..]));
573 return out;
574 }
575 }
576
577 out.push_str(&sql[index..]);
578 out
579}
580
/// Finds the earliest template opening marker at or after byte offset `from`.
///
/// Returns the absolute offset of the marker together with the closing marker
/// that pairs with it, or `None` when no marker is found or `from` is not a
/// valid slice start for `sql`.
fn find_next_template_open(sql: &str, from: usize) -> Option<(usize, &'static str)> {
    let tail = sql.get(from..)?;

    let mut earliest: Option<(usize, &'static str)> = None;
    for (opener, closer) in [("{{", "}}"), ("{%", "%}"), ("{#", "#}")] {
        let Some(offset) = tail.find(opener) else {
            continue;
        };
        let position = from + offset;
        let is_earlier = match earliest {
            Some((best_position, _)) => position < best_position,
            None => true,
        };
        if is_earlier {
            earliest = Some((position, closer));
        }
    }

    earliest
}
590
/// Replaces everything except `\n` in `segment` with spaces, keeping the
/// byte length of the input.
///
/// The mask is built per byte rather than per `char`, so a multi-byte
/// character becomes as many spaces as it has UTF-8 bytes. Byte offsets that
/// follow the masked segment therefore stay valid. Byte `0x0A` never occurs
/// inside a multi-byte UTF-8 sequence, so every real newline is preserved and
/// no spurious one is introduced.
fn mask_non_newlines(segment: &str) -> String {
    segment
        .bytes()
        .map(|byte| if byte == b'\n' { '\n' } else { ' ' })
        .collect()
}
597
598fn suppress_noqa_issues(issues: Vec<Issue>, document: &LintDocument<'_>) -> Vec<Issue> {
599 issues
600 .into_iter()
601 .filter(|issue| {
602 let Some(line) = issue_line(issue, document) else {
603 return true;
604 };
605 !document.noqa.is_suppressed(line, &issue.code)
606 })
607 .collect()
608}
609
610fn issue_line(issue: &Issue, document: &LintDocument<'_>) -> Option<usize> {
611 if let Some(span) = issue.span {
612 return Some(offset_to_line(document.sql, span.start));
613 }
614
615 let statement_index = issue.statement_index?;
616 let statement = document
617 .statements
618 .iter()
619 .find(|statement| statement.statement_index == statement_index)?;
620 Some(offset_to_line(
621 document.sql,
622 statement.statement_range.start,
623 ))
624}
625
/// Converts a byte offset into a 1-based line number.
///
/// The result is one plus the number of `\n` bytes located before `offset`.
/// Offsets past the end of `sql` are treated as the end of `sql`.
fn offset_to_line(sql: &str, offset: usize) -> usize {
    let prefix_len = offset.min(sql.len());
    let newlines_before = sql.as_bytes()[..prefix_len]
        .iter()
        .filter(|&&byte| byte == b'\n')
        .count();
    newlines_before + 1
}
634
#[cfg(test)]
mod tests {
    use super::{normalize_issues, strip_templated_areas};
    use crate::types::{Issue, IssueAutofixApplicability, IssuePatchEdit, Span};

    /// Warning on statement 0 covering the first byte, shared by the
    /// normalisation tests.
    fn spanned_warning() -> Issue {
        Issue::warning("LINT_X", "lint message")
            .with_statement(0)
            .with_span(Span::new(0, 1))
    }

    /// Single edit replacing the first byte with `x`.
    fn first_byte_edit() -> Vec<IssuePatchEdit> {
        vec![IssuePatchEdit::new(Span::new(0, 1), "x")]
    }

    #[test]
    fn strip_templated_areas_preserves_lines_and_replaces_tag_content() {
        let templated = "SELECT {{ \"x\" }} AS x\nFROM t\nWHERE {% if true %}1{% endif %} = 1";
        let masked = strip_templated_areas(templated);

        // Masking must not change the number of lines.
        assert_eq!(masked.lines().count(), templated.lines().count());
        for opener in ["{{", "{%"] {
            assert!(!masked.contains(opener));
        }
        for untouched in ["SELECT", "FROM t"] {
            assert!(masked.contains(untouched));
        }
    }

    #[test]
    fn normalize_issues_keeps_distinct_autofix_metadata() {
        // Same issue twice, differing only in autofix applicability.
        let with_safe_fix =
            spanned_warning().with_autofix_edits(IssueAutofixApplicability::Safe, first_byte_edit());
        let with_unsafe_fix = spanned_warning()
            .with_autofix_edits(IssueAutofixApplicability::Unsafe, first_byte_edit());

        let normalized = normalize_issues(vec![with_unsafe_fix, with_safe_fix]);
        assert_eq!(normalized.len(), 2);
    }

    #[test]
    fn normalize_issues_dedups_when_autofix_matches() {
        let original =
            spanned_warning().with_autofix_edits(IssueAutofixApplicability::Safe, first_byte_edit());
        let duplicate = original.clone();

        let normalized = normalize_issues(vec![duplicate, original]);
        assert_eq!(normalized.len(), 1);
    }
}