1use std::borrow::Cow;
2use std::fs::File;
3use std::io::{BufRead, BufReader};
4use std::path::{Path, PathBuf};
5use std::sync::{Arc, OnceLock};
6
7use crate::Formatter;
8use crate::core::config::FluffConfig;
9use crate::core::linter::common::{ParsedString, RenderedFile};
10use crate::core::linter::linted_file::LintedFile;
11use crate::core::linter::linting_result::LintingResult;
12use crate::core::rules::noqa::IgnoreMask;
13use crate::core::rules::{ErasedRule, Exception, LintPhase, RulePack};
14use crate::rules::get_ruleset;
15use crate::templaters::raw::RawTemplater;
16use crate::templaters::{TEMPLATERS, Templater};
17use ahash::{AHashMap, AHashSet};
18use itertools::Itertools;
19use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator as _, ParallelIterator as _};
20use rustc_hash::FxHashMap;
21use smol_str::{SmolStr, ToSmolStr};
22use sqruff_lib_core::dialects::Dialect;
23use sqruff_lib_core::dialects::syntax::{SyntaxKind, SyntaxSet};
24use sqruff_lib_core::errors::{
25 SQLBaseError, SQLFluffUserError, SQLLexError, SQLLintError, SQLParseError,
26};
27use sqruff_lib_core::helpers;
28use sqruff_lib_core::linter::compute_anchor_edit_info;
29use sqruff_lib_core::parser::Parser;
30use sqruff_lib_core::parser::segments::fix::SourceFix;
31use sqruff_lib_core::parser::segments::{ErasedSegment, Tables};
32use sqruff_lib_core::templaters::TemplatedFile;
33use walkdir::WalkDir;
34
/// Orchestrates the full lint pipeline: render (template) -> lex -> parse ->
/// rule crawl / fix application, over strings, files or whole directory trees.
pub struct Linter {
    /// Effective configuration; also the source of the dialect and rule pack.
    config: FluffConfig,
    /// Optional sink for progress / violation reporting (e.g. CLI output).
    formatter: Option<Arc<dyn Formatter>>,
    /// Templater used by `render_string`; resolved once at construction.
    templater: &'static dyn Templater,
    /// Lazily-built rule pack derived from `config`; reset by `config_mut`.
    rules: OnceLock<Vec<ErasedRule>>,

    /// When true, unparsable sections are surfaced as parse errors.
    include_parse_errors: bool,
}
44
impl Linter {
    /// Build a new [`Linter`].
    ///
    /// If `templater` is `None` it is resolved from the configuration via
    /// [`Linter::get_templater`]. Rules are not resolved here; they are built
    /// lazily on first call to [`Linter::rules`].
    pub fn new(
        config: FluffConfig,
        formatter: Option<Arc<dyn Formatter>>,
        templater: Option<&'static dyn Templater>,
        include_parse_errors: bool,
    ) -> Linter {
        let templater: &'static dyn Templater = match templater {
            Some(templater) => templater,
            None => Linter::get_templater(&config),
        };
        Linter {
            config,
            formatter,
            templater,
            rules: OnceLock::new(),
            include_parse_errors,
        }
    }

    /// Resolve the templater named by the `core.templater` config value.
    ///
    /// Falls back to [`RawTemplater`] when no templater is configured.
    ///
    /// # Panics
    /// Panics if the configured name matches none of the built-in
    /// [`TEMPLATERS`].
    pub fn get_templater(config: &FluffConfig) -> &'static dyn Templater {
        let templater_name = config.get("templater", "core").as_string();
        match templater_name {
            Some(name) => match TEMPLATERS.into_iter().find(|t| t.name() == name) {
                Some(t) => t,
                None => panic!("Unknown templater: {name}"),
            },
            None => &RawTemplater,
        }
    }

    /// Lint an in-memory SQL string, labelling it `<string input>` in any
    /// reported violations. Thin convenience wrapper over [`Self::lint_string`].
    pub fn lint_string_wrapped(&mut self, sql: &str, fix: bool) -> LintedFile {
        let filename = "<string input>".to_owned();
        self.lint_string(sql, Some(filename), fix)
    }

    /// Template, lex and parse a SQL string (no rules are run).
    ///
    /// `filename` defaults to `<string>` and is only used for reporting.
    ///
    /// # Errors
    /// Returns an error if rendering (templating) fails, e.g. when no dialect
    /// is specified or the templater itself errors.
    pub fn parse_string(
        &self,
        tables: &Tables,
        sql: &str,
        filename: Option<String>,
    ) -> Result<ParsedString, SQLFluffUserError> {
        let f_name = filename.unwrap_or_else(|| "<string>".to_string());

        // Allow inline config directives in the source to update `self.config`.
        self.config.process_raw_file_for_config(sql);
        let rendered = self.render_string(sql, f_name.clone(), &self.config)?;

        Ok(self.parse_rendered(tables, rendered))
    }

    /// Parse then lint (and optionally fix) a SQL string.
    ///
    /// # Panics
    /// NOTE(review): unwraps the parse result, so a templating/render failure
    /// panics rather than being reported as a violation.
    pub fn lint_string(&self, sql: &str, filename: Option<String>, fix: bool) -> LintedFile {
        let tables = Tables::default();
        let parsed = self.parse_string(&tables, sql, filename).unwrap();

        self.lint_parsed(&tables, parsed, fix)
    }

    /// Lint a set of paths, expanding directories to the SQL files they
    /// contain and skipping any path for which `ignorer` returns true.
    ///
    /// An empty `paths` defaults to the current working directory. Files are
    /// processed in parallel (rayon) when the templater supports it,
    /// serially otherwise.
    pub fn lint_paths(
        &mut self,
        mut paths: Vec<PathBuf>,
        fix: bool,
        ignorer: &(dyn Fn(&Path) -> bool + Send + Sync),
    ) -> LintingResult {
        if paths.is_empty() {
            paths.push(std::env::current_dir().unwrap());
        }

        let mut expanded_paths = Vec::new();

        for path in paths {
            if path.is_file() {
                expanded_paths.push(path.to_string_lossy().to_string());
            } else {
                // Directories are walked recursively for SQL files.
                expanded_paths.extend(self.paths_from_path(path, None, None, None, None));
            };
        }

        let paths: Vec<String> = expanded_paths
            .into_iter()
            .filter(|path| !ignorer(Path::new(path)))
            .collect_vec();

        let mut files = Vec::with_capacity(paths.len());

        if self.templater.can_process_in_parallel() {
            paths
                .par_iter()
                .map(|path| {
                    let rendered = self.render_file(path.clone());
                    self.lint_rendered(rendered, fix)
                })
                .collect_into_vec(&mut files);
        } else {
            files.extend(paths.iter().map(|path| {
                let rendered = self.render_file(path.clone());
                self.lint_rendered(rendered, fix)
            }));
        };

        LintingResult::new(files)
    }

    /// Materialise the rule pack selected by the current configuration.
    pub fn get_rulepack(&self) -> RulePack {
        let rs = get_ruleset();
        rs.get_rulepack(&self.config)
    }

    /// Read a file from disk and render (template) it.
    ///
    /// # Panics
    /// NOTE(review): unwraps both the file read and the render, so I/O or
    /// templating failures panic here.
    pub fn render_file(&self, fname: String) -> RenderedFile {
        let in_str = std::fs::read_to_string(&fname).unwrap();
        self.render_string(&in_str, fname, &self.config).unwrap()
    }

    /// Parse an already-rendered file and run the rules over it.
    pub fn lint_rendered(&self, rendered: RenderedFile, fix: bool) -> LintedFile {
        let tables = Tables::default();
        let parsed = self.parse_rendered(&tables, rendered);
        self.lint_parsed(&tables, parsed, fix)
    }

    /// Run the rules over a parsed file and assemble the [`LintedFile`].
    ///
    /// Violations masked by `noqa`-style ignore comments are filtered out,
    /// and the result is dispatched to the formatter (if any) before being
    /// returned.
    pub fn lint_parsed(
        &self,
        tables: &Tables,
        parsed_string: ParsedString,
        fix: bool,
    ) -> LintedFile {
        // Start from lex/parse violations collected earlier in the pipeline.
        let mut violations = parsed_string.violations;

        // If parsing produced no tree there is nothing to lint or patch.
        let (patches, ignore_mask, initial_linting_errors) =
            parsed_string
                .tree
                .map_or((Vec::new(), None, Vec::new()), |erased_segment| {
                    let (tree, ignore_mask, initial_linting_errors) = self.lint_fix_parsed(
                        tables,
                        erased_segment,
                        &parsed_string.templated_file,
                        fix,
                    );
                    // Derive source patches from the (possibly fixed) tree.
                    let patches = tree.iter_patches(&parsed_string.templated_file);
                    (patches, ignore_mask, initial_linting_errors)
                });
        violations.extend(initial_linting_errors.into_iter().map_into());

        // Drop any violation suppressed by an ignore (noqa) comment.
        if let Some(ignore_mask) = &ignore_mask {
            violations.retain(|violation| !ignore_mask.is_masked(violation, None));
        }

        let linted_file = LintedFile::new(
            parsed_string.filename,
            patches,
            parsed_string.templated_file,
            violations,
            ignore_mask,
        );

        if let Some(formatter) = &self.formatter {
            formatter.dispatch_file_violations(&linted_file);
        }

        linted_file
    }

    /// Core lint/fix loop over a parsed tree.
    ///
    /// When `fix` is true, runs the `Main` then `Post` rule phases, applying
    /// fixes and re-linting until the tree stops changing, a previously-seen
    /// tree recurs (loop detection via `previous_versions`), or the loop
    /// limit (10 for `Main`, 2 for `Post`) is reached. When `fix` is false a
    /// single `Main` pass collects violations only.
    ///
    /// Returns the (possibly rewritten) tree, the ignore mask built from
    /// `noqa` comments (unless disabled via `core.disable_noqa`), and the
    /// violations observed on the first pass.
    pub fn lint_fix_parsed(
        &self,
        tables: &Tables,
        mut tree: ErasedSegment,
        templated_file: &TemplatedFile,
        fix: bool,
    ) -> (ErasedSegment, Option<IgnoreMask>, Vec<SQLLintError>) {
        let mut initial_violations = Vec::new();
        let phases: &[_] = if fix {
            &[LintPhase::Main, LintPhase::Post]
        } else {
            &[LintPhase::Main]
        };
        // Seed with the starting tree so applying a fix that round-trips back
        // to the original is detected as a loop.
        let mut previous_versions: AHashSet<(SmolStr, Vec<SourceFix>)> =
            [(tree.raw().to_smolstr(), vec![])].into_iter().collect();

        // Safety valve against rules whose fixes oscillate forever.
        let loop_limit = if fix { 10 } else { 1 };
        let (ignore_mask, violations): (Option<IgnoreMask>, Vec<SQLBaseError>) = {
            let disable_noqa = self
                .config
                .get("disable_noqa", "core")
                .as_bool()
                .unwrap_or(false);
            if disable_noqa {
                (None, Vec::new())
            } else {
                // Errors here are malformed noqa directives, reported as
                // violations in their own right.
                let (ignore_mask, errors) = IgnoreMask::from_tree(&tree);
                (Some(ignore_mask), errors)
            }
        };

        initial_violations.extend(violations.into_iter().map_into());

        // Reused across rules; cleared before each rule's crawl.
        let mut anchor_info = FxHashMap::default();

        for phase in phases {
            // The Post phase gets a tighter loop limit than Main.
            let loop_limit = if *phase == LintPhase::Main {
                loop_limit
            } else {
                2
            };
            let mut rules_this_phase = if phases.len() > 1 {
                &self
                    .rules()
                    .iter()
                    .filter(|rule| rule.lint_phase() == *phase)
                    .cloned()
                    .collect_vec()
            } else {
                self.rules()
            };

            for loop_ in 0..loop_limit {
                let is_first_linter_pass = *phase == phases[0] && loop_ == 0;
                let mut changed = false;

                // On the very first pass run *all* rules, regardless of
                // phase, so every violation is collected up front.
                if is_first_linter_pass {
                    rules_this_phase = self.rules();
                }

                for rule in rules_this_phase {
                    anchor_info.clear();

                    // After the first pass only fix-compatible rules may run
                    // in fix mode.
                    if fix && !is_first_linter_pass && !rule.is_fix_compatible() {
                        continue;
                    }

                    let result = crate::core::rules::crawl(
                        rule,
                        tables,
                        &self.config.dialect,
                        templated_file,
                        tree.clone(),
                        &self.config,
                        &mut |mut result| {
                            // Skip results suppressed by the ignore mask.
                            if ignore_mask.as_ref().is_none_or(|ignore_mask| {
                                !ignore_mask.is_masked(&result, rule.into())
                            }) {
                                // Accumulate this result's fixes, keyed by
                                // anchor segment, for later application.
                                compute_anchor_edit_info(
                                    &mut anchor_info,
                                    std::mem::take(&mut result.fixes),
                                );

                                // Violations are only recorded once, on the
                                // first pass, to avoid duplicates.
                                if is_first_linter_pass {
                                    initial_violations.extend(result.to_linting_error(rule));
                                }
                            }
                        },
                    );

                    // A rule that panicked/errored is reported once and then
                    // skipped; linting continues with the remaining rules.
                    if let Err(Exception) = result {
                        if is_first_linter_pass {
                            initial_violations.push(
                                SQLLintError::new(
                                    "Unexpected exception. Could you open an issue at https://github.com/quarylabs/sqruff",
                                    tree.clone(),
                                    false,
                                ),
                            );
                        }

                        continue;
                    }

                    if fix && !anchor_info.is_empty() {
                        let (new_tree, _, _) = tree.apply_fixes(&mut anchor_info);

                        let loop_check_tuple =
                            (new_tree.raw().to_smolstr(), new_tree.get_source_fixes());

                        // Only accept the fixed tree if we haven't seen this
                        // exact state before (prevents fix oscillation).
                        if previous_versions.insert(loop_check_tuple) {
                            tree = new_tree;
                            changed = true;
                            continue;
                        }
                    }
                }

                // Fixed point reached: no rule changed the tree this round.
                if fix && !changed {
                    break;
                }
            }
        }

        (tree, ignore_mask, initial_violations)
    }

    /// Normalise newlines, check a dialect is configured, then run the
    /// templater to produce a [`RenderedFile`].
    ///
    /// # Errors
    /// Returns an error when no dialect is specified or the templater fails.
    pub fn render_string(
        &self,
        sql: &str,
        filename: String,
        config: &FluffConfig,
    ) -> Result<RenderedFile, SQLFluffUserError> {
        let sql = Self::normalise_newlines(sql);

        if let Some(error) = config.verify_dialect_specified() {
            return Err(error);
        }

        // NOTE(review): templater violations are never populated here; the
        // downstream `unimplemented!()` in `parse_rendered` relies on that.
        let templater_violations = vec![];
        match self
            .templater
            .process(sql.as_ref(), filename.as_str(), config, &self.formatter)
        {
            Ok(templated_file) => Ok(RenderedFile {
                templated_file,
                templater_violations,
                filename,
                source_str: sql.to_string(),
            }),
            Err(err) => Err(SQLFluffUserError::new(format!(
                "Failed to template file (unknown) with error {err:?}"
            ))),
        }
    }

    /// Lex and parse a rendered file into a [`ParsedString`].
    ///
    /// Lexing is skipped entirely for non-templated files (tokens stay
    /// `None`, so no parse happens either). Templater and lexer violations
    /// currently hit `unimplemented!()` — see notes inline.
    pub fn parse_rendered(&self, tables: &Tables, rendered: RenderedFile) -> ParsedString {
        let violations = rendered.templater_violations.clone();
        // NOTE(review): unreachable today because render_string always
        // produces an empty violations vec.
        if !violations.is_empty() {
            unimplemented!()
        }

        let mut violations = Vec::new();
        let tokens = if rendered.templated_file.is_templated() {
            let (t, lvs) = Self::lex_templated_file(
                tables,
                rendered.templated_file.clone(),
                &self.config.dialect,
            );
            // Lexer violations are not yet propagated — placeholder.
            if !lvs.is_empty() {
                unimplemented!("violations.extend(lvs);")
            }
            t
        } else {
            None
        };

        let parsed: Option<ErasedSegment>;
        if let Some(token_list) = tokens {
            let (p, pvs) = Self::parse_tokens(
                tables,
                &token_list,
                &self.config,
                Some(rendered.filename.to_string()),
                self.include_parse_errors,
            );
            parsed = p;
            violations.extend(pvs.into_iter().map_into());
        } else {
            parsed = None;
        };

        ParsedString {
            tree: parsed,
            violations,
            templated_file: rendered.templated_file,
            filename: rendered.filename,
            source_str: rendered.source_str,
        }
    }

    /// Parse a token stream into a tree, collecting parse errors.
    ///
    /// When `include_parse_errors` is set, every `Unparsable` node found in
    /// the resulting tree is additionally reported as a parse error.
    fn parse_tokens(
        tables: &Tables,
        tokens: &[ErasedSegment],
        config: &FluffConfig,
        filename: Option<String>,
        include_parse_errors: bool,
    ) -> (Option<ErasedSegment>, Vec<SQLParseError>) {
        let parser: Parser = config.into();
        let mut violations: Vec<SQLParseError> = Vec::new();

        let parsed = match parser.parse(tables, tokens, filename) {
            Ok(parsed) => parsed,
            Err(error) => {
                violations.push(error);
                None
            }
        };

        if include_parse_errors && let Some(parsed) = &parsed {
            let unparsables = parsed.recursive_crawl(
                &SyntaxSet::single(SyntaxKind::Unparsable),
                true,
                &SyntaxSet::EMPTY,
                true,
            );

            violations.extend(unparsables.into_iter().map(|segment| SQLParseError {
                description: "Unparsable section".into(),
                segment: segment.into(),
            }));
        };

        (parsed, violations)
    }

    /// Lex a templated file into raw token segments.
    ///
    /// Returns `None` tokens (plus any lex violations) when lexing produced
    /// nothing, e.g. for empty input.
    pub fn lex_templated_file(
        tables: &Tables,
        templated_file: TemplatedFile,
        dialect: &Dialect,
    ) -> (Option<Vec<ErasedSegment>>, Vec<SQLLexError>) {
        let mut violations: Vec<SQLLexError> = vec![];
        log::debug!("LEXING RAW ({})", templated_file.name());
        let lexer = dialect.lexer();
        let (tokens, lex_vs) = lexer.lex(tables, templated_file);

        violations.extend(lex_vs);

        if tokens.is_empty() {
            return (None, violations);
        }

        (tokens.into(), violations)
    }

    /// Replace CRLF and bare CR line endings with LF. Borrows the input
    /// unchanged (no allocation) when it already uses LF only.
    fn normalise_newlines(string: &str) -> Cow<'_, str> {
        lazy_regex::regex!("\r\n|\r").replace_all(string, "\n")
    }

    /// Expand a path into the sorted, normalised list of SQL files beneath
    /// it (or the file itself, if `path` is a file).
    ///
    /// Files match when their lowercased name ends with one of the
    /// configured SQL extensions. Lines from any `.sqlfluffignore` file
    /// encountered are read into `ignores`, but NOTE(review): that map is
    /// never consulted afterwards, so ignore files only cause the ignore
    /// file itself to be skipped.
    ///
    /// # Panics
    /// Panics when `path` does not exist (unless `ignore_non_existent_files`
    /// is `Some(true)`) and on non-UTF-8 path components.
    fn paths_from_path(
        &self,
        path: PathBuf,
        ignore_file_name: Option<String>,
        ignore_non_existent_files: Option<bool>,
        ignore_files: Option<bool>,
        working_path: Option<String>,
    ) -> Vec<String> {
        let ignore_file_name = ignore_file_name.unwrap_or_else(|| String::from(".sqlfluffignore"));
        let ignore_non_existent_files = ignore_non_existent_files.unwrap_or(false);
        let ignore_files = ignore_files.unwrap_or(true);
        // Currently unused beyond defaulting — kept for parity with callers.
        let _working_path =
            working_path.unwrap_or_else(|| std::env::current_dir().unwrap().display().to_string());

        let Ok(metadata) = std::fs::metadata(&path) else {
            if ignore_non_existent_files {
                return Vec::new();
            } else {
                panic!("Specified path does not exist. Check it/they exist(s): {path:?}");
            }
        };

        let is_exact_file = metadata.is_file();

        // Build (dirpath, _, filenames) triples: either the single file, or
        // every directory entry under `path`.
        let mut path_walk = if is_exact_file {
            let path = Path::new(&path);
            let dirpath = path.parent().unwrap().to_str().unwrap().to_string();
            let files = vec![path.file_name().unwrap().to_str().unwrap().to_string()];
            vec![(dirpath, None, files)]
        } else {
            WalkDir::new(&path)
                .into_iter()
                .filter_map(Result::ok)
                .map(|entry| {
                    let dirpath = entry.path().parent().unwrap().to_str().unwrap().to_string();
                    let files = vec![entry.file_name().to_str().unwrap().to_string()];
                    (dirpath, None, files)
                })
                .collect_vec()
        };

        // NOTE(review): always empty today, so the mapping below is a no-op;
        // presumably a placeholder for honouring ignore files found above
        // the search root.
        let ignore_file_paths: Vec<String> = Vec::new();

        let path_walk_ignore_file: Vec<(String, Option<()>, Vec<String>)> = ignore_file_paths
            .iter()
            .map(|ignore_file_path| {
                let ignore_file_path = Path::new(ignore_file_path);

                let dir_name = ignore_file_path
                    .parent()
                    .unwrap()
                    .to_str()
                    .unwrap()
                    .to_string();

                let file_name = vec![
                    ignore_file_path
                        .file_name()
                        .unwrap()
                        .to_str()
                        .unwrap()
                        .to_string(),
                ];

                (dir_name, None, file_name)
            })
            .collect();

        path_walk.extend(path_walk_ignore_file);

        let mut buffer = Vec::new();
        let mut ignores = AHashMap::new();
        let sql_file_exts = self.config.sql_file_exts();

        for (dirpath, _, filenames) in path_walk {
            for fname in filenames {
                let fpath = Path::new(&dirpath).join(&fname);

                // Ignore files are read (one pattern per line) rather than
                // treated as lintable SQL.
                if ignore_files && fname == ignore_file_name {
                    let file = File::open(&fpath).unwrap();
                    let lines = BufReader::new(file).lines();
                    let spec = lines.map_while(Result::ok);
                    ignores.insert(dirpath.clone(), spec.collect::<Vec<String>>());

                    continue;
                }

                // Case-insensitive extension match (so `.SQL` is found).
                for ext in sql_file_exts {
                    if fname.to_lowercase().ends_with(ext) {
                        buffer.push(fpath.clone());
                    }
                }
            }
        }

        // Normalise and dedupe, then sort for deterministic output.
        let mut filtered_buffer = AHashSet::new();

        for fpath in buffer {
            let npath = helpers::normalize(&fpath).to_str().unwrap().to_string();
            filtered_buffer.insert(npath);
        }

        let mut files = filtered_buffer.into_iter().collect_vec();
        files.sort();
        files
    }

    /// Shared access to the linter's configuration.
    pub fn config(&self) -> &FluffConfig {
        &self.config
    }

    /// Mutable access to the configuration. Also clears the cached rule
    /// pack so a config change is reflected on the next `rules()` call.
    pub fn config_mut(&mut self) -> &mut FluffConfig {
        self.rules = OnceLock::new();
        &mut self.config
    }

    /// The rule pack for the current configuration, built on first use.
    pub fn rules(&self) -> &[ErasedRule] {
        self.rules.get_or_init(|| self.get_rulepack().rules)
    }

    /// Shared access to the formatter, if one is attached.
    pub fn formatter(&self) -> Option<&Arc<dyn Formatter>> {
        self.formatter.as_ref()
    }

    /// Mutable access to the formatter, if one is attached.
    pub fn formatter_mut(&mut self) -> Option<&mut Arc<dyn Formatter>> {
        self.formatter.as_mut()
    }
}
639
#[cfg(test)]
mod tests {
    use sqruff_lib_core::parser::segments::Tables;

    use crate::core::config::FluffConfig;
    use crate::core::linter::core::Linter;

    /// A linter built from an entirely default configuration, with no
    /// formatter, no custom templater and parse errors excluded.
    fn build_default_linter() -> Linter {
        Linter::new(FluffConfig::new(<_>::default(), None, None), None, None, false)
    }

    /// Replace both separator styles with dots so expected values are
    /// platform independent.
    fn normalise_paths(paths: Vec<String>) -> Vec<String> {
        let mut normalised = Vec::with_capacity(paths.len());
        for path in paths {
            normalised.push(path.replace(['/', '\\'], "."));
        }
        normalised
    }

    #[test]
    fn test_linter_path_from_paths_dir() {
        // A directory expands to every SQL file beneath it, sorted.
        let linter = build_default_linter();
        let discovered =
            linter.paths_from_path("test/fixtures/lexer".into(), None, None, None, None);
        assert_eq!(
            normalise_paths(discovered),
            vec![
                "test.fixtures.lexer.basic.sql",
                "test.fixtures.lexer.block_comment.sql",
                "test.fixtures.lexer.inline_comment.sql",
            ]
        );
    }

    #[test]
    fn test_linter_path_from_paths_default() {
        // Default extensions pick up .sql (any case) and skip .txt.
        let linter = build_default_linter();
        let discovered = normalise_paths(linter.paths_from_path(
            "test/fixtures/linter".into(),
            None,
            None,
            None,
            None,
        ));
        assert!(discovered.contains(&"test.fixtures.linter.passing.sql".to_string()));
        assert!(discovered.contains(&"test.fixtures.linter.passing_cap_extension.SQL".to_string()));
        assert!(!discovered.contains(&"test.fixtures.linter.discovery_file.txt".to_string()));
    }

    #[test]
    fn test_linter_path_from_paths_exts() {
        // Overriding the extension list inverts which files are discovered.
        let config =
            FluffConfig::new(<_>::default(), None, None).with_sql_file_exts(vec![".txt".into()]);
        let linter = Linter::new(config, None, None, false);
        let discovered =
            linter.paths_from_path("test/fixtures/linter".into(), None, None, None, None);

        let normalized_paths = normalise_paths(discovered);

        assert!(!normalized_paths.contains(&"test.fixtures.linter.passing.sql".into()));
        assert!(
            !normalized_paths.contains(&"test.fixtures.linter.passing_cap_extension.SQL".into())
        );
        assert!(normalized_paths.contains(&"test.fixtures.linter.discovery_file.txt".into()));
    }

    #[test]
    fn test_linter_path_from_paths_file() {
        // A single-file path is returned as-is (normalised).
        let linter = build_default_linter();
        let discovered = linter.paths_from_path(
            "test/fixtures/linter/indentation_errors.sql".into(),
            None,
            None,
            None,
            None,
        );

        assert_eq!(
            normalise_paths(discovered),
            &["test.fixtures.linter.indentation_errors.sql"]
        );
    }

    #[test]
    fn test_linter_empty_file() {
        // Empty input parses cleanly with no violations.
        let linter = build_default_linter();
        let tables = Tables::default();
        let parsed = linter.parse_string(&tables, "", None).unwrap();

        assert!(parsed.violations.is_empty());
    }

    #[test]
    #[ignore = "The implementation of Lexer::lex_templated_file is required"]
    fn test_advanced_api_methods() {
        let sql = "
        WITH cte AS (
            SELECT * FROM tab_a
        )
        SELECT
            cte.col_a,
            tab_b.col_b
        FROM cte
        INNER JOIN tab_b;
        "
        .to_string();

        let linter = build_default_linter();
        let tables = Tables::default();
        let _parsed = linter.parse_string(&tables, &sql, None).unwrap();
    }

    #[test]
    fn test_normalise_newlines() {
        // CRLF and bare CR both collapse to LF; existing LFs are untouched.
        let in_str = "SELECT\r\n foo\n FROM \r \n\r bar;";
        let out_str = "SELECT\n foo\n FROM \n \n\n bar;";

        assert_eq!(Linter::normalise_newlines(in_str), out_str);
    }
}