1use std::borrow::Cow;
2use std::fs::File;
3use std::io::{BufRead, BufReader};
4use std::path::{Path, PathBuf};
5use std::sync::{Arc, OnceLock};
6
7use crate::Formatter;
8use crate::core::config::FluffConfig;
9use crate::core::linter::common::{ParsedString, RenderedFile};
10use crate::core::linter::linted_file::LintedFile;
11use crate::core::linter::linting_result::LintingResult;
12use crate::core::rules::noqa::IgnoreMask;
13use crate::core::rules::{ErasedRule, Exception, LintPhase, RulePack};
14use crate::rules::get_ruleset;
15use crate::templaters::raw::RawTemplater;
16use crate::templaters::{TEMPLATERS, Templater};
17use ahash::{AHashMap, AHashSet};
18use itertools::Itertools;
19use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator as _, ParallelIterator as _};
20use rustc_hash::FxHashMap;
21use smol_str::{SmolStr, ToSmolStr};
22use sqruff_lib_core::dialects::Dialect;
23use sqruff_lib_core::dialects::syntax::{SyntaxKind, SyntaxSet};
24use sqruff_lib_core::errors::{
25 SQLBaseError, SQLFluffUserError, SQLLexError, SQLLintError, SQLParseError,
26};
27use sqruff_lib_core::helpers;
28use sqruff_lib_core::linter::compute_anchor_edit_info;
29use sqruff_lib_core::parser::Parser;
30use sqruff_lib_core::parser::segments::fix::SourceFix;
31use sqruff_lib_core::parser::segments::{ErasedSegment, Tables};
32use sqruff_lib_core::templaters::TemplatedFile;
33use walkdir::WalkDir;
34
35pub struct Linter {
36 config: FluffConfig,
37 formatter: Option<Arc<dyn Formatter>>,
38 templater: &'static dyn Templater,
39 rules: OnceLock<Vec<ErasedRule>>,
40
41 include_parse_errors: bool,
43}
44
45impl Linter {
46 pub fn new(
47 config: FluffConfig,
48 formatter: Option<Arc<dyn Formatter>>,
49 templater: Option<&'static dyn Templater>,
50 include_parse_errors: bool,
51 ) -> Linter {
52 let templater: &'static dyn Templater = match templater {
53 Some(templater) => templater,
54 None => Linter::get_templater(&config),
55 };
56 Linter {
57 config,
58 formatter,
59 templater,
60 rules: OnceLock::new(),
61 include_parse_errors,
62 }
63 }
64
65 pub fn get_templater(config: &FluffConfig) -> &'static dyn Templater {
66 let templater_name = config.get("templater", "core").as_string();
67 match templater_name {
68 Some(name) => match TEMPLATERS.into_iter().find(|t| t.name() == name) {
69 Some(t) => t,
70 None => panic!("Unknown templater: {name}"),
71 },
72 None => &RawTemplater,
73 }
74 }
75
76 pub fn lint_string_wrapped(&mut self, sql: &str, fix: bool) -> LintedFile {
78 let filename = "<string input>".to_owned();
79 self.lint_string(sql, Some(filename), fix)
80 }
81
82 pub fn parse_string(
84 &self,
85 tables: &Tables,
86 sql: &str,
87 filename: Option<String>,
88 ) -> Result<ParsedString, SQLFluffUserError> {
89 let f_name = filename.unwrap_or_else(|| "<string>".to_string());
90
91 self.config.process_raw_file_for_config(sql);
93 let rendered = self.render_string(sql, f_name.clone(), &self.config)?;
94
95 Ok(self.parse_rendered(tables, rendered))
96 }
97
98 pub fn lint_string(&self, sql: &str, filename: Option<String>, fix: bool) -> LintedFile {
100 let tables = Tables::default();
101 let parsed = self.parse_string(&tables, sql, filename).unwrap();
102
103 self.lint_parsed(&tables, parsed, fix)
105 }
106
107 pub fn lint_paths(
110 &mut self,
111 mut paths: Vec<PathBuf>,
112 fix: bool,
113 ignorer: &(dyn Fn(&Path) -> bool + Send + Sync),
114 ) -> LintingResult {
115 if paths.is_empty() {
116 paths.push(std::env::current_dir().unwrap());
117 }
118
119 let mut expanded_paths = Vec::new();
120
121 for path in paths {
122 if path.is_file() {
123 expanded_paths.push(path.to_string_lossy().to_string());
124 } else {
125 expanded_paths.extend(self.paths_from_path(
126 path,
127 None,
128 None,
129 None,
130 None,
131 Some(ignorer),
132 ));
133 };
134 }
135
136 let paths: Vec<String> = expanded_paths
137 .into_iter()
138 .filter(|path| {
139 let should_ignore = ignorer(Path::new(path));
140 if should_ignore {
141 log::debug!(
142 "Filtering out ignored file '{}' from final processing list",
143 path
144 );
145 }
146 !should_ignore
147 })
148 .collect_vec();
149
150 let mut files = Vec::with_capacity(paths.len());
151
152 if self.templater.can_process_in_parallel() {
153 paths
154 .par_iter()
155 .map(|path| {
156 let rendered = self.render_file(path.clone());
157 self.lint_rendered(rendered, fix)
158 })
159 .collect_into_vec(&mut files);
160 } else {
161 files.extend(paths.iter().map(|path| {
162 let rendered = self.render_file(path.clone());
163 self.lint_rendered(rendered, fix)
164 }));
165 };
166
167 LintingResult::new(files)
168 }
169
170 pub fn get_rulepack(&self) -> RulePack {
171 let rs = get_ruleset();
172 rs.get_rulepack(&self.config)
173 }
174
175 pub fn render_file(&self, fname: String) -> RenderedFile {
176 let in_str = std::fs::read_to_string(&fname).unwrap();
177 self.render_string(&in_str, fname, &self.config).unwrap()
178 }
179
180 pub fn lint_rendered(&self, rendered: RenderedFile, fix: bool) -> LintedFile {
181 let tables = Tables::default();
182 let parsed = self.parse_rendered(&tables, rendered);
183 self.lint_parsed(&tables, parsed, fix)
184 }
185
186 pub fn lint_parsed(
187 &self,
188 tables: &Tables,
189 parsed_string: ParsedString,
190 fix: bool,
191 ) -> LintedFile {
192 let mut violations = parsed_string.violations;
193
194 let (patches, ignore_mask, initial_linting_errors) =
195 parsed_string
196 .tree
197 .map_or((Vec::new(), None, Vec::new()), |erased_segment| {
198 let (tree, ignore_mask, initial_linting_errors) = self.lint_fix_parsed(
199 tables,
200 erased_segment,
201 &parsed_string.templated_file,
202 fix,
203 );
204 let patches = tree.iter_patches(&parsed_string.templated_file);
205 (patches, ignore_mask, initial_linting_errors)
206 });
207 violations.extend(initial_linting_errors.into_iter().map_into());
208
209 if let Some(ignore_mask) = &ignore_mask {
211 violations.retain(|violation| !ignore_mask.is_masked(violation, None));
212 }
213
214 let linted_file = LintedFile::new(
216 parsed_string.filename,
217 patches,
218 parsed_string.templated_file,
219 violations,
220 ignore_mask,
221 );
222
223 if let Some(formatter) = &self.formatter {
224 formatter.dispatch_file_violations(&linted_file);
225 }
226
227 linted_file
228 }
229
230 pub fn lint_fix_parsed(
231 &self,
232 tables: &Tables,
233 mut tree: ErasedSegment,
234 templated_file: &TemplatedFile,
235 fix: bool,
236 ) -> (ErasedSegment, Option<IgnoreMask>, Vec<SQLLintError>) {
237 let mut initial_violations = Vec::new();
238 let phases: &[_] = if fix {
239 &[LintPhase::Main, LintPhase::Post]
240 } else {
241 &[LintPhase::Main]
242 };
243 let mut previous_versions: AHashSet<(SmolStr, Vec<SourceFix>)> =
244 [(tree.raw().to_smolstr(), vec![])].into_iter().collect();
245
246 let loop_limit = if fix { 10 } else { 1 };
249 let (ignore_mask, violations): (Option<IgnoreMask>, Vec<SQLBaseError>) = {
251 let disable_noqa = self
252 .config
253 .get("disable_noqa", "core")
254 .as_bool()
255 .unwrap_or(false);
256 if disable_noqa {
257 (None, Vec::new())
258 } else {
259 let (ignore_mask, errors) = IgnoreMask::from_tree(&tree);
260 (Some(ignore_mask), errors)
261 }
262 };
263
264 initial_violations.extend(violations.into_iter().map_into());
265
266 let mut anchor_info = FxHashMap::default();
267
268 for phase in phases {
269 let loop_limit = if *phase == LintPhase::Main {
270 loop_limit
271 } else {
272 2
273 };
274 let mut rules_this_phase = if phases.len() > 1 {
275 &self
276 .rules()
277 .iter()
278 .filter(|rule| rule.lint_phase() == *phase)
279 .cloned()
280 .collect_vec()
281 } else {
282 self.rules()
283 };
284
285 for loop_ in 0..loop_limit {
286 let is_first_linter_pass = *phase == phases[0] && loop_ == 0;
287 let mut changed = false;
288
289 if is_first_linter_pass {
290 rules_this_phase = self.rules();
291 }
292
293 for rule in rules_this_phase {
294 anchor_info.clear();
295
296 if fix && !is_first_linter_pass && !rule.is_fix_compatible() {
301 continue;
302 }
303
304 let result = crate::core::rules::crawl(
305 rule,
306 tables,
307 &self.config.dialect,
308 templated_file,
309 tree.clone(),
310 &self.config,
311 &mut |mut result| {
312 if ignore_mask.as_ref().is_none_or(|ignore_mask| {
313 !ignore_mask.is_masked(&result, rule.into())
314 }) {
315 compute_anchor_edit_info(
316 &mut anchor_info,
317 std::mem::take(&mut result.fixes),
318 );
319
320 if is_first_linter_pass {
321 initial_violations.extend(result.to_linting_error(rule));
322 }
323 }
324 },
325 );
326
327 if let Err(Exception) = result {
328 if is_first_linter_pass {
329 initial_violations.push(
330 SQLLintError::new(
331 "Unexpected exception. Could you open an issue at https://github.com/quarylabs/sqruff",
332 tree.clone(),
333 false,
334 ),
335 );
336 }
337
338 continue;
339 }
340
341 if fix && !anchor_info.is_empty() {
342 let (new_tree, _, _) = tree.apply_fixes(&mut anchor_info);
343
344 let loop_check_tuple =
345 (new_tree.raw().to_smolstr(), new_tree.get_source_fixes());
346
347 if previous_versions.insert(loop_check_tuple) {
348 tree = new_tree;
349 changed = true;
350 continue;
351 }
352 }
353 }
354
355 if fix && !changed {
356 break;
357 }
358 }
359 }
360
361 (tree, ignore_mask, initial_violations)
362 }
363
364 pub fn render_string(
366 &self,
367 sql: &str,
368 filename: String,
369 config: &FluffConfig,
370 ) -> Result<RenderedFile, SQLFluffUserError> {
371 let sql = Self::normalise_newlines(sql);
372
373 if let Some(error) = config.verify_dialect_specified() {
374 return Err(error);
375 }
376
377 let templater_violations = vec![];
378 match self
379 .templater
380 .process(sql.as_ref(), filename.as_str(), config, &self.formatter)
381 {
382 Ok(templated_file) => Ok(RenderedFile {
383 templated_file,
384 templater_violations,
385 filename,
386 source_str: sql.to_string(),
387 }),
388 Err(err) => Err(SQLFluffUserError::new(format!(
389 "Failed to template file {filename} with error {err:?}"
390 ))),
391 }
392 }
393
394 pub fn parse_rendered(&self, tables: &Tables, rendered: RenderedFile) -> ParsedString {
396 let violations = rendered.templater_violations.clone();
397 if !violations.is_empty() {
398 unimplemented!()
399 }
400
401 let mut violations = Vec::new();
402 let tokens = if rendered.templated_file.is_templated() {
403 let (t, lvs) = Self::lex_templated_file(
404 tables,
405 rendered.templated_file.clone(),
406 &self.config.dialect,
407 );
408 if !lvs.is_empty() {
409 unimplemented!("violations.extend(lvs);")
410 }
411 t
412 } else {
413 None
414 };
415
416 let parsed: Option<ErasedSegment>;
417 if let Some(token_list) = tokens {
418 let (p, pvs) =
419 Self::parse_tokens(tables, &token_list, &self.config, self.include_parse_errors);
420 parsed = p;
421 violations.extend(pvs.into_iter().map_into());
422 } else {
423 parsed = None;
424 };
425
426 ParsedString {
427 tree: parsed,
428 violations,
429 templated_file: rendered.templated_file,
430 filename: rendered.filename,
431 source_str: rendered.source_str,
432 }
433 }
434
435 fn parse_tokens(
436 tables: &Tables,
437 tokens: &[ErasedSegment],
438 config: &FluffConfig,
439 include_parse_errors: bool,
440 ) -> (Option<ErasedSegment>, Vec<SQLParseError>) {
441 let parser: Parser = config.into();
442 let mut violations: Vec<SQLParseError> = Vec::new();
443
444 let parsed = match parser.parse(tables, tokens) {
445 Ok(parsed) => parsed,
446 Err(error) => {
447 violations.push(error);
448 None
449 }
450 };
451
452 if include_parse_errors && let Some(parsed) = &parsed {
453 let unparsables = parsed.recursive_crawl(
454 &SyntaxSet::single(SyntaxKind::Unparsable),
455 true,
456 &SyntaxSet::EMPTY,
457 true,
458 );
459
460 violations.extend(unparsables.into_iter().map(|segment| SQLParseError {
461 description: "Unparsable section".into(),
462 segment: segment.into(),
463 }));
464 };
465
466 (parsed, violations)
467 }
468
469 pub fn lex_templated_file(
471 tables: &Tables,
472 templated_file: TemplatedFile,
473 dialect: &Dialect,
474 ) -> (Option<Vec<ErasedSegment>>, Vec<SQLLexError>) {
475 let mut violations: Vec<SQLLexError> = vec![];
476 log::debug!("LEXING RAW ({})", templated_file.name());
477 let lexer = dialect.lexer();
479 let (tokens, lex_vs) = lexer.lex(tables, templated_file);
481
482 violations.extend(lex_vs);
483
484 if tokens.is_empty() {
485 return (None, violations);
486 }
487
488 (tokens.into(), violations)
489 }
490
491 fn normalise_newlines(string: &str) -> Cow<'_, str> {
493 lazy_regex::regex!("\r\n|\r").replace_all(string, "\n")
494 }
495
496 fn paths_from_path(
504 &self,
505 path: PathBuf,
506 ignore_file_name: Option<String>,
507 ignore_non_existent_files: Option<bool>,
508 ignore_files: Option<bool>,
509 working_path: Option<String>,
510 ignorer: Option<&(dyn Fn(&Path) -> bool + Send + Sync)>,
511 ) -> Vec<String> {
512 let ignore_file_name = ignore_file_name.unwrap_or_else(|| String::from(".sqlfluffignore"));
513 let ignore_non_existent_files = ignore_non_existent_files.unwrap_or(false);
514 let ignore_files = ignore_files.unwrap_or(true);
515 let _working_path =
516 working_path.unwrap_or_else(|| std::env::current_dir().unwrap().display().to_string());
517
518 let Ok(metadata) = std::fs::metadata(&path) else {
519 if ignore_non_existent_files {
520 return Vec::new();
521 } else {
522 panic!("Specified path does not exist. Check it/they exist(s): {path:?}");
523 }
524 };
525
526 let is_exact_file = metadata.is_file();
529
530 let mut path_walk = if is_exact_file {
531 let path = Path::new(&path);
532 let dirpath = path.parent().unwrap().to_str().unwrap().to_string();
533 let files = vec![path.file_name().unwrap().to_str().unwrap().to_string()];
534 vec![(dirpath, None, files)]
535 } else {
536 let walkdir = WalkDir::new(&path);
537 let entries: Vec<_> = if let Some(ignorer) = ignorer {
538 walkdir
540 .into_iter()
541 .filter_entry(|entry| {
542 let should_ignore = ignorer(entry.path());
543 if should_ignore {
544 let path_type = if entry.file_type().is_dir() {
545 "directory"
546 } else {
547 "file"
548 };
549 log::debug!(
550 "Skipping {} '{}' during file discovery traversal",
551 path_type,
552 entry.path().display()
553 );
554 }
555 !should_ignore
556 })
557 .filter_map(Result::ok)
558 .collect()
559 } else {
560 walkdir.into_iter().filter_map(Result::ok).collect()
562 };
563
564 let mut dir_files: AHashMap<String, Vec<String>> = AHashMap::new();
566
567 for entry in entries {
568 if entry.file_type().is_file() {
569 let dirpath = entry.path().parent().unwrap().to_str().unwrap().to_string();
570 let filename = entry.file_name().to_str().unwrap().to_string();
571 dir_files.entry(dirpath).or_default().push(filename);
572 }
573 }
574
575 dir_files
576 .into_iter()
577 .map(|(dirpath, files)| (dirpath, None, files))
578 .collect_vec()
579 };
580
581 let ignore_file_paths: Vec<String> = Vec::new();
586
587 let path_walk_ignore_file: Vec<(String, Option<()>, Vec<String>)> = ignore_file_paths
590 .iter()
591 .map(|ignore_file_path| {
592 let ignore_file_path = Path::new(ignore_file_path);
593
594 let dir_name = ignore_file_path
596 .parent()
597 .unwrap()
598 .to_str()
599 .unwrap()
600 .to_string();
601
602 let file_name = vec![
605 ignore_file_path
606 .file_name()
607 .unwrap()
608 .to_str()
609 .unwrap()
610 .to_string(),
611 ];
612
613 (dir_name, None, file_name)
614 })
615 .collect();
616
617 path_walk.extend(path_walk_ignore_file);
618
619 let mut buffer = Vec::new();
620 let mut ignores = AHashMap::new();
621 let sql_file_exts = self.config.sql_file_exts();
622
623 for (dirpath, _, filenames) in path_walk {
624 for fname in filenames {
625 let fpath = Path::new(&dirpath).join(&fname);
626
627 if ignore_files && fname == ignore_file_name {
629 let file = File::open(&fpath).unwrap();
630 let lines = BufReader::new(file).lines();
631 let spec = lines.map_while(Result::ok); ignores.insert(dirpath.clone(), spec.collect::<Vec<String>>());
633
634 continue;
636 }
637
638 for ext in sql_file_exts {
643 if fname.to_lowercase().ends_with(ext) {
645 buffer.push(fpath.clone());
646 }
647 }
648 }
649 }
650
651 let mut filtered_buffer = AHashSet::new();
652
653 for fpath in buffer {
654 let npath = helpers::normalize(&fpath).to_str().unwrap().to_string();
655 filtered_buffer.insert(npath);
656 }
657
658 let mut files = filtered_buffer.into_iter().collect_vec();
659 files.sort();
660 files
661 }
662
663 pub fn config(&self) -> &FluffConfig {
664 &self.config
665 }
666
667 pub fn config_mut(&mut self) -> &mut FluffConfig {
668 self.rules = OnceLock::new();
669 &mut self.config
670 }
671
672 pub fn rules(&self) -> &[ErasedRule] {
673 self.rules.get_or_init(|| self.get_rulepack().rules)
674 }
675
676 pub fn formatter(&self) -> Option<&Arc<dyn Formatter>> {
677 self.formatter.as_ref()
678 }
679
680 pub fn formatter_mut(&mut self) -> Option<&mut Arc<dyn Formatter>> {
681 self.formatter.as_mut()
682 }
683}
684
#[cfg(test)]
mod tests {
    use sqruff_lib_core::parser::segments::Tables;

    use crate::core::config::FluffConfig;
    use crate::core::linter::core::Linter;

    /// Builds a linter from the default configuration, with no formatter, no
    /// explicit templater and parse errors excluded.
    fn default_linter() -> Linter {
        Linter::new(
            FluffConfig::new(<_>::default(), None, None),
            None,
            None,
            false,
        )
    }

    /// Runs file discovery on `root` with every optional argument unset.
    fn discover(linter: &Linter, root: &str) -> Vec<String> {
        linter.paths_from_path(root.into(), None, None, None, None, None)
    }

    /// Replaces both kinds of path separator with `.` so that expectations
    /// do not depend on the platform.
    fn normalise_paths(paths: Vec<String>) -> Vec<String> {
        paths
            .into_iter()
            .map(|path| path.replace(['/', '\\'], "."))
            .collect()
    }

    #[test]
    fn test_linter_path_from_paths_dir() {
        let lntr = default_linter();
        let found = normalise_paths(discover(&lntr, "test/fixtures/lexer"));

        let expected = vec![
            "test.fixtures.lexer.basic.sql",
            "test.fixtures.lexer.block_comment.sql",
            "test.fixtures.lexer.inline_comment.sql",
        ];
        assert_eq!(found, expected);
    }

    #[test]
    fn test_linter_path_from_paths_default() {
        let lntr = default_linter();
        let found = normalise_paths(discover(&lntr, "test/fixtures/linter"));

        assert!(found.contains(&"test.fixtures.linter.passing.sql".to_string()));
        assert!(found.contains(&"test.fixtures.linter.passing_cap_extension.SQL".to_string()));
        assert!(!found.contains(&"test.fixtures.linter.discovery_file.txt".to_string()));
    }

    #[test]
    fn test_linter_path_from_paths_exts() {
        let config =
            FluffConfig::new(<_>::default(), None, None).with_sql_file_exts(vec![".txt".into()]);
        let lntr = Linter::new(config, None, None, false);

        let normalized_paths = normalise_paths(discover(&lntr, "test/fixtures/linter"));

        assert!(!normalized_paths.contains(&"test.fixtures.linter.passing.sql".into()));
        assert!(
            !normalized_paths.contains(&"test.fixtures.linter.passing_cap_extension.SQL".into())
        );
        assert!(normalized_paths.contains(&"test.fixtures.linter.discovery_file.txt".into()));
    }

    #[test]
    fn test_linter_path_from_paths_file() {
        let lntr = default_linter();
        let found = discover(&lntr, "test/fixtures/linter/indentation_errors.sql");

        assert_eq!(
            normalise_paths(found),
            &["test.fixtures.linter.indentation_errors.sql"]
        );
    }

    #[test]
    fn test_linter_empty_file() {
        let linter = default_linter();
        let tables = Tables::default();

        let parsed = linter.parse_string(&tables, "", None).unwrap();

        assert!(parsed.violations.is_empty());
    }

    #[test]
    #[ignore = "The implementation of Lexer::lex_templated_file is required"]
    fn test_advanced_api_methods() {
        let sql = "
 WITH cte AS (
 SELECT * FROM tab_a
 )
 SELECT
 cte.col_a,
 tab_b.col_b
 FROM cte
 INNER JOIN tab_b;
 "
        .to_string();

        let linter = default_linter();
        let tables = Tables::default();
        let _parsed = linter.parse_string(&tables, &sql, None).unwrap();
    }

    #[test]
    fn test_normalise_newlines() {
        let in_str = "SELECT\r\n foo\n FROM \r \n\r bar;";
        let out_str = "SELECT\n foo\n FROM \n \n\n bar;";

        assert_eq!(Linter::normalise_newlines(in_str), out_str);
    }
}