1use std::collections::HashSet;
7
8use crate::generated::NormalizationStrategy;
9use crate::linter::config::LintConfig;
10use crate::linter::rule::{LintContext, LintRule};
11use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit};
12use regex::Regex;
13use sqlparser::ast::Statement;
14
15use super::identifier_candidates_helpers::{
16 collect_identifier_candidates, IdentifierCandidate, IdentifierKind, IdentifierPolicy,
17};
18
19pub struct ReferencesQuoting {
20 prefer_quoted_identifiers: bool,
21 prefer_quoted_keywords: bool,
22 case_sensitive_override: Option<bool>,
23 quoted_policy: IdentifierPolicy,
24 unquoted_policy: IdentifierPolicy,
25 ignore_words: HashSet<String>,
26 ignore_words_regex: Option<Regex>,
27}
28
29impl Default for ReferencesQuoting {
30 fn default() -> Self {
31 Self {
32 prefer_quoted_identifiers: false,
33 prefer_quoted_keywords: false,
34 case_sensitive_override: None,
35 quoted_policy: IdentifierPolicy::All,
36 unquoted_policy: IdentifierPolicy::All,
37 ignore_words: HashSet::new(),
38 ignore_words_regex: None,
39 }
40 }
41}
42
43impl ReferencesQuoting {
44 pub fn from_config(config: &LintConfig) -> Self {
45 Self {
46 prefer_quoted_identifiers: config
47 .rule_option_bool(issue_codes::LINT_RF_006, "prefer_quoted_identifiers")
48 .unwrap_or(false),
49 prefer_quoted_keywords: config
50 .rule_option_bool(issue_codes::LINT_RF_006, "prefer_quoted_keywords")
51 .unwrap_or(false),
52 case_sensitive_override: config
53 .rule_option_bool(issue_codes::LINT_RF_006, "case_sensitive"),
54 quoted_policy: IdentifierPolicy::from_config(
55 config,
56 issue_codes::LINT_RF_006,
57 "quoted_identifiers_policy",
58 "all",
59 ),
60 unquoted_policy: IdentifierPolicy::from_config(
61 config,
62 issue_codes::LINT_RF_006,
63 "unquoted_identifiers_policy",
64 "all",
65 ),
66 ignore_words: configured_ignore_words(config)
67 .into_iter()
68 .map(|word| normalize_token(&word))
69 .collect(),
70 ignore_words_regex: config
71 .rule_option_str(issue_codes::LINT_RF_006, "ignore_words_regex")
72 .filter(|pattern| !pattern.trim().is_empty())
73 .and_then(|pattern| Regex::new(pattern).ok()),
74 }
75 }
76
77 fn is_case_aware(&self, dialect: Dialect) -> bool {
79 match self.case_sensitive_override {
80 Some(false) => false,
81 Some(true) => true,
82 None => {
83 matches!(
87 dialect.normalization_strategy(),
88 NormalizationStrategy::Lowercase | NormalizationStrategy::Uppercase
89 )
90 }
91 }
92 }
93}
94
95impl LintRule for ReferencesQuoting {
96 fn code(&self) -> &'static str {
97 issue_codes::LINT_RF_006
98 }
99
100 fn name(&self) -> &'static str {
101 "References quoting"
102 }
103
104 fn description(&self) -> &'static str {
105 "Unnecessary quoted identifier."
106 }
107
108 fn check(&self, statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
109 let dialect = ctx.dialect();
110
111 let ast_has_violation = collect_identifier_candidates(statement)
112 .into_iter()
113 .any(|candidate| candidate_triggers_rule(&candidate, self, dialect));
114
115 let mut autofix_edits_raw =
116 unnecessary_quoted_identifier_edits(ctx.statement_sql(), self, dialect);
117 if is_spark_insert_overwrite_directory_options(ctx.statement_sql(), dialect) {
118 autofix_edits_raw.extend(spark_options_unquote_key_edits(ctx.statement_sql(), self));
119 autofix_edits_raw.sort_by_key(|edit| (edit.start, edit.end));
120 autofix_edits_raw.dedup_by(|left, right| {
121 left.start == right.start
122 && left.end == right.end
123 && left.replacement == right.replacement
124 });
125 }
126
127 let fallback_has_violation =
128 is_spark_insert_overwrite_directory_options(ctx.statement_sql(), dialect)
129 && !autofix_edits_raw.is_empty();
130
131 let has_violation = ast_has_violation || fallback_has_violation;
132
133 if !has_violation {
134 return Vec::new();
135 }
136
137 let message = if self.prefer_quoted_identifiers {
138 "Identifiers should be quoted."
139 } else {
140 "Identifier quoting appears unnecessary."
141 };
142 let mut issue =
143 Issue::info(issue_codes::LINT_RF_006, message).with_statement(ctx.statement_index);
144
145 let autofix_edits = autofix_edits_raw
146 .into_iter()
147 .map(|edit| {
148 IssuePatchEdit::new(
149 ctx.span_from_statement_offset(edit.start, edit.end),
150 edit.replacement,
151 )
152 })
153 .collect::<Vec<_>>();
154 if !autofix_edits.is_empty() {
155 issue = issue.with_autofix_edits(IssueAutofixApplicability::Safe, autofix_edits);
156 }
157
158 vec![issue]
159 }
160}
161
162fn is_spark_insert_overwrite_directory_options(sql: &str, dialect: Dialect) -> bool {
163 if !matches!(dialect, Dialect::Databricks) {
164 return false;
165 }
166 let upper = sql.to_ascii_uppercase();
167 upper.contains("INSERT OVERWRITE DIRECTORY")
168 && upper.contains("USING")
169 && upper.contains("OPTIONS")
170}
171
172fn spark_options_unquote_key_edits(sql: &str, rule: &ReferencesQuoting) -> Vec<Rf006AutofixEdit> {
173 if rule.prefer_quoted_identifiers || !rule.quoted_policy.allows(IdentifierKind::Other) {
174 return Vec::new();
175 }
176
177 let bytes = sql.as_bytes();
178 let mut edits = Vec::new();
179 let mut index = 0usize;
180
181 while index < bytes.len() {
182 if bytes[index] != b'"' {
183 index += 1;
184 continue;
185 }
186
187 let start = index;
188 index += 1;
189 let ident_start = index;
190 while index < bytes.len() && bytes[index] != b'"' {
191 index += 1;
192 }
193 if index >= bytes.len() {
194 break;
195 }
196 let ident_end = index;
197 let end = index + 1;
198
199 let Some(ident) = sql.get(ident_start..ident_end) else {
200 index = end;
201 continue;
202 };
203 if ident.is_empty() {
204 index = end;
205 continue;
206 }
207 if is_keyword(ident) || is_ignored_token(ident, rule) || !is_valid_bare_identifier(ident) {
208 index = end;
209 continue;
210 }
211
212 let mut prev = start;
213 while prev > 0 && bytes[prev - 1].is_ascii_whitespace() {
214 prev -= 1;
215 }
216 if prev == 0 || !matches!(bytes[prev - 1], b'(' | b',') {
217 index = end;
218 continue;
219 }
220
221 let mut probe = end;
222 while probe < bytes.len() && bytes[probe].is_ascii_whitespace() {
223 probe += 1;
224 }
225 if probe >= bytes.len() || bytes[probe] != b'=' {
226 index = end;
227 continue;
228 }
229
230 edits.push(Rf006AutofixEdit {
231 start,
232 end,
233 replacement: ident.to_string(),
234 });
235 index = end;
236 }
237
238 edits
239}
240
241struct Rf006AutofixEdit {
242 start: usize,
243 end: usize,
244 replacement: String,
245}
246
247fn unnecessary_quoted_identifier_edits(
248 sql: &str,
249 rule: &ReferencesQuoting,
250 dialect: Dialect,
251) -> Vec<Rf006AutofixEdit> {
252 if rule.prefer_quoted_identifiers || !rule.quoted_policy.allows(IdentifierKind::Other) {
253 return Vec::new();
254 }
255
256 let case_aware = rule.is_case_aware(dialect);
257 let strategy = dialect.normalization_strategy();
258
259 let bytes = sql.as_bytes();
260 let mut edits = Vec::new();
261 let mut index = 0usize;
262 let mut in_single = false;
263
264 let quote_chars: &[u8] = match dialect {
266 Dialect::Mssql => b"\"[",
267 Dialect::Bigquery | Dialect::Databricks | Dialect::Hive | Dialect::Mysql => b"`",
268 _ => b"\"",
269 };
270
271 while index < bytes.len() {
272 if bytes[index] == b'\'' {
274 if in_single && index + 1 < bytes.len() && bytes[index + 1] == b'\'' {
275 index += 2;
276 continue;
277 }
278 in_single = !in_single;
279 index += 1;
280 continue;
281 }
282
283 if in_single {
284 index += 1;
285 continue;
286 }
287
288 let is_quote = quote_chars.contains(&bytes[index]);
289 if !is_quote {
290 index += 1;
291 continue;
292 }
293
294 let quote_byte = bytes[index];
295 let close_byte = if quote_byte == b'[' { b']' } else { quote_byte };
296
297 let start = index;
298 index += 1;
299 let ident_start = index;
300 let mut escaped_quote = false;
301
302 while index < bytes.len() {
303 if bytes[index] == close_byte {
304 if close_byte != b']' && index + 1 < bytes.len() && bytes[index + 1] == close_byte {
305 escaped_quote = true;
306 index += 2;
307 continue;
308 }
309 break;
310 }
311 index += 1;
312 }
313 if index >= bytes.len() {
314 break;
315 }
316
317 let ident_end = index;
318 let end = index + 1;
319 let Some(ident) = sql.get(ident_start..ident_end) else {
320 index += 1;
321 continue;
322 };
323
324 if !escaped_quote
325 && quoted_identifier_allows_safe_unquote(ident, rule, dialect, case_aware, strategy)
326 && can_unquote_identifier(ident, case_aware, strategy)
327 {
328 edits.push(Rf006AutofixEdit {
329 start,
330 end,
331 replacement: ident.to_string(),
332 });
333 }
334
335 index = end;
336 }
337
338 edits
339}
340
341fn quoted_identifier_allows_safe_unquote(
342 ident: &str,
343 rule: &ReferencesQuoting,
344 _dialect: Dialect,
345 case_aware: bool,
346 strategy: NormalizationStrategy,
347) -> bool {
348 if is_ignored_token(ident, rule) {
349 return false;
350 }
351
352 if !rule.quoted_policy.allows(IdentifierKind::Other) {
353 return false;
354 }
355
356 if rule.prefer_quoted_keywords && is_keyword(ident) {
357 return false;
358 }
359
360 is_unnecessarily_quoted(ident, case_aware, strategy)
361}
362
363fn can_unquote_identifier(
364 identifier: &str,
365 case_aware: bool,
366 strategy: NormalizationStrategy,
367) -> bool {
368 if !is_valid_bare_identifier(identifier) {
369 return false;
370 }
371
372 if is_keyword(identifier) {
373 return false;
374 }
375
376 if case_aware {
378 return matches_casefold(identifier, strategy);
379 }
380
381 true
382}
383
384fn candidate_triggers_rule(
385 candidate: &IdentifierCandidate,
386 rule: &ReferencesQuoting,
387 dialect: Dialect,
388) -> bool {
389 if is_ignored_token(&candidate.value, rule) {
390 return false;
391 }
392
393 let policy = if candidate.quoted {
394 rule.quoted_policy
395 } else {
396 rule.unquoted_policy
397 };
398 if !policy.allows(candidate.kind) {
399 return false;
400 }
401
402 if rule.prefer_quoted_identifiers {
403 return !candidate.quoted;
404 }
405
406 if !candidate.quoted {
407 return false;
408 }
409
410 if rule.prefer_quoted_keywords && is_keyword(&candidate.value) {
411 return false;
412 }
413
414 let case_aware = rule.is_case_aware(dialect);
415 let strategy = dialect.normalization_strategy();
416 is_unnecessarily_quoted(&candidate.value, case_aware, strategy)
417}
418
419fn is_unnecessarily_quoted(ident: &str, case_aware: bool, strategy: NormalizationStrategy) -> bool {
420 if !is_valid_bare_identifier(ident) {
421 return false;
422 }
423
424 if is_keyword(ident) {
425 return false;
426 }
427
428 if case_aware {
429 return matches_casefold(ident, strategy);
430 }
431
432 true
433}
434
435fn is_valid_bare_identifier(ident: &str) -> bool {
436 let mut chars = ident.chars();
437 let Some(first) = chars.next() else {
438 return false;
439 };
440
441 if !(first.is_ascii_alphabetic() || first == '_') {
442 return false;
443 }
444
445 chars.all(|ch| ch.is_ascii_alphanumeric() || ch == '_')
446}
447
448fn matches_casefold(ident: &str, strategy: NormalizationStrategy) -> bool {
449 match strategy {
450 NormalizationStrategy::Uppercase => ident.chars().all(|ch| !ch.is_ascii_lowercase()),
451 NormalizationStrategy::Lowercase => ident.chars().all(|ch| !ch.is_ascii_uppercase()),
452 NormalizationStrategy::CaseInsensitive => true,
453 NormalizationStrategy::CaseSensitive => true,
454 }
455}
456
457fn configured_ignore_words(config: &LintConfig) -> Vec<String> {
458 if let Some(words) = config.rule_option_string_list(issue_codes::LINT_RF_006, "ignore_words") {
459 return words;
460 }
461
462 config
463 .rule_option_str(issue_codes::LINT_RF_006, "ignore_words")
464 .map(|words| {
465 words
466 .split(',')
467 .map(str::trim)
468 .filter(|word| !word.is_empty())
469 .map(str::to_string)
470 .collect()
471 })
472 .unwrap_or_default()
473}
474
475fn is_ignored_token(token: &str, rule: &ReferencesQuoting) -> bool {
476 let normalized = normalize_token(token);
477 if rule.ignore_words.contains(&normalized) {
479 return true;
480 }
481 if let Some(regex) = &rule.ignore_words_regex {
484 let raw = token
485 .trim()
486 .trim_matches(|ch| matches!(ch, '"' | '`' | '\'' | '[' | ']'));
487 if regex.is_match(raw) {
488 return true;
489 }
490 }
491 false
492}
493
494fn normalize_token(token: &str) -> String {
495 token
496 .trim()
497 .trim_matches(|ch| matches!(ch, '"' | '`' | '\'' | '[' | ']'))
498 .to_ascii_uppercase()
499}
500
501fn is_keyword(token: &str) -> bool {
502 matches!(
503 token.trim().to_ascii_uppercase().as_str(),
504 "ALL"
505 | "AND"
506 | "AS"
507 | "ASC"
508 | "BETWEEN"
509 | "BY"
510 | "CASE"
511 | "CROSS"
512 | "DEFAULT"
513 | "DELETE"
514 | "DESC"
515 | "DISTINCT"
516 | "ELSE"
517 | "END"
518 | "EXISTS"
519 | "FALSE"
520 | "FOR"
521 | "FROM"
522 | "FULL"
523 | "GROUP"
524 | "HAVING"
525 | "IF"
526 | "IN"
527 | "INNER"
528 | "INSERT"
529 | "INTO"
530 | "IS"
531 | "JOIN"
532 | "LEFT"
533 | "LIKE"
534 | "LIMIT"
535 | "METADATA"
536 | "NOT"
537 | "NULL"
538 | "OFFSET"
539 | "ON"
540 | "OR"
541 | "ORDER"
542 | "OUTER"
543 | "RECURSIVE"
544 | "RIGHT"
545 | "SELECT"
546 | "SET"
547 | "SUM"
548 | "TABLE"
549 | "THEN"
550 | "TRUE"
551 | "UNION"
552 | "UPDATE"
553 | "USER"
554 | "USING"
555 | "VALUES"
556 | "WHEN"
557 | "WHERE"
558 | "WITH"
559 | "DATETIME"
560 )
561}
562
563#[cfg(test)]
564mod tests {
565 use super::*;
566 use crate::linter::rule::with_active_dialect;
567 use crate::parser::parse_sql;
568 use crate::types::Dialect;
569 use crate::types::IssueAutofixApplicability;
570
571 fn run(sql: &str) -> Vec<Issue> {
572 let statements = parse_sql(sql).expect("parse");
573 let rule = ReferencesQuoting::default();
574 statements
575 .iter()
576 .enumerate()
577 .flat_map(|(index, statement)| {
578 rule.check(
579 statement,
580 &LintContext {
581 sql,
582 statement_range: 0..sql.len(),
583 statement_index: index,
584 },
585 )
586 })
587 .collect()
588 }
589
590 fn apply_issue_autofix(sql: &str, issue: &Issue) -> Option<String> {
591 let autofix = issue.autofix.as_ref()?;
592 let mut out = sql.to_string();
593 let mut edits = autofix.edits.clone();
594 edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
595 for edit in edits.into_iter().rev() {
596 out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
597 }
598 Some(out)
599 }
600
601 #[test]
602 fn flags_unnecessary_quoted_identifier() {
603 let sql = "SELECT \"good_name\" FROM t";
604 let issues = run(sql);
605 assert_eq!(issues.len(), 1);
606 assert_eq!(issues[0].code, issue_codes::LINT_RF_006);
607 let autofix = issues[0].autofix.as_ref().expect("autofix metadata");
608 assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
609 let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
610 assert_eq!(fixed, "SELECT good_name FROM t");
611 }
612
613 #[test]
614 fn mixed_case_quoted_identifier_remains_report_only() {
615 let issues = run("SELECT \"MixedCase\" FROM t");
616 assert_eq!(issues.len(), 1);
618 }
619
620 #[test]
621 fn does_not_flag_quoted_identifier_with_special_char() {
622 let issues = run("SELECT \"bad-name\" FROM t");
623 assert!(issues.is_empty());
624 }
625
626 #[test]
627 fn does_not_flag_double_quotes_inside_string_literal() {
628 let issues = run("SELECT '\"good_name\"' AS note FROM t");
629 assert!(issues.is_empty());
630 }
631
632 #[test]
633 fn prefer_quoted_identifiers_true_disables_unnecessary_quote_issues() {
634 let config = LintConfig {
635 enabled: true,
636 disabled_rules: vec![],
637 rule_configs: std::collections::BTreeMap::from([(
638 "references.quoting".to_string(),
639 serde_json::json!({"prefer_quoted_identifiers": true}),
640 )]),
641 };
642 let rule = ReferencesQuoting::from_config(&config);
643 let sql = "SELECT \"good_name\" FROM \"t\"";
644 let statements = parse_sql(sql).expect("parse");
645 let issues = rule.check(
646 &statements[0],
647 &LintContext {
648 sql,
649 statement_range: 0..sql.len(),
650 statement_index: 0,
651 },
652 );
653 assert!(issues.is_empty());
654 }
655
656 #[test]
657 fn prefer_quoted_identifiers_true_flags_unquoted_identifier() {
658 let config = LintConfig {
659 enabled: true,
660 disabled_rules: vec![],
661 rule_configs: std::collections::BTreeMap::from([(
662 "references.quoting".to_string(),
663 serde_json::json!({"prefer_quoted_identifiers": true}),
664 )]),
665 };
666 let rule = ReferencesQuoting::from_config(&config);
667 let sql = "SELECT good_name FROM t";
668 let statements = parse_sql(sql).expect("parse");
669 let issues = rule.check(
670 &statements[0],
671 &LintContext {
672 sql,
673 statement_range: 0..sql.len(),
674 statement_index: 0,
675 },
676 );
677 assert_eq!(issues.len(), 1);
678 }
679
680 #[test]
681 fn prefer_quoted_keywords_true_allows_quoted_keyword_identifier() {
682 let config = LintConfig {
683 enabled: true,
684 disabled_rules: vec![],
685 rule_configs: std::collections::BTreeMap::from([(
686 "LINT_RF_006".to_string(),
687 serde_json::json!({"prefer_quoted_keywords": true}),
688 )]),
689 };
690 let rule = ReferencesQuoting::from_config(&config);
691 let sql = "SELECT \"select\".id FROM users AS \"select\"";
692 let statements = parse_sql(sql).expect("parse");
693 let issues = rule.check(
694 &statements[0],
695 &LintContext {
696 sql,
697 statement_range: 0..sql.len(),
698 statement_index: 0,
699 },
700 );
701 assert!(issues.is_empty());
702 }
703
704 #[test]
705 fn quoted_policy_none_skips_quoted_identifier_checks() {
706 let config = LintConfig {
707 enabled: true,
708 disabled_rules: vec![],
709 rule_configs: std::collections::BTreeMap::from([(
710 "references.quoting".to_string(),
711 serde_json::json!({"quoted_identifiers_policy": "none"}),
712 )]),
713 };
714 let rule = ReferencesQuoting::from_config(&config);
715 let sql = "SELECT \"good_name\" FROM t";
716 let statements = parse_sql(sql).expect("parse");
717 let issues = rule.check(
718 &statements[0],
719 &LintContext {
720 sql,
721 statement_range: 0..sql.len(),
722 statement_index: 0,
723 },
724 );
725 assert!(issues.is_empty());
726 }
727
728 #[test]
729 fn ignore_words_suppresses_identifier() {
730 let config = LintConfig {
731 enabled: true,
732 disabled_rules: vec![],
733 rule_configs: std::collections::BTreeMap::from([(
734 "LINT_RF_006".to_string(),
735 serde_json::json!({"ignore_words": ["good_name"]}),
736 )]),
737 };
738 let rule = ReferencesQuoting::from_config(&config);
739 let sql = "SELECT \"good_name\" FROM t";
740 let statements = parse_sql(sql).expect("parse");
741 let issues = rule.check(
742 &statements[0],
743 &LintContext {
744 sql,
745 statement_range: 0..sql.len(),
746 statement_index: 0,
747 },
748 );
749 assert!(issues.is_empty());
750 }
751
752 #[test]
753 fn ignore_words_regex_suppresses_identifier() {
754 let config = LintConfig {
755 enabled: true,
756 disabled_rules: vec![],
757 rule_configs: std::collections::BTreeMap::from([(
758 "references.quoting".to_string(),
759 serde_json::json!({"ignore_words_regex": "^good_"}),
760 )]),
761 };
762 let rule = ReferencesQuoting::from_config(&config);
763 let sql = "SELECT \"good_name\" FROM t";
764 let statements = parse_sql(sql).expect("parse");
765 let issues = rule.check(
766 &statements[0],
767 &LintContext {
768 sql,
769 statement_range: 0..sql.len(),
770 statement_index: 0,
771 },
772 );
773 assert!(issues.is_empty());
774 }
775
776 #[test]
777 fn does_not_flag_keyword_identifier() {
778 let issues = run("SELECT \"SELECT\" FROM t");
779 assert!(issues.is_empty());
780 }
781
782 #[test]
783 fn does_not_flag_datetime_keyword_identifier() {
784 let issues = run("SELECT \"datetime\" FROM t");
785 assert!(issues.is_empty());
786 }
787
788 #[test]
789 fn sparksql_insert_overwrite_directory_options_uses_fallback_detection_and_fix() {
790 let sql = "INSERT OVERWRITE DIRECTORY '/tmp/destination'\nUSING PARQUET\nOPTIONS (\"col1\" = \"1\", \"col2\" = \"2\", \"col3\" = 'test', \"user\" = \"a person\")\nSELECT a FROM test_table;";
791 let statements = parse_sql("SELECT 1").expect("synthetic parse");
792 let rule = ReferencesQuoting::default();
793
794 let issues = with_active_dialect(Dialect::Databricks, || {
795 rule.check(
796 &statements[0],
797 &LintContext {
798 sql,
799 statement_range: 0..sql.len(),
800 statement_index: 0,
801 },
802 )
803 });
804 assert_eq!(issues.len(), 1);
805 let autofix = issues[0].autofix.as_ref().expect("autofix metadata");
806 assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
807 let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
808 assert_eq!(
809 fixed,
810 "INSERT OVERWRITE DIRECTORY '/tmp/destination'\nUSING PARQUET\nOPTIONS (col1 = \"1\", col2 = \"2\", col3 = 'test', \"user\" = \"a person\")\nSELECT a FROM test_table;"
811 );
812 }
813}