1use std::collections::HashSet;
7
8use crate::linter::config::LintConfig;
9use crate::linter::rule::{LintContext, LintRule};
10use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit};
11use regex::{Regex, RegexBuilder};
12use sqlparser::ast::Statement;
13use sqlparser::keywords::ALL_KEYWORDS;
14use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer, Whitespace};
15
16use super::identifier_candidates_helpers::{
17 collect_identifier_candidates, IdentifierCandidate, IdentifierKind, IdentifierPolicy,
18};
19
/// Lint rule RF004: flags SQL keywords used as identifiers.
///
/// Behavior is driven by two [`IdentifierPolicy`] values (one for quoted,
/// one for unquoted identifiers) plus an ignore list / ignore regex that
/// exempt specific words.
pub struct ReferencesKeywords {
    /// Policy applied to quoted identifiers (config default: "none").
    quoted_policy: IdentifierPolicy,
    /// Policy applied to unquoted identifiers (config default: "aliases").
    unquoted_policy: IdentifierPolicy,
    /// Ignore words, stored pre-normalized (trimmed, unquoted, uppercased).
    ignore_words: HashSet<String>,
    /// Optional case-insensitive pattern matched against normalized tokens.
    ignore_words_regex: Option<Regex>,
}
26
27impl ReferencesKeywords {
28 pub fn from_config(config: &LintConfig) -> Self {
29 Self {
30 quoted_policy: IdentifierPolicy::from_config(
31 config,
32 issue_codes::LINT_RF_004,
33 "quoted_identifiers_policy",
34 "none",
35 ),
36 unquoted_policy: IdentifierPolicy::from_config(
37 config,
38 issue_codes::LINT_RF_004,
39 "unquoted_identifiers_policy",
40 "aliases",
41 ),
42 ignore_words: configured_ignore_words(config)
43 .into_iter()
44 .map(|word| normalize_token(&word))
45 .collect(),
46 ignore_words_regex: config
47 .rule_option_str(issue_codes::LINT_RF_004, "ignore_words_regex")
48 .filter(|pattern| !pattern.trim().is_empty())
49 .and_then(|pattern| {
50 RegexBuilder::new(pattern)
51 .case_insensitive(true)
52 .build()
53 .ok()
54 }),
55 }
56 }
57}
58
impl Default for ReferencesKeywords {
    /// Mirrors the configuration defaults of `from_config`: quoted
    /// identifiers are never flagged, unquoted identifiers are flagged only
    /// when used as aliases, and no ignore list or regex is set.
    fn default() -> Self {
        Self {
            quoted_policy: IdentifierPolicy::None,
            unquoted_policy: IdentifierPolicy::Aliases,
            ignore_words: HashSet::new(),
            ignore_words_regex: None,
        }
    }
}
69
70impl LintRule for ReferencesKeywords {
71 fn code(&self) -> &'static str {
72 issue_codes::LINT_RF_004
73 }
74
75 fn name(&self) -> &'static str {
76 "References keywords"
77 }
78
79 fn description(&self) -> &'static str {
80 "Keywords should not be used as identifiers."
81 }
82
83 fn check(&self, statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
84 if !statement_contains_keyword_identifier(statement, self) {
85 return Vec::new();
86 }
87
88 let mut issue = Issue::info(issue_codes::LINT_RF_004, "Keyword used as identifier.")
89 .with_statement(ctx.statement_index);
90
91 let autofix_edits =
92 keyword_table_alias_autofix_edits(ctx.statement_sql(), ctx.dialect(), self)
93 .into_iter()
94 .map(|edit| {
95 IssuePatchEdit::new(
96 ctx.span_from_statement_offset(edit.start, edit.end),
97 edit.replacement,
98 )
99 })
100 .collect::<Vec<_>>();
101 if !autofix_edits.is_empty() {
102 issue = issue.with_autofix_edits(IssueAutofixApplicability::Safe, autofix_edits);
103 }
104
105 vec![issue]
106 }
107}
108
/// A single replacement produced by the RF004 autofix, expressed as byte
/// offsets into the statement's SQL text.
struct Rf004AutofixEdit {
    /// Start byte offset (inclusive) of the span to replace.
    start: usize,
    /// End byte offset (exclusive) of the span to replace.
    end: usize,
    /// Text that replaces the `[start, end)` span.
    replacement: String,
}
114
/// A `FROM <table> [AS] <alias>` / `JOIN <table> [AS] <alias>` occurrence
/// found by token scanning, with byte offsets into the statement SQL.
#[derive(Clone)]
struct SimpleTableAliasDecl {
    /// Start offset of the `FROM`/`JOIN` keyword.
    keyword_start: usize,
    /// End offset of the `FROM`/`JOIN` keyword.
    keyword_end: usize,
    /// Start offset of the (possibly qualified) table name.
    table_start: usize,
    /// End offset of the table name (covers `schema.table` chains).
    table_end: usize,
    /// End offset of the alias token — together with `keyword_start` this
    /// bounds the span an autofix rewrites.
    alias_end: usize,
    /// The alias text as written in the SQL.
    alias: String,
    /// Whether the alias was introduced with an explicit `AS`.
    explicit_as: bool,
}
125
/// A tokenizer token paired with its byte offsets in the source SQL.
#[derive(Clone)]
struct LocatedToken {
    token: Token,
    /// Start byte offset (inclusive).
    start: usize,
    /// End byte offset (exclusive).
    end: usize,
}
132
133fn keyword_table_alias_autofix_edits(
134 sql: &str,
135 dialect: Dialect,
136 rule: &ReferencesKeywords,
137) -> Vec<Rf004AutofixEdit> {
138 if !rule.unquoted_policy.allows(IdentifierKind::TableAlias) {
139 return Vec::new();
140 }
141
142 let Some(decls) = collect_simple_table_alias_declarations(sql, dialect) else {
143 return Vec::new();
144 };
145
146 let mut edits = Vec::new();
147 for decl in decls {
148 if !decl.explicit_as
149 || !is_alias_keyword_token(&decl.alias)
150 || is_ignored_token(&decl.alias, rule)
151 {
152 continue;
153 }
154 let clause = &sql[decl.keyword_start..decl.keyword_end];
155 let table = &sql[decl.table_start..decl.table_end];
156 edits.push(Rf004AutofixEdit {
157 start: decl.keyword_start,
158 end: decl.alias_end,
159 replacement: format!(
160 "{clause} {table} AS alias_{}",
161 decl.alias.to_ascii_lowercase()
162 ),
163 });
164 }
165
166 edits
167}
168
/// Tokenizes `sql` and collects every `FROM`/`JOIN` clause of the shape
/// `FROM <table[.part…]> [AS] <alias>` where the table parts and the alias
/// are simple (unquoted ASCII) identifiers.
///
/// Returns `None` only when the SQL cannot be tokenized for the dialect;
/// otherwise `Some` with zero or more declarations.
fn collect_simple_table_alias_declarations(
    sql: &str,
    dialect: Dialect,
) -> Option<Vec<SimpleTableAliasDecl>> {
    let tokens = tokenize_with_offsets(sql, dialect)?;
    let mut out = Vec::new();
    let mut index = 0usize;

    while index < tokens.len() {
        // Anchor on FROM/JOIN; everything else is skipped one token at a time.
        if !token_matches_keyword(&tokens[index].token, "FROM")
            && !token_matches_keyword(&tokens[index].token, "JOIN")
        {
            index += 1;
            continue;
        }

        let keyword_start = tokens[index].start;
        let keyword_end = tokens[index].end;

        // The clause keyword must be followed by a simple identifier (the
        // table name). Subqueries, quoted names, etc. make the clause
        // ineligible and scanning resumes from the next token.
        let Some(mut cursor) = next_non_trivia_token(&tokens, index + 1) else {
            index += 1;
            continue;
        };
        if token_simple_identifier(&tokens[cursor].token).is_none() {
            index += 1;
            continue;
        }

        let table_start = tokens[cursor].start;
        let mut table_end = tokens[cursor].end;
        cursor += 1;

        // Extend over qualified names (`schema.table`, `db.schema.table`):
        // consume `.ident` pairs while the parts stay simple identifiers.
        while let Some(dot_index) = next_non_trivia_token(&tokens, cursor) {
            if !matches!(tokens[dot_index].token, Token::Period) {
                break;
            }
            let Some(next_index) = next_non_trivia_token(&tokens, dot_index + 1) else {
                break;
            };
            if token_simple_identifier(&tokens[next_index].token).is_none() {
                break;
            }
            table_end = tokens[next_index].end;
            cursor = next_index + 1;
        }

        // Optional explicit `AS`, then the alias token itself.
        let Some(mut alias_index) = next_non_trivia_token(&tokens, cursor) else {
            index += 1;
            continue;
        };
        let mut explicit_as = false;
        if token_matches_keyword(&tokens[alias_index].token, "AS") {
            explicit_as = true;
            let Some(next_index) = next_non_trivia_token(&tokens, alias_index + 1) else {
                index += 1;
                continue;
            };
            alias_index = next_index;
        }

        // Without `AS`, the next simple word (which may actually be a clause
        // keyword such as `WHERE`) is recorded as an implicit alias; the
        // autofix builder only acts on `explicit_as` declarations.
        let Some(alias) = token_simple_identifier(&tokens[alias_index].token) else {
            index += 1;
            continue;
        };

        out.push(SimpleTableAliasDecl {
            keyword_start,
            keyword_end,
            table_start,
            table_end,
            alias_end: tokens[alias_index].end,
            alias: alias.to_string(),
            explicit_as,
        });
        // Resume scanning after the alias token.
        index = alias_index + 1;
    }

    Some(out)
}
248
249fn tokenize_with_offsets(sql: &str, dialect: Dialect) -> Option<Vec<LocatedToken>> {
250 let dialect = dialect.to_sqlparser_dialect();
251 let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
252 let tokens = tokenizer.tokenize_with_location().ok()?;
253
254 let mut out = Vec::with_capacity(tokens.len());
255 for token in tokens {
256 let (start, end) = token_with_span_offsets(sql, &token)?;
257 out.push(LocatedToken {
258 token: token.token,
259 start,
260 end,
261 });
262 }
263 Some(out)
264}
265
266fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
267 let start = line_col_to_offset(
268 sql,
269 token.span.start.line as usize,
270 token.span.start.column as usize,
271 )?;
272 let end = line_col_to_offset(
273 sql,
274 token.span.end.line as usize,
275 token.span.end.column as usize,
276 )?;
277 Some((start, end))
278}
279
/// Maps a 1-based (line, column) position to a byte offset in `sql`.
///
/// Columns count characters (not bytes); a position one past the final
/// character maps to `sql.len()`, which is how exclusive span ends resolve.
/// Returns `None` for 0-based input or positions outside the text.
fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
    if line == 0 || column == 0 {
        return None;
    }

    let (mut cur_line, mut cur_col) = (1usize, 1usize);
    for (offset, ch) in sql.char_indices() {
        if (cur_line, cur_col) == (line, column) {
            return Some(offset);
        }
        match ch {
            '\n' => {
                cur_line += 1;
                cur_col = 1;
            }
            _ => cur_col += 1,
        }
    }

    // The requested position may sit just past the last character.
    ((cur_line, cur_col) == (line, column)).then_some(sql.len())
}
307
308fn next_non_trivia_token(tokens: &[LocatedToken], mut start: usize) -> Option<usize> {
309 while start < tokens.len() {
310 if !is_trivia_token(&tokens[start].token) {
311 return Some(start);
312 }
313 start += 1;
314 }
315 None
316}
317
318fn is_trivia_token(token: &Token) -> bool {
319 matches!(
320 token,
321 Token::Whitespace(
322 Whitespace::Space
323 | Whitespace::Newline
324 | Whitespace::Tab
325 | Whitespace::SingleLineComment { .. }
326 | Whitespace::MultiLineComment(_)
327 )
328 )
329}
330
331fn token_matches_keyword(token: &Token, keyword: &str) -> bool {
332 matches!(token, Token::Word(word) if word.value.eq_ignore_ascii_case(keyword))
333}
334
335fn token_simple_identifier(token: &Token) -> Option<&str> {
336 match token {
337 Token::Word(word) if is_simple_identifier(&word.value) => Some(&word.value),
338 _ => None,
339 }
340}
341
342fn is_simple_identifier(value: &str) -> bool {
343 let bytes = value.as_bytes();
344 if bytes.is_empty() || !is_ascii_ident_start(bytes[0]) {
345 return false;
346 }
347 bytes[1..].iter().copied().all(is_ascii_ident_continue)
348}
349
/// Whether `byte` may begin an ASCII identifier (letter or underscore).
fn is_ascii_ident_start(byte: u8) -> bool {
    matches!(byte, b'A'..=b'Z' | b'a'..=b'z' | b'_')
}
353
/// Whether `byte` may continue an ASCII identifier (letter, digit, or
/// underscore).
fn is_ascii_ident_continue(byte: u8) -> bool {
    matches!(byte, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'_')
}
357
/// Whether an alias name is a SQL keyword. Thin wrapper over `is_keyword`,
/// kept for readability at autofix call sites.
fn is_alias_keyword_token(alias: &str) -> bool {
    is_keyword(alias)
}
361
362fn statement_contains_keyword_identifier(statement: &Statement, rule: &ReferencesKeywords) -> bool {
363 collect_identifier_candidates(statement)
364 .into_iter()
365 .any(|candidate| candidate_triggers_rule(&candidate, rule))
366}
367
368fn candidate_triggers_rule(candidate: &IdentifierCandidate, rule: &ReferencesKeywords) -> bool {
369 if candidate.value.len() <= 1 {
371 return false;
372 }
373 if is_ignored_token(&candidate.value, rule) || !is_keyword(&candidate.value) {
374 return false;
375 }
376
377 if candidate.quoted {
378 rule.quoted_policy.allows(candidate.kind)
379 } else {
380 rule.unquoted_policy.allows(candidate.kind)
381 }
382}
383
384fn configured_ignore_words(config: &LintConfig) -> Vec<String> {
385 if let Some(words) = config.rule_option_string_list(issue_codes::LINT_RF_004, "ignore_words") {
386 return words;
387 }
388
389 config
390 .rule_option_str(issue_codes::LINT_RF_004, "ignore_words")
391 .map(|words| {
392 words
393 .split(',')
394 .map(str::trim)
395 .filter(|word| !word.is_empty())
396 .map(str::to_string)
397 .collect()
398 })
399 .unwrap_or_default()
400}
401
402fn is_ignored_token(token: &str, rule: &ReferencesKeywords) -> bool {
403 let normalized = normalize_token(token);
404 rule.ignore_words.contains(&normalized)
405 || rule
406 .ignore_words_regex
407 .as_ref()
408 .is_some_and(|regex| regex.is_match(&normalized))
409}
410
/// Canonicalizes an identifier for comparison: trims whitespace, strips
/// surrounding quote characters (double quotes, backticks, single quotes,
/// square brackets), and uppercases.
fn normalize_token(token: &str) -> String {
    const QUOTE_CHARS: &[char] = &['"', '`', '\'', '[', ']'];
    token.trim().trim_matches(QUOTE_CHARS).to_ascii_uppercase()
}
417
418fn is_keyword(token: &str) -> bool {
419 let upper = token.trim().to_ascii_uppercase();
420 (ALL_KEYWORDS.binary_search(&upper.as_str()).is_ok() || is_sqlfluff_extra_keyword(&upper))
421 && !is_non_keyword_identifier(&upper)
422}
423
/// Extra words treated as keywords for sqlfluff compatibility even though
/// sqlparser's keyword table does not list them. Input must be uppercased.
fn is_sqlfluff_extra_keyword(upper: &str) -> bool {
    upper == "COST"
}
428
/// Words that look like keywords but are treated as plain identifiers.
///
/// Any underscore-containing word is a plain identifier (e.g. function
/// names like `ROW_NUMBER`) unless it begins with one of the prefixes of
/// genuine compound keywords (`CURRENT_DATE`, `SESSION_USER`, ...). A small
/// explicit list covers underscore-free exceptions. Input must be uppercased.
fn is_non_keyword_identifier(upper: &str) -> bool {
    const KEYWORD_PREFIXES: [&str; 4] = ["CURRENT_", "LOCAL_", "SESSION_", "SYSTEM_"];
    if upper.contains('_') && !KEYWORD_PREFIXES.iter().any(|prefix| upper.starts_with(prefix)) {
        return true;
    }
    matches!(upper, "CHANNEL" | "GENERATED" | "METADATA" | "STATUS")
}
448
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::parse_sql;
    use crate::types::IssueAutofixApplicability;

    // Runs the rule with default configuration over every parsed statement.
    fn run(sql: &str) -> Vec<Issue> {
        run_with_config(sql, LintConfig::default())
    }

    // Runs the rule with an explicit configuration; each statement gets a
    // context whose range covers the whole input.
    fn run_with_config(sql: &str, config: LintConfig) -> Vec<Issue> {
        let statements = parse_sql(sql).expect("parse");
        let rule = ReferencesKeywords::from_config(&config);
        statements
            .iter()
            .enumerate()
            .flat_map(|(index, statement)| {
                rule.check(
                    statement,
                    &LintContext {
                        sql,
                        statement_range: 0..sql.len(),
                        statement_index: index,
                    },
                )
            })
            .collect()
    }

    // Applies an issue's autofix edits to `sql`, right-to-left so earlier
    // spans stay valid; returns None when the issue carries no autofix.
    fn apply_issue_autofix(sql: &str, issue: &Issue) -> Option<String> {
        let autofix = issue.autofix.as_ref()?;
        let mut out = sql.to_string();
        let mut edits = autofix.edits.clone();
        edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
        for edit in edits.into_iter().rev() {
            out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
        }
        Some(out)
    }

    #[test]
    fn flags_unquoted_keyword_table_alias() {
        let issues = run("SELECT sum.id FROM users AS sum");
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
    }

    #[test]
    fn emits_safe_autofix_for_explicit_keyword_table_alias() {
        let sql = "select a from users as select";
        let issues = run(sql);
        assert_eq!(issues.len(), 1);
        let autofix = issues[0].autofix.as_ref().expect("autofix metadata");
        assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        // The clause is rebuilt with canonical spacing and an `alias_` prefix.
        assert_eq!(fixed, "select a from users AS alias_select");
    }

    #[test]
    fn flags_unquoted_keyword_projection_alias() {
        let issues = run("SELECT amount AS sum FROM t");
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
    }

    // COST is not in sqlparser's keyword table; covered by the extras list.
    #[test]
    fn flags_cost_alias_as_keyword_identifier() {
        let issues = run("SELECT 1 AS cost");
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
    }

    #[test]
    fn flags_unquoted_keyword_cte_alias() {
        let issues = run("WITH sum AS (SELECT 1 AS value) SELECT value FROM sum");
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
    }

    // Default quoted policy is "none", so quoting suppresses the rule.
    #[test]
    fn does_not_flag_quoted_keyword_alias_by_default() {
        assert!(run("SELECT \"select\".id FROM users AS \"select\"").is_empty());
    }

    #[test]
    fn does_not_flag_non_keyword_alias() {
        let issues = run("SELECT u.id FROM users AS u");
        assert!(issues.is_empty());
    }

    // Keyword-like text inside a string literal must not trigger the rule.
    #[test]
    fn does_not_flag_sql_like_string_literal() {
        let issues = run("SELECT 'FROM users AS date' AS snippet");
        assert!(issues.is_empty());
    }

    #[test]
    fn quoted_identifiers_policy_all_flags_quoted_keyword_alias() {
        let issues = run_with_config(
            "SELECT \"select\".id FROM users AS \"select\"",
            LintConfig {
                enabled: true,
                disabled_rules: vec![],
                rule_configs: std::collections::BTreeMap::from([(
                    "references.keywords".to_string(),
                    serde_json::json!({"quoted_identifiers_policy": "all"}),
                )]),
            },
        );
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
    }

    // Rule options are also accepted under the issue-code key.
    #[test]
    fn unquoted_column_alias_policy_does_not_flag_table_alias() {
        let issues = run_with_config(
            "SELECT sum.id FROM users AS sum",
            LintConfig {
                enabled: true,
                disabled_rules: vec![],
                rule_configs: std::collections::BTreeMap::from([(
                    "LINT_RF_004".to_string(),
                    serde_json::json!({"unquoted_identifiers_policy": "column_aliases"}),
                )]),
            },
        );
        assert!(issues.is_empty());
    }

    #[test]
    fn ignore_words_suppresses_keyword_identifier() {
        let issues = run_with_config(
            "SELECT amount AS sum FROM t",
            LintConfig {
                enabled: true,
                disabled_rules: vec![],
                rule_configs: std::collections::BTreeMap::from([(
                    "references.keywords".to_string(),
                    serde_json::json!({"ignore_words": ["sum"]}),
                )]),
            },
        );
        assert!(issues.is_empty());
    }

    #[test]
    fn ignore_words_regex_suppresses_keyword_identifier() {
        let issues = run_with_config(
            "SELECT amount AS sum FROM t",
            LintConfig {
                enabled: true,
                disabled_rules: vec![],
                rule_configs: std::collections::BTreeMap::from([(
                    "LINT_RF_004".to_string(),
                    serde_json::json!({"ignore_words_regex": "^s.*"}),
                )]),
            },
        );
        assert!(issues.is_empty());
    }

    #[test]
    fn flags_keyword_as_column_name_in_create_table() {
        let issues = run("CREATE TABLE artist(create TEXT)");
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
    }

    #[test]
    fn flags_keyword_as_column_alias() {
        let issues = run("SELECT 1 as parameter");
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
    }

    #[test]
    fn flags_keyword_as_table_alias() {
        let issues = run("SELECT x FROM tbl AS parameter");
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
    }

    // With the default "aliases" policy, a bare column reference that happens
    // to be a keyword is not flagged.
    #[test]
    fn does_not_flag_non_alias_with_aliases_policy() {
        assert!(run("SELECT parameter").is_empty());
    }

    #[test]
    fn flags_non_alias_with_all_policy() {
        let issues = run_with_config(
            "SELECT parameter",
            LintConfig {
                enabled: true,
                disabled_rules: vec![],
                rule_configs: std::collections::BTreeMap::from([(
                    "references.keywords".to_string(),
                    serde_json::json!({"unquoted_identifiers_policy": "all"}),
                )]),
            },
        );
        assert_eq!(issues.len(), 1);
    }

    // column_aliases policy flags only the column alias; the table alias in
    // the same statement is exempt (one issue per statement either way).
    #[test]
    fn flags_column_alias_with_column_aliases_policy() {
        let issues = run_with_config(
            "SELECT x AS date FROM tbl AS parameter",
            LintConfig {
                enabled: true,
                disabled_rules: vec![],
                rule_configs: std::collections::BTreeMap::from([(
                    "references.keywords".to_string(),
                    serde_json::json!({"unquoted_identifiers_policy": "column_aliases"}),
                )]),
            },
        );
        assert_eq!(issues.len(), 1);
    }

    #[test]
    fn flags_quoted_keyword_column_in_create_table() {
        let issues = run_with_config(
            "CREATE TABLE \"artist\"(\"create\" TEXT)",
            LintConfig {
                enabled: true,
                disabled_rules: vec![],
                rule_configs: std::collections::BTreeMap::from([(
                    "references.keywords".to_string(),
                    serde_json::json!({"quoted_identifiers_policy": "aliases"}),
                )]),
            },
        );
        assert_eq!(issues.len(), 1);
    }

    #[test]
    fn flags_keyword_as_column_name_postgres() {
        let issues = run("CREATE TABLE test_table (type varchar(30) NOT NULL)");
        assert_eq!(issues.len(), 1);
    }

    // ROW_NUMBER contains an underscore, so it is treated as an identifier,
    // not a keyword, both as a function name and as an alias.
    #[test]
    fn does_not_flag_function_name_as_keyword() {
        assert!(run("SELECT ROW_NUMBER() OVER () AS row_number FROM t").is_empty());
    }

    // Words on the explicit non-keyword list must never trigger the rule.
    #[test]
    fn does_not_flag_non_keyword_identifiers() {
        assert!(run("WITH generated AS (SELECT 1 AS x) SELECT x FROM generated").is_empty());
        assert!(run("SELECT x AS status FROM t").is_empty());
        assert!(run("SELECT x AS metadata FROM t").is_empty());
        assert!(run("SELECT x AS channel FROM t").is_empty());
    }

    // CURRENT_ is a keyword prefix, so CURRENT_DATE stays a keyword despite
    // containing an underscore.
    #[test]
    fn still_flags_current_date_as_keyword() {
        let issues = run("SELECT x AS current_date FROM t");
        assert_eq!(issues.len(), 1);
    }
}