1use std::collections::HashSet;
7
8use crate::linter::config::LintConfig;
9use crate::linter::rule::{LintContext, LintRule};
10use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit};
11use regex::{Regex, RegexBuilder};
12use sqlparser::ast::Statement;
13use sqlparser::keywords::ALL_KEYWORDS;
14use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer, Whitespace};
15
16use super::identifier_candidates_helpers::{
17 collect_identifier_candidates, IdentifierCandidate, IdentifierKind, IdentifierPolicy,
18};
19
/// Lint rule RF04: flags SQL keywords used as identifiers (aliases, column
/// names, CTE names, ...), mirroring sqlfluff's `references.keywords` rule.
pub struct ReferencesKeywords {
    // Policy applied to identifiers that are quoted in the source
    // (configured via `quoted_identifiers_policy`, default "none").
    quoted_policy: IdentifierPolicy,
    // Policy applied to unquoted identifiers
    // (configured via `unquoted_identifiers_policy`, default "aliases").
    unquoted_policy: IdentifierPolicy,
    // Words exempt from the rule, stored normalized (trimmed, quote-stripped,
    // uppercased) so lookups match `normalize_token` output.
    ignore_words: HashSet<String>,
    // Optional case-insensitive pattern; tokens matching it are exempt.
    ignore_words_regex: Option<Regex>,
}
26
27impl ReferencesKeywords {
28 pub fn from_config(config: &LintConfig) -> Self {
29 Self {
30 quoted_policy: IdentifierPolicy::from_config(
31 config,
32 issue_codes::LINT_RF_004,
33 "quoted_identifiers_policy",
34 "none",
35 ),
36 unquoted_policy: IdentifierPolicy::from_config(
37 config,
38 issue_codes::LINT_RF_004,
39 "unquoted_identifiers_policy",
40 "aliases",
41 ),
42 ignore_words: configured_ignore_words(config)
43 .into_iter()
44 .map(|word| normalize_token(&word))
45 .collect(),
46 ignore_words_regex: config
47 .rule_option_str(issue_codes::LINT_RF_004, "ignore_words_regex")
48 .filter(|pattern| !pattern.trim().is_empty())
49 .and_then(|pattern| {
50 RegexBuilder::new(pattern)
51 .case_insensitive(true)
52 .build()
53 .ok()
54 }),
55 }
56 }
57}
58
59impl Default for ReferencesKeywords {
60 fn default() -> Self {
61 Self {
62 quoted_policy: IdentifierPolicy::None,
63 unquoted_policy: IdentifierPolicy::Aliases,
64 ignore_words: HashSet::new(),
65 ignore_words_regex: None,
66 }
67 }
68}
69
70impl LintRule for ReferencesKeywords {
71 fn code(&self) -> &'static str {
72 issue_codes::LINT_RF_004
73 }
74
75 fn name(&self) -> &'static str {
76 "References keywords"
77 }
78
79 fn description(&self) -> &'static str {
80 "Keywords should not be used as identifiers."
81 }
82
83 fn check(&self, statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
84 if !statement_contains_keyword_identifier(statement, self) {
85 return Vec::new();
86 }
87
88 let mut issue = Issue::info(issue_codes::LINT_RF_004, "Keyword used as identifier.")
89 .with_statement(ctx.statement_index);
90
91 let autofix_edits =
92 keyword_table_alias_autofix_edits(ctx.statement_sql(), ctx.dialect(), self)
93 .into_iter()
94 .map(|edit| {
95 IssuePatchEdit::new(
96 ctx.span_from_statement_offset(edit.start, edit.end),
97 edit.replacement,
98 )
99 })
100 .collect::<Vec<_>>();
101 if !autofix_edits.is_empty() {
102 issue = issue.with_autofix_edits(IssueAutofixApplicability::Safe, autofix_edits);
103 }
104
105 vec![issue]
106 }
107}
108
/// One byte-range replacement produced by the RF04 autofix, expressed as
/// offsets into the statement's SQL text (not the whole file).
struct Rf004AutofixEdit {
    // Inclusive start byte offset of the replaced range.
    start: usize,
    // Exclusive end byte offset of the replaced range.
    end: usize,
    // Text to substitute over `start..end`.
    replacement: String,
}
114
/// Byte-offset description of one `FROM/JOIN <table> [AS] <alias>` shape
/// found by the token scan in `collect_simple_table_alias_declarations`.
#[derive(Clone)]
struct SimpleTableAliasDecl {
    // Span of the FROM/JOIN keyword itself.
    keyword_start: usize,
    keyword_end: usize,
    // Span of the (possibly dotted) table name.
    table_start: usize,
    table_end: usize,
    // End of the alias token; autofix edits replace keyword_start..alias_end.
    alias_end: usize,
    // Alias text exactly as written in the SQL.
    alias: String,
    // Whether the alias was introduced with an explicit `AS` keyword.
    explicit_as: bool,
}
125
/// A tokenizer token paired with its byte span in the original SQL text.
#[derive(Clone)]
struct LocatedToken {
    token: Token,
    // Byte offset where the token starts.
    start: usize,
    // Byte offset of the token end (mapped from sqlparser's span end;
    // assumed exclusive per that convention — see token_with_span_offsets).
    end: usize,
}
132
/// Build autofix edits that rename keyword table aliases, e.g.
/// `FROM users AS sum` -> `FROM users AS alias_sum`.
///
/// Only runs when the unquoted policy actually flags table aliases, and
/// only touches aliases that were introduced with an explicit `AS`, are
/// keywords, and are not on the ignore list. Returns an empty list when
/// tokenization fails.
///
/// NOTE(review): the edit rewrites only the alias declaration; qualified
/// references to the old alias elsewhere in the statement (e.g.
/// `SELECT sum.id FROM users AS sum`) are left untouched, which would break
/// such queries even though the fix is marked Safe — confirm intent.
fn keyword_table_alias_autofix_edits(
    sql: &str,
    dialect: Dialect,
    rule: &ReferencesKeywords,
) -> Vec<Rf004AutofixEdit> {
    if !rule.unquoted_policy.allows(IdentifierKind::TableAlias) {
        return Vec::new();
    }

    let Some(decls) = collect_simple_table_alias_declarations(sql, dialect) else {
        return Vec::new();
    };

    let mut edits = Vec::new();
    for decl in decls {
        if !decl.explicit_as
            || !is_alias_keyword_token(&decl.alias)
            || is_ignored_token(&decl.alias, rule)
        {
            continue;
        }
        // Reuse the original clause keyword and table text verbatim so the
        // replacement preserves the author's casing.
        let clause = &sql[decl.keyword_start..decl.keyword_end];
        let table = &sql[decl.table_start..decl.table_end];
        edits.push(Rf004AutofixEdit {
            start: decl.keyword_start,
            end: decl.alias_end,
            replacement: format!(
                "{clause} {table} AS alias_{}",
                decl.alias.to_ascii_lowercase()
            ),
        });
    }

    edits
}
168
/// Scan the token stream for simple `FROM <table> [AS] <alias>` and
/// `JOIN <table> [AS] <alias>` shapes and record their byte ranges.
///
/// "Simple" means the table is a plain (possibly dotted) identifier chain;
/// subqueries, quoted names, and anything more exotic are skipped rather
/// than guessed at. Returns `None` only when tokenization fails.
///
/// NOTE(review): when there is no explicit `AS`, the next bare word after
/// the table is recorded as the alias even if it is really a clause keyword
/// (e.g. `WHERE`). That is harmless today because the autofix only consumes
/// `explicit_as` declarations — confirm before using implicit ones.
fn collect_simple_table_alias_declarations(
    sql: &str,
    dialect: Dialect,
) -> Option<Vec<SimpleTableAliasDecl>> {
    let tokens = tokenize_with_offsets(sql, dialect)?;
    let mut out = Vec::new();
    let mut index = 0usize;

    while index < tokens.len() {
        // Anchor the scan on FROM/JOIN; anything else just advances.
        if !token_matches_keyword(&tokens[index].token, "FROM")
            && !token_matches_keyword(&tokens[index].token, "JOIN")
        {
            index += 1;
            continue;
        }

        let keyword_start = tokens[index].start;
        let keyword_end = tokens[index].end;

        // The clause must be followed by a plain identifier (table name).
        let Some(mut cursor) = next_non_trivia_token(&tokens, index + 1) else {
            index += 1;
            continue;
        };
        if token_simple_identifier(&tokens[cursor].token).is_none() {
            index += 1;
            continue;
        }

        let table_start = tokens[cursor].start;
        let mut table_end = tokens[cursor].end;
        cursor += 1;

        // Extend over a dotted chain like `schema.table` or `db.schema.table`.
        loop {
            let Some(dot_index) = next_non_trivia_token(&tokens, cursor) else {
                break;
            };
            if !matches!(tokens[dot_index].token, Token::Period) {
                break;
            }
            let Some(next_index) = next_non_trivia_token(&tokens, dot_index + 1) else {
                break;
            };
            if token_simple_identifier(&tokens[next_index].token).is_none() {
                break;
            }
            table_end = tokens[next_index].end;
            cursor = next_index + 1;
        }

        // Optional explicit AS, then the alias token itself.
        let Some(mut alias_index) = next_non_trivia_token(&tokens, cursor) else {
            index += 1;
            continue;
        };
        let mut explicit_as = false;
        if token_matches_keyword(&tokens[alias_index].token, "AS") {
            explicit_as = true;
            let Some(next_index) = next_non_trivia_token(&tokens, alias_index + 1) else {
                index += 1;
                continue;
            };
            alias_index = next_index;
        }

        let Some(alias) = token_simple_identifier(&tokens[alias_index].token) else {
            index += 1;
            continue;
        };

        out.push(SimpleTableAliasDecl {
            keyword_start,
            keyword_end,
            table_start,
            table_end,
            alias_end: tokens[alias_index].end,
            alias: alias.to_string(),
            explicit_as,
        });
        // Resume scanning right after the alias token.
        index = alias_index + 1;
    }

    Some(out)
}
251
252fn tokenize_with_offsets(sql: &str, dialect: Dialect) -> Option<Vec<LocatedToken>> {
253 let dialect = dialect.to_sqlparser_dialect();
254 let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
255 let tokens = tokenizer.tokenize_with_location().ok()?;
256
257 let mut out = Vec::with_capacity(tokens.len());
258 for token in tokens {
259 let (start, end) = token_with_span_offsets(sql, &token)?;
260 out.push(LocatedToken {
261 token: token.token,
262 start,
263 end,
264 });
265 }
266 Some(out)
267}
268
269fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
270 let start = line_col_to_offset(
271 sql,
272 token.span.start.line as usize,
273 token.span.start.column as usize,
274 )?;
275 let end = line_col_to_offset(
276 sql,
277 token.span.end.line as usize,
278 token.span.end.column as usize,
279 )?;
280 Some((start, end))
281}
282
/// Convert a 1-based (line, column) position into a byte offset in `sql`.
///
/// Columns count characters, not bytes, so multi-byte characters advance the
/// column by one while the returned offset tracks bytes. A position exactly
/// one past the final character maps to `sql.len()`. Returns `None` for
/// 0-based input or positions beyond the end of the text.
fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
    if line == 0 || column == 0 {
        return None;
    }

    let mut at = (1usize, 1usize);
    for (offset, ch) in sql.char_indices() {
        if at == (line, column) {
            return Some(offset);
        }
        at = if ch == '\n' {
            (at.0 + 1, 1)
        } else {
            (at.0, at.1 + 1)
        };
    }

    // The requested position may be the one just past the last character.
    (at == (line, column)).then_some(sql.len())
}
310
311fn next_non_trivia_token(tokens: &[LocatedToken], mut start: usize) -> Option<usize> {
312 while start < tokens.len() {
313 if !is_trivia_token(&tokens[start].token) {
314 return Some(start);
315 }
316 start += 1;
317 }
318 None
319}
320
321fn is_trivia_token(token: &Token) -> bool {
322 matches!(
323 token,
324 Token::Whitespace(
325 Whitespace::Space
326 | Whitespace::Newline
327 | Whitespace::Tab
328 | Whitespace::SingleLineComment { .. }
329 | Whitespace::MultiLineComment(_)
330 )
331 )
332}
333
334fn token_matches_keyword(token: &Token, keyword: &str) -> bool {
335 matches!(token, Token::Word(word) if word.value.eq_ignore_ascii_case(keyword))
336}
337
338fn token_simple_identifier(token: &Token) -> Option<&str> {
339 match token {
340 Token::Word(word) if is_simple_identifier(&word.value) => Some(&word.value),
341 _ => None,
342 }
343}
344
/// True when `value` matches the shape `[A-Za-z_][A-Za-z0-9_]*`
/// (a plain, unquoted ASCII identifier).
fn is_simple_identifier(value: &str) -> bool {
    let mut bytes = value.bytes();
    match bytes.next() {
        Some(first) if is_ascii_ident_start(first) => bytes.all(is_ascii_ident_continue),
        _ => false,
    }
}

/// Valid first byte of an identifier: ASCII letter or underscore.
fn is_ascii_ident_start(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'_')
}

/// Valid subsequent identifier byte: ASCII alphanumeric or underscore.
fn is_ascii_ident_continue(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
}
360
/// A table alias triggers the autofix only when it is itself a keyword.
/// Thin wrapper so the autofix path reads at the same abstraction level as
/// the other token checks.
fn is_alias_keyword_token(alias: &str) -> bool {
    is_keyword(alias)
}
364
365fn statement_contains_keyword_identifier(statement: &Statement, rule: &ReferencesKeywords) -> bool {
366 collect_identifier_candidates(statement)
367 .into_iter()
368 .any(|candidate| candidate_triggers_rule(&candidate, rule))
369}
370
371fn candidate_triggers_rule(candidate: &IdentifierCandidate, rule: &ReferencesKeywords) -> bool {
372 if candidate.value.len() <= 1 {
374 return false;
375 }
376 if is_ignored_token(&candidate.value, rule) || !is_keyword(&candidate.value) {
377 return false;
378 }
379
380 if candidate.quoted {
381 rule.quoted_policy.allows(candidate.kind)
382 } else {
383 rule.unquoted_policy.allows(candidate.kind)
384 }
385}
386
387fn configured_ignore_words(config: &LintConfig) -> Vec<String> {
388 if let Some(words) = config.rule_option_string_list(issue_codes::LINT_RF_004, "ignore_words") {
389 return words;
390 }
391
392 config
393 .rule_option_str(issue_codes::LINT_RF_004, "ignore_words")
394 .map(|words| {
395 words
396 .split(',')
397 .map(str::trim)
398 .filter(|word| !word.is_empty())
399 .map(str::to_string)
400 .collect()
401 })
402 .unwrap_or_default()
403}
404
405fn is_ignored_token(token: &str, rule: &ReferencesKeywords) -> bool {
406 let normalized = normalize_token(token);
407 rule.ignore_words.contains(&normalized)
408 || rule
409 .ignore_words_regex
410 .as_ref()
411 .is_some_and(|regex| regex.is_match(&normalized))
412}
413
/// Canonical form used for keyword and ignore-word comparisons: trim
/// surrounding whitespace, strip common identifier-quote characters
/// (double quote, backtick, single quote, square brackets), and uppercase.
fn normalize_token(token: &str) -> String {
    let trimmed = token.trim();
    let unquoted = trimmed.trim_matches(|ch: char| "\"`'[]".contains(ch));
    unquoted.to_ascii_uppercase()
}
420
421fn is_keyword(token: &str) -> bool {
422 let upper = token.trim().to_ascii_uppercase();
423 (ALL_KEYWORDS.binary_search(&upper.as_str()).is_ok() || is_sqlfluff_extra_keyword(&upper))
424 && !is_non_keyword_identifier(&upper)
425}
426
/// Keywords that sqlfluff treats as reserved but sqlparser's keyword list
/// does not include. Expects an already-uppercased input.
fn is_sqlfluff_extra_keyword(upper: &str) -> bool {
    upper == "COST"
}
431
/// Words that show up in keyword lists but are treated as ordinary
/// identifiers here, to match sqlfluff behavior. Expects uppercased input.
///
/// Any word containing `_` is assumed to be a user identifier, except the
/// `CURRENT_` / `LOCAL_` / `SESSION_` / `SYSTEM_` families, which cover
/// genuine keywords such as CURRENT_DATE and SESSION_USER.
fn is_non_keyword_identifier(upper: &str) -> bool {
    const KEYWORD_PREFIXES: [&str; 4] = ["CURRENT_", "LOCAL_", "SESSION_", "SYSTEM_"];

    if upper.contains('_')
        && !KEYWORD_PREFIXES
            .iter()
            .any(|prefix| upper.starts_with(prefix))
    {
        return true;
    }
    matches!(upper, "CHANNEL" | "GENERATED" | "METADATA" | "STATUS")
}
451
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::parse_sql;
    use crate::types::IssueAutofixApplicability;

    // Run the rule with the default configuration over every parsed statement.
    fn run(sql: &str) -> Vec<Issue> {
        run_with_config(sql, LintConfig::default())
    }

    // Run the rule with an explicit configuration. Every statement gets the
    // full input as its range — good enough for these single-statement tests.
    fn run_with_config(sql: &str, config: LintConfig) -> Vec<Issue> {
        let statements = parse_sql(sql).expect("parse");
        let rule = ReferencesKeywords::from_config(&config);
        statements
            .iter()
            .enumerate()
            .flat_map(|(index, statement)| {
                rule.check(
                    statement,
                    &LintContext {
                        sql,
                        statement_range: 0..sql.len(),
                        statement_index: index,
                    },
                )
            })
            .collect()
    }

    // Apply an issue's autofix edits right-to-left so that earlier spans
    // remain valid while later ones are replaced.
    fn apply_issue_autofix(sql: &str, issue: &Issue) -> Option<String> {
        let autofix = issue.autofix.as_ref()?;
        let mut out = sql.to_string();
        let mut edits = autofix.edits.clone();
        edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
        for edit in edits.into_iter().rev() {
            out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
        }
        Some(out)
    }

    // --- Basic detection across identifier kinds -------------------------

    #[test]
    fn flags_unquoted_keyword_table_alias() {
        let issues = run("SELECT sum.id FROM users AS sum");
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
    }

    #[test]
    fn emits_safe_autofix_for_explicit_keyword_table_alias() {
        let sql = "select a from users as select";
        let issues = run(sql);
        assert_eq!(issues.len(), 1);
        let autofix = issues[0].autofix.as_ref().expect("autofix metadata");
        assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        // Clause keyword casing is preserved; the alias is normalized.
        assert_eq!(fixed, "select a from users AS alias_select");
    }

    #[test]
    fn flags_unquoted_keyword_projection_alias() {
        let issues = run("SELECT amount AS sum FROM t");
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
    }

    #[test]
    fn flags_cost_alias_as_keyword_identifier() {
        // COST comes from the sqlfluff-compat extra keyword list.
        let issues = run("SELECT 1 AS cost");
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
    }

    #[test]
    fn flags_unquoted_keyword_cte_alias() {
        let issues = run("WITH sum AS (SELECT 1 AS value) SELECT value FROM sum");
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
    }

    #[test]
    fn does_not_flag_quoted_keyword_alias_by_default() {
        // Default quoted policy is "none".
        assert!(run("SELECT \"select\".id FROM users AS \"select\"").is_empty());
    }

    #[test]
    fn does_not_flag_non_keyword_alias() {
        let issues = run("SELECT u.id FROM users AS u");
        assert!(issues.is_empty());
    }

    #[test]
    fn does_not_flag_sql_like_string_literal() {
        // Keywords inside string literals are not identifier candidates.
        let issues = run("SELECT 'FROM users AS date' AS snippet");
        assert!(issues.is_empty());
    }

    // --- Policy configuration --------------------------------------------

    #[test]
    fn quoted_identifiers_policy_all_flags_quoted_keyword_alias() {
        let issues = run_with_config(
            "SELECT \"select\".id FROM users AS \"select\"",
            LintConfig {
                enabled: true,
                disabled_rules: vec![],
                rule_configs: std::collections::BTreeMap::from([(
                    "references.keywords".to_string(),
                    serde_json::json!({"quoted_identifiers_policy": "all"}),
                )]),
            },
        );
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
    }

    #[test]
    fn unquoted_column_alias_policy_does_not_flag_table_alias() {
        let issues = run_with_config(
            "SELECT sum.id FROM users AS sum",
            LintConfig {
                enabled: true,
                disabled_rules: vec![],
                rule_configs: std::collections::BTreeMap::from([(
                    "LINT_RF_004".to_string(),
                    serde_json::json!({"unquoted_identifiers_policy": "column_aliases"}),
                )]),
            },
        );
        assert!(issues.is_empty());
    }

    // --- Ignore-word options ----------------------------------------------

    #[test]
    fn ignore_words_suppresses_keyword_identifier() {
        let issues = run_with_config(
            "SELECT amount AS sum FROM t",
            LintConfig {
                enabled: true,
                disabled_rules: vec![],
                rule_configs: std::collections::BTreeMap::from([(
                    "references.keywords".to_string(),
                    serde_json::json!({"ignore_words": ["sum"]}),
                )]),
            },
        );
        assert!(issues.is_empty());
    }

    #[test]
    fn ignore_words_regex_suppresses_keyword_identifier() {
        let issues = run_with_config(
            "SELECT amount AS sum FROM t",
            LintConfig {
                enabled: true,
                disabled_rules: vec![],
                rule_configs: std::collections::BTreeMap::from([(
                    "LINT_RF_004".to_string(),
                    serde_json::json!({"ignore_words_regex": "^s.*"}),
                )]),
            },
        );
        assert!(issues.is_empty());
    }

    // --- Non-alias identifier kinds ---------------------------------------

    #[test]
    fn flags_keyword_as_column_name_in_create_table() {
        let issues = run("CREATE TABLE artist(create TEXT)");
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
    }

    #[test]
    fn flags_keyword_as_column_alias() {
        let issues = run("SELECT 1 as parameter");
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
    }

    #[test]
    fn flags_keyword_as_table_alias() {
        let issues = run("SELECT x FROM tbl AS parameter");
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
    }

    #[test]
    fn does_not_flag_non_alias_with_aliases_policy() {
        // Default unquoted policy only covers aliases, not plain references.
        assert!(run("SELECT parameter").is_empty());
    }

    #[test]
    fn flags_non_alias_with_all_policy() {
        let issues = run_with_config(
            "SELECT parameter",
            LintConfig {
                enabled: true,
                disabled_rules: vec![],
                rule_configs: std::collections::BTreeMap::from([(
                    "references.keywords".to_string(),
                    serde_json::json!({"unquoted_identifiers_policy": "all"}),
                )]),
            },
        );
        assert_eq!(issues.len(), 1);
    }

    #[test]
    fn flags_column_alias_with_column_aliases_policy() {
        let issues = run_with_config(
            "SELECT x AS date FROM tbl AS parameter",
            LintConfig {
                enabled: true,
                disabled_rules: vec![],
                rule_configs: std::collections::BTreeMap::from([(
                    "references.keywords".to_string(),
                    serde_json::json!({"unquoted_identifiers_policy": "column_aliases"}),
                )]),
            },
        );
        assert_eq!(issues.len(), 1);
    }

    #[test]
    fn flags_quoted_keyword_column_in_create_table() {
        let issues = run_with_config(
            "CREATE TABLE \"artist\"(\"create\" TEXT)",
            LintConfig {
                enabled: true,
                disabled_rules: vec![],
                rule_configs: std::collections::BTreeMap::from([(
                    "references.keywords".to_string(),
                    serde_json::json!({"quoted_identifiers_policy": "aliases"}),
                )]),
            },
        );
        assert_eq!(issues.len(), 1);
    }

    #[test]
    fn flags_keyword_as_column_name_postgres() {
        let issues = run("CREATE TABLE test_table (type varchar(30) NOT NULL)");
        assert_eq!(issues.len(), 1);
    }

    // --- Allow-list / non-keyword identifiers ------------------------------

    #[test]
    fn does_not_flag_function_name_as_keyword() {
        // ROW_NUMBER contains `_` without a keyword prefix, so it is treated
        // as an ordinary identifier by is_non_keyword_identifier.
        assert!(run("SELECT ROW_NUMBER() OVER () AS row_number FROM t").is_empty());
    }

    #[test]
    fn does_not_flag_non_keyword_identifiers() {
        assert!(run("WITH generated AS (SELECT 1 AS x) SELECT x FROM generated").is_empty());
        assert!(run("SELECT x AS status FROM t").is_empty());
        assert!(run("SELECT x AS metadata FROM t").is_empty());
        assert!(run("SELECT x AS channel FROM t").is_empty());
    }

    #[test]
    fn still_flags_current_date_as_keyword() {
        // CURRENT_ prefix keeps CURRENT_DATE on the keyword side of the
        // underscore heuristic.
        let issues = run("SELECT x AS current_date FROM t");
        assert_eq!(issues.len(), 1);
    }
}
725}