Skip to main content

flowscope_core/linter/rules/
rf_004.rs

1//! LINT_RF_004: References keywords.
2//!
3//! SQLFluff RF04 parity (current scope): avoid keyword-looking identifiers with
4//! SQLFluff-style quoted/unquoted identifier-policy controls.
5
6use std::collections::HashSet;
7
8use crate::linter::config::LintConfig;
9use crate::linter::rule::{LintContext, LintRule};
10use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit};
11use regex::{Regex, RegexBuilder};
12use sqlparser::ast::Statement;
13use sqlparser::keywords::ALL_KEYWORDS;
14use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer, Whitespace};
15
16use super::identifier_candidates_helpers::{
17    collect_identifier_candidates, IdentifierCandidate, IdentifierKind, IdentifierPolicy,
18};
19
20pub struct ReferencesKeywords {
21    quoted_policy: IdentifierPolicy,
22    unquoted_policy: IdentifierPolicy,
23    ignore_words: HashSet<String>,
24    ignore_words_regex: Option<Regex>,
25}
26
27impl ReferencesKeywords {
28    pub fn from_config(config: &LintConfig) -> Self {
29        Self {
30            quoted_policy: IdentifierPolicy::from_config(
31                config,
32                issue_codes::LINT_RF_004,
33                "quoted_identifiers_policy",
34                "none",
35            ),
36            unquoted_policy: IdentifierPolicy::from_config(
37                config,
38                issue_codes::LINT_RF_004,
39                "unquoted_identifiers_policy",
40                "aliases",
41            ),
42            ignore_words: configured_ignore_words(config)
43                .into_iter()
44                .map(|word| normalize_token(&word))
45                .collect(),
46            ignore_words_regex: config
47                .rule_option_str(issue_codes::LINT_RF_004, "ignore_words_regex")
48                .filter(|pattern| !pattern.trim().is_empty())
49                .and_then(|pattern| {
50                    RegexBuilder::new(pattern)
51                        .case_insensitive(true)
52                        .build()
53                        .ok()
54                }),
55        }
56    }
57}
58
59impl Default for ReferencesKeywords {
60    fn default() -> Self {
61        Self {
62            quoted_policy: IdentifierPolicy::None,
63            unquoted_policy: IdentifierPolicy::Aliases,
64            ignore_words: HashSet::new(),
65            ignore_words_regex: None,
66        }
67    }
68}
69
70impl LintRule for ReferencesKeywords {
71    fn code(&self) -> &'static str {
72        issue_codes::LINT_RF_004
73    }
74
75    fn name(&self) -> &'static str {
76        "References keywords"
77    }
78
79    fn description(&self) -> &'static str {
80        "Keywords should not be used as identifiers."
81    }
82
83    fn check(&self, statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
84        if !statement_contains_keyword_identifier(statement, self) {
85            return Vec::new();
86        }
87
88        let mut issue = Issue::info(issue_codes::LINT_RF_004, "Keyword used as identifier.")
89            .with_statement(ctx.statement_index);
90
91        let autofix_edits =
92            keyword_table_alias_autofix_edits(ctx.statement_sql(), ctx.dialect(), self)
93                .into_iter()
94                .map(|edit| {
95                    IssuePatchEdit::new(
96                        ctx.span_from_statement_offset(edit.start, edit.end),
97                        edit.replacement,
98                    )
99                })
100                .collect::<Vec<_>>();
101        if !autofix_edits.is_empty() {
102            issue = issue.with_autofix_edits(IssueAutofixApplicability::Safe, autofix_edits);
103        }
104
105        vec![issue]
106    }
107}
108
109struct Rf004AutofixEdit {
110    start: usize,
111    end: usize,
112    replacement: String,
113}
114
115#[derive(Clone)]
116struct SimpleTableAliasDecl {
117    keyword_start: usize,
118    keyword_end: usize,
119    table_start: usize,
120    table_end: usize,
121    alias_end: usize,
122    alias: String,
123    explicit_as: bool,
124}
125
126#[derive(Clone)]
127struct LocatedToken {
128    token: Token,
129    start: usize,
130    end: usize,
131}
132
133fn keyword_table_alias_autofix_edits(
134    sql: &str,
135    dialect: Dialect,
136    rule: &ReferencesKeywords,
137) -> Vec<Rf004AutofixEdit> {
138    if !rule.unquoted_policy.allows(IdentifierKind::TableAlias) {
139        return Vec::new();
140    }
141
142    let Some(decls) = collect_simple_table_alias_declarations(sql, dialect) else {
143        return Vec::new();
144    };
145
146    let mut edits = Vec::new();
147    for decl in decls {
148        if !decl.explicit_as
149            || !is_alias_keyword_token(&decl.alias)
150            || is_ignored_token(&decl.alias, rule)
151        {
152            continue;
153        }
154        let clause = &sql[decl.keyword_start..decl.keyword_end];
155        let table = &sql[decl.table_start..decl.table_end];
156        edits.push(Rf004AutofixEdit {
157            start: decl.keyword_start,
158            end: decl.alias_end,
159            replacement: format!(
160                "{clause} {table} AS alias_{}",
161                decl.alias.to_ascii_lowercase()
162            ),
163        });
164    }
165
166    edits
167}
168
169fn collect_simple_table_alias_declarations(
170    sql: &str,
171    dialect: Dialect,
172) -> Option<Vec<SimpleTableAliasDecl>> {
173    let tokens = tokenize_with_offsets(sql, dialect)?;
174    let mut out = Vec::new();
175    let mut index = 0usize;
176
177    while index < tokens.len() {
178        if !token_matches_keyword(&tokens[index].token, "FROM")
179            && !token_matches_keyword(&tokens[index].token, "JOIN")
180        {
181            index += 1;
182            continue;
183        }
184
185        let keyword_start = tokens[index].start;
186        let keyword_end = tokens[index].end;
187
188        let Some(mut cursor) = next_non_trivia_token(&tokens, index + 1) else {
189            index += 1;
190            continue;
191        };
192        if token_simple_identifier(&tokens[cursor].token).is_none() {
193            index += 1;
194            continue;
195        }
196
197        let table_start = tokens[cursor].start;
198        let mut table_end = tokens[cursor].end;
199        cursor += 1;
200
201        while let Some(dot_index) = next_non_trivia_token(&tokens, cursor) {
202            if !matches!(tokens[dot_index].token, Token::Period) {
203                break;
204            }
205            let Some(next_index) = next_non_trivia_token(&tokens, dot_index + 1) else {
206                break;
207            };
208            if token_simple_identifier(&tokens[next_index].token).is_none() {
209                break;
210            }
211            table_end = tokens[next_index].end;
212            cursor = next_index + 1;
213        }
214
215        let Some(mut alias_index) = next_non_trivia_token(&tokens, cursor) else {
216            index += 1;
217            continue;
218        };
219        let mut explicit_as = false;
220        if token_matches_keyword(&tokens[alias_index].token, "AS") {
221            explicit_as = true;
222            let Some(next_index) = next_non_trivia_token(&tokens, alias_index + 1) else {
223                index += 1;
224                continue;
225            };
226            alias_index = next_index;
227        }
228
229        let Some(alias) = token_simple_identifier(&tokens[alias_index].token) else {
230            index += 1;
231            continue;
232        };
233
234        out.push(SimpleTableAliasDecl {
235            keyword_start,
236            keyword_end,
237            table_start,
238            table_end,
239            alias_end: tokens[alias_index].end,
240            alias: alias.to_string(),
241            explicit_as,
242        });
243        index = alias_index + 1;
244    }
245
246    Some(out)
247}
248
249fn tokenize_with_offsets(sql: &str, dialect: Dialect) -> Option<Vec<LocatedToken>> {
250    let dialect = dialect.to_sqlparser_dialect();
251    let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
252    let tokens = tokenizer.tokenize_with_location().ok()?;
253
254    let mut out = Vec::with_capacity(tokens.len());
255    for token in tokens {
256        let (start, end) = token_with_span_offsets(sql, &token)?;
257        out.push(LocatedToken {
258            token: token.token,
259            start,
260            end,
261        });
262    }
263    Some(out)
264}
265
266fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
267    let start = line_col_to_offset(
268        sql,
269        token.span.start.line as usize,
270        token.span.start.column as usize,
271    )?;
272    let end = line_col_to_offset(
273        sql,
274        token.span.end.line as usize,
275        token.span.end.column as usize,
276    )?;
277    Some((start, end))
278}
279
280fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
281    if line == 0 || column == 0 {
282        return None;
283    }
284
285    let mut current_line = 1usize;
286    let mut current_col = 1usize;
287
288    for (offset, ch) in sql.char_indices() {
289        if current_line == line && current_col == column {
290            return Some(offset);
291        }
292
293        if ch == '\n' {
294            current_line += 1;
295            current_col = 1;
296        } else {
297            current_col += 1;
298        }
299    }
300
301    if current_line == line && current_col == column {
302        return Some(sql.len());
303    }
304
305    None
306}
307
308fn next_non_trivia_token(tokens: &[LocatedToken], mut start: usize) -> Option<usize> {
309    while start < tokens.len() {
310        if !is_trivia_token(&tokens[start].token) {
311            return Some(start);
312        }
313        start += 1;
314    }
315    None
316}
317
318fn is_trivia_token(token: &Token) -> bool {
319    matches!(
320        token,
321        Token::Whitespace(
322            Whitespace::Space
323                | Whitespace::Newline
324                | Whitespace::Tab
325                | Whitespace::SingleLineComment { .. }
326                | Whitespace::MultiLineComment(_)
327        )
328    )
329}
330
331fn token_matches_keyword(token: &Token, keyword: &str) -> bool {
332    matches!(token, Token::Word(word) if word.value.eq_ignore_ascii_case(keyword))
333}
334
335fn token_simple_identifier(token: &Token) -> Option<&str> {
336    match token {
337        Token::Word(word) if is_simple_identifier(&word.value) => Some(&word.value),
338        _ => None,
339    }
340}
341
342fn is_simple_identifier(value: &str) -> bool {
343    let bytes = value.as_bytes();
344    if bytes.is_empty() || !is_ascii_ident_start(bytes[0]) {
345        return false;
346    }
347    bytes[1..].iter().copied().all(is_ascii_ident_continue)
348}
349
350fn is_ascii_ident_start(byte: u8) -> bool {
351    byte.is_ascii_alphabetic() || byte == b'_'
352}
353
354fn is_ascii_ident_continue(byte: u8) -> bool {
355    byte.is_ascii_alphanumeric() || byte == b'_'
356}
357
358fn is_alias_keyword_token(alias: &str) -> bool {
359    is_keyword(alias)
360}
361
362fn statement_contains_keyword_identifier(statement: &Statement, rule: &ReferencesKeywords) -> bool {
363    collect_identifier_candidates(statement)
364        .into_iter()
365        .any(|candidate| candidate_triggers_rule(&candidate, rule))
366}
367
368fn candidate_triggers_rule(candidate: &IdentifierCandidate, rule: &ReferencesKeywords) -> bool {
369    // SQLFluff skips 1-character identifiers (e.g. datepart keywords like "d").
370    if candidate.value.len() <= 1 {
371        return false;
372    }
373    if is_ignored_token(&candidate.value, rule) || !is_keyword(&candidate.value) {
374        return false;
375    }
376
377    if candidate.quoted {
378        rule.quoted_policy.allows(candidate.kind)
379    } else {
380        rule.unquoted_policy.allows(candidate.kind)
381    }
382}
383
384fn configured_ignore_words(config: &LintConfig) -> Vec<String> {
385    if let Some(words) = config.rule_option_string_list(issue_codes::LINT_RF_004, "ignore_words") {
386        return words;
387    }
388
389    config
390        .rule_option_str(issue_codes::LINT_RF_004, "ignore_words")
391        .map(|words| {
392            words
393                .split(',')
394                .map(str::trim)
395                .filter(|word| !word.is_empty())
396                .map(str::to_string)
397                .collect()
398        })
399        .unwrap_or_default()
400}
401
402fn is_ignored_token(token: &str, rule: &ReferencesKeywords) -> bool {
403    let normalized = normalize_token(token);
404    rule.ignore_words.contains(&normalized)
405        || rule
406            .ignore_words_regex
407            .as_ref()
408            .is_some_and(|regex| regex.is_match(&normalized))
409}
410
411fn normalize_token(token: &str) -> String {
412    token
413        .trim()
414        .trim_matches(|ch| matches!(ch, '"' | '`' | '\'' | '[' | ']'))
415        .to_ascii_uppercase()
416}
417
418fn is_keyword(token: &str) -> bool {
419    let upper = token.trim().to_ascii_uppercase();
420    (ALL_KEYWORDS.binary_search(&upper.as_str()).is_ok() || is_sqlfluff_extra_keyword(&upper))
421        && !is_non_keyword_identifier(&upper)
422}
423
424fn is_sqlfluff_extra_keyword(upper: &str) -> bool {
425    // SQLFluff treats COST as a keyword for PostgreSQL, while sqlparser does not.
426    matches!(upper, "COST")
427}
428
429/// Returns true for words that sqlparser includes in `ALL_KEYWORDS` but that
430/// SQLFluff does not treat as keywords for RF04.  These fall into two groups:
431///
432///  1. Window/aggregate function names (compound names with underscores like
433///     `ROW_NUMBER`, `DATE_PART`) — excluding `CURRENT_*` / `LOCAL_*` /
434///     `SESSION_*` / `SYSTEM_*` which are SQL-standard reserved pseudo-functions.
435///  2. Dialect-specific parser tokens that are not general SQL keywords
436///     (`METADATA` for BigQuery, `CHANNEL` for PostgreSQL LISTEN/NOTIFY, etc.).
437fn is_non_keyword_identifier(upper: &str) -> bool {
438    if upper.contains('_')
439        && !upper.starts_with("CURRENT_")
440        && !upper.starts_with("LOCAL_")
441        && !upper.starts_with("SESSION_")
442        && !upper.starts_with("SYSTEM_")
443    {
444        return true;
445    }
446    matches!(upper, "CHANNEL" | "GENERATED" | "METADATA" | "STATUS")
447}
448
449#[cfg(test)]
450mod tests {
451    use super::*;
452    use crate::parser::parse_sql;
453    use crate::types::IssueAutofixApplicability;
454
455    fn run(sql: &str) -> Vec<Issue> {
456        run_with_config(sql, LintConfig::default())
457    }
458
459    fn run_with_config(sql: &str, config: LintConfig) -> Vec<Issue> {
460        let statements = parse_sql(sql).expect("parse");
461        let rule = ReferencesKeywords::from_config(&config);
462        statements
463            .iter()
464            .enumerate()
465            .flat_map(|(index, statement)| {
466                rule.check(
467                    statement,
468                    &LintContext {
469                        sql,
470                        statement_range: 0..sql.len(),
471                        statement_index: index,
472                    },
473                )
474            })
475            .collect()
476    }
477
478    fn apply_issue_autofix(sql: &str, issue: &Issue) -> Option<String> {
479        let autofix = issue.autofix.as_ref()?;
480        let mut out = sql.to_string();
481        let mut edits = autofix.edits.clone();
482        edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
483        for edit in edits.into_iter().rev() {
484            out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
485        }
486        Some(out)
487    }
488
489    #[test]
490    fn flags_unquoted_keyword_table_alias() {
491        let issues = run("SELECT sum.id FROM users AS sum");
492        assert_eq!(issues.len(), 1);
493        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
494    }
495
496    #[test]
497    fn emits_safe_autofix_for_explicit_keyword_table_alias() {
498        let sql = "select a from users as select";
499        let issues = run(sql);
500        assert_eq!(issues.len(), 1);
501        let autofix = issues[0].autofix.as_ref().expect("autofix metadata");
502        assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
503        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
504        assert_eq!(fixed, "select a from users AS alias_select");
505    }
506
507    #[test]
508    fn flags_unquoted_keyword_projection_alias() {
509        let issues = run("SELECT amount AS sum FROM t");
510        assert_eq!(issues.len(), 1);
511        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
512    }
513
514    #[test]
515    fn flags_cost_alias_as_keyword_identifier() {
516        let issues = run("SELECT 1 AS cost");
517        assert_eq!(issues.len(), 1);
518        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
519    }
520
521    #[test]
522    fn flags_unquoted_keyword_cte_alias() {
523        let issues = run("WITH sum AS (SELECT 1 AS value) SELECT value FROM sum");
524        assert_eq!(issues.len(), 1);
525        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
526    }
527
528    #[test]
529    fn does_not_flag_quoted_keyword_alias_by_default() {
530        assert!(run("SELECT \"select\".id FROM users AS \"select\"").is_empty());
531    }
532
533    #[test]
534    fn does_not_flag_non_keyword_alias() {
535        let issues = run("SELECT u.id FROM users AS u");
536        assert!(issues.is_empty());
537    }
538
539    #[test]
540    fn does_not_flag_sql_like_string_literal() {
541        let issues = run("SELECT 'FROM users AS date' AS snippet");
542        assert!(issues.is_empty());
543    }
544
545    #[test]
546    fn quoted_identifiers_policy_all_flags_quoted_keyword_alias() {
547        let issues = run_with_config(
548            "SELECT \"select\".id FROM users AS \"select\"",
549            LintConfig {
550                enabled: true,
551                disabled_rules: vec![],
552                rule_configs: std::collections::BTreeMap::from([(
553                    "references.keywords".to_string(),
554                    serde_json::json!({"quoted_identifiers_policy": "all"}),
555                )]),
556            },
557        );
558        assert_eq!(issues.len(), 1);
559        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
560    }
561
562    #[test]
563    fn unquoted_column_alias_policy_does_not_flag_table_alias() {
564        let issues = run_with_config(
565            "SELECT sum.id FROM users AS sum",
566            LintConfig {
567                enabled: true,
568                disabled_rules: vec![],
569                rule_configs: std::collections::BTreeMap::from([(
570                    "LINT_RF_004".to_string(),
571                    serde_json::json!({"unquoted_identifiers_policy": "column_aliases"}),
572                )]),
573            },
574        );
575        assert!(issues.is_empty());
576    }
577
578    #[test]
579    fn ignore_words_suppresses_keyword_identifier() {
580        let issues = run_with_config(
581            "SELECT amount AS sum FROM t",
582            LintConfig {
583                enabled: true,
584                disabled_rules: vec![],
585                rule_configs: std::collections::BTreeMap::from([(
586                    "references.keywords".to_string(),
587                    serde_json::json!({"ignore_words": ["sum"]}),
588                )]),
589            },
590        );
591        assert!(issues.is_empty());
592    }
593
594    #[test]
595    fn ignore_words_regex_suppresses_keyword_identifier() {
596        let issues = run_with_config(
597            "SELECT amount AS sum FROM t",
598            LintConfig {
599                enabled: true,
600                disabled_rules: vec![],
601                rule_configs: std::collections::BTreeMap::from([(
602                    "LINT_RF_004".to_string(),
603                    serde_json::json!({"ignore_words_regex": "^s.*"}),
604                )]),
605            },
606        );
607        assert!(issues.is_empty());
608    }
609
610    #[test]
611    fn flags_keyword_as_column_name_in_create_table() {
612        // SQLFluff: test_fail_keyword_as_identifier_column
613        let issues = run("CREATE TABLE artist(create TEXT)");
614        assert_eq!(issues.len(), 1);
615        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
616    }
617
618    #[test]
619    fn flags_keyword_as_column_alias() {
620        // SQLFluff: test_fail_keyword_as_identifier_column_alias
621        let issues = run("SELECT 1 as parameter");
622        assert_eq!(issues.len(), 1);
623        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
624    }
625
626    #[test]
627    fn flags_keyword_as_table_alias() {
628        // SQLFluff: test_fail_keyword_as_identifier_table_alias
629        let issues = run("SELECT x FROM tbl AS parameter");
630        assert_eq!(issues.len(), 1);
631        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
632    }
633
634    #[test]
635    fn does_not_flag_non_alias_with_aliases_policy() {
636        // SQLFluff: test_pass_valid_identifier_not_alias
637        // Default unquoted_identifiers_policy is "aliases", so bare column
638        // references that are not aliases should not trigger.
639        assert!(run("SELECT parameter").is_empty());
640    }
641
642    #[test]
643    fn flags_non_alias_with_all_policy() {
644        // SQLFluff: test_fail_keyword_as_identifier_not_alias_all
645        let issues = run_with_config(
646            "SELECT parameter",
647            LintConfig {
648                enabled: true,
649                disabled_rules: vec![],
650                rule_configs: std::collections::BTreeMap::from([(
651                    "references.keywords".to_string(),
652                    serde_json::json!({"unquoted_identifiers_policy": "all"}),
653                )]),
654            },
655        );
656        assert_eq!(issues.len(), 1);
657    }
658
659    #[test]
660    fn flags_column_alias_with_column_aliases_policy() {
661        // SQLFluff: test_fail_keyword_as_identifier_column_alias_config
662        let issues = run_with_config(
663            "SELECT x AS date FROM tbl AS parameter",
664            LintConfig {
665                enabled: true,
666                disabled_rules: vec![],
667                rule_configs: std::collections::BTreeMap::from([(
668                    "references.keywords".to_string(),
669                    serde_json::json!({"unquoted_identifiers_policy": "column_aliases"}),
670                )]),
671            },
672        );
673        assert_eq!(issues.len(), 1);
674    }
675
676    #[test]
677    fn flags_quoted_keyword_column_in_create_table() {
678        // SQLFluff: test_fail_keyword_as_quoted_identifier_column
679        let issues = run_with_config(
680            "CREATE TABLE \"artist\"(\"create\" TEXT)",
681            LintConfig {
682                enabled: true,
683                disabled_rules: vec![],
684                rule_configs: std::collections::BTreeMap::from([(
685                    "references.keywords".to_string(),
686                    serde_json::json!({"quoted_identifiers_policy": "aliases"}),
687                )]),
688            },
689        );
690        assert_eq!(issues.len(), 1);
691    }
692
693    #[test]
694    fn flags_keyword_as_column_name_postgres() {
695        // SQLFluff: test_fail_keyword_as_column_name_postgres
696        let issues = run("CREATE TABLE test_table (type varchar(30) NOT NULL)");
697        assert_eq!(issues.len(), 1);
698    }
699
700    #[test]
701    fn does_not_flag_function_name_as_keyword() {
702        // ROW_NUMBER is a window function name, not a SQL keyword.
703        assert!(run("SELECT ROW_NUMBER() OVER () AS row_number FROM t").is_empty());
704    }
705
706    #[test]
707    fn does_not_flag_non_keyword_identifiers() {
708        // Words in sqlparser's ALL_KEYWORDS that are not treated as keywords
709        // by SQLFluff: METADATA, CHANNEL, STATUS, GENERATED.
710        assert!(run("WITH generated AS (SELECT 1 AS x) SELECT x FROM generated").is_empty());
711        assert!(run("SELECT x AS status FROM t").is_empty());
712        assert!(run("SELECT x AS metadata FROM t").is_empty());
713        assert!(run("SELECT x AS channel FROM t").is_empty());
714    }
715
716    #[test]
717    fn still_flags_current_date_as_keyword() {
718        // CURRENT_DATE is a SQL-standard reserved pseudo-function.
719        let issues = run("SELECT x AS current_date FROM t");
720        assert_eq!(issues.len(), 1);
721    }
722}