Skip to main content

flowscope_core/linter/rules/
rf_004.rs

1//! LINT_RF_004: References keywords.
2//!
3//! SQLFluff RF04 parity (current scope): avoid keyword-looking identifiers with
4//! SQLFluff-style quoted/unquoted identifier-policy controls.
5
6use std::collections::HashSet;
7
8use crate::linter::config::LintConfig;
9use crate::linter::rule::{LintContext, LintRule};
10use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit};
11use regex::{Regex, RegexBuilder};
12use sqlparser::ast::Statement;
13use sqlparser::keywords::ALL_KEYWORDS;
14use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer, Whitespace};
15
16use super::identifier_candidates_helpers::{
17    collect_identifier_candidates, IdentifierCandidate, IdentifierKind, IdentifierPolicy,
18};
19
20pub struct ReferencesKeywords {
21    quoted_policy: IdentifierPolicy,
22    unquoted_policy: IdentifierPolicy,
23    ignore_words: HashSet<String>,
24    ignore_words_regex: Option<Regex>,
25}
26
27impl ReferencesKeywords {
28    pub fn from_config(config: &LintConfig) -> Self {
29        Self {
30            quoted_policy: IdentifierPolicy::from_config(
31                config,
32                issue_codes::LINT_RF_004,
33                "quoted_identifiers_policy",
34                "none",
35            ),
36            unquoted_policy: IdentifierPolicy::from_config(
37                config,
38                issue_codes::LINT_RF_004,
39                "unquoted_identifiers_policy",
40                "aliases",
41            ),
42            ignore_words: configured_ignore_words(config)
43                .into_iter()
44                .map(|word| normalize_token(&word))
45                .collect(),
46            ignore_words_regex: config
47                .rule_option_str(issue_codes::LINT_RF_004, "ignore_words_regex")
48                .filter(|pattern| !pattern.trim().is_empty())
49                .and_then(|pattern| {
50                    RegexBuilder::new(pattern)
51                        .case_insensitive(true)
52                        .build()
53                        .ok()
54                }),
55        }
56    }
57}
58
59impl Default for ReferencesKeywords {
60    fn default() -> Self {
61        Self {
62            quoted_policy: IdentifierPolicy::None,
63            unquoted_policy: IdentifierPolicy::Aliases,
64            ignore_words: HashSet::new(),
65            ignore_words_regex: None,
66        }
67    }
68}
69
70impl LintRule for ReferencesKeywords {
71    fn code(&self) -> &'static str {
72        issue_codes::LINT_RF_004
73    }
74
75    fn name(&self) -> &'static str {
76        "References keywords"
77    }
78
79    fn description(&self) -> &'static str {
80        "Keywords should not be used as identifiers."
81    }
82
83    fn check(&self, statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
84        if !statement_contains_keyword_identifier(statement, self) {
85            return Vec::new();
86        }
87
88        let mut issue = Issue::info(issue_codes::LINT_RF_004, "Keyword used as identifier.")
89            .with_statement(ctx.statement_index);
90
91        let autofix_edits =
92            keyword_table_alias_autofix_edits(ctx.statement_sql(), ctx.dialect(), self)
93                .into_iter()
94                .map(|edit| {
95                    IssuePatchEdit::new(
96                        ctx.span_from_statement_offset(edit.start, edit.end),
97                        edit.replacement,
98                    )
99                })
100                .collect::<Vec<_>>();
101        if !autofix_edits.is_empty() {
102            issue = issue.with_autofix_edits(IssueAutofixApplicability::Safe, autofix_edits);
103        }
104
105        vec![issue]
106    }
107}
108
109struct Rf004AutofixEdit {
110    start: usize,
111    end: usize,
112    replacement: String,
113}
114
115#[derive(Clone)]
116struct SimpleTableAliasDecl {
117    keyword_start: usize,
118    keyword_end: usize,
119    table_start: usize,
120    table_end: usize,
121    alias_end: usize,
122    alias: String,
123    explicit_as: bool,
124}
125
126#[derive(Clone)]
127struct LocatedToken {
128    token: Token,
129    start: usize,
130    end: usize,
131}
132
133fn keyword_table_alias_autofix_edits(
134    sql: &str,
135    dialect: Dialect,
136    rule: &ReferencesKeywords,
137) -> Vec<Rf004AutofixEdit> {
138    if !rule.unquoted_policy.allows(IdentifierKind::TableAlias) {
139        return Vec::new();
140    }
141
142    let Some(decls) = collect_simple_table_alias_declarations(sql, dialect) else {
143        return Vec::new();
144    };
145
146    let mut edits = Vec::new();
147    for decl in decls {
148        if !decl.explicit_as
149            || !is_alias_keyword_token(&decl.alias)
150            || is_ignored_token(&decl.alias, rule)
151        {
152            continue;
153        }
154        let clause = &sql[decl.keyword_start..decl.keyword_end];
155        let table = &sql[decl.table_start..decl.table_end];
156        edits.push(Rf004AutofixEdit {
157            start: decl.keyword_start,
158            end: decl.alias_end,
159            replacement: format!(
160                "{clause} {table} AS alias_{}",
161                decl.alias.to_ascii_lowercase()
162            ),
163        });
164    }
165
166    edits
167}
168
169fn collect_simple_table_alias_declarations(
170    sql: &str,
171    dialect: Dialect,
172) -> Option<Vec<SimpleTableAliasDecl>> {
173    let tokens = tokenize_with_offsets(sql, dialect)?;
174    let mut out = Vec::new();
175    let mut index = 0usize;
176
177    while index < tokens.len() {
178        if !token_matches_keyword(&tokens[index].token, "FROM")
179            && !token_matches_keyword(&tokens[index].token, "JOIN")
180        {
181            index += 1;
182            continue;
183        }
184
185        let keyword_start = tokens[index].start;
186        let keyword_end = tokens[index].end;
187
188        let Some(mut cursor) = next_non_trivia_token(&tokens, index + 1) else {
189            index += 1;
190            continue;
191        };
192        if token_simple_identifier(&tokens[cursor].token).is_none() {
193            index += 1;
194            continue;
195        }
196
197        let table_start = tokens[cursor].start;
198        let mut table_end = tokens[cursor].end;
199        cursor += 1;
200
201        loop {
202            let Some(dot_index) = next_non_trivia_token(&tokens, cursor) else {
203                break;
204            };
205            if !matches!(tokens[dot_index].token, Token::Period) {
206                break;
207            }
208            let Some(next_index) = next_non_trivia_token(&tokens, dot_index + 1) else {
209                break;
210            };
211            if token_simple_identifier(&tokens[next_index].token).is_none() {
212                break;
213            }
214            table_end = tokens[next_index].end;
215            cursor = next_index + 1;
216        }
217
218        let Some(mut alias_index) = next_non_trivia_token(&tokens, cursor) else {
219            index += 1;
220            continue;
221        };
222        let mut explicit_as = false;
223        if token_matches_keyword(&tokens[alias_index].token, "AS") {
224            explicit_as = true;
225            let Some(next_index) = next_non_trivia_token(&tokens, alias_index + 1) else {
226                index += 1;
227                continue;
228            };
229            alias_index = next_index;
230        }
231
232        let Some(alias) = token_simple_identifier(&tokens[alias_index].token) else {
233            index += 1;
234            continue;
235        };
236
237        out.push(SimpleTableAliasDecl {
238            keyword_start,
239            keyword_end,
240            table_start,
241            table_end,
242            alias_end: tokens[alias_index].end,
243            alias: alias.to_string(),
244            explicit_as,
245        });
246        index = alias_index + 1;
247    }
248
249    Some(out)
250}
251
252fn tokenize_with_offsets(sql: &str, dialect: Dialect) -> Option<Vec<LocatedToken>> {
253    let dialect = dialect.to_sqlparser_dialect();
254    let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
255    let tokens = tokenizer.tokenize_with_location().ok()?;
256
257    let mut out = Vec::with_capacity(tokens.len());
258    for token in tokens {
259        let (start, end) = token_with_span_offsets(sql, &token)?;
260        out.push(LocatedToken {
261            token: token.token,
262            start,
263            end,
264        });
265    }
266    Some(out)
267}
268
269fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
270    let start = line_col_to_offset(
271        sql,
272        token.span.start.line as usize,
273        token.span.start.column as usize,
274    )?;
275    let end = line_col_to_offset(
276        sql,
277        token.span.end.line as usize,
278        token.span.end.column as usize,
279    )?;
280    Some((start, end))
281}
282
283fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
284    if line == 0 || column == 0 {
285        return None;
286    }
287
288    let mut current_line = 1usize;
289    let mut current_col = 1usize;
290
291    for (offset, ch) in sql.char_indices() {
292        if current_line == line && current_col == column {
293            return Some(offset);
294        }
295
296        if ch == '\n' {
297            current_line += 1;
298            current_col = 1;
299        } else {
300            current_col += 1;
301        }
302    }
303
304    if current_line == line && current_col == column {
305        return Some(sql.len());
306    }
307
308    None
309}
310
311fn next_non_trivia_token(tokens: &[LocatedToken], mut start: usize) -> Option<usize> {
312    while start < tokens.len() {
313        if !is_trivia_token(&tokens[start].token) {
314            return Some(start);
315        }
316        start += 1;
317    }
318    None
319}
320
321fn is_trivia_token(token: &Token) -> bool {
322    matches!(
323        token,
324        Token::Whitespace(
325            Whitespace::Space
326                | Whitespace::Newline
327                | Whitespace::Tab
328                | Whitespace::SingleLineComment { .. }
329                | Whitespace::MultiLineComment(_)
330        )
331    )
332}
333
334fn token_matches_keyword(token: &Token, keyword: &str) -> bool {
335    matches!(token, Token::Word(word) if word.value.eq_ignore_ascii_case(keyword))
336}
337
338fn token_simple_identifier(token: &Token) -> Option<&str> {
339    match token {
340        Token::Word(word) if is_simple_identifier(&word.value) => Some(&word.value),
341        _ => None,
342    }
343}
344
345fn is_simple_identifier(value: &str) -> bool {
346    let bytes = value.as_bytes();
347    if bytes.is_empty() || !is_ascii_ident_start(bytes[0]) {
348        return false;
349    }
350    bytes[1..].iter().copied().all(is_ascii_ident_continue)
351}
352
353fn is_ascii_ident_start(byte: u8) -> bool {
354    byte.is_ascii_alphabetic() || byte == b'_'
355}
356
357fn is_ascii_ident_continue(byte: u8) -> bool {
358    byte.is_ascii_alphanumeric() || byte == b'_'
359}
360
361fn is_alias_keyword_token(alias: &str) -> bool {
362    is_keyword(alias)
363}
364
365fn statement_contains_keyword_identifier(statement: &Statement, rule: &ReferencesKeywords) -> bool {
366    collect_identifier_candidates(statement)
367        .into_iter()
368        .any(|candidate| candidate_triggers_rule(&candidate, rule))
369}
370
371fn candidate_triggers_rule(candidate: &IdentifierCandidate, rule: &ReferencesKeywords) -> bool {
372    // SQLFluff skips 1-character identifiers (e.g. datepart keywords like "d").
373    if candidate.value.len() <= 1 {
374        return false;
375    }
376    if is_ignored_token(&candidate.value, rule) || !is_keyword(&candidate.value) {
377        return false;
378    }
379
380    if candidate.quoted {
381        rule.quoted_policy.allows(candidate.kind)
382    } else {
383        rule.unquoted_policy.allows(candidate.kind)
384    }
385}
386
387fn configured_ignore_words(config: &LintConfig) -> Vec<String> {
388    if let Some(words) = config.rule_option_string_list(issue_codes::LINT_RF_004, "ignore_words") {
389        return words;
390    }
391
392    config
393        .rule_option_str(issue_codes::LINT_RF_004, "ignore_words")
394        .map(|words| {
395            words
396                .split(',')
397                .map(str::trim)
398                .filter(|word| !word.is_empty())
399                .map(str::to_string)
400                .collect()
401        })
402        .unwrap_or_default()
403}
404
405fn is_ignored_token(token: &str, rule: &ReferencesKeywords) -> bool {
406    let normalized = normalize_token(token);
407    rule.ignore_words.contains(&normalized)
408        || rule
409            .ignore_words_regex
410            .as_ref()
411            .is_some_and(|regex| regex.is_match(&normalized))
412}
413
414fn normalize_token(token: &str) -> String {
415    token
416        .trim()
417        .trim_matches(|ch| matches!(ch, '"' | '`' | '\'' | '[' | ']'))
418        .to_ascii_uppercase()
419}
420
421fn is_keyword(token: &str) -> bool {
422    let upper = token.trim().to_ascii_uppercase();
423    (ALL_KEYWORDS.binary_search(&upper.as_str()).is_ok() || is_sqlfluff_extra_keyword(&upper))
424        && !is_non_keyword_identifier(&upper)
425}
426
427fn is_sqlfluff_extra_keyword(upper: &str) -> bool {
428    // SQLFluff treats COST as a keyword for PostgreSQL, while sqlparser does not.
429    matches!(upper, "COST")
430}
431
432/// Returns true for words that sqlparser includes in `ALL_KEYWORDS` but that
433/// SQLFluff does not treat as keywords for RF04.  These fall into two groups:
434///
435///  1. Window/aggregate function names (compound names with underscores like
436///     `ROW_NUMBER`, `DATE_PART`) — excluding `CURRENT_*` / `LOCAL_*` /
437///     `SESSION_*` / `SYSTEM_*` which are SQL-standard reserved pseudo-functions.
438///  2. Dialect-specific parser tokens that are not general SQL keywords
439///     (`METADATA` for BigQuery, `CHANNEL` for PostgreSQL LISTEN/NOTIFY, etc.).
440fn is_non_keyword_identifier(upper: &str) -> bool {
441    if upper.contains('_')
442        && !upper.starts_with("CURRENT_")
443        && !upper.starts_with("LOCAL_")
444        && !upper.starts_with("SESSION_")
445        && !upper.starts_with("SYSTEM_")
446    {
447        return true;
448    }
449    matches!(upper, "CHANNEL" | "GENERATED" | "METADATA" | "STATUS")
450}
451
452#[cfg(test)]
453mod tests {
454    use super::*;
455    use crate::parser::parse_sql;
456    use crate::types::IssueAutofixApplicability;
457
458    fn run(sql: &str) -> Vec<Issue> {
459        run_with_config(sql, LintConfig::default())
460    }
461
462    fn run_with_config(sql: &str, config: LintConfig) -> Vec<Issue> {
463        let statements = parse_sql(sql).expect("parse");
464        let rule = ReferencesKeywords::from_config(&config);
465        statements
466            .iter()
467            .enumerate()
468            .flat_map(|(index, statement)| {
469                rule.check(
470                    statement,
471                    &LintContext {
472                        sql,
473                        statement_range: 0..sql.len(),
474                        statement_index: index,
475                    },
476                )
477            })
478            .collect()
479    }
480
481    fn apply_issue_autofix(sql: &str, issue: &Issue) -> Option<String> {
482        let autofix = issue.autofix.as_ref()?;
483        let mut out = sql.to_string();
484        let mut edits = autofix.edits.clone();
485        edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
486        for edit in edits.into_iter().rev() {
487            out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
488        }
489        Some(out)
490    }
491
492    #[test]
493    fn flags_unquoted_keyword_table_alias() {
494        let issues = run("SELECT sum.id FROM users AS sum");
495        assert_eq!(issues.len(), 1);
496        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
497    }
498
499    #[test]
500    fn emits_safe_autofix_for_explicit_keyword_table_alias() {
501        let sql = "select a from users as select";
502        let issues = run(sql);
503        assert_eq!(issues.len(), 1);
504        let autofix = issues[0].autofix.as_ref().expect("autofix metadata");
505        assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
506        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
507        assert_eq!(fixed, "select a from users AS alias_select");
508    }
509
510    #[test]
511    fn flags_unquoted_keyword_projection_alias() {
512        let issues = run("SELECT amount AS sum FROM t");
513        assert_eq!(issues.len(), 1);
514        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
515    }
516
517    #[test]
518    fn flags_cost_alias_as_keyword_identifier() {
519        let issues = run("SELECT 1 AS cost");
520        assert_eq!(issues.len(), 1);
521        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
522    }
523
524    #[test]
525    fn flags_unquoted_keyword_cte_alias() {
526        let issues = run("WITH sum AS (SELECT 1 AS value) SELECT value FROM sum");
527        assert_eq!(issues.len(), 1);
528        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
529    }
530
531    #[test]
532    fn does_not_flag_quoted_keyword_alias_by_default() {
533        assert!(run("SELECT \"select\".id FROM users AS \"select\"").is_empty());
534    }
535
536    #[test]
537    fn does_not_flag_non_keyword_alias() {
538        let issues = run("SELECT u.id FROM users AS u");
539        assert!(issues.is_empty());
540    }
541
542    #[test]
543    fn does_not_flag_sql_like_string_literal() {
544        let issues = run("SELECT 'FROM users AS date' AS snippet");
545        assert!(issues.is_empty());
546    }
547
548    #[test]
549    fn quoted_identifiers_policy_all_flags_quoted_keyword_alias() {
550        let issues = run_with_config(
551            "SELECT \"select\".id FROM users AS \"select\"",
552            LintConfig {
553                enabled: true,
554                disabled_rules: vec![],
555                rule_configs: std::collections::BTreeMap::from([(
556                    "references.keywords".to_string(),
557                    serde_json::json!({"quoted_identifiers_policy": "all"}),
558                )]),
559            },
560        );
561        assert_eq!(issues.len(), 1);
562        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
563    }
564
565    #[test]
566    fn unquoted_column_alias_policy_does_not_flag_table_alias() {
567        let issues = run_with_config(
568            "SELECT sum.id FROM users AS sum",
569            LintConfig {
570                enabled: true,
571                disabled_rules: vec![],
572                rule_configs: std::collections::BTreeMap::from([(
573                    "LINT_RF_004".to_string(),
574                    serde_json::json!({"unquoted_identifiers_policy": "column_aliases"}),
575                )]),
576            },
577        );
578        assert!(issues.is_empty());
579    }
580
581    #[test]
582    fn ignore_words_suppresses_keyword_identifier() {
583        let issues = run_with_config(
584            "SELECT amount AS sum FROM t",
585            LintConfig {
586                enabled: true,
587                disabled_rules: vec![],
588                rule_configs: std::collections::BTreeMap::from([(
589                    "references.keywords".to_string(),
590                    serde_json::json!({"ignore_words": ["sum"]}),
591                )]),
592            },
593        );
594        assert!(issues.is_empty());
595    }
596
597    #[test]
598    fn ignore_words_regex_suppresses_keyword_identifier() {
599        let issues = run_with_config(
600            "SELECT amount AS sum FROM t",
601            LintConfig {
602                enabled: true,
603                disabled_rules: vec![],
604                rule_configs: std::collections::BTreeMap::from([(
605                    "LINT_RF_004".to_string(),
606                    serde_json::json!({"ignore_words_regex": "^s.*"}),
607                )]),
608            },
609        );
610        assert!(issues.is_empty());
611    }
612
613    #[test]
614    fn flags_keyword_as_column_name_in_create_table() {
615        // SQLFluff: test_fail_keyword_as_identifier_column
616        let issues = run("CREATE TABLE artist(create TEXT)");
617        assert_eq!(issues.len(), 1);
618        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
619    }
620
621    #[test]
622    fn flags_keyword_as_column_alias() {
623        // SQLFluff: test_fail_keyword_as_identifier_column_alias
624        let issues = run("SELECT 1 as parameter");
625        assert_eq!(issues.len(), 1);
626        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
627    }
628
629    #[test]
630    fn flags_keyword_as_table_alias() {
631        // SQLFluff: test_fail_keyword_as_identifier_table_alias
632        let issues = run("SELECT x FROM tbl AS parameter");
633        assert_eq!(issues.len(), 1);
634        assert_eq!(issues[0].code, issue_codes::LINT_RF_004);
635    }
636
637    #[test]
638    fn does_not_flag_non_alias_with_aliases_policy() {
639        // SQLFluff: test_pass_valid_identifier_not_alias
640        // Default unquoted_identifiers_policy is "aliases", so bare column
641        // references that are not aliases should not trigger.
642        assert!(run("SELECT parameter").is_empty());
643    }
644
645    #[test]
646    fn flags_non_alias_with_all_policy() {
647        // SQLFluff: test_fail_keyword_as_identifier_not_alias_all
648        let issues = run_with_config(
649            "SELECT parameter",
650            LintConfig {
651                enabled: true,
652                disabled_rules: vec![],
653                rule_configs: std::collections::BTreeMap::from([(
654                    "references.keywords".to_string(),
655                    serde_json::json!({"unquoted_identifiers_policy": "all"}),
656                )]),
657            },
658        );
659        assert_eq!(issues.len(), 1);
660    }
661
662    #[test]
663    fn flags_column_alias_with_column_aliases_policy() {
664        // SQLFluff: test_fail_keyword_as_identifier_column_alias_config
665        let issues = run_with_config(
666            "SELECT x AS date FROM tbl AS parameter",
667            LintConfig {
668                enabled: true,
669                disabled_rules: vec![],
670                rule_configs: std::collections::BTreeMap::from([(
671                    "references.keywords".to_string(),
672                    serde_json::json!({"unquoted_identifiers_policy": "column_aliases"}),
673                )]),
674            },
675        );
676        assert_eq!(issues.len(), 1);
677    }
678
679    #[test]
680    fn flags_quoted_keyword_column_in_create_table() {
681        // SQLFluff: test_fail_keyword_as_quoted_identifier_column
682        let issues = run_with_config(
683            "CREATE TABLE \"artist\"(\"create\" TEXT)",
684            LintConfig {
685                enabled: true,
686                disabled_rules: vec![],
687                rule_configs: std::collections::BTreeMap::from([(
688                    "references.keywords".to_string(),
689                    serde_json::json!({"quoted_identifiers_policy": "aliases"}),
690                )]),
691            },
692        );
693        assert_eq!(issues.len(), 1);
694    }
695
696    #[test]
697    fn flags_keyword_as_column_name_postgres() {
698        // SQLFluff: test_fail_keyword_as_column_name_postgres
699        let issues = run("CREATE TABLE test_table (type varchar(30) NOT NULL)");
700        assert_eq!(issues.len(), 1);
701    }
702
703    #[test]
704    fn does_not_flag_function_name_as_keyword() {
705        // ROW_NUMBER is a window function name, not a SQL keyword.
706        assert!(run("SELECT ROW_NUMBER() OVER () AS row_number FROM t").is_empty());
707    }
708
709    #[test]
710    fn does_not_flag_non_keyword_identifiers() {
711        // Words in sqlparser's ALL_KEYWORDS that are not treated as keywords
712        // by SQLFluff: METADATA, CHANNEL, STATUS, GENERATED.
713        assert!(run("WITH generated AS (SELECT 1 AS x) SELECT x FROM generated").is_empty());
714        assert!(run("SELECT x AS status FROM t").is_empty());
715        assert!(run("SELECT x AS metadata FROM t").is_empty());
716        assert!(run("SELECT x AS channel FROM t").is_empty());
717    }
718
719    #[test]
720    fn still_flags_current_date_as_keyword() {
721        // CURRENT_DATE is a SQL-standard reserved pseudo-function.
722        let issues = run("SELECT x AS current_date FROM t");
723        assert_eq!(issues.len(), 1);
724    }
725}