// flowscope_core/linter/rules/cp_004.rs

1//! LINT_CP_004: Literal capitalisation.
2//!
3//! SQLFluff CP04 parity (current scope): detect mixed-case usage for
4//! NULL/TRUE/FALSE literal keywords.
5
6use std::collections::HashSet;
7
8use crate::linter::config::LintConfig;
9use crate::linter::rule::{LintContext, LintRule};
10use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit, Span};
11use regex::Regex;
12use sqlparser::ast::Statement;
13use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer};
14
15use super::capitalisation_policy_helpers::{
16    ignored_words_from_config, ignored_words_regex_from_config, token_is_ignored,
17    tokens_violate_policy, CapitalisationPolicy,
18};
19
/// LINT_CP_004 rule state, built once per lint run from configuration.
pub struct CapitalisationLiterals {
    /// Required capitalisation style; `Consistent` adapts to the file's own usage.
    policy: CapitalisationPolicy,
    /// Exact literal spellings excluded from the check (`ignore_words` option).
    ignore_words: HashSet<String>,
    /// Optional pattern excluding matching literals (`ignore_words_regex` option).
    ignore_words_regex: Option<Regex>,
}
25
26impl CapitalisationLiterals {
27    pub fn from_config(config: &LintConfig) -> Self {
28        // SQLFluff accepts both `extended_capitalisation_policy` and the
29        // shorter `capitalisation_policy` for CP04.
30        let policy = config
31            .rule_option_str(issue_codes::LINT_CP_004, "extended_capitalisation_policy")
32            .or_else(|| config.rule_option_str(issue_codes::LINT_CP_004, "capitalisation_policy"))
33            .map(CapitalisationPolicy::from_raw_value)
34            .unwrap_or(CapitalisationPolicy::Consistent);
35
36        Self {
37            policy,
38            ignore_words: ignored_words_from_config(config, issue_codes::LINT_CP_004),
39            ignore_words_regex: ignored_words_regex_from_config(config, issue_codes::LINT_CP_004),
40        }
41    }
42}
43
44impl Default for CapitalisationLiterals {
45    fn default() -> Self {
46        Self {
47            policy: CapitalisationPolicy::Consistent,
48            ignore_words: HashSet::new(),
49            ignore_words_regex: None,
50        }
51    }
52}
53
54impl LintRule for CapitalisationLiterals {
55    fn code(&self) -> &'static str {
56        issue_codes::LINT_CP_004
57    }
58
59    fn name(&self) -> &'static str {
60        "Literal capitalisation"
61    }
62
63    fn description(&self) -> &'static str {
64        "Inconsistent capitalisation of boolean/null literal."
65    }
66
67    fn check(&self, _statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
68        let literals =
69            literal_tokens_for_context(ctx, &self.ignore_words, self.ignore_words_regex.as_ref());
70        let literal_values = literals
71            .iter()
72            .map(|candidate| candidate.value.clone())
73            .collect::<Vec<_>>();
74        if !tokens_violate_policy(&literal_values, self.policy) {
75            return Vec::new();
76        }
77
78        let autofix_edits = literal_autofix_edits(ctx, &literals, self.policy);
79
80        // Emit one issue per violating literal at its specific position.
81        if autofix_edits.is_empty() {
82            return vec![Issue::info(
83                issue_codes::LINT_CP_004,
84                "Literal keywords (NULL/TRUE/FALSE) use inconsistent capitalisation.",
85            )
86            .with_statement(ctx.statement_index)];
87        }
88
89        autofix_edits
90            .into_iter()
91            .map(|edit| {
92                let span = Span::new(edit.span.start, edit.span.end);
93                Issue::info(
94                    issue_codes::LINT_CP_004,
95                    "Literal keywords (NULL/TRUE/FALSE) use inconsistent capitalisation.",
96                )
97                .with_statement(ctx.statement_index)
98                .with_span(span)
99                .with_autofix_edits(IssueAutofixApplicability::Safe, vec![edit])
100            })
101            .collect()
102    }
103}
104
/// A NULL/TRUE/FALSE word found in a statement, with byte offsets relative to
/// the statement text (not the whole document).
#[derive(Clone)]
struct LiteralCandidate {
    // Original spelling as written in the source (case preserved).
    value: String,
    // Statement-local byte offset of the first character.
    start: usize,
    // Statement-local byte offset one past the last character.
    end: usize,
}
111
/// Collect NULL/TRUE/FALSE candidates for the statement in `ctx`.
///
/// Fast path: reuse the document-wide token stream and keep only tokens whose
/// offsets fall inside the statement range. The closure returns `None` as a
/// sentinel meaning "abandon the fast path", which triggers statement-local
/// re-tokenization below.
fn literal_tokens_for_context(
    ctx: &LintContext,
    ignore_words: &HashSet<String>,
    ignore_words_regex: Option<&Regex>,
) -> Vec<LiteralCandidate> {
    let from_document_tokens = ctx.with_document_tokens(|tokens| {
        if tokens.is_empty() {
            return None;
        }

        let mut out = Vec::new();
        for token in tokens {
            let Some((start, end)) = token_with_span_offsets(ctx.sql, token) else {
                continue;
            };
            // Skip tokens outside the current statement's range.
            if start < ctx.statement_range.start || end > ctx.statement_range.end {
                continue;
            }

            if let Token::Word(word) = &token.token {
                // Document token spans are tied to rendered SQL. If the source
                // slice does not match the token text, fall back to
                // statement-local tokenization.
                if !source_word_matches(ctx.sql, start, end, word.value.as_str()) {
                    return None;
                }
                if matches!(
                    word.value.to_ascii_uppercase().as_str(),
                    "NULL" | "TRUE" | "FALSE"
                ) && !token_is_ignored(word.value.as_str(), ignore_words, ignore_words_regex)
                {
                    // Convert document offsets to statement-local offsets so
                    // both code paths yield comparable candidates.
                    let Some(local_start) = start.checked_sub(ctx.statement_range.start) else {
                        continue;
                    };
                    let Some(local_end) = end.checked_sub(ctx.statement_range.start) else {
                        continue;
                    };
                    out.push(LiteralCandidate {
                        value: word.value.clone(),
                        start: local_start,
                        end: local_end,
                    });
                }
            }
        }
        Some(out)
    });

    if let Some(tokens) = from_document_tokens {
        return tokens;
    }

    // Slow path: tokenize just this statement's SQL.
    literal_tokens(
        ctx.statement_sql(),
        ignore_words,
        ignore_words_regex,
        ctx.dialect(),
    )
}
171
172fn literal_tokens(
173    sql: &str,
174    ignore_words: &HashSet<String>,
175    ignore_words_regex: Option<&Regex>,
176    dialect: Dialect,
177) -> Vec<LiteralCandidate> {
178    let dialect = dialect.to_sqlparser_dialect();
179    let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
180    let Ok(tokens) = tokenizer.tokenize_with_location() else {
181        return Vec::new();
182    };
183
184    tokens
185        .into_iter()
186        .filter_map(|token| {
187            if let Token::Word(word) = &token.token {
188                if matches!(
189                    word.value.to_ascii_uppercase().as_str(),
190                    "NULL" | "TRUE" | "FALSE"
191                ) && !token_is_ignored(word.value.as_str(), ignore_words, ignore_words_regex)
192                {
193                    let (start, end) = token_with_span_offsets(sql, &token)?;
194                    return Some(LiteralCandidate {
195                        value: word.value.clone(),
196                        start,
197                        end,
198                    });
199                }
200            }
201            None
202        })
203        .collect()
204}
205
/// Build one patch edit per literal whose casing differs from the resolved
/// policy. Policies without autofix support (pascal/camel/snake) produce no
/// edits; the caller then reports a single statement-level issue instead.
fn literal_autofix_edits(
    ctx: &LintContext,
    literals: &[LiteralCandidate],
    policy: CapitalisationPolicy,
) -> Vec<IssuePatchEdit> {
    // `Consistent` adopts the casing of the first literal whose case is
    // unambiguous (see `resolve_consistent_policy`); it is a first-instance
    // rule, not a majority vote across tokens.
    let resolved = if policy == CapitalisationPolicy::Consistent {
        resolve_consistent_policy(literals)
    } else {
        policy
    };

    let mut edits = Vec::new();

    for candidate in literals {
        let Some(replacement) = literal_case_replacement(candidate.value.as_str(), resolved) else {
            continue;
        };
        // Already conformant — no edit needed.
        if replacement == candidate.value {
            continue;
        }

        edits.push(IssuePatchEdit::new(
            ctx.span_from_statement_offset(candidate.start, candidate.end),
            replacement,
        ));
    }

    // Normalize ordering and drop exact duplicates so downstream edit
    // application is deterministic.
    edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
    edits.dedup_by(|left, right| {
        left.span.start == right.span.start
            && left.span.end == right.span.end
            && left.replacement == right.replacement
    });
    edits
}
243
244fn literal_case_replacement(value: &str, policy: CapitalisationPolicy) -> Option<String> {
245    match policy {
246        CapitalisationPolicy::Lower => Some(value.to_ascii_lowercase()),
247        CapitalisationPolicy::Upper => Some(value.to_ascii_uppercase()),
248        CapitalisationPolicy::Capitalise => Some(capitalise_ascii_token(value)),
249        // `Consistent` should be resolved to Upper/Lower before calling
250        // this function.  Fall back to lowercase if somehow unresolved.
251        CapitalisationPolicy::Consistent => Some(value.to_ascii_lowercase()),
252        // These policies are currently report-only in CP04 autofix scope.
253        CapitalisationPolicy::Pascal
254        | CapitalisationPolicy::Camel
255        | CapitalisationPolicy::Snake => None,
256    }
257}
258
259/// Resolve `Consistent` mode by adopting the style of the first literal token.
260fn resolve_consistent_policy(literals: &[LiteralCandidate]) -> CapitalisationPolicy {
261    for lit in literals {
262        if lit.value == lit.value.to_ascii_uppercase() {
263            return CapitalisationPolicy::Upper;
264        }
265        if lit.value == lit.value.to_ascii_lowercase() {
266            return CapitalisationPolicy::Lower;
267        }
268    }
269    CapitalisationPolicy::Lower
270}
271
/// Re-case an ASCII token to "Capitalise" style: the first alphabetic
/// character is uppercased, every later alphabetic character lowercased.
/// Non-alphabetic characters pass through untouched and do not count as
/// the "first" letter.
fn capitalise_ascii_token(value: &str) -> String {
    let mut first_letter_pending = true;
    value
        .chars()
        .map(|ch| {
            if !ch.is_ascii_alphabetic() {
                ch
            } else if first_letter_pending {
                first_letter_pending = false;
                ch.to_ascii_uppercase()
            } else {
                ch.to_ascii_lowercase()
            }
        })
        .collect()
}
292
293fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
294    let start = line_col_to_offset(
295        sql,
296        token.span.start.line as usize,
297        token.span.start.column as usize,
298    )?;
299    let end = line_col_to_offset(
300        sql,
301        token.span.end.line as usize,
302        token.span.end.column as usize,
303    )?;
304    Some((start, end))
305}
306
/// Map a 1-based (line, column) position to a byte offset in `sql`.
///
/// Columns count characters, matching sqlparser's location model. A position
/// exactly one past the final character maps to `sql.len()`; any other
/// out-of-range position — or a zero line/column — yields `None`.
fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
    // Locations are 1-based; zero means "unknown" and cannot be mapped.
    if line == 0 || column == 0 {
        return None;
    }

    let mut cursor = (1usize, 1usize);
    for (offset, ch) in sql.char_indices() {
        if cursor == (line, column) {
            return Some(offset);
        }
        if ch == '\n' {
            cursor = (cursor.0 + 1, 1);
        } else {
            cursor.1 += 1;
        }
    }

    // End-of-input counts as a valid position (exclusive span ends land here).
    (cursor == (line, column)).then_some(sql.len())
}
334
/// Does the source slice `sql[start..end]` spell the same word as `value`?
///
/// Identifier-quoting characters (double quotes, backticks, square brackets)
/// are stripped from both ends before an ASCII case-insensitive comparison.
/// An out-of-bounds or non-char-boundary range counts as a mismatch.
fn source_word_matches(sql: &str, start: usize, end: usize, value: &str) -> bool {
    match sql.get(start..end) {
        Some(raw) => raw
            .trim_matches(|ch: char| "\"`[]".contains(ch))
            .eq_ignore_ascii_case(value),
        None => false,
    }
}
342
#[cfg(test)]
mod tests {
    use super::*;
    use crate::linter::config::LintConfig;
    use crate::parser::parse_sql;
    use crate::types::IssueAutofixApplicability;

    /// Run the rule with its default (`Consistent`) policy over every parsed
    /// statement, treating the whole input as a single statement range.
    fn run(sql: &str) -> Vec<Issue> {
        let statements = parse_sql(sql).expect("parse");
        let rule = CapitalisationLiterals::default();
        statements
            .iter()
            .enumerate()
            .flat_map(|(index, statement)| {
                rule.check(
                    statement,
                    &LintContext {
                        sql,
                        statement_range: 0..sql.len(),
                        statement_index: index,
                    },
                )
            })
            .collect()
    }

    /// Apply one issue's autofix edits to `sql`. Edits are applied from the
    /// end of the string backwards so earlier byte offsets stay valid.
    fn apply_issue_autofix(sql: &str, issue: &Issue) -> Option<String> {
        let autofix = issue.autofix.as_ref()?;
        let mut out = sql.to_string();
        let mut edits = autofix.edits.clone();
        edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
        for edit in edits.into_iter().rev() {
            out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
        }
        Some(out)
    }

    #[test]
    fn flags_mixed_literal_case() {
        let issues = run("SELECT NULL, true FROM t");
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_CP_004);
    }

    #[test]
    fn emits_safe_autofix_for_mixed_literal_case() {
        // Consistent mode adopts the case of the first unambiguous literal:
        // NULL (upper) comes first, so `true` gets uppercased.
        let sql = "SELECT NULL, true FROM t";
        let issues = run(sql);
        assert_eq!(issues.len(), 1);
        let autofix = issues[0].autofix.as_ref().expect("autofix metadata");
        assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT NULL, TRUE FROM t");
    }

    #[test]
    fn does_not_flag_consistent_literal_case() {
        assert!(run("SELECT NULL, TRUE FROM t").is_empty());
    }

    #[test]
    fn does_not_flag_literal_words_in_strings_or_comments() {
        // Literal-looking words inside string literals and comments are not
        // Word tokens, so the rule must ignore them.
        let sql = "SELECT 'null true false' AS txt -- NULL true\nFROM t";
        assert!(run(sql).is_empty());
    }

    #[test]
    fn upper_policy_flags_lowercase_literal() {
        let config = LintConfig {
            enabled: true,
            disabled_rules: vec![],
            rule_configs: std::collections::BTreeMap::from([(
                "capitalisation.literals".to_string(),
                serde_json::json!({"extended_capitalisation_policy": "upper"}),
            )]),
        };
        let rule = CapitalisationLiterals::from_config(&config);
        let sql = "SELECT true FROM t";
        let statements = parse_sql(sql).expect("parse");
        let issues = rule.check(
            &statements[0],
            &LintContext {
                sql,
                statement_range: 0..sql.len(),
                statement_index: 0,
            },
        );
        assert_eq!(issues.len(), 1);
    }

    #[test]
    fn upper_policy_emits_uppercase_autofix() {
        let config = LintConfig {
            enabled: true,
            disabled_rules: vec![],
            rule_configs: std::collections::BTreeMap::from([(
                "capitalisation.literals".to_string(),
                serde_json::json!({"extended_capitalisation_policy": "upper"}),
            )]),
        };
        let rule = CapitalisationLiterals::from_config(&config);
        let sql = "SELECT null, true FROM t";
        let statements = parse_sql(sql).expect("parse");
        let issues = rule.check(
            &statements[0],
            &LintContext {
                sql,
                statement_range: 0..sql.len(),
                statement_index: 0,
            },
        );
        // Both null and true violate upper → 2 violations.
        assert_eq!(issues.len(), 2);
        // Combine edits across all issues and apply right-to-left.
        let fixed = {
            let mut edits: Vec<_> = issues
                .iter()
                .filter_map(|i| i.autofix.as_ref())
                .flat_map(|a| a.edits.clone())
                .collect();
            edits.sort_by_key(|e| (e.span.start, e.span.end));
            let mut out = sql.to_string();
            for edit in edits.into_iter().rev() {
                out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
            }
            out
        };
        assert_eq!(fixed, "SELECT NULL, TRUE FROM t");
    }

    #[test]
    fn camel_policy_violation_remains_report_only() {
        let config = LintConfig {
            enabled: true,
            disabled_rules: vec![],
            rule_configs: std::collections::BTreeMap::from([(
                "capitalisation.literals".to_string(),
                serde_json::json!({"extended_capitalisation_policy": "camel"}),
            )]),
        };
        let rule = CapitalisationLiterals::from_config(&config);
        let sql = "SELECT NULL, TRUE FROM t";
        let statements = parse_sql(sql).expect("parse");
        let issues = rule.check(
            &statements[0],
            &LintContext {
                sql,
                statement_range: 0..sql.len(),
                statement_index: 0,
            },
        );
        assert_eq!(issues.len(), 1);
        assert!(
            issues[0].autofix.is_none(),
            "camel/pascal/snake are report-only in current CP004 autofix scope"
        );
    }

    #[test]
    fn consistent_majority_lowercase_emits_lowercase_autofix() {
        // First literal `true` is lowercase, so consistent mode resolves to
        // lowercase and NULL is the one token that gets rewritten.
        let sql = "SELECT true, false, NULL FROM t";
        let issues = run(sql);
        assert_eq!(issues.len(), 1);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT true, false, null FROM t");
    }

    #[test]
    fn capitalisation_policy_config_key_fallback() {
        // The short `capitalisation_policy` key must work when the extended
        // key is absent.
        let config = LintConfig {
            enabled: true,
            disabled_rules: vec![],
            rule_configs: std::collections::BTreeMap::from([(
                "capitalisation.literals".to_string(),
                serde_json::json!({"capitalisation_policy": "upper"}),
            )]),
        };
        let rule = CapitalisationLiterals::from_config(&config);
        let sql = "SELECT true FROM t";
        let statements = parse_sql(sql).expect("parse");
        let issues = rule.check(
            &statements[0],
            &LintContext {
                sql,
                statement_range: 0..sql.len(),
                statement_index: 0,
            },
        );
        assert_eq!(issues.len(), 1);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT TRUE FROM t");
    }

    #[test]
    fn ignore_words_regex_excludes_literals_from_check() {
        // With `true` excluded by regex, only NULL remains and a single
        // literal can never be inconsistent.
        let config = LintConfig {
            enabled: true,
            disabled_rules: vec![],
            rule_configs: std::collections::BTreeMap::from([(
                "capitalisation.literals".to_string(),
                serde_json::json!({"ignore_words_regex": "^true$"}),
            )]),
        };
        let rule = CapitalisationLiterals::from_config(&config);
        let sql = "SELECT NULL, true FROM t";
        let statements = parse_sql(sql).expect("parse");
        let issues = rule.check(
            &statements[0],
            &LintContext {
                sql,
                statement_range: 0..sql.len(),
                statement_index: 0,
            },
        );
        assert!(issues.is_empty());
    }
}