1use std::collections::HashSet;
7
8use crate::linter::config::LintConfig;
9use crate::linter::rule::{LintContext, LintRule};
10use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit, Span};
11use regex::Regex;
12use sqlparser::ast::Statement;
13use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer};
14
15use super::capitalisation_policy_helpers::{
16 ignored_words_from_config, ignored_words_regex_from_config, token_is_ignored,
17 tokens_violate_policy, CapitalisationPolicy,
18};
19
/// Rule CP004: checks that the literal keywords `NULL`, `TRUE` and `FALSE`
/// follow the configured capitalisation policy.
pub struct CapitalisationLiterals {
    /// Capitalisation policy to enforce on literal keywords.
    policy: CapitalisationPolicy,
    /// Exact words excluded from the check (case handling is delegated to
    /// `token_is_ignored`).
    ignore_words: HashSet<String>,
    /// Optional pattern; words matching it are excluded from the check.
    ignore_words_regex: Option<Regex>,
}
25
26impl CapitalisationLiterals {
27 pub fn from_config(config: &LintConfig) -> Self {
28 let policy = config
31 .rule_option_str(issue_codes::LINT_CP_004, "extended_capitalisation_policy")
32 .or_else(|| config.rule_option_str(issue_codes::LINT_CP_004, "capitalisation_policy"))
33 .map(CapitalisationPolicy::from_raw_value)
34 .unwrap_or(CapitalisationPolicy::Consistent);
35
36 Self {
37 policy,
38 ignore_words: ignored_words_from_config(config, issue_codes::LINT_CP_004),
39 ignore_words_regex: ignored_words_regex_from_config(config, issue_codes::LINT_CP_004),
40 }
41 }
42}
43
impl Default for CapitalisationLiterals {
    /// Default configuration: `Consistent` policy with no ignored words.
    fn default() -> Self {
        Self {
            policy: CapitalisationPolicy::Consistent,
            ignore_words: HashSet::new(),
            ignore_words_regex: None,
        }
    }
}
53
54impl LintRule for CapitalisationLiterals {
55 fn code(&self) -> &'static str {
56 issue_codes::LINT_CP_004
57 }
58
59 fn name(&self) -> &'static str {
60 "Literal capitalisation"
61 }
62
63 fn description(&self) -> &'static str {
64 "Inconsistent capitalisation of boolean/null literal."
65 }
66
67 fn check(&self, _statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
68 let literals =
69 literal_tokens_for_context(ctx, &self.ignore_words, self.ignore_words_regex.as_ref());
70 let literal_values = literals
71 .iter()
72 .map(|candidate| candidate.value.clone())
73 .collect::<Vec<_>>();
74 if !tokens_violate_policy(&literal_values, self.policy) {
75 return Vec::new();
76 }
77
78 let autofix_edits = literal_autofix_edits(ctx, &literals, self.policy);
79
80 if autofix_edits.is_empty() {
82 return vec![Issue::info(
83 issue_codes::LINT_CP_004,
84 "Literal keywords (NULL/TRUE/FALSE) use inconsistent capitalisation.",
85 )
86 .with_statement(ctx.statement_index)];
87 }
88
89 autofix_edits
90 .into_iter()
91 .map(|edit| {
92 let span = Span::new(edit.span.start, edit.span.end);
93 Issue::info(
94 issue_codes::LINT_CP_004,
95 "Literal keywords (NULL/TRUE/FALSE) use inconsistent capitalisation.",
96 )
97 .with_statement(ctx.statement_index)
98 .with_span(span)
99 .with_autofix_edits(IssueAutofixApplicability::Safe, vec![edit])
100 })
101 .collect()
102 }
103}
104
/// A NULL/TRUE/FALSE word token located in the statement source.
#[derive(Clone)]
struct LiteralCandidate {
    // The literal exactly as written in the source (original casing).
    value: String,
    // Start byte offset of the token (statement-relative when built from
    // document tokens).
    start: usize,
    // Exclusive end byte offset of the token.
    end: usize,
}
111
/// Collects NULL/TRUE/FALSE literal tokens for the current statement.
///
/// Prefers the document-level tokens exposed by the context, translating
/// their offsets to statement-relative positions. If those tokens are empty
/// or any word token's span does not match the actual source text, this
/// path is abandoned and the statement SQL is re-tokenized instead.
fn literal_tokens_for_context(
    ctx: &LintContext,
    ignore_words: &HashSet<String>,
    ignore_words_regex: Option<&Regex>,
) -> Vec<LiteralCandidate> {
    let from_document_tokens = ctx.with_document_tokens(|tokens| {
        if tokens.is_empty() {
            // Nothing usable; signal the caller to take the fallback path.
            return None;
        }

        let mut out = Vec::new();
        for token in tokens {
            let Some((start, end)) = token_with_span_offsets(ctx.sql, token) else {
                continue;
            };
            // Skip tokens outside the current statement's byte range.
            if start < ctx.statement_range.start || end > ctx.statement_range.end {
                continue;
            }

            if let Token::Word(word) = &token.token {
                // A word token whose span text disagrees with its value means
                // the document tokens are unreliable for this source; abandon
                // the whole document-token path, not just this token.
                if !source_word_matches(ctx.sql, start, end, word.value.as_str()) {
                    return None;
                }
                if matches!(
                    word.value.to_ascii_uppercase().as_str(),
                    "NULL" | "TRUE" | "FALSE"
                ) && !token_is_ignored(word.value.as_str(), ignore_words, ignore_words_regex)
                {
                    // Convert document offsets to statement-relative offsets.
                    let Some(local_start) = start.checked_sub(ctx.statement_range.start) else {
                        continue;
                    };
                    let Some(local_end) = end.checked_sub(ctx.statement_range.start) else {
                        continue;
                    };
                    out.push(LiteralCandidate {
                        value: word.value.clone(),
                        start: local_start,
                        end: local_end,
                    });
                }
            }
        }
        Some(out)
    });

    if let Some(tokens) = from_document_tokens {
        return tokens;
    }

    // Fallback: tokenize just this statement's SQL with the active dialect.
    literal_tokens(
        ctx.statement_sql(),
        ignore_words,
        ignore_words_regex,
        ctx.dialect(),
    )
}
171
172fn literal_tokens(
173 sql: &str,
174 ignore_words: &HashSet<String>,
175 ignore_words_regex: Option<&Regex>,
176 dialect: Dialect,
177) -> Vec<LiteralCandidate> {
178 let dialect = dialect.to_sqlparser_dialect();
179 let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
180 let Ok(tokens) = tokenizer.tokenize_with_location() else {
181 return Vec::new();
182 };
183
184 tokens
185 .into_iter()
186 .filter_map(|token| {
187 if let Token::Word(word) = &token.token {
188 if matches!(
189 word.value.to_ascii_uppercase().as_str(),
190 "NULL" | "TRUE" | "FALSE"
191 ) && !token_is_ignored(word.value.as_str(), ignore_words, ignore_words_regex)
192 {
193 let (start, end) = token_with_span_offsets(sql, &token)?;
194 return Some(LiteralCandidate {
195 value: word.value.clone(),
196 start,
197 end,
198 });
199 }
200 }
201 None
202 })
203 .collect()
204}
205
206fn literal_autofix_edits(
207 ctx: &LintContext,
208 literals: &[LiteralCandidate],
209 policy: CapitalisationPolicy,
210) -> Vec<IssuePatchEdit> {
211 let resolved = if policy == CapitalisationPolicy::Consistent {
214 resolve_consistent_policy(literals)
215 } else {
216 policy
217 };
218
219 let mut edits = Vec::new();
220
221 for candidate in literals {
222 let Some(replacement) = literal_case_replacement(candidate.value.as_str(), resolved) else {
223 continue;
224 };
225 if replacement == candidate.value {
226 continue;
227 }
228
229 edits.push(IssuePatchEdit::new(
230 ctx.span_from_statement_offset(candidate.start, candidate.end),
231 replacement,
232 ));
233 }
234
235 edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
236 edits.dedup_by(|left, right| {
237 left.span.start == right.span.start
238 && left.span.end == right.span.end
239 && left.replacement == right.replacement
240 });
241 edits
242}
243
244fn literal_case_replacement(value: &str, policy: CapitalisationPolicy) -> Option<String> {
245 match policy {
246 CapitalisationPolicy::Lower => Some(value.to_ascii_lowercase()),
247 CapitalisationPolicy::Upper => Some(value.to_ascii_uppercase()),
248 CapitalisationPolicy::Capitalise => Some(capitalise_ascii_token(value)),
249 CapitalisationPolicy::Consistent => Some(value.to_ascii_lowercase()),
252 CapitalisationPolicy::Pascal
254 | CapitalisationPolicy::Camel
255 | CapitalisationPolicy::Snake => None,
256 }
257}
258
259fn resolve_consistent_policy(literals: &[LiteralCandidate]) -> CapitalisationPolicy {
261 for lit in literals {
262 if lit.value == lit.value.to_ascii_uppercase() {
263 return CapitalisationPolicy::Upper;
264 }
265 if lit.value == lit.value.to_ascii_lowercase() {
266 return CapitalisationPolicy::Lower;
267 }
268 }
269 CapitalisationPolicy::Lower
270}
271
/// Capitalises a token ASCII-wise: the first alphabetic character is
/// upper-cased, every later alphabetic character lower-cased, and
/// non-alphabetic characters pass through untouched.
fn capitalise_ascii_token(value: &str) -> String {
    let mut first_alpha_done = false;
    value
        .chars()
        .map(|ch| {
            if !ch.is_ascii_alphabetic() {
                ch
            } else if first_alpha_done {
                ch.to_ascii_lowercase()
            } else {
                first_alpha_done = true;
                ch.to_ascii_uppercase()
            }
        })
        .collect()
}
292
/// Translates a token's 1-based (line, column) span endpoints into byte
/// offsets within `sql`. Returns `None` if either endpoint does not resolve
/// to a valid position.
fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
    let start = line_col_to_offset(
        sql,
        token.span.start.line as usize,
        token.span.start.column as usize,
    )?;
    let end = line_col_to_offset(
        sql,
        token.span.end.line as usize,
        token.span.end.column as usize,
    )?;
    Some((start, end))
}
306
/// Converts a 1-based (line, column) position into a byte offset within
/// `sql`. Columns count characters, not bytes. The position one past the
/// final character resolves to `sql.len()`; anything else that does not
/// land on a character yields `None`.
fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
    // Positions are 1-based; 0 is never valid.
    if line == 0 || column == 0 {
        return None;
    }

    let mut current_line = 1usize;
    let mut current_col = 1usize;

    for (offset, ch) in sql.char_indices() {
        if current_line == line && current_col == column {
            return Some(offset);
        }

        // Lines only ever advance, so once we are past the target line the
        // position can no longer match; bail out instead of scanning the
        // remainder of the input (this runs once per token span endpoint).
        if current_line > line {
            return None;
        }

        if ch == '\n' {
            current_line += 1;
            current_col = 1;
        } else {
            current_col += 1;
        }
    }

    // Allow the end-of-input position (one past the last character).
    if current_line == line && current_col == column {
        return Some(sql.len());
    }

    None
}
334
/// Checks that the source text at `start..end` really is the word `value`,
/// ignoring ASCII case and any surrounding identifier-quote characters
/// (`"`, `` ` ``, `[`, `]`). Out-of-range or non-char-boundary slices fail.
fn source_word_matches(sql: &str, start: usize, end: usize, value: &str) -> bool {
    match sql.get(start..end) {
        Some(raw) => raw
            .trim_matches(|ch| matches!(ch, '"' | '`' | '[' | ']'))
            .eq_ignore_ascii_case(value),
        None => false,
    }
}
342
#[cfg(test)]
mod tests {
    use super::*;
    use crate::linter::config::LintConfig;
    use crate::parser::parse_sql;
    use crate::types::IssueAutofixApplicability;

    /// Runs the default-configured rule over every parsed statement in `sql`
    /// and collects the issues. Note: each statement is given the full
    /// document as its range, which is fine for the single-statement inputs
    /// used here.
    fn run(sql: &str) -> Vec<Issue> {
        let statements = parse_sql(sql).expect("parse");
        let rule = CapitalisationLiterals::default();
        statements
            .iter()
            .enumerate()
            .flat_map(|(index, statement)| {
                rule.check(
                    statement,
                    &LintContext {
                        sql,
                        statement_range: 0..sql.len(),
                        statement_index: index,
                    },
                )
            })
            .collect()
    }

    /// Applies one issue's autofix edits to `sql`. Edits are applied in
    /// reverse span order so earlier offsets stay valid after each splice.
    /// Returns `None` when the issue carries no autofix.
    fn apply_issue_autofix(sql: &str, issue: &Issue) -> Option<String> {
        let autofix = issue.autofix.as_ref()?;
        let mut out = sql.to_string();
        let mut edits = autofix.edits.clone();
        edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
        for edit in edits.into_iter().rev() {
            out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
        }
        Some(out)
    }

    // Mixed NULL/true casing under the default (consistent) policy is a
    // violation.
    #[test]
    fn flags_mixed_literal_case() {
        let issues = run("SELECT NULL, true FROM t");
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_CP_004);
    }

    // The consistent policy resolves to upper here (first decisive literal is
    // NULL), so the fix upper-cases `true`.
    #[test]
    fn emits_safe_autofix_for_mixed_literal_case() {
        let sql = "SELECT NULL, true FROM t";
        let issues = run(sql);
        assert_eq!(issues.len(), 1);
        let autofix = issues[0].autofix.as_ref().expect("autofix metadata");
        assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT NULL, TRUE FROM t");
    }

    #[test]
    fn does_not_flag_consistent_literal_case() {
        assert!(run("SELECT NULL, TRUE FROM t").is_empty());
    }

    // Literal keywords inside string literals and comments are not word
    // tokens and must not be flagged.
    #[test]
    fn does_not_flag_literal_words_in_strings_or_comments() {
        let sql = "SELECT 'null true false' AS txt -- NULL true\nFROM t";
        assert!(run(sql).is_empty());
    }

    #[test]
    fn upper_policy_flags_lowercase_literal() {
        let config = LintConfig {
            enabled: true,
            disabled_rules: vec![],
            rule_configs: std::collections::BTreeMap::from([(
                "capitalisation.literals".to_string(),
                serde_json::json!({"extended_capitalisation_policy": "upper"}),
            )]),
        };
        let rule = CapitalisationLiterals::from_config(&config);
        let sql = "SELECT true FROM t";
        let statements = parse_sql(sql).expect("parse");
        let issues = rule.check(
            &statements[0],
            &LintContext {
                sql,
                statement_range: 0..sql.len(),
                statement_index: 0,
            },
        );
        assert_eq!(issues.len(), 1);
    }

    // Two violating literals produce one issue (and one edit) each; applying
    // all edits together yields fully upper-cased literals.
    #[test]
    fn upper_policy_emits_uppercase_autofix() {
        let config = LintConfig {
            enabled: true,
            disabled_rules: vec![],
            rule_configs: std::collections::BTreeMap::from([(
                "capitalisation.literals".to_string(),
                serde_json::json!({"extended_capitalisation_policy": "upper"}),
            )]),
        };
        let rule = CapitalisationLiterals::from_config(&config);
        let sql = "SELECT null, true FROM t";
        let statements = parse_sql(sql).expect("parse");
        let issues = rule.check(
            &statements[0],
            &LintContext {
                sql,
                statement_range: 0..sql.len(),
                statement_index: 0,
            },
        );
        assert_eq!(issues.len(), 2);
        let fixed = {
            let mut edits: Vec<_> = issues
                .iter()
                .filter_map(|i| i.autofix.as_ref())
                .flat_map(|a| a.edits.clone())
                .collect();
            edits.sort_by_key(|e| (e.span.start, e.span.end));
            let mut out = sql.to_string();
            // Apply right-to-left so earlier spans remain valid.
            for edit in edits.into_iter().rev() {
                out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
            }
            out
        };
        assert_eq!(fixed, "SELECT NULL, TRUE FROM t");
    }

    // Word-shape policies (camel/pascal/snake) have no literal autofix, so a
    // violation is reported without edits.
    #[test]
    fn camel_policy_violation_remains_report_only() {
        let config = LintConfig {
            enabled: true,
            disabled_rules: vec![],
            rule_configs: std::collections::BTreeMap::from([(
                "capitalisation.literals".to_string(),
                serde_json::json!({"extended_capitalisation_policy": "camel"}),
            )]),
        };
        let rule = CapitalisationLiterals::from_config(&config);
        let sql = "SELECT NULL, TRUE FROM t";
        let statements = parse_sql(sql).expect("parse");
        let issues = rule.check(
            &statements[0],
            &LintContext {
                sql,
                statement_range: 0..sql.len(),
                statement_index: 0,
            },
        );
        assert_eq!(issues.len(), 1);
        assert!(
            issues[0].autofix.is_none(),
            "camel/pascal/snake are report-only in current CP004 autofix scope"
        );
    }

    // The first decisive literal (`true`) pins the consistent policy to
    // lower, so the fix lower-cases NULL.
    #[test]
    fn consistent_majority_lowercase_emits_lowercase_autofix() {
        let sql = "SELECT true, false, NULL FROM t";
        let issues = run(sql);
        assert_eq!(issues.len(), 1);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT true, false, null FROM t");
    }

    // The legacy `capitalisation_policy` key is honoured when the extended
    // key is absent.
    #[test]
    fn capitalisation_policy_config_key_fallback() {
        let config = LintConfig {
            enabled: true,
            disabled_rules: vec![],
            rule_configs: std::collections::BTreeMap::from([(
                "capitalisation.literals".to_string(),
                serde_json::json!({"capitalisation_policy": "upper"}),
            )]),
        };
        let rule = CapitalisationLiterals::from_config(&config);
        let sql = "SELECT true FROM t";
        let statements = parse_sql(sql).expect("parse");
        let issues = rule.check(
            &statements[0],
            &LintContext {
                sql,
                statement_range: 0..sql.len(),
                statement_index: 0,
            },
        );
        assert_eq!(issues.len(), 1);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT TRUE FROM t");
    }

    // Regex-ignored literals are excluded before the policy check, so the
    // remaining consistent NULL raises nothing.
    #[test]
    fn ignore_words_regex_excludes_literals_from_check() {
        let config = LintConfig {
            enabled: true,
            disabled_rules: vec![],
            rule_configs: std::collections::BTreeMap::from([(
                "capitalisation.literals".to_string(),
                serde_json::json!({"ignore_words_regex": "^true$"}),
            )]),
        };
        let rule = CapitalisationLiterals::from_config(&config);
        let sql = "SELECT NULL, true FROM t";
        let statements = parse_sql(sql).expect("parse");
        let issues = rule.check(
            &statements[0],
            &LintContext {
                sql,
                statement_range: 0..sql.len(),
                statement_index: 0,
            },
        );
        assert!(issues.is_empty());
    }
}