sqrust_rules/capitalisation/types.rs
1use sqrust_core::{Diagnostic, FileContext, Rule};
2
3use super::{is_word_char, SkipMap};
4
/// SQL data type names that must be written in UPPERCASE.
///
/// Every entry is stored in its canonical uppercase spelling: it is compared
/// case-insensitively against words found in the source and is also the
/// spelling suggested in the diagnostic message.
///
/// Entries are ordered by length, longest first (ties keep no particular
/// order). The rule compares whole words of identical length, so the order
/// does not influence which entry matches; it is kept purely so the table is
/// easy to scan and extend.
const TYPES: &[&str] = &[
    // 17 bytes
    "CURRENT_TIMESTAMP",
    // 9 bytes
    "VARBINARY",
    "TIMESTAMP",
    // 8 bytes
    "NVARCHAR",
    "SMALLINT",
    "DATETIME",
    "INTERVAL",
    // 7 bytes
    "NUMERIC",
    "BOOLEAN",
    "INTEGER",
    "DECIMAL",
    "TINYINT",
    "VARCHAR",
    // 6 bytes
    "DOUBLE",
    "BIGINT",
    "BINARY",
    "NUMBER",
    // 5 bytes
    "NCHAR",
    "FLOAT",
    "JSONB",
    "ARRAY",
    "BYTEA",
    // 4 bytes
    "CLOB",
    "TEXT",
    "BOOL",
    "REAL",
    "DATE",
    "BLOB",
    "UUID",
    "TIME",
    "JSON",
    "CHAR",
    // 3 bytes
    "INT",
    "BIT",
];
45
46pub struct Types;
47
48impl Rule for Types {
49 fn name(&self) -> &'static str {
50 "Capitalisation/Types"
51 }
52
53 fn check(&self, ctx: &FileContext) -> Vec<Diagnostic> {
54 let source = &ctx.source;
55 let bytes = source.as_bytes();
56 let len = bytes.len();
57 let skip_map = SkipMap::build(source);
58
59 let mut diags = Vec::new();
60
61 let mut i = 0;
62 while i < len {
63 // Only enter type detection on a word-start in code that is not
64 // preceded by a word character.
65 if skip_map.is_code(i) && is_word_char(bytes[i]) {
66 let preceded_by_word = i > 0 && is_word_char(bytes[i - 1]);
67 if !preceded_by_word {
68 // Find end of this word token.
69 let word_start = i;
70 let mut j = i;
71 while j < len && is_word_char(bytes[j]) {
72 j += 1;
73 }
74 let word_end = j; // exclusive
75
76 // The whole word must be in code (not inside a string/comment).
77 let all_code = (word_start..word_end).all(|k| skip_map.is_code(k));
78
79 if all_code {
80 let word_bytes = &bytes[word_start..word_end];
81
82 // Try each type name (already sorted longest-first).
83 for type_name in TYPES {
84 if type_name.len() == word_bytes.len()
85 && type_name
86 .bytes()
87 .zip(word_bytes.iter())
88 .all(|(a, &b)| a.eq_ignore_ascii_case(&b))
89 {
90 // Matched — is it already all-uppercase?
91 let already_upper = word_bytes
92 .iter()
93 .all(|b| b.is_ascii_uppercase() || !b.is_ascii_alphabetic());
94 if !already_upper {
95 let (line, col) = line_col(source, word_start);
96 let found =
97 std::str::from_utf8(word_bytes).unwrap_or("?").to_string();
98 let upper = *type_name;
99 diags.push(Diagnostic {
100 rule: self.name(),
101 message: format!(
102 "Data type '{}' should be '{}'",
103 found, upper
104 ),
105 line,
106 col,
107 });
108 }
109 // Whether or not it was a violation, stop checking
110 // this word against further (shorter) type names.
111 break;
112 }
113 }
114 }
115
116 i = word_end;
117 continue;
118 }
119 }
120 i += 1;
121 }
122
123 diags
124 }
125}
126
/// Translates a byte `offset` into `source` to a 1-based `(line, column)` pair.
///
/// The line is one more than the number of `'\n'` characters before `offset`.
/// The column is the number of bytes between the start of that line and
/// `offset`, plus one, so it counts bytes rather than characters.
///
/// # Panics
///
/// Panics if `offset` is past the end of `source` or does not fall on a
/// UTF-8 character boundary, because the prefix is obtained by slicing.
fn line_col(source: &str, offset: usize) -> (usize, usize) {
    let prefix = &source[..offset];
    match prefix.rfind('\n') {
        // `offset - newline_at` is the 1-based distance past the last newline.
        Some(newline_at) => (prefix.matches('\n').count() + 1, offset - newline_at),
        // No newline yet: still on the first line.
        None => (1, offset + 1),
    }
}