Skip to main content

sqrust_rules/capitalisation/
types.rs

1use sqrust_core::{Diagnostic, FileContext, Rule};
2
3use super::{is_word_char, SkipMap};
4
/// SQL data type names that must be written in UPPERCASE.
///
/// Every entry is stored in uppercase; candidate words are compared against
/// it ignoring ASCII case, and the entry itself is used as the suggested
/// spelling in diagnostics.
///
/// The list is kept ordered by length, longest first (entries of equal length
/// are in no particular order), so that longer names (e.g. SMALLINT) always
/// appear before shorter names contained in them (e.g. INT). The matcher in
/// this file only accepts whole words of identical length, so the ordering is
/// a maintenance convention rather than a correctness requirement.
const TYPES: &[&str] = &[
    // 17 characters
    "CURRENT_TIMESTAMP",
    // 9 characters
    "VARBINARY",
    "TIMESTAMP",
    // 8 characters
    "NVARCHAR",
    "SMALLINT",
    "DATETIME",
    "INTERVAL",
    // 7 characters
    "NUMERIC",
    "BOOLEAN",
    "INTEGER",
    "DECIMAL",
    "TINYINT",
    "VARCHAR",
    // 6 characters
    "DOUBLE",
    "BIGINT",
    "BINARY",
    "NUMBER",
    // 5 characters
    "NCHAR",
    "FLOAT",
    "JSONB",
    "ARRAY",
    "BYTEA",
    // 4 characters
    "CLOB",
    "TEXT",
    "BOOL",
    "REAL",
    "DATE",
    "BLOB",
    "UUID",
    "TIME",
    "JSON",
    "CHAR",
    // 3 characters
    "INT",
    "BIT",
];
45
/// Lint rule that reports SQL data type names (see `TYPES`) that are not
/// written entirely in uppercase. Carries no state.
pub struct Types;
47
48impl Rule for Types {
49    fn name(&self) -> &'static str {
50        "Capitalisation/Types"
51    }
52
53    fn check(&self, ctx: &FileContext) -> Vec<Diagnostic> {
54        let source = &ctx.source;
55        let bytes = source.as_bytes();
56        let len = bytes.len();
57        let skip_map = SkipMap::build(source);
58
59        let mut diags = Vec::new();
60
61        let mut i = 0;
62        while i < len {
63            // Only enter type detection on a word-start in code that is not
64            // preceded by a word character.
65            if skip_map.is_code(i) && is_word_char(bytes[i]) {
66                let preceded_by_word = i > 0 && is_word_char(bytes[i - 1]);
67                if !preceded_by_word {
68                    // Find end of this word token.
69                    let word_start = i;
70                    let mut j = i;
71                    while j < len && is_word_char(bytes[j]) {
72                        j += 1;
73                    }
74                    let word_end = j; // exclusive
75
76                    // The whole word must be in code (not inside a string/comment).
77                    let all_code = (word_start..word_end).all(|k| skip_map.is_code(k));
78
79                    if all_code {
80                        let word_bytes = &bytes[word_start..word_end];
81
82                        // Try each type name (already sorted longest-first).
83                        for type_name in TYPES {
84                            if type_name.len() == word_bytes.len()
85                                && type_name
86                                    .bytes()
87                                    .zip(word_bytes.iter())
88                                    .all(|(a, &b)| a.eq_ignore_ascii_case(&b))
89                            {
90                                // Matched — is it already all-uppercase?
91                                let already_upper = word_bytes
92                                    .iter()
93                                    .all(|b| b.is_ascii_uppercase() || !b.is_ascii_alphabetic());
94                                if !already_upper {
95                                    let (line, col) = line_col(source, word_start);
96                                    let found =
97                                        std::str::from_utf8(word_bytes).unwrap_or("?").to_string();
98                                    let upper = *type_name;
99                                    diags.push(Diagnostic {
100                                        rule: self.name(),
101                                        message: format!(
102                                            "Data type '{}' should be '{}'",
103                                            found, upper
104                                        ),
105                                        line,
106                                        col,
107                                    });
108                                }
109                                // Whether or not it was a violation, stop checking
110                                // this word against further (shorter) type names.
111                                break;
112                            }
113                        }
114                    }
115
116                    i = word_end;
117                    continue;
118                }
119            }
120            i += 1;
121        }
122
123        diags
124    }
125}
126
/// Translates a byte offset into `source` to a 1-indexed `(line, col)` pair.
///
/// The line is one more than the number of `'\n'` bytes before `offset`.
/// The column is measured in bytes from the start of that line, so a
/// multi-byte character earlier on the line advances it by more than one.
///
/// # Panics
///
/// Panics if `offset` is past the end of `source` or does not fall on a
/// UTF-8 character boundary, because the prefix is obtained by slicing.
fn line_col(source: &str, offset: usize) -> (usize, usize) {
    let prefix = &source[..offset];

    // A 0x0A byte never occurs inside a multi-byte UTF-8 sequence, so counting
    // bytes gives the same result as counting '\n' characters.
    let line = 1 + prefix.bytes().filter(|&b| b == b'\n').count();

    let col = match prefix.rfind('\n') {
        // Distance from the newline to `offset`; already 1-indexed.
        Some(newline_at) => offset - newline_at,
        // Still on the first line: the offset itself, shifted to 1-indexed.
        None => offset + 1,
    };

    (line, col)
}