sqrust_rules/capitalisation/mod.rs
1pub mod functions;
2pub mod keywords;
3pub mod literals;
4pub mod types;
5
/// Byte-level mask recording which parts of a SQL source string are *not* code.
///
/// [`SkipMap::build`] scans the source once and flags every byte that lies
/// inside one of these regions:
///
/// - a line comment (`-- ...`, up to but excluding the terminating newline)
/// - a block comment (`/* ... */`)
/// - a single-quoted string literal (`'...'`, where `''` is an escaped quote)
/// - a double-quoted identifier (`"..."`)
/// - a backtick-quoted identifier (`` `...` ``)
///
/// Rules iterate over the source bytes and call [`SkipMap::is_code`] to decide
/// whether the byte at a given offset should be inspected.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct SkipMap {
    /// One entry per source byte: `true` when the byte at that offset is inside
    /// a string, comment, or quoted identifier and must be skipped.
    skip: Vec<bool>,
}

impl SkipMap {
    /// Builds the skip mask for `source`.
    ///
    /// The scan works on raw bytes, so every offset used with the resulting map
    /// is a byte offset into `source`. A region that is never closed (for
    /// example an unterminated string or block comment) extends to the end of
    /// the input. Block comments do not nest, and backslash escapes inside
    /// strings are not recognised.
    pub(crate) fn build(source: &str) -> Self {
        let bytes = source.as_bytes();
        let mut skip = vec![false; bytes.len()];

        let mut i = 0;
        while i < bytes.len() {
            // Work out where the non-code region starting at `i` ends
            // (exclusive). Bytes that do not open such a region are plain code.
            let end = match bytes[i] {
                b'-' if bytes.get(i + 1) == Some(&b'-') => Self::line_comment_end(bytes, i),
                b'/' if bytes.get(i + 1) == Some(&b'*') => Self::block_comment_end(bytes, i),
                b'\'' => Self::string_end(bytes, i),
                b'"' | b'`' => Self::quoted_identifier_end(bytes, i),
                _ => {
                    i += 1;
                    continue;
                }
            };

            // Every helper returns `end > i`, so the scan always makes progress.
            skip[i..end].fill(true);
            i = end;
        }

        SkipMap { skip }
    }

    /// Returns `true` if the byte at `offset` is real SQL code (not inside a
    /// string / comment / quoted identifier).
    ///
    /// An `offset` at or past the end of the source is not code, so it yields
    /// `false` instead of panicking; this keeps one-byte lookahead at the end
    /// of the input safe for callers.
    #[inline]
    pub(crate) fn is_code(&self, offset: usize) -> bool {
        matches!(self.skip.get(offset), Some(false))
    }

    /// End (exclusive) of the `--` line comment that starts at `start`.
    ///
    /// The terminating `'\n'` is deliberately left outside the region so that
    /// line counting based on newlines stays correct.
    fn line_comment_end(bytes: &[u8], start: usize) -> usize {
        let body = start + 2;
        bytes[body..]
            .iter()
            .position(|&b| b == b'\n')
            .map_or(bytes.len(), |p| body + p)
    }

    /// End (exclusive) of the `/* ... */` block comment that starts at `start`.
    ///
    /// The search for `*/` begins after the opening `/*`, so `/*/` does not
    /// close itself.
    fn block_comment_end(bytes: &[u8], start: usize) -> usize {
        let body = start + 2;
        bytes[body..]
            .windows(2)
            .position(|w| w == b"*/")
            .map_or(bytes.len(), |p| body + p + 2)
    }

    /// End (exclusive) of the single-quoted string that starts at `start`.
    ///
    /// A doubled quote (`''`) is the SQL-standard escape for a literal quote
    /// and does not terminate the string.
    fn string_end(bytes: &[u8], start: usize) -> usize {
        let mut j = start + 1;
        while j < bytes.len() {
            if bytes[j] != b'\'' {
                j += 1;
            } else if bytes.get(j + 1) == Some(&b'\'') {
                j += 2; // escaped quote, still inside the string
            } else {
                return j + 1; // closing quote
            }
        }
        bytes.len()
    }

    /// End (exclusive) of the quoted identifier that starts at `start`.
    ///
    /// The closing delimiter is the same byte as the opening one (`"` or
    /// `` ` ``).
    fn quoted_identifier_end(bytes: &[u8], start: usize) -> usize {
        let delimiter = bytes[start];
        let body = start + 1;
        bytes[body..]
            .iter()
            .position(|&b| b == delimiter)
            .map_or(bytes.len(), |p| body + p + 1)
    }
}
125
/// Reports whether `ch` can be part of a word, i.e. whether it is an ASCII
/// letter, an ASCII digit, or an underscore (`[a-zA-Z0-9_]`).
#[inline]
pub(crate) fn is_word_char(ch: u8) -> bool {
    matches!(ch, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
}