sqrust_rules/layout/unicode_identifiers.rs
1use sqrust_core::{Diagnostic, FileContext, Rule};
2
/// Lint rule `Layout/UnicodeIdentifiers`.
///
/// Reports every non-ASCII character that appears in plain SQL text, i.e.
/// outside single-quoted strings, double-quoted identifiers, `/* ... */`
/// block comments and `-- ...` line comments.
///
/// The rule is a stateless, zero-sized marker type, so it derives the cheap
/// standard traits that let callers debug-print, copy and default-construct it.
#[derive(Debug, Clone, Copy, Default)]
pub struct UnicodeIdentifiers;
4
5impl Rule for UnicodeIdentifiers {
6 fn name(&self) -> &'static str {
7 "Layout/UnicodeIdentifiers"
8 }
9
10 fn check(&self, ctx: &FileContext) -> Vec<Diagnostic> {
11 // Skip files that failed to parse entirely.
12 if !ctx.parse_errors.is_empty() {
13 return Vec::new();
14 }
15
16 let mut diags = Vec::new();
17 let source = &ctx.source;
18
19 // We walk char by char, tracking whether we are inside:
20 // - a single-quoted string (skip)
21 // - a double-quoted identifier (skip)
22 // - a block comment /* ... */ (skip)
23 // - a line comment -- ... \n (skip)
24 //
25 // For every non-ASCII character that falls *outside* all skip contexts,
26 // we emit one diagnostic.
27
28 let chars: Vec<char> = source.chars().collect();
29 let n = chars.len();
30
31 let mut i = 0usize;
32 // Track line/col (1-indexed). We advance these as we scan.
33 let mut line: usize = 1;
34 let mut col: usize = 1;
35
36 // Skip-context flags
37 let mut in_single_quote = false;
38 let mut in_double_quote = false;
39 let mut in_block_comment = false;
40 let mut in_line_comment = false;
41
42 while i < n {
43 let ch = chars[i];
44
45 // --- Detect context transitions ---
46
47 // Exit line comment on newline
48 if in_line_comment {
49 if ch == '\n' {
50 in_line_comment = false;
51 }
52 // advance position and continue
53 if ch == '\n' {
54 line += 1;
55 col = 1;
56 } else {
57 col += 1;
58 }
59 i += 1;
60 continue;
61 }
62
63 // Exit block comment on */
64 if in_block_comment {
65 if ch == '*' && i + 1 < n && chars[i + 1] == '/' {
66 // consume both chars
67 col += 1; i += 1; // '*'
68 col += 1; i += 1; // '/'
69 in_block_comment = false;
70 } else {
71 if ch == '\n' {
72 line += 1;
73 col = 1;
74 } else {
75 col += 1;
76 }
77 i += 1;
78 }
79 continue;
80 }
81
82 // Inside single-quoted string: exit on unescaped '
83 if in_single_quote {
84 if ch == '\'' {
85 // Standard SQL uses '' to escape a quote — peek ahead
86 if i + 1 < n && chars[i + 1] == '\'' {
87 // escaped quote: consume both
88 col += 1; i += 1;
89 col += 1; i += 1;
90 } else {
91 in_single_quote = false;
92 col += 1;
93 i += 1;
94 }
95 } else {
96 if ch == '\n' {
97 line += 1;
98 col = 1;
99 } else {
100 col += 1;
101 }
102 i += 1;
103 }
104 continue;
105 }
106
107 // Inside double-quoted identifier: exit on "
108 if in_double_quote {
109 if ch == '"' {
110 if i + 1 < n && chars[i + 1] == '"' {
111 // escaped double-quote inside identifier
112 col += 1; i += 1;
113 col += 1; i += 1;
114 } else {
115 in_double_quote = false;
116 col += 1;
117 i += 1;
118 }
119 } else {
120 if ch == '\n' {
121 line += 1;
122 col = 1;
123 } else {
124 col += 1;
125 }
126 i += 1;
127 }
128 continue;
129 }
130
131 // Not inside any skip context — check for context-entry or flagging.
132
133 // Enter block comment
134 if ch == '/' && i + 1 < n && chars[i + 1] == '*' {
135 in_block_comment = true;
136 col += 1; i += 1; // '/'
137 col += 1; i += 1; // '*'
138 continue;
139 }
140
141 // Enter line comment
142 if ch == '-' && i + 1 < n && chars[i + 1] == '-' {
143 in_line_comment = true;
144 col += 1; i += 1; // first '-'
145 col += 1; i += 1; // second '-'
146 continue;
147 }
148
149 // Enter single-quoted string
150 if ch == '\'' {
151 in_single_quote = true;
152 col += 1;
153 i += 1;
154 continue;
155 }
156
157 // Enter double-quoted identifier
158 if ch == '"' {
159 in_double_quote = true;
160 col += 1;
161 i += 1;
162 continue;
163 }
164
165 // Plain SQL context — flag any non-ASCII character
166 if !ch.is_ascii() {
167 diags.push(Diagnostic {
168 rule: self.name(),
169 message: "Non-ASCII character found in SQL; use ASCII identifiers for portability"
170 .to_string(),
171 line,
172 col,
173 });
174 }
175
176 // Advance position
177 if ch == '\n' {
178 line += 1;
179 col = 1;
180 } else {
181 col += 1;
182 }
183 i += 1;
184 }
185
186 diags
187 }
188}