Skip to main content

sqrust_rules/layout/
unicode_identifiers.rs

1use sqrust_core::{Diagnostic, FileContext, Rule};
2
/// Lint rule `Layout/UnicodeIdentifiers`.
///
/// Flags every non-ASCII character that appears in plain SQL text, i.e.
/// outside single-quoted strings, double-quoted identifiers and comments.
/// The rule carries no configuration or state, so it is a unit struct.
#[derive(Debug, Clone, Copy, Default)]
pub struct UnicodeIdentifiers;
4
5impl Rule for UnicodeIdentifiers {
6    fn name(&self) -> &'static str {
7        "Layout/UnicodeIdentifiers"
8    }
9
10    fn check(&self, ctx: &FileContext) -> Vec<Diagnostic> {
11        // Skip files that failed to parse entirely.
12        if !ctx.parse_errors.is_empty() {
13            return Vec::new();
14        }
15
16        let mut diags = Vec::new();
17        let source = &ctx.source;
18
19        // We walk char by char, tracking whether we are inside:
20        //   - a single-quoted string  (skip)
21        //   - a double-quoted identifier (skip)
22        //   - a block comment /* ... */ (skip)
23        //   - a line comment  -- ... \n  (skip)
24        //
25        // For every non-ASCII character that falls *outside* all skip contexts,
26        // we emit one diagnostic.
27
28        let chars: Vec<char> = source.chars().collect();
29        let n = chars.len();
30
31        let mut i = 0usize;
32        // Track line/col (1-indexed).  We advance these as we scan.
33        let mut line: usize = 1;
34        let mut col: usize = 1;
35
36        // Skip-context flags
37        let mut in_single_quote = false;
38        let mut in_double_quote = false;
39        let mut in_block_comment = false;
40        let mut in_line_comment = false;
41
42        while i < n {
43            let ch = chars[i];
44
45            // --- Detect context transitions ---
46
47            // Exit line comment on newline
48            if in_line_comment {
49                if ch == '\n' {
50                    in_line_comment = false;
51                }
52                // advance position and continue
53                if ch == '\n' {
54                    line += 1;
55                    col = 1;
56                } else {
57                    col += 1;
58                }
59                i += 1;
60                continue;
61            }
62
63            // Exit block comment on */
64            if in_block_comment {
65                if ch == '*' && i + 1 < n && chars[i + 1] == '/' {
66                    // consume both chars
67                    col += 1; i += 1; // '*'
68                    col += 1; i += 1; // '/'
69                    in_block_comment = false;
70                } else {
71                    if ch == '\n' {
72                        line += 1;
73                        col = 1;
74                    } else {
75                        col += 1;
76                    }
77                    i += 1;
78                }
79                continue;
80            }
81
82            // Inside single-quoted string: exit on unescaped '
83            if in_single_quote {
84                if ch == '\'' {
85                    // Standard SQL uses '' to escape a quote — peek ahead
86                    if i + 1 < n && chars[i + 1] == '\'' {
87                        // escaped quote: consume both
88                        col += 1; i += 1;
89                        col += 1; i += 1;
90                    } else {
91                        in_single_quote = false;
92                        col += 1;
93                        i += 1;
94                    }
95                } else {
96                    if ch == '\n' {
97                        line += 1;
98                        col = 1;
99                    } else {
100                        col += 1;
101                    }
102                    i += 1;
103                }
104                continue;
105            }
106
107            // Inside double-quoted identifier: exit on "
108            if in_double_quote {
109                if ch == '"' {
110                    if i + 1 < n && chars[i + 1] == '"' {
111                        // escaped double-quote inside identifier
112                        col += 1; i += 1;
113                        col += 1; i += 1;
114                    } else {
115                        in_double_quote = false;
116                        col += 1;
117                        i += 1;
118                    }
119                } else {
120                    if ch == '\n' {
121                        line += 1;
122                        col = 1;
123                    } else {
124                        col += 1;
125                    }
126                    i += 1;
127                }
128                continue;
129            }
130
131            // Not inside any skip context — check for context-entry or flagging.
132
133            // Enter block comment
134            if ch == '/' && i + 1 < n && chars[i + 1] == '*' {
135                in_block_comment = true;
136                col += 1; i += 1; // '/'
137                col += 1; i += 1; // '*'
138                continue;
139            }
140
141            // Enter line comment
142            if ch == '-' && i + 1 < n && chars[i + 1] == '-' {
143                in_line_comment = true;
144                col += 1; i += 1; // first '-'
145                col += 1; i += 1; // second '-'
146                continue;
147            }
148
149            // Enter single-quoted string
150            if ch == '\'' {
151                in_single_quote = true;
152                col += 1;
153                i += 1;
154                continue;
155            }
156
157            // Enter double-quoted identifier
158            if ch == '"' {
159                in_double_quote = true;
160                col += 1;
161                i += 1;
162                continue;
163            }
164
165            // Plain SQL context — flag any non-ASCII character
166            if !ch.is_ascii() {
167                diags.push(Diagnostic {
168                    rule: self.name(),
169                    message: "Non-ASCII character found in SQL; use ASCII identifiers for portability"
170                        .to_string(),
171                    line,
172                    col,
173                });
174            }
175
176            // Advance position
177            if ch == '\n' {
178                line += 1;
179                col = 1;
180            } else {
181                col += 1;
182            }
183            i += 1;
184        }
185
186        diags
187    }
188}