alef_codegen/naming.rs
1use heck::{ToLowerCamelCase, ToPascalCase, ToShoutySnakeCase, ToSnakeCase};
2
3/// Convert a Rust snake_case name to the target language convention.
4pub fn to_python_name(name: &str) -> String {
5 name.to_snake_case()
6}
7
8/// Convert a Rust snake_case name to Node.js/TypeScript lowerCamelCase convention.
9pub fn to_node_name(name: &str) -> String {
10 name.to_lower_camel_case()
11}
12
13/// Convert a Rust snake_case name to Ruby snake_case convention.
14pub fn to_ruby_name(name: &str) -> String {
15 name.to_snake_case()
16}
17
18/// Convert a Rust snake_case name to PHP lowerCamelCase convention.
19pub fn to_php_name(name: &str) -> String {
20 name.to_lower_camel_case()
21}
22
23/// Convert a Rust snake_case name to Elixir snake_case convention.
24pub fn to_elixir_name(name: &str) -> String {
25 name.to_snake_case()
26}
27
/// Initialisms that Go style writes fully uppercased inside exported identifiers.
/// See: https://go.dev/wiki/CodeReviewComments#initialisms
///
/// Every entry must be stored in its all-caps form: `apply_go_acronyms` emits
/// the entry verbatim when a word matches it.
const GO_ACRONYMS: &[&str] = &[
    "API", "ASCII", "CPU", "CSS", "DNS", "EOF", "FTP", "GID", "GUI", "HTML", "HTTP", "HTTPS", "ID", "IMAP", "IP",
    "JSON", "LHS", "MFA", "POP", "QPS", "RAM", "RHS", "RPC", "SLA", "SMTP", "SQL", "SSH", "SSL", "TCP", "TLS", "TTL",
    "UDP", "UI", "UID", "UUID", "URI", "URL", "UTF8", "VM", "XML", "XMPP", "XSRF", "XSS",
];

/// Uppercase every word of a PascalCase identifier that is a known Go initialism.
///
/// The identifier is cut immediately before each ASCII uppercase letter (the
/// first character never starts a cut), and every resulting word is compared
/// as a whole, ASCII-case-insensitively, against [`GO_ACRONYMS`]. Matching
/// words are replaced by the all-caps initialism; everything else is copied
/// through untouched.
///
/// - `ImageUrl` → `ImageURL`
/// - `UserId` → `UserID`
/// - `HTTPServer` → `HTTPServer` (each capital is its own one-letter word, so
///   nothing matches and the input is returned as-is)
fn apply_go_acronyms(name: &str) -> String {
    let mut converted = String::with_capacity(name.len());
    let mut remaining = name;

    while !remaining.is_empty() {
        // The current word runs up to the next ASCII uppercase byte after its
        // first byte. An ASCII byte is always a UTF-8 character boundary, so
        // splitting there is safe even when the word holds multi-byte chars.
        let boundary = remaining
            .bytes()
            .skip(1)
            .position(|b| b.is_ascii_uppercase())
            .map_or(remaining.len(), |offset| offset + 1);
        let (segment, tail) = remaining.split_at(boundary);

        // Table entries are already uppercase, so a case-insensitive hit can
        // be emitted directly without building an uppercased copy.
        let replacement = GO_ACRONYMS
            .iter()
            .copied()
            .find(|acronym| acronym.eq_ignore_ascii_case(segment))
            .unwrap_or(segment);
        converted.push_str(replacement);

        remaining = tail;
    }

    converted
}
77
78/// Convert a Rust snake_case name to Go PascalCase convention with acronym uppercasing.
79pub fn to_go_name(name: &str) -> String {
80 apply_go_acronyms(&name.to_pascal_case())
81}
82
/// Apply Go acronym uppercasing to a name that is already in PascalCase (e.g. an IR type name).
///
/// IR type names come directly from Rust PascalCase (e.g. `ImageUrl`, `JsonSchemaFormat`).
/// This function uppercases known acronym segments so they conform to Go naming conventions
/// (e.g. `ImageUrl` → `ImageURL`, `JsonSchemaFormat` → `JSONSchemaFormat`).
///
/// Unlike `to_go_name`, no case conversion happens first: the input is handed to
/// `apply_go_acronyms` unchanged, so it must already be PascalCase.
pub fn go_type_name(name: &str) -> String {
    apply_go_acronyms(name)
}
91
92/// Convert a Rust snake_case parameter/variable name to Go lowerCamelCase with acronym uppercasing.
93///
94/// Go naming conventions require that acronyms in identifiers be fully uppercased.
95/// `to_lower_camel_case` alone converts `base_url` → `baseUrl`, but Go wants `baseURL`.
96/// This function converts via PascalCase (which applies acronym uppercasing) then lowercases
97/// the first "word" (the initial run of uppercase letters treated as a unit) while preserving
98/// the case of subsequent words/acronyms:
99/// - `base_url` → `BaseURL` → `baseURL`
100/// - `api_key` → `APIKey` → `apiKey`
101/// - `user_id` → `UserID` → `userID`
102/// - `json` → `JSON` → `json`
103pub fn go_param_name(name: &str) -> String {
104 let pascal = apply_go_acronyms(&name.to_pascal_case());
105 if pascal.is_empty() {
106 return pascal;
107 }
108 let bytes = pascal.as_bytes();
109 // Find the boundary of the first "word":
110 // - If the string begins with a multi-char uppercase run followed by a lowercase letter,
111 // the run minus its last char is an acronym prefix (e.g. "APIKey": run="API", next='K')
112 // → lowercase "AP" and keep "IKey" → "apIKey" ... but Go actually wants "apiKey".
113 // The real rule: lowercase the whole leading uppercase run regardless, because the
114 // acronym-prefix IS the first word.
115 // - If the string begins with a single uppercase char (e.g. "BaseURL"), lowercase just it.
116 //
117 // Concretely: find how many leading bytes are uppercase. If that whole run is followed by
118 // end-of-string, lowercase everything. If followed by more chars, lowercase the entire run.
119 // For "APIKey": upper_len=3, next='K'(uppercase) but that starts the second word.
120 // Actually: scan for the first lowercase char to find where the first word ends.
121 let first_lower = bytes.iter().position(|b| b.is_ascii_lowercase());
122 match first_lower {
123 None => {
124 // Entire string is uppercase (single acronym like "JSON", "URL") — all lowercase.
125 pascal.to_lowercase()
126 }
127 Some(0) => {
128 // Starts with lowercase (already correct)
129 pascal
130 }
131 Some(pos) => {
132 // pos is the index of the first lowercase char.
133 // The first "word" ends just before pos-1 (the char at pos-1 is the first char of
134 // the next PascalCase word that isnds with a lowercase continuation).
135 // For "BaseURL": pos=1 ('a'), so uppercase run = ['B'], lowercase just index 0.
136 // For "APIKey": pos=4 ('e' in "Key"), uppercase run = "APIK", next lower = 'e',
137 // so word boundary is at pos-1=3 ('K' is start of "Key").
138 // → lowercase "API" (indices 0..2), keep "Key" → "apiKey" ✓
139 // For "UserID": pos=1 ('s'), uppercase run starts at 'U', lowercase just 'U' → "userID"... wait
140 // "UserID": 'U'(upper),'s'(lower) → pos=1, word="U", lower "U" → "u"+"serID" = "userID" ✓
141 let word_end = if pos > 1 { pos - 1 } else { 1 };
142 let lower_prefix = pascal[..word_end].to_lowercase();
143 format!("{}{}", lower_prefix, &pascal[word_end..])
144 }
145 }
146}
147
148/// Convert a Rust snake_case name to Java lowerCamelCase convention.
149pub fn to_java_name(name: &str) -> String {
150 name.to_lower_camel_case()
151}
152
153/// Convert a Rust snake_case name to C# PascalCase convention.
154pub fn to_csharp_name(name: &str) -> String {
155 name.to_pascal_case()
156}
157
158/// Convert a Rust name to a C-style prefixed snake_case identifier (e.g. `prefix_name`).
159pub fn to_c_name(prefix: &str, name: &str) -> String {
160 format!("{}_{}", prefix, name.to_snake_case())
161}
162
163/// Convert a Rust type name to class name convention for target language.
164pub fn to_class_name(name: &str) -> String {
165 name.to_pascal_case()
166}
167
168/// Convert to SCREAMING_SNAKE for constants.
169pub fn to_constant_name(name: &str) -> String {
170 name.to_shouty_snake_case()
171}
172
/// Convert a PascalCase or mixed-case name to snake_case with correct acronym handling.
///
/// Use this instead of `heck::ToSnakeCase` when the input is a PascalCase Rust type or
/// enum variant name and acronym-style names such as `Rdfa` must stay a single word
/// (`rdfa`, never `rd_fa`).
///
/// Rules:
/// - A run of consecutive ASCII uppercase letters is treated as a single acronym word.
/// - If a run of two or more is followed by an ASCII lowercase letter, the last
///   uppercase char begins the next word (e.g. `XMLHttp` → `xml_http`).
/// - A single uppercase letter is a normal word start.
/// - A word start inserts `_` unless it is at the very beginning of the output or the
///   output already ends in `_`, so input that already contains underscores
///   (`Foo_Bar`, `my_Type`) is not given doubled separators.
/// - Every other character (lowercase letters, digits, `_`, non-ASCII) is copied as-is.
///
/// Examples:
/// - `MyType` → `my_type`
/// - `Rdfa` → `rdfa`
/// - `HTMLParser` → `html_parser`
/// - `XMLHttpRequest` → `xml_http_request`
/// - `IOError` → `io_error`
/// - `URLPath` → `url_path`
/// - `JSONLD` → `jsonld`
/// - `Foo_Bar` → `foo_bar`
pub fn pascal_to_snake(name: &str) -> String {
    /// Emit a word separator unless the output is empty or already ends with one.
    fn start_word(out: &mut String) {
        if !out.is_empty() && !out.ends_with('_') {
            out.push('_');
        }
    }

    let chars: Vec<char> = name.chars().collect();
    let mut out = String::with_capacity(name.len() + 4);
    let mut i = 0;

    while i < chars.len() {
        if !chars[i].is_ascii_uppercase() {
            out.push(chars[i]);
            i += 1;
            continue;
        }

        // Consume the whole run of uppercase letters starting here.
        let run_start = i;
        while i < chars.len() && chars[i].is_ascii_uppercase() {
            i += 1;
        }
        let run = &chars[run_start..i];

        // In `HTMLParser` the run is `HTMLP`: the trailing `P` belongs to the
        // following lowercase word, not to the acronym.
        let followed_by_lower = matches!(chars.get(i), Some(c) if c.is_ascii_lowercase());
        let acronym_len = if run.len() > 1 && followed_by_lower {
            run.len() - 1
        } else {
            run.len()
        };
        let (acronym, next_word_head) = run.split_at(acronym_len);

        start_word(&mut out);
        out.extend(acronym.iter().map(char::to_ascii_lowercase));
        if let Some(head) = next_word_head.first() {
            out.push('_');
            out.push(head.to_ascii_lowercase());
        }
    }

    out
}
239
240/// Convert a PascalCase name to SCREAMING_SNAKE_CASE with correct acronym handling.
241///
242/// Examples:
243/// - `MyType` → `MY_TYPE`
244/// - `Rdfa` → `RDFA`
245/// - `HTMLParser` → `HTML_PARSER`
246pub fn pascal_to_screaming_snake(name: &str) -> String {
247 pascal_to_snake(name).to_ascii_uppercase()
248}
249
/// Unit tests for the Go naming helpers and the acronym-aware PascalCase → snake_case
/// converters defined in this file.
#[cfg(test)]
mod tests {
    use super::*;

    // --- to_go_name (snake_case → Go PascalCase with initialism uppercasing) ---

    // A name that is a single initialism is uppercased in full.
    #[test]
    fn test_to_go_name_html_initialism() {
        assert_eq!(to_go_name("html"), "HTML");
    }

    #[test]
    fn test_to_go_name_url_initialism() {
        assert_eq!(to_go_name("url"), "URL");
    }

    #[test]
    fn test_to_go_name_id_initialism() {
        assert_eq!(to_go_name("id"), "ID");
    }

    // A word that is not in the initialism table only gets its first letter capitalised.
    #[test]
    fn test_to_go_name_plain_word() {
        assert_eq!(to_go_name("links"), "Links");
    }

    // Initialism as the last word of a multi-word name.
    #[test]
    fn test_to_go_name_user_id() {
        assert_eq!(to_go_name("user_id"), "UserID");
    }

    #[test]
    fn test_to_go_name_request_url() {
        assert_eq!(to_go_name("request_url"), "RequestURL");
    }

    // --- to_go_name: initialism as the first word of a multi-word name ---

    #[test]
    fn test_to_go_name_http_status() {
        assert_eq!(to_go_name("http_status"), "HTTPStatus");
    }

    #[test]
    fn test_to_go_name_json_body() {
        assert_eq!(to_go_name("json_body"), "JSONBody");
    }

    // --- go_param_name (snake_case → Go lowerCamelCase with initialism uppercasing) ---

    // Trailing initialisms stay uppercase; only the first word is lowercased.
    #[test]
    fn test_go_param_name_base_url() {
        assert_eq!(go_param_name("base_url"), "baseURL");
    }

    #[test]
    fn test_go_param_name_user_id() {
        assert_eq!(go_param_name("user_id"), "userID");
    }

    // A leading initialism is lowercased as a whole word.
    #[test]
    fn test_go_param_name_api_key() {
        assert_eq!(go_param_name("api_key"), "apiKey");
    }

    // A name consisting of a single initialism comes out entirely lowercase.
    #[test]
    fn test_go_param_name_plain() {
        assert_eq!(go_param_name("json"), "json");
    }

    // --- pascal_to_snake ---

    #[test]
    fn pascal_to_snake_normal_case() {
        assert_eq!(pascal_to_snake("MyType"), "my_type");
    }

    // A capitalised acronym-style name stays one word.
    #[test]
    fn pascal_to_snake_rdfa() {
        assert_eq!(pascal_to_snake("Rdfa"), "rdfa");
    }

    // The last capital of an uppercase run starts the following word.
    #[test]
    fn pascal_to_snake_html_parser() {
        assert_eq!(pascal_to_snake("HTMLParser"), "html_parser");
    }

    #[test]
    fn pascal_to_snake_xml_http_request() {
        assert_eq!(pascal_to_snake("XMLHttpRequest"), "xml_http_request");
    }

    #[test]
    fn pascal_to_snake_io_error() {
        assert_eq!(pascal_to_snake("IOError"), "io_error");
    }

    #[test]
    fn pascal_to_snake_url_path() {
        assert_eq!(pascal_to_snake("URLPath"), "url_path");
    }

    // An all-caps name with no lowercase letter after it is a single word.
    #[test]
    fn pascal_to_snake_jsonld_all_caps() {
        assert_eq!(pascal_to_snake("JSONLD"), "jsonld");
    }

    // lowerCamelCase input is split at the capital as well.
    #[test]
    fn pascal_to_snake_camel_case() {
        assert_eq!(pascal_to_snake("myField"), "my_field");
    }

    // Input without uppercase letters passes through unchanged.
    #[test]
    fn pascal_to_snake_already_snake() {
        assert_eq!(pascal_to_snake("already_snake"), "already_snake");
    }

    #[test]
    fn pascal_to_snake_empty() {
        assert_eq!(pascal_to_snake(""), "");
    }

    // --- pascal_to_screaming_snake ---

    #[test]
    fn pascal_to_screaming_snake_rdfa() {
        assert_eq!(pascal_to_screaming_snake("Rdfa"), "RDFA");
    }

    #[test]
    fn pascal_to_screaming_snake_html_parser() {
        assert_eq!(pascal_to_screaming_snake("HTMLParser"), "HTML_PARSER");
    }

    #[test]
    fn pascal_to_screaming_snake_my_type() {
        assert_eq!(pascal_to_screaming_snake("MyType"), "MY_TYPE");
    }
}