alef_codegen/naming.rs
1use heck::{ToLowerCamelCase, ToPascalCase, ToShoutySnakeCase, ToSnakeCase};
2
3/// Convert a Rust snake_case name to the target language convention.
4pub fn to_python_name(name: &str) -> String {
5 name.to_snake_case()
6}
7
8/// Convert a Rust snake_case name to Node.js/TypeScript lowerCamelCase convention.
9pub fn to_node_name(name: &str) -> String {
10 name.to_lower_camel_case()
11}
12
13/// Convert a Rust snake_case name to Ruby snake_case convention.
14pub fn to_ruby_name(name: &str) -> String {
15 name.to_snake_case()
16}
17
18/// Convert a Rust snake_case name to PHP lowerCamelCase convention.
19pub fn to_php_name(name: &str) -> String {
20 name.to_lower_camel_case()
21}
22
23/// Convert a Rust snake_case name to Elixir snake_case convention.
24pub fn to_elixir_name(name: &str) -> String {
25 name.to_snake_case()
26}
27
/// Well-known initialisms, each written in the canonical casing that Go and C#
/// identifiers should use.
///
/// Every entry is fully uppercased except `GraphQL`, whose canonical spelling is
/// mixed-case. The Go-specific background is described at
/// https://go.dev/wiki/CodeReviewComments#initialisms
const INITIALISMS: &[&str] = &[
    // A – G
    "API", "ASCII", "CPU", "CSS", "DNS", "EOF", "FTP", "GID", "GraphQL", "GUI",
    // H – J
    "HTML", "HTTP", "HTTPS", "ID", "IMAP", "IP", "JSON",
    // L – R
    "LHS", "MFA", "POP", "QPS", "RAM", "RHS", "RPC",
    // S – T
    "SLA", "SMTP", "SQL", "SSH", "SSL", "TCP", "TLS", "TTL",
    // U – X
    "UDP", "UI", "UID", "UUID", "URI", "URL", "UTF8", "VM", "XML", "XMPP", "XSRF", "XSS",
];
35
/// Rewrite every known initialism inside a PascalCase identifier to its canonical spelling.
///
/// `name` is split into words at each ASCII uppercase letter (the first byte always
/// belongs to the first word). Starting at each word in turn, the longest run of
/// consecutive words whose concatenation equals an entry of `list` — compared ASCII
/// case-insensitively — is replaced by that entry exactly as it is spelled in `list`.
/// Words that do not start a match are copied through unchanged.
///
/// Matching runs of several words is what lets `GraphQl` (words `Graph` + `Ql`) and
/// `GraphQL` (words `Graph` + `Q` + `L`) both resolve to `GraphQL`.
///
/// Examples, given a list containing `URL`, `ID` and `GraphQL`:
/// - `ImageUrl` → `ImageURL`
/// - `UserId` → `UserID`
/// - `GraphQlRouteConfig` → `GraphQLRouteConfig`
///
/// Returns a new `String`; an empty `name` yields an empty string.
fn apply_initialisms(name: &str, list: &[&str]) -> String {
    if name.is_empty() {
        return String::new();
    }

    // An ASCII case-insensitive match requires equal byte lengths, so no candidate
    // longer than the longest entry can ever match.
    let longest_entry = list.iter().map(|entry| entry.len()).max().unwrap_or(0);

    // Byte offsets at which words start, closed by a sentinel at `name.len()`.
    // ASCII bytes are always UTF-8 character boundaries, so slicing at these offsets
    // is safe even when `name` contains multi-byte characters.
    let mut bounds: Vec<usize> = vec![0];
    bounds.extend(
        name.bytes()
            .enumerate()
            .skip(1)
            .filter(|(_, byte)| byte.is_ascii_uppercase())
            .map(|(offset, _)| offset),
    );
    bounds.push(name.len());
    let word_count = bounds.len() - 1;

    let mut result = String::with_capacity(name.len());
    let mut word = 0;
    while word < word_count {
        let start = bounds[word];

        // Grow the candidate one word at a time and remember the last (= longest) hit.
        // Consecutive words are adjacent in `name`, so each candidate is a plain slice
        // and no intermediate strings are allocated.
        let mut longest_match: Option<(&str, usize)> = None;
        for end_word in word + 1..=word_count {
            let candidate = &name[start..bounds[end_word]];
            if candidate.len() > longest_entry {
                break;
            }
            if let Some(&canonical) = list
                .iter()
                .find(|entry| entry.eq_ignore_ascii_case(candidate))
            {
                longest_match = Some((canonical, end_word));
            }
        }

        match longest_match {
            Some((canonical, end_word)) => {
                result.push_str(canonical);
                word = end_word;
            }
            None => {
                result.push_str(&name[start..bounds[word + 1]]);
                word += 1;
            }
        }
    }
    result
}
85
86/// Apply Go initialism uppercasing to a PascalCase name.
87///
88/// Scans word boundaries in the PascalCase string and replaces any run of
89/// characters that matches a known initialism (case-insensitively) with the
90/// all-caps form. For example `ImageUrl` becomes `ImageURL` and `UserId`
91/// becomes `UserID`.
92fn apply_go_acronyms(name: &str) -> String {
93 apply_initialisms(name, INITIALISMS)
94}
95
96/// Convert a Rust snake_case name to Go PascalCase convention with acronym uppercasing.
97pub fn to_go_name(name: &str) -> String {
98 apply_go_acronyms(&name.to_pascal_case())
99}
100
101/// Apply Go acronym uppercasing to a name that is already in PascalCase (e.g. an IR type name).
102///
103/// IR type names come directly from Rust PascalCase (e.g. `ImageUrl`, `JsonSchemaFormat`).
104/// This function uppercases known acronym segments so they conform to Go naming conventions
105/// (e.g. `ImageUrl` → `ImageURL`, `JsonSchemaFormat` → `JSONSchemaFormat`).
106pub fn go_type_name(name: &str) -> String {
107 apply_go_acronyms(name)
108}
109
110/// Convert a Rust snake_case parameter/variable name to Go lowerCamelCase with acronym uppercasing.
111///
112/// Go naming conventions require that acronyms in identifiers be fully uppercased.
113/// `to_lower_camel_case` alone converts `base_url` → `baseUrl`, but Go wants `baseURL`.
114/// This function converts via PascalCase (which applies acronym uppercasing) then lowercases
115/// the first "word" (the initial run of uppercase letters treated as a unit) while preserving
116/// the case of subsequent words/acronyms:
117/// - `base_url` → `BaseURL` → `baseURL`
118/// - `api_key` → `APIKey` → `apiKey`
119/// - `user_id` → `UserID` → `userID`
120/// - `json` → `JSON` → `json`
121pub fn go_param_name(name: &str) -> String {
122 let pascal = apply_go_acronyms(&name.to_pascal_case());
123 if pascal.is_empty() {
124 return pascal;
125 }
126 let bytes = pascal.as_bytes();
127 // Find the boundary of the first "word":
128 // - If the string begins with a multi-char uppercase run followed by a lowercase letter,
129 // the run minus its last char is an acronym prefix (e.g. "APIKey": run="API", next='K')
130 // → lowercase "AP" and keep "IKey" → "apIKey" ... but Go actually wants "apiKey".
131 // The real rule: lowercase the whole leading uppercase run regardless, because the
132 // acronym-prefix IS the first word.
133 // - If the string begins with a single uppercase char (e.g. "BaseURL"), lowercase just it.
134 //
135 // Concretely: find how many leading bytes are uppercase. If that whole run is followed by
136 // end-of-string, lowercase everything. If followed by more chars, lowercase the entire run.
137 // For "APIKey": upper_len=3, next='K'(uppercase) but that starts the second word.
138 // Actually: scan for the first lowercase char to find where the first word ends.
139 let first_lower = bytes.iter().position(|b| b.is_ascii_lowercase());
140 match first_lower {
141 None => {
142 // Entire string is uppercase (single acronym like "JSON", "URL") — all lowercase.
143 pascal.to_lowercase()
144 }
145 Some(0) => {
146 // Starts with lowercase (already correct)
147 pascal
148 }
149 Some(pos) => {
150 // pos is the index of the first lowercase char.
151 // The first "word" ends just before pos-1 (the char at pos-1 is the first char of
152 // the next PascalCase word that isnds with a lowercase continuation).
153 // For "BaseURL": pos=1 ('a'), so uppercase run = ['B'], lowercase just index 0.
154 // For "APIKey": pos=4 ('e' in "Key"), uppercase run = "APIK", next lower = 'e',
155 // so word boundary is at pos-1=3 ('K' is start of "Key").
156 // → lowercase "API" (indices 0..2), keep "Key" → "apiKey" ✓
157 // For "UserID": pos=1 ('s'), uppercase run starts at 'U', lowercase just 'U' → "userID"... wait
158 // "UserID": 'U'(upper),'s'(lower) → pos=1, word="U", lower "U" → "u"+"serID" = "userID" ✓
159 let word_end = if pos > 1 { pos - 1 } else { 1 };
160 let lower_prefix = pascal[..word_end].to_lowercase();
161 format!("{}{}", lower_prefix, &pascal[word_end..])
162 }
163 }
164}
165
166/// Convert a Rust snake_case name to Java lowerCamelCase convention.
167pub fn to_java_name(name: &str) -> String {
168 name.to_lower_camel_case()
169}
170
171/// Convert a Rust snake_case name to C# PascalCase convention with initialism uppercasing.
172///
173/// Converts snake_case to PascalCase via `heck` and then restores known initialisms so that
174/// e.g. `graphql_route_config` → `GraphQLRouteConfig` (not `GraphqlRouteConfig`) and
175/// `http_status` → `HTTPStatus` (not `HttpStatus`).
176pub fn to_csharp_name(name: &str) -> String {
177 apply_initialisms(&name.to_pascal_case(), INITIALISMS)
178}
179
180/// Apply initialism uppercasing to a name that is already in PascalCase (e.g. an IR type name).
181///
182/// IR type names come directly from Rust PascalCase (e.g. `GraphQLRouteConfig`, `ImageUrl`).
183/// When such names have been processed by `heck::ToPascalCase` they may lose initialism
184/// capitalisation (e.g. `GraphQLRouteConfig` → `GraphQlRouteConfig`). This function restores
185/// the canonical form regardless of whether the input is already correct or heck-corrupted.
186///
187/// Examples:
188/// - `GraphQlRouteConfig` → `GraphQLRouteConfig`
189/// - `GraphQLRouteConfig` → `GraphQLRouteConfig` (idempotent)
190/// - `HttpStatus` → `HTTPStatus`
191pub fn csharp_type_name(name: &str) -> String {
192 apply_initialisms(name, INITIALISMS)
193}
194
195/// Convert a Rust name to a C-style prefixed snake_case identifier (e.g. `prefix_name`).
196pub fn to_c_name(prefix: &str, name: &str) -> String {
197 format!("{}_{}", prefix, name.to_snake_case())
198}
199
200/// Convert a Rust type name to class name convention for target language.
201pub fn to_class_name(name: &str) -> String {
202 name.to_pascal_case()
203}
204
205/// Convert to SCREAMING_SNAKE for constants.
206pub fn to_constant_name(name: &str) -> String {
207 name.to_shouty_snake_case()
208}
209
/// Convert a PascalCase or mixed-case name to snake_case, keeping acronym runs together.
///
/// Intended for PascalCase Rust type and enum-variant names, as an alternative to
/// `heck::ToSnakeCase` when runs of capitals should be treated as one acronym word.
///
/// Rules:
/// - A run of consecutive ASCII uppercase letters is one acronym word.
/// - If a run of two or more capitals is followed by an ASCII lowercase letter, the last
///   capital starts the next word (e.g. `XMLHttp` → `xml_http`).
/// - A single capital is a normal word start.
/// - A `_` separator is inserted before each word unless the output is still empty or
///   already ends in `_`, so existing underscores are never doubled.
/// - Every character that is not an ASCII uppercase letter is copied through unchanged.
///
/// Examples:
/// - `MyType` → `my_type`
/// - `Rdfa` → `rdfa`
/// - `HTMLParser` → `html_parser`
/// - `XMLHttpRequest` → `xml_http_request`
/// - `IOError` → `io_error`
/// - `URLPath` → `url_path`
/// - `JSONLD` → `jsonld`
/// - `My_Type` → `my_type`
pub fn pascal_to_snake(name: &str) -> String {
    let chars: Vec<char> = name.chars().collect();
    let mut out = String::with_capacity(chars.len() + 4);
    let mut pos = 0;

    while let Some(&ch) = chars.get(pos) {
        if !ch.is_ascii_uppercase() {
            out.push(ch);
            pos += 1;
            continue;
        }

        // Consume the whole run of capitals starting at `pos`.
        let run_len = chars[pos..]
            .iter()
            .take_while(|c| c.is_ascii_uppercase())
            .count();
        let run = &chars[pos..pos + run_len];
        pos += run_len;

        // In "HTMLParser" the run is "HTMLP", but the trailing 'P' belongs to "Parser".
        let followed_by_lower = chars.get(pos).map_or(false, |c| c.is_ascii_lowercase());
        let acronym_len = if run_len > 1 && followed_by_lower {
            run_len - 1
        } else {
            run_len
        };

        // Separate from the preceding text, but never stack a second underscore onto
        // one that is already there (e.g. "My_Type" or a leading "_Private").
        if !out.is_empty() && !out.ends_with('_') {
            out.push('_');
        }
        out.extend(run[..acronym_len].iter().map(|c| c.to_ascii_lowercase()));

        if acronym_len < run_len {
            out.push('_');
            out.push(run[acronym_len].to_ascii_lowercase());
        }
    }
    out
}
276
277/// Convert a PascalCase name to SCREAMING_SNAKE_CASE with correct acronym handling.
278///
279/// Examples:
280/// - `MyType` → `MY_TYPE`
281/// - `Rdfa` → `RDFA`
282/// - `HTMLParser` → `HTML_PARSER`
283pub fn pascal_to_screaming_snake(name: &str) -> String {
284 pascal_to_snake(name).to_ascii_uppercase()
285}
286
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `convert` over every `(input, expected)` pair and fail with a list of all
    /// mismatching cases, so one bad case does not hide the others.
    fn check(label: &str, convert: impl Fn(&str) -> String, cases: &[(&str, &str)]) {
        let failures: Vec<String> = cases
            .iter()
            .filter_map(|&(input, expected)| {
                let actual = convert(input);
                if actual == expected {
                    None
                } else {
                    Some(format!(
                        "{}({:?}): expected {:?}, got {:?}",
                        label, input, expected, actual
                    ))
                }
            })
            .collect();
        assert!(failures.is_empty(), "{}", failures.join("\n"));
    }

    /// snake_case → Go PascalCase with initialism uppercasing.
    #[test]
    fn to_go_name_cases() {
        check(
            "to_go_name",
            to_go_name,
            &[
                ("html", "HTML"),
                ("url", "URL"),
                ("id", "ID"),
                ("links", "Links"),
                ("user_id", "UserID"),
                ("request_url", "RequestURL"),
                ("http_status", "HTTPStatus"),
                ("json_body", "JSONBody"),
            ],
        );
    }

    /// snake_case → Go lowerCamelCase with initialism uppercasing.
    #[test]
    fn go_param_name_cases() {
        check(
            "go_param_name",
            go_param_name,
            &[
                ("base_url", "baseURL"),
                ("user_id", "userID"),
                ("api_key", "apiKey"),
                ("json", "json"),
            ],
        );
    }

    /// PascalCase / mixed case → snake_case with acronym runs kept together.
    #[test]
    fn pascal_to_snake_cases() {
        check(
            "pascal_to_snake",
            pascal_to_snake,
            &[
                ("MyType", "my_type"),
                ("Rdfa", "rdfa"),
                ("HTMLParser", "html_parser"),
                ("XMLHttpRequest", "xml_http_request"),
                ("IOError", "io_error"),
                ("URLPath", "url_path"),
                ("JSONLD", "jsonld"),
                ("myField", "my_field"),
                ("already_snake", "already_snake"),
                ("", ""),
            ],
        );
    }

    /// PascalCase → SCREAMING_SNAKE_CASE.
    #[test]
    fn pascal_to_screaming_snake_cases() {
        check(
            "pascal_to_screaming_snake",
            pascal_to_screaming_snake,
            &[
                ("Rdfa", "RDFA"),
                ("HTMLParser", "HTML_PARSER"),
                ("MyType", "MY_TYPE"),
            ],
        );
    }

    /// snake_case → C# PascalCase with initialism uppercasing.
    #[test]
    fn to_csharp_name_cases() {
        check(
            "to_csharp_name",
            to_csharp_name,
            &[
                ("graphql_route_config", "GraphQLRouteConfig"),
                ("http_status", "HTTPStatus"),
                ("my_field", "MyField"),
            ],
        );
    }

    /// PascalCase → C# PascalCase with initialism uppercasing.
    #[test]
    fn csharp_type_name_cases() {
        check(
            "csharp_type_name",
            csharp_type_name,
            &[
                // heck produces "GraphQlRouteConfig" from "GraphQLRouteConfig"; it must be restored.
                ("GraphQlRouteConfig", "GraphQLRouteConfig"),
                // Input that already has the canonical form is preserved.
                ("GraphQLRouteConfig", "GraphQLRouteConfig"),
                ("HttpStatus", "HTTPStatus"),
            ],
        );
    }
}
461}