alef_codegen/naming.rs
1use heck::{ToLowerCamelCase, ToPascalCase, ToShoutySnakeCase, ToSnakeCase};
2
3/// Convert a Rust snake_case name to the target language convention.
4pub fn to_python_name(name: &str) -> String {
5 name.to_snake_case()
6}
7
8/// Convert a Rust snake_case name to Node.js/TypeScript lowerCamelCase convention.
9pub fn to_node_name(name: &str) -> String {
10 name.to_lower_camel_case()
11}
12
13/// Convert a Rust snake_case name to Ruby snake_case convention.
14pub fn to_ruby_name(name: &str) -> String {
15 name.to_snake_case()
16}
17
18/// Convert a Rust snake_case name to PHP lowerCamelCase convention.
19pub fn to_php_name(name: &str) -> String {
20 name.to_lower_camel_case()
21}
22
23/// Convert a Rust snake_case name to Elixir snake_case convention.
24pub fn to_elixir_name(name: &str) -> String {
25 name.to_snake_case()
26}
27
/// Well-known initialisms, in the canonical spelling emitted for Go identifiers.
///
/// Most entries are fully uppercased per Go naming conventions; `GraphQL` is the
/// one mixed-case entry and is emitted exactly as spelled here.
/// See: https://go.dev/wiki/CodeReviewComments#initialisms
///
/// `apply_go_acronyms` hands this list to `apply_initialisms`, which compares
/// PascalCase words against the entries ASCII-case-insensitively. Any word that
/// is spelled like an entry (for example `Id`, `Url`, or `Pop`) is therefore
/// rewritten to the entry's spelling.
const INITIALISMS: &[&str] = &[
    "API", "ASCII", "CPU", "CSS", "DNS", "EOF", "FTP", "GID", "GraphQL", "GUI", "HTML", "HTTP", "HTTPS", "ID", "IMAP",
    "IP", "JSON", "LHS", "MFA", "POP", "QPS", "RAM", "RHS", "RPC", "SLA", "SMTP", "SQL", "SSH", "SSL", "TCP", "TLS",
    "TTL", "UDP", "UI", "UID", "UUID", "URI", "URL", "UTF8", "VM", "XML", "XMPP", "XSRF", "XSS",
];
35
/// Initialisms preserved in C# PascalCase. Microsoft's framework design guidelines
/// recommend `Json`/`Http`/`Url` rather than `JSON`/`HTTP`/`URL` (3+ letter
/// initialisms use PascalCase, 2-letter ones use all-caps). This list intentionally
/// excludes generic acronyms so they round-trip cleanly through heck's PascalCase
/// (matching alef's hardcoded helper names like `{Type}ToJson`/`{Type}FromJson`),
/// while still preserving product names like `GraphQL` that heck would mangle.
///
/// Only the entries listed here are restored; every other word keeps the casing
/// heck produced.
// NOTE(review): `UUID` is a 4-letter initialism kept in all-caps, which departs from
// the 3+ letter PascalCase rule cited above — confirm this exception is intentional.
//
// `Id` deliberately omitted: Microsoft's modern framework design guidelines
// (and the de-facto convention in EF Core, ASP.NET Core, Azure SDKs) treat
// `Id` as a word — `EntityId`, not `EntityID`. Keeping `ID` here also
// diverges from the e2e codegen, which calls `to_upper_camel_case` directly
// and emits `.Id` accessors; reconciling both sides to `Id` matches the
// existing test expectations.
const CSHARP_INITIALISMS: &[&str] = &["GraphQL", "UUID"];
49
/// Rewrite the words of a PascalCase `name` that spell an entry of `list`.
///
/// The name is cut into words at every ASCII uppercase letter (other than the
/// very first byte). Starting from each word, the longest run of consecutive
/// words whose concatenation equals a `list` entry (ASCII-case-insensitively)
/// is replaced by that entry exactly as spelled in `list`; words that take
/// part in no match are copied through unchanged.
///
/// Matching across several words is what lets `GraphQl` (which splits into
/// `Graph` + `Ql`) be restored to `GraphQL`, and what keeps already-correct
/// input such as `ImageURL` (`Image` + `U` + `R` + `L`) stable.
///
/// Examples, given a list containing `URL`, `ID` and `GraphQL`:
/// - `ImageUrl` → `ImageURL`
/// - `UserId` → `UserID`
/// - `GraphQlRouteConfig` → `GraphQLRouteConfig`
fn apply_initialisms(name: &str, list: &[&str]) -> String {
    if name.is_empty() {
        return String::new();
    }

    // Byte offsets of the word boundaries: word `k` is `name[bounds[k]..bounds[k + 1]]`.
    // ASCII bytes never occur inside a multi-byte UTF-8 sequence, so every
    // offset recorded here is a valid slice boundary.
    let mut bounds: Vec<usize> = vec![0];
    bounds.extend(
        name.bytes()
            .enumerate()
            .skip(1)
            .filter(|(_, byte)| byte.is_ascii_uppercase())
            .map(|(offset, _)| offset),
    );
    bounds.push(name.len());
    let word_count = bounds.len() - 1;

    let mut out = String::with_capacity(name.len());
    let mut cursor = 0;
    while cursor < word_count {
        let start = bounds[cursor];

        // Longest span first: consecutive words are contiguous in `name`, so a
        // span of words is simply a slice between two recorded boundaries.
        let hit = (cursor + 1..=word_count).rev().find_map(|end| {
            let candidate = &name[start..bounds[end]];
            list.iter()
                .find(|entry| entry.eq_ignore_ascii_case(candidate))
                .map(|&entry| (entry, end))
        });

        match hit {
            Some((canonical, end)) => {
                out.push_str(canonical);
                cursor = end;
            }
            None => {
                out.push_str(&name[start..bounds[cursor + 1]]);
                cursor += 1;
            }
        }
    }
    out
}
99
100/// Apply Go initialism uppercasing to a PascalCase name.
101///
102/// Scans word boundaries in the PascalCase string and replaces any run of
103/// characters that matches a known initialism (case-insensitively) with the
104/// all-caps form. For example `ImageUrl` becomes `ImageURL` and `UserId`
105/// becomes `UserID`.
106fn apply_go_acronyms(name: &str) -> String {
107 apply_initialisms(name, INITIALISMS)
108}
109
110/// Convert a Rust snake_case name to Go PascalCase convention with acronym uppercasing.
111pub fn to_go_name(name: &str) -> String {
112 apply_go_acronyms(&name.to_pascal_case())
113}
114
115/// Apply Go acronym uppercasing to a name that is already in PascalCase (e.g. an IR type name).
116///
117/// IR type names come directly from Rust PascalCase (e.g. `ImageUrl`, `JsonSchemaFormat`).
118/// This function uppercases known acronym segments so they conform to Go naming conventions
119/// (e.g. `ImageUrl` → `ImageURL`, `JsonSchemaFormat` → `JSONSchemaFormat`).
120pub fn go_type_name(name: &str) -> String {
121 apply_go_acronyms(name)
122}
123
124/// Convert a Rust snake_case parameter/variable name to Go lowerCamelCase with acronym uppercasing.
125///
126/// Go naming conventions require that acronyms in identifiers be fully uppercased.
127/// `to_lower_camel_case` alone converts `base_url` → `baseUrl`, but Go wants `baseURL`.
128/// This function converts via PascalCase (which applies acronym uppercasing) then lowercases
129/// the first "word" (the initial run of uppercase letters treated as a unit) while preserving
130/// the case of subsequent words/acronyms:
131/// - `base_url` → `BaseURL` → `baseURL`
132/// - `api_key` → `APIKey` → `apiKey`
133/// - `user_id` → `UserID` → `userID`
134/// - `json` → `JSON` → `json`
135pub fn go_param_name(name: &str) -> String {
136 let pascal = apply_go_acronyms(&name.to_pascal_case());
137 if pascal.is_empty() {
138 return pascal;
139 }
140 let bytes = pascal.as_bytes();
141 // Find the boundary of the first "word":
142 // - If the string begins with a multi-char uppercase run followed by a lowercase letter,
143 // the run minus its last char is an acronym prefix (e.g. "APIKey": run="API", next='K')
144 // → lowercase "AP" and keep "IKey" → "apIKey" ... but Go actually wants "apiKey".
145 // The real rule: lowercase the whole leading uppercase run regardless, because the
146 // acronym-prefix IS the first word.
147 // - If the string begins with a single uppercase char (e.g. "BaseURL"), lowercase just it.
148 //
149 // Concretely: find how many leading bytes are uppercase. If that whole run is followed by
150 // end-of-string, lowercase everything. If followed by more chars, lowercase the entire run.
151 // For "APIKey": upper_len=3, next='K'(uppercase) but that starts the second word.
152 // Actually: scan for the first lowercase char to find where the first word ends.
153 let first_lower = bytes.iter().position(|b| b.is_ascii_lowercase());
154 match first_lower {
155 None => {
156 // Entire string is uppercase (single acronym like "JSON", "URL") — all lowercase.
157 pascal.to_lowercase()
158 }
159 Some(0) => {
160 // Starts with lowercase (already correct)
161 pascal
162 }
163 Some(pos) => {
164 // pos is the index of the first lowercase char.
165 // The first "word" ends just before pos-1 (the char at pos-1 is the first char of
166 // the next PascalCase word that isnds with a lowercase continuation).
167 // For "BaseURL": pos=1 ('a'), so uppercase run = ['B'], lowercase just index 0.
168 // For "APIKey": pos=4 ('e' in "Key"), uppercase run = "APIK", next lower = 'e',
169 // so word boundary is at pos-1=3 ('K' is start of "Key").
170 // → lowercase "API" (indices 0..2), keep "Key" → "apiKey" ✓
171 // For "UserID": pos=1 ('s'), uppercase run starts at 'U', lowercase just 'U' → "userID"... wait
172 // "UserID": 'U'(upper),'s'(lower) → pos=1, word="U", lower "U" → "u"+"serID" = "userID" ✓
173 let word_end = if pos > 1 { pos - 1 } else { 1 };
174 let lower_prefix = pascal[..word_end].to_lowercase();
175 format!("{}{}", lower_prefix, &pascal[word_end..])
176 }
177 }
178}
179
180/// Convert a Rust snake_case name to Java lowerCamelCase convention.
181pub fn to_java_name(name: &str) -> String {
182 name.to_lower_camel_case()
183}
184
185/// Convert a Rust snake_case name to C# PascalCase convention with initialism uppercasing.
186///
187/// Converts snake_case to PascalCase via `heck` and then restores C#-preserved initialisms.
188/// The C# list is intentionally narrow (Microsoft's framework design guidelines prefer
189/// `Json`/`Http`/`Url` over `JSON`/`HTTP`/`URL`), so only product names like `GraphQL`
190/// and short 2-letter abbreviations get all-caps. This keeps method names like
191/// `to_json` → `ToJson` in lockstep with alef's hardcoded `{Type}ToJson` /
192/// `{Type}FromJson` helper declarations.
193pub fn to_csharp_name(name: &str) -> String {
194 apply_initialisms(&name.to_pascal_case(), CSHARP_INITIALISMS)
195}
196
/// Rewrite a name that is *entirely* an all-caps acronym of 3+ letters as PascalCase.
///
/// C# convention: 3+ letter acronyms use PascalCase (`Uri`, `Xml`, `Json`), not
/// all-caps (`URI`, `XML`, `JSON`). Only names made up solely of ASCII uppercase
/// letters and at least three bytes long are changed; anything else — mixed
/// case, shorter names, names containing digits or non-ASCII characters, the
/// empty string — is returned as-is.
///
/// Examples:
/// - `URI` → `Uri`
/// - `XML` → `Xml`
/// - `JSON` → `Json`
/// - `Uri` → `Uri` (already PascalCase)
/// - `ID` → `ID` (shorter than three letters)
/// - `HttpStatus` → `HttpStatus` (not all-caps)
fn normalize_acronym_to_pascalcase(name: &str) -> String {
    let is_all_caps_acronym = name.len() >= 3 && name.bytes().all(|byte| byte.is_ascii_uppercase());
    if !is_all_caps_acronym {
        return name.to_owned();
    }

    // The name is pure ASCII at this point, so splitting after one byte is safe.
    let (initial, rest) = name.split_at(1);
    let mut pascal = String::with_capacity(name.len());
    pascal.push_str(initial);
    pascal.push_str(&rest.to_ascii_lowercase());
    pascal
}
228
229/// Apply C# initialism handling to a name that is already in PascalCase (e.g. an IR type name).
230///
231/// IR type names come directly from Rust PascalCase (e.g. `GraphQLRouteConfig`, `HttpStatus`).
232/// When such names have been processed by `heck::ToPascalCase` they may lose initialism
233/// capitalisation for the names we explicitly preserve (e.g. `GraphQLRouteConfig` →
234/// `GraphQlRouteConfig`). This function restores them.
235///
236/// Examples:
237/// - `GraphQlRouteConfig` → `GraphQLRouteConfig`
238/// - `GraphQLRouteConfig` → `GraphQLRouteConfig` (idempotent)
239/// - `HttpStatus` → `HttpStatus` (left alone — `Http` not in `CSHARP_INITIALISMS`)
240pub fn csharp_type_name(name: &str) -> String {
241 // First normalize 3+ letter acronyms to PascalCase (URI → Uri, XML → Xml, JSON → Json)
242 let normalized = normalize_acronym_to_pascalcase(name);
243 // Then apply the preserved initialism rules (GraphQL, ID, UUID)
244 apply_initialisms(&normalized, CSHARP_INITIALISMS)
245}
246
247/// Convert a Rust name to a C-style prefixed snake_case identifier (e.g. `prefix_name`).
248pub fn to_c_name(prefix: &str, name: &str) -> String {
249 format!("{}_{}", prefix, name.to_snake_case())
250}
251
252/// Convert a Rust type name to class name convention for target language.
253pub fn to_class_name(name: &str) -> String {
254 name.to_pascal_case()
255}
256
257/// Convert to SCREAMING_SNAKE for constants.
258pub fn to_constant_name(name: &str) -> String {
259 name.to_shouty_snake_case()
260}
261
/// Convert a PascalCase or mixed-case name to snake_case, treating acronyms as single words.
///
/// Rules:
/// - A run of consecutive ASCII uppercase letters is treated as a single acronym word.
/// - If the run is followed by a lowercase letter, the last uppercase char begins the
///   next word (e.g. `XMLHttp` → `xml_http`).
/// - A single uppercase letter is a normal word start.
/// - Every other character (lowercase letters, digits, `_`, non-ASCII) is copied
///   through unchanged and never starts a new word.
/// - A word separator is only inserted when the output does not already end in
///   `_`, so input that already contains underscores is not given doubled ones
///   (`My_Type` → `my_type`).
///
/// Examples:
/// - `MyType` → `my_type`
/// - `Rdfa` → `rdfa`
/// - `HTMLParser` → `html_parser`
/// - `XMLHttpRequest` → `xml_http_request`
/// - `IOError` → `io_error`
/// - `URLPath` → `url_path`
/// - `JSONLD` → `jsonld`
/// - `myField` → `my_field`
pub fn pascal_to_snake(name: &str) -> String {
    /// Append `word` lowercased, preceded by `_` unless `out` is empty or
    /// already ends with one. An empty `word` is a no-op.
    fn push_word(out: &mut String, word: &[char]) {
        if word.is_empty() {
            return;
        }
        if !out.is_empty() && !out.ends_with('_') {
            out.push('_');
        }
        out.extend(word.iter().map(char::to_ascii_lowercase));
    }

    let chars: Vec<char> = name.chars().collect();
    let mut out = String::with_capacity(name.len() + 4);
    let mut i = 0;
    while i < chars.len() {
        if !chars[i].is_ascii_uppercase() {
            out.push(chars[i]);
            i += 1;
            continue;
        }

        // Consume the whole run of uppercase letters starting here.
        let run_start = i;
        while i < chars.len() && chars[i].is_ascii_uppercase() {
            i += 1;
        }
        let run = &chars[run_start..i];

        // In `HTMLParser` the run is `HTMLP`: the trailing `P` belongs to the
        // next word because a lowercase letter follows it.
        let followed_by_lower = i < chars.len() && chars[i].is_ascii_lowercase();
        let acronym_len = if run.len() > 1 && followed_by_lower {
            run.len() - 1
        } else {
            run.len()
        };

        push_word(&mut out, &run[..acronym_len]);
        push_word(&mut out, &run[acronym_len..]);
    }
    out
}
328
329/// Convert a PascalCase name to SCREAMING_SNAKE_CASE with correct acronym handling.
330///
331/// Examples:
332/// - `MyType` → `MY_TYPE`
333/// - `Rdfa` → `RDFA`
334/// - `HTMLParser` → `HTML_PARSER`
335pub fn pascal_to_screaming_snake(name: &str) -> String {
336 pascal_to_snake(name).to_ascii_uppercase()
337}
338
/// Unit tests pinning the naming conversions for Go, C#, and the acronym-aware
/// PascalCase → snake_case helpers.
#[cfg(test)]
mod tests {
    use super::*;

    // --- to_go_name (snake_case → Go PascalCase with initialism uppercasing) ---

    // A name that is a single initialism is emitted in its canonical spelling.
    #[test]
    fn test_to_go_name_html_initialism() {
        assert_eq!(to_go_name("html"), "HTML");
    }

    #[test]
    fn test_to_go_name_url_initialism() {
        assert_eq!(to_go_name("url"), "URL");
    }

    #[test]
    fn test_to_go_name_id_initialism() {
        assert_eq!(to_go_name("id"), "ID");
    }

    // Words that are not in the initialism list are only PascalCased.
    #[test]
    fn test_to_go_name_plain_word() {
        assert_eq!(to_go_name("links"), "Links");
    }

    // Trailing initialism.
    #[test]
    fn test_to_go_name_user_id() {
        assert_eq!(to_go_name("user_id"), "UserID");
    }

    #[test]
    fn test_to_go_name_request_url() {
        assert_eq!(to_go_name("request_url"), "RequestURL");
    }

    // --- Additional cases ---

    // Leading initialism followed by an ordinary word.
    #[test]
    fn test_to_go_name_http_status() {
        assert_eq!(to_go_name("http_status"), "HTTPStatus");
    }

    #[test]
    fn test_to_go_name_json_body() {
        assert_eq!(to_go_name("json_body"), "JSONBody");
    }

    // --- go_param_name (snake_case → Go lowerCamelCase with initialism uppercasing) ---

    // Only the first word is lowercased; a trailing initialism stays uppercase.
    #[test]
    fn test_go_param_name_base_url() {
        assert_eq!(go_param_name("base_url"), "baseURL");
    }

    #[test]
    fn test_go_param_name_user_id() {
        assert_eq!(go_param_name("user_id"), "userID");
    }

    // A leading initialism is lowercased as a whole word (`APIKey` → `apiKey`).
    #[test]
    fn test_go_param_name_api_key() {
        assert_eq!(go_param_name("api_key"), "apiKey");
    }

    // A name consisting of a single initialism is lowercased entirely.
    #[test]
    fn test_go_param_name_plain() {
        assert_eq!(go_param_name("json"), "json");
    }

    // --- pascal_to_snake ---

    #[test]
    fn pascal_to_snake_normal_case() {
        assert_eq!(pascal_to_snake("MyType"), "my_type");
    }

    // A single capital followed by lowercase letters is one word.
    #[test]
    fn pascal_to_snake_rdfa() {
        assert_eq!(pascal_to_snake("Rdfa"), "rdfa");
    }

    // The last capital of an uppercase run starts the next word when a
    // lowercase letter follows it.
    #[test]
    fn pascal_to_snake_html_parser() {
        assert_eq!(pascal_to_snake("HTMLParser"), "html_parser");
    }

    #[test]
    fn pascal_to_snake_xml_http_request() {
        assert_eq!(pascal_to_snake("XMLHttpRequest"), "xml_http_request");
    }

    #[test]
    fn pascal_to_snake_io_error() {
        assert_eq!(pascal_to_snake("IOError"), "io_error");
    }

    #[test]
    fn pascal_to_snake_url_path() {
        assert_eq!(pascal_to_snake("URLPath"), "url_path");
    }

    // An all-caps name is one acronym word, with no separators inserted.
    #[test]
    fn pascal_to_snake_jsonld_all_caps() {
        assert_eq!(pascal_to_snake("JSONLD"), "jsonld");
    }

    // lowerCamelCase input is accepted as well.
    #[test]
    fn pascal_to_snake_camel_case() {
        assert_eq!(pascal_to_snake("myField"), "my_field");
    }

    // Input that is already snake_case passes through unchanged.
    #[test]
    fn pascal_to_snake_already_snake() {
        assert_eq!(pascal_to_snake("already_snake"), "already_snake");
    }

    #[test]
    fn pascal_to_snake_empty() {
        assert_eq!(pascal_to_snake(""), "");
    }

    // --- pascal_to_screaming_snake ---

    #[test]
    fn pascal_to_screaming_snake_rdfa() {
        assert_eq!(pascal_to_screaming_snake("Rdfa"), "RDFA");
    }

    #[test]
    fn pascal_to_screaming_snake_html_parser() {
        assert_eq!(pascal_to_screaming_snake("HTMLParser"), "HTML_PARSER");
    }

    #[test]
    fn pascal_to_screaming_snake_my_type() {
        assert_eq!(pascal_to_screaming_snake("MyType"), "MY_TYPE");
    }

    // --- to_csharp_name (snake_case → C# PascalCase with preserved initialisms) ---

    // `GraphQL` is in CSHARP_INITIALISMS, so its canonical spelling is restored.
    #[test]
    fn test_to_csharp_name_graphql_route_config() {
        assert_eq!(to_csharp_name("graphql_route_config"), "GraphQLRouteConfig");
    }

    #[test]
    fn test_to_csharp_name_http_status_no_acronym() {
        // C# follows Microsoft style — 3+ letter initialisms use PascalCase ("Http"),
        // not all-caps ("HTTP"). Only entries of CSHARP_INITIALISMS (such as the
        // product name GraphQL) are re-spelled.
        assert_eq!(to_csharp_name("http_status"), "HttpStatus");
    }

    #[test]
    fn test_to_csharp_name_to_json_no_acronym() {
        // Keeps `to_json` → `ToJson` so it matches alef's hardcoded helper names
        // (`{Type}ToJson`, `{Type}FromJson`) on the FFI declaration side.
        assert_eq!(to_csharp_name("to_json"), "ToJson");
    }

    #[test]
    fn test_to_csharp_name_plain() {
        assert_eq!(to_csharp_name("my_field"), "MyField");
    }

    // --- csharp_type_name (PascalCase → C# PascalCase with preserved initialisms) ---

    #[test]
    fn test_csharp_type_name_heck_corrupted() {
        // heck produces "GraphQlRouteConfig" from "GraphQLRouteConfig" — we must restore it
        assert_eq!(csharp_type_name("GraphQlRouteConfig"), "GraphQLRouteConfig");
    }

    #[test]
    fn test_csharp_type_name_already_correct() {
        // Input that already has the correct form is preserved idempotently
        assert_eq!(csharp_type_name("GraphQLRouteConfig"), "GraphQLRouteConfig");
    }

    #[test]
    fn test_csharp_type_name_http_status_no_acronym() {
        // `Http` is intentionally not in CSHARP_INITIALISMS — Microsoft style prefers `Http`.
        assert_eq!(csharp_type_name("HttpStatus"), "HttpStatus");
    }

    #[test]
    fn test_csharp_type_name_three_letter_acronyms() {
        // 3+ letter acronyms should NOT be uppercased (Uri not URI, Xml not XML, Json not JSON):
        // all-caps input is normalised to PascalCase and PascalCase input is left alone.
        assert_eq!(csharp_type_name("Uri"), "Uri");
        assert_eq!(csharp_type_name("URI"), "Uri");
        assert_eq!(csharp_type_name("Xml"), "Xml");
        assert_eq!(csharp_type_name("XML"), "Xml");
        assert_eq!(csharp_type_name("Json"), "Json");
        assert_eq!(csharp_type_name("JSON"), "Json");
    }
}
534}