Skip to main content

alef_codegen/
naming.rs

1use heck::{ToLowerCamelCase, ToPascalCase, ToShoutySnakeCase, ToSnakeCase};
2
3/// Convert a Rust snake_case name to the target language convention.
4pub fn to_python_name(name: &str) -> String {
5    name.to_snake_case()
6}
7
8/// Convert a Rust snake_case name to Node.js/TypeScript lowerCamelCase convention.
9pub fn to_node_name(name: &str) -> String {
10    name.to_lower_camel_case()
11}
12
13/// Convert a Rust snake_case name to Ruby snake_case convention.
14pub fn to_ruby_name(name: &str) -> String {
15    name.to_snake_case()
16}
17
18/// Convert a Rust snake_case name to PHP lowerCamelCase convention.
19pub fn to_php_name(name: &str) -> String {
20    name.to_lower_camel_case()
21}
22
23/// Convert a Rust snake_case name to Elixir snake_case convention.
24pub fn to_elixir_name(name: &str) -> String {
25    name.to_snake_case()
26}
27
/// Initialisms rewritten to their canonical spelling when generating Go identifiers.
///
/// Every entry is all-caps except the product name `GraphQL`, which keeps its
/// mixed-case spelling. Entries are matched case-insensitively and emitted exactly
/// as written here.
/// See: https://go.dev/wiki/CodeReviewComments#initialisms
#[rustfmt::skip] // keep the one-letter-per-row grouping
const INITIALISMS: &[&str] = &[
    "API", "ASCII",
    "CPU", "CSS",
    "DNS",
    "EOF",
    "FTP",
    "GID", "GraphQL", "GUI",
    "HTML", "HTTP", "HTTPS",
    "ID", "IMAP", "IP",
    "JSON",
    "LHS",
    "MFA",
    "POP",
    "QPS",
    "RAM", "RHS", "RPC",
    "SLA", "SMTP", "SQL", "SSH", "SSL",
    "TCP", "TLS", "TTL",
    "UDP", "UI", "UID", "UUID", "URI", "URL", "UTF8",
    "VM",
    "XML", "XMPP", "XSRF", "XSS",
];
35
/// Initialisms whose spelling is restored after PascalCasing a C# identifier.
///
/// Microsoft's framework design guidelines prefer `Json`/`Http`/`Url` over
/// `JSON`/`HTTP`/`URL`, so generic acronyms are deliberately absent: they must
/// round-trip unchanged through heck's PascalCase to stay in step with alef's
/// hardcoded helper names such as `{Type}ToJson`/`{Type}FromJson`.
///
/// Only the entries below are rewritten: the product name `GraphQL` (which heck
/// would otherwise mangle) plus `ID`, `UUID` and `URI`.
const CSHARP_INITIALISMS: &[&str] = &[
    "GraphQL", // product name, mixed case on purpose
    "ID",
    "UUID",
    "URI",
];
43
/// Rewrite the words of a PascalCase `name` that spell an entry of `list`.
///
/// `name` is split into words at every ASCII uppercase letter (the first byte never
/// starts a new split). Starting from each word, the longest run of consecutive
/// words that equals a `list` entry under ASCII case-insensitive comparison is
/// replaced by that entry exactly as spelled in `list`; words that match nothing
/// are copied through unchanged.
///
/// Trying multi-word runs is what lets a mixed-case entry such as `GraphQL` be
/// recovered from `Graph` + `Ql` (heck's spelling) or `Graph` + `Q` + `L`.
///
/// Examples (with a list containing `URL`, `ID` and `GraphQL`):
/// - `ImageUrl`           → `ImageURL`
/// - `UserId`             → `UserID`
/// - `GraphQlRouteConfig` → `GraphQLRouteConfig`
///
/// Returns a new `String`; the output always has the same byte length as `name`.
fn apply_initialisms(name: &str, list: &[&str]) -> String {
    if name.is_empty() {
        return String::new();
    }

    // Byte offsets where each word starts, followed by a sentinel at `name.len()`.
    // ASCII bytes are always char boundaries, so slicing at these offsets is safe
    // even when `name` contains multi-byte characters.
    let mut bounds: Vec<usize> = vec![0];
    bounds.extend(
        name.bytes()
            .enumerate()
            .skip(1)
            .filter(|(_, b)| b.is_ascii_uppercase())
            .map(|(idx, _)| idx),
    );
    bounds.push(name.len());
    let word_count = bounds.len() - 1;

    // Every word is at least one byte long, so a run of more words than the longest
    // entry has bytes can never match; this bounds the inner search.
    let longest_entry = list.iter().map(|entry| entry.len()).max().unwrap_or(0);

    let mut result = String::with_capacity(name.len());
    let mut i = 0;
    while i < word_count {
        let max_span = (word_count - i).min(longest_entry);
        // Longest match first. Consecutive words are contiguous in `name`, so the
        // candidate is a plain slice and nothing is allocated while searching.
        let hit = (1..=max_span).rev().find_map(|span| {
            let candidate = &name[bounds[i]..bounds[i + span]];
            list.iter()
                .find(|entry| entry.eq_ignore_ascii_case(candidate))
                .map(|&canonical| (canonical, span))
        });
        match hit {
            Some((canonical, span)) => {
                result.push_str(canonical);
                i += span;
            }
            None => {
                result.push_str(&name[bounds[i]..bounds[i + 1]]);
                i += 1;
            }
        }
    }
    result
}
93
94/// Apply Go initialism uppercasing to a PascalCase name.
95///
96/// Scans word boundaries in the PascalCase string and replaces any run of
97/// characters that matches a known initialism (case-insensitively) with the
98/// all-caps form. For example `ImageUrl` becomes `ImageURL` and `UserId`
99/// becomes `UserID`.
100fn apply_go_acronyms(name: &str) -> String {
101    apply_initialisms(name, INITIALISMS)
102}
103
104/// Convert a Rust snake_case name to Go PascalCase convention with acronym uppercasing.
105pub fn to_go_name(name: &str) -> String {
106    apply_go_acronyms(&name.to_pascal_case())
107}
108
109/// Apply Go acronym uppercasing to a name that is already in PascalCase (e.g. an IR type name).
110///
111/// IR type names come directly from Rust PascalCase (e.g. `ImageUrl`, `JsonSchemaFormat`).
112/// This function uppercases known acronym segments so they conform to Go naming conventions
113/// (e.g. `ImageUrl` → `ImageURL`, `JsonSchemaFormat` → `JSONSchemaFormat`).
114pub fn go_type_name(name: &str) -> String {
115    apply_go_acronyms(name)
116}
117
118/// Convert a Rust snake_case parameter/variable name to Go lowerCamelCase with acronym uppercasing.
119///
120/// Go naming conventions require that acronyms in identifiers be fully uppercased.
121/// `to_lower_camel_case` alone converts `base_url` → `baseUrl`, but Go wants `baseURL`.
122/// This function converts via PascalCase (which applies acronym uppercasing) then lowercases
123/// the first "word" (the initial run of uppercase letters treated as a unit) while preserving
124/// the case of subsequent words/acronyms:
125/// - `base_url`  → `BaseURL`  → `baseURL`
126/// - `api_key`   → `APIKey`   → `apiKey`
127/// - `user_id`   → `UserID`   → `userID`
128/// - `json`      → `JSON`     → `json`
129pub fn go_param_name(name: &str) -> String {
130    let pascal = apply_go_acronyms(&name.to_pascal_case());
131    if pascal.is_empty() {
132        return pascal;
133    }
134    let bytes = pascal.as_bytes();
135    // Find the boundary of the first "word":
136    // - If the string begins with a multi-char uppercase run followed by a lowercase letter,
137    //   the run minus its last char is an acronym prefix (e.g. "APIKey": run="API", next='K')
138    //   → lowercase "AP" and keep "IKey" → "apIKey" ... but Go actually wants "apiKey".
139    //   The real rule: lowercase the whole leading uppercase run regardless, because the
140    //   acronym-prefix IS the first word.
141    // - If the string begins with a single uppercase char (e.g. "BaseURL"), lowercase just it.
142    //
143    // Concretely: find how many leading bytes are uppercase. If that whole run is followed by
144    // end-of-string, lowercase everything. If followed by more chars, lowercase the entire run.
145    // For "APIKey": upper_len=3, next='K'(uppercase) but that starts the second word.
146    // Actually: scan for the first lowercase char to find where the first word ends.
147    let first_lower = bytes.iter().position(|b| b.is_ascii_lowercase());
148    match first_lower {
149        None => {
150            // Entire string is uppercase (single acronym like "JSON", "URL") — all lowercase.
151            pascal.to_lowercase()
152        }
153        Some(0) => {
154            // Starts with lowercase (already correct)
155            pascal
156        }
157        Some(pos) => {
158            // pos is the index of the first lowercase char.
159            // The first "word" ends just before pos-1 (the char at pos-1 is the first char of
160            // the next PascalCase word that isnds with a lowercase continuation).
161            // For "BaseURL": pos=1 ('a'), so uppercase run = ['B'], lowercase just index 0.
162            // For "APIKey":  pos=4 ('e' in "Key"), uppercase run = "APIK", next lower = 'e',
163            //   so word boundary is at pos-1=3 ('K' is start of "Key").
164            //   → lowercase "API" (indices 0..2), keep "Key" → "apiKey" ✓
165            // For "UserID":  pos=1 ('s'), uppercase run starts at 'U', lowercase just 'U' → "userID"... wait
166            //   "UserID": 'U'(upper),'s'(lower) → pos=1, word="U", lower "U" → "u"+"serID" = "userID" ✓
167            let word_end = if pos > 1 { pos - 1 } else { 1 };
168            let lower_prefix = pascal[..word_end].to_lowercase();
169            format!("{}{}", lower_prefix, &pascal[word_end..])
170        }
171    }
172}
173
174/// Convert a Rust snake_case name to Java lowerCamelCase convention.
175pub fn to_java_name(name: &str) -> String {
176    name.to_lower_camel_case()
177}
178
179/// Convert a Rust snake_case name to C# PascalCase convention with initialism uppercasing.
180///
181/// Converts snake_case to PascalCase via `heck` and then restores C#-preserved initialisms.
182/// The C# list is intentionally narrow (Microsoft's framework design guidelines prefer
183/// `Json`/`Http`/`Url` over `JSON`/`HTTP`/`URL`), so only product names like `GraphQL`
184/// and short 2-letter abbreviations get all-caps. This keeps method names like
185/// `to_json` → `ToJson` in lockstep with alef's hardcoded `{Type}ToJson` /
186/// `{Type}FromJson` helper declarations.
187pub fn to_csharp_name(name: &str) -> String {
188    apply_initialisms(&name.to_pascal_case(), CSHARP_INITIALISMS)
189}
190
191/// Apply C# initialism handling to a name that is already in PascalCase (e.g. an IR type name).
192///
193/// IR type names come directly from Rust PascalCase (e.g. `GraphQLRouteConfig`, `HttpStatus`).
194/// When such names have been processed by `heck::ToPascalCase` they may lose initialism
195/// capitalisation for the names we explicitly preserve (e.g. `GraphQLRouteConfig` →
196/// `GraphQlRouteConfig`). This function restores them.
197///
198/// Examples:
199/// - `GraphQlRouteConfig`   → `GraphQLRouteConfig`
200/// - `GraphQLRouteConfig`   → `GraphQLRouteConfig`  (idempotent)
201/// - `HttpStatus`           → `HttpStatus`          (left alone — `Http` not in `CSHARP_INITIALISMS`)
202pub fn csharp_type_name(name: &str) -> String {
203    apply_initialisms(name, CSHARP_INITIALISMS)
204}
205
206/// Convert a Rust name to a C-style prefixed snake_case identifier (e.g. `prefix_name`).
207pub fn to_c_name(prefix: &str, name: &str) -> String {
208    format!("{}_{}", prefix, name.to_snake_case())
209}
210
211/// Convert a Rust type name to class name convention for target language.
212pub fn to_class_name(name: &str) -> String {
213    name.to_pascal_case()
214}
215
216/// Convert to SCREAMING_SNAKE for constants.
217pub fn to_constant_name(name: &str) -> String {
218    name.to_shouty_snake_case()
219}
220
/// Convert a PascalCase or mixed-case name to snake_case, treating runs of
/// uppercase letters as acronyms.
///
/// Intended for PascalCase Rust type and enum variant names, where acronym-style
/// names should stay together as one word.
///
/// Rules:
/// - A run of consecutive ASCII uppercase letters is a single acronym word.
/// - If a run of two or more is followed by an ASCII lowercase letter, the last
///   uppercase letter starts the next word (`XMLHttp` → `xml_http`).
/// - A single uppercase letter is a normal word start.
/// - Every other character (lowercase, digits, `_`, non-ASCII) is copied unchanged.
/// - A separator is never emitted at the start of the output nor directly after an
///   existing `_`, so input that already contains underscores does not end up with
///   doubled ones (`MY_TYPE` → `my_type`).
///
/// Only ASCII letters are case-folded; non-ASCII uppercase letters are not treated
/// as word starts.
///
/// Examples:
/// - `MyType`         → `my_type`
/// - `Rdfa`           → `rdfa`
/// - `HTMLParser`     → `html_parser`
/// - `XMLHttpRequest` → `xml_http_request`
/// - `IOError`        → `io_error`
/// - `URLPath`        → `url_path`
/// - `JSONLD`         → `jsonld`
pub fn pascal_to_snake(name: &str) -> String {
    /// Start a new word unless the output is empty or already ends in `_`.
    fn push_separator(out: &mut String) {
        if !out.is_empty() && !out.ends_with('_') {
            out.push('_');
        }
    }

    let chars: Vec<char> = name.chars().collect();
    let n = chars.len();
    let mut out = String::with_capacity(name.len() + 4);
    let mut i = 0;
    while i < n {
        if !chars[i].is_ascii_uppercase() {
            out.push(chars[i]);
            i += 1;
            continue;
        }

        // Consume the whole run of uppercase letters starting here.
        let run_start = i;
        while i < n && chars[i].is_ascii_uppercase() {
            i += 1;
        }
        let run = &chars[run_start..i];

        // In "XMLHttp" the run is "XMLH": the trailing 'H' belongs to "Http", so
        // only "XML" forms the acronym. A lone capital is always its own word start.
        let followed_by_lower = i < n && chars[i].is_ascii_lowercase();
        let acronym_len = if run.len() > 1 && followed_by_lower {
            run.len() - 1
        } else {
            run.len()
        };

        push_separator(&mut out);
        out.extend(run[..acronym_len].iter().map(|c| c.to_ascii_lowercase()));
        if acronym_len < run.len() {
            // The output ends with the lowercased acronym here, never with '_'.
            out.push('_');
            out.push(run[acronym_len].to_ascii_lowercase());
        }
    }
    out
}
287
288/// Convert a PascalCase name to SCREAMING_SNAKE_CASE with correct acronym handling.
289///
290/// Examples:
291/// - `MyType`     → `MY_TYPE`
292/// - `Rdfa`       → `RDFA`
293/// - `HTMLParser` → `HTML_PARSER`
294pub fn pascal_to_screaming_snake(name: &str) -> String {
295    pascal_to_snake(name).to_ascii_uppercase()
296}
297
#[cfg(test)]
mod tests {
    use super::*;

    // ---- to_go_name: snake_case → Go PascalCase, initialisms rewritten ----

    #[test]
    fn test_to_go_name_html_initialism() {
        let actual = to_go_name("html");
        assert_eq!(actual, "HTML");
    }

    #[test]
    fn test_to_go_name_url_initialism() {
        let actual = to_go_name("url");
        assert_eq!(actual, "URL");
    }

    #[test]
    fn test_to_go_name_id_initialism() {
        let actual = to_go_name("id");
        assert_eq!(actual, "ID");
    }

    #[test]
    fn test_to_go_name_plain_word() {
        let actual = to_go_name("links");
        assert_eq!(actual, "Links");
    }

    #[test]
    fn test_to_go_name_user_id() {
        let actual = to_go_name("user_id");
        assert_eq!(actual, "UserID");
    }

    #[test]
    fn test_to_go_name_request_url() {
        let actual = to_go_name("request_url");
        assert_eq!(actual, "RequestURL");
    }

    // Initialism in leading position.

    #[test]
    fn test_to_go_name_http_status() {
        let actual = to_go_name("http_status");
        assert_eq!(actual, "HTTPStatus");
    }

    #[test]
    fn test_to_go_name_json_body() {
        let actual = to_go_name("json_body");
        assert_eq!(actual, "JSONBody");
    }

    // ---- go_param_name: snake_case → Go lowerCamelCase, initialisms rewritten ----

    #[test]
    fn test_go_param_name_base_url() {
        let actual = go_param_name("base_url");
        assert_eq!(actual, "baseURL");
    }

    #[test]
    fn test_go_param_name_user_id() {
        let actual = go_param_name("user_id");
        assert_eq!(actual, "userID");
    }

    #[test]
    fn test_go_param_name_api_key() {
        let actual = go_param_name("api_key");
        assert_eq!(actual, "apiKey");
    }

    #[test]
    fn test_go_param_name_plain() {
        let actual = go_param_name("json");
        assert_eq!(actual, "json");
    }

    // ---- pascal_to_snake ----

    #[test]
    fn pascal_to_snake_normal_case() {
        let actual = pascal_to_snake("MyType");
        assert_eq!(actual, "my_type");
    }

    #[test]
    fn pascal_to_snake_rdfa() {
        let actual = pascal_to_snake("Rdfa");
        assert_eq!(actual, "rdfa");
    }

    #[test]
    fn pascal_to_snake_html_parser() {
        let actual = pascal_to_snake("HTMLParser");
        assert_eq!(actual, "html_parser");
    }

    #[test]
    fn pascal_to_snake_xml_http_request() {
        let actual = pascal_to_snake("XMLHttpRequest");
        assert_eq!(actual, "xml_http_request");
    }

    #[test]
    fn pascal_to_snake_io_error() {
        let actual = pascal_to_snake("IOError");
        assert_eq!(actual, "io_error");
    }

    #[test]
    fn pascal_to_snake_url_path() {
        let actual = pascal_to_snake("URLPath");
        assert_eq!(actual, "url_path");
    }

    #[test]
    fn pascal_to_snake_jsonld_all_caps() {
        let actual = pascal_to_snake("JSONLD");
        assert_eq!(actual, "jsonld");
    }

    #[test]
    fn pascal_to_snake_camel_case() {
        let actual = pascal_to_snake("myField");
        assert_eq!(actual, "my_field");
    }

    #[test]
    fn pascal_to_snake_already_snake() {
        let actual = pascal_to_snake("already_snake");
        assert_eq!(actual, "already_snake");
    }

    #[test]
    fn pascal_to_snake_empty() {
        let actual = pascal_to_snake("");
        assert_eq!(actual, "");
    }

    // ---- pascal_to_screaming_snake ----

    #[test]
    fn pascal_to_screaming_snake_rdfa() {
        let actual = pascal_to_screaming_snake("Rdfa");
        assert_eq!(actual, "RDFA");
    }

    #[test]
    fn pascal_to_screaming_snake_html_parser() {
        let actual = pascal_to_screaming_snake("HTMLParser");
        assert_eq!(actual, "HTML_PARSER");
    }

    #[test]
    fn pascal_to_screaming_snake_my_type() {
        let actual = pascal_to_screaming_snake("MyType");
        assert_eq!(actual, "MY_TYPE");
    }

    // ---- to_csharp_name: snake_case → C# PascalCase, preserved initialisms only ----

    #[test]
    fn test_to_csharp_name_graphql_route_config() {
        let actual = to_csharp_name("graphql_route_config");
        assert_eq!(actual, "GraphQLRouteConfig");
    }

    #[test]
    fn test_to_csharp_name_http_status_no_acronym() {
        // Microsoft style: `Http` stays PascalCase because it is not listed in
        // CSHARP_INITIALISMS; only listed entries such as GraphQL are rewritten.
        let actual = to_csharp_name("http_status");
        assert_eq!(actual, "HttpStatus");
    }

    #[test]
    fn test_to_csharp_name_to_json_no_acronym() {
        // `to_json` must stay `ToJson` so it matches alef's hardcoded helper names
        // (`{Type}ToJson`, `{Type}FromJson`) on the FFI declaration side.
        let actual = to_csharp_name("to_json");
        assert_eq!(actual, "ToJson");
    }

    #[test]
    fn test_to_csharp_name_plain() {
        let actual = to_csharp_name("my_field");
        assert_eq!(actual, "MyField");
    }

    // ---- csharp_type_name: PascalCase → C# PascalCase, preserved initialisms only ----

    #[test]
    fn test_csharp_type_name_heck_corrupted() {
        // heck turns "GraphQLRouteConfig" into "GraphQlRouteConfig"; it must be restored.
        let actual = csharp_type_name("GraphQlRouteConfig");
        assert_eq!(actual, "GraphQLRouteConfig");
    }

    #[test]
    fn test_csharp_type_name_already_correct() {
        // Already-correct input passes through unchanged (idempotent).
        let actual = csharp_type_name("GraphQLRouteConfig");
        assert_eq!(actual, "GraphQLRouteConfig");
    }

    #[test]
    fn test_csharp_type_name_http_status_no_acronym() {
        // `Http` is intentionally not in CSHARP_INITIALISMS — Microsoft style prefers `Http`.
        let actual = csharp_type_name("HttpStatus");
        assert_eq!(actual, "HttpStatus");
    }
}