Skip to main content

alef_codegen/
naming.rs

1use heck::{ToLowerCamelCase, ToPascalCase, ToShoutySnakeCase, ToSnakeCase};
2
3/// Convert a Rust snake_case name to the target language convention.
4pub fn to_python_name(name: &str) -> String {
5    name.to_snake_case()
6}
7
8/// Convert a Rust snake_case name to Node.js/TypeScript lowerCamelCase convention.
9pub fn to_node_name(name: &str) -> String {
10    name.to_lower_camel_case()
11}
12
13/// Convert a Rust snake_case name to Ruby snake_case convention.
14pub fn to_ruby_name(name: &str) -> String {
15    name.to_snake_case()
16}
17
18/// Convert a Rust snake_case name to PHP lowerCamelCase convention.
19pub fn to_php_name(name: &str) -> String {
20    name.to_lower_camel_case()
21}
22
23/// Convert a Rust snake_case name to Elixir snake_case convention.
24pub fn to_elixir_name(name: &str) -> String {
25    name.to_snake_case()
26}
27
/// Well-known initialisms, each written in its canonical capitalisation.
///
/// Most entries are fully upper-case (`URL`, `HTTP`, ...); `GraphQL` is mixed-case. The
/// initialism helpers in this file match a name segment against these entries
/// case-insensitively and emit the entry text verbatim, so the spelling here is exactly
/// what appears in generated Go and C# identifiers.
///
/// See: https://go.dev/wiki/CodeReviewComments#initialisms
/// NOTE(review): the list appears to extend the Go wiki's examples (e.g. `GraphQL`, `MFA`);
/// confirm the intended source before pruning entries.
const INITIALISMS: &[&str] = &[
    "API", "ASCII", "CPU", "CSS", "DNS", "EOF", "FTP", "GID", "GraphQL", "GUI", "HTML", "HTTP", "HTTPS", "ID", "IMAP",
    "IP", "JSON", "LHS", "MFA", "POP", "QPS", "RAM", "RHS", "RPC", "SLA", "SMTP", "SQL", "SSH", "SSL", "TCP", "TLS",
    "TTL", "UDP", "UI", "UID", "UUID", "URI", "URL", "UTF8", "VM", "XML", "XMPP", "XSRF", "XSS",
];
35
/// Apply initialism capitalisation to a PascalCase name using the provided list.
///
/// The name is split into words at every ASCII uppercase letter (the first character
/// always starts the first word). Walking left to right, the longest run of consecutive
/// words whose concatenation equals an entry of `list` (ignoring ASCII case) is replaced
/// by that entry's exact spelling; words that match nothing are copied unchanged.
///
/// Matching runs of several words is what lets `GraphQl` (two words, `Graph` + `Ql`) and
/// `GraphQL` (three words) both resolve to the canonical `GraphQL`.
///
/// Examples (with the usual initialism list):
/// - `ImageUrl`           → `ImageURL`
/// - `UserId`             → `UserID`
/// - `GraphQlRouteConfig` → `GraphQLRouteConfig`
///
/// Returns an empty string for empty input. If several entries match a candidate, the
/// first one in `list` wins.
fn apply_initialisms(name: &str, list: &[&str]) -> String {
    if name.is_empty() {
        return String::new();
    }

    // Byte offsets that delimit the words: 0, every ASCII uppercase byte after the first
    // position, and the end of the string. ASCII bytes are always char boundaries, so
    // slicing `name` at these offsets is safe even when the name contains non-ASCII text.
    let mut bounds: Vec<usize> = vec![0];
    bounds.extend(
        name.bytes()
            .enumerate()
            .skip(1)
            .filter(|(_, byte)| byte.is_ascii_uppercase())
            .map(|(idx, _)| idx),
    );
    bounds.push(name.len());
    let word_count = bounds.len() - 1;

    // No candidate longer than the longest entry can match, which bounds the span search.
    let longest_entry = list.iter().map(|entry| entry.len()).max().unwrap_or(0);

    let mut result = String::with_capacity(name.len());
    let mut word = 0;
    while word < word_count {
        let start = bounds[word];

        // Grow the candidate one word at a time and remember the last (= longest) match.
        // Candidates are borrowed slices of `name`; nothing is allocated while searching.
        let mut hit: Option<(usize, &str)> = None;
        for end_word in word + 1..=word_count {
            let candidate = &name[start..bounds[end_word]];
            if candidate.len() > longest_entry {
                break;
            }
            if let Some(&canonical) = list.iter().find(|entry| entry.eq_ignore_ascii_case(candidate)) {
                hit = Some((end_word, canonical));
            }
        }

        match hit {
            Some((end_word, canonical)) => {
                result.push_str(canonical);
                word = end_word;
            }
            None => {
                result.push_str(&name[start..bounds[word + 1]]);
                word += 1;
            }
        }
    }
    result
}
85
86/// Apply Go initialism uppercasing to a PascalCase name.
87///
88/// Scans word boundaries in the PascalCase string and replaces any run of
89/// characters that matches a known initialism (case-insensitively) with the
90/// all-caps form. For example `ImageUrl` becomes `ImageURL` and `UserId`
91/// becomes `UserID`.
92fn apply_go_acronyms(name: &str) -> String {
93    apply_initialisms(name, INITIALISMS)
94}
95
96/// Convert a Rust snake_case name to Go PascalCase convention with acronym uppercasing.
97pub fn to_go_name(name: &str) -> String {
98    apply_go_acronyms(&name.to_pascal_case())
99}
100
101/// Apply Go acronym uppercasing to a name that is already in PascalCase (e.g. an IR type name).
102///
103/// IR type names come directly from Rust PascalCase (e.g. `ImageUrl`, `JsonSchemaFormat`).
104/// This function uppercases known acronym segments so they conform to Go naming conventions
105/// (e.g. `ImageUrl` → `ImageURL`, `JsonSchemaFormat` → `JSONSchemaFormat`).
106pub fn go_type_name(name: &str) -> String {
107    apply_go_acronyms(name)
108}
109
110/// Convert a Rust snake_case parameter/variable name to Go lowerCamelCase with acronym uppercasing.
111///
112/// Go naming conventions require that acronyms in identifiers be fully uppercased.
113/// `to_lower_camel_case` alone converts `base_url` → `baseUrl`, but Go wants `baseURL`.
114/// This function converts via PascalCase (which applies acronym uppercasing) then lowercases
115/// the first "word" (the initial run of uppercase letters treated as a unit) while preserving
116/// the case of subsequent words/acronyms:
117/// - `base_url`  → `BaseURL`  → `baseURL`
118/// - `api_key`   → `APIKey`   → `apiKey`
119/// - `user_id`   → `UserID`   → `userID`
120/// - `json`      → `JSON`     → `json`
121pub fn go_param_name(name: &str) -> String {
122    let pascal = apply_go_acronyms(&name.to_pascal_case());
123    if pascal.is_empty() {
124        return pascal;
125    }
126    let bytes = pascal.as_bytes();
127    // Find the boundary of the first "word":
128    // - If the string begins with a multi-char uppercase run followed by a lowercase letter,
129    //   the run minus its last char is an acronym prefix (e.g. "APIKey": run="API", next='K')
130    //   → lowercase "AP" and keep "IKey" → "apIKey" ... but Go actually wants "apiKey".
131    //   The real rule: lowercase the whole leading uppercase run regardless, because the
132    //   acronym-prefix IS the first word.
133    // - If the string begins with a single uppercase char (e.g. "BaseURL"), lowercase just it.
134    //
135    // Concretely: find how many leading bytes are uppercase. If that whole run is followed by
136    // end-of-string, lowercase everything. If followed by more chars, lowercase the entire run.
137    // For "APIKey": upper_len=3, next='K'(uppercase) but that starts the second word.
138    // Actually: scan for the first lowercase char to find where the first word ends.
139    let first_lower = bytes.iter().position(|b| b.is_ascii_lowercase());
140    match first_lower {
141        None => {
142            // Entire string is uppercase (single acronym like "JSON", "URL") — all lowercase.
143            pascal.to_lowercase()
144        }
145        Some(0) => {
146            // Starts with lowercase (already correct)
147            pascal
148        }
149        Some(pos) => {
150            // pos is the index of the first lowercase char.
151            // The first "word" ends just before pos-1 (the char at pos-1 is the first char of
152            // the next PascalCase word that isnds with a lowercase continuation).
153            // For "BaseURL": pos=1 ('a'), so uppercase run = ['B'], lowercase just index 0.
154            // For "APIKey":  pos=4 ('e' in "Key"), uppercase run = "APIK", next lower = 'e',
155            //   so word boundary is at pos-1=3 ('K' is start of "Key").
156            //   → lowercase "API" (indices 0..2), keep "Key" → "apiKey" ✓
157            // For "UserID":  pos=1 ('s'), uppercase run starts at 'U', lowercase just 'U' → "userID"... wait
158            //   "UserID": 'U'(upper),'s'(lower) → pos=1, word="U", lower "U" → "u"+"serID" = "userID" ✓
159            let word_end = if pos > 1 { pos - 1 } else { 1 };
160            let lower_prefix = pascal[..word_end].to_lowercase();
161            format!("{}{}", lower_prefix, &pascal[word_end..])
162        }
163    }
164}
165
166/// Convert a Rust snake_case name to Java lowerCamelCase convention.
167pub fn to_java_name(name: &str) -> String {
168    name.to_lower_camel_case()
169}
170
171/// Convert a Rust snake_case name to C# PascalCase convention with initialism uppercasing.
172///
173/// Converts snake_case to PascalCase via `heck` and then restores known initialisms so that
174/// e.g. `graphql_route_config` → `GraphQLRouteConfig` (not `GraphqlRouteConfig`) and
175/// `http_status` → `HTTPStatus` (not `HttpStatus`).
176pub fn to_csharp_name(name: &str) -> String {
177    apply_initialisms(&name.to_pascal_case(), INITIALISMS)
178}
179
180/// Apply initialism uppercasing to a name that is already in PascalCase (e.g. an IR type name).
181///
182/// IR type names come directly from Rust PascalCase (e.g. `GraphQLRouteConfig`, `ImageUrl`).
183/// When such names have been processed by `heck::ToPascalCase` they may lose initialism
184/// capitalisation (e.g. `GraphQLRouteConfig` → `GraphQlRouteConfig`). This function restores
185/// the canonical form regardless of whether the input is already correct or heck-corrupted.
186///
187/// Examples:
188/// - `GraphQlRouteConfig`   → `GraphQLRouteConfig`
189/// - `GraphQLRouteConfig`   → `GraphQLRouteConfig`  (idempotent)
190/// - `HttpStatus`           → `HTTPStatus`
191pub fn csharp_type_name(name: &str) -> String {
192    apply_initialisms(name, INITIALISMS)
193}
194
195/// Convert a Rust name to a C-style prefixed snake_case identifier (e.g. `prefix_name`).
196pub fn to_c_name(prefix: &str, name: &str) -> String {
197    format!("{}_{}", prefix, name.to_snake_case())
198}
199
200/// Convert a Rust type name to class name convention for target language.
201pub fn to_class_name(name: &str) -> String {
202    name.to_pascal_case()
203}
204
205/// Convert to SCREAMING_SNAKE for constants.
206pub fn to_constant_name(name: &str) -> String {
207    name.to_shouty_snake_case()
208}
209
/// Convert a PascalCase or mixed-case name to snake_case, treating runs of capitals as
/// acronyms.
///
/// Rules (only ASCII uppercase letters are considered; every other character is copied
/// through unchanged):
/// - A run of consecutive uppercase letters is a single acronym word.
/// - If the run is followed by a lowercase letter, the last uppercase letter of the run
///   begins the next word (e.g. `XMLHttp` → `xml_http`).
/// - A single uppercase letter followed by lowercase is a normal word start.
/// - A separator is never emitted at the very start of the output or directly after an
///   underscore that is already there, so `Foo_Bar` yields `foo_bar`, not `foo__bar`.
///
/// Examples:
/// - `MyType`         → `my_type`
/// - `Rdfa`           → `rdfa`
/// - `HTMLParser`     → `html_parser`
/// - `XMLHttpRequest` → `xml_http_request`
/// - `IOError`        → `io_error`
/// - `URLPath`        → `url_path`
/// - `JSONLD`         → `jsonld`
/// - `already_snake`  → `already_snake`
///
/// Returns an empty string for empty input.
pub fn pascal_to_snake(name: &str) -> String {
    // A few spare bytes for the separators that get inserted.
    let mut out = String::with_capacity(name.len() + 4);
    let mut prev_upper = false;
    let mut chars = name.chars().peekable();

    while let Some(ch) = chars.next() {
        if !ch.is_ascii_uppercase() {
            out.push(ch);
            prev_upper = false;
            continue;
        }

        // An uppercase letter starts a word when it opens a run of capitals, or when it is
        // the last capital of a run and is followed by a lowercase letter
        // (the `P` in `HTMLParser`).
        let next_lower = matches!(chars.peek(), Some(next) if next.is_ascii_lowercase());
        let starts_word = !prev_upper || next_lower;
        if starts_word && !out.is_empty() && !out.ends_with('_') {
            out.push('_');
        }
        out.push(ch.to_ascii_lowercase());
        prev_upper = true;
    }
    out
}
276
277/// Convert a PascalCase name to SCREAMING_SNAKE_CASE with correct acronym handling.
278///
279/// Examples:
280/// - `MyType`     → `MY_TYPE`
281/// - `Rdfa`       → `RDFA`
282/// - `HTMLParser` → `HTML_PARSER`
283pub fn pascal_to_screaming_snake(name: &str) -> String {
284    pascal_to_snake(name).to_ascii_uppercase()
285}
286
// Unit tests for the naming helpers. Each test pins one input/output pair so that a
// failure names the exact conversion that regressed.
#[cfg(test)]
mod tests {
    use super::*;

    // --- to_go_name (snake_case → Go PascalCase with initialism uppercasing) ---

    // A lone initialism is emitted fully upper-cased.
    #[test]
    fn test_to_go_name_html_initialism() {
        assert_eq!(to_go_name("html"), "HTML");
    }

    #[test]
    fn test_to_go_name_url_initialism() {
        assert_eq!(to_go_name("url"), "URL");
    }

    #[test]
    fn test_to_go_name_id_initialism() {
        assert_eq!(to_go_name("id"), "ID");
    }

    // A word that is not in the initialism list is only capitalised.
    #[test]
    fn test_to_go_name_plain_word() {
        assert_eq!(to_go_name("links"), "Links");
    }

    // Trailing initialism after an ordinary word.
    #[test]
    fn test_to_go_name_user_id() {
        assert_eq!(to_go_name("user_id"), "UserID");
    }

    #[test]
    fn test_to_go_name_request_url() {
        assert_eq!(to_go_name("request_url"), "RequestURL");
    }

    // --- Additional cases ---

    // Leading initialism followed by an ordinary word.
    #[test]
    fn test_to_go_name_http_status() {
        assert_eq!(to_go_name("http_status"), "HTTPStatus");
    }

    #[test]
    fn test_to_go_name_json_body() {
        assert_eq!(to_go_name("json_body"), "JSONBody");
    }

    // --- go_param_name (snake_case → Go lowerCamelCase with initialism uppercasing) ---

    // The first word is lower-cased; a later initialism stays upper-case.
    #[test]
    fn test_go_param_name_base_url() {
        assert_eq!(go_param_name("base_url"), "baseURL");
    }

    #[test]
    fn test_go_param_name_user_id() {
        assert_eq!(go_param_name("user_id"), "userID");
    }

    // A leading initialism is lower-cased as a whole.
    #[test]
    fn test_go_param_name_api_key() {
        assert_eq!(go_param_name("api_key"), "apiKey");
    }

    // A name consisting of a single initialism becomes all lower-case.
    #[test]
    fn test_go_param_name_plain() {
        assert_eq!(go_param_name("json"), "json");
    }

    // --- pascal_to_snake ---

    #[test]
    fn pascal_to_snake_normal_case() {
        assert_eq!(pascal_to_snake("MyType"), "my_type");
    }

    // A capital followed only by lowercase letters is a single word.
    #[test]
    fn pascal_to_snake_rdfa() {
        assert_eq!(pascal_to_snake("Rdfa"), "rdfa");
    }

    // The last capital of an acronym run starts the next word.
    #[test]
    fn pascal_to_snake_html_parser() {
        assert_eq!(pascal_to_snake("HTMLParser"), "html_parser");
    }

    #[test]
    fn pascal_to_snake_xml_http_request() {
        assert_eq!(pascal_to_snake("XMLHttpRequest"), "xml_http_request");
    }

    #[test]
    fn pascal_to_snake_io_error() {
        assert_eq!(pascal_to_snake("IOError"), "io_error");
    }

    #[test]
    fn pascal_to_snake_url_path() {
        assert_eq!(pascal_to_snake("URLPath"), "url_path");
    }

    // An all-caps name is one acronym word.
    #[test]
    fn pascal_to_snake_jsonld_all_caps() {
        assert_eq!(pascal_to_snake("JSONLD"), "jsonld");
    }

    // lowerCamelCase input is split at the interior capital.
    #[test]
    fn pascal_to_snake_camel_case() {
        assert_eq!(pascal_to_snake("myField"), "my_field");
    }

    // Input without capitals passes through unchanged.
    #[test]
    fn pascal_to_snake_already_snake() {
        assert_eq!(pascal_to_snake("already_snake"), "already_snake");
    }

    #[test]
    fn pascal_to_snake_empty() {
        assert_eq!(pascal_to_snake(""), "");
    }

    // --- pascal_to_screaming_snake ---

    #[test]
    fn pascal_to_screaming_snake_rdfa() {
        assert_eq!(pascal_to_screaming_snake("Rdfa"), "RDFA");
    }

    #[test]
    fn pascal_to_screaming_snake_html_parser() {
        assert_eq!(pascal_to_screaming_snake("HTMLParser"), "HTML_PARSER");
    }

    #[test]
    fn pascal_to_screaming_snake_my_type() {
        assert_eq!(pascal_to_screaming_snake("MyType"), "MY_TYPE");
    }

    // --- to_csharp_name (snake_case → C# PascalCase with initialism uppercasing) ---

    // The mixed-case list entry `GraphQL` is restored from the flattened `graphql` word.
    #[test]
    fn test_to_csharp_name_graphql_route_config() {
        assert_eq!(to_csharp_name("graphql_route_config"), "GraphQLRouteConfig");
    }

    #[test]
    fn test_to_csharp_name_http_status() {
        assert_eq!(to_csharp_name("http_status"), "HTTPStatus");
    }

    // No initialism present: plain PascalCase.
    #[test]
    fn test_to_csharp_name_plain() {
        assert_eq!(to_csharp_name("my_field"), "MyField");
    }

    // --- csharp_type_name (PascalCase → C# PascalCase with initialism uppercasing) ---

    #[test]
    fn test_csharp_type_name_heck_corrupted() {
        // heck produces "GraphQlRouteConfig" from "GraphQLRouteConfig" — we must restore it
        assert_eq!(csharp_type_name("GraphQlRouteConfig"), "GraphQLRouteConfig");
    }

    #[test]
    fn test_csharp_type_name_already_correct() {
        // Input that already has the correct form is preserved idempotently
        assert_eq!(csharp_type_name("GraphQLRouteConfig"), "GraphQLRouteConfig");
    }

    #[test]
    fn test_csharp_type_name_http_status() {
        assert_eq!(csharp_type_name("HttpStatus"), "HTTPStatus");
    }
}
461}