Skip to main content

ploidy_codegen_rust/
naming.rs

1use std::{
2    fmt::{Display, Write},
3    ops::Deref,
4};
5
6use heck::{AsKebabCase, AsPascalCase, AsSnekCase};
7use itertools::Itertools;
8use ploidy_core::{
9    arena::Arena,
10    codegen::{UniqueNames, unique::WordSegments},
11    ir::{PrimitiveType, StructFieldNameHint, UntaggedVariantNameHint},
12};
13
14use proc_macro2::{Ident, Span, TokenStream};
15use quote::{IdentFragment, ToTokens, TokenStreamExt};
16use ref_cast::{RefCastCustom, ref_cast_custom};
17
// Keywords that can't be used as identifiers, even with `r#`.
//
// `UniqueIdents::with_reserved` chains these onto the caller's reserved
// names, so they're pre-reserved in every identifier scope.
const KEYWORDS: &[&str] = &["crate", "self", "super", "Self"];
20
/// A cleaned string that's valid for use as a Rust identifier.
///
/// This is an unsized, `#[repr(transparent)]` wrapper around [`str`], so it's
/// always handled behind a reference (`&CodegenIdent`).
#[derive(Debug, Eq, Hash, Ord, PartialEq, PartialOrd, RefCastCustom)]
#[repr(transparent)]
pub struct CodegenIdent(str);

impl CodegenIdent {
    // Private constructor: only code in this module can wrap a string.
    // `#[ref_cast_custom]` supplies the body, which reinterprets the `&str`
    // as `&Self` without copying.
    #[ref_cast_custom]
    fn new(s: &str) -> &Self;
}
30
31/// An identifier that's unique within its [`UniqueIdents`] scope.
32///
33/// Only a scope can construct these, ensuring that identifiers won't collide
34/// within that scope. Pass a [`UniqueIdent`] to a [`CodegenIdentUsage`] variant
35/// to emit it as an [`Ident`] token.
36#[derive(Debug, Eq, Hash, Ord, PartialEq, PartialOrd, RefCastCustom)]
37#[repr(transparent)]
38pub struct UniqueIdent(str);
39
40impl UniqueIdent {
41    #[ref_cast_custom]
42    fn new(s: &str) -> &Self;
43}
44
45impl Deref for UniqueIdent {
46    type Target = CodegenIdent;
47
48    #[inline]
49    fn deref(&self) -> &Self::Target {
50        CodegenIdent::new(&self.0)
51    }
52}
53
54/// Emits a [`CodegenIdent`] as an idiomatic Rust identifier.
55///
56/// Each [`CodegenIdentUsage`] variant determines the case transformation
57/// applied to the identifier: module, field, parameter, and method names
58/// become snake_case; type and enum variant names become PascalCase.
59///
60/// Implements [`ToTokens`] for use in [`quote`] macros. For string interpolation,
61/// use [`display`](Self::display).
62#[derive(Clone, Copy, Debug)]
63pub enum CodegenIdentUsage<'a> {
64    Module(&'a CodegenIdent),
65    Type(&'a CodegenIdent),
66    Field(&'a UniqueIdent),
67    Variant(&'a UniqueIdent),
68    Param(&'a UniqueIdent),
69    Method(&'a UniqueIdent),
70}
71
72impl<'a> CodegenIdentUsage<'a> {
73    /// Returns a formattable representation of this identifier.
74    ///
75    /// [`CodegenIdentUsage`] doesn't implement [`Display`] directly, to help catch
76    /// context mismatches: using `.display()` in a [`quote`] macro, or
77    /// `.to_token_stream()` in a [`format`] string, stands out during review.
78    pub fn display(self) -> impl Display {
79        struct DisplayUsage<'a>(CodegenIdentUsage<'a>);
80        impl Display for DisplayUsage<'_> {
81            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
82                let s = self.0.as_str();
83                if !s.starts_with(unicode_ident::is_xid_start) {
84                    // `s` is an identifier fragment; ensure it starts with
85                    // `XID_Start` to make it a valid identifier.
86                    f.write_char('_')?;
87                }
88                match self.0 {
89                    CodegenIdentUsage::Type(_) | CodegenIdentUsage::Variant(_) => {
90                        write!(f, "{}", AsPascalCase(s))
91                    }
92                    CodegenIdentUsage::Module(_)
93                    | CodegenIdentUsage::Field(_)
94                    | CodegenIdentUsage::Param(_)
95                    | CodegenIdentUsage::Method(_) => write!(f, "{}", AsSnekCase(s)),
96                }
97            }
98        }
99        DisplayUsage(self)
100    }
101
102    #[inline]
103    fn as_str(&self) -> &str {
104        match self {
105            CodegenIdentUsage::Type(s) => &s.0,
106            CodegenIdentUsage::Variant(s) => &s.0,
107            CodegenIdentUsage::Module(s) => &s.0,
108            CodegenIdentUsage::Field(s) => &s.0,
109            CodegenIdentUsage::Param(s) => &s.0,
110            CodegenIdentUsage::Method(s) => &s.0,
111        }
112    }
113}
114
115impl IdentFragment for CodegenIdentUsage<'_> {
116    #[inline]
117    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
118        write!(f, "{}", self.display())
119    }
120}
121
122impl ToTokens for CodegenIdentUsage<'_> {
123    #[inline]
124    fn to_tokens(&self, tokens: &mut TokenStream) {
125        let s = self.display().to_string();
126        // Assume `s` is a keyword that must be rendered as a raw identifier
127        // if `parse_str` fails. A string that's not a valid identifier here
128        // is a logic error.
129        let ident = syn::parse_str(&s).unwrap_or_else(|_| Ident::new_raw(&s, Span::call_site()));
130        tokens.append(ident);
131    }
132}
133
134/// A key used to group a resource's operations into modules
135/// and derive Cargo features for resource operations and types.
136///
137/// [`Named`] wraps a uniquified resource name; [`Default`] represents
138/// operations and types without a resource name.
139///
140/// [`Named`]: Self::Named
141/// [`Default`]: Self::Default
142#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
143pub enum ResourceGroup<'a> {
144    Named(&'a UniqueIdent),
145    #[default]
146    Default,
147}
148
149impl<'a> ResourceGroup<'a> {
150    /// Returns the resource name for a [`Named`][Self::Named] group.
151    #[inline]
152    pub fn name(self) -> Option<&'a UniqueIdent> {
153        match self {
154            Self::Named(name) => Some(name),
155            Self::Default => None,
156        }
157    }
158
159    /// Returns whether this group represents operations and types
160    /// without a resource name.
161    #[inline]
162    pub fn is_default(&self) -> bool {
163        matches!(self, Self::Default)
164    }
165}
166
167/// Formats a uniquified resource name as a Cargo feature name.
168#[derive(Clone, Copy, Debug)]
169pub struct AsFeatureName<'a>(pub &'a UniqueIdent);
170
171impl Display for AsFeatureName<'_> {
172    #[inline]
173    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
174        write!(f, "{}", AsKebabCase(&self.0.0))
175    }
176}
177
178/// A scope for generating unique, valid Rust identifiers.
179#[derive(Debug)]
180pub struct UniqueIdents<'a>(UniqueNames<'a>);
181
182impl<'a> UniqueIdents<'a> {
183    /// Creates a new identifier scope that's backed by the given arena.
184    #[inline]
185    pub fn new(arena: &'a Arena) -> Self {
186        Self::with_reserved(arena, &[])
187    }
188
189    /// Creates a new identifier scope that's backed by the given arena,
190    /// with additional pre-reserved names.
191    #[inline]
192    pub fn with_reserved(arena: &'a Arena, reserved: &[&str]) -> Self {
193        Self(UniqueNames::with_reserved(
194            arena,
195            reserved.iter().chain(KEYWORDS).copied(),
196        ))
197    }
198
199    /// Cleans and uniquifies an identifier.
200    #[inline]
201    pub fn ident(&mut self, name: &str) -> &'a UniqueIdent {
202        UniqueIdent::new(self.0.uniquify(&clean(name)))
203    }
204
205    /// Uniquifies a struct field name from a [`StructFieldNameHint`].
206    #[inline]
207    pub fn field_name_hint(&mut self, hint: StructFieldNameHint) -> &'a UniqueIdent {
208        use StructFieldNameHint::*;
209        UniqueIdent::new(match hint {
210            Index(index) => self.0.uniquify(&format!("variant_{index}")),
211            AdditionalProperties => self.0.uniquify("additional_properties"),
212        })
213    }
214
215    /// Uniquifies an untagged union variant name from an
216    /// [`UntaggedVariantNameHint`].
217    #[inline]
218    pub fn variant_name_hint(&mut self, hint: UntaggedVariantNameHint) -> &'a UniqueIdent {
219        use {PrimitiveType::*, UntaggedVariantNameHint::*};
220        UniqueIdent::new(match hint {
221            Primitive(String) => self.0.uniquify("String"),
222            Primitive(I8) => self.0.uniquify("I8"),
223            Primitive(U8) => self.0.uniquify("U8"),
224            Primitive(I16) => self.0.uniquify("I16"),
225            Primitive(U16) => self.0.uniquify("U16"),
226            Primitive(I32) => self.0.uniquify("I32"),
227            Primitive(U32) => self.0.uniquify("U32"),
228            Primitive(I64) => self.0.uniquify("I64"),
229            Primitive(U64) => self.0.uniquify("U64"),
230            Primitive(F32) => self.0.uniquify("F32"),
231            Primitive(F64) => self.0.uniquify("F64"),
232            Primitive(Bool) => self.0.uniquify("Bool"),
233            Primitive(DateTime) => self.0.uniquify("DateTime"),
234            Primitive(UnixTime) => self.0.uniquify("UnixTime"),
235            Primitive(Date) => self.0.uniquify("Date"),
236            Primitive(Url) => self.0.uniquify("Url"),
237            Primitive(Uuid) => self.0.uniquify("Uuid"),
238            Primitive(Bytes) => self.0.uniquify("Bytes"),
239            Primitive(Binary) => self.0.uniquify("Binary"),
240            Array => self.0.uniquify("Array"),
241            Map => self.0.uniquify("Map"),
242            Index(index) => self.0.uniquify(&format!("V{index}")),
243        })
244    }
245}
246
247/// Makes a valid Rust identifier fragment from a string.
248///
249/// Cleaning segments the string on word boundaries and collapses all
250/// non-`XID_Continue` characters into new boundaries. This makes the fragment
251/// resilient to Heck's case transformations, which also collapse boundaries,
252/// and so can produce duplicates.
253///
254/// The result is a valid identifier fragment, but may not be a valid [`Ident`],
255/// because Rust identifiers must start with `XID_Start`.
256#[inline]
257fn clean(s: &str) -> String {
258    WordSegments::new(s)
259        .flat_map(|(_, s)| s.split(|c| !unicode_ident::is_xid_continue(c)))
260        .join("_")
261}
262
#[cfg(test)]
mod tests {
    use super::*;

    use pretty_assertions::assert_eq;
    use syn::parse_quote;

    /// Emits `usage` through its `ToTokens` impl and parses the tokens back
    /// into an identifier, so that tests can compare it to an expected one.
    fn emit(usage: CodegenIdentUsage<'_>) -> syn::Ident {
        parse_quote!(#usage)
    }

    // MARK: Usages

    #[test]
    fn test_codegen_ident_type() {
        let arena = Arena::new();
        let mut idents = UniqueIdents::new(&arena);
        let name = idents.ident("pet_store");

        let actual = emit(CodegenIdentUsage::Type(name));
        let expected: syn::Ident = parse_quote!(PetStore);
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_codegen_ident_field() {
        let arena = Arena::new();
        let mut idents = UniqueIdents::new(&arena);
        let name = idents.ident("petStore");

        let actual = emit(CodegenIdentUsage::Field(name));
        let expected: syn::Ident = parse_quote!(pet_store);
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_codegen_ident_module() {
        let arena = Arena::new();
        let mut idents = UniqueIdents::new(&arena);
        let name = idents.ident("MyModule");

        let actual = emit(CodegenIdentUsage::Module(name));
        let expected: syn::Ident = parse_quote!(my_module);
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_codegen_ident_variant() {
        let arena = Arena::new();
        let mut idents = UniqueIdents::new(&arena);
        let name = idents.ident("http_error");

        let actual = emit(CodegenIdentUsage::Variant(name));
        let expected: syn::Ident = parse_quote!(HttpError);
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_codegen_ident_param() {
        let arena = Arena::new();
        let mut idents = UniqueIdents::new(&arena);
        let name = idents.ident("userId");

        let actual = emit(CodegenIdentUsage::Param(name));
        let expected: syn::Ident = parse_quote!(user_id);
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_codegen_ident_method() {
        let arena = Arena::new();
        let mut idents = UniqueIdents::new(&arena);
        let name = idents.ident("getUserById");

        let actual = emit(CodegenIdentUsage::Method(name));
        let expected: syn::Ident = parse_quote!(get_user_by_id);
        assert_eq!(actual, expected);
    }

    // MARK: Special characters

    #[test]
    fn test_codegen_ident_handles_rust_keywords() {
        let arena = Arena::new();
        let mut idents = UniqueIdents::new(&arena);
        let name = idents.ident("type");

        let actual = emit(CodegenIdentUsage::Field(name));
        let expected: syn::Ident = parse_quote!(r#type);
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_codegen_ident_handles_invalid_start_chars() {
        let arena = Arena::new();
        let mut idents = UniqueIdents::new(&arena);
        let name = idents.ident("123foo");

        let actual = emit(CodegenIdentUsage::Field(name));
        let expected: syn::Ident = parse_quote!(_123_foo);
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_codegen_ident_handles_special_chars() {
        let arena = Arena::new();
        let mut idents = UniqueIdents::new(&arena);
        let name = idents.ident("foo-bar-baz");

        let actual = emit(CodegenIdentUsage::Field(name));
        let expected: syn::Ident = parse_quote!(foo_bar_baz);
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_codegen_ident_handles_number_prefix() {
        let arena = Arena::new();
        let mut idents = UniqueIdents::new(&arena);
        let name = idents.ident("1099KStatus");

        // The same fragment is rendered once per casing.
        let as_field = emit(CodegenIdentUsage::Field(name));
        let expected_field: syn::Ident = parse_quote!(_1099_k_status);
        assert_eq!(as_field, expected_field);

        let as_type = emit(CodegenIdentUsage::Type(name));
        let expected_type: syn::Ident = parse_quote!(_1099KStatus);
        assert_eq!(as_type, expected_type);
    }

    // MARK: Untagged variant names

    #[test]
    fn test_untagged_variant_name_index() {
        let arena = Arena::new();
        let mut idents = UniqueIdents::new(&arena);

        let first = idents.variant_name_hint(UntaggedVariantNameHint::Index(0));
        assert_eq!(&first.0, "V");

        let later = idents.variant_name_hint(UntaggedVariantNameHint::Index(42));
        assert_eq!(&later.0, "V42");
    }

    // MARK: Struct field names

    #[test]
    fn test_struct_field_name_index() {
        let arena = Arena::new();
        let mut idents = UniqueIdents::new(&arena);

        let first = idents.field_name_hint(StructFieldNameHint::Index(0));
        let actual = emit(CodegenIdentUsage::Field(first));
        let expected: syn::Ident = parse_quote!(variant);
        assert_eq!(actual, expected);

        let sixth = idents.field_name_hint(StructFieldNameHint::Index(5));
        let actual = emit(CodegenIdentUsage::Field(sixth));
        let expected: syn::Ident = parse_quote!(variant5);
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_struct_field_name_additional_properties() {
        let arena = Arena::new();
        let mut idents = UniqueIdents::new(&arena);
        let name = idents.field_name_hint(StructFieldNameHint::AdditionalProperties);

        let actual = emit(CodegenIdentUsage::Field(name));
        let expected: syn::Ident = parse_quote!(additional_properties);
        assert_eq!(actual, expected);
    }

    // MARK: `clean()`

    #[test]
    fn test_clean() {
        // Each case is an `(input, expected)` pair, checked in order.
        const CASES: &[(&str, &str)] = &[
            ("foo-bar", "foo_bar"),
            ("foo.bar", "foo_bar"),
            ("foo bar", "foo_bar"),
            ("foo@bar", "foo_bar"),
            ("foo#bar", "foo_bar"),
            ("foo!bar", "foo_bar"),
            ("foo_bar", "foo_bar"),
            ("FooBar", "Foo_Bar"),
            ("foo123", "foo_123"),
            ("_foo", "foo"),
            ("_foo", "foo"),
            ("__foo", "foo"),
            // Digits are in `XID_Continue`, so they should be preserved.
            ("123foo", "123_foo"),
            ("9bar", "9_bar"),
            // Non-ASCII characters that are valid in identifiers should be
            // preserved; characters that aren't should be replaced.
            ("café", "café"),
            ("foo™bar", "foo_bar"),
            // Invalid characters should be collapsed.
            ("foo---bar", "foo_bar"),
            ("foo...bar", "foo_bar"),
        ];

        for &(input, expected) in CASES {
            assert_eq!(clean(input), expected);
        }
    }

    // MARK: Scopes

    #[test]
    fn test_codegen_ident_scope_handles_empty() {
        let arena = Arena::new();
        let mut idents = UniqueIdents::new(&arena);
        let name = idents.ident("");

        let as_field = emit(CodegenIdentUsage::Field(name));
        let expected_field: syn::Ident = parse_quote!(_1);
        assert_eq!(as_field, expected_field);

        let as_type = emit(CodegenIdentUsage::Type(name));
        let expected_type: syn::Ident = parse_quote!(_1);
        assert_eq!(as_type, expected_type);
    }

    #[test]
    fn test_codegen_ident_scope_handles_numeric_names() {
        let arena = Arena::new();
        let mut idents = UniqueIdents::new(&arena);

        let zero = idents.ident("0");
        let actual = emit(CodegenIdentUsage::Field(zero));
        let expected: syn::Ident = parse_quote!(_1);
        assert_eq!(actual, expected);

        let one = idents.ident("1");
        let actual = emit(CodegenIdentUsage::Type(one));
        let expected: syn::Ident = parse_quote!(_2);
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_codegen_ident_scope_handles_reserved_suffixes() {
        let arena = Arena::new();
        let mut idents = UniqueIdents::new(&arena);

        let reserved = idents.ident("crate");
        let actual = emit(CodegenIdentUsage::Method(reserved));
        let expected: syn::Ident = parse_quote!(crate2);
        assert_eq!(actual, expected);

        let suffixed = idents.ident("crate2");
        let actual = emit(CodegenIdentUsage::Method(suffixed));
        let expected: syn::Ident = parse_quote!(crate3);
        assert_eq!(actual, expected);
    }
}