ploidy_codegen_rust/
naming.rs

1use std::{borrow::Cow, cmp::Ordering, fmt::Display, ops::Deref};
2
3use heck::{AsPascalCase, AsSnekCase};
4use itertools::Itertools;
5use ploidy_core::{
6    codegen::{
7        UniqueNames,
8        unique::{UniqueNamesScope, WordSegments},
9    },
10    ir::{
11        InlineIrTypePathSegment, InlineIrTypeView, IrStructFieldName, IrStructFieldNameHint,
12        IrUntaggedVariantNameHint, PrimitiveIrType, SchemaIrTypeView, View,
13    },
14};
15use proc_macro2::{Ident, Span, TokenStream};
16use quote::{IdentFragment, ToTokens, TokenStreamExt, format_ident};
17use ref_cast::{RefCastCustom, ref_cast_custom};
18
// Keywords that can't be used as identifiers, even with `r#`.
//
// `CodegenIdent::new` prefixes a cleaned name that matches one of these
// with `_`, and `CodegenIdentScope::with_reserved` pre-reserves all of
// them in every scope it creates.
const KEYWORDS: &[&str] = &["crate", "self", "super", "Self"];
21
/// The name of a generated type, backed by either a schema type view
/// or an inline type view.
///
/// The [`ToTokens`] implementation renders the name as a single identifier.
#[derive(Clone, Copy, Debug)]
pub enum CodegenTypeName<'a> {
    /// Named after the [`CodegenIdent`] stored in the schema view's
    /// extensions, rendered as a type name.
    Schema(&'a SchemaIrTypeView<'a>),
    /// Named by concatenating the rendered segments of the inline
    /// view's path.
    Inline(&'a InlineIrTypeView<'a>),
}

impl<'a> CodegenTypeName<'a> {
    /// Wraps this name in a [`CodegenTypeNameSortKey`], so that it can be
    /// compared and sorted.
    #[inline]
    pub fn into_sort_key(self) -> CodegenTypeNameSortKey<'a> {
        CodegenTypeNameSortKey(self)
    }
}
34
35impl ToTokens for CodegenTypeName<'_> {
36    fn to_tokens(&self, tokens: &mut TokenStream) {
37        match self {
38            Self::Schema(view) => {
39                let ident = view.extensions().get::<CodegenIdent>().unwrap();
40                tokens.append_all(CodegenIdentUsage::Type(&ident).to_token_stream())
41            }
42            Self::Inline(view) => {
43                let ident = view
44                    .path()
45                    .segments
46                    .iter()
47                    .map(CodegenTypePathSegment)
48                    .map(|segment| format_ident!("{}", segment))
49                    .reduce(|a, b| format_ident!("{}{}", a, b))
50                    .unwrap();
51                tokens.append(ident);
52            }
53        }
54    }
55}
56
/// A comparator that sorts type names lexicographically.
///
/// Schema names are compared by name and inline names by path; every
/// schema name sorts before every inline name.
#[derive(Clone, Copy, Debug)]
pub struct CodegenTypeNameSortKey<'a>(CodegenTypeName<'a>);

impl<'a> CodegenTypeNameSortKey<'a> {
    /// Unwraps the sort key, returning the underlying type name.
    #[inline]
    pub fn into_name(self) -> CodegenTypeName<'a> {
        self.0
    }
}
67
68impl Eq for CodegenTypeNameSortKey<'_> {}
69
70impl Ord for CodegenTypeNameSortKey<'_> {
71    fn cmp(&self, other: &Self) -> Ordering {
72        match (&self.0, &other.0) {
73            (CodegenTypeName::Schema(a), CodegenTypeName::Schema(b)) => a.name().cmp(b.name()),
74            (CodegenTypeName::Inline(a), CodegenTypeName::Inline(b)) => a.path().cmp(b.path()),
75            (CodegenTypeName::Schema(_), CodegenTypeName::Inline(_)) => Ordering::Less,
76            (CodegenTypeName::Inline(_), CodegenTypeName::Schema(_)) => Ordering::Greater,
77        }
78    }
79}
80
81impl PartialEq for CodegenTypeNameSortKey<'_> {
82    fn eq(&self, other: &Self) -> bool {
83        self.cmp(other).is_eq()
84    }
85}
86
87impl PartialOrd for CodegenTypeNameSortKey<'_> {
88    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
89        Some(self.cmp(other))
90    }
91}
92
93/// A string that's statically guaranteed to be valid for any
94/// [`CodegenIdentUsage`].
95#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
96pub struct CodegenIdent(String);
97
98impl CodegenIdent {
99    /// Creates an identifier for any usage.
100    pub fn new(s: &str) -> Self {
101        let s = clean(s);
102        if KEYWORDS.contains(&s.as_str()) {
103            Self(format!("_{s}"))
104        } else {
105            Self(s)
106        }
107    }
108}
109
110impl Deref for CodegenIdent {
111    type Target = CodegenIdentRef;
112
113    fn deref(&self) -> &Self::Target {
114        CodegenIdentRef::new(&self.0)
115    }
116}
117
/// A string slice that's guaranteed to be valid for any [`CodegenIdentUsage`].
///
/// This is the borrowed counterpart of [`CodegenIdent`], which
/// dereferences to it.
#[derive(Debug, Eq, Ord, PartialEq, PartialOrd, RefCastCustom)]
#[repr(transparent)]
pub struct CodegenIdentRef(str);

impl CodegenIdentRef {
    // Reinterprets a `&str` as a `&CodegenIdentRef`. The function is private,
    // so only this module can create references of this type; the body is
    // supplied by the `#[ref_cast_custom]` attribute.
    #[ref_cast_custom]
    fn new(s: &str) -> &Self;
}
127
/// A [`CodegenIdentRef`] paired with the position that it's used in.
///
/// The usage determines the case that the name is rendered in by the
/// [`Display`] and [`ToTokens`] implementations.
#[derive(Clone, Copy, Debug)]
pub enum CodegenIdentUsage<'a> {
    /// A module name; rendered in snake case.
    Module(&'a CodegenIdentRef),
    /// A type name; rendered in Pascal case.
    Type(&'a CodegenIdentRef),
    /// A field name; rendered in snake case.
    Field(&'a CodegenIdentRef),
    /// A variant name; rendered in Pascal case.
    Variant(&'a CodegenIdentRef),
    /// A parameter name; rendered in snake case.
    Param(&'a CodegenIdentRef),
    /// A method name; rendered in snake case.
    Method(&'a CodegenIdentRef),
}
137
138impl Display for CodegenIdentUsage<'_> {
139    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
140        match self {
141            Self::Module(name) | Self::Field(name) | Self::Param(name) | Self::Method(name) => {
142                if name.0.starts_with(unicode_ident::is_xid_start) {
143                    write!(f, "{}", AsSnekCase(&name.0))
144                } else {
145                    // `name` doesn't start with `XID_Start` (e.g., "1099KStatus"),
146                    // so prefix it with `_`; everything after is known to be
147                    // `XID_Continue`.
148                    write!(f, "_{}", AsSnekCase(&name.0))
149                }
150            }
151            Self::Type(name) | Self::Variant(name) => {
152                if name.0.starts_with(unicode_ident::is_xid_start) {
153                    write!(f, "{}", AsPascalCase(&name.0))
154                } else {
155                    write!(f, "_{}", AsPascalCase(&name.0))
156                }
157            }
158        }
159    }
160}
161
162impl ToTokens for CodegenIdentUsage<'_> {
163    fn to_tokens(&self, tokens: &mut TokenStream) {
164        let s = self.to_string();
165        let ident = syn::parse_str(&s).unwrap_or_else(|_| Ident::new_raw(&s, Span::call_site()));
166        tokens.append(ident);
167    }
168}
169
170/// A scope for generating unique, valid Rust identifiers.
171#[derive(Debug)]
172pub struct CodegenIdentScope<'a>(UniqueNamesScope<'a>);
173
174impl<'a> CodegenIdentScope<'a> {
175    /// Creates a new identifier scope that's backed by the given arena.
176    pub fn new(arena: &'a UniqueNames) -> Self {
177        Self::with_reserved(arena, &[])
178    }
179
180    /// Creates a new identifier scope that's backed by the given arena,
181    /// with additional pre-reserved names.
182    pub fn with_reserved(arena: &'a UniqueNames, reserved: &[&str]) -> Self {
183        Self(arena.scope_with_reserved(itertools::chain!(
184            reserved.iter().copied(),
185            KEYWORDS.iter().copied(),
186            std::iter::once("")
187        )))
188    }
189
190    /// Cleans the input string and returns a name that's unique
191    /// within this scope, and valid for any [`CodegenIdentUsage`].
192    pub fn uniquify(&mut self, name: &str) -> CodegenIdent {
193        CodegenIdent(self.0.uniquify(&clean(name)).into_owned())
194    }
195}
196
197#[derive(Clone, Copy, Debug)]
198pub struct CodegenUntaggedVariantName(pub IrUntaggedVariantNameHint);
199
200impl ToTokens for CodegenUntaggedVariantName {
201    fn to_tokens(&self, tokens: &mut TokenStream) {
202        use IrUntaggedVariantNameHint::*;
203        let s = match self.0 {
204            Primitive(PrimitiveIrType::String) => "String".into(),
205            Primitive(PrimitiveIrType::I32) => "I32".into(),
206            Primitive(PrimitiveIrType::I64) => "I64".into(),
207            Primitive(PrimitiveIrType::F32) => "F32".into(),
208            Primitive(PrimitiveIrType::F64) => "F64".into(),
209            Primitive(PrimitiveIrType::Bool) => "Bool".into(),
210            Primitive(PrimitiveIrType::DateTime) => "DateTime".into(),
211            Primitive(PrimitiveIrType::Date) => "Date".into(),
212            Primitive(PrimitiveIrType::Url) => "Url".into(),
213            Primitive(PrimitiveIrType::Uuid) => "Uuid".into(),
214            Primitive(PrimitiveIrType::Bytes) => "Bytes".into(),
215            Array => "Array".into(),
216            Map => "Map".into(),
217            Index(index) => Cow::Owned(format!("V{index}")),
218        };
219        tokens.append(Ident::new(&s, Span::call_site()));
220    }
221}
222
223#[derive(Clone, Copy, Debug)]
224pub struct CodegenStructFieldName(pub IrStructFieldNameHint);
225
226impl ToTokens for CodegenStructFieldName {
227    fn to_tokens(&self, tokens: &mut TokenStream) {
228        match self.0 {
229            IrStructFieldNameHint::Index(index) => {
230                CodegenIdentUsage::Field(&CodegenIdent(format!("variant_{index}")))
231                    .to_tokens(tokens)
232            }
233        }
234    }
235}
236
237#[derive(Clone, Copy, Debug)]
238pub struct CodegenTypePathSegment<'a>(&'a InlineIrTypePathSegment<'a>);
239
240impl IdentFragment for CodegenTypePathSegment<'_> {
241    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
242        use InlineIrTypePathSegment::*;
243        match self.0 {
244            // Segments are part of an inline type path that always has a root prefix,
245            // so we don't need to check for `XID_Start`.
246            Operation(name) => write!(f, "{}", AsPascalCase(clean(name))),
247            Parameter(name) => write!(f, "{}", AsPascalCase(clean(name))),
248            Request => f.write_str("Request"),
249            Response => f.write_str("Response"),
250            Field(IrStructFieldName::Name(name)) => {
251                write!(f, "{}", AsPascalCase(clean(name)))
252            }
253            Field(IrStructFieldName::Hint(IrStructFieldNameHint::Index(index))) => {
254                write!(f, "Variant{index}")
255            }
256            MapValue => f.write_str("Value"),
257            ArrayItem => f.write_str("Item"),
258            Variant(index) => write!(f, "V{index}"),
259        }
260    }
261}
262
263/// Makes a string suitable for inclusion within a Rust identifier.
264///
265/// Cleaning segments the string on word boundaries, collapses all
266/// non-`XID_Continue` characters into new boundaries, and
267/// reassembles the string. This makes the string resilient to
268/// case transformations, which also collapse boundaries, and so
269/// can produce duplicates in some cases.
270///
271/// Note that the result may not itself be a valid Rust identifier,
272/// because Rust identifiers must start with `XID_Start`.
273/// This is checked and handled in [`CodegenIdentUsage`].
274fn clean(s: &str) -> String {
275    WordSegments::new(s)
276        .flat_map(|s| s.split(|c| !unicode_ident::is_xid_continue(c)))
277        .join("_")
278}
279
#[cfg(test)]
mod tests {
    use super::*;

    use pretty_assertions::assert_eq;
    use syn::parse_quote;

    // MARK: Helpers

    /// Renders `tokens`, checks that the output parses as a single
    /// identifier, and returns that identifier's string form
    /// (including any `r#` prefix).
    fn render(tokens: impl ToTokens) -> String {
        let ident: syn::Ident = parse_quote!(#tokens);
        ident.to_string()
    }

    /// Renders the variant name for an untagged variant name hint.
    fn untagged(hint: IrUntaggedVariantNameHint) -> String {
        render(CodegenUntaggedVariantName(hint))
    }

    /// Renders the variant name for a primitive untagged variant.
    fn primitive(ty: PrimitiveIrType) -> String {
        untagged(IrUntaggedVariantNameHint::Primitive(ty))
    }

    // MARK: Usages

    #[test]
    fn test_codegen_ident_type() {
        let ident = CodegenIdent::new("pet_store");
        assert_eq!(render(CodegenIdentUsage::Type(&ident)), "PetStore");
    }

    #[test]
    fn test_codegen_ident_field() {
        let ident = CodegenIdent::new("petStore");
        assert_eq!(render(CodegenIdentUsage::Field(&ident)), "pet_store");
    }

    #[test]
    fn test_codegen_ident_module() {
        let ident = CodegenIdent::new("MyModule");
        assert_eq!(render(CodegenIdentUsage::Module(&ident)), "my_module");
    }

    #[test]
    fn test_codegen_ident_variant() {
        let ident = CodegenIdent::new("http_error");
        assert_eq!(render(CodegenIdentUsage::Variant(&ident)), "HttpError");
    }

    #[test]
    fn test_codegen_ident_param() {
        let ident = CodegenIdent::new("userId");
        assert_eq!(render(CodegenIdentUsage::Param(&ident)), "user_id");
    }

    #[test]
    fn test_codegen_ident_method() {
        let ident = CodegenIdent::new("getUserById");
        assert_eq!(render(CodegenIdentUsage::Method(&ident)), "get_user_by_id");
    }

    // MARK: Special characters

    #[test]
    fn test_codegen_ident_handles_rust_keywords() {
        let ident = CodegenIdent::new("type");
        assert_eq!(render(CodegenIdentUsage::Field(&ident)), "r#type");
    }

    #[test]
    fn test_codegen_ident_handles_invalid_start_chars() {
        let ident = CodegenIdent::new("123foo");
        assert_eq!(render(CodegenIdentUsage::Field(&ident)), "_123_foo");
    }

    #[test]
    fn test_codegen_ident_handles_special_chars() {
        let ident = CodegenIdent::new("foo-bar-baz");
        assert_eq!(render(CodegenIdentUsage::Field(&ident)), "foo_bar_baz");
    }

    #[test]
    fn test_codegen_ident_handles_number_prefix() {
        let ident = CodegenIdent::new("1099KStatus");
        assert_eq!(render(CodegenIdentUsage::Field(&ident)), "_1099_k_status");
        assert_eq!(render(CodegenIdentUsage::Type(&ident)), "_1099KStatus");
    }

    // MARK: Untagged variant names

    #[test]
    fn test_untagged_variant_name_string() {
        assert_eq!(primitive(PrimitiveIrType::String), "String");
    }

    #[test]
    fn test_untagged_variant_name_i32() {
        assert_eq!(primitive(PrimitiveIrType::I32), "I32");
    }

    #[test]
    fn test_untagged_variant_name_i64() {
        assert_eq!(primitive(PrimitiveIrType::I64), "I64");
    }

    #[test]
    fn test_untagged_variant_name_f32() {
        assert_eq!(primitive(PrimitiveIrType::F32), "F32");
    }

    #[test]
    fn test_untagged_variant_name_f64() {
        assert_eq!(primitive(PrimitiveIrType::F64), "F64");
    }

    #[test]
    fn test_untagged_variant_name_bool() {
        assert_eq!(primitive(PrimitiveIrType::Bool), "Bool");
    }

    #[test]
    fn test_untagged_variant_name_datetime() {
        assert_eq!(primitive(PrimitiveIrType::DateTime), "DateTime");
    }

    #[test]
    fn test_untagged_variant_name_date() {
        assert_eq!(primitive(PrimitiveIrType::Date), "Date");
    }

    #[test]
    fn test_untagged_variant_name_url() {
        assert_eq!(primitive(PrimitiveIrType::Url), "Url");
    }

    #[test]
    fn test_untagged_variant_name_uuid() {
        assert_eq!(primitive(PrimitiveIrType::Uuid), "Uuid");
    }

    #[test]
    fn test_untagged_variant_name_bytes() {
        assert_eq!(primitive(PrimitiveIrType::Bytes), "Bytes");
    }

    #[test]
    fn test_untagged_variant_name_index() {
        assert_eq!(untagged(IrUntaggedVariantNameHint::Index(0)), "V0");
        assert_eq!(untagged(IrUntaggedVariantNameHint::Index(42)), "V42");
    }

    #[test]
    fn test_untagged_variant_name_array() {
        assert_eq!(untagged(IrUntaggedVariantNameHint::Array), "Array");
    }

    #[test]
    fn test_untagged_variant_name_map() {
        assert_eq!(untagged(IrUntaggedVariantNameHint::Map), "Map");
    }

    // MARK: Struct field names

    #[test]
    fn test_struct_field_name_index() {
        let field_name = CodegenStructFieldName(IrStructFieldNameHint::Index(0));
        assert_eq!(render(field_name), "variant_0");

        let field_name = CodegenStructFieldName(IrStructFieldNameHint::Index(5));
        assert_eq!(render(field_name), "variant_5");
    }

    // MARK: `clean()`

    #[test]
    fn test_clean() {
        // Characters that aren't valid in identifiers become boundaries.
        assert_eq!(clean("foo-bar"), "foo_bar");
        assert_eq!(clean("foo.bar"), "foo_bar");
        assert_eq!(clean("foo bar"), "foo_bar");
        assert_eq!(clean("foo@bar"), "foo_bar");
        assert_eq!(clean("foo#bar"), "foo_bar");
        assert_eq!(clean("foo!bar"), "foo_bar");

        // Existing word boundaries are made explicit.
        assert_eq!(clean("foo_bar"), "foo_bar");
        assert_eq!(clean("FooBar"), "Foo_Bar");
        assert_eq!(clean("foo123"), "foo123");

        // Leading underscores are dropped.
        assert_eq!(clean("_foo"), "foo");
        assert_eq!(clean("__foo"), "foo");

        // Digits are in `XID_Continue`, so they should be preserved.
        assert_eq!(clean("123foo"), "123_foo");
        assert_eq!(clean("9bar"), "9_bar");

        // Non-ASCII characters that are valid in identifiers should be preserved;
        // characters that aren't should be replaced.
        assert_eq!(clean("café"), "café");
        assert_eq!(clean("foo™bar"), "foo_bar");

        // Invalid characters should be collapsed.
        assert_eq!(clean("foo---bar"), "foo_bar");
        assert_eq!(clean("foo...bar"), "foo_bar");
    }

    // MARK: Scopes

    #[test]
    fn test_codegen_ident_scope_handles_empty() {
        let unique = UniqueNames::new();
        let mut scope = CodegenIdentScope::new(&unique);
        let ident = scope.uniquify("");

        assert_eq!(render(CodegenIdentUsage::Field(&ident)), "_2");
        assert_eq!(render(CodegenIdentUsage::Type(&ident)), "_2");
    }
}