Skip to main content

ploidy_core/codegen/
unique.rs

1//! Naming support for generated code.
2//!
3//! OpenAPI specs use different naming conventions for their types, operations,
4//! and resources. When codegen emits these names, it needs to transform them
5//! into identifiers that conform to the grammar and idiomatic case style of
6//! each target language.
7//!
8//! Codegen segments OpenAPI names into [`NamePart`] segments. A [`UniqueNames`]
9//! scope turns these segment sequences into a representation that's unique
10//! within that scope, and stable regardless of whether it's rendered
11//! [`AsPascalCase`], [`AsSnakeCase`], or [`AsKebabCase`].
12
13use std::{
14    fmt::{Display, Formatter, Result as FmtResult, Write},
15    iter::{self, Peekable},
16    mem,
17};
18
19use icu_normalizer::{ComposingNormalizer, ComposingNormalizerBorrowed};
20use itertools::Itertools;
21use rustc_hash::{FxHashMap, FxHashSet};
22use unicase::UniCase;
23
24use crate::arena::Arena;
25
/// The NFC (canonical composition) normalizer shared by this module.
///
/// [`segments`] runs every [`NamePart::Text`] part through this normalizer
/// before classifying its characters.
static NFC: ComposingNormalizerBorrowed<'_> = ComposingNormalizer::new_nfc();
27
28/// A scope that claims target language names before final case rendering.
29///
30/// [`UniqueNames`] canonicalizes source name parts into word segments,
31/// assigns collision suffixes for already-claimed names, and returns
32/// opaque [`UniqueName`] handles for codegen to render in any case style.
33#[derive(Debug)]
34pub struct UniqueNames<'a> {
35    arena: &'a Arena,
36    space: FxHashMap<Box<[UniCase<&'a str>]>, FxHashSet<SuffixSlot>>,
37}
38
39impl<'a> UniqueNames<'a> {
40    /// Creates an empty name scope.
41    pub fn new(arena: &'a Arena) -> Self {
42        Self {
43            arena,
44            space: FxHashMap::default(),
45        }
46    }
47
48    /// Creates a name scope that reserves existing names.
49    pub fn with_reserved<'part, R>(arena: &'a Arena, reserved: R) -> Self
50    where
51        R: IntoIterator,
52        R::Item: IntoIterator<Item = NamePart<'part>>,
53    {
54        let mut space = FxHashMap::<_, FxHashSet<_>>::default();
55        for parts in reserved {
56            let segments = segments(parts)
57                .map(|WordSegment(text, boundary)| WordSegment(&*arena.alloc_str(&text), boundary))
58                .collect_vec();
59            let decomposed = DecomposedName::new(&segments);
60            space
61                .entry(decomposed.prefix().map(|s| UniCase::new(s.0)).collect())
62                .or_default()
63                .insert(decomposed.slot());
64        }
65        Self { arena, space }
66    }
67
68    /// Claims a segmented source name, and returns a name that's
69    /// unique within this scope.
70    ///
71    /// If the name has already been claimed, the returned name receives
72    /// the next free unique numeric suffix.
73    pub fn claim<'part>(
74        &mut self,
75        parts: impl IntoIterator<Item = NamePart<'part>>,
76    ) -> UniqueName<'a> {
77        let segments = segments(parts)
78            .map(|WordSegment(text, boundary)| WordSegment(&*self.arena.alloc_str(&text), boundary))
79            .collect_vec();
80        UniqueName(self.claim_from_segments(&segments))
81    }
82
83    /// Claims a name that's already unique in another scope, and returns
84    /// a unique form of that name in this scope.
85    pub fn adopt(&mut self, name: UniqueName<'a>) -> UniqueName<'a> {
86        UniqueName(self.claim_from_segments(name.0))
87    }
88
89    fn claim_from_segments(
90        &mut self,
91        segments: &[WordSegment<&'a str>],
92    ) -> &'a [WordSegment<&'a str>] {
93        let decomposed = DecomposedName::new(segments);
94        let occupied = self
95            .space
96            .entry(decomposed.prefix().map(|s| UniCase::new(s.0)).collect())
97            .or_default();
98
99        match decomposed {
100            DecomposedName::Empty { mut slot } => {
101                // An empty or digit-only name becomes a single word
102                // that's just the unique suffix.
103                while !occupied.insert(SuffixSlot::Number(slot)) {
104                    slot = slot.checked_add(1).unwrap();
105                }
106                std::slice::from_ref(self.arena.alloc(WordSegment(
107                    self.arena.alloc_fmt(format_args!("{slot}")),
108                    WordBoundary::First,
109                )))
110            }
111            DecomposedName::Text {
112                suffix: DecomposedSuffix::Source { mut slot, boundary },
113                ..
114            } => {
115                // A name with an existing numeric suffix reuses the
116                // boundary between the last stem and original suffix,
117                // then adds the unique suffix.
118                while !occupied.insert(SuffixSlot::Number(slot)) {
119                    slot = slot.checked_add(1).unwrap();
120                }
121                self.arena
122                    .alloc_slice(decomposed.prefix().chain(iter::once(WordSegment(
123                        self.arena.alloc_fmt(format_args!("{slot}")),
124                        boundary,
125                    ))))
126            }
127            DecomposedName::Text {
128                suffix: DecomposedSuffix::Absent,
129                ..
130            } => {
131                let mut slot = SuffixSlot::Absent;
132                while !occupied.insert(slot) {
133                    slot = match slot {
134                        SuffixSlot::Absent => SuffixSlot::Number(2),
135                        SuffixSlot::Number(slot) => {
136                            SuffixSlot::Number(slot.checked_add(1).unwrap())
137                        }
138                    };
139                }
140                match slot {
141                    // A unique name doesn't need a suffix.
142                    SuffixSlot::Absent => self.arena.alloc_slice(decomposed.prefix()),
143                    // An unsuffixed name adds a separator, then the unique suffix.
144                    SuffixSlot::Number(slot) => {
145                        self.arena
146                            .alloc_slice(decomposed.prefix().chain(iter::once(WordSegment(
147                                self.arena.alloc_fmt(format_args!("{slot}")),
148                                WordBoundary::After(SegmentBoundary::Separator),
149                            ))))
150                    }
151                }
152            }
153        }
154    }
155}
156
/// A segment of an OpenAPI source name.
///
/// A name is passed to [`UniqueNames::claim`] or
/// [`UniqueNames::with_reserved`] as a sequence of these parts.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum NamePart<'a> {
    /// Text to normalize and split into [`UniqueName`] segments.
    ///
    /// Whitespace, `_`, and `-` inside the text also act as word boundaries.
    Text(&'a str),
    /// An explicit word boundary.
    ///
    /// Consecutive boundaries collapse into one; leading and trailing
    /// boundaries don't produce a segment.
    Boundary,
}
165
166/// A name that's unique within a scope, and that can be rendered in any case.
167#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
168pub struct UniqueName<'a>(&'a [WordSegment<&'a str>]);
169
170impl<'a> UniqueName<'a> {
171    /// Returns the first character of this name's segment text.
172    #[inline]
173    pub fn first_char(&self) -> Option<char> {
174        self.0.first().and_then(|s| s.0.chars().next())
175    }
176
177    /// Returns the segments that make up this name.
178    #[inline]
179    fn segments(&self) -> impl Iterator<Item = NameSegment<'a>> {
180        self.0.iter().flat_map(|&WordSegment(text, boundary)| {
181            either!(match boundary {
182                WordBoundary::First => [NameSegment::Text(text)],
183                WordBoundary::After(boundary) =>
184                    [NameSegment::Boundary(boundary), NameSegment::Text(text)],
185            })
186            .into_iter()
187        })
188    }
189}
190
/// A canonical text or boundary segment in a [`UniqueName`].
///
/// The case formatters iterate over these to decide what to write for
/// each word, and what to write between words.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
enum NameSegment<'a> {
    /// The canonicalized segment text.
    Text(&'a str),
    /// A segment boundary.
    ///
    /// A boundary is only produced before the second and later words.
    Boundary(SegmentBoundary),
}
199
200/// Formats a [`UniqueName`] as `PascalCase`.
201///
202/// Each segment starts with an uppercase character and continues in lowercase.
203pub struct AsPascalCase<'a>(pub UniqueName<'a>);
204
205impl Display for AsPascalCase<'_> {
206    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
207        for segment in self.0.segments() {
208            if let NameSegment::Text(text) = segment {
209                let mut chars = text.chars();
210                if let Some(c) = chars.next() {
211                    write!(f, "{}", c.to_uppercase())?;
212                    chars.try_for_each(|c| write!(f, "{}", c.to_lowercase()))?;
213                }
214            }
215        }
216        Ok(())
217    }
218}
219
220/// Formats a [`UniqueName`] as `snake_case`.
221///
222/// Case and separator boundaries become `_`.
223/// Letter-to-digit and digit-to-letter boundaries collapse to preserve
224/// common names like `sha256`, `http2`, `x509`, and `s3`.
225pub struct AsSnakeCase<'a>(pub UniqueName<'a>);
226
227impl Display for AsSnakeCase<'_> {
228    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
229        for segment in self.0.segments() {
230            match segment {
231                NameSegment::Boundary(
232                    SegmentBoundary::LetterDigit | SegmentBoundary::DigitLetter,
233                ) => continue,
234                NameSegment::Boundary(_) => f.write_char('_')?,
235                NameSegment::Text(text) => text
236                    .chars()
237                    .try_for_each(|c| write!(f, "{}", c.to_lowercase()))?,
238            }
239        }
240        Ok(())
241    }
242}
243
244/// Formats a name as `kebab-case`.
245///
246/// Case and separator boundaries become `-`.
247/// Letter-to-digit and digit-to-letter boundaries collapse, like
248/// [`AsSnakeCase`].
249pub struct AsKebabCase<'a>(pub UniqueName<'a>);
250
251impl Display for AsKebabCase<'_> {
252    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
253        for segment in self.0.segments() {
254            match segment {
255                NameSegment::Boundary(
256                    SegmentBoundary::LetterDigit | SegmentBoundary::DigitLetter,
257                ) => continue,
258                NameSegment::Boundary(_) => f.write_char('-')?,
259                NameSegment::Text(text) => text
260                    .chars()
261                    .try_for_each(|c| write!(f, "{}", c.to_lowercase()))?,
262            }
263        }
264        Ok(())
265    }
266}
267
/// A boundary between word segments.
///
/// [`AsSnakeCase`] and [`AsKebabCase`] write a delimiter for `Separator`
/// and `Case` boundaries, and nothing for `LetterDigit` and `DigitLetter`
/// boundaries. [`AsPascalCase`] ignores boundaries altogether.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
enum SegmentBoundary {
    /// The segment follows one or more separator parts.
    Separator,
    /// The segment follows a case transition.
    Case,
    /// The segment follows a letter-to-digit transition.
    LetterDigit,
    /// The segment follows a digit-to-letter transition.
    DigitLetter,
}
280
281enum DecomposedName<'segments, 'text> {
282    Empty {
283        slot: usize,
284    },
285    Text {
286        init: &'segments [WordSegment<&'text str>],
287        last: Option<WordSegment<&'text str>>,
288        suffix: DecomposedSuffix,
289    },
290}
291
292impl<'segments, 'text> DecomposedName<'segments, 'text> {
293    fn new(segments: &'segments [WordSegment<&'text str>]) -> Self {
294        if segments.is_empty() {
295            return Self::Empty { slot: 1 };
296        }
297        if let Some((&WordSegment(last, boundary), head)) = segments.split_last() {
298            let stem = last.trim_end_matches(|c: char| c.is_ascii_digit());
299            if let Some(slot) = last.strip_prefix(stem)
300                && let Ok(slot) = slot.parse::<usize>()
301            {
302                if stem.is_empty() {
303                    if head.is_empty() {
304                        return Self::Empty { slot: slot.max(1) };
305                    }
306                    return Self::Text {
307                        init: head,
308                        last: None,
309                        suffix: DecomposedSuffix::Source { slot, boundary },
310                    };
311                }
312                let last = match head {
313                    [] => WordSegment(stem, WordBoundary::First),
314                    [..] => WordSegment(stem, WordBoundary::After(SegmentBoundary::Separator)),
315                };
316                return Self::Text {
317                    init: head,
318                    last: Some(last),
319                    suffix: DecomposedSuffix::Source {
320                        slot,
321                        boundary: WordBoundary::After(SegmentBoundary::LetterDigit),
322                    },
323                };
324            }
325        }
326        Self::Text {
327            init: segments,
328            last: None,
329            suffix: DecomposedSuffix::Absent,
330        }
331    }
332
333    fn prefix(&self) -> impl Iterator<Item = WordSegment<&'text str>> {
334        let (init, last): (&'segments [_], Option<_>) = match self {
335            Self::Empty { .. } => (&[], None),
336            &Self::Text { init, last, .. } => (init, last),
337        };
338        init.iter().copied().chain(last)
339    }
340
341    fn slot(&self) -> SuffixSlot {
342        match *self {
343            Self::Empty { slot } => SuffixSlot::Number(slot),
344            Self::Text {
345                suffix: DecomposedSuffix::Absent,
346                ..
347            } => SuffixSlot::Absent,
348            Self::Text {
349                suffix: DecomposedSuffix::Source { slot, .. },
350                ..
351            } => SuffixSlot::Number(slot),
352        }
353    }
354}
355
/// The numeric suffix, if any, that [`DecomposedName::new`] found at the
/// end of a name.
#[derive(Clone, Copy)]
enum DecomposedSuffix {
    /// The name doesn't end in ASCII digits that parse as a `usize`.
    Absent,
    /// The name ends in a number.
    ///
    /// `slot` is the parsed number, and `boundary` is the boundary to
    /// place before the suffix when the name is reassembled.
    Source { slot: usize, boundary: WordBoundary },
}
361
/// A slot that a name can occupy among the names that share its prefix.
///
/// [`UniqueNames`] keeps a set of occupied slots for every prefix.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum SuffixSlot {
    /// The prefix on its own, without a numeric suffix.
    Absent,
    /// The prefix followed by this number.
    Number(usize),
}
367
368/// Segments name parts into words.
369///
370/// Text parts are normalized to NFC before segmentation.
371///
372/// Word boundaries occur on:
373///
374/// * Whitespace, `-`, `_`, and explicit [`NamePart::Boundary`] parts.
375/// * Lowercase-to-uppercase transitions (`httpResponse`).
376/// * Uppercase-to-lowercase after an uppercase run (`XMLHttp`).
377/// * Letter-to-ASCII-digit transitions (`sha256`).
378/// * ASCII digit-to-letter transitions (`250g`).
379fn segments<'a>(
380    input: impl IntoIterator<Item = NamePart<'a>>,
381) -> impl Iterator<Item = WordSegment<String>> {
382    WordSegments {
383        input: input
384            .into_iter()
385            .flat_map(|part| {
386                either!(match part {
387                    NamePart::Text(text) => NFC.normalize_iter(text.chars()).map(NameChar::from),
388                    NamePart::Boundary => iter::once(NameChar::Separator),
389                })
390            })
391            .peekable(),
392        state: WordState::Start,
393    }
394}
395
/// A character of a source name, classified for word segmentation.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum NameChar {
    /// A character that belongs to a word.
    Continue(char),
    /// A character, or explicit boundary, that separates two words.
    Separator,
}

impl From<char> for NameChar {
    /// Classifies `c`: whitespace and the snake_case and kebab-case
    /// delimiters (`_` and `-`) separate words; every other character is
    /// part of a word.
    fn from(c: char) -> Self {
        if matches!(c, '_' | '-') || c.is_whitespace() {
            Self::Separator
        } else {
            Self::Continue(c)
        }
    }
}
413
/// The active or pending word state in a [`WordSegments`].
#[derive(Clone)]
enum WordState {
    /// Before the first word.
    ///
    /// A word that starts from this state gets [`WordBoundary::First`].
    Start,
    /// Between words, with the boundary to apply to the next word.
    Between(SegmentBoundary),
    /// Inside a word that can be emitted by the next boundary.
    ///
    /// Holds the text accumulated so far, the boundary that precedes the
    /// word, and the character class used to detect the next boundary.
    InWord(String, WordBoundary, WordMode),
}
424
/// The character class of the active [`WordState::InWord`] state.
///
/// Characters that are neither cased nor ASCII digits are appended to the
/// current word without changing its mode.
#[derive(Clone, Copy)]
enum WordMode {
    /// Currently in an uncased alphanumeric segment.
    ///
    /// A word gets this mode when it starts with a character that is
    /// neither uppercase, lowercase, nor an ASCII digit.
    Uncased,
    /// Currently in a lowercase segment.
    Lowercase,
    /// Currently in an uppercase segment.
    Uppercase,
    /// Currently in a digit segment.
    Digit,
}
437
/// How a word segment relates to the segment before it.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
enum WordBoundary {
    /// The segment is the first word of the name; nothing precedes it.
    First,
    /// The segment follows another segment, across the given boundary.
    After(SegmentBoundary),
}
443
/// A word's text, paired with the boundary that precedes the word.
///
/// `T` is `String` while a name is being segmented, and `&str` once the
/// text has been copied into the arena.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
struct WordSegment<T>(T, WordBoundary);
446
447struct WordSegments<I: Iterator<Item = NameChar>> {
448    input: Peekable<I>,
449    state: WordState,
450}
451
452impl<I: Iterator<Item = NameChar>> Iterator for WordSegments<I> {
453    type Item = WordSegment<String>;
454
455    fn next(&mut self) -> Option<Self::Item> {
456        while let Some(c) = self.input.next() {
457            match c {
458                NameChar::Separator => {
459                    // Start a new word at this separator character.
460                    match mem::replace(
461                        &mut self.state,
462                        WordState::Between(SegmentBoundary::Separator),
463                    ) {
464                        WordState::InWord(text, boundary, _) => {
465                            while let Some(NameChar::Separator) = self.input.peek() {
466                                self.input.next();
467                            }
468                            self.state = WordState::Between(SegmentBoundary::Separator);
469                            return Some(WordSegment(text, boundary));
470                        }
471                        state => {
472                            self.state = state;
473                        }
474                    }
475                }
476                NameChar::Continue(c) if c.is_uppercase() => {
477                    match mem::replace(
478                        &mut self.state,
479                        WordState::Between(SegmentBoundary::Separator),
480                    ) {
481                        WordState::Start => {
482                            self.state = WordState::InWord(
483                                c.to_string(),
484                                WordBoundary::First,
485                                WordMode::Uppercase,
486                            );
487                        }
488                        WordState::Between(next_boundary) => {
489                            self.state = WordState::InWord(
490                                c.to_string(),
491                                WordBoundary::After(next_boundary),
492                                WordMode::Uppercase,
493                            );
494                        }
495                        WordState::InWord(
496                            mut text,
497                            boundary,
498                            WordMode::Uncased | WordMode::Uppercase,
499                        ) => {
500                            let next_is_lowercase = self.input.peek().is_some_and(|next| {
501                                matches!(next, NameChar::Continue(next) if next.is_lowercase())
502                            });
503                            if next_is_lowercase {
504                                // `XMLHttp` case; start a new word with this uppercase
505                                // character (the "H" in "Http").
506                                self.state = WordState::InWord(
507                                    c.to_string(),
508                                    WordBoundary::After(SegmentBoundary::Case),
509                                    WordMode::Uppercase,
510                                );
511                                return Some(WordSegment(text, boundary));
512                            }
513                            text.push(c);
514                            self.state = WordState::InWord(text, boundary, WordMode::Uppercase);
515                        }
516                        WordState::InWord(text, boundary, WordMode::Digit) => {
517                            let next_is_lowercase = self.input.peek().is_some_and(|next| {
518                                matches!(next, NameChar::Continue(next) if next.is_lowercase())
519                            });
520                            self.state = WordState::InWord(
521                                c.to_string(),
522                                WordBoundary::After(if next_is_lowercase {
523                                    SegmentBoundary::Case
524                                } else {
525                                    SegmentBoundary::DigitLetter
526                                }),
527                                WordMode::Uppercase,
528                            );
529                            return Some(WordSegment(text, boundary));
530                        }
531                        WordState::InWord(text, boundary, WordMode::Lowercase) => {
532                            // Start a new word at the uppercase side of a case boundary.
533                            self.state = WordState::InWord(
534                                c.to_string(),
535                                WordBoundary::After(SegmentBoundary::Case),
536                                WordMode::Uppercase,
537                            );
538                            return Some(WordSegment(text, boundary));
539                        }
540                    }
541                }
542                NameChar::Continue(c) if c.is_lowercase() => {
543                    match mem::replace(
544                        &mut self.state,
545                        WordState::Between(SegmentBoundary::Separator),
546                    ) {
547                        WordState::Start => {
548                            self.state = WordState::InWord(
549                                c.to_string(),
550                                WordBoundary::First,
551                                WordMode::Lowercase,
552                            );
553                        }
554                        WordState::Between(next_boundary) => {
555                            self.state = WordState::InWord(
556                                c.to_string(),
557                                WordBoundary::After(next_boundary),
558                                WordMode::Lowercase,
559                            );
560                        }
561                        WordState::InWord(
562                            mut text,
563                            boundary,
564                            WordMode::Uncased | WordMode::Lowercase | WordMode::Uppercase,
565                        ) => {
566                            text.push(c);
567                            self.state = WordState::InWord(text, boundary, WordMode::Lowercase);
568                        }
569                        WordState::InWord(text, boundary, WordMode::Digit) => {
570                            // Start a new word after a digit segment.
571                            self.state = WordState::InWord(
572                                c.to_string(),
573                                WordBoundary::After(SegmentBoundary::DigitLetter),
574                                WordMode::Lowercase,
575                            );
576                            return Some(WordSegment(text, boundary));
577                        }
578                    }
579                }
580                NameChar::Continue(c) if c.is_ascii_digit() => {
581                    match mem::replace(
582                        &mut self.state,
583                        WordState::Between(SegmentBoundary::Separator),
584                    ) {
585                        WordState::Start => {
586                            self.state = WordState::InWord(
587                                c.to_string(),
588                                WordBoundary::First,
589                                WordMode::Digit,
590                            );
591                        }
592                        WordState::Between(next_boundary) => {
593                            self.state = WordState::InWord(
594                                c.to_string(),
595                                WordBoundary::After(next_boundary),
596                                WordMode::Digit,
597                            );
598                        }
599                        WordState::InWord(mut text, boundary, WordMode::Digit) => {
600                            text.push(c);
601                            self.state = WordState::InWord(text, boundary, WordMode::Digit);
602                        }
603                        WordState::InWord(
604                            text,
605                            boundary,
606                            WordMode::Uncased | WordMode::Lowercase | WordMode::Uppercase,
607                        ) => {
608                            // Start a new word after a letter segment.
609                            self.state = WordState::InWord(
610                                c.to_string(),
611                                WordBoundary::After(SegmentBoundary::LetterDigit),
612                                WordMode::Digit,
613                            );
614                            return Some(WordSegment(text, boundary));
615                        }
616                    }
617                }
618                NameChar::Continue(c) => {
619                    // All other characters continue the current word.
620                    match mem::replace(
621                        &mut self.state,
622                        WordState::Between(SegmentBoundary::Separator),
623                    ) {
624                        WordState::Start => {
625                            self.state = WordState::InWord(
626                                c.to_string(),
627                                WordBoundary::First,
628                                WordMode::Uncased,
629                            );
630                        }
631                        WordState::Between(next_boundary) => {
632                            self.state = WordState::InWord(
633                                c.to_string(),
634                                WordBoundary::After(next_boundary),
635                                WordMode::Uncased,
636                            );
637                        }
638                        WordState::InWord(mut text, boundary, mode) => {
639                            text.push(c);
640                            self.state = WordState::InWord(text, boundary, mode);
641                        }
642                    }
643                }
644            }
645        }
646        if let WordState::InWord(text, boundary, _) = mem::replace(
647            &mut self.state,
648            WordState::Between(SegmentBoundary::Separator),
649        ) {
650            // Trailing word.
651            self.state = WordState::Between(SegmentBoundary::Separator);
652            return Some(WordSegment(text, boundary));
653        }
654        None
655    }
656}
657
658#[cfg(test)]
659mod tests {
660    use super::*;
661    use NamePart::{Boundary, Text};
662
663    use itertools::Itertools;
664
665    fn segments(parts: &[NamePart<'_>]) -> Vec<String> {
666        super::segments(parts.iter().copied())
667            .map(|WordSegment(text, _)| text)
668            .collect_vec()
669    }
670
671    #[test]
672    fn test_segment_camel_case() {
673        assert_eq!(segments(&[Text("camelCase")]), vec!["camel", "Case"]);
674        assert_eq!(segments(&[Text("httpResponse")]), vec!["http", "Response"]);
675    }
676
677    #[test]
678    fn test_segment_pascal_case() {
679        assert_eq!(segments(&[Text("PascalCase")]), vec!["Pascal", "Case"]);
680        assert_eq!(segments(&[Text("HttpResponse")]), vec!["Http", "Response"]);
681    }
682
683    #[test]
684    fn test_segment_snake_case() {
685        assert_eq!(
686            segments(&[Text("snake"), Boundary, Text("case")]),
687            vec!["snake", "case"]
688        );
689        assert_eq!(
690            segments(&[Text("http"), Boundary, Text("response")]),
691            vec!["http", "response"]
692        );
693    }
694
695    #[test]
696    fn test_segment_screaming_snake() {
697        assert_eq!(
698            segments(&[Text("SCREAMING"), Boundary, Text("SNAKE")]),
699            vec!["SCREAMING", "SNAKE"]
700        );
701        assert_eq!(
702            segments(&[Text("HTTP"), Boundary, Text("RESPONSE")]),
703            vec!["HTTP", "RESPONSE"]
704        );
705    }
706
707    #[test]
708    fn test_segment_consecutive_uppercase() {
709        assert_eq!(
710            segments(&[Text("XMLHttpRequest")]),
711            vec!["XML", "Http", "Request"]
712        );
713        assert_eq!(segments(&[Text("HTTPResponse")]), vec!["HTTP", "Response"]);
714        assert_eq!(
715            segments(&[Text("HTTP"), Boundary, Text("Response")]),
716            vec!["HTTP", "Response"]
717        );
718        assert_eq!(segments(&[Text("ALLCAPS")]), vec!["ALLCAPS"]);
719    }
720
721    #[test]
722    fn test_segment_unicode_case_boundaries() {
723        assert_eq!(segments(&[Text("\u{e9}clair")]), vec!["\u{e9}clair"]);
724        assert_eq!(segments(&[Text("\u{c9}clair")]), vec!["\u{c9}clair"]);
725        assert_eq!(
726            segments(&[Text("XML\u{c9}clair")]),
727            vec!["XML", "\u{c9}clair"]
728        );
729        assert_eq!(
730            segments(&[Text("CAF\u{c9}Token")]),
731            vec!["CAF\u{c9}", "Token"]
732        );
733        assert_eq!(segments(&[Text("\u{e9}Tag")]), vec!["\u{e9}", "Tag"]);
734        assert_eq!(segments(&[Text("\u{c9}Token")]), vec!["\u{c9}", "Token"]);
735        assert_eq!(segments(&[Text("\u{e9}HTTP")]), vec!["\u{e9}", "HTTP"]);
736        assert_eq!(
737            segments(&[Text("foo"), Boundary, Text("bar")]),
738            vec!["foo", "bar"]
739        );
740        assert_eq!(
741            segments(&[Text("foo"), Boundary, Boundary, Text("bar")]),
742            vec!["foo", "bar"]
743        );
744        assert_eq!(segments(&[Boundary, Text("foo"), Boundary]), vec!["foo"]);
745        assert_eq!(
746            segments(&[Text("foo"), Boundary, Text("2")]),
747            vec!["foo", "2"]
748        );
749    }
750
751    #[test]
752    fn test_segment_with_numbers() {
753        assert_eq!(segments(&[Text("Response2")]), vec!["Response", "2"]);
754        assert_eq!(
755            segments(&[Text("response"), Boundary, Text("2")]),
756            vec!["response", "2"]
757        );
758        assert_eq!(
759            segments(&[Text("HTTP2Protocol")]),
760            vec!["HTTP", "2", "Protocol"]
761        );
762        assert_eq!(
763            segments(&[Text("OAuth2Token")]),
764            vec!["O", "Auth", "2", "Token"]
765        );
766        assert_eq!(segments(&[Text("HTTP2XML")]), vec!["HTTP", "2", "XML"]);
767        assert_eq!(
768            segments(&[Text("1099KStatus")]),
769            vec!["1099", "K", "Status"]
770        );
771        assert_eq!(segments(&[Text("123abc")]), vec!["123", "abc"]);
772        assert_eq!(segments(&[Text("123ABC")]), vec!["123", "ABC"]);
773        assert_eq!(
774            segments(&[Text("Sha2"), Boundary, Text("56Digest")]),
775            vec!["Sha", "2", "56", "Digest"]
776        );
777    }
778
779    #[test]
780    fn test_segment_empty_and_special() {
781        assert!(segments(&[]).is_empty());
782        assert!(segments(&[Boundary, Boundary, Boundary]).is_empty());
783        assert_eq!(segments(&[Text("a")]), vec!["a"]);
784        assert_eq!(segments(&[Text("A")]), vec!["A"]);
785    }
786
787    #[test]
788    fn test_segment_mixed_separators() {
789        assert_eq!(
790            segments(&[Text("foo"), Boundary, Text("bar"), Boundary, Text("baz"),]),
791            vec!["foo", "bar", "baz"]
792        );
793        assert_eq!(
794            segments(&[Text("foo"), Boundary, Boundary, Text("bar")]),
795            vec!["foo", "bar"]
796        );
797    }
798
799    #[test]
800    fn test_segment_boundaries() {
801        let arena = Arena::new();
802        let mut names = UniqueNames::new(&arena);
803
804        let name = names.claim([Text("fooBar2"), Boundary, Text("baz3Qux")]);
805        assert_eq!(
806            name.segments().collect_vec(),
807            [
808                NameSegment::Text("foo"),
809                NameSegment::Boundary(SegmentBoundary::Case),
810                NameSegment::Text("Bar"),
811                NameSegment::Boundary(SegmentBoundary::LetterDigit),
812                NameSegment::Text("2"),
813                NameSegment::Boundary(SegmentBoundary::Separator),
814                NameSegment::Text("baz"),
815                NameSegment::Boundary(SegmentBoundary::LetterDigit),
816                NameSegment::Text("3"),
817                NameSegment::Boundary(SegmentBoundary::Case),
818                NameSegment::Text("Qux"),
819            ]
820        );
821
822        let name = names.claim([Text("foo"), Boundary, Text("2Bar")]);
823        assert_eq!(
824            name.segments().collect_vec(),
825            [
826                NameSegment::Text("foo"),
827                NameSegment::Boundary(SegmentBoundary::Separator),
828                NameSegment::Text("2"),
829                NameSegment::Boundary(SegmentBoundary::Case),
830                NameSegment::Text("Bar"),
831            ]
832        );
833
834        let arena = Arena::new();
835        let mut names = UniqueNames::new(&arena);
836        let name = names.claim([Text("foo2bar")]);
837        assert_eq!(
838            name.segments().collect_vec(),
839            [
840                NameSegment::Text("foo"),
841                NameSegment::Boundary(SegmentBoundary::LetterDigit),
842                NameSegment::Text("2"),
843                NameSegment::Boundary(SegmentBoundary::DigitLetter),
844                NameSegment::Text("bar"),
845            ]
846        );
847
848        let name = names.claim([Text("Vector3D")]);
849        assert_eq!(
850            name.segments().collect_vec(),
851            [
852                NameSegment::Text("Vector"),
853                NameSegment::Boundary(SegmentBoundary::LetterDigit),
854                NameSegment::Text("3"),
855                NameSegment::Boundary(SegmentBoundary::DigitLetter),
856                NameSegment::Text("D"),
857            ]
858        );
859
860        let name = names.claim([Text("50GBPerSecond")]);
861        assert_eq!(
862            name.segments().collect_vec(),
863            [
864                NameSegment::Text("50"),
865                NameSegment::Boundary(SegmentBoundary::DigitLetter),
866                NameSegment::Text("GB"),
867                NameSegment::Boundary(SegmentBoundary::Case),
868                NameSegment::Text("Per"),
869                NameSegment::Boundary(SegmentBoundary::Case),
870                NameSegment::Text("Second"),
871            ]
872        );
873    }
874
875    #[test]
876    fn test_deduplication_http_response_collision() {
877        let arena = Arena::new();
878        let mut names = UniqueNames::new(&arena);
879
880        assert_eq!(
881            AsPascalCase(names.claim([Text("HTTPResponse")])).to_string(),
882            "HttpResponse"
883        );
884        assert_eq!(
885            AsPascalCase(names.claim([Text("HTTP"), Boundary, Text("Response"),])).to_string(),
886            "HttpResponse2"
887        );
888        assert_eq!(
889            AsPascalCase(names.claim([Text("httpResponse")])).to_string(),
890            "HttpResponse3"
891        );
892        assert_eq!(
893            AsPascalCase(names.claim([Text("http"), Boundary, Text("response"),])).to_string(),
894            "HttpResponse4"
895        );
896        // `HTTPRESPONSE` isn't a collision; it's a single word.
897        assert_eq!(
898            AsPascalCase(names.claim([Text("HTTPRESPONSE")])).to_string(),
899            "Httpresponse"
900        );
901    }
902
903    #[test]
904    fn test_deduplication_xml_http_request() {
905        let arena = Arena::new();
906        let mut names = UniqueNames::new(&arena);
907
908        assert_eq!(
909            AsSnakeCase(names.claim([Text("XMLHttpRequest")])).to_string(),
910            "xml_http_request"
911        );
912        assert_eq!(
913            AsSnakeCase(names.claim([
914                Text("xml"),
915                Boundary,
916                Text("http"),
917                Boundary,
918                Text("request"),
919            ]))
920            .to_string(),
921            "xml_http_request_2"
922        );
923        assert_eq!(
924            AsSnakeCase(names.claim([Text("XmlHttpRequest")])).to_string(),
925            "xml_http_request_3"
926        );
927    }
928
929    #[test]
930    fn test_deduplication_separator_parts() {
931        let arena = Arena::new();
932        let mut names = UniqueNames::new(&arena);
933
934        assert_eq!(
935            AsSnakeCase(names.claim([Text("foo"), Boundary, Text("bar")])).to_string(),
936            "foo_bar",
937        );
938        assert_eq!(
939            AsSnakeCase(names.claim([Text("foo"), Boundary, Text("bar")])).to_string(),
940            "foo_bar_2"
941        );
942        assert_eq!(
943            AsSnakeCase(names.claim([Text("foo"), Boundary, Boundary, Boundary, Text("bar"),]))
944                .to_string(),
945            "foo_bar_3"
946        );
947    }
948
949    #[test]
950    fn test_deduplication_preserves_first_slot() {
951        let arena = Arena::new();
952        let mut names = UniqueNames::new(&arena);
953
954        assert_eq!(
955            AsPascalCase(names.claim([Text("HTTP"), Boundary, Text("Response"),])).to_string(),
956            "HttpResponse"
957        );
958        assert_eq!(
959            AsPascalCase(names.claim([Text("httpResponse")])).to_string(),
960            "HttpResponse2"
961        );
962    }
963
964    #[test]
965    fn test_deduplication_same_prefix() {
966        let arena = Arena::new();
967        let mut names = UniqueNames::new(&arena);
968
969        assert_eq!(
970            AsPascalCase(names.claim([Text("HttpRequest")])).to_string(),
971            "HttpRequest"
972        );
973        assert_eq!(
974            AsPascalCase(names.claim([Text("HttpResponse")])).to_string(),
975            "HttpResponse"
976        );
977        assert_eq!(
978            AsPascalCase(names.claim([Text("HttpError")])).to_string(),
979            "HttpError"
980        );
981    }
982
983    #[test]
984    fn test_deduplication_with_numbers() {
985        let arena = Arena::new();
986        let mut names = UniqueNames::new(&arena);
987
988        assert_eq!(
989            AsSnakeCase(names.claim([Text("Response2")])).to_string(),
990            "response2"
991        );
992        assert_eq!(
993            AsSnakeCase(names.claim([Text("response"), Boundary, Text("2"),])).to_string(),
994            "response_3"
995        );
996
997        assert_eq!(
998            AsSnakeCase(names.claim([Text("Response0")])).to_string(),
999            "response0"
1000        );
1001        assert_eq!(
1002            AsSnakeCase(names.claim([Text("response")])).to_string(),
1003            "response"
1004        );
1005
1006        // Internal digit boundaries collapse in PascalCase.
1007        assert_eq!(
1008            AsPascalCase(names.claim([Text("Http2Protocol")])).to_string(),
1009            "Http2Protocol"
1010        );
1011        assert_eq!(
1012            AsPascalCase(names.claim([Text("Http"), Boundary, Text("2Protocol"),])).to_string(),
1013            "Http2Protocol2"
1014        );
1015        assert_eq!(
1016            AsSnakeCase(names.claim([Text("Sha2"), Boundary, Text("56Digest"),])).to_string(),
1017            "sha2_56_digest"
1018        );
1019        assert_eq!(
1020            AsSnakeCase(names.claim([Text("Sha256Digest")])).to_string(),
1021            "sha256_digest"
1022        );
1023        assert_eq!(
1024            AsSnakeCase(names.claim([Text("Vector3D")])).to_string(),
1025            "vector3d"
1026        );
1027        assert_eq!(
1028            AsSnakeCase(names.claim([Text("50GBPerSecond")])).to_string(),
1029            "50gb_per_second"
1030        );
1031        assert_eq!(
1032            AsSnakeCase(names.claim([Text("Caf\u{e9}2")])).to_string(),
1033            "caf\u{e9}2"
1034        );
1035
1036        // Digit-to-uppercase collisions.
1037        assert_eq!(
1038            AsPascalCase(names.claim([Text("1099KStatus")])).to_string(),
1039            "1099KStatus"
1040        );
1041        assert_eq!(
1042            AsPascalCase(names.claim([Text("1099K"), Boundary, Text("Status"),])).to_string(),
1043            "1099KStatus2"
1044        );
1045        assert_eq!(
1046            AsPascalCase(names.claim([Text("1099KStatus")])).to_string(),
1047            "1099KStatus3"
1048        );
1049        assert_eq!(
1050            AsPascalCase(names.claim([
1051                Text("1099"),
1052                Boundary,
1053                Text("K"),
1054                Boundary,
1055                Text("Status"),
1056            ]))
1057            .to_string(),
1058            "1099KStatus4"
1059        );
1060
1061        // Digit-to-lowercase collisions.
1062        assert_eq!(
1063            AsSnakeCase(names.claim([Text("123abc")])).to_string(),
1064            "123abc"
1065        );
1066        assert_eq!(
1067            AsSnakeCase(names.claim([Text("123"), Boundary, Text("abc"),])).to_string(),
1068            "123_abc_2"
1069        );
1070    }
1071
1072    #[test]
1073    fn test_deduplication_numeric_suffixes() {
1074        let arena = Arena::new();
1075        let mut names = UniqueNames::new(&arena);
1076
1077        assert_eq!(
1078            AsSnakeCase(names.claim([Text("OAuth2")])).to_string(),
1079            "o_auth2"
1080        );
1081        assert_eq!(
1082            AsSnakeCase(names.claim([Text("OAuth"), Boundary, Text("2")])).to_string(),
1083            "o_auth_3"
1084        );
1085        assert_eq!(
1086            AsSnakeCase(names.claim([Text("OAuth")])).to_string(),
1087            "o_auth"
1088        );
1089        assert_eq!(
1090            AsSnakeCase(names.claim([Text("OAuth0")])).to_string(),
1091            "o_auth0"
1092        );
1093    }
1094
1095    #[test]
1096    fn test_deduplication_numeric_suffix_preserves_source_boundary() {
1097        let arena = Arena::new();
1098        let mut names = UniqueNames::new(&arena);
1099        assert_eq!(
1100            names
1101                .claim([NamePart::Text("Response2")])
1102                .segments()
1103                .collect_vec(),
1104            &[
1105                NameSegment::Text("Response"),
1106                NameSegment::Boundary(SegmentBoundary::LetterDigit),
1107                NameSegment::Text("2"),
1108            ]
1109        );
1110
1111        let arena = Arena::new();
1112        let mut names = UniqueNames::new(&arena);
1113        assert_eq!(
1114            names
1115                .claim([NamePart::Text("Response0")])
1116                .segments()
1117                .collect_vec(),
1118            &[
1119                NameSegment::Text("Response"),
1120                NameSegment::Boundary(SegmentBoundary::LetterDigit),
1121                NameSegment::Text("0"),
1122            ]
1123        );
1124    }
1125
1126    #[test]
1127    fn test_deduplication_numeric_suffix_slots() {
1128        let arena = Arena::new();
1129        let mut names = UniqueNames::new(&arena);
1130
1131        assert_eq!(AsSnakeCase(names.claim([Text("v2")])).to_string(), "v2");
1132        assert_eq!(
1133            AsSnakeCase(names.claim([Text("v"), Boundary, Text("2")])).to_string(),
1134            "v_3"
1135        );
1136        assert_eq!(AsSnakeCase(names.claim([Text("v")])).to_string(), "v");
1137        assert_eq!(AsSnakeCase(names.claim([Text("v")])).to_string(), "v_4");
1138
1139        let arena = Arena::new();
1140        let mut names = UniqueNames::new(&arena);
1141
1142        assert_eq!(
1143            AsKebabCase(names.claim([Text("response")])).to_string(),
1144            "response"
1145        );
1146        assert_eq!(
1147            AsKebabCase(names.claim([Text("response")])).to_string(),
1148            "response-2"
1149        );
1150        assert_eq!(
1151            AsKebabCase(names.claim([Text("response2")])).to_string(),
1152            "response3"
1153        );
1154        assert_eq!(
1155            AsKebabCase(names.claim([Text("response")])).to_string(),
1156            "response-4"
1157        );
1158    }
1159
1160    #[test]
1161    fn test_deduplication_source_zero_suffix_uses_own_slot() {
1162        let arena = Arena::new();
1163        let mut names = UniqueNames::new(&arena);
1164
1165        assert_eq!(
1166            AsSnakeCase(names.claim([Text("Response")])).to_string(),
1167            "response"
1168        );
1169        assert_eq!(
1170            AsSnakeCase(names.claim([Text("Response0")])).to_string(),
1171            "response0"
1172        );
1173        assert_eq!(
1174            AsSnakeCase(names.claim([Text("Response")])).to_string(),
1175            "response_2"
1176        );
1177
1178        let arena = Arena::new();
1179        let mut names = UniqueNames::new(&arena);
1180
1181        assert_eq!(
1182            AsSnakeCase(names.claim([Text("Response0")])).to_string(),
1183            "response0"
1184        );
1185        assert_eq!(
1186            AsSnakeCase(names.claim([Text("Response")])).to_string(),
1187            "response"
1188        );
1189        assert_eq!(
1190            AsSnakeCase(names.claim([Text("Response0")])).to_string(),
1191            "response1"
1192        );
1193    }
1194
1195    #[test]
1196    fn test_deduplication_unicode_case_family() {
1197        let arena = Arena::new();
1198        let mut names = UniqueNames::new(&arena);
1199
1200        assert_eq!(AsSnakeCase(names.claim([Text("ß")])).to_string(), "ß");
1201        assert_eq!(AsSnakeCase(names.claim([Text("SS")])).to_string(), "ss_2");
1202        assert_eq!(AsSnakeCase(names.claim([Text("ss")])).to_string(), "ss_3");
1203        assert_eq!(
1204            AsSnakeCase(names.claim([Text("İ")])).to_string(),
1205            "i\u{307}"
1206        );
1207        assert_eq!(
1208            AsSnakeCase(names.claim([Text("i\u{307}")])).to_string(),
1209            "i\u{307}_2"
1210        );
1211    }
1212
1213    #[test]
1214    fn test_deduplication_normalizes_unicode_to_nfc() {
1215        let arena = Arena::new();
1216        let mut names = UniqueNames::new(&arena);
1217
1218        assert_eq!(
1219            AsSnakeCase(names.claim([Text("cafe\u{301}")])).to_string(),
1220            "caf\u{e9}"
1221        );
1222        assert_eq!(
1223            AsSnakeCase(names.claim([Text("caf\u{e9}")])).to_string(),
1224            "caf\u{e9}_2"
1225        );
1226    }
1227
1228    #[test]
1229    fn test_deduplication_empty_names_start_at_one() {
1230        let arena = Arena::new();
1231        let mut names = UniqueNames::new(&arena);
1232
1233        assert_eq!(AsSnakeCase(names.claim([])).to_string(), "1");
1234        assert_eq!(AsSnakeCase(names.claim([Boundary])).to_string(), "2");
1235        assert_eq!(
1236            AsSnakeCase(names.claim([Boundary, Boundary, Boundary])).to_string(),
1237            "3"
1238        );
1239    }
1240
1241    #[test]
1242    fn test_deduplication_numeric_names_share_empty_stem() {
1243        let arena = Arena::new();
1244        let mut names = UniqueNames::new(&arena);
1245
1246        assert_eq!(AsSnakeCase(names.claim([Text("2")])).to_string(), "2");
1247        assert_eq!(AsSnakeCase(names.claim([])).to_string(), "1");
1248        assert_eq!(AsSnakeCase(names.claim([Text("2")])).to_string(), "3");
1249
1250        let arena = Arena::new();
1251        let mut names = UniqueNames::new(&arena);
1252
1253        assert_eq!(AsSnakeCase(names.claim([Text("0")])).to_string(), "1");
1254        assert_eq!(AsSnakeCase(names.claim([])).to_string(), "2");
1255    }
1256
1257    #[test]
1258    fn test_reserved_digit_only_names_share_empty_stem_sequence() {
1259        let arena = Arena::new();
1260        let mut names = UniqueNames::with_reserved(&arena, [[Text("0")]]);
1261
1262        assert_eq!(AsSnakeCase(names.claim([Text("0")])).to_string(), "2");
1263        assert_eq!(AsSnakeCase(names.claim([])).to_string(), "3");
1264    }
1265
1266    #[test]
1267    fn test_reserved_boundary_only_shares_empty_stem_sequence() {
1268        let arena = Arena::new();
1269        let mut names = UniqueNames::with_reserved(&arena, [[Boundary]]);
1270
1271        assert_eq!(AsSnakeCase(names.claim([Boundary])).to_string(), "2");
1272        assert_eq!(AsSnakeCase(names.claim([Boundary])).to_string(), "3");
1273    }
1274
1275    #[test]
1276    fn test_reserved_multiple() {
1277        let arena = Arena::new();
1278        let mut names = UniqueNames::with_reserved(&arena, [[Boundary], [Text("reserved")]]);
1279
1280        assert_eq!(AsSnakeCase(names.claim([Boundary])).to_string(), "2");
1281        assert_eq!(
1282            AsSnakeCase(names.claim([Text("reserved")])).to_string(),
1283            "reserved_2"
1284        );
1285        assert_eq!(
1286            AsSnakeCase(names.claim([Text("other")])).to_string(),
1287            "other"
1288        );
1289    }
1290
1291    #[test]
1292    fn test_reserved_numeric_suffixes() {
1293        let arena = Arena::new();
1294        let mut names = UniqueNames::with_reserved(&arena, [[Text("crate")]]);
1295
1296        assert_eq!(
1297            AsSnakeCase(names.claim([Text("crate")])).to_string(),
1298            "crate_2"
1299        );
1300        assert_eq!(
1301            AsSnakeCase(names.claim([Text("crate2")])).to_string(),
1302            "crate3"
1303        );
1304
1305        let arena = Arena::new();
1306        let mut names = UniqueNames::with_reserved(&arena, [[Text("Response0")]]);
1307
1308        assert_eq!(
1309            AsSnakeCase(names.claim([Text("Response")])).to_string(),
1310            "response"
1311        );
1312        assert_eq!(
1313            AsSnakeCase(names.claim([Text("Response0")])).to_string(),
1314            "response1"
1315        );
1316    }
1317}