Skip to main content

ploidy_core/codegen/
unique.rs

1//! Naming support for generated code.
2//!
3//! OpenAPI specs use different naming conventions for their types, operations,
4//! and resources. When codegen emits these names, it needs to transform them
5//! into identifiers that conform to the grammar and idiomatic case style of
6//! each target language.
7//!
8//! Codegen segments OpenAPI names into [`NamePart`] segments. A [`UniqueNames`]
9//! scope turns these segment sequences into a representation that's unique
10//! within that scope, and stable regardless of whether it's rendered
11//! [`AsPascalCase`], [`AsSnakeCase`], or [`AsKebabCase`].
12
13use std::{
14    fmt::{Display, Formatter, Result as FmtResult, Write},
15    iter::{self, Peekable},
16    mem,
17};
18
19use itertools::Itertools;
20use rustc_hash::{FxHashMap, FxHashSet};
21use unicase::UniCase;
22use unicode_normalization::UnicodeNormalization;
23
24use crate::arena::Arena;
25
/// A scope that claims target language names before final case rendering.
///
/// [`UniqueNames`] canonicalizes source name parts into word segments,
/// assigns collision suffixes for already-claimed names, and returns
/// opaque [`UniqueName`] handles for codegen to render in any case style.
///
/// Claimed names are keyed by the text of their word segments only, wrapped
/// in [`UniCase`]; the kind of boundary between segments is not part of the
/// key. As a result `fooBar`, `FooBar`, and `foo_bar` all contend for the
/// same name.
#[derive(Debug)]
pub struct UniqueNames<'a> {
    /// Arena that receives the text and segment slices of every claimed name.
    arena: &'a Arena,
    /// Claimed names: the key is the segment text of a name without its
    /// trailing numeric suffix, and the value is the set of suffix slots
    /// already taken for that key.
    space: FxHashMap<Box<[UniCase<&'a str>]>, FxHashSet<SuffixSlot>>,
}
36
37impl<'a> UniqueNames<'a> {
38    /// Creates an empty name scope.
39    pub fn new(arena: &'a Arena) -> Self {
40        Self {
41            arena,
42            space: FxHashMap::default(),
43        }
44    }
45
46    /// Creates a name scope that reserves existing names.
47    pub fn with_reserved<'part, R>(arena: &'a Arena, reserved: R) -> Self
48    where
49        R: IntoIterator,
50        R::Item: IntoIterator<Item = NamePart<'part>>,
51    {
52        let mut space = FxHashMap::<_, FxHashSet<_>>::default();
53        for parts in reserved {
54            let segments = segments(parts)
55                .map(|WordSegment(text, boundary)| WordSegment(&*arena.alloc_str(&text), boundary))
56                .collect_vec();
57            let decomposed = DecomposedName::new(&segments);
58            space
59                .entry(decomposed.prefix().map(|s| UniCase::new(s.0)).collect())
60                .or_default()
61                .insert(decomposed.slot());
62        }
63        Self { arena, space }
64    }
65
66    /// Claims a segmented source name, and returns a name that's
67    /// unique within this scope.
68    ///
69    /// If the name has already been claimed, the returned name receives
70    /// the next free unique numeric suffix.
71    pub fn claim<'part>(
72        &mut self,
73        parts: impl IntoIterator<Item = NamePart<'part>>,
74    ) -> UniqueName<'a> {
75        let segments = segments(parts)
76            .map(|WordSegment(text, boundary)| WordSegment(&*self.arena.alloc_str(&text), boundary))
77            .collect_vec();
78        UniqueName(self.claim_from_segments(&segments))
79    }
80
81    /// Claims a name that's already unique in another scope, and returns
82    /// a unique form of that name in this scope.
83    pub fn adopt(&mut self, name: UniqueName<'a>) -> UniqueName<'a> {
84        UniqueName(self.claim_from_segments(name.0))
85    }
86
87    fn claim_from_segments(
88        &mut self,
89        segments: &[WordSegment<&'a str>],
90    ) -> &'a [WordSegment<&'a str>] {
91        let decomposed = DecomposedName::new(segments);
92        let occupied = self
93            .space
94            .entry(decomposed.prefix().map(|s| UniCase::new(s.0)).collect())
95            .or_default();
96
97        match decomposed {
98            DecomposedName::Empty { mut slot } => {
99                // An empty or digit-only name becomes a single word
100                // that's just the unique suffix.
101                while !occupied.insert(SuffixSlot::Number(slot)) {
102                    slot = slot.checked_add(1).unwrap();
103                }
104                std::slice::from_ref(self.arena.alloc(WordSegment(
105                    self.arena.alloc_fmt(format_args!("{slot}")),
106                    WordBoundary::First,
107                )))
108            }
109            DecomposedName::Text {
110                suffix: DecomposedSuffix::Source { mut slot, boundary },
111                ..
112            } => {
113                // A name with an existing numeric suffix reuses the
114                // boundary between the last stem and original suffix,
115                // then adds the unique suffix.
116                while !occupied.insert(SuffixSlot::Number(slot)) {
117                    slot = slot.checked_add(1).unwrap();
118                }
119                self.arena
120                    .alloc_slice(decomposed.prefix().chain(iter::once(WordSegment(
121                        self.arena.alloc_fmt(format_args!("{slot}")),
122                        boundary,
123                    ))))
124            }
125            DecomposedName::Text {
126                suffix: DecomposedSuffix::Absent,
127                ..
128            } => {
129                let mut slot = SuffixSlot::Absent;
130                while !occupied.insert(slot) {
131                    slot = match slot {
132                        SuffixSlot::Absent => SuffixSlot::Number(2),
133                        SuffixSlot::Number(slot) => {
134                            SuffixSlot::Number(slot.checked_add(1).unwrap())
135                        }
136                    };
137                }
138                match slot {
139                    // A unique name doesn't need a suffix.
140                    SuffixSlot::Absent => self.arena.alloc_slice(decomposed.prefix()),
141                    // An unsuffixed name adds a separator, then the unique suffix.
142                    SuffixSlot::Number(slot) => {
143                        self.arena
144                            .alloc_slice(decomposed.prefix().chain(iter::once(WordSegment(
145                                self.arena.alloc_fmt(format_args!("{slot}")),
146                                WordBoundary::After(SegmentBoundary::Separator),
147                            ))))
148                    }
149                }
150            }
151        }
152    }
153}
154
/// A segment of an OpenAPI source name.
///
/// A name is passed to [`UniqueNames::claim`] or
/// [`UniqueNames::with_reserved`] as a sequence of these parts.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum NamePart<'a> {
    /// Text to normalize and split into [`UniqueName`] segments.
    ///
    /// The text may itself contain word boundaries (whitespace, `_`, `-`,
    /// and case or digit transitions); those are detected during
    /// segmentation.
    Text(&'a str),
    /// An explicit word boundary.
    ///
    /// Segmentation treats this exactly like a separator character in the
    /// text: boundaries before the first word are ignored, and a run of
    /// consecutive boundaries counts as one.
    Boundary,
}
163
164/// A name that's unique within a scope, and that can be rendered in any case.
165#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
166pub struct UniqueName<'a>(&'a [WordSegment<&'a str>]);
167
168impl<'a> UniqueName<'a> {
169    /// Returns the first character of this name's segment text.
170    #[inline]
171    pub fn first_char(&self) -> Option<char> {
172        self.0.first().and_then(|s| s.0.chars().next())
173    }
174
175    /// Returns the segments that make up this name.
176    #[inline]
177    fn segments(&self) -> impl Iterator<Item = NameSegment<'a>> {
178        self.0.iter().flat_map(|&WordSegment(text, boundary)| {
179            either!(match boundary {
180                WordBoundary::First => [NameSegment::Text(text)],
181                WordBoundary::After(boundary) =>
182                    [NameSegment::Boundary(boundary), NameSegment::Text(text)],
183            })
184            .into_iter()
185        })
186    }
187}
188
/// A canonical text or boundary segment in a [`UniqueName`].
///
/// This is the flattened view that the case formatters iterate over:
/// every word after the first is preceded by a `Boundary` segment.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
enum NameSegment<'a> {
    /// The canonicalized segment text.
    Text(&'a str),
    /// A segment boundary.
    Boundary(SegmentBoundary),
}
197
198/// Formats a [`UniqueName`] as `PascalCase`.
199///
200/// Each segment starts with an uppercase character and continues in lowercase.
201pub struct AsPascalCase<'a>(pub UniqueName<'a>);
202
203impl Display for AsPascalCase<'_> {
204    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
205        for segment in self.0.segments() {
206            if let NameSegment::Text(text) = segment {
207                let mut chars = text.chars();
208                if let Some(c) = chars.next() {
209                    write!(f, "{}", c.to_uppercase())?;
210                    chars.try_for_each(|c| write!(f, "{}", c.to_lowercase()))?;
211                }
212            }
213        }
214        Ok(())
215    }
216}
217
218/// Formats a [`UniqueName`] as `snake_case`.
219///
220/// Case and separator boundaries become `_`.
221/// Letter-to-digit and digit-to-letter boundaries collapse to preserve
222/// common names like `sha256`, `http2`, `x509`, and `s3`.
223pub struct AsSnakeCase<'a>(pub UniqueName<'a>);
224
225impl Display for AsSnakeCase<'_> {
226    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
227        for segment in self.0.segments() {
228            match segment {
229                NameSegment::Boundary(
230                    SegmentBoundary::LetterDigit | SegmentBoundary::DigitLetter,
231                ) => continue,
232                NameSegment::Boundary(_) => f.write_char('_')?,
233                NameSegment::Text(text) => text
234                    .chars()
235                    .try_for_each(|c| write!(f, "{}", c.to_lowercase()))?,
236            }
237        }
238        Ok(())
239    }
240}
241
242/// Formats a name as `kebab-case`.
243///
244/// Case and separator boundaries become `-`.
245/// Letter-to-digit and digit-to-letter boundaries collapse, like
246/// [`AsSnakeCase`].
247pub struct AsKebabCase<'a>(pub UniqueName<'a>);
248
249impl Display for AsKebabCase<'_> {
250    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
251        for segment in self.0.segments() {
252            match segment {
253                NameSegment::Boundary(
254                    SegmentBoundary::LetterDigit | SegmentBoundary::DigitLetter,
255                ) => continue,
256                NameSegment::Boundary(_) => f.write_char('-')?,
257                NameSegment::Text(text) => text
258                    .chars()
259                    .try_for_each(|c| write!(f, "{}", c.to_lowercase()))?,
260            }
261        }
262        Ok(())
263    }
264}
265
/// A boundary between word segments.
///
/// The kind of boundary decides how it is rendered: [`AsSnakeCase`] and
/// [`AsKebabCase`] write a delimiter for `Separator` and `Case`, and write
/// nothing for `LetterDigit` and `DigitLetter`. [`AsPascalCase`] writes
/// nothing for any boundary.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
enum SegmentBoundary {
    /// The segment follows one or more separator parts.
    Separator,
    /// The segment follows a case transition.
    Case,
    /// The segment follows a letter-to-digit transition.
    LetterDigit,
    /// The segment follows a digit-to-letter transition.
    DigitLetter,
}
278
279enum DecomposedName<'segments, 'text> {
280    Empty {
281        slot: usize,
282    },
283    Text {
284        init: &'segments [WordSegment<&'text str>],
285        last: Option<WordSegment<&'text str>>,
286        suffix: DecomposedSuffix,
287    },
288}
289
290impl<'segments, 'text> DecomposedName<'segments, 'text> {
291    fn new(segments: &'segments [WordSegment<&'text str>]) -> Self {
292        if segments.is_empty() {
293            return Self::Empty { slot: 1 };
294        }
295        if let Some((&WordSegment(last, boundary), head)) = segments.split_last() {
296            let stem = last.trim_end_matches(|c: char| c.is_ascii_digit());
297            if let Some(slot) = last.strip_prefix(stem)
298                && let Ok(slot) = slot.parse::<usize>()
299            {
300                if stem.is_empty() {
301                    if head.is_empty() {
302                        return Self::Empty { slot: slot.max(1) };
303                    }
304                    return Self::Text {
305                        init: head,
306                        last: None,
307                        suffix: DecomposedSuffix::Source { slot, boundary },
308                    };
309                }
310                let last = match head {
311                    [] => WordSegment(stem, WordBoundary::First),
312                    [..] => WordSegment(stem, WordBoundary::After(SegmentBoundary::Separator)),
313                };
314                return Self::Text {
315                    init: head,
316                    last: Some(last),
317                    suffix: DecomposedSuffix::Source {
318                        slot,
319                        boundary: WordBoundary::After(SegmentBoundary::LetterDigit),
320                    },
321                };
322            }
323        }
324        Self::Text {
325            init: segments,
326            last: None,
327            suffix: DecomposedSuffix::Absent,
328        }
329    }
330
331    fn prefix(&self) -> impl Iterator<Item = WordSegment<&'text str>> {
332        let (init, last): (&'segments [_], Option<_>) = match self {
333            Self::Empty { .. } => (&[], None),
334            &Self::Text { init, last, .. } => (init, last),
335        };
336        init.iter().copied().chain(last)
337    }
338
339    fn slot(&self) -> SuffixSlot {
340        match *self {
341            Self::Empty { slot } => SuffixSlot::Number(slot),
342            Self::Text {
343                suffix: DecomposedSuffix::Absent,
344                ..
345            } => SuffixSlot::Absent,
346            Self::Text {
347                suffix: DecomposedSuffix::Source { slot, .. },
348                ..
349            } => SuffixSlot::Number(slot),
350        }
351    }
352}
353
/// The numeric suffix found at the end of a source name, if any.
#[derive(Clone, Copy)]
enum DecomposedSuffix {
    /// The name doesn't end in a parseable run of ASCII digits.
    Absent,
    /// The name ends in the number `slot`; `boundary` is the boundary to
    /// place before the suffix when the name is reassembled.
    Source { slot: usize, boundary: WordBoundary },
}
359
/// One claimable variant of a name prefix within a [`UniqueNames`] scope.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum SuffixSlot {
    /// The prefix on its own, without a numeric suffix.
    Absent,
    /// The prefix followed by this number.
    Number(usize),
}
365
366/// Segments name parts into words.
367///
368/// Text parts are normalized to NFC before segmentation.
369///
370/// Word boundaries occur on:
371///
372/// * Whitespace, `-`, `_`, and explicit [`NamePart::Boundary`] parts.
373/// * Lowercase-to-uppercase transitions (`httpResponse`).
374/// * Uppercase-to-lowercase after an uppercase run (`XMLHttp`).
375/// * Letter-to-ASCII-digit transitions (`sha256`).
376/// * ASCII digit-to-letter transitions (`250g`).
377fn segments<'a>(
378    input: impl IntoIterator<Item = NamePart<'a>>,
379) -> impl Iterator<Item = WordSegment<String>> {
380    WordSegments {
381        input: input
382            .into_iter()
383            .flat_map(|part| {
384                either!(match part {
385                    NamePart::Text(text) => text.nfc().map(NameChar::from),
386                    NamePart::Boundary => iter::once(NameChar::Separator),
387                })
388            })
389            .peekable(),
390        state: WordState::Start,
391    }
392}
393
/// A character of a source name, classified for word segmentation.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum NameChar {
    /// A character that belongs to a word.
    Continue(char),
    /// A character (or explicit boundary) that separates words.
    Separator,
}

impl From<char> for NameChar {
    /// Classifies `c`: whitespace and the snake_case and kebab-case
    /// delimiters (`_` and `-`) separate words; every other character
    /// is part of one.
    fn from(c: char) -> Self {
        let separates = matches!(c, '_' | '-') || c.is_whitespace();
        if separates {
            Self::Separator
        } else {
            Self::Continue(c)
        }
    }
}
411
/// The active or pending word state in a [`WordSegments`].
#[derive(Clone)]
enum WordState {
    /// Before the first word.
    ///
    /// Separators seen in this state are ignored, so the first word is
    /// always emitted with [`WordBoundary::First`].
    Start,
    /// Between words, with the boundary to apply to the next word.
    Between(SegmentBoundary),
    /// Inside a word that can be emitted by the next boundary.
    ///
    /// Holds the text accumulated so far, the boundary that preceded the
    /// word, and the class of the most recently classified character.
    InWord(String, WordBoundary, WordMode),
}
422
/// The character class of the active [`WordState::InWord`] state.
///
/// Appending an uppercase, lowercase, or ASCII digit character to a word
/// updates the mode to that character's class; appending any other
/// character leaves the mode as it was.
#[derive(Clone, Copy)]
enum WordMode {
    /// Currently in an uncased alphanumeric segment.
    ///
    /// A word starts in this mode when its first character is neither
    /// uppercase, lowercase, nor an ASCII digit.
    Uncased,
    /// Currently in a lowercase segment.
    Lowercase,
    /// Currently in an uppercase segment.
    Uppercase,
    /// Currently in a digit segment.
    Digit,
}
435
/// What precedes a word segment within its name.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
enum WordBoundary {
    /// Nothing: this is the first word of the name.
    First,
    /// A boundary of the given kind, separating this word from the
    /// previous one.
    After(SegmentBoundary),
}
441
/// One word of a segmented name: the word's text (owned or borrowed,
/// depending on `T`), and the boundary that precedes it.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
struct WordSegment<T>(T, WordBoundary);
444
445struct WordSegments<I: Iterator<Item = NameChar>> {
446    input: Peekable<I>,
447    state: WordState,
448}
449
450impl<I: Iterator<Item = NameChar>> Iterator for WordSegments<I> {
451    type Item = WordSegment<String>;
452
453    fn next(&mut self) -> Option<Self::Item> {
454        while let Some(c) = self.input.next() {
455            match c {
456                NameChar::Separator => {
457                    // Start a new word at this separator character.
458                    match mem::replace(
459                        &mut self.state,
460                        WordState::Between(SegmentBoundary::Separator),
461                    ) {
462                        WordState::InWord(text, boundary, _) => {
463                            while let Some(NameChar::Separator) = self.input.peek() {
464                                self.input.next();
465                            }
466                            self.state = WordState::Between(SegmentBoundary::Separator);
467                            return Some(WordSegment(text, boundary));
468                        }
469                        state => {
470                            self.state = state;
471                        }
472                    }
473                }
474                NameChar::Continue(c) if c.is_uppercase() => {
475                    match mem::replace(
476                        &mut self.state,
477                        WordState::Between(SegmentBoundary::Separator),
478                    ) {
479                        WordState::Start => {
480                            self.state = WordState::InWord(
481                                c.to_string(),
482                                WordBoundary::First,
483                                WordMode::Uppercase,
484                            );
485                        }
486                        WordState::Between(next_boundary) => {
487                            self.state = WordState::InWord(
488                                c.to_string(),
489                                WordBoundary::After(next_boundary),
490                                WordMode::Uppercase,
491                            );
492                        }
493                        WordState::InWord(
494                            mut text,
495                            boundary,
496                            WordMode::Uncased | WordMode::Uppercase,
497                        ) => {
498                            let next_is_lowercase = self.input.peek().is_some_and(|next| {
499                                matches!(next, NameChar::Continue(next) if next.is_lowercase())
500                            });
501                            if next_is_lowercase {
502                                // `XMLHttp` case; start a new word with this uppercase
503                                // character (the "H" in "Http").
504                                self.state = WordState::InWord(
505                                    c.to_string(),
506                                    WordBoundary::After(SegmentBoundary::Case),
507                                    WordMode::Uppercase,
508                                );
509                                return Some(WordSegment(text, boundary));
510                            }
511                            text.push(c);
512                            self.state = WordState::InWord(text, boundary, WordMode::Uppercase);
513                        }
514                        WordState::InWord(text, boundary, WordMode::Digit) => {
515                            let next_is_lowercase = self.input.peek().is_some_and(|next| {
516                                matches!(next, NameChar::Continue(next) if next.is_lowercase())
517                            });
518                            self.state = WordState::InWord(
519                                c.to_string(),
520                                WordBoundary::After(if next_is_lowercase {
521                                    SegmentBoundary::Case
522                                } else {
523                                    SegmentBoundary::DigitLetter
524                                }),
525                                WordMode::Uppercase,
526                            );
527                            return Some(WordSegment(text, boundary));
528                        }
529                        WordState::InWord(text, boundary, WordMode::Lowercase) => {
530                            // Start a new word at the uppercase side of a case boundary.
531                            self.state = WordState::InWord(
532                                c.to_string(),
533                                WordBoundary::After(SegmentBoundary::Case),
534                                WordMode::Uppercase,
535                            );
536                            return Some(WordSegment(text, boundary));
537                        }
538                    }
539                }
540                NameChar::Continue(c) if c.is_lowercase() => {
541                    match mem::replace(
542                        &mut self.state,
543                        WordState::Between(SegmentBoundary::Separator),
544                    ) {
545                        WordState::Start => {
546                            self.state = WordState::InWord(
547                                c.to_string(),
548                                WordBoundary::First,
549                                WordMode::Lowercase,
550                            );
551                        }
552                        WordState::Between(next_boundary) => {
553                            self.state = WordState::InWord(
554                                c.to_string(),
555                                WordBoundary::After(next_boundary),
556                                WordMode::Lowercase,
557                            );
558                        }
559                        WordState::InWord(
560                            mut text,
561                            boundary,
562                            WordMode::Uncased | WordMode::Lowercase | WordMode::Uppercase,
563                        ) => {
564                            text.push(c);
565                            self.state = WordState::InWord(text, boundary, WordMode::Lowercase);
566                        }
567                        WordState::InWord(text, boundary, WordMode::Digit) => {
568                            // Start a new word after a digit segment.
569                            self.state = WordState::InWord(
570                                c.to_string(),
571                                WordBoundary::After(SegmentBoundary::DigitLetter),
572                                WordMode::Lowercase,
573                            );
574                            return Some(WordSegment(text, boundary));
575                        }
576                    }
577                }
578                NameChar::Continue(c) if c.is_ascii_digit() => {
579                    match mem::replace(
580                        &mut self.state,
581                        WordState::Between(SegmentBoundary::Separator),
582                    ) {
583                        WordState::Start => {
584                            self.state = WordState::InWord(
585                                c.to_string(),
586                                WordBoundary::First,
587                                WordMode::Digit,
588                            );
589                        }
590                        WordState::Between(next_boundary) => {
591                            self.state = WordState::InWord(
592                                c.to_string(),
593                                WordBoundary::After(next_boundary),
594                                WordMode::Digit,
595                            );
596                        }
597                        WordState::InWord(mut text, boundary, WordMode::Digit) => {
598                            text.push(c);
599                            self.state = WordState::InWord(text, boundary, WordMode::Digit);
600                        }
601                        WordState::InWord(
602                            text,
603                            boundary,
604                            WordMode::Uncased | WordMode::Lowercase | WordMode::Uppercase,
605                        ) => {
606                            // Start a new word after a letter segment.
607                            self.state = WordState::InWord(
608                                c.to_string(),
609                                WordBoundary::After(SegmentBoundary::LetterDigit),
610                                WordMode::Digit,
611                            );
612                            return Some(WordSegment(text, boundary));
613                        }
614                    }
615                }
616                NameChar::Continue(c) => {
617                    // All other characters continue the current word.
618                    match mem::replace(
619                        &mut self.state,
620                        WordState::Between(SegmentBoundary::Separator),
621                    ) {
622                        WordState::Start => {
623                            self.state = WordState::InWord(
624                                c.to_string(),
625                                WordBoundary::First,
626                                WordMode::Uncased,
627                            );
628                        }
629                        WordState::Between(next_boundary) => {
630                            self.state = WordState::InWord(
631                                c.to_string(),
632                                WordBoundary::After(next_boundary),
633                                WordMode::Uncased,
634                            );
635                        }
636                        WordState::InWord(mut text, boundary, mode) => {
637                            text.push(c);
638                            self.state = WordState::InWord(text, boundary, mode);
639                        }
640                    }
641                }
642            }
643        }
644        if let WordState::InWord(text, boundary, _) = mem::replace(
645            &mut self.state,
646            WordState::Between(SegmentBoundary::Separator),
647        ) {
648            // Trailing word.
649            self.state = WordState::Between(SegmentBoundary::Separator);
650            return Some(WordSegment(text, boundary));
651        }
652        None
653    }
654}
655
656#[cfg(test)]
657mod tests {
658    use super::*;
659    use NamePart::{Boundary, Text};
660
661    use itertools::Itertools;
662
663    fn segments(parts: &[NamePart<'_>]) -> Vec<String> {
664        super::segments(parts.iter().copied())
665            .map(|WordSegment(text, _)| text)
666            .collect_vec()
667    }
668
669    #[test]
670    fn test_segment_camel_case() {
671        assert_eq!(segments(&[Text("camelCase")]), vec!["camel", "Case"]);
672        assert_eq!(segments(&[Text("httpResponse")]), vec!["http", "Response"]);
673    }
674
675    #[test]
676    fn test_segment_pascal_case() {
677        assert_eq!(segments(&[Text("PascalCase")]), vec!["Pascal", "Case"]);
678        assert_eq!(segments(&[Text("HttpResponse")]), vec!["Http", "Response"]);
679    }
680
681    #[test]
682    fn test_segment_snake_case() {
683        assert_eq!(
684            segments(&[Text("snake"), Boundary, Text("case")]),
685            vec!["snake", "case"]
686        );
687        assert_eq!(
688            segments(&[Text("http"), Boundary, Text("response")]),
689            vec!["http", "response"]
690        );
691    }
692
693    #[test]
694    fn test_segment_screaming_snake() {
695        assert_eq!(
696            segments(&[Text("SCREAMING"), Boundary, Text("SNAKE")]),
697            vec!["SCREAMING", "SNAKE"]
698        );
699        assert_eq!(
700            segments(&[Text("HTTP"), Boundary, Text("RESPONSE")]),
701            vec!["HTTP", "RESPONSE"]
702        );
703    }
704
705    #[test]
706    fn test_segment_consecutive_uppercase() {
707        assert_eq!(
708            segments(&[Text("XMLHttpRequest")]),
709            vec!["XML", "Http", "Request"]
710        );
711        assert_eq!(segments(&[Text("HTTPResponse")]), vec!["HTTP", "Response"]);
712        assert_eq!(
713            segments(&[Text("HTTP"), Boundary, Text("Response")]),
714            vec!["HTTP", "Response"]
715        );
716        assert_eq!(segments(&[Text("ALLCAPS")]), vec!["ALLCAPS"]);
717    }
718
719    #[test]
720    fn test_segment_unicode_case_boundaries() {
721        assert_eq!(segments(&[Text("\u{e9}clair")]), vec!["\u{e9}clair"]);
722        assert_eq!(segments(&[Text("\u{c9}clair")]), vec!["\u{c9}clair"]);
723        assert_eq!(
724            segments(&[Text("XML\u{c9}clair")]),
725            vec!["XML", "\u{c9}clair"]
726        );
727        assert_eq!(
728            segments(&[Text("CAF\u{c9}Token")]),
729            vec!["CAF\u{c9}", "Token"]
730        );
731        assert_eq!(segments(&[Text("\u{e9}Tag")]), vec!["\u{e9}", "Tag"]);
732        assert_eq!(segments(&[Text("\u{c9}Token")]), vec!["\u{c9}", "Token"]);
733        assert_eq!(segments(&[Text("\u{e9}HTTP")]), vec!["\u{e9}", "HTTP"]);
734        assert_eq!(
735            segments(&[Text("foo"), Boundary, Text("bar")]),
736            vec!["foo", "bar"]
737        );
738        assert_eq!(
739            segments(&[Text("foo"), Boundary, Boundary, Text("bar")]),
740            vec!["foo", "bar"]
741        );
742        assert_eq!(segments(&[Boundary, Text("foo"), Boundary]), vec!["foo"]);
743        assert_eq!(
744            segments(&[Text("foo"), Boundary, Text("2")]),
745            vec!["foo", "2"]
746        );
747    }
748
749    #[test]
750    fn test_segment_with_numbers() {
751        assert_eq!(segments(&[Text("Response2")]), vec!["Response", "2"]);
752        assert_eq!(
753            segments(&[Text("response"), Boundary, Text("2")]),
754            vec!["response", "2"]
755        );
756        assert_eq!(
757            segments(&[Text("HTTP2Protocol")]),
758            vec!["HTTP", "2", "Protocol"]
759        );
760        assert_eq!(
761            segments(&[Text("OAuth2Token")]),
762            vec!["O", "Auth", "2", "Token"]
763        );
764        assert_eq!(segments(&[Text("HTTP2XML")]), vec!["HTTP", "2", "XML"]);
765        assert_eq!(
766            segments(&[Text("1099KStatus")]),
767            vec!["1099", "K", "Status"]
768        );
769        assert_eq!(segments(&[Text("123abc")]), vec!["123", "abc"]);
770        assert_eq!(segments(&[Text("123ABC")]), vec!["123", "ABC"]);
771        assert_eq!(
772            segments(&[Text("Sha2"), Boundary, Text("56Digest")]),
773            vec!["Sha", "2", "56", "Digest"]
774        );
775    }
776
777    #[test]
778    fn test_segment_empty_and_special() {
779        assert!(segments(&[]).is_empty());
780        assert!(segments(&[Boundary, Boundary, Boundary]).is_empty());
781        assert_eq!(segments(&[Text("a")]), vec!["a"]);
782        assert_eq!(segments(&[Text("A")]), vec!["A"]);
783    }
784
#[test]
fn test_segment_mixed_separators() {
    // Single boundaries between three text parts.
    let three_words = vec!["foo", "bar", "baz"];
    assert_eq!(
        segments(&[Text("foo"), Boundary, Text("bar"), Boundary, Text("baz")]),
        three_words
    );

    // A doubled boundary is expected to behave like a single one.
    let two_words = vec!["foo", "bar"];
    assert_eq!(
        segments(&[Text("foo"), Boundary, Boundary, Text("bar")]),
        two_words
    );
}
796
#[test]
fn test_segment_boundaries() {
    // First scope: names built from several parts joined by an explicit boundary.
    {
        let arena = Arena::new();
        let mut scope = UniqueNames::new(&arena);

        let claimed = scope.claim([Text("fooBar2"), Boundary, Text("baz3Qux")]);
        let expected = [
            NameSegment::Text("foo"),
            NameSegment::Boundary(SegmentBoundary::Case),
            NameSegment::Text("Bar"),
            NameSegment::Boundary(SegmentBoundary::LetterDigit),
            NameSegment::Text("2"),
            NameSegment::Boundary(SegmentBoundary::Separator),
            NameSegment::Text("baz"),
            NameSegment::Boundary(SegmentBoundary::LetterDigit),
            NameSegment::Text("3"),
            NameSegment::Boundary(SegmentBoundary::Case),
            NameSegment::Text("Qux"),
        ];
        assert_eq!(claimed.segments().collect_vec(), expected);

        let claimed = scope.claim([Text("foo"), Boundary, Text("2Bar")]);
        let expected = [
            NameSegment::Text("foo"),
            NameSegment::Boundary(SegmentBoundary::Separator),
            NameSegment::Text("2"),
            NameSegment::Boundary(SegmentBoundary::Case),
            NameSegment::Text("Bar"),
        ];
        assert_eq!(claimed.segments().collect_vec(), expected);
    }

    // Second scope: single-part names containing digit runs.
    {
        let arena = Arena::new();
        let mut scope = UniqueNames::new(&arena);

        let claimed = scope.claim([Text("foo2bar")]);
        let expected = [
            NameSegment::Text("foo"),
            NameSegment::Boundary(SegmentBoundary::LetterDigit),
            NameSegment::Text("2"),
            NameSegment::Boundary(SegmentBoundary::DigitLetter),
            NameSegment::Text("bar"),
        ];
        assert_eq!(claimed.segments().collect_vec(), expected);

        let claimed = scope.claim([Text("Vector3D")]);
        let expected = [
            NameSegment::Text("Vector"),
            NameSegment::Boundary(SegmentBoundary::LetterDigit),
            NameSegment::Text("3"),
            NameSegment::Boundary(SegmentBoundary::DigitLetter),
            NameSegment::Text("D"),
        ];
        assert_eq!(claimed.segments().collect_vec(), expected);

        let claimed = scope.claim([Text("50GBPerSecond")]);
        let expected = [
            NameSegment::Text("50"),
            NameSegment::Boundary(SegmentBoundary::DigitLetter),
            NameSegment::Text("GB"),
            NameSegment::Boundary(SegmentBoundary::Case),
            NameSegment::Text("Per"),
            NameSegment::Boundary(SegmentBoundary::Case),
            NameSegment::Text("Second"),
        ];
        assert_eq!(claimed.segments().collect_vec(), expected);
    }
}
872
#[test]
fn test_deduplication_http_response_collision() {
    let arena = Arena::new();
    let mut scope = UniqueNames::new(&arena);

    // The first spelling takes the bare name; the three that follow render
    // to the same PascalCase name and are expected to pick up suffixes 2..=4.
    let acronym = AsPascalCase(scope.claim([Text("HTTPResponse")])).to_string();
    assert_eq!(acronym, "HttpResponse");

    let acronym_parts = scope.claim([Text("HTTP"), Boundary, Text("Response")]);
    assert_eq!(AsPascalCase(acronym_parts).to_string(), "HttpResponse2");

    let camel = AsPascalCase(scope.claim([Text("httpResponse")])).to_string();
    assert_eq!(camel, "HttpResponse3");

    let lower_parts = scope.claim([Text("http"), Boundary, Text("response")]);
    assert_eq!(AsPascalCase(lower_parts).to_string(), "HttpResponse4");

    // `HTTPRESPONSE` isn't a collision; it's a single word.
    let shouted = AsPascalCase(scope.claim([Text("HTTPRESPONSE")])).to_string();
    assert_eq!(shouted, "Httpresponse");
}
900
#[test]
fn test_deduplication_xml_http_request() {
    let arena = Arena::new();
    let mut scope = UniqueNames::new(&arena);

    // Acronym-led spelling claims the unsuffixed name.
    let acronym = scope.claim([Text("XMLHttpRequest")]);
    assert_eq!(AsSnakeCase(acronym).to_string(), "xml_http_request");

    // The same three words, supplied as separate parts.
    let spelled_out = scope.claim([
        Text("xml"),
        Boundary,
        Text("http"),
        Boundary,
        Text("request"),
    ]);
    assert_eq!(AsSnakeCase(spelled_out).to_string(), "xml_http_request_2");

    // PascalCase spelling of the same words.
    let pascal = scope.claim([Text("XmlHttpRequest")]);
    assert_eq!(AsSnakeCase(pascal).to_string(), "xml_http_request_3");
}
926
#[test]
fn test_deduplication_separator_parts() {
    let arena = Arena::new();
    let mut scope = UniqueNames::new(&arena);

    let original = scope.claim([Text("foo"), Boundary, Text("bar")]);
    assert_eq!(AsSnakeCase(original).to_string(), "foo_bar");

    // An identical claim collides with the first one.
    let repeat = scope.claim([Text("foo"), Boundary, Text("bar")]);
    assert_eq!(AsSnakeCase(repeat).to_string(), "foo_bar_2");

    // Extra consecutive boundaries still collide with the earlier claims.
    let padded = scope.claim([Text("foo"), Boundary, Boundary, Boundary, Text("bar")]);
    assert_eq!(AsSnakeCase(padded).to_string(), "foo_bar_3");
}
946
#[test]
fn test_deduplication_preserves_first_slot() {
    let arena = Arena::new();
    let mut scope = UniqueNames::new(&arena);

    // Whichever spelling is claimed first keeps the unsuffixed name.
    let from_parts = scope.claim([Text("HTTP"), Boundary, Text("Response")]);
    assert_eq!(AsPascalCase(from_parts).to_string(), "HttpResponse");

    let from_camel = scope.claim([Text("httpResponse")]);
    assert_eq!(AsPascalCase(from_camel).to_string(), "HttpResponse2");
}
961
#[test]
fn test_deduplication_same_prefix() {
    let arena = Arena::new();
    let mut scope = UniqueNames::new(&arena);

    // Names that only share a leading word are distinct, so none gets a suffix.
    let request = AsPascalCase(scope.claim([Text("HttpRequest")])).to_string();
    assert_eq!(request, "HttpRequest");

    let response = AsPascalCase(scope.claim([Text("HttpResponse")])).to_string();
    assert_eq!(response, "HttpResponse");

    let error = AsPascalCase(scope.claim([Text("HttpError")])).to_string();
    assert_eq!(error, "HttpError");
}
980
#[test]
fn test_deduplication_with_numbers() {
    let arena = Arena::new();
    let mut scope = UniqueNames::new(&arena);

    // Trailing digits in the source name versus an explicit numeric part.
    let glued = scope.claim([Text("Response2")]);
    assert_eq!(AsSnakeCase(glued).to_string(), "response2");

    let split = scope.claim([Text("response"), Boundary, Text("2")]);
    assert_eq!(AsSnakeCase(split).to_string(), "response_3");

    let zero = scope.claim([Text("Response0")]);
    assert_eq!(AsSnakeCase(zero).to_string(), "response0");

    let bare = scope.claim([Text("response")]);
    assert_eq!(AsSnakeCase(bare).to_string(), "response");

    // Internal digit boundaries collapse in PascalCase.
    let http2 = scope.claim([Text("Http2Protocol")]);
    assert_eq!(AsPascalCase(http2).to_string(), "Http2Protocol");

    let http2_parts = scope.claim([Text("Http"), Boundary, Text("2Protocol")]);
    assert_eq!(AsPascalCase(http2_parts).to_string(), "Http2Protocol2");

    let sha_parts = scope.claim([Text("Sha2"), Boundary, Text("56Digest")]);
    assert_eq!(AsSnakeCase(sha_parts).to_string(), "sha2_56_digest");

    let sha = scope.claim([Text("Sha256Digest")]);
    assert_eq!(AsSnakeCase(sha).to_string(), "sha256_digest");

    let vector = scope.claim([Text("Vector3D")]);
    assert_eq!(AsSnakeCase(vector).to_string(), "vector3d");

    let throughput = scope.claim([Text("50GBPerSecond")]);
    assert_eq!(AsSnakeCase(throughput).to_string(), "50gb_per_second");

    let accented = scope.claim([Text("Caf\u{e9}2")]);
    assert_eq!(AsSnakeCase(accented).to_string(), "caf\u{e9}2");

    // Digit-to-uppercase collisions.
    let form = scope.claim([Text("1099KStatus")]);
    assert_eq!(AsPascalCase(form).to_string(), "1099KStatus");

    let form_two_parts = scope.claim([Text("1099K"), Boundary, Text("Status")]);
    assert_eq!(AsPascalCase(form_two_parts).to_string(), "1099KStatus2");

    let form_again = scope.claim([Text("1099KStatus")]);
    assert_eq!(AsPascalCase(form_again).to_string(), "1099KStatus3");

    let form_three_parts = scope.claim([
        Text("1099"),
        Boundary,
        Text("K"),
        Boundary,
        Text("Status"),
    ]);
    assert_eq!(AsPascalCase(form_three_parts).to_string(), "1099KStatus4");

    // Digit-to-lowercase collisions.
    let digits_glued = scope.claim([Text("123abc")]);
    assert_eq!(AsSnakeCase(digits_glued).to_string(), "123abc");

    let digits_split = scope.claim([Text("123"), Boundary, Text("abc")]);
    assert_eq!(AsSnakeCase(digits_split).to_string(), "123_abc_2");
}
1069
#[test]
fn test_deduplication_numeric_suffixes() {
    let arena = Arena::new();
    let mut scope = UniqueNames::new(&arena);

    // A source name that already ends in `2` keeps that digit as written.
    let versioned = scope.claim([Text("OAuth2")]);
    assert_eq!(AsSnakeCase(versioned).to_string(), "o_auth2");

    // The same stem and digit as separate parts is expected to move on to 3.
    let versioned_parts = scope.claim([Text("OAuth"), Boundary, Text("2")]);
    assert_eq!(AsSnakeCase(versioned_parts).to_string(), "o_auth_3");

    // The bare stem is still unclaimed at this point.
    let bare = scope.claim([Text("OAuth")]);
    assert_eq!(AsSnakeCase(bare).to_string(), "o_auth");

    let zero = scope.claim([Text("OAuth0")]);
    assert_eq!(AsSnakeCase(zero).to_string(), "o_auth0");
}
1092
#[test]
fn test_deduplication_numeric_suffix_preserves_source_boundary() {
    // A trailing `2` stays a separate segment behind a letter-digit boundary.
    {
        let arena = Arena::new();
        let mut scope = UniqueNames::new(&arena);

        let claimed = scope.claim([NamePart::Text("Response2")]);
        assert_eq!(
            claimed.segments().collect_vec(),
            &[
                NameSegment::Text("Response"),
                NameSegment::Boundary(SegmentBoundary::LetterDigit),
                NameSegment::Text("2"),
            ]
        );
    }

    // The same holds for a trailing `0`, checked in a fresh scope.
    {
        let arena = Arena::new();
        let mut scope = UniqueNames::new(&arena);

        let claimed = scope.claim([NamePart::Text("Response0")]);
        assert_eq!(
            claimed.segments().collect_vec(),
            &[
                NameSegment::Text("Response"),
                NameSegment::Boundary(SegmentBoundary::LetterDigit),
                NameSegment::Text("0"),
            ]
        );
    }
}
1123
#[test]
fn test_deduplication_numeric_suffix_slots() {
    // snake_case scope: a source `2` suffix is claimed before the bare stem.
    {
        let arena = Arena::new();
        let mut scope = UniqueNames::new(&arena);

        let glued = scope.claim([Text("v2")]);
        assert_eq!(AsSnakeCase(glued).to_string(), "v2");

        let split = scope.claim([Text("v"), Boundary, Text("2")]);
        assert_eq!(AsSnakeCase(split).to_string(), "v_3");

        let bare = scope.claim([Text("v")]);
        assert_eq!(AsSnakeCase(bare).to_string(), "v");

        let bare_again = scope.claim([Text("v")]);
        assert_eq!(AsSnakeCase(bare_again).to_string(), "v_4");
    }

    // kebab-case scope: the bare stem is claimed twice before a source `2` suffix.
    {
        let arena = Arena::new();
        let mut scope = UniqueNames::new(&arena);

        let first = scope.claim([Text("response")]);
        assert_eq!(AsKebabCase(first).to_string(), "response");

        let second = scope.claim([Text("response")]);
        assert_eq!(AsKebabCase(second).to_string(), "response-2");

        let glued = scope.claim([Text("response2")]);
        assert_eq!(AsKebabCase(glued).to_string(), "response3");

        let fourth = scope.claim([Text("response")]);
        assert_eq!(AsKebabCase(fourth).to_string(), "response-4");
    }
}
1157
#[test]
fn test_deduplication_source_zero_suffix_uses_own_slot() {
    // Bare stem first, then the `0`-suffixed spelling, then the bare stem again.
    {
        let arena = Arena::new();
        let mut scope = UniqueNames::new(&arena);

        let bare = scope.claim([Text("Response")]);
        assert_eq!(AsSnakeCase(bare).to_string(), "response");

        let zero = scope.claim([Text("Response0")]);
        assert_eq!(AsSnakeCase(zero).to_string(), "response0");

        let bare_again = scope.claim([Text("Response")]);
        assert_eq!(AsSnakeCase(bare_again).to_string(), "response_2");
    }

    // `0`-suffixed spelling first, then the bare stem, then the `0` spelling again.
    {
        let arena = Arena::new();
        let mut scope = UniqueNames::new(&arena);

        let zero = scope.claim([Text("Response0")]);
        assert_eq!(AsSnakeCase(zero).to_string(), "response0");

        let bare = scope.claim([Text("Response")]);
        assert_eq!(AsSnakeCase(bare).to_string(), "response");

        let zero_again = scope.claim([Text("Response0")]);
        assert_eq!(AsSnakeCase(zero_again).to_string(), "response1");
    }
}
1192
#[test]
fn test_deduplication_unicode_case_family() {
    let arena = Arena::new();
    let mut scope = UniqueNames::new(&arena);

    // `ß`, `SS` and `ss` are expected to land in the same collision family.
    let sharp_s = scope.claim([Text("ß")]);
    assert_eq!(AsSnakeCase(sharp_s).to_string(), "ß");

    let upper_ss = scope.claim([Text("SS")]);
    assert_eq!(AsSnakeCase(upper_ss).to_string(), "ss_2");

    let lower_ss = scope.claim([Text("ss")]);
    assert_eq!(AsSnakeCase(lower_ss).to_string(), "ss_3");

    // Dotted capital `İ` and `i` + combining dot above are expected to collide.
    let dotted_capital = scope.claim([Text("İ")]);
    assert_eq!(AsSnakeCase(dotted_capital).to_string(), "i\u{307}");

    let decomposed = scope.claim([Text("i\u{307}")]);
    assert_eq!(AsSnakeCase(decomposed).to_string(), "i\u{307}_2");
}
1210
#[test]
fn test_deduplication_normalizes_unicode_to_nfc() {
    let arena = Arena::new();
    let mut scope = UniqueNames::new(&arena);

    // `e` + combining acute accent is expected to render as precomposed `é`.
    let decomposed = scope.claim([Text("cafe\u{301}")]);
    assert_eq!(AsSnakeCase(decomposed).to_string(), "caf\u{e9}");

    // The precomposed spelling therefore collides with the claim above.
    let precomposed = scope.claim([Text("caf\u{e9}")]);
    assert_eq!(AsSnakeCase(precomposed).to_string(), "caf\u{e9}_2");
}
1225
#[test]
fn test_deduplication_empty_names_start_at_one() {
    let arena = Arena::new();
    let mut scope = UniqueNames::new(&arena);

    // Claims with no text at all render as consecutive numbers starting at 1.
    let no_parts = scope.claim([]);
    assert_eq!(AsSnakeCase(no_parts).to_string(), "1");

    let one_boundary = scope.claim([Boundary]);
    assert_eq!(AsSnakeCase(one_boundary).to_string(), "2");

    let three_boundaries = scope.claim([Boundary, Boundary, Boundary]);
    assert_eq!(AsSnakeCase(three_boundaries).to_string(), "3");
}
1238
#[test]
fn test_deduplication_numeric_names_share_empty_stem() {
    // A digit-only name and an empty name draw from the same number sequence.
    {
        let arena = Arena::new();
        let mut scope = UniqueNames::new(&arena);

        let two = scope.claim([Text("2")]);
        assert_eq!(AsSnakeCase(two).to_string(), "2");

        let empty = scope.claim([]);
        assert_eq!(AsSnakeCase(empty).to_string(), "1");

        let two_again = scope.claim([Text("2")]);
        assert_eq!(AsSnakeCase(two_again).to_string(), "3");
    }

    // A source `0` is expected to render as 1, leaving 2 for the empty name.
    {
        let arena = Arena::new();
        let mut scope = UniqueNames::new(&arena);

        let zero = scope.claim([Text("0")]);
        assert_eq!(AsSnakeCase(zero).to_string(), "1");

        let empty = scope.claim([]);
        assert_eq!(AsSnakeCase(empty).to_string(), "2");
    }
}
1254
#[test]
fn test_reserved_digit_only_names_share_empty_stem_sequence() {
    let arena = Arena::new();
    let reserved = [[Text("0")]];
    let mut scope = UniqueNames::with_reserved(&arena, reserved);

    // With `0` reserved up front, later claims are expected to continue from 2.
    let zero = scope.claim([Text("0")]);
    assert_eq!(AsSnakeCase(zero).to_string(), "2");

    let empty = scope.claim([]);
    assert_eq!(AsSnakeCase(empty).to_string(), "3");
}
1263
#[test]
fn test_reserved_boundary_only_shares_empty_stem_sequence() {
    let arena = Arena::new();
    let reserved = [[Boundary]];
    let mut scope = UniqueNames::with_reserved(&arena, reserved);

    // The reserved boundary-only name occupies the first slot of the sequence.
    let first = scope.claim([Boundary]);
    assert_eq!(AsSnakeCase(first).to_string(), "2");

    let second = scope.claim([Boundary]);
    assert_eq!(AsSnakeCase(second).to_string(), "3");
}
1272
#[test]
fn test_reserved_multiple() {
    let arena = Arena::new();
    let reserved = [[Boundary], [Text("reserved")]];
    let mut scope = UniqueNames::with_reserved(&arena, reserved);

    // Both reserved entries push a matching claim to the next slot.
    let empty = scope.claim([Boundary]);
    assert_eq!(AsSnakeCase(empty).to_string(), "2");

    let clash = scope.claim([Text("reserved")]);
    assert_eq!(AsSnakeCase(clash).to_string(), "reserved_2");

    // A name that was never reserved is unaffected.
    let unrelated = scope.claim([Text("other")]);
    assert_eq!(AsSnakeCase(unrelated).to_string(), "other");
}
1288
#[test]
fn test_reserved_numeric_suffixes() {
    // Reserved bare name: both the bare and the `2`-suffixed spelling are bumped.
    {
        let arena = Arena::new();
        let mut scope = UniqueNames::with_reserved(&arena, [[Text("crate")]]);

        let bare = scope.claim([Text("crate")]);
        assert_eq!(AsSnakeCase(bare).to_string(), "crate_2");

        let suffixed = scope.claim([Text("crate2")]);
        assert_eq!(AsSnakeCase(suffixed).to_string(), "crate3");
    }

    // Reserved `0`-suffixed name: the bare stem stays free, the `0` spelling is bumped.
    {
        let arena = Arena::new();
        let mut scope = UniqueNames::with_reserved(&arena, [[Text("Response0")]]);

        let bare = scope.claim([Text("Response")]);
        assert_eq!(AsSnakeCase(bare).to_string(), "response");

        let zero = scope.claim([Text("Response0")]);
        assert_eq!(AsSnakeCase(zero).to_string(), "response1");
    }
}
1315}