ploidy_core/codegen/
unique.rs

1use std::borrow::Cow;
2use std::collections::btree_map::Entry;
3use std::str::CharIndices;
4use std::{collections::BTreeMap, iter::Peekable};
5
6use unicase::UniCase;
7
8/// Produces names that will never collide with other names in this space,
9/// even when converted to a different case.
10///
11/// [`UniqueNameSpace`] exists to disambiguate type and field names
12/// that are distinct in the source spec, but collide when transformed
13/// to a different case. (For example, both `HTTP_Response` and `HTTPResponse`
14/// become `http_response` in snake case).
15#[derive(Debug, Default)]
16pub struct UniqueNameSpace<'a>(BTreeMap<Box<[UniCase<&'a str>]>, usize>);
17
18impl<'a> UniqueNameSpace<'a> {
19    #[inline]
20    pub fn new() -> Self {
21        Self::default()
22    }
23
24    /// Returns a unique name, ignoring case and case transformations.
25    /// The unique name preserves the case of the original name, but adds
26    /// a numeric suffix on collisions.
27    ///
28    /// # Examples
29    ///
30    /// ```
31    /// # use ploidy_core::codegen::UniqueNameSpace;
32    /// # let mut space = UniqueNameSpace::new();
33    /// assert_eq!(space.uniquify("HTTPResponse"), "HTTPResponse");
34    /// assert_eq!(space.uniquify("HTTP_Response"), "HTTP_Response2");
35    /// assert_eq!(space.uniquify("httpResponse"), "httpResponse3");
36    /// ```
37    #[inline]
38    pub fn uniquify(&mut self, name: &'a str) -> Cow<'a, str> {
39        match self
40            .0
41            .entry(WordSegments::new(name).map(UniCase::new).collect())
42        {
43            Entry::Occupied(mut entry) => {
44                let count = entry.get_mut();
45                *count += 1;
46                format!("{name}{count}").into()
47            }
48            Entry::Vacant(entry) => {
49                entry.insert(1);
50                name.into()
51            }
52        }
53    }
54}
55
/// Segments a string into words, following Heck's notion of word boundaries.
///
/// Non-alphanumeric characters separate words and are never part of one.
/// Within a run of alphanumeric characters, a new word starts at a
/// lowercase-to-uppercase transition (`camelCase`), and before the last
/// uppercase character of an uppercase run that is followed by a lowercase
/// character (`XMLHttp`). Digits and other uncased alphanumeric characters
/// never introduce a boundary; they attach to the word they appear in.
///
/// # Examples
///
/// ```
/// # use itertools::Itertools;
/// # use ploidy_core::codegen::WordSegments;
/// assert_eq!(WordSegments::new("HTTPResponse").collect_vec(), vec!["HTTP", "Response"]);
/// assert_eq!(WordSegments::new("HTTP_Response").collect_vec(), vec!["HTTP", "Response"]);
/// assert_eq!(WordSegments::new("httpResponse").collect_vec(), vec!["http", "Response"]);
/// assert_eq!(WordSegments::new("XMLHttpRequest").collect_vec(), vec!["XML", "Http", "Request"]);
/// assert_eq!(WordSegments::new("2Fast").collect_vec(), vec!["2Fast"]);
/// ```
#[derive(Debug, Clone)]
pub struct WordSegments<'a> {
    /// The full input; yielded words are slices of it.
    input: &'a str,
    /// Remaining characters, with one character of lookahead.
    chars: Peekable<CharIndices<'a>>,
    /// Byte offset at which the word in progress starts, if there is one.
    current_word_starts_at: Option<usize>,
    /// Case of the most recent cased character in the word in progress.
    mode: WordMode,
}

impl<'a> WordSegments<'a> {
    /// Creates an iterator over the words of `input`.
    #[inline]
    pub fn new(input: &'a str) -> Self {
        Self {
            input,
            chars: input.char_indices().peekable(),
            current_word_starts_at: None,
            mode: WordMode::Boundary,
        }
    }

    /// Ends the word in progress at byte offset `end` (exclusive), and
    /// records `next_start` as the start of the following word.
    ///
    /// Returns the finished word, or `None` if no word was in progress.
    fn finish_word(&mut self, end: usize, next_start: Option<usize>) -> Option<&'a str> {
        let start = std::mem::replace(&mut self.current_word_starts_at, next_start)?;
        Some(&self.input[start..end])
    }
}

impl<'a> Iterator for WordSegments<'a> {
    type Item = &'a str;

    fn next(&mut self) -> Option<Self::Item> {
        while let Some((index, c)) = self.chars.next() {
            let finished = if c.is_uppercase() {
                // Every uppercase character leaves us in uppercase mode;
                // what matters is the mode we were in before it.
                let previous_mode = std::mem::replace(&mut self.mode, WordMode::Uppercase);
                match previous_mode {
                    WordMode::Boundary => {
                        // Start a new word with this uppercase character,
                        // unless uncased characters (like the "2" in
                        // "2Fast") have already started one; those stay
                        // attached to the word instead of being dropped.
                        self.current_word_starts_at = self.current_word_starts_at.or(Some(index));
                        None
                    }
                    // camelCased word (previous was lowercase; current is
                    // uppercase), start a new word.
                    WordMode::Lowercase => self.finish_word(index, Some(index)),
                    WordMode::Uppercase => {
                        let next_is_lowercase = self
                            .chars
                            .peek()
                            .is_some_and(|&(_, next)| next.is_lowercase());
                        if next_is_lowercase {
                            // `XMLHttp` case; start a new word with this
                            // uppercase character (the "H" in "Http").
                            self.finish_word(index, Some(index))
                        } else {
                            // (Stay in uppercase mode).
                            None
                        }
                    }
                }
            } else if c.is_lowercase() {
                // Start a new word with this lowercase character (the "c"
                // in "camelCase"), or continue the word in progress.
                self.current_word_starts_at = self.current_word_starts_at.or(Some(index));
                self.mode = WordMode::Lowercase;
                None
            } else if c.is_alphanumeric() {
                // Digit or other uncased character: start or continue a
                // word, leaving the mode as it was.
                self.current_word_starts_at = self.current_word_starts_at.or(Some(index));
                None
            } else {
                // Separator: ends the word in progress, if any, and is not
                // part of any word itself.
                self.mode = WordMode::Boundary;
                self.finish_word(index, None)
            };
            if finished.is_some() {
                return finished;
            }
        }
        // Trailing word.
        self.current_word_starts_at
            .take()
            .map(|start| &self.input[start..])
    }
}

/// The current state of a [`WordSegments`] iterator.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum WordMode {
    /// At a word boundary: either at the start of a new word, or
    /// after a non-alphanumeric character.
    Boundary,
    /// Currently in a lowercase segment.
    Lowercase,
    /// Currently in an uppercase segment.
    Uppercase,
}
162
#[cfg(test)]
mod tests {
    use super::*;
    use itertools::Itertools;

    /// Asserts that segmenting `input` yields exactly the `expected` words.
    fn assert_words(input: &str, expected: &[&str]) {
        assert_eq!(WordSegments::new(input).collect_vec(), expected);
    }

    /// Uniquifies each input in order within `space`, asserting that the
    /// returned name matches the paired expectation.
    fn assert_uniquified(
        space: &mut UniqueNameSpace<'static>,
        cases: &[(&'static str, &'static str)],
    ) {
        for &(input, expected) in cases {
            assert_eq!(space.uniquify(input), expected);
        }
    }

    #[test]
    fn test_segment_camel_case() {
        assert_words("camelCase", &["camel", "Case"]);
        assert_words("httpResponse", &["http", "Response"]);
    }

    #[test]
    fn test_segment_pascal_case() {
        assert_words("PascalCase", &["Pascal", "Case"]);
        assert_words("HttpResponse", &["Http", "Response"]);
    }

    #[test]
    fn test_segment_snake_case() {
        assert_words("snake_case", &["snake", "case"]);
        assert_words("http_response", &["http", "response"]);
    }

    #[test]
    fn test_segment_screaming_snake() {
        assert_words("SCREAMING_SNAKE", &["SCREAMING", "SNAKE"]);
        assert_words("HTTP_RESPONSE", &["HTTP", "RESPONSE"]);
    }

    #[test]
    fn test_segment_consecutive_uppercase() {
        assert_words("XMLHttpRequest", &["XML", "Http", "Request"]);
        assert_words("HTTPResponse", &["HTTP", "Response"]);
        assert_words("HTTP_Response", &["HTTP", "Response"]);
        assert_words("ALLCAPS", &["ALLCAPS"]);
    }

    #[test]
    fn test_segment_with_numbers() {
        assert_words("Response2", &["Response2"]);
        assert_words("response_2", &["response", "2"]);
        assert_words("HTTP2Protocol", &["HTTP2", "Protocol"]);
        assert_words("OAuth2Token", &["O", "Auth2", "Token"]);
        assert_words("HTTP2XML", &["HTTP2XML"]);
    }

    #[test]
    fn test_segment_empty_and_special() {
        assert_words("", &[]);
        assert_words("___", &[]);
        assert_words("a", &["a"]);
        assert_words("A", &["A"]);
    }

    #[test]
    fn test_segment_mixed_separators() {
        assert_words("foo-bar_baz", &["foo", "bar", "baz"]);
        assert_words("foo--bar", &["foo", "bar"]);
    }

    #[test]
    fn test_deduplication_http_response_collision() {
        let mut space = UniqueNameSpace::new();
        assert_uniquified(
            &mut space,
            &[
                ("HTTPResponse", "HTTPResponse"),
                ("HTTP_Response", "HTTP_Response2"),
                ("httpResponse", "httpResponse3"),
                ("http_response", "http_response4"),
                // `HTTPRESPONSE` isn't a collision; it's a single word.
                ("HTTPRESPONSE", "HTTPRESPONSE"),
            ],
        );
    }

    #[test]
    fn test_deduplication_xml_http_request() {
        let mut space = UniqueNameSpace::new();
        assert_uniquified(
            &mut space,
            &[
                ("XMLHttpRequest", "XMLHttpRequest"),
                ("xml_http_request", "xml_http_request2"),
                ("XmlHttpRequest", "XmlHttpRequest3"),
            ],
        );
    }

    #[test]
    fn test_deduplication_preserves_original_casing() {
        let mut space = UniqueNameSpace::new();
        assert_uniquified(
            &mut space,
            &[
                ("HTTP_Response", "HTTP_Response"),
                ("httpResponse", "httpResponse2"),
            ],
        );
    }

    #[test]
    fn test_deduplication_same_prefix() {
        let mut space = UniqueNameSpace::new();
        // Names sharing a leading word are still distinct.
        assert_uniquified(
            &mut space,
            &[
                ("HttpRequest", "HttpRequest"),
                ("HttpResponse", "HttpResponse"),
                ("HttpError", "HttpError"),
            ],
        );
    }

    #[test]
    fn test_deduplication_with_numbers() {
        let mut space = UniqueNameSpace::new();
        // A separator before the digit makes it a word of its own, so
        // these two names don't collide.
        assert_uniquified(
            &mut space,
            &[("Response2", "Response2"), ("response_2", "response_2")],
        );
    }
}