ploidy_core/codegen/
unique.rs1use std::borrow::Cow;
2use std::collections::btree_map::Entry;
3use std::str::CharIndices;
4use std::{collections::BTreeMap, iter::Peekable};
5
6use unicase::UniCase;
7
8#[derive(Debug, Default)]
16pub struct UniqueNameSpace<'a>(BTreeMap<Box<[UniCase<&'a str>]>, usize>);
17
18impl<'a> UniqueNameSpace<'a> {
19 #[inline]
20 pub fn new() -> Self {
21 Self::default()
22 }
23
24 #[inline]
38 pub fn uniquify(&mut self, name: &'a str) -> Cow<'a, str> {
39 match self
40 .0
41 .entry(WordSegments::new(name).map(UniCase::new).collect())
42 {
43 Entry::Occupied(mut entry) => {
44 let count = entry.get_mut();
45 *count += 1;
46 format!("{name}{count}").into()
47 }
48 Entry::Vacant(entry) => {
49 entry.insert(1);
50 name.into()
51 }
52 }
53 }
54}
55
/// An iterator over the words of an identifier-like string.
///
/// Boundaries are detected as follows:
///
/// * Any non-alphanumeric character (`_`, `-`, ...) ends the current word
///   and is discarded; runs of such characters produce no empty words.
/// * An uppercase letter that follows a lowercase run starts a new word
///   (`camelCase` yields `camel`, `Case`).
/// * Inside an uppercase run, an uppercase letter that is immediately
///   followed by a lowercase letter starts a new word (`XMLHttp` yields
///   `XML`, `Http`).
/// * Alphanumerics without case, such as digits, never introduce a boundary:
///   they extend the word in progress or start one (`HTTP2XML` stays whole,
///   `2Fast` stays whole).
///
/// Every yielded word is a sub-slice of the input.
#[derive(Clone, Debug)]
pub struct WordSegments<'a> {
    /// The string being segmented; yielded words borrow from it.
    input: &'a str,
    /// Remaining characters with their byte offsets; peekable because the
    /// uppercase-run rule needs one character of lookahead.
    chars: Peekable<CharIndices<'a>>,
    /// Byte offset at which the word in progress began, if there is one.
    current_word_starts_at: Option<usize>,
    /// Case of the most recent cased character since the last separator.
    mode: WordMode,
}

impl<'a> WordSegments<'a> {
    /// Creates a segmenter positioned at the start of `input`.
    #[inline]
    pub fn new(input: &'a str) -> Self {
        Self {
            input,
            chars: input.char_indices().peekable(),
            current_word_starts_at: None,
            mode: WordMode::Boundary,
        }
    }
}

impl<'a> Iterator for WordSegments<'a> {
    type Item = &'a str;

    fn next(&mut self) -> Option<Self::Item> {
        while let Some((index, c)) = self.chars.next() {
            if c.is_uppercase() {
                match self.mode {
                    WordMode::Boundary => {
                        // A caseless character (e.g. a digit) may already
                        // have opened the word; keep its start so that those
                        // characters are not dropped from the output.
                        if self.current_word_starts_at.is_none() {
                            self.current_word_starts_at = Some(index);
                        }
                        self.mode = WordMode::Uppercase;
                    }
                    WordMode::Lowercase => {
                        // lower -> Upper: the previous word ends here and a
                        // new one begins at this character.
                        let finished = self.current_word_starts_at.replace(index);
                        self.mode = WordMode::Uppercase;
                        if let Some(start) = finished {
                            return Some(&self.input[start..index]);
                        }
                    }
                    WordMode::Uppercase => {
                        // In an uppercase run, only split when this letter
                        // begins a capitalized word, i.e. the next character
                        // is lowercase ("XMLHttp" -> "XML" | "Http").
                        let next_is_lowercase = self
                            .chars
                            .peek()
                            .is_some_and(|&(_, next)| next.is_lowercase());
                        let finished = self.current_word_starts_at.filter(|_| next_is_lowercase);
                        if let Some(start) = finished {
                            self.current_word_starts_at = Some(index);
                            return Some(&self.input[start..index]);
                        }
                    }
                }
            } else if c.is_lowercase() {
                if self.current_word_starts_at.is_none() {
                    self.current_word_starts_at = Some(index);
                }
                self.mode = WordMode::Lowercase;
            } else if !c.is_alphanumeric() {
                // Separator: close the word in progress, if any.
                self.mode = WordMode::Boundary;
                if let Some(start) = self.current_word_starts_at.take() {
                    return Some(&self.input[start..index]);
                }
            } else if self.current_word_starts_at.is_none() {
                // Caseless alphanumeric: may open a word, never splits one,
                // and leaves the current mode untouched.
                self.current_word_starts_at = Some(index);
            }
        }
        // Input exhausted: flush the trailing word, if there is one.
        let start = self.current_word_starts_at.take()?;
        Some(&self.input[start..])
    }
}

/// Segmenter state: what kind of cased character was seen last.
#[derive(Clone, Copy, Debug)]
enum WordMode {
    /// At the start of the input or just after a separator.
    Boundary,
    /// The last cased character was lowercase.
    Lowercase,
    /// The last cased character was uppercase.
    Uppercase,
}
162
#[cfg(test)]
mod tests {
    use super::*;
    use itertools::Itertools;

    /// Asserts that `input` is segmented into exactly `expected`.
    #[track_caller]
    fn assert_words(input: &str, expected: &[&str]) {
        assert_eq!(WordSegments::new(input).collect_vec(), expected);
    }

    /// Feeds each name, in order, through one fresh namespace and checks the
    /// name handed back for it.
    #[track_caller]
    fn assert_uniquified(steps: &[(&str, &str)]) {
        let mut namespace = UniqueNameSpace::new();
        for &(name, expected) in steps {
            assert_eq!(namespace.uniquify(name), expected);
        }
    }

    #[test]
    fn test_segment_camel_case() {
        assert_words("camelCase", &["camel", "Case"]);
        assert_words("httpResponse", &["http", "Response"]);
    }

    #[test]
    fn test_segment_pascal_case() {
        assert_words("PascalCase", &["Pascal", "Case"]);
        assert_words("HttpResponse", &["Http", "Response"]);
    }

    #[test]
    fn test_segment_snake_case() {
        assert_words("snake_case", &["snake", "case"]);
        assert_words("http_response", &["http", "response"]);
    }

    #[test]
    fn test_segment_screaming_snake() {
        assert_words("SCREAMING_SNAKE", &["SCREAMING", "SNAKE"]);
        assert_words("HTTP_RESPONSE", &["HTTP", "RESPONSE"]);
    }

    #[test]
    fn test_segment_consecutive_uppercase() {
        assert_words("XMLHttpRequest", &["XML", "Http", "Request"]);
        assert_words("HTTPResponse", &["HTTP", "Response"]);
        assert_words("HTTP_Response", &["HTTP", "Response"]);
        assert_words("ALLCAPS", &["ALLCAPS"]);
    }

    #[test]
    fn test_segment_with_numbers() {
        assert_words("Response2", &["Response2"]);
        assert_words("response_2", &["response", "2"]);
        assert_words("HTTP2Protocol", &["HTTP2", "Protocol"]);
        assert_words("OAuth2Token", &["O", "Auth2", "Token"]);
        assert_words("HTTP2XML", &["HTTP2XML"]);
    }

    #[test]
    fn test_segment_empty_and_special() {
        assert_words("", &[]);
        assert_words("___", &[]);
        assert_words("a", &["a"]);
        assert_words("A", &["A"]);
    }

    #[test]
    fn test_segment_mixed_separators() {
        assert_words("foo-bar_baz", &["foo", "bar", "baz"]);
        assert_words("foo--bar", &["foo", "bar"]);
    }

    #[test]
    fn test_deduplication_http_response_collision() {
        assert_uniquified(&[
            ("HTTPResponse", "HTTPResponse"),
            ("HTTP_Response", "HTTP_Response2"),
            ("httpResponse", "httpResponse3"),
            ("http_response", "http_response4"),
            // An all-caps run has no internal boundary, so this is a single
            // word and does not collide with the two-word names above.
            ("HTTPRESPONSE", "HTTPRESPONSE"),
        ]);
    }

    #[test]
    fn test_deduplication_xml_http_request() {
        assert_uniquified(&[
            ("XMLHttpRequest", "XMLHttpRequest"),
            ("xml_http_request", "xml_http_request2"),
            ("XmlHttpRequest", "XmlHttpRequest3"),
        ]);
    }

    #[test]
    fn test_deduplication_preserves_original_casing() {
        assert_uniquified(&[
            ("HTTP_Response", "HTTP_Response"),
            ("httpResponse", "httpResponse2"),
        ]);
    }

    #[test]
    fn test_deduplication_same_prefix() {
        assert_uniquified(&[
            ("HttpRequest", "HttpRequest"),
            ("HttpResponse", "HttpResponse"),
            ("HttpError", "HttpError"),
        ]);
    }

    #[test]
    fn test_deduplication_with_numbers() {
        assert_uniquified(&[("Response2", "Response2"), ("response_2", "response_2")]);
    }
}