1use std::borrow::Cow;
2use std::collections::btree_map::Entry;
3use std::str::CharIndices;
4use std::{collections::BTreeMap, iter::Peekable};
5
6use bumpalo::{
7 Bump,
8 collections::{CollectIn, Vec as BumpVec},
9};
10use unicase::UniCase;
11
12#[derive(Debug, Default)]
14pub struct UniqueNames(Bump);
15
16impl UniqueNames {
17 #[inline]
19 pub fn new() -> Self {
20 Self::default()
21 }
22
23 #[inline]
33 pub fn scope(&self) -> UniqueNamesScope<'_> {
34 UniqueNamesScope::new(&self.0)
35 }
36
37 #[inline]
54 pub fn scope_with_reserved<S: AsRef<str>>(
55 &self,
56 reserved: impl IntoIterator<Item = S>,
57 ) -> UniqueNamesScope<'_> {
58 UniqueNamesScope::with_reserved(&self.0, reserved)
59 }
60}
61
62#[derive(Debug)]
64pub struct UniqueNamesScope<'a> {
65 arena: &'a Bump,
66 space: BTreeMap<&'a [UniCase<&'a str>], usize>,
67}
68
69impl<'a> UniqueNamesScope<'a> {
70 fn new(arena: &'a Bump) -> Self {
71 Self {
72 arena,
73 space: BTreeMap::new(),
74 }
75 }
76
77 fn with_reserved<S: AsRef<str>>(
78 arena: &'a Bump,
79 reserved: impl IntoIterator<Item = S>,
80 ) -> Self {
81 let space = reserved
82 .into_iter()
83 .map(|name| arena.alloc_str(name.as_ref()))
84 .map(|name| {
85 WordSegments::new(name)
86 .map(UniCase::new)
87 .collect_in::<BumpVec<_>>(arena)
88 })
89 .fold(BTreeMap::new(), |mut names, segments| {
90 names.insert(segments.into_bump_slice(), 1);
92 names
93 });
94 Self { arena, space }
95 }
96
97 pub fn uniquify<'b>(&mut self, name: &'b str) -> Cow<'b, str> {
112 match self.space.entry(
113 WordSegments::new(name)
114 .map(|name| UniCase::new(&*self.arena.alloc_str(name)))
115 .collect_in::<BumpVec<_>>(self.arena)
116 .into_bump_slice(),
117 ) {
118 Entry::Occupied(mut entry) => {
119 let count = entry.get_mut();
120 *count += 1;
121 format!("{name}{count}").into()
122 }
123 Entry::Vacant(entry) => {
124 entry.insert(1);
125 name.into()
126 }
127 }
128 }
129}
130
131pub struct WordSegments<'a> {
159 input: &'a str,
160 chars: Peekable<CharIndices<'a>>,
161 current_word_starts_at: Option<usize>,
162 mode: WordMode,
163}
164
165impl<'a> WordSegments<'a> {
166 #[inline]
167 pub fn new(input: &'a str) -> Self {
168 Self {
169 input,
170 chars: input.char_indices().peekable(),
171 current_word_starts_at: None,
172 mode: WordMode::Boundary,
173 }
174 }
175}
176
177impl<'a> Iterator for WordSegments<'a> {
178 type Item = &'a str;
179
180 fn next(&mut self) -> Option<Self::Item> {
181 while let Some((index, c)) = self.chars.next() {
182 if c.is_uppercase() {
183 match self.mode {
184 WordMode::Boundary => {
185 let start = self.current_word_starts_at.replace(index);
187 self.mode = WordMode::Uppercase;
188 if let Some(start) = start {
189 return Some(&self.input[start..index]);
190 }
191 }
192 WordMode::Lowercase => {
193 let start = self.current_word_starts_at.replace(index);
196 self.mode = WordMode::Uppercase;
197 if let Some(start) = start {
198 return Some(&self.input[start..index]);
199 }
200 }
201 WordMode::Uppercase => {
202 let next_is_lowercase = self
203 .chars
204 .peek()
205 .map(|&(_, next)| next.is_lowercase())
206 .unwrap_or(false);
207 if next_is_lowercase && let Some(start) = self.current_word_starts_at {
208 self.current_word_starts_at = Some(index);
211 return Some(&self.input[start..index]);
212 }
213 }
215 }
216 } else if c.is_lowercase() {
217 match self.mode {
218 WordMode::Boundary => {
219 let start = self.current_word_starts_at.replace(index);
221 self.mode = WordMode::Lowercase;
222 if let Some(start) = start {
223 return Some(&self.input[start..index]);
224 }
225 }
226 WordMode::Lowercase | WordMode::Uppercase => {
227 if self.current_word_starts_at.is_none() {
228 self.current_word_starts_at = Some(index);
230 }
231 self.mode = WordMode::Lowercase;
232 }
233 }
234 } else if !c.is_alphanumeric() {
235 let start = std::mem::take(&mut self.current_word_starts_at);
237 self.mode = WordMode::Boundary;
238 if let Some(start) = start {
239 return Some(&self.input[start..index]);
240 }
241 } else {
242 if self.current_word_starts_at.is_none() {
244 self.current_word_starts_at = Some(index);
245 }
246 }
247 }
248 if let Some(start) = std::mem::take(&mut self.current_word_starts_at) {
249 return Some(&self.input[start..]);
251 }
252 None
253 }
254}
255
/// Scanner state of [`WordSegments`]: what kind of character was last seen
/// that affects where the next segment may begin.
#[derive(Clone, Copy)]
enum WordMode {
    /// Initial state, and the state after any non-alphanumeric character.
    Boundary,
    /// The most recent cased letter was lowercase.
    Lowercase,
    /// The most recent cased letter was uppercase.
    Uppercase,
}
267
#[cfg(test)]
mod tests {
    use super::*;
    use itertools::Itertools;

    /// Collects the word segments of `input` into a vector.
    fn segments(input: &str) -> Vec<&str> {
        WordSegments::new(input).collect_vec()
    }

    /// Feeds each input to `scope` in order and checks the name it returns.
    fn assert_uniquified(scope: &mut UniqueNamesScope<'_>, cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(scope.uniquify(input), expected, "uniquifying {input:?}");
        }
    }

    #[test]
    fn test_segment_camel_case() {
        assert_eq!(segments("camelCase"), ["camel", "Case"]);
        assert_eq!(segments("httpResponse"), ["http", "Response"]);
    }

    #[test]
    fn test_segment_pascal_case() {
        assert_eq!(segments("PascalCase"), ["Pascal", "Case"]);
        assert_eq!(segments("HttpResponse"), ["Http", "Response"]);
    }

    #[test]
    fn test_segment_snake_case() {
        assert_eq!(segments("snake_case"), ["snake", "case"]);
        assert_eq!(segments("http_response"), ["http", "response"]);
    }

    #[test]
    fn test_segment_screaming_snake() {
        assert_eq!(segments("SCREAMING_SNAKE"), ["SCREAMING", "SNAKE"]);
        assert_eq!(segments("HTTP_RESPONSE"), ["HTTP", "RESPONSE"]);
    }

    #[test]
    fn test_segment_consecutive_uppercase() {
        assert_eq!(segments("XMLHttpRequest"), ["XML", "Http", "Request"]);
        assert_eq!(segments("HTTPResponse"), ["HTTP", "Response"]);
        assert_eq!(segments("HTTP_Response"), ["HTTP", "Response"]);
        assert_eq!(segments("ALLCAPS"), ["ALLCAPS"]);
    }

    #[test]
    fn test_segment_with_numbers() {
        // Digits stay attached to the word they follow.
        assert_eq!(segments("Response2"), ["Response2"]);
        assert_eq!(segments("response_2"), ["response", "2"]);
        assert_eq!(segments("HTTP2Protocol"), ["HTTP2", "Protocol"]);
        assert_eq!(segments("OAuth2Token"), ["O", "Auth2", "Token"]);
        assert_eq!(segments("HTTP2XML"), ["HTTP2XML"]);
        // A leading run of digits is split from the first letter after it.
        assert_eq!(segments("1099KStatus"), ["1099", "K", "Status"]);
        assert_eq!(segments("123abc"), ["123", "abc"]);
        assert_eq!(segments("123ABC"), ["123", "ABC"]);
    }

    #[test]
    fn test_segment_empty_and_special() {
        assert!(segments("").is_empty());
        assert!(segments("___").is_empty());
        assert_eq!(segments("a"), ["a"]);
        assert_eq!(segments("A"), ["A"]);
    }

    #[test]
    fn test_segment_mixed_separators() {
        assert_eq!(segments("foo-bar_baz"), ["foo", "bar", "baz"]);
        assert_eq!(segments("foo--bar"), ["foo", "bar"]);
    }

    #[test]
    fn test_deduplication_http_response_collision() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope();

        assert_uniquified(
            &mut scope,
            &[
                ("HTTPResponse", "HTTPResponse"),
                ("HTTP_Response", "HTTP_Response2"),
                ("httpResponse", "httpResponse3"),
                ("http_response", "http_response4"),
                // A single all-caps word has different segments, so no clash.
                ("HTTPRESPONSE", "HTTPRESPONSE"),
            ],
        );
    }

    #[test]
    fn test_deduplication_xml_http_request() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope();

        assert_uniquified(
            &mut scope,
            &[
                ("XMLHttpRequest", "XMLHttpRequest"),
                ("xml_http_request", "xml_http_request2"),
                ("XmlHttpRequest", "XmlHttpRequest3"),
            ],
        );
    }

    #[test]
    fn test_deduplication_preserves_original_casing() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope();

        assert_uniquified(
            &mut scope,
            &[
                ("HTTP_Response", "HTTP_Response"),
                ("httpResponse", "httpResponse2"),
            ],
        );
    }

    #[test]
    fn test_deduplication_same_prefix() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope();

        assert_uniquified(
            &mut scope,
            &[
                ("HttpRequest", "HttpRequest"),
                ("HttpResponse", "HttpResponse"),
                ("HttpError", "HttpError"),
            ],
        );
    }

    #[test]
    fn test_deduplication_with_numbers() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope();

        assert_uniquified(
            &mut scope,
            &[
                // `Response2` is one segment, `response_2` is two.
                ("Response2", "Response2"),
                ("response_2", "response_2"),
                // All of these segment into `1099`, `K`, `Status`.
                ("1099KStatus", "1099KStatus"),
                ("1099K_Status", "1099K_Status2"),
                ("1099KStatus", "1099KStatus3"),
                ("1099_K_Status", "1099_K_Status4"),
                // Both segment into `123`, `abc`.
                ("123abc", "123abc"),
                ("123_abc", "123_abc2"),
            ],
        );
    }

    #[test]
    fn test_with_reserved_underscore() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope_with_reserved(["_"]);

        assert_uniquified(&mut scope, &[("_", "_2"), ("_", "_3")]);
    }

    #[test]
    fn test_with_reserved_multiple() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope_with_reserved(["_", "reserved"]);

        assert_uniquified(
            &mut scope,
            &[("_", "_2"), ("reserved", "reserved2"), ("other", "other")],
        );
    }

    #[test]
    fn test_with_reserved_empty() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope_with_reserved([""]);

        assert_uniquified(&mut scope, &[("", "2"), ("", "3")]);
    }
}