1use std::{borrow::Cow, collections::hash_map::Entry, iter::Peekable, str::CharIndices};
2
3use bumpalo::{
4 Bump,
5 collections::{CollectIn, Vec as BumpVec},
6};
7use rustc_hash::FxHashMap;
8use unicase::UniCase;
9
10#[derive(Debug, Default)]
12pub struct UniqueNames(Bump);
13
14impl UniqueNames {
15 #[inline]
17 pub fn new() -> Self {
18 Self::default()
19 }
20
21 #[inline]
31 pub fn scope(&self) -> UniqueNamesScope<'_> {
32 UniqueNamesScope::new(&self.0)
33 }
34
35 #[inline]
52 pub fn scope_with_reserved<S: AsRef<str>>(
53 &self,
54 reserved: impl IntoIterator<Item = S>,
55 ) -> UniqueNamesScope<'_> {
56 UniqueNamesScope::with_reserved(&self.0, reserved)
57 }
58}
59
60#[derive(Debug)]
62pub struct UniqueNamesScope<'a> {
63 arena: &'a Bump,
64 space: FxHashMap<&'a [UniCase<&'a str>], usize>,
65}
66
67impl<'a> UniqueNamesScope<'a> {
68 fn new(arena: &'a Bump) -> Self {
69 Self {
70 arena,
71 space: FxHashMap::default(),
72 }
73 }
74
75 fn with_reserved<S: AsRef<str>>(
76 arena: &'a Bump,
77 reserved: impl IntoIterator<Item = S>,
78 ) -> Self {
79 let space = reserved
80 .into_iter()
81 .map(|name| arena.alloc_str(name.as_ref()))
82 .map(|name| {
83 WordSegments::new(name)
84 .map(UniCase::new)
85 .collect_in::<BumpVec<_>>(arena)
86 })
87 .fold(FxHashMap::default(), |mut names, segments| {
88 names.insert(segments.into_bump_slice(), 1);
90 names
91 });
92 Self { arena, space }
93 }
94
95 pub fn uniquify<'b>(&mut self, name: &'b str) -> Cow<'b, str> {
110 match self.space.entry(
111 WordSegments::new(name)
112 .map(|name| UniCase::new(&*self.arena.alloc_str(name)))
113 .collect_in::<BumpVec<_>>(self.arena)
114 .into_bump_slice(),
115 ) {
116 Entry::Occupied(mut entry) => {
117 let count = entry.get_mut();
118 *count += 1;
119 format!("{name}{count}").into()
120 }
121 Entry::Vacant(entry) => {
122 entry.insert(1);
123 name.into()
124 }
125 }
126 }
127}
128
/// Iterator that splits a name into its word segments.
///
/// A new segment starts:
///
/// * after any run of non-alphanumeric characters (`snake_case`, `foo-bar`),
/// * at an uppercase letter that follows a lowercase word (`camelCase`),
/// * at the last uppercase letter of an uppercase run when a lowercase
///   letter follows it (`XMLHttp` yields `XML`, `Http`),
/// * at the first cased letter after leading digits (`123abc` yields
///   `123`, `abc`).
///
/// Digits and other uncased alphanumerics otherwise stay attached to the
/// word they appear in (`HTTP2Protocol` yields `HTTP2`, `Protocol`).
/// Segments are slices of the input; separators are never included.
pub struct WordSegments<'a> {
    /// The full input that the yielded segments are sliced from.
    input: &'a str,
    /// Remaining characters, with one character of lookahead.
    chars: Peekable<CharIndices<'a>>,
    /// Byte offset at which the word in progress begins, if there is one.
    current_word_starts_at: Option<usize>,
    /// Case of the most recently seen cased letter, or `Boundary`.
    mode: WordMode,
}

impl<'a> WordSegments<'a> {
    /// Creates an iterator over the word segments of `input`.
    #[inline]
    pub fn new(input: &'a str) -> Self {
        let chars = input.char_indices().peekable();
        Self {
            input,
            chars,
            current_word_starts_at: None,
            mode: WordMode::Boundary,
        }
    }

    /// Handles an uppercase letter at byte offset `index`.
    ///
    /// Returns the start offset of a word that ends right before `index`,
    /// if this letter finishes one.
    fn on_uppercase(&mut self, index: usize) -> Option<usize> {
        match self.mode {
            WordMode::Boundary | WordMode::Lowercase => {
                self.mode = WordMode::Uppercase;
                self.current_word_starts_at.replace(index)
            }
            WordMode::Uppercase => {
                // Inside an uppercase run, only the letter directly before a
                // lowercase letter begins a new word.
                let next_is_lowercase =
                    matches!(self.chars.peek(), Some(&(_, next)) if next.is_lowercase());
                if next_is_lowercase {
                    self.current_word_starts_at
                        .as_mut()
                        .map(|start| std::mem::replace(start, index))
                } else {
                    None
                }
            }
        }
    }

    /// Handles a lowercase letter at byte offset `index`.
    ///
    /// Returns the start offset of a word that ends right before `index`,
    /// if this letter finishes one.
    fn on_lowercase(&mut self, index: usize) -> Option<usize> {
        let finished = match self.mode {
            WordMode::Boundary => self.current_word_starts_at.replace(index),
            WordMode::Lowercase | WordMode::Uppercase => {
                self.current_word_starts_at = self.current_word_starts_at.or(Some(index));
                None
            }
        };
        self.mode = WordMode::Lowercase;
        finished
    }
}

impl<'a> Iterator for WordSegments<'a> {
    type Item = &'a str;

    fn next(&mut self) -> Option<Self::Item> {
        let input = self.input;
        loop {
            let Some((index, c)) = self.chars.next() else {
                // Input exhausted: flush the word in progress, if any.
                return self
                    .current_word_starts_at
                    .take()
                    .map(|start| &input[start..]);
            };

            let finished = if c.is_uppercase() {
                self.on_uppercase(index)
            } else if c.is_lowercase() {
                self.on_lowercase(index)
            } else if c.is_alphanumeric() {
                // Digits and uncased letters join or begin a word, but never
                // change the case mode.
                self.current_word_starts_at = self.current_word_starts_at.or(Some(index));
                None
            } else {
                // A separator ends the word in progress.
                self.mode = WordMode::Boundary;
                self.current_word_starts_at.take()
            };

            if let Some(start) = finished {
                return Some(&input[start..index]);
            }
        }
    }
}

/// Case state tracked by [`WordSegments`] between characters.
#[derive(Clone, Copy)]
enum WordMode {
    /// No cased letter seen since the start of input or the last separator.
    Boundary,
    /// The most recent cased letter was lowercase.
    Lowercase,
    /// The most recent cased letter was uppercase.
    Uppercase,
}
265
#[cfg(test)]
mod tests {
    use super::*;
    use itertools::Itertools;

    /// Splits `input` into its word segments.
    fn segments(input: &str) -> Vec<&str> {
        WordSegments::new(input).collect_vec()
    }

    /// Asserts that `input` splits into exactly the `expected` segments.
    fn assert_segments(input: &str, expected: &[&str]) {
        assert_eq!(segments(input), expected);
    }

    /// Feeds each input to `scope` in order and checks the name handed back.
    fn assert_uniquified(scope: &mut UniqueNamesScope<'_>, cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(scope.uniquify(input), expected);
        }
    }

    #[test]
    fn test_segment_camel_case() {
        assert_segments("camelCase", &["camel", "Case"]);
        assert_segments("httpResponse", &["http", "Response"]);
    }

    #[test]
    fn test_segment_pascal_case() {
        assert_segments("PascalCase", &["Pascal", "Case"]);
        assert_segments("HttpResponse", &["Http", "Response"]);
    }

    #[test]
    fn test_segment_snake_case() {
        assert_segments("snake_case", &["snake", "case"]);
        assert_segments("http_response", &["http", "response"]);
    }

    #[test]
    fn test_segment_screaming_snake() {
        assert_segments("SCREAMING_SNAKE", &["SCREAMING", "SNAKE"]);
        assert_segments("HTTP_RESPONSE", &["HTTP", "RESPONSE"]);
    }

    #[test]
    fn test_segment_consecutive_uppercase() {
        assert_segments("XMLHttpRequest", &["XML", "Http", "Request"]);
        assert_segments("HTTPResponse", &["HTTP", "Response"]);
        assert_segments("HTTP_Response", &["HTTP", "Response"]);
        assert_segments("ALLCAPS", &["ALLCAPS"]);
    }

    #[test]
    fn test_segment_with_numbers() {
        assert_segments("Response2", &["Response2"]);
        assert_segments("response_2", &["response", "2"]);
        assert_segments("HTTP2Protocol", &["HTTP2", "Protocol"]);
        assert_segments("OAuth2Token", &["O", "Auth2", "Token"]);
        assert_segments("HTTP2XML", &["HTTP2XML"]);
        assert_segments("1099KStatus", &["1099", "K", "Status"]);
        assert_segments("123abc", &["123", "abc"]);
        assert_segments("123ABC", &["123", "ABC"]);
    }

    #[test]
    fn test_segment_empty_and_special() {
        assert!(segments("").is_empty());
        assert!(segments("___").is_empty());
        assert_segments("a", &["a"]);
        assert_segments("A", &["A"]);
    }

    #[test]
    fn test_segment_mixed_separators() {
        assert_segments("foo-bar_baz", &["foo", "bar", "baz"]);
        assert_segments("foo--bar", &["foo", "bar"]);
    }

    #[test]
    fn test_deduplication_http_response_collision() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope();

        assert_uniquified(
            &mut scope,
            &[
                ("HTTPResponse", "HTTPResponse"),
                ("HTTP_Response", "HTTP_Response2"),
                ("httpResponse", "httpResponse3"),
                ("http_response", "http_response4"),
                // A single all-caps word has different segments, so it's free.
                ("HTTPRESPONSE", "HTTPRESPONSE"),
            ],
        );
    }

    #[test]
    fn test_deduplication_xml_http_request() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope();

        assert_uniquified(
            &mut scope,
            &[
                ("XMLHttpRequest", "XMLHttpRequest"),
                ("xml_http_request", "xml_http_request2"),
                ("XmlHttpRequest", "XmlHttpRequest3"),
            ],
        );
    }

    #[test]
    fn test_deduplication_preserves_original_casing() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope();

        assert_uniquified(
            &mut scope,
            &[
                ("HTTP_Response", "HTTP_Response"),
                ("httpResponse", "httpResponse2"),
            ],
        );
    }

    #[test]
    fn test_deduplication_same_prefix() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope();

        assert_uniquified(
            &mut scope,
            &[
                ("HttpRequest", "HttpRequest"),
                ("HttpResponse", "HttpResponse"),
                ("HttpError", "HttpError"),
            ],
        );
    }

    #[test]
    fn test_deduplication_with_numbers() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope();

        assert_uniquified(
            &mut scope,
            &[
                // Different segments (`Response2` vs. `response`, `2`).
                ("Response2", "Response2"),
                ("response_2", "response_2"),
                // All of these split into `1099`, `K`, `Status`.
                ("1099KStatus", "1099KStatus"),
                ("1099K_Status", "1099K_Status2"),
                ("1099KStatus", "1099KStatus3"),
                ("1099_K_Status", "1099_K_Status4"),
                // Both split into `123`, `abc`.
                ("123abc", "123abc"),
                ("123_abc", "123_abc2"),
            ],
        );
    }

    #[test]
    fn test_with_reserved_underscore() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope_with_reserved(["_"]);

        assert_uniquified(&mut scope, &[("_", "_2"), ("_", "_3")]);
    }

    #[test]
    fn test_with_reserved_multiple() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope_with_reserved(["_", "reserved"]);

        assert_uniquified(
            &mut scope,
            &[("_", "_2"), ("reserved", "reserved2"), ("other", "other")],
        );
    }

    #[test]
    fn test_with_reserved_empty() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope_with_reserved([""]);

        assert_uniquified(&mut scope, &[("", "2"), ("", "3")]);
    }
}