1use std::{iter::Peekable, str::CharIndices};
2
3use itertools::Itertools;
4use rustc_hash::{FxHashMap, FxHashSet};
5use unicase::UniCase;
6
7use crate::arena::Arena;
8
9#[derive(Debug)]
11pub struct UniqueNames<'a> {
12 arena: &'a Arena,
13 space: FxHashMap<&'a [UniCase<&'a str>], FxHashSet<usize>>,
14}
15
16impl<'a> UniqueNames<'a> {
17 pub fn new(arena: &'a Arena) -> Self {
18 Self {
19 arena,
20 space: FxHashMap::default(),
21 }
22 }
23
24 pub fn with_reserved<S: AsRef<str>>(
25 arena: &'a Arena,
26 reserved: impl IntoIterator<Item = S>,
27 ) -> Self {
28 let mut names = Self::new(arena);
29 for name in reserved {
30 names.reserve(name.as_ref());
31 }
32 names
33 }
34
35 pub fn uniquify(&mut self, name: &str) -> &'a str {
51 let parsed = ParsedName::parse(name);
52 let segments = self.arena.alloc_slice_exact(
53 parsed
54 .segments()
55 .iter()
56 .map(|(_, segment)| UniCase::new(&*self.arena.alloc_str(segment))),
57 );
58 let occupied = self.space.entry(segments).or_default();
59
60 let suffix = if occupied.insert(parsed.suffix()) {
61 parsed.suffix()
62 } else {
63 let mut suffix = parsed.min_suffix();
64 while !occupied.insert(suffix) {
65 suffix = suffix.checked_add(1).unwrap();
66 }
67 suffix
68 };
69
70 match parsed {
71 ParsedName::Empty | ParsedName::Numeric(_) => self.arena.alloc_str(&suffix.to_string()),
72 ParsedName::Stemmed { stem, .. } if suffix > 0 => {
73 self.arena.alloc_str(&format!("{stem}{suffix}"))
74 }
75 ParsedName::Stemmed { stem, .. } => self.arena.alloc_str(stem),
76 }
77 }
78
79 fn reserve(&mut self, name: &str) {
80 let parsed = ParsedName::parse(name);
81 let segments = self.arena.alloc_slice_exact(
82 parsed
83 .segments()
84 .iter()
85 .map(|(_, segment)| UniCase::new(&*self.arena.alloc_str(segment))),
86 );
87 self.space
88 .entry(segments)
89 .or_default()
90 .insert(parsed.reserved_suffix());
91 }
92}
93
94pub struct WordSegments<'a> {
141 input: &'a str,
142 chars: Peekable<CharIndices<'a>>,
143 current_word_starts_at: Option<usize>,
144 mode: WordMode,
145}
146
147impl<'a> WordSegments<'a> {
148 #[inline]
149 pub fn new(input: &'a str) -> Self {
150 Self {
151 input,
152 chars: input.char_indices().peekable(),
153 current_word_starts_at: None,
154 mode: WordMode::Boundary,
155 }
156 }
157}
158
159impl<'a> Iterator for WordSegments<'a> {
160 type Item = (usize, &'a str);
161
162 fn next(&mut self) -> Option<Self::Item> {
163 while let Some((index, c)) = self.chars.next() {
164 if c.is_uppercase() {
165 match self.mode {
166 WordMode::Boundary | WordMode::Digit => {
167 let start = self.current_word_starts_at.replace(index);
169 self.mode = WordMode::Uppercase;
170 if let Some(start) = start {
171 return Some((start, &self.input[start..index]));
172 }
173 }
174 WordMode::Lowercase => {
175 let start = self.current_word_starts_at.replace(index);
178 self.mode = WordMode::Uppercase;
179 if let Some(start) = start {
180 return Some((start, &self.input[start..index]));
181 }
182 }
183 WordMode::Uppercase => {
184 let next_is_lowercase = self
185 .chars
186 .peek()
187 .map(|&(_, next)| next.is_lowercase())
188 .unwrap_or(false);
189 if next_is_lowercase && let Some(start) = self.current_word_starts_at {
190 self.current_word_starts_at = Some(index);
193 return Some((start, &self.input[start..index]));
194 }
195 }
197 }
198 } else if c.is_lowercase() {
199 match self.mode {
200 WordMode::Boundary | WordMode::Digit => {
201 let start = self.current_word_starts_at.replace(index);
203 self.mode = WordMode::Lowercase;
204 if let Some(start) = start {
205 return Some((start, &self.input[start..index]));
206 }
207 }
208 WordMode::Lowercase | WordMode::Uppercase => {
209 if self.current_word_starts_at.is_none() {
210 self.current_word_starts_at = Some(index);
212 }
213 self.mode = WordMode::Lowercase;
214 }
215 }
216 } else if c.is_ascii_digit() {
217 match self.mode {
218 WordMode::Boundary | WordMode::Digit => {
219 if self.current_word_starts_at.is_none() {
220 self.current_word_starts_at = Some(index);
221 }
222 self.mode = WordMode::Digit;
223 }
224 WordMode::Lowercase | WordMode::Uppercase => {
225 let start = self.current_word_starts_at.replace(index);
226 self.mode = WordMode::Digit;
227 if let Some(start) = start {
228 return Some((start, &self.input[start..index]));
229 }
230 }
231 }
232 } else if !c.is_alphanumeric() {
233 let start = std::mem::take(&mut self.current_word_starts_at);
235 self.mode = WordMode::Boundary;
236 if let Some(start) = start {
237 return Some((start, &self.input[start..index]));
238 }
239 } else {
240 if self.current_word_starts_at.is_none() {
242 self.current_word_starts_at = Some(index);
243 }
244 }
245 }
246 if let Some(start) = std::mem::take(&mut self.current_word_starts_at) {
247 return Some((start, &self.input[start..]));
249 }
250 None
251 }
252}
253
254#[derive(Clone, Copy)]
256enum WordMode {
257 Boundary,
260 Lowercase,
262 Uppercase,
264 Digit,
266}
267
268enum ParsedName<'a> {
269 Empty,
270 Numeric(usize),
271 Stemmed {
272 segments: Vec<(usize, &'a str)>,
273 stem: &'a str,
274 suffix: usize,
275 },
276}
277
278impl<'a> ParsedName<'a> {
279 fn parse(name: &'a str) -> Self {
280 let mut segments = WordSegments::new(name).collect_vec();
281 if segments.is_empty() {
282 return Self::Empty;
283 }
284
285 let Some((suffix_start, suffix)) = segments
286 .iter()
287 .last()
288 .and_then(|&(offset, segment)| Some((offset, segment.parse::<usize>().ok()?)))
289 else {
290 return Self::Stemmed {
291 segments,
292 stem: name,
293 suffix: 0,
294 };
295 };
296
297 segments.pop();
298 if segments.is_empty() {
299 return Self::Numeric(suffix);
300 }
301
302 let stem = name[..suffix_start].trim_end_matches(|c: char| !c.is_alphanumeric());
303 Self::Stemmed {
304 segments,
305 stem,
306 suffix,
307 }
308 }
309
310 fn segments(&self) -> &[(usize, &'a str)] {
311 match self {
312 Self::Empty | Self::Numeric(_) => &[],
313 Self::Stemmed { segments, .. } => segments,
314 }
315 }
316
317 fn suffix(&self) -> usize {
318 match self {
319 Self::Empty | Self::Numeric(0) => 1,
320 &(Self::Numeric(suffix) | Self::Stemmed { suffix, .. }) => suffix,
321 }
322 }
323
324 fn reserved_suffix(&self) -> usize {
325 match self {
326 Self::Empty | Self::Numeric(0) => 0,
327 &(Self::Numeric(suffix) | Self::Stemmed { suffix, .. }) => suffix,
328 }
329 }
330
331 fn min_suffix(&self) -> usize {
332 match self {
333 Self::Empty => 1,
334 &Self::Numeric(suffix) => suffix.max(1),
335 &Self::Stemmed { suffix, .. } => suffix.max(2),
336 }
337 }
338}
339
340#[cfg(test)]
341mod tests {
342 use super::*;
343 use itertools::Itertools;
344
345 #[test]
346 fn test_segment_camel_case() {
347 assert_eq!(
348 WordSegments::new("camelCase").collect_vec(),
349 vec![(0, "camel"), (5, "Case")]
350 );
351 assert_eq!(
352 WordSegments::new("httpResponse").collect_vec(),
353 vec![(0, "http"), (4, "Response")]
354 );
355 }
356
357 #[test]
358 fn test_segment_pascal_case() {
359 assert_eq!(
360 WordSegments::new("PascalCase").collect_vec(),
361 vec![(0, "Pascal"), (6, "Case")]
362 );
363 assert_eq!(
364 WordSegments::new("HttpResponse").collect_vec(),
365 vec![(0, "Http"), (4, "Response")]
366 );
367 }
368
369 #[test]
370 fn test_segment_snake_case() {
371 assert_eq!(
372 WordSegments::new("snake_case").collect_vec(),
373 vec![(0, "snake"), (6, "case")]
374 );
375 assert_eq!(
376 WordSegments::new("http_response").collect_vec(),
377 vec![(0, "http"), (5, "response")]
378 );
379 }
380
381 #[test]
382 fn test_segment_screaming_snake() {
383 assert_eq!(
384 WordSegments::new("SCREAMING_SNAKE").collect_vec(),
385 vec![(0, "SCREAMING"), (10, "SNAKE")]
386 );
387 assert_eq!(
388 WordSegments::new("HTTP_RESPONSE").collect_vec(),
389 vec![(0, "HTTP"), (5, "RESPONSE")]
390 );
391 }
392
393 #[test]
394 fn test_segment_consecutive_uppercase() {
395 assert_eq!(
396 WordSegments::new("XMLHttpRequest").collect_vec(),
397 vec![(0, "XML"), (3, "Http"), (7, "Request")]
398 );
399 assert_eq!(
400 WordSegments::new("HTTPResponse").collect_vec(),
401 vec![(0, "HTTP"), (4, "Response")]
402 );
403 assert_eq!(
404 WordSegments::new("HTTP_Response").collect_vec(),
405 vec![(0, "HTTP"), (5, "Response")]
406 );
407 assert_eq!(
408 WordSegments::new("ALLCAPS").collect_vec(),
409 vec![(0, "ALLCAPS")]
410 );
411 }
412
413 #[test]
414 fn test_segment_with_numbers() {
415 assert_eq!(
416 WordSegments::new("Response2").collect_vec(),
417 vec![(0, "Response"), (8, "2")]
418 );
419 assert_eq!(
420 WordSegments::new("response_2").collect_vec(),
421 vec![(0, "response"), (9, "2")]
422 );
423 assert_eq!(
424 WordSegments::new("HTTP2Protocol").collect_vec(),
425 vec![(0, "HTTP"), (4, "2"), (5, "Protocol")]
426 );
427 assert_eq!(
428 WordSegments::new("OAuth2Token").collect_vec(),
429 vec![(0, "O"), (1, "Auth"), (5, "2"), (6, "Token")]
430 );
431 assert_eq!(
432 WordSegments::new("HTTP2XML").collect_vec(),
433 vec![(0, "HTTP"), (4, "2"), (5, "XML")]
434 );
435 assert_eq!(
436 WordSegments::new("1099KStatus").collect_vec(),
437 vec![(0, "1099"), (4, "K"), (5, "Status")]
438 );
439 assert_eq!(
440 WordSegments::new("123abc").collect_vec(),
441 vec![(0, "123"), (3, "abc")]
442 );
443 assert_eq!(
444 WordSegments::new("123ABC").collect_vec(),
445 vec![(0, "123"), (3, "ABC")]
446 );
447 }
448
449 #[test]
450 fn test_segment_empty_and_special() {
451 assert!(WordSegments::new("").collect_vec().is_empty());
452 assert!(WordSegments::new("___").collect_vec().is_empty());
453 assert_eq!(WordSegments::new("a").collect_vec(), vec![(0, "a")]);
454 assert_eq!(WordSegments::new("A").collect_vec(), vec![(0, "A")]);
455 }
456
457 #[test]
458 fn test_segment_mixed_separators() {
459 assert_eq!(
460 WordSegments::new("foo-bar_baz").collect_vec(),
461 vec![(0, "foo"), (4, "bar"), (8, "baz")]
462 );
463 assert_eq!(
464 WordSegments::new("foo--bar").collect_vec(),
465 vec![(0, "foo"), (5, "bar")]
466 );
467 }
468
469 #[test]
470 fn test_deduplication_http_response_collision() {
471 let arena = Arena::new();
472 let mut names = UniqueNames::new(&arena);
473
474 assert_eq!(names.uniquify("HTTPResponse"), "HTTPResponse");
475 assert_eq!(names.uniquify("HTTP_Response"), "HTTP_Response2");
476 assert_eq!(names.uniquify("httpResponse"), "httpResponse3");
477 assert_eq!(names.uniquify("http_response"), "http_response4");
478 assert_eq!(names.uniquify("HTTPRESPONSE"), "HTTPRESPONSE");
480 }
481
482 #[test]
483 fn test_deduplication_xml_http_request() {
484 let arena = Arena::new();
485 let mut names = UniqueNames::new(&arena);
486
487 assert_eq!(names.uniquify("XMLHttpRequest"), "XMLHttpRequest");
488 assert_eq!(names.uniquify("xml_http_request"), "xml_http_request2");
489 assert_eq!(names.uniquify("XmlHttpRequest"), "XmlHttpRequest3");
490 }
491
492 #[test]
493 fn test_deduplication_preserves_original_casing() {
494 let arena = Arena::new();
495 let mut names = UniqueNames::new(&arena);
496
497 assert_eq!(names.uniquify("HTTP_Response"), "HTTP_Response");
498 assert_eq!(names.uniquify("httpResponse"), "httpResponse2");
499 }
500
501 #[test]
502 fn test_deduplication_same_prefix() {
503 let arena = Arena::new();
504 let mut names = UniqueNames::new(&arena);
505
506 assert_eq!(names.uniquify("HttpRequest"), "HttpRequest");
507 assert_eq!(names.uniquify("HttpResponse"), "HttpResponse");
508 assert_eq!(names.uniquify("HttpError"), "HttpError");
509 }
510
511 #[test]
512 fn test_deduplication_with_numbers() {
513 let arena = Arena::new();
514 let mut names = UniqueNames::new(&arena);
515
516 assert_eq!(names.uniquify("Response2"), "Response2");
517 assert_eq!(names.uniquify("response_2"), "response3");
518
519 assert_eq!(names.uniquify("Response0"), "Response");
521 assert_eq!(names.uniquify("response"), "response4");
522
523 assert_eq!(names.uniquify("1099KStatus"), "1099KStatus");
525 assert_eq!(names.uniquify("1099K_Status"), "1099K_Status2");
526 assert_eq!(names.uniquify("1099KStatus"), "1099KStatus3");
527 assert_eq!(names.uniquify("1099_K_Status"), "1099_K_Status4");
528
529 assert_eq!(names.uniquify("123abc"), "123abc");
531 assert_eq!(names.uniquify("123_abc"), "123_abc2");
532 }
533
534 #[test]
535 fn test_deduplication_numeric_suffixes() {
536 let arena = Arena::new();
537 let mut names = UniqueNames::new(&arena);
538
539 assert_eq!(names.uniquify("OAuth2"), "OAuth2");
540 assert_eq!(names.uniquify("OAuth_2"), "OAuth3");
541 assert_eq!(names.uniquify("OAuth"), "OAuth");
542 assert_eq!(names.uniquify("OAuth0"), "OAuth4");
543 }
544
545 #[test]
546 fn test_deduplication_empty_names_start_at_one() {
547 let arena = Arena::new();
548 let mut names = UniqueNames::new(&arena);
549
550 assert_eq!(names.uniquify(""), "1");
551 assert_eq!(names.uniquify("_"), "2");
552 assert_eq!(names.uniquify("---"), "3");
553 }
554
555 #[test]
556 fn test_deduplication_numeric_names_share_empty_stem() {
557 let arena = Arena::new();
558 let mut names = UniqueNames::new(&arena);
559
560 assert_eq!(names.uniquify("2"), "2");
561 assert_eq!(names.uniquify(""), "1");
562 assert_eq!(names.uniquify("2"), "3");
563
564 let arena = Arena::new();
565 let mut names = UniqueNames::new(&arena);
566
567 assert_eq!(names.uniquify("0"), "1");
568 assert_eq!(names.uniquify(""), "2");
569 }
570
571 #[test]
572 fn test_with_reserved_empty_stem_uses_zero_slot() {
573 let arena = Arena::new();
574 let mut names = UniqueNames::with_reserved(&arena, [""]);
575
576 assert_eq!(names.uniquify(""), "1");
577 assert_eq!(names.uniquify(""), "2");
578
579 let arena = Arena::new();
580 let mut names = UniqueNames::with_reserved(&arena, [""]);
581
582 assert_eq!(names.uniquify("0"), "1");
583 assert_eq!(names.uniquify(""), "2");
584
585 let arena = Arena::new();
586 let mut names = UniqueNames::with_reserved(&arena, ["0"]);
587
588 assert_eq!(names.uniquify("0"), "1");
589 assert_eq!(names.uniquify(""), "2");
590
591 let arena = Arena::new();
592 let mut names = UniqueNames::with_reserved(&arena, ["_"]);
593
594 assert_eq!(names.uniquify("_"), "1");
595 assert_eq!(names.uniquify("_"), "2");
596 }
597
598 #[test]
599 fn test_with_reserved_multiple() {
600 let arena = Arena::new();
601 let mut names = UniqueNames::with_reserved(&arena, ["_", "reserved"]);
602
603 assert_eq!(names.uniquify("_"), "1");
604 assert_eq!(names.uniquify("reserved"), "reserved2");
605 assert_eq!(names.uniquify("other"), "other");
606 }
607
608 #[test]
609 fn test_with_reserved_numeric_suffixes() {
610 let arena = Arena::new();
611 let mut names = UniqueNames::with_reserved(&arena, ["crate"]);
612
613 assert_eq!(names.uniquify("crate"), "crate2");
614 assert_eq!(names.uniquify("crate2"), "crate3");
615 }
616}