1use std::{borrow::Cow, collections::hash_map::Entry, iter::Peekable, str::CharIndices};
2
3use rustc_hash::FxHashMap;
4use unicase::UniCase;
5
6use crate::arena::Arena;
7
8#[derive(Debug, Default)]
10pub struct UniqueNames(Arena);
11
12impl UniqueNames {
13 #[inline]
15 pub fn new() -> Self {
16 Self::default()
17 }
18
19 #[inline]
29 pub fn scope(&self) -> UniqueNamesScope<'_> {
30 UniqueNamesScope::new(&self.0)
31 }
32
33 #[inline]
50 pub fn scope_with_reserved<S: AsRef<str>>(
51 &self,
52 reserved: impl IntoIterator<Item = S>,
53 ) -> UniqueNamesScope<'_> {
54 UniqueNamesScope::with_reserved(&self.0, reserved)
55 }
56}
57
58#[derive(Debug)]
60pub struct UniqueNamesScope<'a> {
61 arena: &'a Arena,
62 space: FxHashMap<&'a [UniCase<&'a str>], usize>,
63}
64
65impl<'a> UniqueNamesScope<'a> {
66 fn new(arena: &'a Arena) -> Self {
67 Self {
68 arena,
69 space: FxHashMap::default(),
70 }
71 }
72
73 fn with_reserved<S: AsRef<str>>(
74 arena: &'a Arena,
75 reserved: impl IntoIterator<Item = S>,
76 ) -> Self {
77 let space = reserved
78 .into_iter()
79 .map(|name| arena.alloc_str(name.as_ref()))
80 .map(|name| arena.alloc_slice(WordSegments::new(name).map(UniCase::new)))
81 .fold(FxHashMap::default(), |mut names, segments| {
82 names.insert(&*segments, 1);
84 names
85 });
86 Self { arena, space }
87 }
88
89 pub fn uniquify<'b>(&mut self, name: &'b str) -> Cow<'b, str> {
104 match self.space.entry(self.arena.alloc_slice(
105 WordSegments::new(name).map(|name| UniCase::new(&*self.arena.alloc_str(name))),
106 )) {
107 Entry::Occupied(mut entry) => {
108 let count = entry.get_mut();
109 *count += 1;
110 format!("{name}{count}").into()
111 }
112 Entry::Vacant(entry) => {
113 entry.insert(1);
114 name.into()
115 }
116 }
117 }
118}
119
/// An iterator over the word segments of a name.
///
/// Every yielded segment is a sub-slice of the input. A new segment starts:
///
/// - after any non-alphanumeric character (the separator itself is dropped):
///   `snake_case` → `snake`, `case`;
/// - at an uppercase letter that follows a lowercase word:
///   `camelCase` → `camel`, `Case`;
/// - at the last letter of an uppercase run that is followed by a lowercase
///   letter: `XMLHttp` → `XML`, `Http`;
/// - at the first cased letter after a word that began with uncased
///   alphanumerics such as digits: `123abc` → `123`, `abc`.
///
/// Digits and other uncased alphanumerics never end a word on their own, so
/// `Response2` and `HTTP2XML` are single segments.
#[derive(Clone, Debug)]
pub struct WordSegments<'a> {
    /// The full input; yielded segments are slices of it.
    input: &'a str,
    /// Remaining characters with their byte offsets; peeked to detect the end
    /// of an uppercase run.
    chars: Peekable<CharIndices<'a>>,
    /// Byte offset at which the word currently being scanned began, if any.
    current_word_starts_at: Option<usize>,
    /// Case of the most recent cased letter of the current word.
    mode: WordMode,
}

impl<'a> WordSegments<'a> {
    /// Creates an iterator over the word segments of `input`.
    #[inline]
    pub fn new(input: &'a str) -> Self {
        Self {
            input,
            chars: input.char_indices().peekable(),
            current_word_starts_at: None,
            mode: WordMode::Boundary,
        }
    }

    /// Marks `index` as the start of the current word unless one is already open.
    fn open_word_at(&mut self, index: usize) {
        if self.current_word_starts_at.is_none() {
            self.current_word_starts_at = Some(index);
        }
    }
}

impl<'a> Iterator for WordSegments<'a> {
    type Item = &'a str;

    fn next(&mut self) -> Option<Self::Item> {
        // `for` cannot be used here: the loop body peeks at `self.chars`.
        while let Some((index, c)) = self.chars.next() {
            // Start offset of the word that ends just before `index`, if this
            // character closes one.
            let finished = if c.is_uppercase() {
                match self.mode {
                    // An uppercase letter after a separator, after leading
                    // digits, or after a lowercase word always opens a new word.
                    WordMode::Boundary | WordMode::Lowercase => {
                        self.mode = WordMode::Uppercase;
                        self.current_word_starts_at.replace(index)
                    }
                    // Inside an uppercase run, only the final letter before a
                    // lowercase one opens a new word (`XML|Http`).
                    WordMode::Uppercase => {
                        let next_is_lowercase = self
                            .chars
                            .peek()
                            .is_some_and(|&(_, next)| next.is_lowercase());
                        match self.current_word_starts_at {
                            Some(start) if next_is_lowercase => {
                                self.current_word_starts_at = Some(index);
                                Some(start)
                            }
                            _ => None,
                        }
                    }
                }
            } else if c.is_lowercase() {
                match self.mode {
                    // First cased letter since the last separator: closes any
                    // word made only of uncased alphanumerics (`123|abc`).
                    WordMode::Boundary => {
                        self.mode = WordMode::Lowercase;
                        self.current_word_starts_at.replace(index)
                    }
                    // Continues the current word (`Http`, `camel`).
                    WordMode::Lowercase | WordMode::Uppercase => {
                        self.open_word_at(index);
                        self.mode = WordMode::Lowercase;
                        None
                    }
                }
            } else if c.is_alphanumeric() {
                // Digits and uncased letters join the current word, or open
                // one, without changing the mode.
                self.open_word_at(index);
                None
            } else {
                // Separator: closes the current word and is not part of any segment.
                self.mode = WordMode::Boundary;
                self.current_word_starts_at.take()
            };

            if let Some(start) = finished {
                return Some(&self.input[start..index]);
            }
        }

        // Input exhausted: flush the word that was still open, if any.
        let start = self.current_word_starts_at.take()?;
        Some(&self.input[start..])
    }
}

// Once the input is exhausted and the last word flushed, `next` keeps
// returning `None`: the underlying `CharIndices` is fused and no word is open.
impl<'a> std::iter::FusedIterator for WordSegments<'a> {}

/// Case context of the word currently being scanned.
#[derive(Clone, Copy, Debug)]
enum WordMode {
    /// No cased letter has been seen since the start of the input or the last
    /// separator.
    Boundary,
    /// The most recent cased letter was lowercase.
    Lowercase,
    /// The most recent cased letter was uppercase.
    Uppercase,
}
256
#[cfg(test)]
mod tests {
    use super::*;
    use itertools::Itertools;

    /// Asserts that `input` splits into exactly the `expected` word segments.
    fn assert_segments(input: &str, expected: &[&str]) {
        assert_eq!(
            WordSegments::new(input).collect_vec(),
            expected,
            "segments of {input:?}"
        );
    }

    /// Feeds each input to `scope` in order and asserts the name it returns.
    fn assert_uniquified(scope: &mut UniqueNamesScope<'_>, cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(scope.uniquify(input), expected);
        }
    }

    #[test]
    fn test_segment_camel_case() {
        assert_segments("camelCase", &["camel", "Case"]);
        assert_segments("httpResponse", &["http", "Response"]);
    }

    #[test]
    fn test_segment_pascal_case() {
        assert_segments("PascalCase", &["Pascal", "Case"]);
        assert_segments("HttpResponse", &["Http", "Response"]);
    }

    #[test]
    fn test_segment_snake_case() {
        assert_segments("snake_case", &["snake", "case"]);
        assert_segments("http_response", &["http", "response"]);
    }

    #[test]
    fn test_segment_screaming_snake() {
        assert_segments("SCREAMING_SNAKE", &["SCREAMING", "SNAKE"]);
        assert_segments("HTTP_RESPONSE", &["HTTP", "RESPONSE"]);
    }

    #[test]
    fn test_segment_consecutive_uppercase() {
        assert_segments("XMLHttpRequest", &["XML", "Http", "Request"]);
        assert_segments("HTTPResponse", &["HTTP", "Response"]);
        assert_segments("HTTP_Response", &["HTTP", "Response"]);
        assert_segments("ALLCAPS", &["ALLCAPS"]);
    }

    #[test]
    fn test_segment_with_numbers() {
        assert_segments("Response2", &["Response2"]);
        assert_segments("response_2", &["response", "2"]);
        assert_segments("HTTP2Protocol", &["HTTP2", "Protocol"]);
        assert_segments("OAuth2Token", &["O", "Auth2", "Token"]);
        assert_segments("HTTP2XML", &["HTTP2XML"]);
        assert_segments("1099KStatus", &["1099", "K", "Status"]);
        assert_segments("123abc", &["123", "abc"]);
        assert_segments("123ABC", &["123", "ABC"]);
    }

    #[test]
    fn test_segment_empty_and_special() {
        assert_segments("", &[]);
        assert_segments("___", &[]);
        assert_segments("a", &["a"]);
        assert_segments("A", &["A"]);
    }

    #[test]
    fn test_segment_mixed_separators() {
        assert_segments("foo-bar_baz", &["foo", "bar", "baz"]);
        assert_segments("foo--bar", &["foo", "bar"]);
    }

    #[test]
    fn test_deduplication_http_response_collision() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope();

        assert_uniquified(
            &mut scope,
            &[
                ("HTTPResponse", "HTTPResponse"),
                ("HTTP_Response", "HTTP_Response2"),
                ("httpResponse", "httpResponse3"),
                ("http_response", "http_response4"),
                // A single all-caps word segments differently, so no collision.
                ("HTTPRESPONSE", "HTTPRESPONSE"),
            ],
        );
    }

    #[test]
    fn test_deduplication_xml_http_request() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope();

        assert_uniquified(
            &mut scope,
            &[
                ("XMLHttpRequest", "XMLHttpRequest"),
                ("xml_http_request", "xml_http_request2"),
                ("XmlHttpRequest", "XmlHttpRequest3"),
            ],
        );
    }

    #[test]
    fn test_deduplication_preserves_original_casing() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope();

        assert_uniquified(
            &mut scope,
            &[
                ("HTTP_Response", "HTTP_Response"),
                ("httpResponse", "httpResponse2"),
            ],
        );
    }

    #[test]
    fn test_deduplication_same_prefix() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope();

        assert_uniquified(
            &mut scope,
            &[
                ("HttpRequest", "HttpRequest"),
                ("HttpResponse", "HttpResponse"),
                ("HttpError", "HttpError"),
            ],
        );
    }

    #[test]
    fn test_deduplication_with_numbers() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope();

        // `Response2` is one segment; `response_2` is two, so they differ.
        assert_uniquified(
            &mut scope,
            &[("Response2", "Response2"), ("response_2", "response_2")],
        );

        // Every spelling below segments into `1099`, `K`, `Status`.
        assert_uniquified(
            &mut scope,
            &[
                ("1099KStatus", "1099KStatus"),
                ("1099K_Status", "1099K_Status2"),
                ("1099KStatus", "1099KStatus3"),
                ("1099_K_Status", "1099_K_Status4"),
            ],
        );

        // Both spellings segment into `123`, `abc`.
        assert_uniquified(&mut scope, &[("123abc", "123abc"), ("123_abc", "123_abc2")]);
    }

    #[test]
    fn test_with_reserved_underscore() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope_with_reserved(["_"]);

        // `_` is reserved, so even the first request gets a suffix.
        assert_uniquified(&mut scope, &[("_", "_2"), ("_", "_3")]);
    }

    #[test]
    fn test_with_reserved_multiple() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope_with_reserved(["_", "reserved"]);

        assert_uniquified(
            &mut scope,
            &[("_", "_2"), ("reserved", "reserved2"), ("other", "other")],
        );
    }

    #[test]
    fn test_with_reserved_empty() {
        let unique = UniqueNames::new();
        let mut scope = unique.scope_with_reserved([""]);

        assert_uniquified(&mut scope, &[("", "2"), ("", "3")]);
    }
}