generic_str/slice_utf8.rs
1use core::{
2 slice::SliceIndex,
3 str::{Bytes, CharIndices, Chars},
4};
5
6use crate::{from_utf8_unchecked_mut, validation::truncate_to_char_boundary, StringSlice};
7
8#[allow(non_camel_case_types)]
9/// Exactly the same as [`std::primitive::str`], except generic
10pub type str = StringSlice<u8>;
11
12impl str {
13 /// Returns the length of `self`.
14 ///
15 /// This length is in bytes, not [`char`]s or graphemes. In other words,
16 /// it may not be what a human considers the length of the string.
17 ///
18 /// [`char`]: prim@char
19 ///
20 /// # Examples
21 ///
22 /// Basic usage:
23 ///
24 /// ```
25 /// # use generic_str::str;
26 /// let len = <&str>::from("foo").len();
27 /// assert_eq!(3, len);
28 ///
29 /// assert_eq!("ƒoo".len(), 4); // fancy f!
30 /// assert_eq!("ƒoo".chars().count(), 3);
31 /// ```
32 #[inline]
33 pub fn len(&self) -> usize {
34 self.storage.as_ref().len()
35 }
36
37 /// Returns `true` if `self` has a length of zero bytes.
38 ///
39 /// # Examples
40 ///
41 /// Basic usage:
42 ///
43 /// ```
44 /// # use generic_str::str;
45 /// let s: &str = "".into();
46 /// assert!(s.is_empty());
47 ///
48 /// let s: &str = "not empty".into();
49 /// assert!(!s.is_empty());
50 /// ```
51 #[inline]
52 pub fn is_empty(&self) -> bool {
53 self.storage.is_empty()
54 }
55
56 /// Checks that `index`-th byte is the first byte in a UTF-8 code point
57 /// sequence or the end of the string.
58 ///
59 /// The start and end of the string (when `index == self.len()`) are
60 /// considered to be boundaries.
61 ///
62 /// Returns `false` if `index` is greater than `self.len()`.
63 ///
64 /// # Examples
65 ///
66 /// ```
67 /// # use generic_str::str;
68 /// let s: &str = "Löwe 老虎 Léopard".into();
69 /// assert!(s.is_char_boundary(0));
70 /// // start of `老`
71 /// assert!(s.is_char_boundary(6));
72 /// assert!(s.is_char_boundary(s.len()));
73 ///
74 /// // second byte of `ö`
75 /// assert!(!s.is_char_boundary(2));
76 ///
77 /// // third byte of `老`
78 /// assert!(!s.is_char_boundary(8));
79 /// ```
80 #[inline]
81 pub fn is_char_boundary(&self, index: usize) -> bool {
82 // 0 is always ok.
83 // Test for 0 explicitly so that it can optimize out the check
84 // easily and skip reading string data for that case.
85 // Note that optimizing `self.get(..index)` relies on this.
86 if index == 0 {
87 return true;
88 }
89
90 match self.as_bytes().get(index) {
91 // For `None` we have two options:
92 //
93 // - index == self.len()
94 // Empty strings are valid, so return true
95 // - index > self.len()
96 // In this case return false
97 //
98 // The check is placed exactly here, because it improves generated
99 // code on higher opt-levels. See PR #84751 for more details.
100 None => index == self.len(),
101
102 // This is bit magic equivalent to: b < 128 || b >= 192
103 Some(&b) => (b as i8) >= -0x40,
104 }
105 }
106
107 /// Converts a string slice to a byte slice. To convert the byte slice back
108 /// into a string slice, use the [`from_utf8`] function.
109 ///
110 /// [`from_utf8`]: crate::from_utf8
111 ///
112 /// # Examples
113 ///
114 /// Basic usage:
115 ///
116 /// ```
117 /// # use generic_str::str;
118 /// let bytes = <&str>::from("bors").as_bytes();
119 /// assert_eq!(b"bors", bytes);
120 /// ```
121 #[inline(always)]
122 pub fn as_bytes(&self) -> &[u8] {
123 // SAFETY: const sound because we transmute two types with the same layout
124 unsafe { core::mem::transmute(self.storage.as_ref()) }
125 }
126
127 /// Converts a mutable string slice to a mutable byte slice.
128 ///
129 /// # Safety
130 ///
131 /// The caller must ensure that the content of the slice is valid UTF-8
132 /// before the borrow ends and the underlying `str` is used.
133 ///
134 /// Use of a `str` whose contents are not valid UTF-8 is undefined behavior.
135 ///
136 /// # Examples
137 ///
138 /// Basic usage:
139 ///
140 /// ```
141 /// # use generic_str::String;
142 /// let mut s = String::from("Hello");
143 /// let bytes = unsafe { s.as_bytes_mut() };
144 ///
145 /// assert_eq!(bytes, b"Hello");
146 /// ```
147 ///
148 /// Mutability:
149 ///
150 /// ```
151 /// # use generic_str::{str, String};
152 /// let mut s = String::from("🗻∈🌏");
153 ///
154 /// unsafe {
155 /// let bytes = s.as_bytes_mut();
156 ///
157 /// bytes[0] = 0xF0;
158 /// bytes[1] = 0x9F;
159 /// bytes[2] = 0x8D;
160 /// bytes[3] = 0x94;
161 /// }
162 ///
163 /// assert_eq!(s, <&str>::from("🍔∈🌏"));
164 /// ```
165 #[inline(always)]
166 pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] {
167 // SAFETY: const sound because we transmute two types with the same layout
168 core::mem::transmute(self.storage.as_mut())
169 }
170
171 /// Converts a string slice to a raw pointer.
172 ///
173 /// As string slices are a slice of bytes, the raw pointer points to a
174 /// [`u8`]. This pointer will be pointing to the first byte of the string
175 /// slice.
176 ///
177 /// The caller must ensure that the returned pointer is never written to.
178 /// If you need to mutate the contents of the string slice, use [`as_mut_ptr`].
179 ///
180 /// [`as_mut_ptr`]: str::as_mut_ptr
181 ///
182 /// # Examples
183 ///
184 /// Basic usage:
185 ///
186 /// ```
187 /// # use generic_str::str;
188 /// let s: &str = "Hello".into();
189 /// let ptr = s.as_ptr();
190 /// ```
191 #[inline]
192 pub fn as_ptr(&self) -> *const u8 {
193 self.storage.as_ref() as *const [u8] as *const u8
194 }
195
196 /// Converts a mutable string slice to a raw pointer.
197 ///
198 /// As string slices are a slice of bytes, the raw pointer points to a
199 /// [`u8`]. This pointer will be pointing to the first byte of the string
200 /// slice.
201 ///
202 /// It is your responsibility to make sure that the string slice only gets
203 /// modified in a way that it remains valid UTF-8.
204 #[inline]
205 pub fn as_mut_ptr(&mut self) -> *mut u8 {
206 self.storage.as_mut() as *mut [u8] as *mut u8
207 }
208
209 /// Returns a subslice of `str`.
210 ///
211 /// This is the non-panicking alternative to indexing the `str`. Returns
212 /// [`None`] whenever equivalent indexing operation would panic.
213 ///
214 /// # Examples
215 ///
216 /// ```
217 /// # use generic_str::{str, String};
218 /// let v = String::from("🗻∈🌏");
219 ///
220 /// assert_eq!(v.get(0..4), Some(<&str>::from("🗻")));
221 ///
222 /// // indices not on UTF-8 sequence boundaries
223 /// assert!(v.get(1..).is_none());
224 /// assert!(v.get(..8).is_none());
225 ///
226 /// // out of bounds
227 /// assert!(v.get(..42).is_none());
228 /// ```
229 #[inline]
230 pub fn get<I: SliceIndex<Self>>(&self, i: I) -> Option<&I::Output> {
231 i.get(self.as_ref())
232 }
233
234 /// Returns a mutable subslice of `str`.
235 ///
236 /// This is the non-panicking alternative to indexing the `str`. Returns
237 /// [`None`] whenever equivalent indexing operation would panic.
238 ///
239 /// # Examples
240 ///
241 /// ```
242 /// # use generic_str::{str, String};
243 /// let mut v = String::from("hello");
244 /// // correct length
245 /// assert!(v.get_mut(0..5).is_some());
246 /// // out of bounds
247 /// assert!(v.get_mut(..42).is_none());
248 /// assert_eq!(v.get_mut(0..2).map(|v| &*v), Some(<&str>::from("he")));
249 ///
250 /// assert_eq!(v, <&str>::from("hello"));
251 /// {
252 /// let s = v.get_mut(0..2);
253 /// let s = s.map(|s| {
254 /// s.make_ascii_uppercase();
255 /// &*s
256 /// });
257 /// assert_eq!(s, Some(<&str>::from("HE")));
258 /// }
259 /// assert_eq!(v, <&str>::from("HEllo"));
260 /// ```
261 #[inline]
262 pub fn get_mut<I: SliceIndex<Self>>(&mut self, i: I) -> Option<&mut I::Output> {
263 i.get_mut(self.as_mut())
264 }
265
266 /// Returns an unchecked subslice of `str`.
267 ///
268 /// This is the unchecked alternative to indexing the `str`.
269 ///
270 /// # Safety
271 ///
272 /// Callers of this function are responsible that these preconditions are
273 /// satisfied:
274 ///
275 /// * The starting index must not exceed the ending index;
276 /// * Indexes must be within bounds of the original slice;
277 /// * Indexes must lie on UTF-8 sequence boundaries.
278 ///
279 /// Failing that, the returned string slice may reference invalid memory or
280 /// violate the invariants communicated by the `str` type.
281 ///
282 /// # Examples
283 ///
284 /// ```
285 /// # use generic_str::str;
286 /// let v = <&str>::from("🗻∈🌏");
287 /// unsafe {
288 /// assert_eq!(v.get_unchecked(0..4), <&str>::from("🗻"));
289 /// assert_eq!(v.get_unchecked(4..7), <&str>::from("∈"));
290 /// assert_eq!(v.get_unchecked(7..11), <&str>::from("🌏"));
291 /// }
292 /// ```
293 #[inline]
294 pub unsafe fn get_unchecked<I: SliceIndex<Self>>(&self, i: I) -> &I::Output {
295 // SAFETY: the caller must uphold the safety contract for `get_unchecked`;
296 // the slice is dereferencable because `self` is a safe reference.
297 // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
298 &*i.get_unchecked(self)
299 }
300
301 /// Returns a mutable, unchecked subslice of `str`.
302 ///
303 /// This is the unchecked alternative to indexing the `str`.
304 ///
305 /// # Safety
306 ///
307 /// Callers of this function are responsible that these preconditions are
308 /// satisfied:
309 ///
310 /// * The starting index must not exceed the ending index;
311 /// * Indexes must be within bounds of the original slice;
312 /// * Indexes must lie on UTF-8 sequence boundaries.
313 ///
314 /// Failing that, the returned string slice may reference invalid memory or
315 /// violate the invariants communicated by the `str` type.
316 ///
317 /// # Examples
318 ///
319 /// ```
320 /// # use generic_str::{str, String};
321 /// let mut v = String::from("🗻∈🌏");
322 /// unsafe {
323 /// assert_eq!(v.get_unchecked_mut(0..4), <&str>::from("🗻"));
324 /// assert_eq!(v.get_unchecked_mut(4..7), <&str>::from("∈"));
325 /// assert_eq!(v.get_unchecked_mut(7..11), <&str>::from("🌏"));
326 /// }
327 /// ```
328 #[inline]
329 pub unsafe fn get_unchecked_mut<I: SliceIndex<Self>>(&mut self, i: I) -> &mut I::Output {
330 // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`;
331 // the slice is dereferencable because `self` is a safe reference.
332 // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
333 &mut *i.get_unchecked_mut(self)
334 }
335
336 /// Divide one string slice into two at an index.
337 ///
338 /// The argument, `mid`, should be a byte offset from the start of the
339 /// string. It must also be on the boundary of a UTF-8 code point.
340 ///
341 /// The two slices returned go from the start of the string slice to `mid`,
342 /// and from `mid` to the end of the string slice.
343 ///
344 /// To get mutable string slices instead, see the [`split_at_mut`]
345 /// method.
346 ///
347 /// [`split_at_mut`]: str::split_at_mut
348 ///
349 /// # Panics
350 ///
351 /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is
352 /// past the end of the last code point of the string slice.
353 ///
354 /// # Examples
355 ///
356 /// Basic usage:
357 ///
358 /// ```
359 /// # use generic_str::str;
360 /// let s: &str = "Per Martin-Löf".into();
361 ///
362 /// let (first, last) = s.split_at(3);
363 ///
364 /// assert_eq!(first, <&str>::from("Per"));
365 /// assert_eq!(last, <&str>::from(" Martin-Löf"));
366 /// ```
367 #[inline]
368 pub fn split_at(&self, mid: usize) -> (&Self, &Self) {
369 // is_char_boundary checks that the index is in [0, .len()]
370 if self.is_char_boundary(mid) {
371 // SAFETY: just checked that `mid` is on a char boundary.
372 unsafe {
373 (
374 self.get_unchecked(0..mid),
375 self.get_unchecked(mid..self.len()),
376 )
377 }
378 } else {
379 slice_error_fail(self, 0, mid)
380 }
381 }
382
383 /// Divide one mutable string slice into two at an index.
384 ///
385 /// The argument, `mid`, should be a byte offset from the start of the
386 /// string. It must also be on the boundary of a UTF-8 code point.
387 ///
388 /// The two slices returned go from the start of the string slice to `mid`,
389 /// and from `mid` to the end of the string slice.
390 ///
391 /// To get immutable string slices instead, see the [`split_at`] method.
392 ///
393 /// [`split_at`]: str::split_at
394 ///
395 /// # Panics
396 ///
397 /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is
398 /// past the end of the last code point of the string slice.
399 ///
400 /// # Examples
401 ///
402 /// Basic usage:
403 ///
404 /// ```
405 /// # use generic_str::{str, String};
406 /// let mut s = String::from("Per Martin-Löf");
407 /// {
408 /// let (first, last) = s.split_at_mut(3);
409 /// first.make_ascii_uppercase();
410 /// assert_eq!(first, <&str>::from("PER"));
411 /// assert_eq!(last, <&str>::from(" Martin-Löf"));
412 /// }
413 /// assert_eq!(s, <&str>::from("PER Martin-Löf"));
414 /// ```
415 #[inline]
416 pub fn split_at_mut(&mut self, mid: usize) -> (&mut Self, &mut Self) {
417 // is_char_boundary checks that the index is in [0, .len()]
418 if self.is_char_boundary(mid) {
419 let len = self.len();
420 let ptr = self.as_mut_ptr();
421 // SAFETY: just checked that `mid` is on a char boundary.
422 unsafe {
423 (
424 from_utf8_unchecked_mut(core::slice::from_raw_parts_mut(ptr, mid)),
425 from_utf8_unchecked_mut(core::slice::from_raw_parts_mut(
426 ptr.add(mid),
427 len - mid,
428 )),
429 )
430 }
431 } else {
432 slice_error_fail(self, 0, mid)
433 }
434 }
435
436 /// Returns an iterator over the [`char`]s of a string slice.
437 ///
438 /// As a string slice consists of valid UTF-8, we can iterate through a
439 /// string slice by [`char`]. This method returns such an iterator.
440 ///
441 /// It's important to remember that [`char`] represents a Unicode Scalar
442 /// Value, and may not match your idea of what a 'character' is. Iteration
443 /// over grapheme clusters may be what you actually want. This functionality
444 /// is not provided by Rust's standard library, check crates.io instead.
445 ///
446 /// # Examples
447 ///
448 /// Basic usage:
449 ///
450 /// ```
451 /// # use generic_str::str;
452 /// let word = <&str>::from("goodbye");
453 ///
454 /// let count = word.chars().count();
455 /// assert_eq!(7, count);
456 ///
457 /// let mut chars = word.chars();
458 ///
459 /// assert_eq!(Some('g'), chars.next());
460 /// assert_eq!(Some('o'), chars.next());
461 /// assert_eq!(Some('o'), chars.next());
462 /// assert_eq!(Some('d'), chars.next());
463 /// assert_eq!(Some('b'), chars.next());
464 /// assert_eq!(Some('y'), chars.next());
465 /// assert_eq!(Some('e'), chars.next());
466 ///
467 /// assert_eq!(None, chars.next());
468 /// ```
469 ///
470 /// Remember, [`char`]s may not match your intuition about characters:
471 ///
472 /// [`char`]: prim@char
473 ///
474 /// ```
475 /// let y = "y̆";
476 ///
477 /// let mut chars = y.chars();
478 ///
479 /// assert_eq!(Some('y'), chars.next()); // not 'y̆'
480 /// assert_eq!(Some('\u{0306}'), chars.next());
481 ///
482 /// assert_eq!(None, chars.next());
483 /// ```
484 #[inline]
485 pub fn chars(&self) -> Chars<'_> {
486 let s: &core::primitive::str = self.into();
487 s.chars()
488 }
489 pub fn char_indices(&self) -> CharIndices<'_> {
490 let s: &core::primitive::str = self.into();
491 s.char_indices()
492 }
493
494 /// An iterator over the bytes of a string slice.
495 ///
496 /// As a string slice consists of a sequence of bytes, we can iterate
497 /// through a string slice by byte. This method returns such an iterator.
498 ///
499 /// # Examples
500 ///
501 /// Basic usage:
502 ///
503 /// ```
504 /// # use generic_str::str;
505 /// let mut bytes = <&str>::from("bors").bytes();
506 ///
507 /// assert_eq!(Some(b'b'), bytes.next());
508 /// assert_eq!(Some(b'o'), bytes.next());
509 /// assert_eq!(Some(b'r'), bytes.next());
510 /// assert_eq!(Some(b's'), bytes.next());
511 ///
512 /// assert_eq!(None, bytes.next());
513 /// ```
514 #[inline]
515 pub fn bytes(&self) -> Bytes<'_> {
516 let s: &core::primitive::str = self.into();
517 s.bytes()
518 }
519
520 /// Checks if all characters in this string are within the ASCII range.
521 ///
522 /// # Examples
523 ///
524 /// ```
525 /// # use generic_str::str;
526 /// let ascii = <&str>::from("hello!\n");
527 /// let non_ascii = <&str>::from("Grüße, Jürgen ❤");
528 ///
529 /// assert!(ascii.is_ascii());
530 /// assert!(!non_ascii.is_ascii());
531 /// ```
532 #[inline]
533 pub fn is_ascii(&self) -> bool {
534 // We can treat each byte as character here: all multibyte characters
535 // start with a byte that is not in the ascii range, so we will stop
536 // there already.
537 self.as_bytes().is_ascii()
538 }
539
540 /// Checks that two strings are an ASCII case-insensitive match.
541 ///
542 /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
543 /// but without allocating and copying temporaries.
544 ///
545 /// # Examples
546 ///
547 /// ```
548 /// # use generic_str::str;
549 /// assert!(<&str>::from("Ferris").eq_ignore_ascii_case("FERRIS".into()));
550 /// assert!(<&str>::from("Ferrös").eq_ignore_ascii_case("FERRöS".into()));
551 /// assert!(!<&str>::from("Ferrös").eq_ignore_ascii_case("FERRÖS".into()));
552 /// ```
553 #[inline]
554 pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
555 self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
556 }
557
558 /// Converts this string to its ASCII upper case equivalent in-place.
559 ///
560 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
561 /// but non-ASCII letters are unchanged.
562 ///
563 /// To return a new uppercased value without modifying the existing one, use
564 /// [`to_ascii_uppercase()`].
565 ///
566 /// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase
567 ///
568 /// # Examples
569 ///
570 /// ```
571 /// # use generic_str::{str, String};
572 /// let mut s = String::from("Grüße, Jürgen ❤");
573 ///
574 /// s.make_ascii_uppercase();
575 ///
576 /// assert_eq!(s, <&str>::from("GRüßE, JüRGEN ❤"));
577 /// ```
578 #[inline]
579 pub fn make_ascii_uppercase(&mut self) {
580 // SAFETY: safe because we transmute two types with the same layout.
581 let me = unsafe { self.as_bytes_mut() };
582 me.make_ascii_uppercase()
583 }
584
585 /// Converts this string to its ASCII lower case equivalent in-place.
586 ///
587 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
588 /// but non-ASCII letters are unchanged.
589 ///
590 /// To return a new lowercased value without modifying the existing one, use
591 /// [`to_ascii_lowercase()`].
592 ///
593 /// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase
594 ///
595 /// # Examples
596 ///
597 /// ```
598 /// # use generic_str::{str, String};
599 /// let mut s = String::from("GRÜßE, JÜRGEN ❤");
600 ///
601 /// s.make_ascii_lowercase();
602 ///
603 /// assert_eq!(s, <&str>::from("grÜße, jÜrgen ❤"));
604 /// ```
605 #[inline]
606 pub fn make_ascii_lowercase(&mut self) {
607 // SAFETY: safe because we transmute two types with the same layout.
608 let me = unsafe { self.as_bytes_mut() };
609 me.make_ascii_lowercase()
610 }
611
/// Builds a new [`String`] holding the lowercase equivalent of this
/// string slice.
///
/// 'Lowercase' is defined according to the terms of the Unicode Derived
/// Core Property `Lowercase`.
///
/// Changing case can expand one character into several, which is why the
/// result is a fresh [`String`] rather than an in-place modification.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// # use generic_str::str;
/// let s = <&str>::from("HELLO");
///
/// assert_eq!(s.to_lowercase(), <&str>::from("hello"));
/// ```
///
/// A tricky example, with sigma:
///
/// ```
/// # use generic_str::str;
/// let sigma = <&str>::from("Σ");
///
/// assert_eq!(sigma.to_lowercase(), <&str>::from("σ"));
///
/// // but at the end of a word, it's ς, not σ:
/// let odysseus = <&str>::from("ὈΔΥΣΣΕΎΣ");
///
/// assert_eq!(odysseus.to_lowercase(), <&str>::from("ὀδυσσεύς"));
/// ```
///
/// Languages without case are not changed:
///
/// ```
/// # use generic_str::str;
/// let new_year = <&str>::from("农历新年");
///
/// assert_eq!(new_year, new_year.to_lowercase());
/// ```
#[cfg(feature = "alloc")]
pub fn to_lowercase(&self) -> crate::String {
    use core::unicode::conversions;

    // Skips over `Case_Ignorable` chars and reports whether the first
    // remaining char is `Cased`; `false` when nothing remains.
    fn case_ignoreable_then_cased<I: Iterator<Item = char>>(mut iter: I) -> bool {
        use core::unicode::{Case_Ignorable, Cased};
        iter.find(|&c| !Case_Ignorable(c)).map_or(false, Cased)
    }

    // Appends the lowercase form of the `Σ` found at byte offset `i` of `from`.
    fn map_uppercase_sigma(from: &str, i: usize, to: &mut crate::String) {
        // See http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
        // for the definition of `Final_Sigma`.
        debug_assert!('Σ'.len_utf8() == 2);
        let follows_cased = case_ignoreable_then_cased(from[..i].chars().rev());
        let is_word_final = follows_cased && !case_ignoreable_then_cased(from[i + 2..].chars());
        let sigma = if is_word_final { "ς" } else { "σ" };
        to.push_str(sigma.into());
    }

    // Appends a three-slot case mapping; unused trailing slots hold '\0'.
    fn push_mapping(to: &mut crate::String, mapping: [char; 3]) {
        match mapping {
            [a, '\0', _] => to.push(a),
            [a, b, '\0'] => {
                to.push(a);
                to.push(b);
            }
            [a, b, c] => {
                to.push(a);
                to.push(b);
                to.push(c);
            }
        }
    }

    let mut lowered = crate::String::with_capacity(self.len());
    for (offset, ch) in self[..].char_indices() {
        if ch == 'Σ' {
            // Σ maps to σ, except at the end of a word where it maps to ς.
            // This is the only conditional (contextual) but
            // language-independent mapping in `SpecialCasing.txt`, so it is
            // hard-coded instead of going through a generic "condition"
            // mechanism.
            // See https://github.com/rust-lang/rust/issues/26035
            map_uppercase_sigma(self, offset, &mut lowered);
            continue;
        }
        push_mapping(&mut lowered, conversions::to_lower(ch));
    }
    lowered
}
701
/// Builds a new [`String`] holding the uppercase equivalent of this
/// string slice.
///
/// 'Uppercase' is defined according to the terms of the Unicode Derived
/// Core Property `Uppercase`.
///
/// Changing case can expand one character into several, which is why the
/// result is a fresh [`String`] rather than an in-place modification.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// # use generic_str::str;
/// let s = <&str>::from("hello");
///
/// assert_eq!(s.to_uppercase(), <&str>::from("HELLO"));
/// ```
///
/// Scripts without case are not changed:
///
/// ```
/// # use generic_str::str;
/// let new_year = <&str>::from("农历新年");
///
/// assert_eq!(new_year, new_year.to_uppercase());
/// ```
///
/// One character can become multiple:
/// ```
/// # use generic_str::str;
/// let s = <&str>::from("tschüß");
///
/// assert_eq!(s.to_uppercase(), <&str>::from("TSCHÜSS"));
/// ```
#[cfg(feature = "alloc")]
pub fn to_uppercase(&self) -> crate::String {
    use core::unicode::conversions;

    // Appends a three-slot case mapping; unused trailing slots hold '\0'.
    fn push_mapping(to: &mut crate::String, mapping: [char; 3]) {
        match mapping {
            [a, '\0', _] => to.push(a),
            [a, b, '\0'] => {
                to.push(a);
                to.push(b);
            }
            [a, b, c] => {
                to.push(a);
                to.push(b);
                to.push(c);
            }
        }
    }

    let mut uppered = crate::String::with_capacity(self.len());
    for ch in self[..].chars() {
        push_mapping(&mut uppered, conversions::to_upper(ch));
    }
    uppered
}
759}
760
761#[inline(never)]
762#[cold]
763#[track_caller]
764pub(crate) fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
765 const MAX_DISPLAY_LENGTH: usize = 256;
766 let (truncated, s_trunc) = truncate_to_char_boundary(s, MAX_DISPLAY_LENGTH);
767 let ellipsis = if truncated { "[...]" } else { "" };
768
769 // 1. out of bounds
770 if begin > s.len() || end > s.len() {
771 let oob_index = if begin > s.len() { begin } else { end };
772 panic!(
773 "byte index {} is out of bounds of `{}`{}",
774 oob_index, s_trunc, ellipsis
775 );
776 }
777
778 // 2. begin <= end
779 assert!(
780 begin <= end,
781 "begin <= end ({} <= {}) when slicing `{}`{}",
782 begin,
783 end,
784 s_trunc,
785 ellipsis
786 );
787
788 // 3. character boundary
789 let index = if !s.is_char_boundary(begin) {
790 begin
791 } else {
792 end
793 };
794 // find the character
795 let mut char_start = index;
796 while !s.is_char_boundary(char_start) {
797 char_start -= 1;
798 }
799 // `char_start` must be less than len and a char boundary
800 let ch = s[char_start..].chars().next().unwrap();
801 let char_range = char_start..char_start + ch.len_utf8();
802 panic!(
803 "byte index {} is not a char boundary; it is inside {:?} (bytes {:?}) of `{}`{}",
804 index, ch, char_range, s_trunc, ellipsis
805 );
806}