generic_str/
slice_utf8.rs

1use core::{
2    slice::SliceIndex,
3    str::{Bytes, CharIndices, Chars},
4};
5
6use crate::{from_utf8_unchecked_mut, validation::truncate_to_char_boundary, StringSlice};
7
8#[allow(non_camel_case_types)]
9/// Exactly the same as [`std::primitive::str`], except generic
10pub type str = StringSlice<u8>;
11
12impl str {
13    /// Returns the length of `self`.
14    ///
15    /// This length is in bytes, not [`char`]s or graphemes. In other words,
16    /// it may not be what a human considers the length of the string.
17    ///
18    /// [`char`]: prim@char
19    ///
20    /// # Examples
21    ///
22    /// Basic usage:
23    ///
24    /// ```
25    /// # use generic_str::str;
26    /// let len = <&str>::from("foo").len();
27    /// assert_eq!(3, len);
28    ///
29    /// assert_eq!("ƒoo".len(), 4); // fancy f!
30    /// assert_eq!("ƒoo".chars().count(), 3);
31    /// ```
32    #[inline]
33    pub fn len(&self) -> usize {
34        self.storage.as_ref().len()
35    }
36
37    /// Returns `true` if `self` has a length of zero bytes.
38    ///
39    /// # Examples
40    ///
41    /// Basic usage:
42    ///
43    /// ```
44    /// # use generic_str::str;
45    /// let s: &str = "".into();
46    /// assert!(s.is_empty());
47    ///
48    /// let s: &str = "not empty".into();
49    /// assert!(!s.is_empty());
50    /// ```
51    #[inline]
52    pub fn is_empty(&self) -> bool {
53        self.storage.is_empty()
54    }
55
56    /// Checks that `index`-th byte is the first byte in a UTF-8 code point
57    /// sequence or the end of the string.
58    ///
59    /// The start and end of the string (when `index == self.len()`) are
60    /// considered to be boundaries.
61    ///
62    /// Returns `false` if `index` is greater than `self.len()`.
63    ///
64    /// # Examples
65    ///
66    /// ```
67    /// # use generic_str::str;
68    /// let s: &str = "Löwe 老虎 Léopard".into();
69    /// assert!(s.is_char_boundary(0));
70    /// // start of `老`
71    /// assert!(s.is_char_boundary(6));
72    /// assert!(s.is_char_boundary(s.len()));
73    ///
74    /// // second byte of `ö`
75    /// assert!(!s.is_char_boundary(2));
76    ///
77    /// // third byte of `老`
78    /// assert!(!s.is_char_boundary(8));
79    /// ```
80    #[inline]
81    pub fn is_char_boundary(&self, index: usize) -> bool {
82        // 0 is always ok.
83        // Test for 0 explicitly so that it can optimize out the check
84        // easily and skip reading string data for that case.
85        // Note that optimizing `self.get(..index)` relies on this.
86        if index == 0 {
87            return true;
88        }
89
90        match self.as_bytes().get(index) {
91            // For `None` we have two options:
92            //
93            // - index == self.len()
94            //   Empty strings are valid, so return true
95            // - index > self.len()
96            //   In this case return false
97            //
98            // The check is placed exactly here, because it improves generated
99            // code on higher opt-levels. See PR #84751 for more details.
100            None => index == self.len(),
101
102            // This is bit magic equivalent to: b < 128 || b >= 192
103            Some(&b) => (b as i8) >= -0x40,
104        }
105    }
106
107    /// Converts a string slice to a byte slice. To convert the byte slice back
108    /// into a string slice, use the [`from_utf8`] function.
109    ///
110    /// [`from_utf8`]: crate::from_utf8
111    ///
112    /// # Examples
113    ///
114    /// Basic usage:
115    ///
116    /// ```
117    /// # use generic_str::str;
118    /// let bytes = <&str>::from("bors").as_bytes();
119    /// assert_eq!(b"bors", bytes);
120    /// ```
121    #[inline(always)]
122    pub fn as_bytes(&self) -> &[u8] {
123        // SAFETY: const sound because we transmute two types with the same layout
124        unsafe { core::mem::transmute(self.storage.as_ref()) }
125    }
126
127    /// Converts a mutable string slice to a mutable byte slice.
128    ///
129    /// # Safety
130    ///
131    /// The caller must ensure that the content of the slice is valid UTF-8
132    /// before the borrow ends and the underlying `str` is used.
133    ///
134    /// Use of a `str` whose contents are not valid UTF-8 is undefined behavior.
135    ///
136    /// # Examples
137    ///
138    /// Basic usage:
139    ///
140    /// ```
141    /// # use generic_str::String;
142    /// let mut s = String::from("Hello");
143    /// let bytes = unsafe { s.as_bytes_mut() };
144    ///
145    /// assert_eq!(bytes, b"Hello");
146    /// ```
147    ///
148    /// Mutability:
149    ///
150    /// ```
151    /// # use generic_str::{str, String};
152    /// let mut s = String::from("🗻∈🌏");
153    ///
154    /// unsafe {
155    ///     let bytes = s.as_bytes_mut();
156    ///
157    ///     bytes[0] = 0xF0;
158    ///     bytes[1] = 0x9F;
159    ///     bytes[2] = 0x8D;
160    ///     bytes[3] = 0x94;
161    /// }
162    ///
163    /// assert_eq!(s, <&str>::from("🍔∈🌏"));
164    /// ```
165    #[inline(always)]
166    pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] {
167        // SAFETY: const sound because we transmute two types with the same layout
168        core::mem::transmute(self.storage.as_mut())
169    }
170
171    /// Converts a string slice to a raw pointer.
172    ///
173    /// As string slices are a slice of bytes, the raw pointer points to a
174    /// [`u8`]. This pointer will be pointing to the first byte of the string
175    /// slice.
176    ///
177    /// The caller must ensure that the returned pointer is never written to.
178    /// If you need to mutate the contents of the string slice, use [`as_mut_ptr`].
179    ///
180    /// [`as_mut_ptr`]: str::as_mut_ptr
181    ///
182    /// # Examples
183    ///
184    /// Basic usage:
185    ///
186    /// ```
187    /// # use generic_str::str;
188    /// let s: &str = "Hello".into();
189    /// let ptr = s.as_ptr();
190    /// ```
191    #[inline]
192    pub fn as_ptr(&self) -> *const u8 {
193        self.storage.as_ref() as *const [u8] as *const u8
194    }
195
196    /// Converts a mutable string slice to a raw pointer.
197    ///
198    /// As string slices are a slice of bytes, the raw pointer points to a
199    /// [`u8`]. This pointer will be pointing to the first byte of the string
200    /// slice.
201    ///
202    /// It is your responsibility to make sure that the string slice only gets
203    /// modified in a way that it remains valid UTF-8.
204    #[inline]
205    pub fn as_mut_ptr(&mut self) -> *mut u8 {
206        self.storage.as_mut() as *mut [u8] as *mut u8
207    }
208
209    /// Returns a subslice of `str`.
210    ///
211    /// This is the non-panicking alternative to indexing the `str`. Returns
212    /// [`None`] whenever equivalent indexing operation would panic.
213    ///
214    /// # Examples
215    ///
216    /// ```
217    /// # use generic_str::{str, String};
218    /// let v = String::from("🗻∈🌏");
219    ///
220    /// assert_eq!(v.get(0..4), Some(<&str>::from("🗻")));
221    ///
222    /// // indices not on UTF-8 sequence boundaries
223    /// assert!(v.get(1..).is_none());
224    /// assert!(v.get(..8).is_none());
225    ///
226    /// // out of bounds
227    /// assert!(v.get(..42).is_none());
228    /// ```
229    #[inline]
230    pub fn get<I: SliceIndex<Self>>(&self, i: I) -> Option<&I::Output> {
231        i.get(self.as_ref())
232    }
233
234    /// Returns a mutable subslice of `str`.
235    ///
236    /// This is the non-panicking alternative to indexing the `str`. Returns
237    /// [`None`] whenever equivalent indexing operation would panic.
238    ///
239    /// # Examples
240    ///
241    /// ```
242    /// # use generic_str::{str, String};
243    /// let mut v = String::from("hello");
244    /// // correct length
245    /// assert!(v.get_mut(0..5).is_some());
246    /// // out of bounds
247    /// assert!(v.get_mut(..42).is_none());
248    /// assert_eq!(v.get_mut(0..2).map(|v| &*v), Some(<&str>::from("he")));
249    ///
250    /// assert_eq!(v, <&str>::from("hello"));
251    /// {
252    ///     let s = v.get_mut(0..2);
253    ///     let s = s.map(|s| {
254    ///         s.make_ascii_uppercase();
255    ///         &*s
256    ///     });
257    ///     assert_eq!(s, Some(<&str>::from("HE")));
258    /// }
259    /// assert_eq!(v, <&str>::from("HEllo"));
260    /// ```
261    #[inline]
262    pub fn get_mut<I: SliceIndex<Self>>(&mut self, i: I) -> Option<&mut I::Output> {
263        i.get_mut(self.as_mut())
264    }
265
266    /// Returns an unchecked subslice of `str`.
267    ///
268    /// This is the unchecked alternative to indexing the `str`.
269    ///
270    /// # Safety
271    ///
272    /// Callers of this function are responsible that these preconditions are
273    /// satisfied:
274    ///
275    /// * The starting index must not exceed the ending index;
276    /// * Indexes must be within bounds of the original slice;
277    /// * Indexes must lie on UTF-8 sequence boundaries.
278    ///
279    /// Failing that, the returned string slice may reference invalid memory or
280    /// violate the invariants communicated by the `str` type.
281    ///
282    /// # Examples
283    ///
284    /// ```
285    /// # use generic_str::str;
286    /// let v = <&str>::from("🗻∈🌏");
287    /// unsafe {
288    ///     assert_eq!(v.get_unchecked(0..4), <&str>::from("🗻"));
289    ///     assert_eq!(v.get_unchecked(4..7), <&str>::from("∈"));
290    ///     assert_eq!(v.get_unchecked(7..11), <&str>::from("🌏"));
291    /// }
292    /// ```
293    #[inline]
294    pub unsafe fn get_unchecked<I: SliceIndex<Self>>(&self, i: I) -> &I::Output {
295        // SAFETY: the caller must uphold the safety contract for `get_unchecked`;
296        // the slice is dereferencable because `self` is a safe reference.
297        // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
298        &*i.get_unchecked(self)
299    }
300
301    /// Returns a mutable, unchecked subslice of `str`.
302    ///
303    /// This is the unchecked alternative to indexing the `str`.
304    ///
305    /// # Safety
306    ///
307    /// Callers of this function are responsible that these preconditions are
308    /// satisfied:
309    ///
310    /// * The starting index must not exceed the ending index;
311    /// * Indexes must be within bounds of the original slice;
312    /// * Indexes must lie on UTF-8 sequence boundaries.
313    ///
314    /// Failing that, the returned string slice may reference invalid memory or
315    /// violate the invariants communicated by the `str` type.
316    ///
317    /// # Examples
318    ///
319    /// ```
320    /// # use generic_str::{str, String};
321    /// let mut v = String::from("🗻∈🌏");
322    /// unsafe {
323    ///     assert_eq!(v.get_unchecked_mut(0..4), <&str>::from("🗻"));
324    ///     assert_eq!(v.get_unchecked_mut(4..7), <&str>::from("∈"));
325    ///     assert_eq!(v.get_unchecked_mut(7..11), <&str>::from("🌏"));
326    /// }
327    /// ```
328    #[inline]
329    pub unsafe fn get_unchecked_mut<I: SliceIndex<Self>>(&mut self, i: I) -> &mut I::Output {
330        // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`;
331        // the slice is dereferencable because `self` is a safe reference.
332        // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
333        &mut *i.get_unchecked_mut(self)
334    }
335
336    /// Divide one string slice into two at an index.
337    ///
338    /// The argument, `mid`, should be a byte offset from the start of the
339    /// string. It must also be on the boundary of a UTF-8 code point.
340    ///
341    /// The two slices returned go from the start of the string slice to `mid`,
342    /// and from `mid` to the end of the string slice.
343    ///
344    /// To get mutable string slices instead, see the [`split_at_mut`]
345    /// method.
346    ///
347    /// [`split_at_mut`]: str::split_at_mut
348    ///
349    /// # Panics
350    ///
351    /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is
352    /// past the end of the last code point of the string slice.
353    ///
354    /// # Examples
355    ///
356    /// Basic usage:
357    ///
358    /// ```
359    /// # use generic_str::str;
360    /// let s: &str = "Per Martin-Löf".into();
361    ///
362    /// let (first, last) = s.split_at(3);
363    ///
364    /// assert_eq!(first, <&str>::from("Per"));
365    /// assert_eq!(last, <&str>::from(" Martin-Löf"));
366    /// ```
367    #[inline]
368    pub fn split_at(&self, mid: usize) -> (&Self, &Self) {
369        // is_char_boundary checks that the index is in [0, .len()]
370        if self.is_char_boundary(mid) {
371            // SAFETY: just checked that `mid` is on a char boundary.
372            unsafe {
373                (
374                    self.get_unchecked(0..mid),
375                    self.get_unchecked(mid..self.len()),
376                )
377            }
378        } else {
379            slice_error_fail(self, 0, mid)
380        }
381    }
382
383    /// Divide one mutable string slice into two at an index.
384    ///
385    /// The argument, `mid`, should be a byte offset from the start of the
386    /// string. It must also be on the boundary of a UTF-8 code point.
387    ///
388    /// The two slices returned go from the start of the string slice to `mid`,
389    /// and from `mid` to the end of the string slice.
390    ///
391    /// To get immutable string slices instead, see the [`split_at`] method.
392    ///
393    /// [`split_at`]: str::split_at
394    ///
395    /// # Panics
396    ///
397    /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is
398    /// past the end of the last code point of the string slice.
399    ///
400    /// # Examples
401    ///
402    /// Basic usage:
403    ///
404    /// ```
405    /// # use generic_str::{str, String};
406    /// let mut s = String::from("Per Martin-Löf");
407    /// {
408    ///     let (first, last) = s.split_at_mut(3);
409    ///     first.make_ascii_uppercase();
410    ///     assert_eq!(first, <&str>::from("PER"));
411    ///     assert_eq!(last, <&str>::from(" Martin-Löf"));
412    /// }
413    /// assert_eq!(s, <&str>::from("PER Martin-Löf"));
414    /// ```
415    #[inline]
416    pub fn split_at_mut(&mut self, mid: usize) -> (&mut Self, &mut Self) {
417        // is_char_boundary checks that the index is in [0, .len()]
418        if self.is_char_boundary(mid) {
419            let len = self.len();
420            let ptr = self.as_mut_ptr();
421            // SAFETY: just checked that `mid` is on a char boundary.
422            unsafe {
423                (
424                    from_utf8_unchecked_mut(core::slice::from_raw_parts_mut(ptr, mid)),
425                    from_utf8_unchecked_mut(core::slice::from_raw_parts_mut(
426                        ptr.add(mid),
427                        len - mid,
428                    )),
429                )
430            }
431        } else {
432            slice_error_fail(self, 0, mid)
433        }
434    }
435
436    /// Returns an iterator over the [`char`]s of a string slice.
437    ///
438    /// As a string slice consists of valid UTF-8, we can iterate through a
439    /// string slice by [`char`]. This method returns such an iterator.
440    ///
441    /// It's important to remember that [`char`] represents a Unicode Scalar
442    /// Value, and may not match your idea of what a 'character' is. Iteration
443    /// over grapheme clusters may be what you actually want. This functionality
444    /// is not provided by Rust's standard library, check crates.io instead.
445    ///
446    /// # Examples
447    ///
448    /// Basic usage:
449    ///
450    /// ```
451    /// # use generic_str::str;
452    /// let word = <&str>::from("goodbye");
453    ///
454    /// let count = word.chars().count();
455    /// assert_eq!(7, count);
456    ///
457    /// let mut chars = word.chars();
458    ///
459    /// assert_eq!(Some('g'), chars.next());
460    /// assert_eq!(Some('o'), chars.next());
461    /// assert_eq!(Some('o'), chars.next());
462    /// assert_eq!(Some('d'), chars.next());
463    /// assert_eq!(Some('b'), chars.next());
464    /// assert_eq!(Some('y'), chars.next());
465    /// assert_eq!(Some('e'), chars.next());
466    ///
467    /// assert_eq!(None, chars.next());
468    /// ```
469    ///
470    /// Remember, [`char`]s may not match your intuition about characters:
471    ///
472    /// [`char`]: prim@char
473    ///
474    /// ```
475    /// let y = "y̆";
476    ///
477    /// let mut chars = y.chars();
478    ///
479    /// assert_eq!(Some('y'), chars.next()); // not 'y̆'
480    /// assert_eq!(Some('\u{0306}'), chars.next());
481    ///
482    /// assert_eq!(None, chars.next());
483    /// ```
484    #[inline]
485    pub fn chars(&self) -> Chars<'_> {
486        let s: &core::primitive::str = self.into();
487        s.chars()
488    }
489    pub fn char_indices(&self) -> CharIndices<'_> {
490        let s: &core::primitive::str = self.into();
491        s.char_indices()
492    }
493
494    /// An iterator over the bytes of a string slice.
495    ///
496    /// As a string slice consists of a sequence of bytes, we can iterate
497    /// through a string slice by byte. This method returns such an iterator.
498    ///
499    /// # Examples
500    ///
501    /// Basic usage:
502    ///
503    /// ```
504    /// # use generic_str::str;
505    /// let mut bytes = <&str>::from("bors").bytes();
506    ///
507    /// assert_eq!(Some(b'b'), bytes.next());
508    /// assert_eq!(Some(b'o'), bytes.next());
509    /// assert_eq!(Some(b'r'), bytes.next());
510    /// assert_eq!(Some(b's'), bytes.next());
511    ///
512    /// assert_eq!(None, bytes.next());
513    /// ```
514    #[inline]
515    pub fn bytes(&self) -> Bytes<'_> {
516        let s: &core::primitive::str = self.into();
517        s.bytes()
518    }
519
520    /// Checks if all characters in this string are within the ASCII range.
521    ///
522    /// # Examples
523    ///
524    /// ```
525    /// # use generic_str::str;
526    /// let ascii = <&str>::from("hello!\n");
527    /// let non_ascii = <&str>::from("Grüße, Jürgen ❤");
528    ///
529    /// assert!(ascii.is_ascii());
530    /// assert!(!non_ascii.is_ascii());
531    /// ```
532    #[inline]
533    pub fn is_ascii(&self) -> bool {
534        // We can treat each byte as character here: all multibyte characters
535        // start with a byte that is not in the ascii range, so we will stop
536        // there already.
537        self.as_bytes().is_ascii()
538    }
539
540    /// Checks that two strings are an ASCII case-insensitive match.
541    ///
542    /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
543    /// but without allocating and copying temporaries.
544    ///
545    /// # Examples
546    ///
547    /// ```
548    /// # use generic_str::str;
549    /// assert!(<&str>::from("Ferris").eq_ignore_ascii_case("FERRIS".into()));
550    /// assert!(<&str>::from("Ferrös").eq_ignore_ascii_case("FERRöS".into()));
551    /// assert!(!<&str>::from("Ferrös").eq_ignore_ascii_case("FERRÖS".into()));
552    /// ```
553    #[inline]
554    pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
555        self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
556    }
557
558    /// Converts this string to its ASCII upper case equivalent in-place.
559    ///
560    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
561    /// but non-ASCII letters are unchanged.
562    ///
563    /// To return a new uppercased value without modifying the existing one, use
564    /// [`to_ascii_uppercase()`].
565    ///
566    /// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase
567    ///
568    /// # Examples
569    ///
570    /// ```
571    /// # use generic_str::{str, String};
572    /// let mut s = String::from("Grüße, Jürgen ❤");
573    ///
574    /// s.make_ascii_uppercase();
575    ///
576    /// assert_eq!(s, <&str>::from("GRüßE, JüRGEN ❤"));
577    /// ```
578    #[inline]
579    pub fn make_ascii_uppercase(&mut self) {
580        // SAFETY: safe because we transmute two types with the same layout.
581        let me = unsafe { self.as_bytes_mut() };
582        me.make_ascii_uppercase()
583    }
584
585    /// Converts this string to its ASCII lower case equivalent in-place.
586    ///
587    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
588    /// but non-ASCII letters are unchanged.
589    ///
590    /// To return a new lowercased value without modifying the existing one, use
591    /// [`to_ascii_lowercase()`].
592    ///
593    /// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase
594    ///
595    /// # Examples
596    ///
597    /// ```
598    /// # use generic_str::{str, String};
599    /// let mut s = String::from("GRÜßE, JÜRGEN ❤");
600    ///
601    /// s.make_ascii_lowercase();
602    ///
603    /// assert_eq!(s, <&str>::from("grÜße, jÜrgen ❤"));
604    /// ```
605    #[inline]
606    pub fn make_ascii_lowercase(&mut self) {
607        // SAFETY: safe because we transmute two types with the same layout.
608        let me = unsafe { self.as_bytes_mut() };
609        me.make_ascii_lowercase()
610    }
611
612    /// Returns the lowercase equivalent of this string slice, as a new [`String`].
613    ///
614    /// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
615    /// `Lowercase`.
616    ///
617    /// Since some characters can expand into multiple characters when changing
618    /// the case, this function returns a [`String`] instead of modifying the
619    /// parameter in-place.
620    ///
621    /// # Examples
622    ///
623    /// Basic usage:
624    ///
625    /// ```
626    /// # use generic_str::str;
627    /// let s = <&str>::from("HELLO");
628    ///
629    /// assert_eq!(s.to_lowercase(), <&str>::from("hello"));
630    /// ```
631    ///
632    /// A tricky example, with sigma:
633    ///
634    /// ```
635    /// # use generic_str::str;
636    /// let sigma = <&str>::from("Σ");
637    ///
638    /// assert_eq!(sigma.to_lowercase(), <&str>::from("σ"));
639    ///
640    /// // but at the end of a word, it's ς, not σ:
641    /// let odysseus = <&str>::from("ὈΔΥΣΣΕΎΣ");
642    ///
643    /// assert_eq!(odysseus.to_lowercase(), <&str>::from("ὀδυσσεύς"));
644    /// ```
645    ///
646    /// Languages without case are not changed:
647    ///
648    /// ```
649    /// # use generic_str::str;
650    /// let new_year = <&str>::from("农历新年");
651    ///
652    /// assert_eq!(new_year, new_year.to_lowercase());
653    /// ```
654    #[cfg(feature = "alloc")]
655    pub fn to_lowercase(&self) -> crate::String {
656        use core::unicode::conversions;
657
658        let mut s = crate::String::with_capacity(self.len());
659        for (i, c) in self[..].char_indices() {
660            if c == 'Σ' {
661                // Σ maps to σ, except at the end of a word where it maps to ς.
662                // This is the only conditional (contextual) but language-independent mapping
663                // in `SpecialCasing.txt`,
664                // so hard-code it rather than have a generic "condition" mechanism.
665                // See https://github.com/rust-lang/rust/issues/26035
666                map_uppercase_sigma(self, i, &mut s)
667            } else {
668                match conversions::to_lower(c) {
669                    [a, '\0', _] => s.push(a),
670                    [a, b, '\0'] => {
671                        s.push(a);
672                        s.push(b);
673                    }
674                    [a, b, c] => {
675                        s.push(a);
676                        s.push(b);
677                        s.push(c);
678                    }
679                }
680            }
681        }
682        return s;
683
684        fn map_uppercase_sigma(from: &str, i: usize, to: &mut crate::String) {
685            // See http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
686            // for the definition of `Final_Sigma`.
687            debug_assert!('Σ'.len_utf8() == 2);
688            let is_word_final = case_ignoreable_then_cased(from[..i].chars().rev())
689                && !case_ignoreable_then_cased(from[i + 2..].chars());
690            to.push_str(if is_word_final { "ς" } else { "σ" }.into());
691        }
692
693        fn case_ignoreable_then_cased<I: Iterator<Item = char>>(mut iter: I) -> bool {
694            use core::unicode::{Case_Ignorable, Cased};
695            match iter.find(|&c| !Case_Ignorable(c)) {
696                Some(c) => Cased(c),
697                None => false,
698            }
699        }
700    }
701
702    /// Returns the uppercase equivalent of this string slice, as a new [`String`].
703    ///
704    /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
705    /// `Uppercase`.
706    ///
707    /// Since some characters can expand into multiple characters when changing
708    /// the case, this function returns a [`String`] instead of modifying the
709    /// parameter in-place.
710    ///
711    /// # Examples
712    ///
713    /// Basic usage:
714    ///
715    /// ```
716    /// # use generic_str::str;
717    /// let s = <&str>::from("hello");
718    ///
719    /// assert_eq!(s.to_uppercase(), <&str>::from("HELLO"));
720    /// ```
721    ///
722    /// Scripts without case are not changed:
723    ///
724    /// ```
725    /// # use generic_str::str;
726    /// let new_year = <&str>::from("农历新年");
727    ///
728    /// assert_eq!(new_year, new_year.to_uppercase());
729    /// ```
730    ///
731    /// One character can become multiple:
732    /// ```
733    /// # use generic_str::str;
734    /// let s = <&str>::from("tschüß");
735    ///
736    /// assert_eq!(s.to_uppercase(), <&str>::from("TSCHÜSS"));
737    /// ```
738    #[cfg(feature = "alloc")]
739    pub fn to_uppercase(&self) -> crate::String {
740        use core::unicode::conversions;
741
742        let mut s = crate::String::with_capacity(self.len());
743        for c in self[..].chars() {
744            match conversions::to_upper(c) {
745                [a, '\0', _] => s.push(a),
746                [a, b, '\0'] => {
747                    s.push(a);
748                    s.push(b);
749                }
750                [a, b, c] => {
751                    s.push(a);
752                    s.push(b);
753                    s.push(c);
754                }
755            }
756        }
757        s
758    }
759}
760
761#[inline(never)]
762#[cold]
763#[track_caller]
764pub(crate) fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
765    const MAX_DISPLAY_LENGTH: usize = 256;
766    let (truncated, s_trunc) = truncate_to_char_boundary(s, MAX_DISPLAY_LENGTH);
767    let ellipsis = if truncated { "[...]" } else { "" };
768
769    // 1. out of bounds
770    if begin > s.len() || end > s.len() {
771        let oob_index = if begin > s.len() { begin } else { end };
772        panic!(
773            "byte index {} is out of bounds of `{}`{}",
774            oob_index, s_trunc, ellipsis
775        );
776    }
777
778    // 2. begin <= end
779    assert!(
780        begin <= end,
781        "begin <= end ({} <= {}) when slicing `{}`{}",
782        begin,
783        end,
784        s_trunc,
785        ellipsis
786    );
787
788    // 3. character boundary
789    let index = if !s.is_char_boundary(begin) {
790        begin
791    } else {
792        end
793    };
794    // find the character
795    let mut char_start = index;
796    while !s.is_char_boundary(char_start) {
797        char_start -= 1;
798    }
799    // `char_start` must be less than len and a char boundary
800    let ch = s[char_start..].chars().next().unwrap();
801    let char_range = char_start..char_start + ch.len_utf8();
802    panic!(
803        "byte index {} is not a char boundary; it is inside {:?} (bytes {:?}) of `{}`{}",
804        index, ch, char_range, s_trunc, ellipsis
805    );
806}
generic_str/slice_utf8.rs

generic_str/
slice_utf8.rs