enrede/
str.rs

1//! Implementation and utilities for a generically encoded [`str`] equivalent type.
2//!
3//! See also the [`Str<E>`] type.
4
5#[cfg(feature = "alloc")]
6use alloc::borrow::ToOwned;
7#[cfg(feature = "alloc")]
8use alloc::vec;
9use bytemuck::must_cast_slice as cast_slice;
10use core::cmp::Ordering;
11use core::error::Error;
12use core::fmt::Write;
13use core::hash::{Hash, Hasher};
14use core::marker::PhantomData;
15use core::ops::{Bound, Index, RangeBounds};
16use core::slice::SliceIndex;
17use core::{fmt, mem, ptr, slice};
18#[cfg(feature = "serde")]
19use serde::{
20    de::{self, Unexpected},
21    Deserialize, Deserializer, Serialize, Serializer,
22};
23
24use crate::encoding::{AlwaysValid, Encoding, RecodeCause, Utf16, Utf32, Utf8, ValidateError};
25#[cfg(feature = "alloc")]
26use crate::string::String;
27
28mod iter;
29
30use crate::encoding;
31pub use iter::{CharIndices, Chars};
32
/// Error encountered while re-encoding a [`Str`] or [`CStr`](crate::CStr) into another
/// format
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct RecodeError {
    // Length of the input that was recoded successfully before the failure.
    valid_up_to: usize,
    // The character that could not be written in the output encoding.
    char: char,
    // Length of `char` in the *input* encoding, in bytes.
    char_len: u8,
}
41
42impl RecodeError {
43    /// The length of valid data in the input before the error was encountered. Calling
44    /// [`recode`](Str::recode) again on the input sliced down to this length will succeed.
45    pub fn valid_up_to(&self) -> usize {
46        self.valid_up_to
47    }
48
49    /// The character encountered that caused re-encoding to fail. This character most likely isn't
50    /// supported by the new encoding.
51    pub fn char(&self) -> char {
52        self.char
53    }
54
55    /// The length of the character in the input encoding. Skipping this many bytes forwards from
56    /// [`valid_up_to`](Self::valid_up_to) and trying again will avoid this particular error
57    /// character (though recoding may fail again immediately due to another invalid character).
58    pub fn char_len(&self) -> usize {
59        self.char_len as usize
60    }
61}
62
63impl fmt::Display for RecodeError {
64    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
65        write!(
66            f,
67            "Error while recoding `Str`: invalid character for output encoding '{}'",
68            self.char
69        )
70    }
71}
72
// No methods are overridden: the default `Error` behaviour is used as-is.
impl Error for RecodeError {}
74
/// Error encountered while re-encoding a [`Str`](Str) or [`CStr`](crate::CStr) into another
/// format in a pre-allocated buffer
#[derive(Clone, PartialEq)]
pub struct RecodeIntoError<'a, E: Encoding> {
    // Taken from `encoding::RecodeError::input_used()`; returned by `valid_up_to()`.
    input_used: usize,
    // The string handed to `from_recode`; returned by `output_valid()`.
    str: &'a Str<E>,
    // Why recoding stopped; returned by `cause()`.
    cause: RecodeCause,
}
83
84impl<'a, E: Encoding> RecodeIntoError<'a, E> {
85    fn from_recode(err: encoding::RecodeError, str: &'a Str<E>) -> Self {
86        RecodeIntoError {
87            input_used: err.input_used(),
88            str,
89            cause: err.cause().clone(),
90        }
91    }
92
93    /// The length of valid data in the input before the error was encountered. Calling
94    /// [`recode_into`](Str::recode_into) again on the input sliced down to this length will succeed.
95    pub fn valid_up_to(&self) -> usize {
96        self.input_used
97    }
98
99    /// The portion of the buffer with valid data written into it, as a [`Str`] in the desired
100    /// encoding.
101    pub fn output_valid(&self) -> &'a Str<E> {
102        self.str
103    }
104
105    /// The reason encoding stopped. See [`RecodeCause`] for more details on possible reasons.
106    pub fn cause(&self) -> &RecodeCause {
107        &self.cause
108    }
109}
110
111impl<E: Encoding> fmt::Debug for RecodeIntoError<'_, E> {
112    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
113        f.debug_struct("RecodeIntoError")
114            .field("input_used", &self.input_used)
115            .field("str", &self.str)
116            .field("cause", &self.cause)
117            .finish()
118    }
119}
120
121impl<E: Encoding> fmt::Display for RecodeIntoError<'_, E> {
122    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
123        write!(f, "Error while recoding `Str` into buffer: ")?;
124        self.cause.write_cause(f)
125    }
126}
127
// No methods are overridden: the default `Error` behaviour is used as-is.
impl<E: Encoding> Error for RecodeIntoError<'_, E> {}
129
/// Implementation of a generically encoded [`str`] type. This type is similar to the standard
/// library [`str`] type in many ways, but instead of having a fixed UTF-8 encoding scheme, it uses
/// an encoding determined by the generic `E` it is provided.
///
/// `Str` only implements `==` between instances with the same encoding. To compare strings of
/// different encoding by characters, use `a.chars().eq(b.chars())`.
///
/// ## Invariant
///
/// Rust libraries may assume that a `Str<E>` is valid for the [`Encoding`] `E`.
///
/// Constructing non-`E` string slices is not immediate UB, but any function called on it may assume
/// that it is valid.
// Field 0 is a zero-sized marker tying the value to its encoding; field 1 holds the raw encoded
// bytes. `repr(transparent)` is what the pointer casts in `from_bytes_unchecked{,_mut}` rely on.
#[repr(transparent)]
pub struct Str<E>(PhantomData<E>, [u8]);
145
146impl<E: Encoding> Str<E> {
147    /// Create a `Str` from a byte slice without checking whether it is valid for the current
148    /// encoding.
149    ///
150    /// # Safety
151    ///
152    /// The bytes passed must be valid for the current encoding.
153    pub unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Str<E> {
154        debug_assert!(E::validate(bytes).is_ok());
155        let ptr = ptr::from_ref(bytes) as *const Str<E>;
156        // SAFETY: `Str` is `repr(transparent)` containing a [u8].
157        //         Provided bytes have precondition of being valid encoding
158        unsafe { &*ptr }
159    }
160
161    /// Create a `Str` from a mutable byte slice without checking whether it is valid for the
162    /// current encoding.
163    ///
164    /// # Safety
165    ///
166    /// The bytes passed must be valid for the current encoding.
167    pub unsafe fn from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut Str<E> {
168        debug_assert!(E::validate(bytes).is_ok());
169        let ptr = ptr::from_mut(bytes) as *mut Str<E>;
170        // SAFETY: `Str` is `repr(transparent)` containing a [u8].
171        //         Provided bytes have precondition of being valid encoding
172        unsafe { &mut *ptr }
173    }
174
175    /// Create a `Str` from a byte slice, validating the encoding and returning a [`ValidateError`]
176    /// if it is not a valid string in the current encoding.
177    pub fn from_bytes(bytes: &[u8]) -> Result<&Str<E>, ValidateError> {
178        E::validate(bytes)?;
179        // SAFETY: Bytes have been validated, they are guaranteed valid for the encoding
180        Ok(unsafe { Self::from_bytes_unchecked(bytes) })
181    }
182
183    /// Create a `Str` from a mutable byte slice, validating the encoding and returning a
184    /// [`ValidateError`] if it is not a valid string in the current encoding.
185    pub fn from_bytes_mut(bytes: &mut [u8]) -> Result<&mut Str<E>, ValidateError> {
186        E::validate(bytes)?;
187        // SAFETY: Bytes have been validated, they are guaranteed valid for the encoding
188        Ok(unsafe { Self::from_bytes_unchecked_mut(bytes) })
189    }
190
191    /// Get the length of this string in bytes
192    pub fn len(&self) -> usize {
193        self.as_bytes().len()
194    }
195
196    /// Whether this string is empty - IE is a zero-length slice.
197    pub fn is_empty(&self) -> bool {
198        self.as_bytes().is_empty()
199    }
200
    /// Get the underlying bytes for this string.
    ///
    /// The returned slice is the raw encoded data, exactly as stored.
    pub fn as_bytes(&self) -> &[u8] {
        &self.1
    }
205
    /// Get the underlying bytes for this string mutably. This method is unsafe because it is
    /// possible to write invalid bytes for the encoding into the slice.
    ///
    /// # Safety
    ///
    /// The returned reference must not be used to write invalid data into the string.
    pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] {
        // No check happens here: keeping the data valid is entirely up to the caller.
        &mut self.1
    }
215
216    fn check_bounds<R>(&self, idx: &R) -> Option<()>
217    where
218        R: RangeBounds<usize>,
219    {
220        let start = idx.start_bound();
221        let end = idx.end_bound();
222
223        let start_idx = match start {
224            Bound::Included(i) => *i,
225            Bound::Excluded(i) => *i + 1,
226            Bound::Unbounded => 0,
227        };
228
229        let end_idx = match end {
230            Bound::Included(i) => *i,
231            Bound::Excluded(i) => *i - 1,
232            Bound::Unbounded => self.as_bytes().len(),
233        };
234
235        if !self.is_char_boundary(start_idx) || !self.is_char_boundary(end_idx) {
236            None
237        } else {
238            Some(())
239        }
240    }
241
242    /// Return a subslice of this `Str`. This is a non-panicking alternative to indexing, returning
243    /// [`None`] whenever indexing would panic.
244    pub fn get<R>(&self, idx: R) -> Option<&Self>
245    where
246        R: RangeBounds<usize> + SliceIndex<[u8], Output = [u8]>,
247    {
248        self.check_bounds(&idx)?;
249        // SAFETY: The provided range has been validated as landing on character boundaries.
250        //         Our internal bytes are guaranteed valid for the encoding.
251        Some(unsafe { Str::from_bytes_unchecked(self.as_bytes().get(idx)?) })
252    }
253
254    /// Return a subslice of this `Str`, without bound checks.
255    ///
256    /// # Safety
257    ///
258    /// - The caller must ensure the range indices are in-bounds of the string byte length
259    /// - The caller must ensure neither the range indices do not fall in the middle of a character
260    pub unsafe fn get_unchecked<R>(&self, idx: R) -> &Self
261    where
262        R: RangeBounds<usize> + SliceIndex<[u8], Output = [u8]>,
263    {
264        // SAFETY: Delegated to caller
265        unsafe { Str::from_bytes_unchecked(self.as_bytes().get_unchecked(idx)) }
266    }
267
268    /// Return a mutable subslice of this `Str`. This is a non-panicking alternative to indexing,
269    /// returning [`None`] whenever indexing would panic.
270    pub fn get_mut<R>(&mut self, idx: R) -> Option<&mut Self>
271    where
272        R: RangeBounds<usize> + SliceIndex<[u8], Output = [u8]>,
273    {
274        self.check_bounds(&idx)?;
275        // SAFETY: The provided range has been validated as landing on character boundaries.
276        //         Our internal bytes are guaranteed valid for the encoding.
277        Some(unsafe { Str::from_bytes_unchecked_mut(self.1.get_mut(idx)?) })
278    }
279
280    /// Return a mutable subslice of this `Str`, without bound checks.
281    ///
282    /// # Safety
283    ///
284    /// - The caller must ensure the range indices are in-bounds of the string byte length
285    /// - The caller must ensure neither the range indices do not fall in the middle of a character
286    pub unsafe fn get_unchecked_mut<R>(&mut self, idx: R) -> &mut Self
287    where
288        R: RangeBounds<usize> + SliceIndex<[u8], Output = [u8]>,
289    {
290        // SAFETY: Delegated to caller
291        unsafe { Str::from_bytes_unchecked_mut(self.as_bytes_mut().get_unchecked_mut(idx)) }
292    }
293
294    /// Check whether the byte at `idx` is on a character boundary - IE is the first byte in a code
295    /// point or the end of the string.
296    ///
297    /// The start and end of the string are considered boundaries, indexes greater than `self.len()`
298    /// are considered not boundaries.
299    pub fn is_char_boundary(&self, idx: usize) -> bool {
300        match idx.cmp(&self.len()) {
301            Ordering::Equal => true,
302            Ordering::Greater => false,
303            Ordering::Less => E::char_bound(self, idx),
304        }
305    }
306
307    /// Returns `true` if the given pattern is a prefix of this string slice, `false` otherwise.
308    pub fn starts_with(&self, other: &Self) -> bool {
309        self.as_bytes().starts_with(other.as_bytes())
310    }
311
312    /// Returns `true` if the given pattern is a suffix of this string slice, `false` otherwise.
313    pub fn ends_with(&self, other: &Self) -> bool {
314        self.as_bytes().ends_with(other.as_bytes())
315    }
316
    /// Return an iterator over the [`char`]s of this string slice. See [`str::chars`] for caveats
    /// about this method.
    ///
    /// The iterator borrows this string for as long as it is alive.
    pub fn chars(&self) -> Chars<'_, E> {
        Chars::new(self)
    }
322
    /// Return an iterator over the [`char`]s of this string slice and their positions. See
    /// [`str::char_indices`] for caveats about this method.
    ///
    /// The iterator borrows this string for as long as it is alive.
    pub fn char_indices(&self) -> CharIndices<'_, E> {
        CharIndices::new(self)
    }
328
329    /// Copy the data from another string into this one.
330    pub fn copy_from(&mut self, other: &Str<E>) {
331        if self.len() != other.len() {
332            panic!(
333                "Source string length ({}) doesn't match destination string length ({})",
334                other.len(),
335                self.len(),
336            );
337        }
338        self.1.copy_from_slice(other.as_bytes());
339    }
340
341    /// Split this string at an index, returning the two substrings on either side. This method
342    /// panics if the index doesn't lie on a character boundary.
343    pub fn split_at(&self, idx: usize) -> Option<(&Str<E>, &Str<E>)> {
344        if self.is_char_boundary(idx) && idx < self.len() {
345            let (start, end) = self.1.split_at(idx);
346            // SAFETY: Index is a character boundary. Internal data guaranteed valid.
347            let start = unsafe { Str::from_bytes_unchecked(start) };
348            // SAFETY: Index is a character boundary. Internal data guaranteed valid.
349            let end = unsafe { Str::from_bytes_unchecked(end) };
350            Some((start, end))
351        } else {
352            None
353        }
354    }
355
356    /// Split this string mutably at an index, returning the two substrings on either side. This
357    /// method panics if the index doesn't lie on a character boundary.
358    pub fn split_at_mut(&mut self, idx: usize) -> Option<(&mut Str<E>, &mut Str<E>)> {
359        if self.is_char_boundary(idx) && idx < self.len() {
360            let (start, end) = self.1.split_at_mut(idx);
361            // SAFETY: Index is a character boundary. Internal data guaranteed valid.
362            let start = unsafe { Str::from_bytes_unchecked_mut(start) };
363            // SAFETY: Index is a character boundary. Internal data guaranteed valid.
364            let end = unsafe { Str::from_bytes_unchecked_mut(end) };
365            Some((start, end))
366        } else {
367            None
368        }
369    }
370
371    /// Get this `Str` in a different [`Encoding`]. This method writes the new string into the
372    /// provided buffer, and returns the portion of the buffer containing the string as a new `Str`.
373    pub fn recode_into<'a, E2: Encoding>(
374        &self,
375        buffer: &'a mut [u8],
376    ) -> Result<&'a Str<E2>, RecodeIntoError<'a, E2>> {
377        E2::recode(self, buffer)
378            .map(|len| {
379                // SAFETY: Value written into `out` by `recode` is guaranteed valid in encoding
380                //         E2.
381                unsafe { Str::from_bytes_unchecked(&buffer[..len]) }
382            })
383            .map_err(|err| {
384                // SAFETY: Value written into `out` by `recode` is guaranteed valid in encoding
385                //         E2, up to output_valid.
386                let str = unsafe { Str::from_bytes_unchecked(&buffer[..err.output_valid()]) };
387                RecodeIntoError::from_recode(err, str)
388            })
389    }
390
391    /// Get this `Str` in a different [`Encoding`]. This method allocates a new [`String`] with the
392    /// desired encoding, and returns an error if the source string contains any characters that
393    /// cannot be represented in the destination encoding.
394    #[cfg(feature = "alloc")]
395    pub fn recode<E2: Encoding>(&self) -> Result<String<E2>, RecodeError> {
396        let mut ptr = self;
397        let mut total_len = 0;
398        let mut out = vec![0; self.1.len()];
399        loop {
400            match E2::recode(ptr, &mut out[total_len..]) {
401                Ok(len) => {
402                    out.truncate(total_len + len);
403                    // SAFETY: Value written into `out` by `recode` is guaranteed valid in encoding
404                    //         E2.
405                    return Ok(unsafe { String::<E2>::from_bytes_unchecked(out) });
406                }
407                Err(e) => match e.cause() {
408                    RecodeCause::NeedSpace { .. } => {
409                        out.resize(out.len() + self.1.len(), 0);
410                        ptr = &ptr[e.input_used()..];
411                        total_len += e.output_valid();
412                    }
413                    &RecodeCause::InvalidChar { char, len } => {
414                        return Err(RecodeError {
415                            valid_up_to: e.input_used(),
416                            char,
417                            char_len: len as u8,
418                        });
419                    }
420                },
421            }
422        }
423    }
424
425    /// Get this `Str` in a different [`Encoding`]. This method allocates a new [`String`] with the
426    /// desired encoding, replacing any characters that can't be represented in the destination
427    /// encoding with the encoding's replacement character.
428    #[cfg(feature = "alloc")]
429    pub fn recode_lossy<E2: Encoding>(&self) -> String<E2> {
430        let mut ptr = self;
431        let mut total_len = 0;
432        let mut out = vec![0; self.1.len()];
433        loop {
434            match E2::recode(ptr, &mut out[total_len..]) {
435                Ok(len) => {
436                    out.truncate(total_len + len);
437                    // SAFETY: Value written into `out` by `recode` is guaranteed valid in encoding
438                    //         E2.
439                    return unsafe { String::from_bytes_unchecked(out) };
440                }
441                Err(e) => match e.cause() {
442                    RecodeCause::NeedSpace { .. } => {
443                        out.resize(out.len() + self.1.len(), 0);
444                        ptr = &ptr[e.input_used()..];
445                        total_len += e.output_valid();
446                    }
447                    &RecodeCause::InvalidChar { char: _, len } => {
448                        let replace_len = E2::char_len(E2::REPLACEMENT);
449                        out.resize(out.len() + replace_len, 0);
450                        E2::encode(E2::REPLACEMENT, &mut out[total_len + e.output_valid()..])
451                            .unwrap();
452                        ptr = &ptr[e.input_used() + len..];
453                        total_len += e.output_valid() + replace_len;
454                    }
455                },
456            }
457        }
458    }
459}
460
461impl<E: AlwaysValid> Str<E> {
462    /// Create a `Str` from a byte slice, never failing.
463    ///
464    /// This method is provided for encodings that have no invalid byte patterns, meaning encoding
465    /// validity checking is skipped.
466    pub fn from_bytes_infallible(bytes: &[u8]) -> &Str<E> {
467        // SAFETY: All possible byte patterns are valid for this encoding.
468        unsafe { Self::from_bytes_unchecked(bytes) }
469    }
470
471    /// Create a `Str` from a mutable byte slice, never failing.
472    ///
473    /// This method is provided for encodings that have no invalid byte patterns, meaning encoding
474    /// validity checking is skipped.
475    pub fn from_bytes_infallible_mut(bytes: &mut [u8]) -> &mut Str<E> {
476        // SAFETY: All possible byte patterns are valid for this encoding.
477        unsafe { Self::from_bytes_unchecked_mut(bytes) }
478    }
479}
480
481impl Str<Utf8> {
482    /// Equivalent to [`Str::from_bytes_unchecked`] but for UTF-8 specifically
483    ///
484    /// # Safety
485    ///
486    /// The bytes passed must be valid UTF-8.
487    pub unsafe fn from_utf8_unchecked(str: &[u8]) -> &Self {
488        // SAFETY: Precondition that input is valid UTF-8
489        Self::from_bytes_unchecked(str)
490    }
491
492    /// Equivalent to [`Str::from_bytes`] but for UTF-8 specifically
493    pub fn from_utf8(str: &[u8]) -> Result<&Self, ValidateError> {
494        Self::from_bytes(str)
495    }
496
497    /// Convert a [`str`] directly into a [`Str<Utf8>`].
498    pub fn from_std(value: &str) -> &Str<Utf8> {
499        // SAFETY: `&str` is UTF-8 by its validity guarantees.
500        unsafe { Self::from_bytes_unchecked(value.as_bytes()) }
501    }
502
503    /// Convert a [`Str<Utf8>`] directly into a [`str`]
504    pub fn as_std(&self) -> &str {
505        // SAFETY: `&Str` is UTF-8 by our validity guarantees.
506        unsafe { core::str::from_utf8_unchecked(&self.1) }
507    }
508}
509
510impl Str<Utf16> {
511    /// Equivalent to [`Str::from_bytes_unchecked`] but for UTF-16 specifically
512    ///
513    /// # Safety
514    ///
515    /// The bytes passed must be valid UTF-16.
516    pub unsafe fn from_utf16_unchecked(str: &[u16]) -> &Self {
517        // SAFETY: Precondition that input is valid UTF-16
518        Self::from_bytes_unchecked(cast_slice(str))
519    }
520
521    /// Equivalent to [`Str::from_bytes`] but for UTF-16 specifically
522    pub fn from_utf16(str: &[u16]) -> Result<&Self, ValidateError> {
523        Self::from_bytes(cast_slice(str))
524    }
525}
526
527impl Str<Utf32> {
528    /// Equivalent to [`Str::from_bytes_unchecked`] but for UTF-32 specifically
529    ///
530    /// # Safety
531    ///
532    /// The bytes passed must be valid UTF-32.
533    pub unsafe fn from_utf32_unchecked(str: &[u32]) -> &Self {
534        // SAFETY: Precondition that input is valid UTF-32
535        Self::from_bytes_unchecked(cast_slice(str))
536    }
537
538    /// Equivalent to [`Str::from_bytes`] but for UTF-32 specifically
539    pub fn from_utf32(str: &[u32]) -> Result<&Self, ValidateError> {
540        Self::from_bytes(cast_slice(str))
541    }
542
543    /// Convert a [`&[char]`] directly into a [`Str<Utf32>`]
544    pub fn from_chars(str: &[char]) -> &Self {
545        // SAFETY: Utf32 encoding is exactly equivalent to `char` encoding.
546        unsafe { Self::from_bytes_unchecked(cast_slice(str)) }
547    }
548
549    /// Attempt to convert a [`Str<Utf32>`] directly into a [`&[char]`]. This will fail if the `Str`
550    /// is not sufficiently aligned for a `char`.
551    pub fn try_chars(&self) -> Option<&[char]> {
552        let len = self.1.len();
553        let ptr = ptr::from_ref(&self.1);
554        if (ptr.cast::<()>() as usize) % mem::align_of::<char>() != 0 {
555            None
556        } else {
557            // SAFETY: We have guaranteed correct alignment, and Utf32 encoding is exactly
558            //         equivalent to `char` encoding.
559            Some(unsafe { slice::from_raw_parts(ptr.cast(), len / 4) })
560        }
561    }
562}
563
564impl<E: Encoding> fmt::Debug for Str<E> {
565    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
566        write!(f, "\"")?;
567        for c in self.chars() {
568            f.write_char(c)?;
569        }
570        write!(f, "\"{}", E::shorthand())
571    }
572}
573
574impl<E: Encoding> fmt::Display for Str<E> {
575    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
576        for c in self.chars() {
577            f.write_char(c)?;
578        }
579        Ok(())
580    }
581}
582
583impl<E: Encoding> Default for &Str<E> {
584    fn default() -> Self {
585        // SAFETY: Empty string slice can never be invalid
586        unsafe { Str::from_bytes_unchecked(&[]) }
587    }
588}
589
#[cfg(feature = "alloc")]
impl<E: Encoding> ToOwned for Str<E> {
    type Owned = String<E>;

    /// Copy the bytes of this slice into a newly allocated [`String`] of the same encoding.
    fn to_owned(&self) -> Self::Owned {
        let owned = self.1.to_vec();
        // SAFETY: Our internal bytes are guaranteed valid for our encoding
        unsafe { String::from_bytes_unchecked(owned) }
    }
}
600
601impl<E, R> Index<R> for Str<E>
602where
603    E: Encoding,
604    R: RangeBounds<usize> + SliceIndex<[u8], Output = [u8]>,
605{
606    type Output = Str<E>;
607
608    fn index(&self, index: R) -> &Self::Output {
609        self.get(index)
610            .expect("Attempted to slice string at non-character boundary")
611    }
612}
613
614impl<E: Encoding> PartialEq for Str<E> {
615    fn eq(&self, other: &Str<E>) -> bool {
616        self.1 == other.1
617    }
618}
619
// Equality is plain byte equality (see `PartialEq` above), which is a full equivalence relation.
impl<E: Encoding> Eq for Str<E> {}
621
622impl<E: Encoding> Hash for Str<E> {
623    fn hash<H: Hasher>(&self, state: &mut H) {
624        self.1.hash(state)
625    }
626}
627
628impl<E: Encoding> AsRef<[u8]> for Str<E> {
629    fn as_ref(&self) -> &[u8] {
630        self.as_bytes()
631    }
632}
633
#[cfg(feature = "serde")]
impl<E: Encoding> Serialize for Str<E> {
    /// Serialize the string exactly as its underlying `[u8]` would be serialized.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let bytes: &[u8] = self.as_bytes();
        Serialize::serialize(bytes, serializer)
    }
}
643
// Deserializes a borrowed byte slice, then validates it against the encoding `E`.
#[cfg(feature = "serde")]
impl<'de, E: Encoding> Deserialize<'de> for &'de Str<E> {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let bytes = <&'de [u8]>::deserialize(deserializer)?;
        // The details of the `ValidateError` are discarded; the reported error only states what
        // was expected.
        Str::from_bytes(bytes).map_err(|_| {
            // With `alloc` the message names the encoding; without it a fixed message is used.
            // The message is only built on the error path.
            #[cfg(feature = "alloc")]
            let msg = &*alloc::format!("a valid string for the {} encoding", E::shorthand());
            #[cfg(not(feature = "alloc"))]
            let msg = "a valid string for this encoding";
            de::Error::invalid_value(Unexpected::Bytes(bytes), &msg)
        })
    }
}
660
661// Encoding-specific implementations
662
663impl<'a> From<&'a Str<Utf8>> for &'a str {
664    fn from(value: &'a Str<Utf8>) -> Self {
665        value.as_std()
666    }
667}
668
669impl<'a> From<&'a str> for &'a Str<Utf8> {
670    fn from(value: &'a str) -> Self {
671        Str::from_std(value)
672    }
673}
674
675impl<'a> From<&'a [char]> for &'a Str<Utf32> {
676    fn from(value: &'a [char]) -> Self {
677        Str::from_chars(value)
678    }
679}
680
// Unit tests for character iteration and recoding across the encodings used in this module.
#[cfg(test)]
mod tests {
    use super::*;
    #[cfg(feature = "alloc")]
    use crate::encoding::{Ascii, Win1252};
    use alloc::vec::Vec;

    // `chars()` must yield the same characters regardless of the source encoding. The test
    // string includes '𐐷', which takes 4 bytes in UTF-8 and a surrogate pair in UTF-16.
    #[test]
    fn test_chars() {
        let str = Str::from_std("Abc𐐷d");
        assert_eq!(&str.chars().collect::<Vec<_>>(), &['A', 'b', 'c', '𐐷', 'd'],);

        let str = Str::<Utf16>::from_utf16(&[
            b'A' as u16,
            b'b' as u16,
            b'c' as u16,
            0xD801,
            0xDC37,
            b'd' as u16,
        ])
        .unwrap();
        assert_eq!(&str.chars().collect::<Vec<_>>(), &['A', 'b', 'c', '𐐷', 'd'],);

        let str = Str::from_chars(&['A', 'b', 'c', '𐐷', 'd']);
        assert_eq!(&str.chars().collect::<Vec<_>>(), &['A', 'b', 'c', '𐐷', 'd'],);
    }

    // `char_indices()` reports byte offsets, so the expected positions differ per encoding.
    #[test]
    fn test_char_indices() {
        let str = Str::from_std("Abc𐐷d");
        assert_eq!(
            &str.char_indices().collect::<Vec<_>>(),
            &[(0, 'A'), (1, 'b'), (2, 'c'), (3, '𐐷'), (7, 'd')],
        );

        let str = Str::<Utf16>::from_utf16(&[
            b'A' as u16,
            b'b' as u16,
            b'c' as u16,
            0xD801,
            0xDC37,
            b'd' as u16,
        ])
        .unwrap();
        assert_eq!(
            &str.char_indices().collect::<Vec<_>>(),
            &[(0, 'A'), (2, 'b'), (4, 'c'), (6, '𐐷'), (10, 'd')],
        );

        let str = Str::from_chars(&['A', 'b', 'c', '𐐷', 'd']);
        assert_eq!(
            &str.char_indices().collect::<Vec<_>>(),
            &[(0, 'A'), (4, 'b'), (8, 'c'), (12, '𐐷'), (16, 'd')],
        );
    }

    // Recoding into an encoding whose output is larger than the input (UTF-8 to UTF-32/UTF-16).
    #[cfg(feature = "alloc")]
    #[test]
    fn test_recode_small_to_large() {
        let a = Str::from_std("Hello World!");
        let b = a.recode::<Utf32>().unwrap();

        assert_eq!(
            &*b,
            Str::from_chars(&['H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd', '!']),
        );

        let a = Str::from_std("A𐐷b");
        let b = a.recode::<Utf16>().unwrap();

        assert_eq!(
            &*b,
            Str::from_utf16(&[b'A' as u16, 0xD801, 0xDC37, b'b' as u16]).unwrap()
        );
    }

    // Strict recoding must report the position, value and input length of the first character
    // the target encoding cannot represent.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_recode_invalid_chars() {
        let a = Str::from_std("A𐐷b");
        let b = a.recode::<Ascii>();

        assert_eq!(
            b,
            Err(RecodeError {
                valid_up_to: 1,
                char: '𐐷',
                char_len: 4,
            })
        );

        let a = Str::from_std("€𐐷b");
        let b = a.recode::<Win1252>();

        assert_eq!(
            b,
            Err(RecodeError {
                valid_up_to: 3,
                char: '𐐷',
                char_len: 4,
            })
        );
    }

    // Lossy recoding substitutes unrepresentable characters; the expected output here contains
    // the byte 0x1A in their place.
    #[cfg(feature = "alloc")]
    #[test]
    fn test_recode_lossy_invalid_chars() {
        let a = Str::from_std("A𐐷b");
        let b = a.recode_lossy::<Ascii>();

        assert_eq!(&*b, Str::from_bytes(b"A\x1Ab").unwrap());

        let a = Str::from_std("€𐐷b");
        let b = a.recode_lossy::<Win1252>();

        assert_eq!(&*b, Str::from_bytes(b"\x80\x1Ab").unwrap());
    }
}
enrede/str.rs

enrede/
str.rs