1#[cfg(feature = "alloc")]
6use alloc::borrow::ToOwned;
7#[cfg(feature = "alloc")]
8use alloc::vec;
9use bytemuck::must_cast_slice as cast_slice;
10use core::cmp::Ordering;
11use core::error::Error;
12use core::fmt::Write;
13use core::hash::{Hash, Hasher};
14use core::marker::PhantomData;
15use core::ops::{Bound, Index, RangeBounds};
16use core::slice::SliceIndex;
17use core::{fmt, mem, ptr, slice};
18#[cfg(feature = "serde")]
19use serde::{
20 de::{self, Unexpected},
21 Deserialize, Deserializer, Serialize, Serializer,
22};
23
24use crate::encoding::{AlwaysValid, Encoding, RecodeCause, Utf16, Utf32, Utf8, ValidateError};
25#[cfg(feature = "alloc")]
26use crate::string::String;
27
28mod iter;
29
30use crate::encoding;
31pub use iter::{CharIndices, Chars};
32
/// Error produced by `Str::recode` when a character of the input cannot be represented in the
/// requested output encoding.
///
/// All fields are plain integers/`char`s, so the type derives `Eq` alongside `PartialEq`; this
/// lets it be used wherever total equality is required.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct RecodeError {
    /// Number of input bytes reported as consumed before the offending character.
    valid_up_to: usize,
    /// The character that the output encoding rejected.
    char: char,
    /// Length of `char` as reported by the encoder, narrowed to a `u8`.
    char_len: u8,
}
41
42impl RecodeError {
43 pub fn valid_up_to(&self) -> usize {
46 self.valid_up_to
47 }
48
49 pub fn char(&self) -> char {
52 self.char
53 }
54
55 pub fn char_len(&self) -> usize {
59 self.char_len as usize
60 }
61}
62
63impl fmt::Display for RecodeError {
64 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
65 write!(
66 f,
67 "Error while recoding `Str`: invalid character for output encoding '{}'",
68 self.char
69 )
70 }
71}
72
// Marker implementation: every `Error` method keeps its default, so no source error is exposed.
impl Error for RecodeError {}
74
/// Error produced by `Str::recode_into` when recoding into a caller-supplied buffer stops early.
///
/// `Debug` is implemented by hand further down rather than derived.
/// NOTE(review): the derived `Clone`/`PartialEq` add `E: Clone`/`E: PartialEq` bounds on top of
/// `E: Encoding` — confirm that is intended for all encodings.
#[derive(Clone, PartialEq)]
pub struct RecodeIntoError<'a, E: Encoding> {
    /// Number of input bytes the encoder reported as consumed before it stopped.
    input_used: usize,
    /// The part of the output buffer that was successfully written, viewed as a `Str<E>`.
    str: &'a Str<E>,
    /// Why recoding stopped.
    cause: RecodeCause,
}
83
84impl<'a, E: Encoding> RecodeIntoError<'a, E> {
85 fn from_recode(err: encoding::RecodeError, str: &'a Str<E>) -> Self {
86 RecodeIntoError {
87 input_used: err.input_used(),
88 str,
89 cause: err.cause().clone(),
90 }
91 }
92
93 pub fn valid_up_to(&self) -> usize {
96 self.input_used
97 }
98
99 pub fn output_valid(&self) -> &'a Str<E> {
102 self.str
103 }
104
105 pub fn cause(&self) -> &RecodeCause {
107 &self.cause
108 }
109}
110
111impl<E: Encoding> fmt::Debug for RecodeIntoError<'_, E> {
112 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
113 f.debug_struct("RecodeIntoError")
114 .field("input_used", &self.input_used)
115 .field("str", &self.str)
116 .field("cause", &self.cause)
117 .finish()
118 }
119}
120
121impl<E: Encoding> fmt::Display for RecodeIntoError<'_, E> {
122 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
123 write!(f, "Error while recoding `Str` into buffer: ")?;
124 self.cause.write_cause(f)
125 }
126}
127
// Marker implementation: every `Error` method keeps its default, so no source error is exposed.
impl<E: Encoding> Error for RecodeIntoError<'_, E> {}
129
/// An unsized string slice whose bytes are tagged with the encoding `E`.
///
/// The `PhantomData<E>` field only carries the encoding type; the trailing `[u8]` holds the data.
/// `#[repr(transparent)]` gives `Str<E>` the same layout as `[u8]`, which the pointer casts in
/// the `from_bytes_unchecked*` constructors depend on.
#[repr(transparent)]
pub struct Str<E>(PhantomData<E>, [u8]);
145
146impl<E: Encoding> Str<E> {
147 pub unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Str<E> {
154 debug_assert!(E::validate(bytes).is_ok());
155 let ptr = ptr::from_ref(bytes) as *const Str<E>;
156 unsafe { &*ptr }
159 }
160
161 pub unsafe fn from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut Str<E> {
168 debug_assert!(E::validate(bytes).is_ok());
169 let ptr = ptr::from_mut(bytes) as *mut Str<E>;
170 unsafe { &mut *ptr }
173 }
174
175 pub fn from_bytes(bytes: &[u8]) -> Result<&Str<E>, ValidateError> {
178 E::validate(bytes)?;
179 Ok(unsafe { Self::from_bytes_unchecked(bytes) })
181 }
182
183 pub fn from_bytes_mut(bytes: &mut [u8]) -> Result<&mut Str<E>, ValidateError> {
186 E::validate(bytes)?;
187 Ok(unsafe { Self::from_bytes_unchecked_mut(bytes) })
189 }
190
191 pub fn len(&self) -> usize {
193 self.as_bytes().len()
194 }
195
196 pub fn is_empty(&self) -> bool {
198 self.as_bytes().is_empty()
199 }
200
201 pub fn as_bytes(&self) -> &[u8] {
203 &self.1
204 }
205
206 pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] {
213 &mut self.1
214 }
215
216 fn check_bounds<R>(&self, idx: &R) -> Option<()>
217 where
218 R: RangeBounds<usize>,
219 {
220 let start = idx.start_bound();
221 let end = idx.end_bound();
222
223 let start_idx = match start {
224 Bound::Included(i) => *i,
225 Bound::Excluded(i) => *i + 1,
226 Bound::Unbounded => 0,
227 };
228
229 let end_idx = match end {
230 Bound::Included(i) => *i,
231 Bound::Excluded(i) => *i - 1,
232 Bound::Unbounded => self.as_bytes().len(),
233 };
234
235 if !self.is_char_boundary(start_idx) || !self.is_char_boundary(end_idx) {
236 None
237 } else {
238 Some(())
239 }
240 }
241
242 pub fn get<R>(&self, idx: R) -> Option<&Self>
245 where
246 R: RangeBounds<usize> + SliceIndex<[u8], Output = [u8]>,
247 {
248 self.check_bounds(&idx)?;
249 Some(unsafe { Str::from_bytes_unchecked(self.as_bytes().get(idx)?) })
252 }
253
254 pub unsafe fn get_unchecked<R>(&self, idx: R) -> &Self
261 where
262 R: RangeBounds<usize> + SliceIndex<[u8], Output = [u8]>,
263 {
264 unsafe { Str::from_bytes_unchecked(self.as_bytes().get_unchecked(idx)) }
266 }
267
268 pub fn get_mut<R>(&mut self, idx: R) -> Option<&mut Self>
271 where
272 R: RangeBounds<usize> + SliceIndex<[u8], Output = [u8]>,
273 {
274 self.check_bounds(&idx)?;
275 Some(unsafe { Str::from_bytes_unchecked_mut(self.1.get_mut(idx)?) })
278 }
279
280 pub unsafe fn get_unchecked_mut<R>(&mut self, idx: R) -> &mut Self
287 where
288 R: RangeBounds<usize> + SliceIndex<[u8], Output = [u8]>,
289 {
290 unsafe { Str::from_bytes_unchecked_mut(self.as_bytes_mut().get_unchecked_mut(idx)) }
292 }
293
294 pub fn is_char_boundary(&self, idx: usize) -> bool {
300 match idx.cmp(&self.len()) {
301 Ordering::Equal => true,
302 Ordering::Greater => false,
303 Ordering::Less => E::char_bound(self, idx),
304 }
305 }
306
307 pub fn starts_with(&self, other: &Self) -> bool {
309 self.as_bytes().starts_with(other.as_bytes())
310 }
311
312 pub fn ends_with(&self, other: &Self) -> bool {
314 self.as_bytes().ends_with(other.as_bytes())
315 }
316
317 pub fn chars(&self) -> Chars<'_, E> {
320 Chars::new(self)
321 }
322
323 pub fn char_indices(&self) -> CharIndices<'_, E> {
326 CharIndices::new(self)
327 }
328
329 pub fn copy_from(&mut self, other: &Str<E>) {
331 if self.len() != other.len() {
332 panic!(
333 "Source string length ({}) doesn't match destination string length ({})",
334 other.len(),
335 self.len(),
336 );
337 }
338 self.1.copy_from_slice(other.as_bytes());
339 }
340
341 pub fn split_at(&self, idx: usize) -> Option<(&Str<E>, &Str<E>)> {
344 if self.is_char_boundary(idx) && idx < self.len() {
345 let (start, end) = self.1.split_at(idx);
346 let start = unsafe { Str::from_bytes_unchecked(start) };
348 let end = unsafe { Str::from_bytes_unchecked(end) };
350 Some((start, end))
351 } else {
352 None
353 }
354 }
355
356 pub fn split_at_mut(&mut self, idx: usize) -> Option<(&mut Str<E>, &mut Str<E>)> {
359 if self.is_char_boundary(idx) && idx < self.len() {
360 let (start, end) = self.1.split_at_mut(idx);
361 let start = unsafe { Str::from_bytes_unchecked_mut(start) };
363 let end = unsafe { Str::from_bytes_unchecked_mut(end) };
365 Some((start, end))
366 } else {
367 None
368 }
369 }
370
371 pub fn recode_into<'a, E2: Encoding>(
374 &self,
375 buffer: &'a mut [u8],
376 ) -> Result<&'a Str<E2>, RecodeIntoError<'a, E2>> {
377 E2::recode(self, buffer)
378 .map(|len| {
379 unsafe { Str::from_bytes_unchecked(&buffer[..len]) }
382 })
383 .map_err(|err| {
384 let str = unsafe { Str::from_bytes_unchecked(&buffer[..err.output_valid()]) };
387 RecodeIntoError::from_recode(err, str)
388 })
389 }
390
/// Recodes this string into a newly allocated `String<E2>`.
///
/// The output buffer starts at this string's byte length and grows by that amount every time
/// the encoder reports that it needs more space; recoding then resumes from the first input
/// byte that has not been consumed yet.
///
/// # Errors
///
/// Returns a `RecodeError` when a character cannot be represented in `E2`. Its `valid_up_to`
/// is measured from the start of *this* string, even if the failure happens after one or more
/// buffer-growth passes.
#[cfg(feature = "alloc")]
pub fn recode<E2: Encoding>(&self) -> Result<String<E2>, RecodeError> {
    // Input that still has to be recoded.
    let mut ptr = self;
    // Input bytes consumed by earlier passes; errors from `E2::recode` are relative to `ptr`,
    // so this offset turns them back into positions within `self`.
    let mut consumed = 0;
    // Output bytes written by earlier passes.
    let mut total_len = 0;
    let mut out = vec![0; self.1.len()];
    loop {
        match E2::recode(ptr, &mut out[total_len..]) {
            Ok(len) => {
                out.truncate(total_len + len);
                // SAFETY: relies on `E2::recode` having written only valid `E2` data into the
                // first `total_len + len` bytes of `out`.
                return Ok(unsafe { String::<E2>::from_bytes_unchecked(out) });
            }
            Err(e) => match e.cause() {
                RecodeCause::NeedSpace { .. } => {
                    out.resize(out.len() + self.1.len(), 0);
                    ptr = &ptr[e.input_used()..];
                    consumed += e.input_used();
                    total_len += e.output_valid();
                }
                &RecodeCause::InvalidChar { char, len } => {
                    return Err(RecodeError {
                        valid_up_to: consumed + e.input_used(),
                        char,
                        char_len: len as u8,
                    });
                }
            },
        }
    }
}
424
/// Recodes this string into a newly allocated `String<E2>`, substituting `E2::REPLACEMENT` for
/// every character that `E2` cannot represent.
///
/// The output buffer starts at this string's byte length, grows by that amount whenever the
/// encoder asks for more space, and grows by the replacement's encoded length whenever a
/// character has to be substituted.
///
/// # Panics
///
/// Panics if `E2::encode` fails to write `E2::REPLACEMENT`.
#[cfg(feature = "alloc")]
pub fn recode_lossy<E2: Encoding>(&self) -> String<E2> {
    let grow_by = self.1.len();
    // Input that still has to be recoded.
    let mut rest = self;
    // Output bytes finalised by earlier passes.
    let mut written = 0;
    let mut buf = vec![0; grow_by];
    loop {
        let err = match E2::recode(rest, &mut buf[written..]) {
            Ok(len) => {
                buf.truncate(written + len);
                // SAFETY: relies on `E2::recode`/`E2::encode` having written only valid `E2`
                // data into the retained prefix of `buf`.
                return unsafe { String::from_bytes_unchecked(buf) };
            }
            Err(err) => err,
        };

        match err.cause() {
            RecodeCause::NeedSpace { .. } => {
                buf.resize(buf.len() + grow_by, 0);
                rest = &rest[err.input_used()..];
                written += err.output_valid();
            }
            &RecodeCause::InvalidChar { char: _, len } => {
                let replace_len = E2::char_len(E2::REPLACEMENT);
                // Make sure the replacement fits right after the valid output of this pass.
                buf.resize(buf.len() + replace_len, 0);
                let produced = err.output_valid();
                E2::encode(E2::REPLACEMENT, &mut buf[written + produced..]).unwrap();
                // Skip the consumed input plus the `len` bytes of the rejected character.
                rest = &rest[err.input_used() + len..];
                written += produced + replace_len;
            }
        }
    }
}
459}
460
461impl<E: AlwaysValid> Str<E> {
462 pub fn from_bytes_infallible(bytes: &[u8]) -> &Str<E> {
467 unsafe { Self::from_bytes_unchecked(bytes) }
469 }
470
471 pub fn from_bytes_infallible_mut(bytes: &mut [u8]) -> &mut Str<E> {
476 unsafe { Self::from_bytes_unchecked_mut(bytes) }
478 }
479}
480
481impl Str<Utf8> {
482 pub unsafe fn from_utf8_unchecked(str: &[u8]) -> &Self {
488 Self::from_bytes_unchecked(str)
490 }
491
492 pub fn from_utf8(str: &[u8]) -> Result<&Self, ValidateError> {
494 Self::from_bytes(str)
495 }
496
497 pub fn from_std(value: &str) -> &Str<Utf8> {
499 unsafe { Self::from_bytes_unchecked(value.as_bytes()) }
501 }
502
503 pub fn as_std(&self) -> &str {
505 unsafe { core::str::from_utf8_unchecked(&self.1) }
507 }
508}
509
510impl Str<Utf16> {
511 pub unsafe fn from_utf16_unchecked(str: &[u16]) -> &Self {
517 Self::from_bytes_unchecked(cast_slice(str))
519 }
520
521 pub fn from_utf16(str: &[u16]) -> Result<&Self, ValidateError> {
523 Self::from_bytes(cast_slice(str))
524 }
525}
526
527impl Str<Utf32> {
528 pub unsafe fn from_utf32_unchecked(str: &[u32]) -> &Self {
534 Self::from_bytes_unchecked(cast_slice(str))
536 }
537
538 pub fn from_utf32(str: &[u32]) -> Result<&Self, ValidateError> {
540 Self::from_bytes(cast_slice(str))
541 }
542
543 pub fn from_chars(str: &[char]) -> &Self {
545 unsafe { Self::from_bytes_unchecked(cast_slice(str)) }
547 }
548
549 pub fn try_chars(&self) -> Option<&[char]> {
552 let len = self.1.len();
553 let ptr = ptr::from_ref(&self.1);
554 if (ptr.cast::<()>() as usize) % mem::align_of::<char>() != 0 {
555 None
556 } else {
557 Some(unsafe { slice::from_raw_parts(ptr.cast(), len / 4) })
560 }
561 }
562}
563
564impl<E: Encoding> fmt::Debug for Str<E> {
565 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
566 write!(f, "\"")?;
567 for c in self.chars() {
568 f.write_char(c)?;
569 }
570 write!(f, "\"{}", E::shorthand())
571 }
572}
573
574impl<E: Encoding> fmt::Display for Str<E> {
575 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
576 for c in self.chars() {
577 f.write_char(c)?;
578 }
579 Ok(())
580 }
581}
582
583impl<E: Encoding> Default for &Str<E> {
584 fn default() -> Self {
585 unsafe { Str::from_bytes_unchecked(&[]) }
587 }
588}
589
#[cfg(feature = "alloc")]
impl<E: Encoding> ToOwned for Str<E> {
    type Owned = String<E>;

    /// Copies the encoded bytes into a freshly allocated `String<E>`.
    fn to_owned(&self) -> Self::Owned {
        let copy = self.1.to_vec();
        // SAFETY: `copy` is a byte-for-byte duplicate of an already-valid `Str<E>`.
        unsafe { String::from_bytes_unchecked(copy) }
    }
}
600
601impl<E, R> Index<R> for Str<E>
602where
603 E: Encoding,
604 R: RangeBounds<usize> + SliceIndex<[u8], Output = [u8]>,
605{
606 type Output = Str<E>;
607
608 fn index(&self, index: R) -> &Self::Output {
609 self.get(index)
610 .expect("Attempted to slice string at non-character boundary")
611 }
612}
613
614impl<E: Encoding> PartialEq for Str<E> {
615 fn eq(&self, other: &Str<E>) -> bool {
616 self.1 == other.1
617 }
618}
619
// Byte-wise equality (see `PartialEq` for `Str<E>`) is reflexive, so it is a full equivalence.
impl<E: Encoding> Eq for Str<E> {}
621
622impl<E: Encoding> Hash for Str<E> {
623 fn hash<H: Hasher>(&self, state: &mut H) {
624 self.1.hash(state)
625 }
626}
627
628impl<E: Encoding> AsRef<[u8]> for Str<E> {
629 fn as_ref(&self) -> &[u8] {
630 self.as_bytes()
631 }
632}
633
#[cfg(feature = "serde")]
impl<E: Encoding> Serialize for Str<E> {
    /// Serializes the string through the `Serialize` implementation of its raw `[u8]` bytes.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let bytes: &[u8] = self.as_bytes();
        bytes.serialize(serializer)
    }
}
643
#[cfg(feature = "serde")]
impl<'de, E: Encoding> Deserialize<'de> for &'de Str<E> {
    /// Deserializes a borrowed byte slice and validates it for the encoding `E`.
    ///
    /// A validation failure is reported as an "invalid value" error; with the `alloc` feature
    /// the message names the encoding's shorthand, otherwise a fixed message is used.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let bytes = <&'de [u8]>::deserialize(deserializer)?;
        match Str::from_bytes(bytes) {
            Ok(valid) => Ok(valid),
            Err(_) => {
                #[cfg(feature = "alloc")]
                let owned = alloc::format!("a valid string for the {} encoding", E::shorthand());
                #[cfg(feature = "alloc")]
                let expected: &str = &owned;
                #[cfg(not(feature = "alloc"))]
                let expected: &str = "a valid string for this encoding";
                Err(<D::Error as de::Error>::invalid_value(
                    Unexpected::Bytes(bytes),
                    &expected,
                ))
            }
        }
    }
}
660
661impl<'a> From<&'a Str<Utf8>> for &'a str {
664 fn from(value: &'a Str<Utf8>) -> Self {
665 value.as_std()
666 }
667}
668
669impl<'a> From<&'a str> for &'a Str<Utf8> {
670 fn from(value: &'a str) -> Self {
671 Str::from_std(value)
672 }
673}
674
675impl<'a> From<&'a [char]> for &'a Str<Utf32> {
676 fn from(value: &'a [char]) -> Self {
677 Str::from_chars(value)
678 }
679}
680
#[cfg(test)]
mod tests {
    use super::*;
    #[cfg(feature = "alloc")]
    use crate::encoding::{Ascii, Win1252};
    use alloc::vec::Vec;

    /// Sample text shared by the iterator tests. `'𐐷'` (U+10437) needs four bytes in UTF-8 and
    /// a surrogate pair in UTF-16, so it exercises the multi-unit paths.
    const SAMPLE_UTF8: &str = "Abc𐐷d";

    /// The sample text as individual characters.
    static SAMPLE_CHARS: [char; 5] = ['A', 'b', 'c', '𐐷', 'd'];

    /// The sample text as UTF-16 code units.
    static SAMPLE_UTF16: [u16; 6] = [
        b'A' as u16,
        b'b' as u16,
        b'c' as u16,
        0xD801,
        0xDC37,
        b'd' as u16,
    ];

    /// Gathers the characters of `s` into a vector for comparison.
    fn collect_chars<E: Encoding>(s: &Str<E>) -> Vec<char> {
        s.chars().collect()
    }

    /// Shorthand for the error expected when `'𐐷'` is rejected after `valid_up_to` input bytes.
    #[cfg(feature = "alloc")]
    fn rejected_at(valid_up_to: usize) -> RecodeError {
        RecodeError {
            valid_up_to,
            char: '𐐷',
            char_len: 4,
        }
    }

    #[test]
    fn test_chars() {
        let utf8 = Str::from_std(SAMPLE_UTF8);
        assert_eq!(collect_chars(utf8), SAMPLE_CHARS);

        let utf16 = Str::<Utf16>::from_utf16(&SAMPLE_UTF16).unwrap();
        assert_eq!(collect_chars(utf16), SAMPLE_CHARS);

        let utf32 = Str::from_chars(&SAMPLE_CHARS);
        assert_eq!(collect_chars(utf32), SAMPLE_CHARS);
    }

    #[test]
    fn test_char_indices() {
        // Offsets are byte offsets, so they advance by the encoded width of each character.
        let utf8: Vec<_> = Str::from_std(SAMPLE_UTF8).char_indices().collect();
        assert_eq!(utf8, [(0, 'A'), (1, 'b'), (2, 'c'), (3, '𐐷'), (7, 'd')]);

        let utf16: Vec<_> = Str::<Utf16>::from_utf16(&SAMPLE_UTF16)
            .unwrap()
            .char_indices()
            .collect();
        assert_eq!(utf16, [(0, 'A'), (2, 'b'), (4, 'c'), (6, '𐐷'), (10, 'd')]);

        let utf32: Vec<_> = Str::from_chars(&SAMPLE_CHARS).char_indices().collect();
        assert_eq!(utf32, [(0, 'A'), (4, 'b'), (8, 'c'), (12, '𐐷'), (16, 'd')]);
    }

    #[cfg(feature = "alloc")]
    #[test]
    fn test_recode_small_to_large() {
        let hello = Str::from_std("Hello World!");
        let widened = hello.recode::<Utf32>().unwrap();
        let expected = ['H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd', '!'];
        assert_eq!(&*widened, Str::from_chars(&expected));

        let mixed = Str::from_std("A𐐷b");
        let as_utf16 = mixed.recode::<Utf16>().unwrap();
        let expected = [b'A' as u16, 0xD801, 0xDC37, b'b' as u16];
        assert_eq!(&*as_utf16, Str::from_utf16(&expected).unwrap());
    }

    #[cfg(feature = "alloc")]
    #[test]
    fn test_recode_invalid_chars() {
        // 'A' takes one UTF-8 byte before the rejected character.
        let outcome = Str::from_std("A𐐷b").recode::<Ascii>();
        assert_eq!(outcome, Err(rejected_at(1)));

        // '€' takes three UTF-8 bytes before the rejected character.
        let outcome = Str::from_std("€𐐷b").recode::<Win1252>();
        assert_eq!(outcome, Err(rejected_at(3)));
    }

    #[cfg(feature = "alloc")]
    #[test]
    fn test_recode_lossy_invalid_chars() {
        let lossy = Str::from_std("A𐐷b").recode_lossy::<Ascii>();
        assert_eq!(&*lossy, Str::<Ascii>::from_bytes(b"A\x1Ab").unwrap());

        let lossy = Str::from_std("€𐐷b").recode_lossy::<Win1252>();
        assert_eq!(&*lossy, Str::<Win1252>::from_bytes(b"\x80\x1Ab").unwrap());
    }
}