ps_str/lib.rs
1pub mod chars;
2pub use chars::CHARS;
3
4#[cfg(test)]
5pub mod tests;
6
/// Conversion of a value into an owned [`String`].
///
/// This file provides a blanket implementation for every `T: AsRef<[u8]>`;
/// that implementation copies well-formed UTF-8 through unchanged and
/// replaces each byte that cannot be decoded with the character found at
/// `CHARS[byte]`.
pub trait Utf8Encoder {
    /// Returns the contents of `self` as an owned `String`.
    ///
    /// The signature is infallible: implementations have no way to report a
    /// decoding error and must always produce a string.
    fn to_utf8_string(&self) -> String;
}
10
11impl<T: AsRef<[u8]>> Utf8Encoder for T {
12 /// Converts a byte sequence into a UTF-8 string, replacing invalid bytes
13 /// with fallback characters.
14 ///
15 /// This method processes bytes sequentially, attempting to parse valid UTF-8
16 /// sequences. When valid UTF-8 is encountered, it is appended directly to the
17 /// result string. When an invalid UTF-8 sequence is detected, the valid portion
18 /// preceding the error is appended, then the offending byte is replaced, and
19 /// processing continues from the next byte.
20 ///
21 /// # Returns
22 ///
23 /// A `String` containing the decoded content, guaranteed to be valid UTF-8
24 /// even if the input contained invalid byte sequences.
25 ///
26 /// # Example
27 ///
28 /// ```
29 /// use ps_str::Utf8Encoder;
30 ///
31 /// let bytes = b"Hello\xFFWorld";
32 /// let result = bytes.to_utf8_string();
33 ///
34 /// assert_eq!(result, "Hello˙World");
35 /// ```
36 fn to_utf8_string(&self) -> String {
37 let bytes = self.as_ref();
38 let mut result = String::with_capacity(bytes.len());
39 let mut i = 0;
40
41 while i < bytes.len() {
42 match std::str::from_utf8(&bytes[i..]) {
43 Ok(valid_str) => {
44 result.push_str(valid_str);
45 break;
46 }
47 Err(e) => {
48 if e.valid_up_to() > 0 {
49 let valid_part = &bytes[i..i + e.valid_up_to()];
50
51 // SAFETY: valid_up_to() guarantees this slice contains only complete, valid UTF-8 characters
52 result.push_str(unsafe { std::str::from_utf8_unchecked(valid_part) });
53 i += e.valid_up_to();
54 }
55
56 if let Some(c) = bytes.get(i) {
57 result.push(CHARS[*c as usize]);
58 i += 1;
59 }
60 }
61 }
62 }
63
64 result
65 }
66}