// ps_str/lib.rs

1pub mod chars;
2pub use chars::CHARS;
3
4#[cfg(test)]
5pub mod tests;
6
/// Conversion of a value's bytes into an owned UTF-8 [`String`].
///
/// Implementors decide how bytes that are not valid UTF-8 are represented in
/// the output; the method itself is infallible and always yields a `String`.
pub trait Utf8Encoder {
    /// Returns the receiver's content as a newly allocated `String`.
    ///
    /// The receiver is only borrowed, so ignoring the returned value means
    /// the call did nothing useful — hence `#[must_use]`.
    #[must_use]
    fn to_utf8_string(&self) -> String;
}
10
11impl<T: AsRef<[u8]>> Utf8Encoder for T {
12    /// Converts a byte sequence into a UTF-8 string, replacing invalid bytes
13    /// with fallback characters.
14    ///
15    /// This method processes bytes sequentially, attempting to parse valid UTF-8
16    /// sequences. When valid UTF-8 is encountered, it is appended directly to the
17    /// result string. When an invalid UTF-8 sequence is detected, the valid portion
18    /// preceding the error is appended, then the offending byte is replaced, and
19    /// processing continues from the next byte.
20    ///
21    /// # Returns
22    ///
23    /// A `String` containing the decoded content, guaranteed to be valid UTF-8
24    /// even if the input contained invalid byte sequences.
25    ///
26    /// # Example
27    ///
28    /// ```
29    /// use ps_str::Utf8Encoder;
30    ///
31    /// let bytes = b"Hello\xFFWorld";
32    /// let result = bytes.to_utf8_string();
33    ///
34    /// assert_eq!(result, "Hello˙World");
35    /// ```
36    fn to_utf8_string(&self) -> String {
37        let bytes = self.as_ref();
38        let mut result = String::with_capacity(bytes.len());
39        let mut i = 0;
40
41        while i < bytes.len() {
42            match std::str::from_utf8(&bytes[i..]) {
43                Ok(valid_str) => {
44                    result.push_str(valid_str);
45                    break;
46                }
47                Err(e) => {
48                    if e.valid_up_to() > 0 {
49                        let valid_part = &bytes[i..i + e.valid_up_to()];
50
51                        // SAFETY: valid_up_to() guarantees this slice contains only complete, valid UTF-8 characters
52                        result.push_str(unsafe { std::str::from_utf8_unchecked(valid_part) });
53                        i += e.valid_up_to();
54                    }
55
56                    if let Some(c) = bytes.get(i) {
57                        result.push(CHARS[*c as usize]);
58                        i += 1;
59                    }
60                }
61            }
62        }
63
64        result
65    }
66}