Skip to main content

ps_str/
lib.rs

1pub mod chars;
2pub mod path;
3
4use std::path::PathBuf;
5
6pub use chars::CHARS;
7pub use path::PathUtf8Encoder;
8
9#[cfg(test)]
10pub mod tests;
11
/// Lossy conversion of a value into UTF-8 text, and from there into a path.
pub trait Utf8Encoder {
    /// Returns the value rendered as an owned, valid UTF-8 [`String`].
    ///
    /// The blanket implementation for `AsRef<[u8]>` types keeps valid UTF-8
    /// as-is and substitutes every other byte using the [`CHARS`] table.
    fn to_utf8_string(&self) -> String;

    /// Builds a [`PathBuf`] from the text produced by [`Self::to_utf8_string`].
    ///
    /// The path is created from the decoded string, so it follows exactly the
    /// same rules: well-formed UTF-8 is kept verbatim, while any other byte is
    /// substituted via the [`CHARS`] fallback table. For input that is not
    /// valid UTF-8 the conversion is therefore **lossy** — the resulting path
    /// does not contain the original bytes.
    ///
    /// If the exact bytes matter (for instance, to open an existing file whose
    /// name is not valid UTF-8 on Unix), this method is the wrong tool. Build
    /// an [`OsString`](std::ffi::OsString) straight from the bytes with a
    /// platform-specific API such as
    /// `std::os::unix::ffi::OsStringExt::from_vec`, and convert that into a
    /// [`PathBuf`] instead.
    ///
    /// # Example
    ///
    /// ```
    /// use std::path::PathBuf;
    /// use ps_str::Utf8Encoder;
    ///
    /// let bytes = b"hello.txt";
    ///
    /// assert_eq!(bytes.to_utf8_path(), PathBuf::from("hello.txt"));
    /// ```
    fn to_utf8_path(&self) -> PathBuf {
        let decoded = self.to_utf8_string();

        PathBuf::from(decoded)
    }
}
42
43impl<T: AsRef<[u8]>> Utf8Encoder for T {
44    /// Converts a byte sequence into a UTF-8 string, replacing invalid bytes
45    /// with fallback characters.
46    ///
47    /// This method processes bytes sequentially, attempting to parse valid UTF-8
48    /// sequences. When valid UTF-8 is encountered, it is appended directly to the
49    /// result string. When an invalid UTF-8 sequence is detected, the valid portion
50    /// preceding the error is appended, then the offending byte is replaced, and
51    /// processing continues from the next byte.
52    ///
53    /// # Returns
54    ///
55    /// A `String` containing the decoded content, guaranteed to be valid UTF-8
56    /// even if the input contained invalid byte sequences.
57    ///
58    /// # Example
59    ///
60    /// ```
61    /// use ps_str::Utf8Encoder;
62    ///
63    /// let bytes = b"Hello\xFFWorld";
64    /// let result = bytes.to_utf8_string();
65    ///
66    /// assert_eq!(result, "Hello˙World");
67    /// ```
68    fn to_utf8_string(&self) -> String {
69        let bytes = self.as_ref();
70        let mut result = String::with_capacity(bytes.len());
71        let mut i = 0;
72
73        while i < bytes.len() {
74            match std::str::from_utf8(&bytes[i..]) {
75                Ok(valid_str) => {
76                    result.push_str(valid_str);
77                    break;
78                }
79                Err(e) => {
80                    if e.valid_up_to() > 0 {
81                        let valid_part = &bytes[i..i + e.valid_up_to()];
82
83                        // SAFETY: valid_up_to() guarantees this slice contains only complete, valid UTF-8 characters
84                        result.push_str(unsafe { std::str::from_utf8_unchecked(valid_part) });
85                        i += e.valid_up_to();
86                    }
87
88                    if let Some(c) = bytes.get(i) {
89                        result.push(CHARS[*c as usize]);
90                        i += 1;
91                    }
92                }
93            }
94        }
95
96        result
97    }
98}