ps_str/lib.rs
1pub mod chars;
2pub mod path;
3
4use std::path::PathBuf;
5
6pub use chars::CHARS;
7pub use path::PathUtf8Encoder;
8
9#[cfg(test)]
10pub mod tests;
11
pub trait Utf8Encoder {
    /// Decodes `self` into an owned, always-valid UTF-8 [`String`].
    fn to_utf8_string(&self) -> String;

    /// Builds a [`PathBuf`] from the text produced by [`Self::to_utf8_string`].
    ///
    /// The path is constructed from the decoded string, so it follows the
    /// same rules: well-formed UTF-8 is kept as is, while any other byte is
    /// substituted via the [`CHARS`] fallback table. For input that contains
    /// non-UTF-8 bytes the conversion is therefore **lossy**.
    ///
    /// Do not use this method when the exact original bytes matter (for
    /// instance, to open an existing file whose name is not UTF-8 on Unix).
    /// In that case turn the bytes into an
    /// [`OsString`](std::ffi::OsString) with a platform-specific API such as
    /// `std::os::unix::ffi::OsStringExt::from_vec`, and convert that into a
    /// [`PathBuf`] instead.
    ///
    /// # Example
    ///
    /// ```
    /// use std::path::PathBuf;
    /// use ps_str::Utf8Encoder;
    ///
    /// let bytes = b"hello.txt";
    ///
    /// assert_eq!(bytes.to_utf8_path(), PathBuf::from("hello.txt"));
    /// ```
    fn to_utf8_path(&self) -> PathBuf {
        PathBuf::from(self.to_utf8_string())
    }
}
42
43impl<T: AsRef<[u8]>> Utf8Encoder for T {
44 /// Converts a byte sequence into a UTF-8 string, replacing invalid bytes
45 /// with fallback characters.
46 ///
47 /// This method processes bytes sequentially, attempting to parse valid UTF-8
48 /// sequences. When valid UTF-8 is encountered, it is appended directly to the
49 /// result string. When an invalid UTF-8 sequence is detected, the valid portion
50 /// preceding the error is appended, then the offending byte is replaced, and
51 /// processing continues from the next byte.
52 ///
53 /// # Returns
54 ///
55 /// A `String` containing the decoded content, guaranteed to be valid UTF-8
56 /// even if the input contained invalid byte sequences.
57 ///
58 /// # Example
59 ///
60 /// ```
61 /// use ps_str::Utf8Encoder;
62 ///
63 /// let bytes = b"Hello\xFFWorld";
64 /// let result = bytes.to_utf8_string();
65 ///
66 /// assert_eq!(result, "Hello˙World");
67 /// ```
68 fn to_utf8_string(&self) -> String {
69 let bytes = self.as_ref();
70 let mut result = String::with_capacity(bytes.len());
71 let mut i = 0;
72
73 while i < bytes.len() {
74 match std::str::from_utf8(&bytes[i..]) {
75 Ok(valid_str) => {
76 result.push_str(valid_str);
77 break;
78 }
79 Err(e) => {
80 if e.valid_up_to() > 0 {
81 let valid_part = &bytes[i..i + e.valid_up_to()];
82
83 // SAFETY: valid_up_to() guarantees this slice contains only complete, valid UTF-8 characters
84 result.push_str(unsafe { std::str::from_utf8_unchecked(valid_part) });
85 i += e.valid_up_to();
86 }
87
88 if let Some(c) = bytes.get(i) {
89 result.push(CHARS[*c as usize]);
90 i += 1;
91 }
92 }
93 }
94 }
95
96 result
97 }
98}