maybe_string/
lib.rs

1/*!
Provides a newtype wrapper [MaybeString] and its slice counterpart [MaybeStr], which represent a byte vector (or byte slice) that may be a valid UTF-8 string.
3
These wrappers are useful when working with data that may be a valid UTF-8 string and you want to delay or conditionally skip its conversion to a string.
5
6They are also useful for debugging data that may be displayed as a string.
7The `Debug` output will provide string representation when the wrapped byte vector is a valid UTF-8 string.
8
9# Usage examples
10
11## Debugging byte vectors
12
13```
14use maybe_string::MaybeString;
15
16// invalid UTF-8 bytes
17let ms = MaybeString(vec![0, 159, 146, 150]);
18assert_eq!(&format!("{:?}", ms), "[00, 9f, 92, 96]");
19
20// valid UTF-8 bytes
21let ms = MaybeString(vec![240, 159, 146, 150]);
22assert_eq!(&format!("{:?}", ms), "\"💖\"");
23```
24
25## Converting to a string
26
27```
28use maybe_string::MaybeString;
29
30// invalid UTF-8 bytes
31let ms = MaybeString(vec![0, 159, 146, 150]);
32assert_eq!(ms.into_string(), Err(vec![0, 159, 146, 150]));
33
34// valid UTF-8 bytes
35let ms = MaybeString(vec![240, 159, 146, 150]);
36assert_eq!(ms.into_string(), Ok("💖".to_string()));
37```
38
39## Serde
40
Implementations of `Serialize` and `Deserialize` for [MaybeString] and of `Serialize` for [MaybeStr] can be enabled with the `serde` feature flag.
42
43*/
44
45use std::str::from_utf8;
46use std::fmt::{self, Debug, Display};
47use std::ops::Deref;
48use std::str::FromStr;
49use std::convert::Infallible;
50use std::borrow::{Borrow, Cow};
51
52#[cfg(feature = "serde")]
53use serde::{Serialize, Deserialize};
54
/// An owned byte buffer whose content may or may not be a valid UTF-8 string.
///
/// The wrapped `Vec<u8>` is a public field, so a value can be built directly with
/// `MaybeString(bytes)`. Equality, ordering and hashing are derived from the wrapped bytes.
#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct MaybeString(pub Vec<u8>);
59
60impl MaybeString {
61    /// Wraps a vector of bytes into a `MaybeString`.
62    pub fn from_bytes(bytes: Vec<u8>) -> MaybeString {
63        MaybeString(bytes)
64    }
65
66    /// Unwraps the byte vector.
67    pub fn into_bytes(self) -> Vec<u8> {
68        self.0
69    }
70
71    /// Extracts a `MaybeStr` slice containing the entire content of the wrapped vector of bytes.
72    pub fn as_maybe_str(&self) -> &MaybeStr {
73        self
74    }
75
76    /// Converts the wrapped vector of bytes into a `String`, including invalid characters.
77    pub fn into_lossy_string(self) -> String {
78        // make sure only one check and one copy is done at most
79        unsafe {
80            match String::from_utf8_lossy(&self.0) {
81                // self.0 is a valid UTF-8 string
82                Cow::Borrowed(_) => String::from_utf8_unchecked(self.0),
83                // lossy string was made
84                Cow::Owned(string) => string,
85            }
86        }
87    }
88
89    /// Converts the wrapped vector of bytes to a `String`.
90    ///
91    /// If wrapped byte vector is not a valid UTF-8 string the `Err` variant containing the wrapped vector of bytes is returned.
92    pub fn into_string(self) -> Result<String, Vec<u8>> {
93        match String::from_utf8(self.0) {
94            Ok(string) => Ok(string),
95            Err(err) => Err(err.into_bytes()),
96        }
97    }
98}
99
100impl Deref for MaybeString {
101    type Target = MaybeStr;
102
103    fn deref(&self) -> &MaybeStr {
104        MaybeStr::from_bytes(self.0.as_slice())
105    }
106}
107
108impl Borrow<MaybeStr> for MaybeString {
109    fn borrow(&self) -> &MaybeStr {
110        self.as_ref()
111    }
112}
113
114impl AsRef<MaybeStr> for MaybeString {
115    fn as_ref(&self) -> &MaybeStr {
116        self
117    }
118}
119
120impl AsRef<[u8]> for MaybeString {
121    fn as_ref(&self) -> &[u8] {
122        self.as_bytes()
123    }
124}
125
126impl Debug for MaybeString {
127    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
128        write!(f, "{:?}", self.as_maybe_str())
129    }
130}
131
132impl Display for MaybeString {
133    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
134        write!(f, "{}", self.as_maybe_str())
135    }
136}
137
138impl From<Vec<u8>> for MaybeString {
139    fn from(data: Vec<u8>) -> MaybeString {
140        MaybeString(data)
141    }
142}
143
144impl From<String> for MaybeString {
145    fn from(data: String) -> MaybeString {
146        MaybeString(data.into_bytes())
147    }
148}
149
150impl<'b> From<&'b [u8]> for MaybeString {
151    fn from(data: &[u8]) -> MaybeString {
152        MaybeString::from(data.to_vec())
153    }
154}
155
156impl<'b> From<&'b str> for MaybeString {
157    fn from(data: &str) -> MaybeString {
158        MaybeString::from(data.to_string())
159    }
160}
161
162impl FromStr for MaybeString {
163    type Err = Infallible;
164
165    fn from_str(s: &str) -> Result<Self, Self::Err> {
166        Ok(MaybeString::from(s))
167    }
168}
169
/// [MaybeString] slices.
///
/// An unsized view over bytes that may or may not be a valid UTF-8 string. Values are only
/// ever handled behind a reference, obtained by reinterpreting a `&[u8]`.
///
/// The type is `repr(transparent)` so that it is guaranteed to have exactly the same layout
/// as the `[u8]` it wraps; the pointer cast from `*const [u8]` to `*const MaybeStr` relies on
/// that guarantee, which the default Rust representation does not give.
#[cfg_attr(feature = "serde", derive(Serialize))]
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
pub struct MaybeStr([u8]);
174
175impl MaybeStr {
176    /// Wraps a slice of bytes into a `MaybeStr`.
177    pub fn from_bytes(bytes: &[u8]) -> &MaybeStr {
178        unsafe { &*((bytes as *const [u8]) as *const MaybeStr)}
179    }
180
181    /// Returns the entire wrapped byte slice.
182    pub fn as_bytes(&self) -> &[u8] {
183        self
184    }
185
186    /// Converts the wrapped byte slice into a wrapped vector of bytes.
187    pub fn to_maybe_string(&self) -> MaybeString {
188        MaybeString(self.as_bytes().to_owned())
189    }
190
191    /// Converts the wrapped byte slice into a `String`, including invalid characters.
192    pub fn to_lossy_string(&self) -> String {
193        String::from_utf8_lossy(&self.0).into_owned()
194    }
195
196    /// Converts the wrapped byte slice to a string slice.
197    ///
198    /// If wrapped byte slice is not a valid UTF-8 string the `Err` variant containing the wrapped byte slice is returned.
199    pub fn to_string(&self) -> Result<&str, &[u8]> {
200        match std::str::from_utf8(&self.0) {
201            Ok(string) => Ok(string),
202            Err(_) => Err(&self.0),
203        }
204    }
205}
206
207impl Debug for MaybeStr {
208    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
209        if let Ok(string) = from_utf8(self.as_bytes()) {
210            write!(f, "{:?}", string)
211        } else {
212            write!(f, "{:02x?}", &self.as_bytes())
213        }
214    }
215}
216
217impl Display for MaybeStr {
218    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
219        write!(f, "{}", String::from_utf8_lossy(&self.as_bytes()))
220    }
221}
222
223impl Deref for MaybeStr {
224    type Target = [u8];
225
226    fn deref(&self) -> &[u8] {
227        &self.0
228    }
229}
230
231impl AsRef<[u8]> for MaybeStr {
232    fn as_ref(&self) -> &[u8] {
233        self.as_bytes()
234    }
235}
236
237impl ToOwned for MaybeStr {
238    type Owned = MaybeString;
239
240    fn to_owned(&self) -> Self::Owned {
241        self.to_maybe_string()
242    }
243}
244
245impl<'b> From<&'b [u8]> for &'b MaybeStr {
246    fn from(data: &[u8]) -> &MaybeStr {
247        MaybeStr::from_bytes(data)
248    }
249}
250
251impl<'b> From<&'b str> for &'b MaybeStr {
252    fn from(data: &str) -> &MaybeStr {
253        MaybeStr::from_bytes(data.as_bytes())
254    }
255}