1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
/*!
Provides the newtype wrapper [MaybeString] and its slice counterpart [MaybeStr], which represent a byte vector (or byte slice) that may be a valid UTF-8 string.

These wrappers are useful when working with data that may be a valid UTF-8 string and you want to delay or conditionally skip its conversion to a string.

They are also useful for debugging data that may be displayable as a string.
The `Debug` output provides a string representation when the wrapped bytes are a valid UTF-8 string; otherwise the bytes are printed as a list of two-digit hexadecimal values.

# Usage examples

## Debugging byte vectors

```
use maybe_string::MaybeString;

// invalid UTF-8 bytes
let ms = MaybeString(vec![0, 159, 146, 150]);
assert_eq!(&format!("{:?}", ms), "[00, 9f, 92, 96]");

// valid UTF-8 bytes
let ms = MaybeString(vec![240, 159, 146, 150]);
assert_eq!(&format!("{:?}", ms), "\"💖\"");
```

## Converting to a string

```
use maybe_string::MaybeString;

// invalid UTF-8 bytes
let ms = MaybeString(vec![0, 159, 146, 150]);
assert_eq!(ms.into_string(), Err(vec![0, 159, 146, 150]));

// valid UTF-8 bytes
let ms = MaybeString(vec![240, 159, 146, 150]);
assert_eq!(ms.into_string(), Ok("💖".to_string()));
```

## Serde

Implementations of `Serialize` and `Deserialize` for [MaybeString] and of `Serialize` for [MaybeStr] can be enabled with the `serde` feature flag.

*/

use std::str::from_utf8;
use std::fmt::{self, Debug, Display};
use std::ops::Deref;
use std::str::FromStr;
use std::convert::Infallible;
use std::borrow::{Borrow, Cow};

#[cfg(feature = "serde")]
use serde::{Serialize, Deserialize};

/// A newtype wrapper that represents a byte vector that may be a valid UTF-8 string.
///
/// The wrapped `Vec<u8>` is a public field and is stored as-is: constructing a
/// `MaybeString` performs no UTF-8 validation. The derived comparison and hashing
/// traits operate on the raw bytes.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Default, Clone)]
pub struct MaybeString(pub Vec<u8>);

impl MaybeString {
    /// Wraps a vector of bytes into a `MaybeString`.
    ///
    /// The bytes are stored as-is; no UTF-8 validation is performed.
    pub fn from_bytes(bytes: Vec<u8>) -> MaybeString {
        MaybeString(bytes)
    }

    /// Unwraps the byte vector.
    pub fn into_bytes(self) -> Vec<u8> {
        self.0
    }

    /// Extracts a `MaybeStr` slice containing the entire content of the wrapped vector of bytes.
    pub fn as_maybe_str(&self) -> &MaybeStr {
        // `&MaybeString` coerces to `&MaybeStr` through the `Deref` implementation.
        self
    }

    /// Converts the wrapped vector of bytes into a `String`, replacing every invalid UTF-8
    /// sequence with U+FFFD (the replacement character).
    ///
    /// The bytes are validated once. When they are already valid UTF-8 the existing
    /// allocation is reused; otherwise a new, repaired `String` is returned.
    pub fn into_lossy_string(self) -> String {
        match String::from_utf8_lossy(&self.0) {
            // Invalid sequences were found and a repaired copy was built.
            Cow::Owned(repaired) => repaired,
            Cow::Borrowed(_) => {
                // SAFETY: `from_utf8_lossy` returns `Cow::Borrowed` only when its input is
                // already valid UTF-8, so `self.0` can become a `String` without re-validation.
                unsafe { String::from_utf8_unchecked(self.0) }
            }
        }
    }

    /// Converts the wrapped vector of bytes to a `String`.
    ///
    /// # Errors
    ///
    /// If the wrapped byte vector is not a valid UTF-8 string, the `Err` variant containing
    /// the original vector of bytes is returned.
    pub fn into_string(self) -> Result<String, Vec<u8>> {
        String::from_utf8(self.0).map_err(|err| err.into_bytes())
    }
}

/// Lets a `MaybeString` be used wherever a `MaybeStr` is expected.
impl Deref for MaybeString {
    type Target = MaybeStr;

    /// Borrows the wrapped vector as a `MaybeStr` covering all of its bytes.
    fn deref(&self) -> &Self::Target {
        let bytes: &[u8] = &self.0;
        MaybeStr::from_bytes(bytes)
    }
}

/// Borrows a `MaybeString` as its slice counterpart.
impl Borrow<MaybeStr> for MaybeString {
    fn borrow(&self) -> &MaybeStr {
        // Deref coercion turns `&MaybeString` into `&MaybeStr`.
        let view: &MaybeStr = self;
        view
    }
}

impl AsRef<MaybeStr> for MaybeString {
    fn as_ref(&self) -> &MaybeStr {
        self
    }
}

/// Cheap reference conversion from `MaybeString` to the raw bytes it wraps.
impl AsRef<[u8]> for MaybeString {
    fn as_ref(&self) -> &[u8] {
        self.0.as_slice()
    }
}

/// Debug output is delegated to the `Debug` implementation of `MaybeStr`.
impl Debug for MaybeString {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let view = self.as_maybe_str();
        f.write_fmt(format_args!("{:?}", view))
    }
}

/// Displays the wrapped bytes as text, replacing invalid UTF-8 sequences with U+FFFD.
///
/// The text is emitted through `Formatter::pad`, so width, fill, alignment and precision
/// given in the format string (e.g. `{:>10}`) are honoured instead of being dropped.
impl Display for MaybeString {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.pad(&String::from_utf8_lossy(&self.0))
    }
}

impl From<Vec<u8>> for MaybeString {
    fn from(data: Vec<u8>) -> MaybeString {
        MaybeString(data)
    }
}

impl From<String> for MaybeString {
    fn from(data: String) -> MaybeString {
        MaybeString(data.into_bytes())
    }
}

impl<'b> From<&'b [u8]> for MaybeString {
    fn from(data: &[u8]) -> MaybeString {
        MaybeString::from(data.to_vec())
    }
}

impl<'b> From<&'b str> for MaybeString {
    fn from(data: &str) -> MaybeString {
        MaybeString::from(data.to_string())
    }
}

impl FromStr for MaybeString {
    type Err = Infallible;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Ok(MaybeString::from(s))
    }
}

/// [MaybeString] slices.
///
/// An unsized newtype over `[u8]`; values are handled behind a reference such as `&MaybeStr`.
///
/// `#[repr(transparent)]` guarantees that `MaybeStr` has exactly the layout of `[u8]`, which
/// the pointer cast used to reinterpret a `&[u8]` as a `&MaybeStr` depends on.
#[cfg_attr(feature = "serde", derive(Serialize))]
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
pub struct MaybeStr([u8]);

impl MaybeStr {
    /// Reinterprets a slice of bytes as a `MaybeStr` without copying or validating it.
    pub fn from_bytes(bytes: &[u8]) -> &MaybeStr {
        let raw = bytes as *const [u8] as *const MaybeStr;
        // SAFETY: `raw` is derived from a valid `&[u8]`, so it is non-null, aligned and carries
        // the slice length; `MaybeStr` wraps a single `[u8]` field, and the returned reference
        // is bound to the lifetime of `bytes`.
        // NOTE(review): the cast assumes `MaybeStr` is layout-compatible with `[u8]` — confirm
        // the struct is declared `#[repr(transparent)]`.
        unsafe { &*raw }
    }

    /// Returns the entire wrapped byte slice.
    pub fn as_bytes(&self) -> &[u8] {
        &self.0
    }

    /// Copies the wrapped byte slice into an owned `MaybeString`.
    pub fn to_maybe_string(&self) -> MaybeString {
        MaybeString(self.0.to_vec())
    }

    /// Converts the wrapped byte slice into a `String`, replacing every invalid UTF-8
    /// sequence with U+FFFD (the replacement character).
    pub fn to_lossy_string(&self) -> String {
        let lossy = String::from_utf8_lossy(&self.0);
        lossy.into_owned()
    }

    /// Converts the wrapped byte slice to a string slice.
    ///
    /// Despite its name, this returns a borrowed `&str` rather than an owned `String`.
    ///
    /// # Errors
    ///
    /// If the wrapped byte slice is not a valid UTF-8 string, the `Err` variant containing
    /// the wrapped byte slice is returned.
    pub fn to_string(&self) -> Result<&str, &[u8]> {
        std::str::from_utf8(&self.0).map_err(|_| &self.0)
    }
}

/// Debug-prints valid UTF-8 as a quoted string and anything else as a list of
/// two-digit hexadecimal byte values.
impl Debug for MaybeStr {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let bytes = self.as_bytes();
        match from_utf8(bytes) {
            Ok(text) => write!(f, "{:?}", text),
            Err(_) => write!(f, "{:02x?}", bytes),
        }
    }
}

/// Displays the wrapped bytes as text, replacing invalid UTF-8 sequences with U+FFFD.
///
/// The text is emitted through `Formatter::pad`, so width, fill, alignment and precision
/// given in the format string (e.g. `{:>10}`) are honoured instead of being dropped.
impl Display for MaybeStr {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.pad(&String::from_utf8_lossy(&self.0))
    }
}

/// Gives a `MaybeStr` direct access to the `[u8]` slice API.
impl Deref for MaybeStr {
    type Target = [u8];

    fn deref(&self) -> &Self::Target {
        let inner: &[u8] = &self.0;
        inner
    }
}

/// Cheap reference conversion from `MaybeStr` to the raw bytes it wraps.
impl AsRef<[u8]> for MaybeStr {
    fn as_ref(&self) -> &[u8] {
        &self.0
    }
}

impl ToOwned for MaybeStr {
    type Owned = MaybeString;

    fn to_owned(&self) -> Self::Owned {
        self.to_maybe_string()
    }
}

/// Views a byte slice as a `MaybeStr` for the same lifetime, without copying.
impl<'b> From<&'b [u8]> for &'b MaybeStr {
    fn from(data: &'b [u8]) -> Self {
        MaybeStr::from_bytes(data)
    }
}

/// Views the UTF-8 bytes of a string slice as a `MaybeStr` for the same lifetime, without copying.
impl<'b> From<&'b str> for &'b MaybeStr {
    fn from(data: &'b str) -> Self {
        let bytes = data.as_bytes();
        MaybeStr::from_bytes(bytes)
    }
}