maybe-string 0.1.0

Provides a newtype wrapper that represents a byte vector that may be a valid UTF-8 string
Documentation
/*!
Provides a newtype wrapper [MaybeString] and its slice counterpart [MaybeStr] that represent a byte vector (or byte slice) that may be a valid UTF-8 string.

These wrappers are useful when working with data that may be a valid UTF-8 string and you want to delay or conditionally skip its conversion to a string.

They are also useful for debugging data that may be displayed as a string.
The `Debug` output provides a string representation when the wrapped bytes are a valid UTF-8 string; otherwise the bytes are printed as a list of hexadecimal values.

# Usage examples

## Debugging byte vectors

```
use maybe_string::MaybeString;

// invalid UTF-8 bytes
let ms = MaybeString(vec![0, 159, 146, 150]);
assert_eq!(&format!("{:?}", ms), "[00, 9f, 92, 96]");

// valid UTF-8 bytes
let ms = MaybeString(vec![240, 159, 146, 150]);
assert_eq!(&format!("{:?}", ms), "\"💖\"");
```

## Converting to a string

```
use maybe_string::MaybeString;

// invalid UTF-8 bytes
let ms = MaybeString(vec![0, 159, 146, 150]);
assert_eq!(ms.into_string(), Err(vec![0, 159, 146, 150]));

// valid UTF-8 bytes
let ms = MaybeString(vec![240, 159, 146, 150]);
assert_eq!(ms.into_string(), Ok("💖".to_string()));
```

## Serde

Implementations of `Serialize` and `Deserialize` for [MaybeString] and `Serialize` for [MaybeStr] can be enabled with the `serde` feature flag.

*/

use std::str::from_utf8;
use std::fmt::{self, Debug, Display};
use std::ops::Deref;
use std::str::FromStr;
use std::convert::Infallible;
use std::borrow::{Borrow, Cow};

#[cfg(feature = "serde")]
use serde::{Serialize, Deserialize};

/// An owned byte vector that may or may not hold a valid UTF-8 string.
///
/// The wrapped `Vec<u8>` is public and is not validated on construction; whether the bytes
/// form valid UTF-8 is only checked by the conversion and formatting methods that need to know.
/// Borrowed access goes through [`MaybeStr`], which this type dereferences to.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct MaybeString(pub Vec<u8>);

impl MaybeString {
    /// Wraps a vector of bytes into a `MaybeString`.
    ///
    /// The bytes are taken as-is; no UTF-8 validation is performed.
    pub fn from_bytes(bytes: Vec<u8>) -> MaybeString {
        MaybeString(bytes)
    }

    /// Unwraps the byte vector, consuming the `MaybeString`.
    pub fn into_bytes(self) -> Vec<u8> {
        self.0
    }

    /// Extracts a `MaybeStr` slice containing the entire content of the wrapped vector of bytes.
    pub fn as_maybe_str(&self) -> &MaybeStr {
        // `&MaybeString` coerces to `&MaybeStr` through the `Deref` implementation.
        self
    }

    /// Converts the wrapped vector of bytes into a `String`, including invalid characters.
    ///
    /// Valid UTF-8 input is converted without copying. Otherwise a new `String` is built in
    /// which every invalid sequence is replaced by U+FFFD (the replacement character).
    pub fn into_lossy_string(self) -> String {
        // Validate exactly once: `from_utf8_lossy` either confirms the bytes are valid
        // (`Borrowed`) or hands back an already repaired copy (`Owned`).
        match String::from_utf8_lossy(&self.0) {
            // SAFETY: `from_utf8_lossy` returns `Cow::Borrowed` only when the whole input is
            // valid UTF-8, and `self.0` has not been modified since that check.
            Cow::Borrowed(_) => unsafe { String::from_utf8_unchecked(self.0) },
            Cow::Owned(lossy) => lossy,
        }
    }

    /// Converts the wrapped vector of bytes to a `String`.
    ///
    /// # Errors
    ///
    /// If the wrapped byte vector is not a valid UTF-8 string, the `Err` variant containing the
    /// original, unmodified vector of bytes is returned.
    pub fn into_string(self) -> Result<String, Vec<u8>> {
        String::from_utf8(self.0).map_err(|err| err.into_bytes())
    }
}

/// Lets a `MaybeString` be used wherever a `&MaybeStr` is expected.
impl Deref for MaybeString {
    type Target = MaybeStr;

    /// Borrows the whole wrapped vector as a `MaybeStr` slice.
    fn deref(&self) -> &Self::Target {
        let bytes: &[u8] = &self.0;
        MaybeStr::from_bytes(bytes)
    }
}

/// Allows a `MaybeString` to be borrowed as its slice form, `MaybeStr`.
impl Borrow<MaybeStr> for MaybeString {
    /// Returns the same view that `AsRef<MaybeStr>` provides.
    fn borrow(&self) -> &MaybeStr {
        <MaybeString as AsRef<MaybeStr>>::as_ref(self)
    }
}

impl AsRef<MaybeStr> for MaybeString {
    fn as_ref(&self) -> &MaybeStr {
        self
    }
}

/// Cheap reference conversion from `MaybeString` to a plain byte slice.
impl AsRef<[u8]> for MaybeString {
    /// Returns the entire wrapped vector as a byte slice.
    fn as_ref(&self) -> &[u8] {
        self.0.as_slice()
    }
}

/// Debug output of a `MaybeString` is exactly the debug output of its `MaybeStr` view.
impl Debug for MaybeString {
    /// Hands the formatter straight to the `Debug` implementation of `MaybeStr`.
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        Debug::fmt(self.as_maybe_str(), f)
    }
}

/// Display output of a `MaybeString` is exactly the display output of its `MaybeStr` view.
impl Display for MaybeString {
    /// Hands the formatter straight to the `Display` implementation of `MaybeStr`.
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        Display::fmt(self.as_maybe_str(), f)
    }
}

impl From<Vec<u8>> for MaybeString {
    fn from(data: Vec<u8>) -> MaybeString {
        MaybeString(data)
    }
}

impl From<String> for MaybeString {
    fn from(data: String) -> MaybeString {
        MaybeString(data.into_bytes())
    }
}

impl<'b> From<&'b [u8]> for MaybeString {
    fn from(data: &[u8]) -> MaybeString {
        MaybeString::from(data.to_vec())
    }
}

impl<'b> From<&'b str> for MaybeString {
    fn from(data: &str) -> MaybeString {
        MaybeString::from(data.to_string())
    }
}

impl FromStr for MaybeString {
    type Err = Infallible;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Ok(MaybeString::from(s))
    }
}

/// [MaybeString] slices: a borrowed run of bytes that may or may not be a valid UTF-8 string.
///
/// This is an unsized type, so it is only ever used behind a reference such as `&MaybeStr`.
/// Such references are created by reinterpreting a `&[u8]`, so the wrapper must have exactly
/// the same layout as `[u8]`; `#[repr(transparent)]` is what guarantees that.
#[cfg_attr(feature = "serde", derive(Serialize))]
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
pub struct MaybeStr([u8]);

impl MaybeStr {
    /// Wraps a slice of bytes into a `MaybeStr`.
    ///
    /// The bytes are neither copied nor validated; the result borrows from `bytes`.
    pub fn from_bytes(bytes: &[u8]) -> &MaybeStr {
        let wrapped = bytes as *const [u8] as *const MaybeStr;
        // SAFETY: `wrapped` comes from a valid `&[u8]` and `MaybeStr` is a newtype whose only
        // field is `[u8]`, so the pointer metadata (the slice length) carries over unchanged.
        // The cast relies on the wrapper having the same layout as `[u8]`. The returned
        // reference is tied to the lifetime of `bytes` by the function signature.
        unsafe { &*wrapped }
    }

    /// Returns the entire wrapped byte slice.
    pub fn as_bytes(&self) -> &[u8] {
        &self.0
    }

    /// Converts the wrapped byte slice into a wrapped vector of bytes by copying it.
    pub fn to_maybe_string(&self) -> MaybeString {
        MaybeString(self.0.to_vec())
    }

    /// Converts the wrapped byte slice into a `String`, including invalid characters.
    ///
    /// A new `String` is always allocated; invalid sequences are replaced by U+FFFD.
    pub fn to_lossy_string(&self) -> String {
        let lossy = String::from_utf8_lossy(self.as_bytes());
        lossy.into_owned()
    }

    /// Converts the wrapped byte slice to a string slice.
    ///
    /// If the wrapped byte slice is not a valid UTF-8 string, the `Err` variant containing the
    /// wrapped byte slice is returned.
    ///
    /// Note that this inherent method takes precedence over `ToString::to_string` in method
    /// calls on a `MaybeStr`.
    pub fn to_string(&self) -> Result<&str, &[u8]> {
        std::str::from_utf8(&self.0).map_err(|_| &self.0)
    }
}

/// Debug output: a quoted string when the bytes are valid UTF-8, a hex byte list otherwise.
impl Debug for MaybeStr {
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        let bytes = self.as_bytes();
        match from_utf8(bytes) {
            // Valid UTF-8: print it the way `str` debug-prints (quoted and escaped).
            Ok(text) => write!(f, "{:?}", text),
            // Not UTF-8: print every byte as two lowercase hex digits.
            Err(_) => write!(f, "{:02x?}", bytes),
        }
    }
}

/// Display output: the bytes decoded as UTF-8, with invalid sequences shown as U+FFFD.
impl Display for MaybeStr {
    /// Writes the lossily decoded text, honoring the caller's width, fill, alignment and
    /// precision (for example `{:>10}`).
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        let lossy = String::from_utf8_lossy(self.as_bytes());
        // Delegate to `str`'s `Display` rather than `write!(f, "{}", ..)`: `write!` starts a
        // fresh format specification and would silently drop the caller's formatting options.
        Display::fmt(&*lossy, f)
    }
}

/// Gives a `MaybeStr` all the read-only methods of a byte slice.
impl Deref for MaybeStr {
    type Target = [u8];

    /// Returns the wrapped byte slice.
    fn deref(&self) -> &Self::Target {
        // Access the field directly: `as_bytes` itself relies on this `Deref` implementation,
        // so calling it here would recurse.
        &self.0
    }
}

/// Cheap reference conversion from `MaybeStr` to a plain byte slice.
impl AsRef<[u8]> for MaybeStr {
    /// Returns the entire wrapped byte slice.
    fn as_ref(&self) -> &[u8] {
        &self.0
    }
}

impl ToOwned for MaybeStr {
    type Owned = MaybeString;

    fn to_owned(&self) -> Self::Owned {
        self.to_maybe_string()
    }
}

/// Reinterprets a borrowed byte slice as a `MaybeStr` with the same lifetime.
impl<'b> From<&'b [u8]> for &'b MaybeStr {
    fn from(data: &'b [u8]) -> Self {
        MaybeStr::from_bytes(data)
    }
}

/// Reinterprets the UTF-8 bytes of a string slice as a `MaybeStr` with the same lifetime.
impl<'b> From<&'b str> for &'b MaybeStr {
    fn from(data: &'b str) -> Self {
        let bytes = data.as_bytes();
        MaybeStr::from_bytes(bytes)
    }
}