widestring 0.2.2

A wide string FFI library for converting to and from Windows Wide "Unicode" (UTF-16) strings.
Documentation
use std;
use std::mem;
use std::ffi::{OsString, OsStr};
use super::platform;

/// An owned, mutable "wide" string for Windows FFI that is **not** nul-aware.
///
/// `WideString` is not aware of nul values. Strings may or may not be nul-terminated, and may
/// contain invalid and ill-formed UTF-16 data. These strings are intended to be used with Windows
/// FFI functions that directly use string length, where the strings are known to have proper
/// nul-termination already, or where strings are merely being passed through without modification.
///
/// `WideCString` should be used instead if nul-aware strings are required.
///
/// `WideString` can be converted to and from many other string types, including `OsString` and
/// `String`, making proper Unicode Windows FFI safe and easy.
///
/// # Examples
///
/// The following example constructs a `WideString` and shows how to convert a `WideString` to a
/// regular Rust `String`.
///
/// ```rust
/// use widestring::WideString;
/// let v = vec![84u16, 104u16, 101u16]; // 'T' 'h' 'e'
/// // Create a wide string from the vector
/// let wstr = WideString::from_vec(v);
/// // Convert to a rust string!
/// let rust_str = wstr.to_string_lossy();
/// assert_eq!(rust_str, "The");
/// ```
#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct WideString {
    // Backing buffer of 16-bit units, stored exactly as supplied by the caller.
    inner: Vec<u16>,
}

/// Wide string reference for `WideString`.
///
/// `WideStr` is **not** aware of nul values. Strings may or may not be nul-terminated, and may
/// contain invalid and ill-formed UTF-16 data. These strings are intended to be used with Windows
/// FFI functions that directly use string length, where the strings are known to have proper
/// nul-termination already, or where strings are merely being passed through without modification.
///
/// `WideCStr` should be used instead if nul-aware strings are required.
///
/// `WideStr` can be converted to many other string types, including `OsString` and `String`, making
/// proper Unicode Windows FFI safe and easy.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct WideStr {
    // Unsized slice of 16-bit units; this type is only ever handled behind a reference.
    inner: [u16],
}

impl WideString {
    /// Creates a new `WideString` that contains no code units.
    pub fn new() -> WideString {
        WideString { inner: Vec::new() }
    }

    /// Builds a `WideString` from a vector of UTF-16 data that may be invalid or ill-formed.
    ///
    /// The contents of the vector are taken as-is; nothing is validated.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use widestring::WideString;
    /// let v = vec![84u16, 104u16, 101u16]; // 'T' 'h' 'e'
    /// # let cloned = v.clone();
    /// // Create a wide string from the vector
    /// let wstr = WideString::from_vec(v);
    /// # assert_eq!(wstr.into_vec(), cloned);
    /// ```
    pub fn from_vec<T: Into<Vec<u16>>>(raw: T) -> WideString {
        let units: Vec<u16> = raw.into();
        WideString { inner: units }
    }

    /// Encodes a copy of an `OsStr` as a `WideString`.
    ///
    /// The `OsStr` is copied into a new wide string. Because `OsStr` makes no guarantee that it
    /// holds valid data, there is no guarantee that the resulting `WideString` is valid UTF-16.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use widestring::WideString;
    /// let s = "MyString";
    /// // Create a wide string from the string
    /// let wstr = WideString::from_str(s);
    ///
    /// assert_eq!(wstr.to_string().unwrap(), s);
    /// ```
    pub fn from_str<S: AsRef<OsStr> + ?Sized>(s: &S) -> WideString {
        let encoded = platform::os_to_wide(s.as_ref());
        WideString { inner: encoded }
    }

    /// Copies `len` elements starting at the `u16` pointer `p` into a new `WideString`.
    ///
    /// `len` counts `u16` elements, **not** bytes.
    ///
    /// # Safety
    ///
    /// This function is unsafe because nothing guarantees that the given pointer is valid for
    /// `len` elements.
    ///
    /// # Panics
    ///
    /// Panics if `len` is greater than 0 but `p` is a null pointer.
    pub unsafe fn from_ptr(p: *const u16, len: usize) -> WideString {
        if len > 0 {
            assert!(!p.is_null());
            WideString { inner: std::slice::from_raw_parts(p, len).to_vec() }
        } else {
            // A zero length never touches the pointer, so a null `p` is accepted here.
            WideString::new()
        }
    }

    /// Creates an empty `WideString` with room reserved for `capacity` code units.
    ///
    /// The string can hold exactly `capacity` partial code units without reallocating.
    /// A `capacity` of 0 means the string does not allocate initially.
    pub fn with_capacity(capacity: usize) -> WideString {
        let buffer = Vec::with_capacity(capacity);
        WideString { inner: buffer }
    }

    /// Returns how many code units this `WideString` can hold without reallocating.
    pub fn capacity(&self) -> usize {
        Vec::capacity(&self.inner)
    }

    /// Truncates the `WideString` to zero length.
    pub fn clear(&mut self) {
        self.inner.clear();
    }

    /// Reserves capacity for at least `additional` more code units to be inserted in this
    /// `WideString`.
    ///
    /// More space than requested may be reserved to avoid frequent allocations.
    pub fn reserve(&mut self, additional: usize) {
        self.inner.reserve(additional);
    }

    /// Reserves the minimum capacity for exactly `additional` more code units to be inserted in
    /// this `WideString`. Does nothing if the capacity is already sufficient.
    ///
    /// The allocator may still hand back more space than requested, so the capacity cannot be
    /// relied upon to be precisely minimal. Prefer `reserve` if future insertions are expected.
    pub fn reserve_exact(&mut self, additional: usize) {
        self.inner.reserve_exact(additional);
    }

    /// Borrows this string as a `WideStr` reference.
    pub fn as_wide_str(&self) -> &WideStr {
        &self[..]
    }

    /// Consumes the wide string and returns its underlying `Vec<u16>`.
    pub fn into_vec(self) -> Vec<u16> {
        let WideString { inner } = self;
        inner
    }

    /// Appends the given `&WideStr` to the end of this wide string.
    ///
    /// No checks are performed on either string. It is possible to end up with nul values inside
    /// the string, and it is up to the caller to determine if that is acceptable.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use widestring::WideString;
    /// let s = "MyString";
    /// let mut wstr = WideString::from_str(s);
    /// let cloned = wstr.clone();
    /// // Push the clone to the end, repeating the string twice.
    /// wstr.push(cloned);
    ///
    /// assert_eq!(wstr.to_string().unwrap(), "MyStringMyString");
    /// ```
    pub fn push<T: AsRef<WideStr>>(&mut self, s: T) {
        let tail: &WideStr = s.as_ref();
        self.inner.extend_from_slice(&tail.inner);
    }

    /// Appends the given `&[u16]` slice to the end of this wide string.
    ///
    /// No checks are performed on the data. It is possible to end up with nul values inside the
    /// string, and it is up to the caller to determine if that is acceptable.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use widestring::WideString;
    /// let s = "MyString";
    /// let mut wstr = WideString::from_str(s);
    /// let cloned = wstr.clone();
    /// // Push the clone to the end, repeating the string twice.
    /// wstr.push_slice(cloned);
    ///
    /// assert_eq!(wstr.to_string().unwrap(), "MyStringMyString");
    /// ```
    pub fn push_slice<T: AsRef<[u16]>>(&mut self, s: T) {
        let units: &[u16] = s.as_ref();
        self.inner.extend_from_slice(units);
    }

    /// Encodes the given `&OsStr` and appends it to the end of this wide string.
    ///
    /// No checks are performed on the strings. It is possible to end up with nul values inside
    /// the string, and it is up to the caller to determine if that is acceptable.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use widestring::WideString;
    /// let s = "MyString";
    /// let mut wstr = WideString::from_str(s);
    /// // Push the original to the end, repeating the string twice.
    /// wstr.push_str(s);
    ///
    /// assert_eq!(wstr.to_string().unwrap(), "MyStringMyString");
    /// ```
    pub fn push_str<T: AsRef<OsStr>>(&mut self, s: T) {
        let encoded = platform::os_to_wide(s.as_ref());
        self.inner.extend(encoded);
    }
}

impl Into<Vec<u16>> for WideString {
    fn into(self) -> Vec<u16> {
        self.into_vec()
    }
}

/// Wraps an owned `WideString` in the `Owned` variant of a `Cow`.
impl<'a> From<WideString> for std::borrow::Cow<'a, WideStr> {
    fn from(owned: WideString) -> Self {
        std::borrow::Cow::Owned(owned)
    }
}

impl From<String> for WideString {
    fn from(s: String) -> WideString {
        WideString::from_str(&s)
    }
}

impl From<OsString> for WideString {
    fn from(s: OsString) -> WideString {
        WideString::from_str(&s)
    }
}

/// Decodes a `WideString` into a new `OsString`.
impl From<WideString> for OsString {
    fn from(wide: WideString) -> Self {
        wide.as_wide_str().to_os_string()
    }
}

/// Copies anything that can be viewed as a `WideStr` into a new owned `WideString`.
impl<'a, T: ?Sized + AsRef<WideStr>> From<&'a T> for WideString {
    fn from(source: &'a T) -> Self {
        let view: &WideStr = source.as_ref();
        view.to_wide_string()
    }
}

/// Indexing with `..` yields the whole string as a borrowed `WideStr`.
impl std::ops::Index<std::ops::RangeFull> for WideString {
    type Output = WideStr;

    #[inline]
    fn index(&self, _: std::ops::RangeFull) -> &Self::Output {
        WideStr::from_slice(&self.inner[..])
    }
}

/// Dereferences to `WideStr`, which makes the `WideStr` methods callable on a `WideString`.
impl std::ops::Deref for WideString {
    type Target = WideStr;

    #[inline]
    fn deref(&self) -> &Self::Target {
        // Delegates to the `Index<RangeFull>` impl, i.e. the same as `&self[..]`.
        std::ops::Index::index(self, ..)
    }
}

/// Equality between an owned string and a `WideStr`, decided by comparing their contents.
impl PartialEq<WideStr> for WideString {
    #[inline]
    fn eq(&self, other: &WideStr) -> bool {
        PartialEq::eq(self.as_wide_str(), other)
    }
}

/// Ordering between an owned string and a `WideStr`, delegated to `WideStr`'s own ordering.
impl PartialOrd<WideStr> for WideString {
    #[inline]
    fn partial_cmp(&self, other: &WideStr) -> Option<std::cmp::Ordering> {
        PartialOrd::partial_cmp(self.as_wide_str(), other)
    }
}

/// Equality between an owned string and a `&WideStr`, decided by comparing their contents.
impl<'a> PartialEq<&'a WideStr> for WideString {
    #[inline]
    fn eq(&self, other: &&'a WideStr) -> bool {
        let rhs: &WideStr = *other;
        PartialEq::eq(self.as_wide_str(), rhs)
    }
}

/// Ordering between an owned string and a `&WideStr`, delegated to `WideStr`'s own ordering.
impl<'a> PartialOrd<&'a WideStr> for WideString {
    #[inline]
    fn partial_cmp(&self, other: &&'a WideStr) -> Option<std::cmp::Ordering> {
        let rhs: &WideStr = *other;
        PartialOrd::partial_cmp(self.as_wide_str(), rhs)
    }
}

/// Equality between an owned string and a `Cow<WideStr>`, decided by comparing their contents.
impl<'a> PartialEq<std::borrow::Cow<'a, WideStr>> for WideString {
    #[inline]
    fn eq(&self, other: &std::borrow::Cow<'a, WideStr>) -> bool {
        // Deref the `Cow` to reach the borrowed `WideStr`, whichever variant it holds.
        let rhs: &WideStr = &**other;
        PartialEq::eq(self.as_wide_str(), rhs)
    }
}

/// Ordering between an owned string and a `Cow<WideStr>`, delegated to `WideStr`'s own ordering.
impl<'a> PartialOrd<std::borrow::Cow<'a, WideStr>> for WideString {
    #[inline]
    fn partial_cmp(&self, other: &std::borrow::Cow<'a, WideStr>) -> Option<std::cmp::Ordering> {
        // Deref the `Cow` to reach the borrowed `WideStr`, whichever variant it holds.
        let rhs: &WideStr = &**other;
        PartialOrd::partial_cmp(self.as_wide_str(), rhs)
    }
}

impl WideStr {
    /// Borrows any value that can be viewed as a `WideStr`.
    pub fn new<'a, S: AsRef<WideStr> + ?Sized>(s: &'a S) -> &'a WideStr {
        AsRef::<WideStr>::as_ref(s)
    }

    /// Creates a `WideStr` view over `len` elements starting at the `u16` pointer `p`.
    ///
    /// `len` counts `u16` elements, **not** bytes.
    ///
    /// # Safety
    ///
    /// This function is unsafe because nothing guarantees that the given pointer is valid for
    /// `len` elements.
    ///
    /// # Panics
    ///
    /// This function panics if `p` is null.
    ///
    /// # Caveat
    ///
    /// The lifetime of the returned string is inferred from its usage. To prevent accidental
    /// misuse, it is suggested to tie the lifetime to whichever source lifetime is safe in the
    /// context, such as by providing a helper function taking the lifetime of a host value for
    /// the string, or by explicit annotation.
    pub unsafe fn from_ptr<'a>(p: *const u16, len: usize) -> &'a WideStr {
        assert!(!p.is_null());
        let units: &'a [u16] = std::slice::from_raw_parts(p, len);
        mem::transmute::<&'a [u16], &'a WideStr>(units)
    }

    /// Views a slice of `u16` code units as a `WideStr`.
    ///
    /// No checks are performed on the slice.
    pub fn from_slice<'a>(slice: &'a [u16]) -> &'a WideStr {
        // SAFETY: relies on `WideStr` being a wrapper whose only field is `[u16]`, so that a
        // reference to it is represented the same way as a reference to the slice.
        unsafe { mem::transmute::<&'a [u16], &'a WideStr>(slice) }
    }

    /// Decodes the wide string into an owned `OsString`.
    ///
    /// The `WideStr` is copied into a new string. Because `WideStr` makes no guarantee that it
    /// is valid UTF-16, there is no guarantee that the resulting `OsString` holds valid data.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use widestring::WideString;
    /// use std::ffi::OsString;
    /// let s = "MyString";
    /// // Create a wide string from the string
    /// let wstr = WideString::from_str(s);
    /// // Create an OsString from the wide string
    /// let osstr = wstr.to_os_string();
    ///
    /// assert_eq!(osstr, OsString::from(s));
    /// ```
    pub fn to_os_string(&self) -> OsString {
        platform::os_from_wide(self.as_slice())
    }

    /// Copies the wide string into a new owned `WideString`.
    pub fn to_wide_string(&self) -> WideString {
        WideString::from_vec(self.inner.to_vec())
    }

    /// Copies the wide string into a `String` if it contains valid UTF-16 data.
    ///
    /// # Failures
    ///
    /// Returns an error if the string contains any invalid UTF-16 data.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use widestring::WideString;
    /// let s = "MyString";
    /// // Create a wide string from the string
    /// let wstr = WideString::from_str(s);
    /// // Create a regular string from the wide string
    /// let s2 = wstr.to_string().unwrap();
    ///
    /// assert_eq!(s2, s);
    /// ```
    pub fn to_string(&self) -> Result<String, std::string::FromUtf16Error> {
        String::from_utf16(self.as_slice())
    }

    /// Copies the wide string into a `String`, replacing anything that cannot be decoded.
    ///
    /// Any non-Unicode sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use widestring::WideString;
    /// let s = "MyString";
    /// // Create a wide string from the string
    /// let wstr = WideString::from_str(s);
    /// // Create a regular string from the wide string
    /// let lossy = wstr.to_string_lossy();
    ///
    /// assert_eq!(lossy, s);
    /// ```
    pub fn to_string_lossy(&self) -> String {
        String::from_utf16_lossy(self.as_slice())
    }

    /// Borrows the wide string as a plain `u16` slice.
    pub fn as_slice(&self) -> &[u16] {
        &self.inner
    }

    /// Returns a raw pointer to the start of the wide string.
    ///
    /// The pointer is valid only as long as the lifetime of this reference.
    pub fn as_ptr(&self) -> *const u16 {
        self.as_slice().as_ptr()
    }

    /// Returns the length of the wide string in UTF-16 partial code units (**not** code points
    /// and **not** bytes).
    pub fn len(&self) -> usize {
        self.as_slice().len()
    }

    /// Returns `true` if this wide string contains no data.
    pub fn is_empty(&self) -> bool {
        self.as_slice().is_empty()
    }
}

/// Lets a `WideString` be borrowed as a `WideStr`.
impl std::borrow::Borrow<WideStr> for WideString {
    fn borrow(&self) -> &WideStr {
        self.as_wide_str()
    }
}

impl ToOwned for WideStr {
    type Owned = WideString;
    fn to_owned(&self) -> WideString {
        self.to_wide_string()
    }
}

/// Wraps a borrowed `WideStr` in the `Borrowed` variant of a `Cow`.
impl<'a> From<&'a WideStr> for std::borrow::Cow<'a, WideStr> {
    fn from(borrowed: &'a WideStr) -> Self {
        std::borrow::Cow::Borrowed(borrowed)
    }
}

/// Identity conversion: a `WideStr` is trivially viewable as itself.
impl AsRef<WideStr> for WideStr {
    fn as_ref(&self) -> &Self {
        self
    }
}

/// Views an owned `WideString` as a borrowed `WideStr`.
impl AsRef<WideStr> for WideString {
    fn as_ref(&self) -> &WideStr {
        self.as_wide_str()
    }
}

/// Views a `WideStr` as its underlying slice of `u16` code units.
impl AsRef<[u16]> for WideStr {
    fn as_ref(&self) -> &[u16] {
        &self.inner
    }
}

/// Views a `WideString` as its underlying slice of `u16` code units.
impl AsRef<[u16]> for WideString {
    fn as_ref(&self) -> &[u16] {
        &self.inner[..]
    }
}