// zbuf 0.1.2
//
// “Zero-copy” string and bytes buffers
// Documentation
use bytesbuf::BytesBuf;
use std::error;
use std::fmt;
use std::iter::FromIterator;
use std::io;
use std::mem;
use std::ops::{Deref, DerefMut};
use std::str;
use utf8_decoder::{LossyUtf8Decoder, StrictUtf8Decoder, Utf8DecoderError};

/// A “zero copy” string buffer.
///
/// See [crate documentation](index.html) for an overview.
///
/// This is a thin wrapper around a `BytesBuf`. The wrapped buffer is private,
/// and the code in this module relies on its contents always being well-formed UTF-8.
// `PartialEq` and `PartialOrd` are not derived: they are implemented by hand
// further down, generically over any `T: AsRef<str>`.
#[derive(Clone, Default, Hash, Eq, Ord)]
pub struct StrBuf(BytesBuf);

impl StrBuf {
    /// Create a new, empty, inline string buffer.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use zbuf::StrBuf;
    /// assert!(StrBuf::new().is_empty());
    /// ```
    #[inline]
    pub fn new() -> Self {
        let inner = BytesBuf::new();
        StrBuf(inner)
    }

    /// Create an empty buffer that can hold at least `capacity` bytes
    /// (typically more) before it needs to re-allocate.
    ///
    /// ## Panics
    ///
    /// Panics if the requested capacity is greater than `std::u32::MAX` (4 gigabytes).
    ///
    /// ## Examples
    ///
    /// ```
    /// # use zbuf::StrBuf;
    /// assert!(StrBuf::with_capacity(17).capacity() >= 17);
    /// ```
    #[inline]
    pub fn with_capacity(capacity: usize) -> Self {
        let inner = BytesBuf::with_capacity(capacity);
        StrBuf(inner)
    }

    /// Turn a bytes buffer into a string buffer, validating it as UTF-8.
    ///
    /// The whole input is checked, which takes `O(length)` time.
    /// On success the bytes buffer is wrapped as-is: this takes ownership of it,
    /// so no heap memory is allocated and no data is copied.
    /// On failure, `Err(_)` is returned; the error carries both the original
    /// bytes buffer and the underlying `std::str::Utf8Error`.
    ///
    /// If you already know for sure that a bytes buffer is well-formed in UTF-8,
    /// consider the `unsafe` [`from_utf8_unchecked`](#method.from_utf8_unchecked) method,
    /// which takes `O(1)` time, instead.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use zbuf::{StrBuf, BytesBuf};
    /// assert!(StrBuf::from_utf8(BytesBuf::from(&b"abc"[..])).is_ok());
    /// assert!(StrBuf::from_utf8(BytesBuf::from(&b"ab\x80"[..])).is_err());
    /// ```
    #[inline]
    pub fn from_utf8(bytes: BytesBuf) -> Result<Self, FromUtf8Error> {
        // Reject early, handing the input back to the caller inside the error.
        if let Err(cause) = str::from_utf8(&bytes) {
            return Err(FromUtf8Error {
                bytes_buf: bytes,
                utf8_error: cause,
            })
        }
        Ok(StrBuf(bytes))
    }

    /// Converts a bytes buffer into a string buffer without checking UTF-8 well-formedness.
    ///
    /// This takes `O(1)` time: the bytes buffer is simply wrapped.
    /// No heap memory is allocated or data copied, since this takes ownership of the bytes buffer.
    ///
    /// For a checked conversion, see [`from_utf8`](#method.from_utf8).
    ///
    /// ## Safety
    ///
    /// The given bytes buffer must be well-formed in UTF-8.
    /// The rest of this module views the wrapped bytes as a `str` without re-checking them.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use zbuf::{StrBuf, BytesBuf};
    /// let bytes_buf = BytesBuf::from(b"abc".as_ref());
    /// let str_buf = unsafe {
    ///     StrBuf::from_utf8_unchecked(bytes_buf)
    /// };
    /// assert_eq!(str_buf, "abc");
    /// ```
    #[inline]
    pub unsafe fn from_utf8_unchecked(bytes: BytesBuf) -> Self {
        StrBuf(bytes)
    }

    /// Converts a bytes buffer into a string buffer.
    ///
    /// This takes `O(length)` time to check that the input is well-formed in UTF-8,
    /// and replaces invalid byte sequences (decoding errors) with the replacement character U+FFFD.
    /// No heap memory is allocated or data copied, since this takes ownership of the bytes buffer.
    ///
    /// If you want to handle decoding errors differently,
    /// consider the [`from_utf8`](#method.from_utf8) method which returns a `Result`.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use zbuf::{StrBuf, BytesBuf};
    /// assert_eq!(StrBuf::from_utf8_lossy(BytesBuf::from(&b"abc"[..])), "abc");
    /// assert_eq!(StrBuf::from_utf8_lossy(BytesBuf::from(&b"ab\x80"[..])), "ab�");
    /// ```
    pub fn from_utf8_lossy(bytes: BytesBuf) -> Self {
        let mut decoder = LossyUtf8Decoder::new();
        let mut buf: StrBuf = decoder.feed(bytes).collect();
        buf.extend(decoder.end());
        buf
    }

    /// Decode a sequence of bytes buffers into a single string buffer, strictly.
    ///
    /// This takes `O(total length)` time to check that the input is well-formed in UTF-8,
    /// and returns an error at the first invalid byte sequence (decoding error),
    /// including one reported when the input ends.
    /// Each item is converted into a `BytesBuf` and fed to a `StrictUtf8Decoder`;
    /// the decoded pieces are appended to the result with
    /// [`push_buf`](#method.push_buf).
    ///
    /// ## Examples
    ///
    /// ```
    /// # use zbuf::StrBuf;
    /// let chunks = [
    ///     &[0xF0, 0x9F][..],
    ///     &[0x8E],
    ///     &[0x89],
    /// ];
    /// assert_eq!(StrBuf::from_utf8_iter(&chunks).unwrap(), "🎉");
    /// ```
    pub fn from_utf8_iter<I>(iter: I) -> Result<Self, Utf8DecoderError>
    where I: IntoIterator, I::Item: Into<BytesBuf> {
        let mut decoder = StrictUtf8Decoder::new();
        let mut decoded = StrBuf::new();
        for chunk in iter {
            let chunk: BytesBuf = chunk.into();
            for piece in decoder.feed(chunk) {
                // Stop at the first decoding error.
                let piece = piece?;
                decoded.push_buf(&piece);
            }
        }
        // Let the decoder report any error detected at end of input.
        decoder.end()?;
        Ok(decoded)
    }

    /// Decode a sequence of bytes buffers into a single string buffer, lossily.
    ///
    /// This takes `O(total length)` time to check that the input is well-formed in UTF-8,
    /// and replaces invalid byte sequences (decoding errors) with the replacement character U+FFFD.
    /// Each item is converted into a `BytesBuf` and fed to a `LossyUtf8Decoder`;
    /// whatever the decoder yields is appended to the result.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use zbuf::StrBuf;
    /// let chunks = [
    ///     &[0xF0, 0x9F][..],
    ///     &[0x8E],
    ///     &[0x89, 0xF0, 0x9F],
    /// ];
    /// assert_eq!(StrBuf::from_utf8_iter_lossy(&chunks), "🎉�");
    /// ```
    pub fn from_utf8_iter_lossy<I>(iter: I) -> Self
    where I: IntoIterator, I::Item: Into<BytesBuf> {
        let mut decoder = LossyUtf8Decoder::new();
        let mut decoded = StrBuf::new();
        for chunk in iter {
            let chunk: BytesBuf = chunk.into();
            decoded.extend(decoder.feed(chunk));
        }
        // Flush whatever the decoder still holds once the input is exhausted.
        decoded.extend(decoder.end());
        decoded
    }

    /// Return a shared (immutable) reference to the bytes buffer representation
    /// of this string buffer.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use zbuf::StrBuf;
    /// let buf = StrBuf::from("🎉").as_bytes_buf().clone();
    /// assert_eq!(buf, [0xF0, 0x9F, 0x8E, 0x89]);
    /// ```
    #[inline]
    pub fn as_bytes_buf(&self) -> &BytesBuf {
        // This return value can be cloned to obtain a bytes buffer that shares
        // the same heap allocation as this string buffer.
        // Since that clone is shared, any mutation will cause it to re-allocate.
        // Therefore this can not be use to make a `StrBuf` not UTF-8.
        &self.0
    }

    /// Return how many bytes (not `char`s) this buffer contains.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use zbuf::StrBuf;
    /// assert_eq!(StrBuf::from("🎉").len(), 4);
    /// ```
    #[inline]
    pub fn len(&self) -> usize {
        self.as_bytes_buf().len()
    }

    /// Return whether this buffer is empty, that is, whether its length is zero bytes.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use zbuf::StrBuf;
    /// assert_eq!(StrBuf::new().is_empty(), true);
    /// assert_eq!(StrBuf::from("abc").is_empty(), false);
    /// ```
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Return the length, in bytes, to which this buffer can grow
    /// without re-allocating.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use zbuf::StrBuf;
    /// assert!(StrBuf::with_capacity(17).capacity() >= 17);
    /// ```
    #[inline]
    pub fn capacity(&self) -> usize {
        self.as_bytes_buf().capacity()
    }

    /// Drop the first `bytes` bytes of the buffer.
    ///
    /// This takes `O(1)` time and does not copy any heap-allocated data.
    ///
    /// ## Panics
    ///
    /// Panics if `bytes` is out of bounds or not at a `char` boundary.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use zbuf::StrBuf;
    /// let mut buf = StrBuf::from("hello");
    /// buf.pop_front(2);
    /// assert_eq!(buf, "llo");
    /// ```
    pub fn pop_front(&mut self, bytes: usize) {
        {
            // Slicing the `str` view validates `bytes` first: it panics with a
            // descriptive message when out of bounds or inside a multi-byte character.
            let text: &str = &*self;
            let _ = &text[bytes..];
        }
        self.0.pop_front(bytes)
    }

    /// Drop the last `bytes` bytes of the buffer.
    ///
    /// This takes `O(1)` time and does not copy any heap-allocated data.
    ///
    /// ## Panics
    ///
    /// Panics if `bytes` is larger than the buffer’s length,
    /// or if the resulting length is not at a `char` boundary.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use zbuf::StrBuf;
    /// let mut buf = StrBuf::from("hello");
    /// buf.pop_back(2);
    /// assert_eq!(buf, "hel");
    /// ```
    pub fn pop_back(&mut self, bytes: usize) {
        let available = self.len();
        if bytes > available {
            panic!("tried to pop {} bytes, only {} are available", bytes, available)
        }
        // `truncate` performs the `char` boundary check.
        self.truncate(available - bytes)
    }

    /// Cut the buffer in two at byte index `at`.
    ///
    /// Afterwards `self` contains bytes `[0, at)`,
    /// and the returned buffer contains bytes `[at, len)`.
    ///
    /// ## Panics
    ///
    /// Panics if `at` is out of bounds or not at a `char` boundary.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use zbuf::StrBuf;
    /// let mut buf = StrBuf::from("hello");
    /// let tail = buf.split_off(2);
    /// assert_eq!(buf, "he");
    /// assert_eq!(tail, "llo");
    /// ```
    pub fn split_off(&mut self, at: usize) -> StrBuf {
        {
            // Slicing the `str` view validates `at` first: it panics with a
            // descriptive message when out of bounds or inside a multi-byte character.
            let text: &str = &*self;
            let _ = &text[..at];
        }
        let tail = self.0.split_off(at);
        StrBuf(tail)
    }

    /// Remove all contents from the buffer.
    ///
    /// This makes the buffer empty but, unless it is shared, does not change its capacity.
    ///
    /// If potentially freeing memory is preferable, consider `buf = StrBuf::new()` instead.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use zbuf::StrBuf;
    /// let mut buf = StrBuf::from("hello");
    /// assert_eq!(buf, "hello");
    /// buf.clear();
    /// assert_eq!(buf, "");
    /// assert!(buf.capacity() > 0);
    /// ```
    #[inline]
    pub fn clear(&mut self) {
        self.0.clear()
    }

    /// Shorten the buffer so that it is at most `new_len` bytes long.
    ///
    /// If `new_len` is greater than or equal to the buffer’s current length,
    /// this has no effect.
    ///
    /// ## Panics
    ///
    /// Panics if `new_len` is not at a `char` boundary.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use zbuf::StrBuf;
    /// let mut buf = StrBuf::from("hello");
    /// buf.truncate(10);
    /// assert_eq!(buf, "hello");
    /// buf.truncate(2);
    /// assert_eq!(buf, "he");
    /// ```
    pub fn truncate(&mut self, new_len: usize) {
        if new_len >= self.len() {
            // Nothing to cut off.
            return
        }
        {
            // Slicing the `str` view panics with a descriptive message
            // when `new_len` falls inside a multi-byte character.
            let text: &str = &*self;
            let _ = &text[..new_len];
        }
        self.0.truncate(new_len)
    }

    /// Ensures that the buffer has capacity for at least (typically more than)
    /// `additional` bytes beyond its current length.
    ///
    /// This copies the data if this buffer is shared or if the existing capacity is insufficient.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use zbuf::StrBuf;
    /// let mut buf = StrBuf::from(&*"abc".repeat(10));
    /// assert!(buf.capacity() < 100);
    /// buf.reserve(100);
    /// assert!(buf.capacity() >= 130);
    /// ```
    #[inline]
    pub fn reserve(&mut self, additional: usize) {
        self.0.reserve(additional)
    }

    /// Extend this buffer by writing to its existing capacity.
    ///
    /// The closure is given a potentially-uninitialized mutable string slice,
    /// and returns the number of consecutive bytes written from the start of the slice.
    /// The buffer’s length is increased by that much.
    ///
    /// If `self.reserve(additional)` is called immediately before this method,
    /// the slice is at least `additional` bytes long.
    /// Without a `reserve` call the slice can be any length, including zero.
    ///
    /// This copies the existing data if there are other references to this buffer.
    ///
    /// ## Safety
    ///
    /// The closure must not *read* from the given slice, which may be uninitialized.
    /// It must initialize the `0..written` range and make it well-formed in UTF-8,
    /// where `written` is the return value.
    ///
    /// ## Panics
    ///
    /// Panics if the value returned by the closure is larger than the given slice’s length.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use zbuf::StrBuf;
    /// let mut buf = StrBuf::from("hello");
    /// buf.reserve(10);
    /// unsafe {
    ///     buf.write_to_uninitialized_tail(|uninitialized_str| {
    ///         let uninitialized_bytes = as_bytes_mut(uninitialized_str);
    ///         for byte in &mut uninitialized_bytes[..3] {
    ///             *byte = b'!'
    ///         }
    ///         3
    ///     })
    /// }
    /// assert_eq!(buf, "hello!!!");
    ///
    /// /// https://github.com/rust-lang/rust/issues/41119
    /// unsafe fn as_bytes_mut(s: &mut str) -> &mut [u8] {
    ///     ::std::mem::transmute(s)
    /// }
    /// ```
    pub unsafe fn write_to_uninitialized_tail<F>(&mut self, f: F)
    where F: FnOnce(&mut str) -> usize {
        self.0.write_to_uninitialized_tail(|uninitialized| {
            // Safety: these bytes may be uninitialized, so nothing here establishes
            // that they are UTF-8. Soundness rests on the caller’s contract
            // (see “Safety” above): `f` never reads the slice, and the `0..written`
            // range it reports is initialized and well-formed in UTF-8.
            let uninitialized_str = str_from_utf8_unchecked_mut(uninitialized);
            f(uninitialized_str)
        })
    }

    /// Extend this buffer by writing to its existing capacity.
    ///
    /// The closure is given a mutable string slice
    /// that has been overwritten with zeros (which takes `O(n)` extra time),
    /// and returns the number of bytes, counted from the start of the slice,
    /// to keep. The buffer’s length is increased by that much.
    ///
    /// If `self.reserve(additional)` is called immediately before this method,
    /// the slice is at least `additional` bytes long.
    /// Without a `reserve` call the slice can be any length, including zero.
    ///
    /// This copies the existing data if there are other references to this buffer.
    ///
    /// ## Panics
    ///
    /// Panics if the value returned by the closure is larger than the given slice’s length,
    /// or if it is not at a `char` boundary.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use zbuf::StrBuf;
    /// let mut buf = StrBuf::from("hello");
    /// buf.reserve(10);
    /// buf.write_to_zeroed_tail(|tail| {
    ///     let tail = unsafe {
    ///         as_bytes_mut(tail)
    ///     };
    ///     for byte in &mut tail[..3] {
    ///         *byte = b'!'
    ///     }
    ///     10
    /// });
    /// assert_eq!(buf, "hello!!!\0\0\0\0\0\0\0");
    ///
    /// /// https://github.com/rust-lang/rust/issues/41119
    /// unsafe fn as_bytes_mut(s: &mut str) -> &mut [u8] {
    ///     ::std::mem::transmute(s)
    /// }
    /// ```
    pub fn write_to_zeroed_tail<F>(&mut self, f: F)
    where F: FnOnce(&mut str) -> usize {
        self.0.write_to_zeroed_tail(|tail_bytes| {
            // Safety: a sequence of zero bytes is well-formed UTF-8.
            let tail_str = unsafe {
                str_from_utf8_unchecked_mut(tail_bytes)
            };
            let additional_len = f(tail_str);
            // Check bounds and char boundary with a nice panic message, so that
            // the kept prefix cannot end in the middle of a multi-byte character.
            let _: &str = &tail_str[..additional_len];
            additional_len
        })
    }

    /// Append a string slice to the end of this buffer.
    ///
    /// This copies the existing data if this buffer is shared
    /// or if the existing capacity is insufficient.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use zbuf::StrBuf;
    /// let mut buf = StrBuf::from("hello");
    /// buf.push_str(" world!");
    /// assert_eq!(buf, "hello world!");
    /// ```
    #[inline]
    pub fn push_str(&mut self, slice: &str) {
        // A `&str` is always well-formed UTF-8, so appending its bytes keeps
        // the buffer well-formed.
        let utf8: &[u8] = slice.as_bytes();
        self.0.push_slice(utf8)
    }

    /// Append a single character, encoded as UTF-8, to the end of this buffer.
    ///
    /// This copies the existing data if this buffer is shared
    /// or if the existing capacity is insufficient.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use zbuf::StrBuf;
    /// let mut buf = StrBuf::from("hello");
    /// buf.push_char('!');
    /// assert_eq!(buf, "hello!");
    /// ```
    #[inline]
    pub fn push_char(&mut self, c: char) {
        // A `char` encodes to at most four bytes in UTF-8.
        let mut scratch = [0u8; 4];
        let encoded: &str = c.encode_utf8(&mut scratch);
        self.push_str(encoded)
    }

    /// Append another string buffer to the end of this buffer.
    ///
    /// This is similar to [`push_str`](#method.push_str), but sometimes more efficient.
    ///
    /// ## Examples
    ///
    /// This allocates only once:
    ///
    /// ```
    /// # use zbuf::StrBuf;
    /// let string = "abc".repeat(20);
    /// let mut buf = StrBuf::from(&*string);
    /// let tail = buf.split_off(50);
    /// assert_eq!(buf.len(), 50);
    /// assert_eq!(tail.len(), 10);
    /// buf.push_buf(&tail);
    /// assert_eq!(buf, string);
    /// ```
    #[inline]
    pub fn push_buf(&mut self, other: &StrBuf) {
        let other_bytes = other.as_bytes_buf();
        self.0.push_buf(other_bytes)
    }
}

// FIXME https://github.com/rust-lang/rust/issues/41119
/// Reinterpret a mutable byte slice as a mutable string slice, without any check.
///
/// Safety: the caller is responsible for the bytes being acceptable as `str`
/// contents wherever the result is read as a string.
#[inline]
unsafe fn str_from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
    // Spell out both types so that the transmute cannot silently change meaning.
    mem::transmute::<&mut [u8], &mut str>(v)
}

/// A string buffer can be used wherever a `&str` is expected.
impl Deref for StrBuf {
    type Target = str;

    #[inline]
    fn deref(&self) -> &str {
        let bytes: &[u8] = &self.0;
        // Safety: the BytesBuf inside StrBuf is private,
        // and this module maintains UTF-8 well-formedness.
        unsafe {
            str::from_utf8_unchecked(bytes)
        }
    }
}

/// This copies the existing data if there are other references to this buffer.
impl DerefMut for StrBuf {
    #[inline]
    fn deref_mut(&mut self) -> &mut str {
        let bytes: &mut [u8] = &mut self.0;
        // Safety: the BytesBuf inside StrBuf is private,
        // and this module maintains UTF-8 well-formedness.
        unsafe {
            str_from_utf8_unchecked_mut(bytes)
        }
    }
}

/// Borrow the contents as a string slice (same view as `Deref`).
impl AsRef<str> for StrBuf {
    #[inline]
    fn as_ref(&self) -> &str {
        &**self
    }
}

/// Mutably borrow the contents as a string slice (same view as `DerefMut`).
impl AsMut<str> for StrBuf {
    #[inline]
    fn as_mut(&mut self) -> &mut str {
        &mut **self
    }
}

/// Build a string buffer from the bytes of a string slice.
impl<'a> From<&'a str> for StrBuf {
    #[inline]
    fn from(slice: &'a str) -> Self {
        // A `&str` is already well-formed UTF-8, so no validation is needed.
        let utf8: &[u8] = slice.as_bytes();
        StrBuf(BytesBuf::from(utf8))
    }
}

impl From<StrBuf> for BytesBuf {
    #[inline]
    fn from(buf: StrBuf) -> Self {
        buf.0
    }
}

/// Formats exactly like the `Debug` implementation of `str`.
impl fmt::Debug for StrBuf {
    #[inline]
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        // Go through the `str` view explicitly, so this cannot recurse into itself.
        let text: &str = self;
        fmt::Debug::fmt(text, formatter)
    }
}

/// Formats exactly like the `Display` implementation of `str`.
impl fmt::Display for StrBuf {
    #[inline]
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        // Go through the `str` view explicitly, so this cannot recurse into itself.
        let text: &str = self;
        fmt::Display::fmt(text, formatter)
    }
}

/// Compare the contents with anything that can be viewed as a `str`
/// (including another `StrBuf`).
impl<T: AsRef<str>> PartialEq<T> for StrBuf {
    #[inline]
    fn eq(&self, other: &T) -> bool {
        let lhs: &str = self;
        let rhs: &str = other.as_ref();
        lhs == rhs
    }
}

/// Order the contents against anything that can be viewed as a `str`,
/// using the ordering of `str`.
impl<T: AsRef<str>> PartialOrd<T> for StrBuf {
    #[inline]
    fn partial_cmp(&self, other: &T) -> Option<::std::cmp::Ordering> {
        let lhs: &str = self;
        let rhs: &str = other.as_ref();
        lhs.partial_cmp(rhs)
    }
}

/// Append every character of the iterator, in order.
impl Extend<char> for StrBuf {
    #[inline]
    fn extend<I>(&mut self, iter: I) where I: IntoIterator<Item=char> {
        for ch in iter {
            self.push_char(ch);
        }
    }
}

impl FromIterator<char> for StrBuf {
    #[inline]
    fn from_iter<I>(iter: I) -> Self where I: IntoIterator<Item=char> {
        let mut buf = Self::new();
        buf.extend(iter);
        buf
    }
}

/// Append every referenced character of the iterator, in order.
impl<'a> Extend<&'a char> for StrBuf {
    #[inline]
    fn extend<I>(&mut self, iter: I) where I: IntoIterator<Item=&'a char> {
        // Copy each `char` out of its reference and reuse `Extend<char>`.
        self.extend(iter.into_iter().cloned())
    }
}

impl<'a> FromIterator<&'a char> for StrBuf {
    #[inline]
    fn from_iter<I>(iter: I) -> Self where I: IntoIterator<Item=&'a char> {
        let mut buf = Self::new();
        buf.extend(iter);
        buf
    }
}

/// Append every string slice of the iterator, in order.
impl<'a> Extend<&'a str> for StrBuf {
    #[inline]
    fn extend<I>(&mut self, iter: I) where I: IntoIterator<Item=&'a str> {
        for piece in iter {
            self.push_str(piece);
        }
    }
}

impl<'a> FromIterator<&'a str> for StrBuf {
    #[inline]
    fn from_iter<I>(iter: I) -> Self where I: IntoIterator<Item=&'a str> {
        let mut buf = Self::new();
        buf.extend(iter);
        buf
    }
}

/// Append every referenced string buffer of the iterator, in order.
impl<'a> Extend<&'a StrBuf> for StrBuf {
    #[inline]
    fn extend<I>(&mut self, iter: I) where I: IntoIterator<Item=&'a StrBuf> {
        for piece in iter {
            self.push_buf(piece);
        }
    }
}

impl<'a> FromIterator<&'a StrBuf> for StrBuf {
    #[inline]
    fn from_iter<I>(iter: I) -> Self where I: IntoIterator<Item=&'a StrBuf> {
        let mut buf = Self::new();
        buf.extend(iter);
        buf
    }
}

/// Append every owned string buffer of the iterator, in order.
impl Extend<StrBuf> for StrBuf {
    #[inline]
    fn extend<I>(&mut self, iter: I) where I: IntoIterator<Item=StrBuf> {
        for piece in iter {
            self.push_buf(&piece);
        }
    }
}

impl FromIterator<StrBuf> for StrBuf {
    #[inline]
    fn from_iter<I>(iter: I) -> Self where I: IntoIterator<Item=StrBuf> {
        let mut buf = Self::new();
        buf.extend(iter);
        buf
    }
}

/// Allows formatting directly into a string buffer, e.g. with `write!`.
///
/// Both methods append to the buffer and always return `Ok(())`.
impl fmt::Write for StrBuf {
    /// Append `s` with `push_str`.
    #[inline]
    fn write_str(&mut self, s: &str) -> fmt::Result {
        self.push_str(s);
        Ok(())
    }

    /// Append `c` with `push_char`.
    fn write_char(&mut self, c: char) -> fmt::Result {
        self.push_char(c);
        Ok(())
    }
}

/// The error type for [`StrBuf::from_utf8`](struct.StrBuf.html#method.from_utf8).
///
/// It keeps the rejected bytes buffer, so the caller can get it back,
/// together with the error reported by `std::str::from_utf8`.
#[derive(Debug)]
pub struct FromUtf8Error {
    // The bytes buffer that was passed to `StrBuf::from_utf8` and rejected.
    bytes_buf: BytesBuf,
    // The error returned by `std::str::from_utf8` for those bytes.
    utf8_error: str::Utf8Error,
}

impl FromUtf8Error {
    /// Return a shared reference to the bytes buffer that failed to convert.
    pub fn as_bytes_buf(&self) -> &BytesBuf {
        &self.bytes_buf
    }

    /// Consume the error and return the bytes buffer that failed to convert.
    pub fn into_bytes_buf(self) -> BytesBuf {
        self.bytes_buf
    }

    /// Return a copy of the underlying `std::str::Utf8Error`.
    pub fn utf8_error(&self) -> str::Utf8Error {
        self.utf8_error
    }
}

/// Displays the message of the underlying `std::str::Utf8Error`.
impl fmt::Display for FromUtf8Error {
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.utf8_error, formatter)
    }
}

/// Makes `FromUtf8Error` usable as a standard error type.
impl error::Error for FromUtf8Error {
    /// A fixed, short description; the detailed message comes from `Display`.
    fn description(&self) -> &str {
        "invalid utf-8"
    }
}

/// Converts into an I/O error of kind `InvalidData` that wraps the underlying
/// `std::str::Utf8Error`. The rejected bytes buffer is dropped.
impl From<FromUtf8Error> for io::Error {
    fn from(error: FromUtf8Error) -> Self {
        let cause = error.utf8_error();
        io::Error::new(io::ErrorKind::InvalidData, cause)
    }
}