eventson 0.1.0

An event-based JSON parser with competitive performance
Documentation
use std::io::ErrorKind;

/// Fixed-capacity read buffer sitting between a [`std::io::Read`] source and
/// its consumer.
///
/// The backing storage is allocated once in [`Buf::new`] and is never resized.
/// Unread bytes are moved to the front of the storage before more data is
/// pulled from the reader.
pub(crate) struct Buf<R> {
    /// Source reader.
    reader: R,
    /// Backing storage; only `buf[pos..end]` holds valid, unread data.
    buf: Vec<u8>,
    /// The overall offset into the stream that has been consumed so far, i.e.
    /// the absolute stream offset of the byte at `pos`. Every method that
    /// moves `pos` relative to the data keeps this value in sync.
    stream_pos: u64,

    // The indices marking the active portion of this buffer. Both are always
    // valid slicable _end_ indices into `buf`, but may not be valid start
    // indices, as they may indicate an index one past the end of the buffer.
    // This only happens for `pos` when `pos` == `end`.
    pos: usize,
    end: usize,
}

impl<R> Buf<R>
where
    R: std::io::Read,
{
    /// Creates an empty buffer of `cap` bytes over `reader`.
    ///
    /// No data is read until [`Buf::refill`] (or a method built on it) is
    /// called.
    pub fn new(reader: R, cap: usize) -> Self {
        Self {
            reader,
            buf: vec![0; cap],
            stream_pos: 0,
            pos: 0,
            end: 0,
        }
    }

    /// Returns the number of stream bytes consumed so far.
    #[inline]
    pub fn stream_pos(&self) -> u64 {
        self.stream_pos
    }

    /// Returns the fixed size of the backing storage in bytes.
    #[inline]
    pub fn cap(&self) -> usize {
        self.buf.len()
    }

    /// Returns the unread bytes currently held in the buffer.
    #[inline]
    pub fn bytes(&self) -> &[u8] {
        &self.buf[self.pos..self.end]
    }

    /// Returns the unread bytes currently held in the buffer, mutably.
    #[inline]
    pub fn bytes_mut(&mut self) -> &mut [u8] {
        &mut self.buf[self.pos..self.end]
    }

    /// Moves the unread bytes to the front of the backing storage so that the
    /// free space forms one contiguous tail.
    fn scroll(&mut self) {
        // Already at the front: nothing to move and the indices are correct.
        if self.pos == 0 {
            return;
        }

        let unread = self.end - self.pos;
        if unread > 0 {
            self.buf.copy_within(self.pos..self.end, 0);
        }
        self.pos = 0;
        self.end = unread;
    }

    /// Compacts the buffer and performs one read into the free tail.
    ///
    /// Returns the number of bytes added. `Ok(0)` means either that the
    /// reader reported end of stream or that the buffer had no free space.
    ///
    /// # Errors
    ///
    /// Propagates any error from the reader, except
    /// [`ErrorKind::Interrupted`], which is retried.
    ///
    /// # Panics
    ///
    /// Panics if the buffer was created with a capacity of zero.
    pub fn refill(&mut self) -> std::io::Result<usize> {
        self.scroll();
        // `scroll` leaves `pos` at 0, so this is the original
        // `pos < buf.len()` sanity check.
        assert!(
            !self.buf.is_empty(),
            "cannot refill a zero-capacity buffer"
        );

        loop {
            match self.reader.read(&mut self.buf[self.end..]) {
                Ok(added) => {
                    self.end += added;
                    return Ok(added);
                }
                // An interrupted read transferred nothing; try again.
                Err(err) if err.kind() == ErrorKind::Interrupted => continue,
                Err(err) => return Err(err),
            }
        }
    }

    /// Makes sure at least `count` unread bytes are available, reading from
    /// the source as often as necessary.
    ///
    /// A single `read` may legitimately return fewer bytes than requested, so
    /// this keeps refilling until the request is satisfied or no further
    /// progress is possible.
    ///
    /// # Errors
    ///
    /// Returns [`ErrorKind::UnexpectedEof`] when the reader runs dry, or the
    /// buffer runs out of room (e.g. `count` exceeds the capacity), before
    /// `count` bytes are available. Other reader errors are propagated.
    #[inline]
    pub fn ensure_capacity(&mut self, count: usize) -> std::io::Result<()> {
        while self.end - self.pos < count {
            // Zero bytes added means end of stream or a full buffer; either
            // way the request can never be satisfied.
            if self.refill()? == 0 {
                return Err(std::io::Error::new(
                    ErrorKind::UnexpectedEof,
                    "Not enough bytes to fulfill capacity",
                ));
            }
        }

        Ok(())
    }

    /// Takes one byte from the buffer, consuming it.
    ///
    /// # Panics
    ///
    /// Panics if there are no unread bytes.
    #[inline]
    pub fn munch(&mut self) -> u8 {
        assert!(self.pos < self.end);
        let b = self.buf[self.pos];
        self.pos += 1;
        // A munched byte is consumed just like one passed to `consume`.
        self.stream_pos += 1;
        b
    }

    /// Consumes `count` bytes and returns the sub-slice `start..end` of the
    /// bytes that were unread before the call.
    ///
    /// This is useful when you need to consume bytes, but also get access to
    /// the part of the slice that was consumed.
    ///
    /// # Panics
    ///
    /// Panics if `count` or `end` exceeds the number of unread bytes, or if
    /// `start > end`.
    #[inline]
    pub fn consume_and_slice(&mut self, start: usize, end: usize, count: usize) -> &[u8] {
        let unread = self.bytes().len();
        assert!(count <= unread);
        assert!(start <= end);
        // Never hand out stale bytes lying beyond the valid data.
        assert!(end <= unread);

        let start = self.pos + start;
        let end = self.pos + end;
        self.consume(count);
        &self.buf[start..end]
    }

    /// Consumes `count` unread bytes.
    ///
    /// # Panics
    ///
    /// Panics if `count` exceeds the number of unread bytes.
    #[inline]
    pub fn consume(&mut self, count: usize) {
        assert!(count <= self.bytes().len());
        self.pos += count;
        self.stream_pos += count as u64;
    }

    /// Consumes every unread byte and then refills the buffer.
    ///
    /// Returns the number of bytes read, as [`Buf::refill`] does.
    #[inline]
    pub fn consume_all_and_refill(&mut self) -> std::io::Result<usize> {
        // Go through `consume` so the discarded bytes count towards
        // `stream_pos`.
        self.consume(self.end - self.pos);
        self.refill()
    }

    /// Un-consumes the last `count` bytes, making them unread again.
    ///
    /// # Panics
    ///
    /// Panics if fewer than `count` consumed bytes remain in the buffer
    /// (consumed bytes are discarded by a refill).
    #[inline]
    pub fn seek_back(&mut self, count: usize) {
        assert!(self.pos >= count);
        self.pos -= count;
        // `stream_pos` is never smaller than `pos`, so this cannot underflow.
        self.stream_pos -= count as u64;
    }

    /// Returns the first `count` unread bytes without consuming them.
    ///
    /// # Panics
    ///
    /// Panics if fewer than `count` unread bytes are available.
    #[inline]
    pub fn peek(&self, count: usize) -> &[u8] {
        // Bound by the valid data, not by the backing storage, so stale
        // bytes past `end` are never exposed.
        assert!(count <= self.end - self.pos);
        &self.buf[self.pos..self.pos + count]
    }
}

#[cfg(test)]
mod test {
    //! Unit tests for `Buf`. Most tests use a five-byte buffer over the
    //! eleven-byte input `hello world`, so a single refill can never hold the
    //! whole stream.

    use super::*;
    use std::io::Cursor;

    /// Consuming everything and refilling walks the stream in
    /// capacity-sized steps until the tail is reached.
    #[test]
    fn test_consume_all_and_refill() {
        let mut buf = Buf::new(Cursor::new(b"hello world"), 5);
        buf.refill().unwrap();
        assert_eq!(b"hello", buf.bytes());
        buf.consume_all_and_refill().unwrap();
        assert_eq!(b" worl", buf.bytes());
        buf.consume_all_and_refill().unwrap();
        assert_eq!(b"d", buf.bytes());
    }

    /// Peeking returns prefixes of the unread bytes and consumes nothing.
    #[test]
    fn test_peek() {
        let mut buf = Buf::new(Cursor::new(b"hello world"), 5);
        buf.refill().unwrap();
        assert_eq!(b"h", buf.peek(1));
        assert_eq!(b"he", buf.peek(2));
        assert_eq!(b"hel", buf.peek(3));
        assert_eq!(b"hell", buf.peek(4));
        assert_eq!(b"hello", buf.peek(5));
        assert_eq!(b"hello", buf.bytes());
    }

    /// Consuming shrinks the unread window from the front.
    #[test]
    fn test_consume() {
        let mut buf = Buf::new(Cursor::new(b"hello world"), 5);
        buf.refill().unwrap();
        buf.consume(0);
        assert_eq!(b"hello", buf.bytes());
        buf.consume(3);
        assert_eq!(b"lo", buf.bytes());
        buf.consume(2);
        assert_eq!(b"", buf.bytes());
    }

    /// `consume` advances the reported stream position, across refills too.
    #[test]
    fn test_stream_pos_after_consume() {
        let mut buf = Buf::new(Cursor::new(b"hello world"), 5);
        assert_eq!(0, buf.stream_pos());
        buf.refill().unwrap();
        buf.consume(3);
        assert_eq!(3, buf.stream_pos());
        buf.refill().unwrap();
        buf.consume(2);
        assert_eq!(5, buf.stream_pos());
    }

    /// Bytes are munched one at a time, in stream order, across refills.
    #[test]
    fn test_munch() {
        let mut buf = Buf::new(Cursor::new(b"hello world"), 5);
        buf.refill().unwrap();
        assert_eq!(b'h', buf.munch());
        assert_eq!(b'e', buf.munch());
        assert_eq!(b'l', buf.munch());
        assert_eq!(b'l', buf.munch());
        assert_eq!(b'o', buf.munch());
        buf.refill().unwrap();
        assert_eq!(b' ', buf.munch());
        assert_eq!(b'w', buf.munch());
        assert_eq!(b'o', buf.munch());
        assert_eq!(b'r', buf.munch());
        assert_eq!(b'l', buf.munch());
        buf.refill().unwrap();
        assert_eq!(b'd', buf.munch());
    }

    /// Seeking back makes previously consumed bytes unread again.
    #[test]
    fn test_seek_back() {
        let mut buf = Buf::new(Cursor::new(b"hello world"), 5);
        buf.refill().unwrap();
        buf.consume(3);
        buf.seek_back(2);
        assert_eq!(b"ello", buf.bytes());
    }

    /// The returned slice is relative to the window before consumption.
    #[test]
    fn test_consume_and_slice() {
        let mut buf = Buf::new(Cursor::new(b"hello world"), 5);
        buf.refill().unwrap();
        assert_eq!(b"ell", buf.consume_and_slice(1, 4, 5));
        assert_eq!(b"", buf.bytes());
    }

    /// Writes through `bytes_mut` are visible through `bytes`.
    #[test]
    fn test_bytes_mut() {
        let mut buf = Buf::new(Cursor::new(b"hello world"), 5);
        buf.refill().unwrap();
        buf.bytes_mut()[0] = b'j';
        assert_eq!(b"jello", buf.bytes());
    }

    /// A request already satisfied by the buffer triggers no refill.
    #[test]
    fn test_ensure_capacity_3() {
        let mut buf = Buf::new(Cursor::new(b"hello world"), 5);
        buf.refill().unwrap();
        buf.ensure_capacity(1).unwrap();
        buf.munch();
        assert_eq!(b"ello", buf.bytes());
    }

    /// A request larger than the unread remainder pulls in more data.
    #[test]
    fn test_ensure_capacity_2() {
        let mut buf = Buf::new(Cursor::new(b"hello world"), 5);
        buf.refill().unwrap();
        buf.consume(3);
        buf.ensure_capacity(5).unwrap();
        assert_eq!(b"lo wo", buf.bytes());
    }

    /// Every request up to and including the full capacity leaves a full
    /// buffer untouched.
    #[test]
    fn test_ensure_capacity() {
        let mut buf = Buf::new(Cursor::new(b"hello world"), 5);
        buf.refill().unwrap();
        for i in 0..=buf.cap() {
            buf.ensure_capacity(i).unwrap();
            assert_eq!(b"hello", buf.bytes());
        }
    }

    /// Asking for more than the stream holds reports `UnexpectedEof` and
    /// keeps the bytes that were available.
    #[test]
    fn test_ensure_capacity_eof() {
        let mut buf = Buf::new(Cursor::new(b"hi"), 5);
        buf.refill().unwrap();
        let err = buf.ensure_capacity(3).unwrap_err();
        assert_eq!(ErrorKind::UnexpectedEof, err.kind());
        assert_eq!(b"hi", buf.bytes());
    }

    /// A refill never reads more than the capacity.
    #[test]
    fn test_refill_full() {
        let mut buf = Buf::new(Cursor::new(b"hello world"), 5);
        assert_eq!(5, buf.cap());
        buf.refill().unwrap();
        assert_eq!(b"hello", buf.bytes());
    }

    /// A buffer larger than the stream ends up holding the whole stream.
    #[test]
    fn test_fill() {
        let mut buf = Buf::new(Cursor::new(b"hello world"), 256);
        assert_eq!(0, buf.pos);
        assert_eq!(0, buf.end);
        buf.refill().unwrap();
        assert_eq!(11, buf.end);
        assert_eq!(0, buf.pos);
        assert_eq!(b"hello world", buf.bytes());
    }

    /// Refilling an exhausted buffer replaces its contents.
    #[test]
    fn test_refill() {
        let mut buf = Buf::new(Cursor::new(b"hello world"), 5);
        buf.refill().unwrap();
        buf.consume(5);
        buf.refill().unwrap();
        assert_eq!(b" worl", buf.bytes());
    }

    /// Refilling a partly consumed buffer keeps the unread tail in front of
    /// the new data.
    #[test]
    fn test_partial_refill() {
        let mut buf = Buf::new(Cursor::new(b"hello world"), 5);
        buf.refill().unwrap();
        buf.consume(3);
        assert_eq!(3, buf.pos);
        assert_eq!(5, buf.end);
        buf.refill().unwrap();
        assert_eq!(b"lo wo", buf.bytes());
    }

    /// At end of stream a refill adds nothing and reports zero bytes.
    #[test]
    fn test_refill_at_eof() {
        let mut buf = Buf::new(Cursor::new(b"hi"), 5);
        assert_eq!(2, buf.refill().unwrap());
        buf.consume(2);
        assert_eq!(0, buf.refill().unwrap());
        assert_eq!(b"", buf.bytes());
    }
}