// bparse 0.29.2
//
// A library for parsing bytes.
// Documentation
use crate::pattern::Pattern;
use core::ops::Deref;

/// A byte slice with a cursor. The cursor is advanced using [Patterns](crate::Pattern).
///
/// A `Buf` only holds a borrowed slice and an index, so cloning it is cheap. A clone can be
/// kept as a checkpoint and restored later to backtrack after a failed parse attempt.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Buf<'b> {
    /// The full underlying byte slice, including bytes already passed by the cursor.
    inner: &'b [u8],
    /// Index into `inner` of the first byte that has not yet been examined.
    pos: usize,
}

impl<'b> Buf<'b> {
    /// Returns the portion of the byte slice that has not yet been examined.
    ///
    /// The returned slice borrows from the underlying data, not from the `Buf` itself, so it
    /// stays usable while the cursor is moved afterwards.
    pub fn rest(&self) -> &'b [u8] {
        &self.inner[self.pos..]
    }

    /// Tests `pattern` against the bytes at the cursor.
    ///
    /// If the pattern matches, the matched bytes are returned and the cursor is advanced by the
    /// number of matched bytes. If it does not match, `None` is returned and the cursor stays
    /// where it is.
    ///
    /// If you only need to know whether the pattern matched (and not the matched bytes), use
    /// [`Buf::skip`]. To test a pattern without moving the cursor, use [`Buf::peek`] or
    /// [`Buf::matches`].
    ///
    /// # Panics
    ///
    /// Panics if `pattern` reports a match longer than the remaining input.
    pub fn consume(&mut self, pattern: impl Pattern) -> Option<&'b [u8]> {
        let matched = self.peek(pattern)?;
        self.pos += matched.len();
        Some(matched)
    }

    /// Tests `pattern` against the bytes at the cursor.
    ///
    /// If the pattern matches, the matched bytes are returned.
    /// The cursor is not moved.
    ///
    /// # Panics
    ///
    /// Panics if `pattern` reports a match longer than the remaining input.
    pub fn peek(&self, pattern: impl Pattern) -> Option<&'b [u8]> {
        let rest = self.rest();
        let len = pattern.eval(rest)?;
        Some(&rest[..len])
    }

    /// Returns any bytes between the cursor and the first occurrence of the marker pattern.
    ///
    /// The cursor is moved to right before the marker pattern matches.
    ///
    /// Returns `None` if the marker pattern is not found by the time the end of the slice is
    /// reached; in that case the cursor is not moved.
    pub fn consume_until(&mut self, marker: impl Pattern) -> Option<&'b [u8]> {
        let (start, _) = self.scan_for(marker)?;
        Some(self.take_front(start))
    }

    /// Returns all bytes between the cursor and the first occurrence of the marker pattern,
    /// including the bytes matched by the marker itself.
    ///
    /// The cursor is moved to right after the marker pattern matches.
    ///
    /// Returns `None` if the marker pattern is not found by the time the end of the slice is
    /// reached; in that case the cursor is not moved.
    pub fn consume_until_after(&mut self, marker: impl Pattern) -> Option<&'b [u8]> {
        let (start, len) = self.scan_for(marker)?;
        Some(self.take_front(start + len))
    }

    /// Like [`Buf::consume`], but returns a boolean instead of the matched bytes.
    pub fn skip(&mut self, pattern: impl Pattern) -> bool {
        self.consume(pattern).is_some()
    }

    /// Like [`Buf::peek`], but returns a boolean instead of the matched bytes.
    pub fn matches(&self, pattern: impl Pattern) -> bool {
        self.peek(pattern).is_some()
    }

    /// Returns the index of the cursor within the underlying byte slice.
    pub fn position(&self) -> usize {
        self.pos
    }

    /// Advances the cursor `step` bytes.
    ///
    /// The cursor never moves past the end of the buffer: `step` is clamped to the number of
    /// remaining bytes, so this is a no-op once the cursor reaches the end.
    pub fn advance(&mut self, step: usize) {
        let remaining = self.rest().len();
        self.pos += step.min(remaining);
    }

    /// Finds the first offset (relative to the cursor) at which `marker` matches.
    ///
    /// Returns `(start, len)`, where `start` is that offset and `len` is the number of bytes the
    /// marker matched there. The cursor is not moved.
    fn scan_for(&self, marker: impl Pattern) -> Option<(usize, usize)> {
        let rest = self.rest();
        // The range is inclusive so the marker is also evaluated against the empty tail at the
        // very end of the input; this is what allows `pattern::end()` to be used as a marker.
        (0..=rest.len()).find_map(|start| marker.eval(&rest[start..]).map(|len| (start, len)))
    }

    /// Returns the first `len` unexamined bytes and moves the cursor past them.
    fn take_front(&mut self, len: usize) -> &'b [u8] {
        let taken = &self.rest()[..len];
        self.pos += len;
        taken
    }
}

impl<'b> From<&'b [u8]> for Buf<'b> {
    fn from(value: &'b [u8]) -> Self {
        Buf {
            pos: 0,
            inner: value,
        }
    }
}

impl<'b> From<&'b str> for Buf<'b> {
    fn from(value: &'b str) -> Self {
        Buf::from(value.as_bytes())
    }
}

impl Deref for Buf<'_> {
    type Target = [u8];

    /// Dereferences to the full underlying byte slice, regardless of where the cursor is.
    fn deref(&self) -> &Self::Target {
        let Self { inner, .. } = self;
        inner
    }
}

impl AsRef<[u8]> for Buf<'_> {
    /// Borrows the full underlying byte slice, regardless of where the cursor is.
    fn as_ref(&self) -> &[u8] {
        let whole: &[u8] = self.inner;
        whole
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::pattern;

    /// Dereferencing yields the whole slice even after the cursor has moved.
    #[test]
    fn deref() {
        let mut buf = Buf::from("aba");
        assert!(buf.skip("aba"));
        assert!(buf.matches(pattern::end()));
        assert_eq!(&*buf, b"aba");
    }

    /// `consume` returns each match in turn and keeps returning `None` at the end of input.
    #[test]
    fn parse() {
        let pattern = "a".or("b");

        let mut buf = Buf::from("abab");
        assert_eq!(buf.consume(pattern).as_deref(), Some(b"a" as &[u8]));
        assert_eq!(buf.consume(pattern).as_deref(), Some(b"b" as &[u8]));
        assert_eq!(buf.consume(pattern).as_deref(), Some(b"a" as &[u8]));
        assert_eq!(buf.consume(pattern).as_deref(), Some(b"b" as &[u8]));
        assert_eq!(buf.consume(pattern), None);
        assert_eq!(buf.consume(pattern), None);
        assert_eq!(buf.consume(pattern), None);
    }

    /// `peek` reports the match without moving the cursor.
    #[test]
    fn peek() {
        let buf = Buf::from("abc");
        assert_eq!(buf.peek("ab"), Some(b"ab" as &[u8]));
        assert_eq!(buf.peek("b"), None);
        assert_eq!(buf.position(), 0);
        assert_eq!(buf.rest(), b"abc");
    }

    #[test]
    fn skip() {
        let ws = " ";
        let mut buf = Buf::from("   a");
        assert!(buf.skip(ws));
        assert!(buf.skip(ws));
        assert!(buf.skip(ws));
        assert!(!buf.skip(ws));
        assert!(buf.skip("a"));
    }

    #[test]
    fn consume_until() {
        let mut buf = Buf::from("");
        assert_eq!(buf.consume_until(">"), None);

        let mut buf = Buf::from("<tag>");
        assert_eq!(buf.consume_until(">"), Some(b"<tag" as &[u8]));
        assert_eq!(buf.consume_until(">"), Some(&[] as &[u8]));

        // A marker that is never found leaves the cursor where it was
        assert_eq!(buf.consume_until("?"), None);
        assert_eq!(buf.position(), 4);

        assert_eq!(buf.consume_until(pattern::end()), Some(b">" as &[u8]));
    }

    #[test]
    fn consume_until_after() {
        let mut buf = Buf::from("");
        assert_eq!(buf.consume_until_after(">"), None);

        let mut buf = Buf::from("<tag>");
        assert_eq!(buf.consume_until_after(">"), Some(b"<tag>" as &[u8]));
        assert_eq!(buf.position(), 5);
        assert_eq!(buf.consume_until_after(pattern::end()), Some(&[] as &[u8]));
    }

    #[test]
    fn advance() {
        // Advancing in an empty buffer does nothing
        let mut buf = Buf::from("");
        assert!(buf.skip(pattern::end()));
        buf.advance(10);
        assert!(buf.skip(pattern::end()));
        assert_eq!(buf.position(), 0);

        // Advancing by 0 does not move the cursor
        let mut buf = Buf::from("abc");
        buf.advance(0);
        assert_eq!(buf.position(), 0);

        // Advancing by a number less than the length of the buffer works as expected
        buf.advance(2);
        assert_eq!(buf.position(), 2);
        assert!(buf.matches("c"));

        // Advancing past the end stops the cursor at the end of the buffer
        buf.advance(10);
        assert_eq!(buf.position(), 3);
        assert!(buf.matches(pattern::end()));
    }
}