// pest 1.0.2
//
// The Elegant Parser
// Documentation
// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.

use std::cmp::Ordering;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::ops::Range;
use std::ptr;

use span;

/// A cursor into a `&str`: the borrowed input together with a byte offset into it.
///
/// The methods of `Position` are meant for parsing the input by hand. Most of them consume the
/// cursor and hand it back wrapped in the standard `Result`, which makes them easy to chain.
pub struct Position<'i> {
    /// The string this position points into.
    input: &'i str,
    /// Byte offset into `input`.
    pos: usize,
}

/// Builds a `Position` at byte offset `pos` of `input` without validating the offset.
///
/// # Safety
///
/// No check of any kind is performed here. The rest of this file treats a position as "safe"
/// when it sits on a valid UTF-8 border of its input, so callers are expected to pass a `pos`
/// that is such a border of `input`.
pub unsafe fn new<'i>(input: &'i str, pos: usize) -> Position<'i> {
    Position { input, pos }
}

impl<'i> Position<'i> {
    /// Creates starting `Position` from an `&str`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pest::Position;
    ///
    /// Position::from_start("");
    /// ```
    #[inline]
    pub fn from_start(input: &'i str) -> Position<'i> {
        // Position 0 is always safe because it's always a valid UTF-8 border.
        unsafe { new(input, 0) }
    }

    /// Returns the current byte position as a `usize`.
    ///
    /// The value is an offset in bytes from the start of the input, not a number of `char`s.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pest::Position;
    /// let input = "ab";
    /// let start = Position::from_start(input);
    ///
    /// assert_eq!(start.pos(), 0);
    /// assert_eq!(start.match_string("ab").unwrap().pos(), 2);
    /// ```
    #[inline]
    pub fn pos(&self) -> usize {
        self.pos
    }

    /// Builds a `Span` over the shared input, using this `Position` as its start and `other` as
    /// its end.
    ///
    /// # Panics
    ///
    /// Panics if the two positions do not refer to the same input `&str`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pest::Position;
    /// let input = "ab";
    /// let start = Position::from_start(input);
    /// let end = start.clone().match_string("ab").unwrap();
    /// let span = start.span(&end);
    ///
    /// assert_eq!(span.start(), 0);
    /// assert_eq!(span.end(), 2);
    /// ```
    #[inline]
    pub fn span(&self, other: &Position<'i>) -> span::Span<'i> {
        // Offsets are only comparable when they index into the very same string.
        assert!(
            ptr::eq(self.input, other.input),
            "span created from positions from different inputs"
        );

        span::new(self.input, self.pos, other.pos)
    }

    /// Returns the `(line, column)` pair of the current `Position`. Both numbers start at 1.
    ///
    /// A `'\n'`, or a `'\r'` immediately followed by a `'\n'`, starts a new line. A `'\r'` that is
    /// not followed by `'\n'` advances the column like any other `char`. Columns are counted in
    /// `char`s, not in bytes.
    ///
    /// # Panics
    ///
    /// Panics if the position lies beyond the end of the input.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pest::Position;
    /// let input = "\na";
    /// let start = Position::from_start(input);
    /// let pos = start.match_string("\na").unwrap();
    ///
    /// assert_eq!(pos.line_col(), (2, 2));
    /// ```
    #[inline]
    pub fn line_col(&self) -> (usize, usize) {
        if self.pos > self.input.len() {
            panic!("position out of bounds");
        }

        // Only the text in front of the position has any influence on the result.
        let mut consumed = self.input[..self.pos].chars().peekable();
        let (mut line, mut col) = (1, 1);

        while let Some(c) = consumed.next() {
            match c {
                // "\r\n" is a single line break: swallow the '\n' together with the '\r'.
                '\r' if consumed.peek() == Some(&'\n') => {
                    consumed.next();
                    line += 1;
                    col = 1;
                }
                '\n' => {
                    line += 1;
                    col = 1;
                }
                // Everything else, a lone '\r' included, occupies one column.
                _ => col += 1,
            }
        }

        (line, col)
    }

    /// Returns the actual line of the current `Position`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pest::Position;
    /// let input = "\na";
    /// let start = Position::from_start(input);
    /// let pos = start.match_string("\na").unwrap();
    ///
    /// assert_eq!(pos.line_of(), "a");
    /// ```
    #[inline]
    pub fn line_of(&self) -> &str {
        if self.pos > self.input.len() {
            panic!("position out of bounds");
        }

        unsafe {
            let start = if self.pos == 0 {
                0
            } else {
                let start = self.input
                    .char_indices()
                    .rev()
                    .skip_while(|&(i, _)| i >= self.pos)
                    .find(|&(_, c)| c == '\n');
                match start {
                    Some((i, _)) => i + 1,
                    None => 0
                }
            };

            let end = if self.input.is_empty() {
                0
            } else if self.pos == self.input.len() - 1 {
                let mut end = self.input.len();

                if end > 0 && self.input.slice_unchecked(end - 1, end) == "\n" {
                    end -= 1;
                }
                if end > 0 && self.input.slice_unchecked(end - 1, end) == "\r" {
                    end -= 1;
                }

                end
            } else {
                let end = self.input
                    .char_indices()
                    .skip_while(|&(i, _)| i < self.pos)
                    .find(|&(_, c)| c == '\n');
                let mut end = match end {
                    Some((i, _)) => i,
                    None => self.input.len()
                };

                if end > 0 && self.input.slice_unchecked(end - 1, end) == "\r" {
                    end -= 1;
                }

                end
            };

            self.input.slice_unchecked(start, end)
        }
    }

    /// Checks whether the `Position` sits at the very beginning of its `&str`.
    ///
    /// The `Position` is handed back unchanged in both cases: inside `Ok` when its offset is 0,
    /// inside `Err` otherwise.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pest::Position;
    /// let input = "ab";
    /// let start = Position::from_start(input);
    /// let end = start.clone().match_string("ab").unwrap();
    ///
    /// assert_eq!(start.clone().at_start(), Ok(start));
    /// assert_eq!(end.clone().at_start(), Err(end));
    /// ```
    #[inline]
    pub fn at_start(self) -> Result<Position<'i>, Position<'i>> {
        match self.pos {
            0 => Ok(self),
            _ => Err(self),
        }
    }

    /// Checks whether the `Position` sits at the very end of its `&str`.
    ///
    /// The `Position` is handed back unchanged in both cases: inside `Ok` when its offset equals
    /// the length of the input, inside `Err` otherwise.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pest::Position;
    /// let input = "ab";
    /// let start = Position::from_start(input);
    /// let end = start.clone().match_string("ab").unwrap();
    ///
    /// assert_eq!(start.clone().at_end(), Err(start));
    /// assert_eq!(end.clone().at_end(), Ok(end));
    /// ```
    #[inline]
    pub fn at_end(self) -> Result<Position<'i>, Position<'i>> {
        if self.pos != self.input.len() {
            return Err(self);
        }

        Ok(self)
    }

    /// Skips `n` `char`s from the `Position` and returns `Ok` with the new `Position` if the skip
    /// was possible or `Err` with the current, unmoved `Position` otherwise.
    ///
    /// The skip fails when fewer than `n` `char`s are left in the input. Skipping 0 `char`s always
    /// succeeds.
    ///
    /// # Panics
    ///
    /// Panics if the current offset is not a UTF-8 char boundary inside the input.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pest::Position;
    /// let input = "ab";
    /// let start = Position::from_start(input);
    ///
    /// assert_eq!(start.clone().skip(2).unwrap().pos(), 2);
    /// assert_eq!(start.clone().skip(3), Err(start));
    /// ```
    #[inline]
    pub fn skip(mut self, n: usize) -> Result<Position<'i>, Position<'i>> {
        // Copy the `&'i str` out so that iterating over it does not keep `self` borrowed.
        let input = self.input;
        let mut upcoming = input[self.pos..].chars();
        let mut skipped = 0;

        for _ in 0..n {
            match upcoming.next() {
                // Offsets are in bytes, so advance by the encoded width of each `char`.
                Some(c) => skipped += c.len_utf8(),
                // Fewer than `n` chars are left: fail without having moved.
                None => return Err(self),
            }
        }

        self.pos += skipped;
        Ok(self)
    }

    /// Matches `string` from the `Position` and returns `Ok` with the new `Position` if a match was
    /// made or `Err` with the current `Position` otherwise.
    ///
    /// The input must continue with exactly the bytes of `string`. An empty `string` matches
    /// everywhere, including at the end of the input.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pest::Position;
    /// let input = "ab";
    /// let start = Position::from_start(input);
    ///
    /// assert_eq!(start.clone().match_string("ab").unwrap().pos(), 2);
    /// assert_eq!(start.clone().match_string("ac"), Err(start));
    /// ```
    #[inline]
    pub fn match_string(mut self, string: &str) -> Result<Position<'i>, Position<'i>> {
        // Compare raw bytes: the candidate region `pos..pos + string.len()` may end in the middle
        // of a multi-byte character, and a `&str` must never be created for such a region.
        let matched = self.input
            .as_bytes()
            .get(self.pos..)
            .map_or(false, |rest| rest.starts_with(string.as_bytes()));

        if matched {
            // The matched bytes are a complete UTF-8 string, so the new offset is a char boundary.
            self.pos += string.len();
            Ok(self)
        } else {
            Err(self)
        }
    }

    /// Case-insensitively matches `string` from the `Position` and returns `Ok` with the new
    /// `Position` if a match was made or `Err` with the current `Position` otherwise.
    ///
    /// Only ASCII letters are compared without regard to case (`str::eq_ignore_ascii_case`); every
    /// other byte has to be identical.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pest::Position;
    /// let input = "ab";
    /// let start = Position::from_start(input);
    ///
    /// assert_eq!(start.clone().match_insensitive("AB").unwrap().pos(), 2);
    /// assert_eq!(start.clone().match_insensitive("AC"), Err(start));
    /// ```
    #[inline]
    pub fn match_insensitive(mut self, string: &str) -> Result<Position<'i>, Position<'i>> {
        // `str::get` yields `None` when the requested region runs past the end of the input or
        // does not fall on char boundaries; in both cases there is nothing that could match.
        let matched = self.input
            .get(self.pos..)
            .and_then(|rest| rest.get(..string.len()))
            .map_or(false, |candidate| candidate.eq_ignore_ascii_case(string));

        if matched {
            self.pos += string.len();
            Ok(self)
        } else {
            Err(self)
        }
    }

    /// Matches `char` `range` from the `Position` and returns `Ok` with the new `Position` if a
    /// match was made or `Err` with the current `Position` otherwise.
    ///
    /// Both ends of `range` are treated as inclusive: the next `char` matches when
    /// `range.start <= c && c <= range.end`. Nothing matches at the end of the input.
    ///
    /// # Panics
    ///
    /// Panics if the current offset is not a UTF-8 char boundary inside the input.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pest::Position;
    /// let input = "ab";
    /// let start = Position::from_start(input);
    ///
    /// assert_eq!(start.clone().match_range('a'..'z').unwrap().pos(), 1);
    /// assert_eq!(start.clone().match_range('a'..'a').unwrap().pos(), 1);
    /// assert_eq!(start.clone().match_range('A'..'Z'), Err(start));
    /// ```
    #[inline]
    pub fn match_range(mut self, range: Range<char>) -> Result<Position<'i>, Position<'i>> {
        // Copy the `&'i str` out so that looking at it does not keep `self` borrowed.
        let input = self.input;

        match input[self.pos..].chars().next() {
            Some(c) if range.start <= c && c <= range.end => {
                // Offsets are in bytes, so advance by the encoded width of the matched `char`.
                self.pos += c.len_utf8();
                Ok(self)
            }
            _ => Err(self),
        }
    }

    /// Runs the chain of transformations `f` from the `Position` as one all-or-nothing step.
    ///
    /// An `Ok` returned by `f` is passed through untouched. When `f` returns an `Err`, the
    /// `Position` inside it is moved back to the offset the sequence started from, so a partially
    /// matched chain leaves no trace.
    ///
    /// This method is useful to parse sequences that only match together which usually come in the
    /// form of chained `Result`s with
    /// [`Result::and_then`](https://doc.rust-lang.org/std/result/enum.Result.html#method.and_then).
    /// Such chains should always be wrapped up in
    /// [`ParserState::sequence`](../struct.ParserState.html#method.sequence) if they can create
    /// `Token`s before being wrapped in `Position::sequence`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pest::Position;
    /// let input = "ab";
    /// let start = Position::from_start(input);
    ///
    /// assert_eq!(
    ///     start.clone().sequence(|p| {
    ///         p.match_string("a").and_then(|p| {
    ///             p.match_string("b")
    ///         })
    ///     }).unwrap().pos(),
    ///     2
    /// );
    /// assert_eq!(
    ///     start.clone().sequence(|p| {
    ///         p.match_string("a").and_then(|p| {
    ///             p.match_string("c")
    ///         })
    ///     }),
    ///     Err(start)
    /// );
    /// ```
    #[inline]
    pub fn sequence<F>(self, f: F) -> Result<Position<'i>, Position<'i>>
    where
        F: FnOnce(Position<'i>) -> Result<Position<'i>, Position<'i>>
    {
        let checkpoint = self.pos;

        // Only the failure path needs fixing up: rewind to where the sequence began.
        f(self).map_err(|mut failed| {
            failed.pos = checkpoint;
            failed
        })
    }

    /// Runs `f` from the `Position` as a lookahead, i.e. without consuming any input.
    ///
    /// Whatever `f` returns, the resulting `Position` is moved back to the offset the lookahead
    /// started from. With `is_positive` set to `true` the result is `Ok` if `f` returned `Ok` and
    /// `Err` if it returned `Err`; with `is_positive` set to `false` the two are swapped.
    ///
    /// It should always be wrapped up in
    /// [`ParserState::lookahead`](../struct.ParserState.html#method.lookahead) if it can create
    /// `Token`s before being wrapped in `Position::lookahead`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pest::Position;
    /// let input = "ab";
    /// let start = Position::from_start(input);
    ///
    /// assert_eq!(
    ///     start.clone().lookahead(true, |p| {
    ///         p.match_string("ab")
    ///     }),
    ///     Ok(start.clone())
    /// );
    /// assert_eq!(
    ///     start.clone().lookahead(true, |p| {
    ///         p.match_string("ac")
    ///     }),
    ///     Err(start.clone())
    /// );
    /// assert_eq!(
    ///     start.clone().lookahead(false, |p| {
    ///         p.match_string("ac")
    ///     }),
    ///     Ok(start)
    /// );
    /// ```
    #[inline]
    pub fn lookahead<F>(self, is_positive: bool, f: F) -> Result<Position<'i>, Position<'i>>
    where
        F: FnOnce(Position<'i>) -> Result<Position<'i>, Position<'i>>
    {
        let origin = self.pos;

        // Keep only the fact whether `f` matched; the position it produced is rewound either way.
        let (matched, mut rewound) = match f(self) {
            Ok(p) => (true, p),
            Err(p) => (false, p),
        };
        rewound.pos = origin;

        // A positive lookahead succeeds when `f` matched, a negative one when it did not.
        if matched == is_positive {
            Ok(rewound)
        } else {
            Err(rewound)
        }
    }

    /// Optionally applies the transformation provided by `f` from the `Position`. It returns `Ok`
    /// with the `Position` returned by `f` regardless of the `Result`.
    ///
    /// This method never returns `Err`. When `f` fails, the `Position` carried by its `Err` is
    /// returned as it is; no rewinding happens here, so an `f` that can fail half-way through
    /// should do its own rewinding (for example through `Position::sequence`).
    ///
    /// # Examples
    ///
    /// ```
    /// # use pest::Position;
    /// let input = "ab";
    /// let start = Position::from_start(input);
    ///
    /// assert_eq!(
    ///     start.clone().optional(|p| {
    ///         p.match_string("a").and_then(|p| {
    ///             p.match_string("b")
    ///         })
    ///     }).unwrap().pos(),
    ///     2
    /// );
    /// assert_eq!(
    ///     start.clone().optional(|p| {
    ///         p.match_string("c")
    ///     }),
    ///     Ok(start)
    /// );
    /// ```
    #[inline]
    pub fn optional<F>(self, f: F) -> Result<Position<'i>, Position<'i>>
    where
        F: FnOnce(Position<'i>) -> Result<Position<'i>, Position<'i>>
    {
        // Success and failure both carry a `Position`; a failure is simply re-wrapped as `Ok`.
        f(self).or_else(Ok)
    }

    /// Applies `f` over and over, feeding each `Position` it returns in an `Ok` back into it.
    ///
    /// The repetition stops at the first `Err` returned by `f`, and the `Position` inside that
    /// `Err` is returned wrapped in `Ok`. This method therefore never returns `Err`, and it does
    /// not return at all if `f` never fails.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pest::Position;
    /// let input = "ab";
    /// let start = Position::from_start(input);
    ///
    /// assert_eq!(
    ///     start.clone().repeat(|p| {
    ///         p.match_string("a")
    ///     }).unwrap().pos(),
    ///     1
    /// );
    /// assert_eq!(
    ///     start.repeat(|p| {
    ///         p.match_string("b")
    ///     }).unwrap().pos(),
    ///     0
    /// );
    /// ```
    #[inline]
    pub fn repeat<F>(self, mut f: F) -> Result<Position<'i>, Position<'i>>
    where
        F: FnMut(Position<'i>) -> Result<Position<'i>, Position<'i>>
    {
        let mut current = self;

        loop {
            match f(current) {
                // Another repetition matched: continue from where it left off.
                Ok(next) => current = next,
                // The first failure ends the repetition; its position is the result.
                Err(last) => return Ok(last),
            }
        }
    }
}

impl<'i> fmt::Debug for Position<'i> {
    /// Writes `Position { pos: <offset> }`; the input string is left out of the output.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_fmt(format_args!("Position {{ pos: {} }}", self.pos))
    }
}

impl<'i> Clone for Position<'i> {
    fn clone(&self) -> Position<'i> {
        // Cloning a safe position is safe.
        unsafe { new(self.input, self.pos) }
    }
}

impl<'i> PartialEq for Position<'i> {
    /// Two positions are equal when they have the same offset into the same input, where "same
    /// input" is decided by `ptr::eq` on the two `&str`s rather than by comparing their text.
    fn eq(&self, other: &Position<'i>) -> bool {
        let same_input = ptr::eq(self.input, other.input);

        same_input && self.pos == other.pos
    }
}

// Marker impl: the `PartialEq` implementation for `Position` compares the input by pointer and
// the offset by value, so a `Position` always equals itself.
impl<'i> Eq for Position<'i> {}

impl<'i> PartialOrd for Position<'i> {
    /// Orders two positions by offset when `ptr::eq` says they share the same input `&str`;
    /// positions over different inputs are unordered and yield `None`.
    fn partial_cmp(&self, other: &Position<'i>) -> Option<Ordering> {
        if !ptr::eq(self.input, other.input) {
            return None;
        }

        Some(self.pos.cmp(&other.pos))
    }
}

impl<'i> Ord for Position<'i> {
    /// Orders two positions the way `partial_cmp` does.
    ///
    /// # Panics
    ///
    /// Panics when `partial_cmp` returns `None`, i.e. when the two positions come from different
    /// inputs.
    fn cmp(&self, other: &Position<'i>) -> Ordering {
        match self.partial_cmp(other) {
            Some(ordering) => ordering,
            None => panic!("cannot compare positions from different strs"),
        }
    }
}

impl<'i> Hash for Position<'i> {
    /// Feeds the input, as a raw `*const str` pointer rather than as text, and then the offset
    /// into `state`.
    fn hash<H: Hasher>(&self, state: &mut H) {
        let origin: *const str = self.input;

        origin.hash(state);
        self.pos.hash(state);
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // Every offset handed to `new` below is a char boundary of the respective input.

    /// Matching against an empty input: only the empty string matches.
    #[test]
    fn empty() {
        let input = "";
        assert!(unsafe { new(input, 0) }.match_string("").is_ok());
        assert!(!unsafe { new(input, 0) }.match_string("a").is_ok());
    }

    /// Matching works from the start as well as from the middle of the input.
    #[test]
    fn parts() {
        let input = "asdasdf";

        assert!(unsafe { new(input, 0) }.match_string("asd").is_ok());
        assert!(unsafe { new(input, 3) }.match_string("asdf").is_ok());
    }

    /// Line/column pairs for an input mixing a lone `\r`, `\n`, `\r\n` and a multi-byte char.
    #[test]
    fn line_col() {
        let input = "a\rb\nc\r\nd嗨";

        assert_eq!(unsafe { new(input, 0) }.line_col(), (1, 1));
        assert_eq!(unsafe { new(input, 1) }.line_col(), (1, 2));
        assert_eq!(unsafe { new(input, 2) }.line_col(), (1, 3));
        assert_eq!(unsafe { new(input, 3) }.line_col(), (1, 4));
        assert_eq!(unsafe { new(input, 4) }.line_col(), (2, 1));
        assert_eq!(unsafe { new(input, 5) }.line_col(), (2, 2));
        assert_eq!(unsafe { new(input, 6) }.line_col(), (2, 3));
        assert_eq!(unsafe { new(input, 7) }.line_col(), (3, 1));
        assert_eq!(unsafe { new(input, 8) }.line_col(), (3, 2));
        assert_eq!(unsafe { new(input, 11) }.line_col(), (3, 3));
    }

    /// Line contents for the same input; line terminators are never part of the result.
    #[test]
    fn line_of() {
        let input = "a\rb\nc\r\nd嗨";

        assert_eq!(unsafe { new(input, 0) }.line_of(), "a\rb");
        assert_eq!(unsafe { new(input, 1) }.line_of(), "a\rb");
        assert_eq!(unsafe { new(input, 2) }.line_of(), "a\rb");
        assert_eq!(unsafe { new(input, 3) }.line_of(), "a\rb");
        assert_eq!(unsafe { new(input, 4) }.line_of(), "c");
        assert_eq!(unsafe { new(input, 5) }.line_of(), "c");
        assert_eq!(unsafe { new(input, 6) }.line_of(), "c");
        assert_eq!(unsafe { new(input, 7) }.line_of(), "d嗨");
        assert_eq!(unsafe { new(input, 8) }.line_of(), "d嗨");
        assert_eq!(unsafe { new(input, 11) }.line_of(), "d嗨");
    }

    #[test]
    fn line_of_empty() {
        let input = "";

        assert_eq!(unsafe { new(input, 0) }.line_of(), "");
    }

    #[test]
    fn line_of_new_line() {
        let input = "\n";

        assert_eq!(unsafe { new(input, 0) }.line_of(), "");
    }

    #[test]
    fn line_of_between_new_line() {
        let input = "\n\n";

        assert_eq!(unsafe { new(input, 1) }.line_of(), "");
    }

    /// Returns by how many bytes `skip(n)` advances from `pos`, or `None` if the skip fails.
    fn measure_skip<'i>(input: &'i str, pos: usize, n: usize) -> Option<usize> {
        let p = unsafe { new(input, pos) };
        if let Ok(p) = p.skip(n) {
            Some(p.pos - pos)
        } else {
            None
        }
    }

    #[test]
    fn skip_empty() {
        let input = "";

        assert_eq!(measure_skip(input, 0, 0), Some(0));
        assert_eq!(measure_skip(input, 0, 1), None);
    }

    /// Skipping counts `char`s, so the multi-byte char advances the offset by three bytes.
    #[test]
    fn skip() {
        let input = "d嗨";

        assert_eq!(measure_skip(input, 0, 0), Some(0));
        assert_eq!(measure_skip(input, 0, 1), Some(1));
        assert_eq!(measure_skip(input, 1, 1), Some(3));
    }

    /// Both ends of the range passed to `match_range` are inclusive.
    #[test]
    fn match_range() {
        let input = "b";

        assert!(unsafe { new(input, 0) }.match_range('a'..'c').is_ok());
        assert!(unsafe { new(input, 0) }.match_range('b'..'b').is_ok());
        assert!(!unsafe { new(input, 0) }.match_range('a'..'a').is_ok());
        assert!(!unsafe { new(input, 0) }.match_range('c'..'c').is_ok());
        // A range whose upper end is a multi-byte char.
        assert!(unsafe { new(input, 0) }.match_range('a'..'嗨').is_ok());
    }

    #[test]
    fn match_insensitive() {
        let input = "AsdASdF";

        assert!(unsafe { new(input, 0) }.match_insensitive("asd").is_ok());
        assert!(unsafe { new(input, 3) }.match_insensitive("asdf").is_ok());
    }
}
}