zen-parser 0.2.1

Zen expression language parser
Documentation
use std::cell::RefCell;
use std::rc::Rc;

use crate::is_token_type;
use crate::lexer::cursor::{Cursor, CursorItem};
use crate::lexer::error::LexerError;
use crate::lexer::error::LexerError::{UnexpectedEof, UnmatchedSymbol};
use crate::lexer::token::{Token, TokenKind};

/// Reference-counted, interior-mutable vector of tokens whose values borrow
/// from the source text for the lifetime `'a`.
type TokenSlice<'a> = Rc<RefCell<Vec<Token<'a>>>>;

/// Result of a scanning step that yields no value on success and a
/// `LexerError` on failure.
type VoidResult = Result<(), LexerError>;

/// Lexer front end that owns the token buffer filled by `tokenize`.
#[derive(Debug)]
pub struct Lexer<'a> {
    /// Shared token buffer; it is cleared and refilled on every `tokenize` call.
    tokens: TokenSlice<'a>,
}

impl<'a> Default for Lexer<'a> {
    fn default() -> Self {
        Lexer::new()
    }
}

impl<'a> Lexer<'a> {
    pub fn new() -> Self {
        Self {
            tokens: Rc::new(RefCell::new(Vec::new())),
        }
    }

    pub fn tokenize(&self, source: &'a str) -> Result<TokenSlice<'a>, LexerError> {
        self.tokens.borrow_mut().clear();
        Scanner::new(source, self.tokens.clone()).scan()?;
        Ok(self.tokens.clone())
    }
}

/// Single-pass scanner that walks `source` through a cursor and appends the
/// tokens it recognises to a shared buffer.
struct Scanner<'a> {
    /// Character cursor built from `source`.
    cursor: Cursor<'a>,
    /// Destination buffer for produced tokens.
    tokens: TokenSlice<'a>,
    /// Original text; token values are slices of it.
    source: &'a str,
}

impl<'a> Scanner<'a> {
    /// Builds a scanner over `source` that appends its output to `tokens`.
    pub fn new(source: &'a str, tokens: TokenSlice<'a>) -> Self {
        let cursor = Cursor::from(source);

        Self {
            cursor,
            tokens,
            source,
        }
    }

    /// Scans the whole source, dispatching on the next character to the
    /// routine that lexes the corresponding token.
    ///
    /// Whitespace between tokens is skipped. Besides the plain space, any
    /// other whitespace character (tab, newline, carriage return, ...) is
    /// also skipped, so multi-line or tab-indented expressions are accepted.
    ///
    /// # Errors
    /// Returns `UnmatchedSymbol` (with the character and its position) when a
    /// character starts no known token, and propagates any error raised by
    /// the individual token routines.
    pub fn scan(&self) -> VoidResult {
        while let Some((i, s)) = self.cursor.peek() {
            match s {
                ' ' => {
                    self.cursor.next();
                    Ok(())
                }
                _ if is_token_type!(s, "quote") => self.string(),
                _ if is_token_type!(s, "digit") => self.number(),
                _ if is_token_type!(s, "bracket") => self.bracket(),
                _ if is_token_type!(s, "cmp_operator") => self.operator(),
                _ if is_token_type!(s, "operator") => self.simple_operator(),
                '.' => self.dot(),
                _ if is_token_type!(s, "alpha") => self.identifier(),

                // Fallback for non-space whitespace. It sits after every token
                // arm so that only characters which used to be rejected as
                // `UnmatchedSymbol` are affected by it.
                _ if s.is_whitespace() => {
                    self.cursor.next();
                    Ok(())
                }

                _ => Err(UnmatchedSymbol {
                    symbol: s,
                    position: i,
                }),
            }?;
        }

        Ok(())
    }

    /// Advances the cursor and returns the consumed item.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` when the cursor yields nothing. The error
    /// carries whatever `peek_back` reports, or position `0` and a space if
    /// `peek_back` yields nothing either.
    fn next(&self) -> Result<CursorItem, LexerError> {
        match self.cursor.next() {
            Some(item) => Ok(item),
            None => {
                let (position, symbol) = self.cursor.peek_back().unwrap_or((0, ' '));

                Err(UnexpectedEof { symbol, position })
            }
        }
    }

    /// Appends `token` to the shared token buffer.
    fn push(&self, token: Token<'a>) {
        let mut buffer = self.tokens.borrow_mut();
        buffer.push(token);
    }

    /// Lexes a quoted string literal.
    ///
    /// The first consumed character is the opening quote; characters are then
    /// consumed until the same character is seen again. No escape handling is
    /// performed. The token value is the text between the quotes, and the
    /// span runs from the opening quote to the index of the closing quote.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` if the input ends before the closing quote.
    fn string(&self) -> VoidResult {
        let (open_at, quote) = self.next()?;

        // Consume until the matching quote and remember where it sits.
        let close_at = loop {
            let (idx, ch) = self.next()?;
            if ch == quote {
                break idx;
            }
        };

        let token = Token {
            kind: TokenKind::String,
            span: (open_at, close_at),
            value: &self.source[open_at + 1..close_at],
        };
        self.push(token);

        Ok(())
    }

    /// Lexes a numeric literal made of digits, `_` and at most one `.`.
    ///
    /// A second `.` ends the literal, and so does a `.` that is immediately
    /// followed by another `.` (so `1..2` yields the number `1` first). In
    /// both cases the cursor is stepped back before stopping
    /// (presumably so the `.` is lexed again by the caller — confirm against `Cursor::back`).
    ///
    /// # Errors
    /// Returns `UnexpectedEof` if the first character cannot be consumed.
    fn number(&self) -> VoidResult {
        let (first, _) = self.next()?;
        let mut last = first;
        let mut has_fraction = false;

        while let Some((idx, ch)) = self
            .cursor
            .next_if(|ch| is_token_type!(ch, "digit") || ch == '_' || ch == '.')
        {
            if ch == '.' {
                // Only one decimal point may belong to the literal.
                if has_fraction {
                    self.cursor.back();
                    break;
                }

                match self.cursor.peek() {
                    // Two dots in a row are not part of the number.
                    Some((_, '.')) => {
                        self.cursor.back();
                        break;
                    }
                    Some(_) => has_fraction = true,
                    None => {}
                }
            }

            last = idx;
        }

        let token = Token {
            kind: TokenKind::Number,
            span: (first, last + 1),
            value: &self.source[first..=last],
        };
        self.push(token);

        Ok(())
    }

    /// Lexes a single-character bracket token.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` if no character can be consumed.
    fn bracket(&self) -> VoidResult {
        let (at, _) = self.next()?;
        let stop = at + 1;

        let token = Token {
            kind: TokenKind::Bracket,
            span: (at, stop),
            value: &self.source[at..stop],
        };
        self.push(token);

        Ok(())
    }

    /// Lexes `.` or `..` as an operator token.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` if no character can be consumed.
    fn dot(&self) -> VoidResult {
        let (first, _) = self.next()?;

        // A directly following second dot extends the token to `..`.
        let last = match self.cursor.next_if(|ch| ch == '.') {
            Some(_) => first + 1,
            None => first,
        };

        let token = Token {
            kind: TokenKind::Operator,
            span: (first, last + 1),
            value: &self.source[first..=last],
        };
        self.push(token);

        Ok(())
    }

    /// Lexes an operator that may be followed by `=`, producing either the
    /// one-character or the two-character form as a single operator token.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` if no character can be consumed.
    fn operator(&self) -> VoidResult {
        let (first, _) = self.next()?;

        // Absorb a trailing `=` into the same token when present.
        let last = match self.cursor.next_if(|ch| ch == '=') {
            Some(_) => first + 1,
            None => first,
        };

        let token = Token {
            kind: TokenKind::Operator,
            span: (first, last + 1),
            value: &self.source[first..=last],
        };
        self.push(token);

        Ok(())
    }

    /// Lexes a single-character operator token.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` if no character can be consumed.
    fn simple_operator(&self) -> VoidResult {
        let (at, _) = self.next()?;
        let stop = at + 1;

        let token = Token {
            kind: TokenKind::Operator,
            span: (at, stop),
            value: &self.source[at..stop],
        };
        self.push(token);

        Ok(())
    }

    /// Emits the operator for an already-consumed `not` keyword that began at
    /// `start`.
    ///
    /// If the cursor can consume the exact text `" in "` next, the combined
    /// operator `not in` is emitted; otherwise plain `not`. Spans end at the
    /// cursor position read afterwards, reduced by one in the `not in` case
    /// (presumably to leave out the trailing space that was consumed — confirm
    /// against `Cursor::position`).
    fn not(&self, start: usize) -> VoidResult {
        let followed_by_in = self.cursor.next_if_is(" in ");
        let position = self.cursor.position();

        let (span, value) = if followed_by_in {
            ((start, position - 1), "not in")
        } else {
            ((start, position), "not")
        };

        self.push(Token {
            kind: TokenKind::Operator,
            span,
            value,
        });

        Ok(())
    }

    /// Lexes a word: one leading character followed by every character the
    /// `"alphanumeric"` token class accepts.
    ///
    /// The words `and`, `or` and `in` become operator tokens, `not` is handed
    /// to `not` (which may extend it to `not in`), and anything else becomes
    /// an identifier token.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` if no character can be consumed, and
    /// propagates any error from `not`.
    fn identifier(&self) -> VoidResult {
        let (first, _) = self.next()?;
        let mut last = first;

        while let Some((idx, _)) = self.cursor.next_if(|ch| is_token_type!(ch, "alphanumeric")) {
            last = idx;
        }

        let word = &self.source[first..=last];

        // `not` needs look-ahead of its own, so it is delegated entirely.
        let kind = match word {
            "not" => return self.not(first),
            "and" | "or" | "in" => TokenKind::Operator,
            _ => TokenKind::Identifier,
        };

        self.push(Token {
            kind,
            span: (first, last + 1),
            value: word,
        });

        Ok(())
    }
}