use super::token::Token;
use std::iter::Peekable;
use std::str::Chars;
/// Streaming lexer over a borrowed query string.
///
/// Tokens are produced lazily through the `Iterator` implementation;
/// construct one with `Lexer::from(&str)`.
pub struct Lexer<'a> {
// Peekable character stream over the borrowed input.
content: Peekable<Chars<'a>>,
// Most recently consumed character; `None` until lexing starts.
current_char: Option<char>,
}
impl<'a> From<&'a str> for Lexer<'a> {
    /// Creates a lexer positioned before the first character of `input`.
    fn from(input: &'a str) -> Lexer<'a> {
        let content = input.chars().peekable();
        Lexer {
            current_char: None,
            content,
        }
    }
}
impl<'a> Lexer<'a> {
    /// Accumulates characters starting with `current_char` until the
    /// predicate `end` matches the *peeked* next character, and returns
    /// everything accumulated. The terminating character is left in the
    /// stream for the caller.
    ///
    /// # Panics
    /// Panics if `current_char` is `None`; callers must advance the lexer
    /// with `next_char` (and check the result) before calling this.
    fn read_until<T>(&mut self, end: T) -> String
    where
        T: Fn(char) -> bool,
    {
        let mut s = self.current_char.unwrap().to_string();
        // Early exit when the very next character already terminates.
        if let Some(next_char) = self.content.peek() {
            if end(*next_char) {
                return s;
            }
        }
        while let Some(c) = self.content.next() {
            s.push(c);
            if let Some(next_char) = self.content.peek() {
                if end(*next_char) {
                    break;
                }
            }
        }
        s
    }

    /// Reads a string delimited by `quote` (the current character), returning
    /// its contents without the surrounding quote characters.
    fn quoted_string(&mut self, quote: char) -> String {
        let mut s = self.read_until(|c| c == quote);
        s.remove(0); // drop the opening quote captured by read_until
        self.next_char(); // consume the closing quote
        s
    }

    /// Reads a numeric literal. `.` and `-` may continue a number; the
    /// literal ends at a letter, whitespace, or either parenthesis.
    fn number(&mut self) -> String {
        // Note: alphabetic characters can never be '.' or '-', so the
        // old extra checks for those inside the is_alphabetic() arm were
        // dead code and have been removed. '(' now terminates a number,
        // matching unquoted_string, so "5(" lexes as two tokens.
        self.read_until(|c| c.is_alphabetic() || c.is_whitespace() || c == ')' || c == '(')
    }

    /// Reads a bare word: everything up to whitespace or a parenthesis.
    fn unquoted_string(&mut self) -> String {
        self.read_until(|c| c.is_whitespace() || c == ')' || c == '(')
    }

    /// Advances the stream by one character, recording and returning it.
    fn next_char(&mut self) -> Option<char> {
        self.current_char = self.content.next();
        self.current_char
    }

    /// Turns the current character `c` (plus any lookahead it requires)
    /// into a single `Token`.
    fn token_from_char(&mut self, c: char) -> Token {
        match c {
            // '!' is only valid as part of "!=" or "!~".
            '!' => match self.content.peek() {
                Some('~') | Some('=') => {
                    let mut s = c.to_string();
                    s.push(self.next_char().unwrap());
                    Token::from(s.as_ref())
                }
                _ => Token::Invalid("! must be followed by = or ~".to_string()),
            },
            // '^' alone is valid, but may also begin "^=" or "^^".
            '^' => match self.content.peek() {
                Some('=') | Some('^') => {
                    let mut s = c.to_string();
                    s.push(self.next_char().unwrap());
                    Token::from(s.as_ref())
                }
                _ => Token::from(c),
            },
            // '>' / '<' may be followed by '=' to form ">=" / "<=".
            '>' | '<' => if let Some('=') = self.content.peek() {
                let mut s = c.to_string();
                s.push(self.next_char().unwrap());
                Token::from(s.as_ref())
            } else {
                Token::from(c)
            },
            '"' => Token::from(self.quoted_string('"')),
            '\'' => Token::from(self.quoted_string('\'')),
            // '-' escapes the following word so it is always lexed as a
            // plain string (never as a keyword).
            '-' => match self.next_char() {
                Some(_) => Token::Str(self.unquoted_string()),
                // A trailing '-' has nothing to escape. The old code
                // panicked here (unwrap on None inside read_until);
                // emit a token for the bare character instead.
                None => Token::from(c),
            },
            'a'..='z' | 'A'..='Z' => Token::from(self.unquoted_string()),
            _num if c.is_digit(10) => Token::from(self.number()),
            _ => Token::from(c),
        }
    }
}
impl<'a> Iterator for Lexer<'a> {
    type Item = Token;

    /// Yields the next token, silently skipping whitespace between tokens;
    /// returns `None` once the input is exhausted.
    fn next(&mut self) -> Option<Token> {
        loop {
            let c = self.next_char()?;
            if !c.is_whitespace() {
                return Some(self.token_from_char(c));
            }
        }
    }
}
#[cfg(test)]
pub mod tests {
    use super::*;

    /// Lexes `$query` and asserts the resulting token stream equals `$tokens`.
    macro_rules! lexer_test {
        ($name:ident, $query:expr, $tokens:expr) => {
            #[test]
            fn $name() {
                let tokens: Vec<Token> = Lexer::from($query).collect();
                let expected: Vec<Token> = $tokens;
                // Compare whole vectors: this yields a readable diff on
                // failure and avoids the index-out-of-bounds panic the old
                // element-by-element loop hit when `expected` was shorter
                // than `tokens`.
                assert_eq!(tokens, expected);
            }
        };
    }

    lexer_test!(
        simple_lex,
        "milk and cookies",
        vec![
            Token::from("milk"),
            Token::from("and"),
            Token::from("cookies"),
        ]
    );
    lexer_test!(
        boolean_lex,
        "completed = false",
        vec![
            Token::from("completed"),
            Token::from("="),
            Token::from("false"),
        ]
    );
    lexer_test!(
        like_comparison,
        "title ~ groceries",
        vec![
            Token::from("title"),
            Token::from("~"),
            Token::from("groceries"),
        ]
    );
    lexer_test!(
        shell_friendly_like_comparison,
        "title ^ groceries",
        vec![
            Token::from("title"),
            Token::from("^"),
            Token::from("groceries"),
        ]
    );
    lexer_test!(
        shell_friendly_not_like_comparison,
        "title ^^ groceries",
        vec![
            Token::from("title"),
            Token::from("^^"),
            Token::from("groceries"),
        ]
    );
    lexer_test!(
        shell_friendly_not_equal_comparison,
        "title ^= groceries",
        vec![
            Token::from("title"),
            Token::from("^="),
            Token::from("groceries"),
        ]
    );
    lexer_test!(
        not_like_comparison,
        "title !~ groceries",
        vec![
            Token::from("title"),
            Token::from("!~"),
            Token::from("groceries"),
        ]
    );
    lexer_test!(
        not_equal_comparison,
        "title != groceries",
        vec![
            Token::from("title"),
            Token::from("!="),
            Token::from("groceries"),
        ]
    );
    lexer_test!(
        single_grouped_expression,
        "(priority > 0)",
        vec![
            Token::from("("),
            Token::from("priority"),
            Token::from(">"),
            Token::from("0"),
            Token::from(")"),
        ]
    );
    // "-and" escapes the keyword so it lexes as a plain string.
    lexer_test!(
        keyword_escaped_lex,
        "milk -and cookies",
        vec![
            Token::from("milk"),
            Token::Str("and".to_string()),
            Token::from("cookies"),
        ]
    );
    lexer_test!(complicated_lex, "(priority > 5 and title ^ \"take out the trash\") or (context = \"work\" and (priority >= 2 or (\"my little pony\")))", vec![
        Token::from("("),
        Token::from("priority"),
        Token::from(">"),
        Token::from("5"),
        Token::from("and"),
        Token::from("title"),
        Token::from("^"),
        Token::from("take out the trash"),
        Token::from(")"),
        Token::from("or"),
        Token::from("("),
        Token::from("context"),
        Token::from("="),
        Token::from("work"),
        Token::from("and"),
        Token::from("("),
        Token::from("priority"),
        Token::from(">="),
        Token::from("2"),
        Token::from("or"),
        Token::from("("),
        Token::from("my little pony"),
        Token::from(")"),
        Token::from(")"),
        Token::from(")"),
    ]);
}