// punk_parser 0.0.0
//
// Parser for the Punk language.
// Documentation
// Copyright (C) 2020 OTLab
//
// This file is part of Punklang.
//
// Punklang is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 2 of the License, or
// (at your option) any later version.
//
// Punklang is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Punklang.  If not, see <http://www.gnu.org/licenses/>.

pub use crate::token::Tok;
use std::str::CharIndices;
use std::iter::Peekable;
use std::collections::HashMap;
use crate::location::Location;

/// Errors the lexer can report while scanning input.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LexicalError {
    /// A string literal could not be read: the input ended before the
    /// closing `"`, or a backslash was followed by something other than
    /// `"` or `\`.
    StringLiteralError,
}

impl std::fmt::Display for LexicalError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            LexicalError::StringLiteralError => f.write_str("malformed string literal"),
        }
    }
}

// Lets callers box the error or propagate it with `?` into `dyn Error`.
impl std::error::Error for LexicalError {}

/// A token paired with its span, laid out as `(start, token, end)`.
pub type Spanned = (Location, Tok, Location);

/// Outcome of lexing one token: a [`Spanned`] token on success, or an
/// error of the caller-chosen type `E`.
pub type LexResult<E> = Result<Spanned, E>;

lazy_static! {
    /// Reserved words, mapped to the token each one lexes to.
    ///
    /// Identifiers are looked up here after scanning; anything not listed
    /// becomes an ordinary identifier token.
    static ref KEYWORDS: HashMap<&'static str, Tok> =
        vec![("def", Tok::Def)].into_iter().collect();
}

/// Lexer over a borrowed input string.
///
/// `Clone` makes it possible to snapshot the lexer position (for example to
/// try a speculative scan), and `Debug` allows it to appear in diagnostics.
#[derive(Debug, Clone)]
pub struct Lexer<'input> {
    /// Remaining input as `(byte offset, char)` pairs with one item of
    /// lookahead available through `peek`.
    chars: Peekable<CharIndices<'input>>,
}

impl<'input> Lexer<'input> {
    /// Creates a lexer positioned at the start of `input`.
    pub fn new(input: &'input str) -> Self {
        Lexer { chars: input.char_indices().peekable() }
    }

    /// Discards the next character of the input, if there is one.
    pub fn skip_char(&mut self) {
        self.chars.next();
    }

    /// Appends the run of ASCII digits at the current position to `buf`
    /// and returns it.
    ///
    /// Stops, without consuming it, at the first non-digit character or at
    /// end of input.
    pub fn consume_int(&mut self, mut buf: String) -> String {
        while let Some(&(_, c)) = self.chars.peek() {
            if !c.is_ascii_digit() {
                break;
            }
            buf.push(c);
            self.skip_char();
        }
        buf
    }

    /// Reads the character that follows a backslash inside a string literal.
    ///
    /// Only `\"` and `\\` are recognised; the escaped character is pushed
    /// onto `buf` and `true` is returned. For any other character (which is
    /// still consumed) or at end of input, `buf` is left untouched and
    /// `false` is returned.
    pub fn consume_escape(&mut self, buf: &mut String) -> bool {
        match self.chars.next() {
            Some((_, ch)) if ch == '"' || ch == '\\' => {
                buf.push(ch);
                true
            }
            _ => false,
        }
    }

    /// Reads the remainder of a string literal whose opening quote has
    /// already been consumed.
    ///
    /// Returns the literal's contents (appended to `buf`, escapes resolved)
    /// together with the byte offset just past the closing quote. Returns
    /// `None` if the input ends before a closing quote or an escape
    /// sequence is not accepted by [`Lexer::consume_escape`].
    pub fn consume_str(&mut self, mut buf: String) -> Option<(String, usize)> {
        loop {
            match self.chars.next()? {
                (_, '\\') => {
                    if !self.consume_escape(&mut buf) {
                        return None;
                    }
                }
                // The closing quote is one byte wide, so the literal ends at `end + 1`.
                (end, '"') => return Some((buf, end + 1)),
                (_, c) => buf.push(c),
            }
        }
    }

    /// Appends the run of identifier characters (alphabetic, ASCII digit,
    /// or `_`) at the current position to `buf` and returns it.
    ///
    /// Stops, without consuming it, at the first other character or at end
    /// of input.
    pub fn consume_ident(&mut self, mut buf: String) -> String {
        while let Some(&(_, c)) = self.chars.peek() {
            if !(c.is_alphabetic() || c.is_ascii_digit() || c == '_') {
                break;
            }
            buf.push(c);
            self.skip_char();
        }
        buf
    }

    /// Lexes a multi-character token whose first character `head` was
    /// already consumed at byte offset `i`.
    ///
    /// * ASCII digit: an integer token holding the digit run.
    /// * `"`: a string token, or `StringLiteralError` if the literal is
    ///   malformed.
    /// * alphabetic or `_`: a keyword token if the word is in `KEYWORDS`,
    ///   otherwise an identifier token.
    ///
    /// Returns `None` when `head` starts none of these.
    pub fn look_ahead(&mut self, i: usize, head: char) -> Option<LexResult<LexicalError>> {
        if head.is_ascii_digit() {
            let digits = self.consume_int(head.to_string());
            // Offsets are in bytes, so the span end is start + byte length.
            let end = i + digits.len();
            Some(Ok((i, Tok::Int(digits), end)))
        } else if head == '"' {
            Some(match self.consume_str(String::new()) {
                Some((text, end)) => Ok((i, Tok::Str(text), end)),
                None => Err(LexicalError::StringLiteralError),
            })
        } else if head.is_alphabetic() || head == '_' {
            let word = self.consume_ident(head.to_string());
            let end = i + word.len();
            let tok = match KEYWORDS.get(word.as_str()) {
                Some(keyword) => keyword.clone(),
                None => Tok::Ident(word),
            };
            Some(Ok((i, tok, end)))
        } else {
            None
        }
    }
}

impl<'input> Iterator for Lexer<'input> {
    type Item = LexResult<LexicalError>;

    /// Produces the next token, skipping spaces, tabs and line breaks.
    ///
    /// Single-character punctuation is recognised here; every other
    /// character is handed to `look_ahead`, whose result (including `None`)
    /// is returned unchanged. Yields `None` once the input is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let (start, c) = self.chars.next()?;
            let tok = match c {
                ' ' | '\t' | '\n' | '\r' => continue,
                '(' => Tok::LeftParen,
                ')' => Tok::RightParen,
                ':' => Tok::Colon,
                ';' => Tok::Semi,
                ',' => Tok::Comma,
                '{' => Tok::LeftBracket,
                '}' => Tok::RightBracket,
                '+' => Tok::Plus,
                '-' => Tok::Minus,
                other => return self.look_ahead(start, other),
            };
            // Every punctuation token above is a single one-byte character.
            return Some(Ok((start, tok, start + 1)));
        }
    }
}