isla_lib/
lexer.rs

1// BSD 2-Clause License
2//
3// Copyright (c) 2020 Alasdair Armstrong
4//
5// All rights reserved.
6//
7// Redistribution and use in source and binary forms, with or without
8// modification, are permitted provided that the following conditions are
9// met:
10//
11// 1. Redistributions of source code must retain the above copyright
12// notice, this list of conditions and the following disclaimer.
13//
14// 2. Redistributions in binary form must reproduce the above copyright
15// notice, this list of conditions and the following disclaimer in the
16// documentation and/or other materials provided with the distribution.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30use regex::Regex;
31use std::fmt;
32
/// Error produced when the lexer cannot recognise a token.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so the error can be used
/// with `unwrap`/`expect`, printed with `{:?}`, and compared in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexError {
    /// Position in the input that is reported in the error message.
    pub pos: usize,
}
36
37impl fmt::Display for LexError {
38    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
39        write!(f, "Lexical error at position: {}", self.pos)
40    }
41}
42
/// A minimal hand-driven lexer over a borrowed input string.
///
/// The `consume_*` methods advance `buf` and `pos` together as input is
/// consumed. Cloning is cheap (a string slice and an offset), which makes it
/// possible to snapshot the lexer state before a speculative consume.
#[derive(Debug, Clone)]
pub struct Lexer<'input> {
    /// The part of the input that has not been consumed yet.
    pub buf: &'input str,
    /// Offset of the start of `buf` relative to the start of the original input.
    pub pos: usize,
}
47
48impl<'input> Lexer<'input> {
49    pub fn new(input: &'input str) -> Self {
50        Lexer { buf: input, pos: 0 }
51    }
52}
53
54impl<'input> Lexer<'input> {
55    pub fn consume_whitespace(&mut self) -> Option<()> {
56        loop {
57            if self.buf.chars().next()?.is_whitespace() {
58                self.pos += 1;
59                self.buf = &self.buf[1..]
60            } else {
61                break Some(());
62            }
63        }
64    }
65
66    pub fn consume_regex(&mut self, r: &Regex) -> Option<(usize, &'input str, usize)> {
67        match r.find(&self.buf) {
68            None => None,
69            Some(mat) => {
70                let start_pos = self.pos;
71                self.pos += mat.end();
72                self.buf = &self.buf[mat.end()..];
73                Some((start_pos, mat.as_str(), self.pos))
74            }
75        }
76    }
77
78    pub fn consume_string_literal(&mut self) -> Option<(usize, &'input str, usize)> {
79        if self.buf.chars().next()? == '\"' {
80            let mut string_end = 1;
81            loop {
82                if let '\"' = self.buf.chars().nth(string_end)? {
83                    let contents = &self.buf[1..string_end];
84                    let start_pos = self.pos;
85                    self.pos += string_end + 1;
86                    self.buf = &self.buf[(string_end + 1)..];
87                    break Some((start_pos, &contents, self.pos));
88                }
89                string_end += 1
90            }
91        } else {
92            None
93        }
94    }
95}
96
// Token regexes, compiled on first use. Every pattern is anchored with `^`, so
// it only matches at the very start of the string it is applied to.
lazy_static! {
    /// Identifier: an ASCII letter or `_`, followed by any number of ASCII
    /// letters, digits or `_`.
    pub static ref ID_REGEX: Regex = Regex::new(r"^[a-zA-Z_][0-9a-zA-Z_]*").unwrap();
    /// Hexadecimal literal: a `#x` or `0x` prefix followed by one or more hex
    /// digits (either case).
    pub static ref HEX_REGEX: Regex = Regex::new(r"^[#0]x[0-9a-fA-F]+").unwrap();
    /// Binary literal: a `#b` or `0b` prefix followed by one or more `0`/`1`
    /// digits.
    pub static ref BIN_REGEX: Regex = Regex::new(r"^[#0]b[0-1]+").unwrap();
    /// Natural number: one or more decimal digits.
    pub static ref NAT_REGEX: Regex = Regex::new(r"^[0-9]+").unwrap();
}