1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
use regex::Regex;
use std::fmt;
/// Error produced when the input cannot be tokenized.
///
/// Implements [`std::error::Error`], so it can be boxed, propagated with `?`
/// into generic error types, and printed with either `{}` or `{:?}`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexError {
    /// Position reported in the error message.
    ///
    /// NOTE(review): presumably the same kind of offset as `Lexer::pos` --
    /// confirm against the code that constructs this error.
    pub pos: usize,
}

impl fmt::Display for LexError {
    /// Writes the human-readable message `Lex error at position: <pos>`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "Lex error at position: {}", self.pos)
    }
}

// `Debug` + `Display` are all the trait needs; the default methods suffice.
impl ::std::error::Error for LexError {}
/// A cursor over an input string that hands out tokens as
/// `(start, text, end)` triples.
///
/// `buf` always holds the not-yet-consumed tail of the input and `pos` is the
/// byte offset of that tail within the original input, so every method
/// advances the two together.
pub struct Lexer<'input> {
    /// Remaining, unconsumed input.
    pub buf: &'input str,
    /// Byte offset of `buf` from the start of the original input.
    pub pos: usize,
}

impl<'input> Lexer<'input> {
    /// Creates a lexer positioned at the start of `input`.
    pub fn new(input: &'input str) -> Self {
        Lexer { buf: input, pos: 0 }
    }

    /// Skips any leading whitespace (as defined by `char::is_whitespace`).
    ///
    /// Returns `Some(())` when a non-whitespace character follows, and `None`
    /// when the input is exhausted (either it was already empty or it
    /// consisted only of whitespace, which is still consumed).
    pub fn consume_whitespace(&mut self) -> Option<()> {
        let buf: &'input str = self.buf;
        // `find` yields a byte offset that is always on a char boundary, so
        // multi-byte whitespace (e.g. U+00A0, U+2003) is skipped safely.
        let skipped = buf
            .find(|c: char| !c.is_whitespace())
            .unwrap_or(buf.len());
        self.pos += skipped;
        self.buf = &buf[skipped..];
        if self.buf.is_empty() {
            None
        } else {
            Some(())
        }
    }

    /// Tries to match `r` against the remaining input and consumes through
    /// the end of the match.
    ///
    /// Returns `(start, matched_text, end)` with byte offsets into the
    /// original input, or `None` (leaving the lexer untouched) when `r` does
    /// not match. Patterns are expected to be anchored with `^`; if an
    /// unanchored pattern matches further into the input, the text before the
    /// match is skipped and `start` points at the match itself.
    pub fn consume_regex(&mut self, r: &Regex) -> Option<(usize, &'input str, usize)> {
        let buf: &'input str = self.buf;
        let mat = r.find(buf)?;
        // `mat.start()` is 0 for anchored patterns; adding it keeps the
        // reported span equal to the matched text in every case.
        let start_pos = self.pos + mat.start();
        self.pos += mat.end();
        self.buf = &buf[mat.end()..];
        Some((start_pos, mat.as_str(), self.pos))
    }

    /// Consumes a double-quoted string literal at the current position.
    ///
    /// Returns `(start, contents, end)` where `contents` excludes the quotes
    /// and `start..end` (byte offsets) spans the whole literal including
    /// them. Returns `None` without consuming anything when the input does
    /// not start with `"` or the closing `"` is missing. Escape sequences are
    /// not interpreted: the literal ends at the first `"` after the opener.
    pub fn consume_string_literal(&mut self) -> Option<(usize, &'input str, usize)> {
        let buf: &'input str = self.buf;
        if !buf.starts_with('"') {
            return None;
        }
        // Search by byte offset so that non-ASCII contents slice correctly.
        let body = &buf[1..];
        let len = body.find('"')?;
        let contents = &body[..len];
        let start_pos = self.pos;
        // Opening quote + contents + closing quote.
        let consumed = len + 2;
        self.pos += consumed;
        self.buf = &buf[consumed..];
        Some((start_pos, contents, self.pos))
    }
}
// Token patterns shared by the lexer, declared through `lazy_static!`.
// Every pattern is anchored with `^`, so it only matches at the very start of
// the text it is given. `unwrap()` panics if a pattern fails to compile.
//
// NOTE(review): NAT_REGEX also matches the leading `0` of `0x..`/`0b..`
// literals, so callers presumably need to try HEX_REGEX/BIN_REGEX before
// NAT_REGEX -- confirm against the call site.
lazy_static! {
// Identifier: an ASCII letter or `_`, followed by ASCII letters, digits or `_`.
pub static ref ID_REGEX: Regex = Regex::new(r"^[a-zA-Z_][0-9a-zA-Z_]*").unwrap();
// Hexadecimal literal: `#x` or `0x` followed by one or more hex digits.
pub static ref HEX_REGEX: Regex = Regex::new(r"^[#0]x[0-9a-fA-F]+").unwrap();
// Binary literal: `#b` or `0b` followed by one or more `0`/`1` digits.
pub static ref BIN_REGEX: Regex = Regex::new(r"^[#0]b[0-1]+").unwrap();
// Natural number: one or more decimal digits.
pub static ref NAT_REGEX: Regex = Regex::new(r"^[0-9]+").unwrap();
}