1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
//! A small lexer driven by regular-expression rules.
//!
//! The items visible in this file are `Rule` (a regex paired with a token-producing
//! callback), the `Rules` alias for a list of them, the `rules!` macro that builds such
//! a list, and `lex`, which applies the rules to an input string.

// Lint policy: each lint listed below is promoted to a hard error for the whole crate.
// In particular, every public item must be documented (`missing_docs`) and implement
// `Debug` (`missing_debug_implementations`), and `unsafe` code is rejected.
#![deny(missing_docs,
missing_debug_implementations, missing_copy_implementations,
trivial_casts, trivial_numeric_casts,
unsafe_code, unstable_features,
unused_import_braces, unused_qualifications)]
use regex::Regex;
/// A list of lexing rules producing tokens of type `T`.
///
/// Order matters: `lex` tries the rules front to back and uses the first one that matches.
pub type Rules<T> = Vec<Rule<T>>;
/// A single lexing rule: a regular expression paired with the callback run on its match.
///
/// `T` is the token type the callback may produce.
pub struct Rule<T> {
/// Pattern searched for in the not-yet-consumed input; `lex` only uses a match that
/// starts at the very beginning of that input.
pub r: Regex,
/// Callback invoked by `lex` with the matched text and the line and character counters
/// (both starting at 1) at the start of the match. Returning `Some` emits a token;
/// returning `None` consumes the text without emitting one.
pub f: Box<dyn Fn(&str, usize, usize) -> Option<T>>,
}
use std::fmt;
impl<T> fmt::Debug for Rule<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Rule({:?})", self.r)
}
}
/// Builds a `Vec<Rule<T>>` from a list of `pattern => callback` pairs.
///
/// Invocation shape: `rules!(TokenType; pattern => callback, pattern => callback)`.
/// A trailing comma after the last pair is accepted. The rules are stored in the order
/// they are written.
///
/// Each `pattern` is passed to `Regex::new`, and each `callback` is boxed and stored in
/// the rule's `f` field, so it must be usable as
/// `Fn(&str, usize, usize) -> Option<TokenType>`.
///
/// The expansion contains `extern crate regex;`, so the `regex` crate must be available
/// to the crate that invokes this macro.
///
/// # Panics
///
/// The expansion calls `.unwrap()` on the result of `Regex::new`, so it panics at run
/// time if any pattern fails to compile.
#[macro_export]
macro_rules! rules {
    // Trailing-comma form: drop the final comma and defer to the arm below.
    // The recursive call goes through `$crate::` so that it still resolves when the
    // macro was invoked by path or imported under another name at the call site.
    ($t:ty; $($e:expr => $f:expr,)+) => { $crate::rules!($t; $($e => $f),+) };
    ($t:ty; $($e:expr => $f:expr),*) => {{
        extern crate regex;
        use regex::Regex;
        use $crate::Rule;
        let mut s: Vec<Rule<$t>> = Vec::new();
        $(
            s.push(Rule {
                r: Regex::new($e).unwrap(),
                f: Box::new($f)
            });
        )*
        s
    }};
}
/// Splits `content` into tokens by repeatedly applying `rules` at the current position.
///
/// At each step the rules are tried in order. The first rule whose regex matches a
/// non-empty piece of text starting exactly at the current position wins: its callback
/// is called with the matched text and the current `line` and `character` counters, and
/// any `Some(token)` it returns is appended to the output. The matched text is then
/// consumed, whether or not a token was produced.
///
/// `line` and `character` both start at 1. `character` advances by the byte length of
/// the consumed text (`str::len`), and is reset after every `'\n'` inside a match, so it
/// is the byte offset within the current line plus one.
///
/// Zero-length matches are ignored. A rule such as `a*` can match the empty string at
/// the current position; accepting that match would consume nothing and the loop would
/// never terminate, so such a match is treated as "this rule did not match here".
///
/// # Errors
///
/// Returns `Err` with the message `No match for content: <remaining input>` as soon as
/// no rule yields a non-empty match at the current position.
pub fn lex<T>(content: &str, rules: Rules<T>) -> Result<Vec<T>, String> {
    let mut pos: usize = 0;
    let mut line: usize = 1;
    let mut character: usize = 1;
    let mut tokens: Vec<T> = Vec::new();

    while let Some(rest) = content.get(pos..).filter(|rest| !rest.is_empty()) {
        // First rule, in declaration order, that matches a non-empty prefix of `rest`.
        let hit = rules.iter().find_map(|rule| {
            rule.r
                .find(rest)
                .filter(|m| m.start() == 0 && m.end() > 0)
                .map(|m| (rule, m))
        });
        let (rule, m) = match hit {
            Some(found) => found,
            None => return Err(format!("No match for content: {:?}", rest)),
        };

        let text = m.as_str();
        if let Some(token) = (rule.f)(text, line, character) {
            tokens.push(token);
        }

        // Move the line/character counters past the consumed text.
        match text.rfind('\n') {
            Some(last_newline) => {
                line += text.matches('\n').count();
                // Bytes after the last newline, plus one because the counter is 1-based.
                character = text.len() - last_newline;
            }
            None => character += text.len(),
        }
        pos += m.end();
    }

    Ok(tokens)
}