1use syntax::tokens::{
2 TokenType,
3 Token,
4 BinOp,
5};
6
7pub struct Lexer {
12 tokens: Vec<Token>,
13 lines: u32,
14 start: usize,
15 pos: usize,
16 top: usize,
17}
18
19impl Lexer {
20 pub fn new() -> Lexer {
21 Lexer {
22 tokens: Vec::new(),
23 lines: 0,
24 start: 0,
25 pos: 0,
26 top: 0,
27 }
28 }
29
30 pub fn from(tokens: Vec<Token>) -> Lexer {
31 Lexer {
32 tokens: tokens,
33 lines: 0,
34 start: 0,
35 pos: 0,
36 top: 0,
37 }
38 }
39
40 pub fn get_tokens(&self) -> &Vec<Token> {
41 &self.tokens
42 }
43
44 pub fn reset(&mut self) {
45 self.tokens = Vec::new();
46 self.lines = 0;
47 self.start = 0;
48 self.pos = 0;
49 self.top = 0;
50 }
51
52 fn push_token(&mut self, token_type: TokenType, line: &str) {
53 self.tokens.push(Token::new(
54 token_type,
55
56 String::from(&line[self.start .. self.pos]),
57 self.lines,
58 self.pos as u32,
59 ));
60
61 self.start = self.pos;
62 }
63
64 fn look(&self, line: &str, offset: usize) -> char {
65 match line.chars().nth(self.pos + offset) {
66 Some(v) => v,
67 None => ' ',
68 }
69 }
70
71 fn skip_whitespace(&mut self, line: &str) {
72 while self.look(line, 0) == ' ' && self.pos < line.len() - 1 {
73 self.pos += 1;
74 self.start += 1;
75 }
76 }
77
78 pub fn bin_op(v: &str) -> Option<(BinOp, u8)> {
79 match v {
80 "*" => Some((BinOp::Mul, 1)),
81 "/" => Some((BinOp::Div, 1)),
82 "+" => Some((BinOp::Plus, 2)),
83 "-" => Some((BinOp::Minus, 2)),
84 "==" => Some((BinOp::Equal, 4)),
85 "~=" => Some((BinOp::NotEqual, 4)),
86 "<" => Some((BinOp::Lt, 4)),
87 ">" => Some((BinOp::Gt, 4)),
88 "<=" => Some((BinOp::GtEqual, 4)),
89 ">=" => Some((BinOp::LtEqual, 4)),
90 _ => None,
91 }
92 }
93
94 fn keyword(&mut self, line: &str) -> Option<TokenType> {
95 match &line[self.start .. self.pos] {
96 "define" => Some(TokenType::Definition),
97 "lambda" => Some(TokenType::Lambda),
98 "if" => Some(TokenType::If),
99 "else" => Some(TokenType::Else),
100 "return" => Some(TokenType::Return),
101 "true" => Some(TokenType::True),
102 "false" => Some(TokenType::False),
103 "do" => Some(TokenType::Do),
104 "end" => Some(TokenType::End),
105 _ => None
106 }
107 }
108
109 fn is_bin_op(&mut self, line: &str) -> bool {
110 let mut is_bin_op = false;
111
112 let mut offset = 2;
113 while self.pos + offset >= line.len() {
114 offset -= 1;
115 }
116
117 while offset > 0 && !is_bin_op {
118 match Lexer::bin_op(&line[self.start .. self.pos + offset]) {
119 Some(_) => is_bin_op = true,
120 None => ()
121 }
122 offset -= 1;
123 }
124
125 self.pos += offset;
126 is_bin_op
127 }
128
129 pub fn next_token(&mut self) -> bool {
130 if self.top < self.tokens.len() {
131 self.top += 1;
132 return true
133 }
134 false
135 }
136
137 pub fn previous_token(&mut self) -> bool {
138 if self.top != 0 {
139 self.top -= 1;
140 return true
141 }
142 false
143 }
144
145 pub fn tokens_remaining(&self) -> usize {
146 self.tokens.len() - self.top
147 }
148
149 pub fn current_token(&self) -> &Token {
150 if self.top > self.tokens.len() - 1 {
151 return &self.tokens[self.tokens.len() - 1]
152 }
153 &self.tokens[self.top]
154 }
155
156 pub fn current_token_content(&self) -> String {
157 self.current_token().content.clone()
158 }
159
160 pub fn match_current_token(&self, t: TokenType) -> Result<&Token, String> {
161 match self.current_token().token_type == t {
162 true => Ok(self.current_token()),
163 false => Err(format!(
164 "expected {:?} but found {:?}", t, self.current_token()
165 ))
166 }
167 }
168
169 fn push_move(&mut self, t: TokenType, line: &str) {
170 self.pos += 1;
171 self.push_token(t, line);
172 }
173
174 pub fn tokenize(&mut self, source: String) -> Result<(), String> {
175
176 fn identifier_valid(c: char) -> bool {
177 c.is_alphabetic() || c == '_'
178 || c == '?'
179 || c == '!'
180 || c.is_digit(10)
181 }
182
183 for line in source.lines() {
184 self.lines += 1;
185 self.start = 0;
186 self.pos = 0;
187
188 while self.pos < line.len() {
189 self.skip_whitespace(line);
190
191 let chr = self.look(line, 0);
192
193 if chr == '"' || chr == '\'' {
194 let del = chr;
195
196 self.start += 1;
197 self.pos += 1;
198
199 while self.look(line, 0) != del {
200 self.pos += 1;
201 }
202
203 self.push_token(TokenType::Text, line);
204
205 self.start += 1;
206 self.pos += 1;
207
208 continue
209 }
210
211 if chr.is_alphabetic() {
212 while identifier_valid(self.look(line, 0)) {
213 self.pos += 1;
214 }
215
216 match self.keyword(line) {
217 Some(t) => self.push_token(t, line),
218 None => self.push_token(TokenType::Identifier, line),
219 }
220
221 continue
222 }
223
224 let peek = self.look(line, 1);
225
226 if chr.is_digit(10) ||
227 chr == '.' && peek.is_digit(10) ||
228 chr == '-' && peek.is_digit(10) {
229
230 if chr == '-' {
231 self.pos += 1;
232 }
233
234 while self.look(line, 0).is_digit(10) {
235 self.pos += 1;
236 }
237
238 if self.look(line, 0) == '.' && self.look(line, 1).is_digit(10) {
239 self.pos += 1;
240 while self.look(line, 0).is_digit(10) {
241 self.pos += 1;
242 }
243 self.push_token(TokenType::Float, line);
244 continue;
245 }
246 self.push_token(TokenType::Integer, line);
247 continue;
248 }
249
250 if chr == '-' && self.look(line, 1) == '>' {
251 self.pos += 2;
252 self.push_token(TokenType::Arrow, line);
253
254 continue
255 }
256
257 if self.is_bin_op(line) {
258 self.pos += 1;
259 self.push_token(TokenType::BinOp, line);
260
261 continue
262 }
263
264 match chr {
265 '=' => {
266 self.push_move(TokenType::Assign, line);
267 continue
268 }
269
270 '(' => {
271 self.push_move(TokenType::LParen, line);
272 continue
273 }
274
275 ')' => {
276 self.push_move(TokenType::RParen, line);
277 continue
278 }
279
280 '[' => {
281 self.push_move(TokenType::LBracket, line);
282 continue
283 }
284
285 ']' => {
286 self.push_move(TokenType::RBracket, line);
287 continue
288 }
289
290 '{' => {
291 self.push_move(TokenType::LBrace, line);
292 continue
293 }
294
295 '}' => {
296 self.push_move(TokenType::RBrace, line);
297 continue
298 }
299
300 ':' => {
301 self.push_move(TokenType::Colon, line);
302 continue
303 }
304
305 ',' => {
306 self.push_move(TokenType::Comma, line);
307 continue
308 }
309
310 '.' => {
311 self.push_move(TokenType::Period, line);
312 continue
313 }
314
315 ';' => {
316 self.push_move(TokenType::Semicolon, line);
317 continue
318 }
319
320 ' ' => break,
321 '\0' => break,
322 '\n' => break,
323
324 _ => {
325 panic!("fucked symbol: {}, line: {} col: {}",
326 &line[self.start .. line.len()],
327 self.lines, self.start)
328 },
329 }
330 }
331 }
332
333 Ok(())
334 }
335}