1use std::{iter::Peekable, str::Chars};
2
3use nom::AsChar;
4
5use crate::token::{Data, Token};
6
/// Errors produced while lexing. Currently uninhabited: `next_token` always
/// returns `Ok` (unrecognized text becomes a `Word`), which is why `parse`
/// needs `#[allow(irrefutable_let_patterns)]` on its `while let Ok(..)`.
pub enum LexError {}
/// A hand-rolled lexer for a Forth-like token stream.
///
/// Tokens borrow their text directly from the input (`Data<'a>` spans sliced
/// out of `raw`), so the lexer holds the source for its whole lifetime.
#[derive(Debug)]
pub struct Lexer<'a> {
    // Offset into `raw` of the character currently held in `ch`.
    // NOTE(review): offsets are advanced one per character read, so they are
    // only valid byte indices for slicing `raw` when the input is ASCII —
    // confirm inputs are ASCII or track UTF-8 widths.
    position: usize,
    // Offset of the next character to be consumed (one past `position`).
    read_position: usize,
    // The current character; '\0' doubles as the end-of-input sentinel.
    ch: char,
    // The complete original input; token values are slices of this.
    raw: &'a str,
    // Peekable character iterator over the same input.
    input: Peekable<Chars<'a>>,
}
16
17impl<'a> Lexer<'a> {
18 pub fn new(input: &'a str) -> Lexer<'a> {
19 let mut lex = Lexer {
20 position: 0,
21 read_position: 0,
22 ch: '0',
23 input: input.chars().peekable(),
24 raw: input,
25 };
26 lex.read_char();
27
28 lex
29 }
30
31 pub fn reset(&mut self) {
32 self.position = 0;
33 self.read_position = 0;
34 self.ch = '\0';
35 }
36
37 pub fn here(&self) -> Data<'a> {
38 Data {
39 start: self.position,
40 end: self.position,
41 value: "",
42 }
43 }
44
45 pub fn next_token(&mut self) -> Result<Token<'a>, LexError> {
46 self.skip_whitespace();
47
48 let tok = match self.ch {
49 ':' => Token::Colon(self.read_single_char_token()),
50 ';' => Token::Semicolon(self.read_single_char_token()),
51 '%' => self.try_parse_number_with_prefix(|c| c.is_digit(2)),
52 '&' => self.try_parse_number_with_prefix(|c| c == 'x' || c.is_digit(8)),
53 '$' => self.try_parse_number_with_prefix(|c| c.is_hex_digit()),
54 '\'' => self.parse_quote_or_word(),
55 '0' => self.try_parse_number_with_prefix(|c| c == 'x' || c.is_hex_digit()),
56 '0'..='9' => {
57 let ident = self.read_number();
58 Token::Number(ident)
59 }
60 '\\' => {
61 if self.peek_char().is_whitespace() {
62 let comment = self.read_comment_to('\n');
63 Token::Comment(comment)
64 } else {
65 let ident = self.read_ident();
66 Token::Word(ident)
67 }
68 }
69 '(' => {
70 if self.peek_char().is_whitespace() {
71 let comment = self.read_comment_to(')');
72 if comment.value.contains("--") {
74 Token::StackComment(comment)
75 } else {
76 Token::Comment(comment)
77 }
78 } else {
79 let ident = self.read_ident();
80 Token::Word(ident)
81 }
82 }
83 '\0' => {
84 let mut dat = self.here();
85 dat.value = "\0";
86 self.read_char();
87 Token::Eof(dat)
88 }
89 _ => {
90 let ident = self.read_ident();
91 Token::Word(ident)
92 }
93 };
94
95 Ok(tok)
96 }
97
98 fn read_char(&mut self) {
99 self.ch = match self.input.peek() {
100 Some(ch) => *ch,
101 None => '\0',
102 };
103
104 self.input.next();
105
106 self.position = self.read_position;
107 self.read_position += 1;
108 }
109
110 fn try_parse_number_with_prefix(&mut self, validator: fn(char) -> bool) -> Token<'a> {
111 if validator(self.peek_char()) {
112 Token::Number(self.read_number())
113 } else {
114 Token::Word(self.read_ident())
115 }
116 }
117
118 fn parse_quote_or_word(&mut self) -> Token<'a> {
119 let begin = self.position;
120 let next = self.peek_char();
121
122 if next.is_whitespace() {
123 return Token::Word(self.read_ident());
124 }
125
126 self.read_char(); if self.peek_char() == '\'' {
129 self.read_char(); let number = Data {
132 start: begin,
133 end: self.position + 1,
134 value: &self.raw[begin..(self.position + 1)],
135 };
136 self.read_char(); return Token::Number(number);
138 }
139
140 let mut word = self.read_ident();
142 word.start = begin;
143 word.value = &self.raw[begin..word.end];
144 Token::Word(word)
145 }
146
147 fn read_single_char_token(&mut self) -> Data<'a> {
148 let start = self.position;
149 self.read_char();
150 Data {
151 start,
152 end: start + 1,
153 value: &self.raw[start..start + 1],
154 }
155 }
156
157 fn peek_char(&mut self) -> char {
158 match self.input.peek() {
159 Some(ch) => *ch,
160 None => '\0',
161 }
162 }
163
164 fn skip_whitespace(&mut self) {
165 while self.ch.is_ascii_whitespace() {
166 self.read_char();
167 }
168 }
169
170 fn read_comment_to(&mut self, to: char) -> Data<'a> {
171 let start = self.position;
172 while self.ch != to && self.ch != '\0' {
173 self.read_char();
174 }
175 if to == ')' {
176 self.read_char();
177 }
178
179 let end = self.position.min(self.raw.len());
180 Data {
181 start,
182 end,
183 value: &self.raw[start..end],
184 }
185 }
186
187 fn read_ident(&mut self) -> Data<'a> {
188 let start = self.position;
189 while !self.ch.is_whitespace() && self.ch != '\0' {
190 self.read_char();
191 }
192 let end = self.position.min(self.raw.len());
193 Data {
194 start,
195 end,
196 value: &self.raw[start..end],
197 }
198 }
199
200 fn read_number(&mut self) -> Data<'a> {
201 let start = self.position;
202 while self.ch.is_hex_digit()
204 || self.ch == '_'
205 || self.ch == '&'
206 || self.ch == '%'
207 || self.ch == 'x'
208 || self.ch == '$'
209 {
210 self.read_char();
211 }
212 let end = self.position.min(self.raw.len());
213 Data {
214 start,
215 end,
216 value: &self.raw[start..end],
217 }
218 }
219
220 pub fn parse(&mut self) -> Vec<Token<'a>> {
221 let mut tokens = vec![];
222 #[allow(irrefutable_let_patterns)]
223 while let Ok(tok) = self.next_token() {
224 match tok {
225 Token::Eof(_) => {
226 break;
227 }
228 _ => {
229 tokens.push(tok.clone());
230 }
231 }
232 }
233 tokens
234 }
235}
236
#[cfg(test)]
mod tests {
    // Expected `Data::new(start, end, value)` spans are byte offsets into
    // the exact source string handed to `Lexer::new`.
    use super::*;
    use Token::*;

    // End-to-end: a full word definition with a stack comment, a number
    // literal, a word, a line comment, and the terminating semicolon.
    #[test]
    fn test_parse_proper_def() {
        let mut lexer = Lexer::new(": add1 ( n -- n )\n 1 + \\ adds one\n;");
        let tokens = lexer.parse();
        let expected = vec![
            Colon(Data::new(0, 1, ":")),
            Word(Data::new(2, 6, "add1")),
            StackComment(Data::new(7, 17, "( n -- n )")),
            Number(Data::new(20, 21, "1")),
            Word(Data::new(22, 23, "+")),
            Comment(Data::new(24, 34, "\\ adds one")),
            Semicolon(Data::new(35, 36, ";")),
        ];
        assert_eq!(tokens, expected)
    }

    #[test]
    fn test_parse_simple_def() {
        let mut lexer = Lexer::new(": add1 1 + ;");
        let tokens = lexer.parse();
        let expected = vec![
            Colon(Data::new(0, 1, ":")),
            Word(Data::new(2, 6, "add1")),
            Number(Data::new(7, 8, "1")),
            Word(Data::new(9, 10, "+")),
            Semicolon(Data::new(11, 12, ";")),
        ];
        assert_eq!(tokens, expected)
    }

    // A line comment ("\ …") ends at the newline (excluded); a paren
    // comment without "--" is a plain Comment, not a StackComment.
    #[test]
    fn test_parse_words_and_comments() {
        let mut lexer = Lexer::new("word \\ this is a comment\nword2 ( and this ) word3");
        let tokens = lexer.parse();
        let expected = vec![
            Word(Data::new(0, 4, "word")),
            Comment(Data::new(5, 24, "\\ this is a comment")),
            Word(Data::new(25, 30, "word2")),
            Comment(Data::new(31, 43, "( and this )")),
            Word(Data::new(44, 49, "word3")),
        ];
        assert_eq!(tokens, expected)
    }

    // An explicit '\0' in the input acts as the EOF sentinel and is not
    // emitted as part of any token.
    #[test]
    fn test_parse_words_on_lines() {
        let mut lexer = Lexer::new("some\nwords here\0");
        let tokens = lexer.parse();
        let expected = vec![
            Word(Data::new(0, 4, "some")),
            Word(Data::new(5, 10, "words")),
            Word(Data::new(11, 15, "here")),
        ];
        assert_eq!(tokens, expected)
    }

    #[test]
    fn test_parse_number_literal() {
        let mut lexer = Lexer::new("12");
        let tokens = lexer.parse();
        let expected = vec![Number(Data::new(0, 2, "12"))];
        assert_eq!(tokens, expected)
    }

    // '&' prefixes an octal literal; the prefix is kept in the token value.
    #[test]
    fn test_parse_number_oct() {
        let mut lexer = Lexer::new("&12");
        let tokens = lexer.parse();
        let expected = vec![Number(Data::new(0, 3, "&12"))];
        assert_eq!(tokens, expected)
    }

    // '%' prefixes a binary literal.
    #[test]
    fn test_parse_number_bin() {
        let mut lexer = Lexer::new("%0100101");
        let tokens = lexer.parse();
        let expected = vec![Number(Data::new(0, 8, "%0100101"))];
        assert_eq!(tokens, expected);
    }

    // Known gap: read_number does not validate digits against the base, so
    // "%12345" currently lexes as a Number rather than a Word.
    #[test]
    #[ignore]
    fn test_parse_number_bin_only_valid() {
        let mut lexer = Lexer::new("%12345");
        let tokens = lexer.parse();
        let expected = vec![Word(Data::new(0, 6, "%12345"))];
        assert_eq!(tokens, expected);
    }

    // '$' prefixes a hex literal; mixed case is accepted.
    #[test]
    fn test_parse_number_hex() {
        let mut lexer = Lexer::new("$FfAaDd");
        let tokens = lexer.parse();
        let expected = vec![Number(Data::new(0, 7, "$FfAaDd"))];
        assert_eq!(tokens, expected)
    }

    // "0x" is the alternative hex prefix.
    #[test]
    fn test_parse_number_0xhex() {
        let mut lexer = Lexer::new("0xFE");
        let tokens = lexer.parse();
        let expected = vec![Number(Data::new(0, 4, "0xFE"))];
        assert_eq!(tokens, expected)
    }

    // A character literal 'c' lexes as a Number spanning all three bytes.
    #[test]
    fn test_parse_number_char() {
        let mut lexer = Lexer::new("'c'");
        let tokens = lexer.parse();
        let expected = vec![Number(Data::new(0, 3, "'c'"))];
        assert_eq!(tokens, expected)
    }

    // A paren comment containing "--" is classified as a stack comment.
    #[test]
    fn test_parse_stack_comment() {
        let mut lexer = Lexer::new("( n1 n2 -- n3 )");
        let tokens = lexer.parse();
        let expected = vec![StackComment(Data::new(0, 15, "( n1 n2 -- n3 )"))];
        assert_eq!(tokens, expected)
    }

    #[test]
    fn test_parse_regular_comment() {
        let mut lexer = Lexer::new("( this is just a comment )");
        let tokens = lexer.parse();
        let expected = vec![Comment(Data::new(0, 26, "( this is just a comment )"))];
        assert_eq!(tokens, expected)
    }

    #[test]
    fn test_parse_stack_comment_complex() {
        let mut lexer = Lexer::new("( addr len -- addr' len' flag )");
        let tokens = lexer.parse();
        let expected = vec![StackComment(Data::new(
            0,
            31,
            "( addr len -- addr' len' flag )",
        ))];
        assert_eq!(tokens, expected)
    }

    // A line comment that runs to EOF (no trailing newline).
    #[test]
    fn test_parse_line_comment() {
        let mut lexer = Lexer::new("\\ this is a line comment");
        let tokens = lexer.parse();
        let expected = vec![Comment(Data::new(0, 24, "\\ this is a line comment"))];
        assert_eq!(tokens, expected)
    }

    #[test]
    fn test_parse_number_word() {
        let mut lexer = Lexer::new("word");
        let tokens = lexer.parse();
        let expected = vec![Word(Data::new(0, 4, "word"))];
        assert_eq!(tokens, expected)
    }

    // Data spans can index a ropey Rope directly (feature-gated).
    #[cfg(feature = "ropey")]
    #[test]
    fn test_to_ropey() {
        let progn = "word1 word2 word3";
        let rope = ropey::Rope::from_str(progn);
        let mut lexer = Lexer::new(progn);
        let tokens = lexer.parse();
        let word2 = if let Some(Token::Word(word)) = tokens.get(1) {
            word.to_owned()
        } else {
            Data::default()
        };
        let x = rope.slice(&word2);
        assert_eq!("word2", word2.value);
        assert_eq!(word2.value, x);
    }
}