1use std::{iter::Peekable, str::Chars};
2
3use nom::AsChar;
4
5use crate::token::{Data, Token};
6
/// Errors produced by [`Lexer::next_token`].
///
/// Currently uninhabited: lexing cannot fail, and the `Result` return
/// type only reserves room for future error reporting. `Debug` is
/// derived so the type can be used with `unwrap`/`expect` and asserted
/// on in tests, as expected of a public error type.
#[derive(Debug)]
pub enum LexError {}
/// A zero-copy lexer for a Forth-like language.
///
/// Tokens borrow their text directly from the input string, so the
/// lexer and every token it produces are tied to the input lifetime `'a`.
#[derive(Debug)]
pub struct Lexer<'a> {
    /// Index of the character currently held in `ch`.
    /// NOTE(review): counted in chars but used to slice `raw` by bytes,
    /// so non-ASCII input would produce wrong spans — confirm inputs are ASCII.
    position: usize,
    /// Index of the next character to be read.
    read_position: usize,
    /// The character under examination; `'\0'` doubles as the EOF sentinel.
    ch: char,
    /// The full original input, sliced to build zero-copy `Data` spans.
    raw: &'a str,
    /// Character iterator over the same input, one char ahead of `ch`.
    input: Peekable<Chars<'a>>,
}
16
17impl<'a> Lexer<'a> {
18 pub fn new(input: &'a str) -> Lexer<'a> {
19 let mut lex = Lexer {
20 position: 0,
21 read_position: 0,
22 ch: '0',
23 input: input.chars().peekable(),
24 raw: input,
25 };
26 lex.read_char();
27
28 lex
29 }
30
31 pub fn reset(&mut self) {
32 self.position = 0;
33 self.read_position = 0;
34 self.ch = '\0';
35 }
36
37 pub fn here(&self) -> Data<'a> {
38 Data {
39 start: self.position,
40 end: self.position,
41 value: "",
42 }
43 }
44
45 pub fn next_token(&mut self) -> Result<Token<'a>, LexError> {
46 self.skip_whitespace();
47
48 let tok = match self.ch {
49 ':' => Token::Colon(self.read_single_char_token()),
50 ';' => Token::Semicolon(self.read_single_char_token()),
51 '%' => self.try_parse_number_with_prefix(|c| c.is_digit(2)),
52 '&' => self.try_parse_number_with_prefix(|c| c == 'x' || c.is_digit(8)),
53 '$' => self.try_parse_number_with_prefix(|c| c.is_hex_digit()),
54 '\'' => self.parse_quote_or_word(),
55 '0' => self.try_parse_number_with_prefix(|c| c == 'x' || c.is_hex_digit()),
56 '0'..='9' => {
57 let ident = self.read_number();
58 Token::Number(ident)
59 }
60 '\\' => {
61 if self.peek_char().is_whitespace() {
62 let comment = self.read_comment_to('\n');
63 Token::Comment(comment)
64 } else {
65 let ident = self.read_ident();
66 Token::Word(ident)
67 }
68 }
69 '(' => {
70 if self.peek_char().is_whitespace() {
71 let comment = self.read_comment_to(')');
72 if comment.value.contains("--") {
74 Token::StackComment(comment)
75 } else {
76 Token::Comment(comment)
77 }
78 } else {
79 let ident = self.read_ident();
80 Token::Word(ident)
81 }
82 }
83 '\0' => {
84 let mut dat = self.here();
85 dat.value = "\0";
86 self.read_char();
87 Token::Eof(dat)
88 }
89 _ => {
90 let ident = self.read_ident();
91 Token::Word(ident)
92 }
93 };
94
95 Ok(tok)
96 }
97
98 fn read_char(&mut self) {
99 self.ch = match self.input.peek() {
100 Some(ch) => *ch,
101 None => '\0',
102 };
103
104 self.input.next();
105
106 self.position = self.read_position;
107 self.read_position += 1;
108 }
109
110 fn try_parse_number_with_prefix(&mut self, validator: fn(char) -> bool) -> Token<'a> {
111 if validator(self.peek_char()) {
112 Token::Number(self.read_number())
113 } else {
114 Token::Word(self.read_ident())
115 }
116 }
117
118 fn parse_quote_or_word(&mut self) -> Token<'a> {
119 let begin = self.position;
120 let next = self.peek_char();
121
122 if next.is_whitespace() {
123 return Token::Word(self.read_ident());
124 }
125
126 self.read_char(); if self.peek_char() == '\'' {
129 self.read_char(); let number = Data {
132 start: begin,
133 end: self.position + 1,
134 value: &self.raw[begin..(self.position + 1)],
135 };
136 self.read_char(); return Token::Number(number);
138 }
139
140 let mut word = self.read_ident();
142 word.start = begin;
143 word.value = &self.raw[begin..word.end];
144 Token::Word(word)
145 }
146
147 fn read_single_char_token(&mut self) -> Data<'a> {
148 let start = self.position;
149 self.read_char();
150 Data {
151 start,
152 end: start + 1,
153 value: &self.raw[start..start + 1],
154 }
155 }
156
157 fn peek_char(&mut self) -> char {
158 match self.input.peek() {
159 Some(ch) => *ch,
160 None => '\0',
161 }
162 }
163
164 fn skip_whitespace(&mut self) {
165 while self.ch.is_ascii_whitespace() {
166 self.read_char();
167 }
168 }
169
170 fn read_comment_to(&mut self, to: char) -> Data<'a> {
171 let start = self.position;
172 while self.ch != to && self.ch != '\0' {
173 self.read_char();
174 }
175 if to == ')' {
176 self.read_char();
177 }
178
179 Data {
180 start,
181 end: self.position,
182 value: &self.raw[start..self.position],
183 }
184 }
185
186 fn read_ident(&mut self) -> Data<'a> {
187 let start = self.position;
188 while !self.ch.is_whitespace() && self.ch != '\0' {
189 self.read_char();
190 }
191 Data {
192 start,
193 end: self.position,
194 value: &self.raw[start..self.position],
195 }
196 }
197
198 fn read_number(&mut self) -> Data<'a> {
199 let start = self.position;
200 while self.ch.is_hex_digit()
202 || self.ch == '_'
203 || self.ch == '&'
204 || self.ch == '%'
205 || self.ch == 'x'
206 || self.ch == '$'
207 {
208 self.read_char();
209 }
210 Data {
211 start,
212 end: self.position,
213 value: &self.raw[start..self.position],
214 }
215 }
216
217 pub fn parse(&mut self) -> Vec<Token<'a>> {
218 let mut tokens = vec![];
219 #[allow(irrefutable_let_patterns)]
220 while let Ok(tok) = self.next_token() {
221 match tok {
222 Token::Eof(_) => {
223 break;
224 }
225 _ => {
226 tokens.push(tok.clone());
227 }
228 }
229 }
230 tokens
231 }
232}
233
#[cfg(test)]
mod tests {
    use super::*;
    use Token::*;

    // A complete colon definition: words, a stack comment, a number,
    // a line comment and the terminating semicolon, with exact spans.
    #[test]
    fn test_parse_proper_def() {
        let mut lexer = Lexer::new(": add1 ( n -- n )\n 1 + \\ adds one\n;");
        let tokens = lexer.parse();
        let expected = vec![
            Colon(Data::new(0, 1, ":")),
            Word(Data::new(2, 6, "add1")),
            StackComment(Data::new(7, 17, "( n -- n )")),
            Number(Data::new(20, 21, "1")),
            Word(Data::new(22, 23, "+")),
            Comment(Data::new(24, 34, "\\ adds one")),
            Semicolon(Data::new(35, 36, ";")),
        ];
        assert_eq!(tokens, expected)
    }

    // Minimal colon definition without any comments.
    #[test]
    fn test_parse_simple_def() {
        let mut lexer = Lexer::new(": add1 1 + ;");
        let tokens = lexer.parse();
        let expected = vec![
            Colon(Data::new(0, 1, ":")),
            Word(Data::new(2, 6, "add1")),
            Number(Data::new(7, 8, "1")),
            Word(Data::new(9, 10, "+")),
            Semicolon(Data::new(11, 12, ";")),
        ];
        assert_eq!(tokens, expected)
    }

    // Line comments end at '\n'; paren comments (without "--") are
    // plain comments, and words resume after each.
    #[test]
    fn test_parse_words_and_comments() {
        let mut lexer = Lexer::new("word \\ this is a comment\nword2 ( and this ) word3");
        let tokens = lexer.parse();
        let expected = vec![
            Word(Data::new(0, 4, "word")),
            Comment(Data::new(5, 24, "\\ this is a comment")),
            Word(Data::new(25, 30, "word2")),
            Comment(Data::new(31, 43, "( and this )")),
            Word(Data::new(44, 49, "word3")),
        ];
        assert_eq!(tokens, expected)
    }

    // Newlines separate words like any whitespace; an explicit trailing
    // '\0' terminates lexing without producing a token.
    #[test]
    fn test_parse_words_on_lines() {
        let mut lexer = Lexer::new("some\nwords here\0");
        let tokens = lexer.parse();
        let expected = vec![
            Word(Data::new(0, 4, "some")),
            Word(Data::new(5, 10, "words")),
            Word(Data::new(11, 15, "here")),
        ];
        assert_eq!(tokens, expected)
    }

    // Plain decimal literal.
    #[test]
    fn test_parse_number_literal() {
        let mut lexer = Lexer::new("12");
        let tokens = lexer.parse();
        let expected = vec![Number(Data::new(0, 2, "12"))];
        assert_eq!(tokens, expected)
    }

    // Octal literal with '&' prefix; the prefix is part of the span.
    #[test]
    fn test_parse_number_oct() {
        let mut lexer = Lexer::new("&12");
        let tokens = lexer.parse();
        let expected = vec![Number(Data::new(0, 3, "&12"))];
        assert_eq!(tokens, expected)
    }

    // Binary literal with '%' prefix.
    #[test]
    fn test_parse_number_bin() {
        let mut lexer = Lexer::new("%0100101");
        let tokens = lexer.parse();
        let expected = vec![Number(Data::new(0, 8, "%0100101"))];
        assert_eq!(tokens, expected);
    }

    // Known limitation: read_number() accepts any hex digit regardless
    // of the radix prefix, so "%12345" currently lexes as a Number.
    // Ignored until per-radix validation is implemented.
    #[test]
    #[ignore]
    fn test_parse_number_bin_only_valid() {
        let mut lexer = Lexer::new("%12345");
        let tokens = lexer.parse();
        let expected = vec![Word(Data::new(0, 6, "%12345"))];
        assert_eq!(tokens, expected);
    }

    // Hex literal with '$' prefix, mixed-case digits.
    #[test]
    fn test_parse_number_hex() {
        let mut lexer = Lexer::new("$FfAaDd");
        let tokens = lexer.parse();
        let expected = vec![Number(Data::new(0, 7, "$FfAaDd"))];
        assert_eq!(tokens, expected)
    }

    // C-style hex literal with "0x" prefix.
    #[test]
    fn test_parse_number_0xhex() {
        let mut lexer = Lexer::new("0xFE");
        let tokens = lexer.parse();
        let expected = vec![Number(Data::new(0, 4, "0xFE"))];
        assert_eq!(tokens, expected)
    }

    // Character literal 'c' is lexed as a Number spanning both quotes.
    #[test]
    fn test_parse_number_char() {
        let mut lexer = Lexer::new("'c'");
        let tokens = lexer.parse();
        let expected = vec![Number(Data::new(0, 3, "'c'"))];
        assert_eq!(tokens, expected)
    }

    // A paren comment containing "--" is a stack-effect comment.
    #[test]
    fn test_parse_stack_comment() {
        let mut lexer = Lexer::new("( n1 n2 -- n3 )");
        let tokens = lexer.parse();
        let expected = vec![StackComment(Data::new(0, 15, "( n1 n2 -- n3 )"))];
        assert_eq!(tokens, expected)
    }

    // A paren comment without "--" stays a plain Comment.
    #[test]
    fn test_parse_regular_comment() {
        let mut lexer = Lexer::new("( this is just a comment )");
        let tokens = lexer.parse();
        let expected = vec![Comment(Data::new(0, 26, "( this is just a comment )"))];
        assert_eq!(tokens, expected)
    }

    // Stack comment containing quoted names (addr' / len') and a flag.
    #[test]
    fn test_parse_stack_comment_complex() {
        let mut lexer = Lexer::new("( addr len -- addr' len' flag )");
        let tokens = lexer.parse();
        let expected = vec![StackComment(Data::new(
            0,
            31,
            "( addr len -- addr' len' flag )",
        ))];
        assert_eq!(tokens, expected)
    }

    // A backslash line comment running to end of input (no '\n').
    #[test]
    fn test_parse_line_comment() {
        let mut lexer = Lexer::new("\\ this is a line comment");
        let tokens = lexer.parse();
        let expected = vec![Comment(Data::new(0, 24, "\\ this is a line comment"))];
        assert_eq!(tokens, expected)
    }

    // Anything that is not a number/comment/delimiter is a Word.
    #[test]
    fn test_parse_number_word() {
        let mut lexer = Lexer::new("word");
        let tokens = lexer.parse();
        let expected = vec![Word(Data::new(0, 4, "word"))];
        assert_eq!(tokens, expected)
    }

    // Feature-gated: Data spans can index into a ropey::Rope, so token
    // spans line up with rope slices of the same source text.
    #[cfg(feature = "ropey")]
    #[test]
    fn test_to_ropey() {
        let progn = "word1 word2 word3";
        let rope = ropey::Rope::from_str(progn);
        let mut lexer = Lexer::new(progn);
        let tokens = lexer.parse();
        let word2 = if let Some(Token::Word(word)) = tokens.get(1) {
            word.to_owned()
        } else {
            Data::default()
        };
        let x = rope.slice(&word2);
        assert_eq!("word2", word2.value);
        assert_eq!(word2.value, x);
    }
}