1use std::{iter::Peekable, str::Chars};
2
3use nom::AsChar;
4
5use crate::token::{Data, Token};
6
/// Errors produced during lexing.
///
/// Currently uninhabited: `Lexer::next_token` never fails, so every
/// `Result<Token, LexError>` it returns is statically `Ok`. Variants can be
/// added here as lexing grows stricter. `Debug` is derived so callers can
/// `unwrap`/`expect` on the `Result` and so the type composes with other
/// derived error types.
#[derive(Debug)]
pub enum LexError {}
/// Streaming lexer over an input string.
///
/// Maintains a one-character window (`ch`) plus one character of lookahead
/// (via the peekable iterator). Positions are character counts, not byte
/// offsets: `read_position` advances by exactly one per `read_char` call.
#[derive(Debug)]
pub struct Lexer<'a> {
    // Character index of the character currently held in `ch`.
    position: usize,
    // Character index of the next character to be consumed from `input`.
    read_position: usize,
    // The character under examination; '\0' signals end of input.
    ch: char,
    // Remaining input; peekable so `peek_char` gets one char of lookahead.
    input: Peekable<Chars<'a>>,
}
15
16impl<'a> Lexer<'a> {
17 pub fn new(input: &'a str) -> Lexer<'a> {
18 let mut lex = Lexer {
19 position: 0,
20 read_position: 0,
21 ch: '0',
22 input: input.chars().peekable(),
23 };
24 lex.read_char();
25
26 lex
27 }
28
29 pub fn reset(&mut self) {
30 self.position = 0;
31 self.read_position = 0;
32 self.ch = '\0';
33 }
34
35 pub fn here<T>(&self) -> Data<T>
36 where
37 T: Default,
38 {
39 Data {
40 start: self.position,
41 end: self.position,
42 value: T::default(),
43 }
44 }
45
46 pub fn next_token(&mut self) -> Result<Token, LexError> {
47 self.skip_whitespace();
48
49 let tok = match self.ch {
50 ':' => {
51 let mut dat = self.here::<char>();
52 dat.value = ':';
53 Token::Colon(dat)
54 }
55 ';' => {
56 let mut dat = self.here::<char>();
57 dat.value = ';';
58 dat.end = dat.start + 1;
59 Token::Semicolon(dat)
60 }
61 '%' => {
64 if self.peek_char().is_digit(2) {
65 let ident = self.read_number();
66 Token::Number(ident)
67 } else {
68 let ident = self.read_ident();
69 Token::Word(ident)
70 }
71 }
72 '&' => {
73 if self.peek_char() == 'x' || self.peek_char().is_digit(8) {
74 let ident = self.read_number();
75 Token::Number(ident)
76 } else {
77 let ident = self.read_ident();
78 Token::Word(ident)
79 }
80 }
81 '$' => {
82 if self.peek_char().is_hex_digit() {
83 let ident = self.read_number();
84 Token::Number(ident)
85 } else {
86 let ident = self.read_ident();
87 Token::Word(ident)
88 }
89 }
90 '\'' => {
91 if !self.peek_char().is_whitespace() {
92 self.read_char();
93 if self.peek_char() == '\'' {
94 let num = self.ch;
95 self.read_char();
96 let number = Data::<String> {
97 start: self.position - 2,
98 end: self.position + 1,
99 value: format!("'{}'", num),
100 };
101 Token::Number(number)
102 } else {
103 let mut ident = self.read_ident();
104 ident.start -= 1;
105 ident.value = format!("{}{}", "'", ident.value);
106 Token::Word(ident)
107 }
108 } else {
109 let ident = self.read_ident();
110 Token::Word(ident)
111 }
112 }
113 '0' => {
114 if self.peek_char() == 'x' || self.peek_char().is_hex_digit() {
115 let ident = self.read_number();
116 Token::Number(ident)
117 } else {
118 let ident = self.read_ident();
119 Token::Word(ident)
120 }
121 }
122 '0'..='9' => {
123 let ident = self.read_number();
124 Token::Number(ident)
125 }
126 '\\' => {
127 if self.peek_char().is_whitespace() {
128 let comment = self.read_comment_to('\n');
129 Token::Comment(comment)
130 } else {
131 let ident = self.read_ident();
132 Token::Word(ident)
133 }
134 }
135 '(' => {
136 if self.peek_char().is_whitespace() {
137 let comment = self.read_comment_to(')');
138 Token::Comment(comment)
139 } else {
140 let ident = self.read_ident();
141 Token::Word(ident)
142 }
143 }
144 '\0' => {
145 let mut dat = self.here::<char>();
146 dat.value = '\0';
147 Token::Eof(dat)
148 }
149 _ => {
150 let ident = self.read_ident();
151 Token::Word(ident)
152 }
153 };
154
155 self.read_char();
156 Ok(tok)
157 }
158
159 fn read_char(&mut self) {
160 self.ch = match self.input.peek() {
161 Some(ch) => *ch,
162 None => '\0',
163 };
164
165 self.input.next();
166
167 self.position = self.read_position;
168 self.read_position += 1;
169 }
170
171 fn peek_char(&mut self) -> char {
172 match self.input.peek() {
173 Some(ch) => *ch,
174 None => '\0',
175 }
176 }
177
178 fn skip_whitespace(&mut self) {
179 while self.ch.is_ascii_whitespace() {
180 self.read_char();
181 }
182 }
183
184 fn read_comment_to(&mut self, to: char) -> Data<String> {
185 let start = self.position;
186 let mut value = String::new();
187 while self.ch != to {
188 value.push(self.ch);
189 self.read_char();
190 }
191 if to == ')' {
192 value.push(self.ch);
193 self.read_char();
194 }
195
196 Data::<String> {
197 start,
198 end: self.position,
199 value,
200 }
201 }
202
203 fn read_ident(&mut self) -> Data<String> {
204 let start = self.position;
205 let mut value = String::new();
206 while !self.ch.is_whitespace() && self.ch != '\0' {
207 value.push(self.ch);
208 self.read_char();
209 }
210 Data::<String> {
211 start,
212 end: self.position,
213 value,
214 }
215 }
216
217 fn read_number(&mut self) -> Data<String> {
218 let start = self.position;
219 let mut value = String::new();
220 while self.ch.is_hex_digit()
222 || self.ch == '_'
223 || self.ch == '&'
224 || self.ch == '%'
225 || self.ch == 'x'
226 || self.ch == '$'
227 {
228 value.push(self.ch);
229 self.read_char();
230 }
231 Data::<String> {
232 start,
233 end: self.position,
234 value,
235 }
236 }
237
238 pub fn parse(&mut self) -> Vec<Token> {
239 let mut tokens = vec![];
240 while let Ok(tok) = self.next_token() {
241 match tok {
242 Token::Eof(_) => {
243 break;
244 }
245 _ => {
246 tokens.push(tok);
247 }
248 }
249 }
250 tokens
251 }
252}
253
#[cfg(test)]
mod tests {
    use super::*;
    use Token::*;

    /// Run the lexer over `src` and collect every produced token.
    fn lex(src: &str) -> Vec<Token> {
        let mut lexer = Lexer::new(src);
        lexer.parse()
    }

    #[test]
    fn test_parse_proper_def() {
        let expected = vec![
            Colon(Data::new(0, 0, ':')),
            Word(Data::new(2, 6, "add1".into())),
            Comment(Data::new(7, 17, "( n -- n )".into())),
            Number(Data::new(20, 21, "1".into())),
            Word(Data::new(22, 23, "+".into())),
            Comment(Data::new(24, 34, "\\ adds one".into())),
            Semicolon(Data::new(35, 36, ';')),
        ];
        assert_eq!(lex(": add1 ( n -- n )\n 1 + \\ adds one\n;"), expected)
    }

    #[test]
    fn test_parse_simple_def() {
        let expected = vec![
            Colon(Data::new(0, 0, ':')),
            Word(Data::new(2, 6, "add1".into())),
            Number(Data::new(7, 8, "1".into())),
            Word(Data::new(9, 10, "+".into())),
            Semicolon(Data::new(11, 12, ';')),
        ];
        assert_eq!(lex(": add1 1 + ;"), expected)
    }

    #[test]
    fn test_parse_words_and_comments() {
        let expected = vec![
            Word(Data::new(0, 4, "word".into())),
            Comment(Data::new(5, 24, "\\ this is a comment".into())),
            Word(Data::new(25, 30, "word2".into())),
            Comment(Data::new(31, 43, "( and this )".into())),
            Word(Data::new(44, 49, "word3".into())),
        ];
        assert_eq!(
            lex("word \\ this is a comment\nword2 ( and this ) word3"),
            expected
        )
    }

    #[test]
    fn test_parse_words_on_lines() {
        let expected = vec![
            Word(Data::new(0, 4, "some".into())),
            Word(Data::new(5, 10, "words".into())),
            Word(Data::new(11, 15, "here".into())),
        ];
        assert_eq!(lex("some\nwords here\0"), expected)
    }

    #[test]
    fn test_parse_number_literal() {
        assert_eq!(lex("12"), vec![Number(Data::new(0, 2, "12".into()))])
    }

    #[test]
    fn test_parse_number_oct() {
        assert_eq!(lex("&12"), vec![Number(Data::new(0, 3, "&12".into()))])
    }

    #[test]
    fn test_parse_number_bin() {
        assert_eq!(
            lex("%0100101"),
            vec![Number(Data::new(0, 8, "%0100101".into()))]
        );
    }

    // Invalid binary digits are not yet rejected by read_number.
    #[test]
    #[ignore]
    fn test_parse_number_bin_only_valid() {
        assert_eq!(
            lex("%12345"),
            vec![Word(Data::new(0, 6, "%12345".into()))]
        );
    }

    #[test]
    fn test_parse_number_hex() {
        assert_eq!(
            lex("$FfAaDd"),
            vec![Number(Data::new(0, 7, "$FfAaDd".into()))]
        )
    }

    #[test]
    fn test_parse_number_0xhex() {
        assert_eq!(lex("0xFE"), vec![Number(Data::new(0, 4, "0xFE".into()))])
    }

    #[test]
    fn test_parse_number_char() {
        assert_eq!(lex("'c'"), vec![Number(Data::new(0, 3, "'c'".into()))])
    }

    #[test]
    fn test_parse_number_word() {
        assert_eq!(lex("word"), vec![Word(Data::new(0, 4, "word".into()))])
    }

    #[cfg(feature = "ropey")]
    #[test]
    fn test_to_ropey() {
        let progn = "word1 word2 word3";
        let rope = ropey::Rope::from_str(progn);
        let tokens = lex(progn);
        let word2 = match tokens.get(1) {
            Some(Token::Word(word)) => word.to_owned(),
            _ => Data::<String>::default(),
        };
        let slice = rope.slice(&word2);
        assert_eq!("word2", word2.value);
        assert_eq!(word2.value, slice);
    }
}