use std::iter::Peekable;
use std::str::Chars;

use crate::common::position::CaretPos;
use crate::parse::lex::result::{LexErr, LexResult};
use crate::parse::lex::state::State;
use crate::parse::lex::token::{Lex, Token};
use crate::parse::lex::tokenize_direct;

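/// Lex a single character, consuming any lookahead it needs from `it`, into
/// zero or more tokens.
///
/// Single-character tokens are emitted directly; multi-character operators
/// (`::=`, `..=`, `<<=`, ...) peek before consuming. A space updates the
/// lexer state but yields no token.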
#[allow(clippy::cognitive_complexity)]
pub fn into_tokens(c: char, it: &mut Peekable<Chars>, state: &mut State) -> LexResult {
    match c {
        ',' => create(state, Token::Comma),
        ':' => match it.peek() {
            Some(':') => match (it.next(), it.peek()) {
                (_, Some('=')) => next_and_create(it, state, Token::SliceIncl),
                _ => create(state, Token::Slice),
            },
            Some('=') => next_and_create(it, state, Token::Assign),
            _ => create(state, Token::DoublePoint),
        },
        '(' => create(state, Token::LRBrack),
        ')' => create(state, Token::RRBrack),
        '[' => create(state, Token::LSBrack),
        ']' => create(state, Token::RSBrack),
        '{' => create(state, Token::LCBrack),
        '}' => create(state, Token::RCBrack),
        '|' => create(state, Token::Ver),
        '\n' => create(state, Token::NL),
        '\r' => match it.next() {
            Some('\n') => create(state, Token::NL),
            _ => Err(LexErr::new(state.pos, None, "carriage return not followed by newline")),
        },
        '.' => match it.peek() {
            Some('.') => match (it.next(), it.peek()) {
                (_, Some('=')) => next_and_create(it, state, Token::RangeIncl),
                _ => create(state, Token::Range),
            },
            _ => create(state, Token::Point),
        },
        '<' => match it.peek() {
            Some('<') => match (it.next(), it.peek()) {
                (_, Some('=')) => next_and_create(it, state, Token::BLShiftAssign),
                // `it.next()` above already consumed the second '<'; the
                // fallback must not consume the character that follows `<<`.
                _ => create(state, Token::BLShift),
            },
            Some('=') => next_and_create(it, state, Token::Leq),
            _ => create(state, Token::Le),
        },
        '>' => match it.peek() {
            Some('>') => match (it.next(), it.peek()) {
                (_, Some('=')) => next_and_create(it, state, Token::BRShiftAssign),
                // As with `<<`, do not consume the character after `>>`.
                _ => create(state, Token::BRShift),
            },
            Some('=') => next_and_create(it, state, Token::Geq),
            _ => create(state, Token::Ge),
        },
        '+' => match it.peek() {
            Some('=') => next_and_create(it, state, Token::AddAssign),
            _ => create(state, Token::Add),
        },
        '-' => match it.peek() {
            Some('=') => next_and_create(it, state, Token::SubAssign),
            Some('>') => next_and_create(it, state, Token::To),
            _ => create(state, Token::Sub),
        },
        '*' => match it.peek() {
            Some('=') => next_and_create(it, state, Token::MulAssign),
            _ => create(state, Token::Mul),
        },
        '/' => match it.peek() {
            Some('=') => next_and_create(it, state, Token::DivAssign),
            Some('/') => next_and_create(it, state, Token::FDiv),
            _ => create(state, Token::Div),
        },
        '\\' => create(state, Token::BSlash),
        '^' => match it.peek() {
            Some('=') => next_and_create(it, state, Token::PowAssign),
            _ => create(state, Token::Pow),
        },
        '=' => match it.peek() {
            Some('>') => next_and_create(it, state, Token::BTo),
            _ => create(state, Token::Eq),
        },
        '#' => {
            // Consume the rest of the line as a comment, leaving the line
            // terminator itself for the next call.
            let mut comment = String::new();
            while let Some(&next) = it.peek() {
                if next == '\n' || next == '\r' {
                    break;
                }
                comment.push(next);
                it.next();
            }
            create(state, Token::Comment(comment))
        }
        '!' => match it.peek() {
            Some('=') => next_and_create(it, state, Token::Neq),
            _ => Err(LexErr::new(state.pos, None, "'!' is not a valid character on its own")),
        },
        '?' => create(state, Token::Question),
        '0'..='9' => {
            let mut number = c.to_string();
            let mut exp = String::new();
            let mut float = false;
            let mut e_num = false;

            while let Some(&c) = it.peek() {
                match c {
                    // Digits extend the mantissa, or the exponent once an 'E'
                    // has been seen.
                    '0'..='9' if !e_num => {
                        number.push(c);
                        it.next();
                    }
                    '0'..='9' if e_num => {
                        exp.push(c);
                        it.next();
                    }
                    // A second 'E' ends the number.
                    'E' if e_num => break,
                    'E' => {
                        e_num = true;
                        it.next();
                    }
                    // A '.' after a decimal point or exponent ends the number.
                    '.' if float || e_num => break,
                    '.' => {
                        // Look ahead on a clone of the iterator: a second '.'
                        // means this is a range operator, not a decimal point.
                        {
                            let mut it = it.clone();
                            it.next();
                            if let Some('.') = it.peek() {
                                break;
                            }
                        }

                        number.push(c);
                        float = true;
                        it.next();
                    }
                    _ => break,
                }
            }
            create(
                state,
                if e_num {
                    Token::ENum(number, exp)
                } else if float {
                    Token::Real(number)
                } else {
                    Token::Int(number)
                },
            )
        }
        'a'..='z' | 'A'..='Z' | '_' => {
            let mut id_or_operation = c.to_string();
            while let Some(c) = it.peek() {
                match c {
                    'a'..='z' | 'A'..='Z' | '_' | '0'..='9' => {
                        id_or_operation.push(*c);
                        it.next();
                    }
                    _ => break,
                }
            }
            create(state, as_op_or_id(id_or_operation))
        }
        '"' => {
            let mut string = String::new();
            let mut back_slash = false;

            // Interpolated `{...}` expressions found in the string, with the
            // position at which each one starts.
            let mut exprs: Vec<(CaretPos, String)> = vec![];
            let mut build_cur_expr = 0;
            let mut cur_offset = CaretPos::start();
            let mut cur_expr = String::new();

            for c in it {
                // An unescaped '"' outside any `{...}` terminates the string.
                if !back_slash && build_cur_expr == 0 && c == '"' {
                    break;
                }
                string.push(c);

                if !back_slash {
                    if build_cur_expr > 0 {
                        cur_expr.push(c);
                    }

                    // Track brace nesting so interpolated expressions may
                    // themselves contain braces.
                    if c == '{' {
                        if build_cur_expr == 0 {
                            cur_offset = state.pos.offset_pos(string.len() + 1);
                        }
                        build_cur_expr += 1;
                    } else if c == '}' {
                        build_cur_expr -= 1;
                    }

                    if build_cur_expr == 0 && !cur_expr.is_empty() {
                        // Strip the trailing '}' that was pushed above.
                        cur_expr = cur_expr[0..cur_expr.len() - 1].to_owned();
                        if !cur_expr.is_empty() {
                            exprs.push((cur_offset, cur_expr.clone()));
                        }
                        cur_expr.clear();
                    }
                }

                back_slash = c == '\\';
            }

            if string.starts_with("\"\"") && string.ends_with("\"\"") {
                let string = string.trim_start_matches("\"\"").trim_end_matches("\"\"");
                create(state, Token::DocStr(String::from(string)))
            } else {
                // Tokenize each interpolated expression, shifting the token
                // positions so they point into the enclosing source.
                let tokens = exprs
                    .iter()
                    .map(|(offset, string)| match tokenize_direct(string) {
                        Ok(tokens) => Ok(tokens
                            .iter()
                            .map(|lex| Lex::new(lex.pos.offset(offset).start, lex.token.clone()))
                            .collect()),
                        Err(err) => Err(err),
                    })
                    .collect::<Result<_, _>>()?;

                create(state, Token::Str(string, tokens))
            }
        }
        ' ' => {
            state.space();
            Ok(vec![])
        }
        c => Err(LexErr::new(state.pos, None, &format!("unrecognized character: {c}"))),
    }
}

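/// Consume one more character from `it`, then emit `token`; used when the
/// final character of a multi-character token has been peeked at but not yet
/// consumed.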
fn next_and_create(
    it: &mut Peekable<Chars>,
    state: &mut State,
    token: Token,
) -> LexResult<Vec<Lex>> {
    it.next();
    create(state, token)
}

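/// Emit `token` at the position currently tracked by `state`.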
fn create(state: &mut State, token: Token) -> LexResult<Vec<Lex>> {
    Ok(state.token(token))
}

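/// Map a word to its keyword or operator token, falling back to `Token::Id`
/// for anything that is not reserved.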
fn as_op_or_id(string: String) -> Token {
    match string.as_ref() {
        "_" => Token::Underscore,

        "from" => Token::From,
        "type" => Token::Type,
        "class" => Token::Class,
        "pure" => Token::Pure,
        "as" => Token::As,

        "import" => Token::Import,
        "forward" => Token::Forward,
        "self" => Token::_Self,
        "vararg" => Token::Vararg,
        "init" => Token::Init,

        "def" => Token::Def,
        "fin" => Token::Fin,
        "and" => Token::And,
        "or" => Token::Or,
        "not" => Token::Not,
        "is" => Token::Is,
        "isa" => Token::IsA,
        "isnt" => Token::IsN,
        "isnta" => Token::IsNA,
        "mod" => Token::Mod,
        "sqrt" => Token::Sqrt,
        "while" => Token::While,
        "for" => Token::For,

        "_and_" => Token::BAnd,
        "_or_" => Token::BOr,
        "_xor_" => Token::BXOr,
        "_not_" => Token::BOneCmpl,

        "if" => Token::If,
        "else" => Token::Else,
        "match" => Token::Match,
        "continue" => Token::Continue,
        "break" => Token::Break,
        "return" => Token::Ret,
        "then" => Token::Then,
        "do" => Token::Do,
        "with" => Token::With,

        "in" => Token::In,

        "raise" => Token::Raise,
        "handle" => Token::Handle,
        "when" => Token::When,

        "True" => Token::Bool(true),
        "False" => Token::Bool(false),

        "None" => Token::Undefined,
        "pass" => Token::Pass,

        _ => Token::Id(string),
    }
}

#[cfg(test)]
mod test {
    use crate::parse::lex::result::LexErr;
    use crate::parse::lex::token::Token;
    use crate::parse::lex::tokenize;

    #[test]
    fn function_with_ret() -> Result<(), LexErr> {
        let source = "def f(x: Int) -> Int =>\n return";
        let tokens = tokenize(&source)
            .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

        assert_eq!(tokens[0].token, Token::Def);
        assert_eq!(tokens[1].token, Token::Id(String::from("f")));
        assert_eq!(tokens[2].token, Token::LRBrack);
        assert_eq!(tokens[3].token, Token::Id(String::from("x")));
        assert_eq!(tokens[4].token, Token::DoublePoint);
        assert_eq!(tokens[5].token, Token::Id(String::from("Int")));
        assert_eq!(tokens[6].token, Token::RRBrack);
        assert_eq!(tokens[7].token, Token::To);
        assert_eq!(tokens[8].token, Token::Id(String::from("Int")));
        assert_eq!(tokens[9].token, Token::BTo);
        assert_eq!(tokens[10].token, Token::NL);
        assert_eq!(tokens[11].token, Token::Indent);
        assert_eq!(tokens[12].token, Token::Ret);
        assert_eq!(tokens[13].token, Token::Dedent);

        Ok(())
    }

    #[test]
    fn class_with_body_class_right_after() -> Result<(), LexErr> {
        let source = "class MyClass\n def var := 10\nclass MyClass1\n";
        let tokens = tokenize(&source)
            .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

        assert_eq!(tokens[0].token, Token::Class);
        assert_eq!(tokens[1].token, Token::Id(String::from("MyClass")));
        assert_eq!(tokens[2].token, Token::NL);
        assert_eq!(tokens[3].token, Token::Indent);
        assert_eq!(tokens[4].token, Token::Def);
        assert_eq!(tokens[5].token, Token::Id(String::from("var")));
        assert_eq!(tokens[6].token, Token::Assign);
        assert_eq!(tokens[7].token, Token::Int(String::from("10")));
        assert_eq!(tokens[8].token, Token::NL);
        assert_eq!(tokens[9].token, Token::Dedent);
        assert_eq!(tokens[10].token, Token::NL);
        assert_eq!(tokens[11].token, Token::Class);
        assert_eq!(tokens[12].token, Token::Id(String::from("MyClass1")));

        Ok(())
    }

    #[test]
    fn if_statement() -> Result<(), LexErr> {
        let source = "if a then\n b\nelse\n c";
        let tokens = tokenize(&source)
            .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

        assert_eq!(tokens[0].token, Token::If);
        assert_eq!(tokens[1].token, Token::Id(String::from("a")));
        assert_eq!(tokens[2].token, Token::Then);
        assert_eq!(tokens[3].token, Token::NL);
        assert_eq!(tokens[4].token, Token::Indent);
        assert_eq!(tokens[5].token, Token::Id(String::from("b")));
        assert_eq!(tokens[6].token, Token::NL);
        assert_eq!(tokens[7].token, Token::Dedent);
        assert_eq!(tokens[8].token, Token::NL);
        assert_eq!(tokens[9].token, Token::Else);
        assert_eq!(tokens[10].token, Token::NL);
        assert_eq!(tokens[11].token, Token::Indent);
        assert_eq!(tokens[12].token, Token::Id(String::from("c")));

        Ok(())
    }

    #[test]
    fn e_number() -> Result<(), LexErr> {
        let source = "3E4";
        let tokens = tokenize(&source)
            .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

        assert_eq!(tokens[0].token, Token::ENum(String::from("3"), String::from("4")));
        Ok(())
    }

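    // A sketch of the negative case for exponent lexing: `into_tokens`
    // matches only an uppercase 'E', so a lowercase 'e' should end the number
    // and start an identifier. Assumes "3e4" therefore lexes as two tokens.
    #[test]
    fn e_number_lowercase_is_not_exponent() -> Result<(), LexErr> {
        let source = "3e4";
        let tokens = tokenize(&source)
            .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

        assert_eq!(tokens[0].token, Token::Int(String::from("3")));
        assert_eq!(tokens[1].token, Token::Id(String::from("e4")));
        Ok(())
    }
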
    #[test]
    fn int() -> Result<(), LexErr> {
        let source = "0";
        let tokens = tokenize(&source)
            .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

        assert_eq!(tokens[0].token, Token::Int(String::from("0")));
        Ok(())
    }

    #[test]
    fn real() -> Result<(), LexErr> {
        let source = "0.";
        let tokens = tokenize(&source)
            .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

        assert_eq!(tokens[0].token, Token::Real(String::from("0.")));
        Ok(())
    }

    #[test]
    fn real2() -> Result<(), LexErr> {
        let source = "0.0";
        let tokens = tokenize(&source)
            .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

        assert_eq!(tokens[0].token, Token::Real(String::from("0.0")));
        Ok(())
    }

    #[test]
    fn real3() -> Result<(), LexErr> {
        let source = "0.0.";
        let tokens = tokenize(&source)
            .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

        assert_eq!(tokens[0].token, Token::Real(String::from("0.0")));
        assert_eq!(tokens[1].token, Token::Point);
        Ok(())
    }

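    // Sketches mirroring the range tests for the slice operators: `:` peeks
    // ahead to distinguish `:` (DoublePoint), `:=` (Assign), `::` (Slice),
    // and `::=` (SliceIncl). Assumes spaces produce no tokens, as in the
    // range tests above.
    #[test]
    fn slice() -> Result<(), LexErr> {
        let sources = vec!["0 :: 2", "0:: 2", "0 ::2", "0::2"];

        for source in sources {
            let tokens = tokenize(&source)
                .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

            assert_eq!(tokens[0].token, Token::Int(String::from("0")), "(0): {}", source);
            assert_eq!(tokens[1].token, Token::Slice, "(::): {}", source);
            assert_eq!(tokens[2].token, Token::Int(String::from("2")), "(2): {}", source);
        }

        Ok(())
    }

    #[test]
    fn slice_incl() -> Result<(), LexErr> {
        let sources = vec!["0 ::= 2", "0::= 2", "0 ::=2", "0::=2"];

        for source in sources {
            let tokens = tokenize(&source)
                .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

            assert_eq!(tokens[0].token, Token::Int(String::from("0")), "(0): {}", source);
            assert_eq!(tokens[1].token, Token::SliceIncl, "(::=): {}", source);
            assert_eq!(tokens[2].token, Token::Int(String::from("2")), "(2): {}", source);
        }

        Ok(())
    }
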
    #[test]
    fn range_incl() -> Result<(), LexErr> {
        let sources = vec!["0 ..= 2", "0..= 2", "0 ..=2", "0..=2"];

        for source in sources {
            let tokens = tokenize(&source)
                .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

            assert_eq!(tokens[0].token, Token::Int(String::from("0")), "(0): {}", source);
            assert_eq!(tokens[1].token, Token::RangeIncl, "(..=): {}", source);
            assert_eq!(tokens[2].token, Token::Int(String::from("2")), "(2): {}", source);
        }

        Ok(())
    }

    #[test]
    fn range() -> Result<(), LexErr> {
        let sources = vec!["0 .. 2", "0.. 2", "0 ..2", "0..2"];

        for source in sources {
            let tokens = tokenize(&source)
                .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

            assert_eq!(tokens[0].token, Token::Int(String::from("0")), "(0): {}", source);
            assert_eq!(tokens[1].token, Token::Range, "(..): {}", source);
            assert_eq!(tokens[2].token, Token::Int(String::from("2")), "(2): {}", source);
        }

        Ok(())
    }

    #[test]
    fn range_tripped_up() -> Result<(), LexErr> {
        let sources = vec!["0 ... 2", "0... 2", "0 ...2", "0...2"];

        for source in sources {
            let tokens = tokenize(&source)
                .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

            assert_eq!(tokens[0].token, Token::Int(String::from("0")), "(0): {}", source);
            assert_eq!(tokens[1].token, Token::Range, "(..): {}", source);
            assert_eq!(tokens[2].token, Token::Point, "(.): {}", source);
            assert_eq!(tokens[3].token, Token::Int(String::from("2")), "(2): {}", source);
        }

        Ok(())
    }
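
    // A minimal sketch of a plain string literal: no `{...}` interpolation,
    // so the token should carry an empty expression list. Assumes `Token::Str`
    // holds the raw contents plus the tokens of each interpolated expression.
    #[test]
    fn string_without_interpolation() -> Result<(), LexErr> {
        let source = "\"hello\"";
        let tokens = tokenize(&source)
            .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

        assert_eq!(tokens[0].token, Token::Str(String::from("hello"), vec![]));
        Ok(())
    }

    // A sketch for the shift operators, which peek twice: `<<` must not
    // swallow the character that follows it, even with no separating space.
    #[test]
    fn bitwise_left_shift() -> Result<(), LexErr> {
        let sources = vec!["a << b", "a <<b"];

        for source in sources {
            let tokens = tokenize(&source)
                .map_err(|e| e.into_with_source(&Some(String::from(source)), &None))?;

            assert_eq!(tokens[0].token, Token::Id(String::from("a")), "(a): {}", source);
            assert_eq!(tokens[1].token, Token::BLShift, "(<<): {}", source);
            assert_eq!(tokens[2].token, Token::Id(String::from("b")), "(b): {}", source);
        }

        Ok(())
    }

    // '!' is only valid as part of `!=`; on its own the lexer reports an
    // error. A sketch assuming `tokenize` surfaces that `LexErr`.
    #[test]
    fn exclamation_alone_is_err() {
        assert!(tokenize("!").is_err());
    }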
}