// mate_rs/lexer.rs
1//
2// Copyright 2022-present theiskaa. All rights reserved.
3// Use of this source code is governed by MIT license
4// that can be found in the LICENSE file.
5//
6
7use crate::{
8 errors::Error,
9 token::{Sub, SubMethod, Token},
10 utils::ChUtils,
11};
12use std::{cell::Cell, collections::HashMap};
13use substring::Substring;
14
// Lexer walks over the raw expression string and produces tokens.
//
// The cursor fields use [`Cell`] (interior mutability) so that the reading
// methods can advance the lexer through a shared `&self` reference.
#[derive(Clone, Debug, PartialEq)]
pub struct Lexer<'a> {
    input: &'a str,               // Expression input.
    examination_char: Cell<char>, // Current char under examination.
    position: Cell<usize>,        // Current position in input (points to current char).
    read_position: Cell<usize>,   // Current reading position in input (after current char).
}
22
23impl<'a> Lexer<'a> {
24 // Creates a new Lexer object with given input.
25 fn new(input: &'a str) -> Result<Lexer, Error> {
26 if input.len() < 1 {
27 return Err(Error::empty_input());
28 }
29
30 Ok(Self {
31 input,
32 examination_char: Cell::new(input.chars().nth(0).unwrap()),
33 position: Cell::from(0),
34 read_position: Cell::from(1),
35 })
36 }
37
38 // [Lex] is the main function that converts
39 // each [char] to an understandable token variable.
40 //
41 // USER INPUT
42 // ╭──────────────────────────╮
43 // │ (4 * 5 - 5) * 2 + 24 / 2 │
44 // ╰──────────────────────────╯
45 //
46 // OUTPUT OF THE LEXER
47 // ╭───────────────────────────────────╮
48 // │ │ ╭─▶ First Sub Expression
49 // │ ╭───────────────────────────╮ │ │
50 // │ │ │────────╯
51 // │ │ ╭───────────────────╮ │ │
52 // │ │ │ │─╮ │ │
53 // │ │ │ ╭───────────╮ │ │ │ │
54 // │ │ │ │ NUMBER(4) │ │ ╰────────────▶ Second Sub Expression
55 // │ │ │ │ PRODUCT │─╮ │ │ │ Which belongs to first sub expression.
56 // │ │ │ │ NUMBER(5) │ │ │ │ │
57 // │ │ │ ╰───────────╯ ╰──────────────╮
58 // │ │ │ MINUS │ │ │ │
59 // │ │ │ NUMBER(5) │ │ │ ╰─▶ Third Sub Expression
60 // │ │ │ │ │ │ Which belongs to second sub expression.
61 // │ │ ╰───────────────────╯ │ │
62 // │ │ │ │
63 // │ │ PRODUCT │ │
64 // │ │ NUMBER(2) │ │
65 // │ │ │ │
66 // │ ╰───────────────────────────╯ │
67 // │ │
68 // │ PLUS │
69 // │ │
70 // │ ╭──────────────────────────╮ │ ╭─▶ Fourth Sub Expression
71 // │ │ │ │ │
72 // │ │ NUMBER(24) │ │ │
73 // │ │ DIVIDE │─────────╯
74 // │ │ NUMBER(2) │ │
75 // │ │ │ │
76 // │ ╰──────────────────────────╯ │
77 // │ │
78 // ╰───────────────────────────────────╯
79 //
80 pub fn lex(input: &'a str) -> Result<Sub, Error> {
81 let lexer: Lexer = match Lexer::new(input) {
82 Ok(l) => l,
83 Err(e) => return Err(e),
84 };
85
86 let mut tokens: Vec<Token> = Vec::new();
87 loop {
88 match lexer.generate_token() {
89 None => break,
90 Some(r) => match r {
91 Err(e) => return Err(e),
92 Ok(r) => tokens.push(r),
93 },
94 }
95 }
96
97 match Lexer::nest_parentheses(tokens) {
98 Err(e) => return Err(e),
99 Ok(v) => match Lexer::break_nesting(0, v) {
100 Err(e) => return Err(e),
101 Ok(v) => return Ok(Lexer::combine_tokens(v)),
102 },
103 }
104 }
105
106 // The nesting-to-tokens algorithm implementation.
107 //
108 // Nesting-to-tokens algorithm is a hashing algorithm that lexer uses to
109 // parse parentheses expressions and put them into their nest level.
110 //
111 // For example if the given token list is -> "5 + (2 + 4) : (4 + 5 * (3 + 5))"
112 // Generated result will be: --> `Note: {<integer>} represents the pointer token`
113 // | 0: 5 + {1} : {2}
114 // | 1: 2 + 4
115 // | 2: 4 + 5 * {3}
116 // | 3: 3 + 5
117 //
118 // By storing tokens by their nesting levels, makes it easy to understand and implement
119 // any kind of parentheses expressions as sub-expressions.
120 fn nest_parentheses(tokens: Vec<Token>) -> Result<HashMap<usize, (Vec<Token>, bool)>, Error> {
121 let mut nested: HashMap<usize, (Vec<Token>, bool)> = HashMap::new();
122
123 let mut level: usize = 0;
124
125 let mut i: usize = 0;
126 let mut startert: Token = Token::empty();
127 while i < tokens.clone().len() {
128 let t: Token = tokens[i].clone();
129
130 if t.is_lparen() || t.is_labs() {
131 startert = t.clone(); // update starter-type.
132
133 let mut base: (Vec<Token>, bool) = match nested.get(&0) {
134 None => (vec![], false),
135 Some(v) => v.clone(),
136 };
137
138 level += 1;
139
140 base.0
141 .push(Token::new_pointer(level, startert.to_submethod()));
142 nested.insert(0, base.clone());
143
144 match Lexer::take_till_end(tokens.clone(), i) {
145 None => return Err(Error::new(String::from("TODO: find a appropriate error"))),
146 Some(v) => {
147 nested.insert(level, (v.0, v.2));
148 i = v.1;
149 }
150 };
151
152 continue;
153 } else if t.is_rparen() || t.is_rabs() {
154 if startert.clone().matchto(t.clone()) {
155 i += 1;
156 }
157 continue;
158 }
159
160 let mut base: (Vec<Token>, bool) = match nested.get(&0) {
161 None => (vec![], false),
162 Some(v) => v.clone(),
163 };
164
165 base.0.push(t);
166 nested.insert(0, base.clone());
167 i += 1;
168 }
169
170 return Ok(nested);
171 }
172
173 // Collects all tokens from exact one parentheses-expression-clip.
174 //
175 // If [start] doesn't equals to any kind of opening(left) parentheses, result gonna be [None].
176 fn take_till_end(tokens: Vec<Token>, start: usize) -> Option<(Vec<Token>, usize, bool)> {
177 let mut iteration_count = start;
178 let mut has_to_recall: bool = false;
179
180 let mut level: i32 = 1;
181
182 // [start] indexed value should always equal to an any kind of opening parentheses
183 let start_token = tokens.clone()[start].clone();
184 if !start_token.is_lparen() && !start_token.is_labs() || start > tokens.clone().len() {
185 return None;
186 }
187
188 // Initialize the matcho_collection with start_token.
189 // In case of different kinds of parentheses([normal] and [abs]) we have to track the
190 // nesting level by right matching token-types.
191 // So, if the opening is normal parentheses token and closing is abs parentheses
192 // token we shouldn't decrement the level.
193 let mut matcho_collection: Vec<Token> = vec![start_token.clone()];
194
195 let mut collected: Vec<Token> = Vec::new();
196 for i in (start + 1)..tokens.len() {
197 let t = tokens[i].clone();
198
199 iteration_count += 1;
200
201 if t.is_lparen() || t.is_labs() {
202 level += 1;
203 has_to_recall = true;
204 matcho_collection.push(t.clone())
205 }
206
207 if t.is_rparen() || t.is_rabs() {
208 if matcho_collection.last().unwrap().matchto(t) {
209 level -= 1;
210 matcho_collection.pop();
211 }
212
213 if level == 0 {
214 return Some((collected, iteration_count, has_to_recall));
215 }
216 }
217
218 collected.push(tokens[i].clone());
219 }
220
221 Some((collected, iteration_count, has_to_recall))
222 }
223
224 // Breaks the result of [nest_parentheses] into one line token list.
225 // Runs into each nest-level indexed hash-map value and collects them into one line token
226 // list.
227 // If it's required to re-nest current nest-level indexed hash-map value, it calls
228 // [nest_parentheses] inside of itself.
229 fn break_nesting(
230 point: usize,
231 nested: HashMap<usize, (Vec<Token>, bool)>,
232 ) -> Result<Vec<Token>, Error> {
233 let mut result: Vec<Token> = Vec::new();
234
235 match nested.get(&point) {
236 None => return Ok(result),
237 Some(v) => {
238 for t in v.0.iter() {
239 if !t.is_pointer() {
240 result.push(t.clone());
241 continue;
242 }
243
244 match nested.get(&t.clone().take_pointer_index().unwrap()) {
245 None => continue,
246 Some(v) => {
247 if !v.1 {
248 let combined: Sub = Lexer::combine_tokens(v.0.clone());
249 result.push(Token::new_sub(combined.tokens, t.clone().sub.method));
250 continue;
251 }
252
253 // If the tokens at current point in the [nested], contains parentheses
254 // that means we have to re-nest and re break them as tokens recursively..
255 match Lexer::nest_parentheses(v.0.clone()) {
256 Err(e) => return Err(e),
257 Ok(v) => match Lexer::break_nesting(0, v) {
258 Err(e) => return Err(e),
259 Ok(v) => {
260 let combined: Sub = Lexer::combine_tokens(v);
261 result.push(Token::new_sub(
262 combined.tokens,
263 t.clone().sub.method,
264 ));
265 }
266 },
267 }
268 }
269 }
270 }
271 }
272 };
273
274 Ok(result)
275 }
276
    // Takes first-party tokens, combines them and returns
    // 1D nested tokens.
    //
    // In first inner result of token generation of [lex],
    // multiplication and division aren't collected together.
    // To take care of arithmetic's "process priority", we have
    // first calculate the multiplication or division action, and
    // then continue with the other ones.
    // So, we have to convert the multiplication and division
    // parts of main expression into the sub expressions.
    fn combine_tokens(tokens: Vec<Token>) -> Sub {
        let mut combined_tokens: Vec<Token> = Vec::new(); // final, top-level token line.
        let mut sub_tokens: Vec<Token> = Vec::new(); // currently-growing div/prod group.
        let mut power_subs: Vec<Token> = Vec::new(); // currently-growing `^` chain.

        // Combine products/divisions/parentheses as sub-expression.
        for i in 0..tokens.len() {
            let next: Token;
            let current: Token = tokens[i].clone();
            if i < tokens.len() - 1 {
                next = tokens[i + 1].clone();
            } else {
                // Past the end: an empty token that matches no predicate below.
                next = Token::from(String::new(), Token::unknown_index());
            }

            // Two adjacent "values" (number/number, sub/sub, or mixed) have
            // no operator between them.
            let is_auto_solids = current.is_number() && next.is_number()
                || current.is_sub_exp() && next.is_sub_exp();
            let is_auto_mixings = current.is_number() && next.is_sub_exp()
                || current.is_sub_exp() && next.is_number();

            // Auto append multiplication ◀╮
            // if there is no sign between │ two "number"(normal number and sub-exp) token.
            //    ╭──────────────────╭─────╯
            // ╭─ ▼ ───────╮    ╭── ▼ ─────────╮
            // │ 4(2 + 10) │ ──▶ │ 4 • (2 + 10) │
            // ╰───────────╯    ╰──────────────╯
            if is_auto_solids || is_auto_mixings {
                sub_tokens.append(&mut Vec::from([
                    current.clone(),
                    Token::from(String::from("*"), Token::unknown_index()),
                ]));
                continue;
            }

            // Collect power subs in different array to create a different sub expression with them.
            // By doing that we gonna easily keep operation priority safe.
            let is_power_sub = power_subs.len() > 0
                && (current.is_number() || current.is_sub_exp() || current.is_power());
            if is_power_sub || next.is_power() && (current.is_number() || current.is_sub_exp()) {
                power_subs.push(current.clone());
                continue;
            }

            // The `^` chain ended: fold it right-to-left into one nested
            // sub-expression before continuing with the normal flow.
            if !power_subs.is_empty() {
                sub_tokens.push(Token::new_sub(
                    Lexer::combine_powers(power_subs.clone(), power_subs.clone().len() - 1),
                    SubMethod::PAREN,
                ));

                power_subs.clear();
            }

            let current_is_combinable = current.is_div_or_prod() || current.is_percentage();
            // NOTE(review): `current.is_percentage()` below looks like a
            // copy-paste of the line above — `next.is_percentage()` seems
            // intended, since this flag describes `next`. Confirm before changing.
            let next_is_combinable = next.is_div_or_prod() || current.is_percentage();
            let is_sub = sub_tokens.len() > 0
                && (current.is_number() || current.is_sub_exp() || current_is_combinable);

            // Checks matching of new or exiting sub-token.
            if is_sub || next_is_combinable && (current.is_number() || current.is_sub_exp()) {
                if !power_subs.is_empty() {
                    sub_tokens.push(Token::new_sub(
                        Lexer::combine_powers(power_subs.clone(), power_subs.len() - 1),
                        SubMethod::PAREN,
                    ));
                    power_subs.clear();
                }

                sub_tokens.push(current);
                continue;
            }

            // Current token ends the div/prod group: flush it into the
            // top-level line (unwrapped if it is a single sub-expression).
            if !sub_tokens.is_empty() {
                if sub_tokens.len() == 1 && sub_tokens.clone()[0].is_sub_exp() {
                    combined_tokens.append(&mut sub_tokens.clone());
                } else {
                    combined_tokens.push(Token::new_sub(sub_tokens.clone(), SubMethod::PAREN));
                }

                sub_tokens.clear()
            }

            combined_tokens.push(current);
        }

        // Flush a power chain that ran to the end of the input.
        if !power_subs.is_empty() {
            if sub_tokens.is_empty() {
                sub_tokens.append(&mut Lexer::combine_powers(
                    power_subs.clone(),
                    power_subs.len() - 1,
                ));
            } else {
                sub_tokens.push(Token::new_sub(
                    Lexer::combine_powers(power_subs.clone(), power_subs.len() - 1),
                    SubMethod::PAREN,
                ))
            }
        }

        // The whole expression was one div/prod group.
        if combined_tokens.is_empty() {
            return Sub::new(sub_tokens, SubMethod::PAREN);
        }

        // Avoid appending sub-expression-token to empty tokens list.
        if !sub_tokens.is_empty() {
            if sub_tokens.len() == 1 && sub_tokens.clone()[0].is_sub_exp() {
                combined_tokens.append(&mut sub_tokens.clone()[0].sub.tokens);
            } else {
                combined_tokens.push(Token::new_sub(sub_tokens.clone(), SubMethod::PAREN));
            }
        }

        return Sub::new(combined_tokens, SubMethod::PAREN);
    }
400
401 // Combines 1D sub expression power tokens to actual nested-power sub-expression vector.
402 // > For example: if given data is:
403 // ╭────────────────╮ ╭───────────────────╮
404 // │ 5 ^ 2 ^ 3 ^ 2 │ it'd be converted to │ 5 ^ (2 ^ (3 ^ 2)) │
405 // ╰────────────────╯ ╰───────────────────╯
406 // We have to start reading from the ending, that's why we nest powers to individual
407 // sub-expression.
408 // By doing that we make it easy to understood by calculator.
409 // So, as a result it'd be resolved like:
410 // ╭───────────────────╮ ╭─────────────╮ ╭─────────╮ ╭───╮
411 // │ 5 ^ (2 ^ (3 ^ 2)) │ ──▶ │ 5 ^ (2 ^ 9) │ ──▶ │ 5 ^ 512 │ ──▶ │ ? │
412 // ╰───────────────────╯ ╰─────────────╯ ╰─────────╯ ╰───╯
413 fn combine_powers(tokens: Vec<Token>, start: usize) -> Vec<Token> {
414 if tokens.len() == 3 {
415 return tokens;
416 }
417
418 let mut combined_tokens: Vec<Token> = Vec::new();
419
420 let end = start.clone() as i32 - 2;
421 if end < 0 {
422 return combined_tokens;
423 }
424
425 let cpart: Vec<Token> = tokens.clone()[end as usize..=start.clone()].to_vec();
426 combined_tokens.append(&mut tokens.clone()[..end as usize].to_vec());
427 combined_tokens.push(Token::new_sub(cpart, SubMethod::PAREN));
428
429 if end <= 0 {
430 return combined_tokens;
431 }
432
433 Lexer::combine_powers(combined_tokens, end as usize)
434 }
435
436 // Converts byte-character to token-structure.
437 // Mainly used to generate 1D(first-party) tokens in [`lex`] method.
438 //
439 // ╭─────────────╮ In second part of token generation, white(empty) spaces are auto-skipped
440 // ╭──────│───────────╮ │ by [skip_whitespace] method. And generate_token checks: {if that character is sign or not},
441 // │ 422 + 6 * 7 │ │ if it's, it firstly reads that character by [read_char].
442 // ╰──│───────────────╯ ╰───▶ And then creates new token by automatically filling token data.
443 // │
444 // │ In genesis, [`self.examination_char`] would be "4", and [generate_token] has to determine
445 // │ "4" can be not single-digit, it needs to reed full number not only "4".
446 // ╰───▶ So, [read_number] method will be used to read and return final number.
447 //
448 // ... and so on ...
449 //
450 fn generate_token(&self) -> Option<Result<Token, Error>> {
451 self.skip_whitespace();
452
453 let ch: String = self.examination_char.get().to_string();
454 let position: i32 = self.position.get() as i32;
455 if ch.is_operation_sign() {
456 if ch.is_plus_or_minus() && self.is_free_from_number(1) && self.next_is_number(1) {
457 match self.read_number() {
458 None => return None,
459 Some(v) => return Some(Ok(Token::from(v.0, v.1))),
460 }
461 }
462
463 if let None = self.read_char() {
464 return None;
465 };
466
467 return Some(Ok(Token::from(ch, (position, position))));
468 }
469
470 // Check for a positive number.
471 if ch.is_number() || ch.is_point() {
472 match self.read_number() {
473 None => return None,
474 Some(v) => return Some(Ok(Token::from(v.0, v.1))),
475 }
476 }
477
478 let lit: String = self.examination_char.get().to_string();
479 if let None = self.read_char() {
480 return None;
481 }
482
483 Some(Ok(Token::from(lit, (position, position))))
484 }
485
    // A [char] reading functionality, that also updates state of lexer.
    // Reads char and fills lexer object with read and manipulated data.
    //
    // Returns `None` only once the cursor has moved past the end of the
    // input; the special branch below fires exactly once at the boundary.
    fn read_char(&self) -> Option<char> {
        match self.input.chars().nth(self.read_position.get()) {
            Some(ch) => {
                // Normal case: advance the cursor one character forward.
                self.examination_char.set(ch);
                self.position.set(self.read_position.get());
                self.read_position.set(self.read_position.get() + 1);
                return Some(ch);
            }
            None => {
                // Exactly at end-of-input: return the current (last) char
                // once more while pushing `position` past the end, so that
                // callers like [read_number] include the final character in
                // their `start..position` slice before getting `None`.
                if self.read_position.get() == self.input.len() {
                    let ch: char = self.input.chars().nth(self.position.get()).unwrap();

                    self.examination_char.set(ch);
                    self.position.set(self.read_position.get());
                    self.read_position.set(self.read_position.get() + 1);
                    return Some(ch);
                }

                return None;
            }
        }
    }
510
    // Collects from start to end of the string number,
    // and returns the full part of that number from input.
    //
    //  "-426.7" actually is a array of [char]s
    //  ╭────────────────────────────────────────────╮
    //  │ -426.7 ───▶ ['-', '4', '2', '6', '.', '7'] │
    //  ╰────────────────────────────────────────────╯
    //  To make computer understood that full number,
    //  We need to determine the start and end index
    //  of that full-number in rune array (from digit to digit).
    //
    // Returns the number's text and its (start, end) index pair in the input.
    // NOTE(review): indices mix `chars()` positions with `input.len()` (byte
    // length) — assumes effectively single-byte (ASCII) input; confirm for
    // multibyte expressions.
    fn read_number(&self) -> Option<(String, (i32, i32))> {
        let input: String = self.input.to_string();
        let start: usize = self.position.get();

        // Include negative/positive representation signs.
        let char_at_start: char = match self.input.chars().nth(start) {
            Some(ch) => ch,
            None => '+', // as default numbers are positive
        };

        if char_at_start.to_string().is_plus_or_minus() {
            if let None = self.read_char() {
                return None;
            }
        }

        // Keep reading forward chars if l.Char is number or number-point.
        // Spaces are consumed too, so a signed number like "- 5" is read
        // as one token ("-5").
        let mut ch: char = self.examination_char.get();
        while ch.to_string().is_number() || ch.to_string().is_point() || ch == ' ' {
            match self.read_char() {
                Some(v) => ch = v,
                None => {
                    // End of input while inside the number: stop reading;
                    // anything else is a genuine read failure.
                    if self.read_position.get() >= self.input.len() {
                        break;
                    }

                    return None;
                }
            }
        }

        // `position` now points one past the number, so the slice below is
        // exclusive of the current (non-number) character.
        let num = input.substring(start, self.position.get()).to_string();
        let end = match num.chars().last() {
            None => self.position.get(),
            Some(v) => {
                // When the loop over-consumed a trailing space, the real
                // end index sits one further back.
                if v != ' ' {
                    self.position.get() - 1
                } else {
                    self.position.get() - 2
                }
            }
        };

        Some((num, (start as i32, end as i32)))
    }
567
568 // Eats all type of empty(white) spaces.
569 fn skip_whitespace(&self) {
570 let mut c: char = self.examination_char.get();
571 while c == ' ' || c == '\t' || c == '\n' || c == '\r' {
572 match self.read_char() {
573 Some(v) => c = v,
574 None => break,
575 }
576 }
577 }
578
579 // Returns the next character by current position.
580 //
581 // [step] will be used to determine, how many steps we wanna go further.
582 // As default (when you wanna go for one step next) you should make [step] <1>.
583 fn peek_char(&self, step: usize) -> Option<char> {
584 let index: usize = self.position.get() + step;
585 if index >= self.input.len() {
586 return None;
587 }
588
589 match self.input.chars().nth(index) {
590 Some(ch) => return Some(ch),
591 None => return None,
592 }
593 }
594
595 // Returns the previous character by current position.
596 //
597 // [step] will be used to determine, how many steps we wanna go back.
598 // As default (when you wanna go for one step back) you should make [step] <1>.
599 fn peek_char_back(&self, step: usize) -> Option<char> {
600 let bindex: i32 = self.position.get() as i32 - step as i32;
601 if bindex < 0 {
602 return None;
603 }
604
605 match self.input.chars().nth(bindex as usize) {
606 Some(ch) => return Some(ch),
607 None => return None,
608 }
609 }
610
611 // Checks if the current positioned character is free from any number.
612 //
613 // If previous character of current position is white space, we should check for the next
614 // previous one.
615 fn is_free_from_number(&self, step: usize) -> bool {
616 match self.peek_char_back(step) {
617 None => true, // if there is nothing in back, then it's free from number.
618 Some(v) => {
619 if v != ' ' {
620 let is_paren: (bool, bool) = v.to_string().is_parentheses();
621 let is_abs: (bool, bool) = v.to_string().is_abs();
622
623 // println!("{}, abs:{:?}, paren:{:?}", v, is_abs, is_paren);
624
625 return !is_paren.1 && !is_abs.1 && !v.to_string().is_number();
626 }
627
628 self.is_free_from_number(step + 1)
629 }
630 }
631 }
632
633 // Checks for a negative or sign provided number in the next of our current position.
634 //
635 // If next character of current position is white space, we should check for the next
636 // of current next.
637 fn next_is_number(&self, step: usize) -> bool {
638 match self.peek_char(step) {
639 None => false, // nothing != number
640 Some(v) => {
641 if v != ' ' {
642 return v.to_string().is_number();
643 }
644
645 self.next_is_number(step + 1)
646 }
647 }
648 }
649}
650
#[cfg(test)]
mod test {
    use super::*;
    use std::collections::HashMap;

    // Constructor behavior: empty input must error; otherwise the lexer
    // starts positioned on the first character with read cursor at 1.
    #[test]
    fn new() {
        let test_data: HashMap<&str, Result<Lexer, Error>> = HashMap::from([
            ("", Err(Error::empty_input())),
            (
                "4 + 2",
                Ok(Lexer {
                    input: "4 + 2",
                    examination_char: Cell::new('4'),
                    position: Cell::from(0),
                    read_position: Cell::from(1),
                }),
            ),
        ]);

        for (input, expected) in test_data {
            let result: Result<Lexer, Error> = Lexer::new(input);
            assert_eq!(result, expected);
        }
    }

    // End-to-end table test: raw expression -> expected sub-expression tree.
    // Covers plain numbers, signed numbers, auto-multiplication, operator
    // priority grouping, power nesting, parentheses and abs brackets.
    #[test]
    fn lex() {
        let test_data: HashMap<String, Result<Sub, Error>> = HashMap::from([
            (String::new(), Err(Error::empty_input())),
            (
                String::from("25"),
                Ok(Sub::new(
                    vec![Token::from(String::from("25"), (0, 1))],
                    SubMethod::PAREN,
                )),
            ),
            (
                String::from("-25"),
                Ok(Sub::new(
                    vec![Token::from(String::from("-25"), (0, 2))],
                    SubMethod::PAREN,
                )),
            ),
            (
                String::from("(25)"),
                Ok(Sub::new(
                    vec![Token::new_sub(
                        vec![Token::from(String::from("25"), (1, 2))],
                        SubMethod::PAREN,
                    )],
                    SubMethod::PAREN,
                )),
            ),
            (
                String::from("(-25)"),
                Ok(Sub::new(
                    vec![Token::new_sub(
                        vec![Token::from(String::from("-25"), (1, 3))],
                        SubMethod::PAREN,
                    )],
                    SubMethod::PAREN,
                )),
            ),
            (
                String::from("-25 + 5"),
                Ok(Sub::new(
                    vec![
                        Token::from(String::from("-25"), (0, 2)),
                        Token::from(String::from("+"), (4, 4)),
                        Token::from(String::from("5"), (6, 6)),
                    ],
                    SubMethod::PAREN,
                )),
            ),
            // Space-separated signs attach to the following number.
            (
                String::from("- - 2 + - 5"),
                Ok(Sub::new(
                    vec![
                        Token::from(String::from("-"), (0, 0)),
                        Token::from(String::from("-2"), (2, 4)),
                        Token::from(String::from("+"), (6, 6)),
                        Token::from(String::from("-5"), (8, 10)),
                    ],
                    SubMethod::PAREN,
                )),
            ),
            (
                String::from("42 * 5"),
                Ok(Sub::new(
                    vec![
                        Token::from(String::from("42"), (0, 1)),
                        Token::from(String::from("*"), (3, 3)),
                        Token::from(String::from("5"), (5, 5)),
                    ],
                    SubMethod::PAREN,
                )),
            ),
            // Division/product runs are grouped into sub-expressions so they
            // evaluate before the surrounding plus.
            (
                String::from("- 2 * 7 / 5 + - 20 / - 5"),
                Ok(Sub::new(
                    vec![
                        Token::new_sub(
                            vec![
                                Token::from(String::from("-2"), (0, 2)),
                                Token::from(String::from("*"), (4, 4)),
                                Token::from(String::from("7"), (6, 6)),
                                Token::from(String::from("/"), (8, 8)),
                                Token::from(String::from("5"), (10, 10)),
                            ],
                            SubMethod::PAREN,
                        ),
                        Token::from(String::from("+"), (12, 12)),
                        Token::new_sub(
                            vec![
                                Token::from(String::from("-20"), (14, 17)),
                                Token::from(String::from("/"), (19, 19)),
                                Token::from(String::from("-5"), (21, 23)),
                            ],
                            SubMethod::PAREN,
                        ),
                    ],
                    SubMethod::PAREN,
                )),
            ),
            (
                String::from("(5 - 9) - 10"),
                Ok(Sub::new(
                    vec![
                        Token::new_sub(
                            vec![
                                Token::from(String::from("5"), (1, 1)),
                                Token::from(String::from("-"), (3, 3)),
                                Token::from(String::from("9"), (5, 5)),
                            ],
                            SubMethod::PAREN,
                        ),
                        Token::from(String::from("-"), (8, 8)),
                        Token::from(String::from("10"), (10, 11)),
                    ],
                    SubMethod::PAREN,
                )),
            ),
            (
                String::from("(10 - 5) - (10 / 2)"),
                Ok(Sub::new(
                    vec![
                        Token::new_sub(
                            vec![
                                Token::from(String::from("10"), (1, 2)),
                                Token::from(String::from("-"), (4, 4)),
                                Token::from(String::from("5"), (6, 6)),
                            ],
                            SubMethod::PAREN,
                        ),
                        Token::from(String::from("-"), (9, 9)),
                        Token::new_sub(
                            vec![
                                Token::from(String::from("10"), (12, 13)),
                                Token::from(String::from("/"), (15, 15)),
                                Token::from(String::from("2"), (17, 17)),
                            ],
                            SubMethod::PAREN,
                        ),
                    ],
                    SubMethod::PAREN,
                )),
            ),
            // Nested parentheses produce nested sub-expressions.
            (
                String::from("((10 - 5) - (10 / 2)) / 2"),
                Ok(Sub::new(
                    vec![
                        Token::new_sub(
                            vec![
                                Token::new_sub(
                                    vec![
                                        Token::from(String::from("10"), (2, 3)),
                                        Token::from(String::from("-"), (5, 5)),
                                        Token::from(String::from("5"), (7, 7)),
                                    ],
                                    SubMethod::PAREN,
                                ),
                                Token::from(String::from("-"), (10, 10)),
                                Token::new_sub(
                                    vec![
                                        Token::from(String::from("10"), (13, 14)),
                                        Token::from(String::from("/"), (16, 16)),
                                        Token::from(String::from("2"), (18, 18)),
                                    ],
                                    SubMethod::PAREN,
                                ),
                            ],
                            SubMethod::PAREN,
                        ),
                        Token::from(String::from("/"), (22, 22)),
                        Token::from(String::from("2"), (24, 24)),
                    ],
                    SubMethod::PAREN,
                )),
            ),
            (
                String::from("(2 + 5) * (5 - 9 / (8 - 5))"),
                Ok(Sub::new(
                    vec![
                        Token::new_sub(
                            vec![
                                Token::from(String::from("2"), (1, 1)),
                                Token::from(String::from("+"), (3, 3)),
                                Token::from(String::from("5"), (5, 5)),
                            ],
                            SubMethod::PAREN,
                        ),
                        Token::from(String::from("*"), (8, 8)),
                        Token::new_sub(
                            vec![
                                Token::from(String::from("5"), (11, 11)),
                                Token::from(String::from("-"), (13, 13)),
                                Token::new_sub(
                                    vec![
                                        Token::from(String::from("9"), (15, 15)),
                                        Token::from(String::from("/"), (17, 17)),
                                        Token::new_sub(
                                            vec![
                                                Token::from(String::from("8"), (20, 20)),
                                                Token::from(String::from("-"), (22, 22)),
                                                Token::from(String::from("5"), (24, 24)),
                                            ],
                                            SubMethod::PAREN,
                                        ),
                                    ],
                                    SubMethod::PAREN,
                                ),
                            ],
                            SubMethod::PAREN,
                        ),
                    ],
                    SubMethod::PAREN,
                )),
            ),
            // Adjacent values with no operator get an auto-inserted `*`
            // carrying the unknown index.
            (
                String::from("5(5 / 2)(9 * 3)11"),
                Ok(Sub::new(
                    vec![
                        Token::from(String::from("5"), (0, 0)),
                        Token::from(String::from("*"), Token::unknown_index()),
                        Token::new_sub(
                            vec![
                                Token::from(String::from("5"), (2, 2)),
                                Token::from(String::from("/"), (4, 4)),
                                Token::from(String::from("2"), (6, 6)),
                            ],
                            SubMethod::PAREN,
                        ),
                        Token::from(String::from("*"), Token::unknown_index()),
                        Token::new_sub(
                            vec![
                                Token::from(String::from("9"), (9, 9)),
                                Token::from(String::from("*"), (11, 11)),
                                Token::from(String::from("3"), (13, 13)),
                            ],
                            SubMethod::PAREN,
                        ),
                        Token::from(String::from("*"), Token::unknown_index()),
                        Token::from(String::from("11"), (15, 16)),
                    ],
                    SubMethod::PAREN,
                )),
            ),
            // Power chains are right-nested: 5 ^ (3 ^ (2 ^ 5)).
            (
                String::from("5 ^ 3 ^ 2 ^ 5 * 19 - 50"),
                Ok(Sub::new(
                    vec![
                        Token::new_sub(
                            vec![
                                Token::new_sub(
                                    vec![
                                        Token::from(String::from("5"), (0, 0)),
                                        Token::from(String::from("^"), (2, 2)),
                                        Token::new_sub(
                                            vec![
                                                Token::from(String::from("3"), (4, 4)),
                                                Token::from(String::from("^"), (6, 6)),
                                                Token::new_sub(
                                                    vec![
                                                        Token::from(String::from("2"), (8, 8)),
                                                        Token::from(String::from("^"), (10, 10)),
                                                        Token::from(String::from("5"), (12, 12)),
                                                    ],
                                                    SubMethod::PAREN,
                                                ),
                                            ],
                                            SubMethod::PAREN,
                                        ),
                                    ],
                                    SubMethod::PAREN,
                                ),
                                Token::from(String::from("*"), (14, 14)),
                                Token::from(String::from("19"), (16, 17)),
                            ],
                            SubMethod::PAREN,
                        ),
                        Token::from(String::from("-"), (19, 19)),
                        Token::from(String::from("50"), (21, 22)),
                    ],
                    SubMethod::PAREN,
                )),
            ),
            (
                String::from("5 ^ 3 ^ 19"),
                Ok(Sub::new(
                    vec![
                        Token::from(String::from("5"), (0, 0)),
                        Token::from(String::from("^"), (2, 2)),
                        Token::new_sub(
                            vec![
                                Token::from(String::from("3"), (4, 4)),
                                Token::from(String::from("^"), (6, 6)),
                                Token::from(String::from("19"), (8, 9)),
                            ],
                            SubMethod::PAREN,
                        ),
                    ],
                    SubMethod::PAREN,
                )),
            ),
            (
                String::from("(2 + 3 ^ 5) ^ 9"),
                Ok(Sub::new(
                    vec![
                        Token::new_sub(
                            vec![
                                Token::from(String::from("2"), (1, 1)),
                                Token::from(String::from("+"), (3, 3)),
                                Token::new_sub(
                                    vec![
                                        Token::from(String::from("3"), (5, 5)),
                                        Token::from(String::from("^"), (7, 7)),
                                        Token::from(String::from("5"), (9, 9)),
                                    ],
                                    SubMethod::PAREN,
                                ),
                            ],
                            SubMethod::PAREN,
                        ),
                        Token::from(String::from("^"), (12, 12)),
                        Token::from(String::from("9"), (14, 14)),
                    ],
                    SubMethod::PAREN,
                )),
            ),
            // Square brackets lex to ABS sub-expressions.
            (
                String::from("[2 - 12] - 10"),
                Ok(Sub::new(
                    vec![
                        Token::new_sub(
                            vec![
                                Token::from(String::from("2"), (1, 1)),
                                Token::from(String::from("-"), (3, 3)),
                                Token::from(String::from("12"), (5, 6)),
                            ],
                            SubMethod::ABS,
                        ),
                        Token::from(String::from("-"), (9, 9)),
                        Token::from(String::from("10"), (11, 12)),
                    ],
                    SubMethod::PAREN,
                )),
            ),
            (
                String::from("[7 - 14] * [5 - 9 / [5 - 3]]"),
                Ok(Sub::new(
                    vec![
                        Token::new_sub(
                            vec![
                                Token::from(String::from("7"), (1, 1)),
                                Token::from(String::from("-"), (3, 3)),
                                Token::from(String::from("14"), (5, 6)),
                            ],
                            SubMethod::ABS,
                        ),
                        Token::from(String::from("*"), (9, 9)),
                        Token::new_sub(
                            vec![
                                Token::from(String::from("5"), (12, 12)),
                                Token::from(String::from("-"), (14, 14)),
                                Token::new_sub(
                                    vec![
                                        Token::from(String::from("9"), (16, 16)),
                                        Token::from(String::from("/"), (18, 18)),
                                        Token::new_sub(
                                            vec![
                                                Token::from(String::from("5"), (21, 21)),
                                                Token::from(String::from("-"), (23, 23)),
                                                Token::from(String::from("3"), (25, 25)),
                                            ],
                                            SubMethod::ABS,
                                        ),
                                    ],
                                    SubMethod::PAREN,
                                ),
                            ],
                            SubMethod::ABS,
                        ),
                    ],
                    SubMethod::PAREN,
                )),
            ),
        ]);

        for (input, expected) in test_data {
            let result: Result<Sub, Error> = Lexer::lex(input.as_str());
            assert_eq!(result, expected)
        }
    }

    // TODO: should add tests for private functions also.
}