1use crate::tokens::{Token, TokenType};
2use std::cmp;
3use std::collections::VecDeque;
4
5use unicode_categories::UnicodeCategories;
6
/// Character buffer with two positions: a committed `cursor` and a
/// speculative `lookahead` that runs ahead of it while a token is scanned.
///
/// `lookahead` is allowed to run past the end of `input`; every method that
/// indexes `input` clamps it first.
struct RawSource {
    input: Vec<char>,
    cursor: usize,
    lookahead: usize,
}

impl RawSource {
    /// Builds a source over the characters of `input`, with both positions at 0.
    pub fn new(input: &str) -> RawSource {
        RawSource {
            input: input.chars().collect(),
            cursor: 0,
            lookahead: 0,
        }
    }

    /// Returns `true` once the committed cursor has reached the end of the input.
    fn at_end(&self) -> bool {
        self.cursor >= self.input.len()
    }

    /// Index of the first character that has not been committed yet.
    fn committed_index(&self) -> usize {
        self.cursor
    }

    /// Current lookahead position (may exceed the input length).
    fn peeked_index(&self) -> usize {
        self.lookahead
    }

    /// Returns up to `window` characters starting at the lookahead position
    /// and advances the lookahead by exactly `window`, even when fewer
    /// characters are available.
    fn peek(&mut self, window: usize) -> &[char] {
        let len = self.input.len();
        let window_open = cmp::min(self.lookahead, len);
        let window_close = cmp::min(self.lookahead + window, len);
        self.lookahead += window;
        &self.input[window_open..window_close]
    }

    /// Moves the lookahead back by `window` characters.
    ///
    /// # Panics
    ///
    /// Panics if that would move the lookahead behind the committed cursor
    /// (including the case where `window` is larger than the lookahead itself).
    fn hide(&mut self, window: usize) {
        let cursor = self.cursor;
        // Check before mutating: a plain `-=` would overflow (debug) or wrap
        // around and slip past the check (release) when `window` is too big.
        match self.lookahead.checked_sub(window) {
            Some(rewound) if rewound >= cursor => self.lookahead = rewound,
            _ => panic!("Cannot hide what was already consumed"),
        }
    }

    /// Commits everything peeked so far; the cursor never moves past the input end.
    fn commit(&mut self) {
        self.cursor = cmp::min(self.lookahead, self.input.len());
    }

    /// Discards all uncommitted lookahead.
    fn revert(&mut self) {
        self.lookahead = self.cursor;
    }

    /// Returns the characters between the cursor and the lookahead position.
    ///
    /// The range is clamped to the input, so a lookahead that ran past the
    /// end yields the remaining characters instead of panicking.
    fn peeked_string(&self) -> String {
        let end = cmp::min(self.lookahead, self.input.len());
        let start = cmp::min(self.cursor, end);
        self.input[start..end].iter().collect()
    }
}
63
/// Tokenizer state; yields tokens through its `Iterator` implementation.
pub struct TokenStream {
    // Character source with a committed cursor and a speculative lookahead.
    source: RawSource,
    // Cleared when a NEWLINE token is committed; set by any committed token
    // other than NL and COMMENT.
    within_statement: bool,
    // Nesting depth of `(`, `[` and `{`; raised and lowered as the
    // corresponding operator tokens are consumed.
    parenthesis_level: usize,
    bracket_level: usize,
    brace_level: usize,
    // Stack of indentation widths seen so far; starts as `[0]`.
    indents_seen: Vec<usize>,
    // Tokens produced but not yet handed out by the iterator.
    tokens: VecDeque<Token>,
    // Set once the end-of-input tokens have been emitted.
    ended: bool,
}
74
75impl TokenStream {
76 pub fn new(input: &str) -> TokenStream {
77 TokenStream {
78 source: RawSource::new(input),
79 within_statement: false,
80 parenthesis_level: 0,
81 bracket_level: 0,
82 brace_level: 0,
83 indents_seen: vec![0],
84 tokens: VecDeque::new(),
85 ended: false,
86 }
87 }
88
89 fn add_token(
90 &mut self,
91 token_type: TokenType,
92 exact_token_type: TokenType,
93 token_contents: String,
94 col_start: usize,
95 col_end: usize,
96 ) {
97 self.tokens.push_back(Token {
98 token_type,
99 exact_token_type,
100 token_contents,
101 col_start,
102 col_end,
103 });
104 }
105
106 fn commit_to_token(&mut self, token_type: TokenType) {
107 self.commit_to_exact_token(token_type, token_type)
108 }
109
110 fn commit_to_exact_token(&mut self, token_type: TokenType, exact_token_type: TokenType) {
111 self.add_token(
112 token_type,
113 exact_token_type,
114 self.source.peeked_string(),
115 self.source.committed_index(),
116 self.source.peeked_index(),
117 );
118 self.source.commit();
119 if token_type == TokenType::NEWLINE {
120 self.within_statement = false;
121 } else if !(token_type == TokenType::NL || token_type == TokenType::COMMENT) {
122 self.within_statement = true;
123 };
124 }
126
127 fn consume_next_op_token(&mut self) -> bool {
130 let exact_token_type: TokenType;
131 match self.source.peek(3) {
132 ['*', '*', '='] => exact_token_type = TokenType::DOUBLESTAREQUAL,
133 ['.', '.', '.'] => exact_token_type = TokenType::ELLIPSIS,
134 ['/', '/', '='] => exact_token_type = TokenType::DOUBLESLASHEQUAL,
135 ['<', '<', '='] => exact_token_type = TokenType::LEFTSHIFTEQUAL,
136 ['>', '>', '='] => exact_token_type = TokenType::RIGHTSHIFTEQUAL,
137 ['!', '=', ..] => {
138 exact_token_type = TokenType::NOTEQUAL;
139 self.source.hide(1);
140 }
141 ['%', '=', ..] => {
142 exact_token_type = TokenType::PERCENTEQUAL;
143 self.source.hide(1);
144 }
145 ['&', '=', ..] => {
146 exact_token_type = TokenType::AMPEREQUAL;
147 self.source.hide(1);
148 }
149 ['*', '*', ..] => {
150 exact_token_type = TokenType::DOUBLESTAR;
151 self.source.hide(1);
152 }
153 ['*', '=', ..] => {
154 exact_token_type = TokenType::STAREQUAL;
155 self.source.hide(1);
156 }
157 ['+', '=', ..] => {
158 exact_token_type = TokenType::PLUSEQUAL;
159 self.source.hide(1);
160 }
161 ['-', '=', ..] => {
162 exact_token_type = TokenType::MINEQUAL;
163 self.source.hide(1);
164 }
165 ['-', '>', ..] => {
166 exact_token_type = TokenType::RARROW;
167 self.source.hide(1);
168 }
169 ['/', '/', ..] => {
170 exact_token_type = TokenType::DOUBLESLASH;
171 self.source.hide(1);
172 }
173 ['/', '=', ..] => {
174 exact_token_type = TokenType::SLASHEQUAL;
175 self.source.hide(1);
176 }
177 [':', '=', ..] => {
178 exact_token_type = TokenType::COLONEQUAL;
179 self.source.hide(1);
180 }
181 ['<', '<', ..] => {
182 exact_token_type = TokenType::LEFTSHIFT;
183 self.source.hide(1);
184 }
185 ['<', '=', ..] => {
186 exact_token_type = TokenType::LESSEQUAL;
187 self.source.hide(1);
188 }
189 ['<', '>', ..] => {
190 exact_token_type = TokenType::NOTEQUAL;
191 self.source.hide(1);
192 }
193 ['=', '=', ..] => {
194 exact_token_type = TokenType::EQEQUAL;
195 self.source.hide(1);
196 }
197 ['>', '=', ..] => {
198 exact_token_type = TokenType::GREATEREQUAL;
199 self.source.hide(1);
200 }
201 ['>', '>', ..] => {
202 exact_token_type = TokenType::RIGHTSHIFT;
203 self.source.hide(1);
204 }
205 ['@', '=', ..] => {
206 exact_token_type = TokenType::ATEQUAL;
207 self.source.hide(1);
208 }
209 ['^', '=', ..] => {
210 exact_token_type = TokenType::CIRCUMFLEXEQUAL;
211 self.source.hide(1);
212 }
213 ['|', '=', ..] => {
214 exact_token_type = TokenType::VBAREQUAL;
215 self.source.hide(1);
216 }
217 ['%', ..] => {
218 exact_token_type = TokenType::PERCENT;
219 self.source.hide(2);
220 }
221 ['&', ..] => {
222 exact_token_type = TokenType::AMPER;
223 self.source.hide(2);
224 }
225 ['(', ..] => {
226 exact_token_type = TokenType::LPAR;
227 self.source.hide(2);
228 self.parenthesis_level += 1;
229 }
230 [')', ..] => {
231 exact_token_type = TokenType::RPAR;
232 self.source.hide(2);
233 self.parenthesis_level = self.parenthesis_level.saturating_sub(1);
234 }
235 ['*', ..] => {
236 exact_token_type = TokenType::STAR;
237 self.source.hide(2);
238 }
239 ['+', ..] => {
240 exact_token_type = TokenType::PLUS;
241 self.source.hide(2);
242 }
243 [',', ..] => {
244 exact_token_type = TokenType::COMMA;
245 self.source.hide(2);
246 }
247 ['-', ..] => {
248 exact_token_type = TokenType::MINUS;
249 self.source.hide(2);
250 }
251 ['.', ..] => {
252 exact_token_type = TokenType::DOT;
253 self.source.hide(2);
254 }
255 ['/', ..] => {
256 exact_token_type = TokenType::SLASH;
257 self.source.hide(2);
258 }
259 [':', ..] => {
260 exact_token_type = TokenType::COLON;
261 self.source.hide(2);
262 }
263 [';', ..] => {
264 exact_token_type = TokenType::SEMI;
265 self.source.hide(2);
266 }
267 ['<', ..] => {
268 exact_token_type = TokenType::LESS;
269 self.source.hide(2);
270 }
271 ['=', ..] => {
272 exact_token_type = TokenType::EQUAL;
273 self.source.hide(2);
274 }
275 ['>', ..] => {
276 exact_token_type = TokenType::GREATER;
277 self.source.hide(2);
278 }
279 ['@', ..] => {
280 exact_token_type = TokenType::AT;
281 self.source.hide(2);
282 }
283 ['[', ..] => {
284 exact_token_type = TokenType::LSQB;
285 self.source.hide(2);
286 self.bracket_level += 1;
287 }
288 [']', ..] => {
289 exact_token_type = TokenType::RSQB;
290 self.source.hide(2);
291 self.bracket_level = self.bracket_level.saturating_sub(1);
292 }
293 ['^', ..] => {
294 exact_token_type = TokenType::CIRCUMFLEX;
295 self.source.hide(2);
296 }
297 ['{', ..] => {
298 exact_token_type = TokenType::LBRACE;
299 self.source.hide(2);
300 self.brace_level += 1;
301 }
302 ['|', ..] => {
303 exact_token_type = TokenType::VBAR;
304 self.source.hide(2);
305 }
306 ['}', ..] => {
307 exact_token_type = TokenType::RBRACE;
308 self.source.hide(2);
309 self.brace_level = self.brace_level.saturating_sub(1);
310 }
311 ['~', ..] => {
312 exact_token_type = TokenType::TILDE;
313 self.source.hide(2);
314 }
315 _ => {
316 self.source.revert();
317 return false;
318 }
319 }
320 self.commit_to_exact_token(TokenType::OP, exact_token_type);
321 true
322 }
323
324 fn is_start_of_name(c: &char) -> bool {
325 c.is_letter_uppercase()
327 || c.is_letter_lowercase()
328 || c.is_letter_titlecase()
329 || c.is_letter_modifier()
330 || c.is_letter_other()
331 || c.is_number_letter()
332 || *c == '_'
333 }
334
335 fn is_part_of_name(c: &char) -> bool {
336 Self::is_start_of_name(c)
338 || c.is_mark_nonspacing()
339 || c.is_mark_spacing_combining()
340 || c.is_number_decimal_digit()
341 || c.is_punctuation_connector()
342 }
343
344 fn consume_next_name_token(&mut self) -> bool {
347 if let [next] = self.source.peek(1) {
348 if !Self::is_start_of_name(next) {
349 self.source.hide(1);
350 return false;
351 }
352 } else {
353 self.source.hide(1);
354 return false;
355 };
356
357 loop {
358 if let [next] = self.source.peek(1) {
359 if !Self::is_part_of_name(next) {
360 self.source.hide(1);
361 break;
362 }
363 } else {
364 self.source.hide(1);
365 break;
366 };
367 }
368
369 self.commit_to_token(TokenType::NAME);
370 true
371 }
372
373 fn is_bin_digit(c: &char) -> bool {
374 *c == '0' || *c == '1'
375 }
376
377 fn is_oct_digit(c: &char) -> bool {
378 Self::is_bin_digit(c)
379 || *c == '2'
380 || *c == '3'
381 || *c == '4'
382 || *c == '5'
383 || *c == '6'
384 || *c == '7'
385 }
386
387 fn is_dec_digit(c: &char) -> bool {
388 Self::is_oct_digit(c) || *c == '8' || *c == '9'
389 }
390
391 fn is_hex_digit(c: &char) -> bool {
392 Self::is_dec_digit(c)
393 || *c == 'a'
394 || *c == 'b'
395 || *c == 'c'
396 || *c == 'd'
397 || *c == 'e'
398 || *c == 'f'
399 || *c == 'A'
400 || *c == 'B'
401 || *c == 'C'
402 || *c == 'D'
403 || *c == 'E'
404 || *c == 'F'
405 }
406
407 fn find_end_of_integer(&mut self, valid_digit: fn(&char) -> bool) {
411 let mut last_under = false;
412 loop {
413 match self.source.peek(1) {
414 ['_'] => {
415 if last_under {
416 self.source.hide(2);
418 return;
419 } else {
420 last_under = true;
421 }
422 }
423 [next] if valid_digit(next) => {
424 last_under = false;
425 }
426 _ => {
427 if last_under {
428 self.source.hide(2);
429 } else {
430 self.source.hide(1);
431 };
432 return;
433 }
434 }
435 }
436 }
437
438 fn find_end_of_exponent(&mut self) -> bool {
441 match self.source.peek(2) {
442 [next, ..] if Self::is_dec_digit(next) => {
443 self.source.hide(1); self.find_end_of_integer(Self::is_dec_digit);
445 true
446 }
447 ['-' | '+', next] if Self::is_dec_digit(next) => {
448 self.find_end_of_integer(Self::is_dec_digit);
449 true
450 }
451 _ => {
452 self.source.hide(3);
455 false
456 }
457 }
458 }
459
460 fn consume_next_number_token(&mut self) -> bool {
463 let number_type: TokenType;
464
465 match self.source.peek(1) {
466 ['0'] => {
467 match self.source.peek(2) {
468 ['b' | 'B', next] if Self::is_bin_digit(next) => {
469 number_type = TokenType::BININT;
470 self.find_end_of_integer(Self::is_bin_digit);
471 }
472 ['o' | 'O', next] if Self::is_oct_digit(next) => {
473 number_type = TokenType::OCTINT;
474 self.find_end_of_integer(Self::is_oct_digit);
475 }
476 ['x' | 'X', next] if Self::is_hex_digit(next) => {
477 number_type = TokenType::HEXINT;
478 self.find_end_of_integer(Self::is_hex_digit);
479 }
480 [next, ..] if Self::is_dec_digit(next) || *next == '_' => {
481 let last_zero: usize;
484 if *next == '_' {
485 self.source.hide(2);
487 self.find_end_of_integer(|c| *c == '0');
488 last_zero = self.source.peeked_index();
489 } else if *next == '0' {
490 self.source.hide(1);
492 self.find_end_of_integer(|c| *c == '0');
493 last_zero = self.source.peeked_index();
494 } else {
495 self.source.hide(1);
497 last_zero = self.source.peeked_index() - 1;
498 };
499 match self.source.peek(1) {
500 ['.'] => {
501 number_type = TokenType::FLOAT;
502 self.find_end_of_integer(Self::is_dec_digit);
503 if !matches!(self.source.peek(1), ['e' | 'E'])
504 || !self.find_end_of_exponent()
505 {
506 self.source.hide(1);
507 };
508 }
509 ['e' | 'E'] => {
510 if self.find_end_of_exponent() {
511 number_type = TokenType::FLOAT;
513 } else {
514 self.source.hide(self.source.peeked_index() - last_zero);
516 number_type = TokenType::INTEGER;
517 };
518 }
519 [next] if Self::is_dec_digit(next) || *next == '_' => {
520 self.find_end_of_integer(Self::is_dec_digit);
524 match self.source.peek(1) {
525 ['.'] => {
526 number_type = TokenType::FLOAT;
528 self.find_end_of_integer(Self::is_dec_digit);
529 if let ['e' | 'E'] = self.source.peek(1) {
530 self.find_end_of_exponent();
531 } else {
532 self.source.hide(1);
533 };
534 }
535 ['e' | 'E'] => {
536 if self.find_end_of_exponent() {
537 number_type = TokenType::FLOAT;
540 } else {
541 number_type = TokenType::INTEGER;
543 self.source
544 .hide(self.source.peeked_index() - last_zero);
545 };
546 }
547 ['j' | 'J'] => {
548 number_type = TokenType::IMAGINARY;
553 }
554 _ => {
555 number_type = TokenType::INTEGER;
557 self.source.hide(self.source.peeked_index() - last_zero);
558 }
559 };
560 }
561 _ => {
562 number_type = TokenType::INTEGER;
564 self.source.hide(1);
565 }
566 };
567 }
568 ['.', ..] => {
569 self.source.hide(1);
571 number_type = TokenType::FLOAT;
572 self.find_end_of_integer(Self::is_dec_digit);
573 if let ['e' | 'E'] = self.source.peek(1) {
574 self.find_end_of_exponent();
575 } else {
576 self.source.hide(1);
577 };
578 }
579 ['e' | 'E', ..] => {
580 self.source.hide(1);
582 if self.find_end_of_exponent() {
583 number_type = TokenType::FLOAT;
585 } else {
586 number_type = TokenType::INTEGER;
588 };
589 }
590 _ => {
591 number_type = TokenType::INTEGER;
593 self.source.hide(2);
594 }
595 };
596 }
597 ['1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'] => {
598 self.find_end_of_integer(Self::is_dec_digit);
599 match self.source.peek(1) {
600 ['.'] => {
601 number_type = TokenType::FLOAT;
603 self.find_end_of_integer(Self::is_dec_digit);
604 if let ['e' | 'E'] = self.source.peek(1) {
605 self.find_end_of_exponent();
606 } else {
607 self.source.hide(1);
608 };
609 }
610 ['e' | 'E'] => {
611 if self.find_end_of_exponent() {
612 number_type = TokenType::FLOAT;
614 } else {
615 number_type = TokenType::INTEGER;
617 };
618 }
619 _ => {
620 number_type = TokenType::INTEGER;
622 self.source.hide(1);
623 }
624 };
625 }
626 ['.'] => {
627 match self.source.peek(1) {
628 [next] if Self::is_dec_digit(next) => {
629 number_type = TokenType::FLOAT;
631 self.find_end_of_integer(Self::is_dec_digit);
632 if let ['e' | 'E'] = self.source.peek(1) {
633 self.find_end_of_exponent();
634 } else {
635 self.source.hide(1);
636 };
637 }
638 _ => {
639 self.source.revert();
641 return false;
642 }
643 }
644 }
645 _ => {
646 self.source.revert();
648 return false;
649 }
650 };
651
652 let exact_token_type: TokenType;
653 if let TokenType::INTEGER | TokenType::FLOAT = number_type {
654 if let ['j' | 'J'] = self.source.peek(1) {
655 exact_token_type = TokenType::IMAGINARY;
656 } else {
657 self.source.hide(1);
658 exact_token_type = number_type;
659 };
660 } else {
661 exact_token_type = number_type;
662 };
663
664 self.commit_to_exact_token(TokenType::NUMBER, exact_token_type);
665 true
666 }
667
668 fn consume_next_newline(&mut self) -> Option<bool> {
671 return match self.source.peek(2) {
672 ['\\', '\n'] => {
673 self.source.commit();
675 Some(false)
676 }
677 ['\n', ..] => {
678 self.source.hide(1);
679 if self.within_statement
680 && self.parenthesis_level == 0
681 && self.bracket_level == 0
682 && self.brace_level == 0
683 {
684 self.commit_to_token(TokenType::NEWLINE);
685 Some(true)
686 } else {
687 self.commit_to_token(TokenType::NL);
688 Some(true)
689 }
690 }
691 _ => {
692 self.source.revert();
693 None
694 }
695 };
696 }
697
698 fn consume_next_dent(&mut self) -> Result<bool, String> {
702 let mut spaces: usize = 0;
703 let mut no_more_source = true;
704 while let [next] = self.source.peek(1) {
705 if *next == ' ' {
706 spaces += 1;
707 } else if *next == '\t' {
708 spaces += 8 - (spaces % 8);
710 } else if *next == '\u{000C}' {
711 continue;
713 } else if *next == '\n' || *next == '\\' || *next == '#' {
714 self.source.hide(1);
717 self.source.commit();
718 return Ok(false);
719 } else {
720 no_more_source = false;
721 break;
722 };
723 }
724 self.source.hide(1);
725 if no_more_source {
726 self.source.commit();
728 return Ok(false);
729 };
730
731 match *self.indents_seen.last().unwrap() {
732 s if s == spaces => {
733 self.source.commit();
735 Ok(false)
736 }
737 s if s < spaces => {
738 self.indents_seen.push(spaces);
739 self.commit_to_token(TokenType::INDENT);
740 Ok(true)
741 }
742 _ => {
743 self.source.commit();
746 loop {
747 self.indents_seen.pop();
748 match *self.indents_seen.last().unwrap() {
749 s if s == spaces => {
750 self.add_token(
751 TokenType::DEDENT,
752 TokenType::DEDENT,
753 String::from(""),
754 self.source.committed_index(),
755 self.source.committed_index(),
756 );
757 return Ok(true);
758 }
759 s if s < spaces || self.indents_seen.len() == 1 => {
760 return Err(String::from(
761 "dedent does not match any outer indentation level",
762 ));
763 }
764 _ => {
765 self.add_token(
766 TokenType::DEDENT,
767 TokenType::DEDENT,
768 String::from(""),
769 self.source.committed_index(),
770 self.source.committed_index(),
771 );
772 }
773 };
774 }
775 }
776 }
777 }
778
779 fn consume_next_whitespace(&mut self) {
783 while let [next] = self.source.peek(1) {
784 if *next != ' ' && *next != '\t' && *next != '\u{000C}' {
786 break;
787 };
788 }
789 self.source.hide(1);
791 self.source.commit();
792 }
793
794 fn consume_next_comment(&mut self) -> bool {
795 if let [next] = self.source.peek(1) {
796 if *next == '#' {
797 while let [next] = self.source.peek(1) {
798 if *next == '\n' {
799 break;
800 };
801 }
802 self.source.hide(1);
803 self.commit_to_token(TokenType::COMMENT);
804 return true;
805 };
806 };
807 self.source.revert();
808 false
809 }
810
    /// Advances the lookahead to just past the closing triple quote `end_match`.
    ///
    /// A three-character window is slid over the input one character at a
    /// time. The search succeeds when the window equals `end_match` while
    /// `last_escape` is unset.
    ///
    /// # Errors
    ///
    /// Returns `"EOF in multi-line string"` when fewer than three characters
    /// remain before a closing delimiter is found.
    fn find_end_tripple_quote(&mut self, end_match: [char; 3]) -> Result<(), String> {
        let mut last_escape = false;
        while let [a, b, c] = self.source.peek(3) {
            if [*a, *b, *c] == end_match && !last_escape {
                return Ok(());
            // NOTE(review): this condition decides whether the next window
            // starts in an escaped state, based on where backslashes sit in
            // the current window. `&&` binds tighter than `||` here. The
            // exact escape rules intended are not evident from this block;
            // confirm against tests before changing it.
            } else if [*a, *b, *c] == ['\\', '\\', '\\'] && !last_escape
                || *c == '\\' && *b != '\\'
                || *a == '\\' && !last_escape
            {
                last_escape = true;
            } else if last_escape {
                last_escape = false;
            };
            // Three characters were peeked; keep only the first so the window
            // advances by one.
            self.source.hide(2);
        }
        Err(String::from("EOF in multi-line string"))
    }
828
829 fn find_end_quote(&mut self, end_match: [char; 1]) -> bool {
830 let mut last_escape = false;
831 while let [a] = self.source.peek(1) {
832 if [*a] == end_match && !last_escape {
833 return true;
834 } else if [*a] == ['\n'] {
835 return false;
836 } else if [*a] == ['\\'] && !last_escape {
837 last_escape = true;
838 } else if last_escape {
839 last_escape = false;
840 };
841 }
842 false
843 }
844
845 fn consume_next_string_token(&mut self) -> Result<bool, String> {
846 let qt: char;
847 match self.source.peek(3) {
848 [q, ..] if q == &'\'' || q == &'"' => {
849 qt = *q;
850 self.source.hide(2);
851 }
852 ['b' | 'B', q, ..] | ['f' | 'F', q, ..] | ['r' | 'R', q, ..] | ['u' | 'U', q, ..]
853 if q == &'\'' || q == &'"' =>
854 {
855 qt = *q;
856 self.source.hide(1);
857 }
858 ['r' | 'R', 'b' | 'B' | 'f' | 'F', q] | ['b' | 'B' | 'f' | 'F', 'r' | 'R', q]
859 if q == &'\'' || q == &'"' =>
860 {
861 qt = *q;
862 }
863 _ => {
864 self.source.revert();
865 return Ok(false);
866 }
867 };
868 match self.source.peek(2) {
869 [a, b] if [*a, *b] == [qt, qt] => {
870 self.find_end_tripple_quote([qt, qt, qt])?;
871 }
872 _ => {
873 self.source.hide(2);
874 if !self.find_end_quote([qt]) {
875 self.source.revert();
876 self.source.peek(1);
877 self.commit_to_token(TokenType::ERRORTOKEN);
878 return Ok(true);
879 };
880 }
881 };
882 self.commit_to_token(TokenType::STRING);
883 Ok(true)
884 }
885
886 fn finalize_stream(&mut self) -> Result<(), String> {
887 if self.parenthesis_level != 0 || self.brace_level != 0 || self.bracket_level != 0 {
888 return Err(String::from("EOF in multi-line statement"));
889 }
890 if self.within_statement {
891 self.add_token(
893 TokenType::NEWLINE,
894 TokenType::NEWLINE,
895 String::from(""),
896 self.source.committed_index(),
897 self.source.committed_index() + 1,
898 );
899 };
900 while self.indents_seen.len() > 1 {
901 self.indents_seen.pop();
903 self.add_token(
904 TokenType::DEDENT,
905 TokenType::DEDENT,
906 String::from(""),
907 self.source.committed_index() + 1,
908 self.source.committed_index() + 1,
909 );
910 }
911 self.add_token(
912 TokenType::ENDMARKER,
913 TokenType::ENDMARKER,
914 String::from(""),
915 self.source.committed_index() + 1,
916 self.source.committed_index() + 1,
917 );
918 self.ended = true;
919 Ok(())
920 }
921
922 fn consume_next_token(&mut self) -> Result<(), String> {
923 if self.ended {
924 return Ok(());
925 };
926 if self.source.at_end() {
927 return self.finalize_stream();
928 };
929
930 if !self.within_statement {
933 match self.consume_next_dent() {
934 Ok(true) => {
935 return Ok(());
936 }
937 Ok(false) => (),
938 Err(e) => return Err(e),
939 };
940 };
941 self.consume_next_whitespace();
943 if let Some(produced_token) = self.consume_next_newline() {
944 if produced_token {
945 return Ok(());
946 } else {
947 return self.consume_next_token();
949 };
950 };
951 if self.consume_next_number_token() {
953 return Ok(());
954 };
955 if self.consume_next_op_token() {
956 return Ok(());
957 };
958 match self.consume_next_string_token() {
960 Ok(true) => {
961 return Ok(());
962 }
963 Ok(false) => (),
964 Err(e) => return Err(e),
965 };
966 if self.consume_next_name_token() {
967 return Ok(());
968 };
969 if self.consume_next_comment() {
970 return Ok(());
971 };
972
973 if self.source.at_end() {
975 return self.consume_next_token();
976 } else {
977 self.source.peek(1);
978 self.commit_to_token(TokenType::ERRORTOKEN);
979 return Ok(());
980 };
981 }
982}
983
984impl Iterator for TokenStream {
985 type Item = Result<Token, String>;
986
987 fn next(&mut self) -> Option<Self::Item> {
988 if self.tokens.is_empty() {
989 match self.consume_next_token() {
990 Ok(_) => (),
991 Err(e) => return Some(Err(e)),
992 }
993 };
994 if self.tokens.is_empty() {
995 None
996 } else {
997 Ok(self.tokens.pop_front()).transpose()
998 }
999 }
1000}