1use mical_cli_syntax::token::{TokenKind::*, *};
2use std::iter;
3
4mod cursor;
5use cursor::Cursor;
6
7struct TokenStreamImpl<'src, I: Iterator<Item = Token>> {
8 source: &'src str,
9 iter: I,
10}
11
12impl<I: Iterator<Item = Token>> Iterator for TokenStreamImpl<'_, I> {
13 type Item = Token;
14 fn next(&mut self) -> Option<Self::Item> {
15 self.iter.next()
16 }
17}
18impl<'s, I: Iterator<Item = Token>> TokenStream<'s> for TokenStreamImpl<'s, I> {
19 fn source(&self) -> &'s str {
20 self.source
21 }
22}
23
24pub fn tokenize(source: &str) -> impl TokenStream<'_> {
25 let mut cursor = Cursor::new(source);
26 TokenStreamImpl { source, iter: iter::from_fn(move || advance_token(&mut cursor)) }
27}
28
29fn advance_token(cursor: &mut Cursor) -> Option<Token> {
30 let kind = match cursor.next()? {
31 't' => true_or_word(cursor),
32 'f' => false_or_word(cursor),
33 '\t' => {
34 cursor.eat_while(|c| c == '\t');
35 Tab
36 }
37 '\n' => Newline,
38 '\r' => {
39 if let Some('\n') = cursor.peek() {
40 cursor.next();
41 }
42 Newline
43 }
44 ' ' => {
45 cursor.eat_while(|c| c == ' ');
46 Space
47 }
48 '}' => CloseBrace,
49 '>' => Greater,
50 '-' => Minus,
51 '{' => OpenBrace,
52 '|' => Pipe,
53 '+' => Plus,
54 '#' => Sharp,
55 '"' => string::<'"'>(cursor),
56 '\'' => string::<'\''>(cursor),
57 c @ '0'..='9' => integer_or_word(cursor, c),
58 _ => word(cursor),
59 };
60 let token = cursor.bump(kind);
61 Some(token)
62}
63
64fn true_or_word(cursor: &mut Cursor) -> TokenKind {
65 debug_assert!(cursor.prev() == 't');
66 if let Some('r') = cursor.peek() {
67 cursor.next();
68 if let Some('u') = cursor.peek() {
69 cursor.next();
70 if let Some('e') = cursor.peek() {
71 cursor.next();
72 return True;
73 }
74 }
75 }
76 word(cursor)
77}
78
79fn false_or_word(cursor: &mut Cursor) -> TokenKind {
80 debug_assert!(cursor.prev() == 'f');
81 if let Some('a') = cursor.peek() {
82 cursor.next();
83 if let Some('l') = cursor.peek() {
84 cursor.next();
85 if let Some('s') = cursor.peek() {
86 cursor.next();
87 if let Some('e') = cursor.peek() {
88 cursor.next();
89 return False;
90 }
91 }
92 }
93 }
94 word(cursor)
95}
96
97fn string<const Q: char>(cursor: &mut Cursor) -> TokenKind {
98 const { assert!(Q == '"' || Q == '\'') };
99 debug_assert!(cursor.prev() == Q);
100
101 let mut terminated = false;
102 while let Some(c) = cursor.peek() {
103 match c {
104 '\\' => {
105 cursor.next();
106 let peek = cursor.peek();
107 if peek == Some(Q) || peek == Some('\\') {
108 cursor.next();
109 }
110 }
111 '\n' | '\r' => {
112 break;
113 }
114 q if q == Q => {
115 terminated = true;
116 cursor.next();
117 break;
118 }
119 _ => {
120 cursor.next();
121 }
122 }
123 }
124 String {
125 is_terminated: terminated,
126 quote: const {
127 match Q {
128 '"' => Quote::Double,
129 '\'' => Quote::Single,
130 _ => unreachable!(),
131 }
132 },
133 }
134}
135
136fn integer_or_word(cursor: &mut Cursor, first_digit: char) -> TokenKind {
137 debug_assert!(first_digit.is_ascii_digit()); fn eat_decimal_digits(cursor: &mut Cursor) -> bool {
139 let mut has_digits = false;
140 while let Some(c) = cursor.peek() {
141 match c {
142 '_' => (),
143 '0'..='9' => has_digits = true,
144 _ => break,
145 };
146 cursor.next();
147 }
148 has_digits
149 }
150 fn eat_hexadecimal_digits(cursor: &mut Cursor) -> bool {
151 let mut has_digits = false;
152 while let Some(c) = cursor.peek() {
153 match c {
154 '_' => (),
155 '0'..='9' | 'a'..='f' | 'A'..='F' => has_digits = true,
156 _ => break,
157 };
158 cursor.next();
159 }
160 has_digits
161 }
162 let mut radix = Radix::Decimal;
163 let has_digits = if first_digit == '0' {
164 match cursor.peek() {
165 Some('b') => {
166 radix = Radix::Binary;
167 cursor.next();
168 eat_decimal_digits(cursor)
169 }
170 Some('o') => {
171 radix = Radix::Octal;
172 cursor.next();
173 eat_decimal_digits(cursor)
174 }
175 Some('x') => {
176 radix = Radix::Hexadecimal;
177 cursor.next();
178 eat_hexadecimal_digits(cursor)
179 }
180 Some('0'..='9' | '_') => eat_decimal_digits(cursor),
181 _ => true, }
183 } else {
184 eat_decimal_digits(cursor);
185 true };
187 match cursor.peek() {
188 Some('\t' | '\n' | ' ') | None => Numeral { radix, is_empty: !has_digits },
189 _ => word(cursor),
190 }
191}
192
193fn word(cursor: &mut Cursor) -> TokenKind {
194 cursor.eat_while(|c| !matches!(c, '\t' | '\n' | ' '));
195 Word
196}