/// A single lexical token produced by the Forth tokenizer.
///
/// All string-carrying variants borrow from the source text being tokenized,
/// so tokens are cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ForthToken<'a> {
    /// An integer literal.
    Number(i64),
    /// A plain word (anything that is not a number or a special form).
    Command(&'a str),
    /// A word ending in `"` (e.g. `."` or `.s"`) together with the string
    /// payload that runs to the closing `"`.
    StringCommand(&'a str, &'a str),
    /// The `:` word, which starts a definition.
    Colon,
    /// The `;` word, which ends a definition.
    SemiColon,
    /// A `\` comment running to the end of the line (backslash included).
    DropLineComment(&'a str),
    /// A `( … )` remark, including the opening parenthesis but not the
    /// closing one.
    ParenthesizedRemark(&'a str),
}
36
/// Tokenizer for Forth source text; borrows the text for lifetime `'a`.
/// Iterate over it (by value or by reference) to obtain [`ForthToken`]s.
pub struct ForthTokenizer<'a> {
    // The full input to tokenize; handed to the iterator unchanged.
    to_tokenize: &'a str,
}
41
42impl<'a> ForthTokenizer<'a> {
43 pub fn new(to_tokenize: &'a str) -> ForthTokenizer<'a> {
44 ForthTokenizer { to_tokenize }
45 }
46}
47
48impl<'a> IntoIterator for ForthTokenizer<'a> {
49 type Item = ForthToken<'a>;
50 type IntoIter = ForthTokenizerIntoIterator<'a>;
51
52 fn into_iter(self) -> Self::IntoIter {
53 ForthTokenizerIntoIterator {
54 to_tokenize: self.to_tokenize,
55 }
56 }
57}
58
/// Iterator state for tokenization; holds the not-yet-consumed tail of the
/// input, which shrinks as tokens are produced.
pub struct ForthTokenizerIntoIterator<'a> {
    // Remaining unscanned input.
    to_tokenize: &'a str,
}
62
63impl<'a> Iterator for ForthTokenizerIntoIterator<'a> {
65 type Item = ForthToken<'a>;
66
67 fn next(&mut self) -> Option<ForthToken<'a>> {
71 self.to_tokenize = self.to_tokenize.trim_start();
73
74 if let Some(c) = self.to_tokenize.chars().next() {
75 match c {
76 '\\' => {
77 let (first, rest) = split_at_newline(self.to_tokenize);
78 self.to_tokenize = rest;
79 Some(ForthToken::DropLineComment(first))
80 }
81 ':' => {
82 self.to_tokenize = &self.to_tokenize[1..];
83 Some(ForthToken::Colon)
84 }
85 ';' => {
86 self.to_tokenize = &self.to_tokenize[1..];
87 Some(ForthToken::SemiColon)
88 }
89 '(' => {
90 let (first, rest) = split_at_token(self.to_tokenize, ')');
91 self.to_tokenize = rest;
92 Some(ForthToken::ParenthesizedRemark(first))
93 }
94 _ => {
95 let (start, rest) = split_at_ascii_whitespace(self.to_tokenize);
96 self.to_tokenize = rest;
97
98 if start.ends_with('"') {
99 let (newstart, newrest) = split_at_token(rest, '"');
100 self.to_tokenize = newrest;
101
102 return Some(ForthToken::StringCommand(&start, newstart));
103 }
104 match start.parse::<i64>() {
106 Ok(n) => Some(ForthToken::Number(n)),
108 Err(_) => Some(ForthToken::Command(start)),
110 }
111 }
112 }
113 } else {
114 None
115 }
116 }
117}
118
119impl<'a> IntoIterator for &'a ForthTokenizer<'a> {
120 type Item = ForthToken<'a>;
121 type IntoIter = ForthTokenizerIntoIterator<'a>;
122
123 fn into_iter(self) -> Self::IntoIter {
124 ForthTokenizerIntoIterator {
125 to_tokenize: self.to_tokenize,
126 }
127 }
128}
129
/// Splits `to_split` at the first line break, returning the text before the
/// break and the remainder after it. A `"\r\n"` pair is consumed as a single
/// line ending. With no line break present, the whole input is returned with
/// an empty remainder.
///
/// The previous implementation called `.chars().next().unwrap()` on the
/// remainder and therefore panicked when the input ended with a newline
/// (e.g. `"abc\n"`); it also swallowed the second of two consecutive
/// `'\n'` characters. Both are fixed here.
fn split_at_newline(to_split: &str) -> (&str, &str) {
    match to_split.find(|c| c == '\n' || c == '\r') {
        Some(pos) => {
            let first = &to_split[..pos];
            // '\n' and '\r' are single bytes, so pos + 1 is a char boundary.
            let rest = &to_split[pos + 1..];
            // Only pair a following '\n' with a '\r' separator ("\r\n").
            if to_split.as_bytes()[pos] == b'\r' && rest.starts_with('\n') {
                (first, &rest[1..])
            } else {
                (first, rest)
            }
        }
        None => (to_split, ""),
    }
}
145
/// Splits `to_split` at the first ASCII whitespace character, returning the
/// word before it and the remainder after it. A `"\r\n"` pair is consumed as
/// a single separator. With no whitespace present, the whole input is
/// returned with an empty remainder.
///
/// The previous implementation called `.chars().next().unwrap()` on the
/// remainder and therefore panicked when the input ended with whitespace
/// (e.g. tokenizing `"1\n"`); it also swallowed a '\n' that followed any
/// separator, not just '\r'. Both are fixed here.
fn split_at_ascii_whitespace(to_split: &str) -> (&str, &str) {
    match to_split.find(|c: char| c.is_ascii_whitespace()) {
        Some(pos) => {
            let first = &to_split[..pos];
            // ASCII whitespace is a single byte, so pos + 1 is a char boundary.
            let rest = &to_split[pos + 1..];
            // Only pair a following '\n' with a '\r' separator ("\r\n").
            if to_split.as_bytes()[pos] == b'\r' && rest.starts_with('\n') {
                (first, &rest[1..])
            } else {
                (first, rest)
            }
        }
        None => (to_split, ""),
    }
}
161
/// Splits `to_split` at the first occurrence of `token`, dropping the token
/// itself; a '\n' immediately following the token is also dropped. When the
/// token is absent, the whole input is returned with an empty remainder.
///
/// The previous implementation called `.chars().next().unwrap()` on the
/// remainder and therefore panicked when `token` was the input's final
/// character (e.g. a `( remark )` at end of input). `split_once` plus an
/// `Option` match fixes that.
fn split_at_token(to_split: &str, token: char) -> (&str, &str) {
    match to_split.split_once(token) {
        Some((first, rest)) => match rest.chars().next() {
            // Swallow a line break directly following the token.
            Some('\n') => (first, &rest[1..]),
            _ => (first, rest),
        },
        None => (to_split, ""),
    }
}
177
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Unit tests for the split_at_newline helper ----

    #[test]
    fn test_split_at_newline_1() {
        // Empty input yields two empty halves.
        assert_eq!(split_at_newline(""), ("", ""));
    }

    #[test]
    fn test_split_at_newline_2() {
        // No newline: everything is "first", remainder is empty.
        assert_eq!(split_at_newline("abc"), ("abc", ""));
    }

    #[test]
    fn test_split_at_newline_3() {
        // "\r\n" is consumed as a single line ending.
        assert_eq!(split_at_newline("abc\r\ndef"), ("abc", "def"));
    }

    #[test]
    fn test_split_at_newline_4() {
        assert_eq!(split_at_newline("abc\ndef"), ("abc", "def"));
        assert_eq!(split_at_newline(""), ("", ""));
    }
    #[test]
    fn test_split_at_newline_5() {
        // Only the first line ending splits; later ones stay in the remainder.
        assert_eq!(
            split_at_newline("abc\r\ndef\r\nghi\r\njkl"),
            ("abc", "def\r\nghi\r\njkl")
        );
    }

    #[test]
    fn test_split_at_newline_6() {
        assert_eq!(
            split_at_newline("abc\ndef\nghi\njkl"),
            ("abc", "def\nghi\njkl")
        );
        assert_eq!(split_at_newline(""), ("", ""));
    }

    // ---- End-to-end tokenizer tests ----

    #[test]
    fn test_number_1() {
        // Numbers and words interleave freely.
        let tokenizer = ForthTokenizer::new("1 these 2 are 3 words 4");
        let collected: Vec<_> = tokenizer.into_iter().collect();
        assert_eq!(
            &collected,
            &vec![
                ForthToken::Number(1),
                ForthToken::Command("these"),
                ForthToken::Number(2),
                ForthToken::Command("are"),
                ForthToken::Number(3),
                ForthToken::Command("words"),
                ForthToken::Number(4),
            ]
        );
    }

    #[test]
    fn test_command_1() {
        // Words with punctuation (e.g. "#words") are still plain commands.
        let tokenizer = ForthTokenizer::new("these are #words 1 with 2 numbers");
        let collected: Vec<_> = tokenizer.into_iter().collect();
        assert_eq!(
            &collected,
            &vec![
                ForthToken::Command("these"),
                ForthToken::Command("are"),
                ForthToken::Command("#words"),
                ForthToken::Number(1),
                ForthToken::Command("with"),
                ForthToken::Number(2),
                ForthToken::Command("numbers"),
            ]
        );
    }

    #[test]
    fn test_colon_1() {
        // ':' and ';' tokenize as their own variants.
        let tokenizer = ForthTokenizer::new("word : wordname 1 2 3 ; definition");
        let collected: Vec<_> = tokenizer.into_iter().collect();
        assert_eq!(
            &collected,
            &vec![
                ForthToken::Command("word"),
                ForthToken::Colon,
                ForthToken::Command("wordname"),
                ForthToken::Number(1),
                ForthToken::Number(2),
                ForthToken::Number(3),
                ForthToken::SemiColon,
                ForthToken::Command("definition"),
            ]
        );
    }

    #[test]
    fn test_semicolon_1() {
        let tokenizer = ForthTokenizer::new("word : wordname 1 $whatever 3 ; definition");
        let collected: Vec<_> = tokenizer.into_iter().collect();
        assert_eq!(
            &collected,
            &vec![
                ForthToken::Command("word"),
                ForthToken::Colon,
                ForthToken::Command("wordname"),
                ForthToken::Number(1),
                ForthToken::Command("$whatever"),
                ForthToken::Number(3),
                ForthToken::SemiColon,
                ForthToken::Command("definition"),
            ]
        );
    }

    #[test]
    fn test_stringcommand_1() {
        // A bare '"' word starts a string running to the closing '"'.
        let tokenizer = ForthTokenizer::new("1 2 \" This is a string\" 3 4");
        let collected: Vec<_> = tokenizer.into_iter().collect();
        assert_eq!(
            &collected,
            &vec![
                ForthToken::Number(1),
                ForthToken::Number(2),
                ForthToken::StringCommand("\"", "This is a string"),
                ForthToken::Number(3),
                ForthToken::Number(4),
            ]
        );
    }

    #[test]
    fn test_stringcommand_2() {
        // Any word ending in '"' (here ".s\"") starts a string command.
        let tokenizer = ForthTokenizer::new("1 2 .s\" This is a string\" 3 4");
        let collected: Vec<_> = tokenizer.into_iter().collect();
        assert_eq!(
            &collected,
            &vec![
                ForthToken::Number(1),
                ForthToken::Number(2),
                ForthToken::StringCommand(".s\"", "This is a string"),
                ForthToken::Number(3),
                ForthToken::Number(4),
            ]
        );
    }

    #[test]
    fn test_droplinecomment_1() {
        // A backslash not at the start of a word does NOT begin a comment:
        // "2\\" is a plain command and the rest of the line tokenizes normally.
        let tokenizer = ForthTokenizer::new("1 2\\ This is a dropline comment\n\r1 3\r\n4");
        let collected: Vec<_> = tokenizer.into_iter().collect();
        assert_eq!(
            &collected,
            &vec![
                ForthToken::Number(1),
                ForthToken::Command("2\\"),
                ForthToken::Command("This"),
                ForthToken::Command("is"),
                ForthToken::Command("a"),
                ForthToken::Command("dropline"),
                ForthToken::Command("comment"),
                ForthToken::Number(1),
                ForthToken::Number(3),
                ForthToken::Number(4),
            ]
        );
    }

    #[test]
    fn test_droplinecomment_2() {
        // A backslash starting a word comments out the rest of the line.
        let tokenizer = ForthTokenizer::new("1 2 \\ This is a dropline comment\n\r1 3\r\n4");
        let collected: Vec<_> = tokenizer.into_iter().collect();
        assert_eq!(
            &collected,
            &vec![
                ForthToken::Number(1),
                ForthToken::Number(2),
                ForthToken::DropLineComment("\\ This is a dropline comment"),
                ForthToken::Number(1),
                ForthToken::Number(3),
                ForthToken::Number(4),
            ]
        );
    }

    #[test]
    fn test_parenthesized_remark_1() {
        // A '(' inside a dropline comment is part of the comment; a real
        // '(' remark runs to the closing ')'.
        let tokenizer = ForthTokenizer::new(
            "1 2 \\ This is a dropline comment ( This is not a parenthesized remark )\n\r1 ( This is in fact a parenthesized remark )3\r\n4",
        );
        let collected: Vec<_> = tokenizer.into_iter().collect();
        assert_eq!(
            &collected,
            &vec![
                ForthToken::Number(1),
                ForthToken::Number(2),
                ForthToken::DropLineComment(
                    "\\ This is a dropline comment ( This is not a parenthesized remark )"
                ),
                ForthToken::Number(1),
                ForthToken::ParenthesizedRemark("( This is in fact a parenthesized remark "),
                ForthToken::Number(3),
                ForthToken::Number(4),
            ]
        );
    }

    #[test]
    fn test_bug_1() {
        // Regression: newlines between number groups must not break tokenizing.
        let tokenizer = ForthTokenizer::new("1 1 1\n2 2 2\n3 3 3");
        let collected: Vec<_> = tokenizer.into_iter().collect();
        assert_eq!(
            &collected,
            &vec![
                ForthToken::Number(1),
                ForthToken::Number(1),
                ForthToken::Number(1),
                ForthToken::Number(2),
                ForthToken::Number(2),
                ForthToken::Number(2),
                ForthToken::Number(3),
                ForthToken::Number(3),
                ForthToken::Number(3)
            ]
        );
    }
}